Mercurial > octave
comparison liboctave/numeric/oct-fftw.cc @ 31607:aac27ad79be6 stable
maint: Re-indent code after switch to using namespace macros.
* build-env.h, build-env.in.cc, Cell.h, __betainc__.cc, __eigs__.cc,
__ftp__.cc, __ichol__.cc, __ilu__.cc, __isprimelarge__.cc, __magick_read__.cc,
__pchip_deriv__.cc, amd.cc, base-text-renderer.cc, base-text-renderer.h,
besselj.cc, bitfcns.cc, bsxfun.cc, c-file-ptr-stream.h, call-stack.cc,
call-stack.h, ccolamd.cc, cellfun.cc, chol.cc, colamd.cc, dasrt.cc, data.cc,
debug.cc, defaults.cc, defaults.h, det.cc, display.cc, display.h, dlmread.cc,
dynamic-ld.cc, dynamic-ld.h, ellipj.cc, environment.cc, environment.h,
error.cc, error.h, errwarn.h, event-manager.cc, event-manager.h,
event-queue.cc, event-queue.h, fcn-info.cc, fcn-info.h, fft.cc, fft2.cc,
file-io.cc, filter.cc, find.cc, ft-text-renderer.cc, ft-text-renderer.h,
gcd.cc, gl-render.cc, gl-render.h, gl2ps-print.cc, gl2ps-print.h,
graphics-toolkit.cc, graphics-toolkit.h, graphics.cc, gsvd.cc, gtk-manager.cc,
gtk-manager.h, help.cc, help.h, hook-fcn.cc, hook-fcn.h, input.cc, input.h,
interpreter-private.cc, interpreter-private.h, interpreter.cc, interpreter.h,
inv.cc, jsondecode.cc, jsonencode.cc, latex-text-renderer.cc,
latex-text-renderer.h, load-path.cc, load-path.h, load-save.cc, load-save.h,
lookup.cc, ls-hdf5.cc, ls-mat4.cc, ls-mat5.cc, lsode.cc, lu.cc, mappers.cc,
matrix_type.cc, max.cc, mex.cc, mexproto.h, mxarray.h, mxtypes.in.h,
oct-errno.in.cc, oct-hdf5-types.cc, oct-hist.cc, oct-hist.h, oct-map.cc,
oct-map.h, oct-opengl.h, oct-prcstrm.h, oct-process.cc, oct-process.h,
oct-stdstrm.h, oct-stream.cc, oct-stream.h, oct-strstrm.h,
octave-default-image.h, ordqz.cc, ordschur.cc, pager.cc, pager.h, pinv.cc,
pow2.cc, pr-output.cc, psi.cc, qr.cc, quadcc.cc, rand.cc, regexp.cc,
settings.cc, settings.h, sighandlers.cc, sighandlers.h, sparse-xpow.cc,
sqrtm.cc, stack-frame.cc, stack-frame.h, stream-euler.cc, strfns.cc, svd.cc,
syminfo.cc, syminfo.h, symrcm.cc, symrec.cc, symrec.h, symscope.cc, symscope.h,
symtab.cc, symtab.h, sysdep.cc, sysdep.h, text-engine.cc, text-engine.h,
text-renderer.cc, text-renderer.h, time.cc, toplev.cc, typecast.cc,
url-handle-manager.cc, url-handle-manager.h, urlwrite.cc, utils.cc, utils.h,
variables.cc, variables.h, xdiv.cc, __delaunayn__.cc, __init_fltk__.cc,
__init_gnuplot__.cc, __ode15__.cc, __voronoi__.cc, audioread.cc, convhulln.cc,
gzip.cc, cdef-class.cc, cdef-class.h, cdef-fwd.h, cdef-manager.cc,
cdef-manager.h, cdef-method.cc, cdef-method.h, cdef-object.cc, cdef-object.h,
cdef-package.cc, cdef-package.h, cdef-property.cc, cdef-property.h,
cdef-utils.cc, cdef-utils.h, ov-base-diag.cc, ov-base-int.cc, ov-base-mat.cc,
ov-base-mat.h, ov-base-scalar.cc, ov-base.cc, ov-base.h, ov-bool-mat.cc,
ov-bool-mat.h, ov-bool-sparse.cc, ov-bool.cc, ov-builtin.h, ov-cell.cc,
ov-ch-mat.cc, ov-class.cc, ov-class.h, ov-classdef.cc, ov-classdef.h,
ov-complex.cc, ov-cx-diag.cc, ov-cx-mat.cc, ov-cx-sparse.cc, ov-dld-fcn.cc,
ov-dld-fcn.h, ov-fcn-handle.cc, ov-fcn-handle.h, ov-fcn.h, ov-float.cc,
ov-flt-complex.cc, ov-flt-cx-diag.cc, ov-flt-cx-mat.cc, ov-flt-re-diag.cc,
ov-flt-re-mat.cc, ov-flt-re-mat.h, ov-intx.h, ov-java.cc, ov-lazy-idx.cc,
ov-legacy-range.cc, ov-magic-int.cc, ov-mex-fcn.cc, ov-mex-fcn.h,
ov-null-mat.cc, ov-perm.cc, ov-range.cc, ov-re-diag.cc, ov-re-mat.cc,
ov-re-mat.h, ov-re-sparse.cc, ov-scalar.cc, ov-str-mat.cc, ov-struct.cc,
ov-typeinfo.cc, ov-typeinfo.h, ov-usr-fcn.cc, ov-usr-fcn.h, ov.cc, ov.h, ovl.h,
octave.cc, octave.h, op-b-sbm.cc, op-bm-sbm.cc, op-cs-scm.cc, op-fm-fcm.cc,
op-fs-fcm.cc, op-s-scm.cc, op-scm-cs.cc, op-scm-s.cc, op-sm-cs.cc, ops.h,
anon-fcn-validator.cc, anon-fcn-validator.h, bp-table.cc, bp-table.h,
comment-list.cc, comment-list.h, filepos.h, lex.h, oct-lvalue.cc, oct-lvalue.h,
parse.h, profiler.cc, profiler.h, pt-anon-scopes.cc, pt-anon-scopes.h,
pt-arg-list.cc, pt-arg-list.h, pt-args-block.cc, pt-args-block.h,
pt-array-list.cc, pt-array-list.h, pt-assign.cc, pt-assign.h, pt-binop.cc,
pt-binop.h, pt-bp.cc, pt-bp.h, pt-cbinop.cc, pt-cbinop.h, pt-cell.cc,
pt-cell.h, pt-check.cc, pt-check.h, pt-classdef.cc, pt-classdef.h, pt-cmd.h,
pt-colon.cc, pt-colon.h, pt-const.cc, pt-const.h, pt-decl.cc, pt-decl.h,
pt-eval.cc, pt-eval.h, pt-except.cc, pt-except.h, pt-exp.cc, pt-exp.h,
pt-fcn-handle.cc, pt-fcn-handle.h, pt-id.cc, pt-id.h, pt-idx.cc, pt-idx.h,
pt-jump.h, pt-loop.cc, pt-loop.h, pt-mat.cc, pt-mat.h, pt-misc.cc, pt-misc.h,
pt-pr-code.cc, pt-pr-code.h, pt-select.cc, pt-select.h, pt-spmd.cc, pt-spmd.h,
pt-stmt.cc, pt-stmt.h, pt-tm-const.cc, pt-tm-const.h, pt-unop.cc, pt-unop.h,
pt-walk.cc, pt-walk.h, pt.cc, pt.h, token.cc, token.h, Range.cc, Range.h,
idx-vector.cc, idx-vector.h, range-fwd.h, CollocWt.cc, CollocWt.h,
aepbalance.cc, aepbalance.h, chol.cc, chol.h, gepbalance.cc, gepbalance.h,
gsvd.cc, gsvd.h, hess.cc, hess.h, lo-mappers.cc, lo-mappers.h, lo-specfun.cc,
lo-specfun.h, lu.cc, lu.h, oct-convn.cc, oct-convn.h, oct-fftw.cc, oct-fftw.h,
oct-norm.cc, oct-norm.h, oct-rand.cc, oct-rand.h, oct-spparms.cc,
oct-spparms.h, qr.cc, qr.h, qrp.cc, qrp.h, randgamma.cc, randgamma.h,
randmtzig.cc, randmtzig.h, randpoisson.cc, randpoisson.h, schur.cc, schur.h,
sparse-chol.cc, sparse-chol.h, sparse-lu.cc, sparse-lu.h, sparse-qr.cc,
sparse-qr.h, svd.cc, svd.h, child-list.cc, child-list.h, dir-ops.cc, dir-ops.h,
file-ops.cc, file-ops.h, file-stat.cc, file-stat.h, lo-sysdep.cc, lo-sysdep.h,
lo-sysinfo.cc, lo-sysinfo.h, mach-info.cc, mach-info.h, oct-env.cc, oct-env.h,
oct-group.cc, oct-group.h, oct-password.cc, oct-password.h, oct-syscalls.cc,
oct-syscalls.h, oct-time.cc, oct-time.h, oct-uname.cc, oct-uname.h,
action-container.cc, action-container.h, base-list.h, cmd-edit.cc, cmd-edit.h,
cmd-hist.cc, cmd-hist.h, f77-fcn.h, file-info.cc, file-info.h,
lo-array-errwarn.cc, lo-array-errwarn.h, lo-hash.cc, lo-hash.h, lo-ieee.h,
lo-regexp.cc, lo-regexp.h, lo-utils.cc, lo-utils.h, oct-base64.cc,
oct-base64.h, oct-glob.cc, oct-glob.h, oct-inttypes.h, oct-mutex.cc,
oct-mutex.h, oct-refcount.h, oct-shlib.cc, oct-shlib.h, oct-sparse.cc,
oct-sparse.h, oct-string.h, octave-preserve-stream-state.h, pathsearch.cc,
pathsearch.h, quit.cc, quit.h, unwind-prot.cc, unwind-prot.h, url-transfer.cc,
url-transfer.h:
Re-indent code after switch to using namespace macros.
author | Rik <rik@octave.org> |
---|---|
date | Thu, 01 Dec 2022 18:02:15 -0800 |
parents | e88a07dec498 |
children | 597f3ee61a48 |
comparison
equal
deleted
inserted
replaced
31605:e88a07dec498 | 31607:aac27ad79be6 |
---|---|
43 | 43 |
44 OCTAVE_BEGIN_NAMESPACE(octave) | 44 OCTAVE_BEGIN_NAMESPACE(octave) |
45 | 45 |
46 #if defined (HAVE_FFTW) | 46 #if defined (HAVE_FFTW) |
47 | 47 |
48 fftw_planner *fftw_planner::s_instance = nullptr; | 48 fftw_planner *fftw_planner::s_instance = nullptr; |
49 | 49 |
50 // Helper class to create and cache FFTW plans for both 1D and | 50 // Helper class to create and cache FFTW plans for both 1D and |
51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create | 51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create |
52 // the plans, which in theory is suboptimal, but provides quite | 52 // the plans, which in theory is suboptimal, but provides quite |
53 // reasonable performance in practice. | 53 // reasonable performance in practice. |
54 | 54 |
55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3 | 55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3 |
56 // will destroy the input and output arrays. We must, therefore, create a | 56 // will destroy the input and output arrays. We must, therefore, create a |
57 // temporary input array with the same size and 16-byte alignment as | 57 // temporary input array with the same size and 16-byte alignment as |
58 // the original array when using a different planner strategy. | 58 // the original array when using a different planner strategy. |
59 // Note that we also use any wisdom that is available, either in a | 59 // Note that we also use any wisdom that is available, either in a |
60 // FFTW3 system wide file or as supplied by the user. | 60 // FFTW3 system wide file or as supplied by the user. |
61 | 61 |
62 // FIXME: if we can ensure 16 byte alignment in Array<T> | 62 // FIXME: if we can ensure 16 byte alignment in Array<T> |
63 // (<T> *data) the FFTW3 can use SIMD instructions for further | 63 // (<T> *data) the FFTW3 can use SIMD instructions for further |
64 // acceleration. | 64 // acceleration. |
65 | 65 |
66 // Note that it is profitable to store the FFTW3 plans, for small FFTs. | 66 // Note that it is profitable to store the FFTW3 plans, for small FFTs. |
67 | 67 |
68 fftw_planner::fftw_planner (void) | 68 fftw_planner::fftw_planner (void) |
69 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0), | 69 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0), |
70 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1) | 70 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1) |
71 { | 71 { |
72 m_plan[0] = m_plan[1] = nullptr; | 72 m_plan[0] = m_plan[1] = nullptr; |
73 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0; | 73 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0; |
74 m_simd_align[0] = m_simd_align[1] = false; | 74 m_simd_align[0] = m_simd_align[1] = false; |
75 m_inplace[0] = m_inplace[1] = false; | 75 m_inplace[0] = m_inplace[1] = false; |
76 m_n[0] = m_n[1] = dim_vector (); | 76 m_n[0] = m_n[1] = dim_vector (); |
77 | 77 |
78 #if defined (HAVE_FFTW3_THREADS) | 78 #if defined (HAVE_FFTW3_THREADS) |
79 int init_ret = fftw_init_threads (); | 79 int init_ret = fftw_init_threads (); |
80 if (! init_ret) | 80 if (! init_ret) |
81 (*current_liboctave_error_handler) ("Error initializing FFTW threads"); | 81 (*current_liboctave_error_handler) ("Error initializing FFTW threads"); |
82 | 82 |
83 // Check number of processors available to the current process | 83 // Check number of processors available to the current process |
84 m_nthreads = | 84 m_nthreads = |
85 octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); | 85 octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); |
86 | 86 |
87 // Limit number of threads to 3 by default | 87 // Limit number of threads to 3 by default |
88 // See: https://octave.discourse.group/t/3121 | 88 // See: https://octave.discourse.group/t/3121 |
89 // This can be later changed with fftw ("threads", nthreads). | 89 // This can be later changed with fftw ("threads", nthreads). |
90 if (m_nthreads > 3) | 90 if (m_nthreads > 3) |
91 m_nthreads = 3; | 91 m_nthreads = 3; |
92 | 92 |
93 fftw_plan_with_nthreads (m_nthreads); | 93 fftw_plan_with_nthreads (m_nthreads); |
94 #endif | 94 #endif |
95 | 95 |
96 // If we have a system wide wisdom file, import it. | 96 // If we have a system wide wisdom file, import it. |
97 fftw_import_system_wisdom (); | 97 fftw_import_system_wisdom (); |
98 } | 98 } |
99 | 99 |
100 fftw_planner::~fftw_planner (void) | 100 fftw_planner::~fftw_planner (void) |
101 { | 101 { |
102 fftw_plan *plan_p; | 102 fftw_plan *plan_p; |
103 | 103 |
104 plan_p = reinterpret_cast<fftw_plan *> (&m_rplan); | 104 plan_p = reinterpret_cast<fftw_plan *> (&m_rplan); |
105 if (*plan_p) | 105 if (*plan_p) |
106 fftw_destroy_plan (*plan_p); | 106 fftw_destroy_plan (*plan_p); |
107 | 107 |
108 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]); | 108 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]); |
109 if (*plan_p) | 109 if (*plan_p) |
110 fftw_destroy_plan (*plan_p); | 110 fftw_destroy_plan (*plan_p); |
111 | 111 |
112 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]); | 112 plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]); |
113 if (*plan_p) | 113 if (*plan_p) |
114 fftw_destroy_plan (*plan_p); | 114 fftw_destroy_plan (*plan_p); |
115 } | 115 } |
116 | 116 |
117 bool | 117 bool |
118 fftw_planner::instance_ok (void) | 118 fftw_planner::instance_ok (void) |
119 { | 119 { |
120 bool retval = true; | 120 bool retval = true; |
121 | 121 |
122 if (! s_instance) | 122 if (! s_instance) |
123 { | 123 { |
124 s_instance = new fftw_planner (); | 124 s_instance = new fftw_planner (); |
125 singleton_cleanup_list::add (cleanup_instance); | 125 singleton_cleanup_list::add (cleanup_instance); |
126 } | 126 } |
127 | 127 |
128 return retval; | 128 return retval; |
129 } | 129 } |
130 | 130 |
131 void | 131 void |
132 fftw_planner::threads (int nt) | 132 fftw_planner::threads (int nt) |
133 { | 133 { |
134 #if defined (HAVE_FFTW3_THREADS) | 134 #if defined (HAVE_FFTW3_THREADS) |
135 if (instance_ok () && nt != threads ()) | 135 if (instance_ok () && nt != threads ()) |
136 { | 136 { |
137 s_instance->m_nthreads = nt; | 137 s_instance->m_nthreads = nt; |
138 fftw_plan_with_nthreads (nt); | 138 fftw_plan_with_nthreads (nt); |
139 // Clear the current plans. | 139 // Clear the current plans. |
140 s_instance->m_rplan = nullptr; | 140 s_instance->m_rplan = nullptr; |
141 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr; | 141 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr; |
142 } | 142 } |
143 #else | 143 #else |
144 octave_unused_parameter (nt); | 144 octave_unused_parameter (nt); |
145 | 145 |
146 (*current_liboctave_warning_handler) | 146 (*current_liboctave_warning_handler) |
147 ("unable to change number of threads without FFTW thread support"); | 147 ("unable to change number of threads without FFTW thread support"); |
148 #endif | 148 #endif |
149 } | 149 } |
150 | 150 |
151 #define CHECK_SIMD_ALIGNMENT(x) \ | 151 #define CHECK_SIMD_ALIGNMENT(x) \ |
152 (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0) | 152 (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0) |
153 | 153 |
154 void * | 154 void * |
155 fftw_planner::do_create_plan (int dir, const int rank, | 155 fftw_planner::do_create_plan (int dir, const int rank, |
156 const dim_vector& dims, | 156 const dim_vector& dims, |
157 octave_idx_type howmany, | 157 octave_idx_type howmany, |
158 octave_idx_type stride, | 158 octave_idx_type stride, |
159 octave_idx_type dist, | 159 octave_idx_type dist, |
160 const Complex *in, Complex *out) | 160 const Complex *in, Complex *out) |
161 { | 161 { |
162 int which = (dir == FFTW_FORWARD) ? 0 : 1; | 162 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
163 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]); | 163 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]); |
164 bool create_new_plan = false; | 164 bool create_new_plan = false; |
165 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | 165 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
166 bool ioinplace = (in == out); | 166 bool ioinplace = (in == out); |
167 | 167 |
168 // Don't create a new plan if we have a non SIMD plan already but | 168 // Don't create a new plan if we have a non SIMD plan already but |
169 // can do SIMD. This prevents endlessly recreating plans if we | 169 // can do SIMD. This prevents endlessly recreating plans if we |
170 // change the alignment. | 170 // change the alignment. |
171 | 171 |
172 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride | 172 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride |
173 || m_r[which] != rank || m_h[which] != howmany | 173 || m_r[which] != rank || m_h[which] != howmany |
174 || ioinplace != m_inplace[which] | 174 || ioinplace != m_inplace[which] |
175 || ((ioalign != m_simd_align[which]) ? ! ioalign : false)) | 175 || ((ioalign != m_simd_align[which]) ? ! ioalign : false)) |
176 create_new_plan = true; | 176 create_new_plan = true; |
177 else | 177 else |
178 { | 178 { |
179 // We still might not have the same shape of array. | 179 // We still might not have the same shape of array. |
180 | 180 |
181 for (int i = 0; i < rank; i++) | 181 for (int i = 0; i < rank; i++) |
182 if (dims(i) != m_n[which](i)) | 182 if (dims(i) != m_n[which](i)) |
183 { | |
184 create_new_plan = true; | |
185 break; | |
186 } | |
187 } | |
188 | |
189 if (create_new_plan) | |
190 { | |
191 m_d[which] = dist; | |
192 m_s[which] = stride; | |
193 m_r[which] = rank; | |
194 m_h[which] = howmany; | |
195 m_simd_align[which] = ioalign; | |
196 m_inplace[which] = ioinplace; | |
197 m_n[which] = dims; | |
198 | |
199 // Note reversal of dimensions for column major storage in FFTW. | |
200 octave_idx_type nn = 1; | |
201 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
202 | |
203 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
204 { | 183 { |
205 tmp[i] = dims(j); | 184 create_new_plan = true; |
206 nn *= dims(j); | |
207 } | |
208 | |
209 int plan_flags = 0; | |
210 bool plan_destroys_in = true; | |
211 | |
212 switch (m_meth) | |
213 { | |
214 case UNKNOWN: | |
215 case ESTIMATE: | |
216 plan_flags |= FFTW_ESTIMATE; | |
217 plan_destroys_in = false; | |
218 break; | |
219 case MEASURE: | |
220 plan_flags |= FFTW_MEASURE; | |
221 break; | |
222 case PATIENT: | |
223 plan_flags |= FFTW_PATIENT; | |
224 break; | |
225 case EXHAUSTIVE: | |
226 plan_flags |= FFTW_EXHAUSTIVE; | |
227 break; | |
228 case HYBRID: | |
229 if (nn < 8193) | |
230 plan_flags |= FFTW_MEASURE; | |
231 else | |
232 { | |
233 plan_flags |= FFTW_ESTIMATE; | |
234 plan_destroys_in = false; | |
235 } | |
236 break; | 185 break; |
237 } | 186 } |
238 | 187 } |
239 if (ioalign) | 188 |
240 plan_flags &= ~FFTW_UNALIGNED; | 189 if (create_new_plan) |
241 else | 190 { |
242 plan_flags |= FFTW_UNALIGNED; | 191 m_d[which] = dist; |
243 | 192 m_s[which] = stride; |
244 if (*cur_plan_p) | 193 m_r[which] = rank; |
245 fftw_destroy_plan (*cur_plan_p); | 194 m_h[which] = howmany; |
246 | 195 m_simd_align[which] = ioalign; |
247 if (plan_destroys_in) | 196 m_inplace[which] = ioinplace; |
197 m_n[which] = dims; | |
198 | |
199 // Note reversal of dimensions for column major storage in FFTW. | |
200 octave_idx_type nn = 1; | |
201 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
202 | |
203 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
204 { | |
205 tmp[i] = dims(j); | |
206 nn *= dims(j); | |
207 } | |
208 | |
209 int plan_flags = 0; | |
210 bool plan_destroys_in = true; | |
211 | |
212 switch (m_meth) | |
213 { | |
214 case UNKNOWN: | |
215 case ESTIMATE: | |
216 plan_flags |= FFTW_ESTIMATE; | |
217 plan_destroys_in = false; | |
218 break; | |
219 case MEASURE: | |
220 plan_flags |= FFTW_MEASURE; | |
221 break; | |
222 case PATIENT: | |
223 plan_flags |= FFTW_PATIENT; | |
224 break; | |
225 case EXHAUSTIVE: | |
226 plan_flags |= FFTW_EXHAUSTIVE; | |
227 break; | |
228 case HYBRID: | |
229 if (nn < 8193) | |
230 plan_flags |= FFTW_MEASURE; | |
231 else | |
232 { | |
233 plan_flags |= FFTW_ESTIMATE; | |
234 plan_destroys_in = false; | |
235 } | |
236 break; | |
237 } | |
238 | |
239 if (ioalign) | |
240 plan_flags &= ~FFTW_UNALIGNED; | |
241 else | |
242 plan_flags |= FFTW_UNALIGNED; | |
243 | |
244 if (*cur_plan_p) | |
245 fftw_destroy_plan (*cur_plan_p); | |
246 | |
247 if (plan_destroys_in) | |
248 { | |
249 // Create matrix with the same size and 16-byte alignment as input | |
250 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); | |
251 itmp = reinterpret_cast<Complex *> | |
252 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | |
253 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | |
254 | |
255 *cur_plan_p | |
256 = fftw_plan_many_dft (rank, tmp, howmany, | |
257 reinterpret_cast<fftw_complex *> (itmp), | |
258 nullptr, stride, dist, | |
259 reinterpret_cast<fftw_complex *> (out), | |
260 nullptr, stride, dist, dir, plan_flags); | |
261 } | |
262 else | |
263 { | |
264 *cur_plan_p | |
265 = fftw_plan_many_dft (rank, tmp, howmany, | |
266 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), | |
267 nullptr, stride, dist, | |
268 reinterpret_cast<fftw_complex *> (out), | |
269 nullptr, stride, dist, dir, plan_flags); | |
270 } | |
271 | |
272 if (*cur_plan_p == nullptr) | |
273 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
274 } | |
275 | |
276 return *cur_plan_p; | |
277 } | |
278 | |
279 void * | |
280 fftw_planner::do_create_plan (const int rank, const dim_vector& dims, | |
281 octave_idx_type howmany, | |
282 octave_idx_type stride, | |
283 octave_idx_type dist, | |
284 const double *in, Complex *out) | |
285 { | |
286 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan); | |
287 bool create_new_plan = false; | |
288 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | |
289 | |
290 // Don't create a new plan if we have a non SIMD plan already but | |
291 // can do SIMD. This prevents endlessly recreating plans if we | |
292 // change the alignment. | |
293 | |
294 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank | |
295 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false)) | |
296 create_new_plan = true; | |
297 else | |
298 { | |
299 // We still might not have the same shape of array. | |
300 | |
301 for (int i = 0; i < rank; i++) | |
302 if (dims(i) != m_rn(i)) | |
248 { | 303 { |
249 // Create matrix with the same size and 16-byte alignment as input | 304 create_new_plan = true; |
250 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); | |
251 itmp = reinterpret_cast<Complex *> | |
252 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | |
253 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | |
254 | |
255 *cur_plan_p | |
256 = fftw_plan_many_dft (rank, tmp, howmany, | |
257 reinterpret_cast<fftw_complex *> (itmp), | |
258 nullptr, stride, dist, | |
259 reinterpret_cast<fftw_complex *> (out), | |
260 nullptr, stride, dist, dir, plan_flags); | |
261 } | |
262 else | |
263 { | |
264 *cur_plan_p | |
265 = fftw_plan_many_dft (rank, tmp, howmany, | |
266 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), | |
267 nullptr, stride, dist, | |
268 reinterpret_cast<fftw_complex *> (out), | |
269 nullptr, stride, dist, dir, plan_flags); | |
270 } | |
271 | |
272 if (*cur_plan_p == nullptr) | |
273 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
274 } | |
275 | |
276 return *cur_plan_p; | |
277 } | |
278 | |
279 void * | |
280 fftw_planner::do_create_plan (const int rank, const dim_vector& dims, | |
281 octave_idx_type howmany, | |
282 octave_idx_type stride, | |
283 octave_idx_type dist, | |
284 const double *in, Complex *out) | |
285 { | |
286 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan); | |
287 bool create_new_plan = false; | |
288 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | |
289 | |
290 // Don't create a new plan if we have a non SIMD plan already but | |
291 // can do SIMD. This prevents endlessly recreating plans if we | |
292 // change the alignment. | |
293 | |
294 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank | |
295 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false)) | |
296 create_new_plan = true; | |
297 else | |
298 { | |
299 // We still might not have the same shape of array. | |
300 | |
301 for (int i = 0; i < rank; i++) | |
302 if (dims(i) != m_rn(i)) | |
303 { | |
304 create_new_plan = true; | |
305 break; | |
306 } | |
307 } | |
308 | |
309 if (create_new_plan) | |
310 { | |
311 m_rd = dist; | |
312 m_rs = stride; | |
313 m_rr = rank; | |
314 m_rh = howmany; | |
315 m_rsimd_align = ioalign; | |
316 m_rn = dims; | |
317 | |
318 // Note reversal of dimensions for column major storage in FFTW. | |
319 octave_idx_type nn = 1; | |
320 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
321 | |
322 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
323 { | |
324 tmp[i] = dims(j); | |
325 nn *= dims(j); | |
326 } | |
327 | |
328 int plan_flags = 0; | |
329 bool plan_destroys_in = true; | |
330 | |
331 switch (m_meth) | |
332 { | |
333 case UNKNOWN: | |
334 case ESTIMATE: | |
335 plan_flags |= FFTW_ESTIMATE; | |
336 plan_destroys_in = false; | |
337 break; | |
338 case MEASURE: | |
339 plan_flags |= FFTW_MEASURE; | |
340 break; | |
341 case PATIENT: | |
342 plan_flags |= FFTW_PATIENT; | |
343 break; | |
344 case EXHAUSTIVE: | |
345 plan_flags |= FFTW_EXHAUSTIVE; | |
346 break; | |
347 case HYBRID: | |
348 if (nn < 8193) | |
349 plan_flags |= FFTW_MEASURE; | |
350 else | |
351 { | |
352 plan_flags |= FFTW_ESTIMATE; | |
353 plan_destroys_in = false; | |
354 } | |
355 break; | 305 break; |
356 } | 306 } |
357 | 307 } |
358 if (ioalign) | 308 |
359 plan_flags &= ~FFTW_UNALIGNED; | 309 if (create_new_plan) |
360 else | 310 { |
361 plan_flags |= FFTW_UNALIGNED; | 311 m_rd = dist; |
362 | 312 m_rs = stride; |
363 if (*cur_plan_p) | 313 m_rr = rank; |
364 fftw_destroy_plan (*cur_plan_p); | 314 m_rh = howmany; |
365 | 315 m_rsimd_align = ioalign; |
366 if (plan_destroys_in) | 316 m_rn = dims; |
317 | |
318 // Note reversal of dimensions for column major storage in FFTW. | |
319 octave_idx_type nn = 1; | |
320 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
321 | |
322 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
323 { | |
324 tmp[i] = dims(j); | |
325 nn *= dims(j); | |
326 } | |
327 | |
328 int plan_flags = 0; | |
329 bool plan_destroys_in = true; | |
330 | |
331 switch (m_meth) | |
332 { | |
333 case UNKNOWN: | |
334 case ESTIMATE: | |
335 plan_flags |= FFTW_ESTIMATE; | |
336 plan_destroys_in = false; | |
337 break; | |
338 case MEASURE: | |
339 plan_flags |= FFTW_MEASURE; | |
340 break; | |
341 case PATIENT: | |
342 plan_flags |= FFTW_PATIENT; | |
343 break; | |
344 case EXHAUSTIVE: | |
345 plan_flags |= FFTW_EXHAUSTIVE; | |
346 break; | |
347 case HYBRID: | |
348 if (nn < 8193) | |
349 plan_flags |= FFTW_MEASURE; | |
350 else | |
351 { | |
352 plan_flags |= FFTW_ESTIMATE; | |
353 plan_destroys_in = false; | |
354 } | |
355 break; | |
356 } | |
357 | |
358 if (ioalign) | |
359 plan_flags &= ~FFTW_UNALIGNED; | |
360 else | |
361 plan_flags |= FFTW_UNALIGNED; | |
362 | |
363 if (*cur_plan_p) | |
364 fftw_destroy_plan (*cur_plan_p); | |
365 | |
366 if (plan_destroys_in) | |
367 { | |
368 // Create matrix with the same size and 16-byte alignment as input | |
369 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); | |
370 itmp = reinterpret_cast<double *> | |
371 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | |
372 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | |
373 | |
374 *cur_plan_p | |
375 = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, | |
376 nullptr, stride, dist, | |
377 reinterpret_cast<fftw_complex *> (out), | |
378 nullptr, stride, dist, plan_flags); | |
379 } | |
380 else | |
381 { | |
382 *cur_plan_p | |
383 = fftw_plan_many_dft_r2c (rank, tmp, howmany, | |
384 (const_cast<double *> (in)), | |
385 nullptr, stride, dist, | |
386 reinterpret_cast<fftw_complex *> (out), | |
387 nullptr, stride, dist, plan_flags); | |
388 } | |
389 | |
390 if (*cur_plan_p == nullptr) | |
391 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
392 } | |
393 | |
394 return *cur_plan_p; | |
395 } | |
396 | |
397 fftw_planner::FftwMethod | |
398 fftw_planner::do_method (void) | |
399 { | |
400 return m_meth; | |
401 } | |
402 | |
403 fftw_planner::FftwMethod | |
404 fftw_planner::do_method (FftwMethod _meth) | |
405 { | |
406 FftwMethod ret = m_meth; | |
407 if (_meth == ESTIMATE || _meth == MEASURE | |
408 || _meth == PATIENT || _meth == EXHAUSTIVE | |
409 || _meth == HYBRID) | |
410 { | |
411 if (m_meth != _meth) | |
412 { | |
413 m_meth = _meth; | |
414 if (m_rplan) | |
415 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan)); | |
416 if (m_plan[0]) | |
417 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0])); | |
418 if (m_plan[1]) | |
419 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1])); | |
420 m_rplan = m_plan[0] = m_plan[1] = nullptr; | |
421 } | |
422 } | |
423 else | |
424 ret = UNKNOWN; | |
425 return ret; | |
426 } | |
427 | |
428 float_fftw_planner *float_fftw_planner::s_instance = nullptr; | |
429 | |
430 float_fftw_planner::float_fftw_planner (void) | |
431 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0), | |
432 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1) | |
433 { | |
434 m_plan[0] = m_plan[1] = nullptr; | |
435 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0; | |
436 m_simd_align[0] = m_simd_align[1] = false; | |
437 m_inplace[0] = m_inplace[1] = false; | |
438 m_n[0] = m_n[1] = dim_vector (); | |
439 | |
440 #if defined (HAVE_FFTW3F_THREADS) | |
441 int init_ret = fftwf_init_threads (); | |
442 if (! init_ret) | |
443 (*current_liboctave_error_handler) ("Error initializing FFTW3F threads"); | |
444 | |
445 // Use number of processors available to the current process | |
446 // This can be later changed with fftw ("threads", nthreads). | |
447 m_nthreads = | |
448 octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); | |
449 | |
450 fftwf_plan_with_nthreads (m_nthreads); | |
451 #endif | |
452 | |
453 // If we have a system wide wisdom file, import it. | |
454 fftwf_import_system_wisdom (); | |
455 } | |
456 | |
457 float_fftw_planner::~float_fftw_planner (void) | |
458 { | |
459 fftwf_plan *plan_p; | |
460 | |
461 plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan); | |
462 if (*plan_p) | |
463 fftwf_destroy_plan (*plan_p); | |
464 | |
465 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]); | |
466 if (*plan_p) | |
467 fftwf_destroy_plan (*plan_p); | |
468 | |
469 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]); | |
470 if (*plan_p) | |
471 fftwf_destroy_plan (*plan_p); | |
472 } | |
473 | |
474 bool | |
475 float_fftw_planner::instance_ok (void) | |
476 { | |
477 bool retval = true; | |
478 | |
479 if (! s_instance) | |
480 { | |
481 s_instance = new float_fftw_planner (); | |
482 singleton_cleanup_list::add (cleanup_instance); | |
483 } | |
484 | |
485 return retval; | |
486 } | |
487 | |
488 void | |
489 float_fftw_planner::threads (int nt) | |
490 { | |
491 #if defined (HAVE_FFTW3F_THREADS) | |
492 if (instance_ok () && nt != threads ()) | |
493 { | |
494 s_instance->m_nthreads = nt; | |
495 fftwf_plan_with_nthreads (nt); | |
496 // Clear the current plans. | |
497 s_instance->m_rplan = nullptr; | |
498 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr; | |
499 } | |
500 #else | |
501 octave_unused_parameter (nt); | |
502 | |
503 (*current_liboctave_warning_handler) | |
504 ("unable to change number of threads without FFTW thread support"); | |
505 #endif | |
506 } | |
507 | |
508 void * | |
509 float_fftw_planner::do_create_plan (int dir, const int rank, | |
510 const dim_vector& dims, | |
511 octave_idx_type howmany, | |
512 octave_idx_type stride, | |
513 octave_idx_type dist, | |
514 const FloatComplex *in, | |
515 FloatComplex *out) | |
516 { | |
517 int which = (dir == FFTW_FORWARD) ? 0 : 1; | |
518 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]); | |
519 bool create_new_plan = false; | |
520 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | |
521 bool ioinplace = (in == out); | |
522 | |
523 // Don't create a new plan if we have a non SIMD plan already but | |
524 // can do SIMD. This prevents endlessly recreating plans if we | |
525 // change the alignment. | |
526 | |
527 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride | |
528 || m_r[which] != rank || m_h[which] != howmany | |
529 || ioinplace != m_inplace[which] | |
530 || ((ioalign != m_simd_align[which]) ? ! ioalign : false)) | |
531 create_new_plan = true; | |
532 else | |
533 { | |
534 // We still might not have the same shape of array. | |
535 | |
536 for (int i = 0; i < rank; i++) | |
537 if (dims(i) != m_n[which](i)) | |
367 { | 538 { |
368 // Create matrix with the same size and 16-byte alignment as input | 539 create_new_plan = true; |
369 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); | |
370 itmp = reinterpret_cast<double *> | |
371 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | |
372 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | |
373 | |
374 *cur_plan_p | |
375 = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, | |
376 nullptr, stride, dist, | |
377 reinterpret_cast<fftw_complex *> (out), | |
378 nullptr, stride, dist, plan_flags); | |
379 } | |
380 else | |
381 { | |
382 *cur_plan_p | |
383 = fftw_plan_many_dft_r2c (rank, tmp, howmany, | |
384 (const_cast<double *> (in)), | |
385 nullptr, stride, dist, | |
386 reinterpret_cast<fftw_complex *> (out), | |
387 nullptr, stride, dist, plan_flags); | |
388 } | |
389 | |
390 if (*cur_plan_p == nullptr) | |
391 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
392 } | |
393 | |
394 return *cur_plan_p; | |
395 } | |
396 | |
397 fftw_planner::FftwMethod | |
398 fftw_planner::do_method (void) | |
399 { | |
400 return m_meth; | |
401 } | |
402 | |
403 fftw_planner::FftwMethod | |
404 fftw_planner::do_method (FftwMethod _meth) | |
405 { | |
406 FftwMethod ret = m_meth; | |
407 if (_meth == ESTIMATE || _meth == MEASURE | |
408 || _meth == PATIENT || _meth == EXHAUSTIVE | |
409 || _meth == HYBRID) | |
410 { | |
411 if (m_meth != _meth) | |
412 { | |
413 m_meth = _meth; | |
414 if (m_rplan) | |
415 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan)); | |
416 if (m_plan[0]) | |
417 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0])); | |
418 if (m_plan[1]) | |
419 fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1])); | |
420 m_rplan = m_plan[0] = m_plan[1] = nullptr; | |
421 } | |
422 } | |
423 else | |
424 ret = UNKNOWN; | |
425 return ret; | |
426 } | |
427 | |
428 float_fftw_planner *float_fftw_planner::s_instance = nullptr; | |
429 | |
430 float_fftw_planner::float_fftw_planner (void) | |
431 : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0), | |
432 m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1) | |
433 { | |
434 m_plan[0] = m_plan[1] = nullptr; | |
435 m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0; | |
436 m_simd_align[0] = m_simd_align[1] = false; | |
437 m_inplace[0] = m_inplace[1] = false; | |
438 m_n[0] = m_n[1] = dim_vector (); | |
439 | |
440 #if defined (HAVE_FFTW3F_THREADS) | |
441 int init_ret = fftwf_init_threads (); | |
442 if (! init_ret) | |
443 (*current_liboctave_error_handler) ("Error initializing FFTW3F threads"); | |
444 | |
445 // Use number of processors available to the current process | |
446 // This can be later changed with fftw ("threads", nthreads). | |
447 m_nthreads = | |
448 octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); | |
449 | |
450 fftwf_plan_with_nthreads (m_nthreads); | |
451 #endif | |
452 | |
453 // If we have a system wide wisdom file, import it. | |
454 fftwf_import_system_wisdom (); | |
455 } | |
456 | |
457 float_fftw_planner::~float_fftw_planner (void) | |
458 { | |
459 fftwf_plan *plan_p; | |
460 | |
461 plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan); | |
462 if (*plan_p) | |
463 fftwf_destroy_plan (*plan_p); | |
464 | |
465 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]); | |
466 if (*plan_p) | |
467 fftwf_destroy_plan (*plan_p); | |
468 | |
469 plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]); | |
470 if (*plan_p) | |
471 fftwf_destroy_plan (*plan_p); | |
472 } | |
473 | |
474 bool | |
475 float_fftw_planner::instance_ok (void) | |
476 { | |
477 bool retval = true; | |
478 | |
479 if (! s_instance) | |
480 { | |
481 s_instance = new float_fftw_planner (); | |
482 singleton_cleanup_list::add (cleanup_instance); | |
483 } | |
484 | |
485 return retval; | |
486 } | |
487 | |
488 void | |
489 float_fftw_planner::threads (int nt) | |
490 { | |
491 #if defined (HAVE_FFTW3F_THREADS) | |
492 if (instance_ok () && nt != threads ()) | |
493 { | |
494 s_instance->m_nthreads = nt; | |
495 fftwf_plan_with_nthreads (nt); | |
496 // Clear the current plans. | |
497 s_instance->m_rplan = nullptr; | |
498 s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr; | |
499 } | |
500 #else | |
501 octave_unused_parameter (nt); | |
502 | |
503 (*current_liboctave_warning_handler) | |
504 ("unable to change number of threads without FFTW thread support"); | |
505 #endif | |
506 } | |
507 | |
508 void * | |
509 float_fftw_planner::do_create_plan (int dir, const int rank, | |
510 const dim_vector& dims, | |
511 octave_idx_type howmany, | |
512 octave_idx_type stride, | |
513 octave_idx_type dist, | |
514 const FloatComplex *in, | |
515 FloatComplex *out) | |
516 { | |
517 int which = (dir == FFTW_FORWARD) ? 0 : 1; | |
518 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]); | |
519 bool create_new_plan = false; | |
520 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | |
521 bool ioinplace = (in == out); | |
522 | |
523 // Don't create a new plan if we have a non SIMD plan already but | |
524 // can do SIMD. This prevents endlessly recreating plans if we | |
525 // change the alignment. | |
526 | |
527 if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride | |
528 || m_r[which] != rank || m_h[which] != howmany | |
529 || ioinplace != m_inplace[which] | |
530 || ((ioalign != m_simd_align[which]) ? ! ioalign : false)) | |
531 create_new_plan = true; | |
532 else | |
533 { | |
534 // We still might not have the same shape of array. | |
535 | |
536 for (int i = 0; i < rank; i++) | |
537 if (dims(i) != m_n[which](i)) | |
538 { | |
539 create_new_plan = true; | |
540 break; | |
541 } | |
542 } | |
543 | |
544 if (create_new_plan) | |
545 { | |
546 m_d[which] = dist; | |
547 m_s[which] = stride; | |
548 m_r[which] = rank; | |
549 m_h[which] = howmany; | |
550 m_simd_align[which] = ioalign; | |
551 m_inplace[which] = ioinplace; | |
552 m_n[which] = dims; | |
553 | |
554 // Note reversal of dimensions for column major storage in FFTW. | |
555 octave_idx_type nn = 1; | |
556 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
557 | |
558 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
559 { | |
560 tmp[i] = dims(j); | |
561 nn *= dims(j); | |
562 } | |
563 | |
564 int plan_flags = 0; | |
565 bool plan_destroys_in = true; | |
566 | |
567 switch (m_meth) | |
568 { | |
569 case UNKNOWN: | |
570 case ESTIMATE: | |
571 plan_flags |= FFTW_ESTIMATE; | |
572 plan_destroys_in = false; | |
573 break; | |
574 case MEASURE: | |
575 plan_flags |= FFTW_MEASURE; | |
576 break; | |
577 case PATIENT: | |
578 plan_flags |= FFTW_PATIENT; | |
579 break; | |
580 case EXHAUSTIVE: | |
581 plan_flags |= FFTW_EXHAUSTIVE; | |
582 break; | |
583 case HYBRID: | |
584 if (nn < 8193) | |
585 plan_flags |= FFTW_MEASURE; | |
586 else | |
587 { | |
588 plan_flags |= FFTW_ESTIMATE; | |
589 plan_destroys_in = false; | |
590 } | |
591 break; | 540 break; |
592 } | 541 } |
593 | 542 } |
594 if (ioalign) | 543 |
595 plan_flags &= ~FFTW_UNALIGNED; | 544 if (create_new_plan) |
596 else | 545 { |
597 plan_flags |= FFTW_UNALIGNED; | 546 m_d[which] = dist; |
598 | 547 m_s[which] = stride; |
599 if (*cur_plan_p) | 548 m_r[which] = rank; |
600 fftwf_destroy_plan (*cur_plan_p); | 549 m_h[which] = howmany; |
601 | 550 m_simd_align[which] = ioalign; |
602 if (plan_destroys_in) | 551 m_inplace[which] = ioinplace; |
552 m_n[which] = dims; | |
553 | |
554 // Note reversal of dimensions for column major storage in FFTW. | |
555 octave_idx_type nn = 1; | |
556 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
557 | |
558 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
559 { | |
560 tmp[i] = dims(j); | |
561 nn *= dims(j); | |
562 } | |
563 | |
564 int plan_flags = 0; | |
565 bool plan_destroys_in = true; | |
566 | |
567 switch (m_meth) | |
568 { | |
569 case UNKNOWN: | |
570 case ESTIMATE: | |
571 plan_flags |= FFTW_ESTIMATE; | |
572 plan_destroys_in = false; | |
573 break; | |
574 case MEASURE: | |
575 plan_flags |= FFTW_MEASURE; | |
576 break; | |
577 case PATIENT: | |
578 plan_flags |= FFTW_PATIENT; | |
579 break; | |
580 case EXHAUSTIVE: | |
581 plan_flags |= FFTW_EXHAUSTIVE; | |
582 break; | |
583 case HYBRID: | |
584 if (nn < 8193) | |
585 plan_flags |= FFTW_MEASURE; | |
586 else | |
587 { | |
588 plan_flags |= FFTW_ESTIMATE; | |
589 plan_destroys_in = false; | |
590 } | |
591 break; | |
592 } | |
593 | |
594 if (ioalign) | |
595 plan_flags &= ~FFTW_UNALIGNED; | |
596 else | |
597 plan_flags |= FFTW_UNALIGNED; | |
598 | |
599 if (*cur_plan_p) | |
600 fftwf_destroy_plan (*cur_plan_p); | |
601 | |
602 if (plan_destroys_in) | |
603 { | |
604 // Create matrix with the same size and 16-byte alignment as input | |
605 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); | |
606 itmp = reinterpret_cast<FloatComplex *> | |
607 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | |
608 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | |
609 | |
610 *cur_plan_p | |
611 = fftwf_plan_many_dft (rank, tmp, howmany, | |
612 reinterpret_cast<fftwf_complex *> (itmp), | |
613 nullptr, stride, dist, | |
614 reinterpret_cast<fftwf_complex *> (out), | |
615 nullptr, stride, dist, dir, plan_flags); | |
616 } | |
617 else | |
618 { | |
619 *cur_plan_p | |
620 = fftwf_plan_many_dft (rank, tmp, howmany, | |
621 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)), | |
622 nullptr, stride, dist, | |
623 reinterpret_cast<fftwf_complex *> (out), | |
624 nullptr, stride, dist, dir, plan_flags); | |
625 } | |
626 | |
627 if (*cur_plan_p == nullptr) | |
628 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
629 } | |
630 | |
631 return *cur_plan_p; | |
632 } | |
633 | |
634 void * | |
635 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims, | |
636 octave_idx_type howmany, | |
637 octave_idx_type stride, | |
638 octave_idx_type dist, | |
639 const float *in, FloatComplex *out) | |
640 { | |
641 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan); | |
642 bool create_new_plan = false; | |
643 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | |
644 | |
645 // Don't create a new plan if we have a non SIMD plan already but | |
646 // can do SIMD. This prevents endlessly recreating plans if we | |
647 // change the alignment. | |
648 | |
649 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank | |
650 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false)) | |
651 create_new_plan = true; | |
652 else | |
653 { | |
654 // We still might not have the same shape of array. | |
655 | |
656 for (int i = 0; i < rank; i++) | |
657 if (dims(i) != m_rn(i)) | |
603 { | 658 { |
604 // Create matrix with the same size and 16-byte alignment as input | 659 create_new_plan = true; |
605 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); | |
606 itmp = reinterpret_cast<FloatComplex *> | |
607 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | |
608 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | |
609 | |
610 *cur_plan_p | |
611 = fftwf_plan_many_dft (rank, tmp, howmany, | |
612 reinterpret_cast<fftwf_complex *> (itmp), | |
613 nullptr, stride, dist, | |
614 reinterpret_cast<fftwf_complex *> (out), | |
615 nullptr, stride, dist, dir, plan_flags); | |
616 } | |
617 else | |
618 { | |
619 *cur_plan_p | |
620 = fftwf_plan_many_dft (rank, tmp, howmany, | |
621 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)), | |
622 nullptr, stride, dist, | |
623 reinterpret_cast<fftwf_complex *> (out), | |
624 nullptr, stride, dist, dir, plan_flags); | |
625 } | |
626 | |
627 if (*cur_plan_p == nullptr) | |
628 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
629 } | |
630 | |
631 return *cur_plan_p; | |
632 } | |
633 | |
634 void * | |
635 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims, | |
636 octave_idx_type howmany, | |
637 octave_idx_type stride, | |
638 octave_idx_type dist, | |
639 const float *in, FloatComplex *out) | |
640 { | |
641 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan); | |
642 bool create_new_plan = false; | |
643 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); | |
644 | |
645 // Don't create a new plan if we have a non SIMD plan already but | |
646 // can do SIMD. This prevents endlessly recreating plans if we | |
647 // change the alignment. | |
648 | |
649 if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank | |
650 || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false)) | |
651 create_new_plan = true; | |
652 else | |
653 { | |
654 // We still might not have the same shape of array. | |
655 | |
656 for (int i = 0; i < rank; i++) | |
657 if (dims(i) != m_rn(i)) | |
658 { | |
659 create_new_plan = true; | |
660 break; | |
661 } | |
662 } | |
663 | |
664 if (create_new_plan) | |
665 { | |
666 m_rd = dist; | |
667 m_rs = stride; | |
668 m_rr = rank; | |
669 m_rh = howmany; | |
670 m_rsimd_align = ioalign; | |
671 m_rn = dims; | |
672 | |
673 // Note reversal of dimensions for column major storage in FFTW. | |
674 octave_idx_type nn = 1; | |
675 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
676 | |
677 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
678 { | |
679 tmp[i] = dims(j); | |
680 nn *= dims(j); | |
681 } | |
682 | |
683 int plan_flags = 0; | |
684 bool plan_destroys_in = true; | |
685 | |
686 switch (m_meth) | |
687 { | |
688 case UNKNOWN: | |
689 case ESTIMATE: | |
690 plan_flags |= FFTW_ESTIMATE; | |
691 plan_destroys_in = false; | |
692 break; | |
693 case MEASURE: | |
694 plan_flags |= FFTW_MEASURE; | |
695 break; | |
696 case PATIENT: | |
697 plan_flags |= FFTW_PATIENT; | |
698 break; | |
699 case EXHAUSTIVE: | |
700 plan_flags |= FFTW_EXHAUSTIVE; | |
701 break; | |
702 case HYBRID: | |
703 if (nn < 8193) | |
704 plan_flags |= FFTW_MEASURE; | |
705 else | |
706 { | |
707 plan_flags |= FFTW_ESTIMATE; | |
708 plan_destroys_in = false; | |
709 } | |
710 break; | 660 break; |
711 } | 661 } |
712 | 662 } |
713 if (ioalign) | 663 |
714 plan_flags &= ~FFTW_UNALIGNED; | 664 if (create_new_plan) |
715 else | 665 { |
716 plan_flags |= FFTW_UNALIGNED; | 666 m_rd = dist; |
717 | 667 m_rs = stride; |
718 if (*cur_plan_p) | 668 m_rr = rank; |
719 fftwf_destroy_plan (*cur_plan_p); | 669 m_rh = howmany; |
720 | 670 m_rsimd_align = ioalign; |
721 if (plan_destroys_in) | 671 m_rn = dims; |
722 { | 672 |
723 // Create matrix with the same size and 16-byte alignment as input | 673 // Note reversal of dimensions for column major storage in FFTW. |
724 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); | 674 octave_idx_type nn = 1; |
725 itmp = reinterpret_cast<float *> | 675 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
726 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + | 676 |
727 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); | 677 for (int i = 0, j = rank-1; i < rank; i++, j--) |
728 | 678 { |
729 *cur_plan_p | 679 tmp[i] = dims(j); |
730 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp, | 680 nn *= dims(j); |
731 nullptr, stride, dist, | 681 } |
732 reinterpret_cast<fftwf_complex *> (out), | 682 |
733 nullptr, stride, dist, plan_flags); | 683 int plan_flags = 0; |
734 } | 684 bool plan_destroys_in = true; |
735 else | 685 |
736 { | 686 switch (m_meth) |
737 *cur_plan_p | 687 { |
738 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, | 688 case UNKNOWN: |
739 (const_cast<float *> (in)), | 689 case ESTIMATE: |
740 nullptr, stride, dist, | 690 plan_flags |= FFTW_ESTIMATE; |
741 reinterpret_cast<fftwf_complex *> (out), | 691 plan_destroys_in = false; |
742 nullptr, stride, dist, plan_flags); | 692 break; |
743 } | 693 case MEASURE: |
744 | 694 plan_flags |= FFTW_MEASURE; |
745 if (*cur_plan_p == nullptr) | 695 break; |
746 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | 696 case PATIENT: |
747 } | 697 plan_flags |= FFTW_PATIENT; |
748 | 698 break; |
749 return *cur_plan_p; | 699 case EXHAUSTIVE: |
750 } | 700 plan_flags |= FFTW_EXHAUSTIVE; |
751 | 701 break; |
752 float_fftw_planner::FftwMethod | 702 case HYBRID: |
753 float_fftw_planner::do_method (void) | 703 if (nn < 8193) |
754 { | 704 plan_flags |= FFTW_MEASURE; |
755 return m_meth; | 705 else |
756 } | 706 { |
757 | 707 plan_flags |= FFTW_ESTIMATE; |
758 float_fftw_planner::FftwMethod | 708 plan_destroys_in = false; |
759 float_fftw_planner::do_method (FftwMethod _meth) | 709 } |
760 { | 710 break; |
761 FftwMethod ret = m_meth; | 711 } |
762 if (_meth == ESTIMATE || _meth == MEASURE | 712 |
763 || _meth == PATIENT || _meth == EXHAUSTIVE | 713 if (ioalign) |
764 || _meth == HYBRID) | 714 plan_flags &= ~FFTW_UNALIGNED; |
765 { | 715 else |
766 if (m_meth != _meth) | 716 plan_flags |= FFTW_UNALIGNED; |
767 { | 717 |
768 m_meth = _meth; | 718 if (*cur_plan_p) |
769 if (m_rplan) | 719 fftwf_destroy_plan (*cur_plan_p); |
770 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan)); | 720 |
771 if (m_plan[0]) | 721 if (plan_destroys_in) |
772 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0])); | 722 { |
773 if (m_plan[1]) | 723 // Create matrix with the same size and 16-byte alignment as input |
774 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1])); | 724 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); |
775 m_rplan = m_plan[0] = m_plan[1] = nullptr; | 725 itmp = reinterpret_cast<float *> |
776 } | 726 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + |
777 } | 727 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); |
778 else | 728 |
779 ret = UNKNOWN; | 729 *cur_plan_p |
780 return ret; | 730 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp, |
781 } | 731 nullptr, stride, dist, |
782 | 732 reinterpret_cast<fftwf_complex *> (out), |
783 template <typename T> | 733 nullptr, stride, dist, plan_flags); |
784 static inline void | 734 } |
785 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc, | 735 else |
786 octave_idx_type stride, octave_idx_type dist) | 736 { |
787 { | 737 *cur_plan_p |
788 octave_quit (); | 738 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, |
789 | 739 (const_cast<float *> (in)), |
790 // Fill in the missing data. | 740 nullptr, stride, dist, |
791 | 741 reinterpret_cast<fftwf_complex *> (out), |
792 for (std::size_t i = 0; i < nr; i++) | 742 nullptr, stride, dist, plan_flags); |
743 } | |
744 | |
745 if (*cur_plan_p == nullptr) | |
746 (*current_liboctave_error_handler) ("Error creating FFTW plan"); | |
747 } | |
748 | |
749 return *cur_plan_p; | |
750 } | |
751 | |
752 float_fftw_planner::FftwMethod | |
753 float_fftw_planner::do_method (void) | |
754 { | |
755 return m_meth; | |
756 } | |
757 | |
758 float_fftw_planner::FftwMethod | |
759 float_fftw_planner::do_method (FftwMethod _meth) | |
760 { | |
761 FftwMethod ret = m_meth; | |
762 if (_meth == ESTIMATE || _meth == MEASURE | |
763 || _meth == PATIENT || _meth == EXHAUSTIVE | |
764 || _meth == HYBRID) | |
765 { | |
766 if (m_meth != _meth) | |
767 { | |
768 m_meth = _meth; | |
769 if (m_rplan) | |
770 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan)); | |
771 if (m_plan[0]) | |
772 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0])); | |
773 if (m_plan[1]) | |
774 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1])); | |
775 m_rplan = m_plan[0] = m_plan[1] = nullptr; | |
776 } | |
777 } | |
778 else | |
779 ret = UNKNOWN; | |
780 return ret; | |
781 } | |
782 | |
783 template <typename T> | |
784 static inline void | |
785 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc, | |
786 octave_idx_type stride, octave_idx_type dist) | |
787 { | |
788 octave_quit (); | |
789 | |
790 // Fill in the missing data. | |
791 | |
792 for (std::size_t i = 0; i < nr; i++) | |
793 for (std::size_t j = nc/2+1; j < nc; j++) | |
794 out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]); | |
795 | |
796 octave_quit (); | |
797 } | |
798 | |
799 template <typename T> | |
800 static inline void | |
801 convert_packcomplex_Nd (T *out, const dim_vector& dv) | |
802 { | |
803 std::size_t nc = dv(0); | |
804 std::size_t nr = dv(1); | |
805 std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1); | |
806 std::size_t nrp = nr * np; | |
807 T *ptr1, *ptr2; | |
808 | |
809 octave_quit (); | |
810 | |
811 // Create space for the missing elements. | |
812 | |
813 for (std::size_t i = 0; i < nrp; i++) | |
814 { | |
815 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); | |
816 ptr2 = out + i * nc; | |
817 for (std::size_t j = 0; j < nc/2+1; j++) | |
818 *ptr2++ = *ptr1++; | |
819 } | |
820 | |
821 octave_quit (); | |
822 | |
823 // Fill in the missing data for the rank = 2 case directly for speed. | |
824 | |
825 for (std::size_t i = 0; i < np; i++) | |
826 { | |
827 for (std::size_t j = 1; j < nr; j++) | |
828 for (std::size_t k = nc/2+1; k < nc; k++) | |
829 out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]); | |
830 | |
793 for (std::size_t j = nc/2+1; j < nc; j++) | 831 for (std::size_t j = nc/2+1; j < nc; j++) |
794 out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]); | 832 out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]); |
795 | 833 } |
796 octave_quit (); | 834 |
797 } | 835 octave_quit (); |
798 | 836 |
799 template <typename T> | 837 // Now do the permutations needed for rank > 2 cases. |
800 static inline void | 838 |
801 convert_packcomplex_Nd (T *out, const dim_vector& dv) | 839 std::size_t jstart = dv(0) * dv(1); |
802 { | 840 std::size_t kstep = dv(0); |
803 std::size_t nc = dv(0); | 841 std::size_t nel = dv.numel (); |
804 std::size_t nr = dv(1); | 842 |
805 std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1); | 843 for (int inner = 2; inner < dv.ndims (); inner++) |
806 std::size_t nrp = nr * np; | 844 { |
807 T *ptr1, *ptr2; | 845 std::size_t jmax = jstart * dv(inner); |
808 | 846 for (std::size_t i = 0; i < nel; i+=jmax) |
809 octave_quit (); | 847 for (std::size_t j = jstart, jj = jmax-jstart; j < jj; |
810 | 848 j+=jstart, jj-=jstart) |
811 // Create space for the missing elements. | 849 for (std::size_t k = 0; k < jstart; k+= kstep) |
812 | 850 for (std::size_t l = nc/2+1; l < nc; l++) |
813 for (std::size_t i = 0; i < nrp; i++) | 851 { |
814 { | 852 T tmp = out[i+ j + k + l]; |
815 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); | 853 out[i + j + k + l] = out[i + jj + k + l]; |
816 ptr2 = out + i * nc; | 854 out[i + jj + k + l] = tmp; |
817 for (std::size_t j = 0; j < nc/2+1; j++) | 855 } |
818 *ptr2++ = *ptr1++; | 856 jstart = jmax; |
819 } | 857 } |
820 | 858 |
821 octave_quit (); | 859 octave_quit (); |
822 | 860 } |
823 // Fill in the missing data for the rank = 2 case directly for speed. | 861 |
824 | 862 int |
825 for (std::size_t i = 0; i < np; i++) | 863 fftw::fft (const double *in, Complex *out, std::size_t npts, |
826 { | 864 std::size_t nsamples, octave_idx_type stride, |
827 for (std::size_t j = 1; j < nr; j++) | 865 octave_idx_type dist) |
828 for (std::size_t k = nc/2+1; k < nc; k++) | 866 { |
829 out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]); | 867 dist = (dist < 0 ? npts : dist); |
830 | 868 |
831 for (std::size_t j = nc/2+1; j < nc; j++) | 869 dim_vector dv (npts, 1); |
832 out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]); | 870 void *vplan = fftw_planner::create_plan (1, dv, nsamples, |
833 } | 871 stride, dist, in, out); |
834 | 872 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); |
835 octave_quit (); | 873 |
836 | 874 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)), |
837 // Now do the permutations needed for rank > 2 cases. | 875 reinterpret_cast<fftw_complex *> (out)); |
838 | 876 |
839 std::size_t jstart = dv(0) * dv(1); | 877 // Need to create other half of the transform. |
840 std::size_t kstep = dv(0); | 878 |
841 std::size_t nel = dv.numel (); | 879 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
842 | 880 |
843 for (int inner = 2; inner < dv.ndims (); inner++) | 881 return 0; |
844 { | 882 } |
845 std::size_t jmax = jstart * dv(inner); | 883 |
846 for (std::size_t i = 0; i < nel; i+=jmax) | 884 int |
847 for (std::size_t j = jstart, jj = jmax-jstart; j < jj; | 885 fftw::fft (const Complex *in, Complex *out, std::size_t npts, |
848 j+=jstart, jj-=jstart) | 886 std::size_t nsamples, octave_idx_type stride, |
849 for (std::size_t k = 0; k < jstart; k+= kstep) | 887 octave_idx_type dist) |
850 for (std::size_t l = nc/2+1; l < nc; l++) | 888 { |
851 { | 889 dist = (dist < 0 ? npts : dist); |
852 T tmp = out[i+ j + k + l]; | 890 |
853 out[i + j + k + l] = out[i + jj + k + l]; | 891 dim_vector dv (npts, 1); |
854 out[i + jj + k + l] = tmp; | 892 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv, |
855 } | 893 nsamples, stride, |
856 jstart = jmax; | 894 dist, in, out); |
857 } | 895 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); |
858 | 896 |
859 octave_quit (); | 897 fftw_execute_dft (m_plan, |
860 } | 898 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
861 | 899 reinterpret_cast<fftw_complex *> (out)); |
862 int | 900 |
863 fftw::fft (const double *in, Complex *out, std::size_t npts, | 901 return 0; |
864 std::size_t nsamples, octave_idx_type stride, | 902 } |
865 octave_idx_type dist) | 903 |
866 { | 904 int |
867 dist = (dist < 0 ? npts : dist); | 905 fftw::ifft (const Complex *in, Complex *out, std::size_t npts, |
868 | 906 std::size_t nsamples, octave_idx_type stride, |
869 dim_vector dv (npts, 1); | 907 octave_idx_type dist) |
870 void *vplan = fftw_planner::create_plan (1, dv, nsamples, | 908 { |
871 stride, dist, in, out); | 909 dist = (dist < 0 ? npts : dist); |
872 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); | 910 |
873 | 911 dim_vector dv (npts, 1); |
874 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)), | 912 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples, |
875 reinterpret_cast<fftw_complex *> (out)); | 913 stride, dist, in, out); |
876 | 914 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); |
877 // Need to create other half of the transform. | 915 |
878 | 916 fftw_execute_dft (m_plan, |
879 convert_packcomplex_1d (out, nsamples, npts, stride, dist); | 917 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
880 | 918 reinterpret_cast<fftw_complex *> (out)); |
881 return 0; | 919 |
882 } | 920 const Complex scale = npts; |
883 | 921 for (std::size_t j = 0; j < nsamples; j++) |
884 int | |
885 fftw::fft (const Complex *in, Complex *out, std::size_t npts, | |
886 std::size_t nsamples, octave_idx_type stride, | |
887 octave_idx_type dist) | |
888 { | |
889 dist = (dist < 0 ? npts : dist); | |
890 | |
891 dim_vector dv (npts, 1); | |
892 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv, | |
893 nsamples, stride, | |
894 dist, in, out); | |
895 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); | |
896 | |
897 fftw_execute_dft (m_plan, | |
898 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
899 reinterpret_cast<fftw_complex *> (out)); | |
900 | |
901 return 0; | |
902 } | |
903 | |
904 int | |
905 fftw::ifft (const Complex *in, Complex *out, std::size_t npts, | |
906 std::size_t nsamples, octave_idx_type stride, | |
907 octave_idx_type dist) | |
908 { | |
909 dist = (dist < 0 ? npts : dist); | |
910 | |
911 dim_vector dv (npts, 1); | |
912 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples, | |
913 stride, dist, in, out); | |
914 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); | |
915 | |
916 fftw_execute_dft (m_plan, | |
917 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
918 reinterpret_cast<fftw_complex *> (out)); | |
919 | |
920 const Complex scale = npts; | |
921 for (std::size_t j = 0; j < nsamples; j++) | |
922 for (std::size_t i = 0; i < npts; i++) | |
923 out[i*stride + j*dist] /= scale; | |
924 | |
925 return 0; | |
926 } | |
927 | |
928 int | |
929 fftw::fftNd (const double *in, Complex *out, const int rank, | |
930 const dim_vector& dv) | |
931 { | |
932 octave_idx_type dist = 1; | |
933 for (int i = 0; i < rank; i++) | |
934 dist *= dv(i); | |
935 | |
936 // Fool with the position of the start of the output matrix, so that | |
937 // creating other half of the matrix won't cause cache problems. | |
938 | |
939 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); | |
940 | |
941 void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist, | |
942 in, out + offset); | |
943 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); | |
944 | |
945 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)), | |
946 reinterpret_cast<fftw_complex *> (out+ offset)); | |
947 | |
948 // Need to create other half of the transform. | |
949 | |
950 convert_packcomplex_Nd (out, dv); | |
951 | |
952 return 0; | |
953 } | |
954 | |
955 int | |
956 fftw::fftNd (const Complex *in, Complex *out, const int rank, | |
957 const dim_vector& dv) | |
958 { | |
959 octave_idx_type dist = 1; | |
960 for (int i = 0; i < rank; i++) | |
961 dist *= dv(i); | |
962 | |
963 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv, | |
964 1, 1, dist, in, out); | |
965 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); | |
966 | |
967 fftw_execute_dft (m_plan, | |
968 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
969 reinterpret_cast<fftw_complex *> (out)); | |
970 | |
971 return 0; | |
972 } | |
973 | |
974 int | |
975 fftw::ifftNd (const Complex *in, Complex *out, const int rank, | |
976 const dim_vector& dv) | |
977 { | |
978 octave_idx_type dist = 1; | |
979 for (int i = 0; i < rank; i++) | |
980 dist *= dv(i); | |
981 | |
982 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, | |
983 1, 1, dist, in, out); | |
984 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); | |
985 | |
986 fftw_execute_dft (m_plan, | |
987 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
988 reinterpret_cast<fftw_complex *> (out)); | |
989 | |
990 const std::size_t npts = dv.numel (); | |
991 const Complex scale = npts; | |
992 for (std::size_t i = 0; i < npts; i++) | 922 for (std::size_t i = 0; i < npts; i++) |
993 out[i] /= scale; | 923 out[i*stride + j*dist] /= scale; |
994 | 924 |
995 return 0; | 925 return 0; |
996 } | 926 } |
997 | 927 |
998 int | 928 int |
999 fftw::fft (const float *in, FloatComplex *out, std::size_t npts, | 929 fftw::fftNd (const double *in, Complex *out, const int rank, |
1000 std::size_t nsamples, octave_idx_type stride, | 930 const dim_vector& dv) |
1001 octave_idx_type dist) | 931 { |
1002 { | 932 octave_idx_type dist = 1; |
1003 dist = (dist < 0 ? npts : dist); | 933 for (int i = 0; i < rank; i++) |
1004 | 934 dist *= dv(i); |
1005 dim_vector dv (npts, 1); | 935 |
1006 void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride, | 936 // Fool with the position of the start of the output matrix, so that |
1007 dist, in, out); | 937 // creating other half of the matrix won't cause cache problems. |
1008 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | 938 |
1009 | 939 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
1010 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)), | 940 |
1011 reinterpret_cast<fftwf_complex *> (out)); | 941 void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist, |
1012 | 942 in, out + offset); |
1013 // Need to create other half of the transform. | 943 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); |
1014 | 944 |
1015 convert_packcomplex_1d (out, nsamples, npts, stride, dist); | 945 fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)), |
1016 | 946 reinterpret_cast<fftw_complex *> (out+ offset)); |
1017 return 0; | 947 |
1018 } | 948 // Need to create other half of the transform. |
1019 | 949 |
1020 int | 950 convert_packcomplex_Nd (out, dv); |
1021 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts, | 951 |
1022 std::size_t nsamples, octave_idx_type stride, | 952 return 0; |
1023 octave_idx_type dist) | 953 } |
1024 { | 954 |
1025 dist = (dist < 0 ? npts : dist); | 955 int |
1026 | 956 fftw::fftNd (const Complex *in, Complex *out, const int rank, |
1027 dim_vector dv (npts, 1); | 957 const dim_vector& dv) |
1028 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv, | 958 { |
1029 nsamples, stride, dist, | 959 octave_idx_type dist = 1; |
1030 in, out); | 960 for (int i = 0; i < rank; i++) |
1031 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | 961 dist *= dv(i); |
1032 | 962 |
1033 fftwf_execute_dft (m_plan, | 963 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv, |
1034 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), | 964 1, 1, dist, in, out); |
1035 reinterpret_cast<fftwf_complex *> (out)); | 965 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); |
1036 | 966 |
1037 return 0; | 967 fftw_execute_dft (m_plan, |
1038 } | 968 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
1039 | 969 reinterpret_cast<fftw_complex *> (out)); |
1040 int | 970 |
1041 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts, | 971 return 0; |
1042 std::size_t nsamples, octave_idx_type stride, | 972 } |
1043 octave_idx_type dist) | 973 |
1044 { | 974 int |
1045 dist = (dist < 0 ? npts : dist); | 975 fftw::ifftNd (const Complex *in, Complex *out, const int rank, |
1046 | 976 const dim_vector& dv) |
1047 dim_vector dv (npts, 1); | 977 { |
1048 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, | 978 octave_idx_type dist = 1; |
1049 nsamples, stride, dist, | 979 for (int i = 0; i < rank; i++) |
1050 in, out); | 980 dist *= dv(i); |
1051 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | 981 |
1052 | 982 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, |
1053 fftwf_execute_dft (m_plan, | 983 1, 1, dist, in, out); |
1054 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), | 984 fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); |
1055 reinterpret_cast<fftwf_complex *> (out)); | 985 |
1056 | 986 fftw_execute_dft (m_plan, |
1057 const FloatComplex scale = npts; | 987 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
1058 for (std::size_t j = 0; j < nsamples; j++) | 988 reinterpret_cast<fftw_complex *> (out)); |
1059 for (std::size_t i = 0; i < npts; i++) | 989 |
1060 out[i*stride + j*dist] /= scale; | 990 const std::size_t npts = dv.numel (); |
1061 | 991 const Complex scale = npts; |
1062 return 0; | 992 for (std::size_t i = 0; i < npts; i++) |
1063 } | 993 out[i] /= scale; |
1064 | 994 |
1065 int | 995 return 0; |
1066 fftw::fftNd (const float *in, FloatComplex *out, const int rank, | 996 } |
1067 const dim_vector& dv) | 997 |
1068 { | 998 int |
1069 octave_idx_type dist = 1; | 999 fftw::fft (const float *in, FloatComplex *out, std::size_t npts, |
1070 for (int i = 0; i < rank; i++) | 1000 std::size_t nsamples, octave_idx_type stride, |
1071 dist *= dv(i); | 1001 octave_idx_type dist) |
1072 | 1002 { |
1073 // Fool with the position of the start of the output matrix, so that | 1003 dist = (dist < 0 ? npts : dist); |
1074 // creating other half of the matrix won't cause cache problems. | 1004 |
1075 | 1005 dim_vector dv (npts, 1); |
1076 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); | 1006 void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride, |
1077 | 1007 dist, in, out); |
1078 void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist, | 1008 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); |
1079 in, out + offset); | 1009 |
1080 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | 1010 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)), |
1081 | 1011 reinterpret_cast<fftwf_complex *> (out)); |
1082 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)), | 1012 |
1083 reinterpret_cast<fftwf_complex *> (out+ offset)); | 1013 // Need to create other half of the transform. |
1084 | 1014 |
1085 // Need to create other half of the transform. | 1015 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
1086 | 1016 |
1087 convert_packcomplex_Nd (out, dv); | 1017 return 0; |
1088 | 1018 } |
1089 return 0; | 1019 |
1090 } | 1020 int |
1091 | 1021 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts, |
1092 int | 1022 std::size_t nsamples, octave_idx_type stride, |
1093 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, | 1023 octave_idx_type dist) |
1094 const dim_vector& dv) | 1024 { |
1095 { | 1025 dist = (dist < 0 ? npts : dist); |
1096 octave_idx_type dist = 1; | 1026 |
1097 for (int i = 0; i < rank; i++) | 1027 dim_vector dv (npts, 1); |
1098 dist *= dv(i); | 1028 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv, |
1099 | 1029 nsamples, stride, dist, |
1100 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv, | 1030 in, out); |
1101 1, 1, dist, in, out); | 1031 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); |
1102 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | 1032 |
1103 | 1033 fftwf_execute_dft (m_plan, |
1104 fftwf_execute_dft (m_plan, | 1034 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
1105 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), | 1035 reinterpret_cast<fftwf_complex *> (out)); |
1106 reinterpret_cast<fftwf_complex *> (out)); | 1036 |
1107 | 1037 return 0; |
1108 return 0; | 1038 } |
1109 } | 1039 |
1110 | 1040 int |
1111 int | 1041 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts, |
1112 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, | 1042 std::size_t nsamples, octave_idx_type stride, |
1113 const dim_vector& dv) | 1043 octave_idx_type dist) |
1114 { | 1044 { |
1115 octave_idx_type dist = 1; | 1045 dist = (dist < 0 ? npts : dist); |
1116 for (int i = 0; i < rank; i++) | 1046 |
1117 dist *= dv(i); | 1047 dim_vector dv (npts, 1); |
1118 | 1048 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, |
1119 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, | 1049 nsamples, stride, dist, |
1120 1, 1, dist, in, out); | 1050 in, out); |
1121 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | 1051 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); |
1122 | 1052 |
1123 fftwf_execute_dft (m_plan, | 1053 fftwf_execute_dft (m_plan, |
1124 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), | 1054 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
1125 reinterpret_cast<fftwf_complex *> (out)); | 1055 reinterpret_cast<fftwf_complex *> (out)); |
1126 | 1056 |
1127 const std::size_t npts = dv.numel (); | 1057 const FloatComplex scale = npts; |
1128 const FloatComplex scale = npts; | 1058 for (std::size_t j = 0; j < nsamples; j++) |
1129 for (std::size_t i = 0; i < npts; i++) | 1059 for (std::size_t i = 0; i < npts; i++) |
1130 out[i] /= scale; | 1060 out[i*stride + j*dist] /= scale; |
1131 | 1061 |
1132 return 0; | 1062 return 0; |
1133 } | 1063 } |
1064 | |
1065 int | |
1066 fftw::fftNd (const float *in, FloatComplex *out, const int rank, | |
1067 const dim_vector& dv) | |
1068 { | |
1069 octave_idx_type dist = 1; | |
1070 for (int i = 0; i < rank; i++) | |
1071 dist *= dv(i); | |
1072 | |
1073 // Fool with the position of the start of the output matrix, so that | |
1074 // creating other half of the matrix won't cause cache problems. | |
1075 | |
1076 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); | |
1077 | |
1078 void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist, | |
1079 in, out + offset); | |
1080 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | |
1081 | |
1082 fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)), | |
1083 reinterpret_cast<fftwf_complex *> (out+ offset)); | |
1084 | |
1085 // Need to create other half of the transform. | |
1086 | |
1087 convert_packcomplex_Nd (out, dv); | |
1088 | |
1089 return 0; | |
1090 } | |
1091 | |
1092 int | |
1093 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, | |
1094 const dim_vector& dv) | |
1095 { | |
1096 octave_idx_type dist = 1; | |
1097 for (int i = 0; i < rank; i++) | |
1098 dist *= dv(i); | |
1099 | |
1100 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv, | |
1101 1, 1, dist, in, out); | |
1102 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | |
1103 | |
1104 fftwf_execute_dft (m_plan, | |
1105 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), | |
1106 reinterpret_cast<fftwf_complex *> (out)); | |
1107 | |
1108 return 0; | |
1109 } | |
1110 | |
1111 int | |
1112 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, | |
1113 const dim_vector& dv) | |
1114 { | |
1115 octave_idx_type dist = 1; | |
1116 for (int i = 0; i < rank; i++) | |
1117 dist *= dv(i); | |
1118 | |
1119 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, | |
1120 1, 1, dist, in, out); | |
1121 fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); | |
1122 | |
1123 fftwf_execute_dft (m_plan, | |
1124 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), | |
1125 reinterpret_cast<fftwf_complex *> (out)); | |
1126 | |
1127 const std::size_t npts = dv.numel (); | |
1128 const FloatComplex scale = npts; | |
1129 for (std::size_t i = 0; i < npts; i++) | |
1130 out[i] /= scale; | |
1131 | |
1132 return 0; | |
1133 } | |
1134 | 1134 |
1135 #endif | 1135 #endif |
1136 | 1136 |
1137 std::string | 1137 std::string |
1138 fftw_version (void) | 1138 fftw_version (void) |
1139 { | 1139 { |
1140 #if defined (HAVE_FFTW) | 1140 #if defined (HAVE_FFTW) |
1141 return ::fftw_version; | 1141 return ::fftw_version; |
1142 #else | 1142 #else |
1143 return "none"; | 1143 return "none"; |
1144 #endif | 1144 #endif |
1145 } | 1145 } |
1146 | 1146 |
1147 std::string | 1147 std::string |
1148 fftwf_version (void) | 1148 fftwf_version (void) |
1149 { | 1149 { |
1150 #if defined (HAVE_FFTW) | 1150 #if defined (HAVE_FFTW) |
1151 return ::fftwf_version; | 1151 return ::fftwf_version; |
1152 #else | 1152 #else |
1153 return "none"; | 1153 return "none"; |
1154 #endif | 1154 #endif |
1155 } | 1155 } |
1156 | 1156 |
1157 OCTAVE_END_NAMESPACE(octave) | 1157 OCTAVE_END_NAMESPACE(octave) |