Mercurial > octave
annotate liboctave/oct-fftw.cc @ 8920:eb63fbe60fab
update copyright notices
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sat, 07 Mar 2009 10:41:27 -0500 |
parents | 25bc2d31e1bf |
children | fb933db0c517 |
rev | line source |
---|---|
3828 | 1 /* |
2 | |
8920 | 3 Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2008 John W. Eaton |
7017 | 4 |
3828 | 5 This file is part of Octave. |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
7016 | 9 Free Software Foundation; either version 3 of the License, or (at your |
10 option) any later version. | |
3828 | 11 |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
7016 | 18 along with Octave; see the file COPYING. If not, see |
19 <http://www.gnu.org/licenses/>. | |
3828 | 20 |
21 */ | |
22 | |
23 #ifdef HAVE_CONFIG_H | |
24 #include <config.h> | |
25 #endif | |
26 | |
4773 | 27 #if defined (HAVE_FFTW3) |
3828 | 28 |
4775 | 29 #include <iostream> |
30 #include <vector> | |
31 | |
4786 | 32 #include "lo-error.h" |
3828 | 33 #include "oct-fftw.h" |
4786 | 34 #include "quit.h" |
8377
25bc2d31e1bf
improve OCTAVE_LOCAL_BUFFER
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
35 #include "oct-locbuf.h" |
3828 | 36 |
4809 | 37 // Helper class to create and cache fftw plans for both 1d and |
6228 | 38 // 2d. This implementation defaults to using FFTW_ESTIMATE to create |
39 // the plans, which in theory is suboptimal, but provides quit | |
40 // reasonable performance. | |
4773 | 41 |
42 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3 | |
6228 | 43 // destroys the input and output arrays. We must therefore create a |
44 // temporary input array with the same size and 16-byte alignment as | |
45 // the original array and use that for the planner. Note that we also | |
46 // use any wisdom that is available, either in a FFTW3 system wide file | |
47 // or as supplied by the user. | |
4773 | 48 |
5775 | 49 // FIXME -- if we can ensure 16 byte alignment in Array<T> |
4809 | 50 // (<T> *data) the FFTW3 can use SIMD instructions for further |
51 // acceleration. | |
4773 | 52 |
4809 | 53 // Note that it is profitable to store the FFTW3 plans, for small |
54 // ffts. | |
3828 | 55 |
4808 | 56 octave_fftw_planner::octave_fftw_planner (void) |
3828 | 57 { |
6228 | 58 meth = ESTIMATE; |
3828 | 59 |
60 plan[0] = plan[1] = 0; | |
4773 | 61 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
4808 | 62 simd_align[0] = simd_align[1] = false; |
5044 | 63 inplace[0] = inplace[1] = false; |
4808 | 64 n[0] = n[1] = dim_vector (); |
4773 | 65 |
66 rplan = 0; | |
67 rd = rs = rr = rh = 0; | |
4808 | 68 rsimd_align = false; |
4773 | 69 rn = dim_vector (); |
5044 | 70 |
4809 | 71 // If we have a system wide wisdom file, import it. |
4808 | 72 fftw_import_system_wisdom (); |
3828 | 73 } |
74 | |
6228 | 75 octave_fftw_planner::FftwMethod |
76 octave_fftw_planner::method (void) | |
77 { | |
78 return meth; | |
79 } | |
80 | |
81 octave_fftw_planner::FftwMethod | |
82 octave_fftw_planner::method (FftwMethod _meth) | |
83 { | |
84 FftwMethod ret = meth; | |
85 if (_meth == ESTIMATE || _meth == MEASURE || | |
86 _meth == PATIENT || _meth == EXHAUSTIVE || | |
87 _meth == HYBRID) | |
88 { | |
89 if (meth != _meth) | |
90 { | |
91 meth = _meth; | |
92 if (rplan) | |
93 fftw_destroy_plan (rplan); | |
94 if (plan[0]) | |
95 fftw_destroy_plan (plan[0]); | |
96 if (plan[1]) | |
97 fftw_destroy_plan (plan[1]); | |
98 rplan = plan[0] = plan[1] = 0; | |
99 } | |
100 } | |
101 else | |
102 ret = UNKNOWN; | |
103 return ret; | |
104 } | |
105 | |
4808 | 106 #define CHECK_SIMD_ALIGNMENT(x) \ |
6482 | 107 (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0) |
4808 | 108 |
3828 | 109 fftw_plan |
4773 | 110 octave_fftw_planner::create_plan (int dir, const int rank, |
5275 | 111 const dim_vector dims, octave_idx_type howmany, |
112 octave_idx_type stride, octave_idx_type dist, | |
4773 | 113 const Complex *in, Complex *out) |
3828 | 114 { |
4773 | 115 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
3828 | 116 fftw_plan *cur_plan_p = &plan[which]; |
117 bool create_new_plan = false; | |
4808 | 118 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
5044 | 119 bool ioinplace = (in == out); |
3828 | 120 |
4809 | 121 // Don't create a new plan if we have a non SIMD plan already but |
122 // can do SIMD. This prevents endlessly recreating plans if we | |
123 // change the alignment. | |
124 | |
4783 | 125 if (plan[which] == 0 || d[which] != dist || s[which] != stride |
5044 | 126 || r[which] != rank || h[which] != howmany |
127 || ioinplace != inplace[which] | |
4808 | 128 || ((ioalign != simd_align[which]) ? !ioalign : false)) |
4773 | 129 create_new_plan = true; |
130 else | |
4809 | 131 { |
132 // We still might not have the same shape of array. | |
133 | |
134 for (int i = 0; i < rank; i++) | |
135 if (dims(i) != n[which](i)) | |
136 { | |
137 create_new_plan = true; | |
138 break; | |
139 } | |
140 } | |
3828 | 141 |
142 if (create_new_plan) | |
143 { | |
4773 | 144 d[which] = dist; |
145 s[which] = stride; | |
146 r[which] = rank; | |
147 h[which] = howmany; | |
4808 | 148 simd_align[which] = ioalign; |
5044 | 149 inplace[which] = ioinplace; |
4773 | 150 n[which] = dims; |
151 | |
6228 | 152 // Note reversal of dimensions for column major storage in FFTW. |
153 octave_idx_type nn = 1; | |
154 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
155 | |
156 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
157 { | |
158 tmp[i] = dims(j); | |
159 nn *= dims(j); | |
160 } | |
161 | |
162 int plan_flags = 0; | |
163 bool plan_destroys_in = true; | |
164 | |
165 switch (meth) | |
166 { | |
167 case UNKNOWN: | |
168 case ESTIMATE: | |
169 plan_flags |= FFTW_ESTIMATE; | |
170 plan_destroys_in = false; | |
171 break; | |
172 case MEASURE: | |
173 plan_flags |= FFTW_MEASURE; | |
174 break; | |
175 case PATIENT: | |
176 plan_flags |= FFTW_PATIENT; | |
177 break; | |
178 case EXHAUSTIVE: | |
179 plan_flags |= FFTW_EXHAUSTIVE; | |
180 break; | |
181 case HYBRID: | |
182 if (nn < 8193) | |
183 plan_flags |= FFTW_MEASURE; | |
184 else | |
185 { | |
186 plan_flags |= FFTW_ESTIMATE; | |
187 plan_destroys_in = false; | |
188 } | |
189 break; | |
190 } | |
191 | |
4808 | 192 if (ioalign) |
193 plan_flags &= ~FFTW_UNALIGNED; | |
194 else | |
195 plan_flags |= FFTW_UNALIGNED; | |
196 | |
3828 | 197 if (*cur_plan_p) |
198 fftw_destroy_plan (*cur_plan_p); | |
199 | |
6228 | 200 if (plan_destroys_in) |
201 { | |
202 // Create matrix with the same size and 16-byte alignment as input | |
203 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); | |
204 itmp = reinterpret_cast<Complex *> | |
205 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + | |
206 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); | |
4809 | 207 |
6228 | 208 *cur_plan_p = |
209 fftw_plan_many_dft (rank, tmp, howmany, | |
210 reinterpret_cast<fftw_complex *> (itmp), | |
211 0, stride, dist, reinterpret_cast<fftw_complex *> (out), | |
212 0, stride, dist, dir, plan_flags); | |
213 } | |
214 else | |
215 { | |
216 *cur_plan_p = | |
217 fftw_plan_many_dft (rank, tmp, howmany, | |
4773 | 218 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), |
4774 | 219 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
220 0, stride, dist, dir, plan_flags); | |
6228 | 221 } |
3828 | 222 |
223 if (*cur_plan_p == 0) | |
224 (*current_liboctave_error_handler) ("Error creating fftw plan"); | |
225 } | |
226 | |
227 return *cur_plan_p; | |
228 } | |
229 | |
4773 | 230 fftw_plan |
231 octave_fftw_planner::create_plan (const int rank, const dim_vector dims, | |
5275 | 232 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, |
4773 | 233 const double *in, Complex *out) |
3828 | 234 { |
4773 | 235 fftw_plan *cur_plan_p = &rplan; |
3828 | 236 bool create_new_plan = false; |
4808 | 237 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
3828 | 238 |
4809 | 239 // Don't create a new plan if we have a non SIMD plan already but |
240 // can do SIMD. This prevents endlessly recreating plans if we | |
241 // change the alignment. | |
242 | |
4783 | 243 if (rplan == 0 || rd != dist || rs != stride || rr != rank |
4808 | 244 || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false)) |
4773 | 245 create_new_plan = true; |
246 else | |
4809 | 247 { |
248 // We still might not have the same shape of array. | |
249 | |
250 for (int i = 0; i < rank; i++) | |
251 if (dims(i) != rn(i)) | |
252 { | |
253 create_new_plan = true; | |
254 break; | |
255 } | |
256 } | |
3828 | 257 |
258 if (create_new_plan) | |
259 { | |
4773 | 260 rd = dist; |
261 rs = stride; | |
262 rr = rank; | |
263 rh = howmany; | |
4808 | 264 rsimd_align = ioalign; |
4773 | 265 rn = dims; |
266 | |
6228 | 267 // Note reversal of dimensions for column major storage in FFTW. |
268 octave_idx_type nn = 1; | |
269 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
270 | |
271 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
272 { | |
273 tmp[i] = dims(j); | |
274 nn *= dims(j); | |
275 } | |
276 | |
277 int plan_flags = 0; | |
278 bool plan_destroys_in = true; | |
279 | |
280 switch (meth) | |
281 { | |
282 case UNKNOWN: | |
283 case ESTIMATE: | |
284 plan_flags |= FFTW_ESTIMATE; | |
285 plan_destroys_in = false; | |
286 break; | |
287 case MEASURE: | |
288 plan_flags |= FFTW_MEASURE; | |
289 break; | |
290 case PATIENT: | |
291 plan_flags |= FFTW_PATIENT; | |
292 break; | |
293 case EXHAUSTIVE: | |
294 plan_flags |= FFTW_EXHAUSTIVE; | |
295 break; | |
296 case HYBRID: | |
297 if (nn < 8193) | |
298 plan_flags |= FFTW_MEASURE; | |
299 else | |
300 { | |
301 plan_flags |= FFTW_ESTIMATE; | |
302 plan_destroys_in = false; | |
303 } | |
304 break; | |
305 } | |
306 | |
4808 | 307 if (ioalign) |
308 plan_flags &= ~FFTW_UNALIGNED; | |
309 else | |
310 plan_flags |= FFTW_UNALIGNED; | |
311 | |
3828 | 312 if (*cur_plan_p) |
4773 | 313 fftw_destroy_plan (*cur_plan_p); |
3828 | 314 |
6228 | 315 if (plan_destroys_in) |
316 { | |
317 // Create matrix with the same size and 16-byte alignment as input | |
318 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); | |
319 itmp = reinterpret_cast<double *> | |
320 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + | |
321 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); | |
4809 | 322 |
6228 | 323 *cur_plan_p = |
324 fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, | |
325 0, stride, dist, reinterpret_cast<fftw_complex *> (out), | |
326 0, stride, dist, plan_flags); | |
327 } | |
328 else | |
329 { | |
330 *cur_plan_p = | |
331 fftw_plan_many_dft_r2c (rank, tmp, howmany, | |
4773 | 332 (const_cast<double *> (in)), |
4774 | 333 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
334 0, stride, dist, plan_flags); | |
6228 | 335 } |
3828 | 336 |
337 if (*cur_plan_p == 0) | |
4773 | 338 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
3828 | 339 } |
340 | |
341 return *cur_plan_p; | |
342 } | |
343 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
344 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
345 octave_float_fftw_planner::octave_float_fftw_planner (void) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
346 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
347 meth = ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
348 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
349 plan[0] = plan[1] = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
350 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
351 simd_align[0] = simd_align[1] = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
352 inplace[0] = inplace[1] = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
353 n[0] = n[1] = dim_vector (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
354 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
355 rplan = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
356 rd = rs = rr = rh = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
357 rsimd_align = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
358 rn = dim_vector (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
359 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
360 // If we have a system wide wisdom file, import it. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
361 fftwf_import_system_wisdom (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
362 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
363 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
364 octave_float_fftw_planner::FftwMethod |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
365 octave_float_fftw_planner::method (void) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
366 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
367 return meth; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
368 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
369 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
370 octave_float_fftw_planner::FftwMethod |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
371 octave_float_fftw_planner::method (FftwMethod _meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
372 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
373 FftwMethod ret = meth; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
374 if (_meth == ESTIMATE || _meth == MEASURE || |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
375 _meth == PATIENT || _meth == EXHAUSTIVE || |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
376 _meth == HYBRID) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
377 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
378 if (meth != _meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
379 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
380 meth = _meth; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
381 if (rplan) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
382 fftwf_destroy_plan (rplan); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
383 if (plan[0]) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
384 fftwf_destroy_plan (plan[0]); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
385 if (plan[1]) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
386 fftwf_destroy_plan (plan[1]); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
387 rplan = plan[0] = plan[1] = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
388 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
389 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
390 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
391 ret = UNKNOWN; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
392 return ret; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
393 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
394 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
395 fftwf_plan |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
396 octave_float_fftw_planner::create_plan (int dir, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
397 const dim_vector dims, octave_idx_type howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
398 octave_idx_type stride, octave_idx_type dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
399 const FloatComplex *in, FloatComplex *out) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
400 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
401 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
402 fftwf_plan *cur_plan_p = &plan[which]; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
403 bool create_new_plan = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
404 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
405 bool ioinplace = (in == out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
406 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
407 // Don't create a new plan if we have a non SIMD plan already but |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
408 // can do SIMD. This prevents endlessly recreating plans if we |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
409 // change the alignment. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
410 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
411 if (plan[which] == 0 || d[which] != dist || s[which] != stride |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
412 || r[which] != rank || h[which] != howmany |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
413 || ioinplace != inplace[which] |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
414 || ((ioalign != simd_align[which]) ? !ioalign : false)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
415 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
416 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
417 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
418 // We still might not have the same shape of array. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
419 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
420 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
421 if (dims(i) != n[which](i)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
422 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
423 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
424 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
425 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
426 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
427 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
428 if (create_new_plan) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
429 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
430 d[which] = dist; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
431 s[which] = stride; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
432 r[which] = rank; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
433 h[which] = howmany; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
434 simd_align[which] = ioalign; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
435 inplace[which] = ioinplace; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
436 n[which] = dims; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
437 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
438 // Note reversal of dimensions for column major storage in FFTW. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
439 octave_idx_type nn = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
440 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
441 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
442 for (int i = 0, j = rank-1; i < rank; i++, j--) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
443 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
444 tmp[i] = dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
445 nn *= dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
446 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
447 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
448 int plan_flags = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
449 bool plan_destroys_in = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
450 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
451 switch (meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
452 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
453 case UNKNOWN: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
454 case ESTIMATE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
455 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
456 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
457 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
458 case MEASURE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
459 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
460 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
461 case PATIENT: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
462 plan_flags |= FFTW_PATIENT; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
463 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
464 case EXHAUSTIVE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
465 plan_flags |= FFTW_EXHAUSTIVE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
466 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
467 case HYBRID: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
468 if (nn < 8193) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
469 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
470 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
471 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
472 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
473 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
474 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
475 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
476 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
477 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
478 if (ioalign) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
479 plan_flags &= ~FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
480 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
481 plan_flags |= FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
482 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
483 if (*cur_plan_p) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
484 fftwf_destroy_plan (*cur_plan_p); |
3828 | 485 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
486 if (plan_destroys_in) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
487 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
488 // Create matrix with the same size and 16-byte alignment as input |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
489 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
490 itmp = reinterpret_cast<FloatComplex *> |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
491 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
492 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
493 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
494 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
495 fftwf_plan_many_dft (rank, tmp, howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
496 reinterpret_cast<fftwf_complex *> (itmp), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
497 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
498 0, stride, dist, dir, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
499 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
500 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
501 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
502 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
503 fftwf_plan_many_dft (rank, tmp, howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
504 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
505 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
506 0, stride, dist, dir, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
507 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
508 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
509 if (*cur_plan_p == 0) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
510 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
511 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
512 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
513 return *cur_plan_p; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
514 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
515 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
516 fftwf_plan |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
517 octave_float_fftw_planner::create_plan (const int rank, const dim_vector dims, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
518 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
519 const float *in, FloatComplex *out) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
520 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
521 fftwf_plan *cur_plan_p = &rplan; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
522 bool create_new_plan = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
523 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
524 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
525 // Don't create a new plan if we have a non SIMD plan already but |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
526 // can do SIMD. This prevents endlessly recreating plans if we |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
527 // change the alignment. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
528 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
529 if (rplan == 0 || rd != dist || rs != stride || rr != rank |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
530 || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
531 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
532 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
533 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
534 // We still might not have the same shape of array. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
535 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
536 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
537 if (dims(i) != rn(i)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
538 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
539 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
540 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
541 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
542 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
543 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
544 if (create_new_plan) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
545 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
546 rd = dist; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
547 rs = stride; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
548 rr = rank; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
549 rh = howmany; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
550 rsimd_align = ioalign; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
551 rn = dims; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
552 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
553 // Note reversal of dimensions for column major storage in FFTW. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
554 octave_idx_type nn = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
555 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
556 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
557 for (int i = 0, j = rank-1; i < rank; i++, j--) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
558 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
559 tmp[i] = dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
560 nn *= dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
561 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
562 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
563 int plan_flags = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
564 bool plan_destroys_in = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
565 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
566 switch (meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
567 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
568 case UNKNOWN: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
569 case ESTIMATE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
570 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
571 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
572 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
573 case MEASURE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
574 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
575 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
576 case PATIENT: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
577 plan_flags |= FFTW_PATIENT; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
578 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
579 case EXHAUSTIVE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
580 plan_flags |= FFTW_EXHAUSTIVE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
581 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
582 case HYBRID: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
583 if (nn < 8193) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
584 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
585 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
586 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
587 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
588 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
589 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
590 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
591 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
592 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
593 if (ioalign) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
594 plan_flags &= ~FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
595 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
596 plan_flags |= FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
597 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
598 if (*cur_plan_p) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
599 fftwf_destroy_plan (*cur_plan_p); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
600 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
601 if (plan_destroys_in) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
602 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
603 // Create matrix with the same size and 16-byte alignment as input |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
604 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
605 itmp = reinterpret_cast<float *> |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
606 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
607 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
608 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
609 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
610 fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
611 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
612 0, stride, dist, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
613 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
614 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
615 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
616 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
617 fftwf_plan_many_dft_r2c (rank, tmp, howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
618 (const_cast<float *> (in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
619 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
620 0, stride, dist, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
621 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
622 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
623 if (*cur_plan_p == 0) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
624 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
625 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
626 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
627 return *cur_plan_p; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
628 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
629 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
630 octave_fftw_planner fftw_planner; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
631 octave_float_fftw_planner float_fftw_planner; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
632 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
633 template <class T> |
4775 | 634 static inline void |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
635 convert_packcomplex_1d (T *out, size_t nr, size_t nc, |
5275 | 636 octave_idx_type stride, octave_idx_type dist) |
4773 | 637 { |
4785 | 638 OCTAVE_QUIT; |
639 | |
640 // Fill in the missing data. | |
641 | |
4773 | 642 for (size_t i = 0; i < nr; i++) |
643 for (size_t j = nc/2+1; j < nc; j++) | |
644 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]); | |
4785 | 645 |
646 OCTAVE_QUIT; | |
4773 | 647 } |
648 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
649 template <class T> |
4775 | 650 static inline void |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
651 convert_packcomplex_Nd (T *out, const dim_vector &dv) |
3828 | 652 { |
4773 | 653 size_t nc = dv(0); |
654 size_t nr = dv(1); | |
4808 | 655 size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1); |
4773 | 656 size_t nrp = nr * np; |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
657 T *ptr1, *ptr2; |
4773 | 658 |
4785 | 659 OCTAVE_QUIT; |
660 | |
661 // Create space for the missing elements. | |
662 | |
4773 | 663 for (size_t i = 0; i < nrp; i++) |
664 { | |
665 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); | |
666 ptr2 = out + i * nc; | |
667 for (size_t j = 0; j < nc/2+1; j++) | |
668 *ptr2++ = *ptr1++; | |
669 } | |
670 | |
4785 | 671 OCTAVE_QUIT; |
672 | |
673 // Fill in the missing data for the rank = 2 case directly for speed. | |
674 | |
4773 | 675 for (size_t i = 0; i < np; i++) |
676 { | |
677 for (size_t j = 1; j < nr; j++) | |
678 for (size_t k = nc/2+1; k < nc; k++) | |
679 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]); | |
680 | |
681 for (size_t j = nc/2+1; j < nc; j++) | |
682 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]); | |
683 } | |
684 | |
4785 | 685 OCTAVE_QUIT; |
686 | |
687 // Now do the permutations needed for rank > 2 cases. | |
688 | |
4773 | 689 size_t jstart = dv(0) * dv(1); |
690 size_t kstep = dv(0); | |
691 size_t nel = dv.numel (); | |
4785 | 692 |
4808 | 693 for (int inner = 2; inner < dv.length (); inner++) |
4773 | 694 { |
695 size_t jmax = jstart * dv(inner); | |
696 for (size_t i = 0; i < nel; i+=jmax) | |
697 for (size_t j = jstart, jj = jmax-jstart; j < jj; | |
698 j+=jstart, jj-=jstart) | |
699 for (size_t k = 0; k < jstart; k+= kstep) | |
700 for (size_t l = nc/2+1; l < nc; l++) | |
701 { | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
702 T tmp = out[i+ j + k + l]; |
4773 | 703 out[i + j + k + l] = out[i + jj + k + l]; |
704 out[i + jj + k + l] = tmp; | |
705 } | |
706 jstart = jmax; | |
707 } | |
4785 | 708 |
709 OCTAVE_QUIT; | |
4773 | 710 } |
711 | |
712 int | |
713 octave_fftw::fft (const double *in, Complex *out, size_t npts, | |
5275 | 714 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
4773 | 715 { |
716 dist = (dist < 0 ? npts : dist); | |
717 | |
718 dim_vector dv (npts); | |
719 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist, | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
720 in, out); |
4773 | 721 |
722 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
723 reinterpret_cast<fftw_complex *> (out)); |
4773 | 724 |
4809 | 725 // Need to create other half of the transform. |
726 | |
4773 | 727 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
3828 | 728 |
729 return 0; | |
730 } | |
731 | |
732 int | |
4773 | 733 octave_fftw::fft (const Complex *in, Complex *out, size_t npts, |
5275 | 734 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
3828 | 735 { |
4773 | 736 dist = (dist < 0 ? npts : dist); |
737 | |
738 dim_vector dv (npts); | |
739 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, | |
740 stride, dist, in, out); | |
741 | |
742 fftw_execute_dft (plan, | |
743 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
744 reinterpret_cast<fftw_complex *> (out)); | |
745 | |
746 return 0; | |
747 } | |
748 | |
749 int | |
750 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, | |
5275 | 751 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
4773 | 752 { |
753 dist = (dist < 0 ? npts : dist); | |
754 | |
755 dim_vector dv (npts); | |
756 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, | |
757 stride, dist, in, out); | |
758 | |
759 fftw_execute_dft (plan, | |
760 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
761 reinterpret_cast<fftw_complex *> (out)); | |
3828 | 762 |
763 const Complex scale = npts; | |
4773 | 764 for (size_t j = 0; j < nsamples; j++) |
765 for (size_t i = 0; i < npts; i++) | |
766 out[i*stride + j*dist] /= scale; | |
3828 | 767 |
768 return 0; | |
769 } | |
770 | |
771 int | |
4773 | 772 octave_fftw::fftNd (const double *in, Complex *out, const int rank, |
773 const dim_vector &dv) | |
3828 | 774 { |
5275 | 775 octave_idx_type dist = 1; |
4773 | 776 for (int i = 0; i < rank; i++) |
777 dist *= dv(i); | |
778 | |
779 // Fool with the position of the start of the output matrix, so that | |
4809 | 780 // creating other half of the matrix won't cause cache problems. |
781 | |
5275 | 782 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
4773 | 783 |
784 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist, | |
785 in, out + offset); | |
786 | |
787 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), | |
788 reinterpret_cast<fftw_complex *> (out+ offset)); | |
789 | |
4809 | 790 // Need to create other half of the transform. |
791 | |
4773 | 792 convert_packcomplex_Nd (out, dv); |
3828 | 793 |
794 return 0; | |
795 } | |
796 | |
797 int | |
4773 | 798 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank, |
799 const dim_vector &dv) | |
3828 | 800 { |
5275 | 801 octave_idx_type dist = 1; |
4773 | 802 for (int i = 0; i < rank; i++) |
803 dist *= dv(i); | |
804 | |
805 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, | |
806 dist, in, out); | |
807 | |
808 fftw_execute_dft (plan, | |
809 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
810 reinterpret_cast<fftw_complex *> (out)); | |
811 | |
812 return 0; | |
813 } | |
3828 | 814 |
4773 | 815 int |
816 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank, | |
4784 | 817 const dim_vector &dv) |
4773 | 818 { |
5275 | 819 octave_idx_type dist = 1; |
4773 | 820 for (int i = 0; i < rank; i++) |
821 dist *= dv(i); | |
822 | |
823 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, | |
824 dist, in, out); | |
825 | |
826 fftw_execute_dft (plan, | |
827 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
828 reinterpret_cast<fftw_complex *> (out)); | |
829 | |
830 const size_t npts = dv.numel (); | |
3828 | 831 const Complex scale = npts; |
832 for (size_t i = 0; i < npts; i++) | |
4773 | 833 out[i] /= scale; |
3828 | 834 |
835 return 0; | |
836 } | |
837 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
838 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
839 octave_fftw::fft (const float *in, FloatComplex *out, size_t npts, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
840 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
841 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
842 dist = (dist < 0 ? npts : dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
843 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
844 dim_vector dv (npts); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
845 fftwf_plan plan = float_fftw_planner.create_plan (1, dv, nsamples, stride, dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
846 in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
847 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
848 fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
849 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
850 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
851 // Need to create other half of the transform. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
852 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
853 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
854 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
855 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
856 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
857 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
858 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
859 octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
860 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
861 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
862 dist = (dist < 0 ? npts : dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
863 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
864 dim_vector dv (npts); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
865 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
866 stride, dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
867 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
868 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
869 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
870 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
871 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
872 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
873 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
874 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
875 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
876 octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
877 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
878 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
879 dist = (dist < 0 ? npts : dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
880 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
881 dim_vector dv (npts); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
882 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
883 stride, dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
884 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
885 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
886 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
887 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
888 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
889 const FloatComplex scale = npts; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
890 for (size_t j = 0; j < nsamples; j++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
891 for (size_t i = 0; i < npts; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
892 out[i*stride + j*dist] /= scale; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
893 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
894 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
895 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
896 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
897 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
898 octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
899 const dim_vector &dv) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
900 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
901 octave_idx_type dist = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
902 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
903 dist *= dv(i); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
904 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
905 // Fool with the position of the start of the output matrix, so that |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
906 // creating other half of the matrix won't cause cache problems. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
907 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
908 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
909 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
910 fftwf_plan plan = float_fftw_planner.create_plan (rank, dv, 1, 1, dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
911 in, out + offset); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
912 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
913 fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
914 reinterpret_cast<fftwf_complex *> (out+ offset)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
915 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
916 // Need to create other half of the transform. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
917 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
918 convert_packcomplex_Nd (out, dv); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
919 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
920 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
921 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
922 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
923 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
924 octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
925 const dim_vector &dv) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
926 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
927 octave_idx_type dist = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
928 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
929 dist *= dv(i); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
930 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
931 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
932 dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
933 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
934 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
935 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
936 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
937 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
938 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
939 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
940 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
941 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
942 octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
943 const dim_vector &dv) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
944 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
945 octave_idx_type dist = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
946 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
947 dist *= dv(i); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
948 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
949 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
950 dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
951 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
952 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
953 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
954 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
955 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
956 const size_t npts = dv.numel (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
957 const FloatComplex scale = npts; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
958 for (size_t i = 0; i < npts; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
959 out[i] /= scale; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
960 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
961 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
962 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
963 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
964 |
3828 | 965 #endif |
966 | |
967 /* | |
968 ;;; Local Variables: *** | |
969 ;;; mode: C++ *** | |
970 ;;; End: *** | |
971 */ | |
972 |