3828
|
1 /* |
|
2 |
|
3 This file is part of Octave. |
|
4 |
|
5 Octave is free software; you can redistribute it and/or modify it |
|
6 under the terms of the GNU General Public License as published by the |
|
7 Free Software Foundation; either version 2, or (at your option) any |
|
8 later version. |
|
9 |
|
10 Octave is distributed in the hope that it will be useful, but WITHOUT |
|
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 for more details. |
|
14 |
|
15 You should have received a copy of the GNU General Public License |
|
16 along with Octave; see the file COPYING. If not, write to the Free |
5307
|
17 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|
18 02110-1301, USA. |
3828
|
19 |
|
20 */ |
|
21 |
|
22 #ifdef HAVE_CONFIG_H |
|
23 #include <config.h> |
|
24 #endif |
|
25 |
4773
|
26 #if defined (HAVE_FFTW3) |
3828
|
27 |
4775
|
28 #include <iostream> |
|
29 #include <vector> |
|
30 |
4786
|
31 #include "lo-error.h" |
3828
|
32 #include "oct-fftw.h" |
4786
|
33 #include "quit.h" |
3828
|
34 |
4809
|
35 // Helper class to create and cache fftw plans for both 1d and |
6228
|
36 // 2d. This implementation defaults to using FFTW_ESTIMATE to create |
|
37 // the plans, which in theory is suboptimal, but provides quit |
|
38 // reasonable performance. |
4773
|
39 |
|
40 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3 |
6228
|
41 // destroys the input and output arrays. We must therefore create a |
|
42 // temporary input array with the same size and 16-byte alignment as |
|
43 // the original array and use that for the planner. Note that we also |
|
44 // use any wisdom that is available, either in a FFTW3 system wide file |
|
45 // or as supplied by the user. |
4773
|
46 |
5775
|
47 // FIXME -- if we can ensure 16 byte alignment in Array<T> |
4809
|
48 // (<T> *data) the FFTW3 can use SIMD instructions for further |
|
49 // acceleration. |
4773
|
50 |
4809
|
51 // Note that it is profitable to store the FFTW3 plans, for small |
|
52 // ffts. |
3828
|
53 |
4808
|
54 octave_fftw_planner::octave_fftw_planner (void) |
3828
|
55 { |
6228
|
56 meth = ESTIMATE; |
3828
|
57 |
|
58 plan[0] = plan[1] = 0; |
4773
|
59 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
4808
|
60 simd_align[0] = simd_align[1] = false; |
5044
|
61 inplace[0] = inplace[1] = false; |
4808
|
62 n[0] = n[1] = dim_vector (); |
4773
|
63 |
|
64 rplan = 0; |
|
65 rd = rs = rr = rh = 0; |
4808
|
66 rsimd_align = false; |
4773
|
67 rn = dim_vector (); |
5044
|
68 |
4809
|
69 // If we have a system wide wisdom file, import it. |
4808
|
70 fftw_import_system_wisdom (); |
3828
|
71 } |
|
72 |
6228
|
73 octave_fftw_planner::FftwMethod |
|
74 octave_fftw_planner::method (void) |
|
75 { |
|
76 return meth; |
|
77 } |
|
78 |
|
79 octave_fftw_planner::FftwMethod |
|
80 octave_fftw_planner::method (FftwMethod _meth) |
|
81 { |
|
82 FftwMethod ret = meth; |
|
83 if (_meth == ESTIMATE || _meth == MEASURE || |
|
84 _meth == PATIENT || _meth == EXHAUSTIVE || |
|
85 _meth == HYBRID) |
|
86 { |
|
87 if (meth != _meth) |
|
88 { |
|
89 meth = _meth; |
|
90 if (rplan) |
|
91 fftw_destroy_plan (rplan); |
|
92 if (plan[0]) |
|
93 fftw_destroy_plan (plan[0]); |
|
94 if (plan[1]) |
|
95 fftw_destroy_plan (plan[1]); |
|
96 rplan = plan[0] = plan[1] = 0; |
|
97 } |
|
98 } |
|
99 else |
|
100 ret = UNKNOWN; |
|
101 return ret; |
|
102 } |
|
103 |
4808
|
104 #define CHECK_SIMD_ALIGNMENT(x) \ |
|
105 ((reinterpret_cast<ptrdiff_t> (x)) & 0xF == 0) |
|
106 |
3828
|
107 fftw_plan |
4773
|
108 octave_fftw_planner::create_plan (int dir, const int rank, |
5275
|
109 const dim_vector dims, octave_idx_type howmany, |
|
110 octave_idx_type stride, octave_idx_type dist, |
4773
|
111 const Complex *in, Complex *out) |
3828
|
112 { |
4773
|
113 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
3828
|
114 fftw_plan *cur_plan_p = &plan[which]; |
|
115 bool create_new_plan = false; |
4808
|
116 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
5044
|
117 bool ioinplace = (in == out); |
3828
|
118 |
4809
|
119 // Don't create a new plan if we have a non SIMD plan already but |
|
120 // can do SIMD. This prevents endlessly recreating plans if we |
|
121 // change the alignment. |
|
122 |
4783
|
123 if (plan[which] == 0 || d[which] != dist || s[which] != stride |
5044
|
124 || r[which] != rank || h[which] != howmany |
|
125 || ioinplace != inplace[which] |
4808
|
126 || ((ioalign != simd_align[which]) ? !ioalign : false)) |
4773
|
127 create_new_plan = true; |
|
128 else |
4809
|
129 { |
|
130 // We still might not have the same shape of array. |
|
131 |
|
132 for (int i = 0; i < rank; i++) |
|
133 if (dims(i) != n[which](i)) |
|
134 { |
|
135 create_new_plan = true; |
|
136 break; |
|
137 } |
|
138 } |
3828
|
139 |
|
140 if (create_new_plan) |
|
141 { |
4773
|
142 d[which] = dist; |
|
143 s[which] = stride; |
|
144 r[which] = rank; |
|
145 h[which] = howmany; |
4808
|
146 simd_align[which] = ioalign; |
5044
|
147 inplace[which] = ioinplace; |
4773
|
148 n[which] = dims; |
|
149 |
6228
|
150 // Note reversal of dimensions for column major storage in FFTW. |
|
151 octave_idx_type nn = 1; |
|
152 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
|
153 |
|
154 for (int i = 0, j = rank-1; i < rank; i++, j--) |
|
155 { |
|
156 tmp[i] = dims(j); |
|
157 nn *= dims(j); |
|
158 } |
|
159 |
|
160 int plan_flags = 0; |
|
161 bool plan_destroys_in = true; |
|
162 |
|
163 switch (meth) |
|
164 { |
|
165 case UNKNOWN: |
|
166 case ESTIMATE: |
|
167 plan_flags |= FFTW_ESTIMATE; |
|
168 plan_destroys_in = false; |
|
169 break; |
|
170 case MEASURE: |
|
171 plan_flags |= FFTW_MEASURE; |
|
172 break; |
|
173 case PATIENT: |
|
174 plan_flags |= FFTW_PATIENT; |
|
175 break; |
|
176 case EXHAUSTIVE: |
|
177 plan_flags |= FFTW_EXHAUSTIVE; |
|
178 break; |
|
179 case HYBRID: |
|
180 if (nn < 8193) |
|
181 plan_flags |= FFTW_MEASURE; |
|
182 else |
|
183 { |
|
184 plan_flags |= FFTW_ESTIMATE; |
|
185 plan_destroys_in = false; |
|
186 } |
|
187 break; |
|
188 } |
|
189 |
4808
|
190 if (ioalign) |
|
191 plan_flags &= ~FFTW_UNALIGNED; |
|
192 else |
|
193 plan_flags |= FFTW_UNALIGNED; |
|
194 |
3828
|
195 if (*cur_plan_p) |
|
196 fftw_destroy_plan (*cur_plan_p); |
|
197 |
6228
|
198 if (plan_destroys_in) |
|
199 { |
|
200 // Create matrix with the same size and 16-byte alignment as input |
|
201 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); |
|
202 itmp = reinterpret_cast<Complex *> |
|
203 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + |
|
204 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); |
4809
|
205 |
6228
|
206 *cur_plan_p = |
|
207 fftw_plan_many_dft (rank, tmp, howmany, |
|
208 reinterpret_cast<fftw_complex *> (itmp), |
|
209 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
|
210 0, stride, dist, dir, plan_flags); |
|
211 } |
|
212 else |
|
213 { |
|
214 *cur_plan_p = |
|
215 fftw_plan_many_dft (rank, tmp, howmany, |
4773
|
216 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), |
4774
|
217 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
|
218 0, stride, dist, dir, plan_flags); |
6228
|
219 } |
3828
|
220 |
|
221 if (*cur_plan_p == 0) |
|
222 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
|
223 } |
|
224 |
|
225 return *cur_plan_p; |
|
226 } |
|
227 |
4773
|
228 fftw_plan |
|
229 octave_fftw_planner::create_plan (const int rank, const dim_vector dims, |
5275
|
230 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, |
4773
|
231 const double *in, Complex *out) |
3828
|
232 { |
4773
|
233 fftw_plan *cur_plan_p = &rplan; |
3828
|
234 bool create_new_plan = false; |
4808
|
235 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
3828
|
236 |
4809
|
237 // Don't create a new plan if we have a non SIMD plan already but |
|
238 // can do SIMD. This prevents endlessly recreating plans if we |
|
239 // change the alignment. |
|
240 |
4783
|
241 if (rplan == 0 || rd != dist || rs != stride || rr != rank |
4808
|
242 || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false)) |
4773
|
243 create_new_plan = true; |
|
244 else |
4809
|
245 { |
|
246 // We still might not have the same shape of array. |
|
247 |
|
248 for (int i = 0; i < rank; i++) |
|
249 if (dims(i) != rn(i)) |
|
250 { |
|
251 create_new_plan = true; |
|
252 break; |
|
253 } |
|
254 } |
3828
|
255 |
|
256 if (create_new_plan) |
|
257 { |
4773
|
258 rd = dist; |
|
259 rs = stride; |
|
260 rr = rank; |
|
261 rh = howmany; |
4808
|
262 rsimd_align = ioalign; |
4773
|
263 rn = dims; |
|
264 |
6228
|
265 // Note reversal of dimensions for column major storage in FFTW. |
|
266 octave_idx_type nn = 1; |
|
267 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
|
268 |
|
269 for (int i = 0, j = rank-1; i < rank; i++, j--) |
|
270 { |
|
271 tmp[i] = dims(j); |
|
272 nn *= dims(j); |
|
273 } |
|
274 |
|
275 int plan_flags = 0; |
|
276 bool plan_destroys_in = true; |
|
277 |
|
278 switch (meth) |
|
279 { |
|
280 case UNKNOWN: |
|
281 case ESTIMATE: |
|
282 plan_flags |= FFTW_ESTIMATE; |
|
283 plan_destroys_in = false; |
|
284 break; |
|
285 case MEASURE: |
|
286 plan_flags |= FFTW_MEASURE; |
|
287 break; |
|
288 case PATIENT: |
|
289 plan_flags |= FFTW_PATIENT; |
|
290 break; |
|
291 case EXHAUSTIVE: |
|
292 plan_flags |= FFTW_EXHAUSTIVE; |
|
293 break; |
|
294 case HYBRID: |
|
295 if (nn < 8193) |
|
296 plan_flags |= FFTW_MEASURE; |
|
297 else |
|
298 { |
|
299 plan_flags |= FFTW_ESTIMATE; |
|
300 plan_destroys_in = false; |
|
301 } |
|
302 break; |
|
303 } |
|
304 |
4808
|
305 if (ioalign) |
|
306 plan_flags &= ~FFTW_UNALIGNED; |
|
307 else |
|
308 plan_flags |= FFTW_UNALIGNED; |
|
309 |
3828
|
310 if (*cur_plan_p) |
4773
|
311 fftw_destroy_plan (*cur_plan_p); |
3828
|
312 |
6228
|
313 if (plan_destroys_in) |
|
314 { |
|
315 // Create matrix with the same size and 16-byte alignment as input |
|
316 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); |
|
317 itmp = reinterpret_cast<double *> |
|
318 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + |
|
319 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); |
4809
|
320 |
6228
|
321 *cur_plan_p = |
|
322 fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, |
|
323 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
|
324 0, stride, dist, plan_flags); |
|
325 } |
|
326 else |
|
327 { |
|
328 *cur_plan_p = |
|
329 fftw_plan_many_dft_r2c (rank, tmp, howmany, |
4773
|
330 (const_cast<double *> (in)), |
4774
|
331 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
|
332 0, stride, dist, plan_flags); |
6228
|
333 } |
3828
|
334 |
|
335 if (*cur_plan_p == 0) |
4773
|
336 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
3828
|
337 } |
|
338 |
|
339 return *cur_plan_p; |
|
340 } |
|
341 |
6228
|
342 octave_fftw_planner fftw_planner; |
3828
|
343 |
4775
|
344 static inline void |
|
345 convert_packcomplex_1d (Complex *out, size_t nr, size_t nc, |
5275
|
346 octave_idx_type stride, octave_idx_type dist) |
4773
|
347 { |
4785
|
348 OCTAVE_QUIT; |
|
349 |
|
350 // Fill in the missing data. |
|
351 |
4773
|
352 for (size_t i = 0; i < nr; i++) |
|
353 for (size_t j = nc/2+1; j < nc; j++) |
|
354 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]); |
4785
|
355 |
|
356 OCTAVE_QUIT; |
4773
|
357 } |
|
358 |
4775
|
359 static inline void |
|
360 convert_packcomplex_Nd (Complex *out, const dim_vector &dv) |
3828
|
361 { |
4773
|
362 size_t nc = dv(0); |
|
363 size_t nr = dv(1); |
4808
|
364 size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1); |
4773
|
365 size_t nrp = nr * np; |
|
366 Complex *ptr1, *ptr2; |
|
367 |
4785
|
368 OCTAVE_QUIT; |
|
369 |
|
370 // Create space for the missing elements. |
|
371 |
4773
|
372 for (size_t i = 0; i < nrp; i++) |
|
373 { |
|
374 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); |
|
375 ptr2 = out + i * nc; |
|
376 for (size_t j = 0; j < nc/2+1; j++) |
|
377 *ptr2++ = *ptr1++; |
|
378 } |
|
379 |
4785
|
380 OCTAVE_QUIT; |
|
381 |
|
382 // Fill in the missing data for the rank = 2 case directly for speed. |
|
383 |
4773
|
384 for (size_t i = 0; i < np; i++) |
|
385 { |
|
386 for (size_t j = 1; j < nr; j++) |
|
387 for (size_t k = nc/2+1; k < nc; k++) |
|
388 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]); |
|
389 |
|
390 for (size_t j = nc/2+1; j < nc; j++) |
|
391 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]); |
|
392 } |
|
393 |
4785
|
394 OCTAVE_QUIT; |
|
395 |
|
396 // Now do the permutations needed for rank > 2 cases. |
|
397 |
4773
|
398 size_t jstart = dv(0) * dv(1); |
|
399 size_t kstep = dv(0); |
|
400 size_t nel = dv.numel (); |
4785
|
401 |
4808
|
402 for (int inner = 2; inner < dv.length (); inner++) |
4773
|
403 { |
|
404 size_t jmax = jstart * dv(inner); |
|
405 for (size_t i = 0; i < nel; i+=jmax) |
|
406 for (size_t j = jstart, jj = jmax-jstart; j < jj; |
|
407 j+=jstart, jj-=jstart) |
|
408 for (size_t k = 0; k < jstart; k+= kstep) |
|
409 for (size_t l = nc/2+1; l < nc; l++) |
|
410 { |
|
411 Complex tmp = out[i+ j + k + l]; |
|
412 out[i + j + k + l] = out[i + jj + k + l]; |
|
413 out[i + jj + k + l] = tmp; |
|
414 } |
|
415 jstart = jmax; |
|
416 } |
4785
|
417 |
|
418 OCTAVE_QUIT; |
4773
|
419 } |
|
420 |
|
421 int |
|
422 octave_fftw::fft (const double *in, Complex *out, size_t npts, |
5275
|
423 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
4773
|
424 { |
|
425 dist = (dist < 0 ? npts : dist); |
|
426 |
|
427 dim_vector dv (npts); |
|
428 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist, |
|
429 in, out); |
|
430 |
|
431 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), |
|
432 reinterpret_cast<fftw_complex *> (out)); |
|
433 |
4809
|
434 // Need to create other half of the transform. |
|
435 |
4773
|
436 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
3828
|
437 |
|
438 return 0; |
|
439 } |
|
440 |
|
441 int |
4773
|
442 octave_fftw::fft (const Complex *in, Complex *out, size_t npts, |
5275
|
443 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
3828
|
444 { |
4773
|
445 dist = (dist < 0 ? npts : dist); |
|
446 |
|
447 dim_vector dv (npts); |
|
448 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, |
|
449 stride, dist, in, out); |
|
450 |
|
451 fftw_execute_dft (plan, |
|
452 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
453 reinterpret_cast<fftw_complex *> (out)); |
|
454 |
|
455 return 0; |
|
456 } |
|
457 |
|
458 int |
|
459 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, |
5275
|
460 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
4773
|
461 { |
|
462 dist = (dist < 0 ? npts : dist); |
|
463 |
|
464 dim_vector dv (npts); |
|
465 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, |
|
466 stride, dist, in, out); |
|
467 |
|
468 fftw_execute_dft (plan, |
|
469 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
470 reinterpret_cast<fftw_complex *> (out)); |
3828
|
471 |
|
472 const Complex scale = npts; |
4773
|
473 for (size_t j = 0; j < nsamples; j++) |
|
474 for (size_t i = 0; i < npts; i++) |
|
475 out[i*stride + j*dist] /= scale; |
3828
|
476 |
|
477 return 0; |
|
478 } |
|
479 |
|
480 int |
4773
|
481 octave_fftw::fftNd (const double *in, Complex *out, const int rank, |
|
482 const dim_vector &dv) |
3828
|
483 { |
5275
|
484 octave_idx_type dist = 1; |
4773
|
485 for (int i = 0; i < rank; i++) |
|
486 dist *= dv(i); |
|
487 |
|
488 // Fool with the position of the start of the output matrix, so that |
4809
|
489 // creating other half of the matrix won't cause cache problems. |
|
490 |
5275
|
491 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
4773
|
492 |
|
493 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist, |
|
494 in, out + offset); |
|
495 |
|
496 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), |
|
497 reinterpret_cast<fftw_complex *> (out+ offset)); |
|
498 |
4809
|
499 // Need to create other half of the transform. |
|
500 |
4773
|
501 convert_packcomplex_Nd (out, dv); |
3828
|
502 |
|
503 return 0; |
|
504 } |
|
505 |
|
506 int |
4773
|
507 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank, |
|
508 const dim_vector &dv) |
3828
|
509 { |
5275
|
510 octave_idx_type dist = 1; |
4773
|
511 for (int i = 0; i < rank; i++) |
|
512 dist *= dv(i); |
|
513 |
|
514 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, |
|
515 dist, in, out); |
|
516 |
|
517 fftw_execute_dft (plan, |
|
518 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
519 reinterpret_cast<fftw_complex *> (out)); |
|
520 |
|
521 return 0; |
|
522 } |
3828
|
523 |
4773
|
524 int |
|
525 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank, |
4784
|
526 const dim_vector &dv) |
4773
|
527 { |
5275
|
528 octave_idx_type dist = 1; |
4773
|
529 for (int i = 0; i < rank; i++) |
|
530 dist *= dv(i); |
|
531 |
|
532 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, |
|
533 dist, in, out); |
|
534 |
|
535 fftw_execute_dft (plan, |
|
536 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
537 reinterpret_cast<fftw_complex *> (out)); |
|
538 |
|
539 const size_t npts = dv.numel (); |
3828
|
540 const Complex scale = npts; |
|
541 for (size_t i = 0; i < npts; i++) |
4773
|
542 out[i] /= scale; |
3828
|
543 |
|
544 return 0; |
|
545 } |
|
546 |
|
547 #endif |
|
548 |
|
549 /* |
|
550 ;;; Local Variables: *** |
|
551 ;;; mode: C++ *** |
|
552 ;;; End: *** |
|
553 */ |
|
554 |