3828
|
1 /* |
|
2 |
|
3 This file is part of Octave. |
|
4 |
|
5 Octave is free software; you can redistribute it and/or modify it |
|
6 under the terms of the GNU General Public License as published by the |
|
7 Free Software Foundation; either version 2, or (at your option) any |
|
8 later version. |
|
9 |
|
10 Octave is distributed in the hope that it will be useful, but WITHOUT |
|
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 for more details. |
|
14 |
|
15 You should have received a copy of the GNU General Public License |
|
16 along with Octave; see the file COPYING. If not, write to the Free |
|
17 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|
18 |
|
19 */ |
|
20 |
|
21 #ifdef HAVE_CONFIG_H |
|
22 #include <config.h> |
|
23 #endif |
|
24 |
4773
|
25 #if defined (HAVE_FFTW3) |
3828
|
26 |
4775
|
27 #include <iostream> |
|
28 #include <vector> |
|
29 |
4786
|
30 #include "lo-error.h" |
3828
|
31 #include "oct-fftw.h" |
4786
|
32 #include "quit.h" |
3828
|
33 |
|
34 // Helper class to create and cache fftw plans for both 1d and 2d. This |
|
35 // implementation uses FFTW_ESTIMATE to create the plans, which in theory |
4773
|
36 // is suboptimal, but provides quite reasonable performance. |
|
37 |
|
38 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3 |
|
39 // destroys the input and output arrays. So with the form of the |
|
40 // current code we definitely want FFTW_ESTIMATE!! However, we use |
|
41 // any wisdom that is available, either in a FFTW3 system wide file |
|
42 // or as supplied by the user. |
|
43 |
|
44 // XXX FIXME XXX If we can ensure 16 byte alignment in Array<T> (<T> *data) |
|
45 // then FFTW3 can use SIMD instructions for further acceleration. |
|
46 |
|
47 // Note that it is profitable to store the FFTW3 plans, for small ffts |
3828
|
48 |
|
49 class |
|
50 octave_fftw_planner |
|
51 { |
|
52 public: |
|
53 octave_fftw_planner (); |
|
54 |
4773
|
55 fftw_plan create_plan (int dir, const int rank, const dim_vector dims, |
|
56 int howmany, int stride, int dist, |
|
57 const Complex *in, Complex *out); |
|
58 fftw_plan create_plan (const int rank, const dim_vector dims, |
|
59 int howmany, int stride, int dist, |
|
60 const double *in, Complex *out); |
3828
|
61 |
|
62 private: |
|
63 int plan_flags; |
|
64 |
4773
|
65 // Plan for fft and ifft of complex values |
3828
|
66 fftw_plan plan[2]; |
4773
|
67 int d[2]; // dist |
|
68 int s[2]; // stride |
|
69 int r[2]; // rank |
|
70 int h[2]; // howmany |
|
71 dim_vector n[2]; // dims |
|
72 char ialign[2]; |
|
73 char oalign[2]; |
3828
|
74 |
4773
|
75 // Plan for fft of real values |
|
76 fftw_plan rplan; |
|
77 int rd; // dist |
|
78 int rs; // stride |
|
79 int rr; // rank |
|
80 int rh; // howmany |
|
81 dim_vector rn; // dims |
|
82 char rialign; |
|
83 char roalign; |
3828
|
84 }; |
|
85 |
|
86 octave_fftw_planner::octave_fftw_planner () |
|
87 { |
|
88 plan_flags = FFTW_ESTIMATE; |
|
89 |
|
90 plan[0] = plan[1] = 0; |
4773
|
91 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
|
92 ialign[0] = ialign[1] = oalign[0] = oalign[1] = 0; |
|
93 n[0] = n[1] = dim_vector(); |
|
94 |
|
95 rplan = 0; |
|
96 rd = rs = rr = rh = 0; |
|
97 rialign = roalign = 0; |
|
98 rn = dim_vector (); |
|
99 |
|
100 // If we have a system wide wisdom file, import it |
|
101 fftw_import_system_wisdom ( ); |
3828
|
102 } |
|
103 |
|
104 fftw_plan |
4773
|
105 octave_fftw_planner::create_plan (int dir, const int rank, |
|
106 const dim_vector dims, int howmany, |
|
107 int stride, int dist, |
|
108 const Complex *in, Complex *out) |
3828
|
109 { |
4773
|
110 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
3828
|
111 fftw_plan *cur_plan_p = &plan[which]; |
|
112 bool create_new_plan = false; |
4783
|
113 char in_align = (reinterpret_cast<long> (in)) & 0xF; |
|
114 char out_align = (reinterpret_cast<long> (out)) & 0xF; |
3828
|
115 |
4783
|
116 if (plan[which] == 0 || d[which] != dist || s[which] != stride |
|
117 || r[which] != rank || h[which] != howmany |
|
118 || ialign[which] != in_align || oalign[which] != out_align) |
4773
|
119 create_new_plan = true; |
|
120 else |
|
121 // We still might not have the same shape of array |
|
122 for (int i = 0; i < rank; i++) |
|
123 if (dims(i) != n[which](i)) |
|
124 { |
|
125 create_new_plan = true; |
|
126 break; |
|
127 } |
3828
|
128 |
|
129 if (create_new_plan) |
|
130 { |
4773
|
131 d[which] = dist; |
|
132 s[which] = stride; |
|
133 r[which] = rank; |
|
134 h[which] = howmany; |
|
135 ialign[which] = in_align; |
|
136 oalign[which] = out_align; |
|
137 n[which] = dims; |
|
138 |
3828
|
139 if (*cur_plan_p) |
|
140 fftw_destroy_plan (*cur_plan_p); |
|
141 |
4773
|
142 // Note reversal of dimensions for column major storage in FFTW |
|
143 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
|
144 for (int i = 0, j = rank-1; i < rank; i++, j--) |
|
145 tmp[i] = dims(j); |
|
146 |
|
147 *cur_plan_p = |
|
148 fftw_plan_many_dft (rank, tmp, howmany, |
|
149 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), |
4774
|
150 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
|
151 0, stride, dist, dir, plan_flags); |
3828
|
152 |
|
153 if (*cur_plan_p == 0) |
|
154 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
|
155 } |
|
156 |
|
157 return *cur_plan_p; |
|
158 } |
|
159 |
4773
|
160 fftw_plan |
|
161 octave_fftw_planner::create_plan (const int rank, const dim_vector dims, |
|
162 int howmany, int stride, int dist, |
|
163 const double *in, Complex *out) |
3828
|
164 { |
4773
|
165 fftw_plan *cur_plan_p = &rplan; |
3828
|
166 bool create_new_plan = false; |
4783
|
167 char in_align = (reinterpret_cast<long> (in)) & 0xF; |
|
168 char out_align = (reinterpret_cast<long> (out)) & 0xF; |
3828
|
169 |
4783
|
170 if (rplan == 0 || rd != dist || rs != stride || rr != rank |
|
171 || rh != howmany || rialign != in_align || roalign != out_align) |
4773
|
172 create_new_plan = true; |
|
173 else |
|
174 // We still might not have the same shape of array |
|
175 for (int i = 0; i < rank; i++) |
|
176 if (dims(i) != rn(i)) |
|
177 { |
|
178 create_new_plan = true; |
|
179 break; |
|
180 } |
3828
|
181 |
|
182 if (create_new_plan) |
|
183 { |
4773
|
184 rd = dist; |
|
185 rs = stride; |
|
186 rr = rank; |
|
187 rh = howmany; |
|
188 rialign = in_align; |
|
189 roalign = out_align; |
|
190 rn = dims; |
|
191 |
3828
|
192 if (*cur_plan_p) |
4773
|
193 fftw_destroy_plan (*cur_plan_p); |
3828
|
194 |
4773
|
195 // Note reversal of dimensions for column major storage in FFTW |
|
196 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
|
197 for (int i = 0, j = rank-1; i < rank; i++, j--) |
|
198 tmp[i] = dims(j); |
|
199 |
|
200 *cur_plan_p = |
|
201 fftw_plan_many_dft_r2c (rank, tmp, howmany, |
|
202 (const_cast<double *> (in)), |
4774
|
203 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
|
204 0, stride, dist, plan_flags); |
3828
|
205 |
|
206 if (*cur_plan_p == 0) |
4773
|
207 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
3828
|
208 } |
|
209 |
|
210 return *cur_plan_p; |
|
211 } |
|
212 |
|
213 static octave_fftw_planner fftw_planner; |
|
214 |
4775
|
215 static inline void |
|
216 convert_packcomplex_1d (Complex *out, size_t nr, size_t nc, |
|
217 int stride, int dist) |
4773
|
218 { |
4785
|
219 OCTAVE_QUIT; |
|
220 |
|
221 // Fill in the missing data. |
|
222 |
4773
|
223 for (size_t i = 0; i < nr; i++) |
|
224 for (size_t j = nc/2+1; j < nc; j++) |
|
225 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]); |
4785
|
226 |
|
227 OCTAVE_QUIT; |
4773
|
228 } |
|
229 |
4775
|
230 static inline void |
|
231 convert_packcomplex_Nd (Complex *out, const dim_vector &dv) |
3828
|
232 { |
4773
|
233 size_t nc = dv(0); |
|
234 size_t nr = dv(1); |
|
235 size_t np = (dv.length() > 2 ? dv.numel () / nc / nr : 1); |
|
236 size_t nrp = nr * np; |
|
237 Complex *ptr1, *ptr2; |
|
238 |
4785
|
239 OCTAVE_QUIT; |
|
240 |
|
241 // Create space for the missing elements. |
|
242 |
4773
|
243 for (size_t i = 0; i < nrp; i++) |
|
244 { |
|
245 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); |
|
246 ptr2 = out + i * nc; |
|
247 for (size_t j = 0; j < nc/2+1; j++) |
|
248 *ptr2++ = *ptr1++; |
|
249 } |
|
250 |
4785
|
251 OCTAVE_QUIT; |
|
252 |
|
253 // Fill in the missing data for the rank = 2 case directly for speed. |
|
254 |
4773
|
255 for (size_t i = 0; i < np; i++) |
|
256 { |
|
257 for (size_t j = 1; j < nr; j++) |
|
258 for (size_t k = nc/2+1; k < nc; k++) |
|
259 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]); |
|
260 |
|
261 for (size_t j = nc/2+1; j < nc; j++) |
|
262 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]); |
|
263 } |
|
264 |
4785
|
265 OCTAVE_QUIT; |
|
266 |
|
267 // Now do the permutations needed for rank > 2 cases. |
|
268 |
4773
|
269 size_t jstart = dv(0) * dv(1); |
|
270 size_t kstep = dv(0); |
|
271 size_t nel = dv.numel (); |
4785
|
272 |
4773
|
273 for (int inner = 2; inner < dv.length(); inner++) |
|
274 { |
|
275 size_t jmax = jstart * dv(inner); |
|
276 for (size_t i = 0; i < nel; i+=jmax) |
|
277 for (size_t j = jstart, jj = jmax-jstart; j < jj; |
|
278 j+=jstart, jj-=jstart) |
|
279 for (size_t k = 0; k < jstart; k+= kstep) |
|
280 for (size_t l = nc/2+1; l < nc; l++) |
|
281 { |
|
282 Complex tmp = out[i+ j + k + l]; |
|
283 out[i + j + k + l] = out[i + jj + k + l]; |
|
284 out[i + jj + k + l] = tmp; |
|
285 } |
|
286 jstart = jmax; |
|
287 } |
4785
|
288 |
|
289 OCTAVE_QUIT; |
4773
|
290 } |
|
291 |
|
292 int |
|
293 octave_fftw::fft (const double *in, Complex *out, size_t npts, |
|
294 size_t nsamples, int stride, int dist) |
|
295 { |
|
296 dist = (dist < 0 ? npts : dist); |
|
297 |
|
298 dim_vector dv (npts); |
|
299 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist, |
|
300 in, out); |
|
301 |
|
302 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), |
|
303 reinterpret_cast<fftw_complex *> (out)); |
|
304 |
|
305 // Need to create other half of the transform |
|
306 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
3828
|
307 |
|
308 return 0; |
|
309 } |
|
310 |
|
311 int |
4773
|
312 octave_fftw::fft (const Complex *in, Complex *out, size_t npts, |
|
313 size_t nsamples, int stride, int dist) |
3828
|
314 { |
4773
|
315 dist = (dist < 0 ? npts : dist); |
|
316 |
|
317 dim_vector dv (npts); |
|
318 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, |
|
319 stride, dist, in, out); |
|
320 |
|
321 fftw_execute_dft (plan, |
|
322 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
323 reinterpret_cast<fftw_complex *> (out)); |
|
324 |
|
325 return 0; |
|
326 } |
|
327 |
|
328 int |
|
329 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, |
|
330 size_t nsamples, int stride, int dist) |
|
331 { |
|
332 dist = (dist < 0 ? npts : dist); |
|
333 |
|
334 dim_vector dv (npts); |
|
335 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, |
|
336 stride, dist, in, out); |
|
337 |
|
338 fftw_execute_dft (plan, |
|
339 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
340 reinterpret_cast<fftw_complex *> (out)); |
3828
|
341 |
|
342 const Complex scale = npts; |
4773
|
343 for (size_t j = 0; j < nsamples; j++) |
|
344 for (size_t i = 0; i < npts; i++) |
|
345 out[i*stride + j*dist] /= scale; |
3828
|
346 |
|
347 return 0; |
|
348 } |
|
349 |
|
350 int |
4773
|
351 octave_fftw::fftNd (const double *in, Complex *out, const int rank, |
|
352 const dim_vector &dv) |
3828
|
353 { |
4773
|
354 int dist = 1; |
|
355 for (int i = 0; i < rank; i++) |
|
356 dist *= dv(i); |
|
357 |
|
358 // Fool with the position of the start of the output matrix, so that |
|
359 // creating other half of the matrix won't cause cache problems |
|
360 int offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
|
361 |
|
362 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist, |
|
363 in, out + offset); |
|
364 |
|
365 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), |
|
366 reinterpret_cast<fftw_complex *> (out+ offset)); |
|
367 |
|
368 // Need to create other half of the transform |
|
369 convert_packcomplex_Nd (out, dv); |
3828
|
370 |
|
371 return 0; |
|
372 } |
|
373 |
|
374 int |
4773
|
375 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank, |
|
376 const dim_vector &dv) |
3828
|
377 { |
4773
|
378 int dist = 1; |
|
379 for (int i = 0; i < rank; i++) |
|
380 dist *= dv(i); |
|
381 |
|
382 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, |
|
383 dist, in, out); |
|
384 |
|
385 fftw_execute_dft (plan, |
|
386 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
387 reinterpret_cast<fftw_complex *> (out)); |
|
388 |
|
389 return 0; |
|
390 } |
3828
|
391 |
4773
|
392 int |
|
393 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank, |
4784
|
394 const dim_vector &dv) |
4773
|
395 { |
|
396 int dist = 1; |
|
397 for (int i = 0; i < rank; i++) |
|
398 dist *= dv(i); |
|
399 |
|
400 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, |
|
401 dist, in, out); |
|
402 |
|
403 fftw_execute_dft (plan, |
|
404 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), |
|
405 reinterpret_cast<fftw_complex *> (out)); |
|
406 |
|
407 const size_t npts = dv.numel (); |
3828
|
408 const Complex scale = npts; |
|
409 for (size_t i = 0; i < npts; i++) |
4773
|
410 out[i] /= scale; |
3828
|
411 |
|
412 return 0; |
|
413 } |
|
414 |
|
415 #endif |
|
416 |
|
417 /* |
|
418 ;;; Local Variables: *** |
|
419 ;;; mode: C++ *** |
|
420 ;;; End: *** |
|
421 */ |
|
422 |