Mercurial > octave-nkf
comparison liboctave/oct-fftw.cc @ 4773:ccfbd6047a54
[project @ 2004-02-16 19:02:32 by jwe]
author | jwe |
---|---|
date | Mon, 16 Feb 2004 19:02:33 +0000 |
parents | 24bf1bcbba8a |
children | 0ff45249d321 |
comparison
equal
deleted
inserted
replaced
4772:9eed17b2c8d1 | 4773:ccfbd6047a54 |
---|---|
20 | 20 |
21 #ifdef HAVE_CONFIG_H | 21 #ifdef HAVE_CONFIG_H |
22 #include <config.h> | 22 #include <config.h> |
23 #endif | 23 #endif |
24 | 24 |
25 #ifdef HAVE_FFTW | 25 #if defined (HAVE_FFTW3) |
26 | 26 |
27 #include "oct-fftw.h" | 27 #include "oct-fftw.h" |
28 #include "lo-error.h" | 28 #include "lo-error.h" |
29 | 29 #include <iostream> |
30 | 30 |
31 // Helper class to create and cache fftw plans for both 1d and 2d. This | 31 // Helper class to create and cache fftw plans for both 1d and 2d. This |
32 // implementation uses FFTW_ESTIMATE to create the plans, which in theory | 32 // implementation uses FFTW_ESTIMATE to create the plans, which in theory |
33 // is suboptimal, but provides quite reasonable performance. Future | 33 // is suboptimal, but provides quite reasonable performance. |
34 // enhancement will be to add a dynamically loadable interface ("fftw") | 34 |
35 // to manipulate fftw wisdom so that users may choose the appropriate | 35 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3 |
36 // planner. | 36 // destroys the input and output arrays. So with the form of the |
37 // current code we definitely want FFTW_ESTIMATE!! However, we use | |
38 // any wsidom that is available, either in a FFTW3 system wide file | |
39 // or as supplied by the user. | |
40 | |
41 // XXX FIXME XXX If we can ensure 16 byte alignment in Array<T> (<T> *data) | |
42 // the FFTW3 can use SIMD instructions for further acceleration. | |
43 | |
44 // Note that it is profitable to store the FFTW3 plans, for small ffts | |
37 | 45 |
38 class | 46 class |
39 octave_fftw_planner | 47 octave_fftw_planner |
40 { | 48 { |
41 public: | 49 public: |
42 octave_fftw_planner (); | 50 octave_fftw_planner (); |
43 | 51 |
44 fftw_plan create_plan (fftw_direction, size_t); | 52 fftw_plan create_plan (int dir, const int rank, const dim_vector dims, |
45 fftwnd_plan create_plan2d (fftw_direction, size_t, size_t); | 53 int howmany, int stride, int dist, |
54 const Complex *in, Complex *out); | |
55 fftw_plan create_plan (const int rank, const dim_vector dims, | |
56 int howmany, int stride, int dist, | |
57 const double *in, Complex *out); | |
46 | 58 |
47 private: | 59 private: |
48 int plan_flags; | 60 int plan_flags; |
49 | 61 |
62 // Plan for fft and ifft of complex values | |
50 fftw_plan plan[2]; | 63 fftw_plan plan[2]; |
51 fftwnd_plan plan2d[2]; | 64 int d[2]; // dist |
52 | 65 int s[2]; // stride |
53 size_t n[2]; | 66 int r[2]; // rank |
54 size_t n2d[2][2]; | 67 int h[2]; // howmany |
68 dim_vector n[2]; // dims | |
69 char ialign[2]; | |
70 char oalign[2]; | |
71 | |
72 // Plan for fft of real values | |
73 fftw_plan rplan; | |
74 int rd; // dist | |
75 int rs; // stride | |
76 int rr; // rank | |
77 int rh; // howmany | |
78 dim_vector rn; // dims | |
79 char rialign; | |
80 char roalign; | |
55 }; | 81 }; |
56 | 82 |
57 octave_fftw_planner::octave_fftw_planner () | 83 octave_fftw_planner::octave_fftw_planner () |
58 { | 84 { |
59 plan_flags = FFTW_ESTIMATE; | 85 plan_flags = FFTW_ESTIMATE; |
60 | 86 |
61 plan[0] = plan[1] = 0; | 87 plan[0] = plan[1] = 0; |
62 plan2d[0] = plan2d[1] = 0; | 88 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
63 | 89 ialign[0] = ialign[1] = oalign[0] = oalign[1] = 0; |
64 n[0] = n[1] = 0; | 90 n[0] = n[1] = dim_vector(); |
65 n2d[0][0] = n2d[0][1] = n2d[1][0] = n2d[1][1] = 0; | 91 |
92 rplan = 0; | |
93 rd = rs = rr = rh = 0; | |
94 rialign = roalign = 0; | |
95 rn = dim_vector (); | |
96 | |
97 // If we have a system wide wisdom file, import it | |
98 fftw_import_system_wisdom ( ); | |
66 } | 99 } |
67 | 100 |
68 fftw_plan | 101 fftw_plan |
69 octave_fftw_planner::create_plan (fftw_direction dir, size_t npts) | 102 octave_fftw_planner::create_plan (int dir, const int rank, |
70 { | 103 const dim_vector dims, int howmany, |
71 size_t which = (dir == FFTW_FORWARD) ? 0 : 1; | 104 int stride, int dist, |
105 const Complex *in, Complex *out) | |
106 { | |
107 int which = (dir == FFTW_FORWARD) ? 0 : 1; | |
72 fftw_plan *cur_plan_p = &plan[which]; | 108 fftw_plan *cur_plan_p = &plan[which]; |
73 bool create_new_plan = false; | 109 bool create_new_plan = false; |
74 | 110 char in_align = ((int) in) & 0xF; |
75 if (plan[which] == 0 || n[which] != npts) | 111 char out_align = ((int) out) & 0xF; |
76 { | 112 |
77 create_new_plan = true; | 113 if (plan[which] == 0 || d[which] != dist || s[which] != stride || |
78 n[which] = npts; | 114 r[which] != rank || h[which] != howmany || |
79 } | 115 ialign[which] != in_align || oalign[which] != out_align) |
116 create_new_plan = true; | |
117 else | |
118 // We still might not have the same shape of array | |
119 for (int i = 0; i < rank; i++) | |
120 if (dims(i) != n[which](i)) | |
121 { | |
122 create_new_plan = true; | |
123 break; | |
124 } | |
80 | 125 |
81 if (create_new_plan) | 126 if (create_new_plan) |
82 { | 127 { |
128 d[which] = dist; | |
129 s[which] = stride; | |
130 r[which] = rank; | |
131 h[which] = howmany; | |
132 ialign[which] = in_align; | |
133 oalign[which] = out_align; | |
134 n[which] = dims; | |
135 | |
83 if (*cur_plan_p) | 136 if (*cur_plan_p) |
84 fftw_destroy_plan (*cur_plan_p); | 137 fftw_destroy_plan (*cur_plan_p); |
85 | 138 |
86 *cur_plan_p = fftw_create_plan (npts, dir, plan_flags); | 139 // Note reversal of dimensions for column major storage in FFTW |
140 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
141 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
142 tmp[i] = dims(j); | |
143 | |
144 *cur_plan_p = | |
145 fftw_plan_many_dft (rank, tmp, howmany, | |
146 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), | |
147 NULL, stride, dist, reinterpret_cast<fftw_complex *> (out), | |
148 NULL, stride, dist, dir, plan_flags); | |
87 | 149 |
88 if (*cur_plan_p == 0) | 150 if (*cur_plan_p == 0) |
89 (*current_liboctave_error_handler) ("Error creating fftw plan"); | 151 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
90 } | 152 } |
91 | 153 |
92 return *cur_plan_p; | 154 return *cur_plan_p; |
93 } | 155 } |
94 | 156 |
95 fftwnd_plan | 157 fftw_plan |
96 octave_fftw_planner::create_plan2d (fftw_direction dir, | 158 octave_fftw_planner::create_plan (const int rank, const dim_vector dims, |
97 size_t nrows, size_t ncols) | 159 int howmany, int stride, int dist, |
98 { | 160 const double *in, Complex *out) |
99 size_t which = (dir == FFTW_FORWARD) ? 0 : 1; | 161 { |
100 fftwnd_plan *cur_plan_p = &plan2d[which]; | 162 fftw_plan *cur_plan_p = &rplan; |
101 bool create_new_plan = false; | 163 bool create_new_plan = false; |
102 | 164 char in_align = ((int) in) & 0xF; |
103 if (plan2d[which] == 0 || n2d[which][0] != nrows || n2d[which][1] != ncols) | 165 char out_align = ((int) out) & 0xF; |
104 { | 166 |
105 create_new_plan = true; | 167 if (rplan == 0 || rd != dist || rs != stride || |
106 | 168 rr != rank || rh != howmany || |
107 n2d[which][0] = nrows; | 169 rialign != in_align || roalign != out_align) |
108 n2d[which][1] = ncols; | 170 create_new_plan = true; |
109 } | 171 else |
172 // We still might not have the same shape of array | |
173 for (int i = 0; i < rank; i++) | |
174 if (dims(i) != rn(i)) | |
175 { | |
176 create_new_plan = true; | |
177 break; | |
178 } | |
110 | 179 |
111 if (create_new_plan) | 180 if (create_new_plan) |
112 { | 181 { |
182 rd = dist; | |
183 rs = stride; | |
184 rr = rank; | |
185 rh = howmany; | |
186 rialign = in_align; | |
187 roalign = out_align; | |
188 rn = dims; | |
189 | |
113 if (*cur_plan_p) | 190 if (*cur_plan_p) |
114 fftwnd_destroy_plan (*cur_plan_p); | 191 fftw_destroy_plan (*cur_plan_p); |
115 | 192 |
116 *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir, | 193 // Note reversal of dimensions for column major storage in FFTW |
117 plan_flags | FFTW_IN_PLACE); | 194 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
195 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
196 tmp[i] = dims(j); | |
197 | |
198 *cur_plan_p = | |
199 fftw_plan_many_dft_r2c (rank, tmp, howmany, | |
200 (const_cast<double *> (in)), | |
201 NULL, stride, dist, reinterpret_cast<fftw_complex *> (out), | |
202 NULL, stride, dist, plan_flags); | |
118 | 203 |
119 if (*cur_plan_p == 0) | 204 if (*cur_plan_p == 0) |
120 (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); | 205 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
121 } | 206 } |
122 | 207 |
123 return *cur_plan_p; | 208 return *cur_plan_p; |
124 } | 209 } |
125 | 210 |
126 static octave_fftw_planner fftw_planner; | 211 static octave_fftw_planner fftw_planner; |
127 | 212 |
128 int | 213 static inline void convert_packcomplex_1d (Complex *out, size_t nr, |
129 octave_fftw::fft (const Complex *in, Complex *out, size_t npts) | 214 size_t nc, int stride, int dist) |
130 { | 215 { |
131 fftw_one (fftw_planner.create_plan (FFTW_FORWARD, npts), | 216 // Fill in the missing data |
132 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), | 217 for (size_t i = 0; i < nr; i++) |
133 reinterpret_cast<fftw_complex *> (out)); | 218 for (size_t j = nc/2+1; j < nc; j++) |
134 | 219 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]); |
135 return 0; | 220 } |
136 } | 221 |
137 | 222 static inline void convert_packcomplex_Nd (Complex *out, |
138 int | 223 const dim_vector &dv) |
139 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts) | 224 { |
140 { | 225 size_t nc = dv(0); |
141 fftw_one (fftw_planner.create_plan (FFTW_BACKWARD, npts), | 226 size_t nr = dv(1); |
142 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), | 227 size_t np = (dv.length() > 2 ? dv.numel () / nc / nr : 1); |
143 reinterpret_cast<fftw_complex *> (out)); | 228 size_t nrp = nr * np; |
144 | 229 Complex *ptr1, *ptr2; |
230 | |
231 // Create space for the missing elements | |
232 for (size_t i = 0; i < nrp; i++) | |
233 { | |
234 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); | |
235 ptr2 = out + i * nc; | |
236 for (size_t j = 0; j < nc/2+1; j++) | |
237 *ptr2++ = *ptr1++; | |
238 } | |
239 | |
240 // Fill in the missing data for the rank = 2 case directly for speed | |
241 for (size_t i = 0; i < np; i++) | |
242 { | |
243 for (size_t j = 1; j < nr; j++) | |
244 for (size_t k = nc/2+1; k < nc; k++) | |
245 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]); | |
246 | |
247 for (size_t j = nc/2+1; j < nc; j++) | |
248 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]); | |
249 } | |
250 | |
251 // Now do the permutations needed for rank > 2 cases | |
252 size_t jstart = dv(0) * dv(1); | |
253 size_t kstep = dv(0); | |
254 size_t nel = dv.numel (); | |
255 for (int inner = 2; inner < dv.length(); inner++) | |
256 { | |
257 size_t jmax = jstart * dv(inner); | |
258 for (size_t i = 0; i < nel; i+=jmax) | |
259 for (size_t j = jstart, jj = jmax-jstart; j < jj; | |
260 j+=jstart, jj-=jstart) | |
261 for (size_t k = 0; k < jstart; k+= kstep) | |
262 for (size_t l = nc/2+1; l < nc; l++) | |
263 { | |
264 Complex tmp = out[i+ j + k + l]; | |
265 out[i + j + k + l] = out[i + jj + k + l]; | |
266 out[i + jj + k + l] = tmp; | |
267 } | |
268 jstart = jmax; | |
269 } | |
270 } | |
271 | |
272 int | |
273 octave_fftw::fft (const double *in, Complex *out, size_t npts, | |
274 size_t nsamples, int stride, int dist) | |
275 { | |
276 dist = (dist < 0 ? npts : dist); | |
277 | |
278 dim_vector dv (npts); | |
279 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist, | |
280 in, out); | |
281 | |
282 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), | |
283 reinterpret_cast<fftw_complex *> (out)); | |
284 | |
285 // Need to create other half of the transform | |
286 convert_packcomplex_1d (out, nsamples, npts, stride, dist); | |
287 | |
288 return 0; | |
289 } | |
290 | |
291 int | |
292 octave_fftw::fft (const Complex *in, Complex *out, size_t npts, | |
293 size_t nsamples, int stride, int dist) | |
294 { | |
295 dist = (dist < 0 ? npts : dist); | |
296 | |
297 dim_vector dv (npts); | |
298 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, | |
299 stride, dist, in, out); | |
300 | |
301 fftw_execute_dft (plan, | |
302 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
303 reinterpret_cast<fftw_complex *> (out)); | |
304 | |
305 return 0; | |
306 } | |
307 | |
308 int | |
309 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, | |
310 size_t nsamples, int stride, int dist) | |
311 { | |
312 dist = (dist < 0 ? npts : dist); | |
313 | |
314 dim_vector dv (npts); | |
315 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, | |
316 stride, dist, in, out); | |
317 | |
318 fftw_execute_dft (plan, | |
319 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
320 reinterpret_cast<fftw_complex *> (out)); | |
321 | |
322 const Complex scale = npts; | |
323 for (size_t j = 0; j < nsamples; j++) | |
324 for (size_t i = 0; i < npts; i++) | |
325 out[i*stride + j*dist] /= scale; | |
326 | |
327 return 0; | |
328 } | |
329 | |
330 int | |
331 octave_fftw::fftNd (const double *in, Complex *out, const int rank, | |
332 const dim_vector &dv) | |
333 { | |
334 int dist = 1; | |
335 for (int i = 0; i < rank; i++) | |
336 dist *= dv(i); | |
337 | |
338 // Fool with the position of the start of the output matrix, so that | |
339 // creating other half of the matrix won't cause cache problems | |
340 int offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); | |
341 | |
342 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist, | |
343 in, out + offset); | |
344 | |
345 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), | |
346 reinterpret_cast<fftw_complex *> (out+ offset)); | |
347 | |
348 // Need to create other half of the transform | |
349 convert_packcomplex_Nd (out, dv); | |
350 | |
351 return 0; | |
352 } | |
353 | |
354 int | |
355 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank, | |
356 const dim_vector &dv) | |
357 { | |
358 int dist = 1; | |
359 for (int i = 0; i < rank; i++) | |
360 dist *= dv(i); | |
361 | |
362 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, | |
363 dist, in, out); | |
364 | |
365 fftw_execute_dft (plan, | |
366 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
367 reinterpret_cast<fftw_complex *> (out)); | |
368 | |
369 return 0; | |
370 } | |
371 | |
372 int | |
373 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank, | |
374 const dim_vector &dv) | |
375 { | |
376 int dist = 1; | |
377 for (int i = 0; i < rank; i++) | |
378 dist *= dv(i); | |
379 | |
380 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, | |
381 dist, in, out); | |
382 | |
383 fftw_execute_dft (plan, | |
384 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
385 reinterpret_cast<fftw_complex *> (out)); | |
386 | |
387 const size_t npts = dv.numel (); | |
145 const Complex scale = npts; | 388 const Complex scale = npts; |
146 for (size_t i = 0; i < npts; i++) | 389 for (size_t i = 0; i < npts; i++) |
147 out[i] /= scale; | 390 out[i] /= scale; |
148 | |
149 return 0; | |
150 } | |
151 | |
152 int | |
153 octave_fftw::fft2d (Complex *inout, size_t nr, size_t nc) | |
154 { | |
155 fftwnd_one (fftw_planner.create_plan2d (FFTW_FORWARD, nr, nc), | |
156 reinterpret_cast<fftw_complex *> (inout), | |
157 0); | |
158 | |
159 return 0; | |
160 } | |
161 | |
162 int | |
163 octave_fftw::ifft2d (Complex *inout, size_t nr, size_t nc) | |
164 { | |
165 fftwnd_one (fftw_planner.create_plan2d (FFTW_BACKWARD, nr, nc), | |
166 reinterpret_cast<fftw_complex *> (inout), | |
167 0); | |
168 | |
169 const size_t npts = nr * nc; | |
170 const Complex scale = npts; | |
171 for (size_t i = 0; i < npts; i++) | |
172 inout[i] /= scale; | |
173 | 391 |
174 return 0; | 392 return 0; |
175 } | 393 } |
176 | 394 |
177 #endif | 395 #endif |