comparison liboctave/oct-fftw.cc @ 4773:ccfbd6047a54

[project @ 2004-02-16 19:02:32 by jwe]
author jwe
date Mon, 16 Feb 2004 19:02:33 +0000
parents 24bf1bcbba8a
children 0ff45249d321
comparison
equal deleted inserted replaced
4772:9eed17b2c8d1 4773:ccfbd6047a54
20 20
21 #ifdef HAVE_CONFIG_H 21 #ifdef HAVE_CONFIG_H
22 #include <config.h> 22 #include <config.h>
23 #endif 23 #endif
24 24
25 #ifdef HAVE_FFTW 25 #if defined (HAVE_FFTW3)
26 26
27 #include "oct-fftw.h" 27 #include "oct-fftw.h"
28 #include "lo-error.h" 28 #include "lo-error.h"
29 29 #include <iostream>
30 30
31 // Helper class to create and cache fftw plans for both 1d and 2d. This 31 // Helper class to create and cache fftw plans for both 1d and 2d. This
32 // implementation uses FFTW_ESTIMATE to create the plans, which in theory 32 // implementation uses FFTW_ESTIMATE to create the plans, which in theory
33 // is suboptimal, but provides quite reasonable performance. Future 33 // is suboptimal, but provides quite reasonable performance.
34 // enhancement will be to add a dynamically loadable interface ("fftw") 34
35 // to manipulate fftw wisdom so that users may choose the appropriate 35 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3
36 // planner. 36 // destroys the input and output arrays. So with the form of the
37 // current code we definitely want FFTW_ESTIMATE!! However, we use
38 // any wsidom that is available, either in a FFTW3 system wide file
39 // or as supplied by the user.
40
41 // XXX FIXME XXX If we can ensure 16 byte alignment in Array<T> (<T> *data)
42 // the FFTW3 can use SIMD instructions for further acceleration.
43
44 // Note that it is profitable to store the FFTW3 plans, for small ffts
37 45
38 class 46 class
39 octave_fftw_planner 47 octave_fftw_planner
40 { 48 {
41 public: 49 public:
42 octave_fftw_planner (); 50 octave_fftw_planner ();
43 51
44 fftw_plan create_plan (fftw_direction, size_t); 52 fftw_plan create_plan (int dir, const int rank, const dim_vector dims,
45 fftwnd_plan create_plan2d (fftw_direction, size_t, size_t); 53 int howmany, int stride, int dist,
54 const Complex *in, Complex *out);
55 fftw_plan create_plan (const int rank, const dim_vector dims,
56 int howmany, int stride, int dist,
57 const double *in, Complex *out);
46 58
47 private: 59 private:
48 int plan_flags; 60 int plan_flags;
49 61
62 // Plan for fft and ifft of complex values
50 fftw_plan plan[2]; 63 fftw_plan plan[2];
51 fftwnd_plan plan2d[2]; 64 int d[2]; // dist
52 65 int s[2]; // stride
53 size_t n[2]; 66 int r[2]; // rank
54 size_t n2d[2][2]; 67 int h[2]; // howmany
68 dim_vector n[2]; // dims
69 char ialign[2];
70 char oalign[2];
71
72 // Plan for fft of real values
73 fftw_plan rplan;
74 int rd; // dist
75 int rs; // stride
76 int rr; // rank
77 int rh; // howmany
78 dim_vector rn; // dims
79 char rialign;
80 char roalign;
55 }; 81 };
56 82
57 octave_fftw_planner::octave_fftw_planner () 83 octave_fftw_planner::octave_fftw_planner ()
58 { 84 {
59 plan_flags = FFTW_ESTIMATE; 85 plan_flags = FFTW_ESTIMATE;
60 86
61 plan[0] = plan[1] = 0; 87 plan[0] = plan[1] = 0;
62 plan2d[0] = plan2d[1] = 0; 88 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
63 89 ialign[0] = ialign[1] = oalign[0] = oalign[1] = 0;
64 n[0] = n[1] = 0; 90 n[0] = n[1] = dim_vector();
65 n2d[0][0] = n2d[0][1] = n2d[1][0] = n2d[1][1] = 0; 91
92 rplan = 0;
93 rd = rs = rr = rh = 0;
94 rialign = roalign = 0;
95 rn = dim_vector ();
96
97 // If we have a system wide wisdom file, import it
98 fftw_import_system_wisdom ( );
66 } 99 }
67 100
68 fftw_plan 101 fftw_plan
69 octave_fftw_planner::create_plan (fftw_direction dir, size_t npts) 102 octave_fftw_planner::create_plan (int dir, const int rank,
70 { 103 const dim_vector dims, int howmany,
71 size_t which = (dir == FFTW_FORWARD) ? 0 : 1; 104 int stride, int dist,
105 const Complex *in, Complex *out)
106 {
107 int which = (dir == FFTW_FORWARD) ? 0 : 1;
72 fftw_plan *cur_plan_p = &plan[which]; 108 fftw_plan *cur_plan_p = &plan[which];
73 bool create_new_plan = false; 109 bool create_new_plan = false;
74 110 char in_align = ((int) in) & 0xF;
75 if (plan[which] == 0 || n[which] != npts) 111 char out_align = ((int) out) & 0xF;
76 { 112
77 create_new_plan = true; 113 if (plan[which] == 0 || d[which] != dist || s[which] != stride ||
78 n[which] = npts; 114 r[which] != rank || h[which] != howmany ||
79 } 115 ialign[which] != in_align || oalign[which] != out_align)
116 create_new_plan = true;
117 else
118 // We still might not have the same shape of array
119 for (int i = 0; i < rank; i++)
120 if (dims(i) != n[which](i))
121 {
122 create_new_plan = true;
123 break;
124 }
80 125
81 if (create_new_plan) 126 if (create_new_plan)
82 { 127 {
128 d[which] = dist;
129 s[which] = stride;
130 r[which] = rank;
131 h[which] = howmany;
132 ialign[which] = in_align;
133 oalign[which] = out_align;
134 n[which] = dims;
135
83 if (*cur_plan_p) 136 if (*cur_plan_p)
84 fftw_destroy_plan (*cur_plan_p); 137 fftw_destroy_plan (*cur_plan_p);
85 138
86 *cur_plan_p = fftw_create_plan (npts, dir, plan_flags); 139 // Note reversal of dimensions for column major storage in FFTW
140 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
141 for (int i = 0, j = rank-1; i < rank; i++, j--)
142 tmp[i] = dims(j);
143
144 *cur_plan_p =
145 fftw_plan_many_dft (rank, tmp, howmany,
146 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
147 NULL, stride, dist, reinterpret_cast<fftw_complex *> (out),
148 NULL, stride, dist, dir, plan_flags);
87 149
88 if (*cur_plan_p == 0) 150 if (*cur_plan_p == 0)
89 (*current_liboctave_error_handler) ("Error creating fftw plan"); 151 (*current_liboctave_error_handler) ("Error creating fftw plan");
90 } 152 }
91 153
92 return *cur_plan_p; 154 return *cur_plan_p;
93 } 155 }
94 156
95 fftwnd_plan 157 fftw_plan
96 octave_fftw_planner::create_plan2d (fftw_direction dir, 158 octave_fftw_planner::create_plan (const int rank, const dim_vector dims,
97 size_t nrows, size_t ncols) 159 int howmany, int stride, int dist,
98 { 160 const double *in, Complex *out)
99 size_t which = (dir == FFTW_FORWARD) ? 0 : 1; 161 {
100 fftwnd_plan *cur_plan_p = &plan2d[which]; 162 fftw_plan *cur_plan_p = &rplan;
101 bool create_new_plan = false; 163 bool create_new_plan = false;
102 164 char in_align = ((int) in) & 0xF;
103 if (plan2d[which] == 0 || n2d[which][0] != nrows || n2d[which][1] != ncols) 165 char out_align = ((int) out) & 0xF;
104 { 166
105 create_new_plan = true; 167 if (rplan == 0 || rd != dist || rs != stride ||
106 168 rr != rank || rh != howmany ||
107 n2d[which][0] = nrows; 169 rialign != in_align || roalign != out_align)
108 n2d[which][1] = ncols; 170 create_new_plan = true;
109 } 171 else
172 // We still might not have the same shape of array
173 for (int i = 0; i < rank; i++)
174 if (dims(i) != rn(i))
175 {
176 create_new_plan = true;
177 break;
178 }
110 179
111 if (create_new_plan) 180 if (create_new_plan)
112 { 181 {
182 rd = dist;
183 rs = stride;
184 rr = rank;
185 rh = howmany;
186 rialign = in_align;
187 roalign = out_align;
188 rn = dims;
189
113 if (*cur_plan_p) 190 if (*cur_plan_p)
114 fftwnd_destroy_plan (*cur_plan_p); 191 fftw_destroy_plan (*cur_plan_p);
115 192
116 *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir, 193 // Note reversal of dimensions for column major storage in FFTW
117 plan_flags | FFTW_IN_PLACE); 194 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
195 for (int i = 0, j = rank-1; i < rank; i++, j--)
196 tmp[i] = dims(j);
197
198 *cur_plan_p =
199 fftw_plan_many_dft_r2c (rank, tmp, howmany,
200 (const_cast<double *> (in)),
201 NULL, stride, dist, reinterpret_cast<fftw_complex *> (out),
202 NULL, stride, dist, plan_flags);
118 203
119 if (*cur_plan_p == 0) 204 if (*cur_plan_p == 0)
120 (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); 205 (*current_liboctave_error_handler) ("Error creating fftw plan");
121 } 206 }
122 207
123 return *cur_plan_p; 208 return *cur_plan_p;
124 } 209 }
125 210
126 static octave_fftw_planner fftw_planner; 211 static octave_fftw_planner fftw_planner;
127 212
128 int 213 static inline void convert_packcomplex_1d (Complex *out, size_t nr,
129 octave_fftw::fft (const Complex *in, Complex *out, size_t npts) 214 size_t nc, int stride, int dist)
130 { 215 {
131 fftw_one (fftw_planner.create_plan (FFTW_FORWARD, npts), 216 // Fill in the missing data
132 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), 217 for (size_t i = 0; i < nr; i++)
133 reinterpret_cast<fftw_complex *> (out)); 218 for (size_t j = nc/2+1; j < nc; j++)
134 219 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]);
135 return 0; 220 }
136 } 221
137 222 static inline void convert_packcomplex_Nd (Complex *out,
138 int 223 const dim_vector &dv)
139 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts) 224 {
140 { 225 size_t nc = dv(0);
141 fftw_one (fftw_planner.create_plan (FFTW_BACKWARD, npts), 226 size_t nr = dv(1);
142 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), 227 size_t np = (dv.length() > 2 ? dv.numel () / nc / nr : 1);
143 reinterpret_cast<fftw_complex *> (out)); 228 size_t nrp = nr * np;
144 229 Complex *ptr1, *ptr2;
230
231 // Create space for the missing elements
232 for (size_t i = 0; i < nrp; i++)
233 {
234 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
235 ptr2 = out + i * nc;
236 for (size_t j = 0; j < nc/2+1; j++)
237 *ptr2++ = *ptr1++;
238 }
239
240 // Fill in the missing data for the rank = 2 case directly for speed
241 for (size_t i = 0; i < np; i++)
242 {
243 for (size_t j = 1; j < nr; j++)
244 for (size_t k = nc/2+1; k < nc; k++)
245 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]);
246
247 for (size_t j = nc/2+1; j < nc; j++)
248 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]);
249 }
250
251 // Now do the permutations needed for rank > 2 cases
252 size_t jstart = dv(0) * dv(1);
253 size_t kstep = dv(0);
254 size_t nel = dv.numel ();
255 for (int inner = 2; inner < dv.length(); inner++)
256 {
257 size_t jmax = jstart * dv(inner);
258 for (size_t i = 0; i < nel; i+=jmax)
259 for (size_t j = jstart, jj = jmax-jstart; j < jj;
260 j+=jstart, jj-=jstart)
261 for (size_t k = 0; k < jstart; k+= kstep)
262 for (size_t l = nc/2+1; l < nc; l++)
263 {
264 Complex tmp = out[i+ j + k + l];
265 out[i + j + k + l] = out[i + jj + k + l];
266 out[i + jj + k + l] = tmp;
267 }
268 jstart = jmax;
269 }
270 }
271
272 int
273 octave_fftw::fft (const double *in, Complex *out, size_t npts,
274 size_t nsamples, int stride, int dist)
275 {
276 dist = (dist < 0 ? npts : dist);
277
278 dim_vector dv (npts);
279 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist,
280 in, out);
281
282 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
283 reinterpret_cast<fftw_complex *> (out));
284
285 // Need to create other half of the transform
286 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
287
288 return 0;
289 }
290
291 int
292 octave_fftw::fft (const Complex *in, Complex *out, size_t npts,
293 size_t nsamples, int stride, int dist)
294 {
295 dist = (dist < 0 ? npts : dist);
296
297 dim_vector dv (npts);
298 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples,
299 stride, dist, in, out);
300
301 fftw_execute_dft (plan,
302 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
303 reinterpret_cast<fftw_complex *> (out));
304
305 return 0;
306 }
307
308 int
309 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts,
310 size_t nsamples, int stride, int dist)
311 {
312 dist = (dist < 0 ? npts : dist);
313
314 dim_vector dv (npts);
315 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples,
316 stride, dist, in, out);
317
318 fftw_execute_dft (plan,
319 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
320 reinterpret_cast<fftw_complex *> (out));
321
322 const Complex scale = npts;
323 for (size_t j = 0; j < nsamples; j++)
324 for (size_t i = 0; i < npts; i++)
325 out[i*stride + j*dist] /= scale;
326
327 return 0;
328 }
329
330 int
331 octave_fftw::fftNd (const double *in, Complex *out, const int rank,
332 const dim_vector &dv)
333 {
334 int dist = 1;
335 for (int i = 0; i < rank; i++)
336 dist *= dv(i);
337
338 // Fool with the position of the start of the output matrix, so that
339 // creating other half of the matrix won't cause cache problems
340 int offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
341
342 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist,
343 in, out + offset);
344
345 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
346 reinterpret_cast<fftw_complex *> (out+ offset));
347
348 // Need to create other half of the transform
349 convert_packcomplex_Nd (out, dv);
350
351 return 0;
352 }
353
354 int
355 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank,
356 const dim_vector &dv)
357 {
358 int dist = 1;
359 for (int i = 0; i < rank; i++)
360 dist *= dv(i);
361
362 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1,
363 dist, in, out);
364
365 fftw_execute_dft (plan,
366 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
367 reinterpret_cast<fftw_complex *> (out));
368
369 return 0;
370 }
371
372 int
373 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank,
374 const dim_vector &dv)
375 {
376 int dist = 1;
377 for (int i = 0; i < rank; i++)
378 dist *= dv(i);
379
380 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1,
381 dist, in, out);
382
383 fftw_execute_dft (plan,
384 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
385 reinterpret_cast<fftw_complex *> (out));
386
387 const size_t npts = dv.numel ();
145 const Complex scale = npts; 388 const Complex scale = npts;
146 for (size_t i = 0; i < npts; i++) 389 for (size_t i = 0; i < npts; i++)
147 out[i] /= scale; 390 out[i] /= scale;
148
149 return 0;
150 }
151
152 int
153 octave_fftw::fft2d (Complex *inout, size_t nr, size_t nc)
154 {
155 fftwnd_one (fftw_planner.create_plan2d (FFTW_FORWARD, nr, nc),
156 reinterpret_cast<fftw_complex *> (inout),
157 0);
158
159 return 0;
160 }
161
162 int
163 octave_fftw::ifft2d (Complex *inout, size_t nr, size_t nc)
164 {
165 fftwnd_one (fftw_planner.create_plan2d (FFTW_BACKWARD, nr, nc),
166 reinterpret_cast<fftw_complex *> (inout),
167 0);
168
169 const size_t npts = nr * nc;
170 const Complex scale = npts;
171 for (size_t i = 0; i < npts; i++)
172 inout[i] /= scale;
173 391
174 return 0; 392 return 0;
175 } 393 }
176 394
177 #endif 395 #endif