diff liboctave/oct-fftw.cc @ 6228:aa5df9ba98d5

[project @ 2007-01-05 22:49:03 by dbateman]
author dbateman
date Fri, 05 Jan 2007 22:49:57 +0000
parents ace8d8d26933
children 7e958a1532c6
line wrap: on
line diff
--- a/liboctave/oct-fftw.cc	Fri Jan 05 20:23:59 2007 +0000
+++ b/liboctave/oct-fftw.cc	Fri Jan 05 22:49:57 2007 +0000
@@ -33,15 +33,16 @@
 #include "quit.h"
 
 // Helper class to create and cache fftw plans for both 1d and
-// 2d. This implementation uses FFTW_ESTIMATE to create the plans,
-// which in theory is suboptimal, but provides quite reasonable
-// performance.
+// 2d. This implementation defaults to using FFTW_ESTIMATE to create
+// the plans, which in theory is suboptimal, but provides quit
+// reasonable performance.
 
 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3
-// destroys the input and output arrays. So with the form of the
-// current code we definitely want FFTW_ESTIMATE!! However, we use any
-// wsidom that is available, either in a FFTW3 system wide file or as
-// supplied by the user.
+// destroys the input and output arrays. We must therefore create a
+// temporary input array with the same size and 16-byte alignment as
+// the original array and use that for the planner. Note that we also
+// use any wisdom that is available, either in a FFTW3 system wide file
+//  or as supplied by the user.
 
 // FIXME -- if we can ensure 16 byte alignment in Array<T>
 // (<T> *data) the FFTW3 can use SIMD instructions for further
@@ -50,72 +51,9 @@
 // Note that it is profitable to store the FFTW3 plans, for small
 // ffts.
 
-class
-octave_fftw_planner
-{
-public:
-
-  octave_fftw_planner (void);
-
-  fftw_plan create_plan (int dir, const int rank, const dim_vector dims, 
-			 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, 
-			 const Complex *in, Complex *out);
-
-  fftw_plan create_plan (const int rank, const dim_vector dims, 
-			 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, 
-			 const double *in, Complex *out);
-
-private:
-
-  int plan_flags;
-
-  // FIXME -- perhaps this should be split into two classes?
-
-  // Plan for fft and ifft of complex values
-  fftw_plan plan[2];
-
-  // dist
-  octave_idx_type d[2];
-
-  // stride
-  octave_idx_type s[2];
-
-  // rank
-  int r[2];
-
-  // howmany
-  octave_idx_type h[2];
-
-  // dims
-  dim_vector n[2];
-
-  bool simd_align[2];
-  bool inplace[2];
-
-  // Plan for fft of real values
-  fftw_plan rplan;
-
-  // dist
-  octave_idx_type rd;
-
-  // stride
-  octave_idx_type rs;
-
-  // rank
-  int rr;
-
-  // howmany
-  octave_idx_type rh;
-
-  // dims
-  dim_vector rn;
-
-  bool rsimd_align;
-};
-
 octave_fftw_planner::octave_fftw_planner (void)
 {
-  plan_flags = FFTW_ESTIMATE;
+  meth = ESTIMATE;
 
   plan[0] = plan[1] = 0;
   d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
@@ -132,6 +70,37 @@
   fftw_import_system_wisdom ();
 }
 
+octave_fftw_planner::FftwMethod
+octave_fftw_planner::method (void)
+{
+  return meth;
+}
+
+octave_fftw_planner::FftwMethod
+octave_fftw_planner::method (FftwMethod _meth)
+{
+  FftwMethod ret = meth;
+  if (_meth == ESTIMATE || _meth == MEASURE || 
+      _meth == PATIENT || _meth == EXHAUSTIVE ||
+      _meth == HYBRID)
+    {
+      if (meth != _meth) 
+	{
+	  meth = _meth;
+	  if (rplan)
+	    fftw_destroy_plan (rplan);
+	  if (plan[0])
+	    fftw_destroy_plan (plan[0]);
+	  if (plan[1])
+	    fftw_destroy_plan (plan[1]);
+	  rplan = plan[0] = plan[1] = 0;
+	}
+    }
+  else
+    ret = UNKNOWN;
+  return ret;
+}
+
 #define CHECK_SIMD_ALIGNMENT(x) \
   ((reinterpret_cast<ptrdiff_t> (x)) & 0xF == 0)
 
@@ -178,6 +147,46 @@
       inplace[which] = ioinplace;
       n[which] = dims;
 
+      // Note reversal of dimensions for column major storage in FFTW.
+      octave_idx_type nn = 1;
+      OCTAVE_LOCAL_BUFFER (int, tmp, rank);
+
+      for (int i = 0, j = rank-1; i < rank; i++, j--)
+	{
+	  tmp[i] = dims(j);
+	  nn *= dims(j);
+	}
+
+      int plan_flags = 0;
+      bool plan_destroys_in = true;
+
+      switch (meth) 
+	{
+	case UNKNOWN:
+	case ESTIMATE:
+	  plan_flags |= FFTW_ESTIMATE;
+	  plan_destroys_in = false;
+	  break;
+	case MEASURE:
+	  plan_flags |= FFTW_MEASURE;
+	  break;
+	case PATIENT:
+	  plan_flags |= FFTW_PATIENT;
+	  break;
+	case EXHAUSTIVE:
+	  plan_flags |= FFTW_EXHAUSTIVE;
+	  break;
+	case HYBRID:
+	  if (nn < 8193)
+	    plan_flags |= FFTW_MEASURE;
+	  else
+	    {
+	      plan_flags |= FFTW_ESTIMATE;
+	      plan_destroys_in = false;
+	    }
+	  break;
+	}
+
       if (ioalign)
 	plan_flags &= ~FFTW_UNALIGNED;
       else
@@ -186,18 +195,28 @@
       if (*cur_plan_p)
 	fftw_destroy_plan (*cur_plan_p);
 
-      // Note reversal of dimensions for column major storage in FFTW.
-
-      OCTAVE_LOCAL_BUFFER (int, tmp, rank);
+      if (plan_destroys_in)
+	{
+	  // Create matrix with the same size and 16-byte alignment as input
+	  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
+	  itmp = reinterpret_cast<Complex *>
+	    (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + 
+	     ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
 
-      for (int i = 0, j = rank-1; i < rank; i++, j--)
-	tmp[i] = dims(j);
-
-      *cur_plan_p =
-	fftw_plan_many_dft (rank, tmp, howmany,
+	  *cur_plan_p =
+	    fftw_plan_many_dft (rank, tmp, howmany,
+	      reinterpret_cast<fftw_complex *> (itmp),
+	      0, stride, dist, reinterpret_cast<fftw_complex *> (out),
+	      0, stride, dist, dir, plan_flags);
+	}
+      else
+	{
+	  *cur_plan_p =
+	    fftw_plan_many_dft (rank, tmp, howmany,
 	      reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
 	      0, stride, dist, reinterpret_cast<fftw_complex *> (out),
 	      0, stride, dist, dir, plan_flags);
+	}
 
       if (*cur_plan_p == 0)
 	(*current_liboctave_error_handler) ("Error creating fftw plan");
@@ -243,6 +262,46 @@
       rsimd_align = ioalign;
       rn = dims;
 
+      // Note reversal of dimensions for column major storage in FFTW.
+      octave_idx_type nn = 1;
+      OCTAVE_LOCAL_BUFFER (int, tmp, rank);
+
+      for (int i = 0, j = rank-1; i < rank; i++, j--)
+	{
+	  tmp[i] = dims(j);
+	  nn *= dims(j);
+	}
+
+      int plan_flags = 0;
+      bool plan_destroys_in = true;
+
+      switch (meth) 
+	{
+	case UNKNOWN:
+	case ESTIMATE:
+	  plan_flags |= FFTW_ESTIMATE;
+	  plan_destroys_in = false;
+	  break;
+	case MEASURE:
+	  plan_flags |= FFTW_MEASURE;
+	  break;
+	case PATIENT:
+	  plan_flags |= FFTW_PATIENT;
+	  break;
+	case EXHAUSTIVE:
+	  plan_flags |= FFTW_EXHAUSTIVE;
+	  break;
+	case HYBRID:
+	  if (nn < 8193)
+	    plan_flags |= FFTW_MEASURE;
+	  else
+	    {
+	      plan_flags |= FFTW_ESTIMATE;
+	      plan_destroys_in = false;
+	    }
+	  break;
+	}
+
       if (ioalign)
 	plan_flags &= ~FFTW_UNALIGNED;
       else
@@ -251,18 +310,27 @@
       if (*cur_plan_p)
 	fftw_destroy_plan (*cur_plan_p);
 
-      // Note reversal of dimensions for column major storage in FFTW.
-
-      OCTAVE_LOCAL_BUFFER (int, tmp, rank);
+      if (plan_destroys_in)
+	{
+	  // Create matrix with the same size and 16-byte alignment as input
+	  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
+	  itmp = reinterpret_cast<double *>
+	    (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + 
+	     ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
 
-      for (int i = 0, j = rank-1; i < rank; i++, j--)
-	tmp[i] = dims(j);
-
-      *cur_plan_p =
-	fftw_plan_many_dft_r2c (rank, tmp, howmany,
+	  *cur_plan_p =
+	    fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
+	      0, stride, dist, reinterpret_cast<fftw_complex *> (out),
+	      0, stride, dist, plan_flags);
+	}
+      else
+	{
+	  *cur_plan_p =
+	    fftw_plan_many_dft_r2c (rank, tmp, howmany,
 	      (const_cast<double *> (in)),
 	      0, stride, dist, reinterpret_cast<fftw_complex *> (out),
 	      0, stride, dist, plan_flags);
+	}
 
       if (*cur_plan_p == 0)
 	(*current_liboctave_error_handler) ("Error creating fftw plan");
@@ -271,7 +339,7 @@
   return *cur_plan_p;
 }
 
-static octave_fftw_planner fftw_planner;
+octave_fftw_planner fftw_planner;
 
 static inline void
 convert_packcomplex_1d (Complex *out, size_t nr, size_t nc,