diff liboctave/oct-fftw.cc @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents a1dbe9d80eee
children 25bc2d31e1bf
line wrap: on
line diff
--- a/liboctave/oct-fftw.cc	Wed May 14 18:09:56 2008 +0200
+++ b/liboctave/oct-fftw.cc	Sun Apr 27 22:34:17 2008 +0200
@@ -340,10 +340,298 @@
   return *cur_plan_p;
 }
 
-octave_fftw_planner fftw_planner;
+
+octave_float_fftw_planner::octave_float_fftw_planner (void)
+{
+  meth = ESTIMATE;
+
+  plan[0] = plan[1] = 0;
+  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
+  simd_align[0] = simd_align[1] = false;
+  inplace[0] = inplace[1] = false;
+  n[0] = n[1] = dim_vector ();
+
+  rplan = 0;
+  rd = rs = rr = rh = 0;
+  rsimd_align = false;
+  rn = dim_vector ();
+  
+  // If we have a system wide wisdom file, import it.
+  fftwf_import_system_wisdom ();
+}
+
+octave_float_fftw_planner::FftwMethod
+octave_float_fftw_planner::method (void)
+{
+  return meth;
+}
+
+octave_float_fftw_planner::FftwMethod
+octave_float_fftw_planner::method (FftwMethod _meth)
+{
+  FftwMethod ret = meth;
+  if (_meth == ESTIMATE || _meth == MEASURE || 
+      _meth == PATIENT || _meth == EXHAUSTIVE ||
+      _meth == HYBRID)
+    {
+      if (meth != _meth) 
+	{
+	  meth = _meth;
+	  if (rplan)
+	    fftwf_destroy_plan (rplan);
+	  if (plan[0])
+	    fftwf_destroy_plan (plan[0]);
+	  if (plan[1])
+	    fftwf_destroy_plan (plan[1]);
+	  rplan = plan[0] = plan[1] = 0;
+	}
+    }
+  else
+    ret = UNKNOWN;
+  return ret;
+}
+
+fftwf_plan
+octave_float_fftw_planner::create_plan (int dir, const int rank,
+				  const dim_vector dims, octave_idx_type howmany,
+				  octave_idx_type stride, octave_idx_type dist, 
+				  const FloatComplex *in, FloatComplex *out)
+{
+  int which = (dir == FFTW_FORWARD) ? 0 : 1;
+  fftwf_plan *cur_plan_p = &plan[which];
+  bool create_new_plan = false;
+  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
+  bool ioinplace = (in == out);
+
+  // Don't create a new plan if we have a non SIMD plan already but
+  // can do SIMD.  This prevents endlessly recreating plans if we
+  // change the alignment.
+
+  if (plan[which] == 0 || d[which] != dist || s[which] != stride
+      || r[which] != rank || h[which] != howmany 
+      || ioinplace != inplace[which]
+      || ((ioalign != simd_align[which]) ? !ioalign : false))
+    create_new_plan = true;
+  else
+    {
+      // We still might not have the same shape of array.
+
+      for (int i = 0; i < rank; i++)
+	if (dims(i) != n[which](i))
+	  {
+	    create_new_plan = true;
+	    break;
+	  }
+    }
+
+  if (create_new_plan)
+    {
+      d[which] = dist;
+      s[which] = stride;
+      r[which] = rank;
+      h[which] = howmany;
+      simd_align[which] = ioalign;
+      inplace[which] = ioinplace;
+      n[which] = dims;
+
+      // Note reversal of dimensions for column major storage in FFTW.
+      octave_idx_type nn = 1;
+      OCTAVE_LOCAL_BUFFER (int, tmp, rank);
+
+      for (int i = 0, j = rank-1; i < rank; i++, j--)
+	{
+	  tmp[i] = dims(j);
+	  nn *= dims(j);
+	}
+
+      int plan_flags = 0;
+      bool plan_destroys_in = true;
+
+      switch (meth) 
+	{
+	case UNKNOWN:
+	case ESTIMATE:
+	  plan_flags |= FFTW_ESTIMATE;
+	  plan_destroys_in = false;
+	  break;
+	case MEASURE:
+	  plan_flags |= FFTW_MEASURE;
+	  break;
+	case PATIENT:
+	  plan_flags |= FFTW_PATIENT;
+	  break;
+	case EXHAUSTIVE:
+	  plan_flags |= FFTW_EXHAUSTIVE;
+	  break;
+	case HYBRID:
+	  if (nn < 8193)
+	    plan_flags |= FFTW_MEASURE;
+	  else
+	    {
+	      plan_flags |= FFTW_ESTIMATE;
+	      plan_destroys_in = false;
+	    }
+	  break;
+	}
+
+      if (ioalign)
+	plan_flags &= ~FFTW_UNALIGNED;
+      else
+	plan_flags |= FFTW_UNALIGNED;
+
+      if (*cur_plan_p)
+	fftwf_destroy_plan (*cur_plan_p);
 
+      if (plan_destroys_in)
+	{
+	  // Create matrix with the same size and 16-byte alignment as input
+	  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
+	  itmp = reinterpret_cast<FloatComplex *>
+	    (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + 
+	     ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
+
+	  *cur_plan_p =
+	    fftwf_plan_many_dft (rank, tmp, howmany,
+	      reinterpret_cast<fftwf_complex *> (itmp),
+	      0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
+	      0, stride, dist, dir, plan_flags);
+	}
+      else
+	{
+	  *cur_plan_p =
+	    fftwf_plan_many_dft (rank, tmp, howmany,
+	      reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
+	      0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
+	      0, stride, dist, dir, plan_flags);
+	}
+
+      if (*cur_plan_p == 0)
+	(*current_liboctave_error_handler) ("Error creating fftw plan");
+    }
+
+  return *cur_plan_p;
+}
+ 
+fftwf_plan
+octave_float_fftw_planner::create_plan (const int rank, const dim_vector dims, 
+				  octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, 
+				  const float *in, FloatComplex *out)
+{
+  fftwf_plan *cur_plan_p = &rplan;
+  bool create_new_plan = false;
+  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
+
+  // Don't create a new plan if we have a non SIMD plan already but
+  // can do SIMD.  This prevents endlessly recreating plans if we
+  // change the alignment.
+
+  if (rplan == 0 || rd != dist || rs != stride || rr != rank
+      || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
+    create_new_plan = true;
+  else
+    {
+      // We still might not have the same shape of array.
+
+      for (int i = 0; i < rank; i++)
+	if (dims(i) != rn(i))
+	  {
+	    create_new_plan = true;
+	    break;
+	  }
+    }
+
+  if (create_new_plan)
+    {
+      rd = dist;
+      rs = stride;
+      rr = rank;
+      rh = howmany;
+      rsimd_align = ioalign;
+      rn = dims;
+
+      // Note reversal of dimensions for column major storage in FFTW.
+      octave_idx_type nn = 1;
+      OCTAVE_LOCAL_BUFFER (int, tmp, rank);
+
+      for (int i = 0, j = rank-1; i < rank; i++, j--)
+	{
+	  tmp[i] = dims(j);
+	  nn *= dims(j);
+	}
+
+      int plan_flags = 0;
+      bool plan_destroys_in = true;
+
+      switch (meth) 
+	{
+	case UNKNOWN:
+	case ESTIMATE:
+	  plan_flags |= FFTW_ESTIMATE;
+	  plan_destroys_in = false;
+	  break;
+	case MEASURE:
+	  plan_flags |= FFTW_MEASURE;
+	  break;
+	case PATIENT:
+	  plan_flags |= FFTW_PATIENT;
+	  break;
+	case EXHAUSTIVE:
+	  plan_flags |= FFTW_EXHAUSTIVE;
+	  break;
+	case HYBRID:
+	  if (nn < 8193)
+	    plan_flags |= FFTW_MEASURE;
+	  else
+	    {
+	      plan_flags |= FFTW_ESTIMATE;
+	      plan_destroys_in = false;
+	    }
+	  break;
+	}
+
+      if (ioalign)
+	plan_flags &= ~FFTW_UNALIGNED;
+      else
+	plan_flags |= FFTW_UNALIGNED;
+
+      if (*cur_plan_p)
+	fftwf_destroy_plan (*cur_plan_p);
+
+      if (plan_destroys_in)
+	{
+	  // Create matrix with the same size and 16-byte alignment as input
+	  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
+	  itmp = reinterpret_cast<float *>
+	    (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + 
+	     ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
+
+	  *cur_plan_p =
+	    fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
+	      0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
+	      0, stride, dist, plan_flags);
+	}
+      else
+	{
+	  *cur_plan_p =
+	    fftwf_plan_many_dft_r2c (rank, tmp, howmany,
+	      (const_cast<float *> (in)),
+	      0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
+	      0, stride, dist, plan_flags);
+	}
+
+      if (*cur_plan_p == 0)
+	(*current_liboctave_error_handler) ("Error creating fftw plan");
+    }
+
+  return *cur_plan_p;
+}
+
+octave_fftw_planner fftw_planner;
+octave_float_fftw_planner float_fftw_planner;
+
+template <class T>
 static inline void
-convert_packcomplex_1d (Complex *out, size_t nr, size_t nc,
+convert_packcomplex_1d (T *out, size_t nr, size_t nc,
 			octave_idx_type stride, octave_idx_type dist)
 {
   OCTAVE_QUIT;
@@ -357,14 +645,15 @@
   OCTAVE_QUIT;
 }
 
+template <class T>
 static inline void
-convert_packcomplex_Nd (Complex *out, const dim_vector &dv)
+convert_packcomplex_Nd (T *out, const dim_vector &dv)
 {
   size_t nc = dv(0);
   size_t nr = dv(1);
   size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1);
   size_t nrp = nr * np;
-  Complex *ptr1, *ptr2;
+  T *ptr1, *ptr2;
 
   OCTAVE_QUIT;
 
@@ -409,7 +698,7 @@
 	  for (size_t k = 0; k < jstart; k+= kstep)
 	    for (size_t l = nc/2+1; l < nc; l++)
 	      {
-		Complex tmp = out[i+ j + k + l];
+		T tmp = out[i+ j + k + l];
 		out[i + j + k + l] =  out[i + jj + k + l];
 		out[i + jj + k + l] = tmp;
 	      }
@@ -427,10 +716,10 @@
 
   dim_vector dv (npts);
   fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist,
-					     in, out);
+					      in, out);
 
   fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
-			reinterpret_cast<fftw_complex *> (out));
+			 reinterpret_cast<fftw_complex *> (out));
 
   // Need to create other half of the transform.
 
@@ -545,6 +834,133 @@
   return 0;
 }
 
+int
+octave_fftw::fft (const float *in, FloatComplex *out, size_t npts, 
+		  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
+{
+  dist = (dist < 0 ? npts : dist);
+
+  dim_vector dv (npts);
+  fftwf_plan plan = float_fftw_planner.create_plan (1, dv, nsamples, stride, dist,
+					     in, out);
+
+  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
+			reinterpret_cast<fftwf_complex *> (out));
+
+  // Need to create other half of the transform.
+
+  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
+
+  return 0;
+}
+
+int
+octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts, 
+		  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
+{
+  dist = (dist < 0 ? npts : dist);
+
+  dim_vector dv (npts);
+  fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples,
+					     stride, dist, in, out);
+
+  fftwf_execute_dft (plan, 
+	reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
+	reinterpret_cast<fftwf_complex *> (out));
+
+  return 0;
+}
+
+int
+octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts, 
+		   size_t nsamples, octave_idx_type stride, octave_idx_type dist)
+{
+  dist = (dist < 0 ? npts : dist);
+
+  dim_vector dv (npts);
+  fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples,
+					     stride, dist, in, out);
+
+  fftwf_execute_dft (plan, 
+	reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
+	reinterpret_cast<fftwf_complex *> (out));
+
+  const FloatComplex scale = npts;
+  for (size_t j = 0; j < nsamples; j++)
+    for (size_t i = 0; i < npts; i++)
+      out[i*stride + j*dist] /= scale;
+
+  return 0;
+}
+
+int
+octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank, 
+		    const dim_vector &dv)
+{
+  octave_idx_type dist = 1;
+  for (int i = 0; i < rank; i++)
+    dist *= dv(i);
+
+  // Fool with the position of the start of the output matrix, so that
+  // creating other half of the matrix won't cause cache problems.
+
+  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); 
+  
+  fftwf_plan plan = float_fftw_planner.create_plan (rank, dv, 1, 1, dist,
+					     in, out + offset);
+
+  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
+			reinterpret_cast<fftwf_complex *> (out+ offset));
+
+  // Need to create other half of the transform.
+
+  convert_packcomplex_Nd (out, dv);
+
+  return 0;
+}
+
+int
+octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, 
+		    const dim_vector &dv)
+{
+  octave_idx_type dist = 1;
+  for (int i = 0; i < rank; i++)
+    dist *= dv(i);
+
+  fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1,
+					     dist, in, out);
+
+  fftwf_execute_dft (plan, 
+	reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
+	reinterpret_cast<fftwf_complex *> (out));
+
+  return 0;
+}
+
+int
+octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, 
+		     const dim_vector &dv)
+{
+  octave_idx_type dist = 1;
+  for (int i = 0; i < rank; i++)
+    dist *= dv(i);
+
+  fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1,
+					     dist, in, out);
+
+  fftwf_execute_dft (plan, 
+	reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
+	reinterpret_cast<fftwf_complex *> (out));
+
+  const size_t npts = dv.numel ();
+  const FloatComplex scale = npts;
+  for (size_t i = 0; i < npts; i++)
+    out[i] /= scale;
+
+  return 0;
+}
+
+
 #endif
 
 /*