Mercurial > octave

--- a/liboctave/numeric/module.mk	Mon May 16 23:26:07 2016 -0400
+++ b/liboctave/numeric/module.mk	Mon May 16 23:20:50 2016 -0400
@@ -55,11 +55,6 @@
   liboctave/numeric/sparse-qr.h \
   liboctave/numeric/svd.h

-NUMERIC_C_SRC = \
-  liboctave/numeric/randgamma.c \
-  liboctave/numeric/randmtzig.c \
-  liboctave/numeric/randpoisson.c
-
 NUMERIC_SRC = \
   liboctave/numeric/CollocWt.cc \
   liboctave/numeric/DASPK.cc \
@@ -85,13 +80,15 @@
   liboctave/numeric/oct-spparms.cc \
   liboctave/numeric/qr.cc \
   liboctave/numeric/qrp.cc \
+  liboctave/numeric/randgamma.cc \
+  liboctave/numeric/randmtzig.cc \
+  liboctave/numeric/randpoisson.cc \
   liboctave/numeric/schur.cc \
   liboctave/numeric/sparse-chol.cc \
   liboctave/numeric/sparse-dmsolve.cc \
   liboctave/numeric/sparse-lu.cc \
   liboctave/numeric/sparse-qr.cc \
-  liboctave/numeric/svd.cc \
-  $(NUMERIC_C_SRC)
+  liboctave/numeric/svd.cc

 LIBOCTAVE_TEMPLATE_SRC += \
   liboctave/numeric/bsxfun-defs.cc
--- a/liboctave/numeric/randgamma.c	Mon May 16 23:26:07 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,201 +0,0 @@
-/*
-
-Copyright (C) 2006-2015 John W. Eaton
-
-This file is part of Octave.
-
-Octave is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-Octave is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Octave; see the file COPYING.  If not, see
-<http://www.gnu.org/licenses/>.
-
-*/
-
-/* Original version written by Paul Kienzle distributed as free
-   software in the in the public domain.  */
-
-/*
-
-double randg (a)
-void fill_randg (a,n,x)
-
-Generate a series of standard gamma distributions.
-
-See: Marsaglia G and Tsang W (2000), "A simple method for generating
-gamma variables", ACM Transactions on Mathematical Software 26(3) 363-372
-
-Needs the following defines:
-* NAN: value to return for Not-A-Number
-* RUNI: uniform generator on (0,1)
-* RNOR: normal generator
-* REXP: exponential generator, or -log(RUNI) if one isn't available
-* INFINITE: function to test whether a value is infinite
-
-Test using:
-  mean = a
-  variance = a
-  skewness = 2/sqrt(a)
-  kurtosis = 3 + 6/sqrt(a)
-
-Note that randg can be used to generate many distributions:
-
-gamma(a,b) for a>0, b>0 (from R)
-  r = b*randg(a)
-beta(a,b) for a>0, b>0
-  r1 = randg(a,1)
-  r = r1 / (r1 + randg(b,1))
-Erlang(a,n)
-  r = a*randg(n)
-chisq(df) for df>0
-  r = 2*randg(df/2)
-t(df) for 0<df<inf (use randn if df is infinite)
-  r = randn () / sqrt(2*randg(df/2)/df)
-F(n1,n2) for 0<n1, 0<n2
-  r1 = 2*randg(n1/2)/n1 or 1 if n1 is infinite
-  r2 = 2*randg(n2/2)/n2 or 1 if n2 is infinite
-  r = r1 / r2
-negative binonial (n, p) for n>0, 0<p<=1
-  r = randp((1-p)/p * randg(n))
-  (from R, citing Devroye(1986), Non-Uniform Random Variate Generation)
-non-central chisq(df,L), for df>=0 and L>0 (use chisq if L=0)
-  r = randp(L/2)
-  r(r>0) = 2*randg(r(r>0))
-  r(df>0) += 2*randg(df(df>0)/2)
-  (from R, citing formula 29.5b-c in Johnson, Kotz, Balkrishnan(1995))
-Dirichlet(a1,...,ak) for ai > 0
-  r = (randg(a1),...,randg(ak))
-  r = r / sum(r)
-  (from GSL, citing Law & Kelton(1991), Simulation Modeling and Analysis)
-*/
-
-#if defined (HAVE_CONFIG_H)
-#  include "config.h"
-#endif
-
-#include <stdio.h>
-
-#include "lo-ieee.h"
-#include "lo-math.h"
-#include "randmtzig.h"
-#include "randgamma.h"
-
-#undef NAN
-#define NAN octave_NaN
-#define INFINITE lo_ieee_isinf
-#define RUNI oct_randu()
-#define RNOR oct_randn()
-#define REXP oct_rande()
-
-void
-oct_fill_randg (double a, octave_idx_type n, double *r)
-{
-  octave_idx_type i;
-  /* If a < 1, start by generating gamma (1+a) */
-  const double d =  (a < 1. ? 1.+a : a) - 1./3.;
-  const double c = 1./sqrt (9.*d);
-
-  /* Handle invalid cases */
-  if (a <= 0 || INFINITE(a))
-    {
-      for (i=0; i < n; i++)
-        r[i] = NAN;
-      return;
-    }
-
-  for (i=0; i < n; i++)
-    {
-      double x, xsq, v, u;
-    restart:
-      x = RNOR;
-      v = (1+c*x);
-      v *= v*v;
-      if (v <= 0)
-        goto restart; /* rare, so don't bother moving up */
-      u = RUNI;
-      xsq = x*x;
-      if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v)))
-        goto restart;
-      r[i] = d*v;
-    }
-  if (a < 1)
-    {
-      /* Use gamma(a) = gamma(1+a)*U^(1/a) */
-      /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */
-      for (i = 0; i < n; i++)
-        r[i] *= exp (-REXP/a);
-    }
-}
-
-double
-oct_randg (double a)
-{
-  double ret;
-  oct_fill_randg (a,1,&ret);
-  return ret;
-}
-
-#undef NAN
-#undef RUNI
-#undef RNOR
-#undef REXP
-#define NAN octave_Float_NaN
-#define RUNI oct_float_randu()
-#define RNOR oct_float_randn()
-#define REXP oct_float_rande()
-
-void
-oct_fill_float_randg (float a, octave_idx_type n, float *r)
-{
-  octave_idx_type i;
-  /* If a < 1, start by generating gamma(1+a) */
-  const float d =  (a < 1. ? 1.+a : a) - 1./3.;
-  const float c = 1./sqrt (9.*d);
-
-  /* Handle invalid cases */
-  if (a <= 0 || INFINITE(a))
-    {
-      for (i=0; i < n; i++)
-        r[i] = NAN;
-      return;
-    }
-
-  for (i=0; i < n; i++)
-    {
-      float x, xsq, v, u;
-    frestart:
-      x = RNOR;
-      v = (1+c*x);
-      v *= v*v;
-      if (v <= 0)
-        goto frestart; /* rare, so don't bother moving up */
-      u = RUNI;
-      xsq = x*x;
-      if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v)))
-        goto frestart;
-      r[i] = d*v;
-    }
-  if (a < 1)
-    {
-      /* Use gamma(a) = gamma(1+a)*U^(1/a) */
-      /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */
-      for (i = 0; i < n; i++)
-        r[i] *= exp (-REXP/a);
-    }
-}
-
-float
-oct_float_randg (float a)
-{
-  float ret;
-  oct_fill_float_randg (a,1,&ret);
-  return ret;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/numeric/randgamma.cc	Mon May 16 23:20:50 2016 -0400
@@ -0,0 +1,199 @@
+/*
+
+Copyright (C) 2006-2015 John W. Eaton
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+/* Original version written by Paul Kienzle distributed as free
+   software in the in the public domain.  */
+
+/*
+
+double randg (a)
+void fill_randg (a,n,x)
+
+Generate a series of standard gamma distributions.
+
+See: Marsaglia G and Tsang W (2000), "A simple method for generating
+gamma variables", ACM Transactions on Mathematical Software 26(3) 363-372
+
+Needs the following defines:
+* NAN: value to return for Not-A-Number
+* RUNI: uniform generator on (0,1)
+* RNOR: normal generator
+* REXP: exponential generator, or -log(RUNI) if one isn't available
+* INFINITE: function to test whether a value is infinite
+
+Test using:
+  mean = a
+  variance = a
+  skewness = 2/sqrt(a)
+  kurtosis = 3 + 6/sqrt(a)
+
+Note that randg can be used to generate many distributions:
+
+gamma(a,b) for a>0, b>0 (from R)
+  r = b*randg(a)
+beta(a,b) for a>0, b>0
+  r1 = randg(a,1)
+  r = r1 / (r1 + randg(b,1))
+Erlang(a,n)
+  r = a*randg(n)
+chisq(df) for df>0
+  r = 2*randg(df/2)
+t(df) for 0<df<inf (use randn if df is infinite)
+  r = randn () / sqrt(2*randg(df/2)/df)
+F(n1,n2) for 0<n1, 0<n2
+  r1 = 2*randg(n1/2)/n1 or 1 if n1 is infinite
+  r2 = 2*randg(n2/2)/n2 or 1 if n2 is infinite
+  r = r1 / r2
+negative binonial (n, p) for n>0, 0<p<=1
+  r = randp((1-p)/p * randg(n))
+  (from R, citing Devroye(1986), Non-Uniform Random Variate Generation)
+non-central chisq(df,L), for df>=0 and L>0 (use chisq if L=0)
+  r = randp(L/2)
+  r(r>0) = 2*randg(r(r>0))
+  r(df>0) += 2*randg(df(df>0)/2)
+  (from R, citing formula 29.5b-c in Johnson, Kotz, Balkrishnan(1995))
+Dirichlet(a1,...,ak) for ai > 0
+  r = (randg(a1),...,randg(ak))
+  r = r / sum(r)
+  (from GSL, citing Law & Kelton(1991), Simulation Modeling and Analysis)
+*/
+
+#if defined (HAVE_CONFIG_H)
+#  include "config.h"
+#endif
+
+#include "lo-ieee.h"
+#include "lo-math.h"
+#include "randmtzig.h"
+#include "randgamma.h"
+
+#undef NAN
+#define NAN octave_NaN
+#define INFINITE lo_ieee_isinf
+#define RUNI oct_randu()
+#define RNOR oct_randn()
+#define REXP oct_rande()
+
+void
+oct_fill_randg (double a, octave_idx_type n, double *r)
+{
+  octave_idx_type i;
+  /* If a < 1, start by generating gamma (1+a) */
+  const double d =  (a < 1. ? 1.+a : a) - 1./3.;
+  const double c = 1./sqrt (9.*d);
+
+  /* Handle invalid cases */
+  if (a <= 0 || INFINITE(a))
+    {
+      for (i=0; i < n; i++)
+        r[i] = NAN;
+      return;
+    }
+
+  for (i=0; i < n; i++)
+    {
+      double x, xsq, v, u;
+    restart:
+      x = RNOR;
+      v = (1+c*x);
+      v *= v*v;
+      if (v <= 0)
+        goto restart; /* rare, so don't bother moving up */
+      u = RUNI;
+      xsq = x*x;
+      if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v)))
+        goto restart;
+      r[i] = d*v;
+    }
+  if (a < 1)
+    {
+      /* Use gamma(a) = gamma(1+a)*U^(1/a) */
+      /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */
+      for (i = 0; i < n; i++)
+        r[i] *= exp (-REXP/a);
+    }
+}
+
+double
+oct_randg (double a)
+{
+  double ret;
+  oct_fill_randg (a,1,&ret);
+  return ret;
+}
+
+#undef NAN
+#undef RUNI
+#undef RNOR
+#undef REXP
+#define NAN octave_Float_NaN
+#define RUNI oct_float_randu()
+#define RNOR oct_float_randn()
+#define REXP oct_float_rande()
+
+void
+oct_fill_float_randg (float a, octave_idx_type n, float *r)
+{
+  octave_idx_type i;
+  /* If a < 1, start by generating gamma(1+a) */
+  const float d =  (a < 1. ? 1.+a : a) - 1./3.;
+  const float c = 1./sqrt (9.*d);
+
+  /* Handle invalid cases */
+  if (a <= 0 || INFINITE(a))
+    {
+      for (i=0; i < n; i++)
+        r[i] = NAN;
+      return;
+    }
+
+  for (i=0; i < n; i++)
+    {
+      float x, xsq, v, u;
+    frestart:
+      x = RNOR;
+      v = (1+c*x);
+      v *= v*v;
+      if (v <= 0)
+        goto frestart; /* rare, so don't bother moving up */
+      u = RUNI;
+      xsq = x*x;
+      if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v)))
+        goto frestart;
+      r[i] = d*v;
+    }
+  if (a < 1)
+    {
+      /* Use gamma(a) = gamma(1+a)*U^(1/a) */
+      /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */
+      for (i = 0; i < n; i++)
+        r[i] *= exp (-REXP/a);
+    }
+}
+
+float
+oct_float_randg (float a)
+{
+  float ret;
+  oct_fill_float_randg (a,1,&ret);
+  return ret;
+}
--- a/liboctave/numeric/randgamma.h	Mon May 16 23:26:07 2016 -0400
+++ b/liboctave/numeric/randgamma.h	Mon May 16 23:20:50 2016 -0400
@@ -28,10 +28,6 @@

 #include "octave-config.h"

-#ifdef __cplusplus
-extern "C" {
-#endif
-
 extern OCTAVE_API double oct_randg (double a);
 extern OCTAVE_API void oct_fill_randg (double a, octave_idx_type n, double *p);

@@ -39,8 +35,4 @@
 extern OCTAVE_API void oct_fill_float_randg (float a, octave_idx_type n,
                                              float *p);

-#ifdef __cplusplus
-}
 #endif
-
-#endif
--- a/liboctave/numeric/randmtzig.c	Mon May 16 23:26:07 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,870 +0,0 @@
-/*
-
-Copyright (C) 2006-2015 John W. Eaton
-
-This file is part of Octave.
-
-Octave is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-Octave is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Octave; see the file COPYING.  If not, see
-<http://www.gnu.org/licenses/>.
-
-*/
-
-/*
-   A C-program for MT19937, with initialization improved 2002/2/10.
-   Coded by Takuji Nishimura and Makoto Matsumoto.
-   This is a faster version by taking Shawn Cokus's optimization,
-   Matthe Bellew's simplification, Isaku Wada's real version.
-   David Bateman added normal and exponential distributions following
-   Marsaglia and Tang's Ziggurat algorithm.
-
-   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   Copyright (C) 2004, David Bateman
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-     1. Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-
-     2. Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in the
-        documentation and/or other materials provided with the distribution.
-
-     3. The names of its contributors may not be used to endorse or promote
-        products derived from this software without specific prior written
-        permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
-   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-   Any feedback is very welcome.
-   http://www.math.keio.ac.jp/matumoto/emt.html
-   email: matumoto@math.keio.ac.jp
-
-   * 2004-01-19 Paul Kienzle
-   * * comment out main
-   * add init_by_entropy, get_state, set_state
-   * * converted to allow compiling by C++ compiler
-   *
-   * 2004-01-25 David Bateman
-   * * Add Marsaglia and Tsang Ziggurat code
-   *
-   * 2004-07-13 Paul Kienzle
-   * * make into an independent library with some docs.
-   * * introduce new main and test code.
-   *
-   * 2004-07-28 Paul Kienzle & David Bateman
-   * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa
-   * * make the naming scheme more uniform
-   * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch.
-   *
-   * 2005-02-23 Paul Kienzle
-   * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control
-   *
-   * 2006-04-01 David Bateman
-   * * convert for use in octave, declaring static functions only used
-   *   here and adding oct_ to functions visible externally
-   * * inverse sense of ALLBITS
-   *
-   * 2012-05-18 David Bateman
-   * * Remove randu64 and ALLBIT option
-   * * Add the single precision generators
-   */
-
-/*
-   === Build instructions ===
-
-   Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is
-   available.  This is not necessary if your architecture has
-   /dev/urandom defined.
-
-   Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86.
-   You can force X86 behavior with -DUSE_X86_32=1, or suppress it with
-   -DUSE_X86_32=0. You should also consider -march=i686 or similar for
-   extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit
-   x86 architectures.
-
-   If you want to replace the Mersenne Twister with another
-   generator then redefine randi32 appropriately.
-
-   === Usage instructions ===
-   Before using any of the generators, initialize the state with one of
-   oct_init_by_int, oct_init_by_array or oct_init_by_entropy.
-
-   All generators share the same state vector.
-
-   === Mersenne Twister ===
-   void oct_init_by_int (uint32_t s)           32-bit initial state
-   void oct_init_by_array (uint32_t k[],int m) m*32-bit initial state
-   void oct_init_by_entropy (void)             random initial state
-   void oct_get_state (uint32_t save[MT_N+1])  saves state in array
-   void oct_set_state (uint32_t save[MT_N+1])  restores state from array
-   static uint32_t randmt (void)               returns 32-bit unsigned int
-
-   === inline generators ===
-   static uint32_t randi32 (void)   returns 32-bit unsigned int
-   static uint64_t randi53 (void)   returns 53-bit unsigned int
-   static uint64_t randi54 (void)   returns 54-bit unsigned int
-   static float randu32 (void)      returns 32-bit uniform in (0,1)
-   static double randu53 (void)     returns 53-bit uniform in (0,1)
-
-   double oct_randu (void)       returns M-bit uniform in (0,1)
-   double oct_randn (void)       returns M-bit standard normal
-   double oct_rande (void)       returns N-bit standard exponential
-
-   float oct_float_randu (void)       returns M-bit uniform in (0,1)
-   float oct_float_randn (void)       returns M-bit standard normal
-   float oct_float_rande (void)       returns N-bit standard exponential
-
-   === Array generators ===
-   void oct_fill_randi32 (octave_idx_type, uint32_t [])
-   void oct_fill_randi64 (octave_idx_type, uint64_t [])
-
-   void oct_fill_randu (octave_idx_type, double [])
-   void oct_fill_randn (octave_idx_type, double [])
-   void oct_fill_rande (octave_idx_type, double [])
-
-   void oct_fill_float_randu (octave_idx_type, float [])
-   void oct_fill_float_randn (octave_idx_type, float [])
-   void oct_fill_float_rande (octave_idx_type, float [])
-*/
-
-#if defined (HAVE_CONFIG_H)
-#  include "config.h"
-#endif
-
-#include <stdio.h>
-#include <time.h>
-
-#ifdef HAVE_GETTIMEOFDAY
-#  include <sys/time.h>
-#endif
-
-#include "lo-math.h"
-#include "randmtzig.h"
-
-/* FIXME: may want to suppress X86 if sizeof(long) > 4 */
-#if ! defined (USE_X86_32)
-#  if defined (i386) || defined (HAVE_X86_32)
-#    define USE_X86_32 1
-#  else
-#    define USE_X86_32 0
-#  endif
-#endif
-
-/* ===== Mersenne Twister 32-bit generator ===== */
-
-#define MT_M 397
-#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
-#define UMASK 0x80000000UL /* most significant w-r bits */
-#define LMASK 0x7fffffffUL /* least significant r bits */
-#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) )
-#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL))
-
-static uint32_t *next;
-static uint32_t state[MT_N]; /* the array for the state vector  */
-static int left = 1;
-static int initf = 0;
-static int initt = 1;
-static int inittf = 1;
-
-/* initializes state[MT_N] with a seed */
-void
-oct_init_by_int (uint32_t s)
-{
-  int j;
-  state[0] = s & 0xffffffffUL;
-  for (j = 1; j < MT_N; j++)
-    {
-      state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j);
-      /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
-      /* In the previous versions, MSBs of the seed affect   */
-      /* only MSBs of the array state[].                        */
-      /* 2002/01/09 modified by Makoto Matsumoto             */
-      state[j] &= 0xffffffffUL;  /* for >32 bit machines */
-    }
-  left = 1;
-  initf = 1;
-}
-
-/* initialize by an array with array-length */
-/* init_key is the array for initializing keys */
-/* key_length is its length */
-void
-oct_init_by_array (uint32_t *init_key, int key_length)
-{
-  int i, j, k;
-  oct_init_by_int (19650218UL);
-  i = 1;
-  j = 0;
-  k = (MT_N > key_length ? MT_N : key_length);
-  for (; k; k--)
-    {
-      state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL))
-                 + init_key[j] + j; /* non linear */
-      state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-      i++;
-      j++;
-      if (i >= MT_N)
-        {
-          state[0] = state[MT_N-1];
-          i = 1;
-        }
-      if (j >= key_length)
-        j = 0;
-    }
-  for (k = MT_N - 1; k; k--)
-    {
-      state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL))
-                 - i; /* non linear */
-      state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-      i++;
-      if (i >= MT_N)
-        {
-          state[0] = state[MT_N-1];
-          i = 1;
-        }
-    }
-
-  state[0] = 0x80000000UL; /* MSB is 1; assuring nonzero initial array */
-  left = 1;
-  initf = 1;
-}
-
-void
-oct_init_by_entropy (void)
-{
-  uint32_t entropy[MT_N];
-  int n = 0;
-
-  /* Look for entropy in /dev/urandom */
-  FILE* urandom = fopen ("/dev/urandom", "rb");
-  if (urandom)
-    {
-      while (n < MT_N)
-        {
-          unsigned char word[4];
-          if (fread (word, 4, 1, urandom) != 1)
-            break;
-          entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+((uint32_t)word[3]<<24);
-        }
-      fclose (urandom);
-    }
-
-  /* If there isn't enough entropy, gather some from various sources */
-  if (n < MT_N)
-    entropy[n++] = time (NULL); /* Current time in seconds */
-  if (n < MT_N)
-    entropy[n++] = clock ();    /* CPU time used (usec) */
-#ifdef HAVE_GETTIMEOFDAY
-  if (n < MT_N)
-    {
-      struct timeval tv;
-      if (gettimeofday (&tv, NULL) != -1)
-        entropy[n++] = tv.tv_usec;   /* Fractional part of current time */
-    }
-#endif
-  /* Send all the entropy into the initial state vector */
-  oct_init_by_array (entropy,n);
-}
-
-void
-oct_set_state (uint32_t *save)
-{
-  int i;
-  for (i = 0; i < MT_N; i++)
-    state[i] = save[i];
-  left = save[MT_N];
-  next = state + (MT_N - left + 1);
-}
-
-void
-oct_get_state (uint32_t *save)
-{
-  int i;
-  for (i = 0; i < MT_N; i++)
-    save[i] = state[i];
-  save[MT_N] = left;
-}
-
-static void
-next_state (void)
-{
-  uint32_t *p = state;
-  int j;
-
-  /* if init_by_int() has not been called, */
-  /* a default initial seed is used         */
-  /* if (initf==0) init_by_int(5489UL); */
-  /* Or better yet, a random seed! */
-  if (initf == 0)
-    oct_init_by_entropy ();
-
-  left = MT_N;
-  next = state;
-
-  for (j = MT_N - MT_M + 1; --j; p++)
-    *p = p[MT_M] ^ TWIST(p[0], p[1]);
-
-  for (j = MT_M; --j; p++)
-    *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]);
-
-  *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]);
-}
-
-/* generates a random number on [0,0xffffffff]-interval */
-static uint32_t
-randmt (void)
-{
-  uint32_t y;
-
-  if (--left == 0)
-    next_state ();
-  y = *next++;
-
-  /* Tempering */
-  y ^= (y >> 11);
-  y ^= (y << 7) & 0x9d2c5680UL;
-  y ^= (y << 15) & 0xefc60000UL;
-  return (y ^ (y >> 18));
-}
-
-/* ===== Uniform generators ===== */
-
-/* Select which 32 bit generator to use */
-#define randi32 randmt
-
-static uint64_t
-randi53 (void)
-{
-  const uint32_t lo = randi32 ();
-  const uint32_t hi = randi32 () & 0x1FFFFF;
-#ifdef HAVE_X86_32
-  uint64_t u;
-  uint32_t *p = (uint32_t *)&u;
-  p[0] = lo;
-  p[1] = hi;
-  return u;
-#else
-  return (( (uint64_t)hi << 32) | lo);
-#endif
-}
-
-static uint64_t
-randi54 (void)
-{
-  const uint32_t lo = randi32 ();
-  const uint32_t hi = randi32 () & 0x3FFFFF;
-#ifdef HAVE_X86_32
-  uint64_t u;
-  uint32_t *p = (uint32_t *)&u;
-  p[0] = lo;
-  p[1] = hi;
-  return u;
-#else
-  return (( (uint64_t)hi << 32 ) | lo);
-#endif
-}
-
-/* generates a random number on (0,1)-real-interval */
-static float
-randu32 (void)
-{
-  return ((float)randi32 () + 0.5) * (1.0/4294967296.0);
-  /* divided by 2^32 */
-}
-
-/* generates a random number on (0,1) with 53-bit resolution */
-static double
-randu53 (void)
-{
-  const uint32_t a = randi32 () >> 5;
-  const uint32_t b = randi32 () >> 6;
-  return (a*67108864.0+b+0.4) * (1.0/9007199254740992.0);
-}
-
-/* Determine mantissa for uniform doubles */
-double
-oct_randu (void)
-{
-  return randu53 ();
-}
-
-/* Determine mantissa for uniform floats */
-float
-oct_float_randu (void)
-{
-  return randu32 ();
-}
-
-/* ===== Ziggurat normal and exponential generators ===== */
-
-#define ZIGGURAT_TABLE_SIZE 256
-
-#define ZIGGURAT_NOR_R 3.6541528853610088
-#define ZIGGURAT_NOR_INV_R 0.27366123732975828
-#define NOR_SECTION_AREA 0.00492867323399
-
-#define ZIGGURAT_EXP_R 7.69711747013104972
-#define ZIGGURAT_EXP_INV_R 0.129918765548341586
-#define EXP_SECTION_AREA 0.0039496598225815571993
-
-#define ZIGINT uint64_t
-#define EMANTISSA 9007199254740992.0  /* 53 bit mantissa */
-#define ERANDI randi53() /* 53 bits for mantissa */
-#define NMANTISSA EMANTISSA
-#define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */
-#define RANDU randu53()
-
-static ZIGINT ki[ZIGGURAT_TABLE_SIZE];
-static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE];
-static ZIGINT ke[ZIGGURAT_TABLE_SIZE];
-static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE];
-
-/*
-This code is based on the paper Marsaglia and Tsang, "The ziggurat method
-for generating random variables", Journ. Statistical Software. Code was
-presented in this paper for a Ziggurat of 127 levels and using a 32 bit
-integer random number generator. This version of the code, uses the
-Mersenne Twister as the integer generator and uses 256 levels in the
-Ziggurat. This has several advantages.
-
-  1) As Marsaglia and Tsang themselves states, the more levels the few
-     times the expensive tail algorithm must be called
-  2) The cycle time of the generator is determined by the integer
-     generator, thus the use of a Mersenne Twister for the core random
-     generator makes this cycle extremely long.
-  3) The license on the original code was unclear, thus rewriting the code
-     from the article means we are free of copyright issues.
-  4) Compile flag for full 53-bit random mantissa.
-
-It should be stated that the authors made my life easier, by the fact that
-the algorithm developed in the text of the article is for a 256 level
-ziggurat, even if the code itself isn't...
-
-One modification to the algorithm developed in the article, is that it is
-assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in
-terms like 2^32 in the code. As the normal distribution is defined between
--Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus
-in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for
-this term.  The exponential distribution is one sided so we use the
-full 32 bits.  We use EMANTISSA for this term.
-
-It appears that I'm slightly slower than the code in the article, this
-is partially due to a better generator of random integers than they
-use. But might also be that the case of rapid return was optimized by
-inlining the relevant code with a #define. As the basic Mersenne
-Twister is only 25% faster than this code I suspect that the main
-reason is just the use of the Mersenne Twister and not the inlining,
-so I'm not going to try and optimize further.
-*/
-
-static void
-create_ziggurat_tables (void)
-{
-  int i;
-  double x, x1;
-
-  /* Ziggurat tables for the normal distribution */
-  x1 = ZIGGURAT_NOR_R;
-  wi[255] = x1 / NMANTISSA;
-  fi[255] = exp (-0.5 * x1 * x1);
-
-  /* Index zero is special for tail strip, where Marsaglia and Tsang
-   * defines this as
-   * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
-   * where v is the area of each strip of the ziggurat.
-   */
-  ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA);
-  wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA;
-  fi[0] = 1.;
-
-  for (i = 254; i > 0; i--)
-    {
-      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
-       * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
-       */
-      x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + fi[i+1]));
-      ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA);
-      wi[i] = x / NMANTISSA;
-      fi[i] = exp (-0.5 * x * x);
-      x1 = x;
-    }
-
-  ki[1] = 0;
-
-  /* Zigurrat tables for the exponential distribution */
-  x1 = ZIGGURAT_EXP_R;
-  we[255] = x1 / EMANTISSA;
-  fe[255] = exp (-x1);
-
-  /* Index zero is special for tail strip, where Marsaglia and Tsang
-   * defines this as
-   * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
-   * where v is the area of each strip of the ziggurat.
-   */
-  ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA);
-  we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA;
-  fe[0] = 1.;
-
-  for (i = 254; i > 0; i--)
-    {
-      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
-       * need inverse operator of y = exp(-x) -> x = -ln(y)
-       */
-      x = - log (EXP_SECTION_AREA / x1 + fe[i+1]);
-      ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA);
-      we[i] = x / EMANTISSA;
-      fe[i] = exp (-x);
-      x1 = x;
-    }
-  ke[1] = 0;
-
-  initt = 0;
-}
-
-/*
- * Here is the guts of the algorithm. As Marsaglia and Tsang state the
- * algorithm in their paper
- *
- * 1) Calculate a random signed integer j and let i be the index
- *     provided by the rightmost 8-bits of j
- * 2) Set x = j * w_i. If j < k_i return x
- * 3) If i = 0, then return x from the tail
- * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
- * 5) goto step 1
- *
- * Where f is the functional form of the distribution, which for a normal
- * distribution is exp(-0.5*x*x)
- */
-
-double
-oct_randn (void)
-{
-  if (initt)
-    create_ziggurat_tables ();
-
-  while (1)
-    {
-      /* The following code is specialized for 32-bit mantissa.
-       * Compared to the arbitrary mantissa code, there is a performance
-       * gain for 32-bits:  PPC: 2%, MIPS: 8%, x86: 40%
-       * There is a bigger performance gain compared to using a full
-       * 53-bit mantissa:  PPC: 60%, MIPS: 65%, x86: 240%
-       * Of course, different compilers and operating systems may
-       * have something to do with this.
-       */
-# ifdef HAVE_X86_32
-      /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */
-      double x;
-      int si,idx;
-      uint32_t lo, hi;
-      int64_t rabs;
-      uint32_t *p = (uint32_t *)&rabs;
-      lo = randi32 ();
-      idx = lo & 0xFF;
-      hi = randi32 ();
-      si = hi & UMASK;
-      p[0] = lo;
-      p[1] = hi & 0x1FFFFF;
-      x = ( si ? -rabs : rabs ) * wi[idx];
-# else /* ! HAVE_X86_32 */
-      /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */
-      const uint64_t r = NRANDI;
-      const int64_t rabs = r >> 1;
-      const int idx = (int)(rabs & 0xFF);
-      const double x = ( (r & 1) ? -rabs : rabs) * wi[idx];
-# endif /* ! HAVE_X86_32 */
-      if (rabs < (int64_t)ki[idx])
-        return x;        /* 99.3% of the time we return here 1st try */
-      else if (idx == 0)
-        {
-          /* As stated in Marsaglia and Tsang
-           *
-           * For the normal tail, the method of Marsaglia[5] provides:
-           * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
-           * then return r+x. Except that r+x is always in the positive
-           * tail!!!! Any thing random might be used to determine the
-           * sign, but as we already have r we might as well use it
-           *
-           * [PAK] but not the bottom 8 bits, since they are all 0 here!
-           */
-          double xx, yy;
-          do
-            {
-              xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
-              yy = - log (RANDU);
-            }
-          while ( yy+yy <= xx*xx);
-          return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);
-        }
-      else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp (-0.5*x*x))
-        return x;
-    }
-}
-
-double
-oct_rande (void)
-{
-  if (initt)
-    create_ziggurat_tables ();
-
-  while (1)
-    {
-      ZIGINT ri = ERANDI;
-      const int idx = (int)(ri & 0xFF);
-      const double x = ri * we[idx];
-      if (ri < ke[idx])
-        return x;               /* 98.9% of the time we return here 1st try */
-      else if (idx == 0)
-        {
-          /* As stated in Marsaglia and Tsang
-           *
-           * For the exponential tail, the method of Marsaglia[5] provides:
-           * x = r - ln(U);
-           */
-          return ZIGGURAT_EXP_R - log (RANDU);
-        }
-      else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp (-x))
-        return x;
-    }
-}
-
-#undef ZIGINT
-#undef EMANTISSA
-#undef ERANDI
-#undef NMANTISSA
-#undef NRANDI
-#undef RANDU
-
-#define ZIGINT uint32_t
-#define EMANTISSA 4294967296.0 /* 32 bit mantissa */
-#define ERANDI randi32() /* 32 bits for mantissa */
-#define NMANTISSA 2147483648.0 /* 31 bit mantissa */
-#define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */
-#define RANDU randu32()
-
-static ZIGINT fki[ZIGGURAT_TABLE_SIZE];
-static float fwi[ZIGGURAT_TABLE_SIZE], ffi[ZIGGURAT_TABLE_SIZE];
-static ZIGINT fke[ZIGGURAT_TABLE_SIZE];
-static float fwe[ZIGGURAT_TABLE_SIZE], ffe[ZIGGURAT_TABLE_SIZE];
-
-
-static void
-create_ziggurat_float_tables (void)
-{
-  int i;
-  float x, x1;
-
-  /* Ziggurat tables for the normal distribution */
-  x1 = ZIGGURAT_NOR_R;
-  fwi[255] = x1 / NMANTISSA;
-  ffi[255] = exp (-0.5 * x1 * x1);
-
-  /* Index zero is special for tail strip, where Marsaglia and Tsang
-   * defines this as
-   * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
-   * where v is the area of each strip of the ziggurat.
-   */
-  fki[0] = (ZIGINT) (x1 * ffi[255] / NOR_SECTION_AREA * NMANTISSA);
-  fwi[0] = NOR_SECTION_AREA / ffi[255] / NMANTISSA;
-  ffi[0] = 1.;
-
-  for (i = 254; i > 0; i--)
-    {
-      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
-       * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
-       */
-      x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + ffi[i+1]));
-      fki[i+1] = (ZIGINT)(x / x1 * NMANTISSA);
-      fwi[i] = x / NMANTISSA;
-      ffi[i] = exp (-0.5 * x * x);
-      x1 = x;
-    }
-
-  fki[1] = 0;
-
-  /* Zigurrat tables for the exponential distribution */
-  x1 = ZIGGURAT_EXP_R;
-  fwe[255] = x1 / EMANTISSA;
-  ffe[255] = exp (-x1);
-
-  /* Index zero is special for tail strip, where Marsaglia and Tsang
-   * defines this as
-   * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
-   * where v is the area of each strip of the ziggurat.
-   */
-  fke[0] = (ZIGINT) (x1 * ffe[255] / EXP_SECTION_AREA * EMANTISSA);
-  fwe[0] = EXP_SECTION_AREA / ffe[255] / EMANTISSA;
-  ffe[0] = 1.;
-
-  for (i = 254; i > 0; i--)
-    {
-      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
-       * need inverse operator of y = exp(-x) -> x = -ln(y)
-       */
-      x = - log (EXP_SECTION_AREA / x1 + ffe[i+1]);
-      fke[i+1] = (ZIGINT)(x / x1 * EMANTISSA);
-      fwe[i] = x / EMANTISSA;
-      ffe[i] = exp (-x);
-      x1 = x;
-    }
-  fke[1] = 0;
-
-  inittf = 0;
-}
-
-/*
- * Here is the guts of the algorithm. As Marsaglia and Tsang state the
- * algorithm in their paper
- *
- * 1) Calculate a random signed integer j and let i be the index
- *     provided by the rightmost 8-bits of j
- * 2) Set x = j * w_i. If j < k_i return x
- * 3) If i = 0, then return x from the tail
- * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
- * 5) goto step 1
- *
- * Where f is the functional form of the distribution, which for a normal
- * distribution is exp(-0.5*x*x)
- */
-
-float
-oct_float_randn (void)
-{
-  if (inittf)
-    create_ziggurat_float_tables ();
-
-  while (1)
-    {
-      /* 32-bit mantissa */
-      const uint32_t r = randi32 ();
-      const uint32_t rabs = r & LMASK;
-      const int idx = (int)(r & 0xFF);
-      const float x = ((int32_t)r) * fwi[idx];
-      if (rabs < fki[idx])
-        return x;        /* 99.3% of the time we return here 1st try */
-      else if (idx == 0)
-        {
-          /* As stated in Marsaglia and Tsang
-           *
-           * For the normal tail, the method of Marsaglia[5] provides:
-           * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
-           * then return r+x. Except that r+x is always in the positive
-           * tail!!!! Any thing random might be used to determine the
-           * sign, but as we already have r we might as well use it
-           *
-           * [PAK] but not the bottom 8 bits, since they are all 0 here!
-           */
-          float xx, yy;
-          do
-            {
-              xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
-              yy = - log (RANDU);
-            }
-          while ( yy+yy <= xx*xx);
-          return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);
-        }
-      else if ((ffi[idx-1] - ffi[idx]) * RANDU + ffi[idx] < exp (-0.5*x*x))
-        return x;
-    }
-}
-
-float
-oct_float_rande (void)
-{
-  if (inittf)
-    create_ziggurat_float_tables ();
-
-  while (1)
-    {
-      ZIGINT ri = ERANDI;
-      const int idx = (int)(ri & 0xFF);
-      const float x = ri * fwe[idx];
-      if (ri < fke[idx])
-        return x;               /* 98.9% of the time we return here 1st try */
-      else if (idx == 0)
-        {
-          /* As stated in Marsaglia and Tsang
-           *
-           * For the exponential tail, the method of Marsaglia[5] provides:
-           * x = r - ln(U);
-           */
-          return ZIGGURAT_EXP_R - log (RANDU);
-        }
-      else if ((ffe[idx-1] - ffe[idx]) * RANDU + ffe[idx] < exp (-x))
-        return x;
-    }
-}
-
-/* Array generators */
-void
-oct_fill_randu (octave_idx_type n, double *p)
-{
-  octave_idx_type i;
-  for (i = 0; i < n; i++)
-    p[i] = oct_randu ();
-}
-
-void
-oct_fill_randn (octave_idx_type n, double *p)
-{
-  octave_idx_type i;
-  for (i = 0; i < n; i++)
-    p[i] = oct_randn ();
-}
-
-void
-oct_fill_rande (octave_idx_type n, double *p)
-{
-  octave_idx_type i;
-  for (i = 0; i < n; i++)
-    p[i] = oct_rande ();
-}
-
-void
-oct_fill_float_randu (octave_idx_type n, float *p)
-{
-  octave_idx_type i;
-  for (i = 0; i < n; i++)
-    p[i] = oct_float_randu ();
-}
-
-void
-oct_fill_float_randn (octave_idx_type n, float *p)
-{
-  octave_idx_type i;
-  for (i = 0; i < n; i++)
-    p[i] = oct_float_randn ();
-}
-
-void
-oct_fill_float_rande (octave_idx_type n, float *p)
-{
-  octave_idx_type i;
-  for (i = 0; i < n; i++)
-    p[i] = oct_float_rande ();
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/numeric/randmtzig.cc	Mon May 16 23:20:50 2016 -0400
@@ -0,0 +1,870 @@
+/*
+
+Copyright (C) 2006-2015 John W. Eaton
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+/*
+   A C-program for MT19937, with initialization improved 2002/2/10.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+   This is a faster version by taking Shawn Cokus's optimization,
+   Matthe Bellew's simplification, Isaku Wada's real version.
+   David Bateman added normal and exponential distributions following
+   Marsaglia and Tang's Ziggurat algorithm.
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   Copyright (C) 2004, David Bateman
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote
+        products derived from this software without specific prior written
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.keio.ac.jp/matumoto/emt.html
+   email: matumoto@math.keio.ac.jp
+
+   * 2004-01-19 Paul Kienzle
+   * * comment out main
+   * add init_by_entropy, get_state, set_state
+   * * converted to allow compiling by C++ compiler
+   *
+   * 2004-01-25 David Bateman
+   * * Add Marsaglia and Tsang Ziggurat code
+   *
+   * 2004-07-13 Paul Kienzle
+   * * make into an independent library with some docs.
+   * * introduce new main and test code.
+   *
+   * 2004-07-28 Paul Kienzle & David Bateman
+   * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa
+   * * make the naming scheme more uniform
+   * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch.
+   *
+   * 2005-02-23 Paul Kienzle
+   * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control
+   *
+   * 2006-04-01 David Bateman
+   * * convert for use in octave, declaring static functions only used
+   *   here and adding oct_ to functions visible externally
+   * * inverse sense of ALLBITS
+   *
+   * 2012-05-18 David Bateman
+   * * Remove randu64 and ALLBIT option
+   * * Add the single precision generators
+   */
+
+/*
+   === Build instructions ===
+
+   Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is
+   available.  This is not necessary if your architecture has
+   /dev/urandom defined.
+
+   Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86.
+   You can force X86 behavior with -DUSE_X86_32=1, or suppress it with
+   -DUSE_X86_32=0. You should also consider -march=i686 or similar for
+   extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit
+   x86 architectures.
+
+   If you want to replace the Mersenne Twister with another
+   generator then redefine randi32 appropriately.
+
+   === Usage instructions ===
+   Before using any of the generators, initialize the state with one of
+   oct_init_by_int, oct_init_by_array or oct_init_by_entropy.
+
+   All generators share the same state vector.
+
+   === Mersenne Twister ===
+   void oct_init_by_int (uint32_t s)           32-bit initial state
+   void oct_init_by_array (uint32_t k[],int m) m*32-bit initial state
+   void oct_init_by_entropy (void)             random initial state
+   void oct_get_state (uint32_t save[MT_N+1])  saves state in array
+   void oct_set_state (uint32_t save[MT_N+1])  restores state from array
+   static uint32_t randmt (void)               returns 32-bit unsigned int
+
+   === inline generators ===
+   static uint32_t randi32 (void)   returns 32-bit unsigned int
+   static uint64_t randi53 (void)   returns 53-bit unsigned int
+   static uint64_t randi54 (void)   returns 54-bit unsigned int
+   static float randu32 (void)      returns 32-bit uniform in (0,1)
+   static double randu53 (void)     returns 53-bit uniform in (0,1)
+
+   double oct_randu (void)       returns M-bit uniform in (0,1)
+   double oct_randn (void)       returns M-bit standard normal
+   double oct_rande (void)       returns N-bit standard exponential
+
+   float oct_float_randu (void)       returns M-bit uniform in (0,1)
+   float oct_float_randn (void)       returns M-bit standard normal
+   float oct_float_rande (void)       returns N-bit standard exponential
+
+   === Array generators ===
+   void oct_fill_randi32 (octave_idx_type, uint32_t [])
+   void oct_fill_randi64 (octave_idx_type, uint64_t [])
+
+   void oct_fill_randu (octave_idx_type, double [])
+   void oct_fill_randn (octave_idx_type, double [])
+   void oct_fill_rande (octave_idx_type, double [])
+
+   void oct_fill_float_randu (octave_idx_type, float [])
+   void oct_fill_float_randn (octave_idx_type, float [])
+   void oct_fill_float_rande (octave_idx_type, float [])
+*/
+
+#if defined (HAVE_CONFIG_H)
+#  include "config.h"
+#endif
+
+#include <cstdio>
+#include <ctime>
+
+#ifdef HAVE_GETTIMEOFDAY
+#  include <sys/time.h>
+#endif
+
+#include "lo-math.h"
+#include "randmtzig.h"
+
+/* FIXME: may want to suppress X86 if sizeof(long) > 4 */
+#if ! defined (USE_X86_32)
+#  if defined (i386) || defined (HAVE_X86_32)
+#    define USE_X86_32 1
+#  else
+#    define USE_X86_32 0
+#  endif
+#endif
+
+/* ===== Mersenne Twister 32-bit generator ===== */
+
+#define MT_M 397
+#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
+#define UMASK 0x80000000UL /* most significant w-r bits */
+#define LMASK 0x7fffffffUL /* least significant r bits */
+#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) )
+#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL))
+
+static uint32_t *next;
+static uint32_t state[MT_N]; /* the array for the state vector  */
+static int left = 1;
+static int initf = 0;
+static int initt = 1;
+static int inittf = 1;
+
+/* initializes state[MT_N] with a seed */
+void
+oct_init_by_int (uint32_t s)
+{
+  int j;
+  state[0] = s & 0xffffffffUL;
+  for (j = 1; j < MT_N; j++)
+    {
+      state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j);
+      /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+      /* In the previous versions, MSBs of the seed affect   */
+      /* only MSBs of the array state[].                        */
+      /* 2002/01/09 modified by Makoto Matsumoto             */
+      state[j] &= 0xffffffffUL;  /* for >32 bit machines */
+    }
+  left = 1;
+  initf = 1;
+}
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+void
+oct_init_by_array (uint32_t *init_key, int key_length)
+{
+  int i, j, k;
+  oct_init_by_int (19650218UL);
+  i = 1;
+  j = 0;
+  k = (MT_N > key_length ? MT_N : key_length);
+  for (; k; k--)
+    {
+      state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL))
+                 + init_key[j] + j; /* non linear */
+      state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+      i++;
+      j++;
+      if (i >= MT_N)
+        {
+          state[0] = state[MT_N-1];
+          i = 1;
+        }
+      if (j >= key_length)
+        j = 0;
+    }
+  for (k = MT_N - 1; k; k--)
+    {
+      state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL))
+                 - i; /* non linear */
+      state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+      i++;
+      if (i >= MT_N)
+        {
+          state[0] = state[MT_N-1];
+          i = 1;
+        }
+    }
+
+  state[0] = 0x80000000UL; /* MSB is 1; assuring nonzero initial array */
+  left = 1;
+  initf = 1;
+}
+
+void
+oct_init_by_entropy (void)
+{
+  uint32_t entropy[MT_N];
+  int n = 0;
+
+  /* Look for entropy in /dev/urandom */
+  FILE* urandom = fopen ("/dev/urandom", "rb");
+  if (urandom)
+    {
+      while (n < MT_N)
+        {
+          unsigned char word[4];
+          if (fread (word, 4, 1, urandom) != 1)
+            break;
+          entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+(static_cast<uint32_t>(word[3])<<24);
+        }
+      fclose (urandom);
+    }
+
+  /* If there isn't enough entropy, gather some from various sources */
+  if (n < MT_N)
+    entropy[n++] = time (0); /* Current time in seconds */
+  if (n < MT_N)
+    entropy[n++] = clock ();    /* CPU time used (usec) */
+#ifdef HAVE_GETTIMEOFDAY
+  if (n < MT_N)
+    {
+      struct timeval tv;
+      if (gettimeofday (&tv, 0) != -1)
+        entropy[n++] = tv.tv_usec;   /* Fractional part of current time */
+    }
+#endif
+  /* Send all the entropy into the initial state vector */
+  oct_init_by_array (entropy,n);
+}
+
+void
+oct_set_state (uint32_t *save)
+{
+  int i;
+  for (i = 0; i < MT_N; i++)
+    state[i] = save[i];
+  left = save[MT_N];
+  next = state + (MT_N - left + 1);
+}
+
+void
+oct_get_state (uint32_t *save)
+{
+  int i;
+  for (i = 0; i < MT_N; i++)
+    save[i] = state[i];
+  save[MT_N] = left;
+}
+
+static void
+next_state (void)
+{
+  uint32_t *p = state;
+  int j;
+
+  /* if init_by_int() has not been called, */
+  /* a default initial seed is used         */
+  /* if (initf==0) init_by_int(5489UL); */
+  /* Or better yet, a random seed! */
+  if (initf == 0)
+    oct_init_by_entropy ();
+
+  left = MT_N;
+  next = state;
+
+  for (j = MT_N - MT_M + 1; --j; p++)
+    *p = p[MT_M] ^ TWIST(p[0], p[1]);
+
+  for (j = MT_M; --j; p++)
+    *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]);
+
+  *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]);
+}
+
+/* generates a random number on [0,0xffffffff]-interval */
+static uint32_t
+randmt (void)
+{
+  uint32_t y;
+
+  if (--left == 0)
+    next_state ();
+  y = *next++;
+
+  /* Tempering */
+  y ^= (y >> 11);
+  y ^= (y << 7) & 0x9d2c5680UL;
+  y ^= (y << 15) & 0xefc60000UL;
+  return (y ^ (y >> 18));
+}
+
+/* ===== Uniform generators ===== */
+
+/* Select which 32 bit generator to use */
+#define randi32 randmt
+
+static uint64_t
+randi53 (void)
+{
+  const uint32_t lo = randi32 ();
+  const uint32_t hi = randi32 () & 0x1FFFFF;
+#ifdef HAVE_X86_32
+  uint64_t u;
+  uint32_t *p = (uint32_t *)&u;
+  p[0] = lo;
+  p[1] = hi;
+  return u;
+#else
+  return ((static_cast<uint64_t> (hi) << 32) | lo);
+#endif
+}
+
+static uint64_t
+randi54 (void)
+{
+  const uint32_t lo = randi32 ();
+  const uint32_t hi = randi32 () & 0x3FFFFF;
+#ifdef HAVE_X86_32
+  uint64_t u;
+  uint32_t *p = static_cast<uint32_t *> (&u);
+  p[0] = lo;
+  p[1] = hi;
+  return u;
+#else
+  return ((static_cast<uint64_t> (hi) << 32) | lo);
+#endif
+}
+
+/* generates a random number on (0,1)-real-interval */
+static float
+randu32 (void)
+{
+  return (static_cast<float> (randi32 ()) + 0.5) * (1.0/4294967296.0);
+  /* divided by 2^32 */
+}
+
+/* generates a random number on (0,1) with 53-bit resolution */
+static double
+randu53 (void)
+{
+  const uint32_t a = randi32 () >> 5;
+  const uint32_t b = randi32 () >> 6;
+  return (a*67108864.0+b+0.4) * (1.0/9007199254740992.0);
+}
+
+/* Determine mantissa for uniform doubles */
+double
+oct_randu (void)
+{
+  return randu53 ();
+}
+
+/* Determine mantissa for uniform floats */
+float
+oct_float_randu (void)
+{
+  return randu32 ();
+}
+
+/* ===== Ziggurat normal and exponential generators ===== */
+
+#define ZIGGURAT_TABLE_SIZE 256
+
+#define ZIGGURAT_NOR_R 3.6541528853610088
+#define ZIGGURAT_NOR_INV_R 0.27366123732975828
+#define NOR_SECTION_AREA 0.00492867323399
+
+#define ZIGGURAT_EXP_R 7.69711747013104972
+#define ZIGGURAT_EXP_INV_R 0.129918765548341586
+#define EXP_SECTION_AREA 0.0039496598225815571993
+
+#define ZIGINT uint64_t
+#define EMANTISSA 9007199254740992.0  /* 53 bit mantissa */
+#define ERANDI randi53() /* 53 bits for mantissa */
+#define NMANTISSA EMANTISSA
+#define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */
+#define RANDU randu53()
+
+static ZIGINT ki[ZIGGURAT_TABLE_SIZE];
+static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE];
+static ZIGINT ke[ZIGGURAT_TABLE_SIZE];
+static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE];
+
+/*
+This code is based on the paper Marsaglia and Tsang, "The ziggurat method
+for generating random variables", Journ. Statistical Software. Code was
+presented in this paper for a Ziggurat of 127 levels and using a 32 bit
+integer random number generator. This version of the code, uses the
+Mersenne Twister as the integer generator and uses 256 levels in the
+Ziggurat. This has several advantages.
+
+  1) As Marsaglia and Tsang themselves states, the more levels the few
+     times the expensive tail algorithm must be called
+  2) The cycle time of the generator is determined by the integer
+     generator, thus the use of a Mersenne Twister for the core random
+     generator makes this cycle extremely long.
+  3) The license on the original code was unclear, thus rewriting the code
+     from the article means we are free of copyright issues.
+  4) Compile flag for full 53-bit random mantissa.
+
+It should be stated that the authors made my life easier, by the fact that
+the algorithm developed in the text of the article is for a 256 level
+ziggurat, even if the code itself isn't...
+
+One modification to the algorithm developed in the article, is that it is
+assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in
+terms like 2^32 in the code. As the normal distribution is defined between
+-Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus
+in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for
+this term.  The exponential distribution is one sided so we use the
+full 32 bits.  We use EMANTISSA for this term.
+
+It appears that I'm slightly slower than the code in the article, this
+is partially due to a better generator of random integers than they
+use. But might also be that the case of rapid return was optimized by
+inlining the relevant code with a #define. As the basic Mersenne
+Twister is only 25% faster than this code I suspect that the main
+reason is just the use of the Mersenne Twister and not the inlining,
+so I'm not going to try and optimize further.
+*/
+
+static void
+create_ziggurat_tables (void)
+{
+  int i;
+  double x, x1;
+
+  /* Ziggurat tables for the normal distribution */
+  x1 = ZIGGURAT_NOR_R;
+  wi[255] = x1 / NMANTISSA;
+  fi[255] = exp (-0.5 * x1 * x1);
+
+  /* Index zero is special for tail strip, where Marsaglia and Tsang
+   * defines this as
+   * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
+   * where v is the area of each strip of the ziggurat.
+   */
+  ki[0] = static_cast<ZIGINT> (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA);
+  wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA;
+  fi[0] = 1.;
+
+  for (i = 254; i > 0; i--)
+    {
+      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
+       * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
+       */
+      x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + fi[i+1]));
+      ki[i+1] = static_cast<ZIGINT> (x / x1 * NMANTISSA);
+      wi[i] = x / NMANTISSA;
+      fi[i] = exp (-0.5 * x * x);
+      x1 = x;
+    }
+
+  ki[1] = 0;
+
+  /* Zigurrat tables for the exponential distribution */
+  x1 = ZIGGURAT_EXP_R;
+  we[255] = x1 / EMANTISSA;
+  fe[255] = exp (-x1);
+
+  /* Index zero is special for tail strip, where Marsaglia and Tsang
+   * defines this as
+   * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
+   * where v is the area of each strip of the ziggurat.
+   */
+  ke[0] = static_cast<ZIGINT> (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA);
+  we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA;
+  fe[0] = 1.;
+
+  for (i = 254; i > 0; i--)
+    {
+      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
+       * need inverse operator of y = exp(-x) -> x = -ln(y)
+       */
+      x = - log (EXP_SECTION_AREA / x1 + fe[i+1]);
+      ke[i+1] = static_cast<ZIGINT> (x / x1 * EMANTISSA);
+      we[i] = x / EMANTISSA;
+      fe[i] = exp (-x);
+      x1 = x;
+    }
+  ke[1] = 0;
+
+  initt = 0;
+}
+
+/*
+ * Here is the guts of the algorithm. As Marsaglia and Tsang state the
+ * algorithm in their paper
+ *
+ * 1) Calculate a random signed integer j and let i be the index
+ *     provided by the rightmost 8-bits of j
+ * 2) Set x = j * w_i. If j < k_i return x
+ * 3) If i = 0, then return x from the tail
+ * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
+ * 5) goto step 1
+ *
+ * Where f is the functional form of the distribution, which for a normal
+ * distribution is exp(-0.5*x*x)
+ */
+
+double
+oct_randn (void)
+{
+  if (initt)
+    create_ziggurat_tables ();
+
+  while (1)
+    {
+      /* The following code is specialized for 32-bit mantissa.
+       * Compared to the arbitrary mantissa code, there is a performance
+       * gain for 32-bits:  PPC: 2%, MIPS: 8%, x86: 40%
+       * There is a bigger performance gain compared to using a full
+       * 53-bit mantissa:  PPC: 60%, MIPS: 65%, x86: 240%
+       * Of course, different compilers and operating systems may
+       * have something to do with this.
+       */
+# ifdef HAVE_X86_32
+      /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */
+      double x;
+      int si,idx;
+      uint32_t lo, hi;
+      int64_t rabs;
+      uint32_t *p = (uint32_t *)&rabs;
+      lo = randi32 ();
+      idx = lo & 0xFF;
+      hi = randi32 ();
+      si = hi & UMASK;
+      p[0] = lo;
+      p[1] = hi & 0x1FFFFF;
+      x = ( si ? -rabs : rabs ) * wi[idx];
+# else /* ! HAVE_X86_32 */
+      /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */
+      const uint64_t r = NRANDI;
+      const int64_t rabs = r >> 1;
+      const int idx = static_cast<int> (rabs & 0xFF);
+      const double x = ( (r & 1) ? -rabs : rabs) * wi[idx];
+# endif /* ! HAVE_X86_32 */
+      if (rabs < static_cast<int64_t> (ki[idx]))
+        return x;        /* 99.3% of the time we return here 1st try */
+      else if (idx == 0)
+        {
+          /* As stated in Marsaglia and Tsang
+           *
+           * For the normal tail, the method of Marsaglia[5] provides:
+           * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
+           * then return r+x. Except that r+x is always in the positive
+           * tail!!!! Any thing random might be used to determine the
+           * sign, but as we already have r we might as well use it
+           *
+           * [PAK] but not the bottom 8 bits, since they are all 0 here!
+           */
+          double xx, yy;
+          do
+            {
+              xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
+              yy = - log (RANDU);
+            }
+          while ( yy+yy <= xx*xx);
+          return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);
+        }
+      else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp (-0.5*x*x))
+        return x;
+    }
+}
+
+double
+oct_rande (void)
+{
+  if (initt)
+    create_ziggurat_tables ();
+
+  while (1)
+    {
+      ZIGINT ri = ERANDI;
+      const int idx = static_cast<int> (ri & 0xFF);
+      const double x = ri * we[idx];
+      if (ri < ke[idx])
+        return x;               /* 98.9% of the time we return here 1st try */
+      else if (idx == 0)
+        {
+          /* As stated in Marsaglia and Tsang
+           *
+           * For the exponential tail, the method of Marsaglia[5] provides:
+           * x = r - ln(U);
+           */
+          return ZIGGURAT_EXP_R - log (RANDU);
+        }
+      else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp (-x))
+        return x;
+    }
+}
+
+#undef ZIGINT
+#undef EMANTISSA
+#undef ERANDI
+#undef NMANTISSA
+#undef NRANDI
+#undef RANDU
+
+#define ZIGINT uint32_t
+#define EMANTISSA 4294967296.0 /* 32 bit mantissa */
+#define ERANDI randi32() /* 32 bits for mantissa */
+#define NMANTISSA 2147483648.0 /* 31 bit mantissa */
+#define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */
+#define RANDU randu32()
+
+static ZIGINT fki[ZIGGURAT_TABLE_SIZE];
+static float fwi[ZIGGURAT_TABLE_SIZE], ffi[ZIGGURAT_TABLE_SIZE];
+static ZIGINT fke[ZIGGURAT_TABLE_SIZE];
+static float fwe[ZIGGURAT_TABLE_SIZE], ffe[ZIGGURAT_TABLE_SIZE];
+
+
+static void
+create_ziggurat_float_tables (void)
+{
+  int i;
+  float x, x1;
+
+  /* Ziggurat tables for the normal distribution */
+  x1 = ZIGGURAT_NOR_R;
+  fwi[255] = x1 / NMANTISSA;
+  ffi[255] = exp (-0.5 * x1 * x1);
+
+  /* Index zero is special for tail strip, where Marsaglia and Tsang
+   * defines this as
+   * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
+   * where v is the area of each strip of the ziggurat.
+   */
+  fki[0] = static_cast<ZIGINT> (x1 * ffi[255] / NOR_SECTION_AREA * NMANTISSA);
+  fwi[0] = NOR_SECTION_AREA / ffi[255] / NMANTISSA;
+  ffi[0] = 1.;
+
+  for (i = 254; i > 0; i--)
+    {
+      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
+       * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
+       */
+      x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + ffi[i+1]));
+      fki[i+1] = static_cast<ZIGINT> (x / x1 * NMANTISSA);
+      fwi[i] = x / NMANTISSA;
+      ffi[i] = exp (-0.5 * x * x);
+      x1 = x;
+    }
+
+  fki[1] = 0;
+
+  /* Zigurrat tables for the exponential distribution */
+  x1 = ZIGGURAT_EXP_R;
+  fwe[255] = x1 / EMANTISSA;
+  ffe[255] = exp (-x1);
+
+  /* Index zero is special for tail strip, where Marsaglia and Tsang
+   * defines this as
+   * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
+   * where v is the area of each strip of the ziggurat.
+   */
+  fke[0] = static_cast<ZIGINT> (x1 * ffe[255] / EXP_SECTION_AREA * EMANTISSA);
+  fwe[0] = EXP_SECTION_AREA / ffe[255] / EMANTISSA;
+  ffe[0] = 1.;
+
+  for (i = 254; i > 0; i--)
+    {
+      /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
+       * need inverse operator of y = exp(-x) -> x = -ln(y)
+       */
+      x = - log (EXP_SECTION_AREA / x1 + ffe[i+1]);
+      fke[i+1] = static_cast<ZIGINT> (x / x1 * EMANTISSA);
+      fwe[i] = x / EMANTISSA;
+      ffe[i] = exp (-x);
+      x1 = x;
+    }
+  fke[1] = 0;
+
+  inittf = 0;
+}
+
+/*
+ * Here is the guts of the algorithm. As Marsaglia and Tsang state the
+ * algorithm in their paper
+ *
+ * 1) Calculate a random signed integer j and let i be the index
+ *     provided by the rightmost 8-bits of j
+ * 2) Set x = j * w_i. If j < k_i return x
+ * 3) If i = 0, then return x from the tail
+ * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
+ * 5) goto step 1
+ *
+ * Where f is the functional form of the distribution, which for a normal
+ * distribution is exp(-0.5*x*x)
+ */
+
+float
+oct_float_randn (void)
+{
+  if (inittf)
+    create_ziggurat_float_tables ();
+
+  while (1)
+    {
+      /* 32-bit mantissa */
+      const uint32_t r = randi32 ();
+      const uint32_t rabs = r & LMASK;
+      const int idx = static_cast<int> (r & 0xFF);
+      const float x = static_cast<int32_t> (r) * fwi[idx];
+      if (rabs < fki[idx])
+        return x;        /* 99.3% of the time we return here 1st try */
+      else if (idx == 0)
+        {
+          /* As stated in Marsaglia and Tsang
+           *
+           * For the normal tail, the method of Marsaglia[5] provides:
+           * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
+           * then return r+x. Except that r+x is always in the positive
+           * tail!!!! Any thing random might be used to determine the
+           * sign, but as we already have r we might as well use it
+           *
+           * [PAK] but not the bottom 8 bits, since they are all 0 here!
+           */
+          float xx, yy;
+          do
+            {
+              xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
+              yy = - log (RANDU);
+            }
+          while ( yy+yy <= xx*xx);
+          return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);
+        }
+      else if ((ffi[idx-1] - ffi[idx]) * RANDU + ffi[idx] < exp (-0.5*x*x))
+        return x;
+    }
+}
+
+float
+oct_float_rande (void)
+{
+  if (inittf)
+    create_ziggurat_float_tables ();
+
+  while (1)
+    {
+      ZIGINT ri = ERANDI;
+      const int idx = static_cast<int> (ri & 0xFF);
+      const float x = ri * fwe[idx];
+      if (ri < fke[idx])
+        return x;               /* 98.9% of the time we return here 1st try */
+      else if (idx == 0)
+        {
+          /* As stated in Marsaglia and Tsang
+           *
+           * For the exponential tail, the method of Marsaglia[5] provides:
+           * x = r - ln(U);
+           */
+          return ZIGGURAT_EXP_R - log (RANDU);
+        }
+      else if ((ffe[idx-1] - ffe[idx]) * RANDU + ffe[idx] < exp (-x))
+        return x;
+    }
+}
+
+/* Array generators */
+void
+oct_fill_randu (octave_idx_type n, double *p)
+{
+  octave_idx_type i;
+  for (i = 0; i < n; i++)
+    p[i] = oct_randu ();
+}
+
+void
+oct_fill_randn (octave_idx_type n, double *p)
+{
+  octave_idx_type i;
+  for (i = 0; i < n; i++)
+    p[i] = oct_randn ();
+}
+
+void
+oct_fill_rande (octave_idx_type n, double *p)
+{
+  octave_idx_type i;
+  for (i = 0; i < n; i++)
+    p[i] = oct_rande ();
+}
+
+void
+oct_fill_float_randu (octave_idx_type n, float *p)
+{
+  octave_idx_type i;
+  for (i = 0; i < n; i++)
+    p[i] = oct_float_randu ();
+}
+
+void
+oct_fill_float_randn (octave_idx_type n, float *p)
+{
+  octave_idx_type i;
+  for (i = 0; i < n; i++)
+    p[i] = oct_float_randn ();
+}
+
+void
+oct_fill_float_rande (octave_idx_type n, float *p)
+{
+  octave_idx_type i;
+  for (i = 0; i < n; i++)
+    p[i] = oct_float_rande ();
+}
--- a/liboctave/numeric/randmtzig.h	Mon May 16 23:26:07 2016 -0400
+++ b/liboctave/numeric/randmtzig.h	Mon May 16 23:20:50 2016 -0400
@@ -68,18 +68,14 @@

 #define MT_N 624

-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Mersenne Twister.  */
+// Mersenne Twister.
 extern OCTAVE_API void oct_init_by_int (uint32_t s);
 extern OCTAVE_API void oct_init_by_array (uint32_t *init_key, int key_length);
 extern OCTAVE_API void oct_init_by_entropy (void);
 extern OCTAVE_API void oct_set_state (uint32_t *save);
 extern OCTAVE_API void oct_get_state (uint32_t *save);

-/* Array generators.  */
+// Array generators.
 extern OCTAVE_API double oct_randu (void);
 extern OCTAVE_API double oct_randn (void);
 extern OCTAVE_API double oct_rande (void);
@@ -88,7 +84,7 @@
 extern OCTAVE_API float oct_float_randn (void);
 extern OCTAVE_API float oct_float_rande (void);

-/* Array generators.  */
+// Array generators.
 extern OCTAVE_API void oct_fill_randu (octave_idx_type n, double *p);
 extern OCTAVE_API void oct_fill_randn (octave_idx_type n, double *p);
 extern OCTAVE_API void oct_fill_rande (octave_idx_type n, double *p);
@@ -97,8 +93,4 @@
 extern OCTAVE_API void oct_fill_float_randn (octave_idx_type n, float *p);
 extern OCTAVE_API void oct_fill_float_rande (octave_idx_type n, float *p);

-#ifdef __cplusplus
-}
 #endif
-
-#endif
--- a/liboctave/numeric/randpoisson.c	Mon May 16 23:26:07 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,624 +0,0 @@
-/*
-
-Copyright (C) 2006-2015 John W. Eaton
-
-This file is part of Octave.
-
-Octave is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-Octave is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with Octave; see the file COPYING.  If not, see
-<http://www.gnu.org/licenses/>.
-
-*/
-
-/* Original version written by Paul Kienzle distributed as free
-   software in the in the public domain.  */
-
-/* Needs the following defines:
- * NAN: value to return for Not-A-Number
- * RUNI: uniform generator on (0,1)
- * RNOR: normal generator
- * LGAMMA: log gamma function
- * INFINITE: function to test whether a value is infinite
- */
-
-#if defined (HAVE_CONFIG_H)
-#  include "config.h"
-#endif
-
-#include <stdio.h>
-
-#include "f77-fcn.h"
-#include "lo-error.h"
-#include "lo-ieee.h"
-#include "lo-math.h"
-#include "randmtzig.h"
-#include "randpoisson.h"
-
-#undef NAN
-#define NAN octave_NaN
-#undef INFINITE
-#define INFINITE lo_ieee_isinf
-#define RUNI oct_randu()
-#define RNOR oct_randn()
-#define LGAMMA xlgamma
-
-F77_RET_T
-F77_FUNC (dlgams, DLGAMS) (const double *, double *, double *);
-
-static double
-xlgamma (double x)
-{
-  double result;
-#ifdef HAVE_LGAMMA
-  result = lgamma (x);
-#else
-  double sgngam;
-
-  if (lo_ieee_isnan (x))
-    result = x;
-  else if (x <= 0 || lo_ieee_isinf (x))
-    result = octave_Inf;
-  else
-    F77_XFCN (dlgams, DLGAMS, (&x, &result, &sgngam));
-#endif
-  return result;
-}
-
-/* ---- pprsc.c from Stadloeber's winrand --- */
-
-/* flogfak(k) = ln(k!) */
-static double
-flogfak (double k)
-{
-#define C0  9.18938533204672742e-01
-#define C1  8.33333333333333333e-02
-#define C3 -2.77777777777777778e-03
-#define C5  7.93650793650793651e-04
-#define C7 -5.95238095238095238e-04
-
-  static double logfak[30L] =
-  {
-    0.00000000000000000,   0.00000000000000000,   0.69314718055994531,
-    1.79175946922805500,   3.17805383034794562,   4.78749174278204599,
-    6.57925121201010100,   8.52516136106541430,  10.60460290274525023,
-    12.80182748008146961,  15.10441257307551530,  17.50230784587388584,
-    19.98721449566188615,  22.55216385312342289,  25.19122118273868150,
-    27.89927138384089157,  30.67186010608067280,  33.50507345013688888,
-    36.39544520803305358,  39.33988418719949404,  42.33561646075348503,
-    45.38013889847690803,  48.47118135183522388,  51.60667556776437357,
-    54.78472939811231919,  58.00360522298051994,  61.26170176100200198,
-    64.55753862700633106,  67.88974313718153498,  71.25703896716800901
-  };
-
-  double r, rr;
-
-  if (k >= 30.0)
-    {
-      r  = 1.0 / k;
-      rr = r * r;
-      return ((k + 0.5)*log (k) - k + C0 + r*(C1 + rr*(C3 + rr*(C5 + rr*C7))));
-    }
-  else
-    return (logfak[(int)k]);
-}
-
-
-/******************************************************************
- *                                                                *
- * Poisson Distribution - Patchwork Rejection/Inversion           *
- *                                                                *
- ******************************************************************
- *                                                                *
- * For parameter my < 10, Tabulated Inversion is applied.         *
- * For my >= 10, Patchwork Rejection is employed:                 *
- * The area below the histogram function f(x) is rearranged in    *
- * its body by certain point reflections. Within a large center   *
- * interval variates are sampled efficiently by rejection from    *
- * uniform hats. Rectangular immediate acceptance regions speed   *
- * up the generation. The remaining tails are covered by          *
- * exponential functions.                                         *
- *                                                                *
- ******************************************************************
- *                                                                *
- * FUNCTION :   - pprsc samples a random number from the Poisson  *
- *                distribution with parameter my > 0.             *
- * REFERENCE :  - H. Zechner (1994): Efficient sampling from      *
- *                continuous and discrete unimodal distributions, *
- *                Doctoral Dissertation, 156 pp., Technical       *
- *                University Graz, Austria.                       *
- * SUBPROGRAM : - drand(seed) ... (0,1)-Uniform generator with    *
- *                unsigned long integer *seed.                    *
- *                                                                *
- * Implemented by H. Zechner, January 1994                        *
- * Revised by F. Niederl, July 1994                               *
- *                                                                *
- ******************************************************************/
-
-static double
-f (double k, double l_nu, double c_pm)
-{
-  return exp (k * l_nu - flogfak (k) - c_pm);
-}
-
-static double
-pprsc (double my)
-{
-  static double my_last = -1.0;
-  static double m,  k2, k4, k1, k5;
-  static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm,
-                f1, f2, f4, f5, p1, p2, p3, p4, p5, p6;
-  double        Dk, X, Y;
-  double        Ds, U, V, W;
-
-  if (my != my_last)
-    {                               /* set-up           */
-      my_last = my;
-      /* approximate deviation of reflection points k2, k4 from my - 1/2 */
-      Ds = sqrt (my + 0.25);
-
-      /* mode m, reflection points k2 and k4, and points k1 and k5,      */
-      /* which delimit the centre region of h(x)                         */
-      m  = floor (my);
-      k2 = ceil (my - 0.5 - Ds);
-      k4 = floor (my - 0.5 + Ds);
-      k1 = k2 + k2 - m + 1L;
-      k5 = k4 + k4 - m;
-
-      /* range width of the critical left and right centre region        */
-      dl = (k2 - k1);
-      dr = (k5 - k4);
-
-      /* recurrence constants r(k)=p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 */
-      r1 = my / k1;
-      r2 = my / k2;
-      r4 = my / (k4 + 1.0);
-      r5 = my / (k5 + 1.0);
-
-      /* reciprocal values of the scale parameters of exp. tail envelope */
-      ll =  log (r1);                                /* expon. tail left */
-      lr = -log (r5);                                /* expon. tail right*/
-
-      /* Poisson constants, necessary for computing function values f(k) */
-      l_my = log (my);
-      c_pm = m * l_my - flogfak (m);
-
-      /* function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5          */
-      f2 = f (k2, l_my, c_pm);
-      f4 = f (k4, l_my, c_pm);
-      f1 = f (k1, l_my, c_pm);
-      f5 = f (k5, l_my, c_pm);
-
-      /* area of the two centre and the two exponential tail regions     */
-      /* area of the two immediate acceptance regions between k2, k4     */
-      p1 = f2 * (dl + 1.0);                            /* immed. left    */
-      p2 = f2 * dl         + p1;                       /* centre left    */
-      p3 = f4 * (dr + 1.0) + p2;                       /* immed. right   */
-      p4 = f4 * dr         + p3;                       /* centre right   */
-      p5 = f1 / ll         + p4;                       /* exp. tail left */
-      p6 = f5 / lr         + p5;                       /* exp. tail right*/
-    }
-
-  for (;;)
-    {
-      /* generate uniform number U -- U(0, p6)                           */
-      /* case distinction corresponding to U                             */
-      if ((U = RUNI * p6) < p2)
-        {                                            /* centre left      */
-
-          /* immediate acceptance region
-             R2 = [k2, m) *[0, f2),  X = k2, ... m -1 */
-          if ((V = U - p1) < 0.0)  return (k2 + floor (U/f2));
-          /* immediate acceptance region
-             R1 = [k1, k2)*[0, f1),  X = k1, ... k2-1 */
-          if ((W = V / dl) < f1 )  return (k1 + floor (V/f1));
-
-          /* computation of candidate X < k2, and its counterpart Y > k2 */
-          /* either squeeze-acceptance of X or acceptance-rejection of Y */
-          Dk = floor (dl * RUNI) + 1.0;
-          if (W <= f2 - Dk * (f2 - f2/r2))
-            {                                        /* quick accept of  */
-              return (k2 - Dk);                      /* X = k2 - Dk      */
-            }
-          if ((V = f2 + f2 - W) < 1.0)
-            {                                        /* quick reject of Y*/
-              Y = k2 + Dk;
-              if (V <= f2 + Dk * (1.0 - f2)/(dl + 1.0))
-                {                                    /* quick accept of  */
-                  return (Y);                        /* Y = k2 + Dk      */
-                }
-              if (V <= f (Y, l_my, c_pm))  return (Y); /* final accept of Y*/
-            }
-          X = k2 - Dk;
-        }
-      else if (U < p4)
-        {                                            /* centre right     */
-          /*  immediate acceptance region
-              R3 = [m, k4+1)*[0, f4), X = m, ... k4    */
-          if ((V = U - p3) < 0.0)  return (k4 - floor ((U - p2)/f4));
-          /* immediate acceptance region
-             R4 = [k4+1, k5+1)*[0, f5)                */
-          if ((W = V / dr) < f5 )  return (k5 - floor (V/f5));
-
-          /* computation of candidate X > k4, and its counterpart Y < k4 */
-          /* either squeeze-acceptance of X or acceptance-rejection of Y */
-          Dk = floor (dr * RUNI) + 1.0;
-          if (W <= f4 - Dk * (f4 - f4*r4))
-            {                                        /* quick accept of  */
-              return (k4 + Dk);                      /* X = k4 + Dk      */
-            }
-          if ((V = f4 + f4 - W) < 1.0)
-            {                                        /* quick reject of Y*/
-              Y = k4 - Dk;
-              if (V <= f4 + Dk * (1.0 - f4)/ dr)
-                {                                    /* quick accept of  */
-                  return (Y);                        /* Y = k4 - Dk      */
-                }
-              if (V <= f (Y, l_my, c_pm))  return (Y); /* final accept of Y*/
-            }
-          X = k4 + Dk;
-        }
-      else
-        {
-          W = RUNI;
-          if (U < p5)
-            {                                        /* expon. tail left */
-              Dk = floor (1.0 - log (W)/ll);
-              if ((X = k1 - Dk) < 0L)  continue;     /* 0 <= X <= k1 - 1 */
-              W *= (U - p4) * ll;                    /* W -- U(0, h(x))  */
-              if (W <= f1 - Dk * (f1 - f1/r1))
-                return (X);                          /* quick accept of X*/
-            }
-          else
-            {                                        /* expon. tail right*/
-              Dk = floor (1.0 - log (W)/lr);
-              X  = k5 + Dk;                          /* X >= k5 + 1      */
-              W *= (U - p5) * lr;                    /* W -- U(0, h(x))  */
-              if (W <= f5 - Dk * (f5 - f5*r5))
-                return (X);                          /* quick accept of X*/
-            }
-        }
-
-      /* acceptance-rejection test of candidate X from the original area */
-      /* test, whether  W <= f(k),    with  W = U*h(x)  and  U -- U(0, 1)*/
-      /* log f(X) = (X - m)*log(my) - log X! + log m!                    */
-      if (log (W) <= X * l_my - flogfak (X) - c_pm)  return (X);
-    }
-}
-/* ---- pprsc.c end ------ */
-
-
-/* The remainder of the file is by Paul Kienzle */
-
-/* Given uniform u, find x such that CDF(L,x)==u.  Return x. */
-static void
-poisson_cdf_lookup (double lambda, double *p, size_t n)
-{
-  /* Table size is predicated on the maximum value of lambda
-   * we want to store in the table, and the maximum value of
-   * returned by the uniform random number generator on [0,1).
-   * With lambda==10 and u_max = 1 - 1/(2^32+1), we
-   * have poisson_pdf(lambda,36) < 1-u_max.  If instead our
-   * generator uses more bits of mantissa or returns a value
-   * in the range [0,1], then for lambda==10 we need a table
-   * size of 46 instead.  For long doubles, the table size
-   * will need to be longer still.  */
-#define TABLESIZE 46
-  double t[TABLESIZE];
-
-  /* Precompute the table for the u up to and including 0.458.
-   * We will almost certainly need it. */
-  int intlambda = (int)floor (lambda);
-  double P;
-  int tableidx;
-  size_t i = n;
-
-  t[0] = P = exp (-lambda);
-  for (tableidx = 1; tableidx <= intlambda; tableidx++)
-    {
-      P = P*lambda/(double)tableidx;
-      t[tableidx] = t[tableidx-1] + P;
-    }
-
-  while (i-- > 0)
-    {
-      double u = RUNI;
-
-      /* If u > 0.458 we know we can jump to floor(lambda) before
-       * comparing (this observation is based on Stadlober's winrand
-       * code). For lambda >= 1, this will be a win.  Lambda < 1
-       * is already fast, so adding an extra comparison is not a
-       * problem. */
-      int k = (u > 0.458 ? intlambda : 0);
-
-      /* We aren't using a for loop here because when we find the
-       * right k we want to jump to the next iteration of the
-       * outer loop, and the continue statement will only work for
-       * the inner loop. */
-    nextk:
-      if (u <= t[k])
-        {
-          p[i] = (double) k;
-          continue;
-        }
-      if (++k < tableidx)
-        goto nextk;
-
-      /* We only need high values of the table very rarely so we
-       * don't automatically compute the entire table. */
-      while (tableidx < TABLESIZE)
-        {
-          P = P*lambda/(double)tableidx;
-          t[tableidx] = t[tableidx-1] + P;
-          /* Make sure we converge to 1.0 just in case u is uniform
-           * on [0,1] rather than [0,1). */
-          if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0;
-          tableidx++;
-          if (u <= t[tableidx-1]) break;
-        }
-
-      /* We are assuming that the table size is big enough here.
-       * This should be true even if RUNI is returning values in
-       * the range [0,1] rather than [0,1). */
-      p[i] = (double)(tableidx-1);
-    }
-}
-
-static void
-poisson_cdf_lookup_float (double lambda, float *p, size_t n)
-{
-  double t[TABLESIZE];
-
-  /* Precompute the table for the u up to and including 0.458.
-   * We will almost certainly need it. */
-  int intlambda = (int)floor (lambda);
-  double P;
-  int tableidx;
-  size_t i = n;
-
-  t[0] = P = exp (-lambda);
-  for (tableidx = 1; tableidx <= intlambda; tableidx++)
-    {
-      P = P*lambda/(double)tableidx;
-      t[tableidx] = t[tableidx-1] + P;
-    }
-
-  while (i-- > 0)
-    {
-      double u = RUNI;
-      int k = (u > 0.458 ? intlambda : 0);
-    nextk:
-      if (u <= t[k])
-        {
-          p[i] = (float) k;
-          continue;
-        }
-      if (++k < tableidx)
-        goto nextk;
-
-      while (tableidx < TABLESIZE)
-        {
-          P = P*lambda/(double)tableidx;
-          t[tableidx] = t[tableidx-1] + P;
-          if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0;
-          tableidx++;
-          if (u <= t[tableidx-1]) break;
-        }
-
-      p[i] = (float)(tableidx-1);
-    }
-}
-
-/* From Press, et al., Numerical Recipes */
-static void
-poisson_rejection (double lambda, double *p, size_t n)
-{
-  double sq = sqrt (2.0*lambda);
-  double alxm = log (lambda);
-  double g = lambda*alxm - LGAMMA(lambda+1.0);
-  size_t i;
-
-  for (i = 0; i < n; i++)
-    {
-      double y, em, t;
-      do
-        {
-          do
-            {
-              y = tan (M_PI*RUNI);
-              em = sq * y + lambda;
-            } while (em < 0.0);
-          em = floor (em);
-          t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g);
-        } while (RUNI > t);
-      p[i] = em;
-    }
-}
-
-/* From Press, et al., Numerical Recipes */
-static void
-poisson_rejection_float (double lambda, float *p, size_t n)
-{
-  double sq = sqrt (2.0*lambda);
-  double alxm = log (lambda);
-  double g = lambda*alxm - LGAMMA(lambda+1.0);
-  size_t i;
-
-  for (i = 0; i < n; i++)
-    {
-      double y, em, t;
-      do
-        {
-          do
-            {
-              y = tan (M_PI*RUNI);
-              em = sq * y + lambda;
-            } while (em < 0.0);
-          em = floor (em);
-          t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g);
-        } while (RUNI > t);
-      p[i] = em;
-    }
-}
-
-/* The cutoff of L <= 1e8 in the following two functions before using
- * the normal approximation is based on:
- *   > L=1e8; x=floor(linspace(0,2*L,1000));
- *   > max(abs(normal_pdf(x,L,L)-poisson_pdf(x,L)))
- *   ans = 1.1376e-28
- * For L=1e7, the max is around 1e-9, which is within the step size of RUNI.
- * For L>1e10 the pprsc function breaks down, as I saw from the histogram
- * of a large sample, so 1e8 is both small enough and large enough. */
-
-/* Generate a set of poisson numbers with the same distribution */
-void
-oct_fill_randp (double L, octave_idx_type n, double *p)
-{
-  octave_idx_type i;
-  if (L < 0.0 || INFINITE(L))
-    {
-      for (i=0; i<n; i++)
-        p[i] = NAN;
-    }
-  else if (L <= 10.0)
-    {
-      poisson_cdf_lookup (L, p, n);
-    }
-  else if (L <= 1e8)
-    {
-      for (i=0; i<n; i++)
-        p[i] = pprsc (L);
-    }
-  else
-    {
-      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
-      const double sqrtL = sqrt (L);
-      for (i = 0; i < n; i++)
-        {
-          p[i] = floor (RNOR*sqrtL + L + 0.5);
-          if (p[i] < 0.0)
-            p[i] = 0.0; /* will probably never happen */
-        }
-    }
-}
-
-/* Generate one poisson variate */
-double
-oct_randp (double L)
-{
-  double ret;
-  if (L < 0.0) ret = NAN;
-  else if (L <= 12.0)
-    {
-      /* From Press, et al. Numerical recipes */
-      double g = exp (-L);
-      int em = -1;
-      double t = 1.0;
-      do
-        {
-          ++em;
-          t *= RUNI;
-        } while (t > g);
-      ret = em;
-    }
-  else if (L <= 1e8)
-    {
-      /* numerical recipes */
-      poisson_rejection (L, &ret, 1);
-    }
-  else if (INFINITE(L))
-    {
-      /* FIXME: R uses NaN, but the normal approximation suggests that
-       * limit should be Inf.  Which is correct? */
-      ret = NAN;
-    }
-  else
-    {
-      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
-      ret = floor (RNOR*sqrt (L) + L + 0.5);
-      if (ret < 0.0) ret = 0.0; /* will probably never happen */
-    }
-  return ret;
-}
-
-/* Generate a set of poisson numbers with the same distribution */
-void
-oct_fill_float_randp (float FL, octave_idx_type n, float *p)
-{
-  double L = FL;
-  octave_idx_type i;
-  if (L < 0.0 || INFINITE(L))
-    {
-      for (i=0; i<n; i++)
-        p[i] = NAN;
-    }
-  else if (L <= 10.0)
-    {
-      poisson_cdf_lookup_float (L, p, n);
-    }
-  else if (L <= 1e8)
-    {
-      for (i=0; i<n; i++)
-        p[i] = pprsc (L);
-    }
-  else
-    {
-      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
-      const double sqrtL = sqrt (L);
-      for (i = 0; i < n; i++)
-        {
-          p[i] = floor (RNOR*sqrtL + L + 0.5);
-          if (p[i] < 0.0)
-            p[i] = 0.0; /* will probably never happen */
-        }
-    }
-}
-
-/* Generate one poisson variate */
-float
-oct_float_randp (float FL)
-{
-  double L = FL;
-  float ret;
-  if (L < 0.0) ret = NAN;
-  else if (L <= 12.0)
-    {
-      /* From Press, et al. Numerical recipes */
-      double g = exp (-L);
-      int em = -1;
-      double t = 1.0;
-      do
-        {
-          ++em;
-          t *= RUNI;
-        } while (t > g);
-      ret = em;
-    }
-  else if (L <= 1e8)
-    {
-      /* numerical recipes */
-      poisson_rejection_float (L, &ret, 1);
-    }
-  else if (INFINITE(L))
-    {
-      /* FIXME: R uses NaN, but the normal approximation suggests that
-       * limit should be Inf. Which is correct? */
-      ret = NAN;
-    }
-  else
-    {
-      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
-      ret = floor (RNOR*sqrt (L) + L + 0.5);
-      if (ret < 0.0) ret = 0.0; /* will probably never happen */
-    }
-  return ret;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/numeric/randpoisson.cc	Mon May 16 23:20:50 2016 -0400
@@ -0,0 +1,622 @@
+/*
+
+Copyright (C) 2006-2015 John W. Eaton
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+/* Original version written by Paul Kienzle distributed as free
+   software in the in the public domain.  */
+
+/* Needs the following defines:
+ * NAN: value to return for Not-A-Number
+ * RUNI: uniform generator on (0,1)
+ * RNOR: normal generator
+ * LGAMMA: log gamma function
+ * INFINITE: function to test whether a value is infinite
+ */
+
+#if defined (HAVE_CONFIG_H)
+#  include "config.h"
+#endif
+
+#include "f77-fcn.h"
+#include "lo-error.h"
+#include "lo-ieee.h"
+#include "lo-math.h"
+#include "randmtzig.h"
+#include "randpoisson.h"
+
+#undef NAN
+#define NAN octave_NaN
+#undef INFINITE
+#define INFINITE lo_ieee_isinf
+#define RUNI oct_randu()
+#define RNOR oct_randn()
+#define LGAMMA xlgamma
+
+F77_RET_T
+F77_FUNC (dlgams, DLGAMS) (const double *, double *, double *);
+
+static double
+xlgamma (double x)
+{
+  double result;
+#ifdef HAVE_LGAMMA
+  result = lgamma (x);
+#else
+  double sgngam;
+
+  if (lo_ieee_isnan (x))
+    result = x;
+  else if (x <= 0 || lo_ieee_isinf (x))
+    result = octave_Inf;
+  else
+    F77_XFCN (dlgams, DLGAMS, (&x, &result, &sgngam));
+#endif
+  return result;
+}
+
+/* ---- pprsc.c from Stadloeber's winrand --- */
+
+/* flogfak(k) = ln(k!) */
+static double
+flogfak (double k)
+{
+#define C0  9.18938533204672742e-01
+#define C1  8.33333333333333333e-02
+#define C3 -2.77777777777777778e-03
+#define C5  7.93650793650793651e-04
+#define C7 -5.95238095238095238e-04
+
+  static double logfak[30L] =
+  {
+    0.00000000000000000,   0.00000000000000000,   0.69314718055994531,
+    1.79175946922805500,   3.17805383034794562,   4.78749174278204599,
+    6.57925121201010100,   8.52516136106541430,  10.60460290274525023,
+    12.80182748008146961,  15.10441257307551530,  17.50230784587388584,
+    19.98721449566188615,  22.55216385312342289,  25.19122118273868150,
+    27.89927138384089157,  30.67186010608067280,  33.50507345013688888,
+    36.39544520803305358,  39.33988418719949404,  42.33561646075348503,
+    45.38013889847690803,  48.47118135183522388,  51.60667556776437357,
+    54.78472939811231919,  58.00360522298051994,  61.26170176100200198,
+    64.55753862700633106,  67.88974313718153498,  71.25703896716800901
+  };
+
+  double r, rr;
+
+  if (k >= 30.0)
+    {
+      r  = 1.0 / k;
+      rr = r * r;
+      return ((k + 0.5)*log (k) - k + C0 + r*(C1 + rr*(C3 + rr*(C5 + rr*C7))));
+    }
+  else
+    return (logfak[static_cast<int> (k)]);
+}
+
+
+/******************************************************************
+ *                                                                *
+ * Poisson Distribution - Patchwork Rejection/Inversion           *
+ *                                                                *
+ ******************************************************************
+ *                                                                *
+ * For parameter my < 10, Tabulated Inversion is applied.         *
+ * For my >= 10, Patchwork Rejection is employed:                 *
+ * The area below the histogram function f(x) is rearranged in    *
+ * its body by certain point reflections. Within a large center   *
+ * interval variates are sampled efficiently by rejection from    *
+ * uniform hats. Rectangular immediate acceptance regions speed   *
+ * up the generation. The remaining tails are covered by          *
+ * exponential functions.                                         *
+ *                                                                *
+ ******************************************************************
+ *                                                                *
+ * FUNCTION :   - pprsc samples a random number from the Poisson  *
+ *                distribution with parameter my > 0.             *
+ * REFERENCE :  - H. Zechner (1994): Efficient sampling from      *
+ *                continuous and discrete unimodal distributions, *
+ *                Doctoral Dissertation, 156 pp., Technical       *
+ *                University Graz, Austria.                       *
+ * SUBPROGRAM : - drand(seed) ... (0,1)-Uniform generator with    *
+ *                unsigned long integer *seed.                    *
+ *                                                                *
+ * Implemented by H. Zechner, January 1994                        *
+ * Revised by F. Niederl, July 1994                               *
+ *                                                                *
+ ******************************************************************/
+
+static double
+f (double k, double l_nu, double c_pm)
+{
+  return exp (k * l_nu - flogfak (k) - c_pm);
+}
+
+static double
+pprsc (double my)
+{
+  static double my_last = -1.0;
+  static double m,  k2, k4, k1, k5;
+  static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm,
+                f1, f2, f4, f5, p1, p2, p3, p4, p5, p6;
+  double        Dk, X, Y;
+  double        Ds, U, V, W;
+
+  if (my != my_last)
+    {                               /* set-up           */
+      my_last = my;
+      /* approximate deviation of reflection points k2, k4 from my - 1/2 */
+      Ds = sqrt (my + 0.25);
+
+      /* mode m, reflection points k2 and k4, and points k1 and k5,      */
+      /* which delimit the centre region of h(x)                         */
+      m  = floor (my);
+      k2 = ceil (my - 0.5 - Ds);
+      k4 = floor (my - 0.5 + Ds);
+      k1 = k2 + k2 - m + 1L;
+      k5 = k4 + k4 - m;
+
+      /* range width of the critical left and right centre region        */
+      dl = (k2 - k1);
+      dr = (k5 - k4);
+
+      /* recurrence constants r(k)=p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 */
+      r1 = my / k1;
+      r2 = my / k2;
+      r4 = my / (k4 + 1.0);
+      r5 = my / (k5 + 1.0);
+
+      /* reciprocal values of the scale parameters of exp. tail envelope */
+      ll =  log (r1);                                /* expon. tail left */
+      lr = -log (r5);                                /* expon. tail right*/
+
+      /* Poisson constants, necessary for computing function values f(k) */
+      l_my = log (my);
+      c_pm = m * l_my - flogfak (m);
+
+      /* function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5          */
+      f2 = f (k2, l_my, c_pm);
+      f4 = f (k4, l_my, c_pm);
+      f1 = f (k1, l_my, c_pm);
+      f5 = f (k5, l_my, c_pm);
+
+      /* area of the two centre and the two exponential tail regions     */
+      /* area of the two immediate acceptance regions between k2, k4     */
+      p1 = f2 * (dl + 1.0);                            /* immed. left    */
+      p2 = f2 * dl         + p1;                       /* centre left    */
+      p3 = f4 * (dr + 1.0) + p2;                       /* immed. right   */
+      p4 = f4 * dr         + p3;                       /* centre right   */
+      p5 = f1 / ll         + p4;                       /* exp. tail left */
+      p6 = f5 / lr         + p5;                       /* exp. tail right*/
+    }
+
+  for (;;)
+    {
+      /* generate uniform number U -- U(0, p6)                           */
+      /* case distinction corresponding to U                             */
+      if ((U = RUNI * p6) < p2)
+        {                                            /* centre left      */
+
+          /* immediate acceptance region
+             R2 = [k2, m) *[0, f2),  X = k2, ... m -1 */
+          if ((V = U - p1) < 0.0)  return (k2 + floor (U/f2));
+          /* immediate acceptance region
+             R1 = [k1, k2)*[0, f1),  X = k1, ... k2-1 */
+          if ((W = V / dl) < f1 )  return (k1 + floor (V/f1));
+
+          /* computation of candidate X < k2, and its counterpart Y > k2 */
+          /* either squeeze-acceptance of X or acceptance-rejection of Y */
+          Dk = floor (dl * RUNI) + 1.0;
+          if (W <= f2 - Dk * (f2 - f2/r2))
+            {                                        /* quick accept of  */
+              return (k2 - Dk);                      /* X = k2 - Dk      */
+            }
+          if ((V = f2 + f2 - W) < 1.0)
+            {                                        /* quick reject of Y*/
+              Y = k2 + Dk;
+              if (V <= f2 + Dk * (1.0 - f2)/(dl + 1.0))
+                {                                    /* quick accept of  */
+                  return (Y);                        /* Y = k2 + Dk      */
+                }
+              if (V <= f (Y, l_my, c_pm))  return (Y); /* final accept of Y*/
+            }
+          X = k2 - Dk;
+        }
+      else if (U < p4)
+        {                                            /* centre right     */
+          /*  immediate acceptance region
+              R3 = [m, k4+1)*[0, f4), X = m, ... k4    */
+          if ((V = U - p3) < 0.0)  return (k4 - floor ((U - p2)/f4));
+          /* immediate acceptance region
+             R4 = [k4+1, k5+1)*[0, f5)                */
+          if ((W = V / dr) < f5 )  return (k5 - floor (V/f5));
+
+          /* computation of candidate X > k4, and its counterpart Y < k4 */
+          /* either squeeze-acceptance of X or acceptance-rejection of Y */
+          Dk = floor (dr * RUNI) + 1.0;
+          if (W <= f4 - Dk * (f4 - f4*r4))
+            {                                        /* quick accept of  */
+              return (k4 + Dk);                      /* X = k4 + Dk      */
+            }
+          if ((V = f4 + f4 - W) < 1.0)
+            {                                        /* quick reject of Y*/
+              Y = k4 - Dk;
+              if (V <= f4 + Dk * (1.0 - f4)/ dr)
+                {                                    /* quick accept of  */
+                  return (Y);                        /* Y = k4 - Dk      */
+                }
+              if (V <= f (Y, l_my, c_pm))  return (Y); /* final accept of Y*/
+            }
+          X = k4 + Dk;
+        }
+      else
+        {
+          W = RUNI;
+          if (U < p5)
+            {                                        /* expon. tail left */
+              Dk = floor (1.0 - log (W)/ll);
+              if ((X = k1 - Dk) < 0L)  continue;     /* 0 <= X <= k1 - 1 */
+              W *= (U - p4) * ll;                    /* W -- U(0, h(x))  */
+              if (W <= f1 - Dk * (f1 - f1/r1))
+                return (X);                          /* quick accept of X*/
+            }
+          else
+            {                                        /* expon. tail right*/
+              Dk = floor (1.0 - log (W)/lr);
+              X  = k5 + Dk;                          /* X >= k5 + 1      */
+              W *= (U - p5) * lr;                    /* W -- U(0, h(x))  */
+              if (W <= f5 - Dk * (f5 - f5*r5))
+                return (X);                          /* quick accept of X*/
+            }
+        }
+
+      /* acceptance-rejection test of candidate X from the original area */
+      /* test, whether  W <= f(k),    with  W = U*h(x)  and  U -- U(0, 1)*/
+      /* log f(X) = (X - m)*log(my) - log X! + log m!                    */
+      if (log (W) <= X * l_my - flogfak (X) - c_pm)  return (X);
+    }
+}
+/* ---- pprsc.c end ------ */
+
+
+/* The remainder of the file is by Paul Kienzle */
+
+/* Given uniform u, find x such that CDF(L,x)==u.  Return x. */
+static void
+poisson_cdf_lookup (double lambda, double *p, size_t n)
+{
+  /* Table size is predicated on the maximum value of lambda
+   * we want to store in the table, and the maximum value of
+   * returned by the uniform random number generator on [0,1).
+   * With lambda==10 and u_max = 1 - 1/(2^32+1), we
+   * have poisson_pdf(lambda,36) < 1-u_max.  If instead our
+   * generator uses more bits of mantissa or returns a value
+   * in the range [0,1], then for lambda==10 we need a table
+   * size of 46 instead.  For long doubles, the table size
+   * will need to be longer still.  */
+#define TABLESIZE 46
+  double t[TABLESIZE];
+
+  /* Precompute the table for the u up to and including 0.458.
+   * We will almost certainly need it. */
+  int intlambda = static_cast<int> (floor (lambda));
+  double P;
+  int tableidx;
+  size_t i = n;
+
+  t[0] = P = exp (-lambda);
+  for (tableidx = 1; tableidx <= intlambda; tableidx++)
+    {
+      P = P*lambda/static_cast<double> (tableidx);
+      t[tableidx] = t[tableidx-1] + P;
+    }
+
+  while (i-- > 0)
+    {
+      double u = RUNI;
+
+      /* If u > 0.458 we know we can jump to floor(lambda) before
+       * comparing (this observation is based on Stadlober's winrand
+       * code). For lambda >= 1, this will be a win.  Lambda < 1
+       * is already fast, so adding an extra comparison is not a
+       * problem. */
+      int k = (u > 0.458 ? intlambda : 0);
+
+      /* We aren't using a for loop here because when we find the
+       * right k we want to jump to the next iteration of the
+       * outer loop, and the continue statement will only work for
+       * the inner loop. */
+    nextk:
+      if (u <= t[k])
+        {
+          p[i] = static_cast<double> (k);
+          continue;
+        }
+      if (++k < tableidx)
+        goto nextk;
+
+      /* We only need high values of the table very rarely so we
+       * don't automatically compute the entire table. */
+      while (tableidx < TABLESIZE)
+        {
+          P = P*lambda/static_cast<double> (tableidx);
+          t[tableidx] = t[tableidx-1] + P;
+          /* Make sure we converge to 1.0 just in case u is uniform
+           * on [0,1] rather than [0,1). */
+          if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0;
+          tableidx++;
+          if (u <= t[tableidx-1]) break;
+        }
+
+      /* We are assuming that the table size is big enough here.
+       * This should be true even if RUNI is returning values in
+       * the range [0,1] rather than [0,1). */
+      p[i] = static_cast<double> (tableidx-1);
+    }
+}
+
+static void
+poisson_cdf_lookup_float (double lambda, float *p, size_t n)
+{
+  double t[TABLESIZE];
+
+  /* Precompute the table for the u up to and including 0.458.
+   * We will almost certainly need it. */
+  int intlambda = static_cast<int> (floor (lambda));
+  double P;
+  int tableidx;
+  size_t i = n;
+
+  t[0] = P = exp (-lambda);
+  for (tableidx = 1; tableidx <= intlambda; tableidx++)
+    {
+      P = P*lambda/static_cast<double> (tableidx);
+      t[tableidx] = t[tableidx-1] + P;
+    }
+
+  while (i-- > 0)
+    {
+      double u = RUNI;
+      int k = (u > 0.458 ? intlambda : 0);
+    nextk:
+      if (u <= t[k])
+        {
+          p[i] = static_cast<float> (k);
+          continue;
+        }
+      if (++k < tableidx)
+        goto nextk;
+
+      while (tableidx < TABLESIZE)
+        {
+          P = P*lambda/static_cast<double> (tableidx);
+          t[tableidx] = t[tableidx-1] + P;
+          if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0;
+          tableidx++;
+          if (u <= t[tableidx-1]) break;
+        }
+
+      p[i] = static_cast<float> (tableidx-1);
+    }
+}
+
+/* From Press, et al., Numerical Recipes */
+static void
+poisson_rejection (double lambda, double *p, size_t n)
+{
+  double sq = sqrt (2.0*lambda);
+  double alxm = log (lambda);
+  double g = lambda*alxm - LGAMMA(lambda+1.0);
+  size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      double y, em, t;
+      do
+        {
+          do
+            {
+              y = tan (M_PI*RUNI);
+              em = sq * y + lambda;
+            } while (em < 0.0);
+          em = floor (em);
+          t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g);
+        } while (RUNI > t);
+      p[i] = em;
+    }
+}
+
+/* From Press, et al., Numerical Recipes */
+static void
+poisson_rejection_float (double lambda, float *p, size_t n)
+{
+  double sq = sqrt (2.0*lambda);
+  double alxm = log (lambda);
+  double g = lambda*alxm - LGAMMA(lambda+1.0);
+  size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      double y, em, t;
+      do
+        {
+          do
+            {
+              y = tan (M_PI*RUNI);
+              em = sq * y + lambda;
+            } while (em < 0.0);
+          em = floor (em);
+          t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g);
+        } while (RUNI > t);
+      p[i] = em;
+    }
+}
+
+/* The cutoff of L <= 1e8 in the following two functions before using
+ * the normal approximation is based on:
+ *   > L=1e8; x=floor(linspace(0,2*L,1000));
+ *   > max(abs(normal_pdf(x,L,L)-poisson_pdf(x,L)))
+ *   ans = 1.1376e-28
+ * For L=1e7, the max is around 1e-9, which is within the step size of RUNI.
+ * For L>1e10 the pprsc function breaks down, as I saw from the histogram
+ * of a large sample, so 1e8 is both small enough and large enough. */
+
+/* Generate a set of poisson numbers with the same distribution */
+void
+oct_fill_randp (double L, octave_idx_type n, double *p)
+{
+  octave_idx_type i;
+  if (L < 0.0 || INFINITE(L))
+    {
+      for (i=0; i<n; i++)
+        p[i] = NAN;
+    }
+  else if (L <= 10.0)
+    {
+      poisson_cdf_lookup (L, p, n);
+    }
+  else if (L <= 1e8)
+    {
+      for (i=0; i<n; i++)
+        p[i] = pprsc (L);
+    }
+  else
+    {
+      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
+      const double sqrtL = sqrt (L);
+      for (i = 0; i < n; i++)
+        {
+          p[i] = floor (RNOR*sqrtL + L + 0.5);
+          if (p[i] < 0.0)
+            p[i] = 0.0; /* will probably never happen */
+        }
+    }
+}
+
+/* Generate one poisson variate */
+double
+oct_randp (double L)
+{
+  double ret;
+  if (L < 0.0) ret = NAN;
+  else if (L <= 12.0)
+    {
+      /* From Press, et al. Numerical recipes */
+      double g = exp (-L);
+      int em = -1;
+      double t = 1.0;
+      do
+        {
+          ++em;
+          t *= RUNI;
+        } while (t > g);
+      ret = em;
+    }
+  else if (L <= 1e8)
+    {
+      /* numerical recipes */
+      poisson_rejection (L, &ret, 1);
+    }
+  else if (INFINITE(L))
+    {
+      /* FIXME: R uses NaN, but the normal approximation suggests that
+       * limit should be Inf.  Which is correct? */
+      ret = NAN;
+    }
+  else
+    {
+      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
+      ret = floor (RNOR*sqrt (L) + L + 0.5);
+      if (ret < 0.0) ret = 0.0; /* will probably never happen */
+    }
+  return ret;
+}
+
+/* Generate a set of poisson numbers with the same distribution */
+void
+oct_fill_float_randp (float FL, octave_idx_type n, float *p)
+{
+  double L = FL;
+  octave_idx_type i;
+  if (L < 0.0 || INFINITE(L))
+    {
+      for (i=0; i<n; i++)
+        p[i] = NAN;
+    }
+  else if (L <= 10.0)
+    {
+      poisson_cdf_lookup_float (L, p, n);
+    }
+  else if (L <= 1e8)
+    {
+      for (i=0; i<n; i++)
+        p[i] = pprsc (L);
+    }
+  else
+    {
+      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
+      const double sqrtL = sqrt (L);
+      for (i = 0; i < n; i++)
+        {
+          p[i] = floor (RNOR*sqrtL + L + 0.5);
+          if (p[i] < 0.0)
+            p[i] = 0.0; /* will probably never happen */
+        }
+    }
+}
+
+/* Generate one poisson variate */
+float
+oct_float_randp (float FL)
+{
+  double L = FL;
+  float ret;
+  if (L < 0.0) ret = NAN;
+  else if (L <= 12.0)
+    {
+      /* From Press, et al. Numerical recipes */
+      double g = exp (-L);
+      int em = -1;
+      double t = 1.0;
+      do
+        {
+          ++em;
+          t *= RUNI;
+        } while (t > g);
+      ret = em;
+    }
+  else if (L <= 1e8)
+    {
+      /* numerical recipes */
+      poisson_rejection_float (L, &ret, 1);
+    }
+  else if (INFINITE(L))
+    {
+      /* FIXME: R uses NaN, but the normal approximation suggests that
+       * limit should be Inf. Which is correct? */
+      ret = NAN;
+    }
+  else
+    {
+      /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
+      ret = floor (RNOR*sqrt (L) + L + 0.5);
+      if (ret < 0.0) ret = 0.0; /* will probably never happen */
+    }
+  return ret;
+}
--- a/liboctave/numeric/randpoisson.h	Mon May 16 23:26:07 2016 -0400
+++ b/liboctave/numeric/randpoisson.h	Mon May 16 23:20:50 2016 -0400
@@ -28,10 +28,6 @@

 #include "octave-config.h"

-#ifdef __cplusplus
-extern "C" {
-#endif
-
 extern OCTAVE_API double
 oct_randp (double L);

@@ -44,8 +40,4 @@
 extern OCTAVE_API void
 oct_fill_float_randp (float L, octave_idx_type n, float *p);

-#ifdef __cplusplus
-}
 #endif
-
-#endif