Mercurial > octave
changeset 21720:b28c26ae737c
use C++ for random number generator sources
* randgamma.cc: Rename from randgamma.c. Update for C++.
* randmtzig.cc: Rename from randmtzig.c. Update for C++.
* randpoisson.cc: Rename from randpoisson.c. Update for C++.
* randgamma.h, randmtzig.h, randpoisson.h: Update for C++.
* liboctave/numeric/module.mk: Update.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 16 May 2016 23:20:50 -0400 |
parents | ff054947d132 |
children | bcc30b45a225 |
files | liboctave/numeric/module.mk liboctave/numeric/randgamma.c liboctave/numeric/randgamma.cc liboctave/numeric/randgamma.h liboctave/numeric/randmtzig.c liboctave/numeric/randmtzig.cc liboctave/numeric/randmtzig.h liboctave/numeric/randpoisson.c liboctave/numeric/randpoisson.cc liboctave/numeric/randpoisson.h |
diffstat | 10 files changed, 1698 insertions(+), 1729 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/numeric/module.mk Mon May 16 23:26:07 2016 -0400 +++ b/liboctave/numeric/module.mk Mon May 16 23:20:50 2016 -0400 @@ -55,11 +55,6 @@ liboctave/numeric/sparse-qr.h \ liboctave/numeric/svd.h -NUMERIC_C_SRC = \ - liboctave/numeric/randgamma.c \ - liboctave/numeric/randmtzig.c \ - liboctave/numeric/randpoisson.c - NUMERIC_SRC = \ liboctave/numeric/CollocWt.cc \ liboctave/numeric/DASPK.cc \ @@ -85,13 +80,15 @@ liboctave/numeric/oct-spparms.cc \ liboctave/numeric/qr.cc \ liboctave/numeric/qrp.cc \ + liboctave/numeric/randgamma.cc \ + liboctave/numeric/randmtzig.cc \ + liboctave/numeric/randpoisson.cc \ liboctave/numeric/schur.cc \ liboctave/numeric/sparse-chol.cc \ liboctave/numeric/sparse-dmsolve.cc \ liboctave/numeric/sparse-lu.cc \ liboctave/numeric/sparse-qr.cc \ - liboctave/numeric/svd.cc \ - $(NUMERIC_C_SRC) + liboctave/numeric/svd.cc LIBOCTAVE_TEMPLATE_SRC += \ liboctave/numeric/bsxfun-defs.cc
--- a/liboctave/numeric/randgamma.c Mon May 16 23:26:07 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,201 +0,0 @@ -/* - -Copyright (C) 2006-2015 John W. Eaton - -This file is part of Octave. - -Octave is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -Octave is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with Octave; see the file COPYING. If not, see -<http://www.gnu.org/licenses/>. - -*/ - -/* Original version written by Paul Kienzle distributed as free - software in the in the public domain. */ - -/* - -double randg (a) -void fill_randg (a,n,x) - -Generate a series of standard gamma distributions. - -See: Marsaglia G and Tsang W (2000), "A simple method for generating -gamma variables", ACM Transactions on Mathematical Software 26(3) 363-372 - -Needs the following defines: -* NAN: value to return for Not-A-Number -* RUNI: uniform generator on (0,1) -* RNOR: normal generator -* REXP: exponential generator, or -log(RUNI) if one isn't available -* INFINITE: function to test whether a value is infinite - -Test using: - mean = a - variance = a - skewness = 2/sqrt(a) - kurtosis = 3 + 6/sqrt(a) - -Note that randg can be used to generate many distributions: - -gamma(a,b) for a>0, b>0 (from R) - r = b*randg(a) -beta(a,b) for a>0, b>0 - r1 = randg(a,1) - r = r1 / (r1 + randg(b,1)) -Erlang(a,n) - r = a*randg(n) -chisq(df) for df>0 - r = 2*randg(df/2) -t(df) for 0<df<inf (use randn if df is infinite) - r = randn () / sqrt(2*randg(df/2)/df) -F(n1,n2) for 0<n1, 0<n2 - r1 = 2*randg(n1/2)/n1 or 1 if n1 is infinite - r2 = 2*randg(n2/2)/n2 or 1 if n2 is infinite - r = r1 / r2 -negative binonial (n, p) for n>0, 0<p<=1 - r = randp((1-p)/p * randg(n)) - (from R, citing Devroye(1986), Non-Uniform Random Variate Generation) -non-central chisq(df,L), for df>=0 and L>0 (use chisq if L=0) - r = randp(L/2) - r(r>0) = 2*randg(r(r>0)) - r(df>0) += 2*randg(df(df>0)/2) - (from R, citing formula 29.5b-c in Johnson, Kotz, Balkrishnan(1995)) -Dirichlet(a1,...,ak) for ai > 0 - r = (randg(a1),...,randg(ak)) - r = r / sum(r) - (from GSL, citing Law & Kelton(1991), Simulation Modeling and Analysis) -*/ - -#if defined (HAVE_CONFIG_H) -# include "config.h" -#endif - -#include <stdio.h> - -#include "lo-ieee.h" -#include "lo-math.h" -#include "randmtzig.h" -#include "randgamma.h" - -#undef NAN -#define NAN octave_NaN -#define INFINITE lo_ieee_isinf -#define RUNI oct_randu() -#define RNOR oct_randn() -#define REXP oct_rande() - -void -oct_fill_randg (double a, octave_idx_type n, double *r) -{ - octave_idx_type i; - /* If a < 1, start by generating gamma (1+a) */ - const double d = (a < 1. ? 1.+a : a) - 1./3.; - const double c = 1./sqrt (9.*d); - - /* Handle invalid cases */ - if (a <= 0 || INFINITE(a)) - { - for (i=0; i < n; i++) - r[i] = NAN; - return; - } - - for (i=0; i < n; i++) - { - double x, xsq, v, u; - restart: - x = RNOR; - v = (1+c*x); - v *= v*v; - if (v <= 0) - goto restart; /* rare, so don't bother moving up */ - u = RUNI; - xsq = x*x; - if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v))) - goto restart; - r[i] = d*v; - } - if (a < 1) - { - /* Use gamma(a) = gamma(1+a)*U^(1/a) */ - /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */ - for (i = 0; i < n; i++) - r[i] *= exp (-REXP/a); - } -} - -double -oct_randg (double a) -{ - double ret; - oct_fill_randg (a,1,&ret); - return ret; -} - -#undef NAN -#undef RUNI -#undef RNOR -#undef REXP -#define NAN octave_Float_NaN -#define RUNI oct_float_randu() -#define RNOR oct_float_randn() -#define REXP oct_float_rande() - -void -oct_fill_float_randg (float a, octave_idx_type n, float *r) -{ - octave_idx_type i; - /* If a < 1, start by generating gamma(1+a) */ - const float d = (a < 1. ? 1.+a : a) - 1./3.; - const float c = 1./sqrt (9.*d); - - /* Handle invalid cases */ - if (a <= 0 || INFINITE(a)) - { - for (i=0; i < n; i++) - r[i] = NAN; - return; - } - - for (i=0; i < n; i++) - { - float x, xsq, v, u; - frestart: - x = RNOR; - v = (1+c*x); - v *= v*v; - if (v <= 0) - goto frestart; /* rare, so don't bother moving up */ - u = RUNI; - xsq = x*x; - if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v))) - goto frestart; - r[i] = d*v; - } - if (a < 1) - { - /* Use gamma(a) = gamma(1+a)*U^(1/a) */ - /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */ - for (i = 0; i < n; i++) - r[i] *= exp (-REXP/a); - } -} - -float -oct_float_randg (float a) -{ - float ret; - oct_fill_float_randg (a,1,&ret); - return ret; -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/numeric/randgamma.cc Mon May 16 23:20:50 2016 -0400 @@ -0,0 +1,199 @@ +/* + +Copyright (C) 2006-2015 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +/* Original version written by Paul Kienzle distributed as free + software in the in the public domain. */ + +/* + +double randg (a) +void fill_randg (a,n,x) + +Generate a series of standard gamma distributions. + +See: Marsaglia G and Tsang W (2000), "A simple method for generating +gamma variables", ACM Transactions on Mathematical Software 26(3) 363-372 + +Needs the following defines: +* NAN: value to return for Not-A-Number +* RUNI: uniform generator on (0,1) +* RNOR: normal generator +* REXP: exponential generator, or -log(RUNI) if one isn't available +* INFINITE: function to test whether a value is infinite + +Test using: + mean = a + variance = a + skewness = 2/sqrt(a) + kurtosis = 3 + 6/sqrt(a) + +Note that randg can be used to generate many distributions: + +gamma(a,b) for a>0, b>0 (from R) + r = b*randg(a) +beta(a,b) for a>0, b>0 + r1 = randg(a,1) + r = r1 / (r1 + randg(b,1)) +Erlang(a,n) + r = a*randg(n) +chisq(df) for df>0 + r = 2*randg(df/2) +t(df) for 0<df<inf (use randn if df is infinite) + r = randn () / sqrt(2*randg(df/2)/df) +F(n1,n2) for 0<n1, 0<n2 + r1 = 2*randg(n1/2)/n1 or 1 if n1 is infinite + r2 = 2*randg(n2/2)/n2 or 1 if n2 is infinite + r = r1 / r2 +negative binonial (n, p) for n>0, 0<p<=1 + r = randp((1-p)/p * randg(n)) + (from R, citing Devroye(1986), Non-Uniform Random Variate Generation) +non-central chisq(df,L), for df>=0 and L>0 (use chisq if L=0) + r = randp(L/2) + r(r>0) = 2*randg(r(r>0)) + r(df>0) += 2*randg(df(df>0)/2) + (from R, citing formula 29.5b-c in Johnson, Kotz, Balkrishnan(1995)) +Dirichlet(a1,...,ak) for ai > 0 + r = (randg(a1),...,randg(ak)) + r = r / sum(r) + (from GSL, citing Law & Kelton(1991), Simulation Modeling and Analysis) +*/ + +#if defined (HAVE_CONFIG_H) +# include "config.h" +#endif + +#include "lo-ieee.h" +#include "lo-math.h" +#include "randmtzig.h" +#include "randgamma.h" + +#undef NAN +#define NAN octave_NaN +#define INFINITE lo_ieee_isinf +#define RUNI oct_randu() +#define RNOR oct_randn() +#define REXP oct_rande() + +void +oct_fill_randg (double a, octave_idx_type n, double *r) +{ + octave_idx_type i; + /* If a < 1, start by generating gamma (1+a) */ + const double d = (a < 1. ? 1.+a : a) - 1./3.; + const double c = 1./sqrt (9.*d); + + /* Handle invalid cases */ + if (a <= 0 || INFINITE(a)) + { + for (i=0; i < n; i++) + r[i] = NAN; + return; + } + + for (i=0; i < n; i++) + { + double x, xsq, v, u; + restart: + x = RNOR; + v = (1+c*x); + v *= v*v; + if (v <= 0) + goto restart; /* rare, so don't bother moving up */ + u = RUNI; + xsq = x*x; + if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v))) + goto restart; + r[i] = d*v; + } + if (a < 1) + { + /* Use gamma(a) = gamma(1+a)*U^(1/a) */ + /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */ + for (i = 0; i < n; i++) + r[i] *= exp (-REXP/a); + } +} + +double +oct_randg (double a) +{ + double ret; + oct_fill_randg (a,1,&ret); + return ret; +} + +#undef NAN +#undef RUNI +#undef RNOR +#undef REXP +#define NAN octave_Float_NaN +#define RUNI oct_float_randu() +#define RNOR oct_float_randn() +#define REXP oct_float_rande() + +void +oct_fill_float_randg (float a, octave_idx_type n, float *r) +{ + octave_idx_type i; + /* If a < 1, start by generating gamma(1+a) */ + const float d = (a < 1. ? 1.+a : a) - 1./3.; + const float c = 1./sqrt (9.*d); + + /* Handle invalid cases */ + if (a <= 0 || INFINITE(a)) + { + for (i=0; i < n; i++) + r[i] = NAN; + return; + } + + for (i=0; i < n; i++) + { + float x, xsq, v, u; + frestart: + x = RNOR; + v = (1+c*x); + v *= v*v; + if (v <= 0) + goto frestart; /* rare, so don't bother moving up */ + u = RUNI; + xsq = x*x; + if (u >= 1.-0.0331*xsq*xsq && log (u) >= 0.5*xsq + d*(1-v+log (v))) + goto frestart; + r[i] = d*v; + } + if (a < 1) + { + /* Use gamma(a) = gamma(1+a)*U^(1/a) */ + /* Given REXP = -log(U) then U^(1/a) = exp(-REXP/a) */ + for (i = 0; i < n; i++) + r[i] *= exp (-REXP/a); + } +} + +float +oct_float_randg (float a) +{ + float ret; + oct_fill_float_randg (a,1,&ret); + return ret; +}
--- a/liboctave/numeric/randgamma.h Mon May 16 23:26:07 2016 -0400 +++ b/liboctave/numeric/randgamma.h Mon May 16 23:20:50 2016 -0400 @@ -28,10 +28,6 @@ #include "octave-config.h" -#ifdef __cplusplus -extern "C" { -#endif - extern OCTAVE_API double oct_randg (double a); extern OCTAVE_API void oct_fill_randg (double a, octave_idx_type n, double *p); @@ -39,8 +35,4 @@ extern OCTAVE_API void oct_fill_float_randg (float a, octave_idx_type n, float *p); -#ifdef __cplusplus -} #endif - -#endif
--- a/liboctave/numeric/randmtzig.c Mon May 16 23:26:07 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,870 +0,0 @@ -/* - -Copyright (C) 2006-2015 John W. Eaton - -This file is part of Octave. - -Octave is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -Octave is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with Octave; see the file COPYING. If not, see -<http://www.gnu.org/licenses/>. - -*/ - -/* - A C-program for MT19937, with initialization improved 2002/2/10. - Coded by Takuji Nishimura and Makoto Matsumoto. - This is a faster version by taking Shawn Cokus's optimization, - Matthe Bellew's simplification, Isaku Wada's real version. - David Bateman added normal and exponential distributions following - Marsaglia and Tang's Ziggurat algorithm. - - Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, - Copyright (C) 2004, David Bateman - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - Any feedback is very welcome. - http://www.math.keio.ac.jp/matumoto/emt.html - email: matumoto@math.keio.ac.jp - - * 2004-01-19 Paul Kienzle - * * comment out main - * add init_by_entropy, get_state, set_state - * * converted to allow compiling by C++ compiler - * - * 2004-01-25 David Bateman - * * Add Marsaglia and Tsang Ziggurat code - * - * 2004-07-13 Paul Kienzle - * * make into an independent library with some docs. - * * introduce new main and test code. - * - * 2004-07-28 Paul Kienzle & David Bateman - * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa - * * make the naming scheme more uniform - * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch. - * - * 2005-02-23 Paul Kienzle - * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control - * - * 2006-04-01 David Bateman - * * convert for use in octave, declaring static functions only used - * here and adding oct_ to functions visible externally - * * inverse sense of ALLBITS - * - * 2012-05-18 David Bateman - * * Remove randu64 and ALLBIT option - * * Add the single precision generators - */ - -/* - === Build instructions === - - Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is - available. This is not necessary if your architecture has - /dev/urandom defined. - - Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86. - You can force X86 behavior with -DUSE_X86_32=1, or suppress it with - -DUSE_X86_32=0. You should also consider -march=i686 or similar for - extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit - x86 architectures. - - If you want to replace the Mersenne Twister with another - generator then redefine randi32 appropriately. - - === Usage instructions === - Before using any of the generators, initialize the state with one of - oct_init_by_int, oct_init_by_array or oct_init_by_entropy. - - All generators share the same state vector. - - === Mersenne Twister === - void oct_init_by_int (uint32_t s) 32-bit initial state - void oct_init_by_array (uint32_t k[],int m) m*32-bit initial state - void oct_init_by_entropy (void) random initial state - void oct_get_state (uint32_t save[MT_N+1]) saves state in array - void oct_set_state (uint32_t save[MT_N+1]) restores state from array - static uint32_t randmt (void) returns 32-bit unsigned int - - === inline generators === - static uint32_t randi32 (void) returns 32-bit unsigned int - static uint64_t randi53 (void) returns 53-bit unsigned int - static uint64_t randi54 (void) returns 54-bit unsigned int - static float randu32 (void) returns 32-bit uniform in (0,1) - static double randu53 (void) returns 53-bit uniform in (0,1) - - double oct_randu (void) returns M-bit uniform in (0,1) - double oct_randn (void) returns M-bit standard normal - double oct_rande (void) returns N-bit standard exponential - - float oct_float_randu (void) returns M-bit uniform in (0,1) - float oct_float_randn (void) returns M-bit standard normal - float oct_float_rande (void) returns N-bit standard exponential - - === Array generators === - void oct_fill_randi32 (octave_idx_type, uint32_t []) - void oct_fill_randi64 (octave_idx_type, uint64_t []) - - void oct_fill_randu (octave_idx_type, double []) - void oct_fill_randn (octave_idx_type, double []) - void oct_fill_rande (octave_idx_type, double []) - - void oct_fill_float_randu (octave_idx_type, float []) - void oct_fill_float_randn (octave_idx_type, float []) - void oct_fill_float_rande (octave_idx_type, float []) -*/ - -#if defined (HAVE_CONFIG_H) -# include "config.h" -#endif - -#include <stdio.h> -#include <time.h> - -#ifdef HAVE_GETTIMEOFDAY -# include <sys/time.h> -#endif - -#include "lo-math.h" -#include "randmtzig.h" - -/* FIXME: may want to suppress X86 if sizeof(long) > 4 */ -#if ! defined (USE_X86_32) -# if defined (i386) || defined (HAVE_X86_32) -# define USE_X86_32 1 -# else -# define USE_X86_32 0 -# endif -#endif - -/* ===== Mersenne Twister 32-bit generator ===== */ - -#define MT_M 397 -#define MATRIX_A 0x9908b0dfUL /* constant vector a */ -#define UMASK 0x80000000UL /* most significant w-r bits */ -#define LMASK 0x7fffffffUL /* least significant r bits */ -#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) -#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) - -static uint32_t *next; -static uint32_t state[MT_N]; /* the array for the state vector */ -static int left = 1; -static int initf = 0; -static int initt = 1; -static int inittf = 1; - -/* initializes state[MT_N] with a seed */ -void -oct_init_by_int (uint32_t s) -{ - int j; - state[0] = s & 0xffffffffUL; - for (j = 1; j < MT_N; j++) - { - state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, MSBs of the seed affect */ - /* only MSBs of the array state[]. */ - /* 2002/01/09 modified by Makoto Matsumoto */ - state[j] &= 0xffffffffUL; /* for >32 bit machines */ - } - left = 1; - initf = 1; -} - -/* initialize by an array with array-length */ -/* init_key is the array for initializing keys */ -/* key_length is its length */ -void -oct_init_by_array (uint32_t *init_key, int key_length) -{ - int i, j, k; - oct_init_by_int (19650218UL); - i = 1; - j = 0; - k = (MT_N > key_length ? MT_N : key_length); - for (; k; k--) - { - state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL)) - + init_key[j] + j; /* non linear */ - state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ - i++; - j++; - if (i >= MT_N) - { - state[0] = state[MT_N-1]; - i = 1; - } - if (j >= key_length) - j = 0; - } - for (k = MT_N - 1; k; k--) - { - state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL)) - - i; /* non linear */ - state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ - i++; - if (i >= MT_N) - { - state[0] = state[MT_N-1]; - i = 1; - } - } - - state[0] = 0x80000000UL; /* MSB is 1; assuring nonzero initial array */ - left = 1; - initf = 1; -} - -void -oct_init_by_entropy (void) -{ - uint32_t entropy[MT_N]; - int n = 0; - - /* Look for entropy in /dev/urandom */ - FILE* urandom = fopen ("/dev/urandom", "rb"); - if (urandom) - { - while (n < MT_N) - { - unsigned char word[4]; - if (fread (word, 4, 1, urandom) != 1) - break; - entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+((uint32_t)word[3]<<24); - } - fclose (urandom); - } - - /* If there isn't enough entropy, gather some from various sources */ - if (n < MT_N) - entropy[n++] = time (NULL); /* Current time in seconds */ - if (n < MT_N) - entropy[n++] = clock (); /* CPU time used (usec) */ -#ifdef HAVE_GETTIMEOFDAY - if (n < MT_N) - { - struct timeval tv; - if (gettimeofday (&tv, NULL) != -1) - entropy[n++] = tv.tv_usec; /* Fractional part of current time */ - } -#endif - /* Send all the entropy into the initial state vector */ - oct_init_by_array (entropy,n); -} - -void -oct_set_state (uint32_t *save) -{ - int i; - for (i = 0; i < MT_N; i++) - state[i] = save[i]; - left = save[MT_N]; - next = state + (MT_N - left + 1); -} - -void -oct_get_state (uint32_t *save) -{ - int i; - for (i = 0; i < MT_N; i++) - save[i] = state[i]; - save[MT_N] = left; -} - -static void -next_state (void) -{ - uint32_t *p = state; - int j; - - /* if init_by_int() has not been called, */ - /* a default initial seed is used */ - /* if (initf==0) init_by_int(5489UL); */ - /* Or better yet, a random seed! */ - if (initf == 0) - oct_init_by_entropy (); - - left = MT_N; - next = state; - - for (j = MT_N - MT_M + 1; --j; p++) - *p = p[MT_M] ^ TWIST(p[0], p[1]); - - for (j = MT_M; --j; p++) - *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]); - - *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]); -} - -/* generates a random number on [0,0xffffffff]-interval */ -static uint32_t -randmt (void) -{ - uint32_t y; - - if (--left == 0) - next_state (); - y = *next++; - - /* Tempering */ - y ^= (y >> 11); - y ^= (y << 7) & 0x9d2c5680UL; - y ^= (y << 15) & 0xefc60000UL; - return (y ^ (y >> 18)); -} - -/* ===== Uniform generators ===== */ - -/* Select which 32 bit generator to use */ -#define randi32 randmt - -static uint64_t -randi53 (void) -{ - const uint32_t lo = randi32 (); - const uint32_t hi = randi32 () & 0x1FFFFF; -#ifdef HAVE_X86_32 - uint64_t u; - uint32_t *p = (uint32_t *)&u; - p[0] = lo; - p[1] = hi; - return u; -#else - return (( (uint64_t)hi << 32) | lo); -#endif -} - -static uint64_t -randi54 (void) -{ - const uint32_t lo = randi32 (); - const uint32_t hi = randi32 () & 0x3FFFFF; -#ifdef HAVE_X86_32 - uint64_t u; - uint32_t *p = (uint32_t *)&u; - p[0] = lo; - p[1] = hi; - return u; -#else - return (( (uint64_t)hi << 32 ) | lo); -#endif -} - -/* generates a random number on (0,1)-real-interval */ -static float -randu32 (void) -{ - return ((float)randi32 () + 0.5) * (1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on (0,1) with 53-bit resolution */ -static double -randu53 (void) -{ - const uint32_t a = randi32 () >> 5; - const uint32_t b = randi32 () >> 6; - return (a*67108864.0+b+0.4) * (1.0/9007199254740992.0); -} - -/* Determine mantissa for uniform doubles */ -double -oct_randu (void) -{ - return randu53 (); -} - -/* Determine mantissa for uniform floats */ -float -oct_float_randu (void) -{ - return randu32 (); -} - -/* ===== Ziggurat normal and exponential generators ===== */ - -#define ZIGGURAT_TABLE_SIZE 256 - -#define ZIGGURAT_NOR_R 3.6541528853610088 -#define ZIGGURAT_NOR_INV_R 0.27366123732975828 -#define NOR_SECTION_AREA 0.00492867323399 - -#define ZIGGURAT_EXP_R 7.69711747013104972 -#define ZIGGURAT_EXP_INV_R 0.129918765548341586 -#define EXP_SECTION_AREA 0.0039496598225815571993 - -#define ZIGINT uint64_t -#define EMANTISSA 9007199254740992.0 /* 53 bit mantissa */ -#define ERANDI randi53() /* 53 bits for mantissa */ -#define NMANTISSA EMANTISSA -#define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */ -#define RANDU randu53() - -static ZIGINT ki[ZIGGURAT_TABLE_SIZE]; -static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE]; -static ZIGINT ke[ZIGGURAT_TABLE_SIZE]; -static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE]; - -/* -This code is based on the paper Marsaglia and Tsang, "The ziggurat method -for generating random variables", Journ. Statistical Software. Code was -presented in this paper for a Ziggurat of 127 levels and using a 32 bit -integer random number generator. This version of the code, uses the -Mersenne Twister as the integer generator and uses 256 levels in the -Ziggurat. This has several advantages. - - 1) As Marsaglia and Tsang themselves states, the more levels the few - times the expensive tail algorithm must be called - 2) The cycle time of the generator is determined by the integer - generator, thus the use of a Mersenne Twister for the core random - generator makes this cycle extremely long. - 3) The license on the original code was unclear, thus rewriting the code - from the article means we are free of copyright issues. - 4) Compile flag for full 53-bit random mantissa. - -It should be stated that the authors made my life easier, by the fact that -the algorithm developed in the text of the article is for a 256 level -ziggurat, even if the code itself isn't... - -One modification to the algorithm developed in the article, is that it is -assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in -terms like 2^32 in the code. As the normal distribution is defined between --Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus -in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for -this term. The exponential distribution is one sided so we use the -full 32 bits. We use EMANTISSA for this term. - -It appears that I'm slightly slower than the code in the article, this -is partially due to a better generator of random integers than they -use. But might also be that the case of rapid return was optimized by -inlining the relevant code with a #define. As the basic Mersenne -Twister is only 25% faster than this code I suspect that the main -reason is just the use of the Mersenne Twister and not the inlining, -so I'm not going to try and optimize further. -*/ - -static void -create_ziggurat_tables (void) -{ - int i; - double x, x1; - - /* Ziggurat tables for the normal distribution */ - x1 = ZIGGURAT_NOR_R; - wi[255] = x1 / NMANTISSA; - fi[255] = exp (-0.5 * x1 * x1); - - /* Index zero is special for tail strip, where Marsaglia and Tsang - * defines this as - * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, - * where v is the area of each strip of the ziggurat. - */ - ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); - wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; - fi[0] = 1.; - - for (i = 254; i > 0; i--) - { - /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus - * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) - */ - x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + fi[i+1])); - ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); - wi[i] = x / NMANTISSA; - fi[i] = exp (-0.5 * x * x); - x1 = x; - } - - ki[1] = 0; - - /* Zigurrat tables for the exponential distribution */ - x1 = ZIGGURAT_EXP_R; - we[255] = x1 / EMANTISSA; - fe[255] = exp (-x1); - - /* Index zero is special for tail strip, where Marsaglia and Tsang - * defines this as - * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, - * where v is the area of each strip of the ziggurat. - */ - ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); - we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; - fe[0] = 1.; - - for (i = 254; i > 0; i--) - { - /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus - * need inverse operator of y = exp(-x) -> x = -ln(y) - */ - x = - log (EXP_SECTION_AREA / x1 + fe[i+1]); - ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); - we[i] = x / EMANTISSA; - fe[i] = exp (-x); - x1 = x; - } - ke[1] = 0; - - initt = 0; -} - -/* - * Here is the guts of the algorithm. As Marsaglia and Tsang state the - * algorithm in their paper - * - * 1) Calculate a random signed integer j and let i be the index - * provided by the rightmost 8-bits of j - * 2) Set x = j * w_i. If j < k_i return x - * 3) If i = 0, then return x from the tail - * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x - * 5) goto step 1 - * - * Where f is the functional form of the distribution, which for a normal - * distribution is exp(-0.5*x*x) - */ - -double -oct_randn (void) -{ - if (initt) - create_ziggurat_tables (); - - while (1) - { - /* The following code is specialized for 32-bit mantissa. - * Compared to the arbitrary mantissa code, there is a performance - * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40% - * There is a bigger performance gain compared to using a full - * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240% - * Of course, different compilers and operating systems may - * have something to do with this. - */ -# ifdef HAVE_X86_32 - /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */ - double x; - int si,idx; - uint32_t lo, hi; - int64_t rabs; - uint32_t *p = (uint32_t *)&rabs; - lo = randi32 (); - idx = lo & 0xFF; - hi = randi32 (); - si = hi & UMASK; - p[0] = lo; - p[1] = hi & 0x1FFFFF; - x = ( si ? -rabs : rabs ) * wi[idx]; -# else /* ! HAVE_X86_32 */ - /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ - const uint64_t r = NRANDI; - const int64_t rabs = r >> 1; - const int idx = (int)(rabs & 0xFF); - const double x = ( (r & 1) ? -rabs : rabs) * wi[idx]; -# endif /* ! HAVE_X86_32 */ - if (rabs < (int64_t)ki[idx]) - return x; /* 99.3% of the time we return here 1st try */ - else if (idx == 0) - { - /* As stated in Marsaglia and Tsang - * - * For the normal tail, the method of Marsaglia[5] provides: - * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, - * then return r+x. Except that r+x is always in the positive - * tail!!!! Any thing random might be used to determine the - * sign, but as we already have r we might as well use it - * - * [PAK] but not the bottom 8 bits, since they are all 0 here! - */ - double xx, yy; - do - { - xx = - ZIGGURAT_NOR_INV_R * log (RANDU); - yy = - log (RANDU); - } - while ( yy+yy <= xx*xx); - return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); - } - else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp (-0.5*x*x)) - return x; - } -} - -double -oct_rande (void) -{ - if (initt) - create_ziggurat_tables (); - - while (1) - { - ZIGINT ri = ERANDI; - const int idx = (int)(ri & 0xFF); - const double x = ri * we[idx]; - if (ri < ke[idx]) - return x; /* 98.9% of the time we return here 1st try */ - else if (idx == 0) - { - /* As stated in Marsaglia and Tsang - * - * For the exponential tail, the method of Marsaglia[5] provides: - * x = r - ln(U); - */ - return ZIGGURAT_EXP_R - log (RANDU); - } - else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp (-x)) - return x; - } -} - -#undef ZIGINT -#undef EMANTISSA -#undef ERANDI -#undef NMANTISSA -#undef NRANDI -#undef RANDU - -#define ZIGINT uint32_t -#define EMANTISSA 4294967296.0 /* 32 bit mantissa */ -#define ERANDI randi32() /* 32 bits for mantissa */ -#define NMANTISSA 2147483648.0 /* 31 bit mantissa */ -#define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */ -#define RANDU randu32() - -static ZIGINT fki[ZIGGURAT_TABLE_SIZE]; -static float fwi[ZIGGURAT_TABLE_SIZE], ffi[ZIGGURAT_TABLE_SIZE]; -static ZIGINT fke[ZIGGURAT_TABLE_SIZE]; -static float fwe[ZIGGURAT_TABLE_SIZE], ffe[ZIGGURAT_TABLE_SIZE]; - - -static void -create_ziggurat_float_tables (void) -{ - int i; - float x, x1; - - /* Ziggurat tables for the normal distribution */ - x1 = ZIGGURAT_NOR_R; - fwi[255] = x1 / NMANTISSA; - ffi[255] = exp (-0.5 * x1 * x1); - - /* Index zero is special for tail strip, where Marsaglia and Tsang - * defines this as - * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, - * where v is the area of each strip of the ziggurat. - */ - fki[0] = (ZIGINT) (x1 * ffi[255] / NOR_SECTION_AREA * NMANTISSA); - fwi[0] = NOR_SECTION_AREA / ffi[255] / NMANTISSA; - ffi[0] = 1.; - - for (i = 254; i > 0; i--) - { - /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus - * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) - */ - x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + ffi[i+1])); - fki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); - fwi[i] = x / NMANTISSA; - ffi[i] = exp (-0.5 * x * x); - x1 = x; - } - - fki[1] = 0; - - /* Zigurrat tables for the exponential distribution */ - x1 = ZIGGURAT_EXP_R; - fwe[255] = x1 / EMANTISSA; - ffe[255] = exp (-x1); - - /* Index zero is special for tail strip, where Marsaglia and Tsang - * defines this as - * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, - * where v is the area of each strip of the ziggurat. - */ - fke[0] = (ZIGINT) (x1 * ffe[255] / EXP_SECTION_AREA * EMANTISSA); - fwe[0] = EXP_SECTION_AREA / ffe[255] / EMANTISSA; - ffe[0] = 1.; - - for (i = 254; i > 0; i--) - { - /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus - * need inverse operator of y = exp(-x) -> x = -ln(y) - */ - x = - log (EXP_SECTION_AREA / x1 + ffe[i+1]); - fke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); - fwe[i] = x / EMANTISSA; - ffe[i] = exp (-x); - x1 = x; - } - fke[1] = 0; - - inittf = 0; -} - -/* - * Here is the guts of the algorithm. As Marsaglia and Tsang state the - * algorithm in their paper - * - * 1) Calculate a random signed integer j and let i be the index - * provided by the rightmost 8-bits of j - * 2) Set x = j * w_i. If j < k_i return x - * 3) If i = 0, then return x from the tail - * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x - * 5) goto step 1 - * - * Where f is the functional form of the distribution, which for a normal - * distribution is exp(-0.5*x*x) - */ - -float -oct_float_randn (void) -{ - if (inittf) - create_ziggurat_float_tables (); - - while (1) - { - /* 32-bit mantissa */ - const uint32_t r = randi32 (); - const uint32_t rabs = r & LMASK; - const int idx = (int)(r & 0xFF); - const float x = ((int32_t)r) * fwi[idx]; - if (rabs < fki[idx]) - return x; /* 99.3% of the time we return here 1st try */ - else if (idx == 0) - { - /* As stated in Marsaglia and Tsang - * - * For the normal tail, the method of Marsaglia[5] provides: - * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, - * then return r+x. Except that r+x is always in the positive - * tail!!!! Any thing random might be used to determine the - * sign, but as we already have r we might as well use it - * - * [PAK] but not the bottom 8 bits, since they are all 0 here! - */ - float xx, yy; - do - { - xx = - ZIGGURAT_NOR_INV_R * log (RANDU); - yy = - log (RANDU); - } - while ( yy+yy <= xx*xx); - return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); - } - else if ((ffi[idx-1] - ffi[idx]) * RANDU + ffi[idx] < exp (-0.5*x*x)) - return x; - } -} - -float -oct_float_rande (void) -{ - if (inittf) - create_ziggurat_float_tables (); - - while (1) - { - ZIGINT ri = ERANDI; - const int idx = (int)(ri & 0xFF); - const float x = ri * fwe[idx]; - if (ri < fke[idx]) - return x; /* 98.9% of the time we return here 1st try */ - else if (idx == 0) - { - /* As stated in Marsaglia and Tsang - * - * For the exponential tail, the method of Marsaglia[5] provides: - * x = r - ln(U); - */ - return ZIGGURAT_EXP_R - log (RANDU); - } - else if ((ffe[idx-1] - ffe[idx]) * RANDU + ffe[idx] < exp (-x)) - return x; - } -} - -/* Array generators */ -void -oct_fill_randu (octave_idx_type n, double *p) -{ - octave_idx_type i; - for (i = 0; i < n; i++) - p[i] = oct_randu (); -} - -void -oct_fill_randn (octave_idx_type n, double *p) -{ - octave_idx_type i; - for (i = 0; i < n; i++) - p[i] = oct_randn (); -} - -void -oct_fill_rande (octave_idx_type n, double *p) -{ - octave_idx_type i; - for (i = 0; i < n; i++) - p[i] = oct_rande (); -} - -void -oct_fill_float_randu (octave_idx_type n, float *p) -{ - octave_idx_type i; - for (i = 0; i < n; i++) - p[i] = oct_float_randu (); -} - -void -oct_fill_float_randn (octave_idx_type n, float *p) -{ - octave_idx_type i; - for (i = 0; i < n; i++) - p[i] = oct_float_randn (); -} - -void -oct_fill_float_rande (octave_idx_type n, float *p) -{ - octave_idx_type i; - for (i = 0; i < n; i++) - p[i] = oct_float_rande (); -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/numeric/randmtzig.cc Mon May 16 23:20:50 2016 -0400 @@ -0,0 +1,870 @@ +/* + +Copyright (C) 2006-2015 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +/* + A C-program for MT19937, with initialization improved 2002/2/10. + Coded by Takuji Nishimura and Makoto Matsumoto. + This is a faster version by taking Shawn Cokus's optimization, + Matthe Bellew's simplification, Isaku Wada's real version. + David Bateman added normal and exponential distributions following + Marsaglia and Tang's Ziggurat algorithm. + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + Copyright (C) 2004, David Bateman + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp + + * 2004-01-19 Paul Kienzle + * * comment out main + * add init_by_entropy, get_state, set_state + * * converted to allow compiling by C++ compiler + * + * 2004-01-25 David Bateman + * * Add Marsaglia and Tsang Ziggurat code + * + * 2004-07-13 Paul Kienzle + * * make into an independent library with some docs. + * * introduce new main and test code. + * + * 2004-07-28 Paul Kienzle & David Bateman + * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa + * * make the naming scheme more uniform + * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch. + * + * 2005-02-23 Paul Kienzle + * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control + * + * 2006-04-01 David Bateman + * * convert for use in octave, declaring static functions only used + * here and adding oct_ to functions visible externally + * * inverse sense of ALLBITS + * + * 2012-05-18 David Bateman + * * Remove randu64 and ALLBIT option + * * Add the single precision generators + */ + +/* + === Build instructions === + + Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is + available. This is not necessary if your architecture has + /dev/urandom defined. + + Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86. + You can force X86 behavior with -DUSE_X86_32=1, or suppress it with + -DUSE_X86_32=0. You should also consider -march=i686 or similar for + extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit + x86 architectures. + + If you want to replace the Mersenne Twister with another + generator then redefine randi32 appropriately. + + === Usage instructions === + Before using any of the generators, initialize the state with one of + oct_init_by_int, oct_init_by_array or oct_init_by_entropy. + + All generators share the same state vector. + + === Mersenne Twister === + void oct_init_by_int (uint32_t s) 32-bit initial state + void oct_init_by_array (uint32_t k[],int m) m*32-bit initial state + void oct_init_by_entropy (void) random initial state + void oct_get_state (uint32_t save[MT_N+1]) saves state in array + void oct_set_state (uint32_t save[MT_N+1]) restores state from array + static uint32_t randmt (void) returns 32-bit unsigned int + + === inline generators === + static uint32_t randi32 (void) returns 32-bit unsigned int + static uint64_t randi53 (void) returns 53-bit unsigned int + static uint64_t randi54 (void) returns 54-bit unsigned int + static float randu32 (void) returns 32-bit uniform in (0,1) + static double randu53 (void) returns 53-bit uniform in (0,1) + + double oct_randu (void) returns M-bit uniform in (0,1) + double oct_randn (void) returns M-bit standard normal + double oct_rande (void) returns N-bit standard exponential + + float oct_float_randu (void) returns M-bit uniform in (0,1) + float oct_float_randn (void) returns M-bit standard normal + float oct_float_rande (void) returns N-bit standard exponential + + === Array generators === + void oct_fill_randi32 (octave_idx_type, uint32_t []) + void oct_fill_randi64 (octave_idx_type, uint64_t []) + + void oct_fill_randu (octave_idx_type, double []) + void oct_fill_randn (octave_idx_type, double []) + void oct_fill_rande (octave_idx_type, double []) + + void oct_fill_float_randu (octave_idx_type, float []) + void oct_fill_float_randn (octave_idx_type, float []) + void oct_fill_float_rande (octave_idx_type, float []) +*/ + +#if defined (HAVE_CONFIG_H) +# include "config.h" +#endif + +#include <cstdio> +#include <ctime> + +#ifdef HAVE_GETTIMEOFDAY +# include <sys/time.h> +#endif + +#include "lo-math.h" +#include "randmtzig.h" + +/* FIXME: may want to suppress X86 if sizeof(long) > 4 */ +#if ! defined (USE_X86_32) +# if defined (i386) || defined (HAVE_X86_32) +# define USE_X86_32 1 +# else +# define USE_X86_32 0 +# endif +#endif + +/* ===== Mersenne Twister 32-bit generator ===== */ + +#define MT_M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UMASK 0x80000000UL /* most significant w-r bits */ +#define LMASK 0x7fffffffUL /* least significant r bits */ +#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) +#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) + +static uint32_t *next; +static uint32_t state[MT_N]; /* the array for the state vector */ +static int left = 1; +static int initf = 0; +static int initt = 1; +static int inittf = 1; + +/* initializes state[MT_N] with a seed */ +void +oct_init_by_int (uint32_t s) +{ + int j; + state[0] = s & 0xffffffffUL; + for (j = 1; j < MT_N; j++) + { + state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array state[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + state[j] &= 0xffffffffUL; /* for >32 bit machines */ + } + left = 1; + initf = 1; +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +void +oct_init_by_array (uint32_t *init_key, int key_length) +{ + int i, j, k; + oct_init_by_int (19650218UL); + i = 1; + j = 0; + k = (MT_N > key_length ? MT_N : key_length); + for (; k; k--) + { + state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL)) + + init_key[j] + j; /* non linear */ + state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + j++; + if (i >= MT_N) + { + state[0] = state[MT_N-1]; + i = 1; + } + if (j >= key_length) + j = 0; + } + for (k = MT_N - 1; k; k--) + { + state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL)) + - i; /* non linear */ + state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i >= MT_N) + { + state[0] = state[MT_N-1]; + i = 1; + } + } + + state[0] = 0x80000000UL; /* MSB is 1; assuring nonzero initial array */ + left = 1; + initf = 1; +} + +void +oct_init_by_entropy (void) +{ + uint32_t entropy[MT_N]; + int n = 0; + + /* Look for entropy in /dev/urandom */ + FILE* urandom = fopen ("/dev/urandom", "rb"); + if (urandom) + { + while (n < MT_N) + { + unsigned char word[4]; + if (fread (word, 4, 1, urandom) != 1) + break; + entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+(static_cast<uint32_t>(word[3])<<24); + } + fclose (urandom); + } + + /* If there isn't enough entropy, gather some from various sources */ + if (n < MT_N) + entropy[n++] = time (0); /* Current time in seconds */ + if (n < MT_N) + entropy[n++] = clock (); /* CPU time used (usec) */ +#ifdef HAVE_GETTIMEOFDAY + if (n < MT_N) + { + struct timeval tv; + if (gettimeofday (&tv, 0) != -1) + entropy[n++] = tv.tv_usec; /* Fractional part of current time */ + } +#endif + /* Send all the entropy into the initial state vector */ + oct_init_by_array (entropy,n); +} + +void +oct_set_state (uint32_t *save) +{ + int i; + for (i = 0; i < MT_N; i++) + state[i] = save[i]; + left = save[MT_N]; + next = state + (MT_N - left + 1); +} + +void +oct_get_state (uint32_t *save) +{ + int i; + for (i = 0; i < MT_N; i++) + save[i] = state[i]; + save[MT_N] = left; +} + +static void +next_state (void) +{ + uint32_t *p = state; + int j; + + /* if init_by_int() has not been called, */ + /* a default initial seed is used */ + /* if (initf==0) init_by_int(5489UL); */ + /* Or better yet, a random seed! */ + if (initf == 0) + oct_init_by_entropy (); + + left = MT_N; + next = state; + + for (j = MT_N - MT_M + 1; --j; p++) + *p = p[MT_M] ^ TWIST(p[0], p[1]); + + for (j = MT_M; --j; p++) + *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]); + + *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]); +} + +/* generates a random number on [0,0xffffffff]-interval */ +static uint32_t +randmt (void) +{ + uint32_t y; + + if (--left == 0) + next_state (); + y = *next++; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + return (y ^ (y >> 18)); +} + +/* ===== Uniform generators ===== */ + +/* Select which 32 bit generator to use */ +#define randi32 randmt + +static uint64_t +randi53 (void) +{ + const uint32_t lo = randi32 (); + const uint32_t hi = randi32 () & 0x1FFFFF; +#ifdef HAVE_X86_32 + uint64_t u; + uint32_t *p = (uint32_t *)&u; + p[0] = lo; + p[1] = hi; + return u; +#else + return ((static_cast<uint64_t> (hi) << 32) | lo); +#endif +} + +static uint64_t +randi54 (void) +{ + const uint32_t lo = randi32 (); + const uint32_t hi = randi32 () & 0x3FFFFF; +#ifdef HAVE_X86_32 + uint64_t u; + uint32_t *p = static_cast<uint32_t *> (&u); + p[0] = lo; + p[1] = hi; + return u; +#else + return ((static_cast<uint64_t> (hi) << 32) | lo); +#endif +} + +/* generates a random number on (0,1)-real-interval */ +static float +randu32 (void) +{ + return (static_cast<float> (randi32 ()) + 0.5) * (1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1) with 53-bit resolution */ +static double +randu53 (void) +{ + const uint32_t a = randi32 () >> 5; + const uint32_t b = randi32 () >> 6; + return (a*67108864.0+b+0.4) * (1.0/9007199254740992.0); +} + +/* Determine mantissa for uniform doubles */ +double +oct_randu (void) +{ + return randu53 (); +} + +/* Determine mantissa for uniform floats */ +float +oct_float_randu (void) +{ + return randu32 (); +} + +/* ===== Ziggurat normal and exponential generators ===== */ + +#define ZIGGURAT_TABLE_SIZE 256 + +#define ZIGGURAT_NOR_R 3.6541528853610088 +#define ZIGGURAT_NOR_INV_R 0.27366123732975828 +#define NOR_SECTION_AREA 0.00492867323399 + +#define ZIGGURAT_EXP_R 7.69711747013104972 +#define ZIGGURAT_EXP_INV_R 0.129918765548341586 +#define EXP_SECTION_AREA 0.0039496598225815571993 + +#define ZIGINT uint64_t +#define EMANTISSA 9007199254740992.0 /* 53 bit mantissa */ +#define ERANDI randi53() /* 53 bits for mantissa */ +#define NMANTISSA EMANTISSA +#define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */ +#define RANDU randu53() + +static ZIGINT ki[ZIGGURAT_TABLE_SIZE]; +static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE]; +static ZIGINT ke[ZIGGURAT_TABLE_SIZE]; +static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE]; + +/* +This code is based on the paper Marsaglia and Tsang, "The ziggurat method +for generating random variables", Journ. Statistical Software. Code was +presented in this paper for a Ziggurat of 127 levels and using a 32 bit +integer random number generator. This version of the code, uses the +Mersenne Twister as the integer generator and uses 256 levels in the +Ziggurat. This has several advantages. + + 1) As Marsaglia and Tsang themselves states, the more levels the few + times the expensive tail algorithm must be called + 2) The cycle time of the generator is determined by the integer + generator, thus the use of a Mersenne Twister for the core random + generator makes this cycle extremely long. + 3) The license on the original code was unclear, thus rewriting the code + from the article means we are free of copyright issues. + 4) Compile flag for full 53-bit random mantissa. + +It should be stated that the authors made my life easier, by the fact that +the algorithm developed in the text of the article is for a 256 level +ziggurat, even if the code itself isn't... + +One modification to the algorithm developed in the article, is that it is +assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in +terms like 2^32 in the code. As the normal distribution is defined between +-Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus +in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for +this term. The exponential distribution is one sided so we use the +full 32 bits. We use EMANTISSA for this term. + +It appears that I'm slightly slower than the code in the article, this +is partially due to a better generator of random integers than they +use. But might also be that the case of rapid return was optimized by +inlining the relevant code with a #define. As the basic Mersenne +Twister is only 25% faster than this code I suspect that the main +reason is just the use of the Mersenne Twister and not the inlining, +so I'm not going to try and optimize further. +*/ + +static void +create_ziggurat_tables (void) +{ + int i; + double x, x1; + + /* Ziggurat tables for the normal distribution */ + x1 = ZIGGURAT_NOR_R; + wi[255] = x1 / NMANTISSA; + fi[255] = exp (-0.5 * x1 * x1); + + /* Index zero is special for tail strip, where Marsaglia and Tsang + * defines this as + * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, + * where v is the area of each strip of the ziggurat. + */ + ki[0] = static_cast<ZIGINT> (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); + wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; + fi[0] = 1.; + + for (i = 254; i > 0; i--) + { + /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus + * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) + */ + x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + fi[i+1])); + ki[i+1] = static_cast<ZIGINT> (x / x1 * NMANTISSA); + wi[i] = x / NMANTISSA; + fi[i] = exp (-0.5 * x * x); + x1 = x; + } + + ki[1] = 0; + + /* Zigurrat tables for the exponential distribution */ + x1 = ZIGGURAT_EXP_R; + we[255] = x1 / EMANTISSA; + fe[255] = exp (-x1); + + /* Index zero is special for tail strip, where Marsaglia and Tsang + * defines this as + * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, + * where v is the area of each strip of the ziggurat. + */ + ke[0] = static_cast<ZIGINT> (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); + we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; + fe[0] = 1.; + + for (i = 254; i > 0; i--) + { + /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus + * need inverse operator of y = exp(-x) -> x = -ln(y) + */ + x = - log (EXP_SECTION_AREA / x1 + fe[i+1]); + ke[i+1] = static_cast<ZIGINT> (x / x1 * EMANTISSA); + we[i] = x / EMANTISSA; + fe[i] = exp (-x); + x1 = x; + } + ke[1] = 0; + + initt = 0; +} + +/* + * Here is the guts of the algorithm. As Marsaglia and Tsang state the + * algorithm in their paper + * + * 1) Calculate a random signed integer j and let i be the index + * provided by the rightmost 8-bits of j + * 2) Set x = j * w_i. If j < k_i return x + * 3) If i = 0, then return x from the tail + * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x + * 5) goto step 1 + * + * Where f is the functional form of the distribution, which for a normal + * distribution is exp(-0.5*x*x) + */ + +double +oct_randn (void) +{ + if (initt) + create_ziggurat_tables (); + + while (1) + { + /* The following code is specialized for 32-bit mantissa. + * Compared to the arbitrary mantissa code, there is a performance + * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40% + * There is a bigger performance gain compared to using a full + * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240% + * Of course, different compilers and operating systems may + * have something to do with this. + */ +# ifdef HAVE_X86_32 + /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */ + double x; + int si,idx; + uint32_t lo, hi; + int64_t rabs; + uint32_t *p = (uint32_t *)&rabs; + lo = randi32 (); + idx = lo & 0xFF; + hi = randi32 (); + si = hi & UMASK; + p[0] = lo; + p[1] = hi & 0x1FFFFF; + x = ( si ? -rabs : rabs ) * wi[idx]; +# else /* ! HAVE_X86_32 */ + /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ + const uint64_t r = NRANDI; + const int64_t rabs = r >> 1; + const int idx = static_cast<int> (rabs & 0xFF); + const double x = ( (r & 1) ? -rabs : rabs) * wi[idx]; +# endif /* ! HAVE_X86_32 */ + if (rabs < static_cast<int64_t> (ki[idx])) + return x; /* 99.3% of the time we return here 1st try */ + else if (idx == 0) + { + /* As stated in Marsaglia and Tsang + * + * For the normal tail, the method of Marsaglia[5] provides: + * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, + * then return r+x. Except that r+x is always in the positive + * tail!!!! Any thing random might be used to determine the + * sign, but as we already have r we might as well use it + * + * [PAK] but not the bottom 8 bits, since they are all 0 here! + */ + double xx, yy; + do + { + xx = - ZIGGURAT_NOR_INV_R * log (RANDU); + yy = - log (RANDU); + } + while ( yy+yy <= xx*xx); + return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); + } + else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp (-0.5*x*x)) + return x; + } +} + +double +oct_rande (void) +{ + if (initt) + create_ziggurat_tables (); + + while (1) + { + ZIGINT ri = ERANDI; + const int idx = static_cast<int> (ri & 0xFF); + const double x = ri * we[idx]; + if (ri < ke[idx]) + return x; /* 98.9% of the time we return here 1st try */ + else if (idx == 0) + { + /* As stated in Marsaglia and Tsang + * + * For the exponential tail, the method of Marsaglia[5] provides: + * x = r - ln(U); + */ + return ZIGGURAT_EXP_R - log (RANDU); + } + else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp (-x)) + return x; + } +} + +#undef ZIGINT +#undef EMANTISSA +#undef ERANDI +#undef NMANTISSA +#undef NRANDI +#undef RANDU + +#define ZIGINT uint32_t +#define EMANTISSA 4294967296.0 /* 32 bit mantissa */ +#define ERANDI randi32() /* 32 bits for mantissa */ +#define NMANTISSA 2147483648.0 /* 31 bit mantissa */ +#define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */ +#define RANDU randu32() + +static ZIGINT fki[ZIGGURAT_TABLE_SIZE]; +static float fwi[ZIGGURAT_TABLE_SIZE], ffi[ZIGGURAT_TABLE_SIZE]; +static ZIGINT fke[ZIGGURAT_TABLE_SIZE]; +static float fwe[ZIGGURAT_TABLE_SIZE], ffe[ZIGGURAT_TABLE_SIZE]; + + +static void +create_ziggurat_float_tables (void) +{ + int i; + float x, x1; + + /* Ziggurat tables for the normal distribution */ + x1 = ZIGGURAT_NOR_R; + fwi[255] = x1 / NMANTISSA; + ffi[255] = exp (-0.5 * x1 * x1); + + /* Index zero is special for tail strip, where Marsaglia and Tsang + * defines this as + * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, + * where v is the area of each strip of the ziggurat. + */ + fki[0] = static_cast<ZIGINT> (x1 * ffi[255] / NOR_SECTION_AREA * NMANTISSA); + fwi[0] = NOR_SECTION_AREA / ffi[255] / NMANTISSA; + ffi[0] = 1.; + + for (i = 254; i > 0; i--) + { + /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus + * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) + */ + x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + ffi[i+1])); + fki[i+1] = static_cast<ZIGINT> (x / x1 * NMANTISSA); + fwi[i] = x / NMANTISSA; + ffi[i] = exp (-0.5 * x * x); + x1 = x; + } + + fki[1] = 0; + + /* Zigurrat tables for the exponential distribution */ + x1 = ZIGGURAT_EXP_R; + fwe[255] = x1 / EMANTISSA; + ffe[255] = exp (-x1); + + /* Index zero is special for tail strip, where Marsaglia and Tsang + * defines this as + * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, + * where v is the area of each strip of the ziggurat. + */ + fke[0] = static_cast<ZIGINT> (x1 * ffe[255] / EXP_SECTION_AREA * EMANTISSA); + fwe[0] = EXP_SECTION_AREA / ffe[255] / EMANTISSA; + ffe[0] = 1.; + + for (i = 254; i > 0; i--) + { + /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus + * need inverse operator of y = exp(-x) -> x = -ln(y) + */ + x = - log (EXP_SECTION_AREA / x1 + ffe[i+1]); + fke[i+1] = static_cast<ZIGINT> (x / x1 * EMANTISSA); + fwe[i] = x / EMANTISSA; + ffe[i] = exp (-x); + x1 = x; + } + fke[1] = 0; + + inittf = 0; +} + +/* + * Here is the guts of the algorithm. As Marsaglia and Tsang state the + * algorithm in their paper + * + * 1) Calculate a random signed integer j and let i be the index + * provided by the rightmost 8-bits of j + * 2) Set x = j * w_i. If j < k_i return x + * 3) If i = 0, then return x from the tail + * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x + * 5) goto step 1 + * + * Where f is the functional form of the distribution, which for a normal + * distribution is exp(-0.5*x*x) + */ + +float +oct_float_randn (void) +{ + if (inittf) + create_ziggurat_float_tables (); + + while (1) + { + /* 32-bit mantissa */ + const uint32_t r = randi32 (); + const uint32_t rabs = r & LMASK; + const int idx = static_cast<int> (r & 0xFF); + const float x = static_cast<int32_t> (r) * fwi[idx]; + if (rabs < fki[idx]) + return x; /* 99.3% of the time we return here 1st try */ + else if (idx == 0) + { + /* As stated in Marsaglia and Tsang + * + * For the normal tail, the method of Marsaglia[5] provides: + * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, + * then return r+x. Except that r+x is always in the positive + * tail!!!! Any thing random might be used to determine the + * sign, but as we already have r we might as well use it + * + * [PAK] but not the bottom 8 bits, since they are all 0 here! + */ + float xx, yy; + do + { + xx = - ZIGGURAT_NOR_INV_R * log (RANDU); + yy = - log (RANDU); + } + while ( yy+yy <= xx*xx); + return ((rabs & 0x100) ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); + } + else if ((ffi[idx-1] - ffi[idx]) * RANDU + ffi[idx] < exp (-0.5*x*x)) + return x; + } +} + +float +oct_float_rande (void) +{ + if (inittf) + create_ziggurat_float_tables (); + + while (1) + { + ZIGINT ri = ERANDI; + const int idx = static_cast<int> (ri & 0xFF); + const float x = ri * fwe[idx]; + if (ri < fke[idx]) + return x; /* 98.9% of the time we return here 1st try */ + else if (idx == 0) + { + /* As stated in Marsaglia and Tsang + * + * For the exponential tail, the method of Marsaglia[5] provides: + * x = r - ln(U); + */ + return ZIGGURAT_EXP_R - log (RANDU); + } + else if ((ffe[idx-1] - ffe[idx]) * RANDU + ffe[idx] < exp (-x)) + return x; + } +} + +/* Array generators */ +void +oct_fill_randu (octave_idx_type n, double *p) +{ + octave_idx_type i; + for (i = 0; i < n; i++) + p[i] = oct_randu (); +} + +void +oct_fill_randn (octave_idx_type n, double *p) +{ + octave_idx_type i; + for (i = 0; i < n; i++) + p[i] = oct_randn (); +} + +void +oct_fill_rande (octave_idx_type n, double *p) +{ + octave_idx_type i; + for (i = 0; i < n; i++) + p[i] = oct_rande (); +} + +void +oct_fill_float_randu (octave_idx_type n, float *p) +{ + octave_idx_type i; + for (i = 0; i < n; i++) + p[i] = oct_float_randu (); +} + +void +oct_fill_float_randn (octave_idx_type n, float *p) +{ + octave_idx_type i; + for (i = 0; i < n; i++) + p[i] = oct_float_randn (); +} + +void +oct_fill_float_rande (octave_idx_type n, float *p) +{ + octave_idx_type i; + for (i = 0; i < n; i++) + p[i] = oct_float_rande (); +}
--- a/liboctave/numeric/randmtzig.h Mon May 16 23:26:07 2016 -0400 +++ b/liboctave/numeric/randmtzig.h Mon May 16 23:20:50 2016 -0400 @@ -68,18 +68,14 @@ #define MT_N 624 -#ifdef __cplusplus -extern "C" { -#endif - -/* Mersenne Twister. */ +// Mersenne Twister. extern OCTAVE_API void oct_init_by_int (uint32_t s); extern OCTAVE_API void oct_init_by_array (uint32_t *init_key, int key_length); extern OCTAVE_API void oct_init_by_entropy (void); extern OCTAVE_API void oct_set_state (uint32_t *save); extern OCTAVE_API void oct_get_state (uint32_t *save); -/* Array generators. */ +// Array generators. extern OCTAVE_API double oct_randu (void); extern OCTAVE_API double oct_randn (void); extern OCTAVE_API double oct_rande (void); @@ -88,7 +84,7 @@ extern OCTAVE_API float oct_float_randn (void); extern OCTAVE_API float oct_float_rande (void); -/* Array generators. */ +// Array generators. extern OCTAVE_API void oct_fill_randu (octave_idx_type n, double *p); extern OCTAVE_API void oct_fill_randn (octave_idx_type n, double *p); extern OCTAVE_API void oct_fill_rande (octave_idx_type n, double *p); @@ -97,8 +93,4 @@ extern OCTAVE_API void oct_fill_float_randn (octave_idx_type n, float *p); extern OCTAVE_API void oct_fill_float_rande (octave_idx_type n, float *p); -#ifdef __cplusplus -} #endif - -#endif
--- a/liboctave/numeric/randpoisson.c Mon May 16 23:26:07 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,624 +0,0 @@ -/* - -Copyright (C) 2006-2015 John W. Eaton - -This file is part of Octave. - -Octave is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -Octave is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with Octave; see the file COPYING. If not, see -<http://www.gnu.org/licenses/>. - -*/ - -/* Original version written by Paul Kienzle distributed as free - software in the in the public domain. */ - -/* Needs the following defines: - * NAN: value to return for Not-A-Number - * RUNI: uniform generator on (0,1) - * RNOR: normal generator - * LGAMMA: log gamma function - * INFINITE: function to test whether a value is infinite - */ - -#if defined (HAVE_CONFIG_H) -# include "config.h" -#endif - -#include <stdio.h> - -#include "f77-fcn.h" -#include "lo-error.h" -#include "lo-ieee.h" -#include "lo-math.h" -#include "randmtzig.h" -#include "randpoisson.h" - -#undef NAN -#define NAN octave_NaN -#undef INFINITE -#define INFINITE lo_ieee_isinf -#define RUNI oct_randu() -#define RNOR oct_randn() -#define LGAMMA xlgamma - -F77_RET_T -F77_FUNC (dlgams, DLGAMS) (const double *, double *, double *); - -static double -xlgamma (double x) -{ - double result; -#ifdef HAVE_LGAMMA - result = lgamma (x); -#else - double sgngam; - - if (lo_ieee_isnan (x)) - result = x; - else if (x <= 0 || lo_ieee_isinf (x)) - result = octave_Inf; - else - F77_XFCN (dlgams, DLGAMS, (&x, &result, &sgngam)); -#endif - return result; -} - -/* ---- pprsc.c from Stadloeber's winrand --- */ - -/* flogfak(k) = ln(k!) */ -static double -flogfak (double k) -{ -#define C0 9.18938533204672742e-01 -#define C1 8.33333333333333333e-02 -#define C3 -2.77777777777777778e-03 -#define C5 7.93650793650793651e-04 -#define C7 -5.95238095238095238e-04 - - static double logfak[30L] = - { - 0.00000000000000000, 0.00000000000000000, 0.69314718055994531, - 1.79175946922805500, 3.17805383034794562, 4.78749174278204599, - 6.57925121201010100, 8.52516136106541430, 10.60460290274525023, - 12.80182748008146961, 15.10441257307551530, 17.50230784587388584, - 19.98721449566188615, 22.55216385312342289, 25.19122118273868150, - 27.89927138384089157, 30.67186010608067280, 33.50507345013688888, - 36.39544520803305358, 39.33988418719949404, 42.33561646075348503, - 45.38013889847690803, 48.47118135183522388, 51.60667556776437357, - 54.78472939811231919, 58.00360522298051994, 61.26170176100200198, - 64.55753862700633106, 67.88974313718153498, 71.25703896716800901 - }; - - double r, rr; - - if (k >= 30.0) - { - r = 1.0 / k; - rr = r * r; - return ((k + 0.5)*log (k) - k + C0 + r*(C1 + rr*(C3 + rr*(C5 + rr*C7)))); - } - else - return (logfak[(int)k]); -} - - -/****************************************************************** - * * - * Poisson Distribution - Patchwork Rejection/Inversion * - * * - ****************************************************************** - * * - * For parameter my < 10, Tabulated Inversion is applied. * - * For my >= 10, Patchwork Rejection is employed: * - * The area below the histogram function f(x) is rearranged in * - * its body by certain point reflections. Within a large center * - * interval variates are sampled efficiently by rejection from * - * uniform hats. Rectangular immediate acceptance regions speed * - * up the generation. The remaining tails are covered by * - * exponential functions. * - * * - ****************************************************************** - * * - * FUNCTION : - pprsc samples a random number from the Poisson * - * distribution with parameter my > 0. * - * REFERENCE : - H. Zechner (1994): Efficient sampling from * - * continuous and discrete unimodal distributions, * - * Doctoral Dissertation, 156 pp., Technical * - * University Graz, Austria. * - * SUBPROGRAM : - drand(seed) ... (0,1)-Uniform generator with * - * unsigned long integer *seed. * - * * - * Implemented by H. Zechner, January 1994 * - * Revised by F. Niederl, July 1994 * - * * - ******************************************************************/ - -static double -f (double k, double l_nu, double c_pm) -{ - return exp (k * l_nu - flogfak (k) - c_pm); -} - -static double -pprsc (double my) -{ - static double my_last = -1.0; - static double m, k2, k4, k1, k5; - static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm, - f1, f2, f4, f5, p1, p2, p3, p4, p5, p6; - double Dk, X, Y; - double Ds, U, V, W; - - if (my != my_last) - { /* set-up */ - my_last = my; - /* approximate deviation of reflection points k2, k4 from my - 1/2 */ - Ds = sqrt (my + 0.25); - - /* mode m, reflection points k2 and k4, and points k1 and k5, */ - /* which delimit the centre region of h(x) */ - m = floor (my); - k2 = ceil (my - 0.5 - Ds); - k4 = floor (my - 0.5 + Ds); - k1 = k2 + k2 - m + 1L; - k5 = k4 + k4 - m; - - /* range width of the critical left and right centre region */ - dl = (k2 - k1); - dr = (k5 - k4); - - /* recurrence constants r(k)=p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 */ - r1 = my / k1; - r2 = my / k2; - r4 = my / (k4 + 1.0); - r5 = my / (k5 + 1.0); - - /* reciprocal values of the scale parameters of exp. tail envelope */ - ll = log (r1); /* expon. tail left */ - lr = -log (r5); /* expon. tail right*/ - - /* Poisson constants, necessary for computing function values f(k) */ - l_my = log (my); - c_pm = m * l_my - flogfak (m); - - /* function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5 */ - f2 = f (k2, l_my, c_pm); - f4 = f (k4, l_my, c_pm); - f1 = f (k1, l_my, c_pm); - f5 = f (k5, l_my, c_pm); - - /* area of the two centre and the two exponential tail regions */ - /* area of the two immediate acceptance regions between k2, k4 */ - p1 = f2 * (dl + 1.0); /* immed. left */ - p2 = f2 * dl + p1; /* centre left */ - p3 = f4 * (dr + 1.0) + p2; /* immed. right */ - p4 = f4 * dr + p3; /* centre right */ - p5 = f1 / ll + p4; /* exp. tail left */ - p6 = f5 / lr + p5; /* exp. tail right*/ - } - - for (;;) - { - /* generate uniform number U -- U(0, p6) */ - /* case distinction corresponding to U */ - if ((U = RUNI * p6) < p2) - { /* centre left */ - - /* immediate acceptance region - R2 = [k2, m) *[0, f2), X = k2, ... m -1 */ - if ((V = U - p1) < 0.0) return (k2 + floor (U/f2)); - /* immediate acceptance region - R1 = [k1, k2)*[0, f1), X = k1, ... k2-1 */ - if ((W = V / dl) < f1 ) return (k1 + floor (V/f1)); - - /* computation of candidate X < k2, and its counterpart Y > k2 */ - /* either squeeze-acceptance of X or acceptance-rejection of Y */ - Dk = floor (dl * RUNI) + 1.0; - if (W <= f2 - Dk * (f2 - f2/r2)) - { /* quick accept of */ - return (k2 - Dk); /* X = k2 - Dk */ - } - if ((V = f2 + f2 - W) < 1.0) - { /* quick reject of Y*/ - Y = k2 + Dk; - if (V <= f2 + Dk * (1.0 - f2)/(dl + 1.0)) - { /* quick accept of */ - return (Y); /* Y = k2 + Dk */ - } - if (V <= f (Y, l_my, c_pm)) return (Y); /* final accept of Y*/ - } - X = k2 - Dk; - } - else if (U < p4) - { /* centre right */ - /* immediate acceptance region - R3 = [m, k4+1)*[0, f4), X = m, ... k4 */ - if ((V = U - p3) < 0.0) return (k4 - floor ((U - p2)/f4)); - /* immediate acceptance region - R4 = [k4+1, k5+1)*[0, f5) */ - if ((W = V / dr) < f5 ) return (k5 - floor (V/f5)); - - /* computation of candidate X > k4, and its counterpart Y < k4 */ - /* either squeeze-acceptance of X or acceptance-rejection of Y */ - Dk = floor (dr * RUNI) + 1.0; - if (W <= f4 - Dk * (f4 - f4*r4)) - { /* quick accept of */ - return (k4 + Dk); /* X = k4 + Dk */ - } - if ((V = f4 + f4 - W) < 1.0) - { /* quick reject of Y*/ - Y = k4 - Dk; - if (V <= f4 + Dk * (1.0 - f4)/ dr) - { /* quick accept of */ - return (Y); /* Y = k4 - Dk */ - } - if (V <= f (Y, l_my, c_pm)) return (Y); /* final accept of Y*/ - } - X = k4 + Dk; - } - else - { - W = RUNI; - if (U < p5) - { /* expon. tail left */ - Dk = floor (1.0 - log (W)/ll); - if ((X = k1 - Dk) < 0L) continue; /* 0 <= X <= k1 - 1 */ - W *= (U - p4) * ll; /* W -- U(0, h(x)) */ - if (W <= f1 - Dk * (f1 - f1/r1)) - return (X); /* quick accept of X*/ - } - else - { /* expon. tail right*/ - Dk = floor (1.0 - log (W)/lr); - X = k5 + Dk; /* X >= k5 + 1 */ - W *= (U - p5) * lr; /* W -- U(0, h(x)) */ - if (W <= f5 - Dk * (f5 - f5*r5)) - return (X); /* quick accept of X*/ - } - } - - /* acceptance-rejection test of candidate X from the original area */ - /* test, whether W <= f(k), with W = U*h(x) and U -- U(0, 1)*/ - /* log f(X) = (X - m)*log(my) - log X! + log m! */ - if (log (W) <= X * l_my - flogfak (X) - c_pm) return (X); - } -} -/* ---- pprsc.c end ------ */ - - -/* The remainder of the file is by Paul Kienzle */ - -/* Given uniform u, find x such that CDF(L,x)==u. Return x. */ -static void -poisson_cdf_lookup (double lambda, double *p, size_t n) -{ - /* Table size is predicated on the maximum value of lambda - * we want to store in the table, and the maximum value of - * returned by the uniform random number generator on [0,1). - * With lambda==10 and u_max = 1 - 1/(2^32+1), we - * have poisson_pdf(lambda,36) < 1-u_max. If instead our - * generator uses more bits of mantissa or returns a value - * in the range [0,1], then for lambda==10 we need a table - * size of 46 instead. For long doubles, the table size - * will need to be longer still. */ -#define TABLESIZE 46 - double t[TABLESIZE]; - - /* Precompute the table for the u up to and including 0.458. - * We will almost certainly need it. */ - int intlambda = (int)floor (lambda); - double P; - int tableidx; - size_t i = n; - - t[0] = P = exp (-lambda); - for (tableidx = 1; tableidx <= intlambda; tableidx++) - { - P = P*lambda/(double)tableidx; - t[tableidx] = t[tableidx-1] + P; - } - - while (i-- > 0) - { - double u = RUNI; - - /* If u > 0.458 we know we can jump to floor(lambda) before - * comparing (this observation is based on Stadlober's winrand - * code). For lambda >= 1, this will be a win. Lambda < 1 - * is already fast, so adding an extra comparison is not a - * problem. */ - int k = (u > 0.458 ? intlambda : 0); - - /* We aren't using a for loop here because when we find the - * right k we want to jump to the next iteration of the - * outer loop, and the continue statement will only work for - * the inner loop. */ - nextk: - if (u <= t[k]) - { - p[i] = (double) k; - continue; - } - if (++k < tableidx) - goto nextk; - - /* We only need high values of the table very rarely so we - * don't automatically compute the entire table. */ - while (tableidx < TABLESIZE) - { - P = P*lambda/(double)tableidx; - t[tableidx] = t[tableidx-1] + P; - /* Make sure we converge to 1.0 just in case u is uniform - * on [0,1] rather than [0,1). */ - if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0; - tableidx++; - if (u <= t[tableidx-1]) break; - } - - /* We are assuming that the table size is big enough here. - * This should be true even if RUNI is returning values in - * the range [0,1] rather than [0,1). */ - p[i] = (double)(tableidx-1); - } -} - -static void -poisson_cdf_lookup_float (double lambda, float *p, size_t n) -{ - double t[TABLESIZE]; - - /* Precompute the table for the u up to and including 0.458. - * We will almost certainly need it. */ - int intlambda = (int)floor (lambda); - double P; - int tableidx; - size_t i = n; - - t[0] = P = exp (-lambda); - for (tableidx = 1; tableidx <= intlambda; tableidx++) - { - P = P*lambda/(double)tableidx; - t[tableidx] = t[tableidx-1] + P; - } - - while (i-- > 0) - { - double u = RUNI; - int k = (u > 0.458 ? intlambda : 0); - nextk: - if (u <= t[k]) - { - p[i] = (float) k; - continue; - } - if (++k < tableidx) - goto nextk; - - while (tableidx < TABLESIZE) - { - P = P*lambda/(double)tableidx; - t[tableidx] = t[tableidx-1] + P; - if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0; - tableidx++; - if (u <= t[tableidx-1]) break; - } - - p[i] = (float)(tableidx-1); - } -} - -/* From Press, et al., Numerical Recipes */ -static void -poisson_rejection (double lambda, double *p, size_t n) -{ - double sq = sqrt (2.0*lambda); - double alxm = log (lambda); - double g = lambda*alxm - LGAMMA(lambda+1.0); - size_t i; - - for (i = 0; i < n; i++) - { - double y, em, t; - do - { - do - { - y = tan (M_PI*RUNI); - em = sq * y + lambda; - } while (em < 0.0); - em = floor (em); - t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g); - } while (RUNI > t); - p[i] = em; - } -} - -/* From Press, et al., Numerical Recipes */ -static void -poisson_rejection_float (double lambda, float *p, size_t n) -{ - double sq = sqrt (2.0*lambda); - double alxm = log (lambda); - double g = lambda*alxm - LGAMMA(lambda+1.0); - size_t i; - - for (i = 0; i < n; i++) - { - double y, em, t; - do - { - do - { - y = tan (M_PI*RUNI); - em = sq * y + lambda; - } while (em < 0.0); - em = floor (em); - t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g); - } while (RUNI > t); - p[i] = em; - } -} - -/* The cutoff of L <= 1e8 in the following two functions before using - * the normal approximation is based on: - * > L=1e8; x=floor(linspace(0,2*L,1000)); - * > max(abs(normal_pdf(x,L,L)-poisson_pdf(x,L))) - * ans = 1.1376e-28 - * For L=1e7, the max is around 1e-9, which is within the step size of RUNI. - * For L>1e10 the pprsc function breaks down, as I saw from the histogram - * of a large sample, so 1e8 is both small enough and large enough. */ - -/* Generate a set of poisson numbers with the same distribution */ -void -oct_fill_randp (double L, octave_idx_type n, double *p) -{ - octave_idx_type i; - if (L < 0.0 || INFINITE(L)) - { - for (i=0; i<n; i++) - p[i] = NAN; - } - else if (L <= 10.0) - { - poisson_cdf_lookup (L, p, n); - } - else if (L <= 1e8) - { - for (i=0; i<n; i++) - p[i] = pprsc (L); - } - else - { - /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ - const double sqrtL = sqrt (L); - for (i = 0; i < n; i++) - { - p[i] = floor (RNOR*sqrtL + L + 0.5); - if (p[i] < 0.0) - p[i] = 0.0; /* will probably never happen */ - } - } -} - -/* Generate one poisson variate */ -double -oct_randp (double L) -{ - double ret; - if (L < 0.0) ret = NAN; - else if (L <= 12.0) - { - /* From Press, et al. Numerical recipes */ - double g = exp (-L); - int em = -1; - double t = 1.0; - do - { - ++em; - t *= RUNI; - } while (t > g); - ret = em; - } - else if (L <= 1e8) - { - /* numerical recipes */ - poisson_rejection (L, &ret, 1); - } - else if (INFINITE(L)) - { - /* FIXME: R uses NaN, but the normal approximation suggests that - * limit should be Inf. Which is correct? */ - ret = NAN; - } - else - { - /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ - ret = floor (RNOR*sqrt (L) + L + 0.5); - if (ret < 0.0) ret = 0.0; /* will probably never happen */ - } - return ret; -} - -/* Generate a set of poisson numbers with the same distribution */ -void -oct_fill_float_randp (float FL, octave_idx_type n, float *p) -{ - double L = FL; - octave_idx_type i; - if (L < 0.0 || INFINITE(L)) - { - for (i=0; i<n; i++) - p[i] = NAN; - } - else if (L <= 10.0) - { - poisson_cdf_lookup_float (L, p, n); - } - else if (L <= 1e8) - { - for (i=0; i<n; i++) - p[i] = pprsc (L); - } - else - { - /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ - const double sqrtL = sqrt (L); - for (i = 0; i < n; i++) - { - p[i] = floor (RNOR*sqrtL + L + 0.5); - if (p[i] < 0.0) - p[i] = 0.0; /* will probably never happen */ - } - } -} - -/* Generate one poisson variate */ -float -oct_float_randp (float FL) -{ - double L = FL; - float ret; - if (L < 0.0) ret = NAN; - else if (L <= 12.0) - { - /* From Press, et al. Numerical recipes */ - double g = exp (-L); - int em = -1; - double t = 1.0; - do - { - ++em; - t *= RUNI; - } while (t > g); - ret = em; - } - else if (L <= 1e8) - { - /* numerical recipes */ - poisson_rejection_float (L, &ret, 1); - } - else if (INFINITE(L)) - { - /* FIXME: R uses NaN, but the normal approximation suggests that - * limit should be Inf. Which is correct? */ - ret = NAN; - } - else - { - /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ - ret = floor (RNOR*sqrt (L) + L + 0.5); - if (ret < 0.0) ret = 0.0; /* will probably never happen */ - } - return ret; -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/numeric/randpoisson.cc Mon May 16 23:20:50 2016 -0400 @@ -0,0 +1,622 @@ +/* + +Copyright (C) 2006-2015 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +/* Original version written by Paul Kienzle distributed as free + software in the in the public domain. */ + +/* Needs the following defines: + * NAN: value to return for Not-A-Number + * RUNI: uniform generator on (0,1) + * RNOR: normal generator + * LGAMMA: log gamma function + * INFINITE: function to test whether a value is infinite + */ + +#if defined (HAVE_CONFIG_H) +# include "config.h" +#endif + +#include "f77-fcn.h" +#include "lo-error.h" +#include "lo-ieee.h" +#include "lo-math.h" +#include "randmtzig.h" +#include "randpoisson.h" + +#undef NAN +#define NAN octave_NaN +#undef INFINITE +#define INFINITE lo_ieee_isinf +#define RUNI oct_randu() +#define RNOR oct_randn() +#define LGAMMA xlgamma + +F77_RET_T +F77_FUNC (dlgams, DLGAMS) (const double *, double *, double *); + +static double +xlgamma (double x) +{ + double result; +#ifdef HAVE_LGAMMA + result = lgamma (x); +#else + double sgngam; + + if (lo_ieee_isnan (x)) + result = x; + else if (x <= 0 || lo_ieee_isinf (x)) + result = octave_Inf; + else + F77_XFCN (dlgams, DLGAMS, (&x, &result, &sgngam)); +#endif + return result; +} + +/* ---- pprsc.c from Stadloeber's winrand --- */ + +/* flogfak(k) = ln(k!) */ +static double +flogfak (double k) +{ +#define C0 9.18938533204672742e-01 +#define C1 8.33333333333333333e-02 +#define C3 -2.77777777777777778e-03 +#define C5 7.93650793650793651e-04 +#define C7 -5.95238095238095238e-04 + + static double logfak[30L] = + { + 0.00000000000000000, 0.00000000000000000, 0.69314718055994531, + 1.79175946922805500, 3.17805383034794562, 4.78749174278204599, + 6.57925121201010100, 8.52516136106541430, 10.60460290274525023, + 12.80182748008146961, 15.10441257307551530, 17.50230784587388584, + 19.98721449566188615, 22.55216385312342289, 25.19122118273868150, + 27.89927138384089157, 30.67186010608067280, 33.50507345013688888, + 36.39544520803305358, 39.33988418719949404, 42.33561646075348503, + 45.38013889847690803, 48.47118135183522388, 51.60667556776437357, + 54.78472939811231919, 58.00360522298051994, 61.26170176100200198, + 64.55753862700633106, 67.88974313718153498, 71.25703896716800901 + }; + + double r, rr; + + if (k >= 30.0) + { + r = 1.0 / k; + rr = r * r; + return ((k + 0.5)*log (k) - k + C0 + r*(C1 + rr*(C3 + rr*(C5 + rr*C7)))); + } + else + return (logfak[static_cast<int> (k)]); +} + + +/****************************************************************** + * * + * Poisson Distribution - Patchwork Rejection/Inversion * + * * + ****************************************************************** + * * + * For parameter my < 10, Tabulated Inversion is applied. * + * For my >= 10, Patchwork Rejection is employed: * + * The area below the histogram function f(x) is rearranged in * + * its body by certain point reflections. Within a large center * + * interval variates are sampled efficiently by rejection from * + * uniform hats. Rectangular immediate acceptance regions speed * + * up the generation. The remaining tails are covered by * + * exponential functions. * + * * + ****************************************************************** + * * + * FUNCTION : - pprsc samples a random number from the Poisson * + * distribution with parameter my > 0. * + * REFERENCE : - H. Zechner (1994): Efficient sampling from * + * continuous and discrete unimodal distributions, * + * Doctoral Dissertation, 156 pp., Technical * + * University Graz, Austria. * + * SUBPROGRAM : - drand(seed) ... (0,1)-Uniform generator with * + * unsigned long integer *seed. * + * * + * Implemented by H. Zechner, January 1994 * + * Revised by F. Niederl, July 1994 * + * * + ******************************************************************/ + +static double +f (double k, double l_nu, double c_pm) +{ + return exp (k * l_nu - flogfak (k) - c_pm); +} + +static double +pprsc (double my) +{ + static double my_last = -1.0; + static double m, k2, k4, k1, k5; + static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm, + f1, f2, f4, f5, p1, p2, p3, p4, p5, p6; + double Dk, X, Y; + double Ds, U, V, W; + + if (my != my_last) + { /* set-up */ + my_last = my; + /* approximate deviation of reflection points k2, k4 from my - 1/2 */ + Ds = sqrt (my + 0.25); + + /* mode m, reflection points k2 and k4, and points k1 and k5, */ + /* which delimit the centre region of h(x) */ + m = floor (my); + k2 = ceil (my - 0.5 - Ds); + k4 = floor (my - 0.5 + Ds); + k1 = k2 + k2 - m + 1L; + k5 = k4 + k4 - m; + + /* range width of the critical left and right centre region */ + dl = (k2 - k1); + dr = (k5 - k4); + + /* recurrence constants r(k)=p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 */ + r1 = my / k1; + r2 = my / k2; + r4 = my / (k4 + 1.0); + r5 = my / (k5 + 1.0); + + /* reciprocal values of the scale parameters of exp. tail envelope */ + ll = log (r1); /* expon. tail left */ + lr = -log (r5); /* expon. tail right*/ + + /* Poisson constants, necessary for computing function values f(k) */ + l_my = log (my); + c_pm = m * l_my - flogfak (m); + + /* function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5 */ + f2 = f (k2, l_my, c_pm); + f4 = f (k4, l_my, c_pm); + f1 = f (k1, l_my, c_pm); + f5 = f (k5, l_my, c_pm); + + /* area of the two centre and the two exponential tail regions */ + /* area of the two immediate acceptance regions between k2, k4 */ + p1 = f2 * (dl + 1.0); /* immed. left */ + p2 = f2 * dl + p1; /* centre left */ + p3 = f4 * (dr + 1.0) + p2; /* immed. right */ + p4 = f4 * dr + p3; /* centre right */ + p5 = f1 / ll + p4; /* exp. tail left */ + p6 = f5 / lr + p5; /* exp. tail right*/ + } + + for (;;) + { + /* generate uniform number U -- U(0, p6) */ + /* case distinction corresponding to U */ + if ((U = RUNI * p6) < p2) + { /* centre left */ + + /* immediate acceptance region + R2 = [k2, m) *[0, f2), X = k2, ... m -1 */ + if ((V = U - p1) < 0.0) return (k2 + floor (U/f2)); + /* immediate acceptance region + R1 = [k1, k2)*[0, f1), X = k1, ... k2-1 */ + if ((W = V / dl) < f1 ) return (k1 + floor (V/f1)); + + /* computation of candidate X < k2, and its counterpart Y > k2 */ + /* either squeeze-acceptance of X or acceptance-rejection of Y */ + Dk = floor (dl * RUNI) + 1.0; + if (W <= f2 - Dk * (f2 - f2/r2)) + { /* quick accept of */ + return (k2 - Dk); /* X = k2 - Dk */ + } + if ((V = f2 + f2 - W) < 1.0) + { /* quick reject of Y*/ + Y = k2 + Dk; + if (V <= f2 + Dk * (1.0 - f2)/(dl + 1.0)) + { /* quick accept of */ + return (Y); /* Y = k2 + Dk */ + } + if (V <= f (Y, l_my, c_pm)) return (Y); /* final accept of Y*/ + } + X = k2 - Dk; + } + else if (U < p4) + { /* centre right */ + /* immediate acceptance region + R3 = [m, k4+1)*[0, f4), X = m, ... k4 */ + if ((V = U - p3) < 0.0) return (k4 - floor ((U - p2)/f4)); + /* immediate acceptance region + R4 = [k4+1, k5+1)*[0, f5) */ + if ((W = V / dr) < f5 ) return (k5 - floor (V/f5)); + + /* computation of candidate X > k4, and its counterpart Y < k4 */ + /* either squeeze-acceptance of X or acceptance-rejection of Y */ + Dk = floor (dr * RUNI) + 1.0; + if (W <= f4 - Dk * (f4 - f4*r4)) + { /* quick accept of */ + return (k4 + Dk); /* X = k4 + Dk */ + } + if ((V = f4 + f4 - W) < 1.0) + { /* quick reject of Y*/ + Y = k4 - Dk; + if (V <= f4 + Dk * (1.0 - f4)/ dr) + { /* quick accept of */ + return (Y); /* Y = k4 - Dk */ + } + if (V <= f (Y, l_my, c_pm)) return (Y); /* final accept of Y*/ + } + X = k4 + Dk; + } + else + { + W = RUNI; + if (U < p5) + { /* expon. tail left */ + Dk = floor (1.0 - log (W)/ll); + if ((X = k1 - Dk) < 0L) continue; /* 0 <= X <= k1 - 1 */ + W *= (U - p4) * ll; /* W -- U(0, h(x)) */ + if (W <= f1 - Dk * (f1 - f1/r1)) + return (X); /* quick accept of X*/ + } + else + { /* expon. tail right*/ + Dk = floor (1.0 - log (W)/lr); + X = k5 + Dk; /* X >= k5 + 1 */ + W *= (U - p5) * lr; /* W -- U(0, h(x)) */ + if (W <= f5 - Dk * (f5 - f5*r5)) + return (X); /* quick accept of X*/ + } + } + + /* acceptance-rejection test of candidate X from the original area */ + /* test, whether W <= f(k), with W = U*h(x) and U -- U(0, 1)*/ + /* log f(X) = (X - m)*log(my) - log X! + log m! */ + if (log (W) <= X * l_my - flogfak (X) - c_pm) return (X); + } +} +/* ---- pprsc.c end ------ */ + + +/* The remainder of the file is by Paul Kienzle */ + +/* Given uniform u, find x such that CDF(L,x)==u. Return x. */ +static void +poisson_cdf_lookup (double lambda, double *p, size_t n) +{ + /* Table size is predicated on the maximum value of lambda + * we want to store in the table, and the maximum value of + * returned by the uniform random number generator on [0,1). + * With lambda==10 and u_max = 1 - 1/(2^32+1), we + * have poisson_pdf(lambda,36) < 1-u_max. If instead our + * generator uses more bits of mantissa or returns a value + * in the range [0,1], then for lambda==10 we need a table + * size of 46 instead. For long doubles, the table size + * will need to be longer still. */ +#define TABLESIZE 46 + double t[TABLESIZE]; + + /* Precompute the table for the u up to and including 0.458. + * We will almost certainly need it. */ + int intlambda = static_cast<int> (floor (lambda)); + double P; + int tableidx; + size_t i = n; + + t[0] = P = exp (-lambda); + for (tableidx = 1; tableidx <= intlambda; tableidx++) + { + P = P*lambda/static_cast<double> (tableidx); + t[tableidx] = t[tableidx-1] + P; + } + + while (i-- > 0) + { + double u = RUNI; + + /* If u > 0.458 we know we can jump to floor(lambda) before + * comparing (this observation is based on Stadlober's winrand + * code). For lambda >= 1, this will be a win. Lambda < 1 + * is already fast, so adding an extra comparison is not a + * problem. */ + int k = (u > 0.458 ? intlambda : 0); + + /* We aren't using a for loop here because when we find the + * right k we want to jump to the next iteration of the + * outer loop, and the continue statement will only work for + * the inner loop. */ + nextk: + if (u <= t[k]) + { + p[i] = static_cast<double> (k); + continue; + } + if (++k < tableidx) + goto nextk; + + /* We only need high values of the table very rarely so we + * don't automatically compute the entire table. */ + while (tableidx < TABLESIZE) + { + P = P*lambda/static_cast<double> (tableidx); + t[tableidx] = t[tableidx-1] + P; + /* Make sure we converge to 1.0 just in case u is uniform + * on [0,1] rather than [0,1). */ + if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0; + tableidx++; + if (u <= t[tableidx-1]) break; + } + + /* We are assuming that the table size is big enough here. + * This should be true even if RUNI is returning values in + * the range [0,1] rather than [0,1). */ + p[i] = static_cast<double> (tableidx-1); + } +} + +static void +poisson_cdf_lookup_float (double lambda, float *p, size_t n) +{ + double t[TABLESIZE]; + + /* Precompute the table for the u up to and including 0.458. + * We will almost certainly need it. */ + int intlambda = static_cast<int> (floor (lambda)); + double P; + int tableidx; + size_t i = n; + + t[0] = P = exp (-lambda); + for (tableidx = 1; tableidx <= intlambda; tableidx++) + { + P = P*lambda/static_cast<double> (tableidx); + t[tableidx] = t[tableidx-1] + P; + } + + while (i-- > 0) + { + double u = RUNI; + int k = (u > 0.458 ? intlambda : 0); + nextk: + if (u <= t[k]) + { + p[i] = static_cast<float> (k); + continue; + } + if (++k < tableidx) + goto nextk; + + while (tableidx < TABLESIZE) + { + P = P*lambda/static_cast<double> (tableidx); + t[tableidx] = t[tableidx-1] + P; + if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0; + tableidx++; + if (u <= t[tableidx-1]) break; + } + + p[i] = static_cast<float> (tableidx-1); + } +} + +/* From Press, et al., Numerical Recipes */ +static void +poisson_rejection (double lambda, double *p, size_t n) +{ + double sq = sqrt (2.0*lambda); + double alxm = log (lambda); + double g = lambda*alxm - LGAMMA(lambda+1.0); + size_t i; + + for (i = 0; i < n; i++) + { + double y, em, t; + do + { + do + { + y = tan (M_PI*RUNI); + em = sq * y + lambda; + } while (em < 0.0); + em = floor (em); + t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g); + } while (RUNI > t); + p[i] = em; + } +} + +/* From Press, et al., Numerical Recipes */ +static void +poisson_rejection_float (double lambda, float *p, size_t n) +{ + double sq = sqrt (2.0*lambda); + double alxm = log (lambda); + double g = lambda*alxm - LGAMMA(lambda+1.0); + size_t i; + + for (i = 0; i < n; i++) + { + double y, em, t; + do + { + do + { + y = tan (M_PI*RUNI); + em = sq * y + lambda; + } while (em < 0.0); + em = floor (em); + t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g); + } while (RUNI > t); + p[i] = em; + } +} + +/* The cutoff of L <= 1e8 in the following two functions before using + * the normal approximation is based on: + * > L=1e8; x=floor(linspace(0,2*L,1000)); + * > max(abs(normal_pdf(x,L,L)-poisson_pdf(x,L))) + * ans = 1.1376e-28 + * For L=1e7, the max is around 1e-9, which is within the step size of RUNI. + * For L>1e10 the pprsc function breaks down, as I saw from the histogram + * of a large sample, so 1e8 is both small enough and large enough. */ + +/* Generate a set of poisson numbers with the same distribution */ +void +oct_fill_randp (double L, octave_idx_type n, double *p) +{ + octave_idx_type i; + if (L < 0.0 || INFINITE(L)) + { + for (i=0; i<n; i++) + p[i] = NAN; + } + else if (L <= 10.0) + { + poisson_cdf_lookup (L, p, n); + } + else if (L <= 1e8) + { + for (i=0; i<n; i++) + p[i] = pprsc (L); + } + else + { + /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ + const double sqrtL = sqrt (L); + for (i = 0; i < n; i++) + { + p[i] = floor (RNOR*sqrtL + L + 0.5); + if (p[i] < 0.0) + p[i] = 0.0; /* will probably never happen */ + } + } +} + +/* Generate one poisson variate */ +double +oct_randp (double L) +{ + double ret; + if (L < 0.0) ret = NAN; + else if (L <= 12.0) + { + /* From Press, et al. Numerical recipes */ + double g = exp (-L); + int em = -1; + double t = 1.0; + do + { + ++em; + t *= RUNI; + } while (t > g); + ret = em; + } + else if (L <= 1e8) + { + /* numerical recipes */ + poisson_rejection (L, &ret, 1); + } + else if (INFINITE(L)) + { + /* FIXME: R uses NaN, but the normal approximation suggests that + * limit should be Inf. Which is correct? */ + ret = NAN; + } + else + { + /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ + ret = floor (RNOR*sqrt (L) + L + 0.5); + if (ret < 0.0) ret = 0.0; /* will probably never happen */ + } + return ret; +} + +/* Generate a set of poisson numbers with the same distribution */ +void +oct_fill_float_randp (float FL, octave_idx_type n, float *p) +{ + double L = FL; + octave_idx_type i; + if (L < 0.0 || INFINITE(L)) + { + for (i=0; i<n; i++) + p[i] = NAN; + } + else if (L <= 10.0) + { + poisson_cdf_lookup_float (L, p, n); + } + else if (L <= 1e8) + { + for (i=0; i<n; i++) + p[i] = pprsc (L); + } + else + { + /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ + const double sqrtL = sqrt (L); + for (i = 0; i < n; i++) + { + p[i] = floor (RNOR*sqrtL + L + 0.5); + if (p[i] < 0.0) + p[i] = 0.0; /* will probably never happen */ + } + } +} + +/* Generate one poisson variate */ +float +oct_float_randp (float FL) +{ + double L = FL; + float ret; + if (L < 0.0) ret = NAN; + else if (L <= 12.0) + { + /* From Press, et al. Numerical recipes */ + double g = exp (-L); + int em = -1; + double t = 1.0; + do + { + ++em; + t *= RUNI; + } while (t > g); + ret = em; + } + else if (L <= 1e8) + { + /* numerical recipes */ + poisson_rejection_float (L, &ret, 1); + } + else if (INFINITE(L)) + { + /* FIXME: R uses NaN, but the normal approximation suggests that + * limit should be Inf. Which is correct? */ + ret = NAN; + } + else + { + /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ + ret = floor (RNOR*sqrt (L) + L + 0.5); + if (ret < 0.0) ret = 0.0; /* will probably never happen */ + } + return ret; +}
--- a/liboctave/numeric/randpoisson.h Mon May 16 23:26:07 2016 -0400 +++ b/liboctave/numeric/randpoisson.h Mon May 16 23:20:50 2016 -0400 @@ -28,10 +28,6 @@ #include "octave-config.h" -#ifdef __cplusplus -extern "C" { -#endif - extern OCTAVE_API double oct_randp (double L); @@ -44,8 +40,4 @@ extern OCTAVE_API void oct_fill_float_randp (float L, octave_idx_type n, float *p); -#ifdef __cplusplus -} #endif - -#endif