7019
|
1 /* |
|
2 |
|
3 Copyright (C) 2006, 2007 John W. Eaton |
|
4 |
|
5 This file is part of Octave. |
|
6 |
|
7 Octave is free software; you can redistribute it and/or modify it |
|
8 under the terms of the GNU General Public License as published by the |
|
9 Free Software Foundation; either version 3 of the License, or (at your |
|
10 option) any later version. |
|
11 |
|
12 Octave is distributed in the hope that it will be useful, but WITHOUT |
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
15 for more details. |
|
16 |
|
17 You should have received a copy of the GNU General Public License |
|
18 along with Octave; see the file COPYING. If not, see |
|
19 <http://www.gnu.org/licenses/>. |
|
20 |
|
21 */ |
|
22 |
5742
|
23 /* |
|
24 A C-program for MT19937, with initialization improved 2002/2/10. |
|
25 Coded by Takuji Nishimura and Makoto Matsumoto. |
|
26 This is a faster version by taking Shawn Cokus's optimization, |
|
27 Matthew Bellew's simplification, Isaku Wada's real version. |
|
28 David Bateman added normal and exponential distributions following |
|
29 Marsaglia and Tsang's Ziggurat algorithm. |
|
30 |
|
31 Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, |
|
32 Copyright (C) 2004, David Bateman |
|
33 All rights reserved. |
|
34 |
|
35 Redistribution and use in source and binary forms, with or without |
|
36 modification, are permitted provided that the following conditions |
|
37 are met: |
|
38 |
|
39 1. Redistributions of source code must retain the above copyright |
|
40 notice, this list of conditions and the following disclaimer. |
|
41 |
|
42 2. Redistributions in binary form must reproduce the above copyright |
|
43 notice, this list of conditions and the following disclaimer in the |
|
44 documentation and/or other materials provided with the distribution. |
|
45 |
|
46 3. The names of its contributors may not be used to endorse or promote |
|
47 products derived from this software without specific prior written |
|
48 permission. |
|
49 |
|
50 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
51 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
52 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
53 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
|
54 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
55 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
56 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
57 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|
58 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
59 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
60 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
61 |
|
62 |
|
63 Any feedback is very welcome. |
|
64 http://www.math.keio.ac.jp/matumoto/emt.html |
|
65 email: matumoto@math.keio.ac.jp |
|
66 |
|
67 * 2006-04-01 David Bateman |
|
68 * * convert for use in octave, declaring static functions only used |
|
69 * here and adding oct_ to functions visible externally |
|
70 * * inverse sense of ALLBITS |
|
71 * 2004-01-19 Paul Kienzle |
|
72 * * comment out main |
|
73 * add init_by_entropy, get_state, set_state |
|
74 * * converted to allow compiling by C++ compiler |
|
75 * |
|
76 * 2004-01-25 David Bateman |
|
77 * * Add Marsaglia and Tsang Ziggurat code |
|
78 * |
|
79 * 2004-07-13 Paul Kienzle |
|
80 * * make into an independent library with some docs. |
|
81 * * introduce new main and test code. |
|
82 * |
|
83 * 2004-07-28 Paul Kienzle & David Bateman |
|
84 * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa |
|
85 * * make the naming scheme more uniform |
|
86 * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch. |
|
87 * |
|
88 * 2005-02-23 Paul Kienzle |
|
89 * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control |
|
90 */ |
|
91 |
|
92 /* |
|
93 === Build instructions === |
|
94 |
|
95 Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is |
|
96 available. This is not necessary if your architecture has |
|
97 /dev/urandom defined. |
|
98 |
|
99 Compile with -DALLBITS to disable 53-bit random numbers. Generating |
|
100 53-bit random numbers is about 50% slower than generating 32-bit ones. |
|
101 |
|
102 Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86. |
|
103 You can force X86 behaviour with -DUSE_X86_32=1, or suppress it with |
|
104 -DUSE_X86_32=0. You should also consider -march=i686 or similar for |
|
105 extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit |
|
106 x86 architectures. |
|
107 |
|
108 If you want to replace the Mersenne Twister with another |
|
109 generator then redefine randi32 appropriately. |
|
110 |
|
111 === Usage instructions === |
|
112 Before using any of the generators, initialize the state with one of |
|
113 oct_init_by_int, oct_init_by_array or oct_init_by_entropy. |
|
114 |
|
115 All generators share the same state vector. |
|
116 |
|
117 === Mersenne Twister === |
|
118 void oct_init_by_int(uint32_t s) 32-bit initial state |
|
119 void oct_init_by_array(uint32_t k[],int m) m*32-bit initial state |
|
120 void oct_init_by_entropy(void) random initial state |
|
121 void oct_get_state(uint32_t save[MT_N+1]) saves state in array |
|
122 void oct_set_state(uint32_t save[MT_N+1]) restores state from array |
5766
|
123 static uint32_t randmt(void) returns 32-bit unsigned int |
5742
|
124 |
|
125 === inline generators === |
5766
|
126 static uint32_t randi32(void) returns 32-bit unsigned int |
|
127 static uint64_t randi53(void) returns 53-bit unsigned int |
|
128 static uint64_t randi54(void) returns 54-bit unsigned int |
|
129 static uint64_t randi64(void) returns 64-bit unsigned int |
|
130 static double randu32(void) returns 32-bit uniform in (0,1) |
|
131 static double randu53(void) returns 53-bit uniform in (0,1) |
5742
|
132 |
|
133 double oct_randu(void) returns M-bit uniform in (0,1) |
|
134 double oct_randn(void) returns M-bit standard normal |
|
135 double oct_rande(void) returns N-bit standard exponential |
|
136 |
|
137 === Array generators === |
|
138 void oct_fill_randi32(octave_idx_type, uint32_t []) |
|
139 void oct_fill_randi64(octave_idx_type, uint64_t []) |
|
140 void oct_fill_randu(octave_idx_type, double []) |
|
141 void oct_fill_randn(octave_idx_type, double []) |
|
142 void oct_fill_rande(octave_idx_type, double []) |
|
143 |
|
144 */ |
|
145 |
|
146 #if defined (HAVE_CONFIG_H) |
|
147 #include <config.h> |
|
148 #endif |
|
149 |
|
150 #include <math.h> |
|
151 #include <stdio.h> |
|
152 #include <time.h> |
|
153 |
|
154 #ifdef HAVE_GETTIMEOFDAY |
|
155 #include <sys/time.h> |
|
156 #endif |
|
157 |
|
158 #include "randmtzig.h" |
|
159 |
5775
|
160 /* FIXME may want to suppress X86 if sizeof(long)>4 */ |
5742
|
/* Decide whether the x86-specific behaviour is requested.  An explicit
   -DUSE_X86_32=0|1 on the command line always wins; otherwise the value
   is derived from the i386 / HAVE_X86_32 macros.  */
#ifndef USE_X86_32
#  if defined (HAVE_X86_32) || defined (i386)
#    define USE_X86_32 1
#  else
#    define USE_X86_32 0
#  endif
#endif
|
168 |
|
169 /* ===== Mersenne Twister 32-bit generator ===== */ |
|
170 |
|
/* Mersenne Twister recurrence parameters.  */
#define MT_M 397
#define MATRIX_A 0x9908b0dfUL     /* constant vector a */
#define UMASK 0x80000000UL        /* most significant w-r bits */
#define LMASK 0x7fffffffUL        /* least significant r bits */

/* Top bit of U joined with the low 31 bits of V.  */
#define MIXBITS(u, v) (((u) & UMASK) | ((v) & LMASK))

/* Shift the mixed word right by one and fold in MATRIX_A when V is odd.  */
#define TWIST(u, v) \
  ((MIXBITS (u, v) >> 1) ^ (((v) & 1UL) ? MATRIX_A : 0UL))
|
177 |
|
178 static uint32_t *next; |
|
179 static uint32_t state[MT_N]; /* the array for the state vector */ |
|
180 static int left = 1; |
|
181 static int initf = 0; |
|
182 static int initt = 1; |
|
183 |
|
184 /* initializes state[MT_N] with a seed */ |
|
185 void |
|
186 oct_init_by_int (uint32_t s) |
|
187 { |
|
188 int j; |
|
189 state[0] = s & 0xffffffffUL; |
|
190 for (j = 1; j < MT_N; j++) { |
|
191 state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); |
|
192 /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ |
|
193 /* In the previous versions, MSBs of the seed affect */ |
|
194 /* only MSBs of the array state[]. */ |
|
195 /* 2002/01/09 modified by Makoto Matsumoto */ |
|
196 state[j] &= 0xffffffffUL; /* for >32 bit machines */ |
|
197 } |
|
198 left = 1; |
|
199 initf = 1; |
|
200 } |
|
201 |
|
202 /* initialize by an array with array-length */ |
|
203 /* init_key is the array for initializing keys */ |
|
204 /* key_length is its length */ |
|
205 void |
|
206 oct_init_by_array (uint32_t init_key[], int key_length) |
|
207 { |
|
208 int i, j, k; |
|
209 oct_init_by_int (19650218UL); |
|
210 i = 1; |
|
211 j = 0; |
|
212 k = (MT_N > key_length ? MT_N : key_length); |
|
213 for (; k; k--) |
|
214 { |
|
215 state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL)) |
|
216 + init_key[j] + j; /* non linear */ |
|
217 state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ |
|
218 i++; |
|
219 j++; |
|
220 if (i >= MT_N) |
|
221 { |
|
222 state[0] = state[MT_N-1]; |
|
223 i = 1; |
|
224 } |
|
225 if (j >= key_length) |
|
226 j = 0; |
|
227 } |
|
228 for (k = MT_N - 1; k; k--) |
|
229 { |
|
230 state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL)) |
|
231 - i; /* non linear */ |
|
232 state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ |
|
233 i++; |
|
234 if (i >= MT_N) |
|
235 { |
|
236 state[0] = state[MT_N-1]; |
|
237 i = 1; |
|
238 } |
|
239 } |
|
240 |
|
241 state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ |
|
242 left = 1; |
|
243 initf = 1; |
|
244 } |
|
245 |
|
246 void |
|
247 oct_init_by_entropy (void) |
|
248 { |
|
249 uint32_t entropy[MT_N]; |
|
250 int n = 0; |
|
251 |
|
252 /* Look for entropy in /dev/urandom */ |
|
253 FILE* urandom =fopen("/dev/urandom", "rb"); |
|
254 if (urandom) |
|
255 { |
|
256 while (n < MT_N) |
|
257 { |
|
258 unsigned char word[4]; |
|
259 if (fread(word, 4, 1, urandom) != 1) |
|
260 break; |
|
261 entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+(word[3]<<24); |
|
262 } |
|
263 fclose(urandom); |
|
264 } |
|
265 |
|
266 /* If there isn't enough entropy, gather some from various sources */ |
|
267 if (n < MT_N) |
|
268 entropy[n++] = time(NULL); /* Current time in seconds */ |
|
269 if (n < MT_N) |
|
270 entropy[n++] = clock(); /* CPU time used (usec) */ |
|
271 #ifdef HAVE_GETTIMEOFDAY |
|
272 if (n < MT_N) |
|
273 { |
|
274 struct timeval tv; |
|
275 if (gettimeofday(&tv, NULL) != -1) |
|
276 entropy[n++] = tv.tv_usec; /* Fractional part of current time */ |
|
277 } |
|
278 #endif |
|
279 /* Send all the entropy into the initial state vector */ |
|
280 oct_init_by_array(entropy,n); |
|
281 } |
|
282 |
|
283 void |
|
284 oct_set_state (uint32_t save[]) |
|
285 { |
|
286 int i; |
|
287 for (i=0; i < MT_N; i++) |
|
288 state[i] = save[i]; |
|
289 left = save[MT_N]; |
|
290 next = state + (MT_N - left + 1); |
|
291 } |
|
292 |
|
293 void |
|
294 oct_get_state (uint32_t save[]) |
|
295 { |
|
296 int i; |
|
297 for (i = 0; i < MT_N; i++) |
|
298 save[i] = state[i]; |
|
299 save[MT_N] = left; |
|
300 } |
|
301 |
|
302 static void |
|
303 next_state (void) |
|
304 { |
|
305 uint32_t *p = state; |
|
306 int j; |
|
307 |
|
308 /* if init_by_int() has not been called, */ |
|
309 /* a default initial seed is used */ |
|
310 /* if (initf==0) init_by_int(5489UL); */ |
|
311 /* Or better yet, a random seed! */ |
|
312 if (initf == 0) |
|
313 oct_init_by_entropy(); |
|
314 |
|
315 left = MT_N; |
|
316 next = state; |
|
317 |
|
318 for (j = MT_N - MT_M + 1; --j; p++) |
|
319 *p = p[MT_M] ^ TWIST(p[0], p[1]); |
|
320 |
|
321 for (j = MT_M; --j; p++) |
|
322 *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]); |
|
323 |
|
324 *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]); |
|
325 } |
|
326 |
|
327 /* generates a random number on [0,0xffffffff]-interval */ |
5766
|
328 static uint32_t |
5742
|
329 randmt (void) |
|
330 { |
|
331 register uint32_t y; |
|
332 |
|
333 if (--left == 0) |
|
334 next_state(); |
|
335 y = *next++; |
|
336 |
|
337 /* Tempering */ |
|
338 y ^= (y >> 11); |
|
339 y ^= (y << 7) & 0x9d2c5680UL; |
|
340 y ^= (y << 15) & 0xefc60000UL; |
|
341 return (y ^ (y >> 18)); |
|
342 } |
|
343 |
|
/* ===== Uniform generators ===== */

/* Select which 32 bit generator to use */
#define randi32 randmt

/* Return a 53-bit unsigned integer built from two 32-bit draws: the
   first draw supplies the low 32 bits, the low 21 bits of the second
   draw supply the high bits.

   The value is assembled with a shift and an or.  The former x86-only
   variant stored the halves through a uint32_t pointer aimed at a
   uint64_t, which violates the C aliasing rules; on little-endian x86
   it produced the same value as this expression.  */
static uint64_t
randi53 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 () & 0x1FFFFF;

  return ((uint64_t) hi << 32) | lo;
}
|
364 |
5766
|
/* Return a 54-bit unsigned integer built from two 32-bit draws: the
   first draw supplies the low 32 bits, the low 22 bits of the second
   draw supply the high bits.

   The value is assembled with a shift and an or.  The former x86-only
   variant stored the halves through a uint32_t pointer aimed at a
   uint64_t, which violates the C aliasing rules; on little-endian x86
   it produced the same value as this expression.  */
static uint64_t
randi54 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 () & 0x3FFFFF;

  return ((uint64_t) hi << 32) | lo;
}
|
380 |
6959
|
#if 0
/* FIXME -- this doesn't seem to be used anywhere; should it be removed? */

/* Return a 64-bit unsigned integer built from two 32-bit draws: the
   first draw supplies the low word, the second the high word.  The
   value is assembled with a shift and an or rather than by writing
   through a uint32_t pointer aimed at a uint64_t, which violates the
   C aliasing rules.  */
static uint64_t
randi64 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 ();

  return ((uint64_t) hi << 32) | lo;
}
#endif
5742
|
399 |
6959
|
#ifdef ALLBITS
/* Uniform random number on the (0,1)-real-interval, derived from a
   single 32-bit draw.  */
static double
randu32 (void)
{
  /* The 0.5 offset keeps the result above zero; the factor divides
     by 2^32.  */
  return ((double) randi32 () + 0.5) * (1.0 / 4294967296.0);
}
#else
/* Uniform random number on (0,1) with 53-bit resolution, derived from
   two 32-bit draws.  */
static double
randu53 (void)
{
  const uint32_t hi27 = randi32 () >> 5;   /* top 27 bits of the first draw */
  const uint32_t lo26 = randi32 () >> 6;   /* top 26 bits of the second draw */

  /* 67108864 is 2^26 and 9007199254740992 is 2^53; the 0.4 offset keeps
     the result above zero.  */
  return (hi27 * 67108864.0 + lo26 + 0.4) * (1.0 / 9007199254740992.0);
}
#endif
5742
|
418 |
|
/* Return one uniform double, using the 32-bit generator when ALLBITS
   is defined and the 53-bit generator otherwise.  */
double
oct_randu (void)
{
  double u;

#ifdef ALLBITS
  u = randu32 ();
#else
  u = randu53 ();
#endif

  return u;
}
|
429 |
|
430 /* ===== Ziggurat normal and exponential generators ===== */ |
|
/* Integer type, mantissa scale and random sources used by the ziggurat
   code; ALLBITS selects the 32-bit variants.  */
#ifdef ALLBITS
#  define ZIGINT uint32_t
#  define EMANTISSA 4294967296.0 /* 32 bit mantissa */
#  define ERANDI randi32() /* 32 bits for mantissa */
#  define NMANTISSA 2147483648.0 /* 31 bit mantissa */
#  define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */
#  define RANDU randu32()
#else
#  define ZIGINT uint64_t
#  define EMANTISSA 9007199254740992.0 /* 53 bit mantissa */
#  define ERANDI randi53() /* 53 bits for mantissa */
#  define NMANTISSA EMANTISSA
#  define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */
#  define RANDU randu53()
#endif

/* Number of entries in each ziggurat table.  */
#define ZIGGURAT_TABLE_SIZE 256

/* Constants for the normal distribution.  */
#define ZIGGURAT_NOR_R 3.6541528853610088
#define ZIGGURAT_NOR_INV_R 0.27366123732975828
#define NOR_SECTION_AREA 0.00492867323399

/* Constants for the exponential distribution.  */
#define ZIGGURAT_EXP_R 7.69711747013104972
#define ZIGGURAT_EXP_INV_R 0.129918765548341586
#define EXP_SECTION_AREA 0.0039496598225815571993

/* Tables for the normal distribution (k_i, w_i, f_i).  */
static ZIGINT ki[ZIGGURAT_TABLE_SIZE];
static double wi[ZIGGURAT_TABLE_SIZE];
static double fi[ZIGGURAT_TABLE_SIZE];

/* Tables for the exponential distribution (k_e, w_e, f_e).  */
static ZIGINT ke[ZIGGURAT_TABLE_SIZE];
static double we[ZIGGURAT_TABLE_SIZE];
static double fe[ZIGGURAT_TABLE_SIZE];
|
461 |
|
462 /* |
|
463 This code is based on the paper Marsaglia and Tsang, "The ziggurat method |
|
464 for generating random variables", Journ. Statistical Software. Code was |
|
465 presented in this paper for a Ziggurat of 127 levels and using a 32 bit |
|
466 integer random number generator. This version of the code, uses the |
|
467 Mersenne Twister as the integer generator and uses 256 levels in the |
|
468 Ziggurat. This has several advantages. |
|
469 |
|
470 1) As Marsaglia and Tsang themselves state, the more levels there are, |
|
471 the fewer times the expensive tail algorithm must be called |
|
472 2) The cycle time of the generator is determined by the integer |
|
473 generator, thus the use of a Mersenne Twister for the core random |
|
474 generator makes this cycle extremely long. |
|
475 3) The license on the original code was unclear, thus rewriting the code |
|
476 from the article means we are free of copyright issues. |
|
477 4) Compile flag for full 53-bit random mantissa. |
|
478 |
|
479 It should be stated that the authors made my life easier, by the fact that |
|
480 the algorithm developed in the text of the article is for a 256 level |
|
481 ziggurat, even if the code itself isn't... |
|
482 |
|
483 One modification to the algorithm developed in the article, is that it is |
|
484 assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in |
|
485 terms like 2^32 in the code. As the normal distribution is defined between |
|
486 -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus |
|
487 in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for |
|
488 this term. The exponential distribution is one sided so we use the |
|
489 full 32 bits. We use EMANTISSA for this term. |
|
490 |
|
491 It appears that I'm slightly slower than the code in the article, this |
|
492 is partially due to a better generator of random integers than they |
|
493 use. But might also be that the case of rapid return was optimized by |
|
494 inlining the relevant code with a #define. As the basic Mersenne |
|
495 Twister is only 25% faster than this code I suspect that the main |
|
496 reason is just the use of the Mersenne Twister and not the inlining, |
|
497 so I'm not going to try and optimize further. |
|
498 */ |
|
499 |
|
500 static void |
|
501 create_ziggurat_tables (void) |
|
502 { |
|
503 int i; |
|
504 double x, x1; |
|
505 |
|
506 /* Ziggurat tables for the normal distribution */ |
|
507 x1 = ZIGGURAT_NOR_R; |
|
508 wi[255] = x1 / NMANTISSA; |
|
509 fi[255] = exp (-0.5 * x1 * x1); |
|
510 |
|
511 /* Index zero is special for tail strip, where Marsaglia and Tsang |
|
512 * defines this as |
|
513 * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, |
|
514 * where v is the area of each strip of the ziggurat. |
|
515 */ |
|
516 ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); |
|
517 wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; |
|
518 fi[0] = 1.; |
|
519 |
|
520 for (i = 254; i > 0; i--) |
|
521 { |
|
522 /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus |
|
523 * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) |
|
524 */ |
|
525 x = sqrt(-2. * log(NOR_SECTION_AREA / x1 + fi[i+1])); |
|
526 ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); |
|
527 wi[i] = x / NMANTISSA; |
|
528 fi[i] = exp (-0.5 * x * x); |
|
529 x1 = x; |
|
530 } |
|
531 |
|
532 ki[1] = 0; |
|
533 |
|
534 /* Zigurrat tables for the exponential distribution */ |
|
535 x1 = ZIGGURAT_EXP_R; |
|
536 we[255] = x1 / EMANTISSA; |
|
537 fe[255] = exp (-x1); |
|
538 |
|
539 /* Index zero is special for tail strip, where Marsaglia and Tsang |
|
540 * defines this as |
|
541 * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, |
|
542 * where v is the area of each strip of the ziggurat. |
|
543 */ |
|
544 ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); |
|
545 we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; |
|
546 fe[0] = 1.; |
|
547 |
|
548 for (i = 254; i > 0; i--) |
|
549 { |
|
550 /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus |
|
551 * need inverse operator of y = exp(-x) -> x = -ln(y) |
|
552 */ |
|
553 x = - log(EXP_SECTION_AREA / x1 + fe[i+1]); |
|
554 ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); |
|
555 we[i] = x / EMANTISSA; |
|
556 fe[i] = exp (-x); |
|
557 x1 = x; |
|
558 } |
|
559 ke[1] = 0; |
|
560 |
|
561 initt = 0; |
|
562 } |
|
563 |
|
564 /* |
|
565 * Here is the guts of the algorithm. As Marsaglia and Tsang state the |
|
566 * algorithm in their paper |
|
567 * |
|
568 * 1) Calculate a random signed integer j and let i be the index |
|
569 * provided by the rightmost 8-bits of j |
|
570 * 2) Set x = j * w_i. If j < k_i return x |
|
571 * 3) If i = 0, then return x from the tail |
|
572 * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x |
|
573 * 5) goto step 1 |
|
574 * |
|
575 * Where f is the functional form of the distribution, which for a normal |
|
576 * distribution is exp(-0.5*x*x) |
|
577 */ |
|
578 |
|
579 double |
|
580 oct_randn (void) |
|
581 { |
|
582 if (initt) |
|
583 create_ziggurat_tables(); |
|
584 |
|
585 while (1) |
|
586 { |
|
587 /* The following code is specialized for 32-bit mantissa. |
|
588 * Compared to the arbitrary mantissa code, there is a performance |
|
589 * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40% |
|
590 * There is a bigger performance gain compared to using a full |
|
591 * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240% |
|
592 * Of course, different compilers and operating systems may |
|
593 * have something to do with this. |
|
594 */ |
|
595 #if !defined(ALLBITS) |
|
596 # if HAVE_X86_32 |
|
597 /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */ |
|
598 double x; |
|
599 int si,idx; |
|
600 register uint32_t lo, hi; |
|
601 int64_t rabs; |
|
602 uint32_t *p = (uint32_t *)&rabs; |
|
603 lo = randi32(); |
|
604 idx = lo&0xFF; |
|
605 hi = randi32(); |
|
606 si = hi&UMASK; |
|
607 p[0] = lo; |
|
608 p[1] = hi&0x1FFFFF; |
|
609 x = ( si ? -rabs : rabs ) * wi[idx]; |
|
610 # else /* !HAVE_X86_32 */ |
|
611 /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ |
|
612 const uint64_t r = NRANDI; |
|
613 const int64_t rabs=r>>1; |
|
614 const int idx = (int)(rabs&0xFF); |
|
615 const double x = ( r&1 ? -rabs : rabs) * wi[idx]; |
|
616 # endif /* !HAVE_X86_32 */ |
|
617 if (rabs < (int64_t)ki[idx]) |
|
618 #else /* ALLBITS */ |
|
619 /* 32-bit mantissa */ |
|
620 const uint32_t r = randi32(); |
|
621 const uint32_t rabs = r&LMASK; |
|
622 const int idx = (int)(r&0xFF); |
|
623 const double x = ((int32_t)r) * wi[idx]; |
|
624 if (rabs < ki[idx]) |
|
625 #endif /* ALLBITS */ |
|
626 return x; /* 99.3% of the time we return here 1st try */ |
|
627 else if (idx == 0) |
|
628 { |
|
629 /* As stated in Marsaglia and Tsang |
|
630 * |
|
631 * For the normal tail, the method of Marsaglia[5] provides: |
|
632 * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, |
|
633 * then return r+x. Except that r+x is always in the positive |
|
634 * tail!!!! Any thing random might be used to determine the |
|
635 * sign, but as we already have r we might as well use it |
|
636 * |
|
637 * [PAK] but not the bottom 8 bits, since they are all 0 here! |
|
638 */ |
|
639 double xx, yy; |
|
640 do |
|
641 { |
|
642 xx = - ZIGGURAT_NOR_INV_R * log (RANDU); |
|
643 yy = - log (RANDU); |
|
644 } |
|
645 while ( yy+yy <= xx*xx); |
|
646 return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); |
|
647 } |
|
648 else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) |
|
649 return x; |
|
650 } |
|
651 } |
|
652 |
|
653 double |
|
654 oct_rande (void) |
|
655 { |
|
656 if (initt) |
|
657 create_ziggurat_tables(); |
|
658 |
|
659 while (1) |
|
660 { |
|
661 ZIGINT ri = ERANDI; |
|
662 const int idx = (int)(ri & 0xFF); |
|
663 const double x = ri * we[idx]; |
|
664 if (ri < ke[idx]) |
|
665 return x; // 98.9% of the time we return here 1st try |
|
666 else if (idx == 0) |
|
667 { |
|
668 /* As stated in Marsaglia and Tsang |
|
669 * |
|
670 * For the exponential tail, the method of Marsaglia[5] provides: |
|
671 * x = r - ln(U); |
|
672 */ |
|
673 return ZIGGURAT_EXP_R - log(RANDU); |
|
674 } |
|
675 else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp(-x)) |
|
676 return x; |
|
677 } |
|
678 } |
|
679 |
|
680 /* Array generators */ |
|
681 void |
|
682 oct_fill_randu (octave_idx_type n, double *p) |
|
683 { |
|
684 octave_idx_type i; |
|
685 for (i = 0; i < n; i++) |
|
686 p[i] = oct_randu(); |
|
687 } |
|
688 |
|
689 void |
|
690 oct_fill_randn (octave_idx_type n, double *p) |
|
691 { |
|
692 octave_idx_type i; |
|
693 for (i = 0; i < n; i++) |
|
694 p[i] = oct_randn(); |
|
695 } |
|
696 |
|
697 void |
|
698 oct_fill_rande (octave_idx_type n, double *p) |
|
699 { |
|
700 octave_idx_type i; |
|
701 for (i = 0; i < n; i++) |
|
702 p[i] = oct_rande(); |
|
703 } |
|
704 |
|
705 /* |
|
706 ;;; Local Variables: *** |
|
707 ;;; mode: C *** |
|
708 ;;; End: *** |
|
709 */ |