# HG changeset patch # User Arun Giridhar # Date 1635554927 14400 # Node ID a49c635b179df113a5bcc0d51562c2601e182ae1 # Parent bfbe6e528af520bb49d23b7661320a4efb9130cc isprime.m: Speed up for large integers using Miller-Rabin test (bug #61312). * libinterp/corefcn/__isprimelarge__.cc: Implement Miller-Rabin test. * libinterp/corefcn/module.mk: Add new file to build system. * scripts/specfun/isprime.m: Use new function for large integers. * scripts/specfun/factor.m: Use "isprime" that is now more efficient. * NEWS: Add note about performance improvement. diff -r bfbe6e528af5 -r a49c635b179d NEWS --- a/NEWS Sat Oct 30 09:47:47 2021 -0400 +++ b/NEWS Fri Oct 29 20:48:47 2021 -0400 @@ -82,7 +82,10 @@ (A*x - b)`. Previously, Octave computed a minimum-norm solution. - The `factor` function has been overhauled for speed. For large -composite numbers > 1e14, it can be up to 10,000 times faster. +inputs > 1e14, it can be up to 10,000 times faster. + +- The `isprime` function uses a new primality testing algorithm +that is up to 50,000 times faster for inputs > 1e14. - The `betainc` function now calculates an exact output for the important special cases where a or b are 1. diff -r bfbe6e528af5 -r a49c635b179d libinterp/corefcn/__isprimelarge__.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/corefcn/__isprimelarge__.cc Fri Oct 29 20:48:47 2021 -0400 @@ -0,0 +1,199 @@ +//////////////////////////////////////////////////////////////////////// +// +// Copyright (C) 1994-2021 The Octave Project Developers +// +// See the file COPYRIGHT.md in the top-level directory of this +// distribution or . +// +// This file is part of Octave. +// +// Octave is free software: you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Octave is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Octave; see the file COPYING. If not, see +// . +// +//////////////////////////////////////////////////////////////////////// + +#include "defun.h" +#include "error.h" +#include "ovl.h" + +#include + +OCTAVE_NAMESPACE_BEGIN + +// This function implements the Schrage technique for modular multiplication. +// The returned value is equivalent to "mod (a*b, modulus)" +// but calculated without overflow. +uint64_t safemultiply (uint64_t a, uint64_t b, uint64_t modulus) +{ + if (! a || ! b) + return 0; + else if (b == 1) + return a; + else if (a == 1) + return b; + else if (a > b) + { + uint64_t tmp = a; + a = b; + b = tmp; + } + + uint64_t q = modulus / a; + uint64_t r = modulus - q * a; + uint64_t term1 = a * (b % q); + uint64_t term2 = (r < q) ? r * (b / q) : safemultiply (r, b / q, modulus); + return (term1 > term2) ? (term1 - term2) : (term1 + modulus - term2); +} + +// This function returns "mod (a^b, modulus)" +// but calculated without overflow. +uint64_t safepower (uint64_t a, uint64_t b, uint64_t modulus) +{ + uint64_t retval = 1; + while (b > 0) + { + if (b & 1) + retval = safemultiply (retval, a, modulus); + b >>= 1; + a = safemultiply (a, a, modulus); + } + return retval; +} + +// This function implements a single round of Miller-Rabin primality testing. +// Returns false if composite, true if pseudoprime for this divisor. +bool millerrabin (uint64_t div, uint64_t d, uint64_t r, uint64_t n) +{ + uint64_t x = safepower (div, d, n); + if (x == 1 || x == n-1) + return true; + + for (uint64_t j = 1; j < r; j++) + { + x = safemultiply (x, x, n); + if (x == n-1) + return true; + } + return false; +} + +// This function uses the Miller-Rabin test to find out whether the input is +// prime or composite. The input is required to be a scalar 64-bit integer. +bool isprimescalar (uint64_t n) +{ + // Fast return for even numbers. n==2 is excluded by the time this function is called. + if (! (n & 1)) + return false; + + // n is now odd. Rewrite n as d * 2^r + 1, where d is odd. + uint64_t d = n-1; + uint64_t r = 0; + while (! (d & 1)) + { + d >>= 1; + r++; + } + + // Miller-Rabin test with the first 12 primes. + // If the number passes all 12 tests, then it is prime. + // If it fails any, then it is composite. + // The first 12 primes suffice to test all 64-bit integers. + if (! millerrabin ( 2, d, r, n)) return false; + if (! millerrabin ( 3, d, r, n)) return false; + if (! millerrabin ( 5, d, r, n)) return false; + if (! millerrabin ( 7, d, r, n)) return false; + if (! millerrabin (11, d, r, n)) return false; + if (! millerrabin (13, d, r, n)) return false; + if (! millerrabin (17, d, r, n)) return false; + if (! millerrabin (19, d, r, n)) return false; + if (! millerrabin (23, d, r, n)) return false; + if (! millerrabin (29, d, r, n)) return false; + if (! millerrabin (31, d, r, n)) return false; + if (! millerrabin (37, d, r, n)) return false; + // If we are all the way here, then it is prime. + return true; + + /* + Mathematical references for the curious as to why we need only + the 12 smallest primes for testing all 64-bit numbers: + (1) https://oeis.org/A014233 + Comment: a(12) > 2^64. Hence the primality of numbers < 2^64 can be + determined by asserting strong pseudoprimality to all prime bases <= 37 + (=prime(12)). Testing to prime bases <=31 does not suffice, + as a(11) < 2^64 and a(11) is a strong pseudoprime + to all prime bases <= 31 (=prime(11)). - Joerg Arndt, Jul 04 2012 + (2) https://arxiv.org/abs/1509.00864 + Strong Pseudoprimes to Twelve Prime Bases + Jonathan P. Sorenson, Jonathan Webster + + In addition, a source listed here: https://miller-rabin.appspot.com/ + reports that all 64-bit numbers can be covered with only 7 divisors, + namely 2, 325, 9375, 28178, 450775, 9780504, and 1795265022. + There was no peer-reviewed article to back it up though, + so this code uses the 12 primes <= 37. + */ + +} + +DEFUN (__isprimelarge__, args, , + doc: /* -*- texinfo -*- +@deftypefn {} {@var{x} =} __isprimelarge__ (@var{n}) +Use the Miller-Rabin test to find out whether the elements of N are prime or +composite. The input N is required to be a vector or array of 64-bit integers. +You should call isprime(N) instead of directly calling this function. + +@seealso{isprime, factor} +@end deftypefn */) +{ + int nargin = args.length (); + if (nargin != 1) + print_usage (); + + // This function is intended for internal use by isprime.m, + // so the following error handling should not be necessary. But it is + // probably good practice for any curious users calling it directly. + uint64NDArray vec = args(0).xuint64_array_value + ("__isprimelarge__: unable to convert input. Call isprime() instead."); + + boolNDArray retval (vec.dims(), false); + + for (octave_idx_type i = vec.numel() - 1; i >= 0; i--) + retval(i) = isprimescalar (vec(i)); + // Note: If vec(i) <= 37, this function could go into an infinite loop. + // That situation does not arise when calling this from isprime.m + // but it could arise if the user calls this function directly with low input + // or negative input. + // But it turns out that adding this validation: + // "if (vec(i) <= 37) then raise an error else call isprimescalar (vec(i))" + // slows this function down by over 20% for some inputs, + // so it is better to leave all the input validation in isprime.m + // and not add it here. The function DOCSTRING now explicitly says: + // "You should call isprime(N) instead of directly calling this function." + + return ovl (retval); +} + +/* +%!assert (__isprimelarge__ (41:50), logical ([1 0 1 0 0 0 1 0 0 0])) +%!assert (__isprimelarge__ (uint64 (12345)), false) +%!assert (__isprimelarge__ (uint64 (2147483647)), true) +%!assert (__isprimelarge__ (uint64 (2305843009213693951)), true) +%!assert (__isprimelarge__ (uint64 (18446744073709551557)), true) + +%!assert (__isprimelarge__ ([uint64(12345), uint64(2147483647), uint64(2305843009213693951), uint64(18446744073709551557)]), logical ([0 1 1 1])) + +%!error (__isprimelarge__ ({'foo'; 'bar'})) +*/ + +OCTAVE_NAMESPACE_END diff -r bfbe6e528af5 -r a49c635b179d libinterp/corefcn/module.mk --- a/libinterp/corefcn/module.mk Sat Oct 30 09:47:47 2021 -0400 +++ b/libinterp/corefcn/module.mk Fri Oct 29 20:48:47 2021 -0400 @@ -119,6 +119,7 @@ %reldir%/__gammainc__.cc \ %reldir%/__ichol__.cc \ %reldir%/__ilu__.cc \ + %reldir%/__isprimelarge__.cc \ %reldir%/__lin_interpn__.cc \ %reldir%/__magick_read__.cc \ %reldir%/__pchip_deriv__.cc \ diff -r bfbe6e528af5 -r a49c635b179d scripts/specfun/factor.m --- a/scripts/specfun/factor.m Sat Oct 30 09:47:47 2021 -0400 +++ b/scripts/specfun/factor.m Fri Oct 29 20:48:47 2021 -0400 @@ -51,13 +51,16 @@ error ("factor: Q must be a real non-negative integer"); endif - ## Special case of no primes less than sqrt (q). - if (q < 4) + ## Special case if q is prime, because isprime() is now much faster than factor(). + ## This also absorbs the case of q < 4, where there are no primes less than sqrt(q). + if (q < 4 || isprime (q)) pf = q; n = 1; return; endif + ## If we are here, then q is composite. + cls = class (q); # store class if (isfloat (q) && q > flintmax (q)) error ("factor: Q too large to factor (> flintmax)"); diff -r bfbe6e528af5 -r a49c635b179d scripts/specfun/isprime.m --- a/scripts/specfun/isprime.m Sat Oct 30 09:47:47 2021 -0400 +++ b/scripts/specfun/isprime.m Fri Oct 29 20:48:47 2021 -0400 @@ -58,9 +58,14 @@ ## @end group ## @end example ## -## Programming Note: @code{isprime} is appropriate if the maximum value in -## @var{x} is not too large (< 1e15). For larger values special purpose -## factorization code should be used. +## Programming Note: @code{isprime} is suitable for all @var{x} +## in the range abs(@var{x}) +## @tex +## $ < 2^{64}$. +## @end tex +## @ifnottex +## < 2^64. +## @end ifnottex ## ## Compatibility Note: @sc{matlab} does not extend the definition of prime ## numbers and will produce an error if given negative or complex inputs. @@ -85,55 +90,84 @@ return; endif - ## Code strategy is to build a table with the list of possible primes - ## and then quickly compare entries in x with the table of primes using - ## lookup(). The table size is limited to save memory and computation - ## time during its creation. All entries larger than the maximum in the - ## table are checked by straightforward division. + ## Code strategy is to quickly compare entries in x with small primes + ## using lookup(), then do direct division on larger numbers up to + ## a threshold, then call Miller-Rabin for numbers over the threshold. + + x = abs (x); # handle negative entries - x = abs (x); # handle negative entries - maxn = max (x(:)); - ## generate prime table of suitable length. - ## 1e7 threshold requires ~0.15 seconds of computation, 1e8 requires 1.8. - maxp = min (maxn, max (sqrt (maxn), 1e7)); - pr = primes (maxp); + ## Generate prime table of suitable length up to maxp. + ## The value of maxp needs to be at least 37, + ## because of the method used by __isprimelarge__ below. + maxp = 37; + pr = [2 3 5 7 11 13 17 19 23 29 31 37]; t = lookup (pr, x, "b"); # quick search for table matches. - ## process any remaining large entries - m = x(x > maxp); - if (! isempty (m)) - if (maxn <= intmax ("uint32")) - m = uint32 (m); - elseif (maxn <= intmax ("uint64")) - m = uint64 (m); - else - warning ("isprime: X contains integers too large to be tested"); - endif + THRESHOLD = 29e9; + ## FIXME: THRESHOLD is the input value at which Miller-Rabin + ## becomes more efficient than direct division. For smaller numbers, + ## use direct division. For larger numbers, use Miller-Rabin. + ## + ## From numerical experiments in Oct 2021, this was observed: + ## THRESHOLD Division Miller-Rabin + ## 380e9 75.0466s 30.868s + ## 100e9 45.0874s 29.1794s + ## 10e9 18.5075s 25.4392s + ## 50e9 31.6317s 27.7251s + ## 25e9 25.1215s 27.004s + ## 38e9 31.4674s 28.1336s + ## 30e9 28.3848s 27.9982s + ## which is close enough to interpolate, so final threshold = 29 billion. + ## + ## The test code was this: + ## n = THRESHOLD - (1:1e7); tic; isprime(n); toc + ## n = THRESHOLD + (1:1e7); tic; isprime(n); toc + ## + ## Two notes for future programmers: + ## + ## 1. Test and tune THRESHOLD periodically. Miller-Rabin is only CPU-limited, + ## while factorization by division is very memory-intensive. This is + ## plainly noticeable from the loudness of the computer fans when running + ## the division technique! CPU speed and RAM speed scale differently over + ## time, so test and tune THRESHOLD periodically. + ## + ## 2. If you make improvements elsewhere in the code that favor one over + ## the other (not symmetric), you should also retune THRESHOLD afterwards. + ## If the Miller-Rabin part is sped up, the optimum THRESHOLD will + ## decrease, and if factorization is sped up, it will increase. + ## - ## Start by dividing through by the small primes until the remaining - ## list of entries is small (and most likely prime themselves). - pr = cast (pr(pr <= sqrt (maxn)), class (m)); - for p = pr + ## Process large entries that are still suitable for direct division + m = x (x > maxp & x <= THRESHOLD); + if ( ! isempty (m)) + ## Start by dividing through by the small primes until the remaining list + ## of entries is small (and most likely prime themselves). + pr2 = primes (sqrt (max (m))); + t |= lookup (pr2, x, "b"); + for p = pr2 m = m(rem (m, p) != 0); if (numel (m) < numel (pr) / 10) break; endif endfor - ## Check the remaining list of possible primes against the - ## remaining prime factors which were not tested in the for loop. - ## This is just an optimization to use arrayfun over for loo - pr = pr(pr > p); - mm = arrayfun (@(x) all (rem (x, pr)), m); + ## Check the remaining list of possible primes against the remaining + ## prime factors which were not tested in the for loop. + ## This is just an optimization to use arrayfun over for loop. + pr2 = pr2 (pr2 > p); + mm = arrayfun (@(x) all (rem (x, pr2)), m); m = m(mm); ## Add any remaining entries, which are truly prime, to the results. - if (! isempty (m)) - m = cast (sort (m), class (x)); - t |= lookup (m, x, "b"); + if ( ! isempty (m)) + t |= lookup (sort (m), x, "b"); endif endif + ## Process remaining entries (everything above THRESHOLD) with Miller-Rabin + ii = (x(:)' > THRESHOLD); + t(ii) = __isprimelarge__ (x(ii)); + endfunction function t = isgaussianprime (z) @@ -162,6 +196,7 @@ %!assert (isprime (3), true) %!assert (isprime (4), false) +%!assert (isprime (uint64 (18446744073709551557)), true) %!assert (isprime (5i), false) %!assert (isprime (7i), true) %!assert (isprime ([1+2i, (2+3i)*(-1+2i)]), [true, false])