changeset 30261:a49c635b179d

isprime.m: Speed up for large integers using Miller-Rabin test (bug #61312). * libinterp/corefcn/__isprimelarge__.cc: Implement Miller-Rabin test. * libinterp/corefcn/module.mk: Add new file to build system. * scripts/specfun/isprime.m: Use new function for large integers. * scripts/specfun/factor.m: Use "isprime" that is now more efficient. * NEWS: Add note about performance improvement.
author Arun Giridhar <arungiridhar@gmail.com>
date Fri, 29 Oct 2021 20:48:47 -0400
parents bfbe6e528af5
children f43902a87bf1
files NEWS libinterp/corefcn/__isprimelarge__.cc libinterp/corefcn/module.mk scripts/specfun/factor.m scripts/specfun/isprime.m
diffstat 5 files changed, 280 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Sat Oct 30 09:47:47 2021 -0400
+++ b/NEWS	Fri Oct 29 20:48:47 2021 -0400
@@ -82,7 +82,10 @@
 (A*x - b)`.  Previously, Octave computed a minimum-norm solution.
 
 - The `factor` function has been overhauled for speed.  For large
-composite numbers > 1e14, it can be up to 10,000 times faster.
+inputs > 1e14, it can be up to 10,000 times faster.
+
+- The `isprime` function uses a new primality testing algorithm
+that is up to 50,000 times faster for inputs > 1e14.
 
 - The `betainc` function now calculates an exact output for the
 important special cases where a or b are 1.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libinterp/corefcn/__isprimelarge__.cc	Fri Oct 29 20:48:47 2021 -0400
@@ -0,0 +1,199 @@
+////////////////////////////////////////////////////////////////////////
+//
+// Copyright (C) 1994-2021 The Octave Project Developers
+//
+// See the file COPYRIGHT.md in the top-level directory of this
+// distribution or <https://octave.org/copyright/>.
+//
+// This file is part of Octave.
+//
+// Octave is free software: you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Octave is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Octave; see the file COPYING.  If not, see
+// <https://www.gnu.org/licenses/>.
+//
+////////////////////////////////////////////////////////////////////////
+
+#include "defun.h"
+#include "error.h"
+#include "ovl.h"
+
+#include <iostream>
+
+OCTAVE_NAMESPACE_BEGIN
+
+// This function implements the Schrage technique for modular multiplication.
+// The returned value is equivalent to "mod (a*b, modulus)"
+// but calculated without overflow.
+uint64_t safemultiply (uint64_t a, uint64_t b, uint64_t modulus)
+{
+  if (! a || ! b)
+    return 0;
+  else if (b == 1)
+    return a;
+  else if (a == 1)
+    return b;
+  else if (a > b)
+    {
+      uint64_t tmp = a;
+      a = b;
+      b = tmp;
+    }
+
+  uint64_t q = modulus / a;
+  uint64_t r = modulus - q * a;
+  uint64_t term1 = a * (b % q);
+  uint64_t term2 = (r < q) ? r * (b / q) : safemultiply (r, b / q, modulus);
+  return (term1 > term2) ? (term1 - term2) : (term1 + modulus - term2);
+}
+
+// This function returns "mod (a^b, modulus)"
+// but calculated without overflow.
+uint64_t safepower (uint64_t a, uint64_t b, uint64_t modulus)
+{
+  uint64_t retval = 1;
+  while (b > 0)
+    {
+      if (b & 1)
+        retval = safemultiply (retval, a, modulus);
+      b >>= 1;
+      a = safemultiply (a, a, modulus);
+    }
+  return retval;
+}
+
+// This function implements a single round of Miller-Rabin primality testing.
+// Returns false if composite, true if pseudoprime for this divisor.
+bool millerrabin (uint64_t div, uint64_t d, uint64_t r, uint64_t n)
+{
+  uint64_t x = safepower (div, d, n);
+  if (x == 1 || x == n-1)
+    return true;
+
+  for (uint64_t j = 1; j < r; j++)
+    {
+      x = safemultiply (x, x, n);
+      if (x == n-1)
+        return true;
+    }
+  return false;
+}
+
+// This function uses the Miller-Rabin test to find out whether the input is
+// prime or composite. The input is required to be a scalar 64-bit integer.
+bool isprimescalar (uint64_t n)
+{
+  // Fast return for even numbers. n==2 is excluded by the time this function is called.
+  if (! (n & 1))
+    return false;
+
+  // n is now odd. Rewrite n as d * 2^r + 1, where d is odd.
+  uint64_t d = n-1;
+  uint64_t r = 0;
+  while (! (d & 1))
+  {
+    d >>= 1;
+    r++;
+  }
+
+  // Miller-Rabin test with the first 12 primes.
+  // If the number passes all 12 tests, then it is prime.
+  // If it fails any, then it is composite.
+  // The first 12 primes suffice to test all 64-bit integers.
+  if (! millerrabin ( 2, d, r, n))  return false;
+  if (! millerrabin ( 3, d, r, n))  return false;
+  if (! millerrabin ( 5, d, r, n))  return false;
+  if (! millerrabin ( 7, d, r, n))  return false;
+  if (! millerrabin (11, d, r, n))  return false;
+  if (! millerrabin (13, d, r, n))  return false;
+  if (! millerrabin (17, d, r, n))  return false;
+  if (! millerrabin (19, d, r, n))  return false;
+  if (! millerrabin (23, d, r, n))  return false;
+  if (! millerrabin (29, d, r, n))  return false;
+  if (! millerrabin (31, d, r, n))  return false;
+  if (! millerrabin (37, d, r, n))  return false;
+  // If we are all the way here, then it is prime.
+  return true;
+
+  /*
+  Mathematical references for the curious as to why we need only
+  the 12 smallest primes for testing all 64-bit numbers:
+  (1) https://oeis.org/A014233
+      Comment: a(12) > 2^64.  Hence the primality of numbers < 2^64 can be
+      determined by asserting strong pseudoprimality to all prime bases <= 37
+      (=prime(12)). Testing to prime bases <=31 does not suffice,
+      as a(11) < 2^64 and a(11) is a strong pseudoprime
+      to all prime bases <= 31 (=prime(11)). - Joerg Arndt, Jul 04 2012
+  (2) https://arxiv.org/abs/1509.00864
+      Strong Pseudoprimes to Twelve Prime Bases
+      Jonathan P. Sorenson, Jonathan Webster
+
+  In addition, a source listed here: https://miller-rabin.appspot.com/
+  reports that all 64-bit numbers can be covered with only 7 divisors,
+  namely 2, 325, 9375, 28178, 450775, 9780504, and 1795265022.
+  There was no peer-reviewed article to back it up though,
+  so this code uses the 12 primes <= 37.
+  */
+
+}
+
+DEFUN (__isprimelarge__, args, ,
+       doc: /* -*- texinfo -*-
+@deftypefn  {} {@var{x} =} __isprimelarge__ (@var{n})
+Use the Miller-Rabin test to find out whether the elements of N are prime or
+composite. The input N is required to be a vector or array of 64-bit integers.
+You should call isprime(N) instead of directly calling this function.
+
+@seealso{isprime, factor}
+@end deftypefn */)
+{
+  int nargin = args.length ();
+  if (nargin != 1)
+    print_usage ();
+
+  // This function is intended for internal use by isprime.m,
+  // so the following error handling should not be necessary. But it is
+  // probably good practice for any curious users calling it directly.
+  uint64NDArray vec = args(0).xuint64_array_value
+    ("__isprimelarge__: unable to convert input. Call isprime() instead.");
+
+  boolNDArray retval (vec.dims(), false);
+
+  for (octave_idx_type i = vec.numel() - 1; i >= 0; i--)
+    retval(i) = isprimescalar (vec(i));
+  // Note: If vec(i) <= 37, this function could go into an infinite loop.
+  // That situation does not arise when calling this from isprime.m
+  // but it could arise if the user calls this function directly with low input
+  // or negative input.
+  // But it turns out that adding this validation:
+  //   "if (vec(i) <= 37) then raise an error else call isprimescalar (vec(i))"
+  // slows this function down by over 20% for some inputs,
+  // so it is better to leave all the input validation in isprime.m
+  // and not add it here. The function DOCSTRING now explicitly says:
+  // "You should call isprime(N) instead of directly calling this function."
+
+  return ovl (retval);
+}
+
+/*
+%!assert (__isprimelarge__ (41:50), logical ([1 0 1 0 0 0 1 0 0 0]))
+%!assert (__isprimelarge__ (uint64 (12345)), false)
+%!assert (__isprimelarge__ (uint64 (2147483647)), true)
+%!assert (__isprimelarge__ (uint64 (2305843009213693951)), true)
+%!assert (__isprimelarge__ (uint64 (18446744073709551557)), true)
+
+%!assert (__isprimelarge__ ([uint64(12345), uint64(2147483647), uint64(2305843009213693951), uint64(18446744073709551557)]), logical ([0 1 1 1]))
+
+%!error <unable to convert input> (__isprimelarge__ ({'foo'; 'bar'}))
+*/
+
+OCTAVE_NAMESPACE_END
--- a/libinterp/corefcn/module.mk	Sat Oct 30 09:47:47 2021 -0400
+++ b/libinterp/corefcn/module.mk	Fri Oct 29 20:48:47 2021 -0400
@@ -119,6 +119,7 @@
   %reldir%/__gammainc__.cc \
   %reldir%/__ichol__.cc \
   %reldir%/__ilu__.cc \
+  %reldir%/__isprimelarge__.cc \
   %reldir%/__lin_interpn__.cc \
   %reldir%/__magick_read__.cc \
   %reldir%/__pchip_deriv__.cc \
--- a/scripts/specfun/factor.m	Sat Oct 30 09:47:47 2021 -0400
+++ b/scripts/specfun/factor.m	Fri Oct 29 20:48:47 2021 -0400
@@ -51,13 +51,16 @@
     error ("factor: Q must be a real non-negative integer");
   endif
 
-  ## Special case of no primes less than sqrt (q).
-  if (q < 4)
+  ## Special case if q is prime, because isprime() is now much faster than factor().
+  ## This also absorbs the case of q < 4, where there are no primes less than sqrt(q).
+  if (q < 4 || isprime (q))
     pf = q;
     n = 1;
     return;
   endif
 
+  ## If we are here, then q is composite.
+
   cls = class (q);  # store class
   if (isfloat (q) && q > flintmax (q))
     error ("factor: Q too large to factor (> flintmax)");
--- a/scripts/specfun/isprime.m	Sat Oct 30 09:47:47 2021 -0400
+++ b/scripts/specfun/isprime.m	Fri Oct 29 20:48:47 2021 -0400
@@ -58,9 +58,14 @@
 ## @end group
 ## @end example
 ##
-## Programming Note: @code{isprime} is appropriate if the maximum value in
-## @var{x} is not too large (< 1e15).  For larger values special purpose
-## factorization code should be used.
+## Programming Note: @code{isprime} is suitable for all @var{x}
+## in the range abs(@var{x})
+## @tex
+## $ < 2^{64}$.
+## @end tex
+## @ifnottex
+##  < 2^64.
+## @end ifnottex
 ##
 ## Compatibility Note: @sc{matlab} does not extend the definition of prime
 ## numbers and will produce an error if given negative or complex inputs.
@@ -85,55 +90,84 @@
     return;
   endif
 
-  ## Code strategy is to build a table with the list of possible primes
-  ## and then quickly compare entries in x with the table of primes using
-  ## lookup().  The table size is limited to save memory and computation
-  ## time during its creation.  All entries larger than the maximum in the
-  ## table are checked by straightforward division.
+  ## Code strategy is to quickly compare entries in x with small primes
+  ## using lookup(), then do direct division on larger numbers up to
+  ## a threshold, then call Miller-Rabin for numbers over the threshold.
+
+  x = abs (x);  # handle negative entries
 
-  x = abs (x);              # handle negative entries
-  maxn = max (x(:));
-  ## generate prime table of suitable length.
-  ## 1e7 threshold requires ~0.15 seconds of computation, 1e8 requires 1.8.
-  maxp = min (maxn, max (sqrt (maxn), 1e7));
-  pr = primes (maxp);
+  ## Generate prime table of suitable length up to maxp.
+  ## The value of maxp needs to be at least 37,
+  ## because of the method used by __isprimelarge__ below.
+  maxp = 37;  
+  pr = [2 3 5 7 11 13 17 19 23 29 31 37];
   t = lookup (pr, x, "b");  # quick search for table matches.
 
-  ## process any remaining large entries
-  m = x(x > maxp);
-  if (! isempty (m))
-    if (maxn <= intmax ("uint32"))
-      m = uint32 (m);
-    elseif (maxn <= intmax ("uint64"))
-      m = uint64 (m);
-    else
-      warning ("isprime: X contains integers too large to be tested");
-    endif
+  THRESHOLD = 29e9;
+  ## FIXME: THRESHOLD is the input value at which Miller-Rabin
+  ## becomes more efficient than direct division. For smaller numbers,
+  ## use direct division. For larger numbers, use Miller-Rabin.
+  ##
+  ## From numerical experiments in Oct 2021, this was observed:
+  ##    THRESHOLD       Division       Miller-Rabin
+  ##        380e9       75.0466s       30.868s
+  ##        100e9       45.0874s       29.1794s
+  ##         10e9       18.5075s       25.4392s
+  ##         50e9       31.6317s       27.7251s
+  ##         25e9       25.1215s       27.004s
+  ##         38e9       31.4674s       28.1336s
+  ##         30e9       28.3848s       27.9982s
+  ## which is close enough to interpolate, so final threshold = 29 billion.
+  ##
+  ## The test code was this: 
+  ##   n = THRESHOLD - (1:1e7); tic; isprime(n); toc 
+  ##   n = THRESHOLD + (1:1e7); tic; isprime(n); toc 
+  ##
+  ## Two notes for future programmers:
+  ##
+  ## 1. Test and tune THRESHOLD periodically. Miller-Rabin is only CPU-limited,
+  ##    while factorization by division is very memory-intensive. This is
+  ##    plainly noticeable from the loudness of the computer fans when running
+  ##    the division technique! CPU speed and RAM speed scale differently over
+  ##    time, so test and tune THRESHOLD periodically.
+  ##
+  ## 2. If you make improvements elsewhere in the code that favor one over
+  ##    the other (not symmetric), you should also retune THRESHOLD afterwards.
+  ##    If the Miller-Rabin part is sped up, the optimum THRESHOLD will
+  ##    decrease, and if factorization is sped up, it will increase.
+  ##
 
-    ## Start by dividing through by the small primes until the remaining
-    ## list of entries is small (and most likely prime themselves).
-    pr = cast (pr(pr <= sqrt (maxn)), class (m));
-    for p = pr
+  ## Process large entries that are still suitable for direct division
+  m = x (x > maxp & x <= THRESHOLD);
+  if ( ! isempty (m))
+    ## Start by dividing through by the small primes until the remaining list
+    ## of entries is small (and most likely prime themselves).
+    pr2 = primes (sqrt (max (m)));
+    t |= lookup (pr2, x, "b");
+    for p = pr2
       m = m(rem (m, p) != 0);
       if (numel (m) < numel (pr) / 10)
         break;
       endif
     endfor
 
-    ## Check the remaining list of possible primes against the
-    ## remaining prime factors which were not tested in the for loop.
-    ## This is just an optimization to use arrayfun over for loo
-    pr = pr(pr > p);
-    mm = arrayfun (@(x) all (rem (x, pr)), m);
+    ## Check the remaining list of possible primes against the remaining
+    ## prime factors which were not tested in the for loop.
+    ## This is just an optimization to use arrayfun over for loop.
+    pr2 = pr2 (pr2 > p);
+    mm = arrayfun (@(x) all (rem (x, pr2)), m);
     m = m(mm);
 
     ## Add any remaining entries, which are truly prime, to the results.
-    if (! isempty (m))
-      m = cast (sort (m), class (x));
-      t |= lookup (m, x, "b");
+    if ( ! isempty (m))
+      t |= lookup (sort (m), x, "b");
     endif
   endif
 
+  ## Process remaining entries (everything above THRESHOLD) with Miller-Rabin
+  ii = (x(:)' > THRESHOLD);
+  t(ii) = __isprimelarge__ (x(ii));
+
 endfunction
 
 function t = isgaussianprime (z)
@@ -162,6 +196,7 @@
 
 %!assert (isprime (3), true)
 %!assert (isprime (4), false)
+%!assert (isprime (uint64 (18446744073709551557)), true)
 %!assert (isprime (5i), false)
 %!assert (isprime (7i), true)
 %!assert (isprime ([1+2i, (2+3i)*(-1+2i)]), [true, false])