comparison scripts/specfun/perms.m @ 31026:f03e1eebf46d

perms.m: Add new input option "unique" to return only unique permutations (bug #60364) perms.m: Add new function to return only unique permutations, expand input validation, expand docstring. NEWS.8.md: Add note about perms taking a new input option.
author Arun Giridhar <arungiridhar@gmail.com>
date Wed, 25 May 2022 17:58:13 -0400
parents 5d3faba0342e
children 8629bb61ef17
comparison
equal deleted inserted replaced
31025:9402f89daaf9 31026:f03e1eebf46d
23 ## 23 ##
24 ######################################################################## 24 ########################################################################
25 25
26 ## -*- texinfo -*- 26 ## -*- texinfo -*-
27 ## @deftypefn {} {@var{P} =} perms (@var{v}) 27 ## @deftypefn {} {@var{P} =} perms (@var{v})
28 ## @deftypefnx {} {@var{P} =} perms (@var{v}, "unique")
28 ## Generate all permutations of vector @var{v} with one row per permutation. 29 ## Generate all permutations of vector @var{v} with one row per permutation.
29 ## 30 ##
30 ## Results are returned in inverse lexicographic order. The result has size 31 ## Results are returned in inverse lexicographic order. The result has size
31 ## @code{factorial (@var{n}) * @var{n}}, where @var{n} is the length of 32 ## @code{factorial (@var{n}) * @var{n}}, where @var{n} is the length of
32 ## @var{v}. Any repetitions are included in the output. To generate just the 33 ## @var{v}. Any repetitions are included in the output.
33 ## unique permutations use @code{unique (perms (@var{v}), "rows")(end:-1:1,:)}. 34 ##
34 ## 35 ## If the optional second argument is the string "unique", then only unique
35 ## Example 36 ## permutations are returned, using a method that uses less memory than
36 ## 37 ## @code{unique (perms (@var{v}), "rows")} and can be faster for
38 ## certain inputs. In this case, the results are not guaranteed to be in any
39 ## particular order.
40 ##
41 ## Example 1
37 ## @example 42 ## @example
38 ## @group 43 ## @group
39 ## perms ([1, 2, 3]) 44 ## perms ([1, 2, 3])
40 ## @result{} 45 ## @result{}
41 ## 3 2 1 46 ## 3 2 1
45 ## 1 3 2 50 ## 1 3 2
46 ## 1 2 3 51 ## 1 2 3
47 ## @end group 52 ## @end group
48 ## @end example 53 ## @end example
49 ## 54 ##
50 ## Programming Note: The maximum length of @var{v} should be less than or 55 ## Example 2
51 ## equal to 10 to limit memory consumption. 56 ## @example
57 ## @group
58 ## perms ([1, 1, 2, 2], "unique")
59 ## @result{}
60 ## 2 2 1 1
61 ## 2 1 2 1
62 ## 2 1 1 2
63 ## 1 2 2 1
64 ## 1 2 1 2
65 ## 1 1 2 2
66 ## @end group
67 ## @end example
68 ##
69 ## Programming Notes: If the "unique" argument is not used, the length of
70 ## @var{v} should be no more than 10-12 to limit memory consumption. To get
71 ## unique results, test both @code{perms (@var{v}, "unique")} and
72 ## @code{unique (perms (@var{v}), "rows")} to compare speed and
73 ## memory usage.
52 ## @seealso{permute, randperm, nchoosek} 74 ## @seealso{permute, randperm, nchoosek}
53 ## @end deftypefn 75 ## @end deftypefn
54 76
55 ## FIXME: In principle it should be more efficient to do indexing using uint8 77 ## FIXME: In principle it should be more efficient to do indexing using uint8
56 ## type. However, benchmarking shows doubles are faster. If this changes in 78 ## type. However, benchmarking shows doubles are faster. If this changes in
57 ## a later version of Octave the index variables here can be made uint8. 79 ## a later version of Octave the index variables here can be made uint8.
58 80
59 function P = perms (v) 81 function P = perms (v, opt)
60 82
61 if (nargin < 1) 83 if (nargin < 1)
62 print_usage (); 84 print_usage ();
85 elseif (nargin == 2)
86 if (! isequal (opt, "unique"))
87 error ("perms: Second option must be the string ""unique"".");
88 else
89 tmp = numel (unique (v));
90 if (tmp == 1)
91 P = reshape (v, 1, []);
92 return
93 elseif (tmp < numel (v))
94 P = __perms_unique__ (v);
95 return
96 endif
97 endif
63 endif 98 endif
64 99
100 ## If we are here, then we want all permutations, not just unique ones.
101 ## Or the user passed the "unique" option but the input had no repetitions.
65 v = v(:).'; 102 v = v(:).';
66 if (isnumeric (v) || ischar (v)) 103 if (isnumeric (v) || ischar (v))
67 ## Order of output is only dependent on the actual values for 104 ## Order of output is only dependent on the actual values for
68 ## character and numeric arrays. 105 ## character and numeric arrays.
69 v = sort (v, "ascend"); 106 v = sort (v, "ascend");
110 endfor 147 endfor
111 endif 148 endif
112 149
113 endfunction 150 endfunction
114 151
152 function P = __perms_unique__ (v)
153 uvec = unique (v);
154 len = numel (v);
155 lenvec = (1:len);
156 ulen = numel (uvec);
157
158 ## Calculate frequencies. Slightly faster to not use hist() or histc().
159 for i = ulen:-1:1
160 freq (i) = nnz (v == uvec(i));
161 endfor
162
163 ## Performance is improved by handling the most frequent elements first.
164 ## This can mess with the output order though. Currently we do not promise
165 ## the results will be in any specific order.
166 [freq, order] = sort (freq, "descend");
167 uvec = uvec (order);
168
169 ## Call nchoosek for each choice and do a Cartesian set product,
170 ## avoiding collisions.
171 indx = nchoosek (lenvec, freq(1));
172 for i = 2:ulen
173 this = nchoosek (lenvec, freq(i));
174
175 new = zeros (rows (indx) + 1e5, columns (indx) + columns (this));
176 ## Preallocate 1e5 extra rows.
177 pos = 0;
178 for j = 1:rows (this)
179 tmp = indx;
180 for k = this(j,:)
181 tmp = tmp (all (tmp ~= k, 2), :);
182 endfor
183
184 r = rows (tmp);
185 c = columns (tmp);
186 if (pos + r > rows(new)) # allocate more memory on the fly
187 new (pos+r+1e5, 1) = 0;
188 endif
189
190 new ((pos+1):(pos+r), 1:c) = tmp;
191 new ((pos+1):(pos+r), (c+1):end) = repmat (this(j,:), r, 1);
192 pos += r;
193
194 # fprintf (1, "%d\t%d\t%d\t%d\t%d\t%f\n", i, j, r, pos, rows(new), toc);
195 endfor
196 indx = new (1:pos, :);
197 endfor
198 clear new tmp this # clear large variables before building P
199
200 indx = (indx-1) * rows (indx) + (1:rows (indx))';
201
202 ## Initialize P to be the same size as indx with the same class as v.
203 P = repmat (v, rows(indx), 1);
204 pos = 0;
205 for i = 1:ulen
206 P (indx (:, (pos + 1):(pos + freq(i)))) = uvec (i);
207 pos += freq(i);
208 endfor
209
210 endfunction
115 211
116 %!assert (rows (perms (1:6)), factorial (6)) 212 %!assert (rows (perms (1:6)), factorial (6))
117 %!assert (perms (pi), pi) 213 %!assert (perms (pi), pi)
118 %!assert (perms ([pi, e]), [pi, e; e, pi]) 214 %!assert (perms ([pi, e]), [pi, e; e, pi])
119 %!assert (perms ([1,2,3]), [3,2,1;3,1,2;2,3,1;2,1,3;1,3,2;1,2,3]) 215 %!assert (perms ([1,2,3]), [3,2,1;3,1,2;2,3,1;2,1,3;1,3,2;1,2,3])
121 %!assert (perms ("abc"), char ("cba", "cab", "bca", "bac", "acb", "abc")) 217 %!assert (perms ("abc"), char ("cba", "cab", "bca", "bac", "acb", "abc"))
122 %!assert (perms ("fobar"), sortrows (unique (perms ("fobar"), "rows"), -(1:5))) 218 %!assert (perms ("fobar"), sortrows (unique (perms ("fobar"), "rows"), -(1:5)))
123 %!assert (unique (perms (1:5)(:))', 1:5) 219 %!assert (unique (perms (1:5)(:))', 1:5)
124 %!assert (perms (int8 (1:4)), int8 (perms (1:4))) 220 %!assert (perms (int8 (1:4)), int8 (perms (1:4)))
125 221
222 %!assert (sortrows (perms ("abb", "unique")), ["abb"; "bab"; "bba"]);
223 %!assert (size (perms ([1 1 1 1 2 2 2 3 3], "unique")), [1260 9])
224 %!assert (size (perms (int8([1 1 1 1 1 2 2 2 2 3 3 3]), "unique")), [27720 12])
225
126 %!error <Invalid call> perms () 226 %!error <Invalid call> perms ()
227 %!error <.*Second option must be the string.*> perms (1:5, "nonexistent")
228 %!error <.*Second option must be the string.*> perms (1:5, {"foo"})
127 229
128 ## Should work for any array type, such as cells and structs, and not 230 ## Should work for any array type, such as cells and structs, and not
129 ## only for numeric data. 231 ## only for numeric data.
130 232
131 %!assert <*52431> (perms ({1}), {1}) 233 %!assert <*52431> (perms ({1}), {1})