Mercurial > octave
comparison scripts/specfun/perms.m @ 31026:f03e1eebf46d
perms.m: Add new input option "unique" to return only unique permutations (bug #60364)
perms.m: Add new function to return only unique permutations, expand input validation,
expand docstring.
NEWS.8.md: Add note about perms taking a new input option.
author | Arun Giridhar <arungiridhar@gmail.com> |
---|---|
date | Wed, 25 May 2022 17:58:13 -0400 |
parents | 5d3faba0342e |
children | 8629bb61ef17 |
comparison
equal
deleted
inserted
replaced
31025:9402f89daaf9 | 31026:f03e1eebf46d |
---|---|
23 ## | 23 ## |
24 ######################################################################## | 24 ######################################################################## |
25 | 25 |
26 ## -*- texinfo -*- | 26 ## -*- texinfo -*- |
27 ## @deftypefn {} {@var{P} =} perms (@var{v}) | 27 ## @deftypefn {} {@var{P} =} perms (@var{v}) |
28 ## @deftypefnx {} {@var{P} =} perms (@var{v}, "unique") | |
28 ## Generate all permutations of vector @var{v} with one row per permutation. | 29 ## Generate all permutations of vector @var{v} with one row per permutation. |
29 ## | 30 ## |
30 ## Results are returned in inverse lexicographic order. The result has size | 31 ## Results are returned in inverse lexicographic order. The result has size |
31 ## @code{factorial (@var{n}) * @var{n}}, where @var{n} is the length of | 32 ## @code{factorial (@var{n}) * @var{n}}, where @var{n} is the length of |
32 ## @var{v}. Any repetitions are included in the output. To generate just the | 33 ## @var{v}. Any repetitions are included in the output. |
33 ## unique permutations use @code{unique (perms (@var{v}), "rows")(end:-1:1,:)}. | 34 ## |
34 ## | 35 ## If the optional second argument is the string "unique", then only unique |
35 ## Example | 36 ## permutations are returned, using a method that uses less memory than |
36 ## | 37 ## @code{unique (perms (@var{v}), "rows")} and can be faster for |
38 ## certain inputs. In this case, the results are not guaranteed to be in any | |
39 ## particular order. | |
40 ## | |
41 ## Example 1 | |
37 ## @example | 42 ## @example |
38 ## @group | 43 ## @group |
39 ## perms ([1, 2, 3]) | 44 ## perms ([1, 2, 3]) |
40 ## @result{} | 45 ## @result{} |
41 ## 3 2 1 | 46 ## 3 2 1 |
45 ## 1 3 2 | 50 ## 1 3 2 |
46 ## 1 2 3 | 51 ## 1 2 3 |
47 ## @end group | 52 ## @end group |
48 ## @end example | 53 ## @end example |
49 ## | 54 ## |
50 ## Programming Note: The maximum length of @var{v} should be less than or | 55 ## Example 2 |
51 ## equal to 10 to limit memory consumption. | 56 ## @example |
57 ## @group | |
58 ## perms ([1, 1, 2, 2], "unique") | |
59 ## @result{} | |
60 ## 2 2 1 1 | |
61 ## 2 1 2 1 | |
62 ## 2 1 1 2 | |
63 ## 1 2 2 1 | |
64 ## 1 2 1 2 | |
65 ## 1 1 2 2 | |
66 ## @end group | |
67 ## @end example | |
68 ## | |
69 ## Programming Notes: If the "unique" argument is not used, the length of | |
70 ## @var{v} should be no more than 10-12 to limit memory consumption. To get | |
71 ## unique results, test both @code{perms (@var{v}, "unique")} and | |
72 ## @code{unique (perms (@var{v}), "rows")} to compare speed and | |
73 ## memory usage. | |
52 ## @seealso{permute, randperm, nchoosek} | 74 ## @seealso{permute, randperm, nchoosek} |
53 ## @end deftypefn | 75 ## @end deftypefn |
54 | 76 |
55 ## FIXME: In principle it should be more efficient to do indexing using uint8 | 77 ## FIXME: In principle it should be more efficient to do indexing using uint8 |
56 ## type. However, benchmarking shows doubles are faster. If this changes in | 78 ## type. However, benchmarking shows doubles are faster. If this changes in |
57 ## a later version of Octave the index variables here can be made uint8. | 79 ## a later version of Octave the index variables here can be made uint8. |
58 | 80 |
59 function P = perms (v) | 81 function P = perms (v, opt) |
60 | 82 |
61 if (nargin < 1) | 83 if (nargin < 1) |
62 print_usage (); | 84 print_usage (); |
85 elseif (nargin == 2) | |
86 if (! isequal (opt, "unique")) | |
87 error ("perms: Second option must be the string ""unique""."); | |
88 else | |
89 tmp = numel (unique (v)); | |
90 if (tmp == 1) | |
91 P = reshape (v, 1, []); | |
92 return | |
93 elseif (tmp < numel (v)) | |
94 P = __perms_unique__ (v); | |
95 return | |
96 endif | |
97 endif | |
63 endif | 98 endif |
64 | 99 |
100 ## If we are here, then we want all permutations, not just unique ones. | |
101 ## Or the user passed the "unique" option but the input had no repetitions. | |
65 v = v(:).'; | 102 v = v(:).'; |
66 if (isnumeric (v) || ischar (v)) | 103 if (isnumeric (v) || ischar (v)) |
67 ## Order of output is only dependent on the actual values for | 104 ## Order of output is only dependent on the actual values for |
68 ## character and numeric arrays. | 105 ## character and numeric arrays. |
69 v = sort (v, "ascend"); | 106 v = sort (v, "ascend"); |
110 endfor | 147 endfor |
111 endif | 148 endif |
112 | 149 |
113 endfunction | 150 endfunction |
114 | 151 |
152 function P = __perms_unique__ (v) | |
153 uvec = unique (v); | |
154 len = numel (v); | |
155 lenvec = (1:len); | |
156 ulen = numel (uvec); | |
157 | |
158 ## Calculate frequencies. Slightly faster to not use hist() or histc(). | |
159 for i = ulen:-1:1 | |
160 freq (i) = nnz (v == uvec(i)); | |
161 endfor | |
162 | |
163 ## Performance is improved by handling the most frequent elements first. | |
164 ## This can mess with the output order though. Currently we do not promise | |
165 ## the results will be in any specific order. | |
166 [freq, order] = sort (freq, "descend"); | |
167 uvec = uvec (order); | |
168 | |
169 ## Call nchoosek for each choice and do a Cartesian set product, | |
170 ## avoiding collisions. | |
171 indx = nchoosek (lenvec, freq(1)); | |
172 for i = 2:ulen | |
173 this = nchoosek (lenvec, freq(i)); | |
174 | |
175 new = zeros (rows (indx) + 1e5, columns (indx) + columns (this)); | |
176 ## Preallocate 1e5 extra rows. | |
177 pos = 0; | |
178 for j = 1:rows (this) | |
179 tmp = indx; | |
180 for k = this(j,:) | |
181 tmp = tmp (all (tmp ~= k, 2), :); | |
182 endfor | |
183 | |
184 r = rows (tmp); | |
185 c = columns (tmp); | |
186 if (pos + r > rows(new)) # allocate more memory on the fly | |
187 new (pos+r+1e5, 1) = 0; | |
188 endif | |
189 | |
190 new ((pos+1):(pos+r), 1:c) = tmp; | |
191 new ((pos+1):(pos+r), (c+1):end) = repmat (this(j,:), r, 1); | |
192 pos += r; | |
193 | |
194 # fprintf (1, "%d\t%d\t%d\t%d\t%d\t%f\n", i, j, r, pos, rows(new), toc); | |
195 endfor | |
196 indx = new (1:pos, :); | |
197 endfor | |
198 clear new tmp this # clear large variables before building P | |
199 | |
200 indx = (indx-1) * rows (indx) + (1:rows (indx))'; | |
201 | |
202 ## Initialize P to be the same size as indx with the same class as v. | |
203 P = repmat (v, rows(indx), 1); | |
204 pos = 0; | |
205 for i = 1:ulen | |
206 P (indx (:, (pos + 1):(pos + freq(i)))) = uvec (i); | |
207 pos += freq(i); | |
208 endfor | |
209 | |
210 endfunction | |
115 | 211 |
116 %!assert (rows (perms (1:6)), factorial (6)) | 212 %!assert (rows (perms (1:6)), factorial (6)) |
117 %!assert (perms (pi), pi) | 213 %!assert (perms (pi), pi) |
118 %!assert (perms ([pi, e]), [pi, e; e, pi]) | 214 %!assert (perms ([pi, e]), [pi, e; e, pi]) |
119 %!assert (perms ([1,2,3]), [3,2,1;3,1,2;2,3,1;2,1,3;1,3,2;1,2,3]) | 215 %!assert (perms ([1,2,3]), [3,2,1;3,1,2;2,3,1;2,1,3;1,3,2;1,2,3]) |
121 %!assert (perms ("abc"), char ("cba", "cab", "bca", "bac", "acb", "abc")) | 217 %!assert (perms ("abc"), char ("cba", "cab", "bca", "bac", "acb", "abc")) |
122 %!assert (perms ("fobar"), sortrows (unique (perms ("fobar"), "rows"), -(1:5))) | 218 %!assert (perms ("fobar"), sortrows (unique (perms ("fobar"), "rows"), -(1:5))) |
123 %!assert (unique (perms (1:5)(:))', 1:5) | 219 %!assert (unique (perms (1:5)(:))', 1:5) |
124 %!assert (perms (int8 (1:4)), int8 (perms (1:4))) | 220 %!assert (perms (int8 (1:4)), int8 (perms (1:4))) |
125 | 221 |
222 %!assert (sortrows (perms ("abb", "unique")), ["abb"; "bab"; "bba"]); | |
223 %!assert (size (perms ([1 1 1 1 2 2 2 3 3], "unique")), [1260 9]) | |
224 %!assert (size (perms (int8([1 1 1 1 1 2 2 2 2 3 3 3]), "unique")), [27720 12]) | |
225 | |
126 %!error <Invalid call> perms () | 226 %!error <Invalid call> perms () |
227 %!error <.*Second option must be the string.*> perms (1:5, "nonexistent") | |
228 %!error <.*Second option must be the string.*> perms (1:5, {"foo"}) | |
127 | 229 |
128 ## Should work for any array type, such as cells and structs, and not | 230 ## Should work for any array type, such as cells and structs, and not |
129 ## only for numeric data. | 231 ## only for numeric data. |
130 | 232 |
131 %!assert <*52431> (perms ({1}), {1}) | 233 %!assert <*52431> (perms ({1}), {1}) |