Mercurial > octave
comparison scripts/set/uniquetol.m @ 31120:4581402b1c5b
uniquetol.m: Simplify code for "ByRows" option (bug #59850).
Use standard form for error() messages and update input validation BIST tests
to pass.
* uniquetol.m: Use "real" instead of "non-complex" in documentation and in
error() messages. Update input validation BIST tests to pass with new
messages. Update documentation example so that text matches actual output
of Octave. Use isreal() rather than iscomplex() in input validation.
New variable "calc_indices" which indicates whether outputs ia, ic should
be calculated. Use "calc_indices" to reduce running unnecessary code.
In ByRows code, eliminate Iall variable and use J for the same purpose.
Eliminate linear search ("any (Iall == i)") with direct lookup ("if (J(i))").
Eliminate variables sumeq, ii. Introduce intermediate variable "Arow_i" for
clarity. Rename "equ" to "eq_rows" for clarity. Use '!' instead of '~'
for logical negation.
author | Rik <rik@octave.org> |
---|---|
date | Tue, 05 Jul 2022 17:14:44 -0700 |
parents | df030ac26390 |
children | fd29c7a50a78 |
comparison
equal
deleted
inserted
replaced
31119:df030ac26390 | 31120:4581402b1c5b |
---|---|
31 ## Return the unique elements of @var{A} within tolerance @var{tol}. | 31 ## Return the unique elements of @var{A} within tolerance @var{tol}. |
32 ## | 32 ## |
33 ## Two values, @var{x} and @var{y}, are within relative tolerance if | 33 ## Two values, @var{x} and @var{y}, are within relative tolerance if |
34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. | 34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. |
35 ## | 35 ## |
36 ## The input @var{A} must be a non-complex floating point type (double or | 36 ## The input @var{A} must be a real (non-complex) floating point type (double |
37 ## single). | 37 ## or single). |
38 ## | 38 ## |
39 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double | 39 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double |
40 ## precision input or 1e-6 for single precision input. | 40 ## precision input or 1e-6 for single precision input. |
41 ## | 41 ## |
42 ## The function may also be called with the following optional property/value | 42 ## The function may also be called with the following optional property/value |
88 ## @group | 88 ## @group |
89 ## x = [1:5]; | 89 ## x = [1:5]; |
90 ## ## Inverse_Function (Function (x)) should return exactly x | 90 ## ## Inverse_Function (Function (x)) should return exactly x |
91 ## y = exp (log (x)); | 91 ## y = exp (log (x)); |
92 ## D = unique ([x, y]) | 92 ## D = unique ([x, y]) |
93 ## @result{} [1.0000 2.0000 3.0000 3.0000 4.0000 5.0000 5.0000] | 93 ## @result{} [1 2 3 3 4 5 5] |
94 ## C = uniquetol ([x, y]) | 94 ## C = uniquetol ([x, y]) |
95 ## @result{} [1 2 3 4 5] | 95 ## @result{} [1 2 3 4 5] |
96 ## @end group | 96 ## @end group |
97 ## @end example | 97 ## @end example |
98 ## | 98 ## |
103 | 103 |
104 if (nargin < 1) | 104 if (nargin < 1) |
105 print_usage (); | 105 print_usage (); |
106 endif | 106 endif |
107 | 107 |
108 if (! isfloat (A) || iscomplex (A)) | 108 if (! (isfloat (A) && isreal (A))) |
109 error ("Octave:uniquetol:unsupported-type", | 109 error ("Octave:uniquetol:unsupported-type", |
110 "uniquetol: A must be a double or single precision non-complex array"); | 110 "uniquetol: A must be a real floating point array"); |
111 endif | 111 endif |
112 | 112 |
113 if (nargin == 1 || ischar (varargin{1})) | 113 if (nargin == 1 || ischar (varargin{1})) |
114 tol = ifelse (isa (A, "double"), 1e-12, 1e-6); | 114 tol = ifelse (isa (A, "double"), 1e-12, 1e-6); |
115 elseif (! (isfloat (varargin{1}) && isscalar (varargin{1})) | |
116 || iscomplex (varargin{1})) | |
117 error ("Octave:uniquetol:unsupported-type", | |
118 "uniquetol: TOL must be a double or single precision non-complex scalar"); | |
119 else | 115 else |
120 tol = varargin{1}; | 116 tol = varargin{1}; |
121 varargin(1) = []; | 117 varargin(1) = []; |
118 if (! (isfloat (tol) && isreal (tol) && isscalar (tol))) | |
119 error ("Octave:uniquetol:unsupported-type", | |
120 "uniquetol: TOL must be a real floating point scalar"); | |
121 endif | |
122 endif | 122 endif |
123 | 123 |
124 if (mod (numel (varargin), 2)) | 124 if (mod (numel (varargin), 2)) |
125 error ("uniquetol: PROPERTY/VALUE arguments must be passed in pairs"); | 125 error ("uniquetol: PROPERTY/VALUE arguments must occur in pairs"); |
126 endif | 126 endif |
127 | 127 |
128 by_rows = false; | 128 by_rows = false; |
129 output_all_indices = false; | 129 output_all_indices = false; |
130 data_scale = []; | 130 data_scale = []; |
131 calc_indices = nargout > 1; | |
131 | 132 |
132 for k = 1:2:numel (varargin) | 133 for k = 1:2:numel (varargin) |
133 if (! ischar (varargin{k})) | 134 if (! ischar (varargin{k})) |
134 error ("uniquetol: PROPERTY must be a string"); | 135 error ("uniquetol: PROPERTY must be a string"); |
135 endif | 136 endif |
138 by_rows = logical (varargin{k+1}); | 139 by_rows = logical (varargin{k+1}); |
139 if (by_rows && ndims (A) > 2) | 140 if (by_rows && ndims (A) > 2) |
140 error ('uniquetol: A must be a 2-D array when "ByRows" is true'); | 141 error ('uniquetol: A must be a 2-D array when "ByRows" is true'); |
141 endif | 142 endif |
142 elseif (strcmpi (varargin{k}, "OutputAllIndices")) | 143 elseif (strcmpi (varargin{k}, "OutputAllIndices")) |
143 output_all_indices = logical (varargin{k+1}); | 144 output_all_indices = logical (varargin{k+1}) & calc_indices; |
144 elseif (strcmpi (varargin{k}, "DataScale")) | 145 elseif (strcmpi (varargin{k}, "DataScale")) |
145 data_scale = varargin{k+1}(:).'; | 146 data_scale = varargin{k+1}(:).'; |
146 if (! isfloat (data_scale) || iscomplex (data_scale) | 147 if (! isfloat (data_scale) || iscomplex (data_scale) |
147 || any (data_scale(:) < 0) || any (isnan (data_scale(:)))) | 148 || any (data_scale(:) < 0) || any (isnan (data_scale(:)))) |
148 error ("uniquetol: DataScale must be a non-NaN, positive floating point scalar or vector"); | 149 error ("uniquetol: DataScale must be a positive floating point scalar or vector, without NaNs"); |
149 endif | 150 endif |
150 cols_data_scale = columns (data_scale); | 151 cols_data_scale = columns (data_scale); |
151 if (cols_data_scale != 1 && cols_data_scale != columns (A)) | 152 if (cols_data_scale != 1 && cols_data_scale != columns (A)) |
152 error ("uniquetol: invalid DataScale size"); | 153 error ("uniquetol: invalid DataScale size"); |
153 endif | 154 endif |
155 error ("uniquetol: unknown property '%s'", varargin{k}); | 156 error ("uniquetol: unknown property '%s'", varargin{k}); |
156 endif | 157 endif |
157 endfor | 158 endfor |
158 | 159 |
159 if (isempty (A)) | 160 if (isempty (A)) |
161 ## hack for Matlab empty input compatibility | |
160 sz_A = size (A); | 162 sz_A = size (A); |
161 ## hack for Matlab empty input compatibility | |
162 if (by_rows) | 163 if (by_rows) |
163 c = A; | 164 c = A; |
164 sz_A(2) = 1; | 165 sz_A(2) = 1; |
165 ia = ones (sz_A); | 166 ia = ones (sz_A); |
166 ic = ones (sz_A); | 167 ic = ones (sz_A); |
167 else | 168 else |
168 c = ones (0,1); | 169 c = ones (0, 1, class (A)); |
169 if (sz_A(1) == 1) | 170 if (sz_A(1) == 1) |
170 c = c.'; | 171 c = c.'; |
171 endif | 172 endif |
172 ia = ones (0,1); | 173 ia = ones (0, 1); |
173 ic = ones (0,1); | 174 ic = ones (0, 1); |
174 endif | |
175 if (isa (A, "single")) | |
176 ## c follows class of A, ia and ic are always class "double". | |
177 c = single (c); | |
178 endif | 175 endif |
179 return; | 176 return; |
180 endif | 177 endif |
181 | 178 |
182 if (isempty (data_scale)) | 179 if (isempty (data_scale)) |
184 endif | 181 endif |
185 | 182 |
186 tol *= data_scale; | 183 tol *= data_scale; |
187 | 184 |
188 if (by_rows) | 185 if (by_rows) |
189 | 186 ## Start matrix in sorted order, retain sorting and inverting indices. |
190 ##start matrix in sorted order, retain sorting and inverting indices | 187 if (calc_indices) |
191 [A, srtA] = sortrows (A); | 188 [A, srtA] = sortrows (A); |
192 [~, inv_srtA] = sort (srtA); | 189 [~, inv_srtA] = sort (srtA); |
190 else | |
191 A = sortrows (A); | |
192 endif | |
193 | 193 |
194 [nr, nc] = size (A); | 194 [nr, nc] = size (A); |
195 Iall = zeros (nr, 1); | 195 I = zeros (nr, 1); |
196 I = NaN (nc, 1); | |
197 ia = {}; | 196 ia = {}; |
198 J = NaN (nc, 1); | 197 J = zeros (nr, 1); |
199 j = 1; | 198 j = 1; |
200 ii = 0; | |
201 | 199 |
202 for i = 1:nr | 200 for i = 1:nr |
203 if (any (Iall == i)) | 201 if (J(i)) |
204 continue; | 202 continue; # row previously compared equal |
203 endif | |
204 | |
205 Arow_i = A(i,:); | |
206 eq_rows = all (abs (A - Arow_i) <= tol, 2); | |
207 eq_rows(i,1) = eq_rows(i,1) || any (! isfinite (Arow_i), 2); | |
208 if (output_all_indices) | |
209 ia_tmp = find (eq_rows); | |
210 ia{end+1,1} = sort (srtA(ia_tmp)); | |
205 else | 211 else |
206 equ = all (abs (A - A(i,:)) <= tol, 2); | 212 ia_tmp = find (eq_rows, 1); |
207 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2); | 213 endif |
208 sumeq = sum (equ); | 214 I(j) = ia_tmp(1); |
209 ia_tmp = find (equ); | 215 J(eq_rows) = j; |
210 if (output_all_indices) | 216 j += 1; |
211 ia{end+1,1} = sort (srtA(ia_tmp)); | |
212 endif | |
213 Iall(ii+(1:sumeq)) = ia_tmp; | |
214 I(j) = ia_tmp(1); | |
215 J(equ) = j; | |
216 ii += sumeq; | |
217 j += 1; | |
218 endif | |
219 endfor | 217 endfor |
220 | 218 |
221 I(isnan (I)) = []; | 219 I = I(1:j-1); |
222 J(isnan (J)) = []; | |
223 c = A(I,:); | 220 c = A(I,:); |
224 | 221 |
225 if (! output_all_indices) | 222 if (calc_indices) |
226 ia = srtA(I(1:j-1)); | 223 if (! output_all_indices) |
227 endif | 224 ia = srtA(I(1:j-1)); |
228 | 225 endif |
229 ic = J(inv_srtA); | 226 ic = J(inv_srtA); |
227 endif | |
230 | 228 |
231 else | 229 else |
232 isrowvec = isrow (A); | 230 isrowvec = isrow (A); |
233 A = A(:); | 231 A = A(:); |
234 nr = rows (A); | 232 nr = rows (A); |
275 ia = sAi(ue); | 273 ia = sAi(ue); |
276 endif | 274 endif |
277 | 275 |
278 if (anyisnanA) | 276 if (anyisnanA) |
279 rowsc1 = [1:sum(isnanA(:))]'; | 277 rowsc1 = [1:sum(isnanA(:))]'; |
280 if (~all (isnanA)) | 278 if (! all (isnanA)) |
281 rowsc1 += rows (c); | 279 rowsc1 += rows (c); |
282 endif | 280 endif |
283 c(rowsc1) = NaN; | 281 c(rowsc1) = NaN; |
284 ic(isnanA) = rowsc1; | 282 ic(isnanA) = rowsc1; |
285 if (output_all_indices) | 283 if (output_all_indices) |
286 ia(rowsc1) = num2cell (findisnanA); | 284 ia(rowsc1) = num2cell (findisnanA); |
287 else | 285 else |
288 ia(rowsc1) = findisnanA; | 286 ia(rowsc1) = findisnanA; |
289 endif | 287 endif |
290 | 288 |
291 ## if numel(c) was 1, appending NaNs creates a row vector instead of | 289 ## if numel (c) was 1, appending NaNs creates a row vector instead of |
292 ## expected column vector. | 290 ## expected column vector. |
293 if (isrow (c)) | 291 if (isrow (c)) |
294 c = c.'; | 292 c = c.'; |
295 endif | 293 endif |
296 endif | 294 endif |
297 | 295 |
298 ## Matlab compatibility - outputs are column vectors unless the input | 296 ## Matlab compatibility: Outputs are column vectors unless the input |
299 ## is a row vector, in which case the output c is also a row vector. | 297 ## is a row vector, in which case the output c is also a row vector. |
300 ## ia and ic are always column vectors. (verified Matlab 2022a) | 298 ## ia and ic are always column vectors. (verified Matlab 2022a) |
301 if (isrowvec) | 299 if (isrowvec) |
302 c = c.'; | 300 c = c.'; |
303 endif | 301 endif |
304 endif | 302 endif |
303 | |
305 endfunction | 304 endfunction |
306 | 305 |
307 | 306 |
308 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) | 307 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) |
309 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), | 308 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), |
406 %! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5}); | 405 %! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5}); |
407 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true); | 406 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true); |
408 %! assert (class (c), "single"); | 407 %! assert (class (c), "single"); |
409 %! assert (class (ia{1}), "double"); | 408 %! assert (class (ia{1}), "double"); |
410 %! assert (class (ic), "double"); | 409 %! assert (class (ic), "double"); |
411 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true); | 410 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "ByRows", true); |
412 %! assert (ia, {2;3;1;6;4;5}); | 411 %! assert (ia, {2;3;1;6;4;5}); |
413 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true); | 412 %! [c, ia, ic] = uniquetol (single (a), |
413 %! "OutputAllIndices", true, "ByRows", true); | |
414 %! assert (class (c), "single"); | 414 %! assert (class (c), "single"); |
415 %! assert (class (ia{1}), "double"); | 415 %! assert (class (ia{1}), "double"); |
416 %! assert (class (ic), "double"); | 416 %! assert (class (ic), "double"); |
417 | 417 |
418 ## Test empty input compatibility | 418 ## Test empty input compatibility |
478 %! assert (class (ic), "double"); | 478 %! assert (class (ic), "double"); |
479 | 479 |
480 | 480 |
481 ## Test input validation | 481 ## Test input validation |
482 %!error <Invalid call> uniquetol () | 482 %!error <Invalid call> uniquetol () |
483 %!error <A must be a double or single precision> uniquetol (int8 (1)) | 483 %!error <A must be a real floating point array> uniquetol (int8 (1)) |
484 %!error <A must be .* non-complex> uniquetol (1i) | 484 %!error <A must be a real floating point array> uniquetol (1i) |
485 %!error <TOL must be a double .* precision> uniquetol (1, int8 (1)) | 485 %!error <TOL must be a real floating point scalar> uniquetol (1, int8 (1)) |
486 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2]) | 486 %!error <TOL must be a real floating point scalar> uniquetol (1, [1, 2]) |
487 %!error <TOL must be .* non-complex> uniquetol (1, 1i) | 487 %!error <TOL must be a real floating point scalar> uniquetol (1, 1i) |
488 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows") | 488 %!error <arguments must occur in pairs> uniquetol (1, 2, "byrows") |
489 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar") | 489 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar") |
490 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true) | 490 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true) |
491 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true) | 491 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true) |
492 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true) | 492 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true) |
493 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true) | 493 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true) |
494 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1') | 494 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1') |
495 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i) | |
495 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1) | 496 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1) |
496 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i) | 497 %!error <DataScale must be .* without NaNs> uniquetol (1, "DataScale", NaN) |
497 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN) | |
498 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2]) | 498 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2]) |
499 %!error <unknown property 'foo'> uniquetol (1, "foo", "bar") | 499 %!error <unknown property 'foo'> uniquetol (1, "foo", "bar") |
500 %!error <unknown property 'foo'> uniquetol (1, 2, "foo", "bar") | 500 %!error <unknown property 'foo'> uniquetol (1, 2, "foo", "bar") |