comparison scripts/set/uniquetol.m @ 31120:4581402b1c5b

uniquetol.m: Simplify code for "ByRows" option (bug #59850). Use standard form for error() messages and update input validation BIST tests to pass. * uniquetol.m: Use "real" instead of "non-complex" in documentation and in error() messages. Update input validation BIST tests to pass with new messages. Update documentation example so that text matches actual output of Octave. Use isreal() rather than iscomplex() in input validation. New variable "calc_indices" which indicates whether outputs ia, ic should be calculated. Use "calc_indices" to reduce running unnecessary code. In ByRows code, eliminate Iall variable and use J for the same purpose. Eliminate linear search ("any (Iall == i)") with direct lookup ("if (J(i))"). Eliminate variables sumeq, ii. Introduce intermediate variable "Arow_i" for clarity. Rename "equ" to "eq_rows" for clarity. Use '!' instead of '~' for logical negation.
author Rik <rik@octave.org>
date Tue, 05 Jul 2022 17:14:44 -0700
parents df030ac26390
children fd29c7a50a78
comparison
equal deleted inserted replaced
31119:df030ac26390 31120:4581402b1c5b
31 ## Return the unique elements of @var{A} within tolerance @var{tol}. 31 ## Return the unique elements of @var{A} within tolerance @var{tol}.
32 ## 32 ##
33 ## Two values, @var{x} and @var{y}, are within relative tolerance if 33 ## Two values, @var{x} and @var{y}, are within relative tolerance if
34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. 34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
35 ## 35 ##
36 ## The input @var{A} must be a non-complex floating point type (double or 36 ## The input @var{A} must be a real (non-complex) floating point type (double
37 ## single). 37 ## or single).
38 ## 38 ##
39 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double 39 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
40 ## precision input or 1e-6 for single precision input. 40 ## precision input or 1e-6 for single precision input.
41 ## 41 ##
42 ## The function may also be called with the following optional property/value 42 ## The function may also be called with the following optional property/value
88 ## @group 88 ## @group
89 ## x = [1:5]; 89 ## x = [1:5];
90 ## ## Inverse_Function (Function (x)) should return exactly x 90 ## ## Inverse_Function (Function (x)) should return exactly x
91 ## y = exp (log (x)); 91 ## y = exp (log (x));
92 ## D = unique ([x, y]) 92 ## D = unique ([x, y])
93 ## @result{} [1.0000 2.0000 3.0000 3.0000 4.0000 5.0000 5.0000] 93 ## @result{} [1 2 3 3 4 5 5]
94 ## C = uniquetol ([x, y]) 94 ## C = uniquetol ([x, y])
95 ## @result{} [1 2 3 4 5] 95 ## @result{} [1 2 3 4 5]
96 ## @end group 96 ## @end group
97 ## @end example 97 ## @end example
98 ## 98 ##
103 103
104 if (nargin < 1) 104 if (nargin < 1)
105 print_usage (); 105 print_usage ();
106 endif 106 endif
107 107
108 if (! isfloat (A) || iscomplex (A)) 108 if (! (isfloat (A) && isreal (A)))
109 error ("Octave:uniquetol:unsupported-type", 109 error ("Octave:uniquetol:unsupported-type",
110 "uniquetol: A must be a double or single precision non-complex array"); 110 "uniquetol: A must be a real floating point array");
111 endif 111 endif
112 112
113 if (nargin == 1 || ischar (varargin{1})) 113 if (nargin == 1 || ischar (varargin{1}))
114 tol = ifelse (isa (A, "double"), 1e-12, 1e-6); 114 tol = ifelse (isa (A, "double"), 1e-12, 1e-6);
115 elseif (! (isfloat (varargin{1}) && isscalar (varargin{1}))
116 || iscomplex (varargin{1}))
117 error ("Octave:uniquetol:unsupported-type",
118 "uniquetol: TOL must be a double or single precision non-complex scalar");
119 else 115 else
120 tol = varargin{1}; 116 tol = varargin{1};
121 varargin(1) = []; 117 varargin(1) = [];
118 if (! (isfloat (tol) && isreal (tol) && isscalar (tol)))
119 error ("Octave:uniquetol:unsupported-type",
120 "uniquetol: TOL must be a real floating point scalar");
121 endif
122 endif 122 endif
123 123
124 if (mod (numel (varargin), 2)) 124 if (mod (numel (varargin), 2))
125 error ("uniquetol: PROPERTY/VALUE arguments must be passed in pairs"); 125 error ("uniquetol: PROPERTY/VALUE arguments must occur in pairs");
126 endif 126 endif
127 127
128 by_rows = false; 128 by_rows = false;
129 output_all_indices = false; 129 output_all_indices = false;
130 data_scale = []; 130 data_scale = [];
131 calc_indices = nargout > 1;
131 132
132 for k = 1:2:numel (varargin) 133 for k = 1:2:numel (varargin)
133 if (! ischar (varargin{k})) 134 if (! ischar (varargin{k}))
134 error ("uniquetol: PROPERTY must be a string"); 135 error ("uniquetol: PROPERTY must be a string");
135 endif 136 endif
138 by_rows = logical (varargin{k+1}); 139 by_rows = logical (varargin{k+1});
139 if (by_rows && ndims (A) > 2) 140 if (by_rows && ndims (A) > 2)
140 error ('uniquetol: A must be a 2-D array when "ByRows" is true'); 141 error ('uniquetol: A must be a 2-D array when "ByRows" is true');
141 endif 142 endif
142 elseif (strcmpi (varargin{k}, "OutputAllIndices")) 143 elseif (strcmpi (varargin{k}, "OutputAllIndices"))
143 output_all_indices = logical (varargin{k+1}); 144 output_all_indices = logical (varargin{k+1}) & calc_indices;
144 elseif (strcmpi (varargin{k}, "DataScale")) 145 elseif (strcmpi (varargin{k}, "DataScale"))
145 data_scale = varargin{k+1}(:).'; 146 data_scale = varargin{k+1}(:).';
146 if (! isfloat (data_scale) || iscomplex (data_scale) 147 if (! isfloat (data_scale) || iscomplex (data_scale)
147 || any (data_scale(:) < 0) || any (isnan (data_scale(:)))) 148 || any (data_scale(:) < 0) || any (isnan (data_scale(:))))
148 error ("uniquetol: DataScale must be a non-NaN, positive floating point scalar or vector"); 149 error ("uniquetol: DataScale must be a positive floating point scalar or vector, without NaNs");
149 endif 150 endif
150 cols_data_scale = columns (data_scale); 151 cols_data_scale = columns (data_scale);
151 if (cols_data_scale != 1 && cols_data_scale != columns (A)) 152 if (cols_data_scale != 1 && cols_data_scale != columns (A))
152 error ("uniquetol: invalid DataScale size"); 153 error ("uniquetol: invalid DataScale size");
153 endif 154 endif
155 error ("uniquetol: unknown property '%s'", varargin{k}); 156 error ("uniquetol: unknown property '%s'", varargin{k});
156 endif 157 endif
157 endfor 158 endfor
158 159
159 if (isempty (A)) 160 if (isempty (A))
161 ## hack for Matlab empty input compatibility
160 sz_A = size (A); 162 sz_A = size (A);
161 ## hack for Matlab empty input compatibility
162 if (by_rows) 163 if (by_rows)
163 c = A; 164 c = A;
164 sz_A(2) = 1; 165 sz_A(2) = 1;
165 ia = ones (sz_A); 166 ia = ones (sz_A);
166 ic = ones (sz_A); 167 ic = ones (sz_A);
167 else 168 else
168 c = ones (0,1); 169 c = ones (0, 1, class (A));
169 if (sz_A(1) == 1) 170 if (sz_A(1) == 1)
170 c = c.'; 171 c = c.';
171 endif 172 endif
172 ia = ones (0,1); 173 ia = ones (0, 1);
173 ic = ones (0,1); 174 ic = ones (0, 1);
174 endif
175 if (isa (A, "single"))
176 ## c follows class of A, ia and ic are always class "double".
177 c = single (c);
178 endif 175 endif
179 return; 176 return;
180 endif 177 endif
181 178
182 if (isempty (data_scale)) 179 if (isempty (data_scale))
184 endif 181 endif
185 182
186 tol *= data_scale; 183 tol *= data_scale;
187 184
188 if (by_rows) 185 if (by_rows)
189 186 ## Start matrix in sorted order, retain sorting and inverting indices.
190 ##start matrix in sorted order, retain sorting and inverting indices 187 if (calc_indices)
191 [A, srtA] = sortrows (A); 188 [A, srtA] = sortrows (A);
192 [~, inv_srtA] = sort (srtA); 189 [~, inv_srtA] = sort (srtA);
190 else
191 A = sortrows (A);
192 endif
193 193
194 [nr, nc] = size (A); 194 [nr, nc] = size (A);
195 Iall = zeros (nr, 1); 195 I = zeros (nr, 1);
196 I = NaN (nc, 1);
197 ia = {}; 196 ia = {};
198 J = NaN (nc, 1); 197 J = zeros (nr, 1);
199 j = 1; 198 j = 1;
200 ii = 0;
201 199
202 for i = 1:nr 200 for i = 1:nr
203 if (any (Iall == i)) 201 if (J(i))
204 continue; 202 continue; # row previously compared equal
203 endif
204
205 Arow_i = A(i,:);
206 eq_rows = all (abs (A - Arow_i) <= tol, 2);
207 eq_rows(i,1) = eq_rows(i,1) || any (! isfinite (Arow_i), 2);
208 if (output_all_indices)
209 ia_tmp = find (eq_rows);
210 ia{end+1,1} = sort (srtA(ia_tmp));
205 else 211 else
206 equ = all (abs (A - A(i,:)) <= tol, 2); 212 ia_tmp = find (eq_rows, 1);
207 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2); 213 endif
208 sumeq = sum (equ); 214 I(j) = ia_tmp(1);
209 ia_tmp = find (equ); 215 J(eq_rows) = j;
210 if (output_all_indices) 216 j += 1;
211 ia{end+1,1} = sort (srtA(ia_tmp));
212 endif
213 Iall(ii+(1:sumeq)) = ia_tmp;
214 I(j) = ia_tmp(1);
215 J(equ) = j;
216 ii += sumeq;
217 j += 1;
218 endif
219 endfor 217 endfor
220 218
221 I(isnan (I)) = []; 219 I = I(1:j-1);
222 J(isnan (J)) = [];
223 c = A(I,:); 220 c = A(I,:);
224 221
225 if (! output_all_indices) 222 if (calc_indices)
226 ia = srtA(I(1:j-1)); 223 if (! output_all_indices)
227 endif 224 ia = srtA(I(1:j-1));
228 225 endif
229 ic = J(inv_srtA); 226 ic = J(inv_srtA);
227 endif
230 228
231 else 229 else
232 isrowvec = isrow (A); 230 isrowvec = isrow (A);
233 A = A(:); 231 A = A(:);
234 nr = rows (A); 232 nr = rows (A);
275 ia = sAi(ue); 273 ia = sAi(ue);
276 endif 274 endif
277 275
278 if (anyisnanA) 276 if (anyisnanA)
279 rowsc1 = [1:sum(isnanA(:))]'; 277 rowsc1 = [1:sum(isnanA(:))]';
280 if (~all (isnanA)) 278 if (! all (isnanA))
281 rowsc1 += rows (c); 279 rowsc1 += rows (c);
282 endif 280 endif
283 c(rowsc1) = NaN; 281 c(rowsc1) = NaN;
284 ic(isnanA) = rowsc1; 282 ic(isnanA) = rowsc1;
285 if (output_all_indices) 283 if (output_all_indices)
286 ia(rowsc1) = num2cell (findisnanA); 284 ia(rowsc1) = num2cell (findisnanA);
287 else 285 else
288 ia(rowsc1) = findisnanA; 286 ia(rowsc1) = findisnanA;
289 endif 287 endif
290 288
291 ## if numel(c) was 1, appending NaNs creates a row vector instead of 289 ## if numel (c) was 1, appending NaNs creates a row vector instead of
292 ## expected column vector. 290 ## expected column vector.
293 if (isrow (c)) 291 if (isrow (c))
294 c = c.'; 292 c = c.';
295 endif 293 endif
296 endif 294 endif
297 295
298 ## Matlab compatibility - outputs are column vectors unless the input 296 ## Matlab compatibility: Outputs are column vectors unless the input
299 ## is a row vector, in which case the output c is also a row vector. 297 ## is a row vector, in which case the output c is also a row vector.
300 ## ia and ic are always column vectors. (verified Matlab 2022a) 298 ## ia and ic are always column vectors. (verified Matlab 2022a)
301 if (isrowvec) 299 if (isrowvec)
302 c = c.'; 300 c = c.';
303 endif 301 endif
304 endif 302 endif
303
305 endfunction 304 endfunction
306 305
307 306
308 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) 307 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2])
309 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), 308 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true),
406 %! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5}); 405 %! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5});
407 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true); 406 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true);
408 %! assert (class (c), "single"); 407 %! assert (class (c), "single");
409 %! assert (class (ia{1}), "double"); 408 %! assert (class (ia{1}), "double");
410 %! assert (class (ic), "double"); 409 %! assert (class (ic), "double");
411 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true); 410 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "ByRows", true);
412 %! assert (ia, {2;3;1;6;4;5}); 411 %! assert (ia, {2;3;1;6;4;5});
413 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true); 412 %! [c, ia, ic] = uniquetol (single (a),
413 %! "OutputAllIndices", true, "ByRows", true);
414 %! assert (class (c), "single"); 414 %! assert (class (c), "single");
415 %! assert (class (ia{1}), "double"); 415 %! assert (class (ia{1}), "double");
416 %! assert (class (ic), "double"); 416 %! assert (class (ic), "double");
417 417
418 ## Test empty input compatibility 418 ## Test empty input compatibility
478 %! assert (class (ic), "double"); 478 %! assert (class (ic), "double");
479 479
480 480
481 ## Test input validation 481 ## Test input validation
482 %!error <Invalid call> uniquetol () 482 %!error <Invalid call> uniquetol ()
483 %!error <A must be a double or single precision> uniquetol (int8 (1)) 483 %!error <A must be a real floating point array> uniquetol (int8 (1))
484 %!error <A must be .* non-complex> uniquetol (1i) 484 %!error <A must be a real floating point array> uniquetol (1i)
485 %!error <TOL must be a double .* precision> uniquetol (1, int8 (1)) 485 %!error <TOL must be a real floating point scalar> uniquetol (1, int8 (1))
486 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2]) 486 %!error <TOL must be a real floating point scalar> uniquetol (1, [1, 2])
487 %!error <TOL must be .* non-complex> uniquetol (1, 1i) 487 %!error <TOL must be a real floating point scalar> uniquetol (1, 1i)
488 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows") 488 %!error <arguments must occur in pairs> uniquetol (1, 2, "byrows")
489 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar") 489 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
490 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true) 490 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
491 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true) 491 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
492 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true) 492 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
493 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true) 493 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
494 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1') 494 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
495 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)
495 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1) 496 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
496 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i) 497 %!error <DataScale must be .* without NaNs> uniquetol (1, "DataScale", NaN)
497 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN)
498 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2]) 498 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2])
499 %!error <unknown property 'foo'> uniquetol (1, "foo", "bar") 499 %!error <unknown property 'foo'> uniquetol (1, "foo", "bar")
500 %!error <unknown property 'foo'> uniquetol (1, 2, "foo", "bar") 500 %!error <unknown property 'foo'> uniquetol (1, 2, "foo", "bar")