comparison scripts/set/uniquetol.m @ 31124:df030ac26390

uniquetol.m: improve matlab compatibility and add byrows sorting (bug #59850) * /scripts/set/uniquetol.m: improve empty and NaN handling, add sorting to 'byrows' output, ensure ia and ic outputs have column orientation for arrays and cells, verify consistent single class handling, add BISTs for aforementioned cases, and update docstring to note non-complex input requirement.
author Nicholas R. Jankowski <jankowski.nicholas@gmail.com>
date Tue, 05 Jul 2022 15:22:46 -0400
parents 796f54d4ddbf
children 4581402b1c5b
comparison
equal deleted inserted replaced
31123:18b8f73595e0 31124:df030ac26390
31 ## Return the unique elements of @var{A} within tolerance @var{tol}. 31 ## Return the unique elements of @var{A} within tolerance @var{tol}.
32 ## 32 ##
33 ## Two values, @var{x} and @var{y}, are within relative tolerance if 33 ## Two values, @var{x} and @var{y}, are within relative tolerance if
34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. 34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
35 ## 35 ##
36 ## The input @var{A} must be a floating point type (double or single). 36 ## The input @var{A} must be a non-complex floating point type (double or
37 ## single).
37 ## 38 ##
38 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double 39 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
39 ## precision input or 1e-6 for single precision input. 40 ## precision input or 1e-6 for single precision input.
40 ## 41 ##
41 ## The function may also be called with the following optional property/value 42 ## The function may also be called with the following optional property/value
96 ## @end example 97 ## @end example
97 ## 98 ##
98 ## @seealso{unique, union, intersect, setdiff, setxor, ismember} 99 ## @seealso{unique, union, intersect, setdiff, setxor, ismember}
99 ## @end deftypefn 100 ## @end deftypefn
100 101
101
102 function [c, ia, ic] = uniquetol (A, varargin) 102 function [c, ia, ic] = uniquetol (A, varargin)
103 103
104 if (nargin < 1) 104 if (nargin < 1)
105 print_usage (); 105 print_usage ();
106 endif
107
108 if (isempty (A))
109 c = A;
110 ia = [];
111 ic = [];
112 return;
113 endif 106 endif
114 107
115 if (! isfloat (A) || iscomplex (A)) 108 if (! isfloat (A) || iscomplex (A))
116 error ("Octave:uniquetol:unsupported-type", 109 error ("Octave:uniquetol:unsupported-type",
117 "uniquetol: A must be a double or single precision non-complex array"); 110 "uniquetol: A must be a double or single precision non-complex array");
161 else 154 else
162 error ("uniquetol: unknown property '%s'", varargin{k}); 155 error ("uniquetol: unknown property '%s'", varargin{k});
163 endif 156 endif
164 endfor 157 endfor
165 158
159 if (isempty (A))
160 sz_A = size (A);
161 ## hack for Matlab empty input compatibility
162 if (by_rows)
163 c = A;
164 sz_A(2) = 1;
165 ia = ones (sz_A);
166 ic = ones (sz_A);
167 else
168 c = ones (0,1);
169 if (sz_A(1) == 1)
170 c = c.';
171 endif
172 ia = ones (0,1);
173 ic = ones (0,1);
174 endif
175 if (isa (A, "single"))
176 ## c follows class of A, ia and ic are always class "double".
177 c = single (c);
178 endif
179 return;
180 endif
181
166 if (isempty (data_scale)) 182 if (isempty (data_scale))
167 data_scale = max (abs (A(! isinf (A))(:))); 183 data_scale = max (abs (A(! isinf (A))(:)));
168 endif 184 endif
169 185
170 tol *= data_scale; 186 tol *= data_scale;
171 187
172 if (by_rows) 188 if (by_rows)
173 189
174 nr = rows (A); 190 ##start matrix in sorted order, retain sorting and inverting indices
175 nc = columns (A); 191 [A, srtA] = sortrows (A);
192 [~, inv_srtA] = sort (srtA);
193
194 [nr, nc] = size (A);
176 Iall = zeros (nr, 1); 195 Iall = zeros (nr, 1);
177 I = NaN (nc, 1); 196 I = NaN (nc, 1);
178 ia = {}; 197 ia = {};
179 J = NaN (nc, 1); 198 J = NaN (nc, 1);
180 j = 1; 199 j = 1;
187 equ = all (abs (A - A(i,:)) <= tol, 2); 206 equ = all (abs (A - A(i,:)) <= tol, 2);
188 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2); 207 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2);
189 sumeq = sum (equ); 208 sumeq = sum (equ);
190 ia_tmp = find (equ); 209 ia_tmp = find (equ);
191 if (output_all_indices) 210 if (output_all_indices)
192 ia{end+1} = ia_tmp; 211 ia{end+1,1} = sort (srtA(ia_tmp));
193 endif 212 endif
194 Iall(ii+(1:sumeq)) = ia_tmp; 213 Iall(ii+(1:sumeq)) = ia_tmp;
195 I(j) = ia_tmp(1); 214 I(j) = ia_tmp(1);
196 J(equ) = j; 215 J(equ) = j;
197 ii += sumeq; 216 ii += sumeq;
202 I(isnan (I)) = []; 221 I(isnan (I)) = [];
203 J(isnan (J)) = []; 222 J(isnan (J)) = [];
204 c = A(I,:); 223 c = A(I,:);
205 224
206 if (! output_all_indices) 225 if (! output_all_indices)
207 ia = I(1:j-1); 226 ia = srtA(I(1:j-1));
208 endif 227 endif
209 ic = J; 228
229 ic = J(inv_srtA);
210 230
211 else 231 else
212 isrowvec = isrow (A); 232 isrowvec = isrow (A);
213 A = A(:); 233 A = A(:);
214 nr = rows (A); 234 nr = rows (A);
245 else 265 else
246 findisnanA = []; 266 findisnanA = [];
247 endif 267 endif
248 if (output_all_indices) 268 if (output_all_indices)
249 nu = cumsumue(end); 269 nu = cumsumue(end);
250 ia = cell (1, nu); 270 ia = cell (nu, 1);
251 for k = 1:nu 271 for k = 1:nu
252 ia{k} = setdiff (sAi(cumsumue==k), findisnanA); 272 ia{k} = setdiff (sAi(cumsumue==k), findisnanA);
253 endfor 273 endfor
254 else 274 else
255 ia = sAi(ue); 275 ia = sAi(ue);
256 endif 276 endif
257 277
258 if (anyisnanA) 278 if (anyisnanA)
259 rowsc1 = rows (c) + (1:sum (isnanA)); 279 rowsc1 = [1:sum(isnanA(:))]';
280 if (~all (isnanA))
281 rowsc1 += rows (c);
282 endif
260 c(rowsc1) = NaN; 283 c(rowsc1) = NaN;
261 ia(rowsc1) = findisnanA;
262 ic(isnanA) = rowsc1; 284 ic(isnanA) = rowsc1;
263 endif 285 if (output_all_indices)
264 286 ia(rowsc1) = num2cell (findisnanA);
265 ## FIXME: Matlab-compatible orientation of output 287 else
266 ## Actually, Matlab prefers row vectors (2021/03/24), but this is different 288 ia(rowsc1) = findisnanA;
267 ## from all the other set functions which prefer column vectors. Assume 289 endif
268 ## that this is a bug in Matlab's implementation and prefer column vectors. 290
291 ## if numel(c) was 1, appending NaNs creates a row vector instead of
292 ## expected column vector.
293 if (isrow (c))
294 c = c.';
295 endif
296 endif
297
298 ## Matlab compatibility - outputs are column vectors unless the input
299 ## is a row vector, in which case the output c is also a row vector.
300 ## ia and ic are always column vectors. (verified Matlab 2022a)
269 if (isrowvec) 301 if (isrowvec)
270 c = c.'; 302 c = c.';
271 endif 303 endif
272 304 endif
273 endif
274
275 endfunction 305 endfunction
276 306
277 307
278 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) 308 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2])
279 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), 309 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true),
280 %! [1 1 2; 1 0 1]) 310 %! [1 0 1; 1 1 2])
281 %!assert (uniquetol ([]), [])
282 %!assert (uniquetol ([1]), [1]) 311 %!assert (uniquetol ([1]), [1])
283 %!assert (uniquetol ([2, 1]), [1, 2]); 312 %!assert (uniquetol ([2, 1]), [1, 2]);
284 %!assert (uniquetol ([1; 2]), [1; 2]) 313 %!assert (uniquetol ([1; 2]), [1; 2])
285 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]); 314 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]);
286 %!assert (uniquetol (zeros (1, 0)), zeros (1, 0));
287 %!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0))
288 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4]) 315 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4])
289 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4]) 316 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4])
290 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4]) 317 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4])
291 %!assert (uniquetol (sparse ([2,0;2,0])), sparse ([0;2])) 318 %!assert (uniquetol (sparse ([2,0;2,0])), sparse ([0;2]))
292 %!assert (uniquetol (sparse ([1,2;2,3])), sparse ([1;2;3])) 319 %!assert (uniquetol (sparse ([1,2;2,3])), sparse ([1;2;3]))
294 %! single ([1,2,2,3,2,4])) 321 %! single ([1,2,2,3,2,4]))
295 %!assert (uniquetol (single ([1,2,2,3,2,4])), single ([1,2,3,4])) 322 %!assert (uniquetol (single ([1,2,2,3,2,4])), single ([1,2,3,4]))
296 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true), 323 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true),
297 %! single ([1;2;3;4])) 324 %! single ([1;2;3;4]))
298 325
326 ## Test 2D array sorting
327 %!test
328 %! a = [magic(3); 2 * magic(3)];
329 %! assert (uniquetol (a), [1:10,12,14,16,18]')
330 %! assert (uniquetol (a, "byrows", true), sortrows (a))
331
299 ## Matlab compatibility of output 332 ## Matlab compatibility of output
300 %!test 333 %!test
301 %! x = 1:0.045:3; 334 %! x = 1:0.045:3;
302 %! y = uniquetol (x, 0.1, "datascale", 1); 335 %! y = uniquetol (x, 0.1, "datascale", 1);
303 %! assert (y(1:4), [1, 1.135, 1.27, 1.405]); 336 %! assert (y(1:4), [1, 1.135, 1.27, 1.405]);
312 ## Test index vector return arguments with "ByRows" 345 ## Test index vector return arguments with "ByRows"
313 %!test 346 %!test
314 %! A = [2, 3, 4; 2, 3, 4]; 347 %! A = [2, 3, 4; 2, 3, 4];
315 %! [c, ia, ic] = uniquetol (A, "byrows", true); 348 %! [c, ia, ic] = uniquetol (A, "byrows", true);
316 %! assert (c, [2, 3, 4]); 349 %! assert (c, [2, 3, 4]);
317 %! assert (A(ia,:), c); 350 %! assert (ia, 1);
318 %! assert (c(ic,:), A); 351 %! assert (ic, [1;1]);
319 352
320 %!test 353 %!test
321 %! x = (2:7)'*pi; 354 %! x = (2:7)'*pi;
322 %! y = exp (log (x)); 355 %! y = exp (log (x));
323 %! C = uniquetol ([x; y]); 356 %! C = uniquetol ([x; y]);
326 ## Test "ByRows" Property 359 ## Test "ByRows" Property
327 %!test 360 %!test
328 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95]; 361 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95];
329 %! B = log (exp (A)); 362 %! B = log (exp (A));
330 %! C = uniquetol ([A; B], "ByRows", true); 363 %! C = uniquetol ([A; B], "ByRows", true);
331 %! assert (C, A); 364 %! assert (C, sortrows(A), 10*eps);
332 365
333 ## Test "DataScale" Property 366 ## Test "DataScale" Property
334 %!test 367 %!test
335 %! x = 10^11; 368 %! x = 10^11;
336 %! C = uniquetol ([x, exp(log(x))], 1e-6, "DataScale", 1); 369 %! C = uniquetol ([x, exp(log(x))], 1e-6, "DataScale", 1);
339 ## Test "OutputAllIndices" Property 372 ## Test "OutputAllIndices" Property
340 %!test 373 %!test
341 %! A = [.1 .2 .3 10]; 374 %! A = [.1 .2 .3 10];
342 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true); 375 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true);
343 %! assert (C, [.1, 10]); 376 %! assert (C, [.1, 10]);
344 %! assert (ia, {(1:3)', 4}); 377 %! assert (ia, {(1:3)'; 4});
345 %! assert (ic, [1; 1; 1; 2]); 378 %! assert (ic, [1; 1; 1; 2]);
379
380 ## Test NaN inputs
381 %!assert (uniquetol (NaN), NaN)
382 %!assert (uniquetol ([NaN NaN]), [NaN NaN])
383 %!assert (uniquetol ([NaN NaN]'), [NaN NaN]')
384 %!assert (uniquetol (NaN(2,2)), NaN(4,1))
385
386 %!test
387 %! a = [magic(3); 2 * magic(3)];
388 %! a(4:5) = NaN;
389 %! [c, ia, ic] = uniquetol (a);
390 %! assert (c, [1:10,12,14,18, NaN, NaN]');
391 %! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]');
392 %! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]');
393 %! [c, ia, ic] = uniquetol (single (a));
394 %! assert (class (c), "single");
395 %! assert (class (ia), "double");
396 %! assert (class (ic), "double");
397 %! [c, ia, ic] = uniquetol (a, "ByRows", true);
398 %! assert (c, sortrows (a));
399 %! assert (ia, [2,3,1,6,4,5]');
400 %! assert (ic, [3,1,2,5,6,4]');
401 %! [c, ia, ic] = uniquetol (single (a), "ByRows", true);
402 %! assert (class (c), "single");
403 %! assert (class (ia), "double");
404 %! assert (class (ic), "double");
405 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true);
406 %! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5});
407 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true);
408 %! assert (class (c), "single");
409 %! assert (class (ia{1}), "double");
410 %! assert (class (ic), "double");
411 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true);
412 %! assert (ia, {2;3;1;6;4;5});
413 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true);
414 %! assert (class (c), "single");
415 %! assert (class (ia{1}), "double");
416 %! assert (class (ic), "double");
417
418 ## Test empty input compatibility
419 %!test
420 %! [c, ia, ic] = uniquetol ([]);
421 %! assert (c, ones (0,1));
422 %! assert (ia, ones (0,1));
423 %! assert (ic, ones (0,1));
424 %!test
425 %! [c, ia, ic] = uniquetol ([], "byrows", true);
426 %! assert (c, []);
427 %! assert (ia, ones (0,1));
428 %! assert (ic, ones (0,1));
429 %!test
430 %! [c, ia, ic] = uniquetol (ones (0,1));
431 %! assert (c, ones (0,1));
432 %! assert (ia, ones (0,1));
433 %! assert (ic, ones (0,1));
434 %!test
435 %! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true);
436 %! assert (c, ones (0,1));
437 %! assert (ia, ones (0,1));
438 %! assert (ic, ones (0,1));
439 %!test
440 %! [c, ia, ic] = uniquetol (ones (1,0));
441 %! assert (c, ones (1,0));
442 %! assert (ia, ones (0,1));
443 %! assert (ic, ones (0,1));
444 %!test
445 %! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true);
446 %! assert (c, ones (1,0));
447 %! assert (ia, 1);
448 %! assert (ic, 1);
449 %!test
450 %! [c, ia, ic] = uniquetol (ones (1,0,2));
451 %! assert (c, ones (1,0));
452 %! assert (ia, ones (0,1));
453 %! assert (ic, ones (0,1));
454 %!test
455 %! [c, ia, ic] = uniquetol (ones (0,1,2));
456 %! assert (c, ones (0,1));
457 %! assert (ia, ones (0,1));
458 %! assert (ic, ones (0,1));
459 %!test
460 %! [c, ia, ic] = uniquetol (ones (1,2,0));
461 %! assert (c, ones (1,0));
462 %! assert (ia, ones (0,1));
463 %! assert (ic, ones (0,1));
464 %!test
465 %! [c, ia, ic] = uniquetol (single ([]));
466 %! assert (class (c), "single");
467 %! assert (class (ia), "double");
468 %! assert (class (ic), "double");
469 %!test
470 %! [c, ia, ic] = uniquetol (single ([]), "byrows", true);
471 %! assert (class (c), "single");
472 %! assert (class (ia), "double");
473 %! assert (class (ic), "double");
474 %!test
475 %! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true);
476 %! assert (class (c), "single");
477 %! assert (class (ia), "double");
478 %! assert (class (ic), "double");
479
346 480
347 ## Test input validation 481 ## Test input validation
348 %!error <Invalid call> uniquetol () 482 %!error <Invalid call> uniquetol ()
349 %!error <A must be a double or single precision> uniquetol (int8 (1)) 483 %!error <A must be a double or single precision> uniquetol (int8 (1))
350 %!error <A must be .* non-complex> uniquetol (1i) 484 %!error <A must be .* non-complex> uniquetol (1i)
352 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2]) 486 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2])
353 %!error <TOL must be .* non-complex> uniquetol (1, 1i) 487 %!error <TOL must be .* non-complex> uniquetol (1, 1i)
354 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows") 488 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows")
355 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar") 489 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
356 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true) 490 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
491 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
492 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
493 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
357 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1') 494 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
358 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1) 495 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
359 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i) 496 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)
360 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN) 497 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN)
361 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2]) 498 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2])