Mercurial > jwe > octave
comparison scripts/set/uniquetol.m @ 31124:df030ac26390
uniquetol.m: improve matlab compatibility and add byrows sorting (bug #59850)
* /scripts/set/uniquetol.m: improve empty and NaN handling, add sorting to
'byrows' output, ensure ia and ic outputs have column orientation for arrays
and cells, verify consistent single class handling, add BISTs for
aforementioned cases, and update docstring to note non-complex input
requirement.
author | Nicholas R. Jankowski <jankowski.nicholas@gmail.com> |
---|---|
date | Tue, 05 Jul 2022 15:22:46 -0400 |
parents | 796f54d4ddbf |
children | 4581402b1c5b |
comparison
equal
deleted
inserted
replaced
31123:18b8f73595e0 | 31124:df030ac26390 |
---|---|
31 ## Return the unique elements of @var{A} within tolerance @var{tol}. | 31 ## Return the unique elements of @var{A} within tolerance @var{tol}. |
32 ## | 32 ## |
33 ## Two values, @var{x} and @var{y}, are within relative tolerance if | 33 ## Two values, @var{x} and @var{y}, are within relative tolerance if |
34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. | 34 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. |
35 ## | 35 ## |
36 ## The input @var{A} must be a floating point type (double or single). | 36 ## The input @var{A} must be a non-complex floating point type (double or |
37 ## single). | |
37 ## | 38 ## |
38 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double | 39 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double |
39 ## precision input or 1e-6 for single precision input. | 40 ## precision input or 1e-6 for single precision input. |
40 ## | 41 ## |
41 ## The function may also be called with the following optional property/value | 42 ## The function may also be called with the following optional property/value |
96 ## @end example | 97 ## @end example |
97 ## | 98 ## |
98 ## @seealso{unique, union, intersect, setdiff, setxor, ismember} | 99 ## @seealso{unique, union, intersect, setdiff, setxor, ismember} |
99 ## @end deftypefn | 100 ## @end deftypefn |
100 | 101 |
101 | |
102 function [c, ia, ic] = uniquetol (A, varargin) | 102 function [c, ia, ic] = uniquetol (A, varargin) |
103 | 103 |
104 if (nargin < 1) | 104 if (nargin < 1) |
105 print_usage (); | 105 print_usage (); |
106 endif | |
107 | |
108 if (isempty (A)) | |
109 c = A; | |
110 ia = []; | |
111 ic = []; | |
112 return; | |
113 endif | 106 endif |
114 | 107 |
115 if (! isfloat (A) || iscomplex (A)) | 108 if (! isfloat (A) || iscomplex (A)) |
116 error ("Octave:uniquetol:unsupported-type", | 109 error ("Octave:uniquetol:unsupported-type", |
117 "uniquetol: A must be a double or single precision non-complex array"); | 110 "uniquetol: A must be a double or single precision non-complex array"); |
161 else | 154 else |
162 error ("uniquetol: unknown property '%s'", varargin{k}); | 155 error ("uniquetol: unknown property '%s'", varargin{k}); |
163 endif | 156 endif |
164 endfor | 157 endfor |
165 | 158 |
159 if (isempty (A)) | |
160 sz_A = size (A); | |
161 ## hack for Matlab empty input compatibility | |
162 if (by_rows) | |
163 c = A; | |
164 sz_A(2) = 1; | |
165 ia = ones (sz_A); | |
166 ic = ones (sz_A); | |
167 else | |
168 c = ones (0,1); | |
169 if (sz_A(1) == 1) | |
170 c = c.'; | |
171 endif | |
172 ia = ones (0,1); | |
173 ic = ones (0,1); | |
174 endif | |
175 if (isa (A, "single")) | |
176 ## c follows class of A, ia and ic are always class "double". | |
177 c = single (c); | |
178 endif | |
179 return; | |
180 endif | |
181 | |
166 if (isempty (data_scale)) | 182 if (isempty (data_scale)) |
167 data_scale = max (abs (A(! isinf (A))(:))); | 183 data_scale = max (abs (A(! isinf (A))(:))); |
168 endif | 184 endif |
169 | 185 |
170 tol *= data_scale; | 186 tol *= data_scale; |
171 | 187 |
172 if (by_rows) | 188 if (by_rows) |
173 | 189 |
174 nr = rows (A); | 190 ##start matrix in sorted order, retain sorting and inverting indices |
175 nc = columns (A); | 191 [A, srtA] = sortrows (A); |
192 [~, inv_srtA] = sort (srtA); | |
193 | |
194 [nr, nc] = size (A); | |
176 Iall = zeros (nr, 1); | 195 Iall = zeros (nr, 1); |
177 I = NaN (nc, 1); | 196 I = NaN (nc, 1); |
178 ia = {}; | 197 ia = {}; |
179 J = NaN (nc, 1); | 198 J = NaN (nc, 1); |
180 j = 1; | 199 j = 1; |
187 equ = all (abs (A - A(i,:)) <= tol, 2); | 206 equ = all (abs (A - A(i,:)) <= tol, 2); |
188 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2); | 207 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2); |
189 sumeq = sum (equ); | 208 sumeq = sum (equ); |
190 ia_tmp = find (equ); | 209 ia_tmp = find (equ); |
191 if (output_all_indices) | 210 if (output_all_indices) |
192 ia{end+1} = ia_tmp; | 211 ia{end+1,1} = sort (srtA(ia_tmp)); |
193 endif | 212 endif |
194 Iall(ii+(1:sumeq)) = ia_tmp; | 213 Iall(ii+(1:sumeq)) = ia_tmp; |
195 I(j) = ia_tmp(1); | 214 I(j) = ia_tmp(1); |
196 J(equ) = j; | 215 J(equ) = j; |
197 ii += sumeq; | 216 ii += sumeq; |
202 I(isnan (I)) = []; | 221 I(isnan (I)) = []; |
203 J(isnan (J)) = []; | 222 J(isnan (J)) = []; |
204 c = A(I,:); | 223 c = A(I,:); |
205 | 224 |
206 if (! output_all_indices) | 225 if (! output_all_indices) |
207 ia = I(1:j-1); | 226 ia = srtA(I(1:j-1)); |
208 endif | 227 endif |
209 ic = J; | 228 |
229 ic = J(inv_srtA); | |
210 | 230 |
211 else | 231 else |
212 isrowvec = isrow (A); | 232 isrowvec = isrow (A); |
213 A = A(:); | 233 A = A(:); |
214 nr = rows (A); | 234 nr = rows (A); |
245 else | 265 else |
246 findisnanA = []; | 266 findisnanA = []; |
247 endif | 267 endif |
248 if (output_all_indices) | 268 if (output_all_indices) |
249 nu = cumsumue(end); | 269 nu = cumsumue(end); |
250 ia = cell (1, nu); | 270 ia = cell (nu, 1); |
251 for k = 1:nu | 271 for k = 1:nu |
252 ia{k} = setdiff (sAi(cumsumue==k), findisnanA); | 272 ia{k} = setdiff (sAi(cumsumue==k), findisnanA); |
253 endfor | 273 endfor |
254 else | 274 else |
255 ia = sAi(ue); | 275 ia = sAi(ue); |
256 endif | 276 endif |
257 | 277 |
258 if (anyisnanA) | 278 if (anyisnanA) |
259 rowsc1 = rows (c) + (1:sum (isnanA)); | 279 rowsc1 = [1:sum(isnanA(:))]'; |
280 if (~all (isnanA)) | |
281 rowsc1 += rows (c); | |
282 endif | |
260 c(rowsc1) = NaN; | 283 c(rowsc1) = NaN; |
261 ia(rowsc1) = findisnanA; | |
262 ic(isnanA) = rowsc1; | 284 ic(isnanA) = rowsc1; |
263 endif | 285 if (output_all_indices) |
264 | 286 ia(rowsc1) = num2cell (findisnanA); |
265 ## FIXME: Matlab-compatible orientation of output | 287 else |
266 ## Actually, Matlab prefers row vectors (2021/03/24), but this is different | 288 ia(rowsc1) = findisnanA; |
267 ## from all the other set functions which prefer column vectors. Assume | 289 endif |
268 ## that this is a bug in Matlab's implementation and prefer column vectors. | 290 |
291 ## if numel(c) was 1, appending NaNs creates a row vector instead of | |
292 ## expected column vector. | |
293 if (isrow (c)) | |
294 c = c.'; | |
295 endif | |
296 endif | |
297 | |
298 ## Matlab compatibility - outputs are column vectors unless the input | |
299 ## is a row vector, in which case the output c is also a row vector. | |
300 ## ia and ic are always column vectors. (verified Matlab 2022a) | |
269 if (isrowvec) | 301 if (isrowvec) |
270 c = c.'; | 302 c = c.'; |
271 endif | 303 endif |
272 | 304 endif |
273 endif | |
274 | |
275 endfunction | 305 endfunction |
276 | 306 |
277 | 307 |
278 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) | 308 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) |
279 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), | 309 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), |
280 %! [1 1 2; 1 0 1]) | 310 %! [1 0 1; 1 1 2]) |
281 %!assert (uniquetol ([]), []) | |
282 %!assert (uniquetol ([1]), [1]) | 311 %!assert (uniquetol ([1]), [1]) |
283 %!assert (uniquetol ([2, 1]), [1, 2]); | 312 %!assert (uniquetol ([2, 1]), [1, 2]); |
284 %!assert (uniquetol ([1; 2]), [1; 2]) | 313 %!assert (uniquetol ([1; 2]), [1; 2]) |
285 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]); | 314 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]); |
286 %!assert (uniquetol (zeros (1, 0)), zeros (1, 0)); | |
287 %!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0)) | |
288 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4]) | 315 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4]) |
289 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4]) | 316 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4]) |
290 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4]) | 317 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4]) |
291 %!assert (uniquetol (sparse ([2,0;2,0])), sparse ([0;2])) | 318 %!assert (uniquetol (sparse ([2,0;2,0])), sparse ([0;2])) |
292 %!assert (uniquetol (sparse ([1,2;2,3])), sparse ([1;2;3])) | 319 %!assert (uniquetol (sparse ([1,2;2,3])), sparse ([1;2;3])) |
294 %! single ([1,2,2,3,2,4])) | 321 %! single ([1,2,2,3,2,4])) |
295 %!assert (uniquetol (single ([1,2,2,3,2,4])), single ([1,2,3,4])) | 322 %!assert (uniquetol (single ([1,2,2,3,2,4])), single ([1,2,3,4])) |
296 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true), | 323 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true), |
297 %! single ([1;2;3;4])) | 324 %! single ([1;2;3;4])) |
298 | 325 |
326 ## Test 2D array sorting | |
327 %!test | |
328 %! a = [magic(3); 2 * magic(3)]; | |
329 %! assert (uniquetol (a), [1:10,12,14,16,18]') | |
330 %! assert (uniquetol (a, "byrows", true), sortrows (a)) | |
331 | |
299 ## Matlab compatibility of output | 332 ## Matlab compatibility of output |
300 %!test | 333 %!test |
301 %! x = 1:0.045:3; | 334 %! x = 1:0.045:3; |
302 %! y = uniquetol (x, 0.1, "datascale", 1); | 335 %! y = uniquetol (x, 0.1, "datascale", 1); |
303 %! assert (y(1:4), [1, 1.135, 1.27, 1.405]); | 336 %! assert (y(1:4), [1, 1.135, 1.27, 1.405]); |
312 ## Test index vector return arguments with "ByRows" | 345 ## Test index vector return arguments with "ByRows" |
313 %!test | 346 %!test |
314 %! A = [2, 3, 4; 2, 3, 4]; | 347 %! A = [2, 3, 4; 2, 3, 4]; |
315 %! [c, ia, ic] = uniquetol (A, "byrows", true); | 348 %! [c, ia, ic] = uniquetol (A, "byrows", true); |
316 %! assert (c, [2, 3, 4]); | 349 %! assert (c, [2, 3, 4]); |
317 %! assert (A(ia,:), c); | 350 %! assert (ia, 1); |
318 %! assert (c(ic,:), A); | 351 %! assert (ic, [1;1]); |
319 | 352 |
320 %!test | 353 %!test |
321 %! x = (2:7)'*pi; | 354 %! x = (2:7)'*pi; |
322 %! y = exp (log (x)); | 355 %! y = exp (log (x)); |
323 %! C = uniquetol ([x; y]); | 356 %! C = uniquetol ([x; y]); |
326 ## Test "ByRows" Property | 359 ## Test "ByRows" Property |
327 %!test | 360 %!test |
328 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95]; | 361 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95]; |
329 %! B = log (exp (A)); | 362 %! B = log (exp (A)); |
330 %! C = uniquetol ([A; B], "ByRows", true); | 363 %! C = uniquetol ([A; B], "ByRows", true); |
331 %! assert (C, A); | 364 %! assert (C, sortrows(A), 10*eps); |
332 | 365 |
333 ## Test "DataScale" Property | 366 ## Test "DataScale" Property |
334 %!test | 367 %!test |
335 %! x = 10^11; | 368 %! x = 10^11; |
336 %! C = uniquetol ([x, exp(log(x))], 1e-6, "DataScale", 1); | 369 %! C = uniquetol ([x, exp(log(x))], 1e-6, "DataScale", 1); |
339 ## Test "OutputAllIndices" Property | 372 ## Test "OutputAllIndices" Property |
340 %!test | 373 %!test |
341 %! A = [.1 .2 .3 10]; | 374 %! A = [.1 .2 .3 10]; |
342 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true); | 375 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true); |
343 %! assert (C, [.1, 10]); | 376 %! assert (C, [.1, 10]); |
344 %! assert (ia, {(1:3)', 4}); | 377 %! assert (ia, {(1:3)'; 4}); |
345 %! assert (ic, [1; 1; 1; 2]); | 378 %! assert (ic, [1; 1; 1; 2]); |
379 | |
380 ## Test NaN inputs | |
381 %!assert (uniquetol (NaN), NaN) | |
382 %!assert (uniquetol ([NaN NaN]), [NaN NaN]) | |
383 %!assert (uniquetol ([NaN NaN]'), [NaN NaN]') | |
384 %!assert (uniquetol (NaN(2,2)), NaN(4,1)) | |
385 | |
386 %!test | |
387 %! a = [magic(3); 2 * magic(3)]; | |
388 %! a(4:5) = NaN; | |
389 %! [c, ia, ic] = uniquetol (a); | |
390 %! assert (c, [1:10,12,14,18, NaN, NaN]'); | |
391 %! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]'); | |
392 %! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]'); | |
393 %! [c, ia, ic] = uniquetol (single (a)); | |
394 %! assert (class (c), "single"); | |
395 %! assert (class (ia), "double"); | |
396 %! assert (class (ic), "double"); | |
397 %! [c, ia, ic] = uniquetol (a, "ByRows", true); | |
398 %! assert (c, sortrows (a)); | |
399 %! assert (ia, [2,3,1,6,4,5]'); | |
400 %! assert (ic, [3,1,2,5,6,4]'); | |
401 %! [c, ia, ic] = uniquetol (single (a), "ByRows", true); | |
402 %! assert (class (c), "single"); | |
403 %! assert (class (ia), "double"); | |
404 %! assert (class (ic), "double"); | |
405 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true); | |
406 %! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5}); | |
407 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true); | |
408 %! assert (class (c), "single"); | |
409 %! assert (class (ia{1}), "double"); | |
410 %! assert (class (ic), "double"); | |
411 %! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true); | |
412 %! assert (ia, {2;3;1;6;4;5}); | |
413 %! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true); | |
414 %! assert (class (c), "single"); | |
415 %! assert (class (ia{1}), "double"); | |
416 %! assert (class (ic), "double"); | |
417 | |
418 ## Test empty input compatibility | |
419 %!test | |
420 %! [c, ia, ic] = uniquetol ([]); | |
421 %! assert (c, ones (0,1)); | |
422 %! assert (ia, ones (0,1)); | |
423 %! assert (ic, ones (0,1)); | |
424 %!test | |
425 %! [c, ia, ic] = uniquetol ([], "byrows", true); | |
426 %! assert (c, []); | |
427 %! assert (ia, ones (0,1)); | |
428 %! assert (ic, ones (0,1)); | |
429 %!test | |
430 %! [c, ia, ic] = uniquetol (ones (0,1)); | |
431 %! assert (c, ones (0,1)); | |
432 %! assert (ia, ones (0,1)); | |
433 %! assert (ic, ones (0,1)); | |
434 %!test | |
435 %! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true); | |
436 %! assert (c, ones (0,1)); | |
437 %! assert (ia, ones (0,1)); | |
438 %! assert (ic, ones (0,1)); | |
439 %!test | |
440 %! [c, ia, ic] = uniquetol (ones (1,0)); | |
441 %! assert (c, ones (1,0)); | |
442 %! assert (ia, ones (0,1)); | |
443 %! assert (ic, ones (0,1)); | |
444 %!test | |
445 %! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true); | |
446 %! assert (c, ones (1,0)); | |
447 %! assert (ia, 1); | |
448 %! assert (ic, 1); | |
449 %!test | |
450 %! [c, ia, ic] = uniquetol (ones (1,0,2)); | |
451 %! assert (c, ones (1,0)); | |
452 %! assert (ia, ones (0,1)); | |
453 %! assert (ic, ones (0,1)); | |
454 %!test | |
455 %! [c, ia, ic] = uniquetol (ones (0,1,2)); | |
456 %! assert (c, ones (0,1)); | |
457 %! assert (ia, ones (0,1)); | |
458 %! assert (ic, ones (0,1)); | |
459 %!test | |
460 %! [c, ia, ic] = uniquetol (ones (1,2,0)); | |
461 %! assert (c, ones (1,0)); | |
462 %! assert (ia, ones (0,1)); | |
463 %! assert (ic, ones (0,1)); | |
464 %!test | |
465 %! [c, ia, ic] = uniquetol (single ([])); | |
466 %! assert (class (c), "single"); | |
467 %! assert (class (ia), "double"); | |
468 %! assert (class (ic), "double"); | |
469 %!test | |
470 %! [c, ia, ic] = uniquetol (single ([]), "byrows", true); | |
471 %! assert (class (c), "single"); | |
472 %! assert (class (ia), "double"); | |
473 %! assert (class (ic), "double"); | |
474 %!test | |
475 %! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true); | |
476 %! assert (class (c), "single"); | |
477 %! assert (class (ia), "double"); | |
478 %! assert (class (ic), "double"); | |
479 | |
346 | 480 |
347 ## Test input validation | 481 ## Test input validation |
348 %!error <Invalid call> uniquetol () | 482 %!error <Invalid call> uniquetol () |
349 %!error <A must be a double or single precision> uniquetol (int8 (1)) | 483 %!error <A must be a double or single precision> uniquetol (int8 (1)) |
350 %!error <A must be .* non-complex> uniquetol (1i) | 484 %!error <A must be .* non-complex> uniquetol (1i) |
352 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2]) | 486 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2]) |
353 %!error <TOL must be .* non-complex> uniquetol (1, 1i) | 487 %!error <TOL must be .* non-complex> uniquetol (1, 1i) |
354 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows") | 488 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows") |
355 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar") | 489 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar") |
356 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true) | 490 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true) |
491 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true) | |
492 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true) | |
493 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true) | |
357 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1') | 494 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1') |
358 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1) | 495 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1) |
359 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i) | 496 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i) |
360 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN) | 497 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN) |
361 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2]) | 498 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2]) |