Mercurial > forge
view extra/dataframe/inst/@dataframe/private/df_matassign.m @ 11567:b379b9a32196 octave-forge
Workaround for various oddities
author | cdemills |
---|---|
date | Wed, 20 Mar 2013 09:14:10 +0000 |
parents | 56f9ac1a2380 |
children |
line wrap: on
line source
function df = df_matassign(df, S, indc, ncol, RHS) %# auxiliary function: assign the dataframe as if it was a matrix %% Copyright (C) 2009-2012 Pascal Dupuis <Pascal.Dupuis@uclouvain.be> %% %% This file is part of Octave. %% %% Octave is free software; you can redistribute it and/or %% modify it under the terms of the GNU General Public %% License as published by the Free Software Foundation; %% either version 2, or (at your option) any later version. %% %% Octave is distributed in the hope that it will be useful, %% but WITHOUT ANY WARRANTY; without even the implied %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR %% PURPOSE. See the GNU General Public License for more %% details. %% %% You should have received a copy of the GNU General Public %% License along with Octave; see the file COPYING. If not, %% write to the Free Software Foundation, 51 Franklin Street - %% Fifth Floor, Boston, MA 02110-1301, USA. %# %# $Id$ %# if (isnull (RHS)) if (1 == ncol) if (sum (~strcmp (S.subs, ':')) > 2) error("A null assignment can only have one non-colon index."); endif elseif (sum (~strcmp (S.subs, ':')) > 1) error("A null assignment can only have one non-colon index."); endif if (strcmp (S.subs(1), ':')) %# removing column/matrix RHS = S; RHS.subs(2) = []; for indi = (indc) unfolded = df._data{indi}(:, df._rep{indi}); unfolded = feval (@subsasgn, unfolded, RHS, []); df._data{indi} = unfolded; if (~isempty (unfolded)) df._rep(indi) = 1:size (unfolded, 2); endif endfor %# remove empty elements indi = cellfun ('isempty', df._data); if (any (indi)) %# nothing left, remove this column df._cnt(2) = df._cnt(2) - sum (indi); indi = ~indi; %# vector of kept data df._name{2} = df._name{2}(indi); df._over{2} = df._over{2}(indi); df._type = df._type(indi); df._data = df._data(indi); df._rep = df._rep(indi); endif if (size (df._ridx, 3) > 1) df._ridx(:, indc, :) = []; endif elseif (strcmp (S.subs(2), ':')) %# removing rows indr = S.subs{1}; if (~isempty (df._name{1})) df._name{1}(indr, :) = []; df._over{1}(indr) = []; endif df._ridx(indr, :, :) = []; %# to remove a line, iterate on each column df._data = cellfun (@(x) feval(@subsasgn, x, S, []), \ df._data, "UniformOutPut", false); if (isa (indr, 'char')) df._cnt(1) = 0; else df._cnt(1) = df._cnt(1) - length (indr); endif endif df = df_thirddim (df); return; endif indc_was_set = ~isempty (indc); if (~indc_was_set) %# initial dataframe was empty ncol = size (RHS, 2); indc = 1:ncol; endif indr = S.subs{1, 1}; indr_was_set = ~isempty (indr); %# initial dataframe was empty ? if (~indr_was_set || strcmp (indr, ':')) if (iscell (RHS)) nrow = max (sum (cellfun ('size', RHS, 1))); else if (isvector (RHS)) if (0 == df._cnt(1)) nrow = size (RHS, 1); else nrow = df._cnt(1); %# limit to df numbner of rows endif else %# deduce limit from RHS nrow = size (RHS, 1); endif endif indr = 1:nrow; elseif (~isempty (indr)) if (~isnumeric (indr)) %# translate row names to row index [indr, nrow] = df_name2idx (df._name{1}, indr, df._cnt(1), 'row'); S.subs{1, 1} = indr; else nrow = length (indr); endif endif if (length (S.subs) > 2) inds = S.subs{1, 3}; else inds = []; endif rname = cell(0, 0); rname_width = max (1, size (df._name{2}, 2)); ridx = []; cname = rname; ctype = rname; if (iscell (RHS)) if ((length (indc) == df._cnt(2) && size (RHS, 2) >= df._cnt(2)) \ || 0 == df._cnt(2) || isempty (S.subs{1}) || isempty (S.subs{2})) %# providing too much information -- remove extra content if (size (RHS, 1) > 1) %# at this stage, verify that the first line doesn't contain %# chars only; use them for column names dummy = cellfun ('class', \ RHS(1, ~cellfun ('isempty', RHS(1, :))), \ 'UniformOutput', false); dummy = strcmp (dummy, 'char'); if (all (dummy)) if (length (df._over{2}) >= max (indc) \ && ~all (df._over{2}(indc)) && ~isempty (S.subs{2})) warning("Trying to overwrite colum names"); endif cname = RHS(1, :).'; RHS = RHS(2:end, :); if (~indr_was_set) nrow = nrow - 1; indr = 1:nrow; else %# we know indr, there is no reason that RHS(:, 1) contains %# row names. if (isempty (S.subs{2})) %# extract columns position from columns names [indc, ncol, S.subs{2}, dummy] = ... df_name2idx (df._name{2}, cname, df._cnt(2), 'column'); if (length (dummy) ~= sum (dummy)) warning ("Not all RHS column names used"); cname = cname(dummy); RHS = RHS(:, dummy); endif endif endif endif %# at this stage, verify that the first line doesn't contain %# chars only; use them for column types dummy = cellfun ('class', \ RHS(1, ~cellfun ('isempty', RHS(1, :))), \ 'UniformOutput', false); dummy = strcmp (dummy, 'char'); if (all (dummy)) if (length (df._over{2}) >= max (indc) \ && ~all (df._over{2}(indc))) warning ("Trying to overwrite colum names"); endif if (sum (~cellfun ('isempty', RHS(1, indc))) == ncol) ctype = RHS(1, :); endif RHS = RHS(2:end, :); if (~indr_was_set) nrow = nrow - 1; indr = 1:nrow; endif endif endif %# more elements than df width -- try to use the first two as %# row index and/or row name if (size (RHS, 1) > 1) dummy = all (cellfun ('isnumeric', \ RHS(~cellfun ('isempty', RHS(:, 1)), 1))); else dummy = isnumeric(RHS{1, 1}); endif dummy = dummy && (~isempty (cname) && size (cname{1}, 2) < 1); if (dummy) ridx = cell2mat (RHS(:, 1)); %# can it be converted to a list of unique numbers ? if (length (unique (ridx)) == length (ridx)) ridx = RHS(:, 1); RHS = RHS(:, 2:end); if (length (df._name{2}) == df._cnt(2) + ncol) %# columns name were pre-filled with too much values df._name{2}(end) = []; df._over{2}(end) = []; if (size (RHS, 2) < ncol) ncol = size (RHS, 2); indc = 1:ncol; endif elseif (~indc_was_set) ncol = ncol - 1; indc = 1:ncol; endif if (~isempty (cname)) cname = cname(2:end); endif if (~isempty (ctype)) ctype = ctype(2:end); endif else ridx = []; endif endif if (size (RHS, 2) > df._cnt(2)) %# verify the the first row doesn't contain chars only, use them %# for row names dummy = cellfun ('class', \ RHS(~cellfun ('isempty', RHS(:, 1)), 1), \ 'UniformOutput', false); dummy = strcmp (dummy, 'char') \ && (~isempty (cname) && size (cname{1}, 2) < 1); if (all (dummy)) if (length (df._over{1}) >= max (indr) \ && ~all (df._over{1}(indr))) warning("Trying to overwrite row names"); else rname = RHS(:, 1); endif rname_width = max ([1; cellfun('size', rname, 2)]); RHS = RHS(:, 2:end); if (length (df._name{2}) == df._cnt(2) + ncol) %# columns name were pre-filled with too much values df._name{2}(end) = []; df._over{2}(end) = []; if (size (RHS, 2) < ncol) ncol = size (RHS, 2); indc = 1:ncol; endif elseif (~indc_was_set) ncol = ncol - 1; indc = 1:ncol; endif if (~isempty (cname)) cname = cname(2:end); endif if (~isempty (ctype)) ctype = ctype(2:end); endif endif endif endif endif %# perform row resizing if columns are already filled if (~isempty (indr) && isnumeric(indr)) if (max (indr) > df._cnt(1) && size (df._data, 2) == df._cnt(2)) df = df_pad (df, 1, max (indr)-df._cnt(1), rname_width); endif endif if (iscell(RHS)) %# we must pad on a column-by-column basis %# verify that each cell contains a non-empty vector, and that sizes %# are compatible %# dummy = cellfun ('size', RHS(:), 2); %# if any (dummy < 1), %# error("cells content may not be empty"); %# endif %# dummy = cellfun ('size', RHS, 1); %# if any (dummy < 1), %# error("cells content may not be empty"); %# endif %# if any (diff(dummy) > 0), %# error("cells content with unequal length"); %# endif %# if 1 < size (RHS, 1) && any (dummy > 1), %# error("cells may only contain scalar"); %# endif if (size (RHS, 2) > indc) if (size (cname, 1) > indc) ncol = size (RHS, 2); indc = 1:ncol; else keyboard endif endif %# try to detect and remove bottom garbage eff_len = zeros (nrow, 1); if (size (RHS, 1) > 1) for indi = (indr) eff_len(indi, 1) = sum (~cellfun ('isempty', RHS(indi, :))); endfor indi = nrow; while (indi > 0) if (eff_len(indi) < 1) nrow = nrow - 1; indr(end) = []; RHS(end, :) = []; indi = indi - 1; if (~indr_was_set && isempty (df._name{1, 1})) df._cnt(1) = nrow; df._ridx(end) = []; endif else break; endif endwhile clear eff_len; endif %# the real assignement if (1 == size (RHS, 1)) %# each cell contains one vector fillfunc = @(x) RHS{x}; idxOK = logical(indr); else %# use cell2mat to pad on a column-by-column basis fillfunc = @(x) cell2mat (RHS(:, x)); endif indj = 1; for indi = (1:ncol) if (indc(indi) > df._cnt(2)) %# perform dynamic resizing one-by-one, to get type right if (isempty (ctype) || length (ctype) < indc(indi)) df = df_pad(df, 2, indc(indi)-df._cnt(2), class(RHS{1, indj})); else df = df_pad(df, 2, indc(indi)-df._cnt(2), ctype{indj}); endif endif if (nrow == df._cnt(1)) %# whole assignement try if (size (RHS, 1) <= 1) switch df._type{indc(indi)} case {'char' } %# use a cell array to hold strings dummy = cellfun (@num2str, RHS(:, indj), \ 'UniformOutput', false); case {'double' } dummy = fillfunc (indj); otherwise dummy = cast(fillfunc (indj), df._type{indc(indi)}); endswitch else %# keeps indexes in sync as cell elements may be empty idxOK = ~cellfun ('isempty', RHS(:, indj)); %# intialise dummy so that it can receive "anything" dummy = []; switch (df._type{indc(indi)}) case {'char' } %# use a cell array to hold strings dummy = cellfun (@num2str, RHS(:, indj), \ 'UniformOutput', false); case {'double' } dummy(idxOK, :) = fillfunc (indj); dummy(~idxOK, :) = NA; otherwise dummy(idxOK, :) = fillfunc (indj); dummy(~idxOK, :) = NA; dummy = cast(dummy, df._type{indc(indi)}); endswitch endif catch fprintf (2, "Something went wrong while converting colum %d\n", indj); fprintf (2, "Error was: %s\n", lasterr ()); dummy = unique(cellfun(@class, RHS(:, indj), ... 'UniformOutput', false)); if (any (strmatch ("char", dummy, "exact"))) fprintf (2, "Downclassing to char\n"); %# replace the actual column, of type numeric, by a char df._type{indc(indi)} = 'char'; dummy = RHS(:, indj); for indk = (size (dummy, 1):-1:1) if (~isa ("char", dummy{indk})) if (isinteger (dummy{indk})) dummy(indk) = mat2str (dummy{indk}); elseif (isa ("logical", dummy{indk})) if (dummy{indk}) dummy(indk) = "true"; else dummy{indk} = "false"; endif elseif (isnumeric (dummy{indk})) dummy(indk) = mat2str (dummy{indk}, 6); endif endif endfor else dummy = \ sprintf ("Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s", \ indj, df._type{indc(indi)}, length (indr), disp (RHS(:, indj))); error (dummy); endif end_try_catch if (size (dummy, 1) < df._cnt(1)) dummy(end+1:df._cnt(1), :) = NA; endif else %# partial assignement -- extract actual data and update dummy = df._data{indc(indi)}; try switch (df._type{indc(indi)}) case {'char' } %# use a cell array to hold strings dummy(indr, 1) = cellfun(@num2str, RHS(:, indj), \ 'UniformOutput', false); case {'double' } dummy(indr, :) = fillfunc (indj); otherwise dummy(indr, :) = cast(fillfunc (indj), df._type{indc(indi)}); endswitch catch dummy = \ sprintf ("Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s", \ indj, df._type{indc(indi)}, length (indr), disp (RHS(:, indj))); error (dummy); end_try_catch endif df._data{indc(indi)} = dummy; df._rep{indc(indi)} = 1:size (dummy, 2); indj = indj + 1; endfor else %# RHS is either a numeric, either a df if (any (indc > min (size (df._data, 2), df._cnt(2)))) df = df_pad(df, 2, max (indc-min (size (df._data, 2), df._cnt(2))),\ class(RHS)); endif if (~isempty (inds) && isnumeric(inds) && any (inds > 1)) for indi = (1:ncol) if (max (inds) > length (df._rep{indc(indi)})) df = df_pad(df, 3, max (inds)-length (df._rep{indc(indi)}), \ indc(indi)); endif endfor endif if (isa (RHS, 'dataframe')) %# block-copy index S.subs(2) = 1; if (any (~isna(RHS._ridx))) df._ridx = feval(@subsasgn, df._ridx, S, RHS._ridx); endif %# skip second dim and copy data S.subs(2) = []; Sorig = S; for indi = (1:ncol) [df, S] = df_cow(df, S, indc(indi)); if (strcmp (df._type(indc(indi)), RHS._type(indi))) try df._data{indc(indi)} = feval (@subsasgn, df._data{indc(indi)}, S, \ RHS._data{indi}(:, RHS._rep{indi})); catch disp (lasterr ()); disp('line 516 ???'); keyboard end_try_catch else df._data{indc(indi)} = feval (@subsasgn, df._data{indc(indi)}, S, \ cast (RHS._data{indi}(:, RHS._rep{indi}),\ df._type(indc(indi)))); endif S = Sorig; endfor if (~isempty (RHS._name{1})) df._name{1}(indr) = genvarname(RHS._name{1}(indr)); df._over{1}(indr) = RHS._over{1}(indr); endif if (~isempty (RHS._src)) if (~any (strcmp (cellstr(df._src), cellstr(RHS._src)))) df._src = vertcat(df._src, RHS._src); endif endif if (~isempty (RHS._cmt)) if (~any (strcmp (cellstr(df._cmt), cellstr(RHS._cmt)))) df._cmt = vertcat(df._cmt, RHS._cmt); endif endif else %# RHS is homogenous, pad at once if (isvector (RHS)) %# scalar - vector if (isempty (S.subs)) fillfunc = @(x, y) RHS; else %# ignore 'column' dimension -- force colum vectors -- use a %# third dim just in case if (isempty (S.subs{1})) S.subs{1} = ':'; endif S.subs(2) = []; if (length (S.subs) < 2) S.subs{2} = 1; endif if (ncol > 1 && length (RHS) > 1) %# set a row from a vector fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS(y)); else fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS); endif endif Sorig = S; for indi = (1:ncol) try lasterr(''); dummy= 'df_cow'; [df, S] = df_cow(df, S, indc(indi)); dummy = 'fillfunc'; df._data{indc(indi)} = fillfunc (df._data{indc(indi)}, S, indi); S = Sorig; catch disp (lasterr ()) disp ('line 491 '); keyboard end_try_catch # catch # if ndims(df._data{indc(indi)}) > 2, # %# upstream forgot to give the third dim # dummy = S; dummy.subs(3) = 1; # df._data{indc(indi)} = fillfunc(df._data{indc(indi)}, \ # dummy, indi); # else # rethrow(lasterr()); # endif # end_try_catch endfor else %# 2D - 3D matrix S.subs(2) = []; %# ignore 'column' dimension if (isempty (S.subs{1})) S.subs{1} = indr; endif %# rotate slices in dim 1-3 to slices in dim 1-2 fillfunc = @(x, S, y) feval(@subsasgn, x, S, squeeze(RHS(:, y, :))); Sorig = S; for indi = (1:ncol) [df, S] = df_cow(df, S, indc(indi)); df._data{indc(indi)} = fillfunc (df._data{indc(indi)}, S, indi); S = Sorig; endfor endif if (indi < size (RHS, 2) && ~isa (RHS, 'char')) warning (' not all columns of RHS used'); endif endif endif %# delayed row padding -- column padding occured before if (~isempty (indr) && isnumeric (indr)) if (max (indr) > df._cnt(1) && size (df._data, 2) < df._cnt(2)) df = df_pad(df, 1, max (indr)-df._cnt(1), rname_width); endif endif %# adjust ridx and rnames, if required if (~isempty (ridx)) dummy = df._ridx; if (1 == size (RHS, 1)) dummy(indr) = ridx{1}; else dummy(indr) = vertcat(ridx{indr}); endif if (length (unique (dummy)) ~= length (dummy)) %# || \ %# any (diff(dummy) <= 0), error("row indexes are not unique or not ordered"); endif df._ridx = dummy; endif if (~isempty (rname) && (length (df._over{1}) < max (indr) || \ all (df._over{1}(indr)))) df._name{1}(indr, 1) = genvarname(rname); df._over{1}(1, indr) = false; endif if (~isempty (cname) && (length (df._over{2}) < max (indc) || \ all (df._over{2}(indc)))) if (length (cname) < ncol) cname(end+1:ncol) = {'_'}; endif cname(cellfun (@isempty, cname)) = 'unnamed'; try df._name{2}(indc, 1) = genvarname (cname); catch %# there was a problem with genvarname. dummy = sum (~cellfun ('isempty', cname)); if (1 == dummy) dummy = strsplit(cname{1}, ' ', true); if (length (dummy) == ncol) df._name{2}(indc, 1) = dummy; else disp ('line 528 '); keyboard endif else disp ('line 531 '); keyboard endif end_try_catch df._over{2}(1, indc) = false; endif df = df_thirddim (df); endfunction