# HG changeset patch # User cdemills # Date 1363770850 0 # Node ID b379b9a321960e422af6327113a51f4881fec141 # Parent 60b2ba2eb51df065a390327ae9b496590d4351ed Workaround for various oddities diff -r 60b2ba2eb51d -r b379b9a32196 extra/dataframe/inst/@dataframe/dataframe.m --- a/extra/dataframe/inst/@dataframe/dataframe.m Mon Mar 18 21:05:16 2013 +0000 +++ b/extra/dataframe/inst/@dataframe/dataframe.m Wed Mar 20 09:14:10 2013 +0000 @@ -108,7 +108,7 @@ %# default values seeked = []; trigger = []; unquot = true; sep = "\t,"; cmt_lines = []; -conv_regexp = {}; datefmt = ''; +conv_regexp = {}; datefmt = ''; verbose = false; if (length (varargin) > 0) %# extract known arguments indi = 1; @@ -166,6 +166,9 @@ case 'datefmt' datefmt = varargin{indi + 1}; varargin(indi:indi+1) = []; + case 'verbose' + verbose = varargin{indi + 1}; + varargin(indi:indi+1) = []; case '--' %# stop processing args -- take the rest as filenames varargin(indi) = []; @@ -239,7 +242,11 @@ %# cut into lines -- include the EOL to have a one-to-one %# matching between line numbers. Use a non-greedy match. lines = regexp (in, ['.*?' eol], 'match'); - dummy = cellfun (@(x) regexp (x, eol), lines); + try + dummy = cellfun (@(x) regexp (x, eol), lines); + catch + disp('line 245 -- binary garbage in the input file ? '); keyboard + end %# remove the EOL character(s) lines(1 == dummy) = {""}; %# use a positive lookahead -- eol is not part of the match @@ -330,7 +337,13 @@ while (indk <= size (the_line, 2)) if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1)) %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; %#endif - if (unquot) + if (isempty (dummy {indk})) + %# empty field, just don't care + indk = indk + 1; indm = indm + 1; + continue; + endif + + if (unquot) try %# remove quotes and leading space(s) x(indj, indm) = regexp (dummy{indk}, '[^''" ].*[^''"]', 'match'){1}; @@ -463,6 +476,10 @@ if (ndims (x) > 2), idx.subs{3} = 1:size (x, 3); endif %# df = subsasgn(df, idx, x); <= call directly lower level try + if (verbose) + printf ("Calling df_matassign, orig size: %s\n", disp (size (df))); + printf ("size(x): %s\n", disp (size (x))); + endif df = df_matassign (df, idx, indj, length (indj), x); catch disp ('line 443 '); keyboard diff -r 60b2ba2eb51d -r b379b9a32196 extra/dataframe/inst/@dataframe/private/df_matassign.m --- a/extra/dataframe/inst/@dataframe/private/df_matassign.m Mon Mar 18 21:05:16 2013 +0000 +++ b/extra/dataframe/inst/@dataframe/private/df_matassign.m Wed Mar 20 09:14:10 2013 +0000 @@ -284,7 +284,7 @@ endfor indi = nrow; while (indi > 0) - if (eff_len(indi) < ncol) + if (eff_len(indi) < 1) nrow = nrow - 1; indr(end) = []; RHS(end, :) = []; @@ -311,7 +311,7 @@ indj = 1; for indi = (1:ncol) if (indc(indi) > df._cnt(2)) - %# perform dynamic resizing one-by-one, to get type right + %# perform dynamic resizing one-by-one, to get type right if (isempty (ctype) || length (ctype) < indc(indi)) df = df_pad(df, 2, indc(indi)-df._cnt(2), class(RHS{1, indj})); else @@ -348,9 +348,12 @@ endswitch endif catch - dummy = unique(cellfun(@class, RHS(:, indj), ... + fprintf (2, "Something went wrong while converting colum %d\n", indj); + fprintf (2, "Error was: %s\n", lasterr ()); + dummy = unique(cellfun(@class, RHS(:, indj), ... 'UniformOutput', false)); if (any (strmatch ("char", dummy, "exact"))) + fprintf (2, "Downclassing to char\n"); %# replace the actual column, of type numeric, by a char df._type{indc(indi)} = 'char'; dummy = RHS(:, indj); @@ -402,7 +405,7 @@ df._data{indc(indi)} = dummy; df._rep{indc(indi)} = 1:size (dummy, 2); indj = indj + 1; endfor - + else %# RHS is either a numeric, either a df if (any (indc > min (size (df._data, 2), df._cnt(2))))