Mercurial > forge

--- a/extra/dataframe/inst/@dataframe/dataframe.m	Mon Mar 18 21:05:16 2013 +0000
+++ b/extra/dataframe/inst/@dataframe/dataframe.m	Wed Mar 20 09:14:10 2013 +0000
@@ -108,7 +108,7 @@

 %# default values
 seeked = []; trigger = []; unquot = true; sep = "\t,"; cmt_lines = [];
-conv_regexp = {}; datefmt = '';
+conv_regexp = {}; datefmt = ''; verbose = false;

 if (length (varargin) > 0)	%# extract known arguments
   indi = 1;
@@ -166,6 +166,9 @@
         case 'datefmt'
           datefmt = varargin{indi + 1};
           varargin(indi:indi+1) = [];
+	case 'verbose'
+          verbose = varargin{indi + 1};
+          varargin(indi:indi+1) = [];
 	case '--'
 	  %# stop processing args -- take the rest as filenames
 	  varargin(indi) = [];
@@ -239,7 +242,11 @@
           %# cut into lines -- include the EOL to have a one-to-one
           %# matching between line numbers. Use a non-greedy match.
           lines = regexp (in, ['.*?' eol], 'match');
-          dummy = cellfun (@(x) regexp (x, eol), lines);
+	  try
+            dummy = cellfun (@(x) regexp (x, eol), lines);
+	  catch
+	    disp('line 245 -- binary garbage in the input file ? '); keyboard
+	  end
           %# remove the EOL character(s)
           lines(1 == dummy) = {""};
           %# use a positive lookahead -- eol is not part of the match
@@ -330,7 +337,13 @@
               while (indk <= size (the_line, 2))
 		if (isempty (the_line{indk}) || any (size (the_line{indk}) > 1))
                   %#if indi > 1 && indk > 1, disp('line 117 '); keyboard; %#endif
-                  if (unquot)
+		  if (isempty (dummy {indk}))
+		    %# empty field: nothing to convert, advance both indices and skip it
+		    indk = indk + 1; indm = indm + 1;
+		    continue;
+		  endif
+
+		  if (unquot)
                     try
                       %# remove quotes and leading space(s)
                       x(indj, indm) = regexp (dummy{indk}, '[^''" ].*[^''"]', 'match'){1};
@@ -463,6 +476,10 @@
       if (ndims (x) > 2), idx.subs{3} = 1:size (x, 3); endif
       %#      df = subsasgn(df, idx, x);        <= call directly lower level
       try
+	if (verbose)
+	   printf ("Calling df_matassign, orig size: %s\n", disp (size (df)));
+	   printf ("size(x): %s\n", disp (size (x)));
+	endif
 	df = df_matassign (df, idx, indj, length (indj), x);
       catch
 	disp ('line 443 '); keyboard
--- a/extra/dataframe/inst/@dataframe/private/df_matassign.m	Mon Mar 18 21:05:16 2013 +0000
+++ b/extra/dataframe/inst/@dataframe/private/df_matassign.m	Wed Mar 20 09:14:10 2013 +0000
@@ -284,7 +284,7 @@
       endfor
       indi = nrow;
       while (indi > 0)
-        if (eff_len(indi) < ncol)
+        if (eff_len(indi) < 1)
           nrow = nrow - 1;
           indr(end) = [];
           RHS(end, :) = [];
@@ -311,7 +311,7 @@
     indj = 1;
     for indi = (1:ncol)
       if (indc(indi) > df._cnt(2))
-        %# perform dynamic resizing one-by-one, to get type right
+	%# perform dynamic resizing one-by-one, to get type right
         if (isempty (ctype) || length (ctype) < indc(indi))
           df = df_pad(df, 2, indc(indi)-df._cnt(2), class(RHS{1, indj}));
         else
@@ -348,9 +348,12 @@
             endswitch
           endif
         catch
-          dummy =  unique(cellfun(@class, RHS(:, indj), ...
+	  fprintf (2, "Something went wrong while converting colum %d\n", indj);
+	  fprintf (2, "Error was: %s\n", lasterr ());
+	  dummy =  unique(cellfun(@class, RHS(:, indj), ...
                                   'UniformOutput', false));
           if (any (strmatch ("char", dummy, "exact")))
+	    fprintf (2, "Downclassing to char\n");
             %# replace the actual column, of type numeric, by a char
             df._type{indc(indi)} = 'char';
             dummy = RHS(:, indj);
@@ -402,7 +405,7 @@
       df._data{indc(indi)} = dummy; df._rep{indc(indi)} = 1:size (dummy, 2);
       indj = indj + 1;
     endfor
-
+
   else
     %# RHS is either a numeric, either a df
     if (any (indc > min (size (df._data, 2), df._cnt(2))))