changeset 21462:b7d1e93c0702

initial implementation of textscan in C++ * textscan.h, textscan.cc: New files. * libinterp/corefcn/module.mk: Update. * textscan.m: Delete. * scripts/io/module.mk: Update.
author Lachlan Andrew <lachlanbis@gmail.com>
date Tue, 15 Mar 2016 19:36:52 +1100
parents 35ce775d6115
children a4c411681e25
files libinterp/corefcn/module.mk libinterp/corefcn/textscan.cc libinterp/corefcn/textscan.h scripts/io/module.mk scripts/io/textscan.m
diffstat 5 files changed, 3780 insertions(+), 709 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/module.mk	Thu Mar 17 12:06:30 2016 -0700
+++ b/libinterp/corefcn/module.mk	Tue Mar 15 19:36:52 2016 +1100
@@ -85,6 +85,7 @@
   libinterp/corefcn/symtab.h \
   libinterp/corefcn/sysdep.h \
   libinterp/corefcn/text-renderer.h \
+  libinterp/corefcn/textscan.h \
   libinterp/corefcn/toplev.h \
   libinterp/corefcn/txt-eng.h \
   libinterp/corefcn/utils.h \
@@ -243,6 +244,7 @@
   libinterp/corefcn/sysdep.cc \
   libinterp/corefcn/time.cc \
   libinterp/corefcn/text-renderer.cc \
+  libinterp/corefcn/textscan.cc \
   libinterp/corefcn/toplev.cc \
   libinterp/corefcn/tril.cc \
   libinterp/corefcn/tsearch.cc \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libinterp/corefcn/textscan.cc	Tue Mar 15 19:36:52 2016 +1100
@@ -0,0 +1,3357 @@
+/*
+Copyright (C) 2015-2016 Lachlan Andrew, Monash University
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+/** @file libinterp/corefcn/textscan
+ *  Implement the 'textscan' octave command
+ */
+
+//#ifdef HAVE_CONFIG_H
+#include <config.h>
+//#endif
+
+#include "list"
+
+#include "Array.cc"
+
+#include "Cell.h"
+#include "textscan.h"
+#include "oct-stream.h"
+#include "ovl.h"
+#include "utils.h"
+#include "defun.h"
+#include "ov-re-mat.h"
+
+// Note: this info string tries to start new sentences on new lines
+// to minimize the number of lines changed when making changes to it.
+DEFUN (textscan, args, ,
+       "-*- texinfo -*-\n\
+@deftypefn  {} {@var{C} =} textscan (@var{fid}, @var{format})\n\
+@deftypefnx {} {@var{C} =} textscan (@var{fid}, @var{format}, @var{repeat})\n\
+@deftypefnx {} {@var{C} =} textscan (@var{fid}, @var{format}, @var{param}, @var{value}, @dots{})\n\
+@deftypefnx {} {@var{C} =} textscan (@var{fid}, @var{format}, @var{repeat}, @var{param}, @var{value}, @dots{})\n\
+@deftypefnx {} {@var{C} =} textscan (@var{str}, @dots{})\n\
+@deftypefnx {} {[@var{C}, @var{position}] =} textscan (@dots{})\n\
+Read data from a text file or string.\n\
+\n\
+The string @var{str} or file associated with @var{fid} is read from and\n\
+parsed according to @var{format}.\n\
+The function is an extension of @code{strread} and @code{textread}.\n\
+Differences include: the ability to read from either a file or a string,\n\
+additional options, and additional format specifiers.\n\
+\n\
+The input is interpreted as a sequence of \"words\", delimiters\n\
+(such as whitespace) and literals.\n\
+The characters that form delimiters and whitespace are determined\n\
+by the options.\n\
+The format consists of format specifiers interspersed between literals.\n\
+In the format, whitespace forms a delimiter between consecutive literals,\n\
+but is otherwise ignored.\n\
+\n\
+The output @var{C} is a cell array whose second dimension is determined\n\
+by the number of format specifiers.\n\
+\n\
+The first word of the input is matched to the first specifier of the\n\
+format and placed in the first column of the output;\n\
+the second is matched to the second specifier and placed in the second column\n\
+and so forth.\n\
+If there are more words than specifiers, the process is repeated until all\n\
+words have been processed or the limit imposed by @var{repeat} has been met\n\
+(see below).\n\
+\n\
+The string @var{format} describes how the words in @var{str} should be\n\
+parsed.\n\
+As in @var{fscanf}, any (non-whitespace) text in the format that is\n\
+not one of these specifiers is considered a literal;\n\
+if there is a literal between two format specifiers then that same literal\n\
+must appear in the input stream between the matching words.\n\
+\n\
+The following specifiers are valid:\n\
+\n\
+@table @code\n\
+@item  %f\n\
+@itemx %f64\n\
+@itemx %n\n\
+The word is parsed as a number and converted to double.\n\
+\n\
+@item  %f32\n\
+The word is parsed as a number and converted to single (float).\n\
+\n\
+@item  %d\n\
+@itemx %d8\n\
+@itemx %d16\n\
+@itemx %d32\n\
+@itemx %d64\n\
+The word is parsed as a number and converted to int8, int16, int32 or int64.\n\
+If not size is specified, int32 is used.\n\
+\n\
+@item  %u\n\
+@itemx %u8\n\
+@itemx %u16\n\
+@itemx %u32\n\
+@itemx %u64\n\
+The word is parsed as a number and converted to uint8, uint16, uint32 or\n\
+uint64. If not size is specified, uint32 is used.\n\
+\n\
+@item %s\n\
+The word is parsed as a string, ending at the last character before\n\
+whitespace, an end-of-line or a delimiter specified in the options.\n\
+\n\
+@item %q\n\
+The word is parsed as a \"quoted string\".\n\
+If the first character of the string is a double quote (\") then the string\n\
+includes everything until a matching double quote, including whitespace,\n\
+delimiters and end of line characters.\n\
+If a pair of consecutive double quotes appears in the input,\n\
+it is replaced in the output by a single double quote.\n\
+That is, the input \"He said \"\"Hello\"\"\" would return the value\n\
+'He said \"Hello\"'.\n\
+\n\
+@item  %c\n\
+The next character of the input is read.\n\
+This includes delimiters, whitespace and end of line characters.\n\
+\n\
+@item  %[...]\n\
+@itemx %[^...]\n\
+In the first form, the word consists of the longest run consisting of only\n\
+characters between the brackets.\n\
+Ranges of characters can be specified by a hyphen;\n\
+for example, %[0-9a-zA-Z] matches all alphanumeric characters\n\
+(if the underlying character set is ASCII).\n\
+Since Matlab treats hyphens literally, this expansion only applies to\n\
+alphanumeric characters.\n\
+To include '-' in the set, it should appear first or last in the brackets;\n\
+to include ']', it should be the first character.\n\
+If the first character is '^' then the word consists of characters\n\
+NOT listed.\n\
+\n\
+@item %N...\n\
+For %s, %c %d, %f, %n, %u, an optional width can be specified as %Ns etc.\n\
+where N is an integer > 1.\n\
+For %c, this causes exactly the next N characters to be read instead of\n\
+a single character.\n\
+For the other specifiers, it is an upper bound on the\n\
+number of characters read;\n\
+normal delimiters can cause fewer characters to be read.\n\
+For complex numbers, this limit applies to the real and imaginary\n\
+components individually.\n\
+For %f and %n, format specifiers like %N.Mf are allowed, where M is an upper\n\
+bound on number of characters after the decimal point to be considered;\n\
+subsequent digits are skipped.\n\
+For example, the specifier %8.2f would read 12.345e6 as 1.234e7.\n\
+\n\
+@item %*...\n\
+The word specified by the remainder of the conversion specifier is skipped.\n\
+\n\
+@item literals\n\
+In addition the format may contain literal character strings;\n\
+these will be skipped during reading.\n\
+If the input string does not match this literal, the processing terminates,\n\
+unless \"ReturnOnError\" is set to \"continue\".\n\
+@end table\n\
+\n\
+Parsed words corresponding to the first specifier are returned in the first\n\
+output argument and likewise for the rest of the specifiers.\n\
+\n\
+By default, if there is only one input argument, @var{format} is @t{\"%f\"}.\n\
+This means that numbers are read from @var{str} into a single column vector.\n\
+If @var{format} is explicitly empty, \"\", then textscan will return data\n\
+in a number of columns matching the number of fields on the first data\n\
+line of the input.\n\
+Either of these is suitable only if @var{str} contains only numeric fields.\n\
+\n\
+For example, the string\n\
+\n\
+@example\n\
+@group\n\
+@var{str} = \"\\\n\
+Bunny Bugs   5.5\\n\\\n\
+Duck Daffy  -7.5e-5\\n\\\n\
+Penguin Tux   6\"\n\
+@end group\n\
+@end example\n\
+\n\
+@noindent\n\
+can be read using\n\
+\n\
+@example\n\
+@var{a} = textscan (@var{str}, \"%s %s %f\");\n\
+@end example\n\
+\n\
+The optional numeric argument @var{repeat} can be used for limiting the\n\
+number of items read:\n\
+\n\
+@table @asis\n\
+@item -1\n\
+(default) read all of the string or file until the end.\n\
+\n\
+@item N\n\
+Read until the first of two conditions occurs: the format has been processed\n\
+N times, or N lines of the input have been processed.\n\
+Zero (0) is an acceptable value for @var{repeat}.\n\
+Currently, end-of-line characters inside %q, %c, and %[...]$ conversions\n\
+do not contribute to the line count.\n\
+This is incompatible with Matlab and may change in future.\n\
+@end table\n\
+\n\
+The behavior of @code{textscan} can be changed via property-value pairs.\n\
+The following properties are recognized:\n\
+\n\
+@table @asis\n\
+@item @qcode{\"BufSize\"}\n\
+This specifies the number of bytes to use for the internal buffer.\n\
+A modest speed improvement is obtained by setting this to a large value\n\
+when reading a large file, especially the input contains long strings.\n\
+The default is 4096, or a value dependent on @var{n} is that is specified.\n\
+\n\
+@item @qcode{\"CollectOutput\"}\n\
+A value of 1 or true instructs textscan to concatenate consecutive columns\n\
+of the same class in the output cell array.\n\
+A value of 0 or false (default) leaves output in distinct columns.\n\
+\n\
+@item @qcode{\"CommentStyle\"}\n\
+Parts of @var{str} are considered comments and will be skipped.\n\
+@var{value} is the comment style and can be either\n\
+(1) One string, or 1x1 cell string, to skip everything to the right of it;\n\
+(2) A cell array of two strings, to skip everything between the first and\n\
+second strings.\n\
+Comments are only parsed where whitespace is accepted, and do not act as\n\
+delimiters.\n\
+\n\
+@item @qcode{\"Delimiter\"}\n\
+If @var{value} is a string, any character in @var{value} will be used to\n\
+split @var{str} into words.\n\
+If @var{value} is a cell array of strings,\n\
+any string in the array will be used to split @var{str} into words.\n\
+(default value = any whitespace.)\n\
+\n\
+@item @qcode{\"EmptyValue\"}\n\
+Value to return for empty numeric values in non-whitespace delimited data.\n\
+The default is NaN@.\n\
+When the data type does not support NaN (int32 for example),\n\
+then default is zero.\n\
+\n\
+@item @qcode{\"EndOfLine\"}\n\
+@var{value} can be either a emtpy or one character specifying the\n\
+end of line character, or the pair\n\
+@qcode{\"@xbackslashchar{}r@xbackslashchar{}n\"} (CRLF).\n\
+In the latter case, any of\n\
+@qcode{\"@xbackslashchar{}r\"}, @qcode{\"@xbackslashchar{}n\"} or\n\
+@qcode{\"@xbackslashchar{}r@xbackslashchar{}n\"} is counted as a (single)\n\
+newline.\n\
+If no value is given, @qcode{\"@xbackslashchar{}r@xbackslashchar{}n\"} is\n\
+used.\n\
+@c If set to \"\" (empty string) EOLs are ignored as delimiters and added\n\
+@c to whitespace.\n\
+\n\
+@c When reading from a character string, optional input argument @var{n}\n\
+@c specifies the number of times @var{format} should be used (i.e., to limit\n\
+@c the amount of data read).\n\
+@c When reading from file, @var{n} specifies the number of data lines to read;\n\
+@c in this sense it differs slightly from the format repeat count in strread.\n\
+\n\
+@item @qcode{\"HeaderLines\"}\n\
+The first @var{value} number of lines of @var{fid} are skipped.\n\
+Note that this does not refer to the first non-comment lines, but the first\n\
+lines of any type.\n\
+\n\
+@item @qcode{\"MultipleDelimsAsOne\"}\n\
+If @var{value} is non-zero,\n\
+treat a series of consecutive delimiters, without whitespace in between,\n\
+as a single delimiter.\n\
+Consecutive delimiter series need not be vertically @qcode{\"aligned\"}.\n\
+Without this option, a single delimiter before the end of the line does\n\
+not cause the line to be considered to end with an empty value,\n\
+but a single delimiter at the start of a line causes the line\n\
+to be considered to start with an empty value.\n\
+\n\
+@item @qcode{\"TreatAsEmpty\"}\n\
+Treat single occurrences (surrounded by delimiters or whitespace) of the\n\
+string(s) in @var{value} as missing values.\n\
+\n\
+@item @qcode{\"ReturnOnError\"}\n\
+If set to numerical 1 or true, return normally as soon as an error\n\
+is encountered, such as trying to read a string using @qcode{%f}.\n\
+If set to 0 or false, return an error and no data.\n\
+If set to \"continue\" (default), textscan attempts to continue reading\n\
+beyond the location; however, this may cause the parsing to get out of sync.\n\
+\n\
+@item @qcode{\"Whitespace\"}\n\
+Any character in @var{value} will be interpreted as whitespace and trimmed;\n\
+The default value for whitespace is\n\
+@c Note: the next line specifically has a newline which generates a space\n\
+@c       in the output of qcode, but keeps the next line < 80 characters.\n\
+@qcode{\"\n\
+@xbackslashchar{}b@xbackslashchar{}r@xbackslashchar{}n@xbackslashchar{}t\"}\n\
+(note the space).  Unless whitespace is set to @qcode{\"\"} (empty) AND at\n\
+least one @qcode{\"%s\"} format conversion specifier is supplied, a space is\n\
+always part of whitespace.\n\
+\n\
+@end table\n\
+\n\
+When the number of words in @var{str} or @var{fid} doesn't match an exact\n\
+multiple of the number of format conversion specifiers,\n\
+textscan's behavior depends on\n\
+whether the last character of the string or file is\n\
+an end-of-line as specified by the EndOfLine option:\n\
+\n\
+@table @asis\n\
+@item last character = end-of-line\n\
+Data columns are padded with empty fields, NaN or 0 (for integer fields)\n\
+so that all columns have equal length\n\
+\n\
+@item last character is not end-of-line\n\
+Data columns are not padded; textscan returns columns of unequal length\n\
+@end table\n\
+\n\
+\n\
+The second output, @var{position}, provides the position, in characters\n\
+from the beginning of the file or string, at which the processing stopped.\n\
+\n\
+@seealso{dlmread, fscanf, load, strread, textread}\n\
+@end deftypefn")
+{
+  octave_value_list retval;
+  std::string format;
+  int params = 1;
+
+  if (args.length () < 1)
+    print_usage ();
+  else if (args.length () == 1)
+    format = "%f";      // ommited format = %f.  explicit "" = width from file
+  else if (args(1).is_string ())
+    {
+      format = args(1).string_value ();
+      if (args(1).is_sq_string ())
+        format = do_string_escapes (format);
+      params++;
+    }
+  else
+    error ("textscan: FORMAT must be a string, not <%s>",
+           args(1).class_name ().c_str ());
+
+  octave_idx_type ntimes = -1;
+  textscan tscanner;
+
+  if (args.length () >= 3)
+    {
+      if (args(2).is_numeric_type ())
+        {
+          ntimes = args(2).idx_type_value ();
+          if (ntimes < args(2).double_value ())
+            error ("textscan: REPEAT = %g is too large",
+                   args(2).double_value ());
+          params = 3;
+        }
+    }
+  textscan_format_list fmt_list (format);
+
+  tscanner.parse_options (args, params, fmt_list);
+
+  if (args(0).is_string ())
+    {
+      std::istringstream is (args(0).string_value ());
+      octave_value tmp = tscanner.scan (&is, fmt_list, ntimes);
+      retval(0) = tmp;
+
+      std::ios::iostate state = is.rdstate ();
+      is.clear ();
+      retval(1) = octave_value (static_cast<long>(is.tellg ()));
+      is.setstate (state);
+    }
+  else
+    {
+      octave_stream os = octave_stream_list::lookup (args(0), "textscan");
+      octave_value tmp = tscanner.scan (os.input_stream (), fmt_list, ntimes);
+
+      retval(0) = tmp;
+      // FIXME -- warn if stream is not opened in binary mode?
+      std::ios::iostate state = os.input_stream ()->rdstate ();
+      os.input_stream ()->clear ();
+      retval(1) = os.tell ();
+      os.input_stream ()->setstate (state);
+    }
+
+  return retval;
+}
+
+/********************/
+/* Calculate x^n */
+/* Used for ...e+nn  so that, for example, 1e2 is exactly 100 and 5e-1 is 1/2 */
+double pown (double x, unsigned int n)
+{
+  double retval = 1;
+
+  for (unsigned int d = n; d; d >>= 1)
+    {
+      if (d & 1)
+        retval *= x;
+      x *= x;
+    }
+
+  return retval;
+}
+
+/********************/
+/* Read a double considering the "precision" field of  fmt  and the */
+/* exp_chars  option of  options. */
+double
+textscan::read_double (dstr& is, const textscan_format_elt& fmt) const
+{
+  int    sign = 1;
+  unsigned int    width_left = fmt.width;
+  double retval  = 0;
+  bool   valid = false;         // syntactically correct double?
+
+  int    ch = is.peek ();
+
+  if (ch == '+')
+    {
+      is.get ();
+      ch = is.peek ();
+      if (width_left)
+        width_left--;
+    }
+  else if (ch == '-')
+    {
+      sign = -1;
+      is.get ();
+      ch = is.peek ();
+      if (width_left)
+        width_left--;
+    }
+
+  // Read integer part
+  if (ch != '.')
+    {
+      if (ch >= '0' && ch <= '9')       // valid if at least one digit
+        valid = true;
+      while (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
+        retval = retval * 10 + (ch - '0');
+      width_left++;
+    }
+  // Read fractional part, up to specified precision
+  if (ch == '.' && width_left)
+    {
+      double multiplier = 1;
+      int precision = fmt.prec;
+      int i;
+
+      if (width_left)
+        width_left--;                  // Consider width of '.'
+
+      if (precision == -1)
+        precision = 1<<30;           // FIXME Should be MAXINT
+      if (!valid)                    // if there was nothing before '.'...
+        is.get ();                   // ...ch was a "peek", not "get".
+
+      for (i = 0; i < precision; i++)
+        {
+          if (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
+            retval += (ch - '0') * (multiplier *= 0.1);
+          else
+            {
+              width_left++;
+              break;
+            }
+        }
+
+      // round up if we truncated and the next digit is >= 5
+      if ((i == precision || !width_left) && (ch = is.get ()) >= '5'
+                                              && ch <= '9')
+        retval += multiplier;
+
+      if (i > 0)
+        valid = true;           // valid if at least one digit after '.'
+
+      // skip remainder after '.', to field width, to look for exponent
+      if (i == precision)
+        while (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
+          ;  /* discard */
+      width_left++;
+    }
+
+  // look for exponent part in, e.g.,  6.023E+23
+  const char *ec = exp_chars.c_str ();
+  bool used_exp = false;
+  if (valid && width_left > 1 && strchr (ec, ch))
+    {
+      int ch1 = is.peek ();
+      if (ch1 == '-' || ch1 == '+' || (ch1 >= '0' && ch1 <= '9'))
+        {          // if 1.0e+$ or some such, this will set failbit, as we want
+          width_left--;                         // count "E"
+          int exp = 0;
+          int exp_sign = 1;
+          if (ch1 == '+')
+            {
+              if (width_left)
+                width_left--;
+              is.get ();
+            }
+          else if (ch1 == '-')
+            {
+              exp_sign = -1;
+              is.get ();
+              if (width_left)
+                width_left--;
+            }
+          valid = false;
+          while (width_left-- && is && (ch = is.get ()) >= '0' && ch1 <= '9')
+            {
+              exp = exp*10 + ch - '0';
+              valid = true;
+            }
+          width_left++;
+          if (ch != EOF && width_left)
+            is.putback (ch);
+
+          double multiplier = pown (10, exp);
+          if (exp_sign > 0)
+            retval *= multiplier;
+          else
+            retval /= multiplier;
+
+          used_exp = true;
+        }
+    }
+  is.clear ();
+  if (!used_exp && ch != EOF && width_left)
+    is.putback (ch);
+
+  // Check for +/- inf and NaN
+  if (!valid && width_left >= 3)
+    {
+      int i = lookahead (is, inf_nan, 3, false);   // false -> case insensitive
+      if (i == 0)
+        {
+          retval = octave_Inf;
+          valid = true;
+        }
+      else if (i == 1)
+        {
+          retval = octave_NaN;
+          valid = true;
+        }
+    }
+
+  // Check for +/- inf and NaN
+  if (!valid && width_left >= 3)
+    {
+      int i = lookahead (is, inf_nan, 3, false);   // false -> case insensitive
+      if (i == 0)
+        {
+          retval = octave_Inf;
+          valid = true;
+        }
+      else if (i == 1)
+        {
+          retval = octave_NaN;
+          valid = true;
+        }
+    }
+
+  if (!valid)
+    is.setstate (std::ios::failbit);
+  else
+    is.setstate (is.rdstate () & ~std::ios::failbit);
+
+  return retval * sign;
+}
+
+/********************/
+// Read a single number: real, complex, inf, NaN,
+// possibly with limited precision.
+// Calls to this should be preceded by skip_whitespace.
+// Calling that inside scan_complex would violate its const declaration.
+void
+textscan::scan_complex (dstr& is, const textscan_format_elt& fmt,
+                        Complex& val) const
+{
+  double im = 0;
+  double re = 0;
+  bool as_empty = false;   // did we fail but match a "treat_as_empty" string?
+  bool inf = false;
+
+  int ch = is.peek ();
+  if (ch == '+' || ch == '-')   // check for [+-][ij] with no coefficients
+    {
+      ch = is.get ();
+      int ch2 = is.peek ();
+      if (ch2 == 'i' || ch2 == 'j')
+        {
+          double value = 1;
+          is.get ();
+          // Check not -inf
+          if (is.peek () == 'n')
+            {
+              char *pos = is.tellg ();
+              std::ios::iostate state = is.rdstate ();
+
+              is.get ();
+              ch2 = is.get ();
+              if (ch2 == 'f')
+                {
+                  inf = true;
+                  re = (ch == '+') ? octave_Inf : -octave_Inf;
+                  value = 0;
+                }
+              else
+                {
+                  is.clear (state);
+                  is.seekg (pos);   // reset to position before look-ahead
+                }
+            }
+
+          im = (ch == '+') ? value : -value;
+        }
+      else
+        is.putback (ch);
+    }
+
+  if (!im && !inf)        // if not [+-][ij] or [+-]inf, read real normally
+    {
+      char *pos = is.tellg ();
+      std::ios::iostate state = is.rdstate ();
+      //re = octave_read_value<double> (is);
+      re = read_double (is, fmt);
+
+      // check for "treat as empty" string
+      if (treat_as_empty.numel ()
+          && (is.fail () || octave_is_NaN_or_NA (Complex (re))
+              || re == octave_Inf))
+        {
+
+          for (int i = 0; i < treat_as_empty.numel (); i++)
+            {
+              if (ch == treat_as_empty (i).string_value ()[0])
+                {
+                  as_empty = true;   // first char matches, so read the lot
+                  break;
+                }
+            }
+          if (as_empty)              // if first char matched...
+            {
+              as_empty = false;      // ...look for the whole string
+
+              is.clear (state);      // treat_as_empty "-" causes partial read
+              is.seekg (pos);        // reset to position before failed read
+
+              // treat_as_empty strings may be different sizes.
+              // Read ahead longest, put it all back, then re-read the string
+              // that matches.
+              char *look, look_buf [treat_as_empty_len + 1];
+                // prefill, in case EOF means part-filled.
+              memset (look_buf, '\0', treat_as_empty_len);
+              look = is.read (look_buf, treat_as_empty_len, pos);
+
+              is.clear (state);
+              is.seekg (pos);        // reset to position before look-ahead
+                                     // FIXME -- is.read could invalidate pos
+
+              for (int i = 0; i < treat_as_empty.numel (); i++)
+                {
+                  std::string s = treat_as_empty (i).string_value ();
+                  if (!strncmp (s.c_str (), look, s.size ()))
+                    {
+                      as_empty = true;
+                                     // read just the right amount
+                      is.read (look_buf, s.size (), pos);
+                      break;
+                    }
+                }
+            }
+        }
+
+      if (!is.eof () && !as_empty)
+        {
+          state = is.rdstate ();        // before tellg, since that fails at EOF
+          pos = is.tellg ();
+          ch = is.peek ();   // ch == EOF if read failed; no need to chk fail
+          if (ch == 'i' || ch == 'j')           // pure imaginary
+            {
+              is.get ();
+              im = re;
+              re = 0;
+            }
+          else if (ch == '+' || ch == '-')      // see if it is real+imag[ij]
+            {
+              // save stream state in case we have to restore it
+              pos   = is.tellg ();
+              state = is.rdstate ();
+
+              //im = octave_read_value<double> (is);
+              im = read_double (is, fmt);
+              if (is.fail ())
+                im = 1;
+
+              if (is.peek () == 'i' || is.peek () == 'j')
+                is.get ();
+              else
+                {
+                  im = 0;           // no valid imaginary part.  Restore state
+                  is.clear (state); // eof shouldn't cause fail.
+                  is.seekg (pos);
+                }
+            }
+          else if (is.eof ())       // we've read enough to be a "valid" read
+            is.clear (state);       // failed peek shouldn't cause fail
+        }
+    }
+  if (as_empty)
+    val = empty_value.scalar_value ();
+  else
+    val = Complex (re, im);
+}
+
+/********************/
+// Return in  val  the run of characters from  is  NOT contained in  pattern.
+int
+textscan::scan_caret (dstr& is, const char *pattern, std::string& val)
+          const
+{
+  int c1 = EOF;
+  std::ostringstream obuf;              // Is this optimised for growing?
+
+  while (is && (c1 = (is && !is.eof ()) ? is.get_undelim () : EOF) != EOF
+         && !strchr (pattern, c1))
+    obuf << static_cast<char> (c1);
+
+  val = obuf.str ();
+
+  if (c1 != EOF)
+    is.putback (c1);
+
+  return c1;
+}
+
+/********************/
+// Read until one of the strings in  delimiters  is found.
+// For efficiency,  ends  is a list of the last character of each delimiter
+std::string
+textscan::read_until (dstr& is, const Cell& delimiters,
+                     const std::string& ends) const
+{
+  std::string retval ("");
+  bool done = false;
+  do
+    {                               // find sequence ending with an ending char
+      std::string next;
+      scan_caret (is, ends.c_str (), next);
+      retval = retval + next;   // FIXME -- could use repeated doubling of size
+
+      int last = (!is.eof ()) ? is.get_undelim () : EOF;
+      if (last != EOF)
+        {
+          retval = retval + static_cast<char> (last);
+          for (int i = 0; i < delimiters.numel (); i++)
+            {
+              std::string delim = delimiters(i).string_value ();
+              int start = retval.length () - delim.length ();
+              if (start < 0)
+                start = 0;
+              std::string may_match = retval.substr (start);
+              if (may_match == delim)
+                {
+                  done = true;
+                  retval = retval.substr (0, start);
+                  break;
+                }
+            }
+        }
+    }
+  while (!done && is && !is.eof ());
+
+  return retval;
+}
+
+
+/********************/
+// Read stream until either fmt.width chars have been read, or options.delimiter
+// has been found
+// Does *not* rely on fmt being 's' -- used by formats like %6f to limit to 6
+void
+textscan::scan_string (dstr& is, const textscan_format_elt& fmt, std::string& val) const
+{
+  if (delim_list.numel () == 0)
+    {
+      unsigned int i = 0;
+      unsigned int width = fmt.width;
+
+      for (i = 0; i < width; i++)
+        {
+          if (i+1 > val.length ())
+            val = val + val + ' ';      // grow even if empty
+          int ch = is.get ();
+          if (is_delim (ch) || ch == EOF)
+            {
+              is.putback (ch);
+              break;
+            }
+          else
+            val[i] = ch;
+        }
+      val = val.substr (0, i);          // trim pre-allocation
+    }
+  else  // Cell array of multi-character delimiters
+    {
+      std::string ends ("");
+      for (int i = 0; i < delim_list.numel (); i++)
+        {
+          std::string tmp = delim_list(i).string_value ();
+          ends += tmp.substr (tmp.length () - 1);
+        }
+      val = textscan::read_until (is, delim_list, ends);
+    }
+}
+
+/********************/
+// Return in  val  the run of characters from  is  contained in  pattern.
+int
+textscan::scan_bracket (dstr& is, const char *pattern, std::string& val)
+          const
+{
+  int c1 = EOF;
+  std::ostringstream obuf;              // Is this optimised for growing?
+
+  while (is && strchr (pattern, (c1 = is.get_undelim ())))
+    obuf << static_cast<char> (c1);
+
+  val = obuf.str ();
+  if (c1 != EOF)
+    is.putback (c1);
+  return c1;
+}
+
+/********************/
+// Return in  val  a string, either delimited by whitespace/delimiters, or
+// enclosed in a pair of double quotes ("...").  Enclosing quotes are removed.
+// A consecutive pair "" is inserted into  val  as a single ".
+void
+textscan::scan_qstring (dstr& is, const textscan_format_elt& fmt,
+                        std::string& val)
+{
+  skip_whitespace (is);
+
+  if (is.peek () != '\"')
+    scan_string (is, fmt, val);
+  else
+    {
+      is.get ();
+      scan_caret (is, "\"", val);       // read everything until "
+      is.get ();                        // swallow "
+
+      while (is && is.peek () == '\"')  // if double ", insert one in stream,
+        {                               // and keep looking for single "
+          is.get ();
+          std::string val1;
+          scan_caret (is, "\"", val1);
+          val = val + "\"" + val1;
+          is.get_undelim ();
+        }
+    }
+}
+
+/********************/
+// Read from  is  into  val  a string of the next  fmt.width  characters,
+// including any whitespace or delimiters.
+void
+textscan::scan_cstring (dstr& is, const textscan_format_elt& fmt,
+                        std::string& val) const
+{
+  int ch;
+  val.resize (fmt.width);
+  for (unsigned int i = 0; is && i < fmt.width; i++)
+    if ((ch = is.get_undelim ()) != EOF)
+      val[i] = ch;
+    else
+      {
+        val.resize (i);
+        break;
+      }
+}
+
+
+/**
+ *  Read a single '%...' conversion and place it in position  row  of  ov.
+ */
+void
+textscan::scan_one (dstr& is, const textscan_format_elt& fmt,
+                    octave_value& ov, Array<octave_idx_type> row)
+{
+  skip_whitespace (is);
+
+  is.clear ();
+
+  octave_value val;
+  if (fmt.numeric)
+    {
+      if (fmt.type == 'f' || fmt.type == 'n')
+        {
+          Complex v;
+          skip_whitespace (is);
+          scan_complex (is, fmt, v);
+
+          if (!fmt.discard && !is.fail ())
+            {
+              if (fmt.bitwidth == 64)
+                {
+                  if (ov.is_real_type () && v.imag () == 0)
+                    ov.internal_rep ()->fast_elem_insert (row(0), v.real ());
+                  else
+                    {
+                      if (ov.is_real_type ())  // cat does type conversion
+                        ov = do_cat_op (ov, octave_value (v), row);
+                      else
+                        ov.internal_rep ()->fast_elem_insert (row(0), v);
+                    }
+                }
+              else
+                {
+                  if (ov.is_real_type () && v.imag () == 0)
+                    ov.internal_rep ()->fast_elem_insert (row(0),
+                                                         float (v.real ()));
+                  else
+                    {
+                      if (ov.is_real_type ())  // cat does type conversion
+                        ov = do_cat_op (ov, octave_value (v), row);
+                      else
+                        ov.internal_rep ()->fast_elem_insert (row(0),
+                                                             FloatComplex (v));
+                    }
+                }
+            }
+        }
+      else
+        {
+          double v;    // Matlab docs say 1e30 etc should be valid for %d and
+                       // 1000 as a %d8 should be 127, so read as double.
+                       // Some loss of precision for d64 and u64.
+          skip_whitespace (is);
+          v = read_double (is, fmt);
+          if (!fmt.discard && !is.fail ())
+            switch (fmt.bitwidth)
+              {
+                case 64:
+                  switch (fmt.type)
+                    {
+                      case 'd':
+                        {
+                          octave_int64 vv = v;
+                          ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                        }
+                        break;
+                      case 'u':
+                        {
+                          octave_uint64 vv = v;
+                          ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                        }
+                        break;
+                    }
+                  break;
+                case 32:
+                  switch (fmt.type)
+                    {
+                      case 'd':
+                        {
+                          octave_int32 vv = v;
+                          ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                        }
+                        break;
+                      case 'u':
+                        {
+                          octave_uint32 vv = v;
+                          ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                        }
+                        break;
+                    }
+                  break;
+                case 16:
+                  if (fmt.type == 'd')
+                    {
+                      octave_int16 vv = v;
+                      ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                    }
+                  else
+                    {
+                      octave_uint16 vv = v;
+                      ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                    }
+                  break;
+                case 8:
+                  if (fmt.type == 'd')
+                    {
+                      octave_int8 vv = v;
+                      ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                    }
+                  else
+                    {
+                      octave_uint8 vv = v;
+                      ov.internal_rep ()->fast_elem_insert (row(0), vv);
+                    }
+                  break;
+              }
+        }
+      if (is.fail ())
+        {
+          if (!fmt.discard)
+            ov = do_cat_op (ov, empty_value, row);
+
+          // If we are continuing after errors, skip over this field
+          if (return_on_error == 2)
+            {
+              std::ios::iostate state = is.rdstate ();
+              is.clear ();          // clear to allow read pointer to advance
+
+              std::string dummy;
+              scan_string (is, fmt, dummy);
+
+              is.setstate (state);
+            }
+        }
+
+    }
+  else
+    {
+      std::string vv ("        ");      // initial buffer.  Grows as needed
+      switch (fmt.type)
+        {
+          case 's':
+            scan_string (is, fmt, vv);
+            break;
+          case 'q':
+            scan_qstring (is, fmt, vv);
+            break;
+          case 'c':
+            scan_cstring (is, fmt, vv);
+            break;
+          case '[':
+            scan_bracket (is, fmt.char_class.c_str (), vv);
+            break;
+          case '^':
+            scan_caret   (is, fmt.char_class.c_str (), vv);
+            break;
+        }
+      if (!fmt.discard)
+        ov.internal_rep ()->fast_elem_insert (row (0),
+                                              Cell (octave_value (vv)));
+        // FIXME -- why does failbit get set at EOF, instead of eofbit?
+      if (vv.length () != 0)
+        is.clear (is.rdstate () & ~std::ios_base::failbit);
+    }
+  is.field_done ();
+}
+
+#include "builtin-defun-decls.h"
+/**
+ *  Read data corresponding to the entire format string once,
+ *  placing the values in row  row of retval.
+ */
+int
+textscan::read_format_once (dstr& is,
+                            textscan_format_list& fmt_list,
+                            std::list<octave_value> & retval,
+                            Array<octave_idx_type> row, int& done_after)
+{
+  const textscan_format_elt *elem = fmt_list.first ();
+  std::list<octave_value>::iterator out=retval.begin ();
+  bool no_conversions = true;
+  bool done = false;
+  int i;
+  bool conversion_failed = false;       // Record for ReturnOnError
+  bool this_conversion_failed;          // Record for ReturnOnError
+
+  octave_quit (); 		        // Allow ctrl-C
+
+  for (i = 0; i < fmt_list.numel (); i++)
+    {
+      this_conversion_failed = false;
+      is.clear ();           // clear fail of previous numeric conversions
+      switch (elem->type)
+        {
+          case 'C':
+          case 'D':
+            std::cerr << "textscan: Conversion %" << elem->type
+                      << " not yet implemented\n";
+            break;
+
+          case 'u':
+          case 'd':
+          case 'f':
+          case 'n':
+          case 's':
+          case '[':
+          case '^':
+          case 'q':
+          case 'c':
+            scan_one (is, *elem, *out, row);
+            break;
+
+          case textscan_format_elt::literal_conversion :
+            match_literal (is, *elem);
+            break;
+          default:
+            error ("Unknown format element '%c'", elem->type);
+        }
+
+      if (!is.fail ())
+        {
+          if (!elem->discard)
+            no_conversions = false;
+        }
+      else
+        {
+          if (return_on_error < 2)
+            this_conversion_failed = true;
+          is.clear (is.rdstate () & ~std::ios::failbit);
+        }
+
+      if (!elem->discard)
+        out++;
+
+      elem = fmt_list.next ();
+      char *pos = is.tellg ();
+      // FIXME -- these conversions "ignore delimiters".  Should they include
+      // delimiters at the start of the conversion, or can those be skipped?
+      if (elem->type != textscan_format_elt::literal_conversion
+          // && elem->type != '[' && elem->type != '^' && elem->type != 'c'
+         )
+        skip_delim (is);
+      if (this_conversion_failed)
+        {
+          if (is.tellg () == pos && ! conversion_failed)
+            {                 // done_after = first failure
+              done_after = i; // note fail, but parse others to get empty_val
+              conversion_failed = true;
+            }
+          else
+            this_conversion_failed = false;
+        }
+
+      if (is.eof ())
+        {
+          if (!done)
+            done_after = i+1;
+          done = true;        // note EOF, but process others to get empty_val
+        }
+    }
+  if (done)
+    is.setstate (std::ios::eofbit);
+
+    // returning -3 means "error, and no columns read this row
+  if (is.eof ())
+    return (2 + no_conversions);
+
+  if (conversion_failed)
+    return (4 + no_conversions);
+
+  return 0;
+}
+
+/********************/
+void
+textscan::parse_options (const octave_value_list& args, int first_param,
+                         textscan_format_list& fmt_list)
+{
+  int last = args.length ();
+  int n = last - first_param;
+
+  if (n & 1)
+    error ("textscan: %d parameters given, but only %d values", n-n/2, n/2);
+
+  delim_len = 1;
+  bool have_delims = false;
+  for (int i = first_param; i < last; i += 2)
+    {
+      if (!args(i).is_string ())
+        error ("textscan: Invalid paramter type <%s> for parameter %d",
+               args(i).type_name ().c_str (), (i-first_param)/2 + 1);
+
+      std::string param = args(i).string_value ();
+      std::transform (param.begin (), param.end (),
+                      param.begin (), ::tolower);
+      if (param == "delimiter")
+        {
+          bool invalid = true;
+          if (args(i+1).is_string ())
+            {
+              invalid = false;
+              have_delims = true;
+              delims = args(i+1).string_value ();
+            }
+          else if (args(i+1).is_cell ())
+            {
+              invalid = false;
+              delim_list = args(i+1).cell_value ();
+              delim_table = " "; // non-empty, to flag non-default delim
+
+              // Check that all elements are strings, and find max length
+              for (int j = 0; j < delim_list.numel (); j++)
+                {
+                  if (!delim_list(j).is_string ())
+                    invalid = true;
+                  else
+                    {
+                      octave_idx_type len = delim_list(j).string_value ()
+                                                         .length ();
+                      delim_len = std::max (static_cast<int>(len), delim_len);
+                    }
+                }
+            }
+          if (invalid)
+            error ("textscan:  Delimiters must be either a string or "
+                   "cell array of strings");
+        }
+      else if (param == "commentstyle")
+        {
+          if (args(i+1).is_string ())
+            {   // check here for names like "C++", "C", "shell", ...?
+              comment_style = Cell (args(i+1));
+            }
+          else if (args(i+1).is_cell ())
+            {
+              comment_style = args(i+1).cell_value ();
+              int len = comment_style.numel ();
+              if ((len >= 1 && !comment_style (0).is_string ())
+                  || (len >= 2 && !comment_style (1).is_string ())
+                  || (len >= 3))
+                error ("textscan: CommentStyle must be either a string or "
+                       "cell array of one or two strings");
+            }
+          else
+            error ("textscan:  CommentStyle must be either a string"
+                   " or cell array of one or two strings, not <%s>",
+                   args(i+1).class_name ().c_str ());
+
+          // How far ahead do we need to look to detect an open comment
+          // and which character do we look for?
+          if (comment_style.numel () >= 1)
+            {
+              comment_len  = comment_style (0).string_value ().size ();
+              comment_char = comment_style (0).string_value ()[0];
+            }
+        }
+      else if (param == "treatasempty")
+        {
+          bool invalid = false;
+          if (args(i+1).is_string ())
+            {
+              treat_as_empty = Cell (args(i+1));
+              treat_as_empty_len = args(i+1).string_value ().size ();
+            }
+          else if (args(i+1).is_cell ())
+            {
+              treat_as_empty = args(i+1).cell_value ();
+              for (int j = 0; j < treat_as_empty.numel (); j++)
+                if (!treat_as_empty (j).is_string ())
+                  invalid = true;
+                else
+                  {
+                    int k = treat_as_empty (j).string_value ().size ();
+                    if (k > treat_as_empty_len)
+                      treat_as_empty_len = k;
+                  }
+            }
+          if (invalid)
+            error ("textscan:  TreatAsEmpty must be either a string or "
+                   "cell array of one or two strings");
+
+          // FIXME Ensure none is a prefix of a later one. Sort by length?
+        }
+      else if (param == "collectoutput")
+        {
+          if (args(i+1).is_numeric_type ())
+            collect_output = args(i+1).bool_value ();
+          else
+            error ("textscan:  CollectOutput must be logical or numeric");
+        }
+      else if (param == "emptyvalue")
+        {
+          if (args(i+1).is_numeric_type ())
+            empty_value = args(i+1).scalar_value ();
+          else
+            error ("textscan: EmptyValue must be numeric, not <%s>",
+                   args(i+1).class_name ().c_str ());
+        }
+      else if (param == "headerlines")
+        {
+          if (args(i+1).is_numeric_type ())
+            header_lines = args(i+1).scalar_value ();
+          else
+            error ("textscan: HeaderLines must be numeric");
+        }
+      else if (param == "bufsize")
+        {
+          if (args(i+1).is_numeric_type ())
+            buffer_size = args(i+1).scalar_value ();
+          else
+            error ("textscan: BufSize must be numeric");
+        }
+      else if (param == "multipledelimsasone")
+        {
+          if (args(i+1).is_numeric_type ())
+            {
+              if (args(i+1).bool_value ())
+                multiple_delims_as_one = true;
+            }
+          else
+            error ("textscan: MultipleDimsAsOne must be logical or numeric");
+        }
+      else if (param == "returnonerror")
+        {
+          if (args(i+1).is_numeric_type ())
+            return_on_error = args(i+1).bool_value ();
+          else if (args(i+1).is_string ()
+                   && args(i+1).string_value () == "continue")
+            return_on_error = 2;
+          else
+            error ("textscan: ReturnOnError must be logical or "
+                   "numeric, or 'continue'");
+        }
+      else if (param == "whitespace")
+        {
+          if (args(i+1).is_string ())
+            whitespace = args(i+1).string_value ();
+          else
+            error ("textscan: Whitespace must be a character string");
+        }
+      else if (param == "expchars")
+        {
+          if (args(i+1).is_string ())
+            {
+              exp_chars = args(i+1).string_value ();
+              default_exp = false;
+            }
+          else
+            error ("textscan: ExpChars must be a character string");
+        }
+      else if (param == "endofline")
+        {
+          bool valid = true;
+          if (args(i+1).is_string ())
+            {
+              std::string s = args(i+1).string_value ();
+              if (args(i+1).is_sq_string ())
+                s = do_string_escapes (s);
+              int l = s.length ();
+              if (l == 0)
+                eol1 = eol2 = -2;
+              else if (l == 1)
+                eol1 = eol2 = s.c_str ()[0];
+              else if (l == 2)
+                {
+                  eol1 = s.c_str ()[0];
+                  eol2 = s.c_str ()[1];
+                  if (eol1 != '\r' || eol2 != '\n')    // Why limit it?
+                    valid = false;
+                }
+              else
+                valid = false;
+            }
+          else
+            valid = false;
+          if (!valid)
+            error ("textscan: EndOfLine must be at most one character "
+                   "or '\\r\\n'");
+        }
+      else
+        error ("textscan: Unrecognised option '%s'", param.c_str ());
+    }
+
+  whitespace_table = std::string (256, '\0');
+  for (unsigned int i = 0; i < whitespace.length (); i++)
+    whitespace_table[whitespace[i]] = '1';
+
+  // For Matlab compatibility, add 0x20 to whitespace, unless
+  // whitespace is explicitly ignored.
+  if (! (whitespace.length () == 0 && fmt_list.has_string))
+    whitespace_table[' '] = '1';
+
+  // Create look-up table of delimiters, based on 'delimiter'
+  delim_table = std::string (256, '\0');
+  delim_table[eol1] = '1';        // EOL is always a delimiter
+  delim_table[eol2] = '1';        // EOL is always a delimiter
+  if (!have_delims)
+    for (unsigned int i = 0; i < 256; i++)
+      {
+        if (isspace (i))
+          delim_table[i] = '1';
+      }
+  else
+    for (unsigned int i = 0; i < delims.length (); i++)
+      delim_table[delims[i]] = '1';
+}
+
+/********************/
+// skip comments, and characters specified by the "Whitespace" option
+// If EOLstop==true, doesn't skip end of line
+int
+textscan::skip_whitespace (dstr& is, bool EOLstop)
+{
+  int c1 = EOF;
+  bool found_comment = false;
+
+  do
+    {
+      found_comment = false;
+      int prev = -1;
+      while (is && (c1 = is.get_undelim ()) != EOF
+             && ( ( (c1 == eol1 || c1 == eol2) && ++lines && !EOLstop)
+                  || isspace (c1)))
+        {
+          if (prev == eol1 && eol1 != eol2 && c1 == eol2)
+            lines--;
+          prev = c1;
+        }
+
+      if (c1 == comment_char)           // see if we match an open comment
+        {
+          // save stream state in case we have to restore it
+          char *pos   = is.tellg ();
+          std::ios::iostate state = is.rdstate ();
+
+          char *look, tmp [comment_len];
+          look = is.read (tmp, comment_len-1, pos);   // already read first char
+          if (is && !strncmp (comment_style(0).string_value ().substr (1)
+                              .c_str (), look, comment_len-1))
+            {
+              found_comment = true;
+
+              std::string dummy;
+              char eol [3] = {static_cast<char> (eol1),
+                              static_cast<char> (eol2),
+                              '\0'};
+              if (comment_style.numel () == 1)  // skip to end of line
+                {
+                  scan_caret (is, eol, dummy);
+                  c1 = is.get_undelim ();
+                  if (c1 == eol1 && eol1 != eol2 && is.peek_undelim () == eol2)
+                    is.get_undelim ();
+                  lines++;
+                }
+              else      // matching pair
+                {
+                  std::string end_c = comment_style(1).string_value ();
+                        // last char of end-comment sequence
+                  char last[2] = {*(end_c.substr (end_c.length ()-1).c_str ()),
+                                  '\0'};
+                  std::string may_match ("");
+                  do
+                    {           // find sequence ending with last char
+                      scan_caret (is, last, dummy);
+                      is.get_undelim ();        // (read   last  itself)
+
+                      may_match = may_match + dummy + *last;
+                      int start = may_match.length () - end_c.length ();
+                      if (start < 0)
+                        start = 0;
+                      may_match = may_match.substr (start);
+                    }
+                  while (may_match != end_c && is && !is.eof ());
+                }
+            }
+          else  // wasn't really a comment; restore state
+            {
+              is.clear (state);
+              is.seekg (pos);
+            }
+        }
+    }
+  while (found_comment);
+
+  if (c1 != EOF)
+    is.putback (c1);
+  return c1;
+}
+
+/********************/
+// See if the next few characters match one of the strings in target.
+// For efficiency,  max_len  is the cached longest length of any target.
+// Returns -1 if none is found, or the inde of the match.
+int
+textscan::lookahead (dstr& is, const Cell& targets, int max_len,
+                     bool case_sensitive) const
+{
+  // target strings may be different sizes.
+  // Read ahead longest, put it all back, then re-read the string
+  // that matches.
+
+  char *pos = is.tellg ();
+
+  char *look, tmp [max_len + 1];
+
+  memset (tmp, '\0', max_len); // prefill, in case EOF means part-filled.
+  look = is.read (tmp, max_len, pos);
+
+  is.clear ();
+  is.seekg (pos);              // reset to position before look-ahead
+                               // FIXME  pos may be corrupted by is.read
+
+  int i;
+  int (*compare)(const char *, const char *, size_t);
+  compare = case_sensitive ? strncmp : strncasecmp;
+
+  for (i = 0; i < targets.numel (); i++)
+    {
+      std::string s = targets (i).string_value ();
+      if (!(*compare) (s.c_str (), look, s.size ()))
+        {
+          is.read (tmp, s.size (), pos); // read just the right amount
+          break;
+        }
+    }
+
+  if (i == targets.numel ())
+    i = -1;
+
+  return i;
+}
+
+/********************/
+// Skip delimiters -- multiple if  MultipleDelimsAsOne specified.
+int
+textscan::skip_delim (dstr& is)
+{
+  int c1 = skip_whitespace (is, true);  // 'true': stop once EOL is read
+  if (delim_list.numel () == 0)         // single character delimiter
+    {
+      if (is_delim (c1) || c1 == eol1 || c1 == eol2)
+        {
+          is.get ();
+          if (c1 == eol1 && is.peek_undelim () == eol2)
+            is.get_undelim ();          // if \r\n, skip the \n too.
+
+          if (multiple_delims_as_one)
+            {
+              int prev = -1;
+              // skip multiple delims.
+              // Increment lines for each end-of-line seen; for \r\n, decrement
+              while (is && (c1 = is.get_undelim ()) != EOF
+                     && (((c1 == eol1 || c1 == eol2) && ++lines)
+                         || isspace (c1) || is_delim (c1)))
+                {
+                  if (prev == eol1 && eol1 != eol2 && c1 == eol2)
+                    lines--;
+                  prev = c1;
+                }
+              if (c1 != EOF)
+                is.putback (c1);
+            }
+        }
+    }
+  else                                  // multi-character delimiter
+    {
+      int first_match;
+
+      if (c1 == eol1 || c1 == eol2
+          || (-1 != (first_match = lookahead (is, delim_list, delim_len))))
+        {
+          if (c1 == eol1)
+            {
+              is.get_undelim ();
+              if (is.peek_undelim () == eol2)
+                is.get_undelim ();
+            }
+          else if (c1 == eol2)
+            {
+              is.get_undelim ();
+            }
+
+          if (multiple_delims_as_one)
+            {
+              int prev = -1;
+              // skip multiple delims.
+              // Increment lines for each end-of-line seen; for \r\n, decrement
+              while (is && (c1 = skip_whitespace (is, true)) != EOF
+                     && (((c1 == eol1 || c1 == eol2) && ++lines)
+                         || -1 != lookahead (is, delim_list, delim_len)))
+                {
+                  if (prev == eol1 && eol1 != eol2 && c1 == eol2)
+                    lines--;
+                  prev = c1;
+                }
+            }
+        }
+    }
+
+  return c1;
+}
+
+/********************/
+// Read in as much of the input as coincides with the literal in
+// the format string.
+// Return "true" if the entire literal is matched, else false (and set failbit)
+bool
+textscan::match_literal (dstr& is, const textscan_format_elt& fmt)
+{
+     // "false" -> treat EOL as normal space
+     // since a delimiter at the start of a line is a mismatch, not empty field
+  skip_whitespace (is, false);
+
+  for (unsigned int i = 0; i < fmt.width; i++)
+    {
+      int ch = is.get_undelim ();
+      if (ch != fmt.text[i])
+        {
+          if (ch != EOF)
+            is.putback (ch);
+          is.setstate (std::ios::failbit);
+          return false;
+        }
+    }
+  return true;
+}
+
+
+/********************/
+textscan_format_list::textscan_format_list (const std::string& s)
+  : set_from_first (false), has_string (false), nconv (0), curr_idx (0),
+    list (dim_vector (16, 1)), buf (0)
+{
+  octave_idx_type num_elts = 0;
+
+  size_t n = s.length ();
+
+  size_t i = 0;
+
+  unsigned int width = -1;              // Unspecified width = max (except %c)
+  int prec = -1;
+  int bitwidth = 0;
+  bool discard = false;
+  char type = '\0';
+
+  bool have_more = true;
+
+  if (s.length () == 0)
+    {
+      buf = new std::ostringstream ("%f");
+      bitwidth = 64;
+      type = 'f';
+      add_elt_to_list (width, prec, bitwidth, octave_value (NDArray ()),
+                       discard, type, num_elts);
+      have_more = false;
+      set_from_first = true;
+      nconv = 1;
+    }
+  else
+    {
+      set_from_first = false;
+      while (i < n)
+        {
+          have_more = true;
+
+          if (! buf)
+            buf = new std::ostringstream ();
+
+          if (s[i] == '%' && (i+1 == n || s[i+1] != '%'))
+            {
+              // Process percent-escape conversion type.
+
+              process_conversion (s, i, n, num_elts);
+
+              have_more = (buf != 0);
+            }
+          else if (isspace (s[i]))
+            {
+              while (++i < n && isspace (s[i]))
+                /* skip whitespace */;
+
+              have_more = false;
+            }
+          else
+            {
+              type = textscan_format_elt::literal_conversion;
+
+              width = 0;
+              prec = -1;
+              bitwidth = 0;
+              discard = true;
+
+              while (i < n && ! isspace (s[i])
+                     && (s[i] != '%' || (i+1 < n && s[i+1] == '%')))
+                {
+                  if (s[i] == '%')      // if double %, skip one
+                    i++;
+                  *buf << s[i++];
+                  width++;
+                }
+
+              add_elt_to_list (width, prec, bitwidth, octave_value (), discard,
+                               type, num_elts);
+
+              have_more = false;
+            }
+
+          if (nconv < 0)
+            {
+              have_more = false;
+              break;
+            }
+        }
+    }
+
+  if (have_more)
+    add_elt_to_list (width, prec, bitwidth, octave_value (), discard, type, num_elts);
+
+  list.resize (dim_vector (num_elts, 1));
+
+  delete buf;
+}
+
+/********************/
+textscan_format_list::~textscan_format_list (void)
+{
+  octave_idx_type n = list.numel ();
+
+  for (octave_idx_type i = 0; i < n; i++)
+    {
+      textscan_format_elt *elt = list(i);
+      delete elt;
+    }
+}
+
+/********************/
+void
+textscan_format_list::add_elt_to_list (unsigned int width, int prec,
+                                       int bitwidth, octave_value val_type,
+                                       bool discard, char type,
+                                       octave_idx_type& num_elts,
+                                       const std::string& char_class)
+{
+  if (buf)
+    {
+      std::string text = buf->str ();
+
+      if (! text.empty ())
+        {
+          textscan_format_elt *elt
+            = new textscan_format_elt (text.c_str (), width, prec, bitwidth,
+                                       discard, type, char_class);
+
+          if (num_elts == list.numel ())
+            {
+              list.resize (dim_vector (2 * num_elts, 1));
+            }
+
+          if (!discard)
+            {
+              output_container.push_back (val_type);
+            }
+          list(num_elts++) = elt;
+        }
+
+      delete buf;
+      buf = 0;
+    }
+}
+
+/********************/
+void
+textscan_format_list::process_conversion (const std::string& s, size_t& i,
+                                          size_t n, octave_idx_type& num_elts)
+{
+  unsigned width = 0;
+  int prec = -1;
+  int bitwidth = 0;
+  bool discard = false;
+  octave_value val_type;
+  char type = '\0';
+
+  *buf << s[i++];
+
+  bool have_width = false;
+
+  while (i < n)
+    {
+      switch (s[i])
+        {
+        case '*':
+          if (discard)
+            nconv = -1;
+          else
+            {
+              discard = true;
+              *buf << s[i++];
+            }
+          break;
+
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          if (have_width)
+            nconv = -1;
+          else
+            {
+              char c = s[i++];
+              width = width * 10 + c - '0';
+              have_width = true;
+              *buf << c;
+              while (i < n && isdigit (s[i]))
+                {
+                  c = s[i++];
+                  width = width * 10 + c - '0';
+                  *buf << c;
+                }
+
+              if (i < n && s[i] == '.')
+                {
+                  *buf << s[i++];
+                  prec = 0;
+                  while (i < n && isdigit (s[i]))
+                    {
+                      c = s[i++];
+                      prec = prec * 10 + c - '0';
+                      *buf << c;
+                    }
+                }
+            }
+          break;
+
+        case 'd': case 'u':
+          {
+            bool done = true;
+            *buf << (type = s[i++]);
+            if (i < n)
+              {
+                if (s[i] == '8')
+                  {
+                    bitwidth = 8;
+                    if (type == 'd')
+                      val_type = octave_value (int8NDArray ());
+                    else
+                      val_type = octave_value (uint8NDArray ());
+                    *buf << s[i++];
+                  }
+                else if (s[i] == '1' && i+1 < n && s[i+1] == '6')
+                  {
+                    bitwidth = 16;
+                    if (type == 'd')
+                      val_type = octave_value (int16NDArray ());
+                    else
+                      val_type = octave_value (uint16NDArray ());
+                    *buf << s[i++];
+                    *buf << s[i++];
+                  }
+                else if (s[i] == '3' && i+1 < n && s[i+1] == '2')
+                  {
+                    done = false;       // use default size below
+                    *buf << s[i++];
+                    *buf << s[i++];
+                  }
+                else if (s[i] == '6' && i+1 < n && s[i+1] == '4')
+                  {
+                    bitwidth = 64;
+                    if (type == 'd')
+                      val_type = octave_value (int64NDArray ());
+                    else
+                      val_type = octave_value (uint64NDArray ());
+                    *buf << s[i++];
+                    *buf << s[i++];
+                  }
+                else
+                  done = false;
+              }
+            else
+              done = false;
+
+            if (!done)
+              {
+                bitwidth = 32;
+                if (type == 'd')
+                  val_type = octave_value (int32NDArray ());
+                else
+                  val_type = octave_value (uint32NDArray ());
+              }
+            goto fini;
+          }
+
+        case 'f':
+          *buf << (type = s[i++]);
+          bitwidth = 64;
+          if (i < n)
+            {
+              if (s[i] == '3' && i+1 < n && s[i+1] == '2')
+                {
+                  bitwidth = 32;
+                  val_type = octave_value (FloatNDArray ());
+                  *buf << s[i++];
+                  *buf << s[i++];
+                }
+              else if (s[i] == '6' && i+1 < n && s[i+1] == '4')
+                {
+                  val_type = octave_value (NDArray ());
+                  *buf << s[i++];
+                  *buf << s[i++];
+                }
+              else
+                val_type = octave_value (NDArray ());
+            }
+          else
+            val_type = octave_value (NDArray ());
+          goto fini;
+
+        case 'n':
+          *buf << (type = s[i++]);
+          bitwidth = 64;
+          val_type = octave_value (NDArray ());
+          goto fini;
+
+        case 's': case 'q': case '[': case 'c':
+          if (!discard)
+            val_type = octave_value (Cell ());
+          *buf << (type = s[i++]);
+          has_string = true;
+          goto fini;
+
+        fini:
+          {
+            if (!have_width)
+              {
+                if (type == 'c')        // %c defaults to one character
+                  width = 1;
+                else
+                  width = static_cast<unsigned int> (-1); // others: unlimited
+              }
+
+            if (finish_conversion (s, i, n, width, prec, bitwidth, val_type,
+                                   discard, type, num_elts) == 0)
+              return;
+          }
+          break;
+
+        default:
+          error ("textscan: '%%%c' is not a valid format specifier", s[i]);
+        }
+
+      if (nconv < 0)
+        break;
+    }
+
+  nconv = -1;
+}
+
+/**
+ * Parse [...] and [^...]
+ * Matlab does not expand expressions like A-Z, but they are useful, and so
+ * we parse them "carefully".  We treat '-' as a usual character unless
+ * both start and end characters are from the same class (upper case, lower
+ * case, numeric), or this is not the first '-' in the pattern.
+ *
+ * Keeps both a running list of characters and a mask of which chars have
+ * occurred.  The first is efficient for patterns with few characters.  The
+ * latter is efficient for [^...] patterns.
+ */
+static std::string
+textscan_char_class (const std::string& pattern)
+{
+  int len = pattern.length ();
+  if (len == 0)
+    return "";
+
+  std::string retval (256, '\0');
+  std::string mask   (256, '\0');       // number of times chr has been seen
+
+  int in = 0, out = 0;
+  unsigned char ch, prev = 0;
+  bool flip = false;
+
+  ch = pattern[in];
+  if (ch == '^')
+    {
+      in++;
+      flip = true;
+    }
+  mask[pattern[in]] = '\1';
+  retval[out++] = pattern[in++];        // even copy ']' if it is first
+
+  bool prev_was_range = false;          // disallow "a-m-z" as a pattern
+  bool prev_prev_was_range = false;
+  for (; in < len; in++)
+    {
+      bool was_range = false;
+      ch = pattern[in];
+      if (ch == ']')
+        break;
+
+      if (prev == '-' && in > 1 && isalnum (ch) && ! prev_prev_was_range)
+        {
+          unsigned char start_of_range = pattern[in-2];
+          if (start_of_range < ch
+              && ((isupper (ch) && isupper (start_of_range))
+                  || (islower (ch) && islower (start_of_range))
+                  || (isdigit (ch) && isdigit (start_of_range))
+                  || mask['-'] > 1))    // not the first '-'
+            {
+              was_range = true;
+              out--;
+              mask['-']--;
+              for (int i = start_of_range; i <= ch; i++)
+                {
+                  if (mask[i] == '\0')
+                    {
+                      mask[i] = '\1';
+                      retval[out++] = i;
+                    }
+                }
+            }
+        }
+      if (!was_range)
+        {
+          if (mask[ch]++ == 0)
+            retval[out++] = ch;
+          else if (ch != '-')
+            warning_with_id ("octave:textscan-pattern",
+                             "textscan: [...] contains two '%c's", ch);
+          if (prev == '-' && mask['-'] >= 2)
+            warning_with_id ("octave:textscan-pattern",
+                             "textscan: [...] contains two '-'s "
+                             "outside range expressions");
+        }
+      prev = ch;
+      prev_prev_was_range = prev_was_range;
+      prev_was_range = was_range;
+    }
+  if (flip)                             // [^...]
+    {
+      out = 0;
+      for (int i = 0; i < 256; i++)
+        if (!mask[i])
+          retval[out++] = i;
+    }
+  retval.resize (out);
+
+  return retval;
+}
+
+/********************/
+int
+textscan_format_list::finish_conversion (const std::string& s, size_t& i,
+                                         size_t n, unsigned int& width,
+                                         int& prec, int& bitwidth,
+                                         octave_value& val_type, bool discard,
+                                         char& type, octave_idx_type& num_elts)
+{
+  int retval = 0;
+
+  std::string char_class;
+
+  size_t beg_idx = std::string::npos;
+  size_t end_idx = std::string::npos;
+
+  if (type != '%')
+    {
+      nconv++;
+      if (type == '[')
+        {
+          if (i < n)
+            {
+              beg_idx = i;
+
+              if (s[i] == '^')
+                {
+                  type = '^';
+                  *buf << s[i++];
+
+                  if (i < n)
+                    {
+                      beg_idx = i;
+
+                      if (s[i] == ']')
+                        *buf << s[i++];
+                    }
+                }
+              else if (s[i] == ']')
+                *buf << s[i++];
+            }
+
+          while (i < n && s[i] != ']')
+            *buf << s[i++];
+
+          if (i < n && s[i] == ']')
+            {
+              end_idx = i-1;
+              *buf << s[i++];
+            }
+
+          if (s[i-1] != ']')
+            retval = nconv = -1;
+        }
+    }
+
+  if (nconv >= 0)
+    {
+      if (beg_idx != std::string::npos && end_idx != std::string::npos)
+        char_class = textscan_char_class (s.substr (beg_idx,
+                                                    end_idx - beg_idx + 1));
+
+      add_elt_to_list (width, prec, bitwidth, val_type, discard, type,
+                       num_elts, char_class);
+    }
+
+  return retval;
+}
+
+/********************
+void
+textscan_format_list::printme (void) const
+{
+  octave_idx_type n = list.numel ();
+
+  for (octave_idx_type i = 0; i < n; i++)
+    {
+      textscan_format_elt *elt = list(i);
+
+      std::cerr
+        << "width:      " << elt->width << "\n"
+        << "digits      " << elt->prec << "\n"
+        << "bitwidth:   " << elt->bitwidth << "\n"
+        << "discard:    " << elt->discard << "\n"
+        << "type:       ";
+
+      if (elt->type == textscan_format_elt::literal_conversion)
+        std::cerr << "literal text\n";
+      else if (elt->type == textscan_format_elt::whitespace_conversion)
+        std::cerr << "whitespace\n";
+      else
+        std::cerr << elt->type << "\n";
+
+      std::cerr
+        << "char_class: `" << undo_string_escapes (elt->char_class) << "'\n"
+        << "text:       `" << undo_string_escapes (elt->text) << "'\n\n";
+    }
+}
+* */
+
+/********************/
+// If FORMAT is explicitly "", it is assumed to be "%f" repeated enough times
+// to read the first row of the file.  Set it now.
+int
+textscan_format_list::read_first_row (dstr& is, textscan& ts)
+{
+  // Read first line and strip end-of-line, which may be two characters
+  std::string first_line (20, ' ');
+  is.getline (first_line, static_cast<char> (ts.eol2));
+  if (first_line.length () > 0
+      && first_line[first_line.length () - 1] == ts.eol1)
+    first_line.resize (first_line.length () - 1);
+
+  std::istringstream strstr (first_line);
+  dstr ds (strstr, is);
+
+  dim_vector dv (1,1);      // initial size of each output_container
+  Complex val;
+  octave_value val_type;
+  nconv = 0;
+  int max_empty = 1000;     // failsafe, if ds fails but not with eof
+  int retval = 0;
+
+  // read line, creating output_container as we go
+  while (!ds.eof ())
+    {
+      bool already_skipped_delim = false;
+      ts.skip_whitespace (ds);
+      ds.progress_benchmark ();
+      bool progress = false;
+      ts.scan_complex (ds, *list(0), val);
+      if (ds.fail ())
+        {
+          ds.clear (ds.rdstate () & ~std::ios::failbit);
+
+          if (ds.eof ())
+            break;
+
+          // If we don't continue after a conversion error, then
+          // unless this was a missing value (i.e., followed by a delimiter),
+          // return with an error status.
+          if (ts.return_on_error < 2)
+            {
+              ts.skip_delim (ds);
+              if (ds.no_progress ())
+                {
+                  retval = 4;
+                  break;
+                }
+              already_skipped_delim = true;
+            }
+          else  // skip offending field
+            {
+              std::ios::iostate state = ds.rdstate ();
+              ds.clear ();          // clear to allow read pointer to advance
+
+              std::string dummy;
+              textscan_format_elt fe ("", first_line.length ());
+              ts.scan_string (ds, fe, dummy);
+
+              progress = (dummy.length ());
+              ds.setstate (state);
+            }
+
+          val = ts.empty_value.scalar_value ();
+          if (!--max_empty)
+            break;
+        }
+      if (val.imag () == 0)
+        val_type = octave_value (NDArray (dv, val.real ()));
+      else
+        val_type = octave_value (ComplexNDArray (dv, val));
+      output_container.push_back (val_type);
+      if (! already_skipped_delim)
+        ts.skip_delim (ds);
+      if (! progress && ds.no_progress ())
+        break;
+      nconv++;
+    }
+  output_container.pop_front (); // discard empty element from constructor
+
+  //Create  fmt  now that the size is known
+  list.resize (dim_vector (nconv, 1));
+  for (octave_idx_type i = 1; i < nconv; i++)
+    list(i) = new textscan_format_elt (*list(0));
+
+  return retval;             // May have returned 4 above.
+}
+
+/**
+ * Perform actual textscan: read data from stream, and create cell array
+ */
+octave_value
+textscan::scan (std::istream *isp, textscan_format_list& fmt_list,
+             octave_idx_type ntimes)
+{
+  octave_value retval;
+
+  if (!isp)
+    error ("internal error: textscan called with invalid istream");
+  if (fmt_list.num_conversions () == -1)
+    error ("textscan: invalid format specified");
+  if (fmt_list.num_conversions () == 0)
+    error ("textscan: no valid format conversion specifiers\n");
+
+  // skip the first  header_lines
+  std::string dummy;
+  for (int i = 0; i < header_lines && *isp; i++)
+    getline (*isp, dummy, static_cast<char> (eol2));
+
+  // Create our own buffered stream, for fast get/putback/tell/seek.
+
+        // First, see how far ahead it should let us look.
+  int max_lookahead = std::max (std::max (comment_len, treat_as_empty_len),
+                                std::max (delim_len, 3)); // 3 for NaN and Inf
+
+        // Next, choose a buffer size to avoid reading too much, or too often.
+  octave_idx_type buf_size;
+  if (buffer_size)
+    buf_size = buffer_size;
+  else if (ntimes > 0)
+    {
+      buf_size = 80 * ntimes;
+      if (buf_size < ntimes)    // if overflow...
+        buf_size = ntimes;
+      buf_size = std::max (ntimes, std::min (buf_size, 4096));
+    }
+  else
+    buf_size = 4096;
+        // Finally, create the stream.
+  dstr is (*isp, whitespace + delims, max_lookahead, buf_size);
+
+  // Grow retval dynamically.  "size" is half the initial size
+  // (FIXME -- Should we start smaller if ntimes is large?)
+  octave_idx_type size = ((ntimes < 8 && ntimes >= 0) ? ntimes : 1);
+  Array<octave_idx_type> row_idx (dim_vector (1,2));
+  row_idx(1) = 0;
+
+  int err = 0;
+  octave_idx_type row = 0;
+
+  if (multiple_delims_as_one)           // bug #44750?
+    skip_delim (is);
+
+  int done_after;  // Number of columns read when EOF seen.
+
+  // If FORMAT explicitly "", read first line and see how many "%f" match
+  if (fmt_list.set_from_first)
+    {
+      err = fmt_list.read_first_row (is, *this);
+      lines = 1;
+
+      done_after = fmt_list.numel () + 1;
+      if (!err)
+        row = 1;  // the above puts the first line into fmt_list.out_buf ()
+    }
+  else
+    done_after = fmt_list.out_buf ().size () + 1;
+
+  std::list<octave_value> out = fmt_list.out_buf ();
+
+  // We will later merge adjacent columns of the same type.
+  // Check now which columns to merge.
+  // Reals may become complex, and so we can't trust types
+  // after reading in data.
+  // If the format was "", that conversion may already have happened,
+  // so force all to be merged (as all are %f).
+  bool merge_with_prev[fmt_list.numel ()];
+  int conv = 0;
+  if (collect_output)
+    {
+      int prev_type = -1;
+      for (std::list<octave_value>::iterator col = out.begin ();
+           col != out.end (); col++)
+        {
+          if (col->type_id () == prev_type
+              || (fmt_list.set_from_first && prev_type != -1))
+            merge_with_prev [conv++] = true;
+          else
+            merge_with_prev [conv++] = false;
+          prev_type = col->type_id ();
+        }
+    }
+
+  // This should be caught by earlier code, but this avoids a possible
+  // infinite loop below.
+  if (fmt_list.num_conversions () == 0)
+    error ("textscan: No conversions specified");
+
+
+  // Read the data.  This is the main loop.
+  if (!err)
+    for (/* row set ~30 lines above */; row < ntimes || ntimes == -1; row++)
+      {
+        if (row == 0 || row >= size)
+          {
+            size += size+1;
+            for (std::list<octave_value>::iterator col = out.begin ();
+                 col != out.end (); col++)
+              *col = (*col).resize (dim_vector (size, 1), 0);
+          }
+        row_idx(0) = row;
+        err = read_format_once (is, fmt_list, out, row_idx, done_after);
+        if (err > 0 || !is || (lines >= ntimes && ntimes > -1))
+          break;
+      }
+
+  if ((err & 4) && !return_on_error)
+    error ("textscan: Read error in field %d of row %d",
+           done_after + 1, row + 1);
+
+  // If file does not end in EOL, do not pad columns with NaN.
+  bool uneven_columns = false;
+  if (isp->eof () || (err & 4))
+    {
+      isp->clear ();
+      isp->seekg (-1, std::ios_base::end);
+      int last_char = isp->get ();
+      isp->setstate (isp->eofbit);
+      uneven_columns = (last_char != eol1 && last_char != eol2);
+    }
+
+  // convert return value to Cell array
+  Array<octave_idx_type> ra_idx (dim_vector (1,2));
+
+          // (err & 1) means "error, and no columns read this row
+          // FIXME -- This may redundant now that done_after=0 says the same
+  if (err & 1)
+    done_after = out.size () + 1;
+  int valid_rows = (row == ntimes) ? ntimes : ((err & 1) ? row : row+1);
+  dim_vector dv (valid_rows, 1);
+
+  ra_idx(0) = 0;
+  int i = 0;
+  if (!collect_output)
+    {
+      retval = Cell (dim_vector (1, out.size ()));
+      for (std::list<octave_value>::iterator col = out.begin ();
+           col != out.end (); col++, i++)
+        {
+          // trim last columns if that was requested
+          if (i == done_after && uneven_columns)
+            dv = dim_vector (std::max (valid_rows - 1, 0), 1);
+
+          ra_idx(1) = i;
+          retval = do_cat_op (retval, octave_value (Cell (col->resize (dv,0))),
+                              ra_idx);
+        }
+    }
+  else  // group adjacent cells of the same type into a single cell
+    {
+      octave_value    cur;                // current cell, accumulating columns
+      octave_idx_type group_size = 0;     // columns in this cell
+      int prev_type = -1;
+
+      conv = 0;
+      retval = Cell ();
+      for (std::list<octave_value>::iterator col = out.begin ();
+           col != out.end (); col++)
+        {
+          if (!merge_with_prev [conv++])  // including first time
+            {
+              if (prev_type != -1)
+                {
+                  ra_idx(1) = i++;
+                  retval = do_cat_op (retval, octave_value (Cell(cur)),
+                                      ra_idx);
+                }
+              cur = octave_value (col->resize (dv,0));
+              group_size = 1;
+              prev_type = col->type_id ();
+            }
+          else
+            {
+              ra_idx(1) = group_size++;
+              cur = do_cat_op (cur, octave_value (col->resize (dv,0)),
+                               ra_idx);
+            }
+        }
+      ra_idx(1) = i;
+      retval = do_cat_op (retval, octave_value (Cell (cur)), ra_idx);
+    }
+  return retval;
+}
+
+/********************/
+/**
+ * Create a delimited stream, reading from is, with delimiters delims,
+ * and allowing reading of up to tellg() + longest_lookeahead.
+ * When is is at EOF, lookahead may be padded by ASCII nuls.
+ */
+dstr::dstr (std::istream& is, const std::string& delimiters,
+            int longest_lookahead, octave_idx_type bsize)
+    : bufsize (bsize), i_stream (is), longest (longest_lookahead),
+      delims (delimiters),
+      flags (std::ios::failbit & ~std::ios::failbit) // can't cast 0
+{
+  buf = new char[bufsize];
+  eob = buf + bufsize;
+  idx = eob;                    // refresh_buf shouldn't try to copy old data
+  progress_marker = idx;
+  refresh_buf ();               // load the first batch of data
+}
+
+// Used to create a stream from a strstream from data read from a dstr.
+// FIXME: Find a more efficient approach.  Perhaps derived dstrstream
+dstr::dstr (std::istream& is, const dstr& ds)
+  : bufsize (ds.bufsize), i_stream (is), longest (ds.longest),
+    delims (ds.delims),
+    flags (std::ios::failbit & ~std::ios::failbit) // can't cast 0
+{
+  buf = new char[bufsize];
+  eob = buf + bufsize;
+  idx = eob;                    // refresh_buf shouldn't try to copy old data
+  progress_marker = idx;
+  refresh_buf ();               // load the first batch of data
+}
+
+dstr::~dstr ()
+{
+  // Seek to the correct position in i_stream.
+  if (!eof ())
+    {
+      i_stream.clear ();
+      i_stream.seekg (buf_in_file);
+      i_stream.read (buf, idx - buf);
+    }
+
+  delete [] buf;
+}
+
+/** Read a character from the buffer, refilling the buffer from the
+ * file if necessary.
+ */
+int
+dstr::get_undelim ()
+{
+  int retval;
+  if (eof ())
+    {
+      setstate (std::ios_base::failbit);
+      return EOF;
+    }
+
+  if (idx < eob)
+    retval = *idx++;
+  else
+    {
+      refresh_buf ();
+      if (eof ())
+        {
+          setstate (std::ios_base::eofbit);
+          retval = EOF;
+        }
+      else
+        retval = *idx++;
+    }
+  if (idx >= last)
+    delimited = false;
+  return retval;
+}
+
+/** Return the next character to be read without incrementing the pointer,
+ * refilling the buffer from the file if necessary.
+ */
+int
+dstr::peek_undelim ()
+{
+  int retval = get_undelim ();
+  putback ();
+
+  return retval;
+}
+
+/**
+ * Copy remaining unprocessed data to the start of the buffer and load
+ * new data to fill it.
+ * Returns EOF if the file is at EOF before reading any data and all of the
+ * data that has been read has been processed.
+ */
+int
+dstr::refresh_buf (void)
+{
+  if (eof ())
+    return EOF;
+
+  int retval;
+  int old_remaining = eob - idx;
+
+  if (old_remaining < 0)
+    {
+      idx = eob;
+      old_remaining = 0;
+    }
+
+  octave_quit ();                       // allow ctrl-C
+
+  if (old_remaining > 0)
+    memmove (buf, idx, old_remaining);
+
+  progress_marker -= idx - buf;         // where original idx would have been
+  idx = buf;
+
+  int gcount;   // chars read
+  if (!i_stream.eof ())
+    {
+      buf_in_file = i_stream.tellg ();   // record for destructor
+      i_stream.read (buf + old_remaining, bufsize - old_remaining);
+      gcount = i_stream.gcount ();
+    }
+  else
+    gcount = 0;
+
+  eob = buf + old_remaining + gcount;
+  last = eob;
+  if (gcount == 0)
+    {
+      delimited = false;
+      if (eob != buf)              // no more data in file, but still some to go
+        retval = 0;
+      else
+        retval = EOF;              // file and buffer are both done.
+    }
+  else
+    {
+      delimited = true;
+
+      for (last = eob - longest; last - buf >= 0; last--)
+        {
+          if (strchr (delims.c_str (), *last))
+            break;
+        }
+      if (last - buf < 0)
+        delimited = false;
+
+      retval = 0;
+    }
+
+  if (retval == EOF)  // Ensure fast peek doesn't give valid char
+    *idx = '\0';      // FIXME - check that no TreatAsEmpty etc starts w. \0?
+
+  return retval;
+}
+
+/** Return a pointer to a block of data of size size, assuming that a
+ * sufficiently large buffer is available in buffer, if required.
+ * If called when delimited==true, and size is no greater than
+ * longest_lookahead then this will not call refresh_buf(), so seekg()
+ * still works.  Otherwise, seekg may be invalidated.
+ */
+char *
+dstr::read (char *buffer, int size, char* &prior_tell)
+{
+  char *retval;
+  if (eob  - idx > size)
+    {
+      retval = idx;
+      idx += size;
+      if (idx > last)
+        delimited = false;
+    }
+  else
+    {
+      // If there was a tellg pointing to an earlier point than the current
+      // read position, try to keep it in the active buffer.
+      // In the current code, prior_tell==idx for each call,
+      // so this is not necessary, just a precaution.
+      if (eob - prior_tell + size < bufsize)
+        {
+          octave_idx_type gap = idx - prior_tell;
+          idx = prior_tell;
+          refresh_buf ();
+          idx += gap;
+        }
+      else      // can't keep the tellg in range.  May skip some data.
+        {
+          refresh_buf ();
+        }
+      prior_tell = buf;
+
+      if (eob - idx > size)
+        {
+          retval = idx;
+          idx += size;
+          if (idx > last)
+            delimited = false;
+        }
+      else
+        {
+          if (size <= bufsize)          // small read, but reached EOF
+            {
+              retval = idx;
+              memset (eob, 0, size + (idx - buf));
+              idx += size;
+            }
+          else  // Reading more than the whole buf; return it in buffer
+            {
+              retval = buffer;
+              // FIXME -- read bufsize at a time
+              int i;
+              for (i = 0; i < size && !eof (); i++)
+                *buffer++ = get_undelim ();
+              if (eof ())
+                memset (buffer, 0, size - i);
+            }
+        }
+    }
+  return retval;
+}
+
+/**
+ * Return in out an entire line, terminated by delim.
+ * On input, out must have length at least 1.
+ */
+int
+dstr::getline (std::string& out, char delim)
+{
+  int len = out.length (), used = 0;
+  int ch;
+  while ((ch = get_undelim ()) != delim && ch != EOF)
+    {
+      out[used++] = ch;
+      if (used == len)
+        {
+          len <<= 1;
+          out.resize (len);
+        }
+    }
+  out.resize (used);
+  field_done ();
+
+  return ch;
+}
+
+/*
+%!test
+%! str = "1,  2,  3,  4\n 5,  ,  ,  8\n 9, 10, 11, 12";
+%! fmtstr = "%f %d %f %s";
+%! c = textscan (str, fmtstr, 2, "delimiter", ",", "emptyvalue", -Inf);
+%! assert (isequal (c{1}, [1;5]));
+%! assert (length (c{1}), 2);
+%! assert (iscellstr (c{4}));
+%! assert (isequal (c{3}, [3; -Inf]));
+
+%!test
+%! b = [10:10:100];
+%! b = [b; 8*b/5];
+%! str = sprintf ("%g miles/hr = %g kilometers/hr\n", b);
+%! fmt = "%f miles/hr = %f kilometers/hr";
+%! c = textscan (str, fmt);
+%! assert (c{1}, b(1,:)', 1e-5);
+%! assert (c{2}, b(2,:)', 1e-5);
+
+%!test
+%! str = "13, -, NA, str1, -25\r\n// Middle line\r\n36, na, 05, str3, 6";
+%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",",
+%!                "treatAsEmpty", {"NA", "na", "-"},"commentStyle", "//");
+%! assert (a{1}, int32 ([13; 36]));
+%! assert (a{2}, [NaN; NaN]);
+%! assert (a{3}, [NaN; 5]);
+%! assert (a{4}, {"str1"; "str3"});
+%! assert (a{5}, [-25; 6]);
+
+%!test
+%! str = "Km:10 = hhhBjjj miles16hour\r\n";
+%! str = [str "Km:15 = hhhJjjj miles241hour\r\n"];
+%! str = [str "Km:2 = hhhRjjj miles3hour\r\n"];
+%! str = [str "Km:25 = hhhZ\r\n"];
+%! fmt = "Km:%d = hhh%1sjjj miles%dhour";
+%! a = textscan (str, fmt, "delimiter", " ");
+%! assert (a{1}', int32 ([10 15 2 25]));
+%! assert (a{2}', {'B' 'J' 'R' 'Z'});
+%! assert (a{3}', int32 ([16 241 3 0]));
+
+## Test with default endofline parameter
+%!test
+%! c = textscan ("L1\nL2", "%s");
+%! assert (c{:}, {"L1"; "L2"});
+
+## Test with endofline parameter set to "" (empty) - newline should be in word
+%!test
+%! c = textscan ("L1\nL2", "%s", "endofline", "");
+%! assert (int8 ([c{:}{:}]), int8 ([ 76,  49,  10,  76,  50 ]));
+
+###  Matlab fails this test.  A literal after a conversion is not a delimiter
+#%!test
+#%! ## No delimiters at all besides EOL.  Skip fields, even empty fields
+#%! str = "Text1Text2Text\nTextText4Text\nText57Text";
+#%! c = textscan (str, "Text%*dText%dText");
+#%! assert (c{1}, int32 ([2; 4; 0]));
+
+## CollectOutput test
+%!test
+%! b = [10:10:100];
+%! b = [b; 8*b/5; 8*b*1000/5];
+%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
+%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
+%! c = textscan (str, fmt, "collectoutput", 1);
+%! assert (size (c{3}), [10, 2]);
+%! assert (size (c{2}), [10, 2]);
+
+## CollectOutput test with uneven column length files
+%!test
+%! b = [10:10:100];
+%! b = [b; 8*b/5; 8*b*1000/5];
+%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
+%! str = [str "110 miles/hr"];
+%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
+%! c = textscan (str, fmt, "collectoutput", 1);
+%! assert (size (c{1}), [11, 1]);
+%! assert (size (c{3}), [11, 2]);
+%! assert (size (c{2}), [11, 2]);
+%! assert (c{3}(end), NaN);
+%! assert (c{2}{11, 1}, "/hr");
+%! assert (isempty (c{2}{11, 2}), true);
+
+## Double quoted string
+%!test
+%! str = 'First    "the second called ""the middle""" third';
+%! fmt = "%q";
+%! c = textscan (str, fmt);
+%! assert (c{1}, {"First"; 'the second called "the middle"'; "third"});
+
+## Arbitrary character
+%!test
+%! c = textscan ("a first, \n second, third", "%s %c %11c", 'delimiter', ' ,');
+%! assert (c{1}, {"a"; "ond"});
+%! assert (c{2}, {"f"; "t"});
+%! assert (c{3}, {"irst, \n sec"; "hird"});
+
+## Field width and non-standard delimiters
+%!test
+%! str = "12;34;123456789;7";
+%! c = textscan (str, "%4d %4d", "delimiter", ";", "collectOutput", 1);
+%! assert (c, {[12 34; 1234 5678; 9 7]});
+
+## Field width and non-standard delimiters
+%!test
+%! str = "12;34;123456789;7";
+%! c = textscan (str, "%4f %f", "delimiter", ";", "collectOutput", 1);
+%! assert (c, {[12 34; 1234 56789; 7 NaN]});
+
+## Ignore trailing delimiter, but use leading one
+%!test
+%! str = "12.234e+2,34, \n12345.789-9876j,78\n,10|3";
+%! c = textscan (str, "%10.2f %f", "delimiter", ",", "collectOutput", 1,
+%!                "expChars", "e|");
+%! assert (c, {[1223 34; 12345.79-9876j 78; NaN 10000]}, 1e-6);
+
+%!test
+%! ## Multi-character delimiter
+%! str = "99end2 space88gap 4564";
+%! c = textscan (str, "%d %s", "delimiter", {"end", "gap", "space"});
+%! assert (c{1}, int32([99; 88]));
+%! assert (c{2}, {"2 "; "4564"});
+
+### Delimiters as part of literals, and following literals
+#%!test
+#%! str = "12 R&D & 7";
+#%! c = textscan (str, "%f R&D %f", "delimiter", "&", "collectOutput", 1, "EmptyValue", -99);
+#%! assert (c, {[12 -99; 7 -99]});
+#
+### Delimiters as part of literals, and before literals
+#%!test
+#%! str = "12 & R&D 7";
+#%! c = textscan (str, "%f R&D %f", "delimiter", "&", "collectOutput", 1);
+#%! assert (c, {[12 7]});
+
+%!test
+%! ## Check number of lines read, not number of passes through format string
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, "1\n2\n3\n4\n5\n6");
+%! fseek (fid, 0, "bof");
+%! c = textscan (fid, "%f %f", 2);
+%! E = feof (fid);
+%! fclose (fid);
+%! unlink (f);
+%! assert (c, {1, 2});
+%! assert (!E);
+
+%!test
+%! ## Check number of lines read, not number of passes through format string
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, "1\r\n2\r3\n4\r\n5\n6");
+%! fseek (fid, 0, "bof");
+%! c = textscan (fid, "%f %f", 4);
+%! fclose (fid);
+%! unlink (f);
+%! assert (c, {[1;3], [2;4]})
+
+%!test
+%! ## Check number of lines read, with multiple delimiters
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, "1-\r\n-2\r3-\n-4\r\n5\n6");
+%! fseek (fid, 0, "bof");
+%! c = textscan (fid, "%f %f", 4, "delimiter", "-", "multipleDelimsAsOne", 1);
+%! fclose (fid);
+%! unlink (f);
+%! assert (c, {[1;3], [2;4]})
+
+%!test
+%! ## Check ReturnOnError
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, "1 2 3\n4 s 6");
+%! fseek (fid, 0, "bof");
+%! c = textscan (fid, "%f %f %f");
+%! fseek (fid, 0, "bof");
+%! d = textscan (fid, "%f %f %f", "ReturnOnError", 1);
+%! fseek (fid, 0, "bof");
+%! fclose (fid);
+%! unlink (f);
+%! assert (c, {[1;4], [2;NaN], [3;6]})
+%! assert (d, {[1;4], [2], [3]})
+
+%!test
+%! ## Check ReturnOnError
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, "1 2 3\n4 s 6\n");
+%! fseek (fid, 0, "bof");
+%! c = textscan (fid, "%f %f %f", "ReturnOnError", 1);
+%! fseek (fid, 0, "bof");
+%! fclose (fid);
+%! unlink (f);
+%! assert (c, {[1;4], [2;NaN], [3;NaN]})
+
+%!error <Read error in field 2 of row 2> textscan ("1 2 3\n4 s 6", "%f %f %f", "ReturnOnError", 0);
+
+%!test
+%! ## Check ReturnOnError
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, "1 s 3\n4 5 6");
+%! fseek (fid, 0, "bof");
+%! c = textscan (fid, "");
+%! fseek (fid, 0, "bof");
+%! d = textscan (fid, "", "ReturnOnError", 1);
+%! fseek (fid, 0, "bof");
+%! fclose (fid);
+%! unlink (f);
+%! assert (c, {[1;4], [NaN;5], [3;6]})
+%! assert (d, {1})
+
+%!test
+%! ## Check ReturnOnError with empty fields
+%! c = textscan ("1,,3\n4,5,6", "", "Delimiter", ",", "ReturnOnError", 1);
+%! assert (c, {[1;4], [NaN;5], [3;6]})
+
+%!test
+%! ## Check ReturnOnError with empty fields
+%! c = textscan ("1,,3\n4,5,6", "%f %f %f", "Delimiter", ",", "ReturnOnError", 1);
+%! assert (c, {[1;4], [NaN;5], [3;6]})
+
+%!test
+%! ## Check ReturnOnError in first column
+%! c = textscan ("1 2 3\ns 5 6", "", "ReturnOnError", 1);
+%! assert (c, {1, 2, 3})
+
+## Test input validation
+%!error textscan ()
+%!error textscan (single (40))
+%!error textscan ({40})
+%!error <must be a string> textscan ("Hello World", 2)
+#%!error <cannot provide position information> [C, pos] = textscan ("Hello World")
+%!error <at most one character or> textscan ("Hello World", '%s', 'EndOfLine', 3)
+%!error <'%z' is not a valid format specifier> textscan ("1.0", "%z");
+%!error <no valid format conversion specifiers> textscan ("1.0", "foo");
+
+## Test incomplete first data line
+%! R = textscan (['Empty1' char(10)], 'Empty%d %f');
+%! assert (R{1}, int32 (1));
+%! assert (isempty (R{2}), true);
+
+## bug #37023
+%!test
+%! data = textscan("   1. 1 \n 2 3\n", '%f %f');
+%! assert (data{1}, [1; 2], 1e-15);
+%! assert (data{2}, [1; 3], 1e-15);
+
+## Whitespace test (bug #37333) using delimiter ";"
+%!test
+%! tc = [];
+%! tc{1, 1} = "C:/code;";
+%! tc{1, end+1} = "C:/code/meas;";
+%! tc{1, end+1} = " C:/code/sim;";
+%! tc{1, end+1} = "C:/code/utils;";
+%! string = [tc{:}];
+%! c = textscan (string, "%s", "delimiter", ";");
+%! for k = 1:max (numel (c{1}), numel (tc))
+%!   lh = c{1}{k};
+%!   rh = tc{k};
+%!   rh(rh == ";") = "";
+%!   rh = strtrim (rh);
+%!   assert (strcmp (lh, rh));
+%! end
+
+## Whitespace test (bug #37333), adding multipleDelimsAsOne true arg
+%!test
+%! tc = [];
+%! tc{1, 1} = "C:/code;";
+%! tc{1, end+1} = " C:/code/meas;";
+%! tc{1, end+1} = "C:/code/sim;;";
+%! tc{1, end+1} = "C:/code/utils;";
+%! string = [tc{:}];
+%! c = textscan (string, "%s", "delimiter", ";", "multipleDelimsAsOne", 1);
+%! for k = 1:max (numel (c{1}), numel (tc))
+%!   lh = c{1}{k};
+%!   rh = tc{k};
+%!   rh(rh == ";") = "";
+%!   rh = strtrim (rh);
+%!   assert (strcmp (lh, rh));
+%! end
+
+## Whitespace test (bug #37333), adding multipleDelimsAsOne false arg
+%!test
+%! tc = [];
+%! tc{1, 1} = "C:/code;";
+%! tc{1, end+1} = " C:/code/meas;";
+%! tc{1, end+1} = "C:/code/sim;;";
+%! tc{1, end+1} = "";
+%! tc{1, end+1} = "C:/code/utils;";
+%! string = [tc{:}];
+%! c = textscan (string, "%s", "delimiter", ";", "multipleDelimsAsOne", 0);
+%! for k = 1:max (numel (c{1}), numel (tc))
+%!   lh = c{1}{k};
+%!   rh = tc{k};
+%!   rh(rh == ";") = "";
+%!   rh = strtrim (rh);
+%!   assert (strcmp (lh, rh));
+%! end
+
+## Whitespace test (bug #37333) whitespace "" arg
+%!test
+%! tc = [];
+%! tc{1, 1} = "C:/code;";
+%! tc{1, end+1} = " C:/code/meas;";
+%! tc{1, end+1} = "C:/code/sim;";
+%! tc{1, end+1} = "C:/code/utils;";
+%! string = [tc{:}];
+%! c = textscan (string, "%s", "delimiter", ";", "whitespace", "");
+%! for k = 1:max (numel (c{1}), numel (tc))
+%!   lh = c{1}{k};
+%!   rh = tc{k};
+%!   rh(rh == ";") = "";
+%!   assert (strcmp (lh, rh));
+%! end
+
+## Whitespace test (bug #37333), whitespace " " arg
+%!test
+%! tc = [];
+%! tc{1, 1} = "C:/code;";
+%! tc{1, end+1} = " C:/code/meas;";
+%! tc{1, end+1} = "C:/code/sim;";
+%! tc{1, end+1} = "C:/code/utils;";
+%! string = [tc{:}];
+%! c = textscan (string, "%s", "delimiter", ";", "whitespace", " ");
+%! for k = 1:max (numel (c{1}), numel (tc))
+%!   lh = c{1}{k};
+%!   rh = tc{k};
+%!   rh(rh == ";") = "";
+%!   rh = strtrim (rh);
+%!   assert (strcmp (lh, rh));
+%! end
+
+## Tests reading with empty format, should return proper nr of columns
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, " 1 2 3 4\n5 6 7 8");
+%! fseek (fid, 0, "bof");
+%! A = textscan (fid, "");
+%! E = feof (fid);
+%! fclose (fid);
+%! unlink (f);
+%! assert (A{1}, [1 ; 5], 1e-6);
+%! assert (A{2}, [2 ; 6], 1e-6);
+%! assert (A{3}, [3 ; 7], 1e-6);
+%! assert (A{4}, [4 ; 8], 1e-6);
+%! assert (E);
+
+## Tests reading with empty format; empty fields & incomplete lower row
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid, " ,2,,4\n5,6");
+%! fseek (fid, 0, "bof");
+%! A = textscan (fid, "", "delimiter", ",", "EmptyValue", 999, "CollectOutput" , 1);
+%! fclose (fid);
+%! unlink (f);
+%! assert (A{1}, [999, 2, 999, 4; 5, 6, 999, 999], 1e-6);
+
+## Error message tests
+
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! msg1 = "textscan: 1 parameters given, but only 0 values";
+%! try
+%! A = textscan (fid, "", "headerlines");
+%! end_try_catch;
+%! assert (!feof (fid));
+%! fclose (fid);
+%! unlink (f);
+%! assert (msg1, lasterr);
+
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! msg1 = "textscan: HeaderLines must be numeric";
+%! try
+%! A = textscan (fid, "", "headerlines", "hh");
+%! end_try_catch;
+%! fclose (fid);
+%! unlink (f);
+%! assert (msg1, lasterr);
+
+%!test
+%! ## Skip headerlines
+%! A = textscan ("field 1  field2\n 1 2\n3 4", "", "headerlines", 1, "collectOutput", 1);
+%! assert (A, {[1 2; 3 4]});
+
+%!test
+%! ## Skip headerlines with non-default EOL
+%! A = textscan ("field 1  field2\r 1 2\r3 4", "", "headerlines", 2, "collectOutput", 1, "EndOfLine", '\r');
+%! assert (A, {[3 4]});
+
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid,"some_string");
+%! fseek (fid, 0, "bof");
+%! msg1 = "textscan: EndOfLine must be at most one character or '\\r\\n'";
+%! try
+%! A = textscan (fid, "%f", "EndOfLine", "\n\r");
+%! end_try_catch;
+%! fclose (fid);
+%! unlink (f);
+%! assert (msg1, lasterr);
+
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid,"some_string");
+%! fseek (fid, 0, "bof");
+%! msg1 = "textscan: EndOfLine must be at most one character or '\\r\\n'";
+%! try
+%! A = textscan (fid, "%f", "EndOfLine", 33);
+%! end_try_catch;
+%! fclose (fid);
+%! unlink (f);
+%! assert (msg1, lasterr);
+
+## Bug #41824
+%!test
+%! assert (textscan ("123", "", "whitespace", " "){:}, 123);
+
+## Bug #42343-1, just test supplied emptyvalue
+%!test
+%! assert (textscan (",NaN", "", "delimiter", "," ,"emptyValue" ,Inf), {Inf, NaN});
+
+## Bug #42343-2, test padding with supplied emptyvalue
+%!test
+%! a = textscan (",1,,4\nInf,  ,NaN", "", "delimiter", ",", "emptyvalue", -10);
+%! assert (cell2mat (a), [-10, 1, -10, 4; Inf, -10, NaN, -10]);
+
+## Bug #42528
+%!test
+%! assert (textscan ("1i", ""){1},  0+1i);
+%! assert (cell2mat (textscan ("3, 2-4i, NaN\n -i, 1, 23.4+2.2i\n 1+1 1+1j", "", "delimiter", ",")), [3+0i, 2-4i, NaN+0i; 0-i,  1+0i, 23.4+2.2i; 1 1 1+1i]);
+
+%!test
+%! ## TreatAsEmpty
+%! C = textscan ("1,2,3,NN,5,6\n", "%d%d%d%f", "delimiter", ",", "TreatAsEmpty", "NN");
+%! assert (C{3}(1), int32 (3));
+%! assert (C{4}(1), NaN);
+
+## MultipleDelimsAsOne
+%!test
+%! str = "11, 12, 13,, 15\n21,, 23, 24, 25\n,, 33, 34, 35";
+%! C = textscan (str, "%f %f %f %f", "delimiter", ",", "multipledelimsasone", 1, "endofline", "\n");
+%! assert (C{1}', [11, 21, 33]);
+%! assert (C{2}', [12, 23, 34]);
+%! assert (C{3}', [13, 24, 35]);
+%! assert (C{4}', [15, 25, NaN]);
+
+## Bug #44750
+%!test
+%! assert (textscan ("/home/foo/", "%s", "delimiter", "/", "MultipleDelimsAsOne", 1){1}, ...
+%!         {"home"; "foo"});
+
+### Allow cuddling %sliteral but warn it is ambiguous
+#%!test
+#%! C = textscan ("abcxyz51\nxyz83\n##xyz101", "%s xyz %d");
+#%! assert (C{1}([1 3]), {"abc"; "##"});
+#%! assert (isempty (C{1}{2}), true);
+#%! assert (C{2}, int32([51; 83; 101]));
+### Literals are not delimiters.
+
+## Test for false positives in check for non-supported format specifiers
+%!test
+%! assert (textscan ("Total: 32.5 % (of cm values)", "Total: %f %% (of cm values)"){1}, 32.5, 1e-5);
+
+## Test various forms of string format specifiers (bug #45712)
+%!test
+%! str = "14 :1 z:2 z:3 z:5 z:11";
+%! C = textscan (str, "%f %s %*s %3s %*3s %f", "delimiter", ":");
+%! assert (C, {14, {"1 z"}, {"3 z"}, 11});
+
+%% Bit width, fixed width conv. specifiers
+%!test
+%! str2 = "123456789012345 ";
+%! str2 = [str2 str2 str2 str2 str2 str2 str2 str2];
+%! str2 = [str2 "123456789.01234 1234567890.1234 12345.678901234 12345.678901234"];
+%! pttrn = "%3u8%*s %5u16%*s %10u32%*s %15u64 %3d8%*s %5d16%*s %10d32%*s %15d64 %9f32%*s %14f64%*s %10.2f32%*s %12.2f64%*s";
+%! C = textscan (str2, pttrn, "delimiter", " ");
+%! assert (C{1}, uint8 (123));
+%! assert (C{2}, uint16 (12345));
+%! assert (C{3}, uint32 (1234567890));
+%! assert (C{4}, uint64 (123456789012345));
+%! assert (C{5}, int8 (123));
+%! assert (C{6}, int16 (12345));
+%! assert (C{7}, int32 (1234567890));
+%! assert (C{8}, int64 (123456789012345));
+%! assert (C{9}, single (123456789), 1e-12);
+%! assert (C{10}, double (1234567890.123), 1e-15);
+%! assert (C{11}, single (12345.68), 1e-5);
+%! assert (C{12}, double (12345.68), 1e-11);
+
+%% Bit width, fixed width conv. specifiers -- check the right amount is left
+%!test
+%! str2 = "123456789012345 ";
+%! str2 = [str2 str2 "123456789.01234"];
+%! pttrn = "%3u8 %5u16 %10u32 %3d8 %5d16 %10d32 %9f32 %9f";
+%! C = textscan (str2, pttrn, "delimiter", " ");
+%! assert (C{1}, uint8 (123));
+%! assert (C{2}, uint16 (45678));
+%! assert (C{3}, uint32 (9012345));
+%! assert (C{4}, int8 (123));
+%! assert (C{5}, int16 (45678));
+%! assert (C{6}, int32 (9012345));
+%! assert (C{7}, single (123456789), 1e-12);
+%! assert (C{8}, double (0.01234), 1e-12);
+
+%!test
+%! C = textscan ("123.123", "%2f %3f %3f");
+%! assert (C{1}, 12);
+%! assert (C{2}, 3.1, 1e-11);
+%! assert (C{3}, 23);
+
+%!test
+%! C = textscan ("123.123", "%3f %3f %3f");
+%! assert (C{1}, 123);
+%! assert (C{2}, 0.12, 1e-11);
+%! assert (C{3}, 3);
+
+%!test
+%! C = textscan ("123.123", "%4f %3f");
+%! assert (C{1}, 123);
+%! assert (C{2}, 123);
+
+%% field width interrupts exponent.  (Matlab incorrectly gives [12, 2e12])
+%!test
+%! assert (textscan ("12e12",  "%4f"), {[120;  2]});
+%! assert (textscan ("12e+12", "%5f"), {[120;  2]});
+%! assert (textscan ("125e-12","%6f"), {[12.5; 2]});
+
+%% %[] tests
+%% Plain [..] and *[..]
+%!test
+%! ar = "abcdefguvwxAny\nacegxyzTrailing\nJunk";
+%! C = textscan (ar, "%[abcdefg] %*[uvwxyz] %s");
+%! assert (C{1}, {"abcdefg"; "aceg"; ""});
+%! assert (C{2}, {"Any"; "Trailing"; "Junk"});
+
+%!test
+%! assert (textscan ("A2 B2 C3", "%*[ABC]%d", 3), {int32([2; 2; 3])});
+
+%% [^..] and *[^..]
+%!test
+%! br = "abcdefguvwx1Any\nacegxyz2Trailing\n3Junk";
+%! C = textscan (br, "%[abcdefg] %*[^0123456789] %s");
+%! assert (C{1}, {"abcdefg"; "aceg"; ""});
+%! assert (C{2}, {"1Any"; "2Trailing"; "3Junk"});
+
+%% [..] and [^..] containing delimiters
+%!test
+%! cr = "ab cd efguv wx1Any\na ce gx yz2Trailing\n   3Junk";
+%! C = textscan (cr, "%[ abcdefg] %*[^0123456789] %s", "delimiter", " \n", "whitespace", "");
+%! assert (C{1}, {"ab cd efg"; "a ce g"; "   "});
+%! assert (C{2}, {"1Any"; "2Trailing"; "3Junk"});
+
+%% Bug #36464
+%!test
+%! assert (textscan ('1 2 3 4 5 6', "%*n%n%*[^\n]"){1}, 2);
+
+%% test %[]] and %[^]]
+%!test
+%! assert (textscan ('345]', "%*[123456]%[]]"){1}{1}, "]");
+%! assert (textscan ('345]', "%*[^]]%s"){1}{1}, "]");
+
+%% Test that "-i" checks the next two characters
+%!test
+%! C = textscan ("-i -in -inf -infinity", "%f %f%s %f %f %s");
+%! assert (C, {-i, -i, {"n"}, -Inf, -Inf, {"inity"}});
+
+%% Again for "+i", this time with custom parser
+%!test
+%! C = textscan ("+i +in +inf +infinity", "%f %f%s %f %f %s", "ExpChars", "eE");
+%! assert (C, {i, i, {"n"}, Inf, Inf, {"inity"}});
+
+%% Check single quoted format interprets control sequences
+%!test
+%! C = textscan ("1 2\t3 4", '%f %[^\t] %f %f');
+%! assert (C, {1, {"2"}, 3, 4});
+
+%% Check overflow and underflow of integer types
+%!test
+%! a = "-1e90 ";
+%! b = "1e90 ";
+%! fmt = "%d8 %d16 %d32 %d64 %u8 %u16 %u32 %u64 ";
+%! C = textscan ([a a a a a a a a b b b b b b b b], fmt);
+%! assert (C{1}, int8 ([-128; 127]));
+%! assert (C{2}, int16([-32768; 32767]));
+%! assert (C{3}, int32([-2147483648; 2147483647]));
+%! assert (C{4}, int64([-9223372036854775808; 9223372036854775807]));
+%! assert (C{5}, uint8 ([0; 255]));
+%! assert (C{6}, uint16([0; 65535]));
+%! assert (C{7}, uint32([0; 4294967295]));
+%! assert (C{8}, uint64([0; 18446744073709551615]));
+
+%% Tests from Matlab (does The MathWorks have any copyright over the input?)
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid,"09/12/2005 Level1 12.34 45 1.23e10 inf Nan Yes 5.1+3i\n");
+%! fprintf (fid,"10/12/2005 Level2 23.54 60 9e19 -inf  0.001 No 2.2-.5i\n");
+%! fprintf (fid,"11/12/2005 Level3 34.90 12 2e5   10  100   No 3.1+.1i\n");
+%! fseek (fid, 0, "bof");
+%! C = textscan(fid,"%s %s %f32 %d8 %u %f %f %s %f");
+%! %assert (C{1}, {"09/12/2005";"10/12/2005";"11/12/2005"});
+%! assert (C{2}, {"Level1";"Level2";"Level3"});
+%! assert (C{3}, [single(12.34);single(23.54);single(34.90)]);
+%! assert (C{4}, [int8(45);int8(60);int8(12)]);
+%! assert (C{5}, [uint32(4294967295);uint32(4294967295);uint32(200000)]);
+%! assert (C{6}, [inf;-inf;10]);
+%! assert (C{7}, [NaN;0.001;100], eps);
+%! assert (C{8}, {"Yes";"No";"No"});
+%! assert (C{9}, [5.1+3i;2.2-0.5i;3.1+0.1i]);
+%! fseek (fid, 0, "bof");
+%! C = textscan(fid,"%s Level%d %f32 %d8 %u %f %f %s %f");
+%! assert (C{2}, [int32(1);int32(2);int32(3)]);
+%! assert (C{3}, [single(12.34);single(23.54);single(34.90)]);
+%! fseek (fid, 0, "bof");
+%! C = textscan(fid,'%s %*[^\n]');
+%! fclose (fid);
+%! unlink (f);
+%! assert (C, {{"09/12/2005";"10/12/2005";"11/12/2005"}});
+
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid,"1,  2,  3,  4,   ,  6\n");
+%! fprintf (fid,"7,  8,  9,   , 11, 12\n");
+%! fseek (fid, 0, "bof");
+%! C = textscan(fid,"%f %f %f %f %u8 %f", "Delimiter",",","EmptyValue",-Inf);
+%! fclose (fid);
+%! unlink (f);
+%! assert (C{4}, [4; -Inf]);
+%! assert (C{5}, uint8 ([0; 11]));
+
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! fprintf (fid,"abc, 2, NA, 3, 4\n");
+%! fprintf (fid,"// Comment Here\n");
+%! fprintf (fid,"def, na, 5, 6, 7\n");
+%! fseek (fid, 0, "bof");
+%! C = textscan(fid,"%s %n %n %n %n","Delimiter",",","TreatAsEmpty",{"NA","na"},"CommentStyle","//");
+%! fclose (fid);
+%! unlink (f);
+%! assert (C{1}, {"abc";"def"});
+%! assert (C{2}, [2; NaN]);
+%! assert (C{3}, [NaN; 5]);
+%! assert (C{4}, [3; 6]);
+%! assert (C{5}, [4; 7]);
+
+%!test
+%!## Test start of comment as string
+%! c = textscan ("1 / 2 // 3", "%n %s %u8", "CommentStyle", {"//"});
+%! assert (c, {1, "/", 2});
+*/
+
+// These tests have end-comment sequences, so can't just be in a comment
+#if 0
+%!test
+%!## Test unfinished comment
+%! c = textscan ("1 2 /* half comment", "%n %u8", "CommentStyle", {"/*", "*/"});
+%! assert (c, {1, 2});
+
+## Test reading from a real file
+%!test
+%! f = tempname ();
+%! fid = fopen (f, "w+");
+%! d = rand (1, 4);
+%! fprintf (fid, "  %f %f /* comment */  %f  %f ", d);
+%! fseek (fid, 0, "bof");
+%! A = textscan (fid, "%f %f", "CommentStyle", {"/*", "*/"});
+%! E = feof (fid);
+%! fclose (fid);
+%! unlink (f);
+%! assert (A{1}, [d(1); d(3)], 1e-6);
+%! assert (A{2}, [d(2); d(4)], 1e-6);
+%! assert (E);
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libinterp/corefcn/textscan.h	Tue Mar 15 19:36:52 2016 +1100
@@ -0,0 +1,421 @@
+/*
+Copyright (C) 2015-2016 Lachlan Andrew, Monash University
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+/** @file
+ * Implementation of textscan, a versatile text parser.
+ */
+
+#if !defined (octave_textscan_h)
+#define octave_textscan_h 1
+
+// For Inf and NaN
+#include "lo-ieee.h"
+
+/** Delimited stream, optimised to read strings of characters separated
+ * by single-character delimiters.
+ *
+ * The reason behind this class is that octstream doesn't provide seek/tell,
+ * but the opportunity has been taken to optimise for the textscan workload.
+ *
+ * The function reads chunks into a 4kiB buffer, and marks where the last
+ * delimiter occurs.  Reads up to this delimiter can be fast.  After that
+ * last delimiter, the remaining text is moved to the front of the buffer
+ * and the buffer is refilled.  This also allows cheap seek and tell
+ * operations within a "fast read" block.
+ */
+class
+dstr
+{
+  int bufsize;         // number of characters to read from the file at once
+  std::istream& i_stream;   // stream to read from
+  char *buf;           // temporary storage for a "chunk" of data
+  char *idx;           // Current read pointer
+  char *last;          // location of last delimiter in the buffer at buf
+                       //        (undefined if delimited is false)
+  char *eob;           // Position after last character in buffer
+  bool delimited;      // True if there is delimiter in the bufer after idx
+  int longest;         // longest lookahead required
+  const std::string delims; // sequence of single-character delimiters
+
+  std::streampos buf_in_file;  // Position of start of buf in original stream
+
+  char *progress_marker; // Marker to see if a read consumes any characters
+
+public:
+  dstr (std::istream& is, const std::string& delimiters,
+        int longest_lookahead, octave_idx_type bsize = 4096);
+
+  dstr (std::istream& is, const dstr& ds);
+
+  ~dstr ();
+
+  // Called when optimised sequence of get() is finished.  Ensures that
+  // there is a remaining delimiter in buf, or loads more data in.
+  void field_done (void)
+  {
+    if (idx >= last)
+      refresh_buf ();
+  }
+
+  // Load new data into buffer, and set eob, last, idx.
+  // Return EOF at end of file, 0 otherwise.
+  int refresh_buf (void);
+
+  // get a character, relying on caller to call field_done () if
+  // a delimiter has been reached.
+  int get (void)   { return delimited ? *idx++ : get_undelim (); }
+
+  // get a character, checking for underrun of the buffer
+  int get_undelim (void);
+
+  // Read character that will be got by the next get().
+  int peek (void)   { return *idx; }
+
+  // Read character that will be got by the next get().
+  int peek_undelim (void);
+
+  // Undo a 'get' or 'get_undelim'.  It is the caller's responsibility
+  // to avoid overflow by calling putbacks only for a character got by
+  // get() or get_undelim(), with no intervening
+  // get, get_delim, field_done, refresh_buf, getline, read or seekg.
+  void putback (char /*ch*/ = 0)  { --idx; }
+
+  int getline  (std::string& dest, char delim);
+  //int skipline (char delim);
+  char *read (char *buffer, int size, char* &new_start);
+
+  // return a position suitable to "seekg", valid only within this
+  // block between calls to field_done ().
+  char *tellg (void) { return idx; }
+  void seekg (char *old_idx) { idx = old_idx; }
+
+  bool eof (void)
+    {
+      return (eob == buf && i_stream.eof ()) || (flags & std::ios_base::eofbit);
+    }
+  operator const void* (void) { return (!eof () && !flags) ? this : 0; }
+
+  bool fail (void) { return flags & std::ios_base::failbit; }
+  std::ios_base::iostate rdstate (void) { return flags; }
+  void setstate (std::ios_base::iostate m) { flags = flags | m; }
+  void clear (std::ios_base::iostate m
+                        = (std::ios_base::eofbit & ~std::ios_base::eofbit))
+    { flags = flags & m; }
+
+  // Report if any characters have been consumed.
+  // (get, read etc. not cancelled by putback or seekg)
+  void progress_benchmark (void) { progress_marker = idx; }
+  bool no_progress (void) { return progress_marker == idx; }
+private:
+  std::ios_base::iostate flags; 
+
+  // No copying
+  dstr (const dstr&);
+  dstr& operator = (const dstr&);
+};
+
+/**
+ * A single conversion specifier, such as %f or %c
+ */
+class
+OCTINTERP_API
+textscan_format_elt
+{
+public:
+
+  enum special_conversion
+    {
+      whitespace_conversion = 1,
+      literal_conversion = 2
+    };
+
+  textscan_format_elt (const char *txt = 0, int w = 0, int p = -1,
+                       int bw = 0, bool dis = false, char typ = '\0',
+                       const std::string& ch_class = std::string ())
+    : text (strsave (txt)), width (w), prec (p), bitwidth (bw),
+      char_class (ch_class), type (typ), discard (dis),
+      numeric(typ == 'd' || typ == 'u' || type == 'f' || type == 'n')
+      { }
+
+  textscan_format_elt (const textscan_format_elt& e)
+    : text (strsave (e.text)), width (e.width), prec (e.prec),
+      bitwidth (e.bitwidth), char_class (e.char_class), type (e.type),
+      discard (e.discard), numeric (e.numeric)
+      { }
+
+  textscan_format_elt& operator = (const textscan_format_elt& e)
+    {
+      if (this != &e)
+        {
+          text = strsave (e.text);
+          width = e.width;
+          prec = e.prec;
+          bitwidth = e.bitwidth;
+          discard = e.discard;
+          type = e.type;
+          numeric = e.numeric;
+          char_class = e.char_class;
+        }
+
+      return *this;
+    }
+
+  ~textscan_format_elt (void) { delete [] text; }
+
+  // The C-style format string.
+  const char *text;
+
+  // The maximum field width.
+  unsigned int width;
+
+  // The maximum number of digits to read after the decimal in a
+  // floating point conversion.
+  int prec;
+
+  // The size of the result.  For integers, bitwidth may be 8, 16, 34,
+  // or 64.  For floating point values, bitwidth may be 32 or 64.
+  int bitwidth;
+
+  // The class of characters in a `[' or `^' format.
+  std::string char_class;
+
+  // Type of conversion
+  //  -- `d', `u', `f', `n', `s', `q', `c', `%', `C', `D', `[' or `^'.
+  char type;
+
+  // TRUE if we are not storing the result of this conversion.
+  bool discard;
+
+  // TRUE if the type is 'd', 'u', 'f', 'n'
+  bool numeric;
+};
+
+class textscan;
+
+/**
+ * The (parsed) sequence of format specifiers.
+ */
+class
+OCTINTERP_API
+textscan_format_list
+{
+public:
+
+  textscan_format_list (const std::string& fmt = std::string ());
+
+  ~textscan_format_list (void);
+
+  octave_idx_type num_conversions (void) const { return nconv; }
+
+  // The length can be different than the number of conversions.
+  // For example, "x %d y %d z" has 2 conversions but the length of
+  // the list is 3 because of the characters that appear after the
+  // last conversion.
+
+  octave_idx_type numel (void) const { return list.numel (); }
+
+  const textscan_format_elt *first (void)
+    {
+      curr_idx = 0;
+      return current ();
+    }
+
+  const textscan_format_elt *current (void) const
+    { return list.numel () > 0 ? list.elem (curr_idx) : 0; }
+
+  const textscan_format_elt *next (bool cycle = true)
+    {
+      curr_idx++;
+
+      if (curr_idx >= list.numel ())
+        {
+          if (cycle)
+            curr_idx = 0;
+          else
+            return 0;
+        }
+      return current ();
+    }
+
+  void printme (void) const;
+
+  bool ok (void) const { return (nconv >= 0); }
+
+  operator const void* (void) const { return ok () ? this : 0; }
+
+  bool set_from_first;  // true if number of %f to be set from data file
+  bool has_string;      // at least one conversion specifier is s,q,c, or [...]
+  int read_first_row (dstr& is, textscan& ts);
+
+  std::list<octave_value> out_buf (void) const { return (output_container); }
+
+private:
+
+  // Number of conversions specified by this format string, or -1 if
+  // invalid conversions have been found.
+  octave_idx_type nconv;
+
+  // Index to current element;
+  octave_idx_type curr_idx;
+
+  // FIXME -- maybe LIST should be a std::list object?
+  // List of format elements.
+  Array<textscan_format_elt*> list;
+
+  // list holding column arrays of types specified by conversions
+  std::list<octave_value > output_container;
+
+  // Temporary buffer.
+  std::ostringstream *buf;
+
+  void add_elt_to_list (unsigned int width, int prec, int bitwidth,
+                        octave_value val_type, bool discard,
+                        char type, octave_idx_type& num_elts,
+                        const std::string& char_class = std::string ());
+
+  void process_conversion (const std::string& s, size_t& i, size_t n,
+                           octave_idx_type& num_elts);
+
+  int finish_conversion (const std::string& s, size_t& i, size_t n,
+                         unsigned int& width, int& prec, int& bitwidth,
+                         octave_value& val_type,
+                         bool discard, char& type,
+                         octave_idx_type& num_elts);
+  // No copying!
+
+  textscan_format_list (const textscan_format_list&);
+
+  textscan_format_list& operator = (const textscan_format_list&);
+};
+
+/**
+ * Main class to implement textscan.
+ * Reads data and parses it according to a textscan_format_list.
+ * The calling sequence is
+ *   textscan ();
+ *   parse_options (...);
+ *   scan (...);
+ */
+class
+textscan
+{
+  friend class textscan_format_list;
+
+  std::string buf;
+  // Three cases for delim_table and delim_list
+  // 1. delim_table empty, delim_list empty:  whitespace delimiters
+  // 2. delim_table = look-up table of delim chars, delim_list empty.
+  // 3. delim_table non-empty, delim_list = Cell array of delim strings
+  std::string  whitespace_table;
+  std::string  delim_table;  // delim_table[i]=='\0' if i is not a delimiter,
+  std::string  delims;       // string of delimiter characters
+  Cell         comment_style;
+  int          comment_len;  // How far ahead to look to detect an open comment
+  int          comment_char; // first character of open comment
+  octave_idx_type buffer_size;
+
+  std::string  date_locale;
+
+  Cell         inf_nan;      // 'inf' and 'nan' for formatted_double
+  Cell         delim_list;   // Array of strings of delimiters
+  int          delim_len;    // Longest delimiter
+
+  octave_value empty_value;
+  std::string  exp_chars;
+  int          header_lines;
+  Cell         treat_as_empty;
+  int          treat_as_empty_len;      // longest string to treat as "N/A"
+  std::string  whitespace;
+  short        eol1, eol2;
+  short        return_on_error;
+
+  bool         collect_output;
+  bool         multiple_delims_as_one;
+
+  bool         default_exp;
+  bool         numeric_delim;
+
+  octave_idx_type lines;
+
+  int read_format_once (dstr &isp, textscan_format_list& fmt_list,
+                        std::list<octave_value> & retval,
+                        Array<octave_idx_type> row, int& done_after);
+
+  void scan_one (dstr& is, const textscan_format_elt& fmt,
+                          octave_value& ov, Array<octave_idx_type> row);
+
+  // Methods to process a particular conversion specifier
+  double read_double (dstr& is, const textscan_format_elt& fmt) const;
+  void scan_complex (dstr& is, const textscan_format_elt& fmt,
+                     Complex& val) const;
+
+  int scan_bracket (dstr& is, const char *pattern, std::string& val) const;
+  int scan_caret (dstr& is, const char *, std::string& val) const;
+  void scan_string (dstr& is, const textscan_format_elt& fmt,
+                             std::string& val) const;
+  void scan_cstring (dstr& is, const textscan_format_elt& fmt,
+                              std::string& val) const;
+  void scan_qstring (dstr& is, const textscan_format_elt& fmt,
+                              std::string& val);
+
+  // helper methods
+  std::string read_until (dstr& is, const Cell& delimiters,
+                          const std::string& ends) const;
+  int lookahead (dstr& is, const Cell& targets, int max_len,
+                 bool case_sensitive = true) const;
+
+  char *get_field    (dstr& isp, unsigned int width) const;
+  bool match_literal (dstr& isp, const textscan_format_elt& elem);
+  int  skip_whitespace (dstr& is, bool EOLstop = false);
+  int  skip_delim      (dstr& is);
+  bool is_delim (unsigned char ch) const
+    {
+      return (delim_table.length () == 0
+              && (isspace (ch) || ch == eol1 || ch == eol2))
+             || delim_table[ch] != '\0';
+    }
+  bool isspace (unsigned int ch) const { return whitespace_table[ch & 0xff]; }
+        // true if the only delimiter is whitespace
+  bool whitespace_delim (void) const { return delim_table.length () == 0; }
+
+  public:
+
+  textscan (void)
+    : buf (""), delim_table (""), delims (), comment_len (0), comment_char(-2),
+      buffer_size (0),
+      empty_value (octave_NaN), exp_chars ("edED"), header_lines (0),
+      treat_as_empty_len (0), whitespace (" \b\t"), eol1('\r'), eol2('\n'),
+      return_on_error (2), collect_output (false),
+      multiple_delims_as_one (false), default_exp (true),
+      numeric_delim (false), lines (0)
+    {
+      inf_nan = Cell (dim_vector (1,2));
+      inf_nan(0) = Cell (octave_value ("inf"));
+      inf_nan(1) = Cell (octave_value ("nan"));
+    };
+
+  octave_value scan (std::istream* isp, textscan_format_list& fmt_list,
+                     octave_idx_type ntimes);
+  void parse_options (const octave_value_list& args, int first_param,
+                         textscan_format_list& formats);
+};
+
+#endif
--- a/scripts/io/module.mk	Thu Mar 17 12:06:30 2016 -0700
+++ b/scripts/io/module.mk	Tue Mar 15 19:36:52 2016 +1100
@@ -9,7 +9,6 @@
   scripts/io/importdata.m \
   scripts/io/is_valid_file_id.m \
   scripts/io/strread.m \
-  scripts/io/textscan.m \
   scripts/io/textread.m
 
 scripts_iodir = $(fcnfiledir)/io
--- a/scripts/io/textscan.m	Thu Mar 17 12:06:30 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,708 +0,0 @@
-## Copyright (C) 2010-2014 Ben Abbott
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING.  If not, see
-## <http://www.gnu.org/licenses/>.
-
-## -*- texinfo -*-
-## @deftypefn  {} {@var{C} =} textscan (@var{fid}, @var{format})
-## @deftypefnx {} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n})
-## @deftypefnx {} {@var{C} =} textscan (@var{fid}, @var{format}, @var{param}, @var{value}, @dots{})
-## @deftypefnx {} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n}, @var{param}, @var{value}, @dots{})
-## @deftypefnx {} {@var{C} =} textscan (@var{str}, @dots{})
-## @deftypefnx {} {[@var{C}, @var{position}] =} textscan (@var{fid}, @dots{})
-## Read data from a text file or string.
-##
-## The string @var{str} or file associated with @var{fid} is read from and
-## parsed according to @var{format}.  The function behaves like @code{strread}
-## except it can also read from file instead of a string.  See the documentation
-## of @code{strread} for details.
-##
-## In addition to the options supported by @code{strread}, this function
-## supports a few more:
-##
-## @itemize
-## @item @qcode{"collectoutput"}:
-## A value of 1 or true instructs textscan to concatenate consecutive columns
-## of the same class in the output cell array.  A value of 0 or false (default)
-## leaves output in distinct columns.
-##
-## @item @qcode{"endofline"}:
-## Specify @qcode{"@xbackslashchar{}r"}, @qcode{"@xbackslashchar{}n"} or
-## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"} (for CR, LF, or CRLF).  If no
-## value is given, it will be inferred from the file.  If set to "" (empty
-## string) EOLs are ignored as delimiters and added to whitespace.
-##
-## @item @qcode{"headerlines"}:
-## The first @var{value} number of lines of @var{fid} are skipped.
-##
-## @item @qcode{"returnonerror"}:
-## If set to numerical 1 or true (default), return normally when read errors
-## have been encountered.  If set to 0 or false, return an error and no data.
-## As the string or file is read by columns rather than by rows, and because
-## textscan is fairly forgiving as regards read errors, setting this option
-## may have little or no actual effect.
-## @end itemize
-##
-## When reading from a character string, optional input argument @var{n}
-## specifies the number of times @var{format} should be used (i.e., to limit
-## the amount of data read).
-## When reading from file, @var{n} specifies the number of data lines to read;
-## in this sense it differs slightly from the format repeat count in strread.
-##
-## The output @var{C} is a cell array whose second dimension is determined
-## by the number of format specifiers.
-##
-## The second output, @var{position}, provides the position, in characters,
-## from the beginning of the file.
-##
-## If the format string is empty (not: omitted) and the file contains only
-## numeric data (excluding headerlines), textscan will return data in a number
-## of columns matching the number of numeric fields on the first data line of
-## the file.
-##
-## @seealso{dlmread, fscanf, load, strread, textread}
-## @end deftypefn
-
-## Author: Ben Abbott <bpabbott@mac.com>
-
-function [C, position] = textscan (fid, format = "%f", varargin)
-
-  BUFLENGTH = 4096;               # Read buffer
-  emptfmt = 0;                    # Signals deliberately empty format string
-
-  ## Check input
-  if (nargin < 1)
-    print_usage ();
-  endif
-
-  if (isempty (format))
-    emptfmt = 1;
-    format = "%f";
-  endif
-
-  if (! ischar (format))
-    error ("textscan: FORMAT must be a string");
-  endif
-
-  ## Determine the number of data fields & initialize output array
-  num_fields = numel (regexp (format, '(%(\d*|\d*\.\d*)?[nfduscq]|%\[)', "match"));
-  if (! num_fields)
-    error ("textscan.m: no valid format conversion specifiers found\n");
-  endif
-  C = cell (1, num_fields);
-
-  if (! (isa (fid, "double") && fid > 0) && ! ischar (fid))
-    error ("textscan: first argument must be a file id or character string");
-  endif
-
-  args = varargin;
-  if (nargin > 2 && isnumeric (args{1}))
-    nlines = args{1};
-  else
-    nlines = Inf;
-  endif
-  if (nlines < 1)
-    printf ("textscan: N = 0, no data read\n");
-    return;  endif
-
-  if (! any (strcmpi (args, "emptyvalue")))
-    ## Matlab returns NaNs for missing values
-    args(end+1:end+2) = {'emptyvalue', NaN};
-  endif
-
-  ## Check default parameter values that differ for strread & textread
-
-  ipos = find (strcmpi (args, "whitespace"));
-  if (isempty (ipos))
-    ## Matlab default whitespace = " \b\t"
-    args(end+1:end+2) = {'whitespace', " \b\t"};
-    whitespace = " \b\t";
-  else
-    ## Check if there's at least one string format specifier
-    has_str_fmt = regexp (format, '%[*]?\d*s', "once");
-    ## If there is a string format AND whitespace value = empty,
-    ## don't add a space (char(32)) to whitespace
-    if (! (isempty (args{ipos+1}) && has_str_fmt))
-      args{ipos+1} = unique ([" ", args{ipos+1}]);
-    endif
-    whitespace = args{ipos+1};
-  endif
-
-  if (! any (strcmpi (args, "delimiter")))
-    ## Matlab says default delimiter = whitespace.
-    ## strread() will pick this up further
-    args(end+1:end+2) = {'delimiter', ""};
-    delimiter = "";
-  else
-    delimiter = args{find (strcmpi (args, "delimiter")) + 1};
-  endif
-
-  collop = false;
-  ipos = find (strcmpi (args, "collectoutput"));
-  if (! isempty (ipos))
-    ## Search & concatenate consecutive columns of same class requested
-    if (isscalar (args{ipos+1})
-        && (islogical (args{ipos+1}) || isnumeric (args{ipos+1})))
-      collop = args{ipos+1};
-    else
-      warning ("textscan: illegal value for CollectOutput parameter - ignored");
-    endif
-    ## Remove argument before call to strread() below
-    args(ipos:ipos+1) = [];
-  endif
-
-  if (any (strcmpi (args, "returnonerror")))
-    ## Because of the way strread() reads data (columnwise) this parameter
-    ## can't be neatly implemented.  strread() will pick it up anyway
-    warning ('textscan: ReturnOnError is not fully implemented');
-  else
-    ## Set default value (=true)
-    args(end+1:end+2) = {"returnonerror", 1};
-  endif
-
-  ## Check if a headerlines argument is specified
-  headerlines = find (strcmpi (args, "headerlines"), 1);
-  if (! isempty (headerlines))
-    ## Yep. But it is stray when reading from strings...
-    if (ischar (fid))
-      warning ("textscan: 'headerlines' ignored when reading from strings");
-    endif
-  endif
-
-  if (ischar (fid))
-    ## Read from a text string
-    if (nargout == 2)
-      error ("textscan: cannot provide position information for character input");
-    endif
-    str = fid;
-  else
-    st_pos = ftell (fid);
-    ## Skip header lines if requested
-    if (! isempty (headerlines))
-      ## Beware of missing or wrong headerline value
-      if (headerlines  == numel (args)
-         || ! isnumeric (args{headerlines + 1}))
-        error ("Missing or illegal value for 'headerlines'" );
-      endif
-      ## Avoid conveying floats to fskipl
-      args{headerlines + 1} = round (args{headerlines + 1});
-      if (args{headerlines + 1} > 0)
-        ## Beware of zero valued headerline, fskipl would skip to EOF
-        fskipl (fid, args{headerlines + 1});
-        st_pos = ftell (fid);
-      elseif (args{headerlines + 1} < 0)
-        warning ("textscan.m: negative headerline value ignored");
-      endif
-      args(headerlines:headerlines+1) = [];
-    endif
-    ## Read a first file chunk. Rest follows after endofline processing
-    [str, count] = fscanf (fid, "%c", BUFLENGTH);
-
-  endif
-
-  ## Check for empty result
-  if (isempty (str))
-    warning ("textscan: no data read");
-    return;
-  endif
-
-  ## Check value of 'endofline'.  String or file doesn't seem to matter
-  endofline = find (strcmpi (args, "endofline"), 1);
-  if (! isempty (endofline))
-    if (ischar (args{endofline + 1}))
-      eol_char = args{endofline + 1};
-      if (strcmp (typeinfo (eol_char), "sq_string"))
-        eol_char = do_string_escapes (eol_char);
-      endif
-      if (! any (strcmp (eol_char, {"", "\n", "\r", "\r\n"})))
-        error ("textscan: illegal EndOfLine character value specified");
-      endif
-    else
-      error ("textscan: character value required for EndOfLine");
-    endif
-  else
-    if (! ischar (fid))
-    ## Determine EOL from file.
-    ## Search for EOL candidates in the first BUFLENGTH chars
-    eol_srch_len = min (length (str), BUFLENGTH);
-    ## First try DOS (CRLF)
-    if (! isempty (strfind (str(1 : eol_srch_len), "\r\n")))
-      eol_char = "\r\n";
-    ## Perhaps old Macintosh? (CR)
-    elseif (! isempty (strfind (str(1 : eol_srch_len), "\r")))
-      eol_char = "\r";
-    ## Otherwise, use plain UNIX (LF)
-    else
-      eol_char = "\n";
-    endif
-    else
-      eol_char = "\n";
-    endif
-    ## Set up the default endofline param value
-    args(end+1:end+2) = {"endofline", eol_char};
-  endif
-
-  if (! ischar (fid))
-    ## Now that we know what EOL looks like, we can process format_repeat_count.
-    ## FIXME: below isn't ML-compatible: counts lines, not format string uses
-    if (isfinite (nlines) && (nlines >= 0))
-      l_eol_char = length (eol_char);
-      eoi = findstr (str, eol_char);
-      n_eoi = length (eoi);
-      nblks = 0;
-      ## Avoid slow repeated str concatenation, first seek requested end of data
-      while (n_eoi < nlines && count == BUFLENGTH)
-        [nstr, count] = fscanf (fid, "%c", BUFLENGTH);
-        if (count > 0)
-          ## Watch out for multichar EOL being missed across buffer boundaries
-          if (l_eol_char > 1)
-            str = [str(end - length (eol_char) + 2 : end) nstr];
-          else
-            str = nstr;
-          endif
-          eoi = findstr (str, eol_char);
-          n_eoi += numel (eoi);
-          ++nblks;
-        endif
-      endwhile
-      ## Handle case of missing trailing EOL
-      if (! strcmp (str(end - length (eol_char) + 1 : end), eol_char))
-        eoi = [ eoi (length (str)) ];
-        ++n_eoi;
-      endif
-      ## OK, found EOL delimiting last requested line. Compute ptr (incl. EOL)
-      if (isempty (eoi))
-        disp ("textscan: format repeat count specified but no endofline found");
-        data_size = nblks * BUFLENGTH + count;
-      else
-        ## Compute data size to read incl complete EOL
-        data_size = (nblks * BUFLENGTH) ...
-                    + eoi(end + min (nlines, n_eoi) - n_eoi) ...
-                    + l_eol_char - 1;
-      endif
-      fseek (fid, st_pos, "bof");
-      str = fscanf (fid, "%c", data_size);
-      args{1} = Inf;
-    else
-      fseek (fid, st_pos, "bof");
-      str = fread (fid, "char=>char").';
-    endif
-  endif
-
-  ## Strip trailing EOL to avoid returning stray missing values (f. strread).
-  ## However, in case of CollectOutput request, presence of EOL is required;
-  ## also in case of deliberately entered empty format string
-  eol_at_end = strcmp (str(end-length (eol_char) + 1 : end), eol_char);
-  if (collop || emptfmt)
-    if (! eol_at_end)
-      str(end+1 : end+length (eol_char)) = eol_char;
-    endif
-  elseif (eol_at_end)
-     str(end-length (eol_char) + 1 : end) = "";
-    ## A corner case: str may now be empty....
-    if (isempty (str)); return; endif
-   endif
-
-  ## Call strread to make it do the real work
-  C = cell (1, num_fields);
-  [C{:}] = strread (str, format, args{:});
-
-  ## I.c.o. empty format, match nr. of cols to nr. of fields on first read line
-  if (emptfmt)
-    ## Find end of first line
-    eoi = index (str, eol_char);
-    if (eoi)
-      ## str contains an EOL, proceed with assessing nr. of columns
-      ncols = countcols (C, str(1 : eoi-1), delimiter, whitespace);
-      ## See if lowermost data row must be completed
-      pad = mod (numel (C{1}), ncols);
-      if (pad)
-        ## Pad output with emptyvalues (rest has been done by stread.m)
-        emptv = find (strcmpi (args, "emptyvalue"));
-        if (isempty (emptv))
-          ## By default textscan returns NaNs for empty fields
-          C(1) = [C{1}; NaN(ncols - pad, 1)];
-        else
-          ## Otherwise return supplied emptyvalue. Pick last occurrence
-          C(1) = [C{1}; repmat(args{emptv(end)+1}, ncols - pad, 1)];
-        endif
-      endif
-      ## Compute nr. of rows
-      nrows = floor (numel (C{1}) / ncols);
-      ## Reshape C; watch out, transpose needed
-      C(1) = reshape (C{1}, ncols, numel (C{1}) / ncols).';
-      ## Distribute columns over C and wipe cols 2:end of C{1}
-      for ii=2:ncols
-        C(ii) = C{1}(:, ii);
-      endfor
-      C{1} = C{1}(:, 1);
-    endif
-  endif
-
-  ## If requested, collect output columns of same class
-  if (collop)
-    C = colloutp (C);
-  endif
-
-  if (nargout == 2)
-    ## Remember file position (persistent var)
-    position = ftell (fid);
-  endif
-
-endfunction
-
-
-## Assess nr of data fields on first line of data
-function ncols = countcols (C, str, dlm, wsp)
-
-  if (isempty (dlm))
-    ## Field separator = whitespace. Fold multiple whitespace into one
-    str = regexprep (str, sprintf ("[%s]", wsp), " ");
-    str = strtrim (str);
-    ncols = numel (strfind (str, " ")) + 1;
-  else
-    ncols = numel (regexp (str, sprintf ("[%s]", dlm))) + 1;
-  endif
-
-endfunction
-
-
-## Collect consecutive columns of same class into one cell column
-function C = colloutp (C)
-
-  ## Start at rightmost column and work backwards to avoid ptr mixup
-  ii = numel (C);
-  while (ii > 1)
-    clss1 = class (C{ii});
-    jj = ii;
-    while (jj > 1 && strcmp (clss1, class (C{jj - 1})))
-      ## Column to the left is still same class; check next column to the left
-      --jj;
-    endwhile
-    if (jj < ii)
-      ## Concatenate columns into current column
-      C{jj} = [C{jj : ii}];
-      ## Wipe concatenated columns to the right, resume search to the left
-      C(jj+1 : ii) = [];
-      ii = jj - 1;
-    else
-      ## No similar class in column to the left, search from there
-      --ii;
-    endif
-  endwhile
-
-endfunction
-
-
-%!test
-%! str = "1,  2,  3,  4\n 5,  ,  ,  8\n 9, 10, 11, 12";
-%! fmtstr = "%f %d %f %s";
-%! c = textscan (str, fmtstr, 2, "delimiter", ",", "emptyvalue", -Inf);
-%! assert (isequal (c{1}, [1;5]));
-%! assert (length (c{1}), 2);
-%! assert (iscellstr (c{4}));
-%! assert (isequal (c{3}, [3; -Inf]));
-
-%!test
-%! b = [10:10:100];
-%! b = [b; 8*b/5];
-%! str = sprintf ("%g miles/hr = %g kilometers/hr\n", b);
-%! fmt = "%f miles/hr = %f kilometers/hr";
-%! c = textscan (str, fmt);
-%! assert (b(1,:)', c{1}, 1e-5);
-%! assert (b(2,:)', c{2}, 1e-5);
-
-%!test
-%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6";
-%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//");
-%! assert (a{1}, int32 ([13; 36]));
-%! assert (a{2}, [72; NaN]);
-%! assert (a{3}, [NaN; 5]);
-%! assert (a{4}, {"str1"; "str3"});
-%! assert (a{5}, [25; 6]);
-
-%!test
-%! str = "Km:10 = hhhBjjj miles16hour\r\n";
-%! str = [str "Km:15 = hhhJjjj miles241hour\r\n"];
-%! str = [str "Km:2 = hhhRjjj miles3hour\r\n"];
-%! str = [str "Km:25 = hhhZ\r\n"];
-%! fmt = "Km:%d = hhh%1sjjj miles%dhour";
-%! a = textscan (str, fmt, "delimiter", " ");
-%! assert (a{1}', int32 ([10 15 2 25]));
-%! assert (a{2}', {'B' 'J' 'R' 'Z'});
-%! assert (a{3}', int32 ([16 241 3 0]));
-
-## Test with default endofline parameter
-%!test
-%! c = textscan ("L1\nL2", "%s");
-%! assert (c{:}, {"L1"; "L2"});
-
-## Test with endofline parameter set to "" (empty) - newline should be in word
-%!test
-%! c = textscan ("L1\nL2", "%s", "endofline", "");
-%! assert (int8 (c{:}{:}), int8 ([ 76,  49,  10,  76,  50 ]));
-
-%!test
-%! ## No delimiters at all besides EOL.  Skip fields, even empty fields
-%! str = "Text1Text2Text\nTextText4Text\nText57Text";
-%! c = textscan (str, "Text%*dText%dText");
-%! assert (c{1}, int32 ([2; 4; 0]));
-
-## CollectOutput test
-%!test
-%! b = [10:10:100];
-%! b = [b; 8*b/5; 8*b*1000/5];
-%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
-%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
-%! c = textscan (str, fmt, "collectoutput", 1);
-%! assert (size (c{3}), [10, 2]);
-%! assert (size (c{2}), [10, 2]);
-
-## CollectOutput test with uneven column length files
-%!test
-%! b = [10:10:100];
-%! b = [b; 8*b/5; 8*b*1000/5];
-%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
-%! str = [str "110 miles/hr"];
-%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
-%! c = textscan (str, fmt, "collectoutput", 1);
-%! assert (size (c{1}), [11, 1]);
-%! assert (size (c{3}), [11, 2]);
-%! assert (size (c{2}), [11, 2]);
-%! assert (c{3}(end), NaN);
-%! assert (c{2}{11, 1}, "/hr");
-%! assert (isempty (c{2}{11, 2}), true);
-
-## Test input validation
-%!error textscan ()
-%!error textscan (single (4))
-%!error textscan ({4})
-%!error <must be a string> textscan ("Hello World", 2)
-%!error <cannot provide position information> [C, pos] = textscan ("Hello World")
-%!error <character value required> textscan ("Hello World", '%s', 'EndOfLine', 3)
-
-%! Test incomplete first data line
-%! R = textscan (['Empty1' char(10)], 'Empty%d %f');
-%! assert (R{1}, int32 (1));
-%! assert (isempty (R{2}), true);
-
-## bug #37023 (actually a strread test)
-%!test
-%! data = textscan("   1. 1 \n 2 3\n", '%f %f');
-%! assert (data{1}, [1; 2], 1e-15);
-%! assert (data{2}, [1; 3], 1e-15);
-
-## Whitespace test (bug #37333) using delimiter ";"
-%!test
-%! tc = [];
-%! tc{1, 1} = "C:/code;";
-%! tc{1, end+1} = "C:/code/meas;";
-%! tc{1, end+1} = " C:/code/sim;";
-%! tc{1, end+1} = "C:/code/utils;";
-%! string = [tc{:}];
-%! c = textscan (string, "%s", "delimiter", ";");
-%! for k = 1:numel (c{1})
-%!   lh = c{1}{k};
-%!   rh = tc{k};
-%!   rh(rh == ";") = "";
-%!   rh = strtrim (rh);
-%!   assert (strcmp (lh, rh));
-%! end
-
-## Whitespace test (bug #37333), adding multipleDelimsAsOne true arg
-%!test
-%! tc = [];
-%! tc{1, 1} = "C:/code;";
-%! tc{1, end+1} = " C:/code/meas;";
-%! tc{1, end+1} = "C:/code/sim;;";
-%! tc{1, end+1} = "C:/code/utils;";
-%! string = [tc{:}];
-%! c = textscan (string, "%s", "delimiter", ";", "multipleDelimsAsOne", 1);
-%! for k = 1:numel (c{1})
-%!   lh = c{1}{k};
-%!   rh = tc{k};
-%!   rh(rh == ";") = "";
-%!   rh = strtrim (rh);
-%!   assert (strcmp (lh, rh));
-%! end
-
-## Whitespace test (bug #37333), adding multipleDelimsAsOne false arg
-%!test
-%! tc = [];
-%! tc{1, 1} = "C:/code;";
-%! tc{1, end+1} = " C:/code/meas;";
-%! tc{1, end+1} = "C:/code/sim;;";
-%! tc{1, end+1} = "";
-%! tc{1, end+1} = "C:/code/utils;";
-%! string = [tc{:}];
-%! c = textscan (string, "%s", "delimiter", ";", "multipleDelimsAsOne", 0);
-%! for k = 1:numel (c{1})
-%!   lh = c{1}{k};
-%!   rh = tc{k};
-%!   rh(rh == ";") = "";
-%!   rh = strtrim (rh);
-%!   assert (strcmp (lh, rh));
-%! end
-
-## Whitespace test (bug #37333) whitespace "" arg
-%!test
-%! tc = [];
-%! tc{1, 1} = "C:/code;";
-%! tc{1, end+1} = " C:/code/meas;";
-%! tc{1, end+1} = "C:/code/sim;";
-%! tc{1, end+1} = "C:/code/utils;";
-%! string = [tc{:}];
-%! c = textscan (string, "%s", "delimiter", ";", "whitespace", "");
-%! for k = 1:numel (c{1})
-%!   lh = c{1}{k};
-%!   rh = tc{k};
-%!   rh(rh == ";") = "";
-%!   assert (strcmp (lh, rh));
-%! end
-
-## Whitespace test (bug #37333), whitespace " " arg
-%!test
-%! tc = [];
-%! tc{1, 1} = "C:/code;";
-%! tc{1, end+1} = " C:/code/meas;";
-%! tc{1, end+1} = "C:/code/sim;";
-%! tc{1, end+1} = "C:/code/utils;";
-%! string = [tc{:}];
-%! c = textscan (string, "%s", "delimiter", ";", "whitespace", " ");
-%! for k = 1:numel (c{1})
-%!   lh = c{1}{k};
-%!   rh = tc{k};
-%!   rh(rh == ";") = "";
-%!   rh = strtrim (rh);
-%!   assert (strcmp (lh, rh));
-%! end
-
-## Test reading from a real file
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! d = rand (1, 4);
-%! fprintf (fid, "  %f %f   %f  %f ", d);
-%! fseek (fid, 0, "bof");
-%! A = textscan (fid, "%f %f");
-%! fclose (fid);
-%! unlink (f);
-%! assert (A{1}, [d(1); d(3)], 1e-6);
-%! assert (A{2}, [d(2); d(4)], 1e-6);
-
-## Tests reading with empty format, should return proper nr of columns
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! fprintf (fid, " 1 2 3 4\n5 6 7 8");
-%! fseek (fid, 0, "bof");
-%! A = textscan (fid, "");
-%! fclose (fid);
-%! unlink (f);
-%! assert (A{1}, [1 ; 5], 1e-6);
-%! assert (A{2}, [2 ; 6], 1e-6);
-%! assert (A{3}, [3 ; 7], 1e-6);
-%! assert (A{4}, [4 ; 8], 1e-6);
-
-## Tests reading with empty format; empty fields & incomplete lower row
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! fprintf (fid, " ,2,,4\n5,6");
-%! fseek (fid, 0, "bof");
-%! A = textscan (fid, "", "delimiter", ",", "EmptyValue", 999, "CollectOutput" , 1);
-%! fclose (fid);
-%! unlink (f);
-%! assert (A{1}, [999, 2, 999, 4; 5, 6, 999, 999], 1e-6);
-
-## Error message tests
-
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! msg1 = "Missing or illegal value for 'headerlines'";
-%! try
-%! A = textscan (fid, "", "headerlines");
-%! end_try_catch;
-%! fclose (fid);
-%! unlink (f);
-%! assert (msg1, lasterr);
-
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! msg1 = "Missing or illegal value for 'headerlines'";
-%! try
-%! A = textscan (fid, "", "headerlines", "hh");
-%! end_try_catch;
-%! fclose (fid);
-%! unlink (f);
-%! assert (msg1, lasterr);
-
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! fprintf (fid,"some_string");
-%! fseek (fid, 0, "bof");
-%! msg1 = "textscan: illegal EndOfLine character value specified";
-%! try
-%! A = textscan (fid, "%f", "EndOfLine", "\n\r");
-%! end_try_catch;
-%! fclose (fid);
-%! unlink (f);
-%! assert (msg1, lasterr);
-
-%!test
-%! f = tempname ();
-%! fid = fopen (f, "w+");
-%! fprintf (fid,"some_string");
-%! fseek (fid, 0, "bof");
-%! msg1 = "textscan: character value required for EndOfLine";
-%! try
-%! A = textscan (fid, "%f", "EndOfLine", 33);
-%! end_try_catch;
-%! fclose (fid);
-%! unlink (f);
-%! assert (msg1, lasterr);
-
-## Bug #41824
-%!test
-%! assert (textscan ("123", "", "whitespace", " "){:}, 123);
-
-## Bug #42343-1, just test supplied emptyvalue (actually done by strread.m)
-%!test
-%! assert (textscan (",NaN", "", "delimiter", "," ,"emptyValue" ,Inf), {Inf, NaN});
-
-## Bug #42343-2, test padding with supplied emptyvalue (done by textscan.m)
-%!test
-%! a = textscan (",1,,4\nInf,  ,NaN", "", "delimiter", ",", "emptyvalue", -10);
-%! assert (cell2mat (a), [-10, 1, -10, 4; Inf, -10, NaN, -10]);
-
-## Bug #42528
-%!test
-%! assert (textscan ("1i", ""){1},  0+1i);
-%! assert (cell2mat (textscan ("3, 2-4i, NaN\n -i, 1, 23.4+2.2i", "")), [3+0i, 2-4i, NaN+0i; 0-i,  1+0i, 23.4+2.2i]);
-
-## Illegal format specifiers
-%!test
-%!error <no valid format conversion specifiers> textscan ("1.0", "%z");
-
-## Properly process single-quoted EOL args (bug #46477)
-%!test
-%! C = textscan ("hello, world!", "%s%s", "endofline", '\n');
-%! assert (C{1}{1}, "hello,");
-%! assert (C{2}{1}, "world!");
-