Mercurial > jwe > octave
changeset 21468:7d06afd924be
* textscan.h, textscan.cc: More style fixes.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Thu, 17 Mar 2016 18:04:22 -0400 |
parents | 3aa83b8ce94b |
children | 29e2523c99c2 |
files | libinterp/corefcn/textscan.cc libinterp/corefcn/textscan.h |
diffstat | 2 files changed, 222 insertions(+), 138 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/textscan.cc Thu Mar 17 17:46:56 2016 -0400 +++ b/libinterp/corefcn/textscan.cc Thu Mar 17 18:04:22 2016 -0400 @@ -1747,50 +1747,51 @@ // values in row ROW of retval. int -textscan::read_format_once (dstr& is, - textscan_format_list& fmt_list, +textscan::read_format_once (dstr& is, textscan_format_list& fmt_list, std::list<octave_value> & retval, Array<octave_idx_type> row, int& done_after) { const textscan_format_elt *elem = fmt_list.first (); - std::list<octave_value>::iterator out=retval.begin (); + std::list<octave_value>::iterator out = retval.begin (); bool no_conversions = true; bool done = false; - int i; bool conversion_failed = false; // Record for ReturnOnError - bool this_conversion_failed; // Record for ReturnOnError - - octave_quit (); // Allow ctrl-C - - for (i = 0; i < fmt_list.numel (); i++) + + octave_quit (); + + for (int i = 0; i < fmt_list.numel (); i++) { - this_conversion_failed = false; - is.clear (); // clear fail of previous numeric conversions + bool this_conversion_failed = false; + + // Clear fail of previous numeric conversions. + is.clear (); + switch (elem->type) { - case 'C': - case 'D': - std::cerr << "textscan: Conversion %" << elem->type - << " not yet implemented\n"; - break; - - case 'u': - case 'd': - case 'f': - case 'n': - case 's': - case '[': - case '^': - case 'q': - case 'c': - scan_one (is, *elem, *out, row); - break; - - case textscan_format_elt::literal_conversion : - match_literal (is, *elem); - break; - default: - error ("Unknown format element '%c'", elem->type); + case 'C': + case 'D': + std::cerr << "textscan: Conversion %" << elem->type + << " not yet implemented\n"; + break; + + case 'u': + case 'd': + case 'f': + case 'n': + case 's': + case '[': + case '^': + case 'q': + case 'c': + scan_one (is, *elem, *out, row); + break; + + case textscan_format_elt::literal_conversion : + match_literal (is, *elem); + break; + + default: + error ("Unknown format element '%c'", elem->type); } if (!is.fail ()) @@ -1802,6 +1803,7 @@ { if (return_on_error < 2) this_conversion_failed = true; + is.clear (is.rdstate () & ~std::ios::failbit); } @@ -1810,12 +1812,14 @@ elem = fmt_list.next (); char *pos = is.tellg (); + // FIXME -- these conversions "ignore delimiters". Should they include // delimiters at the start of the conversion, or can those be skipped? if (elem->type != textscan_format_elt::literal_conversion // && elem->type != '[' && elem->type != '^' && elem->type != 'c' ) skip_delim (is); + if (this_conversion_failed) { if (is.tellg () == pos && ! conversion_failed) @@ -1831,13 +1835,16 @@ { if (!done) done_after = i+1; - done = true; // note EOF, but process others to get empty_val + + // note EOF, but process others to get empty_val. + done = true; } } + if (done) is.setstate (std::ios::eofbit); - // returning -3 means "error, and no columns read this row + // Returning -3 means "error, and no columns read this row". if (is.eof ()) return (2 + no_conversions);
--- a/libinterp/corefcn/textscan.h Thu Mar 17 17:46:56 2016 -0400 +++ b/libinterp/corefcn/textscan.h Thu Mar 17 18:04:22 2016 -0400 @@ -26,9 +26,6 @@ #if ! defined (octave_textscan_h) #define octave_textscan_h 1 -// For Inf and NaN -#include "lo-ieee.h" - // Delimited stream, optimised to read strings of characters separated // by single-character delimiters. // @@ -52,9 +49,9 @@ dstr (std::istream& is, const dstr& ds); - ~dstr (); + ~dstr (void); - // Called when optimised sequence of get() is finished. Ensures that + // Called when optimised sequence of get is finished. Ensures that // there is a remaining delimiter in buf, or loads more data in. void field_done (void) { @@ -66,17 +63,17 @@ // Return EOF at end of file, 0 otherwise. int refresh_buf (void); - // get a character, relying on caller to call field_done () if + // Get a character, relying on caller to call field_done if // a delimiter has been reached. int get (void) { return delimited ? *idx++ : get_undelim (); } - // get a character, checking for underrun of the buffer + // Get a character, checking for underrun of the buffer. int get_undelim (void); - // Read character that will be got by the next get(). + // Read character that will be got by the next get. int peek (void) { return *idx; } - // Read character that will be got by the next get(). + // Read character that will be got by the next get. int peek_undelim (void); // Undo a 'get' or 'get_undelim'. It is the caller's responsibility @@ -86,54 +83,85 @@ void putback (char /*ch*/ = 0) { --idx; } int getline (std::string& dest, char delim); - //int skipline (char delim); + + // int skipline (char delim); + char *read (char *buffer, int size, char* &new_start); - // return a position suitable to "seekg", valid only within this - // block between calls to field_done (). + // Return a position suitable to "seekg", valid only within this + // block between calls to field_done. char *tellg (void) { return idx; } + void seekg (char *old_idx) { idx = old_idx; } bool eof (void) - { - return (eob == buf && i_stream.eof ()) || (flags & std::ios_base::eofbit); - } + { + return (eob == buf && i_stream.eof ()) || (flags & std::ios_base::eofbit); + } + operator const void* (void) { return (!eof () && !flags) ? this : 0; } bool fail (void) { return flags & std::ios_base::failbit; } + std::ios_base::iostate rdstate (void) { return flags; } + void setstate (std::ios_base::iostate m) { flags = flags | m; } + void clear (std::ios_base::iostate m - = (std::ios_base::eofbit & ~std::ios_base::eofbit)) - { flags = flags & m; } + = (std::ios_base::eofbit & ~std::ios_base::eofbit)) + { + flags = flags & m; + } // Report if any characters have been consumed. // (get, read etc. not cancelled by putback or seekg) + void progress_benchmark (void) { progress_marker = idx; } + bool no_progress (void) { return progress_marker == idx; } private: - int bufsize; // number of characters to read from the file at once - std::istream& i_stream; // stream to read from - char *buf; // temporary storage for a "chunk" of data - char *idx; // Current read pointer - char *last; // location of last delimiter in the buffer at buf - // (undefined if delimited is false) - char *eob; // Position after last character in buffer - bool delimited; // True if there is delimiter in the bufer after idx - int longest; // longest lookahead required - const std::string delims; // sequence of single-character delimiters + // Number of characters to read from the file at once. + int bufsize; + + // Stream to read from. + std::istream& i_stream; + + // Temporary storage for a "chunk" of data. + char *buf; + + // Current read pointer. + char *idx; + + // Location of last delimiter in the buffer at buf (undefined if + // delimited is false). + char *last; - std::streampos buf_in_file; // Position of start of buf in original stream + // Position after last character in buffer. + char *eob; + + // True if there is delimiter in the bufer after idx. + bool delimited; + + // Longest lookahead required. + int longest; - char *progress_marker; // Marker to see if a read consumes any characters + // Sequence of single-character delimiters. + const std::string delims; + + // Position of start of buf in original stream. + std::streampos buf_in_file; + + // Marker to see if a read consumes any characters. + char *progress_marker; std::ios_base::iostate flags; // No copying! dstr (const dstr&); + dstr& operator = (const dstr&); }; @@ -146,10 +174,10 @@ public: enum special_conversion - { - whitespace_conversion = 1, - literal_conversion = 2 - }; + { + whitespace_conversion = 1, + literal_conversion = 2 + }; textscan_format_elt (const char *txt = 0, int w = 0, int p = -1, int bw = 0, bool dis = false, char typ = '\0', @@ -157,30 +185,30 @@ : text (strsave (txt)), width (w), prec (p), bitwidth (bw), char_class (ch_class), type (typ), discard (dis), numeric(typ == 'd' || typ == 'u' || type == 'f' || type == 'n') - { } + { } textscan_format_elt (const textscan_format_elt& e) : text (strsave (e.text)), width (e.width), prec (e.prec), bitwidth (e.bitwidth), char_class (e.char_class), type (e.type), discard (e.discard), numeric (e.numeric) - { } + { } textscan_format_elt& operator = (const textscan_format_elt& e) - { - if (this != &e) - { - text = strsave (e.text); - width = e.width; - prec = e.prec; - bitwidth = e.bitwidth; - discard = e.discard; - type = e.type; - numeric = e.numeric; - char_class = e.char_class; - } + { + if (this != &e) + { + text = strsave (e.text); + width = e.width; + prec = e.prec; + bitwidth = e.bitwidth; + discard = e.discard; + type = e.type; + numeric = e.numeric; + char_class = e.char_class; + } - return *this; - } + return *this; + } ~textscan_format_elt (void) { delete [] text; } @@ -236,27 +264,30 @@ octave_idx_type numel (void) const { return list.numel (); } const textscan_format_elt *first (void) - { - curr_idx = 0; - return current (); - } + { + curr_idx = 0; + return current (); + } const textscan_format_elt *current (void) const - { return list.numel () > 0 ? list.elem (curr_idx) : 0; } + { + return list.numel () > 0 ? list.elem (curr_idx) : 0; + } const textscan_format_elt *next (bool cycle = true) - { - curr_idx++; + { + curr_idx++; - if (curr_idx >= list.numel ()) - { - if (cycle) - curr_idx = 0; - else - return 0; - } - return current (); - } + if (curr_idx >= list.numel ()) + { + if (cycle) + curr_idx = 0; + else + return 0; + } + + return current (); + } void printme (void) const; @@ -264,8 +295,12 @@ operator const void* (void) const { return ok () ? this : 0; } - bool set_from_first; // true if number of %f to be set from data file - bool has_string; // at least one conversion specifier is s,q,c, or [...] + // True if number of %f to be set from data file. + bool set_from_first; + + // At least one conversion specifier is s,q,c, or [...]. + bool has_string; + int read_first_row (dstr& is, textscan& ts); std::list<octave_value> out_buf (void) const { return (output_container); } @@ -338,48 +373,72 @@ inf_nan(1) = Cell (octave_value ("nan")); }; + ~textscan (void) { } + octave_value scan (std::istream* isp, textscan_format_list& fmt_list, octave_idx_type ntimes); + void parse_options (const octave_value_list& args, int first_param, - textscan_format_list& formats); + textscan_format_list& formats); private: friend class textscan_format_list; std::string buf; + // Three cases for delim_table and delim_list // 1. delim_table empty, delim_list empty: whitespace delimiters // 2. delim_table = look-up table of delim chars, delim_list empty. // 3. delim_table non-empty, delim_list = Cell array of delim strings - std::string whitespace_table; - std::string delim_table; // delim_table[i]=='\0' if i is not a delimiter, - std::string delims; // string of delimiter characters - Cell comment_style; - int comment_len; // How far ahead to look to detect an open comment - int comment_char; // first character of open comment + + std::string whitespace_table; + + // delim_table[i] == '\0' if i is not a delimiter. + std::string delim_table; + + // String of delimiter characters. + std::string delims; + + Cell comment_style; + + // How far ahead to look to detect an open comment. + int comment_len; + + // First character of open comment. + int comment_char; + octave_idx_type buffer_size; - std::string date_locale; + std::string date_locale; + + // 'inf' and 'nan' for formatted_double. + Cell inf_nan; - Cell inf_nan; // 'inf' and 'nan' for formatted_double - Cell delim_list; // Array of strings of delimiters - int delim_len; // Longest delimiter + // Array of strings of delimiters. + Cell delim_list; + + // Longest delimiter. + int delim_len; octave_value empty_value; - std::string exp_chars; - int header_lines; - Cell treat_as_empty; - int treat_as_empty_len; // longest string to treat as "N/A" - std::string whitespace; - short eol1, eol2; - short return_on_error; + std::string exp_chars; + int header_lines; + Cell treat_as_empty; + + // Longest string to treat as "N/A". + int treat_as_empty_len; + + std::string whitespace; - bool collect_output; - bool multiple_delims_as_one; + short eol1; + short eol2; + short return_on_error; - bool default_exp; - bool numeric_delim; + bool collect_output; + bool multiple_delims_as_one; + bool default_exp; + bool numeric_delim; octave_idx_type lines; @@ -390,39 +449,57 @@ void scan_one (dstr& is, const textscan_format_elt& fmt, octave_value& ov, Array<octave_idx_type> row); - // Methods to process a particular conversion specifier + // Methods to process a particular conversion specifier. double read_double (dstr& is, const textscan_format_elt& fmt) const; + void scan_complex (dstr& is, const textscan_format_elt& fmt, Complex& val) const; int scan_bracket (dstr& is, const char *pattern, std::string& val) const; + int scan_caret (dstr& is, const char *, std::string& val) const; + void scan_string (dstr& is, const textscan_format_elt& fmt, - std::string& val) const; + std::string& val) const; + void scan_cstring (dstr& is, const textscan_format_elt& fmt, - std::string& val) const; + std::string& val) const; + void scan_qstring (dstr& is, const textscan_format_elt& fmt, - std::string& val); + std::string& val); - // helper methods + // Helper methods. std::string read_until (dstr& is, const Cell& delimiters, const std::string& ends) const; + int lookahead (dstr& is, const Cell& targets, int max_len, bool case_sensitive = true) const; - char *get_field (dstr& isp, unsigned int width) const; + char *get_field (dstr& isp, unsigned int width) const; + bool match_literal (dstr& isp, const textscan_format_elt& elem); + int skip_whitespace (dstr& is, bool EOLstop = false); - int skip_delim (dstr& is); + + int skip_delim (dstr& is); + bool is_delim (unsigned char ch) const - { - return (delim_table.length () == 0 - && (isspace (ch) || ch == eol1 || ch == eol2)) - || delim_table[ch] != '\0'; - } + { + return ((delim_table.length () == 0 + && (isspace (ch) || ch == eol1 || ch == eol2)) + || delim_table[ch] != '\0'); + } + bool isspace (unsigned int ch) const { return whitespace_table[ch & 0xff]; } - // true if the only delimiter is whitespace + + // True if the only delimiter is whitespace. bool whitespace_delim (void) const { return delim_table.length () == 0; } + + // No copying! + + textscan (const textscan&); + + textscan& operator = (const textscan&); }; #endif