Mercurial > octave
diff libinterp/corefcn/oct-stream.cc @ 26706:ccea3574f36b
Support encoding of file streams in textscan (bug #55452).
* oct-stream.cc (do_textscan): Pass encoding in constructor for textscan object.
(textscan): Store encoding in object. Convert strings from encoding.
* file-io.cc (textscan): Add BIST.
* io.tst (fopen): Use a code page identifier that works better cross-platform.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sat, 09 Feb 2019 20:05:47 +0100 |
parents | c13143821eef |
children | 0a62d9a6aa2d |
line wrap: on
line diff
--- a/libinterp/corefcn/oct-stream.cc Sat Feb 09 17:41:48 2019 +0100 +++ b/libinterp/corefcn/oct-stream.cc Sat Feb 09 20:05:47 2019 +0100 @@ -1775,7 +1775,8 @@ { public: - textscan (const std::string& who_arg = "textscan"); + textscan (const std::string& who_arg = "textscan", + const std::string& encoding = "utf-8"); // No copying! @@ -1797,6 +1798,8 @@ // What function name should be shown when reporting errors. std::string who; + std::string m_encoding; + std::string buf; // Three cases for delim_table and delim_list @@ -2506,13 +2509,13 @@ return retval; // May have returned 4 above. } - textscan::textscan (const std::string& who_arg) - : who (who_arg), buf (), whitespace_table (), delim_table (), - delims (), comment_style (), comment_len (0), comment_char (-2), - buffer_size (0), date_locale (), inf_nan (init_inf_nan ()), - empty_value (numeric_limits<double>::NaN ()), exp_chars ("edED"), - header_lines (0), treat_as_empty (), treat_as_empty_len (0), - whitespace (" \b\t"), eol1 ('\r'), eol2 ('\n'), + textscan::textscan (const std::string& who_arg, const std::string& encoding) + : who (who_arg), m_encoding (encoding), buf (), whitespace_table (), + delim_table (), delims (), comment_style (), comment_len (0), + comment_char (-2), buffer_size (0), date_locale (), + inf_nan (init_inf_nan ()), empty_value (numeric_limits<double>::NaN ()), + exp_chars ("edED"), header_lines (0), treat_as_empty (), + treat_as_empty_len (0), whitespace (" \b\t"), eol1 ('\r'), eol2 ('\n'), return_on_error (1), collect_output (false), multiple_delims_as_one (false), default_exp (true), lines (0) { } @@ -3148,6 +3151,10 @@ ends[i++] = eol2; val = textscan::read_until (is, delim_list, ends); } + + // convert from codepage + if (m_encoding.compare ("utf-8")) + val = string::u8_from_encoding ("textscan", val, m_encoding); } // Return in VAL the run of characters from IS contained in PATTERN. 
@@ -3195,6 +3202,10 @@ is.get_undelim (); } } + + // convert from codepage + if (m_encoding.compare ("utf-8")) + val = string::u8_from_encoding ("textscan", val, m_encoding); } // Read from IS into VAL a string of the next fmt.width characters, @@ -3217,6 +3228,10 @@ break; } } + + // convert from codepage + if (m_encoding.compare ("utf-8")) + val = string::u8_from_encoding ("textscan", val, m_encoding); } // Read a single '%...' conversion and place it in position ROW of OV. @@ -5309,7 +5324,7 @@ invalid_operation (who, "reading"); else { - textscan scanner (who); + textscan scanner (who, encoding ()); retval = scanner.scan (*isp, fmt, ntimes, options, read_count); }