Mercurial > octave
diff liboctave/util/oct-string.cc @ 30822:0826c503f294
Encoding facet based on gnulib uniconv for STL iostreams (bug #61839).
* liboctave/util/oct-string.h, liboctave/util/oct-string.cc (codecvt_u8): Add
encoding facet based on gnulib uniconv for STL iostreams.
* liboctave/wrappers/uniconv-wrappers.h, liboctave/wrappers/uniconv-wrappers.c
(octave_u8_conv_from_encoding_offsets, octave_u8_conv_to_encoding_offsets): Add
new wrappers.
* libinterp/corefcn/oct-stream.h, libinterp/corefcn/oct-stream.cc
(octave::stream): Wrap output stream with encoding facet.
* libinterp/corefcn/utils.h, libinterp/corefcn/utils.cc (format, vformat):
Remove functions that are no longer needed.
* libinterp/corefcn/file-io.cc: Add BIST.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sat, 05 Mar 2022 21:20:58 +0100 |
parents | 796f54d4ddbf |
children | 79edd49a5a97 |
line wrap: on
line diff
--- a/liboctave/util/oct-string.cc Sun Mar 06 18:20:05 2022 -0800 +++ b/liboctave/util/oct-string.cc Sat Mar 05 21:20:58 2022 +0100 @@ -607,6 +607,90 @@ return num_replacements; } +typedef octave::string::codecvt_u8::InternT InternT; +typedef octave::string::codecvt_u8::ExternT ExternT; +typedef octave::string::codecvt_u8::StateT StateT; + +typename std::codecvt<InternT, ExternT, StateT>::result +octave::string::codecvt_u8::do_out + (StateT& /* state */, + const InternT* from, const InternT* from_end, const InternT*& from_next, + ExternT* to, ExternT* to_end, ExternT*& to_next) const +{ + if (from_end < from) + return std::codecvt<InternT, ExternT, StateT>::noconv; + + // Convert from UTF-8 to output encoding + std::size_t srclen = (from_end-from) * sizeof (InternT); + std::size_t lengthp = (to_end-to) * sizeof (ExternT); + const uint8_t *u8_str = reinterpret_cast<const uint8_t *> (from); + char *enc_str = octave_u8_conv_to_encoding (m_enc.c_str (), u8_str, srclen, + &lengthp); + + size_t max = to_end - to; + if (lengthp < max) + max = lengthp; + + // copy conversion result to output + // FIXME: Handle incomplete UTF-8 characters at end of buffer. + std::copy_n (enc_str, max, to); + ::free (enc_str); + + from_next = from + srclen; + to_next = to + max; + + return std::codecvt<InternT, ExternT, StateT>::ok; +} + +typename std::codecvt<InternT, ExternT, StateT>::result +octave::string::codecvt_u8::do_in + (StateT& /* state */, + const ExternT* from, const ExternT* from_end, const ExternT*& from_next, + InternT* to, InternT* to_end, InternT*& to_next) const +{ + // Convert from input encoding to UTF-8 + std::size_t srclen = (from_end-from) * sizeof (ExternT); + std::size_t lengthp = (to_end-to) * sizeof (InternT); + const char *enc_str = reinterpret_cast<const char *> (from); + uint8_t *u8_str = octave_u8_conv_from_encoding (m_enc.c_str (), + enc_str, srclen, &lengthp); + + std::size_t max = to_end - to; + if (lengthp < max) + max = lengthp; + + // copy conversion result to output + std::copy_n (u8_str, max, to); + ::free (u8_str); + + from_next = from + srclen; + to_next = to + max; + + return std::codecvt<InternT, ExternT, StateT>::ok; +} + +int octave::string::codecvt_u8::do_length + (StateT& /* state */, const ExternT *src, const ExternT *end, + std::size_t max) const +{ + // return number of external characters that produce MAX internal ones + std::size_t srclen = end-src; + std::size_t offsets[srclen]; + std::size_t lengthp = max; + octave_u8_conv_from_encoding_offsets (m_enc.c_str (), src, srclen, offsets, + &lengthp); + std::size_t ext_char; + for (ext_char = 0; ext_char < srclen; ext_char++) + { + if (offsets[ext_char] != static_cast<size_t> (-1) + && offsets[ext_char] >= max) + break; + } + + return ext_char; +} + + template <typename T> std::string rational_approx (T val, int len)