diff liboctave/util/oct-string.h @ 30827:0826c503f294

Encoding facet based on gnulib uniconv for STL iostreams (bug #61839). * liboctave/util/oct-string.h, liboctave/util/oct-string.cc (codecvt_u8): Add encoding facet based on gnulib uniconv for STL iostreams. * liboctave/wrappers/uniconv-wrappers.h, liboctave/wrappers/uniconv-wrappers.c (octave_u8_conv_from_encoding_offsets, octave_u8_conv_to_encoding_offsets): Add new wrappers. * libinterp/corefcn/oct-stream.h, libinterp/corefcn/oct-stream.cc (octave::stream): Wrap output stream with encoding facet. * libinterp/corefcn/utils.h, libinterp/corefcn/utils.cc (format, vformat): Remove functions that are no longer needed. * libinterp/corefcn/file-io.cc: Add BIST.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 05 Mar 2022 21:20:58 +0100
parents 796f54d4ddbf
children
line wrap: on
line diff
--- a/liboctave/util/oct-string.h	Sun Mar 06 18:20:05 2022 -0800
+++ b/liboctave/util/oct-string.h	Sat Mar 05 21:20:58 2022 +0100
@@ -28,6 +28,8 @@
 
 #include "octave-config.h"
 
+#include <locale>
+
 #include "oct-cmplx.h"
 
 namespace octave
@@ -155,6 +157,94 @@
     extern OCTAVE_API unsigned int
     u8_validate (const std::string& who, std::string& in_string,
                  const u8_fallback_type type = U8_REPLACEMENT_CHAR);
+
+
+    // Wrapper deriving from Facet whose constructor and destructor are public.
+    template <typename Facet>
+    struct deletable_facet : public Facet
+    {
+      // Forward every constructor argument unchanged to the Facet base.
+      template <typename... CtorArgs>
+      deletable_facet (CtorArgs&&... ctor_args)
+      : Facet (std::forward<CtorArgs> (ctor_args)...)
+      { }
+      // destructor needs to be public
+      ~deletable_facet () { }
+    };
+
+    // Encoding conversion facet (char <-> char) for STL iostreams.  The
+    // constructor argument (presumably the encoding name) is kept in m_enc.
+    class
+    OCTAVE_API
+    codecvt_u8 : public std::codecvt<char, char, std::mbstate_t>
+    {
+    public:
+
+      // No copying!
+      codecvt_u8 (const codecvt_u8&) = delete;
+      codecvt_u8& operator = (const codecvt_u8&) = delete;
+
+      codecvt_u8 (const std::string &enc)
+      : m_enc (enc)
+      { }
+
+      virtual ~codecvt_u8 () { }
+
+      typedef char InternT;
+      typedef char ExternT;
+      typedef std::mbstate_t StateT;
+
+    private:
+
+      OCTAVE_API
+      typename std::codecvt<InternT, ExternT, StateT>::result
+      do_out (StateT& state,
+              const InternT* from, const InternT* from_end, const InternT*& from_next,
+              ExternT* to, ExternT* to_end, ExternT*& to_next) const override;
+
+      OCTAVE_API
+      typename std::codecvt<InternT, ExternT, StateT>::result
+      do_in (StateT& state,
+             const ExternT* from, const ExternT* from_end, const ExternT*& from_next,
+             InternT* to, InternT* to_end, InternT*& to_next) const override;
+
+      // Writes no output: only sets TO_NEXT to TO and reports success.
+      typename std::codecvt<InternT, ExternT, StateT>::result
+      do_unshift (StateT& /* state */, ExternT* to, ExternT* /* to_end */,
+                  ExternT*& to_next) const override
+      {
+        // FIXME: What is the correct thing to unshift?
+        // Just reset?
+        to_next = to;
+
+        return std::codecvt<InternT, ExternT, StateT>::ok;
+      }
+
+      int do_encoding () const noexcept override
+      {
+        // return 0 because UTF-8 encoding is variable length
+        return 0;
+      }
+
+      bool do_always_noconv () const noexcept override
+      {
+        // return false to indicate non-identity conversion
+        return false;
+      }
+
+      OCTAVE_API int
+      do_length (StateT& state, const ExternT *src, const ExternT *end,
+                 std::size_t max) const override;
+
+      int do_max_length () const noexcept override
+      {
+        // For UTF-8, a maximum of 4 bytes are needed for one character.
+        return 4;
+      }
+
+      std::string m_enc;
+    };
+
   }
 }