changeset 14210:238e499c5fea

locale support implementation in [fs]scanf functions Implement the ability to scan "1,2" and get 1.2 as results under French locale, where the decimal separator is ','. This uses the C++ standard library locale approach: the interpretation of some stream locale is modified using the "imbue" function. In case of fscanf, the previous locale is restored after each call. The functions interface is not modified. When the last argument is a string, its value is used as the new locale setting. * src/oct-stream.h (octave_base_stream::imbue): new function, which merely ignores its argument. (octave_stream::imbue): add a real implementation, which is a proxy to std::[io]stream.imbue. * src/file_io.cc: (fscanf): when the last argument is a string, save the actual locale value and set a new value from this string. Generate a warning in case the requested locale is not supported. Revert to previous locale if OK, or to standard locale in case of problem. (scanf): Likewise, except the stream is generated on-the-fly, there is no need to restore its locale value. Added a test case. Updated the doc for both functions.
author Pascal Dupuis <Pascal.Dupuis@worldonline.be>
date Wed, 18 Jan 2012 16:40:29 -0500
parents 67a0ab9f8283
children 3f4cae8cb9fc
files src/file-io.cc src/oct-stream.h
diffstat 2 files changed, 73 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/src/file-io.cc	Thu Jan 12 12:54:24 2012 -0800
+++ b/src/file-io.cc	Wed Jan 18 16:40:29 2012 -0500
@@ -43,7 +43,9 @@
 #include <cstdio>
 
 #include <iostream>
+#include <locale>
 #include <stack>
+#include <stdexcept>
 #include <vector>
 
 #include <fcntl.h>
@@ -1070,7 +1072,7 @@
 DEFUN (fscanf, args, ,
   "-*- texinfo -*-\n\
 @deftypefn  {Built-in Function} {[@var{val}, @var{count}, @var{errmsg}] =} fscanf (@var{fid}, @var{template}, @var{size})\n\
-@deftypefnx {Built-in Function} {[@var{v1}, @var{v2}, @dots{}, @var{count}, @var{errmsg}] =} fscanf (@var{fid}, @var{template}, \"C\")\n\
++@deftypefnx {Built-in Function} {[@var{v1}, @var{v2}, @dots{}, @var{count}] =} fscanf (@var{fid}, @var{template}, @var{locale})\n\
 In the first form, read from @var{fid} according to @var{template},\n\
 returning the result in the matrix @var{val}.\n\
 \n\
@@ -1109,7 +1111,10 @@
 with each conversion specifier in @var{template} corresponding to a\n\
 single scalar return value.  This form is more `C-like', and also\n\
 compatible with previous versions of Octave.  The number of successful\n\
-conversions is returned in @var{count}\n\
+conversions is returned in @var{count}. It permits to explicitly\n\
+specify a locale to take into account langage specific features, \n\
+such as decimal separator. This operation restores the previous locales\n\
+setting at the end of the conversion.\n\
 @ifclear OCTAVE_MANUAL\n\
 \n\
 See the Formatted Input section of the GNU Octave manual for a\n\
@@ -1131,7 +1136,25 @@
       if (! error_state)
         {
           if (args(1).is_string ())
-            retval = os.oscanf (args(1), who);
+            {
+              std::locale oldloc;
+              try
+                {
+                  // Use args(2) val as the new locale setting. Keep
+                  // old val for restoring afterwards.
+                  oldloc = 
+                    os.imbue (std::locale (args(2).string_value ().c_str ()));
+                  
+                }
+              catch (std::runtime_error)
+                {
+                  // Display a warning if the specified locale is unknown
+                  warning ("fscanf: invalid locale. Try `locale -a' for a list of supported values.");
+                  oldloc = std::locale::classic ();
+                }
+              retval = os.oscanf (args(1), who);
+              os.imbue (oldloc);
+            }
           else
             ::error ("%s: format TEMPLATE must be a string", who.c_str ());
         }
@@ -1199,7 +1222,7 @@
 DEFUN (sscanf, args, ,
   "-*- texinfo -*-\n\
 @deftypefn  {Built-in Function} {[@var{val}, @var{count}, @var{errmsg}, @var{pos}] =} sscanf (@var{string}, @var{template}, @var{size})\n\
-@deftypefnx {Built-in Function} {[@var{v1}, @var{v2}, @dots{}, @var{count}, @var{errmsg}] =} sscanf (@var{string}, @var{template}, \"C\")\n\
+@deftypefnx {Built-in Function} {[@var{v1}, @var{v2}, @dots{}, @var{count}] =} sscanf (@var{string}, @var{template}, @var{locale})\n\
 This is like @code{fscanf}, except that the characters are taken from the\n\
 string @var{string} instead of from a stream.  Reaching the end of the\n\
 string is treated as an end-of-file condition.  In addition to the values\n\
@@ -1224,8 +1247,22 @@
 
           if (os.is_valid ())
             {
-              if (args(1).is_string ())
-                retval = os.oscanf (args(1), who);
+              if (args(1).is_string ()) 
+                {
+                  // Use args(2) val as the new locale setting. As the os
+                  // object is short lived, we don't need to restore
+                  // locale afterwards.
+                  try
+                    {  
+                      os.imbue (std::locale (args(2).string_value ().c_str ()));
+                    }
+                  catch (std::runtime_error)
+                    {
+                      // Display a warning if the specified locale is unknown
+                      warning ("sscanf: invalid locale. Try `locale -a' for a list of supported values.");
+                    }
+                  retval = os.oscanf (args(1), who);
+                }              
               else
                 ::error ("%s: format TEMPLATE must be a string", who.c_str ());
             }
@@ -1293,10 +1330,16 @@
   return retval;
 }
 
+/*
+%!test
+%! assert(sscanf('1,2', '%f', 'C'), 1)
+%! assert(sscanf('1,2', '%f', 'fr_FR'), 1.2)
+*/
+
 DEFUN (scanf, args, nargout,
   "-*- texinfo -*-\n\
 @deftypefn  {Built-in Function} {[@var{val}, @var{count}, @var{errmsg}] =} scanf (@var{template}, @var{size})\n\
-@deftypefnx {Built-in Function} {[@var{v1}, @var{v2}, @dots{}, @var{count}, @var{errmsg}]] =} scanf (@var{template}, \"C\")\n\
+@deftypefnx {Built-in Function} {[@var{v1}, @var{v2}, @dots{}, @var{count}]] =} scanf (@var{template}, @var{locale}))\n\
 This is equivalent to calling @code{fscanf} with @var{fid} = @code{stdin}.\n\
 \n\
 It is currently not useful to call @code{scanf} in interactive\n\
--- a/src/oct-stream.h	Thu Jan 12 12:54:24 2012 -0800
+++ b/src/oct-stream.h	Wed Jan 18 16:40:29 2012 -0500
@@ -371,6 +371,12 @@
 
   virtual std::ostream *output_stream (void) { return 0; }
 
+  // If the derived class is locale-aware, it must implement this function 
+  // in order to set a new locale. By default, this function avoids messing 
+  // with locales and ignores its input argument.
+  virtual std::locale imbue ( const std::locale & loc ) 
+    { return std::locale::classic (); }
+  
   // Return TRUE if this stream is open.
 
   bool is_open (void) const { return open_state; }
@@ -613,7 +619,23 @@
   {
     return rep ? rep->output_stream () : 0;
   }
-
+  
+  std::locale imbue (const std::locale & loc )
+    {
+      if (!rep) return std::locale::classic ();
+      
+      std::istream *is = rep->input_stream ();
+      std::ostream *os = rep->output_stream ();
+      
+      if (os) 
+        {
+          if (is)
+            (void) is->imbue (loc);
+          return os->imbue (loc);
+        }
+      return is ? is->imbue (loc) : std::locale::classic ();
+    }
+  
   void clearerr (void) { if (rep) rep->clearerr (); }
 
 private: