changeset 25620:467729037209

Strip BOM from UTF-8 encoded .m files (bug #52574).

* input.cc: Strip byte order mark (BOM) from UTF-8 encoded .m files.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 14 Jul 2018 14:16:22 +0200
parents b3f6443f6b20
children 0fa7312e34bc
files libinterp/corefcn/input.cc
diffstat 1 files changed, 7 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/input.cc	Fri Jul 13 21:46:50 2018 -0700
+++ b/libinterp/corefcn/input.cc	Sat Jul 14 14:16:22 2018 +0200
@@ -1115,7 +1115,13 @@
       = (mfile_encoding.compare ("system") == 0
          ? octave_locale_charset_wrapper () : mfile_encoding);
 
-    if (encoding.compare ("utf-8") != 0)
+    if (encoding.compare ("utf-8") == 0)
+    {
+      // Check for BOM and strip it
+      if (src_str.compare (0, 3, "\xef\xbb\xbf") == 0)
+        src_str.erase (0, 3);
+    }
+    else
     {
       // convert encoding to UTF-8 before returning string
       const char *src = src_str.c_str ();