changeset 29977:0d67814eb038

jsondecode.cc: New option "makeValidName" to disable string validation. This cset is part of GSoC 2021. Octave structures are "more powerful" compared to Matlab. For example, the following works in Octave only: struct ("image/png", "data"). In Matlab the `/` is a character prohibited by `matlab.lang.makeValidName`. Avoiding any substitutions becomes necessary to preserve the original key-value pairs of JSON structs. One example is to read, process, and write Jupyter-notebook files. In Matlab, `matlab.lang.makeValidName` is always applied on the "key" field, while Octave's `jsondecode` offers options such as `prefix` and `ReplacementStyle` to adjust those settings. The new option allows turning off the validation entirely during the JSON decoding process. * libinterp/corefcn/jsondecode.cc: New option "makeValidName" to disable string validation. * test/json/jsondecode_BIST.tst: new test case for new option, fix white space (tabs).
author Abdallah Elshamy <abdallah.k.elshamy@gmail.com>
date Tue, 17 Aug 2021 14:54:29 +0900
parents c44c72cc68a0
children c7c69808356f
files libinterp/corefcn/jsondecode.cc test/json/jsondecode_BIST.tst
diffstat 2 files changed, 78 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/jsondecode.cc	Tue Aug 17 00:13:44 2021 -0400
+++ b/libinterp/corefcn/jsondecode.cc	Tue Aug 17 14:54:29 2021 +0900
@@ -30,6 +30,7 @@
 #include "defun.h"
 #include "error.h"
 #include "errwarn.h"
+#include "oct-string.h"
 #include "ovl.h"
 #include "utils.h"
 
@@ -42,7 +43,7 @@
 
 octave_value
 decode (const rapidjson::Value& val,
-        const octave::make_valid_name_options& options);
+        const octave::make_valid_name_options* options);
 
 //! Decodes a numerical JSON value into a scalar number.
 //!
@@ -92,7 +93,7 @@
 
 octave_value
 decode_object (const rapidjson::Value& val,
-               const octave::make_valid_name_options& options)
+               const octave::make_valid_name_options* options)
 {
   octave_scalar_map retval;
 
@@ -101,7 +102,8 @@
     // Validator function "matlab.lang.makeValidName" to guarantee legitimate
     // variable name.
     std::string varname = pair.name.GetString ();
-    octave::make_valid_name (varname, options);
+    if (options != nullptr)
+      octave::make_valid_name (varname, *options);
     retval.assign (varname, decode (pair.value, options));
   }
 
@@ -184,7 +186,7 @@
 
 octave_value
 decode_string_and_mixed_array (const rapidjson::Value& val,
-                               const octave::make_valid_name_options& options)
+                               const octave::make_valid_name_options* options)
 {
   Cell retval (dim_vector (val.Size (), 1));
   octave_idx_type index = 0;
@@ -220,7 +222,7 @@
 
 octave_value
 decode_object_array (const rapidjson::Value& val,
-                     const octave::make_valid_name_options& options)
+                     const octave::make_valid_name_options* options)
 {
   Cell struct_cell = decode_string_and_mixed_array (val, options).cell_value ();
   string_vector field_names = struct_cell(0).scalar_map_value ().fieldnames ();
@@ -277,7 +279,7 @@
 
 octave_value
 decode_array_of_arrays (const rapidjson::Value& val,
-                        const octave::make_valid_name_options& options)
+                        const octave::make_valid_name_options* options)
 {
   // Some arrays should be decoded as NDArrays and others as cell arrays
   Cell cell = decode_string_and_mixed_array (val, options).cell_value ();
@@ -343,7 +345,7 @@
 
 octave_value
 decode_array (const rapidjson::Value& val,
-              const octave::make_valid_name_options& options)
+              const octave::make_valid_name_options* options)
 {
   // Handle empty arrays
   if (val.Empty ())
@@ -406,7 +408,7 @@
 
 octave_value
 decode (const rapidjson::Value& val,
-        const octave::make_valid_name_options& options)
+        const octave::make_valid_name_options* options)
 {
   if (val.IsBool ())
     return val.GetBool ();
@@ -433,6 +435,7 @@
 @deftypefn  {} {@var{object} =} jsondecode (@var{JSON_txt})
 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "ReplacementStyle", @var{rs})
 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "Prefix", @var{pfx})
+@deftypefnx {} {@var{object} =} jsondecode (@dots{}, "makeValidName", @var{TF})
 
 Decode text that is formatted in JSON.
 
@@ -445,6 +448,10 @@
 @qcode{"Prefix"}, see
 @ref{XREFmatlab_lang_makeValidName,,matlab.lang.makeValidName}.
 
+If the value of the option @qcode{"makeValidName"} is false then names
+will not be changed by @code{matlab.lang.makeValidName} and the
+@qcode{"ReplacementStyle"} and @qcode{"Prefix"} options will be ignored.
+
 NOTE: Decoding and encoding JSON text is not guaranteed to reproduce the
 original text as some names may be changed by @code{matlab.lang.makeValidName}.
 
@@ -504,6 +511,15 @@
 @end group
 
 @group
+jsondecode ('@{"nu#m#ber": 7, "s#tr#ing": "hi"@}', ...
+            'makeValidName', false)
+    @result{} scalar structure containing the fields:
+
+         nu#m#ber = 7
+         s#tr#ing = hi
+@end group
+
+@group
 jsondecode ('@{"1": "one", "2": "two"@}', 'Prefix', 'm_')
     @result{} scalar structure containing the fields:
 
@@ -523,7 +539,29 @@
   if (! (nargin % 2))
     print_usage ();
 
-  make_valid_name_options options (args.slice (1, nargin - 1));
+  // Detect if the user wants to use makeValidName
+  bool use_makeValidName = true;
+  octave_value_list make_valid_name_params;
+  for (auto i = 1; i < nargin; i = i + 2)
+    {
+      std::string parameter = args(i).xstring_value ("jsondecode: "
+        "option argument must be a string");
+      if (string::strcmpi (parameter, "makeValidName"))
+        {
+          use_makeValidName = args(i + 1).xbool_value ("jsondecode: "
+            "'makeValidName' value must be a bool");
+        }
+      else
+        make_valid_name_params.append (args.slice(i, 2));
+    }
+
+  make_valid_name_options* options = nullptr;
+
+  if (use_makeValidName)
+    {
+      make_valid_name_options options_obj (make_valid_name_params);
+      options = &options_obj;
+    }
 
   if (! args(0).is_string ())
     error ("jsondecode: JSON_TXT must be a character string");
--- a/test/json/jsondecode_BIST.tst	Tue Aug 17 00:13:44 2021 -0400
+++ b/test/json/jsondecode_BIST.tst	Tue Aug 17 14:54:29 2021 +0900
@@ -200,21 +200,21 @@
 %! json = ['{' , ...
 %!     '"glossary": { ', ...
 %!         '"title": "example glossary",', ...
-%! 		'"GlossDiv": {', ...
+%!     '"GlossDiv": {', ...
 %!             '"title": "S",', ...
-%! 			'"GlossList": {', ...
+%!     '"GlossList": {', ...
 %!                 '"GlossEntry": {', ...
 %!                     '"ID": "SGML",', ...
-%! 					'"SortAs": "SGML",', ...
-%! 					'"GlossTerm": "Standard Generalized Markup Language",', ...
-%! 					'"Acronym": "SGML",', ...
-%! 					'"Abbrev": "ISO 8879:1986",', ...
-%! 					'"GlossDef": {', ...
+%!                     '"SortAs": "SGML",', ...
+%!                     '"GlossTerm": "Standard Generalized Markup Language",', ...
+%!                     '"Acronym": "SGML",', ...
+%!                     '"Abbrev": "ISO 8879:1986",', ...
+%!                     '"GlossDef": {', ...
 %!                         '"para": "A meta-markup language, ', ...
 %!                         'used to create markup languages such as DocBook.",', ...
-%! 						'"GlossSeeAlso": ["GML", "XML"]', ...
+%!                         '"GlossSeeAlso": ["GML", "XML"]', ...
 %!                     '},', ...
-%! 					'"GlossSee": "markup"', ...
+%!                     '"GlossSee": "markup"', ...
 %!                 '}', ...
 %!             '}', ...
 %!         '}', ...
@@ -517,7 +517,7 @@
 %! obs  = jsondecode (json);
 %! assert (isequaln (obs, exp));
 
-%%% Test 7: Check "ReplacementStyle" and "Prefix" options
+%%% Test 7: Check "ReplacementStyle", "Prefix", and "makeValidName" options
 
 %!testif HAVE_RAPIDJSON
 %! json = '{"1a": {"1*a": {"1+*/-a": {"1#a": {}}}}}';
@@ -554,3 +554,24 @@
 %!                                struct('x_1a', 3, 'b_2', 4)}});
 %! obs  = jsondecode (json, "ReplacementStyle", "underscore", "Prefix", "x_");
 %! assert (isequal (obs, exp));
+
+%% Check decoding of objects inside an object without using makeValidName
+%!testif HAVE_RAPIDJSON
+%! json = ['{"object": {"  hi 1   ": 1, "%string.array": 2,' ...
+%!                     '"img/svg+xml": 3, "": 1}}'];
+%! exp  = struct ('object', ...
+%!                struct ('  hi 1   ', 1, '%string.array', 2,
+%!                        'img/svg+xml', 3, '', 1));
+%! obs  = jsondecode (json, "makeValidName", false);
+%! assert (isequal (obs, exp));
+
+%!testif HAVE_RAPIDJSON
+%! json = '{"1a": {"1*a": {"1+*/-a": {"1#a": {}}}}}';
+%! exp  = struct ('n1a', ...
+%!                struct ('n1a', struct ('n1a', struct ('n1a', struct ()))));
+%! obs  = jsondecode (json, "ReplacementStyle", "delete", ...
+%!                          "makeValidName", false, ...
+%!                          "Prefix", "_", ...
+%!                          "makeValidName", true, ...
+%!                          "Prefix", "n");
+%! assert (isequal (obs, exp));