diff libinterp/corefcn/jsondecode.cc @ 29977:0d67814eb038

jsondecode.cc: New option "makeValidName" to disable string validation. This cset is part of GSoC 2021. Octave structures are "more powerful" compared to Matlab. For example, the following works in Octave only: struct ("image/png", "data"). In Matlab, `/` is a character prohibited by `matlab.lang.makeValidName`. Avoiding any substitutions becomes necessary to preserve the original key-value pairs of JSON structs. One example is to read, process, and write Jupyter-notebook files. In Matlab, `matlab.lang.makeValidName` is always applied to the "key" field, while Octave's `jsondecode` offers options such as `Prefix` and `ReplacementStyle` to adjust those settings. The new option allows turning off the validation entirely during the JSON decoding process. * libinterp/corefcn/jsondecode.cc: New option "makeValidName" to disable string validation. * test/json/jsondecode_BIST.tst: New test case for the new option, fix white space (tabs).
author Abdallah Elshamy <abdallah.k.elshamy@gmail.com>
date Tue, 17 Aug 2021 14:54:29 +0900
parents 7d6709900da7
children c7c69808356f
line wrap: on
line diff
--- a/libinterp/corefcn/jsondecode.cc	Tue Aug 17 00:13:44 2021 -0400
+++ b/libinterp/corefcn/jsondecode.cc	Tue Aug 17 14:54:29 2021 +0900
@@ -30,6 +30,7 @@
 #include "defun.h"
 #include "error.h"
 #include "errwarn.h"
+#include "oct-string.h"
 #include "ovl.h"
 #include "utils.h"
 
@@ -42,7 +43,7 @@
 
 octave_value
 decode (const rapidjson::Value& val,
-        const octave::make_valid_name_options& options);
+        const octave::make_valid_name_options* options);
 
 //! Decodes a numerical JSON value into a scalar number.
 //!
@@ -92,7 +93,7 @@
 
 octave_value
 decode_object (const rapidjson::Value& val,
-               const octave::make_valid_name_options& options)
+               const octave::make_valid_name_options* options)
 {
   octave_scalar_map retval;
 
@@ -101,7 +102,8 @@
     // Validator function "matlab.lang.makeValidName" to guarantee legitimate
     // variable name.
     std::string varname = pair.name.GetString ();
-    octave::make_valid_name (varname, options);
+    if (options != nullptr)
+      octave::make_valid_name (varname, *options);
     retval.assign (varname, decode (pair.value, options));
   }
 
@@ -184,7 +186,7 @@
 
 octave_value
 decode_string_and_mixed_array (const rapidjson::Value& val,
-                               const octave::make_valid_name_options& options)
+                               const octave::make_valid_name_options* options)
 {
   Cell retval (dim_vector (val.Size (), 1));
   octave_idx_type index = 0;
@@ -220,7 +222,7 @@
 
 octave_value
 decode_object_array (const rapidjson::Value& val,
-                     const octave::make_valid_name_options& options)
+                     const octave::make_valid_name_options* options)
 {
   Cell struct_cell = decode_string_and_mixed_array (val, options).cell_value ();
   string_vector field_names = struct_cell(0).scalar_map_value ().fieldnames ();
@@ -277,7 +279,7 @@
 
 octave_value
 decode_array_of_arrays (const rapidjson::Value& val,
-                        const octave::make_valid_name_options& options)
+                        const octave::make_valid_name_options* options)
 {
   // Some arrays should be decoded as NDArrays and others as cell arrays
   Cell cell = decode_string_and_mixed_array (val, options).cell_value ();
@@ -343,7 +345,7 @@
 
 octave_value
 decode_array (const rapidjson::Value& val,
-              const octave::make_valid_name_options& options)
+              const octave::make_valid_name_options* options)
 {
   // Handle empty arrays
   if (val.Empty ())
@@ -406,7 +408,7 @@
 
 octave_value
 decode (const rapidjson::Value& val,
-        const octave::make_valid_name_options& options)
+        const octave::make_valid_name_options* options)
 {
   if (val.IsBool ())
     return val.GetBool ();
@@ -433,6 +435,7 @@
 @deftypefn  {} {@var{object} =} jsondecode (@var{JSON_txt})
 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "ReplacementStyle", @var{rs})
 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "Prefix", @var{pfx})
+@deftypefnx {} {@var{object} =} jsondecode (@dots{}, "makeValidName", @var{TF})
 
 Decode text that is formatted in JSON.
 
@@ -445,6 +448,10 @@
 @qcode{"Prefix"}, see
 @ref{XREFmatlab_lang_makeValidName,,matlab.lang.makeValidName}.
 
+If the value of the option @qcode{"makeValidName"} is false then names
+will not be changed by @code{matlab.lang.makeValidName} and the
+@qcode{"ReplacementStyle"} and @qcode{"Prefix"} options will be ignored.
+
 NOTE: Decoding and encoding JSON text is not guaranteed to reproduce the
 original text as some names may be changed by @code{matlab.lang.makeValidName}.
 
@@ -504,6 +511,15 @@
 @end group
 
 @group
+jsondecode ('@{"nu#m#ber": 7, "s#tr#ing": "hi"@}', ...
+            'makeValidName', false)
+    @result{} scalar structure containing the fields:
+
+         nu#m#ber = 7
+         s#tr#ing = hi
+@end group
+
+@group
 jsondecode ('@{"1": "one", "2": "two"@}', 'Prefix', 'm_')
     @result{} scalar structure containing the fields:
 
@@ -523,7 +539,29 @@
   if (! (nargin % 2))
     print_usage ();
 
-  make_valid_name_options options (args.slice (1, nargin - 1));
+  // Detect if the user wants to use makeValidName
+  bool use_makeValidName = true;
+  octave_value_list make_valid_name_params;
+  for (auto i = 1; i < nargin; i = i + 2)
+    {
+      std::string parameter = args(i).xstring_value ("jsondecode: "
+        "option argument must be a string");
+      if (string::strcmpi (parameter, "makeValidName"))
+        {
+          use_makeValidName = args(i + 1).xbool_value ("jsondecode: "
+            "'makeValidName' value must be a bool");
+        }
+      else
+        make_valid_name_params.append (args.slice(i, 2));
+    }
+
+  // OPTIONS_OBJ lives at function scope so OPTIONS never dangles; a null
+  // OPTIONS tells the decoders to skip name validation entirely.
+  make_valid_name_options options_obj (use_makeValidName
+                                       ? make_valid_name_params
+                                       : octave_value_list ());
+  make_valid_name_options* options
+    = (use_makeValidName ? &options_obj : nullptr);
 
   if (! args(0).is_string ())
     error ("jsondecode: JSON_TXT must be a character string");