comparison libinterp/corefcn/jsondecode.cc @ 29977:0d67814eb038

jsondecode.cc: New option "makeValidName" to disable string validation. This cset is part of GSoC 2021. Octave structures are "more powerful" compared to Matlab. For example, the following works in Octave only: struct ("image/png", "data"). In Matlab, the `/` is a character prohibited by `matlab.lang.makeValidName`. Avoiding any substitutions becomes necessary to preserve the original key-value pairs of JSON structs. One example is to read, process, and write Jupyter-notebook files. In Matlab, `matlab.lang.makeValidName` is always applied to the "key" field, while Octave's `jsondecode` offers options such as `prefix` and `ReplacementStyle` to adjust those settings. The new option allows turning off the validation entirely during the JSON decoding process. * libinterp/corefcn/jsondecode.cc: New option "makeValidName" to disable string validation. * test/json/jsondecode_BIST.tst: New test case for the new option, fix white space (tabs).
author Abdallah Elshamy <abdallah.k.elshamy@gmail.com>
date Tue, 17 Aug 2021 14:54:29 +0900
parents 7d6709900da7
children c7c69808356f
comparison
equal deleted inserted replaced
29976:c44c72cc68a0 29977:0d67814eb038
28 #endif 28 #endif
29 29
30 #include "defun.h" 30 #include "defun.h"
31 #include "error.h" 31 #include "error.h"
32 #include "errwarn.h" 32 #include "errwarn.h"
33 #include "oct-string.h"
33 #include "ovl.h" 34 #include "ovl.h"
34 #include "utils.h" 35 #include "utils.h"
35 36
36 #if defined (HAVE_RAPIDJSON) 37 #if defined (HAVE_RAPIDJSON)
37 # include <rapidjson/document.h> 38 # include <rapidjson/document.h>
40 41
41 #if defined (HAVE_RAPIDJSON) 42 #if defined (HAVE_RAPIDJSON)
42 43
43 octave_value 44 octave_value
44 decode (const rapidjson::Value& val, 45 decode (const rapidjson::Value& val,
45 const octave::make_valid_name_options& options); 46 const octave::make_valid_name_options* options);
46 47
47 //! Decodes a numerical JSON value into a scalar number. 48 //! Decodes a numerical JSON value into a scalar number.
48 //! 49 //!
49 //! @param val JSON value that is guaranteed to be a numerical value. 50 //! @param val JSON value that is guaranteed to be a numerical value.
50 //! 51 //!
90 //! octave_value struct = decode_object (d, octave_value_list ()); 91 //! octave_value struct = decode_object (d, octave_value_list ());
91 //! @endcode 92 //! @endcode
92 93
93 octave_value 94 octave_value
94 decode_object (const rapidjson::Value& val, 95 decode_object (const rapidjson::Value& val,
95 const octave::make_valid_name_options& options) 96 const octave::make_valid_name_options* options)
96 { 97 {
97 octave_scalar_map retval; 98 octave_scalar_map retval;
98 99
99 for (const auto& pair : val.GetObject ()) 100 for (const auto& pair : val.GetObject ())
100 { 101 {
101 // Validator function "matlab.lang.makeValidName" to guarantee legitimate 102 // Validator function "matlab.lang.makeValidName" to guarantee legitimate
102 // variable name. 103 // variable name.
103 std::string varname = pair.name.GetString (); 104 std::string varname = pair.name.GetString ();
104 octave::make_valid_name (varname, options); 105 if (options != nullptr)
106 octave::make_valid_name (varname, *options);
105 retval.assign (varname, decode (pair.value, options)); 107 retval.assign (varname, decode (pair.value, options));
106 } 108 }
107 109
108 return retval; 110 return retval;
109 } 111 }
182 //! octave_value cell = decode_string_and_mixed_array (d, octave_value_list ()); 184 //! octave_value cell = decode_string_and_mixed_array (d, octave_value_list ());
183 //! @endcode 185 //! @endcode
184 186
185 octave_value 187 octave_value
186 decode_string_and_mixed_array (const rapidjson::Value& val, 188 decode_string_and_mixed_array (const rapidjson::Value& val,
187 const octave::make_valid_name_options& options) 189 const octave::make_valid_name_options* options)
188 { 190 {
189 Cell retval (dim_vector (val.Size (), 1)); 191 Cell retval (dim_vector (val.Size (), 1));
190 octave_idx_type index = 0; 192 octave_idx_type index = 0;
191 for (const auto& elem : val.GetArray ()) 193 for (const auto& elem : val.GetArray ())
192 retval(index++) = decode (elem, options); 194 retval(index++) = decode (elem, options);
218 //! octave_value object_array = decode_object_array (d, octave_value_list ()); 220 //! octave_value object_array = decode_object_array (d, octave_value_list ());
219 //! @endcode 221 //! @endcode
220 222
221 octave_value 223 octave_value
222 decode_object_array (const rapidjson::Value& val, 224 decode_object_array (const rapidjson::Value& val,
223 const octave::make_valid_name_options& options) 225 const octave::make_valid_name_options* options)
224 { 226 {
225 Cell struct_cell = decode_string_and_mixed_array (val, options).cell_value (); 227 Cell struct_cell = decode_string_and_mixed_array (val, options).cell_value ();
226 string_vector field_names = struct_cell(0).scalar_map_value ().fieldnames (); 228 string_vector field_names = struct_cell(0).scalar_map_value ().fieldnames ();
227 229
228 bool same_field_names = true; 230 bool same_field_names = true;
275 //! octave_value cell = decode_array_of_arrays (d, octave_value_list ()); 277 //! octave_value cell = decode_array_of_arrays (d, octave_value_list ());
276 //! @endcode 278 //! @endcode
277 279
278 octave_value 280 octave_value
279 decode_array_of_arrays (const rapidjson::Value& val, 281 decode_array_of_arrays (const rapidjson::Value& val,
280 const octave::make_valid_name_options& options) 282 const octave::make_valid_name_options* options)
281 { 283 {
282 // Some arrays should be decoded as NDArrays and others as cell arrays 284 // Some arrays should be decoded as NDArrays and others as cell arrays
283 Cell cell = decode_string_and_mixed_array (val, options).cell_value (); 285 Cell cell = decode_string_and_mixed_array (val, options).cell_value ();
284 286
285 // Only arrays with sub-arrays of booleans and numericals will return NDArray 287 // Only arrays with sub-arrays of booleans and numericals will return NDArray
341 //! octave_value array = decode_array (d, octave_value_list ()); 343 //! octave_value array = decode_array (d, octave_value_list ());
342 //! @endcode 344 //! @endcode
343 345
344 octave_value 346 octave_value
345 decode_array (const rapidjson::Value& val, 347 decode_array (const rapidjson::Value& val,
346 const octave::make_valid_name_options& options) 348 const octave::make_valid_name_options* options)
347 { 349 {
348 // Handle empty arrays 350 // Handle empty arrays
349 if (val.Empty ()) 351 if (val.Empty ())
350 return NDArray (); 352 return NDArray ();
351 353
404 //! octave_value value = decode (d, octave_value_list ()); 406 //! octave_value value = decode (d, octave_value_list ());
405 //! @endcode 407 //! @endcode
406 408
407 octave_value 409 octave_value
408 decode (const rapidjson::Value& val, 410 decode (const rapidjson::Value& val,
409 const octave::make_valid_name_options& options) 411 const octave::make_valid_name_options* options)
410 { 412 {
411 if (val.IsBool ()) 413 if (val.IsBool ())
412 return val.GetBool (); 414 return val.GetBool ();
413 else if (val.IsNumber ()) 415 else if (val.IsNumber ())
414 return decode_number (val); 416 return decode_number (val);
431 DEFUN (jsondecode, args, , 433 DEFUN (jsondecode, args, ,
432 doc: /* -*- texinfo -*- 434 doc: /* -*- texinfo -*-
433 @deftypefn {} {@var{object} =} jsondecode (@var{JSON_txt}) 435 @deftypefn {} {@var{object} =} jsondecode (@var{JSON_txt})
434 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "ReplacementStyle", @var{rs}) 436 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "ReplacementStyle", @var{rs})
435 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "Prefix", @var{pfx}) 437 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "Prefix", @var{pfx})
438 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "makeValidName", @var{TF})
436 439
437 Decode text that is formatted in JSON. 440 Decode text that is formatted in JSON.
438 441
439 The input @var{JSON_txt} is a string that contains JSON text. 442 The input @var{JSON_txt} is a string that contains JSON text.
440 443
442 decoding @var{JSON_txt}. 445 decoding @var{JSON_txt}.
443 446
444 For more information about the options @qcode{"ReplacementStyle"} and 447 For more information about the options @qcode{"ReplacementStyle"} and
445 @qcode{"Prefix"}, see 448 @qcode{"Prefix"}, see
446 @ref{XREFmatlab_lang_makeValidName,,matlab.lang.makeValidName}. 449 @ref{XREFmatlab_lang_makeValidName,,matlab.lang.makeValidName}.
450
451 If the value of the option @qcode{"makeValidName"} is false then names
452 will not be changed by @code{matlab.lang.makeValidName} and the
453 @qcode{"ReplacementStyle"} and @qcode{"Prefix"} options will be ignored.
447 454
448 NOTE: Decoding and encoding JSON text is not guaranteed to reproduce the 455 NOTE: Decoding and encoding JSON text is not guaranteed to reproduce the
449 original text as some names may be changed by @code{matlab.lang.makeValidName}. 456 original text as some names may be changed by @code{matlab.lang.makeValidName}.
450 457
451 This table shows the conversions from JSON data types to Octave data types: 458 This table shows the conversions from JSON data types to Octave data types:
502 number = 7 509 number = 7
503 string = hi 510 string = hi
504 @end group 511 @end group
505 512
506 @group 513 @group
514 jsondecode ('@{"nu#m#ber": 7, "s#tr#ing": "hi"@}', ...
515 'makeValidName', false)
516 @result{} scalar structure containing the fields:
517
518 nu#m#ber = 7
519 s#tr#ing = hi
520 @end group
521
522 @group
507 jsondecode ('@{"1": "one", "2": "two"@}', 'Prefix', 'm_') 523 jsondecode ('@{"1": "one", "2": "two"@}', 'Prefix', 'm_')
508 @result{} scalar structure containing the fields: 524 @result{} scalar structure containing the fields:
509 525
510 m_1 = one 526 m_1 = one
511 m_2 = two 527 m_2 = two
521 537
522 // makeValidName options are pairs, the number of arguments must be odd. 538 // makeValidName options are pairs, the number of arguments must be odd.
523 if (! (nargin % 2)) 539 if (! (nargin % 2))
524 print_usage (); 540 print_usage ();
525 541
526 make_valid_name_options options (args.slice (1, nargin - 1)); 542 // Detect if the user wants to use makeValidName
543 bool use_makeValidName = true;
544 octave_value_list make_valid_name_params;
545 for (auto i = 1; i < nargin; i = i + 2)
546 {
547 std::string parameter = args(i).xstring_value ("jsondecode: "
548 "option argument must be a string");
549 if (string::strcmpi (parameter, "makeValidName"))
550 {
551 use_makeValidName = args(i + 1).xbool_value ("jsondecode: "
552 "'makeValidName' value must be a bool");
553 }
554 else
555 make_valid_name_params.append (args.slice(i, 2));
556 }
557
558 make_valid_name_options* options = nullptr;
559
560 if (use_makeValidName)
561 {
562 make_valid_name_options options_obj (make_valid_name_params);
563 options = &options_obj;
564 }
527 565
528 if (! args(0).is_string ()) 566 if (! args(0).is_string ())
529 error ("jsondecode: JSON_TXT must be a character string"); 567 error ("jsondecode: JSON_TXT must be a character string");
530 568
531 std::string json = args(0).string_value (); 569 std::string json = args(0).string_value ();