Mercurial > octave
comparison libinterp/corefcn/jsondecode.cc @ 29977:0d67814eb038
jsondecode.cc: New option "makeValidName" to disable string validation.
This cset is part of GSoC 2021. Octave structures are "more powerful"
compared to Matlab. For example, the following works in Octave only
struct ("image/png", "data")
In Matlab, `/` is a character prohibited by `matlab.lang.makeValidName`.
Avoiding any substitutions becomes necessary to preserve the original
key-value pairs of a JSON struct. One example is to read, process, and write
Jupyter-notebook files.
In Matlab, `matlab.lang.makeValidName` is always applied to the "key" field,
while Octave's `jsondecode` offers options such as `prefix` and
`ReplacementStyle` to adjust those settings. The new option allows turning
off the validation entirely during the JSON decoding process.
* libinterp/corefcn/jsondecode.cc: New option "makeValidName" to disable string
validation.
* test/json/jsondecode_BIST.tst: new test case for new option, fix white space
(tabs).
author | Abdallah Elshamy <abdallah.k.elshamy@gmail.com> |
---|---|
date | Tue, 17 Aug 2021 14:54:29 +0900 |
parents | 7d6709900da7 |
children | c7c69808356f |
comparison
equal
deleted
inserted
replaced
29976:c44c72cc68a0 | 29977:0d67814eb038 |
---|---|
28 #endif | 28 #endif |
29 | 29 |
30 #include "defun.h" | 30 #include "defun.h" |
31 #include "error.h" | 31 #include "error.h" |
32 #include "errwarn.h" | 32 #include "errwarn.h" |
33 #include "oct-string.h" | |
33 #include "ovl.h" | 34 #include "ovl.h" |
34 #include "utils.h" | 35 #include "utils.h" |
35 | 36 |
36 #if defined (HAVE_RAPIDJSON) | 37 #if defined (HAVE_RAPIDJSON) |
37 # include <rapidjson/document.h> | 38 # include <rapidjson/document.h> |
40 | 41 |
41 #if defined (HAVE_RAPIDJSON) | 42 #if defined (HAVE_RAPIDJSON) |
42 | 43 |
43 octave_value | 44 octave_value |
44 decode (const rapidjson::Value& val, | 45 decode (const rapidjson::Value& val, |
45 const octave::make_valid_name_options& options); | 46 const octave::make_valid_name_options* options); |
46 | 47 |
47 //! Decodes a numerical JSON value into a scalar number. | 48 //! Decodes a numerical JSON value into a scalar number. |
48 //! | 49 //! |
49 //! @param val JSON value that is guaranteed to be a numerical value. | 50 //! @param val JSON value that is guaranteed to be a numerical value. |
50 //! | 51 //! |
90 //! octave_value struct = decode_object (d, octave_value_list ()); | 91 //! octave_value struct = decode_object (d, octave_value_list ()); |
91 //! @endcode | 92 //! @endcode |
92 | 93 |
93 octave_value | 94 octave_value |
94 decode_object (const rapidjson::Value& val, | 95 decode_object (const rapidjson::Value& val, |
95 const octave::make_valid_name_options& options) | 96 const octave::make_valid_name_options* options) |
96 { | 97 { |
97 octave_scalar_map retval; | 98 octave_scalar_map retval; |
98 | 99 |
99 for (const auto& pair : val.GetObject ()) | 100 for (const auto& pair : val.GetObject ()) |
100 { | 101 { |
101 // Validator function "matlab.lang.makeValidName" to guarantee legitimate | 102 // Validator function "matlab.lang.makeValidName" to guarantee legitimate |
102 // variable name. | 103 // variable name. |
103 std::string varname = pair.name.GetString (); | 104 std::string varname = pair.name.GetString (); |
104 octave::make_valid_name (varname, options); | 105 if (options != nullptr) |
106 octave::make_valid_name (varname, *options); | |
105 retval.assign (varname, decode (pair.value, options)); | 107 retval.assign (varname, decode (pair.value, options)); |
106 } | 108 } |
107 | 109 |
108 return retval; | 110 return retval; |
109 } | 111 } |
182 //! octave_value cell = decode_string_and_mixed_array (d, octave_value_list ()); | 184 //! octave_value cell = decode_string_and_mixed_array (d, octave_value_list ()); |
183 //! @endcode | 185 //! @endcode |
184 | 186 |
185 octave_value | 187 octave_value |
186 decode_string_and_mixed_array (const rapidjson::Value& val, | 188 decode_string_and_mixed_array (const rapidjson::Value& val, |
187 const octave::make_valid_name_options& options) | 189 const octave::make_valid_name_options* options) |
188 { | 190 { |
189 Cell retval (dim_vector (val.Size (), 1)); | 191 Cell retval (dim_vector (val.Size (), 1)); |
190 octave_idx_type index = 0; | 192 octave_idx_type index = 0; |
191 for (const auto& elem : val.GetArray ()) | 193 for (const auto& elem : val.GetArray ()) |
192 retval(index++) = decode (elem, options); | 194 retval(index++) = decode (elem, options); |
218 //! octave_value object_array = decode_object_array (d, octave_value_list ()); | 220 //! octave_value object_array = decode_object_array (d, octave_value_list ()); |
219 //! @endcode | 221 //! @endcode |
220 | 222 |
221 octave_value | 223 octave_value |
222 decode_object_array (const rapidjson::Value& val, | 224 decode_object_array (const rapidjson::Value& val, |
223 const octave::make_valid_name_options& options) | 225 const octave::make_valid_name_options* options) |
224 { | 226 { |
225 Cell struct_cell = decode_string_and_mixed_array (val, options).cell_value (); | 227 Cell struct_cell = decode_string_and_mixed_array (val, options).cell_value (); |
226 string_vector field_names = struct_cell(0).scalar_map_value ().fieldnames (); | 228 string_vector field_names = struct_cell(0).scalar_map_value ().fieldnames (); |
227 | 229 |
228 bool same_field_names = true; | 230 bool same_field_names = true; |
275 //! octave_value cell = decode_array_of_arrays (d, octave_value_list ()); | 277 //! octave_value cell = decode_array_of_arrays (d, octave_value_list ()); |
276 //! @endcode | 278 //! @endcode |
277 | 279 |
278 octave_value | 280 octave_value |
279 decode_array_of_arrays (const rapidjson::Value& val, | 281 decode_array_of_arrays (const rapidjson::Value& val, |
280 const octave::make_valid_name_options& options) | 282 const octave::make_valid_name_options* options) |
281 { | 283 { |
282 // Some arrays should be decoded as NDArrays and others as cell arrays | 284 // Some arrays should be decoded as NDArrays and others as cell arrays |
283 Cell cell = decode_string_and_mixed_array (val, options).cell_value (); | 285 Cell cell = decode_string_and_mixed_array (val, options).cell_value (); |
284 | 286 |
285 // Only arrays with sub-arrays of booleans and numericals will return NDArray | 287 // Only arrays with sub-arrays of booleans and numericals will return NDArray |
341 //! octave_value array = decode_array (d, octave_value_list ()); | 343 //! octave_value array = decode_array (d, octave_value_list ()); |
342 //! @endcode | 344 //! @endcode |
343 | 345 |
344 octave_value | 346 octave_value |
345 decode_array (const rapidjson::Value& val, | 347 decode_array (const rapidjson::Value& val, |
346 const octave::make_valid_name_options& options) | 348 const octave::make_valid_name_options* options) |
347 { | 349 { |
348 // Handle empty arrays | 350 // Handle empty arrays |
349 if (val.Empty ()) | 351 if (val.Empty ()) |
350 return NDArray (); | 352 return NDArray (); |
351 | 353 |
404 //! octave_value value = decode (d, octave_value_list ()); | 406 //! octave_value value = decode (d, octave_value_list ()); |
405 //! @endcode | 407 //! @endcode |
406 | 408 |
407 octave_value | 409 octave_value |
408 decode (const rapidjson::Value& val, | 410 decode (const rapidjson::Value& val, |
409 const octave::make_valid_name_options& options) | 411 const octave::make_valid_name_options* options) |
410 { | 412 { |
411 if (val.IsBool ()) | 413 if (val.IsBool ()) |
412 return val.GetBool (); | 414 return val.GetBool (); |
413 else if (val.IsNumber ()) | 415 else if (val.IsNumber ()) |
414 return decode_number (val); | 416 return decode_number (val); |
431 DEFUN (jsondecode, args, , | 433 DEFUN (jsondecode, args, , |
432 doc: /* -*- texinfo -*- | 434 doc: /* -*- texinfo -*- |
433 @deftypefn {} {@var{object} =} jsondecode (@var{JSON_txt}) | 435 @deftypefn {} {@var{object} =} jsondecode (@var{JSON_txt}) |
434 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "ReplacementStyle", @var{rs}) | 436 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "ReplacementStyle", @var{rs}) |
435 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "Prefix", @var{pfx}) | 437 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "Prefix", @var{pfx}) |
438 @deftypefnx {} {@var{object} =} jsondecode (@dots{}, "makeValidName", @var{TF}) | |
436 | 439 |
437 Decode text that is formatted in JSON. | 440 Decode text that is formatted in JSON. |
438 | 441 |
439 The input @var{JSON_txt} is a string that contains JSON text. | 442 The input @var{JSON_txt} is a string that contains JSON text. |
440 | 443 |
442 decoding @var{JSON_txt}. | 445 decoding @var{JSON_txt}. |
443 | 446 |
444 For more information about the options @qcode{"ReplacementStyle"} and | 447 For more information about the options @qcode{"ReplacementStyle"} and |
445 @qcode{"Prefix"}, see | 448 @qcode{"Prefix"}, see |
446 @ref{XREFmatlab_lang_makeValidName,,matlab.lang.makeValidName}. | 449 @ref{XREFmatlab_lang_makeValidName,,matlab.lang.makeValidName}. |
450 | |
451 If the value of the option @qcode{"makeValidName"} is false then names | |
452 will not be changed by @code{matlab.lang.makeValidName} and the | |
453 @qcode{"ReplacementStyle"} and @qcode{"Prefix"} options will be ignored. | |
447 | 454 |
448 NOTE: Decoding and encoding JSON text is not guaranteed to reproduce the | 455 NOTE: Decoding and encoding JSON text is not guaranteed to reproduce the |
449 original text as some names may be changed by @code{matlab.lang.makeValidName}. | 456 original text as some names may be changed by @code{matlab.lang.makeValidName}. |
450 | 457 |
451 This table shows the conversions from JSON data types to Octave data types: | 458 This table shows the conversions from JSON data types to Octave data types: |
502 number = 7 | 509 number = 7 |
503 string = hi | 510 string = hi |
504 @end group | 511 @end group |
505 | 512 |
506 @group | 513 @group |
514 jsondecode ('@{"nu#m#ber": 7, "s#tr#ing": "hi"@}', ... | |
515 'makeValidName', false) | |
516 @result{} scalar structure containing the fields: | |
517 | |
518 nu#m#ber = 7 | |
519 s#tr#ing = hi | |
520 @end group | |
521 | |
522 @group | |
507 jsondecode ('@{"1": "one", "2": "two"@}', 'Prefix', 'm_') | 523 jsondecode ('@{"1": "one", "2": "two"@}', 'Prefix', 'm_') |
508 @result{} scalar structure containing the fields: | 524 @result{} scalar structure containing the fields: |
509 | 525 |
510 m_1 = one | 526 m_1 = one |
511 m_2 = two | 527 m_2 = two |
521 | 537 |
522 // makeValidName options are pairs, the number of arguments must be odd. | 538 // makeValidName options are pairs, the number of arguments must be odd. |
523 if (! (nargin % 2)) | 539 if (! (nargin % 2)) |
524 print_usage (); | 540 print_usage (); |
525 | 541 |
526 make_valid_name_options options (args.slice (1, nargin - 1)); | 542 // Detect if the user wants to use makeValidName |
543 bool use_makeValidName = true; | |
544 octave_value_list make_valid_name_params; | |
545 for (auto i = 1; i < nargin; i = i + 2) | |
546 { | |
547 std::string parameter = args(i).xstring_value ("jsondecode: " | |
548 "option argument must be a string"); | |
549 if (string::strcmpi (parameter, "makeValidName")) | |
550 { | |
551 use_makeValidName = args(i + 1).xbool_value ("jsondecode: " | |
552 "'makeValidName' value must be a bool"); | |
553 } | |
554 else | |
555 make_valid_name_params.append (args.slice(i, 2)); | |
556 } | |
557 | |
558 make_valid_name_options* options = nullptr; | |
559 | |
560 if (use_makeValidName) | |
561 { | |
562 make_valid_name_options options_obj (make_valid_name_params); | |
563 options = &options_obj; | |
564 } | |
527 | 565 |
528 if (! args(0).is_string ()) | 566 if (! args(0).is_string ()) |
529 error ("jsondecode: JSON_TXT must be a character string"); | 567 error ("jsondecode: JSON_TXT must be a character string"); |
530 | 568 |
531 std::string json = args(0).string_value (); | 569 std::string json = args(0).string_value (); |