Mercurial > octave
diff scripts/miscellaneous/JupyterNotebook.m @ 30169:cefa5d2d30bc
New class for filling and running Jupyter Notebooks.
* NEWS: Add to list of new functions announce feature.
* scripts/miscellaneous/JupyterNotebook.m: New classdef class.
* scripts/miscellaneous/module.mk: Add JupyterNotebook to build system.
* test/jupyter-notebook/JupyterNotebook.tst,
test/jupyter-notebook/octave_kernel.ipynb,
test/jupyter-notebook/plot_magic_and_errors.ipynb: New test files.
* test/jupyter-notebook/module.mk: Add new test files to build system.
* test/module.mk: Add "jupyter-notebook" directory to build system.
This is the result of GSoC 2021 by Abdallah Elshamy.
Patch pushed by Kai T. Ohlhus.
author | Abdallah Elshamy <abdallah.k.elshamy@gmail.com> |
---|---|
date | Tue, 14 Sep 2021 17:54:04 +0900 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/miscellaneous/JupyterNotebook.m Tue Sep 14 17:54:04 2021 +0900 @@ -0,0 +1,668 @@ +## Copyright (C) 2021 The Octave Project Developers +## +## This program is free software: you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program. If not, see +## <https://www.gnu.org/licenses/>. + + +classdef JupyterNotebook < handle + + ## -*- texinfo -*- + ## @deftypefn {} {@var{notebook} =} JupyterNotebook (@var{notebookFileName}) + ## + ## Run and fill the Jupyter Notebook in @var{notebookFileName} within + ## GNU Octave. + ## + ## Supported are textual and graphical Octave outputs. + ## + ## This class has a public attribute @qcode{notebook} which is a struct + ## representing the JSON-decoded Jupyter Notebook. This attribute is + ## intentionally public to enable advanced notebook manipulations. + ## + ## Note: Jupyter Notebook versions (@qcode{nbformat}) lower than 4.0 are + ## not supported. + ## + ## @qcode{%plot} magic is supported with the following settings: + ## @itemize @bullet + ## @item + ## "%plot -f <format>" or "%plot --format <format>": specifies the + ## image storage format. Supported formats are: + ## + ## @itemize @minus + ## @item + ## PNG (default) + ## + ## @item + ## SVG (Note: If SVG images do not appear in the notebook, it is most + ## related to the Jupyter Notebook security mechanism and explicitly + ## "trusting" them is necessary). + ## + ## @item + ## JPG + ## @end itemize + ## + ## @item + ## "%plot -r <number>" or "%plot --resolution <number>": specifies the + ## image resolution. + ## + ## @item + ## "%plot -w <number>" or "%plot --width <number>": specifies the + ## image width. + ## + ## @item + ## "%plot -h <number>" or "%plot --height <number>": specifies the + ## image height. + ## @end itemize + ## + ## Examples: + ## + ## @example + ## @group + ## ## Run all cells and generate the filled notebook + ## + ## ## Instantiate an object from the notebook file + ## notebook = JupyterNotebook("myNotebook.ipynb") + ## @result{} notebook = + ## + ## <object JupyterNotebook> + ## + ## ## Run the code and embed the results in the @qcode{notebook} attribute + ## notebook.runAll() + ## ## Generate the new notebook by overwriting the original notebook + ## notebook.generateNotebook("myNotebook.ipynb") + ## @end group + ## + ## @group + ## ## Run the second cell and generate the filled notebook + ## + ## ## Instantiate an object from the notebook file + ## notebook = JupyterNotebook("myNotebook.ipynb") + ## @result{} notebook = + ## + ## <object JupyterNotebook> + ## + ## ## Run the code and embed the results in the @qcode{notebook} attribute + ## notebook.run(2) + ## ## Generate the new notebook in a new file + ## notebook.generateNotebook("myNewNotebook.ipynb") + ## @end group + ## + ## @group + ## ## Generate an Octave script from a notebook + ## + ## ## Instantiate an object from the notebook file + ## notebook = JupyterNotebook("myNotebook.ipynb") + ## @result{} notebook = + ## + ## <object JupyterNotebook> + ## + ## ## Generate the octave script + ## notebook.generateOctaveScript("myScript.m") + ## @end group + ## @end example + ## + ## @seealso{jsondecode, jsonencode} + ## @end deftypefn + + properties + + notebook = struct() + + endproperties + + properties (Access = "private") + + context = struct("ans", "") + + endproperties + + methods + + function obj = JupyterNotebook (notebookFileName) + + if (nargin != 1) + print_usage (); + endif + + if (! (ischar (notebookFileName) && isrow (notebookFileName))) + error ("JupyterNotebook: notebookFileName must be a string"); + endif + + obj.notebook = jsondecode (fileread (notebookFileName), + "makeValidName", false); + + ## Validate the notebook's format according to nbformat: 4.0 + if (! (isfield (obj.notebook, "metadata") + && isfield (obj.notebook, "nbformat") + && isfield (obj.notebook, "nbformat_minor") + && isfield (obj.notebook, "cells"))) + error ("JupyterNotebook: not valid format for Jupyter notebooks"); + endif + + ## Issue a warning if the format is lower than 4.0 + if (obj.notebook.nbformat < 4) + warning (["JupyterNotebook: nbformat versions lower than 4.0 are ", ... + "not supported"]); + endif + + ## Handle the case if there is only one cell. + ## Make "obj.notebook.cells" a cell of structs to match the format. + if (numel (obj.notebook.cells) == 1) + obj.notebook.cells = {obj.notebook.cells}; + endif + + ## Handle the case if the cells have the same keys. + ## Make "obj.notebook.cells" a cell of structs instead of struct array + ## to unify the indexing method. + if (isstruct (obj.notebook.cells)) + obj.notebook.cells = num2cell (obj.notebook.cells); + endif + + for i = 1:numel (obj.notebook.cells) + if (! isfield (obj.notebook.cells{i}, "source")) + error ("JupyterNotebook: cells must contain a \"source\" field"); + endif + + if (! isfield (obj.notebook.cells{i}, "cell_type")) + error ("JupyterNotebook: cells must contain a \"cell_type\" field"); + endif + + ## Handle when null JSON values are decoded into empty arrays. + if (isfield (obj.notebook.cells{i}, "execution_count") + && numel (obj.notebook.cells{i}.execution_count) == 0) + obj.notebook.cells{i}.execution_count = 1; + endif + + ## Handle the case if there is only one output in the cell. + ## Make the outputs of the cell a cell of structs to match the format. + if (isfield (obj.notebook.cells{i}, "outputs") + && numel (obj.notebook.cells{i}.outputs) == 1) + obj.notebook.cells{i}.outputs = {obj.notebook.cells{i}.outputs}; + endif + endfor + + endfunction + + + function generateOctaveScript (obj, scriptFileName) + + ## -*- texinfo -*- + ## @deftypefn {} {} generateOctaveScript (@var{scriptFileName}) + ## + ## Write an Octave script that has the contents of the Jupyter Notebook + ## stored in the @qcode{notebook} attribute to @var{scriptFileName}. + ## + ## Non code cells are generated as block comments. + ## + ## See @code{help JupyterNotebook} for examples. + ## + ## @end deftypefn + + if (nargin != 2) + print_usage (); + endif + + if (! (ischar (scriptFileName) && isrow (scriptFileName))) + error ("JupyterNotebook: scriptFileName must be a string"); + endif + + fhandle = fopen (scriptFileName, "w"); + + for i = 1:numel (obj.notebook.cells) + if (strcmp (obj.notebook.cells{i}.cell_type, "markdown")) + fputs (fhandle, "\n#{\n"); + endif + + for k = 1:numel (obj.notebook.cells{i}.source) + fputs (fhandle, obj.notebook.cells{i}.source{k}); + endfor + + if (strcmp (obj.notebook.cells{i}.cell_type, "markdown")) + fputs (fhandle, "\n#}\n"); + endif + fputs (fhandle, "\n"); + endfor + fclose (fhandle); + + endfunction + + + function generateNotebook (obj, notebookFileName) + + ## -*- texinfo -*- + ## @deftypefn {} {} generateNotebook (@var{notebookFileName}) + ## + ## Write the Jupyter Notebook stored in the @qcode{notebook} + ## attribute to @var{notebookFileName}. + ## + ## The @qcode{notebook} attribute is encoded to JSON text. + ## + ## See @code{help JupyterNotebook} for examples. + ## + ## @end deftypefn + + if (nargin != 2) + print_usage (); + endif + + if (! (ischar (notebookFileName) && isrow (notebookFileName))) + error ("JupyterNotebook: notebookFileName must be a string"); + endif + + fhandle = fopen (notebookFileName, "w"); + + fputs (fhandle, jsonencode (obj.notebook, "ConvertInfAndNaN", false, + "PrettyPrint", true)); + + fclose (fhandle); + + endfunction + + + function run (obj, cell_index) + + ## -*- texinfo -*- + ## @deftypefn {} {} run (@var{cell_index}) + ## + ## Run the Jupyter Notebook cell with index @var{cell_index} + ## and eventually replace previous output cells in the object. + ## + ## The first Jupyter Notebook cell has the index 1. + ## + ## Note: The code evaluation of the Jupyter Notebook cells is done + ## in a separate Jupyter Notebook context. Thus currently open + ## figures and workspace variables won't be affected by executing + ## this function. However, current workspace variables cannot be + ## accessed either. + ## + ## See @code{help JupyterNotebook} for examples. + ## + ## @end deftypefn + + if (nargin != 2) + print_usage (); + endif + + if (! (isscalar (cell_index) && ! islogical (cell_index) + && (mod (cell_index, 1) == 0) && (cell_index > 0))) + error ("JupyterNotebook: cell_index must be a scalar positive integer"); + endif + + if (cell_index > length (obj.notebook.cells)) + error ("JupyterNotebook: cell_index is out of bound"); + endif + + if (! strcmp (obj.notebook.cells{cell_index}.cell_type, "code")) + return; + endif + + ## Remove previous outputs. + obj.notebook.cells{cell_index}.outputs = {}; + + if (isempty (obj.notebook.cells{cell_index}.source)) + return; + endif + + ## Default values for printOptions. + printOptions.imageFormat = "png"; + printOptions.resolution = "0"; + + ## The default width and height in Jupyter notebook + printOptions.width = "640"; + printOptions.height = "480"; + + ## Parse "plot magic" commands. + ## https://github.com/Calysto/metakernel/blob/master/metakernel/ ... + ## magics/README.md#plot + for j = 1 : numel (obj.notebook.cells{cell_index}.source) + if (strncmpi (obj.notebook.cells{cell_index}.source{j}, "%plot", 5)) + magics = strsplit (strtrim ( + obj.notebook.cells{cell_index}.source{j})); + for i = 1 : numel (magics) + if (any (strcmp (magics{i}, {"-f", "--format"})) + && (i < numel (magics))) + printOptions.imageFormat = magics{i+1}; + endif + if (any (strcmp (magics{i}, {"-r", "--resolution"})) + && (i < numel (magics))) + printOptions.resolution = magics{i+1}; + endif + if (any (strcmp (magics{i}, {"-w", "--width"})) + && (i < numel (magics))) + printOptions.width = magics{i+1}; + endif + if (any (strcmp (magics{i}, {"-h", "--height"})) + && (i < numel (magics))) + printOptions.height = magics{i+1}; + endif + endfor + endif + endfor + + ## Remember previously opened figures. + fig_ids = findall (0, "type", "figure"); + + ## Create a new figure, if there are existing plots. + if (! isempty (fig_ids)) + newFig = figure (); + endif + + stream_output = struct ("name", "stdout", "output_type", "stream"); + + output_lines = obj.evalCode (strjoin ( + obj.notebook.cells{cell_index}.source)); + + if (! isempty(output_lines)) + stream_output.text = {output_lines}; + endif + + if (isfield (stream_output, "text")) + obj.notebook.cells{cell_index}.outputs{end + 1} = stream_output; + endif + + ## If there are existing plots and newFig is empty, delete it. + if (exist ("newFig") && isempty (get (newFig, "children"))) + delete (newFig); + endif + + ## Check for newly created figures. + fig_ids_new = setdiff (findall (0, "type", "figure"), fig_ids); + + if (numel (fig_ids_new) > 0) + if (exist ("__octave_jupyter_temp__", "dir")) + ## Delete open figures before raising the error. + for i = 1:numel (fig_ids_new) + delete (fig_ids_new(i)); + endfor + error (["JupyterNotebook: temporary directory ", ... + "__octave_jupyter_temp__ exists. Please remove it ", ... + "manually."]); + endif + + [status, msg, msgid] = mkdir ("__octave_jupyter_temp__"); + if (status == 0) + ## Delete open figures before raising the error. + for i = 1 : numel (fig_ids_new) + delete (fig_ids_new(i)); + endfor + error (["JupyterNotebook: Cannot create a temporary directory. ", ... + msg]); + endif + + for i = 1:numel (fig_ids_new) + figure (fig_ids_new(i), "visible", "off"); + obj.embedImage (cell_index, fig_ids_new (i), printOptions); + delete (fig_ids_new(i)); + endfor + + [status, msg, msgid] = rmdir ("__octave_jupyter_temp__"); + if (status == 0) + error (["JupyterNotebook: Cannot delete the temporary ", ... + "directory. ", msg]); + endif + endif + + endfunction + + + function runAll (obj) + + ## -*- texinfo -*- + ## @deftypefn {} {} runAll () + ## + ## Run all Jupyter Notebook cells and eventually replace previous + ## output cells in the object. + ## + ## Note: The code evaluation of the Jupyter Notebook cells is done + ## in a separate Jupyter Notebook context. Thus currently open + ## figures and workspace variables won't be affected by executing + ## this function. However, current workspace variables cannot be + ## accessed either. + ## + ## See @code{help JupyterNotebook} for examples. + ## + ## @end deftypefn + + if (nargin != 1) + print_usage (); + endif + + for i = 1:numel (obj.notebook.cells) + obj.run(i); + endfor + + endfunction + + endmethods + + + methods (Access = "private") + + function retVal = evalCode (__obj__, __code__) + + ## Evaluate the code string "__code__" using "evalc". + ## Before the code is evaluated, the previous notebook context is + ## loaded from "__obj__" and the new context is saved to that struct. + + if (nargin != 2) + print_usage (); + endif + + if (isempty (__code__)) + retVal = []; + return; + endif + + if (! (ischar (__code__) && isrow (__code__))) + error ("JupyterNotebook: code must be a string"); + endif + + __obj__.loadContext (); + + ## Add a statement to detect the value of the variable "ans" + __code__ = [__code__, "\nans"]; + + retVal = strtrim (evalc (__code__, ["printf (\"error: \"); ", ... + "printf (lasterror.message)"])); + + ## Handle the "ans" variable in the context. + start_index = rindex (retVal, "ans =") + 6; + if ((start_index > 6)) + if ((start_index <= length (retVal))) + end_index = start_index; + while ((retVal(end_index) != "\n") && (end_index < length (retVal))) + end_index += 1; + endwhile + __obj__.context.ans = retVal(start_index:end_index); + else + end_index = length (retVal); + __obj__.context.ans = ""; + endif + + ## Delete the output of the additional statement if the execution + ## is completed with no errors. + if (end_index == length (retVal)) + ## Remove the extra new line if there are other outputs with + ## the "ans" statement output + if (start_index == 7) + start_index = 1; + else + start_index = start_index - 7; + endif + retVal(start_index:end_index) = ""; + endif + endif + + __obj__.saveContext (); + + endfunction + + + function saveContext (obj, op) + + ## Save the context in private "obj" attribute. + + ## Handle the "ans" variable in the context. + obj.context = struct ("ans", obj.context.ans); + + forbidden_var_names = {"__code__", "__obj__", "ans"}; + + ## Get variable names. + var_names = {evalin("caller", "whos").name}; + + ## Store all variables to context. + for i = 1:length (var_names) + if (! any (strcmp (var_names{i}, forbidden_var_names))) + obj.context.(var_names{i}) = evalin ("caller", var_names{i}); + endif + endfor + + endfunction + + + function loadContext (obj) + + ## Load the context from private "obj" attribute. + for [val, key] = obj.context + assignin ("caller", key, val); + endfor + + endfunction + + + function embedImage (obj, cell_index, figHandle, printOptions) + + ## Embed images in the notebook. + ## + ## To support a new format: + ## 1. Create a new function that embeds the new format + ## (e.g. embed_svg_image). + ## 2. Add a new case to the switch-statement below. + + if (isempty (get (figHandle, "children"))) + error_text = {"The figure is empty!"}; + obj.addErrorOutput (cell_index, "The figure is empty!"); + return; + endif + + ## Check if the resolution is correct + if (isempty (str2num (printOptions.resolution))) + obj.addErrorOutput (cell_index, + "A number is required for resolution, not a string"); + return; + endif + + ## Check if the width is correct + if (isempty (str2num (printOptions.width))) + obj.addErrorOutput (cell_index, + "A number is required for width, not a string"); + return; + endif + + ## Check if the height is correct + if (isempty (str2num (printOptions.height))) + obj.addErrorOutput (cell_index, + "A number is required for height, not a string"); + return; + endif + + switch (lower (printOptions.imageFormat)) + case "png" + display_output = obj.embed_png_jpg_image (figHandle, + printOptions, "png"); + case "jpg" + display_output = obj.embed_png_jpg_image (figHandle, + printOptions, "jpg"); + case "svg" + display_output = obj.embed_svg_image (figHandle, printOptions); + otherwise + obj.addErrorOutput (cell_index, ["Cannot embed the \'", ... + printOptions.imageFormat, ... + "\' image format\n"]); + return; + endswitch + + obj.notebook.cells{cell_index}.outputs{end + 1} = display_output; + + endfunction + + + function dstruct = embed_png_jpg_image (obj, figHandle, printOptions, fmt) + + if (strcmp (fmt, "png")) + mime = "image/png"; + else + mime = "image/jpeg"; + endif + + image_path = fullfile ("__octave_jupyter_temp__", ["temp.", fmt]); + print (figHandle, image_path, ["-d", fmt], + ["-r" printOptions.resolution]); + + dstruct.output_type = "display_data"; + dstruct.metadata.(mime).width = printOptions.width; + dstruct.metadata.(mime).height = printOptions.height; + dstruct.data.("text/plain") = {"<IPython.core.display.Image object>"}; + dstruct.data.(mime) = base64_encode (uint8 (fileread (image_path))); + + delete (image_path); + + endfunction + + + function dstruct = embed_svg_image (obj, figHandle, printOptions) + + image_path = fullfile ("__octave_jupyter_temp__", "temp.svg"); + print (figHandle, image_path, "-dsvg", ["-r" printOptions.resolution]); + + dstruct.output_type = "display_data"; + dstruct.metadata = struct (); + dstruct.data.("text/plain") = {"<IPython.core.display.SVG object>"}; + dstruct.data.("image/svg+xml") = strsplit (fileread (image_path), "\n"); + + ## FIXME: The following is a workaround until we can properly print + ## SVG images in the right width and height. + ## Detect the <svg> tag. it is either the first or the second item + if (strncmpi (dstruct.data.("image/svg+xml"){1}, "<svg", 4)) + i = 1; + else + i = 2; + endif + + ## Embed the width and height in the image itself + svg_tag = dstruct.data.("image/svg+xml"){i}; + svg_tag = regexprep (svg_tag, "width=\"(.*?)\"", + ["width=\"" printOptions.width "px\""]); + svg_tag = regexprep (svg_tag, "height=\"(.*?)\"", + ["height=\"" printOptions.height "px\""]); + dstruct.data.("image/svg+xml"){i} = svg_tag; + + delete (image_path); + + endfunction + + + function addErrorOutput (obj, cell_index, error_msg) + + stream_output.name = "stderr"; + stream_output.output_type = "stream"; + stream_output.text = {error_msg}; + obj.notebook.cells{cell_index}.outputs{end + 1} = stream_output; + + endfunction + + endmethods + +endclassdef + +#!error JupyterNotebook ()