view scripts/miscellaneous/jupyter_notebook.m @ 33559:62fca924fe85 default tip @

doc: Update NEWS.10.md file. * NEWS.10.md: Indent NEWS.10.md for clarity. Add note about changes to colormap() functionality.
author Rik <rik@octave.org>
date Thu, 09 May 2024 18:23:33 -0700
parents 2e484f9f1f18
children
line wrap: on
line source

########################################################################
##
## Copyright (C) 2021-2024 The Octave Project Developers
##
## See the file COPYRIGHT.md in the top-level directory of this
## distribution or <https://octave.org/copyright/>.
##
## Octave is free software: you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <https://www.gnu.org/licenses/>.
##
########################################################################

classdef jupyter_notebook < handle

  ## -*- texinfo -*-
  ## @deftypefn  {} {@var{notebook} =} jupyter_notebook (@var{notebook_filename})
  ## @deftypefnx {} {@var{notebook} =} jupyter_notebook (@var{notebook_filename}, @var{options})
  ##
  ## Run and fill the Jupyter Notebook in file @var{notebook_filename} from
  ## within GNU Octave.
  ##
  ## Both text and graphical Octave outputs are supported.
  ##
  ## This class has a public property @code{notebook} which is a structure
  ## representing the JSON-decoded Jupyter Notebook.  This property is
  ## intentionally public to enable advanced notebook manipulations.
  ##
  ## Note: Jupyter Notebook versions (@code{nbformat}) lower than 4.0 are not
  ## supported.
  ##
  ## The optional second argument @var{options} is a struct with fields:
  ##
  ## @itemize @bullet
  ## @item
  ## @code{tmpdir} to set the temporary working directory.
  ## @end itemize
  ##
  ## @code{%plot} magic is supported with the following settings:
  ##
  ## @itemize @bullet
  ## @item
  ## "@code{%plot -f <format>}" or "@code{%plot --format <format>}": specifies
  ## the image storage format.  Supported formats are:
  ##
  ## @itemize @minus
  ## @item
  ## PNG (default)
  ##
  ## @item SVG
  ## (Note: If SVG images do not appear in the notebook, it is most likely
  ## related to Jupyter Notebook security mechanisms and explicitly "trusting"
  ## them will be necessary).
  ##
  ## @item
  ## JPG
  ## @end itemize
  ##
  ## @item
  ## "@code{%plot -r <number>}" or "@code{%plot --resolution <number>}":
  ## specifies the image resolution.
  ##
  ## @item
  ## "@code{%plot -w <number>}" or "@code{%plot --width <number>}": specifies
  ## the image width.
  ##
  ## @item
  ## "@code{%plot -h <number>}" or "@code{%plot --height <number>}": specifies
  ## the image height.
  ## @end itemize
  ##
  ## Examples:
  ##
  ## @example
  ## @group
  ## ## Run all cells and generate the filled notebook
  ##
  ## ## Instantiate an object from a notebook file
  ## notebook = jupyter_notebook ("myNotebook.ipynb");
  ## ## Run the code and embed the results in the @code{notebook} property
  ## notebook.run_all ();
  ## ## Generate a new notebook by overwriting the original notebook
  ## notebook.generate_notebook ("myNotebook.ipynb");
  ## @end group
  ##
  ## @group
  ## ## Run just the second cell and generate the filled notebook
  ##
  ## ## Instantiate an object from a notebook file
  ## notebook = jupyter_notebook ("myNotebook.ipynb");
  ## ## Run the code and embed the results in the @code{notebook} property
  ## notebook.run (2)
  ## ## Generate a new notebook in a new file
  ## notebook.generate_notebook ("myNewNotebook.ipynb");
  ## @end group
  ##
  ## @group
  ## ## Generate an Octave script from a notebook
  ##
  ## ## Instantiate an object from a notebook file
  ## notebook = jupyter_notebook ("myNotebook.ipynb");
  ## ## Generate the Octave script
  ## notebook.generate_octave_script ("jup_script.m");
  ## @end group
  ## @end example
  ##
  ## @seealso{jsondecode, jsonencode}
  ## @end deftypefn

  properties

    notebook = struct ();

  endproperties

  properties (Access = "private")

    context = struct ("ans", "");

    ## Note: This name needs to be stored in a property because it is
    ## set in the constructor but used in some other methods.  However,
    ## we want to defer calling tempname() until immediately before
    ## calling mkdir().  The temporary directory currently created and
    ## deleted in the constructor and the name is reset to the empty
    ## string when the directory is deleted.  Another possible
    ## implementation might be to generate the name and create the
    ## temporary directory here, then delete it in the class destructor.

    tmpdir = "";

  endproperties

  methods

    function obj = jupyter_notebook (notebook_filename, options)

      if (nargin < 1)
        print_usage ();
      endif

      if (! (ischar (notebook_filename) && isrow (notebook_filename)))
        error ("jupyter_notebook: NOTEBOOK_FILENAME must be a string");
      endif

      ## Validate options if present.
      if (nargin == 2 && ! isstruct (options))
        error ("jupyter_notebook: OPTIONS must be a struct");
      endif
      if (nargin == 2 && isfield (options, "tmpdir"))
        obj.tmpdir = options.tmpdir;
      endif

      obj.notebook = jsondecode (fileread (notebook_filename),
                                 "makeValidName", false);

      ## Validate the notebook's format according to nbformat: 4.0
      if (! all (isfield (obj.notebook,
                          {"metadata", "nbformat", "nbformat_minor", "cells"})))
        error ("jupyter_notebook: invalid format for Jupyter notebooks");
      endif

      ## Issue a warning if the format is lower than 4.0.
      if (obj.notebook.nbformat < 4)
        warning (["jupyter_notebook: nbformat versions lower than 4.0 are ", ...
                  "not supported"]);
      endif

      ## Handle the case of only one cell.
      ## Make "obj.notebook.cells" a cell of structs to match the format.
      if (numel (obj.notebook.cells) == 1)
        obj.notebook.cells = {obj.notebook.cells};
      endif

      ## Handle the case where the cells have the same keys.
      ## Make "obj.notebook.cells" a cell of structs, instead of struct array,
      ## to unify the indexing method.
      if (isstruct (obj.notebook.cells))
        obj.notebook.cells = num2cell (obj.notebook.cells);
      endif

      for i = 1:numel (obj.notebook.cells)
        nbcell = obj.notebook.cells{i};
        if (! isfield (nbcell, "source"))
          error ('jupyter_notebook: cells must contain a "source" field');
        endif

        if (! isfield (nbcell, "cell_type"))
          error ('jupyter_notebook: cells must contain a "cell_type" field');
        endif

        ## Handle null JSON values which are decoded into empty arrays.
        if (isfield (nbcell, "execution_count")
            && numel (nbcell.execution_count) == 0)
          obj.notebook.cells{i}.execution_count = 1;
        endif

        ## Handle the case of only one output in the cell.
        ## Make the outputs of the cell a cell of structs to match the format.
        if (isfield (nbcell, "outputs") && numel (nbcell.outputs) == 1)
          obj.notebook.cells{i}.outputs = {obj.notebook.cells{i}.outputs};
        endif
      endfor

    endfunction


    function generate_octave_script (obj, script_filename)

      ## -*- texinfo -*-
      ## @deftypefn {} {} generate_octave_script (@var{script_filename})
      ##
      ## Write an Octave script that has the contents of the Jupyter Notebook
      ## stored in the @code{notebook} attribute to @var{script_filename}.
      ##
      ## Non-code cells are generated as block comments.
      ##
      ## See @code{help jupyter_notebook} for examples.
      ##
      ## @seealso{jupyter_notebook}
      ## @end deftypefn

      if (nargin != 2)
        print_usage ();
      endif

      if (! (ischar (script_filename) && isrow (script_filename)))
        error ("jupyter_notebook: SCRIPT_FILENAME must be a string");
      endif

      fid = fopen (script_filename, "w");

      for i = 1:numel (obj.notebook.cells)
        nbcell = obj.notebook.cells{i};
        is_markdown = strcmp (nbcell.cell_type, "markdown");

        if (is_markdown)
          fputs (fid, "\n#{\n");
        endif

        for k = 1:numel (nbcell.source)
          fputs (fid, nbcell.source{k});
        endfor

        if (is_markdown)
          fputs (fid, "\n#}\n");
        endif
        fputs (fid, "\n");
      endfor
      fclose (fid);

    endfunction


    function generate_notebook (obj, notebook_filename)

      ## -*- texinfo -*-
      ## @deftypefn {} {} generate_notebook (@var{notebook_filename})
      ##
      ## Write the Jupyter Notebook stored in the @code{notebook}
      ## attribute to @var{notebook_filename}.
      ##
      ## The @code{notebook} attribute is encoded to JSON text.
      ##
      ## See @code{help jupyter_notebook} for examples.
      ##
      ## @seealso{jupyter_notebook}
      ## @end deftypefn

      if (nargin != 2)
        print_usage ();
      endif

      if (! (ischar (notebook_filename) && isrow (notebook_filename)))
        error ("jupyter_notebook: NOTEBOOK_FILENAME must be a string");
      endif

      fid = fopen (notebook_filename, "w");

      fputs (fid, jsonencode (obj.notebook,
                              "ConvertInfAndNaN", false,
                              "PrettyPrint", true));

      fclose (fid);

    endfunction


    function run (obj, cell_index)

      ## -*- texinfo -*-
      ## @deftypefn {} {} run (@var{cell_index})
      ##
      ## Run the Jupyter Notebook cell with index @var{cell_index}
      ## and eventually replace previous output cells in the object.
      ##
      ## The first Jupyter Notebook cell has the index 1.
      ##
      ## Note: The code evaluation of the Jupyter Notebook cells is done
      ## in a separate Jupyter Notebook context.  Thus, currently open
      ## figures and workspace variables won't be affected by executing
      ## this function.  However, current workspace variables cannot be
      ## accessed either.
      ##
      ## See @code{help jupyter_notebook} for examples.
      ##
      ## @seealso{jupyter_notebook}
      ## @end deftypefn

      if (nargin != 2)
        print_usage ();
      endif

      if (! (isscalar (cell_index) && isindex (cell_index)))
        error ("jupyter_notebook: CELL_INDEX must be a scalar positive integer");
      endif

      if (cell_index > numel (obj.notebook.cells))
        error ("jupyter_notebook: CELL_INDEX is out of bound");
      endif

      nbcell = obj.notebook.cells{cell_index};

      if (! strcmp (nbcell.cell_type, "code"))
        return;
      endif

      ## Remove previous outputs.
      obj.notebook.cells{cell_index}.outputs = {};

      if (isempty (nbcell.source))
        return;
      endif

      ## Default values for printOptions.
      printOptions.imageFormat = "png";
      printOptions.resolution = "0";

      ## The default width and height in Jupyter notebook
      printOptions.width = "640";
      printOptions.height = "480";

      ## Parse "plot magic" commands.
      ## https://github.com/Calysto/metakernel/blob/master/metakernel/magics/README.md#plot
      for j = 1 : numel (nbcell.source)
        if (strncmpi (nbcell.source{j}, "%plot", 5))
          magics = strsplit (strtrim (nbcell.source{j}));
          for i = 1 : numel (magics)
            if (any (strcmp (magics{i}, {"-f", "--format"}))
                && (i < numel (magics)))
              printOptions.imageFormat = magics{i+1};
            endif
            if (any (strcmp (magics{i}, {"-r", "--resolution"}))
                && (i < numel (magics)))
              printOptions.resolution = magics{i+1};
            endif
            if (any (strcmp (magics{i}, {"-w", "--width"}))
                && (i < numel (magics)))
              printOptions.width = magics{i+1};
            endif
            if (any (strcmp (magics{i}, {"-h", "--height"}))
                && (i < numel (magics)))
              printOptions.height = magics{i+1};
            endif
          endfor
        endif
      endfor

      ## Remember previously opened figures.
      fig_ids = findall (groot, "type", "figure");

      ## Create a new figure, if there are existing plots.
      if (! isempty (fig_ids))
        newFig = figure ();
      endif

      stream_output = struct ("name", "stdout", "output_type", "stream");

      output_lines = obj.evalCode (strjoin (nbcell.source));

      if (! isempty (output_lines))
        stream_output.text = {output_lines};
      endif

      if (isfield (stream_output, "text"))
        obj.notebook.cells{cell_index}.outputs{end+1} = stream_output;
      endif

      ## If there are existing plots and newFig is empty, delete it.
      if (exist ("newFig") && isempty (get (newFig, "children")))
        delete (newFig);
      endif

      ## Check for newly created figures.
      fig_ids_new = setdiff (findall (groot, "type", "figure"), fig_ids);

      if (! isempty (fig_ids_new))
        if (! isempty (obj.tmpdir) && exist (obj.tmpdir, "dir"))
          ## Delete open figures before raising the error.
          delete (fig_ids_new);
          error (["JupyterNotebook: temporary directory %s exists.  ", ...
                  "Please remove it manually."], obj.tmpdir);
        endif

        if (isempty (obj.tmpdir))
          obj.tmpdir = tempname ();
          clear_tmpdir_property = true;
        else
          clear_tmpdir_property = false;
        endif
        [status, msg] = mkdir (obj.tmpdir);
        if (status == 0)
          ## Delete open figures before raising the error.
          delete (fig_ids_new);
          error (["jupyter_notebook: cannot create a temporary directory. ", ...
                  msg]);
        endif

        ## FIXME: Maybe it would be better for these cleanup actions to
        ## happen in an onCleanup object or unwind_protect block so that
        ## they will be executed no matter how we exit this function?

        for i = 1:numel (fig_ids_new)
          figure (fig_ids_new(i), "visible", "off");
          obj.embedImage (cell_index, fig_ids_new(i), printOptions);
          delete (fig_ids_new(i));
        endfor

        [status, msg] = rmdir (obj.tmpdir);
        if (status == 0)
          error (["jupyter_notebook: cannot delete the temporary ", ...
                  "directory. ", msg]);
        endif
        if (clear_tmpdir_property)
          obj.tmpdir = "";
        endif
      endif

    endfunction


    function run_all (obj)

      ## -*- texinfo -*-
      ## @deftypefn {} {} run_all ()
      ##
      ## Run all Jupyter Notebook cells and eventually replace previous
      ## output cells in the object.
      ##
      ## Note: The code evaluation of the Jupyter Notebook cells is done
      ## in a separate Jupyter Notebook context.  Thus, currently open
      ## figures and workspace variables won't be affected by executing
      ## this function.  However, current workspace variables cannot be
      ## accessed either.
      ##
      ## See @code{help jupyter_notebook} for examples.
      ##
      ## @seealso{jupyter_notebook}
      ## @end deftypefn

      if (nargin != 1)
        print_usage ();
      endif

      for i = 1:numel (obj.notebook.cells)
        obj.run (i);
      endfor

    endfunction

  endmethods


  methods (Access = "private")

    function retval = evalCode (__obj__, __code__)

      ## Evaluate the code string "__code__" using "evalc".
      ## Before the code is evaluated, the previous notebook context is
      ## loaded from "__obj__" and the new context is saved to that struct.

      if (nargin != 2)
        print_usage ();
      endif

      if (isempty (__code__))
        retval = [];
        return;
      endif

      if (! (ischar (__code__) && isrow (__code__)))
        error ("jupyter_notebook: CODE must be a string");
      endif

      __obj__.loadContext ();

      ## Add a statement to detect the value of the variable "ans"
      __code__ = [__code__, "\nans"];

      retval = strtrim (evalc (__code__, ["printf (\"error: \"); ", ...
                                          "printf (lasterror.message)"]));

      ## Handle the "ans" variable in the context.
      start_index = rindex (retval, "ans =") + 6;
      if (start_index > 6)
        if (start_index <= length (retval))
          end_index = start_index;
          ## FIXME: loops are slow.
          idx = find (retval(start_index+1:end) == "\n", 1);
          if (idx)
            end_index = start_index + idx;
          else
            end_index = length (retval);
          endif
          __obj__.context.ans = retval(start_index:end_index);
        else
          end_index = length (retval);
          __obj__.context.ans = "";
        endif

        ## Delete the output of the additional statement if the execution
        ## is completed with no errors.
        if (end_index == length (retval))
          ## Remove the extra new line if there are other outputs with
          ## the "ans" statement output
          if (start_index == 7)
            start_index = 1;
          else
            start_index -= 7;
          endif
          retval(start_index:end_index) = "";
        endif
      endif

      __obj__.saveContext ();

    endfunction


    function saveContext (obj)

      ## Save the context in private "obj" attribute.

      ## Handle the "ans" variable in the context.
      obj.context = struct ("ans", obj.context.ans);

      forbidden_var_names = {"__code__", "__obj__", "ans"};

      ## Get variable names.
      var_names = {evalin("caller", "whos").name};

      ## Store all variables to context.
      for i = 1:numel (var_names)
        if (! any (strcmp (var_names{i}, forbidden_var_names)))
          obj.context.(var_names{i}) = evalin ("caller", var_names{i});
        endif
      endfor

    endfunction


    function loadContext (obj)

      ## Load the context from private "obj" attribute.
      for [val, key] = obj.context
        assignin ("caller", key, val);
      endfor

    endfunction


    function embedImage (obj, cell_index, figHandle, printOptions)

      ## Embed images in the notebook.
      ##
      ## To support a new format:
      ## 1. Create a new function that embeds the new format
      ##    (e.g. embed_svg_image).
      ## 2. Add a new case to the switch-statement below.

      if (isempty (get (figHandle, "children")))
        error_text = {"The figure is empty!"};
        obj.addErrorOutput (cell_index, "The figure is empty!");
        return;
      endif

      ## Check if the resolution is correct
      if (isempty (str2num (printOptions.resolution)))
        obj.addErrorOutput (cell_index,
                            "A number is required for resolution, not a string");
        return;
      endif

      ## Check if the width is correct
      if (isempty (str2num (printOptions.width)))
        obj.addErrorOutput (cell_index,
                            "A number is required for width, not a string");
        return;
      endif

      ## Check if the height is correct
      if (isempty (str2num (printOptions.height)))
        obj.addErrorOutput (cell_index,
                            "A number is required for height, not a string");
        return;
      endif

      switch (lower (printOptions.imageFormat))
        case "png"
          display_output = obj.embed_png_jpg_image (figHandle,
                                                    printOptions, "png");
        case "jpg"
          display_output = obj.embed_png_jpg_image (figHandle,
                                                    printOptions, "jpg");
        case "svg"
          display_output = obj.embed_svg_image (figHandle, printOptions);
        otherwise
          obj.addErrorOutput (cell_index, ["Cannot embed the \'", ...
                                           printOptions.imageFormat, ...
                                           "\' image format\n"]);
          return;
      endswitch

      obj.notebook.cells{cell_index}.outputs{end+1} = display_output;

    endfunction


    function dstruct = embed_png_jpg_image (obj, figHandle, printOptions, fmt)

      if (strcmp (fmt, "png"))
        mime = "image/png";
      else
        mime = "image/jpeg";
      endif

      image_path = fullfile (obj.tmpdir, ["temp." fmt]);
      print (figHandle, image_path, ["-d" fmt],
             ["-r" printOptions.resolution]);

      dstruct.output_type = "display_data";
      dstruct.metadata.(mime).width  = printOptions.width;
      dstruct.metadata.(mime).height = printOptions.height;
      dstruct.data.("text/plain") = {"<IPython.core.display.Image object>"};
      dstruct.data.(mime) = base64_encode (uint8 (fileread (image_path)));

      delete (image_path);

    endfunction


    function dstruct = embed_svg_image (obj, figHandle, printOptions)

      image_path = fullfile (obj.tmpdir, "temp.svg");
      print (figHandle, image_path, "-dsvg", ["-r" printOptions.resolution]);

      dstruct.output_type = "display_data";
      dstruct.metadata = struct ();
      dstruct.data.("text/plain") = {"<IPython.core.display.SVG object>"};
      dstruct.data.("image/svg+xml") = strsplit (fileread (image_path), "\n");

      ## FIXME: The following is a workaround until we can properly print
      ##        SVG images in the right width and height.
      ## Detect the <svg> tag; it is either the first or the second item.
      if (strncmpi (dstruct.data.("image/svg+xml"){1}, "<svg", 4))
        i = 1;
      else
        i = 2;
      endif

      ## Embed the width and height in the image itself
      svg_tag = dstruct.data.("image/svg+xml"){i};
      svg_tag = regexprep (svg_tag, 'width=".*?"',
                           ['width="' printOptions.width 'px"']);
      svg_tag = regexprep (svg_tag, 'height=".*?"',
                           ['height="' printOptions.height 'px"']);
      dstruct.data.("image/svg+xml"){i} = svg_tag;

      delete (image_path);

    endfunction


    function addErrorOutput (obj, cell_index, error_msg)

      stream_output.name        = "stderr";
      stream_output.output_type = "stream";
      stream_output.text        = {error_msg};
      obj.notebook.cells{cell_index}.outputs{end+1} = stream_output;

    endfunction

  endmethods

endclassdef


## Note: Functional BIST tests are located in the 'test/jupyter-notebook'
##       directory.

## Test input validation
%!error <Invalid call> jupyter_notebook ()
%!error <NOTEBOOK_FILENAME must be a string> jupyter_notebook (1)
%!error <NOTEBOOK_FILENAME must be a string> jupyter_notebook (['a';'b'])
%!error <OPTIONS must be a struct> jupyter_notebook ("fname", 1)