changeset 22674:c024fb103114 stable

minor improvements for publish. * scripts/general/publish.m: - Add support of GNU Octave HTML manual URLs like <octave:plot TEXT>. - Avoid bad interleaved formatting with placeholders. - No formatting of the document title. * scripts/general/__publish_html_output__.m: Improve the simple syntax highlighter. * test/publish/test_script.m: Test the GNU Octave HTML manual URLs.
author Kai T. Ohlhus <k.ohlhus@gmail.com>
date Thu, 27 Oct 2016 13:35:16 +0200
parents 4144fc49c318
children b477f275c12f f88de8f27259
files scripts/general/private/__publish_html_output__.m scripts/general/publish.m test/publish/test_script.m
diffstat 3 files changed, 128 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/general/private/__publish_html_output__.m	Wed Oct 26 18:37:39 2016 -0400
+++ b/scripts/general/private/__publish_html_output__.m	Thu Oct 27 13:35:16 2016 +0200
@@ -208,76 +208,108 @@
 
   outstr = "";
   i = 1;
+  placeholder_cstr = {};
+  plh = 0;
   while (i <= length(str))
     ## Block comment
     if (any (strncmp (str(i:end), {"%{", "#{"}, 2)))
-      outstr = [outstr, "<span class=\"comment\">", str(i:i+1)];
+      plh_str = ["<span class=\"comment\">", str(i:i+1)];
       i = i + 2;
       while ((i <= length(str)) ...
              && ! (any (strncmp (str(i:end), {"%}", "#}"}, 2))))
-        outstr = [outstr, str(i)];
+        plh_str = [plh_str, str(i)];
         i++;
       endwhile
       if (i < length(str))
-        outstr = [outstr, str(i:i+1), "</span>"];
+        plh_str = [plh_str, str(i:i+1), "</span>"];
         i = i + 2;
       else
-        outstr = [outstr, "</span>"];
+        plh_str = [plh_str, "</span>"];
       endif
+      plh = plh + 1;
+      placeholder_cstr{plh} = plh_str;
+      outstr = [outstr, " PUBLISHPLACEHOLDER", num2str(plh), " "];
     ## Line comment
     elseif (any (strcmp (str(i), {"%", "#"})))
-      outstr = [outstr, "<span class=\"comment\">"];
+      plh_str = "<span class=\"comment\">";
       while ((i <= length(str)) && (! strcmp (str(i), "\n")))
-        outstr = [outstr, str(i)];
+        plh_str = [plh_str, str(i)];
         i++;
       endwhile
-      outstr = [outstr, "</span>\n"];
+      plh_str = [plh_str, "</span>\n"];
       i++;
+      plh = plh + 1;
+      placeholder_cstr{plh} = plh_str;
+      outstr = [outstr, " PUBLISHPLACEHOLDER", num2str(plh), " "];
     ## Single quoted string
     elseif (strcmp (str(i), "'"))
-      outstr = [outstr, "<span class=\"string\">'"];
+      plh_str = "<span class=\"string\">'";
       i++;
       while (i <= length(str))
         ## Ignore escaped string terminations
         if (strncmp (str(i:end), "''", 2))
-          outstr = [outstr, "''"];
+          plh_str = [plh_str, "''"];
           i = i + 2;
         ## Is string termination
         elseif (strcmp (str(i), "'"))
-          outstr = [outstr, "'</span>"];
+          plh_str = [plh_str, "'</span>"];
           i++;
           break;
+        ## Is string termination by line break
+        elseif (strcmp (str(i), "\n"))
+          plh_str = [plh_str, "</span>"];
+          break;
         ## String content
         else
-          outstr = [outstr, str(i)];
+          plh_str = [plh_str, str(i)];
           i++;
         endif
       endwhile
+      plh = plh + 1;
+      placeholder_cstr{plh} = plh_str;
+      outstr = [outstr, " PUBLISHPLACEHOLDER", num2str(plh), " "];
     ## Double quoted string
     elseif (strcmp (str(i), "\""))
-      outstr = [outstr, "<span class=\"string\">\""];
+      plh_str = "<span class=\"string\">\"";
       i++;
       while (i <= length(str))
         ## Is string termination
         if (strcmp (str(i), "\"") && ! strcmp (str(i - 1), "\\"))
-          outstr = [outstr, "\"</span>"];
+          plh_str = [plh_str, "\"</span>"];
           i++;
           break;
+        ## Is string termination by line break
+        elseif (strcmp (str(i), "\n"))
+          plh_str = [plh_str, "</span>"];
+          break;
         ## String content
         else
-          outstr = [outstr, str(i)];
+          plh_str = [plh_str, str(i)];
           i++;
         endif
       endwhile
+      plh = plh + 1;
+      placeholder_cstr{plh} = plh_str;
+      outstr = [outstr, " PUBLISHPLACEHOLDER", num2str(plh), " "];
     else
       outstr = [outstr, str(i)];
       i++;
     endif
   endwhile
   kwords = iskeyword ();
+  ## TODO: remove hack for regexp (bug #38149)
+  outstr = [" ", strrep(outstr, "\n", " \n "), " "];
   for i = 1:length(kwords)
     outstr = regexprep (outstr, ...
-      ['(?!<span[^>]*?>)(\s|^)(', kwords{i},')(\s|$)(?![^<]*?<\/span>)'], ...
+      ['(\s)(', kwords{i},')(\s|\()'], ...
       ["$1<span class=\"keyword\">$2</span>$3"]);
   endfor
+  ## TODO: remove hack for regexp (bug #38149)
+  outstr = strrep(outstr(2:end-1), " \n ", "\n");
+
+  ## Restore placeholders
+  for i = plh:-1:1
+    outstr = strrep (outstr, [" PUBLISHPLACEHOLDER", num2str(i), " "], ...
+      placeholder_cstr{i});
+  endfor
 endfunction
\ No newline at end of file
--- a/scripts/general/publish.m	Wed Oct 26 18:37:39 2016 -0400
+++ b/scripts/general/publish.m	Thu Oct 27 13:35:16 2016 +0200
@@ -726,7 +726,7 @@
     [~,title_str] = fileparts (doc_struct.m_source_file_name);
   endif
 
-  content = formatter ("header", format_text(title_str, formatter), ...
+  content = formatter ("header", title_str, ...
     format_output (doc_struct.intro, formatter, options), ...
     get_toc (doc_struct.body, formatter));
   content = [content, format_output(doc_struct.body, formatter, options)];
@@ -807,41 +807,86 @@
   ##   These are: links, bold, italic, monospaced, (TM), (R)
   ##
 
-  ## Links "<http://www.someurl.com>"
-  str = regexprep (str, '<(\S{3,}[^\s<>]*)>', ...
-    formatter ("link", "$1", "$1"));
-  ## Links "<octave:Function TEXT>"
-  ## TODO: better pointer to the function documentation
-  str = regexprep (str, '<octave:([^\s<>]*) *([^<>$]*)>', ...
-    formatter ("link", ["https://www.gnu.org/software/octave/", ...
-      "doc/interpreter/Function-Index.html"], "$2"));
-  ## Links "<http://www.someurl.com TEXT>"
-  str = regexprep (str, '<(\S{3,}[^\s<>]*) *([^<>$]*)>', ...
-    formatter ("link", "$1", "$2"));
-  oldstr = str;
-  ## Save inline "$" and block "$$" LaTeX math
-  [~,~,~,math_cstr] = regexp (str, '\${1,2}.*?\${1,2}');
-  for i = 1:length(math_cstr)
-    str = regexprep (str, '\${1,2}(.*?)\${1,2}', ...
-      ["PUBLISHMATH", num2str(i)], "once");
+  ## Regular expressions for the formats:
+  ##
+  ## * Links "<http://www.someurl.com>"
+  ## * Links "<octave:Function TEXT>"
+  ## * Links "<http://www.someurl.com TEXT>"
+  ## * inline "$" and block "$$" LaTeX math
+  ##
+  regexes = {'<\S{3,}[^\s<>]*>', ...
+             '<octave:[^\s<>]* *[^<>$]*>', ...
+             '<\S{3,}[^\s<>]* *[^<>$]*>', ...
+             '\${1,2}.*?\${1,2}', ...
+             '\*[^*]*\*', ...  # Bold
+             '_[^_]*_', ...    # Italic
+             '\|[^|]*\|'};     # Monospaced
+             
+
+  ##  Helper function to escape some special characters for the GNU Octave
+  ##  manual, see https://www.gnu.org/software/texinfo/manual/texinfo/html_node/HTML-Xref-Node-Name-Expansion.html
+  texinfo_esc = @(str) strrep (strrep (str, "-", "_002d"), "_", "_005f");
+
+  ## Substitute all occurances with placeholders
+  placeholder_cstr = {};
+  plh = 0;
+  for i = 1:length(regexes)
+    [~,~,~,cstr] = regexp (str, regexes{i});
+    for j = 1:length(cstr)
+      plh = plh + 1;
+      str = regexprep (str, regexes{i}, ...
+        ["PUBLISHPLACEHOLDER", num2str(plh)], "once");
+      switch (i)
+        case 1
+          # Links "<http://www.someurl.com>"
+          url = cstr{j};
+          cstr{j} = formatter ("link", url(2:end-1), url(2:end-1));
+        case 2
+          # Links "<octave:Function TEXT>"
+          idx = strfind (cstr{j}, " ");
+          url = cstr{j};
+          url = ["https://www.gnu.org/software/octave/doc/interpreter/", ...
+                 "XREF", texinfo_esc(url(9:idx-1)), ".html"];
+          txt = cstr{j};
+          txt = format_text (txt(idx+1:end-1), formatter);
+          cstr{j} = formatter ("link", url, txt);
+        case 3
+          # Links "<http://www.someurl.com TEXT>"
+          idx = strfind (cstr{j}, " ");
+          url = cstr{j};
+          url = url(2:idx-1);
+          txt = cstr{j};
+          txt = format_text (txt(idx+1:end-1), formatter);
+          cstr{j} = formatter ("link", url, txt);
+        case 4
+          # inline "$" and block "$$" LaTeX math --> do nothing
+        case 5
+          # Bold
+          txt = cstr{j};
+          cstr{j} = formatter ("bold", format_text (txt(2:end-1), formatter));
+        case 6
+          # Italic
+          txt = cstr{j};
+          cstr{j} = formatter ("italic", format_text (txt(2:end-1), formatter));
+        case 7
+          # Monospaced
+          txt = cstr{j};
+          cstr{j} = formatter ("monospaced", format_text (txt(2:end-1), ...
+            formatter));
+      endswitch
+    endfor
+    placeholder_cstr = [placeholder_cstr, cstr];
   endfor
-  ## Loop because of inlined expressions, e.g. *BOLD _ITALIC_*
-  do
-    oldstr = str;
-    ## Bold
-    str = regexprep (str, '\*([^*$_|]*)\*', formatter ("bold", "$1"));
-    ## Italic
-    str = regexprep (str, '_([^_$|*]*)_', formatter ("italic", "$1"));
-    ## Monospaced
-    str = regexprep (str, '\|([^|$_*]*)\|', formatter ("monospaced", "$1"));
-  until (strcmp (str, oldstr))
-  ## Restore inline "$" and block "$$" LaTeX math
-  for i = length(math_cstr):-1:1
-    str = strrep (str, ["PUBLISHMATH", num2str(i)], math_cstr{i});
-  endfor
+
   ## Replace special symbols
   str = strrep (str, "(TM)", formatter("TM"));
   str = strrep (str, "(R)", formatter("R"));
+
+  ## Restore placeholders
+  for i = plh:-1:1
+    str = strrep (str, ["PUBLISHPLACEHOLDER", num2str(i)], ...
+      placeholder_cstr{i});
+  endfor
 endfunction
 
 
--- a/test/publish/test_script.m	Wed Oct 26 18:37:39 2016 -0400
+++ b/test/publish/test_script.m	Thu Oct 27 13:35:16 2016 +0200
@@ -189,8 +189,8 @@
 %% Links
 % <https://www.gnu.org/software/octave>
 % <https://www.gnu.org/software/octave GNU Octave Homepage>
-% <octave:FUNCTION DISPLAYED TEXT>
-% <octave:FUNCTION Nested markup and newline
+% <octave:plot DISPLAYED TEXT FOR PLOT FUNCTION>
+% <octave:abs Nested markup and newline
 % PLAIN TEXT(TM) _ITALIC TEXT_(R) *BOLD TEXT* |MONOSPACED TEXT|>
 % <https://www.gnu.org/software/octave Nested markup and newline
 % PLAIN TEXT(TM) _ITALIC TEXT_(R) *BOLD TEXT* |MONOSPACED TEXT|>
@@ -199,8 +199,8 @@
 ## Links
 # <https://www.gnu.org/software/octave>
 # <https://www.gnu.org/software/octave GNU Octave Homepage>
-# <octave:FUNCTION DISPLAYED TEXT>
-# <octave:FUNCTION Nested markup and newline
+# <octave:plot DISPLAYED TEXT FOR PLOT FUNCTION>
+# <octave:abs Nested markup and newline
 # PLAIN TEXT(TM) _ITALIC TEXT_(R) *BOLD TEXT* |MONOSPACED TEXT|>
 # <https://www.gnu.org/software/octave Nested markup and newline
 # PLAIN TEXT(TM) _ITALIC TEXT_(R) *BOLD TEXT* |MONOSPACED TEXT|>