diff scripts/general/publish.m @ 23964:1a0cbb573a67

publish: Improve escaping of special characters in output (bug #51782) * scripts/general/publish.m: Make use of new publish formatter interface function "do_escape_special_chars". Replace (TM) and (R) by placeholders. Avoid unnecessary merging of text paragraphs. * scripts/general/private/__publish_html_output__.m: New publish formatter interface function "do_escape_special_chars" to properly escape HTML entities. Usage where appropriate. * scripts/general/private/__publish_latex_output__.m: Rename "escape_latex" to a more general purpose escaping function "do_escape_special_chars" of all formatters. Usage where appropriate. Extend escaped LaTeX characters. * test/publish/test_script.m: Various new stress test cases for publish markup.
author Kai T. Ohlhus <k.ohlhus@gmail.com>
date Tue, 29 Aug 2017 18:01:48 +0200
parents 58b76c741c3d
children b1d1229d9e83
line wrap: on
line diff
--- a/scripts/general/publish.m	Mon Aug 28 18:50:26 2017 +0200
+++ b/scripts/general/publish.m	Tue Aug 29 18:01:48 2017 +0200
@@ -685,14 +685,8 @@
     ## * Inline "$" and block "$$" LaTeX math
     ## * Links
     ## * Trademark symbols
-    block = strjoin (block, "\n");
-    if (isempty (p_content) || ! strcmp (p_content{end}.type, "text"))
-      p_content{end+1}.type = "text";
-      p_content{end}.content = block;
-    else
-      p_content{end}.content = strjoin ({p_content{end}.content, block}, ...
-                                        "\n");
-    endif
+    p_content{end+1}.type = "text";
+    p_content{end}.content = strjoin (block, "\n");
   endfor
 endfunction
 
@@ -730,7 +724,8 @@
     [~, title_str] = fileparts (doc.m_source_file_name);
   endif
 
-  content = formatter ("header", title_str,
+  content = formatter ("header",
+                       formatter ("escape_special_chars", title_str),
                        format_output (doc.intro, formatter, options),
                        get_toc (doc.body, formatter));
   content = [content, format_output(doc.body, formatter, options)];
@@ -783,7 +778,7 @@
     switch (cstr{i}.type)
       case "code"
         if (options.showCode)
-          str = [str, formatter(cstr{i}.type, cstr{i}.content)];
+          str = [str, formatter("code", cstr{i}.content)];
         endif
         if ((options.evalCode) && (! isempty (cstr{i}.output)))
           str = [str, formatter("code_output", cstr{i}.output)];
@@ -842,6 +837,7 @@
   ## 6) Bold *text*
   ## 7) Italic _text_
   ## 8) Monospaced |text|
+  ## 9) (TM) or (R)
   regexes = {'<\S{3,}[^\s<>]*>', ...
              '<octave:[^\s<>]* *[^<>$]*>', ...
              '<\S{3,}[^\s<>]* *[^<>$]*>', ...
@@ -849,7 +845,8 @@
              regex_helper('\$', '$'), ...
              regex_helper('\*', '*'), ...
              regex_helper('_', '_'), ...
-             regex_helper('\|', '|')};
+             regex_helper('\|', '|'), ...
+             '\((TM|R)\)'};
 
   ## Function to escape some special characters for the GNU Octave manual,
   ## see https://www.gnu.org/software/texinfo/manual/texinfo/html_node/HTML-Xref-Node-Name-Expansion.html
@@ -912,14 +909,17 @@
           txt = cstr{j};
           cstr{j} = formatter ("monospaced", format_text (txt(2:end-1), ...
                                formatter));
+        case 9
+          ## (TM) or (R)
+          txt = cstr{j};
+          cstr{j} = formatter (txt(2:end-1));
       endswitch
     endfor
     placeholder_cstr = [placeholder_cstr, cstr];
   endfor
 
   ## Replace special symbols
-  str = strrep (str, "(TM)", formatter ("TM"));
-  str = strrep (str, "(R)", formatter ("R"));
+  str = formatter ("escape_special_chars", str);
 
   ## Restore placeholders
   for i = plh:-1:1