changeset 23926:58b76c741c3d

publish: improve detection of block markup (bug #51782) * scripts/general/publish.m: improve detection of block markup by using a recursive strategy with regular expressions, rather than parsing line by line. With the previous strategy, an <html> or <latex> block could never span an empty line. * scripts/general/private/__publish_html_output__.m (do_html), scripts/general/private/__publish_latex_output__.m (do_latex): surround output by newlines. * test/publish/test_script.m: New test cases for block markup with empty lines.
author Kai T. Ohlhus <k.ohlhus@gmail.com>
date Mon, 21 Aug 2017 17:22:28 +0200
parents d64985eaf56d
children e3a36f84d01d
files scripts/general/private/__publish_html_output__.m scripts/general/private/__publish_latex_output__.m scripts/general/publish.m test/publish/test_script.m
diffstat 4 files changed, 91 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/general/private/__publish_html_output__.m	Sun Aug 20 21:09:27 2017 -0700
+++ b/scripts/general/private/__publish_html_output__.m	Mon Aug 21 17:22:28 2017 +0200
@@ -257,7 +257,7 @@
 endfunction
 
 function outstr = do_html (str)
-  outstr = str;
+  outstr = ["\n" str "\n"];
 endfunction
 
 function outstr = do_latex (str)
--- a/scripts/general/private/__publish_latex_output__.m	Sun Aug 20 21:09:27 2017 -0700
+++ b/scripts/general/private/__publish_latex_output__.m	Mon Aug 21 17:22:28 2017 +0200
@@ -225,7 +225,7 @@
 endfunction
 
 function outstr = do_latex (str)
-  outstr = str;
+  outstr = ["\n" str "\n"];
 endfunction
 
 function outstr = do_link (url_str, str)
--- a/scripts/general/publish.m	Sun Aug 20 21:09:27 2017 -0700
+++ b/scripts/general/publish.m	Mon Aug 21 17:22:28 2017 +0200
@@ -587,8 +587,34 @@
     return;
   endif
 
+  ## Extract <html> and <latex> blocks recursively.
+  content_str = strjoin (content, "\n");
+  tags = {"html", "latex"};
+  for i = 1:length(tags)
+    tok = regexp (content_str, ...
+      ['(.*?)(^|\n\n)(<', tags{i}, '>)\n(.*?)\n(<\/', ...
+        tags{i}, '>)($|\n\n)(.*)'], "tokens", "once");
+    if (! isempty (tok))
+      ## If there was some text before that block --> recursion
+      if (! strcmpi (tok{1}, ["<", tags{i}, ">"]))
+        p_content = parse_paragraph_content (strsplit (tok{1}, "\n"));
+        tok(1:2) = [];
+      endif
+      ## Extract the block content
+      p_content{end+1}.type = tags{i};
+      p_content{end}.content = tok{2};
+      ## If there was some text after that block --> recursion
+      if (length (tok) == 5)
+        p_content = [p_content, ...
+          parse_paragraph_content(strsplit (tok{5}, "\n"))];
+      endif
+      return;
+    endif
+  endfor
+
   ## Split into blocks separated by empty lines
   idx = [0, find(cellfun (@isempty, content)), length(content) + 1];
+
   ## For each block
   for i = find (diff (idx) > 1)
     block = content(idx(i) + 1:idx(i+1) - 1);
@@ -652,58 +678,21 @@
       continue;
     endif
 
-    ## Parse remaining blocks line by line
-    j = 1;
-    while (j <= numel (block))
-      ## HTML markup
-      if (strcmpi (block{j}, "<html>"))
-        start_html = j + 1;
-        while (j < numel (block) && ! strcmpi (block{j}, "</html>"))
-          j++;
-        endwhile
-        if (j == numel (block) && ! strcmpi (block{j}, "</html>"))
-          warning ("publish: no closing </html> found");
-        else
-          j++;  # Skip closing tag
-        endif
-        if (j > start_html)
-          p_content{end+1}.type = "html";
-          p_content{end}.content = strjoin (block(start_html:j-2), "\n");
-        endif
-      ## LaTeX markup
-      elseif (strcmpi (block{j}, "<latex>"))
-        start_latex = j + 1;
-        while (j < numel (block) && ! strcmpi (block{j}, "</latex>"))
-          j++;
-        endwhile
-        if (j == numel (block) && ! strcmpi (block{j}, "</latex>"))
-          warning ("publish: no closing </latex> found");
-        else
-          j++;  # Skip closing tag
-        endif
-        if (j > start_latex)
-          p_content{end+1}.type = "latex";
-          p_content{end}.content = strjoin (block(start_latex:j-2), "\n");
-        endif
-      ## Remaining normal text or markups belonging to normal text
-      ## that are handled while output generation:
-      ##
-      ## * Italic "_", bold "*", and monospaced "|" text
-      ## * Inline "$" and block "$$" LaTeX math
-      ## * Links
-      ## * Trademark symbols
-      else
-        if (j == 1 || isempty (p_content)
-            || ! strcmp (p_content{end}.type, "text"))
-          p_content{end+1}.type = "text";
-          p_content{end}.content = block{j};
-        else
-          p_content{end}.content = strjoin ({p_content{end}.content, ...
-                                             block{j}}, "\n");
-        endif
-        j++;
-      endif
-    endwhile
+    ## Now it can be only normal text or markups belonging to normal text
+    ## that are handled while output generation:
+    ##
+    ## * Italic "_", bold "*", and monospaced "|" text
+    ## * Inline "$" and block "$$" LaTeX math
+    ## * Links
+    ## * Trademark symbols
+    block = strjoin (block, "\n");
+    if (isempty (p_content) || ! strcmp (p_content{end}.type, "text"))
+      p_content{end+1}.type = "text";
+      p_content{end}.content = block;
+    else
+      p_content{end}.content = strjoin ({p_content{end}.content, block}, ...
+                                        "\n");
+    endif
   endfor
 endfunction
 
--- a/test/publish/test_script.m	Sun Aug 20 21:09:27 2017 -0700
+++ b/test/publish/test_script.m	Mon Aug 21 17:22:28 2017 +0200
@@ -225,6 +225,30 @@
 # </html>
 #
 
+%% HTML Markup with empty lines
+% <html>
+% <table><tr>
+% <td style="border: 1px solid black;">one</td>
+% <td style="border: 1px solid black;">two</td></tr>
+% </table>
+%
+% and some text.
+% </html>
+%
+
+## HTML Markup with empty lines
+# <html>
+# <table>
+# <tr>
+# <td style="border: 1px solid black;">one</td>
+# <td style="border: 1px solid black;">two</td>
+# </tr>
+# </table>
+#
+# and some text.
+# </html>
+#
+
 %% LaTeX Markup
 % <latex>
 % \begin{equation}
@@ -245,6 +269,30 @@
 # </latex>
 #
 
+%% LaTeX Markup with empty lines
+% <latex>
+% Some text
+%
+% \begin{equation}
+% \begin{pmatrix}
+% 1 & 2 \\ 3 & 4
+% \end{pmatrix}
+% \end{equation}
+% </latex>
+%
+
+## LaTeX Markup with empty lines
+# <latex>
+# Some text
+#
+# \begin{equation}
+# \begin{pmatrix}
+# 1 & 2 \\ 3 & 4
+# \end{pmatrix}
+# \end{equation}
+# </latex>
+#
+
 %% Long void
 %
 %