changeset 12086:804dc0f7ee3d octave-forge

Use regexp rather than index
author prnienhuis
date Tue, 01 Oct 2013 21:49:13 +0000
parents 20f1e936de8c
children 46e8ff7a04d9
files main/io/inst/getxmlnode.m
diffstat 1 files changed, 46 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/main/io/inst/getxmlnode.m	Mon Sep 30 19:54:54 2013 +0000
+++ b/main/io/inst/getxmlnode.m	Tue Oct 01 21:49:13 2013 +0000
@@ -17,39 +17,69 @@
 ## -*- texinfo -*- 
 ## @deftypefn {Function File} {@var{node} =} getxmlnode (@var{xml}, @var{nname})
 ## @deftypefnx {Function File} {@var{node} =} getxmlnode (@var{xml}, @var{nname}, @var{is})
+## @deftypefnx {Function File} {@var{node} =} getxmlnode (@var{xml}, @var{nname}, @var{is}, @var{contnt})
 ## Get string representing the first xml node @var{nname} from xml file in
 ## string @var{xml}, optionally starting at position @var{is}, and return
-## start and end indices.
+## start and end indices. If @var{contnt} is TRUE, return the portion of the
+## node between the outer tags.
 ##
 ## @seealso{}
 ## @end deftypefn
 
 ## Author: Philip Nienhuis <prnienhuis at users.sf.net>
 ## Created: 2013-09-08
+## Updates
+## 2013-09-30 Use regexp for start & end positions as index catches false positives
+## 2013-10-01 Input validation
+##     ''     Further simplified using regexp
+##     ''     Option to return just node contents
 
-function [ node, spos, epos ] = getxmlnode (xml, nname, is=1)
+function [ node, spos, epos ] = getxmlnode (xml, nname, is=1, contnt=0)
+
+  if (nargin >= 3 && isempty (is))
+    is = 1;
+  endif
+
+  ## Input validation
+  if (! ischar (xml) || ! ischar (nname))
+    error ("getxmlnode: text strings expected for first two args");
+  elseif (nargin==3 && (! islogical (is) && ! isnumeric (is)))
+    error ("getxmlnode: logicalor numerical value expected for arg #3");
+  elseif (nargin==4 && (! islogical (contnt) && ! isnumeric (contnt)))
+    error ("getxmlnode: logicalor numerical value expected for arg #3");
+  endif
+
+  is = max (is, 1);
 
   node = '';
-  spos = index (xml(is:end), ["<" nname]);
-  if (spos)
+  ## Start tag must end with either > or a space preceding an attribute
+  spos = regexp (xml(is:end), sprintf ("<%s( |>)", nname));
+  if (! isempty (spos))
     ## Apparently a node exists. Get its end. Maybe it is a single node
     ## ending in "/>"
-    epos = index (xml(is+spos:end), "/>");
-    ## Do check if the "/>" really belongs to this node
-    if ((! epos) || index (xml(is+spos:is+spos+epos), "><"))
-      ## Apparently it is a composite node 
-      epos = index (xml(is+spos:end), ["</" nname ">"]);
-      if (! epos)
-        ## Apparently the xml is invalid?
-        error ("getxmlnode: couldn't find matching end tag for %s", nname);
+    spos = spos(1);
+    [~, epos] = regexp (xml(is+spos:end), sprintf ("(</%s>|%s[^><]*/>)", nname, nname));
+    if (! isempty (epos))
+      epos = epos(1);
+      node = xml(is+spos-1 : is+spos+epos(1)-1);
+      if (contnt)
+        if (strcmp (node(end-1:end), "/>"))
+          ## Single node tag. Return empty string
+          node = '';
+        else
+          ## Get contents between end of opening tag and start of end tag
+          node = node(index (node, ">", "first")+1 : index (node, "<", "last")-1);
+        endif
       endif
-      epos = is + spos + epos + length (nname) + 1;
     else
-      epos = is + spos + epos;
+      error ("getxmlnode: couldn't find matching end tag for %s", nname);
     endif
-    spos = is + spos - 1;
-    node = xml(spos:epos);
+    ## Update position pointers relative to input string
+    epos += is + spos - 1;
+    spos += is - 1;
   else
+    ## No node found; reset pointers
+    spos = 0;
     epos = 0;
   endif