Mercurial > forge
changeset 12086:804dc0f7ee3d octave-forge
Use regexp rather than index
author | prnienhuis |
---|---|
date | Tue, 01 Oct 2013 21:49:13 +0000 |
parents | 20f1e936de8c |
children | 46e8ff7a04d9 |
files | main/io/inst/getxmlnode.m |
diffstat | 1 files changed, 46 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/main/io/inst/getxmlnode.m Mon Sep 30 19:54:54 2013 +0000
+++ b/main/io/inst/getxmlnode.m Tue Oct 01 21:49:13 2013 +0000
@@ -17,39 +17,69 @@
 ## -*- texinfo -*-
 ## @deftypefn {Function File} {@var{node} =} getxmlnode (@var{xml}, @var{nname})
 ## @deftypefnx {Function File} {@var{node} =} getxmlnode (@var{xml}, @var{nname}, @var{is})
+## @deftypefnx {Function File} {@var{node} =} getxmlnode (@var{xml}, @var{nname}, @var{is}, @var{contnt})
 ## Get string representing the first xml node @var{nname} from xml file in
 ## string @var{xml}, optionally starting at position @var{is}, and return
-## start and end indices.
+## start and end indices. If @var{contnt} is TRUE, return the portion of the
+## node between the outer tags.
 ##
 ## @seealso{}
 ## @end deftypefn
 
 ## Author: Philip Nienhuis <prnienhuis at users.sf.net>
 ## Created: 2013-09-08
+## Updates
+## 2013-09-30 Use regexp for start & end positions as index catches false positives
+## 2013-10-01 Input validation
+## '' Further simplified using regexp
+## '' Option to return just node contents
 
-function [ node, spos, epos ] = getxmlnode (xml, nname, is=1)
+function [ node, spos, epos ] = getxmlnode (xml, nname, is=1, contnt=0)
+
+  if (nargin >= 3 && isempty (is))
+    is = 1;
+  endif
+
+  ## Input validation
+  if (! ischar (xml) || ! ischar (nname))
+    error ("getxmlnode: text strings expected for first two args");
+  elseif (nargin==3 && (! islogical (is) && ! isnumeric (is)))
+    error ("getxmlnode: logicalor numerical value expected for arg #3");
+  elseif (nargin==4 && (! islogical (contnt) && ! isnumeric (contnt)))
+    error ("getxmlnode: logicalor numerical value expected for arg #3");
+  endif
+
+  is = max (is, 1);
 
   node = '';
-  spos = index (xml(is:end), ["<" nname]);
-  if (spos)
+  ## Start tag must end with either > or a space preceding an attribute
+  spos = regexp (xml(is:end), sprintf ("<%s( |>)", nname));
+  if (! isempty (spos))
     ## Apparently a node exists. Get its end. Maybe it is a single node
     ## ending in "/>"
-    epos = index (xml(is+spos:end), "/>");
-    ## Do check if the "/>" really belongs to this node
-    if ((! epos) || index (xml(is+spos:is+spos+epos), "><"))
-      ## Apparently it is a composite node
-      epos = index (xml(is+spos:end), ["</" nname ">"]);
-      if (! epos)
-        ## Apparently the xml is invalid?
-        error ("getxmlnode: couldn't find matching end tag for %s", nname);
+    spos = spos(1);
+    [~, epos] = regexp (xml(is+spos:end), sprintf ("(</%s>|%s[^><]*/>)", nname, nname));
+    if (! isempty (epos))
+      epos = epos(1);
+      node = xml(is+spos-1 : is+spos+epos(1)-1);
+      if (contnt)
+        if (strcmp (node(end-1:end), "/>"))
+          Single node tag. Return empty string
+          node = '';
+        else
+          ## Get contents between end of opening tag en start of end tag
+          node = node(index (node, ">", "first")+1 : index (node, "<", "last")-1);
+        endif
       endif
-      epos = is + spos + epos + length (nname) + 1;
     else
-      epos = is + spos + epos;
+      error ("getxmlnode: couldn't find matching end tag for %s", nname);
     endif
-    spos = is + spos - 1;
-    node = xml(spos:epos);
+    ## Update position pointers relative to input string
+    epos += is + spos - 1;
+    spos += is - 1;
   else
+    ## No node found; reset pointers
+    spos = 0;
     epos = 0;
   endif
 
```