Mercurial > octave-nkf
view scripts/strings/str2double.m @ 5185:75a442ecd410
[project @ 2005-03-03 06:59:01 by jwe]
author | jwe |
---|---|
date | Thu, 03 Mar 2005 06:59:01 +0000 |
parents | d35c5104ffbe |
children | e58bbd2b9c94 |
line wrap: on
line source
## Copyright (C) 2004 by Alois Schloegl <a.schloegl@ieee.org>
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, write to the Free
## Software Foundation, 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA.

## STR2DOUBLE converts strings into numeric values
##  [NUM, STATUS,STRARRAY] = STR2DOUBLE(STR)
##
##    STR2DOUBLE can replace STR2NUM, but avoids the insecure use of EVAL
##    on unknown data [1].
##
##    STR can be of the form '[+-]d[.]dd[[eE][+-]ddd]'
##    d can be any digit from 0 to 9, [] indicate optional elements
##    NUM is the corresponding numeric value.
##       If the conversion fails, STATUS is -1 and NUM is NaN.
##    STATUS = 0: conversion was successful
##    STATUS = -1: could not convert string into numeric value
##    STRARRAY is a cell array of strings.
##
##    Elements which are not defined or not valid return NaN and
##        the STATUS becomes -1
##    STR can also be a character array or a cell array of strings.
##        Then, NUM and STATUS return matrices of appropriate size.
##
##    STR can also contain multiple elements.
##    default row-delimiters are:
##        NEWLINE, CARRIAGE RETURN and SEMICOLON i.e. ASCII 10, 13 and 59.
##    default column-delimiters are:
##        TAB, SPACE and COMMA i.e. ASCII 9, 32, and 44.
##    default decimal delimiter is '.' char(46); sometimes (e.g. in
##        Tab-delimited text files generated by Excel export in Europe)
##        ',' is used as the decimal delimiter instead.
##
##  [NUM, STATUS] = STR2DOUBLE(STR,CDELIM,RDELIM,DDELIM)
##       CDELIM .. [OPTIONAL] user-specified column delimiter
##       RDELIM .. [OPTIONAL] user-specified row delimiter
##       DDELIM .. [OPTIONAL] user-specified decimal delimiter
##                 (exactly one character)
##       CDELIM and RDELIM must contain only
##       NULL, NEWLINE, CARRIAGE RETURN, SEMICOLON, COLON, SLASH, TAB, SPACE, COMMA, or ()[]{}
##       i.e. ASCII 0,9,10,11,12,13,14,32,33,34,40,41,44,47,58,59,91,93,123,124,125
##       CDELIM, RDELIM and DDELIM must not share any character.
##
##    Examples:
##      str2double('-.1e-5')
##      ans = -1.0000e-006
##
##      str2double('.314e1, 44.44e-1, .7; -1e+1')
##      ans =
##         3.1400    4.4440    0.7000
##       -10.0000       NaN       NaN
##
##      line ='200,300,400,NaN,-inf,cd,yes,no,999,maybe,NaN';
##      [x,status]=str2double(line)
##      x =
##         200   300   400   NaN  -Inf   NaN   NaN   NaN   999   NaN   NaN
##      status =
##          0     0     0     0     0    -1    -1    -1     0    -1     0
##
##      str2double('1,5;2,5', char(9), ';', ',')
##      ans =
##         1.5000
##         2.5000
##
## Reference(s):
## [1] David A. Wheeler, Secure Programming for Linux and Unix HOWTO.
##     http://en.tldp.org/HOWTO/Secure-Programs-HOWTO/

function [num, status, strarray] = str2double (s, cdelim, rdelim, ddelim)

  ## digits, sign, exponent,NaN,Inf
  ## valid_char = '0123456789eE+-.nNaAiIfF';

  ## characters that are accepted as row or column delimiters
  valid_delim = char (sort ([0, 9:14, 32:34, abs("()[]{},;:\"|/")]));

  if (nargin < 1)
    error ("missing input argument");
  endif

  if (nargin < 2)
    ## default column delimiters: TAB, SPACE, COMMA
    cdelim = char ([9, 32, abs(",")]);
  else
    ## sorted row vector without duplicates
    cdelim = char (unique (abs (cdelim(:)))).';
  endif

  if (nargin < 3)
    ## default row delimiters: NUL, NEWLINE, CARRIAGE RETURN, SEMICOLON
    rdelim = char ([0, 10, 13, abs(";")]);
  else
    ## sorted row vector without duplicates
    rdelim = char (unique (abs (rdelim(:)))).';
  endif

  if (nargin < 4)
    ddelim = '.';
  elseif (length (ddelim) != 1)
    error ("decimal delimiter must be exactly one character");
  endif

  ## Row, column and decimal delimiters must be pairwise distinct:
  ## cdelim and rdelim are already duplicate-free and ddelim is a single
  ## character, so any overlap shrinks the combined set.
  delim = unique (abs ([cdelim, rdelim, ddelim]));
  if (length (delim) < (length (cdelim) + length (rdelim) + 1))
    error ("row, column and decimal delimiter are not distinct");
  endif

  ## every row and column delimiter must be one of the allowed characters
  codes = abs ([cdelim, rdelim]);
  for k = 1:length (codes)
    if (! any (codes(k) == valid_delim))
      error ("invalid delimiters!");
    endif
  endfor

  ## numeric input is interpreted as a vector/matrix of character codes
  if (isnumeric (s))
    ## reduce over all elements so that && always sees scalars
    if (all (s(:) < 256) && all (s(:) >= 0))
      s = char (s);
    else
      error ("str2double: input variable must be a string");
    endif
  endif

  if (isempty (s))
    num = [];
    status = 0;
    ## define the third output too, so [n, st, sa] = str2double ("") works
    strarray = {};
    return;

  elseif (iscell (s))
    ## each cell is converted as one element
    strarray = s;

  elseif (ischar (s) && all (size (s) > 1))
    ## char matrix: every row becomes one element, with leading and
    ## trailing whitespace removed
    strarray = cell (size (s, 1), 1);
    for k = 1:size (s, 1)
      tmp = find (! isspace (s(k,:)));
      strarray{k,1} = s(k,min(tmp):max(tmp));
    endfor

  elseif (ischar (s))
    ## single string: split it into rows and columns at the delimiters
    strarray = {};

    ## add stop sign; makes sure last digit is not skipped
    s(end+1) = rdelim(1);

    ## RD marks row delimiters, CD marks row or column delimiters
    RD = zeros (size (s));
    for k = 1:length (rdelim)
      RD = RD | (s == rdelim(k));
    endfor
    CD = RD;
    for k = 1:length (cdelim)
      CD = CD | (s == cdelim(k));
    endfor

    k1 = 1;   # current row
    k2 = 0;   # current column

    sl = length (s);
    ix = 1;
    ## skip leading delimiters
    while (ix < sl && CD(ix))
      ix++;
    endwhile
    ta = ix;    # start index of the current element
    te = [];    # end index of the current element, empty until known
    while (ix <= sl)
      if (ix == sl)
        te = sl;
      endif
      if (CD(ix))
        te = ix - 1;
      endif
      if (! isempty (te))
        k2++;
        strarray{k1,k2} = s(ta:te);

        ## skip the run of delimiters that follows the element; if it
        ## contains a row delimiter the next element starts a new row
        new_row = 0;
        while (CD(ix) && ix < sl)
          new_row = new_row | RD(ix);
          ix++;
        endwhile

        if (new_row)
          k2 = 0;
          k1++;
        endif
        ta = ix;
        te = [];
      endif
      ix++;
    endwhile

  else
    error ("str2double: invalid input argument");
  endif

  [nr, nc] = size (strarray);
  status = zeros (nr, nc);
  num = repmat (NaN, nr, nc);

  for k1 = 1:nr
    for k2 = 1:nc
      t = strarray{k1,k2};
      if (length (t) == 0)
        ## missing or empty element: return error code
        status(k1,k2) = -1;
        num(k1,k2) = NaN;
      else
        ## look past an optional leading sign when testing for Inf/NaN
        sgn = 1;
        first = 1;
        if (t(1) == "-")
          sgn = -1;
          first = min (2, length (t));
        elseif (t(1) == "+")
          first = min (2, length (t));
        endif

        if (strcmpi (t(first:end), "inf"))
          num(k1,k2) = sgn * Inf;
        elseif (strcmpi (t(first:end), "NaN"))
          num(k1,k2) = NaN;
        else
          ## sscanf only understands "." as decimal point, so map a
          ## user-specified decimal delimiter onto it first
          if (ddelim != ".")
            t(t == ddelim) = ".";
          endif
          ## cnt == 1 means the number was read and nothing followed it
          [val, rest, cnt] = sscanf (char (t), "%f %s", "C");
          if (cnt == 1)
            num(k1,k2) = val;
          else
            num(k1,k2) = NaN;
            status(k1,k2) = -1;
          endif
        endif
      endif
    endfor
  endfor

endfunction