view scripts/strings/str2double.m @ 5185:75a442ecd410

[project @ 2005-03-03 06:59:01 by jwe]
author jwe
date Thu, 03 Mar 2005 06:59:01 +0000
parents d35c5104ffbe
children e58bbd2b9c94
line wrap: on
line source

## Copyright (C) 2004 by Alois Schloegl <a.schloegl@ieee.org>	
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, write to the Free
## Software Foundation, 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA.

## STR2DOUBLE converts strings into numeric values
##  [NUM, STATUS,STRARRAY] = STR2DOUBLE(STR)
##
##  STR2DOUBLE can replace STR2NUM, but avoids the insecure use of EVAL
##  on unknown data [1].
##
##    STR can be of the form '[+-]d[.]dd[[eE][+-]ddd]'
##	where d is any digit from 0 to 9 and [] indicates optional elements.
##    NUM is the corresponding numeric value.
##       if the conversion fails, status is -1 and NUM is NaN.
##    STATUS = 0: conversion was successful
##    STATUS = -1: could not convert string into numeric value
##    STRARRAY is a cell array of strings.
##
##    Elements which are not defined or not valid return NaN and
##        the STATUS becomes -1
##    STR can be also a character array or a cell array of strings.
##        Then, NUM and STATUS return matrices of appropriate size.
##
##    STR can also contain multiple elements.
##    default row-delimiters are:
##        NEWLINE, CARRIAGE RETURN and SEMICOLON i.e. ASCII 10, 13 and 59.
##    default column-delimiters are:
##        TAB, SPACE and COMMA i.e. ASCII 9, 32, and 44.
##    default decimal delimiter is '.' char(46); some sources (e.g.
##	Tab-delimited text files generated by Excel export in Europe)
##	might use ',' as decimal delimiter instead.
##
##  [NUM, STATUS] = STR2DOUBLE(STR,CDELIM,RDELIM,DDELIM)
##       CDELIM .. [OPTIONAL] user-specified column delimiter
##       RDELIM .. [OPTIONAL] user-specified row delimiter
##       DDELIM .. [OPTIONAL] user-specified decimal delimiter
##       CDELIM and RDELIM must contain only
##       NULL, TAB, NEWLINE, VT, FF, CARRIAGE RETURN, SO, SPACE, '!', '"',
##       COMMA, SLASH, COLON, SEMICOLON, '|', or ()[]{}
##       i.e. ASCII 0,9,10,11,12,13,14,32,33,34,40,41,44,47,58,59,91,93,123,124,125
##       DDELIM must be a single character distinct from CDELIM and RDELIM.
##
##    Examples:
##	str2double('-.1e-5')
##	   ans = -1.0000e-006
##
## 	str2double('.314e1, 44.44e-1, .7; -1e+1')
##	ans =
##	    3.1400    4.4440    0.7000
##	  -10.0000       NaN       NaN
##
##	line ='200,300,400,NaN,-inf,cd,yes,no,999,maybe,NaN';
##	[x,status]=str2double(line)
##	x =
##	   200   300   400   NaN  -Inf   NaN   NaN   NaN   999   NaN   NaN
##	status =
##	    0     0     0     0     0    -1    -1    -1     0    -1     0
##
## Reference(s):
## [1] David A. Wheeler, Secure Programming for Linux and Unix HOWTO.
##    http://en.tldp.org/HOWTO/Secure-Programs-HOWTO/

function [num, status, strarray] = str2double (s, cdelim, rdelim, ddelim)

  ## Convert text to numeric values without using EVAL.
  ##
  ##   s       string, char matrix (one string per row), cell array of
  ##           strings, or numeric array of character codes in 0..255
  ##   cdelim  column delimiters (default: TAB, SPACE, COMMA)
  ##   rdelim  row delimiters (default: NUL, LF, CR, SEMICOLON)
  ##   ddelim  single decimal delimiter (default: ".")
  ##
  ##   num       matrix of converted values, NaN where conversion failed
  ##   status    same size as num; 0 on success, -1 on failure
  ##   strarray  cell array holding the token each element was parsed from

  ## Character codes accepted as row/column delimiters.
  valid_delim = [0, 9:14, 32:34, double("()[]{},;:\"|/")];

  if (nargin < 1)
    error ("missing input argument");
  endif

  if (nargin < 2)
    ## column delimiter
    cdelim = char ([9, 32, abs(",")]);
  else
    ## sorted, duplicate-free row vector
    cdelim = char (unique (double (cdelim(:)))).';
  endif

  if (nargin < 3)
    ## row delimiter
    rdelim = char ([0, 10, 13, abs(";")]);
  else
    ## sorted, duplicate-free row vector
    rdelim = char (unique (double (rdelim(:)))).';
  endif

  if (nargin < 4)
    ddelim = '.';
  elseif (length (ddelim) != 1)
    error ("decimal delimiter must be exactly one character");
  endif

  ## Row, column and decimal delimiters must be pairwise distinct: the
  ## union must be as large as the sum of the individual set sizes
  ## (ddelim contributes exactly one element).
  delim = unique (double ([cdelim, rdelim, ddelim]));
  if (length (delim) < (length (cdelim) + length (rdelim)) + 1)
    error ("row, column and decimal delimiter are not distinct");
  endif

  ## Every row/column delimiter must belong to the allowed set.
  if (! all (ismember (double ([cdelim, rdelim]), valid_delim)))
    error ("invalid delimiters!");
  endif

  ## Numeric input is accepted only if it can be read as character codes.
  ## Reduce over s(:) so that matrices yield a scalar condition.
  if (isnumeric (s))
    if (all (s(:) < 256) && all (s(:) >= 0))
      s = char (s);
    else
      error ("str2double: input variable must be a string");
    endif
  endif

  if (isempty (s))
    num = [];
    status = 0;
    ## define the third output too, so callers may request it
    strarray = {};
    return;
  elseif (iscell (s))
    strarray = s;
  elseif (ischar (s) && all (size (s) > 1))
    ## char matrix: each row becomes one string with the surrounding
    ## whitespace stripped.
    strarray = cell (size (s, 1), 1);
    for k = 1:size (s, 1)
      tmp = find (! isspace (s(k,:)));
      strarray{k,1} = s(k,min(tmp):max(tmp));
    endfor
  elseif (ischar (s))
    strarray = {};
    ## Append a row delimiter as stop sign so that the last token is
    ## terminated like every other one.
    s(end+1) = rdelim(1);
    RD = ismember (s, rdelim);        # positions of row delimiters
    CD = RD | ismember (s, cdelim);   # positions of any delimiter

    k1 = 1; # current row
    k2 = 0; # current column

    sl = length (s);
    ix = 1;
    ## skip leading delimiters
    while (ix < sl && CD(ix))
      ix++;
    endwhile
    ta = ix;  # start index of the current token
    while (ix <= sl)
      if (CD(ix))
        ## a delimiter ends the token s(ta:ix-1)
        k2++;
        strarray{k1,k2} = s(ta:ix-1);

        ## Swallow the whole run of delimiters; if it contains a row
        ## delimiter, the next token starts a new row.
        flag = false;
        while (CD(ix) && ix < sl)
          flag = flag || RD(ix);
          ix++;
        endwhile

        if (flag)
          k2 = 0;
          k1++;
        endif
        ta = ix;
      endif
      ix++;
    endwhile
  else
    error ("str2double: invalid input argument");
  endif

  [nr, nc] = size (strarray);
  status = zeros (nr, nc);
  num = repmat (NaN, nr, nc);

  for k1 = 1:nr
    for k2 = 1:nc
      t = strarray{k1,k2};
      if (length (t) == 0)
        ## missing element (e.g. short row): flag as failure
        status(k1,k2) = -1;
        num(k1,k2) = NaN;
      else
        ## Look past an optional sign so that "inf"/"nan" can be matched.
        v = 1;
        if (t(1) == "-")
          v = -1;
          l = min (2, length (t));
        elseif (t(1) == "+")
          l = min (2, length (t));
        else
          l = 1;
        endif

        if (strcmpi (t(l:end), "inf"))
          num(k1,k2) = v*Inf;
        elseif (strcmpi (t(l:end), "NaN"))
          num(k1,k2) = NaN;
        else
          ## sscanf only understands ".", so translate a user-specified
          ## decimal delimiter first.
          if (ddelim != ".")
            t(t==ddelim) = ".";
          endif
          ## "%s" catches trailing garbage: the conversion is valid only
          ## if exactly one item (the number) was read.
          [v, tmp2, c] = sscanf (char (t), "%f %s", "C");
          if (c == 1)
            num(k1,k2) = v;
          else
            num(k1,k2) = NaN;
            status(k1,k2) = -1;
          endif
        endif
      endif
    endfor
  endfor

endfunction