view main/system-identification/devel/tisean/source_c/lfo-test.c @ 9894:82ff20b4d849 octave-forge

system-identitifaction: Adding devel TISEAN files
author jpicarbajal
date Wed, 28 Mar 2012 13:32:37 +0000
parents
children
line wrap: on
line source

/*
 *   This file is part of TISEAN
 *
 *   Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
 *
 *   TISEAN is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   TISEAN is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with TISEAN; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*Author: Rainer Hegger */
/*Changes:
  Sep 8, 2006: Add -o functionality
  Sep 7, 2006: Completely rewritten to handle multivariate data
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "routines/tsa.h"
#include <math.h>

#define WID_STR "Estimates the average forecast error of a local\n\t\
linear fit"


/*number of boxes for the neighbor search algorithm*/
#define NMAX 512

unsigned int nmax=(NMAX-1),comp1,hdim,**indexes;
long **box,*list;
unsigned long *found,*hfound;
double **series;
double epsilon;
double **mat,**imat,*vec,*localav,*foreav;

char epsset=0,causalset=0;
unsigned int verbosity=VER_INPUT|VER_FIRST_LINE;
unsigned int COMP=1,EMBED=2,DIM,DELAY=1,MINN=30,STEP=1;
double EPS0=1.e-3,EPSF=1.2;
unsigned long LENGTH=ULONG_MAX,exclude=0,CLENGTH=ULONG_MAX,causal;
char *infile=NULL,*COLUMN=NULL,*outfile=NULL;
char dimset=0,stout=1;

void show_options(char *progname)
{
  what_i_do(progname,WID_STR);
  fprintf(stderr," Usage: %s [options]\n",progname);
  fprintf(stderr," Options:\n");
  fprintf(stderr,"Everything not being a valid option will be interpreted"
          " as a possible"
          " datafile.\nIf no datafile is given stdin is read. Just - also"
          " means stdin\n");
  fprintf(stderr,"\t-l # of data to use [default: whole file]\n");
  fprintf(stderr,"\t-x # of lines to be ignored [default: 0]\n");
  fprintf(stderr,"\t-c columns to read [default: 1]\n");
  fprintf(stderr,"\t-m # of components, embedding dimension "
	  "[default: %u,%u]\n",COMP,EMBED);
  fprintf(stderr,"\t-d delay [default: 1]\n");
  fprintf(stderr,"\t-n iterations [default: length]\n");
  fprintf(stderr,"\t-k minimal number of neighbors for the fit "
	  "[default: 30]\n");
  fprintf(stderr,"\t-r neighborhoud size to start with "
	  "[default: (data interval)/1000]\n");
  fprintf(stderr,"\t-f factor to increase size [default: 1.2]\n");
  fprintf(stderr,"\t-s steps to forecast [default: 1]\n");
  fprintf(stderr,"\t-C width of causality window [default: steps]\n");
  fprintf(stderr,"\t-o output file [default 'datafile'.fce"
	  " no -o means write to stdout]\n");
  fprintf(stderr,"\t-V verbosity level [default: 1]\n\t\t"
          "0='only panic messages'\n\t\t"
          "1='+ input/output messages'\n\t\t"
	  "2='+ print indiviual forecast errors'\n");
  fprintf(stderr,"\t-h show these options\n");
  exit(0);
}

void scan_options(int n,char **in)
{
  char *out;

  if ((out=check_option(in,n,'l','u')) != NULL)
    sscanf(out,"%lu",&LENGTH);
  if ((out=check_option(in,n,'x','u')) != NULL)
    sscanf(out,"%lu",&exclude);
  if ((out=check_option(in,n,'c','s')) != NULL) {
    COLUMN=out;
    dimset=1;
  }
  if ((out=check_option(in,n,'m','2')) != NULL)
    sscanf(out,"%u,%u",&COMP,&EMBED);
  if ((out=check_option(in,n,'d','u')) != NULL)
    sscanf(out,"%u",&DELAY);
  if ((out=check_option(in,n,'n','u')) != NULL)
    sscanf(out,"%lu",&CLENGTH);
  if ((out=check_option(in,n,'V','u')) != NULL)
    sscanf(out,"%u",&verbosity);
  if ((out=check_option(in,n,'k','u')) != NULL)
    sscanf(out,"%u",&MINN);
  if ((out=check_option(in,n,'r','f')) != NULL) {
    epsset=1;
    sscanf(out,"%lf",&EPS0);
  }
  if ((out=check_option(in,n,'f','f')) != NULL)
    sscanf(out,"%lf",&EPSF);
  if ((out=check_option(in,n,'s','u')) != NULL)
    sscanf(out,"%u",&STEP);
  if ((out=check_option(in,n,'C','u')) != NULL) {
    sscanf(out,"%lu",&causal);
    causalset=1;
  }
  if ((out=check_option(in,n,'o','o')) != NULL) {
    stout=0;
    if (strlen(out) > 0)
      outfile=out;
  }
}

void put_in_boxes(void)
{
  int i,j,n;
  double epsinv;

  epsinv=1.0/epsilon;
  for (i=0;i<NMAX;i++)
    for (j=0;j<NMAX;j++)
      box[i][j]= -1;

  for (n=hdim;n<LENGTH-STEP;n++) {
    i=(int)(series[0][n]*epsinv)&nmax;
    j=(int)(series[comp1][n-hdim]*epsinv)&nmax;
    list[n]=box[i][j];
    box[i][j]=n;
  }
}

unsigned int hfind_neighbors(unsigned long act)
{
  char toolarge;
  int i,j,i1,i2,j1,k,element;
  unsigned long nfound=0;
  unsigned int hcomp,hdel;
  double max,dx,epsinv;

  epsinv=1.0/epsilon;

  i=(int)(series[0][act]*epsinv)&nmax;
  j=(int)(series[comp1][act-hdim]*epsinv)&nmax;
  
  for (i1=i-1;i1<=i+1;i1++) {
    i2=i1&nmax;
    for (j1=j-1;j1<=j+1;j1++) {
      element=box[i2][j1&nmax];
      while (element != -1) {
	max=0.0;
	toolarge=0;
	for (k=0;k<DIM;k += 1) {
	  hcomp=indexes[0][k];
	  hdel=indexes[1][k];
	  dx=fabs(series[hcomp][element-hdel]-series[hcomp][act-hdel]);
	  max=(dx>max) ? dx : max;
	  if (max > epsilon) {
	    toolarge=1;
	    break;
	  }
	  if (toolarge)
	    break;
	}
	if (max <= epsilon)
	  hfound[nfound++]=element;
	element=list[element];
      }
    }
  }
  return nfound;
}

void multiply_matrix(double **mat,double *vec)
{
  double *hvec;
  long i,j;

  check_alloc(hvec=(double*)malloc(sizeof(double)*DIM));
  for (i=0;i<DIM;i++) {
    hvec[i]=0.0;
    for (j=0;j<DIM;j++)
      hvec[i] += mat[i][j]*vec[j];
  }
  for (i=0;i<DIM;i++)
    vec[i]=hvec[i];
  free(hvec);
}

void make_fit(int number,unsigned long act,double *newcast)
{
  double *sj,*si,lavi,lavj,fav;
  unsigned int hci,hdi,hcj,hdj;
  long i,j,n,which;

  for (i=0;i<DIM;i++)
    localav[i]=0.0;
  for (i=0;i<COMP;i++)
    foreav[i]=0.0;

  for (n=0;n<number;n++) {
    which=found[n];
    for (j=0;j<COMP;j++)
      foreav[j] += series[j][which+STEP];
    for (j=0;j<DIM;j++) {
      hcj=indexes[0][j];
      hdj=indexes[1][j];
      localav[j] += series[hcj][which-hdj];
    }
  }

  for (i=0;i<DIM;i++)
    localav[i] /= number;
  for (i=0;i<COMP;i++)
    foreav[i] /= number;

  for (i=0;i<DIM;i++) {
    hci=indexes[0][i];
    hdi=indexes[1][i];
    lavi=localav[i];
    si=series[hci];
    for (j=i;j<DIM;j++) {
      hcj=indexes[0][j];
      hdj=indexes[1][j];
      lavj=localav[j];
      sj=series[hcj];
      mat[i][j]=0.0;
      for (n=0;n<number;n++) {
	which=found[n];
	mat[i][j] += (si[which-hdi]-lavi)*(sj[which-hdj]-lavj);
      }
      mat[i][j] /= number;
      mat[j][i] = mat[i][j];
    }
  }

  imat=invert_matrix(mat,DIM);

  for (i=0;i<COMP;i++) {
    si=series[i];
    fav=foreav[i];
    for (j=0;j<DIM;j++) {
      hcj=indexes[0][j];
      hdj=indexes[1][j];
      lavj=localav[j];
      vec[j]=0.0;
      sj=series[hcj];
      for (n=0;n<number;n++) {
	which=found[n];
	vec[j] += (si[which+STEP]-fav)*(sj[which-hdj]);
      }
      vec[j] /= number;
    }

    multiply_matrix(imat,vec);

    newcast[i]=foreav[i];
    for (j=0;j<DIM;j++) {
      hcj=indexes[0][j];
      hdj=indexes[1][j];
      newcast[i] += vec[j]*(series[hcj][act-hdj]-localav[j]);
    }
  }
  

  for (i=0;i<DIM;i++)
    free(imat[i]);
  free(imat);
}

int main(int argc,char **argv)
{
  char stin=0,alldone,*done;
  long i,j;
  unsigned long actfound;
  unsigned long clength;
  double *rms,*av,*min,*interval,maxinterval,norm;
  double *error,**individual=NULL;
  double *newcast;
  FILE *fout;

  if (scan_help(argc,argv))
    show_options(argv[0]);
  
  scan_options(argc,argv);

  if (!causalset)
    causal=STEP;

#ifndef OMIT_WHAT_I_DO
  if (verbosity&VER_INPUT)
    what_i_do(argv[0],WID_STR);
#endif

  infile=search_datafile(argc,argv,NULL,verbosity);
  if (infile == NULL)
    stin=1;
  
  if (outfile == NULL) {
    if (!stin) {
      check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1));
      strcpy(outfile,infile);
      strcat(outfile,".fce");
    }
    else {
      check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1));
      strcpy(outfile,"stdin.fce");
    }
  }
  if (!stout)
    test_outfile(outfile);
  
  if (COLUMN == NULL)
    series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,"",dimset,
                                      verbosity);
  else
    series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,COLUMN,
                                      dimset,verbosity);

  if ((LENGTH-(EMBED-1)*DELAY) < MINN) {
    fprintf(stderr,"Data set is too short to find enough neighbors "
	    "for the fit! Exiting!\n");
    exit(ONESTEP_TOO_FEW_POINTS);
  }

  DIM=EMBED*COMP;
  check_alloc(min=(double*)malloc(sizeof(double)*COMP));
  check_alloc(interval=(double*)malloc(sizeof(double)*COMP));
  check_alloc(av=(double*)malloc(sizeof(double)*COMP));
  check_alloc(rms=(double*)malloc(sizeof(double)*COMP));

  maxinterval=0.0;
  for (i=0;i<COMP;i++) {
    rescale_data(series[i],LENGTH,&min[i],&interval[i]);
    maxinterval=(maxinterval<interval[i])?interval[i]:maxinterval;
    variance(series[i],LENGTH,&av[i],&rms[i]);
  }
  
  if (verbosity&VER_USR1) {
    check_alloc(individual=(double**)malloc(sizeof(double*)*COMP));
    for (j=0;j<COMP;j++) {
      check_alloc(individual[j]=(double*)malloc(sizeof(double)*LENGTH));
      for (i=0;i<LENGTH;i++)
	individual[j][i]=0.0;
    }
  }

  check_alloc(list=(long*)malloc(sizeof(long)*LENGTH));
  check_alloc(found=(unsigned long*)malloc(sizeof(long)*LENGTH));
  check_alloc(hfound=(unsigned long*)malloc(sizeof(long)*LENGTH));
  check_alloc(done=(char*)malloc(sizeof(char)*LENGTH));
  check_alloc(box=(long**)malloc(sizeof(long*)*NMAX));
  for (i=0;i<NMAX;i++)
    check_alloc(box[i]=(long*)malloc(sizeof(long)*NMAX));
    
  for (i=0;i<LENGTH;i++)
    done[i]=0;

  alldone=0;
  if (epsset)
    EPS0 /= maxinterval;

  epsilon=EPS0/EPSF;
  clength=(CLENGTH <= LENGTH) ? CLENGTH-STEP : LENGTH-STEP;
  comp1=COMP-1;
  indexes=make_multi_index(COMP,EMBED,DELAY);

  hdim=(EMBED-1)*DELAY;
  check_alloc(newcast=(double*)malloc(sizeof(double)*COMP));


  check_alloc(localav=(double*)malloc(sizeof(double)*DIM));
  check_alloc(foreav=(double*)malloc(sizeof(double)*COMP));
  check_alloc(vec=(double*)malloc(sizeof(double)*DIM));
  check_alloc(mat=(double**)malloc(sizeof(double*)*DIM));
  for (i=0;i<=DIM;i++)
    check_alloc(mat[i]=(double*)malloc(sizeof(double)*DIM));

  check_alloc(error=(double*)malloc(sizeof(double)*COMP));
  for (i=0;i<COMP;i++)
    error[i]=0.0;

  while (!alldone) {
    alldone=1;
    epsilon*=EPSF;
    put_in_boxes() ;
    for (i=(EMBED-1)*DELAY;i<clength;i++)
      if (!done[i]) {
	actfound=hfind_neighbors(i);
	actfound=exclude_interval(actfound,i-causal+1,
				  i+causal+(EMBED-1)*DELAY-1,hfound,found);
	if (actfound > MINN) {
	  make_fit(actfound,i,newcast);
	  for (j=0;j<COMP;j++)
	    error[j] += sqr(newcast[j]-series[j][i+STEP]);
	  if (verbosity&VER_USR1) {
	    for (j=0;j<COMP;j++)
	      individual[j][i]=(newcast[j]-series[j][i+STEP])*interval[j];
	  }
	  done[i]=1;
	}
	alldone &= done[i];
      }
  }
  norm=((double)clength-(double)((EMBED-1)*DELAY));
  if (stout) {
    if (verbosity&VER_USR1) {
      fprintf(stdout,"#Relative forecast errors for each component:\n");
      for (i=0;i<COMP;i++) 
	fprintf(stdout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
    
      for (i=(EMBED-1)*DELAY;i<clength;i++) {
	for (j=0;j<COMP-1;j++)
	  fprintf(stdout,"%e ",individual[j][i]);
	fprintf(stdout,"%e\n",individual[COMP-1][i]);
      }
    }
    else {
      fprintf(stdout,"#Relative forecast errors for each component:\n");
      for (i=0;i<COMP;i++) 
	fprintf(stdout,"%e\n",sqrt(error[i]/norm)/rms[i]);
    }
  }
  else {
    fout=fopen(outfile,"w");
    if (verbosity&VER_INPUT)
      fprintf(stderr,"Opened %s for writing\n",outfile);
    if (verbosity&VER_USR1) {
      fprintf(fout,"#Relative forecast errors for each component:\n");
      for (i=0;i<COMP;i++) 
	fprintf(fout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
    
      for (i=(EMBED-1)*DELAY;i<clength;i++) {
	for (j=0;j<COMP-1;j++)
	  fprintf(fout,"%e ",individual[j][i]);
	fprintf(fout,"%e\n",individual[COMP-1][i]);
      }
    }
    else {
      fprintf(fout,"#Relative forecast errors for each component:\n");
      for (i=0;i<COMP;i++) 
	fprintf(fout,"%e\n",sqrt(error[i]/norm)/rms[i]);
    }
    fclose(fout);
    free(outfile);
  }

  return 0;
}