Mercurial > forge
view main/system-identification/devel/tisean/source_c/arima-model.c @ 9894:82ff20b4d849 octave-forge
system-identification: Adding devel TISEAN files
author | jpicarbajal |
---|---|
date | Wed, 28 Mar 2012 13:32:37 +0000 |
parents | |
children |
line wrap: on
line source
/* * This file is part of TISEAN * * Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber * * TISEAN is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * TISEAN is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with TISEAN; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /*Author: Rainer Hegger, Last modified: Feb 6, 2006 */ /*Changes: Feb 4, 2006: First version Feb 6, 2006: Find and remove bugs (1) Feb 11, 2006: Add rand_arb_dist to iterate_***_model */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <limits.h> #include <math.h> #include "routines/tsa.h" #define WID_STR "Fits an multivariate ARIMA model to the data and gives\ the coefficients\n\tand the residues (or an iterated model)" unsigned long length=ULONG_MAX,exclude=0; unsigned int dim=1,poles=10,ilength,ITER=50; unsigned int arpoles=0,ipoles=0,mapoles=0,offset; unsigned int verbosity=1; char *outfile=NULL,*column=NULL,stdo=1,dimset=0,run_model=0,arimaset=0; char *infile=NULL; double **series,convergence=1.0e-3; double *my_average; unsigned long ardim,armadim; unsigned int **aindex; void show_options(char *progname) { what_i_do(progname,WID_STR); fprintf(stderr," Usage: %s [options]\n",progname); fprintf(stderr," Options:\n"); fprintf(stderr,"Everything not being a valid option will be interpreted" " as a possible" " datafile.\nIf no datafile is given stdin is read. 
Just - also" " means stdin\n"); fprintf(stderr,"\t-l length of file [default is whole file]\n"); fprintf(stderr,"\t-x # of lines to be ignored [default is 0]\n"); fprintf(stderr,"\t-m dimension [default is 1]\n"); fprintf(stderr,"\t-c columns to read [default is 1,...,dimension]\n"); fprintf(stderr,"\t-p order of initial AR-Fit [default is %u]\n",poles); fprintf(stderr,"\t-P order of AR,I,MA-Fit [default is %u,%u,%u]\n", arpoles,ipoles,mapoles); fprintf(stderr,"\t-I # of arima iterations [default is %u]\n",ITER); fprintf(stderr,"\t-e accuracy of convergence [default is %lf]\n",convergence); fprintf(stderr,"\t-s length of iterated model [default no iteration]\n"); fprintf(stderr,"\t-o output file name [default is 'datafile'.ari]\n"); fprintf(stderr,"\t-V verbosity level [default is 1]\n\t\t" "0='only panic messages'\n\t\t" "1='+ input/output messages'\n\t\t" "2='+ print residuals though iterating a model'\n\t\t" "4='+ print original data plus residuals'\n"); fprintf(stderr,"\t-h show these options\n\n"); exit(0); } void scan_options(int argc,char **argv) { char *out; if ((out=check_option(argv,argc,'p','u')) != NULL) { sscanf(out,"%u",&poles); if (poles < 1) { fprintf(stderr,"The order should at least be one!\n"); exit(127); } } if ((out=check_option(argv,argc,'l','u')) != NULL) sscanf(out,"%lu",&length); if ((out=check_option(argv,argc,'x','u')) != NULL) sscanf(out,"%lu",&exclude); if ((out=check_option(argv,argc,'m','u')) != NULL) { sscanf(out,"%u",&dim); dimset=1; } if ((out=check_option(argv,argc,'P','3')) != NULL) { sscanf(out,"%u,%u,%u",&arpoles,&ipoles,&mapoles); if ((arpoles+ipoles+mapoles)>0) arimaset=1; } if ((out=check_option(argv,argc,'I','u')) != NULL) sscanf(out,"%u",&ITER); if ((out=check_option(argv,argc,'e','f')) != NULL) sscanf(out,"%lf",&convergence); if ((out=check_option(argv,argc,'c','u')) != NULL) column=out; if ((out=check_option(argv,argc,'V','u')) != NULL) sscanf(out,"%u",&verbosity); if ((out=check_option(argv,argc,'s','u')) != NULL) { 
sscanf(out,"%u",&ilength); run_model=1; } if ((out=check_option(argv,argc,'o','o')) != NULL) { stdo=0; if (strlen(out) > 0) outfile=out; } } void make_difference(void) { unsigned long i,d; for (i=length-1;i>0;i--) for (d=0;d<dim;d++) series[d][i]=series[d][i]-series[d][i-1]; } unsigned int** make_ar_index(void) { unsigned int** ar_index; unsigned long i; check_alloc(ar_index=(unsigned int**)malloc(sizeof(unsigned int*)*2)); for (i=0;i<2;i++) check_alloc(ar_index[i]=(unsigned int*) malloc(sizeof(unsigned int)*ardim)); for (i=0;i<ardim;i++) { ar_index[0][i]=i/poles; ar_index[1][i]=i%poles; } return ar_index; } unsigned int** make_arima_index(unsigned int ars,unsigned int mas) { unsigned int** arima_index; unsigned int armad; unsigned long i,i0; armad=(ars+mas)*dim; check_alloc(arima_index=(unsigned int**)malloc(sizeof(unsigned int*)*2)); for (i=0;i<2;i++) check_alloc(arima_index[i]=(unsigned int*) malloc(sizeof(unsigned int)*armad)); for (i=0;i<ars*dim;i++) { arima_index[0][i]=i/ars; arima_index[1][i]=i%ars; } i0=ars*dim; for (i=0;i<mas*dim;i++) { arima_index[0][i+i0]=dim+i/mas; arima_index[1][i+i0]=i%mas; } return arima_index; } void set_averages_to_zero(void) { double var; long i,j; for (i=0;i<dim;i++) { variance(series[i],length,&my_average[i],&var); for (j=0;j<length;j++) series[i][j] -= my_average[i]; } } double** build_matrix(double **mat,unsigned int size) { long n,i,j,is,id,js,jd; double norm; norm=1./((double)length-1.0-(double)poles-(double)offset); for (i=0;i<size;i++) { id=aindex[0][i]; is=aindex[1][i]; for (j=i;j<size;j++) { jd=aindex[0][j]; js=aindex[1][j]; mat[i][j]=0.0; for (n=offset+poles-1;n<length-1;n++) mat[i][j] += series[id][n-is]*series[jd][n-js]; mat[i][j] *= norm; mat[j][i]=mat[i][j]; } } return invert_matrix(mat,size); } void build_vector(double *vec,unsigned int size,long comp) { long i,is,id,n; double norm; norm=1./((double)length-1.0-(double)poles-(double)offset); for (i=0;i<size;i++) { id=aindex[0][i]; is=aindex[1][i]; vec[i]=0.0; for 
(n=offset+poles-1;n<length-1;n++) vec[i] += series[comp][n+1]*series[id][n-is]; vec[i] *= norm; } } double* multiply_matrix_vector(double **mat,double *vec,unsigned int size) { long i,j; double *new_vec; check_alloc(new_vec=(double*)malloc(sizeof(double)*size)); for (i=0;i<size;i++) { new_vec[i]=0.0; for (j=0;j<size;j++) new_vec[i] += mat[i][j]*vec[j]; } return new_vec; } double* make_residuals(double **diff,double **coeff,unsigned int size) { long n,n1,d,i,is,id; double *resi; check_alloc(resi=(double*)malloc(sizeof(double)*dim)); for (i=0;i<dim;i++) resi[i]=0.0; for (n=poles-1;n<length-1;n++) { n1=n+1; for (d=0;d<dim;d++) { diff[d][n1]=series[d][n1]; for (i=0;i<size;i++) { id=aindex[0][i]; is=aindex[1][i]; diff[d][n1] -= coeff[d][i]*series[id][n-is]; } resi[d] += sqr(diff[d][n1]); } } for (i=0;i<dim;i++) resi[i]=sqrt(resi[i]/((double)length-(double)poles)); return resi; } void iterate_model(double **coeff,double *sigma,double **diff,FILE *file) { long i,j,i1,i2,n,d; double **iterate,*swap,**myrand; check_alloc(iterate=(double**)malloc(sizeof(double*)*(poles+1))); for (i=0;i<=poles;i++) check_alloc(iterate[i]=(double*)malloc(sizeof(double)*dim)); check_alloc(myrand=(double**)malloc(sizeof(double*)*dim)); for (i=0;i<dim;i++) myrand[i]=rand_arb_dist(diff[i],length,ilength+poles,100,0x44325); rnd_init(0x44325); for (i=0;i<1000;i++) rnd_long(); for (i=0;i<dim;i++) for (j=0;j<poles;j++) iterate[j][i]=myrand[i][j]; for (n=0;n<ilength;n++) { for (d=0;d<dim;d++) { iterate[poles][d]=myrand[d][n+poles]; for (i1=0;i1<dim;i1++) for (i2=0;i2<poles;i2++) iterate[poles][d] += coeff[d][i1*poles+i2]*iterate[poles-1-i2][i1]; } if (file != NULL) { for (d=0;d<dim;d++) fprintf(file,"%e ",iterate[poles][d]); fprintf(file,"\n"); } else { for (d=0;d<dim;d++) printf("%e ",iterate[poles][d]); printf("\n"); } swap=iterate[0]; for (i=0;i<poles;i++) iterate[i]=iterate[i+1]; iterate[poles]=swap; } for (i=0;i<=poles;i++) free(iterate[i]); free(iterate); for (i=0;i<dim;i++) free(myrand[i]); 
free(myrand); } void iterate_arima_model(double **coeff,double *sigma,double **diff,FILE *file) { double **iterate,*swap,**myrand; unsigned long i,j,n,is,id; check_alloc(iterate=(double**)malloc(sizeof(double*)*(poles+1))); for (i=0;i<=poles;i++) check_alloc(iterate[i]=(double*)malloc(sizeof(double)*2*dim)); check_alloc(myrand=(double**)malloc(sizeof(double*)*dim)); for (i=0;i<dim;i++) myrand[i]=rand_arb_dist(diff[i],length,ilength+poles,100,0x44325); rnd_init(0x44325); for (i=0;i<1000;i++) rnd_long(); for (i=0;i<dim;i++) for (j=0;j<poles;j++) iterate[j][i]=iterate[j][dim+i]=myrand[i][j]; for (n=0;n<ilength;n++) { for (i=0;i<dim;i++) iterate[poles][i]=iterate[poles][i+dim]=myrand[i][n+poles]; for (j=0;j<dim;j++) { for (i=0;i<armadim;i++) { id=aindex[0][i]; is=aindex[1][i]; iterate[poles][j] += coeff[j][i]*iterate[poles-1-is][id]; } } if (file != NULL) { for (i=0;i<dim;i++) fprintf(file,"%e ",iterate[poles][i]); fprintf(file,"\n"); } else { for (i=0;i<dim;i++) printf("%e ",iterate[poles][i]); printf("\n"); } swap=iterate[0]; for (i=0;i<poles;i++) iterate[i]=iterate[i+1]; iterate[poles]=swap; } for (i=0;i<=poles;i++) free(iterate[i]); free(iterate); for (i=0;i<dim;i++) free(myrand[i]); free(myrand); } int main(int argc,char **argv) { char stdi=0; double *pm; long i,j,iter,hj,realiter=0; unsigned int size,is,id; FILE *file; double **mat,**inverse,*vec,**coeff,**diff,**hseries; double **oldcoeff,*diffcoeff=NULL; double hdiff,**xdiff=NULL,avpm; double loglikelihood,aic,alldiff; if (scan_help(argc,argv)) show_options(argv[0]); scan_options(argc,argv); #ifndef OMIT_WHAT_I_DO if (verbosity&VER_INPUT) what_i_do(argv[0],WID_STR); #endif infile=search_datafile(argc,argv,NULL,verbosity); if (infile == NULL) stdi=1; if (outfile == NULL) { if (!stdi) { check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1)); strcpy(outfile,infile); strcat(outfile,".ari"); } else { check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1)); strcpy(outfile,"stdin.ari"); } } if (!stdo) 
test_outfile(outfile); if (column == NULL) series=(double**)get_multi_series(infile,&length,exclude,&dim,"",dimset, verbosity); else series=(double**)get_multi_series(infile,&length,exclude,&dim,column, dimset,verbosity); check_alloc(my_average=(double*)malloc(sizeof(double)*dim)); for (i=0;i<ipoles;i++) make_difference(); for (i=0;i<dim;i++) series[i] += ipoles; length -= ipoles; set_averages_to_zero(); if (poles >= length) { fprintf(stderr,"It makes no sense to have more poles than data! Exiting\n"); exit(AR_MODEL_TOO_MANY_POLES); } if (arimaset) { if ((arpoles >= length) || (mapoles >= length)) { fprintf(stderr,"It makes no sense to have more poles than data! Exiting\n"); exit(AR_MODEL_TOO_MANY_POLES); } } ardim=poles*dim; aindex=make_ar_index(); check_alloc(vec=(double*)malloc(sizeof(double)*ardim)); check_alloc(mat=(double**)malloc(sizeof(double*)*ardim)); for (i=0;i<ardim;i++) check_alloc(mat[i]=(double*)malloc(sizeof(double)*ardim)); check_alloc(coeff=(double**)malloc(sizeof(double*)*dim)); inverse=build_matrix(mat,ardim); for (i=0;i<dim;i++) { build_vector(vec,ardim,i); coeff[i]=multiply_matrix_vector(inverse,vec,ardim); } check_alloc(diff=(double**)malloc(sizeof(double*)*dim)); for (i=0;i<dim;i++) check_alloc(diff[i]=(double*)malloc(sizeof(double)*length)); pm=make_residuals(diff,coeff,ardim); free(vec); for (i=0;i<ardim;i++) { free(mat[i]); free(inverse[i]); } free(mat); free(inverse); size=ardim; if (arimaset) { offset=poles; for (i=0;i<2;i++) free(aindex[i]); free(aindex); for (i=0;i<dim;i++) free(coeff[i]); free(coeff); check_alloc(xdiff=(double**)malloc(sizeof(double*)*ITER)); for (i=0;i<ITER;i++) check_alloc(xdiff[i]=(double*)malloc(sizeof(double)*dim)); armadim=(arpoles+mapoles)*dim; aindex=make_arima_index(arpoles,mapoles); size=armadim; check_alloc(hseries=(double**)malloc(sizeof(double*)*2*dim)); for (i=0;i<dim;i++) { check_alloc(hseries[i]=(double*)malloc(sizeof(double)*length)); 
check_alloc(hseries[i+dim]=(double*)malloc(sizeof(double)*length)); for (j=0;j<length;j++) { hseries[i][j]=series[i][j]; hseries[i+dim][j]=diff[i][j]; } } for (i=0;i<dim;i++) free(series[i]-ipoles); free(series); series=hseries; check_alloc(oldcoeff=(double**)malloc(sizeof(double*)*dim)); for (i=0;i<dim;i++) { check_alloc(oldcoeff[i]=(double*)malloc(sizeof(double)*armadim)); for (j=0;j<armadim;j++) oldcoeff[i][j]=0.0; } check_alloc(diffcoeff=(double*)malloc(sizeof(double)*ITER)); for (iter=1;iter<=ITER;iter++) { check_alloc(vec=(double*)malloc(sizeof(double)*armadim)); check_alloc(mat=(double**)malloc(sizeof(double*)*armadim)); for (i=0;i<armadim;i++) check_alloc(mat[i]=(double*)malloc(sizeof(double)*armadim)); check_alloc(coeff=(double**)malloc(sizeof(double*)*dim)); poles=(arpoles > mapoles)? arpoles:mapoles; offset += poles; inverse=build_matrix(mat,armadim); for (i=0;i<dim;i++) { build_vector(vec,armadim,i); coeff[i]=multiply_matrix_vector(inverse,vec,armadim); } pm=make_residuals(diff,coeff,armadim); for (j=0;j<dim;j++) { hdiff=0.0; hj=j+dim; for (i=offset;i<length;i++) hdiff += sqr(series[hj][i]-diff[j][i]); for (i=0;i<length;i++) { series[hj][i]=diff[j][i]; } xdiff[iter-1][j]=sqrt(hdiff/(double)(length-offset)); } free(vec); for (i=0;i<armadim;i++) { free(mat[i]); free(inverse[i]); } free(mat); free(inverse); diffcoeff[iter-1]=0.0; for (i=0;i<dim;i++) for (j=0;j<dim;j++) { diffcoeff[iter-1] += sqr(coeff[i][j]-oldcoeff[i][j]); oldcoeff[i][j]=coeff[i][j]; } diffcoeff[iter-1]=sqrt(diffcoeff[iter-1]/(double)armadim); alldiff=xdiff[iter-1][0]; for (i=1;i<dim;i++) if (xdiff[iter-1][i] > alldiff) alldiff=xdiff[iter-1][i]; realiter=iter; if (alldiff < convergence) iter=ITER; if (iter < ITER) { for (i=0;i<dim;i++) free(coeff[i]); free(coeff); } } } if (stdo) { if (arimaset) { printf("#convergence of residuals in arima fit\n"); for (i=0;i<realiter;i++) { printf("#iteration %ld ",i+1); for (j=0;j<dim;j++) printf("%e ",xdiff[i][j]); printf("%e",diffcoeff[i]); 
printf("\n"); } } avpm=pm[0]*pm[0]; loglikelihood= -log(pm[0]); for (i=1;i<dim;i++) { avpm += pm[i]*pm[i]; loglikelihood -= log(pm[i]); } loglikelihood *= ((double)length); loglikelihood += -((double)length)* ((1.0+log(2.*M_PI))*dim)/2.0; avpm=sqrt(avpm/dim); printf("#average forcast error= %e\n",avpm); printf("#individual forecast errors: "); for (i=0;i<dim;i++) printf("%e ",pm[i]); printf("\n"); if (arimaset) aic=2.0*(arpoles+mapoles)-2.0*loglikelihood; else aic=2.0*poles-2.0*loglikelihood; printf("#Log-Likelihood= %e\t AIC= %e\n",loglikelihood,aic); for (i=0;i<size;i++) { id=aindex[0][i]; is=aindex[1][i]; if (id < dim) printf("#x_%u(n-%u) ",id+1,is); else printf("#e_%u(n-%u) ",id+1-dim,is); for (j=0;j<dim;j++) printf("%e ",coeff[j][i]); printf("\n"); } if (!run_model || (verbosity&VER_USR1)) { for (i=poles;i<length;i++) { if (run_model) printf("#"); for (j=0;j<dim;j++) if (verbosity&VER_USR2) printf("%e %e ",series[j][i]+my_average[j],diff[j][i]); else printf("%e ",diff[j][i]); printf("\n"); } } if (run_model && (ilength > 0)) { if (!arimaset) iterate_model(coeff,pm,diff,NULL); else iterate_arima_model(coeff,pm,diff,NULL); } } else { file=fopen(outfile,"w"); if (verbosity&VER_INPUT) fprintf(stderr,"Opened %s for output\n",outfile); if (arimaset) { fprintf(file,"#convergence of residuals in arima fit\n"); for (i=0;i<realiter;i++) { fprintf(file,"#iteration %ld ",i+1); for (j=0;j<dim;j++) fprintf(file,"%e ",xdiff[i][j]); fprintf(file,"%e",diffcoeff[i]); fprintf(file,"\n"); } } avpm=pm[0]*pm[0]; loglikelihood= -log(pm[0]); for (i=1;i<dim;i++) { avpm += pm[i]*pm[i]; loglikelihood -= log(pm[i]); } loglikelihood *= ((double)length); loglikelihood += -((double)length)* ((1.0+log(2.*M_PI))*dim)/2.0; avpm=sqrt(avpm/dim); fprintf(file,"#average forcast error= %e\n",avpm); fprintf(file,"#individual forecast errors: "); for (i=0;i<dim;i++) fprintf(file,"%e ",pm[i]); fprintf(file,"\n"); if (arimaset) aic=2.0*(arpoles+mapoles)-2.0*loglikelihood; else 
aic=2.0*poles-2.0*loglikelihood; fprintf(file,"#Log-Likelihood= %e\t AIC= %e\n",loglikelihood,aic); for (i=0;i<size;i++) { id=aindex[0][i]; is=aindex[1][i]; if (id < dim) fprintf(file,"#x_%u(n-%u) ",id+1,is); else fprintf(file,"#e_%u(n-%u) ",id+1-dim,is); for (j=0;j<dim;j++) fprintf(file,"%e ",coeff[j][i]); fprintf(file,"\n"); } if (!run_model || (verbosity&VER_USR1)) { for (i=poles;i<length;i++) { if (run_model) fprintf(file,"#"); for (j=0;j<dim;j++) if (verbosity&VER_USR2) fprintf(file,"%e %e ",series[j][i]+my_average[j],diff[j][i]); else fprintf(file,"%e ",diff[j][i]); fprintf(file,"\n"); } } if (run_model && (ilength > 0)) { if (!arimaset) iterate_model(coeff,pm,diff,file); else iterate_arima_model(coeff,pm,diff,file); } fclose(file); } if (outfile != NULL) free(outfile); if (infile != NULL) free(infile); for (i=0;i<dim;i++) { free(coeff[i]); free(diff[i]); free(series[i]); if (arimaset) free(series[i+dim]); } free(coeff); free(diff); free(series); free(pm); return 0; }