Mercurial > forge
diff main/audio/endpoint.h @ 0:6b33357c7561 octave-forge
Initial revision
author | pkienzle |
---|---|
date | Wed, 10 Oct 2001 19:54:49 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main/audio/endpoint.h Wed Oct 10 19:54:49 2001 +0000 @@ -0,0 +1,150 @@ +/* + * ENDPOINT.H - endpoint class definition + * + * Bruce T. Lowerre, Public domain, 1995, 1997 + * + * $Log$ + * Revision 1.1 2001/10/10 19:54:49 pkienzle + * Initial revision + * + * Revision 1.1 2001/04/22 08:29:30 pkienzle + * adding in all of matcompat + * + * Revision 1.2 1997/05/23 19:59:01 lowerre + * renamed class endpoint to endpointer, conflicts with <rpcsvc/nis.h> + * + * Revision 1.1 1997/05/14 20:34:34 lowerre + * Initial revision + * + * + */ + +/* The endpointer is used to determine the start and end of a live + * input signal. Unlike a pre-recorded utterance, a live input signal + * is open-ended in that the actual start and end of the signal is + * totally unknown. The search, using HMM techniques with a silence + * model, will usually do a fairly good job of guessing the start of + * the signal. However, the actual end of the signal is unknown to + * the recognizer. Reaching the end state in the recognizer does not + * necessarily mean the end of signal. Therefore, the end of signal + * must be calculated by some means. This is the job of the end point + * detector. */ + +#ifndef ENDPOINT_H +#define ENDPOINT_H + +//#include <general.h> // contains general defs + +typedef enum +{ + NOSILENCE, + INSILENCE, + START, + INSIGNAL, + END +} EPSTATE; + +typedef enum +{ + EP_NONE, + EP_RESET, + EP_SILENCE, + EP_SIGNAL, + EP_INUTT, + EP_MAYBEEND, + EP_ENDOFUTT, + EP_NOTEND, + EP_NOSTARTSILENCE +} EPTAG; + +class endpointer +{ + private: + EPSTATE epstate; + float ave, + noise, + begthresh, + energy, + maxpeak, + endthresh, + begfact, + endfact, + energyfact, + mnbe, + peakreturn, // average energy + dpnoise, + triggerfact, // schmidt trigger percent + minstartsilence, + *lastdpnoise; // array of size numdpnoise + long samprate, // sampling rate in Hz + windowsize, // window size in samples + stepsize, // step size in samples + scnt, + avescnt, + vcnt, + evcnt, + voicecount, + minfriclng, + bscnt, + zccnt, + startframe, + endframe, + ncount, + zcthresh, + numdpnoise, + minrise, + maxpause, + maxipause, + startblip, + endblip, + minuttlng, + minvoicelng, + zc; // zero cross count per window + bool startsilenceok, + low; // is signal currently low or high? + void zcpeakpick // get zc count and average energy + ( + short* // raw samples + ); + void setnoise (); // initial noise level set + void averagenoise (); // average noise array and shift + public: + endpointer // constructor + ( + long, // sampling rate in Hz + long, // window size in samples + long, // step size in samples + long = 700, // endof utt silence default, msec + long = 100, // minuttlng default, msec + long = 600, // zcthresh default, Hz + float = 40.0, // begfact default + float = 80.0, // endfact default + float = 200.0, // energyfact default + float = 2000.0, // minstartsilence default + float = 3.0, // triggerfact default + long = 6, // numdpnoise default + long = 50, // minfriclng default, msec + long = 150, // maxpause default, msec + long = 30, // startblip default, msec + long = 20, // endblip default, msec + long = 60, // minvoicelng default, msec + long = 50 // minrise default, msec + ); + ~endpointer (); // destructor + + void initendpoint (); // initialize variables + EPTAG getendpoint + ( + short* // raw samples of window size + ); + const char *gettagname // convert tag to ascii + ( + EPTAG + ); + void printvars (); // print variables + long getzc () {return (zc);} // get the zero cross count + float getenergy () {return (peakreturn);} // get the RMS energy +}; // end class endpointer + + +#endif