Mercurial > forge
view main/audio/endpoint.h @ 0:6b33357c7561 octave-forge
Initial revision
author | pkienzle |
---|---|
date | Wed, 10 Oct 2001 19:54:49 +0000 |
parents | |
children |
line wrap: on
line source
/*
 * ENDPOINT.H - endpoint class definition
 *
 * Bruce T. Lowerre, Public domain, 1995, 1997
 *
 * $Log$
 * Revision 1.1 2001/10/10 19:54:49 pkienzle
 * Initial revision
 *
 * Revision 1.1 2001/04/22 08:29:30 pkienzle
 * adding in all of matcompat
 *
 * Revision 1.2 1997/05/23 19:59:01 lowerre
 * renamed class endpoint to endpointer, conflicts with <rpcsvc/nis.h>
 *
 * Revision 1.1 1997/05/14 20:34:34 lowerre
 * Initial revision
 *
 *
 */

/* The endpointer is used to determine the start and end of a live
 * input signal. Unlike a pre-recorded utterance, a live input signal
 * is open-ended in that the actual start and end of the signal are
 * totally unknown. The search, using HMM techniques with a silence
 * model, will usually do a fairly good job of guessing the start of
 * the signal. However, the actual end of the signal is unknown to
 * the recognizer. Reaching the end state in the recognizer does not
 * necessarily mean the end of signal. Therefore, the end of signal
 * must be calculated by some means. This is the job of the end point
 * detector.
 */

#ifndef ENDPOINT_H
#define ENDPOINT_H

//#include <general.h>                  // contains general defs

/* Values held by the private endpointer::epstate member. */
typedef enum
{
    NOSILENCE,
    INSILENCE,
    START,
    INSIGNAL,
    END
} EPSTATE;

/* Tag returned by endpointer::getendpoint() and accepted by
 * endpointer::gettagname(). */
typedef enum
{
    EP_NONE,
    EP_RESET,
    EP_SILENCE,
    EP_SIGNAL,
    EP_INUTT,
    EP_MAYBEEND,
    EP_ENDOFUTT,
    EP_NOTEND,
    EP_NOSTARTSILENCE
} EPTAG;

/* End point detector: fed one window of raw samples per call to
 * getendpoint(), it answers with an EPTAG for that window.
 * Member functions other than the two inline accessors are only declared
 * here; their definitions live outside this header. */
class endpointer
{
private:
    EPSTATE epstate;

    float   ave,
            noise,
            begthresh,
            energy,
            maxpeak,
            endthresh,
            begfact,
            endfact,
            energyfact,
            mnbe,
            peakreturn,                 // average energy
            dpnoise,
            triggerfact,                // schmidt trigger percent
            minstartsilence;

    // NOTE(review): raw pointer member and a user-declared destructor, but
    // no copy constructor / assignment operator. If the destructor releases
    // this array, copying an endpointer would free it twice -- confirm
    // against the implementation file.
    float  *lastdpnoise;                // array of size numdpnoise

    long    samprate,                   // sampling rate in Hz
            windowsize,                 // window size in samples
            stepsize,                   // step size in samples
            scnt,
            avescnt,
            vcnt,
            evcnt,
            voicecount,
            minfriclng,
            bscnt,
            zccnt,
            startframe,
            endframe,
            ncount,
            zcthresh,
            numdpnoise,
            minrise,
            maxpause,
            maxipause,
            startblip,
            endblip,
            minuttlng,
            minvoicelng,
            zc;                         // zero cross count per window

    bool    startsilenceok,
            low;                        // is signal currently low or high?

    // get zc count and average energy
    void zcpeakpick
    (
        short*                          // raw samples
    );

    void setnoise ();                   // initial noise level set
    void averagenoise ();               // average noise array and shift

public:
    // constructor; only the first three arguments are required
    endpointer
    (
        long  samprate,                 // sampling rate in Hz
        long  windowsize,               // window size in samples
        long  stepsize,                 // step size in samples
        long  endsilence      = 700,    // endof utt silence default, msec
        long  minuttlng       = 100,    // minuttlng default, msec
        long  zcthresh        = 600,    // zcthresh default, Hz
        float begfact         = 40.0,   // begfact default
        float endfact         = 80.0,   // endfact default
        float energyfact      = 200.0,  // energyfact default
        float minstartsilence = 2000.0, // minstartsilence default
        float triggerfact     = 3.0,    // triggerfact default
        long  numdpnoise      = 6,      // numdpnoise default
        long  minfriclng      = 50,     // minfriclng default, msec
        long  maxpause        = 150,    // maxpause default, msec
        long  startblip       = 30,     // startblip default, msec
        long  endblip         = 20,     // endblip default, msec
        long  minvoicelng     = 60,     // minvoicelng default, msec
        long  minrise         = 50      // minrise default, msec
    );

    ~endpointer ();                     // destructor

    void initendpoint ();               // initialize variables

    EPTAG getendpoint
    (
        short*                          // raw samples of window size
    );

    // convert tag to ascii
    const char *gettagname
    (
        EPTAG
    );

    void printvars ();                  // print variables

    // get the zero cross count (const: read-only accessor)
    long getzc () const { return (zc); }

    // get the RMS energy (const: read-only accessor)
    // NOTE(review): returns peakreturn, whose declaration comment reads
    // "average energy" rather than RMS -- confirm which wording is accurate.
    float getenergy () const { return (peakreturn); }
};                                      // end class endpointer

#endif