view main/audio/endpoint.h @ 0:6b33357c7561 octave-forge

Initial revision
author pkienzle
date Wed, 10 Oct 2001 19:54:49 +0000
parents
children
line wrap: on
line source

/*
 * ENDPOINT.H - endpoint class definition
 *
 * Bruce T. Lowerre, Public domain, 1995, 1997
 *
 * $Log$
 * Revision 1.1  2001/10/10 19:54:49  pkienzle
 * Initial revision
 *
 * Revision 1.1  2001/04/22 08:29:30  pkienzle
 * adding in all of matcompat
 *
 * Revision 1.2  1997/05/23 19:59:01  lowerre
 * renamed class endpoint to endpointer, conflicts with <rpcsvc/nis.h>
 *
 * Revision 1.1  1997/05/14 20:34:34  lowerre
 * Initial revision
 *
 *
 */

/* The endpointer is used to determine the start and end of a live
 * input signal.  Unlike a pre-recorded utterance, a live input signal
 * is open-ended in that the actual start and end of the signal is
 * totally unknown.  The search, using HMM techniques with a silence
 * model, will usually do a fairly good job of guessing the start of
 * the signal.  However, the actual end of the signal is unknown to
 * the recognizer.  Reaching the end state in the recognizer does not
 * necessarily mean the end of signal.  Therefore, the end of signal
 * must be calculated by some means.  This is the job of the end point
 * detector.  */

#ifndef ENDPOINT_H
#define ENDPOINT_H

//#include <general.h>				// contains general defs

typedef enum
{
    NOSILENCE,
    INSILENCE,
    START,
    INSIGNAL,
    END
} EPSTATE;

typedef enum
{
    EP_NONE,
    EP_RESET,
    EP_SILENCE,
    EP_SIGNAL,
    EP_INUTT,
    EP_MAYBEEND,
    EP_ENDOFUTT,
    EP_NOTEND,
    EP_NOSTARTSILENCE
} EPTAG;

class endpointer
{
    private:
        EPSTATE		epstate;
        float		ave,
			noise,
			begthresh,
			energy,
			maxpeak,
			endthresh,
			begfact,
			endfact,
			energyfact,
			mnbe,
			peakreturn,	// average energy
			dpnoise,
			triggerfact,	// schmidt trigger percent
			minstartsilence,
			*lastdpnoise;	// array of size numdpnoise
        long		samprate,	// sampling rate in Hz
			windowsize,	// window size in samples
			stepsize,	// step size in samples
        		scnt,
			avescnt,
			vcnt,
			evcnt,
			voicecount,
			minfriclng,
			bscnt,
			zccnt,
			startframe,
			endframe,
			ncount,
			zcthresh,
			numdpnoise,
			minrise,
			maxpause,
			maxipause,
			startblip,
			endblip,
			minuttlng,
			minvoicelng,
			zc;		// zero cross count per window
        bool		startsilenceok,
			low;		// is signal currently low or high?
        void zcpeakpick			// get zc count and average energy
        (
            short*			// raw samples
        );
        void setnoise ();		// initial noise level set
        void averagenoise ();		// average noise array and shift
    public:
        endpointer			// constructor
        (
            long,			// sampling rate in Hz
            long,			// window  size in samples
            long,			// step size in samples
            long = 700,			// endof utt silence default, msec
            long = 100,			// minuttlng default, msec
            long = 600,			// zcthresh default, Hz
            float = 40.0,		// begfact default
            float = 80.0,		// endfact default
            float = 200.0,		// energyfact default
            float = 2000.0,		// minstartsilence default
            float = 3.0,		// triggerfact default
            long = 6,			// numdpnoise default
            long = 50,			// minfriclng default, msec
            long = 150,			// maxpause default, msec
            long = 30,			// startblip default, msec
            long = 20,			// endblip default, msec
            long = 60,			// minvoicelng default, msec
            long = 50			// minrise default, msec
        );
        ~endpointer ();			// destructor

        void initendpoint ();		// initialize variables
        EPTAG getendpoint
        (
            short*			// raw samples of window size
        );
        const char *gettagname		// convert tag to ascii
        (
            EPTAG
        );
        void printvars ();		// print variables
        long getzc () {return (zc);}	// get the zero cross count
        float getenergy () {return (peakreturn);}	// get the RMS energy
}; // end class endpointer


#endif