1 files changed, 808 insertions, 808 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/wb_vad.c b/media/libstagefright/codecs/amrwbenc/src/wb_vad.c
index 0126853..7e1d673 100644
--- a/media/libstagefright/codecs/amrwbenc/src/wb_vad.c
+++ b/media/libstagefright/codecs/amrwbenc/src/wb_vad.c
@@ -1,808 +1,808 @@
-/*
- ** Copyright 2003-2010, VisualOn, Inc.
- **
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- **
- **     http://www.apache.org/licenses/LICENSE-2.0
- **
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- */
-
-/***********************************************************************
-*      File: wb_vad.c                                                  *
-*                                                                      *
-*      Description: Voice Activity Detection                           *
-*                                                                      *
-************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "cnst.h"
-#include "wb_vad.h"
-#include "typedef.h"
-#include "basic_op.h"
-#include "math_op.h"
-#include "wb_vad_c.h"
-#include "mem_align.h"
-
-/******************************************************************************
-*  Calculate Log2 and scale the signal:
-*
-*    ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
-*
-*  input   output
-*  32768   16384
-*  1       31744
-*
-* When input is in the range of [1,2^16], max error is 0.0380%.
-*********************************************************************************/
-
-static Word16 ilog2(                       /* return: output value of the log2 */
-		Word16 mant                        /* i: value to be converted */
-		)
-{
-	Word16 ex, ex2, res;
-	Word32 i, l_temp;
-
-	if (mant <= 0)
-	{
-		mant = 1;                         
-	}
-	ex = norm_s(mant);
-	mant = mant << ex;
-
-	for (i = 0; i < 3; i++)
-		mant = vo_mult(mant, mant);
-	l_temp = vo_L_mult(mant, mant);
-
-	ex2 = norm_l(l_temp);
-	mant = extract_h(l_temp << ex2);
-
-	res = (ex + 16) << 10;
-	res = add1(res, (ex2 << 6));
-	res = vo_sub(add1(res, 127), (mant >> 8));
-	return (res);
-}
-
-/******************************************************************************
-*
-*     Function     : filter5
-*     Purpose      : Fifth-order half-band lowpass/highpass filter pair with
-*                    decimation.
-*
-*******************************************************************************/
-
-static void filter5(
-		Word16 * in0,                         /* i/o : input values; output low-pass part  */
-		Word16 * in1,                         /* i/o : input values; output high-pass part */
-		Word16 data[]                         /* i/o : filter memory                       */
-		)
-{
-	Word16 temp0, temp1, temp2;
-
-	temp0 = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
-	temp1 = add1(data[0], vo_mult(COEFF5_1, temp0));
-	data[0] = temp0;                      
-
-	temp0 = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
-	temp2 = add1(data[1], vo_mult(COEFF5_2, temp0));
-	data[1] = temp0;                       
-
-	*in0 = extract_h((vo_L_add(temp1, temp2) << 15));   
-	*in1 = extract_h((vo_L_sub(temp1, temp2) << 15));  
-}
-
-/******************************************************************************
-*
-*     Function     : filter3
-*     Purpose      : Third-order half-band lowpass/highpass filter pair with
-*                    decimation.
-*
-*******************************************************************************/
-
-static void filter3(
-		Word16 * in0,                         /* i/o : input values; output low-pass part  */
-		Word16 * in1,                         /* i/o : input values; output high-pass part */
-		Word16 * data                         /* i/o : filter memory                       */
-		)
-{
-	Word16 temp1, temp2;
-
-	temp1 = vo_sub(*in1, vo_mult(COEFF3, *data));
-	temp2 = add1(*data, vo_mult(COEFF3, temp1));
-	*data = temp1;                        
-
-	*in1 = extract_h((vo_L_sub(*in0, temp2) << 15));   
-	*in0 = extract_h((vo_L_add(*in0, temp2) << 15));   
-}
-
-/******************************************************************************
-*
-*     Function   : level_calculation
-*     Purpose    : Calculate signal level in a sub-band. Level is calculated
-*                  by summing absolute values of the input data.
-*
-*                  Signal level calculated from of the end of the frame
-*                  (data[count1 - count2]) is stored to (*sub_level)
-*                  and added to the level of the next frame.
-*
-******************************************************************************/
-
-static Word16 level_calculation(                      /* return: signal level */
-		Word16 data[],                        /* i   : signal buffer                                    */
-		Word16 * sub_level,                   /* i   : level calculated at the end of the previous frame*/
-		                                      /* o   : level of signal calculated from the last         */
-		                                      /*       (count2 - count1) samples                        */
-		Word16 count1,                        /* i   : number of samples to be counted                  */
-		Word16 count2,                        /* i   : number of samples to be counted                  */
-		Word16 ind_m,                         /* i   : step size for the index of the data buffer       */
-		Word16 ind_a,                         /* i   : starting index of the data buffer                */
-		Word16 scale                          /* i   : scaling for the level calculation                */
-		)
-{
-	Word32 i, l_temp1, l_temp2;
-	Word16 level;
-
-	l_temp1 = 0L;                          
-	for (i = count1; i < count2; i++)
-	{
-		l_temp1 += (abs_s(data[ind_m * i + ind_a])<<1);
-	}
-
-	l_temp2 = vo_L_add(l_temp1, L_shl(*sub_level, 16 - scale));
-	*sub_level = extract_h(L_shl(l_temp1, scale));      
-
-	for (i = 0; i < count1; i++)
-	{
-		l_temp2 += (abs_s(data[ind_m * i + ind_a])<<1);
-	}
-	level = extract_h(L_shl2(l_temp2, scale));
-
-	return level;
-}
-
-/******************************************************************************
-*
-*     Function     : filter_bank
-*     Purpose      : Divide input signal into bands and calculate level of
-*                    the signal in each band
-*
-*******************************************************************************/
-
-static void filter_bank(
-		VadVars * st,                         /* i/o : State struct               */
-		Word16 in[],                          /* i   : input frame                */
-		Word16 level[]                        /* o   : signal levels at each band */
-		)
-{
-	Word32 i;
-	Word16 tmp_buf[FRAME_LEN];
-
-	/* shift input 1 bit down for safe scaling */
-	for (i = 0; i < FRAME_LEN; i++)
-	{
-		tmp_buf[i] = in[i] >> 1;       
-	}
-
-	/* run the filter bank */
-	for (i = 0; i < 128; i++)
-	{
-		filter5(&tmp_buf[2 * i], &tmp_buf[2 * i + 1], st->a_data5[0]);
-	}
-	for (i = 0; i < 64; i++)
-	{
-		filter5(&tmp_buf[4 * i], &tmp_buf[4 * i + 2], st->a_data5[1]);
-		filter5(&tmp_buf[4 * i + 1], &tmp_buf[4 * i + 3], st->a_data5[2]);
-	}
-	for (i = 0; i < 32; i++)
-	{
-		filter5(&tmp_buf[8 * i], &tmp_buf[8 * i + 4], st->a_data5[3]);
-		filter5(&tmp_buf[8 * i + 2], &tmp_buf[8 * i + 6], st->a_data5[4]);
-		filter3(&tmp_buf[8 * i + 3], &tmp_buf[8 * i + 7], &st->a_data3[0]);
-	}
-	for (i = 0; i < 16; i++)
-	{
-		filter3(&tmp_buf[16 * i + 0], &tmp_buf[16 * i + 8], &st->a_data3[1]);
-		filter3(&tmp_buf[16 * i + 4], &tmp_buf[16 * i + 12], &st->a_data3[2]);
-		filter3(&tmp_buf[16 * i + 6], &tmp_buf[16 * i + 14], &st->a_data3[3]);
-	}
-
-	for (i = 0; i < 8; i++)
-	{
-		filter3(&tmp_buf[32 * i + 0], &tmp_buf[32 * i + 16], &st->a_data3[4]);
-		filter3(&tmp_buf[32 * i + 8], &tmp_buf[32 * i + 24], &st->a_data3[5]);
-	}
-
-	/* calculate levels in each frequency band */
-
-	/* 4800 - 6400 Hz */
-	level[11] = level_calculation(tmp_buf, &st->sub_level[11], 16, 64, 4, 1, 14);   
-	/* 4000 - 4800 Hz */
-	level[10] = level_calculation(tmp_buf, &st->sub_level[10], 8, 32, 8, 7, 15);   
-	/* 3200 - 4000 Hz */
-	level[9] = level_calculation(tmp_buf, &st->sub_level[9],8, 32, 8, 3, 15);   
-	/* 2400 - 3200 Hz */
-	level[8] = level_calculation(tmp_buf, &st->sub_level[8],8, 32, 8, 2, 15);   
-	/* 2000 - 2400 Hz */
-	level[7] = level_calculation(tmp_buf, &st->sub_level[7],4, 16, 16, 14, 16);       
-	/* 1600 - 2000 Hz */
-	level[6] = level_calculation(tmp_buf, &st->sub_level[6],4, 16, 16, 6, 16);        
-	/* 1200 - 1600 Hz */
-	level[5] = level_calculation(tmp_buf, &st->sub_level[5],4, 16, 16, 4, 16);        
-	/* 800 - 1200 Hz */
-	level[4] = level_calculation(tmp_buf, &st->sub_level[4],4, 16, 16, 12, 16);       
-	/* 600 - 800 Hz */
-	level[3] = level_calculation(tmp_buf, &st->sub_level[3],2, 8, 32, 8, 17); 
-	/* 400 - 600 Hz */
-	level[2] = level_calculation(tmp_buf, &st->sub_level[2],2, 8, 32, 24, 17);        
-	/* 200 - 400 Hz */
-	level[1] = level_calculation(tmp_buf, &st->sub_level[1],2, 8, 32, 16, 17);        
-	/* 0 - 200 Hz */
-	level[0] = level_calculation(tmp_buf, &st->sub_level[0],2, 8, 32, 0, 17); 
-}
-
-/******************************************************************************
-*
-*     Function   : update_cntrl
-*     Purpose    : Control update of the background noise estimate.
-*
-*******************************************************************************/
-
-static void update_cntrl(
-		VadVars * st,                         /* i/o : State structure                    */
-		Word16 level[]                        /* i   : sub-band levels of the input frame */
-		)
-{
-	Word32 i;
-	Word16 num, temp, stat_rat, exp, denom;
-	Word16 alpha;
-
-	/* if a tone has been detected for a while, initialize stat_count */
-	if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0)
-	{
-		st->stat_count = STAT_COUNT;      
-	} else
-	{
-		/* if 8 last vad-decisions have been "0", reinitialize stat_count */
-		if ((st->vadreg & 0x7f80) == 0)
-		{
-			st->stat_count = STAT_COUNT;   
-		} else
-		{
-			stat_rat = 0;                  
-			for (i = 0; i < COMPLEN; i++)
-			{
-				if(level[i] > st->ave_level[i])
-				{
-					num = level[i];        
-					denom = st->ave_level[i];   
-				} else
-				{
-					num = st->ave_level[i];
-					denom = level[i];      
-				}
-				/* Limit nimimum value of num and denom to STAT_THR_LEVEL */
-				if(num < STAT_THR_LEVEL)
-				{
-					num = STAT_THR_LEVEL;  
-				}
-				if(denom < STAT_THR_LEVEL)
-				{
-					denom = STAT_THR_LEVEL;
-				}
-				exp = norm_s(denom);
-				denom = denom << exp;
-
-				/* stat_rat = num/denom * 64 */
-				temp = div_s(num >> 1, denom);
-				stat_rat = add1(stat_rat, shr(temp, (8 - exp)));
-			}
-
-			/* compare stat_rat with a threshold and update stat_count */
-			if(stat_rat > STAT_THR)
-			{
-				st->stat_count = STAT_COUNT;    
-			} else
-			{
-				if ((st->vadreg & 0x4000) != 0)
-				{
-
-					if (st->stat_count != 0)
-					{
-						st->stat_count = st->stat_count - 1;       
-					}
-				}
-			}
-		}
-	}
-
-	/* Update average amplitude estimate for stationarity estimation */
-	alpha = ALPHA4;                        
-	if(st->stat_count == STAT_COUNT)
-	{
-		alpha = 32767;                    
-	} else if ((st->vadreg & 0x4000) == 0)
-	{
-		alpha = ALPHA5;                   
-	}
-	for (i = 0; i < COMPLEN; i++)
-	{
-		st->ave_level[i] = add1(st->ave_level[i], vo_mult_r(alpha, vo_sub(level[i], st->ave_level[i])));   
-	}
-}
-
-/******************************************************************************
-*
-*     Function     : hangover_addition
-*     Purpose      : Add hangover after speech bursts
-*
-*******************************************************************************/
-
-static Word16 hangover_addition(                      /* return: VAD_flag indicating final VAD decision */
-		VadVars * st,                         /* i/o : State structure                     */
-		Word16 low_power,                     /* i   : flag power of the input frame    */
-		Word16 hang_len,                      /* i   : hangover length */
-		Word16 burst_len                      /* i   : minimum burst length for hangover addition */
-		)
-{
-	/* if the input power (pow_sum) is lower than a threshold, clear counters and set VAD_flag to "0"         */
-	if (low_power != 0)
-	{
-		st->burst_count = 0;               
-		st->hang_count = 0;                
-		return 0;
-	}
-	/* update the counters (hang_count, burst_count) */
-	if ((st->vadreg & 0x4000) != 0)
-	{
-		st->burst_count = st->burst_count + 1;    
-		if(st->burst_count >= burst_len)
-		{
-			st->hang_count = hang_len;     
-		}
-		return 1;
-	} else
-	{
-		st->burst_count = 0;               
-		if (st->hang_count > 0)
-		{
-			st->hang_count = st->hang_count - 1;    
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/******************************************************************************
-*
-*     Function   : noise_estimate_update
-*     Purpose    : Update of background noise estimate
-*
-*******************************************************************************/
-
-static void noise_estimate_update(
-		VadVars * st,                         /* i/o : State structure                       */
-		Word16 level[]                        /* i   : sub-band levels of the input frame */
-		)
-{
-	Word32 i; 
-	Word16 alpha_up, alpha_down, bckr_add = 2;
-
-	/* Control update of bckr_est[] */
-	update_cntrl(st, level);
-
-	/* Choose update speed */
-	if ((0x7800 & st->vadreg) == 0)
-	{
-		alpha_up = ALPHA_UP1;              
-		alpha_down = ALPHA_DOWN1;          
-	} else
-	{
-		if ((st->stat_count == 0))
-		{
-			alpha_up = ALPHA_UP2;          
-			alpha_down = ALPHA_DOWN2;      
-		} else
-		{
-			alpha_up = 0;                  
-			alpha_down = ALPHA3;           
-			bckr_add = 0;                  
-		}
-	}
-
-	/* Update noise estimate (bckr_est) */
-	for (i = 0; i < COMPLEN; i++)
-	{
-		Word16 temp;
-		temp = (st->old_level[i] - st->bckr_est[i]);
-
-		if (temp < 0)
-		{                                  /* update downwards */
-			st->bckr_est[i] = add1(-2, add(st->bckr_est[i],vo_mult_r(alpha_down, temp))); 
-			/* limit minimum value of the noise estimate to NOISE_MIN */
-			if(st->bckr_est[i] < NOISE_MIN)
-			{
-				st->bckr_est[i] = NOISE_MIN;   
-			}
-		} else
-		{                                  /* update upwards */
-			st->bckr_est[i] = add1(bckr_add, add1(st->bckr_est[i],vo_mult_r(alpha_up, temp)));   
-
-			/* limit maximum value of the noise estimate to NOISE_MAX */
-			if(st->bckr_est[i] > NOISE_MAX)
-			{
-				st->bckr_est[i] = NOISE_MAX;    
-			}
-		}
-	}
-
-	/* Update signal levels of the previous frame (old_level) */
-	for (i = 0; i < COMPLEN; i++)
-	{
-		st->old_level[i] = level[i];      
-	}
-}
-
-/******************************************************************************
-*
-*     Function     : vad_decision
-*     Purpose      : Calculates VAD_flag
-*
-*******************************************************************************/
-
-static Word16 vad_decision(                           /* return value : VAD_flag */
-		VadVars * st,                         /* i/o : State structure                       */
-		Word16 level[COMPLEN],                /* i   : sub-band levels of the input frame */
-		Word32 pow_sum                        /* i   : power of the input frame           */
-		)
-{
-	Word32 i;
-	Word32 L_snr_sum;
-	Word32 L_temp;
-	Word16 vad_thr, temp, noise_level;
-	Word16 low_power_flag;
-	Word16 hang_len, burst_len;
-	Word16 ilog2_speech_level, ilog2_noise_level;
-	Word16 temp2;
-
-	/* Calculate squared sum of the input levels (level) divided by the background noise components
-	 * (bckr_est). */
-	L_snr_sum = 0;                        
-	for (i = 0; i < COMPLEN; i++)
-	{
-		Word16 exp;
-
-		exp = norm_s(st->bckr_est[i]);
-		temp = (st->bckr_est[i] << exp);
-		temp = div_s((level[i] >> 1), temp);
-		temp = shl(temp, (exp - (UNIRSHFT - 1)));
-		L_snr_sum = L_mac(L_snr_sum, temp, temp);
-	}
-
-	/* Calculate average level of estimated background noise */
-	L_temp = 0;                           
-	for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
-	{
-		L_temp = vo_L_add(L_temp, st->bckr_est[i]);
-	}
-
-	noise_level = extract_h((L_temp << 12));
-	/* if SNR is lower than a threshold (MIN_SPEECH_SNR), and increase speech_level */
-	temp = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;
-
-	if(st->speech_level < temp)
-	{
-		st->speech_level = temp;          
-	}
-	ilog2_noise_level = ilog2(noise_level);
-
-	/* If SNR is very poor, speech_level is probably corrupted by noise level. This is correctred by
-	 * subtracting MIN_SPEECH_SNR*noise_level from speech level */
-	ilog2_speech_level = ilog2(st->speech_level - temp);
-
-	temp = add1(vo_mult(NO_SLOPE, (ilog2_noise_level - NO_P1)), THR_HIGH);
-
-	temp2 = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (ilog2_speech_level - SP_P1)));
-	if (temp2 < SP_CH_MIN)
-	{
-		temp2 = SP_CH_MIN;                 
-	}
-	if (temp2 > SP_CH_MAX)
-	{
-		temp2 = SP_CH_MAX;                 
-	}
-	vad_thr = temp + temp2;
-
-	if(vad_thr < THR_MIN)
-	{
-		vad_thr = THR_MIN;                 
-	}
-	/* Shift VAD decision register */
-	st->vadreg = (st->vadreg >> 1);       
-
-	/* Make intermediate VAD decision */
-	if(L_snr_sum > vo_L_mult(vad_thr, (512 * COMPLEN)))
-	{
-		st->vadreg = (Word16) (st->vadreg | 0x4000); 
-	}
-	/* check if the input power (pow_sum) is lower than a threshold" */
-	if(pow_sum < VAD_POW_LOW)
-	{
-		low_power_flag = 1;               
-	} else
-	{
-		low_power_flag = 0;               
-	}
-	/* Update background noise estimates */
-	noise_estimate_update(st, level);
-
-	/* Calculate values for hang_len and burst_len based on vad_thr */
-	hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH);
-	if(hang_len < HANG_LOW)
-	{
-		hang_len = HANG_LOW;              
-	}
-	burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH);
-
-	return (hangover_addition(st, low_power_flag, hang_len, burst_len));
-}
-
-/******************************************************************************
-*
-*     Function : Estimate_Speech()
-*     Purpose  : Estimate speech level
-*
-* Maximum signal level is searched and stored to the variable sp_max.
-* The speech frames must locate within SP_EST_COUNT number of frames.
-* Thus, noisy frames having occasional VAD = "1" decisions will not
-* affect to the estimated speech_level.
-*
-*******************************************************************************/
-
-static void Estimate_Speech(
-		VadVars * st,                         /* i/o : State structure    */
-		Word16 in_level                       /* level of the input frame */
-		)
-{
-	Word16 alpha;
-
-	/* if the required activity count cannot be achieved, reset counters */
-	if((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
-	{
-		st->sp_est_cnt = 0;                
-		st->sp_max = 0;                    
-		st->sp_max_cnt = 0;                
-	}
-	st->sp_est_cnt += 1; 
-
-	if (((st->vadreg & 0x4000)||(in_level > st->speech_level)) && (in_level > MIN_SPEECH_LEVEL1))
-	{
-		/* update sp_max */
-		if(in_level > st->sp_max)
-		{
-			st->sp_max = in_level;         
-		}
-		st->sp_max_cnt += 1;        
-
-		if(st->sp_max_cnt >= SP_ACTIVITY_COUNT)
-		{
-			Word16 tmp;
-			/* update speech estimate */
-			tmp = (st->sp_max >> 1);      /* scale to get "average" speech level */
-
-			/* select update speed */
-			if(tmp > st->speech_level)
-			{
-				alpha = ALPHA_SP_UP;       
-			} else
-			{
-				alpha = ALPHA_SP_DOWN;    
-			}
-			if(tmp > MIN_SPEECH_LEVEL2)
-			{
-				st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(tmp, st->speech_level))); 
-			}
-			/* clear all counters used for speech estimation */
-			st->sp_max = 0;                
-			st->sp_max_cnt = 0;            
-			st->sp_est_cnt = 0;            
-		}
-	}
-}
-
-/******************************************************************************
-*
-*  Function:   wb_vad_init
-*  Purpose:    Allocates state memory and initializes state memory
-*
-*******************************************************************************/
-
-Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok. */
-		VadVars ** state,                     /* i/o : State structure    */
-		VO_MEM_OPERATOR *pMemOP
-		)
-{
-	VadVars *s;
-
-	if (state == (VadVars **) NULL)
-	{
-		fprintf(stderr, "vad_init: invalid parameter\n");
-		return -1;
-	}
-	*state = NULL;
-
-	/* allocate memory */
-	if ((s = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB)) == NULL)
-	{
-		fprintf(stderr, "vad_init: can not malloc state structure\n");
-		return -1;
-	}
-	wb_vad_reset(s);
-
-	*state = s;
-
-	return 0;
-}
-
-/******************************************************************************
-*
-*  Function:   wb_vad_reset
-*  Purpose:    Initializes state memory
-*
-*******************************************************************************/
-
-Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok. */
-		VadVars * state                       /* i/o : State structure    */
-		)
-{
-	Word32 i, j;
-
-	if (state == (VadVars *) NULL)
-	{
-		fprintf(stderr, "vad_reset: invalid parameter\n");
-		return -1;
-	}
-	state->tone_flag = 0;
-	state->vadreg = 0;
-	state->hang_count = 0;
-	state->burst_count = 0;
-	state->hang_count = 0;
-
-	/* initialize memory used by the filter bank */
-	for (i = 0; i < F_5TH_CNT; i++)
-	{
-		for (j = 0; j < 2; j++)
-		{
-			state->a_data5[i][j] = 0;
-		}
-	}
-
-	for (i = 0; i < F_3TH_CNT; i++)
-	{
-		state->a_data3[i] = 0;
-	}
-
-	/* initialize the rest of the memory */
-	for (i = 0; i < COMPLEN; i++)
-	{
-		state->bckr_est[i] = NOISE_INIT;
-		state->old_level[i] = NOISE_INIT;
-		state->ave_level[i] = NOISE_INIT;
-		state->sub_level[i] = 0;
-	}
-
-	state->sp_est_cnt = 0;
-	state->sp_max = 0;
-	state->sp_max_cnt = 0;
-	state->speech_level = SPEECH_LEVEL_INIT;
-	state->prev_pow_sum = 0;
-	return 0;
-}
-
-/******************************************************************************
-*
-*  Function:   wb_vad_exit
-*  Purpose:    The memory used for state memory is freed
-*
-*******************************************************************************/
-
-void wb_vad_exit(
-		VadVars ** state,                      /* i/o : State structure    */
-		VO_MEM_OPERATOR *pMemOP
-		)
-{
-	if (state == NULL || *state == NULL)
-		return;
-	/* deallocate memory */
-	mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
-	*state = NULL;
-	return;
-}
-
-/******************************************************************************
-*
-*     Function     : wb_vad_tone_detection
-*     Purpose      : Search maximum pitch gain from a frame. Set tone flag if
-*                    pitch gain is high. This is used to detect
-*                    signaling tones and other signals with high pitch gain.
-*
-*******************************************************************************/
-
-void wb_vad_tone_detection(
-		VadVars * st,                         /* i/o : State struct            */
-		Word16 p_gain                         /* pitch gain      */
-		)
-{
-	/* update tone flag */
-	st->tone_flag = (st->tone_flag >> 1);
-
-	/* if (pitch_gain > TONE_THR) set tone flag */
-	if (p_gain > TONE_THR)
-	{
-		st->tone_flag = (Word16) (st->tone_flag | 0x4000);
-	}
-}
-
-/******************************************************************************
-*
-*     Function     : wb_vad
-*     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
-*
-*******************************************************************************/
-
-Word16 wb_vad(                                /* Return value : VAD Decision, 1 = speech, 0 = noise */
-		VadVars * st,                         /* i/o : State structure                 */
-		Word16 in_buf[]                       /* i   : samples of the input frame   */
-	     )
-{
-	Word16 level[COMPLEN];
-	Word32 i;
-	Word16 VAD_flag, temp;
-	Word32 L_temp, pow_sum;
-
-	/* Calculate power of the input frame. */
-	L_temp = 0L;                           
-	for (i = 0; i < FRAME_LEN; i++)
-	{
-		L_temp = L_mac(L_temp, in_buf[i], in_buf[i]);
-	}
-
-	/* pow_sum = power of current frame and previous frame */
-	pow_sum = L_add(L_temp, st->prev_pow_sum);  
-
-	/* save power of current frame for next call */
-	st->prev_pow_sum = L_temp;             
-
-	/* If input power is very low, clear tone flag */
-	if (pow_sum < POW_TONE_THR)
-	{
-		st->tone_flag = (Word16) (st->tone_flag & 0x1fff);      
-	}
-	/* Run the filter bank and calculate signal levels at each band */
-	filter_bank(st, in_buf, level);
-
-	/* compute VAD decision */
-	VAD_flag = vad_decision(st, level, pow_sum);
-
-	/* Calculate input level */
-	L_temp = 0;                          
-	for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
-	{
-		L_temp = vo_L_add(L_temp, level[i]);
-	}
-
-	temp = extract_h(L_temp << 12);
-
-	Estimate_Speech(st, temp);             /* Estimate speech level */
-	return (VAD_flag);
-}
-
-
-
-
+/*
+ ** Copyright 2003-2010, VisualOn, Inc.
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ **     http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+/***********************************************************************
+*      File: wb_vad.c                                                  *
+*                                                                      *
+*      Description: Voice Activity Detection                           *
+*                                                                      *
+************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "cnst.h"
+#include "wb_vad.h"
+#include "typedef.h"
+#include "basic_op.h"
+#include "math_op.h"
+#include "wb_vad_c.h"
+#include "mem_align.h"
+
+/******************************************************************************
+*  Calculate Log2 and scale the signal:
+*
+*    ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
+*
+*  input   output
+*  32768   16384
+*  1       31744
+*
+* When input is in the range of [1,2^16], max error is 0.0380%.
+*********************************************************************************/
+
+static Word16 ilog2(                       /* return: output value of the log2 */
+		Word16 mant                        /* i: value to be converted */
+		)
+{
+	Word16 ex, ex2, res;
+	Word32 i, l_temp;
+
+	if (mant <= 0)
+	{
+		mant = 1;                         
+	}
+	ex = norm_s(mant);
+	mant = mant << ex;
+
+	for (i = 0; i < 3; i++)
+		mant = vo_mult(mant, mant);
+	l_temp = vo_L_mult(mant, mant);
+
+	ex2 = norm_l(l_temp);
+	mant = extract_h(l_temp << ex2);
+
+	res = (ex + 16) << 10;
+	res = add1(res, (ex2 << 6));
+	res = vo_sub(add1(res, 127), (mant >> 8));
+	return (res);
+}
+
+/******************************************************************************
+*
+*     Function     : filter5
+*     Purpose      : Fifth-order half-band lowpass/highpass filter pair with
+*                    decimation.
+*
+*******************************************************************************/
+
+static void filter5(
+		Word16 * in0,                         /* i/o : input values; output low-pass part  */
+		Word16 * in1,                         /* i/o : input values; output high-pass part */
+		Word16 data[]                         /* i/o : filter memory                       */
+		)
+{
+	Word16 temp0, temp1, temp2;
+
+	temp0 = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
+	temp1 = add1(data[0], vo_mult(COEFF5_1, temp0));
+	data[0] = temp0;                      
+
+	temp0 = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
+	temp2 = add1(data[1], vo_mult(COEFF5_2, temp0));
+	data[1] = temp0;                       
+
+	*in0 = extract_h((vo_L_add(temp1, temp2) << 15));   
+	*in1 = extract_h((vo_L_sub(temp1, temp2) << 15));  
+}
+
+/******************************************************************************
+*
+*     Function     : filter3
+*     Purpose      : Third-order half-band lowpass/highpass filter pair with
+*                    decimation.
+*
+*******************************************************************************/
+
+static void filter3(
+		Word16 * in0,                         /* i/o : input values; output low-pass part  */
+		Word16 * in1,                         /* i/o : input values; output high-pass part */
+		Word16 * data                         /* i/o : filter memory                       */
+		)
+{
+	Word16 temp1, temp2;
+
+	temp1 = vo_sub(*in1, vo_mult(COEFF3, *data));
+	temp2 = add1(*data, vo_mult(COEFF3, temp1));
+	*data = temp1;                        
+
+	*in1 = extract_h((vo_L_sub(*in0, temp2) << 15));   
+	*in0 = extract_h((vo_L_add(*in0, temp2) << 15));   
+}
+
+/******************************************************************************
+*
+*     Function   : level_calculation
+*     Purpose    : Calculate signal level in a sub-band. Level is calculated
+*                  by summing absolute values of the input data.
+*
+*                  Signal level calculated from of the end of the frame
+*                  (data[count1 - count2]) is stored to (*sub_level)
+*                  and added to the level of the next frame.
+*
+******************************************************************************/
+
+static Word16 level_calculation(                      /* return: signal level */
+		Word16 data[],                        /* i   : signal buffer                                    */
+		Word16 * sub_level,                   /* i   : level calculated at the end of the previous frame*/
+		                                      /* o   : level of signal calculated from the last         */
+		                                      /*       (count2 - count1) samples                        */
+		Word16 count1,                        /* i   : number of samples to be counted                  */
+		Word16 count2,                        /* i   : number of samples to be counted                  */
+		Word16 ind_m,                         /* i   : step size for the index of the data buffer       */
+		Word16 ind_a,                         /* i   : starting index of the data buffer                */
+		Word16 scale                          /* i   : scaling for the level calculation                */
+		)
+{
+	Word32 i, l_temp1, l_temp2;
+	Word16 level;
+
+	l_temp1 = 0L;                          
+	for (i = count1; i < count2; i++)
+	{
+		l_temp1 += (abs_s(data[ind_m * i + ind_a])<<1);
+	}
+
+	l_temp2 = vo_L_add(l_temp1, L_shl(*sub_level, 16 - scale));
+	*sub_level = extract_h(L_shl(l_temp1, scale));      
+
+	for (i = 0; i < count1; i++)
+	{
+		l_temp2 += (abs_s(data[ind_m * i + ind_a])<<1);
+	}
+	level = extract_h(L_shl2(l_temp2, scale));
+
+	return level;
+}
+
+/******************************************************************************
+*
+*     Function     : filter_bank
+*     Purpose      : Divide input signal into bands and calculate level of
+*                    the signal in each band
+*
+*******************************************************************************/
+
+static void filter_bank(
+		VadVars * st,                         /* i/o : State struct               */
+		Word16 in[],                          /* i   : input frame                */
+		Word16 level[]                        /* o   : signal levels at each band */
+		)
+{
+	Word32 i;
+	Word16 tmp_buf[FRAME_LEN];
+
+	/* shift input 1 bit down for safe scaling */
+	for (i = 0; i < FRAME_LEN; i++)
+	{
+		tmp_buf[i] = in[i] >> 1;       
+	}
+
+	/* run the filter bank */
+	for (i = 0; i < 128; i++)
+	{
+		filter5(&tmp_buf[2 * i], &tmp_buf[2 * i + 1], st->a_data5[0]);
+	}
+	for (i = 0; i < 64; i++)
+	{
+		filter5(&tmp_buf[4 * i], &tmp_buf[4 * i + 2], st->a_data5[1]);
+		filter5(&tmp_buf[4 * i + 1], &tmp_buf[4 * i + 3], st->a_data5[2]);
+	}
+	for (i = 0; i < 32; i++)
+	{
+		filter5(&tmp_buf[8 * i], &tmp_buf[8 * i + 4], st->a_data5[3]);
+		filter5(&tmp_buf[8 * i + 2], &tmp_buf[8 * i + 6], st->a_data5[4]);
+		filter3(&tmp_buf[8 * i + 3], &tmp_buf[8 * i + 7], &st->a_data3[0]);
+	}
+	for (i = 0; i < 16; i++)
+	{
+		filter3(&tmp_buf[16 * i + 0], &tmp_buf[16 * i + 8], &st->a_data3[1]);
+		filter3(&tmp_buf[16 * i + 4], &tmp_buf[16 * i + 12], &st->a_data3[2]);
+		filter3(&tmp_buf[16 * i + 6], &tmp_buf[16 * i + 14], &st->a_data3[3]);
+	}
+
+	for (i = 0; i < 8; i++)
+	{
+		filter3(&tmp_buf[32 * i + 0], &tmp_buf[32 * i + 16], &st->a_data3[4]);
+		filter3(&tmp_buf[32 * i + 8], &tmp_buf[32 * i + 24], &st->a_data3[5]);
+	}
+
+	/* calculate levels in each frequency band */
+
+	/* 4800 - 6400 Hz */
+	level[11] = level_calculation(tmp_buf, &st->sub_level[11], 16, 64, 4, 1, 14);   
+	/* 4000 - 4800 Hz */
+	level[10] = level_calculation(tmp_buf, &st->sub_level[10], 8, 32, 8, 7, 15);   
+	/* 3200 - 4000 Hz */
+	level[9] = level_calculation(tmp_buf, &st->sub_level[9],8, 32, 8, 3, 15);   
+	/* 2400 - 3200 Hz */
+	level[8] = level_calculation(tmp_buf, &st->sub_level[8],8, 32, 8, 2, 15);   
+	/* 2000 - 2400 Hz */
+	level[7] = level_calculation(tmp_buf, &st->sub_level[7],4, 16, 16, 14, 16);       
+	/* 1600 - 2000 Hz */
+	level[6] = level_calculation(tmp_buf, &st->sub_level[6],4, 16, 16, 6, 16);        
+	/* 1200 - 1600 Hz */
+	level[5] = level_calculation(tmp_buf, &st->sub_level[5],4, 16, 16, 4, 16);        
+	/* 800 - 1200 Hz */
+	level[4] = level_calculation(tmp_buf, &st->sub_level[4],4, 16, 16, 12, 16);       
+	/* 600 - 800 Hz */
+	level[3] = level_calculation(tmp_buf, &st->sub_level[3],2, 8, 32, 8, 17); 
+	/* 400 - 600 Hz */
+	level[2] = level_calculation(tmp_buf, &st->sub_level[2],2, 8, 32, 24, 17);        
+	/* 200 - 400 Hz */
+	level[1] = level_calculation(tmp_buf, &st->sub_level[1],2, 8, 32, 16, 17);        
+	/* 0 - 200 Hz */
+	level[0] = level_calculation(tmp_buf, &st->sub_level[0],2, 8, 32, 0, 17); 
+}
+
+/******************************************************************************
+*
+*     Function   : update_cntrl
+*     Purpose    : Control update of the background noise estimate.
+*
+*******************************************************************************/
+
+static void update_cntrl(
+		VadVars * st,                         /* i/o : State structure                    */
+		Word16 level[]                        /* i   : sub-band levels of the input frame */
+		)
+{
+	Word32 i;
+	Word16 num, temp, stat_rat, exp, denom;
+	Word16 alpha;
+
+	/* if a tone has been detected for a while, initialize stat_count */
+	if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0)
+	{
+		st->stat_count = STAT_COUNT;      
+	} else
+	{
+		/* if 8 last vad-decisions have been "0", reinitialize stat_count */
+		if ((st->vadreg & 0x7f80) == 0)
+		{
+			st->stat_count = STAT_COUNT;   
+		} else
+		{
+			stat_rat = 0;                  
+			for (i = 0; i < COMPLEN; i++)
+			{
+				if(level[i] > st->ave_level[i])
+				{
+					num = level[i];        
+					denom = st->ave_level[i];   
+				} else
+				{
+					num = st->ave_level[i];
+					denom = level[i];      
+				}
+				/* Limit nimimum value of num and denom to STAT_THR_LEVEL */
+				if(num < STAT_THR_LEVEL)
+				{
+					num = STAT_THR_LEVEL;  
+				}
+				if(denom < STAT_THR_LEVEL)
+				{
+					denom = STAT_THR_LEVEL;
+				}
+				exp = norm_s(denom);
+				denom = denom << exp;
+
+				/* stat_rat = num/denom * 64 */
+				temp = div_s(num >> 1, denom);
+				stat_rat = add1(stat_rat, shr(temp, (8 - exp)));
+			}
+
+			/* compare stat_rat with a threshold and update stat_count */
+			if(stat_rat > STAT_THR)
+			{
+				st->stat_count = STAT_COUNT;    
+			} else
+			{
+				if ((st->vadreg & 0x4000) != 0)
+				{
+
+					if (st->stat_count != 0)
+					{
+						st->stat_count = st->stat_count - 1;       
+					}
+				}
+			}
+		}
+	}
+
+	/* Update average amplitude estimate for stationarity estimation */
+	alpha = ALPHA4;                        
+	if(st->stat_count == STAT_COUNT)
+	{
+		alpha = 32767;                    
+	} else if ((st->vadreg & 0x4000) == 0)
+	{
+		alpha = ALPHA5;                   
+	}
+	for (i = 0; i < COMPLEN; i++)
+	{
+		st->ave_level[i] = add1(st->ave_level[i], vo_mult_r(alpha, vo_sub(level[i], st->ave_level[i])));   
+	}
+}
+
+/******************************************************************************
+*
+*     Function     : hangover_addition
+*     Purpose      : Add hangover after speech bursts
+*
+*******************************************************************************/
+
+static Word16 hangover_addition(                      /* return: VAD_flag indicating final VAD decision */
+		VadVars * st,                         /* i/o : State structure                     */
+		Word16 low_power,                     /* i   : flag power of the input frame    */
+		Word16 hang_len,                      /* i   : hangover length */
+		Word16 burst_len                      /* i   : minimum burst length for hangover addition */
+		)
+{
+	/* if the input power (pow_sum) is lower than a threshold, clear counters and set VAD_flag to "0"         */
+	if (low_power != 0)
+	{
+		st->burst_count = 0;               
+		st->hang_count = 0;                
+		return 0;
+	}
+	/* update the counters (hang_count, burst_count) */
+	if ((st->vadreg & 0x4000) != 0)
+	{
+		st->burst_count = st->burst_count + 1;    
+		if(st->burst_count >= burst_len)
+		{
+			st->hang_count = hang_len;     
+		}
+		return 1;
+	} else
+	{
+		st->burst_count = 0;               
+		if (st->hang_count > 0)
+		{
+			st->hang_count = st->hang_count - 1;    
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************
+*
+*     Function   : noise_estimate_update
+*     Purpose    : Update of background noise estimate
+*
+*******************************************************************************/
+
+static void noise_estimate_update(
+		VadVars * st,                         /* i/o : State structure                       */
+		Word16 level[]                        /* i   : sub-band levels of the input frame */
+		)
+{
+	Word32 i; 
+	Word16 alpha_up, alpha_down, bckr_add = 2;
+
+	/* Control update of bckr_est[] */
+	update_cntrl(st, level);
+
+	/* Choose update speed */
+	if ((0x7800 & st->vadreg) == 0)
+	{
+		alpha_up = ALPHA_UP1;              
+		alpha_down = ALPHA_DOWN1;          
+	} else
+	{
+		if ((st->stat_count == 0))
+		{
+			alpha_up = ALPHA_UP2;          
+			alpha_down = ALPHA_DOWN2;      
+		} else
+		{
+			alpha_up = 0;                  
+			alpha_down = ALPHA3;           
+			bckr_add = 0;                  
+		}
+	}
+
+	/* Update noise estimate (bckr_est) */
+	for (i = 0; i < COMPLEN; i++)
+	{
+		Word16 temp;
+		temp = (st->old_level[i] - st->bckr_est[i]);
+
+		if (temp < 0)
+		{                                  /* update downwards */
+			st->bckr_est[i] = add1(-2, add(st->bckr_est[i],vo_mult_r(alpha_down, temp))); 
+			/* limit minimum value of the noise estimate to NOISE_MIN */
+			if(st->bckr_est[i] < NOISE_MIN)
+			{
+				st->bckr_est[i] = NOISE_MIN;   
+			}
+		} else
+		{                                  /* update upwards */
+			st->bckr_est[i] = add1(bckr_add, add1(st->bckr_est[i],vo_mult_r(alpha_up, temp)));   
+
+			/* limit maximum value of the noise estimate to NOISE_MAX */
+			if(st->bckr_est[i] > NOISE_MAX)
+			{
+				st->bckr_est[i] = NOISE_MAX;    
+			}
+		}
+	}
+
+	/* Update signal levels of the previous frame (old_level) */
+	for (i = 0; i < COMPLEN; i++)
+	{
+		st->old_level[i] = level[i];      
+	}
+}
+
+/******************************************************************************
+*
+*     Function     : vad_decision
+*     Purpose      : Calculates VAD_flag
+*
+*******************************************************************************/
+
+static Word16 vad_decision(                           /* return value : VAD_flag */
+		VadVars * st,                         /* i/o : State structure                       */
+		Word16 level[COMPLEN],                /* i   : sub-band levels of the input frame */
+		Word32 pow_sum                        /* i   : power of the input frame           */
+		)
+{
+	Word32 i;
+	Word32 L_snr_sum;
+	Word32 L_temp;
+	Word16 vad_thr, temp, noise_level;
+	Word16 low_power_flag;
+	Word16 hang_len, burst_len;
+	Word16 ilog2_speech_level, ilog2_noise_level;
+	Word16 temp2;
+
+	/* Calculate squared sum of the input levels (level) divided by the background noise components
+	 * (bckr_est). */
+	L_snr_sum = 0;                        
+	for (i = 0; i < COMPLEN; i++)
+	{
+		Word16 exp;
+
+		exp = norm_s(st->bckr_est[i]);
+		temp = (st->bckr_est[i] << exp);
+		temp = div_s((level[i] >> 1), temp);
+		temp = shl(temp, (exp - (UNIRSHFT - 1)));
+		L_snr_sum = L_mac(L_snr_sum, temp, temp);
+	}
+
+	/* Calculate average level of estimated background noise */
+	L_temp = 0;                           
+	for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
+	{
+		L_temp = vo_L_add(L_temp, st->bckr_est[i]);
+	}
+
+	noise_level = extract_h((L_temp << 12));
+	/* if SNR is lower than a threshold (MIN_SPEECH_SNR), and increase speech_level */
+	temp = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;
+
+	if(st->speech_level < temp)
+	{
+		st->speech_level = temp;          
+	}
+	ilog2_noise_level = ilog2(noise_level);
+
+	/* If SNR is very poor, speech_level is probably corrupted by noise level. This is correctred by
+	 * subtracting MIN_SPEECH_SNR*noise_level from speech level */
+	ilog2_speech_level = ilog2(st->speech_level - temp);
+
+	temp = add1(vo_mult(NO_SLOPE, (ilog2_noise_level - NO_P1)), THR_HIGH);
+
+	temp2 = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (ilog2_speech_level - SP_P1)));
+	if (temp2 < SP_CH_MIN)
+	{
+		temp2 = SP_CH_MIN;                 
+	}
+	if (temp2 > SP_CH_MAX)
+	{
+		temp2 = SP_CH_MAX;                 
+	}
+	vad_thr = temp + temp2;
+
+	if(vad_thr < THR_MIN)
+	{
+		vad_thr = THR_MIN;                 
+	}
+	/* Shift VAD decision register */
+	st->vadreg = (st->vadreg >> 1);       
+
+	/* Make intermediate VAD decision */
+	if(L_snr_sum > vo_L_mult(vad_thr, (512 * COMPLEN)))
+	{
+		st->vadreg = (Word16) (st->vadreg | 0x4000); 
+	}
+	/* check if the input power (pow_sum) is lower than a threshold" */
+	if(pow_sum < VAD_POW_LOW)
+	{
+		low_power_flag = 1;               
+	} else
+	{
+		low_power_flag = 0;               
+	}
+	/* Update background noise estimates */
+	noise_estimate_update(st, level);
+
+	/* Calculate values for hang_len and burst_len based on vad_thr */
+	hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH);
+	if(hang_len < HANG_LOW)
+	{
+		hang_len = HANG_LOW;              
+	}
+	burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH);
+
+	return (hangover_addition(st, low_power_flag, hang_len, burst_len));
+}
+
+/******************************************************************************
+*
+*     Function : Estimate_Speech()
+*     Purpose  : Estimate speech level
+*
+* Maximum signal level is searched and stored to the variable sp_max.
+* The speech frames must locate within SP_EST_COUNT number of frames.
+* Thus, noisy frames having occasional VAD = "1" decisions will not
+* affect to the estimated speech_level.
+*
+*******************************************************************************/
+
+static void Estimate_Speech(
+		VadVars * st,                         /* i/o : State structure    */
+		Word16 in_level                       /* level of the input frame */
+		)
+{
+	Word16 alpha;
+
+	/* if the required activity count cannot be achieved, reset counters */
+	if((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
+	{
+		st->sp_est_cnt = 0;                
+		st->sp_max = 0;                    
+		st->sp_max_cnt = 0;                
+	}
+	st->sp_est_cnt += 1; 
+
+	if (((st->vadreg & 0x4000)||(in_level > st->speech_level)) && (in_level > MIN_SPEECH_LEVEL1))
+	{
+		/* update sp_max */
+		if(in_level > st->sp_max)
+		{
+			st->sp_max = in_level;         
+		}
+		st->sp_max_cnt += 1;        
+
+		if(st->sp_max_cnt >= SP_ACTIVITY_COUNT)
+		{
+			Word16 tmp;
+			/* update speech estimate */
+			tmp = (st->sp_max >> 1);      /* scale to get "average" speech level */
+
+			/* select update speed */
+			if(tmp > st->speech_level)
+			{
+				alpha = ALPHA_SP_UP;       
+			} else
+			{
+				alpha = ALPHA_SP_DOWN;    
+			}
+			if(tmp > MIN_SPEECH_LEVEL2)
+			{
+				st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(tmp, st->speech_level))); 
+			}
+			/* clear all counters used for speech estimation */
+			st->sp_max = 0;                
+			st->sp_max_cnt = 0;            
+			st->sp_est_cnt = 0;            
+		}
+	}
+}
+
+/******************************************************************************
+*
+*  Function:   wb_vad_init
+*  Purpose:    Allocates state memory and initializes state memory
+*
+*******************************************************************************/
+
+Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok. */
+		VadVars ** state,                     /* i/o : State structure    */
+		VO_MEM_OPERATOR *pMemOP
+		)
+{
+	VadVars *s;
+
+	if (state == (VadVars **) NULL)
+	{
+		fprintf(stderr, "vad_init: invalid parameter\n");
+		return -1;
+	}
+	*state = NULL;
+
+	/* allocate memory */
+	if ((s = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB)) == NULL)
+	{
+		fprintf(stderr, "vad_init: can not malloc state structure\n");
+		return -1;
+	}
+	wb_vad_reset(s);
+
+	*state = s;
+
+	return 0;
+}
+
+/******************************************************************************
+*
+*  Function:   wb_vad_reset
+*  Purpose:    Initializes state memory
+*
+*******************************************************************************/
+
+Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok. */
+		VadVars * state                       /* i/o : State structure    */
+		)
+{
+	Word32 i, j;
+
+	if (state == (VadVars *) NULL)
+	{
+		fprintf(stderr, "vad_reset: invalid parameter\n");
+		return -1;
+	}
+	state->tone_flag = 0;
+	state->vadreg = 0;
+	state->hang_count = 0;
+	state->burst_count = 0;
+	state->hang_count = 0;
+
+	/* initialize memory used by the filter bank */
+	for (i = 0; i < F_5TH_CNT; i++)
+	{
+		for (j = 0; j < 2; j++)
+		{
+			state->a_data5[i][j] = 0;
+		}
+	}
+
+	for (i = 0; i < F_3TH_CNT; i++)
+	{
+		state->a_data3[i] = 0;
+	}
+
+	/* initialize the rest of the memory */
+	for (i = 0; i < COMPLEN; i++)
+	{
+		state->bckr_est[i] = NOISE_INIT;
+		state->old_level[i] = NOISE_INIT;
+		state->ave_level[i] = NOISE_INIT;
+		state->sub_level[i] = 0;
+	}
+
+	state->sp_est_cnt = 0;
+	state->sp_max = 0;
+	state->sp_max_cnt = 0;
+	state->speech_level = SPEECH_LEVEL_INIT;
+	state->prev_pow_sum = 0;
+	return 0;
+}
+
+/******************************************************************************
+*
+*  Function:   wb_vad_exit
+*  Purpose:    The memory used for state memory is freed
+*
+*******************************************************************************/
+
+void wb_vad_exit(
+		VadVars ** state,                      /* i/o : State structure    */
+		VO_MEM_OPERATOR *pMemOP
+		)
+{
+	if (state == NULL || *state == NULL)
+		return;
+	/* deallocate memory */
+	mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
+	*state = NULL;
+	return;
+}
+
+/******************************************************************************
+*
+*     Function     : wb_vad_tone_detection
+*     Purpose      : Search maximum pitch gain from a frame. Set tone flag if
+*                    pitch gain is high. This is used to detect
+*                    signaling tones and other signals with high pitch gain.
+*
+*******************************************************************************/
+
+void wb_vad_tone_detection(
+		VadVars * st,                         /* i/o : State struct            */
+		Word16 p_gain                         /* pitch gain      */
+		)
+{
+	/* update tone flag */
+	st->tone_flag = (st->tone_flag >> 1);
+
+	/* if (pitch_gain > TONE_THR) set tone flag */
+	if (p_gain > TONE_THR)
+	{
+		st->tone_flag = (Word16) (st->tone_flag | 0x4000);
+	}
+}
+
+/******************************************************************************
+*
+*     Function     : wb_vad
+*     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
+*
+*******************************************************************************/
+
+Word16 wb_vad(                                /* Return value : VAD Decision, 1 = speech, 0 = noise */
+		VadVars * st,                         /* i/o : State structure                 */
+		Word16 in_buf[]                       /* i   : samples of the input frame   */
+	     )
+{
+	Word16 level[COMPLEN];
+	Word32 i;
+	Word16 VAD_flag, temp;
+	Word32 L_temp, pow_sum;
+
+	/* Calculate power of the input frame. */
+	L_temp = 0L;                           
+	for (i = 0; i < FRAME_LEN; i++)
+	{
+		L_temp = L_mac(L_temp, in_buf[i], in_buf[i]);
+	}
+
+	/* pow_sum = power of current frame and previous frame */
+	pow_sum = L_add(L_temp, st->prev_pow_sum);  
+
+	/* save power of current frame for next call */
+	st->prev_pow_sum = L_temp;             
+
+	/* If input power is very low, clear tone flag */
+	if (pow_sum < POW_TONE_THR)
+	{
+		st->tone_flag = (Word16) (st->tone_flag & 0x1fff);      
+	}
+	/* Run the filter bank and calculate signal levels at each band */
+	filter_bank(st, in_buf, level);
+
+	/* compute VAD decision */
+	VAD_flag = vad_decision(st, level, pow_sum);
+
+	/* Calculate input level */
+	L_temp = 0;                          
+	for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
+	{
+		L_temp = vo_L_add(L_temp, level[i]);
+	}
+
+	temp = extract_h(L_temp << 12);
+
+	Estimate_Speech(st, temp);             /* Estimate speech level */
+	return (VAD_flag);
+}
+
+
+
+