% Source: www.pudn.com > audioProcessingtoolbox.rar > epdByVolHod.m
function [epInSampleIndex, epInFrameIndex, segment, zeroOneVec, volume, hod, vh] = epdByVolHod(y, fs, nbits, epdParam, plotOpt)
% epdByVolHod: End-point detection (EPD) based on volume and HOD (high-order difference)
%
% Usage:
%   [epInSampleIndex, epInFrameIndex, segment, zeroOneVec, volume, hod, vh] = epdByVolHod(y, fs, nbits, epdParam, plotOpt)
%
% Inputs:
%   y: input audio signal; must have exactly one column (mono), otherwise an error is raised
%   fs: sampling rate (defaults to 16000 when omitted)
%   nbits: no. of bits (defaults to 16 when omitted)
%   epdParam: parameters for EPD (defaults to epdParamSet(fs) when omitted or empty).
%       Fields read in the visible code: frameSize, overlap, minSegment,
%       maxSilBetweenWord, diffOrder, volWeight, vhRatio
%   plotOpt: 0 for silent operation, 1 for plotting (defaults to 0 when omitted)
%
% Outputs:
%   epInSampleIndex: two-element end-points in sample index
%   epInFrameIndex: two-element end-points in frame index
%   segment: segments of voice activity, as returned by findSegment and then pruned
%       (the fields begin, end and duration are used below)
%   zeroOneVec: NOTE(review): not assigned anywhere in the visible text of this file;
%       presumably a 0/1 voice-activity vector - confirm against the original source
%   volume: volume computed from the frames by frame2volume (method 1)
%   hod: value computed from the frames by frame2ashod with order epdParam.diffOrder
%   vh: weighted combination of volume and hod (weight epdParam.volWeight on volume)
%
% Calling the function without any input argument runs the self demo.
%
% Example:
%   waveFile='SingaporeIsAFinePlace.wav';
%   [y, fs, nbits] = wavReadInt(waveFile);
%   epdParam=epdParamSet(fs);
%   plotOpt = 1;
%   [epInSampleIndex, epInFrameIndex, segment] = epdByVolHod(y, fs, nbits, epdParam, plotOpt);
% Roger Jang, 20070323
% ====== Default arguments
if nargin<1, selfdemo; return; end
if nargin<2, fs=16000; end
if nargin<3, nbits=16; end
if nargin<4 | isempty(epdParam), epdParam=epdParamSet(fs); end
if nargin<5, plotOpt=0; end
if size(y, 2)~=1, error('y is not mono!'); end
frameSize=epdParam.frameSize;
overlap=epdParam.overlap;
% Scale the two duration parameters by fs/(frameSize-overlap), i.e. by the number
% of frame hops per fs samples, so they can be compared with frame-based values below
% (presumably the parameters are given in seconds - TODO confirm in epdParamSet)
minSegment=round(epdParam.minSegment*fs/(frameSize-overlap));
maxSilBetweenWord=round(epdParam.maxSilBetweenWord*fs/(frameSize-overlap));
%minLastWordDuration=round(epdParam.minLastWordDuration*fs/(frameSize-overlap));
% ====== Compute volume/hod
frameMat=buffer2(y, frameSize, overlap); % frame blocking
frameMat=frameZeroMean(frameMat, 2);
frameNum=size(frameMat, 2); % no. of frames
volume=frame2volume(frameMat, 1); % method=1
hod=frame2ashod(frameMat, epdParam.diffOrder);
% ====== Compute vh thresholds
vh=volume*epdParam.volWeight+(1-epdParam.volWeight)*hod;
% Take vhMin/vhMax from the sorted vh values at positions index and
% frameNum-index+1 (index is about frameNum/32, at least 1) instead of
% the absolute minimum/maximum
temp=sort(vh);
index=round(frameNum/32); if index==0, index=1; end
vhMin=temp(index);
vhMax=temp(frameNum-index+1); % To avoid qiYin (presumably pinyin for a breathy sound - TODO confirm)
% The threshold lies 1/vhRatio of the way from vhMin up to vhMax
vhTh=(vhMax-vhMin)/epdParam.vhRatio+vhMin;
%fprintf('vhMin=%g, vhMax=%g, vhTh=%g\n', vhMin, vhMax, vhTh);
% ====== Identify voiced parts where vh is larger than vhTh
segment=findSegment(vh>vhTh);
% ====== Delete short sound clips (duration not longer than minSegment)
index = [];
for i=1:length(segment),
if segment(i).duration<=minSegment
index = [index, i];
end
end
segment(index) = [];
% ====== If the silence between the last two segments is too long, delete the last segment
if length(segment)>1
if segment(end).begin-segment(end-1).end>maxSilBetweenWord
% if segment(end).duration<=minLastWordDuration % Not used!
segment(end)=[];
% end
end
end
% ====== Check if segment 1 is noise
% NOTE(review): the commented-out code below appears to be damaged by text
% extraction (the third line looks like two statements fused together);
% restore it from the original source before re-enabling it.
%if length(segment)>=2
% sil=segment(2).begin-segment(1).end-1;
% if (sil>epdParam.minSil) && (2*segment(1).duration=2
% sil=segment(end).begin-segment(end-1).end-1;
% if (sil>epdParam.minSil) && (segment(end-1).duration>2*segment(end).duration)
% segment(end)=[];
% end
%end
% Use segment2 for further processing. Keep segment for plotting
segment2=segment;
% If a silence is longer than its neighboring segment, delete the segment.
% The loop condition is the constant 0, so this loop body never executes.
while 0
silDuration=[];
for i=1:length(segment2)-1
silDuration(i)=segment2(i+1).begin-segment2(i).end-1;
end
if isempty(silDuration), break; end
[maxSil, index]=max(silDuration);
% if maxSil>epdParam.minSil % max silence too long ===> delete one of its neighboring segment
if maxSil>min(segment2(index).duration, segment2(index+1).duration) % max silence too long ===> delete one of its neighboring segment
% NOTE(review): the next line appears to be truncated by text extraction: the
% text between a '<' and a later '>' seems to be missing. In the visible text,
% the "while" and "if" above are never closed, and U, epInSampleIndex,
% epInFrameIndex and zeroOneVec are never assigned. Restore the missing code
% from the original toolbox before using this file.
if segment2(index).duration1, U.y=U.y/(2^nbits/2); end
% Keep the signal between the first and last end-point for the "Play voiced" button
if ~isempty(epInSampleIndex)
U.voicedY=U.y(epInSampleIndex(1):epInSampleIndex(end));
else
U.voicedY=[];
end
% Store the playback data in the figure so that the button callbacks can retrieve it
set(gcf, 'userData', U);
uicontrol('string', 'Play all', 'callback', 'U=get(gcf, ''userData''); sound(U.y, U.fs);');
uicontrol('string', 'Play voiced', 'callback', 'U=get(gcf, ''userData''); sound(U.voicedY, U.fs);', 'position', [100, 20, 60, 20]);
end
% ====== Self demo
function selfdemo
% selfdemo: Run epdByVolHod on a sample wave file with plotting enabled.
% Reads 'SingaporeIsAFinePlace.wav' via wavReadInt, builds the EPD parameters
% with epdParamSet(fs) and calls epdByVolHod with plotOpt=1.
waveFile='SingaporeIsAFinePlace.wav';
[y, fs, nbits] = wavReadInt(waveFile);
epdParam=epdParamSet(fs);
plotOpt = 1;
% Outputs are positional: the third output of epdByVolHod is "segment"
% (followed by zeroOneVec, volume, hod, vh), so name the requested outputs
% accordingly instead of binding segment/zeroOneVec/volume to volume/hod/vh.
[epInSampleIndex, epInFrameIndex, segment] = epdByVolHod(y, fs, nbits, epdParam, plotOpt);