% Source: www.pudn.com > audioProcessingtoolbox.rar > endPointDetect2.m
function [endPoint, voicedSegment] = endPointDetect(y, fs, nbits, plotOpt, epdParam)
% endPointDetect: End point detection based on volume and zero-crossing rate
%   Usage: [endPoint, voicedSegment] = endPointDetect(y, fs, nbits, plotOpt, epdParam)
%
%   Inputs:
%     y        - audio signal (presumably integer-valued samples, since the
%                DC offset is removed with round(mean(y)) -- confirm with caller)
%     fs       - sample rate (default 8000)
%     nbits    - bit resolution, used only for plot scaling/playback (default 8)
%     plotOpt  - nonzero to plot waveform, volume, zcr and the voiced part (default 0)
%     epdParam - struct with fields frameSize, overlap, zcrRatio, testFrameNum
%                (unused), volRatio1 and volRatio2; defaults are built when omitted
%
%   Outputs:
%     endPoint      - [begin, end] sample indices: begin of the first segment and
%                     end of the last segment; [] when no voiced frame is found
%     voicedSegment - struct array with fields 'begin' and 'end' (sample indices),
%                     one element per detected segment; [] when none is found
%
%   Called without arguments, the function runs its self demo.
% Roger Jang, 20041118
if nargin<1, selfdemo; return; end
if nargin<2, fs=8000; end
if nargin<3, nbits=8; end
if nargin<4, plotOpt=0; end
if nargin<5
    epdParam.frameSize = 320;       % same as htk
    epdParam.overlap = round(epdParam.frameSize*3/4);
    epdParam.zcrRatio = 0.3;
    epdParam.testFrameNum = 5;      % Number of frames for estimating the average background noise (not used)
    epdParam.volRatio1 = 2;
    epdParam.volRatio2 = 4;
end
frameSize = epdParam.frameSize;
overlap = epdParam.overlap;
zcrRatio = epdParam.zcrRatio;
hopSize = frameSize-overlap;        % frame advance in samples

% ====== Zero adjusted
y = y-round(mean(y));

% ====== Take frames
framedY = buffer2(y, frameSize, overlap);
frameNum = size(framedY, 2);        % Number of frames

% ====== Compute volume and zcr
volume = zeros(frameNum, 1);
zcr = zeros(frameNum, 1);
for i=1:frameNum
    frame = framedY(:, i);
    frame = frame-round(mean(frame));
    % NOTE(review): the sum is normalized by frameNum (not frameSize). All
    % thresholds below are relative to max(volume), so this only scales the plot.
    volume(i) = sum(abs(frame-mean(frame)))/frameNum;
    zcr(i) = sum(abs(diff(frame>0)));
end
frameTime = frame2sampleIndex(1:frameNum, frameSize, overlap)/fs;

% NOTE(review): with ratios > 1 (the defaults) these thresholds exceed
% max(volume), so the fallback thresholds below are what is normally used.
volTh = max(volume);
volTh1 = epdParam.volRatio1*volTh;
volTh2 = epdParam.volRatio2*volTh;

% ====== Find initial end points according volume level2 (upper level)
voicedIndex = find(volume>=volTh2);
if isempty(voicedIndex)
    % No voiced frame found: fall back to thresholds relative to the peak volume.
    % (This was a 'while' loop that recomputed the same values forever when
    % volume was empty or all NaN.)
    volTh1 = max(volume)/16;
    volTh2 = max(volume)/8;
    voicedIndex = find(volume>=volTh2);
end
if isempty(voicedIndex)
    % Nothing to detect (e.g. no frames at all): return empty results.
    endPoint = [];
    voicedSegment = [];
    return
end
zcrTh = max(zcr)*zcrRatio;

% ====== Group consecutive voiced frames into segments (frame indices)
% 'segment' is used instead of 'sound' to avoid shadowing the built-in sound().
segment = [];
k = 1;
segment(k).begin = voicedIndex(1);
for i=1:length(voicedIndex)-1       % start at 1 so the first gap is not missed
    if voicedIndex(i+1)-voicedIndex(i)>1
        segment(k).end = voicedIndex(i);
        segment(k+1).begin = voicedIndex(i+1);
        k = k+1;
    end
end
segment(k).end = voicedIndex(end);

% ====== Delete short sound clips (disabled)
%index = [];
%for i=1:length(segment),
%   if (segment(i).end-segment(i).begin)<4
%       index = [index, i];
%   end
%end
%segment(index) = [];

% ====== Expand end points to volume level1 (lower level)
for i=1:length(segment)
    head = segment(i).begin;
    while (head-1)>=1 && volume(head-1)>volTh1
        head = head-1;
    end
    segment(i).begin = head;
    tail = segment(i).end;
    while (tail+1)<=length(volume) && volume(tail+1)>volTh1
        tail = tail+1;
    end
    segment(i).end = tail;
end

% ====== Expand end points to include high zcr region (head side only)
for i=1:length(segment)
    head = segment(i).begin;
    while (head-1)>=1 && zcr(head-1)>zcrTh
        head = head-1;
    end
    segment(i).begin = head;
end

% ====== Delete repeated sound segments (adjacent segments that became identical)
index = [];
for i=1:length(segment)-1
    if segment(i).begin==segment(i+1).begin && segment(i).end==segment(i+1).end
        index = [index, i];
    end
end
segment(index) = [];

% ====== Transform sample-point-based index
for i=1:length(segment)
    voicedSegment(i).begin = segment(i).begin*hopSize;
    voicedSegment(i).end = min(length(y), segment(i).end*hopSize);
end
endPoint = [voicedSegment(1).begin, voicedSegment(end).end];   % Take the overall head and tail

if plotOpt
    axes1H = subplot(4,1,1);
    time = (1:length(y))/fs;
    plot(time, y); grid on
    axis([min(time), max(time), -2^nbits/2, 2^nbits/2]);
    ylabel('Amplitude');
    title('Waveform');
    % Plot end points (magenta = segment begin, green = segment end)
    yBound = [-2^nbits/2, 2^nbits/2];
    for i=1:length(segment)
        line(frame2sampleIndex(segment(i).begin, frameSize, overlap)/fs*[1,1], yBound, 'color', 'm');
        line(frame2sampleIndex(  segment(i).end, frameSize, overlap)/fs*[1,1], yBound, 'color', 'g');
    end

    axes2H = subplot(4,1,2);
    plot(frameTime, volume, '.-'); grid on
    line([min(frameTime), max(frameTime)], volTh1*[1 1], 'color', 'c');
    line([min(frameTime), max(frameTime)], volTh2*[1 1], 'color', 'c');
    axis tight
    ylabel('Volume');
    title('Volume');
    % Plot end points
    yBound = [min(volume) max(volume)];
    for i=1:length(segment)
        line(frame2sampleIndex(segment(i).begin, frameSize, overlap)/fs*[1,1], yBound, 'color', 'm');
        line(frame2sampleIndex(  segment(i).end, frameSize, overlap)/fs*[1,1], yBound, 'color', 'g');
    end

    axes3H = subplot(4,1,3);
    plot(frameTime, zcr, '.-'); grid on
    line([min(frameTime), max(frameTime)], zcrTh*[1 1], 'color', 'c');
    axis([min(frameTime), max(frameTime), 0, max(zcr)]);
    ylabel('ZCR');
    title('Zero crossing rate');
    % Plot end points
    yBound = [0 max(zcr)];
    for i=1:length(segment)
        line(frame2sampleIndex(segment(i).begin, frameSize, overlap)/fs*[1,1], yBound, 'color', 'm');
        line(frame2sampleIndex(  segment(i).end, frameSize, overlap)/fs*[1,1], yBound, 'color', 'g');
    end

    axes4H = subplot(4,1,4);
    voicedIndex = endPoint(1):endPoint(2);   % from here on: sample indices of the voiced part
    voicedTime = time(voicedIndex);
    voicedY = y(voicedIndex);
    voicedH = plot(voicedTime, voicedY); grid on
    axis([time(endPoint(1)), time(endPoint(2)), -inf, inf]);
    ylabel('Amplitude');
    title('Voiced waveform');

    % Store data in the figure so the button callbacks (evaluated as strings) can reach it
    U.y=y; U.fs=fs; U.nbits=nbits;
    U.axes1H=axes1H; U.axes2H=axes2H; U.axes3H=axes3H; U.axes4H=axes4H;
    U.voicedIndex=voicedIndex; U.voicedH=voicedH;
    U.voicedY=voicedY; U.voicedTime=voicedTime;
    set(gcf, 'userData', U);
    uicontrol('string', 'Play all', 'callback', 'U=get(gcf, ''userData''); sound(U.y/(2^U.nbits/2), U.fs);');
    uicontrol('string', 'Play voiced', 'callback', 'U=get(gcf, ''userData''); sound(U.voicedY/(2^U.nbits/2), U.fs);', 'position', [100, 20, 60, 20]);
    % Play the segmented sound (disabled)
    % head = segment(1).begin*(frameSize-overlap);
    % tail = min(length(y), segment(end).end*(frameSize-overlap));
    % thisY = y(head:tail);
    % fprintf('Hit return to hear the cut sound %g:', i);
    % pause;
    % fprintf('\n');
    % wavplay(thisY, fs, 'sync');
    % fprintf('\n');
end
% ======
function sampleIndex=frame2sampleIndex(frameIndex, frameSize, overlap)
% frame2sampleIndex: Convert 1-based frame indices to the sample index at the
% middle of each frame, given the frame size and the overlap between frames.
hopSize = frameSize-overlap;            % samples between consecutive frame starts
halfFrame = round(frameSize/2);         % offset from frame start to frame center
sampleIndex = halfFrame + hopSize*(frameIndex-1);
% ====== Self demo
function selfdemo
% selfdemo: Run the detector with plotting enabled on a sample wave file.
% Several candidate files are listed; the last entry is the one actually used.
candidateFiles = {
    'malisa\SenPC0000_2.wav'
    'Arthas\SenIC0000_3.wav'
    'tracy\SenIC0000_2.wav'
    'abo\SenIC0000_3.wav'
    'roger\SenIC0000_2.wav'
    'jacky\SenIC0000_3.wav'
    '__Cyberon__\2_f_0.wav'
    '__Cyberon__\1_f_19.wav'
    '主人下馬客在船.wav'
    '此恨綿綿無絕期+sil.wav'
    '楊家有女初長成.wav'
    };
waveFile = candidateFiles{end};
showPlot = 1;
[y, fs, nbits] = wavReadInt(waveFile);
% Call this file's main function by name so the demo follows the file name.
endPoint = feval(mfilename, y, fs, nbits, showPlot);