www.pudn.com > audioProcessingtoolbox.rar > endPoint2.m
function output=endPoint(y, fs, nbits, epdParam, plotOpt)
% endPoint: Endpoint detection(適用於 RMC 的電話語音錄音)
% Usage: output=endPoint(y, fs, nbits, epdParam, plotOpt)
% output=endPoint(waveFile, [], [], epdParam, plotOpt)
%
% output(1): start index
% output(2): end index
if nargin==0; selfdemo; return; end
if isstr(y); file=y; [y, fs, nbits]=wavread(file); end
if nargin<4 | isempty(epdParam)
epdParam.duDuration=0.2; % 開頭部分的「嘟」聲,約 0.2 秒
epdParam.volRatio=0.15;
epdParam.zcrRatio=0.35;
epdParam.frameSize=256;
epdParam.overlap=128;
epdParam.extension=2; % 從原始端點向前後各延伸2個frame
end
if nargin<5; plotOpt=0; end
y=y*2^nbits/2; % 轉成整數
%y=y((epdParam.duDuration*fs+1):end); % 刪除電話語音錄音「嘟」的部分
startFrameIndex=1+floor((epdParam.duDuration*fs-epdParam.overlap)/(epdParam.frameSize-epdParam.overlap)); % 刪除電話語音錄音「嘟」的部分
%y=y-mean(y);
time=(1:length(y))/fs;
framedY=buffer2(y, epdParam.frameSize, epdParam.overlap);
framedY=framedY-ones(epdParam.frameSize,1)*mean(framedY); % 設定每個 frame 的平均值為零
frameNum=size(framedY, 2);
%fprintf('frameNum=%d\n', frameNum);
frameTime=((0:frameNum-1)*(epdParam.frameSize-epdParam.overlap)+epdParam.frameSize/2)/fs;
% ====== Compute volume
vol=sum(abs(framedY));
vol(1:startFrameIndex-1)=0;
%disp(vol(1:3));
volTh=max(vol)*epdParam.volRatio; % Volume threshold
%fprintf('volTh=%g\n', volTh);
% ====== Compute zero crossing rate
zcr=sum(abs(diff(framedY>0)));
zcr=sum(framedY(1:end-1,:).*framedY(2:end, :)<0);
zcr(1:startFrameIndex-1)=0;
%disp(zcr(1:3));
zcrTh=max(zcr)*epdParam.zcrRatio;
%fprintf('zcrTh=%g\n', zcrTh);
% ====== Compute endpoint
index1=vol>volTh; % 音量要夠大
index2=zcr0
firstNonZero=[firstNonZero, i];
else
break;
end
end
index(firstNonZero)=0;
% 在第一秒內,找出第一個 [0 1 0]
for i=startFrameIndex:ceil(fs/(epdParam.frameSize-epdParam.overlap))
if all(index(i:i+2)==[0 1 0])
break;
end
end
patternIndex1=i+1;
index(patternIndex1)=0;
% 在第一秒內,找出第一個 [0 1 1 0]
for i=startFrameIndex:ceil(fs/(epdParam.frameSize-epdParam.overlap))
if all(index(i:i+3)==[0 1 1 0])
break;
end
end
patternIndex2=[i+1, i+2];
index(patternIndex2)=0;
temp=find(index);
%fprintf('start frame = %d\n', temp(1));
%fprintf('end frame = %d\n', temp(end));
startIndex=(temp(1)-1)*(epdParam.frameSize-epdParam.overlap)+epdParam.frameSize/2-epdParam.extension*epdParam.frameSize;
endIndex=(temp(end)-1)*(epdParam.frameSize-epdParam.overlap)+epdParam.frameSize/2+epdParam.extension*epdParam.frameSize;
output(1)=max(startIndex, 1);
output(2)=min(endIndex, length(y));
%fprintf('start sample = %d\n', output(1));
%fprintf('end sample = %d\n', output(2));
% ====== Code for plotting
if plotOpt
plotNum=3;
subplot(plotNum,1,1);
plot(time, y);
set(gca, 'xlim', [min(time), max(time)]); grid on
if exist('file'), title(file); end
subplot(plotNum,1,2);
plot(frameTime, vol, '.-');
line([min(frameTime), max(frameTime)], volTh*[1 1], 'color', 'r');
set(gca, 'xlim', [min(frameTime), max(frameTime)]); grid on
ylabel('Volume');
subplot(plotNum,1,3);
plot(frameTime, zcr, '.-');
line([min(frameTime), max(frameTime)], zcrTh*[1 1], 'color', 'r');
set(gca, 'xlim', [min(frameTime), max(frameTime)]); grid on
ylabel('Zero crossing rate');
subplot(plotNum, 1, 1);
limit=axis;
line(output(1)*[1 1]/fs, limit(3:4), 'color', 'r');
line(output(2)*[1 1]/fs, limit(3:4), 'color', 'r');
line(epdParam.duDuration*[1 1], limit(3:4), 'color', 'm');
subplot(plotNum, 1, 2);
line(frameTime(index), vol(index), 'marker', 'o', 'color', 'r', 'linestyle', 'none');
line(frameTime(firstNonZero), vol(firstNonZero), 'marker', 'x', 'color', 'k', 'linestyle', 'none'); % 被移除的點:開始連續非零
line(frameTime(patternIndex1), vol(patternIndex1), 'marker', 'x', 'color', 'k', 'linestyle', 'none'); % [0 1 0]
line(frameTime(patternIndex2), vol(patternIndex2), 'marker', 'x', 'color', 'k', 'linestyle', 'none'); % [0 1 1 0]
newY=y(output(1):output(2));
sound(newY/(2^nbits/2), fs);
return
Y=fft(framedY);
z=log(Y.*conj(Y)+eps);
z=Y.*conj(Y);
z=z(1:65, :);
%z=z*diag(1./sum(z)); % Sum of each column is 1
variance=diag(cov(z));
th=max(variance)/5;
subplot(plotNum,1,4);
plot(frameTime, variance, '.-');
set(gca, 'xlim', [min(frameTime), max(frameTime)]); grid on
line([min(frameTime), max(frameTime)], th*[1 1], 'color', 'r');
%set(gca, 'ylim', [0 20000]);
ylabel('Spectral variance');
temp=abs(framedY)+eps;
temp=temp*diag(1./sum(temp)); % Sum of each column is 1
entropy=sum(temp.*log(temp));
subplot(plotNum,1,5);
plot(frameTime, entropy, '.-');
set(gca, 'xlim', [min(frameTime), max(frameTime)]); grid on
ylabel('Spectral entropy');
diffVal=sum(abs(diff(framedY)));
subplot(plotNum,1,6);
plot(frameTime, diffVal, '.-');
set(gca, 'xlim', [min(frameTime), max(frameTime)]); grid on
ylabel('diffVal');
end
% ====== self demo
function selfdemo
waveFile='alexxx1.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\car3.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\coco5.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\eva2.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\irene3.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\coco1.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\coco4.wav';
waveFile='D:\users\jang\matlab\toolbox\asr\application\RMC\waveData\ivr辨識音檔-手機\soph5.wav';
plotOpt=1;
feval(mfilename, waveFile, [], [], [], plotOpt);