图:原始能量谱(横轴:时间/s,0–2.5;纵轴:频率/Hz,0–4000);通过mel倒谱修正后的能量谱(横轴:时间/s,0–2.5;纵轴:滤波器数目,2–20)。
附录2
function M3 = blockFrames(s, fs, m, n)
% blockFrames  Split a signal into frames, apply a Hamming window, and FFT.
%
% Inputs:
%   s   signal to analyze (vector; row or column)
%   fs  sampling rate of the signal (kept for interface compatibility;
%       not used by the computation)
%   m   distance, in samples, between the beginnings of two frames
%   n   number of samples per frame
%
% Output:
%   M3  n-by-nbFrame matrix; column j is the FFT of the Hamming-windowed
%       frame that starts at sample (j-1)*m + 1

sigLen = length(s);                       % length of the speech signal
nbFrame = floor((sigLen - n) / m) + 1;    % number of complete frames

if nbFrame < 1
    % The element-wise original failed here with an "undefined variable"
    % error; fail with a message that names the actual problem instead.
    error('blockFrames:signalTooShort', ...
          'Signal has %d samples but one frame needs %d.', sigLen, n);
end

% Index matrix: entry (i, j) is the position in s of sample i of frame j,
% i.e. (j-1)*m + i.
idx = bsxfun(@plus, (1:n)', (0:nbFrame - 1) * m);
s = s(:);
M = reshape(s(idx), n, nbFrame);          % reshape guards the 1-frame case

% Scale every frame (column) by the Hamming window; equivalent to
% diag(h) * M without building the n-by-n diagonal matrix.
h = hamming(n);
M2 = bsxfun(@times, h, M);

% Short-time Fourier transform: FFT along dimension 1 (one FFT per frame).
% The dimension is explicit so that n == 1 does not transform along rows.
M3 = fft(M2, [], 1);
end
function code = train(traindir, n)
% TRAIN  Speaker recognition, training stage: build one VQ codebook per
%        speaker from that speaker's wav file.
%
% Input:
%   traindir  string; directory containing the training sound files. It is
%             concatenated directly with the file name, so it must end
%             with a path separator.
%   n         number of training files in traindir
%
% Output:
%   code      1-by-n cell array; code{i} is the trained VQ codebook of the
%             i-th speaker
%
% Note:
%   Sound files in traindir are expected to be named
%   s1.wav, s2.wav, ..., sn.wav
%
% Example:
%   >> code = train('C:\data\train\', 8);

k = 16;                 % number of centroids required per codebook
code = cell(1, n);      % preallocate; also defines the output when n == 0

% wavread was removed from current MATLAB releases; prefer audioread when
% it is available and fall back to wavread on older installations.
useAudioread = exist('audioread', 'file') > 0;

for i = 1:n             % train a VQ codebook for each speaker
    file = sprintf('%ss%d.wav', traindir, i);
    disp(file);

    if useAudioread
        [s, fs] = audioread(file);
    else
        [s, fs] = wavread(file);
    end

    v = mfcc(s, fs);            % compute MFCCs
    code{i} = vqlbg(v, k);      % train VQ codebook
end
end
function d = disteu(x, y)
% DISTEU  Pairwise Euclidean distances between the columns of two matrices
%         (used as the distortion measure).
%
% Input:
%   x, y  two matrices in which each column is one data vector; they must
%         have the same number of rows
%
% Output:
%   d     N-by-P matrix, where N = size(x, 2) and P = size(y, 2);
%         d(i, j) is the Euclidean distance between x(:, i) and y(:, j)
%
% Note:
%   The Euclidean distance D between two vectors X and Y is
%       D = sum((X - Y).^2).^0.5

[M, N] = size(x);
[M2, P] = size(y);

if (M ~= M2)
    error('不匹配!')
end

d = zeros(N, P);

% Loop over whichever matrix has fewer columns so that the vectorised
% inner operation covers the larger one.
if (N < P)
    for n = 1:N
        % squared distances from column n of x to every column of y
        d(n, :) = sum(bsxfun(@minus, x(:, n), y) .^ 2, 1);
    end
else
    for p = 1:P
        % squared distances from every column of x to column p of y
        d(:, p) = sum(bsxfun(@minus, x, y(:, p)) .^ 2, 1)';
    end
end

d = d .^ 0.5;
end
function m = melfb(p, n, fs)
% MELFB  Determine the matrix for a mel-spaced filterbank.
%
% Inputs:
%   p   number of filters in the filterbank
%   n   length of the FFT
%   fs  sample rate in Hz
%
% Output:
%   m   sparse matrix containing the filterbank amplitudes,
%       size(m) = [p, 1 + floor(n/2)]
%
% Usage:
%   For example, to compute the mel-scale spectrum of a column-vector
%   signal s, with length n and sample rate fs:
%
%       f = fft(s);
%       m = melfb(p, n, fs);
%       n2 = 1 + floor(n/2);
%       z = m * abs(f(1:n2)).^2;
%
%   z would contain p samples of the desired mel-scale spectrum.
%
%   To plot the filterbank, e.g.:
%
%       plot(linspace(0, (12500/2), 129), melfb(20, 256, 12500)'),
%       title('Mel-spaced filterbank'), xlabel('Frequency (Hz)');

f0 = 700 / fs;                      % 700 Hz expressed as a fraction of fs
fn2 = floor(n/2);                   % index of the highest FFT bin kept
lr = log(1 + 0.5/f0) / (p+1);       % log-scale step: p+1 steps reach fs/2

% convert to fft bin numbers with 0 for DC term
bl = n * (f0 * (exp([0 1 p p+1] * lr) - 1));

b1 = floor(bl(1)) + 1;
b2 = ceil(bl(2));
b3 = floor(bl(3));
b4 = min(fn2, ceil(bl(4))) - 1;

% Position of bins b1..b4 on the filter-index scale, split into the
% integer part (fp) and the fractional part (pm).
pf = log(1 + (b1:b4)/n/f0) / lr;
fp = floor(pf);
pm = pf - fp;

% Each bin contributes to up to two filters: weight 2*(1-pm) goes to row
% fp and weight 2*pm goes to row fp+1. Columns are shifted by one because
% column 1 holds the DC term.
r = [fp(b2:b4) 1+fp(1:b3)];
c = [b2:b4 1:b3] + 1;
v = 2 * [1-pm(b2:b4) pm(1:b3)];

m = sparse(r, c, v, p, 1+fn2);
end
function r = mfcc(s, fs) % s声音信号的向量 fs取样频率 % MFCC
% Inputs: s contains the signal to analize % fs is the sampling rate of the signal % Output: r contains the transformed signal m = 100; n = 256;
相关推荐: