Professional Documents
Culture Documents
ADSP Mell Spectrum
ADSP Mell Spectrum
ADSP Mell Spectrum
Tugas:
1. Dapatkan suatu model pembentukan Mel-Cepstrum untuk pengolahan sinyal wicara
2. Buat suatu program MATLAB untuk pengolahan Mel-Cepstrum
3. Bandingkan hasilnya dengan proses pengolahan menggunakan DFT
Jawaban:
1. Sinyal kontinu → Pre-emphasis → Frame blocking → Frame → Windowing → Fast Fourier
Transform (FFT) → Spectrum → Mel Frequency Warping → Mel Spectrum → Discrete Cosine
Transform → Mel Cepstrum → Cepstral Liftering → Library
2.
% Mel-cepstrum analysis of one 20 ms frame of the speech file 'a.wav'.
% Statements are listed in execution order:
%   load -> frame -> window -> FFT -> mel filter bank -> log -> DCT -> lifter.
% trifbank() is not defined in this script and must be available on the path.

fs=16000;                           % initial value; replaced by the rate read from the file
% NOTE(review): wavread is not available in recent MATLAB releases;
% audioread is its documented replacement.
[s,fs]=wavread('a.wav');

% Figure 1, top: the whole recording.
figure(1)
subplot(211)
plot(s)
xlabel('Sample')
ylabel('Magnitude')

% frame
frame_1=0.02*fs;                    % number of samples in 0.02 s
xframe=s(9*frame_1:10*frame_1);     % samples 9*frame_1 .. 10*frame_1 (frame_1+1 samples)
% Figure 1, bottom: the selected frame.
subplot(212)
plot(xframe)
xlabel('Sample')
ylabel('Magnitude')

% windowing = hamming
% The window has to be as long as the frame it multiplies. frame_1 is a
% scalar, so length(frame_1) is 1 and would yield a one-point window.
win=hamming(length(xframe));
y_1frame_window=xframe.*win;
figure(2)
freqz(y_1frame_window)

% Fourier transform
Y=fft(y_1frame_window);
hz8000=8000*length(Y)/fs;           % number of FFT bins spanning 0..8000 Hz
% FFT bin k (1-based) lies at (k-1)*fs/length(Y), so the axis starts at 0 Hz.
f=(0:hz8000-1)*fs/length(Y);
x=20*log10(abs(Y(1:length(f)))+eps);   % eps keeps log10 away from zero
nfft = 2^8;
MAG = abs(fft(y_1frame_window,nfft,1));
figure(3)
plot(f,x);
xlabel('frequency(Hz)')
ylabel('magnitude(dB)')

% Mel filter bank
K = nfft/2+1;                       % number of spectrum rows fed to the filter bank
M = 23;                             % number of filter bank channels
hz2mel = @(hz)(1127*log(1+hz/700));
mel2hz = @(mel)(700*exp(mel/1127)-700);
% The second output is used as the x axis of the weight plot below
% (presumably the filter bank's frequency axis - confirm against trifbank).
% It is stored in its own variable so the scalar sampling rate fs is not
% overwritten.
[ H1, f_fb ] = trifbank( M, K, [0 fs], fs, hz2mel, mel2hz );
figure(4)
plot( f_fb, H1 );
xlabel( 'Frequency (Hz)' );
ylabel( 'Weight' );

% Filter bank energies
FBE = H1 * MAG(1:K,:); % FBE( FBE<1.0 ) = 1.0; % apply mel floor
figure(5)
plot(abs(FBE))

% DCT matrix generator (N rows, M columns)
dctm = @( N, M )( sqrt(2.0/M) * cos( repmat([0:N-1].',1,M) ...
                                   .* repmat(pi*([1:M]-0.5)/M,N,1) ) );
N=16;
DCT_ = dctm(N,M);
CC= DCT_ * log(FBE);                % cepstral coefficients: DCT of the log energies

% Lifter weights generator (defined here; not applied within this fragment)
ceplifter = @( N, L )( 1+0.5*L*sin(pi*[0:N-1]/L) );
L = 16; %liftering parameter
0.9
0.8
0.7
0.6
Weight
1
waveform
0.5
0.5
Magnitude
0.4
0
0.3
-0.5
0.2
-1
0
500
1000
1500
2000
2500
3000
0.1
Sample
0
0.2
500
1000
1500
2000
2500
3000
3500
4000
Frequency (Hz)
waveform
Magnitude
0.1
10
-0.1
9
-0.2
0
20
40
60
80
100
120
140
160
180
Sample
7
20
Magnitude (dB)
6
0
4
-20
3
-40
0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1
2000
0
0
10
15
20
-2000
-4000
-6000
0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
20
Spectrum
10
magnitude(dB)
Phase (degrees)
-10
-20
-30
-40
0
1000
2000
3000
4000
frequency(Hz)
5000
6000
7000
8000
25
% Generate plots
% The figure is created before any panel is drawn so that every subplot
% lands in it. Each line of the multi-line call ends with '...' as MATLAB
% requires for continuation.
% time, speech, time_frames, C and MFCCs are expected to exist in the
% workspace; they are not defined in this fragment.
figure('Position', [30 30 800 600], ...
       'PaperPositionMode', 'auto', ...
       'color', 'w', 'PaperOrientation', 'landscape', ...
       'Visible', 'on' );

% Top panel: waveform against time.
subplot( 311 );
plot( time, speech, 'k' );
xlim( [ min(time_frames) max(time_frames) ] );
xlabel( 'Time (s)' );
ylabel( 'Amplitude' );
title( 'Speech waveform');

% Bottom panel: MFCCs as an image; the first row of MFCCs is skipped.
subplot( 313 );
imagesc( time_frames, [1:C], MFCCs(2:end,:) );
axis( 'xy' );
xlim( [ min(time_frames) max(time_frames) ] );
xlabel( 'Time (s)' );
ylabel( 'Cepstrum index' );
title( 'Mel frequency cepstrum' );
Speech waveform
1
Amplitude
0.5
0
-0.5
-1
0.05
0.1
0.15
0.2
0.25
0.3
0.2
0.25
0.3
0.2
0.25
0.3
Time (s)
Log (mel) filterbank energies
20
Channel index
15
10
5
0.05
0.1
0.15
Time (s)
Mel frequency cepstrum
12
10
Cepstrum index
% Middle panel: log filter bank energies drawn as an image, one row per
% channel (1..M) and one column per entry of time_frames.
% time_frames, M and logFBEs are expected to exist in the workspace.
subplot( 3, 1, 2 );
channel_axis = 1:M;
imagesc( time_frames, channel_axis, logFBEs );
axis xy;
t_limits = [ min(time_frames), max(time_frames) ];
xlim( t_limits );
xlabel( 'Time (s)' );
ylabel( 'Channel index' );
title( 'Log (mel) filterbank energies' );
8
6
4
2
0.05
0.1
0.15
Time (s)
% Plot the cepstrum magnitude over indices ms1..ms20 and report the
% frequency that corresponds to its largest peak.
% C, fs, ms1 and ms20 are expected to exist in the workspace; they are not
% defined in this fragment.
q=(ms1:ms20)/fs;                    % quefrency axis in seconds
plot(q,abs(C(ms1:ms20)));
legend('Cepstrum');
xlabel('Quefrency(s)')
ylabel('Amplitude')
[c,fx]=max(abs(C(ms1:ms20)));       % fx is the peak position inside the slice
% The slice starts at index ms1, so the absolute index of the peak is
% ms1+fx-1; the offset has to be the slice start, not another bound.
fprintf('Fx=%g Hz\n',fs/(ms1+fx-1));
1
Waveform
0.5
Magnitude
3.
% Start from an empty workspace, then load the recording.
% NOTE(review): fs is set by hand here instead of being taken from the
% file - confirm that a.wav really is sampled at 16 kHz.
clear('all');
fs = 16000;
x = wavread('a.wav');
-0.5
-1
0
500
1000
1500
2000
2500
3000
Sample
0.2
0.1
-0.1
-0.2
0
100
150
200
250
300
350
20
Magnitude (dB)
-20
-40
-60
0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.8
0.9
x 10
2
Phase (degrees)
% windowing
% xframe and fs are expected to exist in the workspace; they are not
% defined in this fragment.
% The window length is taken from the frame it multiplies, so the
% element-wise product always applies a full-length Hamming taper.
win=hamming(length(xframe));
y_1frame_window=xframe.*win;
figure(2)
freqz(y_1frame_window)

% do Fourier Transform
figure(3)
Y=fft(y_1frame_window);
hz8000=8000*length(Y)/fs;           % FFT bin offset that corresponds to 8000 Hz
f=(0:hz8000)*fs/length(Y);          % frequency axis starting at 0 Hz
plot(f,20*log10(abs(Y(1:length(f)))+eps));   % eps keeps log10 away from zero
legend('Spectrum');
xlabel('Frequency(Hz)')
ylabel('Magnitude(dB)')
50
-2
-4
0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
Magnitude(dB)
-10
-20
-30
-40
-50
0
1000
2000
3000
4000
5000
6000
7000
8000
Frequency(Hz)
150
Cepstrum
Amplitude
100
50
0
0
0.002
0.004
0.006
0.008
0.01
0.012
Quefrency(s)
0.014
0.016
0.018
0.02