% Lab 03 - Pitch and Codec 

% Load the test signal, remove its DC offset, look at it, listen to it
% and cut it into frames.
[s, Fs] = wavread ('test.wav');  % Fs = sampling frequency stored in the file
sm = s - mean(s);                % mean-normalized signal
plot (sm);
sound (sm, Fs);                  % play at the file's own rate; sound(sm) alone
                                 % would use the default rate (8192 Hz in MATLAB)
sr = frame (sm, 160, 0);         % 160-sample frames, no overlap !
                                 % (course helper; its columns are used as frames below)

% Work with a single frame - the 7th one is the nice one ...
x = sr(:, 7);
plot(x);

% Pitch detection from the plain autocorrelation function (ACF)
R = xcorr(x);
R(1:159) = [];     % throw away the first 159 values, so that R(1) is R[0]
n = 0:159;         % lag axis in samples
plot(n, R);

% Lag (pitch period) detection on the ACF R computed above.
% L = 0 is used as the "no pitch found" marker.

% some constants
Lmin = 20; Lmax = 146;   % lag search range in samples
thr = 0.3;               % maximum needs to be at least thr * R[0]
Fs = 8000;               % sampling frequency in Hz (was a bare 8000 in the formula)
% detect lag
[Rmax,ii] = max(R((Lmin+1):(Lmax+1)));  % needs to add 1 because of Matlab indexing
if Rmax >= thr * R(1)    % R[0] in Matlab indexing
  L = ii+Lmin-1;         % and here needs to remove it again...
else
  L = 0;
end
% mark the detected peak; without a detection there is no peak at lag L to mark
if L > 0
  hold on; plot (L,Rmax,'or'); hold off;
end
% show lag in samples
L
if L > 0
  % in seconds
  T0 = L / Fs
  % and fundamental frequency in Hz
  F0 = 1/T0
else
  % no pitch found: report zeros instead of letting 1/T0 evaluate to Inf
  T0 = 0
  F0 = 0
end

% NCCF (normalized cross-correlation) - we choose the same portion of signal as 7th frame. 
% Uses x, Lmin, Lmax and thr from the sections above.
from = (7-1) * 160 + 1; to = from + 160 -1; 
selected = sm(from:to);  % nonshifted frame
x - selected             % check that it is really the same one ... 
E1 = sum(selected .^ 2); % energy of non-shifted frame.  
% in the following cycle, look at a few values, then comment out the plot and pause
% Rnccf(k+1) holds the value for lag k. Only lag 0 and lags Lmin..Lmax are
% computed; the entries for lags 1..Lmin-1 keep their initial zero.
Rnccf = zeros(1,Lmax + 1); 
for n = [0 Lmin:Lmax]
  froms = from-n; tos = to-n; % indexes of the shifted frame
  shifted = sm(froms:tos); 
  % show both frames on top of each other and wait for a key press
  plot(1:160,selected,1:160,shifted); pause; 
  E2 = sum(shifted .^ 2); % energy of the shifted one
  numerator = selected' * shifted; 
  % no semicolon on the next line: the value is printed for every lag
  nccf = numerator / sqrt(E1 * E2)
  Rnccf(n+1) = nccf;  % Matlab indexing. 
end
n = 0:Lmax; plot (n, Rnccf); 
% detect lag 
[Rmax,ii] = max(Rnccf((Lmin+1):(Lmax+1)));  % needs to add 1 because of Matlab indexing
% Rnccf(1) is the lag-0 value, where the frame is compared with itself
if Rmax >= thr * Rnccf(1)  % R[0] in Matlab indexing 
  L=ii+Lmin-1;             % and here needs to remove it again... 
else
  L=0;
end
% show lag: 
L 

% Lag tracks for the whole signal, once via ACF and once via NCCF.
% The constants need to be set by experiment ...
% (arguments after sm are presumably frame length, overlap, Lmin, Lmax, threshold)
Lacf  = lag_acf(sm, 160, 0, 20, 146, 0.3);
Lnccf = lag_nccf(sm, 160, 0, 20, 146, 0.7);

% signal and both lag tracks stacked in a 4-row figure
subplot(4,1,1);
plot(sm);
axis('tight');

subplot(4,1,2);
plot(Lacf);
axis('tight');

subplot(4,1,3);
plot(Lnccf);
axis('tight');

% >>> this shows that the NCCF is much more reliable than the ACF ! 

% Post-processing attempt: median filter of length 5 over the NCCF lag track,
% drawn in the bottom row of the figure.
Lnccf_med = medfilt1(Lnccf, 5);
subplot(4,1,4);
plot(Lnccf_med);
axis('tight');

% The same experiment on a longer signal ... train.wav
s  = wavread('train.wav');
sm = s - mean(s);

% lag tracks: ACF, NCCF and median-filtered NCCF
Lacf      = lag_acf(sm, 160, 0, 20, 146, 0.3);
Lnccf     = lag_nccf(sm, 160, 0, 20, 146, 0.7);
Lnccf_med = medfilt1(Lnccf, 5);

% signal on top, the three lag tracks below it
subplot(4,1,1);
plot(sm);
axis('tight');

subplot(4,1,2);
plot(Lacf);
axis('tight');

subplot(4,1,3);
plot(Lnccf);
axis('tight');

subplot(4,1,4);
plot(Lnccf_med);
axis('tight');

% >>> quite a solid mess in the ACF and in the unfiltered NCCF ! 

%%%%%%%%%%%% coding %%%%%%%%%%%%%
% Analysis and re-synthesis without any quantization.
s = wavread ('train.wav'); 
sm = s - mean(s); 
% first parameterization function: 
% (returns A, G, L and Nram; presumably LPC coefficients, gains, lags and
%  the number of frames - confirm in param2.m)
[A,G,L,Nram] = param2 (sm,160,0,10,20,146,0.7); 
% and synthesis
ss = synthesize (A,G,L,10,160); 
plot(ss); 
soundsc(ss, 8000);   % play at the 8000 Hz the signal is also written with below;
                     % soundsc(ss) alone would use the default rate (8192 Hz in MATLAB)

wavwrite (ss,8000,16,'/homes/kazi/cernocky/BORDEL/a.wav')

% >>> that is pretty decent ! 

%%%%%%%%%% full coder including quantization %%%%%%%%
% vvvvvvvv this does NOT go into the assignment vvvvvvvvv
% The codebook (CB) is trained in directory vq_ne_ven, because I do not want to hand out everything again ... 
% 256 codevectors, trained on the A's. 
% lbg.m is edited only so that it computes 256 codevectors ! 
s = wavread ('train.wav'); 
sm = s - mean(s); 
% first parameterization function: 
[A,G,L,Nram] = param2 (sm,160,0,10,20,146,0.7); 
% run the training script lbg.m from its own directory
% (presumably it reads A from the workspace and leaves the codebook in CB - confirm in lbg.m)
cd vq_ne_ven/
lbg 

% store the codebook as plain text
save cb256.txt CB -ascii 
% ... hm, bad, lots of NaN ! ... so a smaller one. 128 - NaN as well ! so 64 ... that is going
%     to be rough. ... grr, no way ! I make a bigger one and delete the NaNs from it ! 
    
% Drop every codevector (column of CB) whose first component is NaN.
% Plain column deletion is used instead of removerows(), so that no toolbox
% function is needed for this step.
bad = find (isnan(CB(1,:)));   % column indices of the broken codevectors
CBclean = CB;
CBclean(:,bad) = []            % same matrix as removerows(CB',bad)'; no semicolon, so it is shown
... has 210 entries - not bad ... 

% store the cleaned codebook as plain text
save('cb210.txt', 'CBclean', '-ascii');

% also something for the gain: 64 levels, logarithmically spaced between
% the smallest and the largest value in G
gcb = logspace(log10(min(G)), log10(max(G)), 64);
save('gcb64.txt', 'gcb', '-ascii');

% ^^^^^^^^^^^^ back to the lab ^^^^^^^^^^^^^^^^^^^^

% quantize A coefficients ! 
% Loading the text files creates the variables cb210 and gcb64.
load cb210.txt 
load gcb64.txt 

s = wavread ('train.wav'); 
sm = s - mean(s); 
% first parameterization function: 
[A,G,L,Nram] = param2 (sm,160,0,10,20,146,0.7); 
% coding: replace A and G by codebook indices
% (gd is the second output of vq_code and is not used below)
[asym,gd] = vq_code(A, cb210); 
gsym = vq_code(G, gcb64); 
% decoding: look the indices up in the codebooks again
Adecoded = cb210(:,asym);
Gdecoded = gcb64(:,gsym);

% and synthesis
ss = synthesize (Adecoded,Gdecoded,L,10,160); 
plot (ss); 
soundsc(ss, 8000);   % play at the 8000 Hz the signal is also written with below;
                     % soundsc(ss) alone would use the default rate (8192 Hz in MATLAB)

wavwrite (ss,8000,16,'/homes/kazi/cernocky/BORDEL/b.wav')

>>> not so bad ! 

% so how many bits now just LPC and lag ? 
% Bits per frame times frames per second (50 = 8000 samples/s / 160 samples per frame).
% NOTE(review): the three terms are presumably 8 bits for the LPC codebook index,
% 7 bits for the lag and 6 bits for the gain index (64 levels) - so the gain seems
% to be counted as well, not "just LPC and lag"; confirm against the coder function.
(8 + 7 + 6) * 50 
% ... 1050 bit/s ... decent ! 
 
%%%%%%%%% putting it to 2 big functions that will process from file to file ... 
% coder: wav file -> parameter file, decoder: parameter file -> wav file
s = coder('train.wav', '/tmp/koko'); 
ss = decoder ('/tmp/koko', 'decoded.wav'); 

% ok, brutal testing with something else !  some conversion needed ... 
% The next line is a shell command (sox), not Matlab - run it in a terminal.
% NOTE(review): sox writes ses0099.wav, but the coder below reads /tmp/mako.wav -
% check how the converted file gets there.
sox /homes/kazi/cernocky/PROJECTS/MV_2008/mv/ses0099.wav -t wav -s -w ses0099.wav 
s = coder('/tmp/mako.wav', '/tmp/koko'); 
ss = decoder ('/tmp/koko', '/tmp/makod.wav'); 
% >>> really awful ! better to try some male speaker ... 
    
% Second recording. The sox line is again a shell command, not Matlab.
% NOTE(review): sox writes ses0019.wav, but the coder reads /tmp/mako.wav - verify.
sox /homes/kazi/cernocky/PROJECTS/MV_2008/mv/ses0019.wav -t wav -s -w ses0019.wav
s = coder('/tmp/mako.wav', '/tmp/koko'); 
ss = decoder ('/tmp/koko', '/tmp/makod.wav'); 
% >>> also pretty rough ! trying the Sebesta speechdat test: 

% code and decode the speechdat test file
s = coder('testspdat.wav', '/tmp/koko'); 
ss = decoder ('/tmp/koko', '/tmp/testspdatd.wav'); 
% ... that one is decent. Let us put this into the lab first, then let the students try
%   something else and ask them why it performs so badly ! 
  
HOWGH 

%%% packing for students 
% Shell command, not Matlab: create (c) a gzip-compressed (z) archive file (f),
% list the files while packing (v) and follow symbolic links (h).
tar cvfhz ~/BORDEL/03_pitch_codec.tgz *.tex *.pdf *.m *.txt *.wav

  


