-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_final_allsongs_chroma.m
208 lines (164 loc) · 7.38 KB
/
test_final_allsongs_chroma.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
% ---- Workspace / path setup -------------------------------------------
% Start from an empty workspace, then make the helper directories visible.
clear;
addpath('chroma-ansyn/');
addpath('tool/');
addpath('tool/tf_agc');

% Load the two MAT-files holding the trained network parameters.
% (Presumably these provide W1L1/b1L1 and W1L2/b1L2, which the script reads
% later -- confirm against the contents of the files.)
load('W1b1L1');
load('W1b1L2');

% ---- Analysis constants -----------------------------------------------
% Sample rate (Hz) assumed for every recording processed below.
fs = 16000;
% CQT settings. NOTE(review): neither value is referenced in the visible
% part of this script.
bins_per_octave = 21;
fmin = 30; % lower boundary for CQT (lowest frequency bin will be immediately above this): fmax/<power of two> = 250
% ---- Extract patch features from the recordings in covers_pairs/ --------
% For every .wav file this section:
%   1. runs beat2() and converts its output to sample positions,
%   2. trims the signal to the span from the first beat to the 25th-from-last
%      beat,
%   3. calls chromagram_IF() with the (even) minimum beat interval in samples
%      as its third argument,
%   4. stacks the 12 circular rotations of the 12 chroma rows (144 rows),
%   5. cuts the result into overlapping patches of patch_win frames, each
%      flattened column-major into one row,
%   6. scales the patches of the file to 0..255 and drops rows whose RMS is
%      below 0.01.
% Outputs used later in the script:
%   L_gt                - number of files found
%   patchInAllFiles{i}  - (patches x 144*patch_win) matrix for file i
%   numOfPatchInOneFile - patches built per file (before silence removal)
%   numSum              - total of numOfPatchInOneFile
gtFiles = dir('covers_pairs/*.wav');
L_gt = length(gtFiles);
patchInAllFiles = cell(L_gt,1);       % one patch matrix per .wav file
numOfPatchInOneFile = zeros(L_gt,1);  % patches built per .wav file
numSum = 0;
patch_win = 10;                       % number of chroma frames per patch
n_chroma = 12;                        % chroma rows taken from chromagram_IF
n_tail_beats = 24;                    % beats discarded at the end of a file
silence_rms = 0.01;                   % patches with lower RMS are dropped

for i = 1:L_gt
    [wav_gt, fs_file] = audioread(fullfile('covers_pairs', gtFiles(i).name));
    disp(gtFiles(i).name);
    if fs_file ~= fs
        % Everything below assumes the script-level sample rate fs.
        warning('extract_patches:sampleRate', ...
            '%s is sampled at %d Hz but the script assumes %d Hz.', ...
            gtFiles(i).name, fs_file, fs);
    end

    % Beat positions in samples (beat2 presumably returns seconds -- verify).
    b = beat2(wav_gt, fs) * fs;
    if numel(b) <= n_tail_beats
        error('extract_patches:tooFewBeats', ...
            '%s: %d beats found, at least %d are required.', ...
            gtFiles(i).name, numel(b), n_tail_beats + 1);
    end

    % Smallest gap between consecutive beats, forced to an even sample count.
    b_min = floor(min(diff(b)));
    if mod(b_min,2) ~= 0
        b_min = b_min + 1;
    end

    % Keep first beat .. 25th-from-last beat. The positions are rounded and
    % clamped because they are used as subscripts; the first channel is used,
    % which is what linear indexing into a multichannel signal selected.
    first_sample = max(1, round(b(1)));
    last_sample = min(size(wav_gt,1), round(b(end - n_tail_beats)));
    wav_gt = wav_gt(first_sample:last_sample, 1);

    intCHROMA_single = chromagram_IF(wav_gt, fs, b_min);

    % Stack every circular rotation of the chroma rows. The matrix is
    % allocated afresh for each file so that columns from a longer, earlier
    % file can never leak into this one.
    intCHROMA_gt = zeros(n_chroma*n_chroma, size(intCHROMA_single,2));
    for c_loop = 1:n_chroma
        rows = n_chroma*(c_loop-1) + (1:n_chroma);
        intCHROMA_gt(rows,:) = intCHROMA_single([c_loop:n_chroma, 1:c_loop-1], :);
    end

    % Overlapping patches with a hop of one frame.
    % NOTE(review): as in the original loop bound, the window starting at
    % frame len-patch_win+1 (the last complete one) is not used.
    input_fbins = size(intCHROMA_gt,1);
    len = size(intCHROMA_gt,2);
    num = max(len - patch_win, 0);    % patches actually built
    numSum = numSum + num;
    numOfPatchInOneFile(i) = num;
    patchInOneFile = zeros(num, input_fbins*patch_win);
    for k = 1:num
        patchInOneFile(k,:) = reshape(intCHROMA_gt(:, k:k+patch_win-1), 1, []);
    end

    % Scale the whole file to 0..255; skip the division when the peak is zero
    % (or there are no patches) so no NaNs are produced.
    peak = max(patchInOneFile(:));
    if ~isempty(peak) && peak ~= 0
        patchInOneFile = round(255*patchInOneFile/peak);
    end

    % Silence removal: keep only rows whose RMS reaches the threshold.
    row_rms = sqrt(mean(patchInOneFile.^2, 2));
    patchInAllFiles{i} = patchInOneFile(row_rms >= silence_rms, :);
end
%%%%%%%%%%%%%%%RESULTS CALCULATIONS%%%%%%%%%%%%%%%%%
% Result tables: one L_gt-by-L_gt matrix per distance measure, indexed
% (row song, column song).
results_dtw_allsongs = zeros(L_gt, L_gt);
results_dtwnorm_allsongs = zeros(L_gt, L_gt);
results_euclidean_allsongs = zeros(L_gt, L_gt);
results_xcorr_allsongs = zeros(L_gt, L_gt);
results_kldiv_allsongs = zeros(L_gt, L_gt);
results_mahaldist_allsongs = zeros(L_gt, L_gt);
counter = 0; % not used in the visible script; semicolon added to stop the console echo

% Output features: push every patch of every song through the two-layer
% sigmoid network (W1L1/b1L1, then W1L2/b1L2).
%   a3_cells{i}      - output activations for song i, one column per patch
%   a3_cells_mean{i} - mean of those activations over the patches
a3_cells = cell(1, L_gt);
a3_cells_mean = cell(1, L_gt);
for i = 1:L_gt
    patches = patchInAllFiles{i,1}';                        % one patch per column
    % Layer 1: weights W1L1, bias b1L1 added to every column, then sigmoid.
    a2 = 1 ./ (1 + exp(-bsxfun(@plus, W1L1*patches, b1L1)));
    % Layer 2 (output layer): weights W1L2, bias b1L2, then sigmoid.
    a3 = 1 ./ (1 + exp(-bsxfun(@plus, W1L2*a2, b1L2)));
    a3_cells{i} = a3;
    a3_cells_mean{i} = mean(a3,2);
    % Progress marker, 20 per line.
    fprintf('>');
    if mod(i,20)==0
        fprintf('\n');
    end
end
fprintf('output features done \ncalculating results\n');
%calculate results
% Fill the distance tables for every ordered pair (n,i) of songs.
% Per-patch features (a3_cells) feed the DTW measure; the per-song mean
% vectors (a3_cells_mean) feed the other three measures.
for n = 1:L_gt
    seq_n = a3_cells{n}';          % patches x features for the row song
    mean_n = a3_cells_mean{n};
    for i = 1:L_gt
        seq_i = a3_cells{i}';      % patches x features for the column song
        mean_i = a3_cells_mean{i};
        mean_diff = mean_n - mean_i;

        % Distance between sampled song and sampling song .
        %results_dtw_allsongs(n,i)=dtw(a3_cells{n}',a3_cells{i}');
        % DTW cost divided by the summed number of output patches of both songs.
        n_patches_both = size(a3_cells{n},2) + size(a3_cells{i},2);
        results_dtwnorm_allsongs(n,i) = dtw(seq_n, seq_i) / n_patches_both;
        %DTW TAKES A LONG TIME BUT IS MOST ACCURATE. CONSIDER COMMENTING IT
        %OUT FOR QUICK AND DIRTY RESULTS

        % Euclidean distance (sum of squared differences of the mean vectors)
        results_euclidean_allsongs(n,i) = sum(abs(mean_diff).^2);

        % cross correlation: sum of xcorr() applied to the mean-vector difference
        results_xcorr_allsongs(n,i) = sum(xcorr(mean_diff));

        % KL divergence between the mean vectors, passed as rows
        results_kldiv_allsongs(n,i) = sum(KLDiv(mean_n', mean_i'));

        %mahalanobis distance
        %results_mahaldist_allsongs(n,i)=mean(mean(pdist2(a3_cells{n}',a3_cells{i}','mahalanobis'),2),1);

        % Progress marker, 20 per line.
        fprintf('>');
        if mod(i,20)==0
            fprintf('\n');
        end
    end
    fprintf('\n');
end
% Overwrite each song's distance to itself with a large sentinel so that the
% row minimum taken afterwards never selects the song itself.
self_distance = 9999999;
diag_idx = 1:(L_gt+1):(L_gt*L_gt);   % linear indices of the main diagonal of an L_gt-by-L_gt table
results_dtwnorm_allsongs(diag_idx) = self_distance;
results_euclidean_allsongs(diag_idx) = self_distance;
results_kldiv_allsongs(diag_idx) = self_distance;
results_xcorr_allsongs(diag_idx) = self_distance;
%calculate overall results
% Songs are treated as consecutive pairs (1,2), (3,4), ... A guess is counted
% as correct when the distance from a song to its pair partner is the
% smallest value in that song's row of the distance table.
%
% correct_guesses layout (3-by-4):
%   columns : 1 = DTW (normalised), 2 = Euclidean, 3 = KL divergence, 4 = xcorr
%   row 1   : unused (always zero)
%   row 2   : hits for odd-indexed songs  (partner is the next song)
%   row 3   : hits for even-indexed songs (partner is the previous song)
distance_tables = {results_dtwnorm_allsongs, results_euclidean_allsongs, ...
                   results_kldiv_allsongs, results_xcorr_allsongs};
correct_guesses = zeros(3,4);

% Only songs that have a partner are scored. With an odd number of songs the
% last one is skipped; indexing column i+1 for it would run past the table.
n_paired = 2*floor(L_gt/2);
for i = 1:n_paired
    if mod(i,2) == 1
        partner = i + 1;
        out_row = 2;
    else
        partner = i - 1;
        out_row = 3;
    end
    for m = 1:numel(distance_tables)
        D = distance_tables{m};
        if D(i,partner) <= min(D(i,:))
            correct_guesses(out_row,m) = correct_guesses(out_row,m) + 1;
        end
    end
end
%correct_guesses(1,1)='results_dtwnorm_allsongs';
%correct_guesses(1,2)='results_euclidean_allsongs';
%correct_guesses(1,3)='results_kldiv_allsongs';
%correct_guesses(1,4)='results_xcorr_allsongs';
correct_guesses %distance from training set to covers
%should be the same for all but KL divergence