# SongClass.py
import scipy.signal
import imagehash
import os
from scipy.io import wavfile
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import (binary_erosion,generate_binary_structure,iterate_structure)
import matplotlib.mlab as mlab
import logging
import pandas as pd
from scipy.spatial.distance import hamming
# Everything logged through the root logger (DEBUG and above) goes to log.log.
logging.basicConfig(level=logging.DEBUG, filename='log.log')

# (song_name, perceptual_hash) tuples appended by SongClass.fingerprint().
processed_songs = []
# Paths of the WAV files; SongClass(idx) reads all_songs_data[idx].
all_songs_data = []
class SongClass:
    """One song (or a mix of two songs) and its spectrogram-based fingerprint.

    The constructor reads a WAV file listed in the module-level
    ``all_songs_data`` and keeps at most its first 60 seconds.
    ``fingerprint`` renders the spectrogram to ``temp.png``, hashes that image
    with ``imagehash.phash`` and, unless ``update`` is set, records the hash in
    the module-level ``processed_songs``. ``mix`` blends two songs and ranks
    every recorded song by hash similarity to the blend.
    """

    # Divisor used to turn a hash distance into a percentage. Presumably the
    # bit length of the default ``imagehash.phash`` result -- confirm if the
    # hash size is ever changed.
    _HASH_BITS = 64.0

    def __init__(self, idx, is_mix=False):
        """Load ``all_songs_data[idx]`` and set the analysis parameters.

        Args:
            idx: Index into the module-level ``all_songs_data`` list.
            is_mix: When true the samples are kept exactly as read; otherwise
                only channel 0 of a multi-channel file is kept.
        """
        path = all_songs_data[idx]
        srate, song_data = wavfile.read(path)
        # Logged per instance; the original logged once at class definition.
        logging.info('Song Loaded')
        # Fixed-offset slice; presumably strips a directory prefix and a
        # suffix from the path -- TODO confirm against the caller's paths.
        self.song_name = str(path)[11:-6]
        # Log the name itself (the original sliced it a second time here).
        logging.info(self.song_name)
        # All processing uses a fixed rate regardless of the file's own rate.
        self.srate = 44100
        if srate != self.srate:
            logging.warning('%s has sample rate %s; processing assumes %s',
                            path, srate, self.srate)
        max_samples = int(self.srate * 60.0)  # first 60 seconds at self.srate
        if is_mix or song_data.ndim == 1:
            # A mono file has no channel axis, so indexing [:, 0] would raise.
            self.song_data = song_data[:max_samples]
        else:
            self.song_data = song_data[:max_samples, 0]
        wavfile.write('tryit.wav', self.srate, self.song_data)
        self.window_size = 4096
        self.amp_min = 20
        self.default_overlap_ratio = 0.2
        self.connect_mask = 2
        self.peak_neighbourhood_size = 20
        self.fan_value = 20  # how much it can be paired with others

    def _spectrogram(self):
        """Return the first array produced by ``mlab.specgram`` for this song."""
        return mlab.specgram(
            self.song_data,
            Fs=self.srate,
            NFFT=self.window_size,
            window=mlab.window_hanning,
            noverlap=int(self.window_size * self.default_overlap_ratio),
        )[0]

    def _save_spectrogram_image(self):
        """Render the spectrogram without axes and write it to ``temp.png``."""
        self.imag = self._spectrogram()
        fig, ax = plt.subplots()
        try:
            ax.axis('off')
            ax.imshow(self.imag)
            ax.invert_yaxis()
            # Save BEFORE show(): with a blocking backend the figure is gone
            # once show() returns and a later savefig would write a blank image.
            fig.savefig('temp.png')
            plt.show()
        finally:
            # Release the figure so repeated calls do not accumulate figures.
            plt.close(fig)

    def fingerprint(self, update=False):
        """Compute the spectrogram, its peaks and the perceptual hash.

        Args:
            update: When false the ``(song_name, hash)`` pair is appended to the
                module-level ``processed_songs``; when true it is not recorded.
        """
        self.imag = self._spectrogram()
        # Convert to 10*log10; cells that are exactly zero stay 0 rather than
        # becoming -inf because of the ``where`` mask and zero-filled ``out``.
        self.data = 10 * np.log10(self.imag, out=np.zeros_like(self.imag),
                                  where=(self.imag != 0))
        self.local_maxima = self.get_maxima(update=update)
        self.c = 1  # kept for compatibility; the original always ended at 1
        # get_maxima() has just written temp.png.
        self.features = Image.open('temp.png')
        self.hashed = imagehash.phash(self.features)
        logging.info(self.hashed)
        if not update:
            processed_songs.append((self.song_name, self.hashed))

    def generate_spectrogram(self):
        """Placeholder; not implemented."""
        pass

    def get_maxima(self, update=False):
        """Find spectrogram peaks in ``self.data`` and save the spectrogram image.

        Requires ``self.data`` (set by ``fingerprint``). Stores the selected
        indices in ``self.frequencies_filter`` / ``self.times_filter`` and
        writes ``temp.png`` as a side effect.

        Args:
            update: Currently unused; accepted for interface compatibility.

        Returns:
            A list of ``(row, column)`` index pairs into ``self.data`` whose
            value is a local maximum greater than ``self.amp_min``.
        """
        # 1) Base structuring element with the configured connectivity.
        struct = generate_binary_structure(2, self.connect_mask)
        # 2) Grow it so that peak_neighbourhood_size cells around a point are
        #    considered when deciding whether that point is a spectral peak.
        neighbor = iterate_structure(struct, self.peak_neighbourhood_size)
        # 3) A cell is a local maximum when it equals the maximum over its
        #    neighbourhood.
        local_max = maximum_filter(self.data, footprint=neighbor) == self.data
        # 4) Cells equal to zero are background; erode that mask with the
        #    same neighbourhood.
        background = self.data == 0
        eroded_background = binary_erosion(background, structure=neighbor,
                                           border_value=1)
        # 5) XOR removes the eroded background from the local-maximum mask.
        detect_peaks = local_max != eroded_background
        amps = self.data[detect_peaks]
        freqs, times = np.where(detect_peaks)
        # 6) Keep only peaks above the minimum amplitude.
        strong = amps > self.amp_min
        self.frequencies_filter = freqs[strong]
        self.times_filter = times[strong]
        self._save_spectrogram_image()
        return list(zip(self.frequencies_filter, self.times_filter))

    def mix(self, song2, slider):
        """Blend this song with ``song2`` and rank recorded songs by similarity.

        Args:
            song2: Another ``SongClass`` instance.
            slider: Weight of this song in percent (0-100); ``song2`` gets the
                remainder.

        Returns:
            A ``pandas.DataFrame`` with columns ``Song``, ``Group`` and
            ``Similarity`` (percent), sorted by descending similarity, with one
            row per entry of ``processed_songs``.
        """
        ratio = slider / 100
        # Trim both signals to the shorter one; np.average needs equal shapes.
        common = min(len(self.song_data), len(song2.song_data))
        mixed = np.average(
            [self.song_data[:common], song2.song_data[:common]],
            axis=0,
            weights=[ratio, 1 - ratio],
        )
        wavfile.write('mixed.wav', self.srate, mixed)
        # The constructor loads by index, so register the file temporarily and
        # always unregister it, even if loading or fingerprinting fails.
        all_songs_data.append('mixed.wav')
        try:
            self.mixed_song = SongClass(-1, is_mix=True)
            self.mixed_song.fingerprint(update=True)
        finally:
            all_songs_data.pop(-1)

        mixed_hash = self.mixed_song.hashed
        # The difference of two hashes is divided by _HASH_BITS and inverted
        # to give a similarity percentage.
        similarity_output = [
            (name[7:], name[0:7],
             (1 - (song_hash - mixed_hash) / self._HASH_BITS) * 100)
            for name, song_hash in processed_songs
        ]
        print('list', similarity_output)
        df = pd.DataFrame(similarity_output,
                          columns=['Song', 'Group', 'Similarity'])
        df = df.sort_values(by='Similarity', ascending=False)
        print(df)
        return df
def weight(list):
    """Average a sequence of scores in consecutive groups of three.

    Args:
        list: Sequence of numeric scores. The name shadows the builtin but is
            kept so that existing callers are unaffected.

    Returns:
        A list with one mean per group of three. A trailing group with fewer
        than three items is averaged over the items it has, and empty input
        gives an empty list.
    """
    scores = list
    print('inside weight: ', len(scores))
    # Slicing (rather than indexing i+1 and i+2) avoids an IndexError when the
    # length is not a multiple of three; equal weights make this a plain mean.
    x = [np.average(scores[i:i + 3]) for i in range(0, len(scores), 3)]
    print(len(x), 'len of x')
    return x