forked from ideo/LaughDetection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
85 lines (67 loc) · 2.57 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from pydub import AudioSegment
import h5py
import youtube_dl #pip install youtube_dl
import webvtt #pip install webvtt-py
import numpy as np
import os
import youtube_dl
def sound_slice_generator(sound_path, clipsize=1000, sample_rate=48000):
    '''
    Yield overlapping windows of an audio file as numpy arrays, meant to be
    fed into the VGGish embedder.

    The audio is cut into consecutive chunks of clipsize/2 ms and every pair
    of neighbouring chunks is glued together, so successive windows overlap
    by half a window:

        chunks    |--|--|--|--|--|
        window 1  |-----|
        window 2     |-----|
        window 3        |-----|

    Window k (counting from 1) therefore starts at (k-1) * (clipsize/2) ms,
    e.g. with clipsize=500 window 3 starts at (3-1) * 250ms = 0.5s.

    input:
        sound_path: audio file to load (handed to AudioSegment.from_file)
        clipsize: nominal length of each yielded window (in ms)
        sample_rate: frame rate the audio is converted to before slicing
    returns:
        generator of (samples, frame_rate) tuples, where samples is a numpy
        array of shape (n_samples, 1)
    '''
    audio = AudioSegment.from_file(sound_path)
    # Normalise the clip: requested frame rate, one channel, 2-byte samples.
    audio = audio.set_frame_rate(sample_rate).set_channels(1).set_sample_width(2)
    print('No of sound slices', audio.duration_seconds * 1000 / clipsize * 2)

    half_window = int(clipsize / 2)
    chunks = iter(audio[::half_window])  # consecutive half-window pieces

    # The first chunk has no predecessor; it only seeds the pairing below.
    previous = next(chunks, None)
    for current in chunks:
        window = previous + current  # previous half followed by current half
        column = np.array(window.get_array_of_samples()).reshape((-1, 1))
        previous = current
        yield column, current.frame_rate
def save_embeddings_hdf(path_file, list_of_embeddings):
    '''
    Write embeddings to an HDF5 file, one gzip-compressed dataset per entry.

    input:
        path_file: destination file; it is opened in 'w' mode
        list_of_embeddings: iterable of (idx, embedding) pairs; str(idx) is
            used as the dataset name and embedding as its data
    '''
    # libver='latest' is chosen for performance.
    with h5py.File(path_file, 'w', libver='latest') as hdf:
        for name, embedding in list_of_embeddings:
            hdf.create_dataset(
                str(name),
                data=embedding,
                compression='gzip',
                compression_opts=9,
            )
def download_audio_subtitle(url):
    '''
    Download the best available audio of a video, requesting both uploaded
    and automatically generated subtitles, into the current working directory.

    input:
        url: address of the video, handed to youtube_dl
    returns:
        filename reported by youtube_dl for the last download that reached
        the 'finished' status, or None if no download reported it
    '''
    filename = None

    def my_hook(d):
        # nonlocal is required: a plain assignment would only create a
        # variable local to the hook, leaving the outer filename as None.
        nonlocal filename
        if d['status'] == 'finished':
            filename = d['filename']

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '%(title)s.%(ext)s',
        'writesubtitles': True,
        'writeautomaticsub': True,
        'quiet': True,
        'progress_hooks': [my_hook],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])  # Download into the current working directory
    return filename