# Recovered from a git patch whose line breaks had been collapsed:
#
#   From 1b3f3ba108a1e50f797f0bd2433e5860f2696f24 Mon Sep 17 00:00:00 2001
#   From: Miquel Tubau Pires
#   Date: Sun, 2 Feb 2020 20:52:59 +0100
#   Subject: [PATCH] data augmentation files uploaded
#
# The patch created two files, scripts/augmentate_data.py and
# scripts/wav_functions.py.  Both are laid out below as one module so that
# ``readwav`` is available to the augmentation script without the original
# ``from wav_functions import readwav`` import.
"""Augment paired (audio, face) samples by cutting random audio windows.

For every speaker folder, each ``.wav`` file is cut into several random
fixed-length windows; each window is written to the output folder together
with a copy of the face image that belongs to the source audio.
"""

import argparse
import os
import random
import string
import wave
from shutil import copyfile

import numpy as np
import scipy.io.wavfile as wavfile

printable = set(string.printable)

# Sampling rate (Hz) the augmentation script expects and writes.
SAMPLE_RATE = 16000


# ---------------------------------------------------------------------------
# scripts/augmentate_data.py
# ---------------------------------------------------------------------------

def augment_data(wav_data, factor, audio_seconds, sample_rate=SAMPLE_RATE):
    """Cut ``factor`` distinct random windows out of ``wav_data``.

    Args:
        wav_data: Sliceable sequence of samples (list, 1-D or 2-D array whose
            first axis is time).
        factor: Number of windows to extract.
        audio_seconds: Length of every window, in seconds.
        sample_rate: Samples per second of ``wav_data``.

    Returns:
        A list with ``factor`` slices of ``wav_data``, each
        ``audio_seconds * sample_rate`` samples long, in random order.  No
        two windows start at the same sample.

    Raises:
        ValueError: If the window is empty, longer than ``wav_data``, or if
            ``factor`` is negative or exceeds the number of distinct windows.
    """
    window = int(audio_seconds * sample_rate)
    if window <= 0:
        raise ValueError('The requested window must contain at least one sample')

    total = len(wav_data)
    if total < window:
        raise ValueError('The original audio is shorter than the desired output')

    # Every start offset in [0, total - window] yields a full window.  The
    # original picked "centers" with true division (a float slice index on
    # Python 3) and excluded the last valid position, so an input exactly as
    # long as the window had no candidates at all.
    n_starts = total - window + 1
    if factor > n_starts:
        # The original retried forever in this situation.
        raise ValueError(
            'Cannot extract %d distinct windows: only %d are available'
            % (factor, n_starts)
        )

    # random.sample draws without replacement, so uniqueness needs no retry
    # loop; it also raises ValueError for a negative ``factor``.
    starts = random.sample(range(n_starts), factor)
    return [wav_data[start:start + window] for start in starts]


def _parse_args(argv=None):
    """Parse the command-line options of the augmentation script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--folder_path", required=True,
                        help='path of the folder where to take the data and augment')
    parser.add_argument("--factor", required=True, type=int,
                        help='data augmentation factor')
    parser.add_argument('--output_folder', required=True,
                        help='folder where to store the augmented data')
    parser.add_argument('--audio_seconds', required=True, type=int,
                        help='audio of the resulting augmented .wav files')
    return parser.parse_args(argv)


def main(argv=None):
    """Augment every ``.wav`` found one level below ``--folder_path``.

    For each audio file ``<prefix>_<number>.wav`` the matching image
    ``cropped_face_frame_<number>.png`` is looked up in the same folder.
    Every extracted window is stored as ``preprocessed_frame_<id>.wav`` next
    to a copy of that image named ``cropped_face_frame_<id>.png``; ``<id>``
    is a counter shared by all speakers so names never collide.

    Raises:
        ValueError: If an audio file is not sampled at 16 kHz or is too short
            for the requested window length / factor.
    """
    args = _parse_args(argv)

    # Sorted so that the numbering of the output files is reproducible.
    youtubers = sorted(
        entry for entry in os.listdir(args.folder_path)
        if not entry.startswith('.')
    )

    id_number = 0
    for youtuber in youtubers:
        working_path = os.path.join(args.folder_path, youtuber)
        new_path = os.path.join(args.output_folder, youtuber)

        # Create the per-speaker output folder on first use.
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        audios = sorted(
            entry for entry in os.listdir(working_path)
            if entry.endswith('.wav')
        )
        for audio in audios:
            # "<prefix>_<number>.wav" -> "<number>"
            number = audio.split('_')[-1][:-4]
            audio_path = os.path.join(working_path, audio)
            corresponding_face = os.path.join(
                working_path, 'cropped_face_frame_' + number + '.png')

            fm, _, wav_data = readwav(audio_path)
            if fm != SAMPLE_RATE:
                raise ValueError('Sampling rate is expected to be 16 KHz!')

            # augment_data itself rejects audio shorter than the window.
            wav_vectors = augment_data(wav_data, args.factor, args.audio_seconds)

            for wav_vec in wav_vectors:
                wavfile.write(
                    filename=os.path.join(
                        new_path, 'preprocessed_frame_' + str(id_number) + '.wav'),
                    rate=SAMPLE_RATE,
                    data=wav_vec,
                )
                new_face_path = os.path.join(
                    new_path, 'cropped_face_frame_' + str(id_number) + '.png')
                # Each window keeps the face that belongs to its source audio.
                copyfile(corresponding_face, new_face_path)

                # Advance the shared counter to avoid overwriting files.
                id_number += 1


# ---------------------------------------------------------------------------
# scripts/wav_functions.py
#
# wavio.py
# Author: Warren Weckesser
# License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
# ---------------------------------------------------------------------------

def _wav2array(nchannels, sampwidth, data):
    """Convert raw wav frame bytes into a ``(num_samples, nchannels)`` array.

    Args:
        nchannels: Number of interleaved channels in ``data``.
        sampwidth: Bytes per sample (1 to 4).
        data: The bytes read from the wav file.

    Raises:
        ValueError: If ``len(data)`` is not a multiple of
            ``sampwidth * nchannels`` or ``sampwidth`` is greater than 4.
    """
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        # Widen each 3-byte little-endian sample to 4 bytes.
        a = np.empty((num_samples, nchannels, 4), dtype=np.uint8)
        raw_bytes = np.frombuffer(data, dtype=np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        # Sign-extend: fill the 4th byte with the top bit of the 3rd.
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        # NOTE(review): everything from this ``view`` call to the end of the
        # function was missing from the recovered text and is reconstructed;
        # confirm against the upstream wavio.py.
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        # Presumably 8-bit samples are unsigned and wider ones signed (the
        # usual WAV convention) -- TODO confirm.
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = np.frombuffer(data, dtype='<%s%d' % (dt_char, sampwidth))
        # frombuffer returns a read-only view; copy to hand back a writable
        # array, as np.fromstring did.
        result = a.reshape(-1, nchannels).copy()
    return result


def readwav(file):
    """Read an uncompressed wav file.

    NOTE(review): this definition was missing from the recovered text.  It is
    reconstructed from its call site (a 3-tuple whose first element is the
    sampling rate and whose last is the sample array) and from the signature
    of ``_wav2array``; confirm against the upstream wavio.py.

    Args:
        file: Path or file-like object accepted by ``wave.open``.

    Returns:
        ``(rate, sampwidth, array)`` where ``array`` has shape
        ``(num_samples, nchannels)``.
    """
    with wave.open(file, 'rb') as wav:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        data = wav.readframes(wav.getnframes())
    return rate, sampwidth, _wav2array(nchannels, sampwidth, data)


def writewav24(filename, rate, data):
    """Create a 24 bit wav file.

    NOTE(review): the signature and the opening of this docstring were
    missing from the recovered text; the parameter names are the ones the
    body uses.

    Args:
        filename: Destination path.
        rate: Samples per second.
        data: Array-like of integers, 1-D (mono) or 2-D with one column per
            channel.  Values are converted to int32 and only their low 24
            bits are written; the data is not rescaled.

    Example: Create a 3 second 440 Hz sine wave.

    >>> rate = 22050  # samples per second
    >>> T = 3         # sample duration (seconds)
    >>> f = 440.0     # sound frequency (Hz)
    >>> t = np.linspace(0, T, T*rate, endpoint=False)
    >>> x = (2**23 - 1) * np.sin(2 * np.pi * f * t)
    >>> writewav24("sine24.wav", rate, x)
    """
    a32 = np.asarray(data, dtype=np.int32)
    if a32.ndim == 1:
        # Convert to a 2D array with a single column.  reshape() returns a
        # new view; assigning to ``.shape`` would also reshape the caller's
        # array whenever ``data`` is already an int32 ndarray.
        a32 = a32.reshape(-1, 1)
    # By shifting first 0 bits, then 8, then 16, the resulting output
    # is 24 bit little-endian.
    a8 = (a32.reshape(a32.shape + (1,)) >> np.array([0, 8, 16])) & 255
    wavdata = a8.astype(np.uint8).tobytes()

    # The context manager closes the file even if a write fails.
    with wave.open(filename, 'wb') as w:
        w.setnchannels(a32.shape[1])
        w.setsampwidth(3)
        w.setframerate(rate)
        w.writeframes(wavdata)


if __name__ == "__main__":
    main()