forked from Plachtaa/VITS-fast-fine-tuning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemucs_denoise.py
31 lines (29 loc) · 1.34 KB
/
demucs_denoise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import torch
import torchaudio
import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--denoise_user', type=bool, default=False,
help='whether to denoise user recorded voice')
parser.add_argument('--denoise_character', type=bool, default=False,
help='whether to denoise uploaded character voice')
args = parser.parse_args()
if args.denoise_user:
audio_dir = "./user_voice/"
wavfiles = []
for filename in list(os.walk(audio_dir))[0][2]:
if filename.endswith(".wav"):
wavfiles.append(filename)
# denoise with demucs
for i, wavfile in enumerate(wavfiles):
os.system(f"demucs --two-stems=vocals {audio_dir}{wavfile}")
# read & store the denoised vocals back
for wavfile in wavfiles:
i = wavfile.strip(".wav")
wav, sr = torchaudio.load(f"./separated/htdemucs/{i}/vocals.wav", frame_offset=0, num_frames=-1, normalize=True, channels_first=True)
# merge two channels into one
wav = wav.mean(dim=0).unsqueeze(0)
if sr != 22050:
wav = torchaudio.transforms.Resample(orig_freq=sr, new_freq=22050)(wav)
torchaudio.save(f"./user_voice/{i}.wav", wav, 22050, channels_first=True)