Skip to content

Commit 09cd766

Browse files
committed
add rescaling option
1 parent aeed225 commit 09cd766

File tree

4 files changed

+14
-0
lines changed

4 files changed

+14
-0
lines changed

hparams.py

+5
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@
116116
preemphasis=0.97,
117117
min_level_db=-100,
118118
ref_level_db=20,
119+
# Whether to rescale the waveform or not.
120+
# Let x be an input waveform; the rescaled waveform y is given by:
121+
# y = x / np.abs(x).max() * rescaling_max
122+
rescaling=False,
123+
rescaling_max=0.999,
119124
# mel-spectrogram is normalized to [0, 1] for each utterance and clipping may
120125
# happen depending on min_level_db and ref_level_db, causing clipping noise.
121126
# If False, assertion is added to ensure no clipping happens.

jsut.py

+3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ def _process_utterance(out_dir, index, wav_path, text):
4444
else:
4545
wav, _ = librosa.effects.trim(wav, top_db=30)
4646

47+
if hparams.rescaling:
48+
wav = wav / np.abs(wav).max() * hparams.rescaling_max
49+
4750
# Compute the linear-scale spectrogram from the wav:
4851
spectrogram = audio.spectrogram(wav).astype(np.float32)
4952
n_frames = spectrogram.shape[1]

ljspeech.py

+3
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def _process_utterance(out_dir, index, wav_path, text):
5353
# Load the audio to a numpy array:
5454
wav = audio.load_wav(wav_path)
5555

56+
if hparams.rescaling:
57+
wav = wav / np.abs(wav).max() * hparams.rescaling_max
58+
5659
# Compute the linear-scale spectrogram from the wav:
5760
spectrogram = audio.spectrogram(wav).astype(np.float32)
5861
n_frames = spectrogram.shape[1]

vctk.py

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ def _process_utterance(out_dir, index, speaker_id, wav_path, text):
6767
else:
6868
wav, _ = librosa.effects.trim(wav, top_db=15)
6969

70+
if hparams.rescaling:
71+
wav = wav / np.abs(wav).max() * hparams.rescaling_max
72+
7073
# Compute the linear-scale spectrogram from the wav:
7174
spectrogram = audio.spectrogram(wav).astype(np.float32)
7275
n_frames = spectrogram.shape[1]

0 commit comments

Comments
 (0)