|
32 | 32 |
|
33 | 33 | import librosa
|
34 | 34 | import numpy as np
|
35 |
| -import parselmouth |
36 | 35 | import torch
|
37 | 36 | import torch.nn.functional as F
|
38 | 37 | from scipy import ndimage
|
@@ -88,35 +87,7 @@ def estimate_pitch(wav, mel_len, method='pyin', normalize_mean=None,
|
88 | 87 | if type(normalize_std) is float or type(normalize_std) is list:
|
89 | 88 | normalize_std = torch.tensor(normalize_std)
|
90 | 89 |
|
91 |
| - if method == 'praat': |
92 |
| - |
93 |
| - snd = parselmouth.Sound(wav) |
94 |
| - pitch_mel = snd.to_pitch(time_step=snd.duration / (mel_len + 3) |
95 |
| - ).selected_array['frequency'] |
96 |
| - assert np.abs(mel_len - pitch_mel.shape[0]) <= 1.0 |
97 |
| - |
98 |
| - pitch_mel = torch.from_numpy(pitch_mel).unsqueeze(0) |
99 |
| - |
100 |
| - if n_formants > 1: |
101 |
| - formant = snd.to_formant_burg( |
102 |
| - time_step=snd.duration / (mel_len + 3)) |
103 |
| - formant_n_frames = formant.get_number_of_frames() |
104 |
| - assert np.abs(mel_len - formant_n_frames) <= 1.0 |
105 |
| - |
106 |
| - formants_mel = np.zeros((formant_n_frames + 1, n_formants - 1)) |
107 |
| - for i in range(1, formant_n_frames + 1): |
108 |
| - formants_mel[i] = np.asarray([ |
109 |
| - formant.get_value_at_time( |
110 |
| - formant_number=f, |
111 |
| - time=formant.get_time_from_frame_number(i)) |
112 |
| - for f in range(1, n_formants) |
113 |
| - ]) |
114 |
| - |
115 |
| - pitch_mel = torch.cat( |
116 |
| - [pitch_mel, torch.from_numpy(formants_mel).permute(1, 0)], |
117 |
| - dim=0) |
118 |
| - |
119 |
| - elif method == 'pyin': |
| 90 | + if method == 'pyin': |
120 | 91 |
|
121 | 92 | snd, sr = librosa.load(wav)
|
122 | 93 | pitch_mel, voiced_flag, voiced_probs = librosa.pyin(
|
@@ -181,7 +152,7 @@ def __init__(self,
|
181 | 152 | pitch_online_dir=None,
|
182 | 153 | betabinomial_online_dir=None,
|
183 | 154 | use_betabinomial_interpolator=True,
|
184 |
| - pitch_online_method='praat', |
| 155 | + pitch_online_method='pyin', |
185 | 156 | **ignored):
|
186 | 157 |
|
187 | 158 | # Expect a list of filenames
|
@@ -338,7 +309,7 @@ def get_pitch(self, index, mel_len=None):
|
338 | 309 | if cached_fpath.is_file():
|
339 | 310 | return torch.load(cached_fpath)
|
340 | 311 |
|
341 |
| - # No luck so far - calculate or replace with praat |
| 312 | + # No luck so far - calculate |
342 | 313 | wav = audiopath
|
343 | 314 | if not wav.endswith('.wav'):
|
344 | 315 | wav = re.sub('/mels/', '/wavs/', wav)
|
|
0 commit comments