Skip to content

Commit 98a6fac

Browse files
albertz authored and Spotlight0xff committed
use PySoundFile instead of librosa.load (audioread)
beetbox/audioread#64 librosa/librosa#681 beetbox/audioread#62 beetbox/audioread#63
1 parent 9710c42 commit 98a6fac

File tree

2 files changed

+26
-6
lines changed

2 files changed

+26
-6
lines changed

GeneratingDataset.py

+15-5
Original file line number | Diff line number | Diff line change
@@ -691,6 +691,12 @@ class _NltkCorpusReaderDataset(CachedDataset2):
691691

692692

693693
class ExtractAudioFeatures:
694+
"""
695+
Currently uses librosa to extract MFCC features.
696+
We could also use python_speech_features.
697+
We could also add support e.g. to directly extract log-filterbanks or so.
698+
"""
699+
694700
def __init__(self,
695701
window_len=0.025, step_len=0.010,
696702
num_feature_filters=40, with_delta=False, norm_mean=None, norm_std_dev=None,
@@ -1524,9 +1530,6 @@ def __init__(self, path, prefix, bpe, audio, partition_epoch=None, fixed_random_
15241530
self.prefix = prefix
15251531
assert prefix in ["train", "dev", "eval"]
15261532
assert os.path.exists(path + "/train-clean-100")
1527-
import Util
1528-
Util.monkeyfix_glib()
1529-
Util.monkeypatch_audioread()
15301533
self.bpe = BytePairEncoding(**bpe)
15311534
self.labels = self.bpe.labels
15321535
self._fixed_random_seed = fixed_random_seed
@@ -1619,13 +1622,20 @@ def _collect_single_seq(self, seq_idx):
16191622
:param int seq_idx:
16201623
:rtype: DatasetSeq
16211624
"""
1625+
# Don't use librosa.load which internally uses audioread which would use Gstreamer as a backend,
1626+
# which has multiple issues:
1627+
# https://github.com/beetbox/audioread/issues/62
1628+
# https://github.com/beetbox/audioread/issues/63
1629+
# Instead, use PySoundFile, which is also faster. See here for discussions:
1630+
# https://github.com/beetbox/audioread/issues/64
1631+
# https://github.com/librosa/librosa/issues/681
16221632
import os
1623-
import librosa
1633+
import soundfile # pip install pysoundfile
16241634
subdir, speaker_id, chapter_id, seq_id = self._reference_seq_order[self._get_ref_seq_idx(seq_idx)]
16251635
audio_fn = "%(p)s/%(sd)s/%(sp)i/%(ch)i/%(sp)i-%(ch)i-%(i)04i.flac" % {
16261636
"p": self.path, "sd": subdir, "sp": speaker_id, "ch": chapter_id, "i": seq_id}
16271637
assert os.path.exists(audio_fn)
1628-
audio, sample_rate = librosa.load(audio_fn, sr=None)
1638+
audio, sample_rate = soundfile.read(audio_fn)
16291639
features = self.feature_extractor.get_audio_features(audio=audio, sample_rate=sample_rate)
16301640
targets_txt = self.transs[(subdir, speaker_id, chapter_id, seq_id)]
16311641
targets = numpy.array(self.bpe.get_seq(targets_txt), dtype="int32")

Util.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -2570,8 +2570,8 @@ def monkeyfix_glib():
25702570
"""
25712571
Fixes some bugs in GLib that cause SIGINT to not work.
25722572
This is used by audioread, and indirectly by librosa for loading audio.
2573-
25742573
https://stackoverflow.com/questions/16410852/
2574+
See also :func:`monkeypatch_audioread`.
25752575
"""
25762576
try:
25772577
import gi
@@ -2593,6 +2593,16 @@ def monkeypatch_audioread():
25932593
audioread does not behave optimally in some cases.
25942594
E.g. each call to _ca_available() takes quite long because of the ctypes.util.find_library usage.
25952595
We will patch this.
2596+
2597+
However, the recommendation would be to not use audioread (librosa.load).
2598+
audioread uses Gstreamer as a backend by default currently (on Linux).
2599+
Gstreamer has multiple issues. See also :func:`monkeyfix_glib`, and here for discussion:
2600+
https://github.com/beetbox/audioread/issues/62
2601+
https://github.com/beetbox/audioread/issues/63
2602+
2603+
Instead, use PySoundFile, which is also faster. See here for discussions:
2604+
https://github.com/beetbox/audioread/issues/64
2605+
https://github.com/librosa/librosa/issues/681
25962606
"""
25972607
try:
25982608
import audioread

0 commit comments

Comments
 (0)