|
| 1 | +from parameterized import parameterized |
| 2 | + |
| 3 | +from torchaudio.io import AudioEffector |
| 4 | +from torchaudio_unittest.common_utils import get_sinusoid, skipIfNoFFmpeg, TorchaudioTestCase |
| 5 | + |
| 6 | +from .common import lt42 |
| 7 | + |
| 8 | + |
@skipIfNoFFmpeg
class EffectorTest(TorchaudioTestCase):
    """Checks ``AudioEffector`` both in one go (``apply``) and chunk by chunk (``stream``)."""

    def test_null(self):
        """No effect and codec will return the same result"""
        rate = 8000
        chunk_size = 256

        effector = AudioEffector(effect=None, format=None)
        waveform = get_sinusoid(n_channels=3, sample_rate=rate, channels_first=False)

        # Whole waveform processed in a single call.
        self.assertEqual(waveform, effector.apply(waveform, rate))

        # Chunked processing: each chunk is compared with the matching slice
        # of the input, tracked with a running frame offset.
        offset = 0
        for chunk in effector.stream(waveform, rate, chunk_size):
            expected = waveform[offset : offset + chunk_size, :]
            self.assertEqual(expected, chunk)
            offset += chunk_size

    @parameterized.expand(
        [
            ("ogg", "flac"),  # flac only supports s16 and s32
            ("ogg", "opus"),  # opus only supports 48k Hz
            ("ogg", "vorbis"),  # vorbis only supports stereo
            ("wav", None),
            ("wav", "pcm_u8"),
            ("mp3", None),
        ]
    )
    def test_formats(self, format, encoder):
        """Formats (some with restrictions) just work without an issue in effector"""
        rate = 8000

        effector = AudioEffector(format=format, encoder=encoder)
        waveform = get_sinusoid(n_channels=3, sample_rate=rate, channels_first=False)

        applied = effector.apply(waveform, rate)

        # With 4.1, OPUS yields 8020 samples (20 extra); fixed in 4.2+.
        # The shape checks below are therefore not performed in that case.
        if encoder == "opus" and lt42():
            return

        self.assertEqual(waveform.shape, applied.shape)

        # Note
        # MP3 adds padding which cannot be removed when the encoded data is
        # written to a file-like object without a seek method.
        # The amount of padding is retrievable as `AVCodecContext::initial_padding`
        # https://ffmpeg.org/doxygen/4.1/structAVCodecContext.html#a8f95550ce04f236e9915516d04d3d1ab
        # but this is not exposed yet.
        # These "priming" samples have negative time stamps, so logic to discard
        # them at decoding time could be added; however, as far as I checked,
        # the time stamp is reset when data is loaded with StreamReader.
        # I tried options like avoid_negative_ts,
        # https://ffmpeg.org/ffmpeg-formats.html
        # but it made no difference. Perhaps this is because the information
        # about negative timestamps is only available on the encoding side, and
        # it presumably is written to the header, but that is somehow not
        # happening with file-like objects.
        # Need to investigate more to remove MP3 padding.
        if format == "mp3":
            return

        # Ask for one chunk as long as the input; every chunk that comes back
        # must have the input's shape.
        num_frames = waveform.size(0)
        for chunk in effector.stream(waveform, rate, frames_per_chunk=num_frames):
            self.assertEqual(waveform.shape, chunk.shape)

    @parameterized.expand([("loudnorm=I=-16:LRA=11:TP=-1.5",), ("volume=2",)])
    def test_effect(self, effect):
        """Applying an effect yields an output with the same shape as the input"""
        rate = 8000

        effector = AudioEffector(effect=effect)
        waveform = get_sinusoid(n_channels=3, sample_rate=rate, channels_first=False)

        applied = effector.apply(waveform, rate)
        self.assertEqual(waveform.shape, applied.shape)
0 commit comments