File tree Expand file tree Collapse file tree 3 files changed +77
-1
lines changed Expand file tree Collapse file tree 3 files changed +77
-1
lines changed Original file line number Diff line number Diff line change
1
+ import type { PyodideInterface } from "pyodide" ;
2
+ import { beforeEach , describe , it , expect } from "vitest" ;
3
+ import { setupPyodideForTest } from "./utils" ;
4
+
5
+ import { downloadFile } from "./utils" ;
6
+
7
/**
 * End-to-end check of `read_audio()` together with an
 * automatic-speech-recognition pipeline running inside Pyodide.
 *
 * A fresh interpreter is requested from `setupPyodideForTest(["scipy"])`
 * before every test (presumably so that `scipy` is available to
 * `read_audio()` — confirm against the helper in `./utils`).
 */
describe("read_audio() and ASR", () => {
  const sampleUrl =
    "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav";
  const localPath = "/tmp/jfk.wav";
  const expectedTranscript =
    " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country.";

  let pyodide: PyodideInterface;

  beforeEach(async () => {
    pyodide = await setupPyodideForTest(["scipy"]);
  });

  it("can read an audio file from a local file", async () => {
    // Fetch the sample WAV into the path that the Python script reads below.
    await downloadFile(pyodide, sampleUrl, localPath);

    // The script leaves its transcription in the Python global `text`.
    const script = `
from transformers_js_py import import_transformers_js, read_audio
import numpy as np

transformers = await import_transformers_js()
pipeline = transformers.pipeline
pipe = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en')

audio = read_audio("/tmp/jfk.wav", 16000)
result = await pipe(audio)
text = result["text"]
`;
    await pyodide.runPythonAsync(script);

    const transcript = await pyodide.globals.get("text");
    expect(transcript).toEqual(expectedTranscript);
  });
});
Original file line number Diff line number Diff line change
1
+ from .audio import read_audio
1
2
from .proxies import import_transformers_js
2
3
from .url import as_url
3
4
4
- __all__ = ["as_url" , "import_transformers_js" ]
5
# Public API of the package. Every entry must match an imported name exactly:
# a stray space inside a string would make `from transformers_js_py import *`
# fail with AttributeError.
__all__ = ["as_url", "read_audio", "import_transformers_js"]
Original file line number Diff line number Diff line change
1
+ try :
2
+ import numpy as np
3
+ except ImportError :
4
+ np = None # type: ignore
5
+
6
+
7
def read_audio(filename, sampling_rate: int) -> "np.ndarray":
    """Read a WAV file and return its samples as float32 at ``sampling_rate``.

    The file is decoded with ``scipy.io.wavfile.read``, integer PCM data is
    scaled to floating point, the signal is resampled when the file's rate
    differs from ``sampling_rate``, and multi-channel audio is merged into a
    single channel using its first two channels.

    Args:
        filename: Input passed straight to ``scipy.io.wavfile.read``.
        sampling_rate: Target sampling rate of the returned samples.

    Returns:
        A float32 array of samples. Input with more than one channel is
        reduced to one dimension; channels beyond the second are ignored.

    Raises:
        ImportError: If ``numpy`` or ``scipy`` cannot be imported.
    """
    # Refs:
    # * https://github.com/xenova/transformers.js/blob/2.15.1/src/utils/audio.js#L42-L77
    # * https://huggingface.co/docs/transformers.js/guides/node-audio-processing

    try:
        import numpy as np

        # Import the submodules explicitly: a bare `import scipy` does not
        # guarantee that `scipy.io.wavfile` and `scipy.signal` are loaded.
        import scipy.io.wavfile  # type: ignore
        import scipy.signal  # type: ignore
    except ImportError as exc:
        raise ImportError(
            "You need to have `numpy` and `scipy` installed to use this feature."
        ) from exc

    original_sample_rate, samples = scipy.io.wavfile.read(filename, mmap=False)

    # Scale integer PCM to float32, dividing by the magnitude of the most
    # negative value of each type; uint8 data is first re-centred around zero.
    # Ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.read.html # noqa: E501
    if samples.dtype == np.int16:
        samples = samples.astype(np.float32) / 32768.0
    elif samples.dtype == np.int32:
        samples = samples.astype(np.float32) / 2147483648.0
    elif samples.dtype == np.uint8:
        samples = (samples.astype(np.float32) - 128.0) / 128.0

    if original_sample_rate != sampling_rate:
        # `resample` works along axis 0 (the frame axis), so multi-channel
        # data is resampled channel by channel.
        samples = scipy.signal.resample(
            samples, int(len(samples) * sampling_rate / original_sample_rate)
        )

    if samples.ndim > 1 and samples.shape[1] > 1:
        SCALING_FACTOR = np.sqrt(2)
        # Merge the first two channels into one, mirroring the
        # Transformers.js implementation referenced above.
        left = samples[:, 0]
        right = samples[:, 1]
        samples = SCALING_FACTOR * (left + right) / 2

    # Float64 WAV input and NumPy scalar promotion in the steps above can
    # leave float64 data; always hand back float32 as intended.
    return np.asarray(samples, dtype=np.float32)
You can’t perform that action at this time.
0 commit comments