Skip to content

Commit dbd20fc

Browse files
feat(api): add wav and pcm to response_format (#1189)
1 parent 96fa995 commit dbd20fc

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

src/openai/resources/audio/speech.py

+12 -4
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def create(
4141
input: str,
4242
model: Union[str, Literal["tts-1", "tts-1-hd"]],
4343
voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
44-
response_format: Literal["mp3", "opus", "aac", "flac"] | NotGiven = NOT_GIVEN,
44+
response_format: Literal["mp3", "opus", "aac", "flac", "pcm", "wav"] | NotGiven = NOT_GIVEN,
4545
speed: float | NotGiven = NOT_GIVEN,
4646
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
4747
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -65,7 +65,11 @@ def create(
6565
available in the
6666
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
6767
68-
response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.
68+
response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`,
69+
`flac`, `pcm`, and `wav`.
70+
71+
The `pcm` audio format, similar to `wav` but without a header, utilizes a 24kHz
72+
sample rate, mono channel, and 16-bit depth in signed little-endian format.
6973
7074
speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
7175
the default.
@@ -113,7 +117,7 @@ async def create(
113117
input: str,
114118
model: Union[str, Literal["tts-1", "tts-1-hd"]],
115119
voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
116-
response_format: Literal["mp3", "opus", "aac", "flac"] | NotGiven = NOT_GIVEN,
120+
response_format: Literal["mp3", "opus", "aac", "flac", "pcm", "wav"] | NotGiven = NOT_GIVEN,
117121
speed: float | NotGiven = NOT_GIVEN,
118122
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
119123
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -137,7 +141,11 @@ async def create(
137141
available in the
138142
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
139143
140-
response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.
144+
response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`,
145+
`flac`, `pcm`, and `wav`.
146+
147+
The `pcm` audio format, similar to `wav` but without a header, utilizes a 24kHz
148+
sample rate, mono channel, and 16-bit depth in signed little-endian format.
141149
142150
speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
143151
the default.

src/openai/types/audio/speech_create_params.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,14 @@ class SpeechCreateParams(TypedDict, total=False):
2626
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
2727
"""
2828

29-
response_format: Literal["mp3", "opus", "aac", "flac"]
30-
"""The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`."""
29+
response_format: Literal["mp3", "opus", "aac", "flac", "pcm", "wav"]
30+
"""The format to return audio in.
31+
32+
Supported formats are `mp3`, `opus`, `aac`, `flac`, `pcm`, and `wav`.
33+
34+
The `pcm` audio format, similar to `wav` but without a header, utilizes a 24kHz
35+
sample rate, mono channel, and 16-bit depth in signed little-endian format.
36+
"""
3137

3238
speed: float
3339
"""The speed of the generated audio.

0 commit comments

Comments (0)