Skip to content

Commit 1cf1a45

Browse files
sroussey and xenova authored
Add option to skip special tokens in TextStreamer (#1139)
* Add option to skip special tokens in TextStreamer to be like WhisperTextStreamer
* Re-order decode kwargs

---------

Co-authored-by: Joshua Lochner <[email protected]>
1 parent a938a56 commit 1cf1a45

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

src/generation/streamers.js

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ export class TextStreamer extends BaseStreamer {
3737
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
3838
* @param {Object} options
3939
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
40+
* @param {boolean} [options.skip_special_tokens=true] Whether to skip special tokens when decoding
4041
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
4142
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
4243
* @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
@@ -45,6 +46,7 @@ export class TextStreamer extends BaseStreamer {
4546
skip_prompt = false,
4647
callback_function = null,
4748
token_callback_function = null,
49+
skip_special_tokens = true,
4850
decode_kwargs = {},
4951
...kwargs
5052
} = {}) {
@@ -53,7 +55,7 @@ export class TextStreamer extends BaseStreamer {
5355
this.skip_prompt = skip_prompt;
5456
this.callback_function = callback_function ?? stdout_write;
5557
this.token_callback_function = token_callback_function;
56-
this.decode_kwargs = { ...decode_kwargs, ...kwargs };
58+
this.decode_kwargs = { skip_special_tokens, ...decode_kwargs, ...kwargs };
5759

5860
// variables used in the streaming process
5961
this.token_cache = [];
@@ -169,9 +171,10 @@ export class WhisperTextStreamer extends TextStreamer {
169171
} = {}) {
170172
super(tokenizer, {
171173
skip_prompt,
174+
skip_special_tokens,
172175
callback_function,
173176
token_callback_function,
174-
decode_kwargs: { skip_special_tokens, ...decode_kwargs },
177+
decode_kwargs,
175178
});
176179
this.timestamp_begin = tokenizer.timestamp_begin;
177180

0 commit comments

Comments
 (0)