Skip to content

Commit 2f680ee

Browse files
committed
Create get_chat_template helper function
1 parent 93b25fb commit 2f680ee

File tree

1 file changed

+62
-33
lines changed

1 file changed

+62
-33
lines changed

src/tokenizers.js

+62-33
Original file line numberDiff line numberDiff line change
@@ -3057,6 +3057,67 @@ export class PreTrainedTokenizer extends Callable {
30573057

30583058
return decoded;
30593059
}
3060+
3061+
/**
3062+
* Retrieve the chat template string used for tokenizing chat messages. This template is used
3063+
* internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
3064+
* template for better generation tracking.
3065+
*
3066+
* @param {Object} options An optional object containing the following properties:
3067+
* @param {string} [options.chat_template=null]
3068+
* A Jinja template or the name of a template to use for this conversion.
3069+
* It is usually not necessary to pass anything to this argument,
3070+
* as the model's template will be used by default.
3071+
* @param {Object[]} [options.tools=null]
3072+
* A list of tools (callable functions) that will be accessible to the model. If the template does not
3073+
* support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
3074+
* giving the name, description and argument types for the tool. See our
3075+
* [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
3076+
* for more information.
3077+
* @returns {string} The chat template string.
3078+
*/
3079+
get_chat_template({
3080+
chat_template = null,
3081+
tools = null,
3082+
} = {}) {
3083+
3084+
// First, handle the cases when the model has a dict of multiple templates
3085+
if (this.chat_template && typeof this.chat_template === 'object') {
3086+
const template_dict = this.chat_template;
3087+
3088+
if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
3089+
// The user can pass the name of a template to the chat template argument instead of an entire template
3090+
chat_template = template_dict[chat_template];
3091+
} else if (chat_template === null) {
3092+
if (tools !== null && 'tool_use' in template_dict) {
3093+
chat_template = template_dict['tool_use'];
3094+
} else if ('default' in template_dict) {
3095+
chat_template = template_dict['default'];
3096+
} else {
3097+
throw Error(
3098+
`This model has multiple chat templates with no default specified! Please either pass a chat ` +
3099+
`template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
3100+
`template names are ${Object.keys(template_dict).sort()}.`
3101+
)
3102+
}
3103+
}
3104+
} else if (chat_template === null) {
3105+
// These are the cases when the model has a single template
3106+
// priority: `chat_template` argument > `tokenizer.chat_template`
3107+
if (this.chat_template) {
3108+
chat_template = this.chat_template;
3109+
} else {
3110+
throw Error(
3111+
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
3112+
"argument was passed! For information about writing templates and setting the " +
3113+
"tokenizer.chat_template attribute, please see the documentation at " +
3114+
"https://huggingface.co/docs/transformers/main/en/chat_templating"
3115+
)
3116+
}
3117+
}
3118+
return chat_template;
3119+
}
3120+
30603121
/**
30613122
* Converts a list of message objects with `"role"` and `"content"` keys to a list of token
30623123
* ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
@@ -3130,40 +3191,8 @@ export class PreTrainedTokenizer extends Callable {
31303191
...kwargs
31313192
} = {}) {
31323193

3133-
// First, handle the cases when the model has a dict of multiple templates
3134-
if (this.chat_template && typeof this.chat_template === 'object') {
3135-
const template_dict = this.chat_template;
3194+
chat_template = this.get_chat_template({ chat_template, tools });
31363195

3137-
if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
3138-
// The user can pass the name of a template to the chat template argument instead of an entire template
3139-
chat_template = template_dict[chat_template];
3140-
} else if (chat_template === null) {
3141-
if (tools !== null && 'tool_use' in template_dict) {
3142-
chat_template = template_dict['tool_use'];
3143-
} else if ('default' in template_dict) {
3144-
chat_template = template_dict['default'];
3145-
} else {
3146-
throw Error(
3147-
`This model has multiple chat templates with no default specified! Please either pass a chat ` +
3148-
`template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
3149-
`template names are ${Object.keys(template_dict).sort()}.`
3150-
)
3151-
}
3152-
}
3153-
} else {
3154-
// These are the cases when the model has a single template
3155-
// priority: `chat_template` argument > `tokenizer.chat_template`
3156-
if (this.chat_template) {
3157-
chat_template = this.chat_template;
3158-
} else {
3159-
throw Error(
3160-
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
3161-
"argument was passed! For information about writing templates and setting the " +
3162-
"tokenizer.chat_template attribute, please see the documentation at " +
3163-
"https://huggingface.co/docs/transformers/main/en/chat_templating"
3164-
)
3165-
}
3166-
}
31673196
if (typeof chat_template !== 'string') {
31683197
throw Error(`chat_template must be a string, but got ${typeof chat_template}`);
31693198
}

0 commit comments

Comments
 (0)