@@ -3704,6 +3704,144 @@ class MiniCPMv26ChatHandler(Llava15ChatHandler):
37043704 )
37053705
37063706
class MiniCPMv45ChatHandler(Llava15ChatHandler):
    """
    Handler for MiniCPM-V 4.5 models.

    Supports:
    - Multi-step tool calls with <tool_call> and <tool_response> XML tags.
    - Integrated reasoning (thinking) process with <think> tags.
    - Specialized system prompt handling with tool definitions.
    - Global image numbering for multi-image processing.

    The prompt layout is defined entirely by ``CHAT_FORMAT`` (a Jinja
    template). ``__call__`` injects ``enable_thinking`` into the template
    arguments, enforces the model's stop tokens and resets per-request
    state before delegating to the base handler.
    """

    # Model specific control tokens
    MINICPMV_BOS_TOKEN = "<|im_start|>"
    MINICPMV_EOS_TOKEN = "<|im_end|>"
    MINICPMV_PAD_TOKEN = "<|endoftext|>"

    # Image placeholder tags
    MINICPMV_IMAGE_START_TOKEN = "<image>"
    MINICPMV_IMAGE_END_TOKEN = "</image>"
    MINICPMV_IMAGE_ID_START_TOKEN = "<image_id>"
    MINICPMV_IMAGE_ID_END_TOKEN = "</image_id>"

    CHAT_FORMAT = (
        # --- 1. First System Message & Tools Definitions ---
        # With tools: one system block holding the optional leading system
        # message followed by the tool signatures and calling convention.
        # Without tools: the leading system message is emitted verbatim.
        "{%- if tools %}"
        "{{- '" + MINICPMV_BOS_TOKEN + "system\\n' }}"
        "{%- if messages[0].role == 'system' %}{{- messages[0].content + '\\n\\n' }}{%- endif %}"
        "{{- '# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\n' }}"
        "{{- 'You are provided with function signatures within <tools></tools> XML tags:\\n<tools>' }}"
        "{%- for tool in tools %}{{- '\\n' + (tool | tojson) }}{%- endfor %}"
        "{{- '\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\\n</tool_call>" + MINICPMV_EOS_TOKEN + "\\n' }}"
        "{%- elif messages[0].role == 'system' %}"
        "{{- '" + MINICPMV_BOS_TOKEN + "system\\n' + messages[0].content + '" + MINICPMV_EOS_TOKEN + "\\n' }}"
        "{%- endif %}"

        # --- 2. Message Stream Processing ---
        # image_count lives in a namespace so the numbering survives the
        # loop scopes and stays global across all messages.
        "{% set image_count = namespace(value=0) %}"
        "{%- for message in messages %}"
        # --- Unified Role Handling (User, Assistant, and subsequent Systems) ---
        # The leading system message was already rendered in section 1.
        "{%- if message.role in ['user', 'assistant'] or (message.role == 'system' and not loop.first) %}"
        "{{- '" + MINICPMV_BOS_TOKEN + "' + message.role + '\\n' }}"

        "{%- set content = message.content %}"
        # Assistant messages that only carry tool_calls usually have
        # content = None; render that as empty text instead of iterating it.
        "{%- if content is none %}"
        "{%- set content = '' %}"
        "{%- elif content is not string %}"
        "{%- set ns = namespace(content_str='') %}"
        "{%- for item in content %}"
        # --- Explicit image_url type and value checking ---
        "{%- if item.type == 'image_url' %}"
        # image_url may be a bare string or an object with a 'url' field.
        "{%- set image_url = item.image_url if item.image_url is string else item.image_url.url %}"
        "{%- set image_count.value = image_count.value + 1 %}"
        # Format: <image_id>N</image_id>: <image>IMAGE_URL</image>
        "{%- set ns.content_str = ns.content_str + '<image_id>' + (image_count.value | string) + '</image_id>: <image>' + image_url + '</image>' %}"
        "{%- elif item.type == 'text' %}"
        "{%- set ns.content_str = ns.content_str + item.text %}"
        "{%- endif %}"
        "{%- endfor %}"
        "{%- set content = ns.content_str %}"
        "{%- endif %}"

        "{{- content -}}"

        # Append tool_calls to assistant messages if they exist
        "{%- if message.role == 'assistant' and message.tool_calls %}"
        "{%- for tool_call in message.tool_calls %}"
        # Accept both {"function": {...}} wrappers and flat call objects.
        "{%- set tc = tool_call.function if tool_call.function else tool_call %}"
        "{{- '\\n<tool_call>\\n{\"name\": \"' + tc.name + '\", \"arguments\": ' }}"
        "{{- tc.arguments if tc.arguments is string else tc.arguments | tojson }}"
        "{{- '}\\n</tool_call>' }}"
        "{%- endfor %}"
        "{%- endif %}"
        "{{- '" + MINICPMV_EOS_TOKEN + "\\n' }}"

        # --- Specialized Tool Response Handling ---
        # Group consecutive tool responses under a single user-like block
        "{%- elif message.role == 'tool' %}"
        "{%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}"
        "{{- '" + MINICPMV_BOS_TOKEN + "user' }}"
        "{%- endif %}"
        "{{- '\\n<tool_response>\\n' + message.content + '\\n</tool_response>' }}"
        "{%- if loop.last or (messages[loop.index0 + 1].role != 'tool') %}"
        "{{- '" + MINICPMV_EOS_TOKEN + "\\n' }}"
        "{%- endif %}"
        "{%- endif %}"
        "{%- endfor %}"

        # --- 3. Generation Prompt ---
        "{%- if add_generation_prompt %}"
        "{{- '" + MINICPMV_BOS_TOKEN + "assistant\\n' }}"
        # Handle thinking/reasoning block visibility based on configuration:
        # false -> pre-fill an empty <think> block, true -> open <think>.
        "{%- if enable_thinking is defined and enable_thinking is false %}"
        "{{- '<think>\\n\\n</think>\\n\\n' }}"
        "{%- elif enable_thinking is defined and enable_thinking is true %}"
        "{{- '<think>\\n' }}"
        "{%- endif %}"
        "{%- endif %}"
    )

    def __init__(self, enable_thinking: bool = True, **kwargs):
        """
        Initializes the MiniCPM-V 4.5 Handler.

        Args:
            enable_thinking (bool): If True, the generation prompt opens a
                <think> block; if False, an empty <think></think> block is
                pre-filled so the model answers directly.
            **kwargs: Additional arguments for the base Llava15ChatHandler.
        """
        self.enable_thinking = enable_thinking
        super().__init__(**kwargs)

    def __call__(self, **kwargs):
        """
        Run one chat completion through the base handler.

        Before delegating, this injects ``enable_thinking`` into the template
        arguments, makes sure the model's end-of-turn tokens are part of the
        stop list, and resets the llama/handler state left by earlier calls.

        Args:
            **kwargs: Keyword arguments for the base handler's ``__call__``.
                ``llama`` is required; ``stop`` (None, str or list of str)
                and ``messages`` are read when present.

        Returns:
            Whatever the base handler's ``__call__`` returns.

        Raises:
            KeyError: If ``llama`` is not supplied.
        """
        # Inject thinking control flag into the template
        self.extra_template_arguments["enable_thinking"] = self.enable_thinking

        # Always stop on the model's own terminators, but keep any stop
        # sequences the caller asked for instead of discarding them.
        required_stops = [self.MINICPMV_EOS_TOKEN, self.MINICPMV_PAD_TOKEN]
        caller_stops = kwargs.get("stop")
        if caller_stops is None:
            caller_stops = []
        elif isinstance(caller_stops, str):
            caller_stops = [caller_stops]
        kwargs["stop"] = required_stops + [
            stop for stop in caller_stops if stop not in required_stops
        ]

        # Reset model state so every call starts from a clean context.
        # NOTE(review): memory_clear(True) presumably drops the context's
        # KV memory - confirm against the llama context wrapper.
        llama = kwargs["llama"]
        llama.reset()
        llama._ctx.memory_clear(True)
        llama.n_tokens = 0

        if hasattr(llama, "input_ids"):
            llama.input_ids.fill(0)

        # Forget any image embedding remembered on this handler.
        if hasattr(self, "_last_image_embed"):
            self._last_image_embed = None
            self._last_image_hash = None

        if self.verbose:
            messages = kwargs.get("messages", [])
            # Diagnostics are best-effort: a failure while counting images
            # must never prevent the actual completion from running.
            try:
                image_count = len(self.get_image_urls(messages))
                print(f"MiniCPMV45ChatHandler(enable_thinking={self.enable_thinking}) - Processing {image_count} images", file=sys.stderr)
            except Exception:
                print("MiniCPMV45ChatHandler - Cleared state", file=sys.stderr)

        return super().__call__(**kwargs)
3843+
3844+
37073845class Gemma3ChatHandler (Llava15ChatHandler ):
37083846
37093847 GEMMA3_BOI_TOKEN = "<start_of_image>"
0 commit comments