@@ -2651,24 +2651,46 @@ class PoolerConfig:
2651
2651
## for embeddings models
2652
2652
normalize : Optional [bool ] = None
2653
2653
"""
2654
- Whether to normalize the embeddings outputs.
2654
+ Whether to normalize the embeddings outputs. Defaults to True.
2655
2655
"""
2656
2656
dimensions : Optional [int ] = None
2657
2657
"""
2658
2658
Reduce the dimensions of embeddings if model
2659
- support matryoshka representation.
2659
+ supports matryoshka representation. Defaults to None.
2660
+ """
2661
+ enable_chunked_processing : Optional [bool ] = None
2662
+ """
2663
+ Whether to enable chunked processing for long inputs that exceed the model's
2664
+ maximum position embeddings. When enabled, long inputs will be split into
2665
+ chunks, processed separately, and then aggregated using weighted averaging.
2666
+ This allows embedding models to handle arbitrarily long text without CUDA
2667
+ errors. Defaults to False.
2668
+ """
2669
+ max_embed_len : Optional [int ] = None
2670
+ """
2671
+ Maximum input length allowed for embedding generation. When set, allows
2672
+ inputs longer than max_model_len (up to max_embed_len) to be accepted for embedding models.
2673
+ When an input exceeds max_embed_len, it will be handled according to
2674
+ the original max_model_len validation logic.
2675
+ Defaults to None (i.e. set to max_model_len).
2660
2676
"""
2661
2677
2662
2678
## for classification models
2663
2679
activation : Optional [bool ] = None
2664
2680
"""
2665
2681
Whether to apply activation function to the classification outputs.
2682
+ Defaults to True.
2683
+ """
2684
+ logit_bias : Optional [float ] = None
2685
+ """
2686
+ If provided, apply this bias to the classification logits. Defaults to None.
2666
2687
"""
2667
2688
2668
2689
## for reward models
2669
2690
softmax : Optional [bool ] = None
2670
2691
"""
2671
2692
Whether to apply softmax to the reward outputs.
2693
+ Defaults to True.
2672
2694
"""
2673
2695
step_tag_id : Optional [int ] = None
2674
2696
"""
@@ -2683,25 +2705,6 @@ class PoolerConfig:
2683
2705
``math-shepherd-mistral-7b-prm`` model.
2684
2706
"""
2685
2707
2686
- enable_chunked_processing : Optional [bool ] = None
2687
- """
2688
- Whether to enable chunked processing for long inputs that exceed the model's
2689
- maximum position embeddings. When enabled, long inputs will be split into
2690
- chunks, processed separately, and then aggregated using weighted averaging.
2691
- This allows embedding models to handle arbitrarily long text without CUDA
2692
- errors. Defaults to False.
2693
- """
2694
-
2695
- max_embed_len : Optional [int ] = None
2696
- """
2697
- Maximum input length allowed for embedding generation. When set, allows
2698
- inputs longer than max_embed_len to be accepted for embedding models.
2699
- This parameter enables accepting long inputs without requiring
2700
- VLLM_ALLOW_LONG_MAX_MODEL_LEN environment variable. When an input exceeds
2701
- max_embed_len, it will be handled according to the original max_model_len
2702
- validation logic. Defaults to None (i.e. set to max_model_len).
2703
- """
2704
-
2705
2708
def compute_hash (self ) -> str :
2706
2709
"""
2707
2710
WARNING: Whenever a new field is added to this config,
0 commit comments