@@ -22,7 +22,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Inference-only Qwen2-VL model compatible with HuggingFace weights."""
-from array import array
 from functools import lru_cache, partial
 from typing import (Iterable, List, Mapping, Optional, Tuple, Type, TypedDict,
                     Union)
@@ -66,8 +65,7 @@
 from vllm.multimodal.base import MultiModalData
 from vllm.multimodal.image import cached_get_image_processor
 from vllm.platforms import current_platform
-from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE, IntermediateTensors,
-                           SequenceData)
+from vllm.sequence import IntermediateTensors, SequenceData
 from vllm.transformers_utils.processor import get_processor
 
 logger = init_logger(__name__)
@@ -681,15 +679,14 @@ def dummy_data_for_qwen2_vl(
             "--limit-mm-per-prompt.")
 
     hf_config = ctx.get_hf_config(Qwen2VLConfig)
-    token_ids = array(VLLM_TOKEN_ID_ARRAY_TYPE,
-                      [hf_config.vision_start_token_id])
-    token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE,
-                       [hf_config.image_token_id]) * max_llm_image_tokens
-    token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE,
-                       [hf_config.vision_end_token_id])
-    token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE,
-                       [0]) * (seq_len - max_llm_image_tokens - 2)
-    dummy_seqdata = SequenceData(token_ids)
+
+    dummy_seqdata = SequenceData.from_token_counts(
+        (hf_config.vision_start_token_id, 1),
+        (hf_config.image_token_id, max_llm_image_tokens),
+        (hf_config.vision_end_token_id, 1),
+        (0, seq_len - max_llm_image_tokens - 2),
+    )
+
     dummy_image = Image.new("RGB", (max_resized_width, max_resized_height),
                             color=0)
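
For context, this change replaces the manual array-based construction of the dummy prompt with the SequenceData.from_token_counts helper, which takes (token_id, count) pairs and expands them into the token sequence. Below is a minimal sketch (not vLLM's actual implementation) of what such a constructor does; the TOKEN_ID_ARRAY_TYPE value and the standalone function name are illustrative assumptions:

    from array import array
    from typing import Tuple

    # Stand-in for vllm.sequence.VLLM_TOKEN_ID_ARRAY_TYPE, the typecode of
    # the typed array holding token ids (assumed "l", signed long); the
    # refactor removes the need to import it at each call site.
    TOKEN_ID_ARRAY_TYPE = "l"

    def from_token_counts(*token_counts: Tuple[int, int]) -> array:
        """Expand (token_id, count) pairs into a flat token-id array."""
        token_ids = array(TOKEN_ID_ARRAY_TYPE)
        for token_id, count in token_counts:
            token_ids.extend([token_id] * count)
        return token_ids

    # Mirrors the dummy data above: one vision-start token, N image tokens,
    # one vision-end token, then zero-padding out to the target length.
    # Token ids 10, 11, 12 are placeholders, not Qwen2-VL's real ids.
    tokens = from_token_counts((10, 1), (11, 3), (12, 1), (0, 2))
    assert list(tokens) == [10, 11, 11, 11, 12, 0, 0]

The call-site benefit shows in the hunk above: nine lines of repeated array(...) concatenation collapse into one declarative call that states each token id and its count.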