-
-
Notifications
You must be signed in to change notification settings - Fork 8.8k
Description
I run inference from a multithreaded gRPC server, but I get the following error:
File "/usr/local/lib/python3.8/site-packages/vllm/entrypoints/llm.py", line 130, in generate
return self._run_engine(use_tqdm)
File "/usr/local/lib/python3.8/site-packages/vllm/entrypoints/llm.py", line 150, in _run_engine
step_outputs = self.llm_engine.step()
File "/usr/local/lib/python3.8/site-packages/vllm/engine/llm_engine.py", line 559, in step
return self._process_model_outputs(output, scheduler_outputs)
File "/usr/local/lib/python3.8/site-packages/vllm/engine/llm_engine.py", line 518, in _process_model_outputs
self._process_sequence_group_samples(seq_group, samples)
File "/usr/local/lib/python3.8/site-packages/vllm/engine/llm_engine.py", line 357, in _process_sequence_group_samples
parent_child_dict[sample.parent_seq_id].append(sample)
KeyError: 513
# Start the gRPC prediction server and block until it terminates.
port = '8500'
# Use a single worker thread so RPCs are handled one at a time.
# NOTE(review): presumably Predict() drives one shared vllm `LLM` instance
# (confirm against the servicer). `LLM.generate` steps the shared engine until
# it has no unfinished requests, so concurrent handler threads interleave
# `llm_engine.step()` calls and corrupt its sequence bookkeeping, which
# surfaces as `KeyError` on `parent_seq_id`. Serializing the handlers avoids
# that; for real concurrent serving, switch to vLLM's async engine instead.
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
predict_pb2_grpc.add_PredictServiceServicer_to_server(Predict(), server)
# '[::]' binds on all interfaces; the port is insecure (no TLS).
server.add_insecure_port('[::]:' + port)
server.start()
logging.info("Starting server on %s", port)
# Keep the main thread alive while the server's worker thread handles RPCs.
server.wait_for_termination()