diff --git a/lmdeploy/serve/qos_engine/qos_engine.py b/lmdeploy/serve/qos_engine/qos_engine.py
index 1a311bc0f2..df1bf8e413 100644
--- a/lmdeploy/serve/qos_engine/qos_engine.py
+++ b/lmdeploy/serve/qos_engine/qos_engine.py
@@ -79,7 +79,7 @@ async def generate(self, request):
                 result_generator = self.engine.generate(
                     request.prompt[i],
                     request.session_id + i,
-                    True,  # always use stream to enable batching
+                    stream_response=True,  # always use stream for batching
                     sequence_start=True,
                     sequence_end=True,
                     request_output_len=request.max_tokens
@@ -119,7 +119,7 @@ async def generate(self, request):
             result_generator = self.engine.generate(
                 request.messages,
                 request.session_id,
-                True,  # always use stream to enable batching
+                stream_response=True,  # always use stream to enable batching
                 sequence_start=True,
                 sequence_end=True,
                 request_output_len=request.max_tokens