diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index 53a37db77c..329b9aee70 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -589,15 +589,7 @@ async def generate(
                     yield GenOut(response, self.id2step[str(session_id)],
                                  len(input_ids), tokens, finish_reason, res,
                                  logprobs)
-                if outputs.status > ResponseType.FINISH:
-                    yield GenOut(
-                        response='internal error happened',
-                        history_token_len=self.id2step[str(session_id)],
-                        input_token_len=len(input_ids),
-                        generate_token_len=0,
-                        finish_reason='error',
-                        token_ids=[])
-                else:
+                if outputs.status <= ResponseType.FINISH:
                     finish_reason = 'length' \
                         if tokens >= gen_config.max_new_tokens else 'stop'
                     # utf-8 char at the end means it's a potential unfinished
@@ -607,6 +599,14 @@ async def generate(
                         response = ''
                     yield GenOut(response, self.id2step[str(session_id)],
                                  len(input_ids), tokens, finish_reason)
+                else:
+                    yield GenOut(
+                        response='internal error happened',
+                        history_token_len=self.id2step[str(session_id)],
+                        input_token_len=len(input_ids),
+                        generate_token_len=0,
+                        finish_reason='error',
+                        token_ids=[])
                 # update step
                 self.id2step[str(session_id)] += len(input_ids) + tokens
                 if sequence_end: