fix: first agent latency (#2334)

3b357f51 · Yeuoly · GitHub · 09acf215 · 3b357f51
Unverified commit 3b357f51, authored Feb 01, 2024 by Yeuoly; committed by GitHub on Feb 01, 2024
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 1 deletion

assistant_fc_runner.py api/core/features/assistant_fc_runner.py +6 -1

No files found.
--- a/api/core/features/assistant_fc_runner.py
+++ b/api/core/features/assistant_fc_runner.py
@@ -97,7 +97,6 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                tool_input='',
                messages_ids=message_file_ids
            )
-            self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
            # recale llm max tokens
            self.recale_llm_max_tokens(self.model_config, prompt_messages)
@@ -124,7 +123,11 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
            current_llm_usage = None
            if self.stream_tool_call:
+                is_first_chunk = True
                for chunk in chunks:
+                    if is_first_chunk:
+                        self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+                        is_first_chunk = False
                    # check if there is any tool call
                    if self.check_tool_calls(chunk):
                        function_call_state = True
@@ -183,6 +186,8 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                if not result.message.content:
                    result.message.content = ''
+                self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
                yield LLMResultChunk(
                    model=model_instance.model,
                    prompt_messages=result.prompt_messages,