feat: optimize xinference stream (#989)

18d38771 · takatost · GitHub · 53e83d86 · 18d38771
Unverified commit 18d38771, authored Aug 24, 2023 by takatost; committed by GitHub on Aug 24, 2023.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 3 deletions

xinference_llm.py api/core/third_party/langchain/llms/xinference_llm.py +3 -3

No files found.
--- a/api/core/third_party/langchain/llms/xinference_llm.py
+++ b/api/core/third_party/langchain/llms/xinference_llm.py
@@ -108,12 +108,12 @@ class XinferenceLLM(Xinference):
        Yields:
            A string token.
        """
-        if isinstance(model, RESTfulGenerateModelHandle):
-            streaming_response = model.generate(
+        if isinstance(model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)):
+            streaming_response = model.chat(
                prompt=prompt, generate_config=generate_config
            )
        else:
-            streaming_response = model.chat(
+            streaming_response = model.generate(
                prompt=prompt, generate_config=generate_config
            )