Unverified commit 18d38771, authored by takatost, committed by GitHub

feat: optimize xinference stream (#989)

parent 53e83d86
......@@ -108,12 +108,12 @@ class XinferenceLLM(Xinference):
Yields:
A string token.
"""
- if isinstance(model, RESTfulGenerateModelHandle):
- streaming_response = model.generate(
+ if isinstance(model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)):
+ streaming_response = model.chat(
  prompt=prompt, generate_config=generate_config
  )
  else:
- streaming_response = model.chat(
+ streaming_response = model.generate(
  prompt=prompt, generate_config=generate_config
  )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment