Unverified Commit 4f3053a8 authored by takatost, committed by GitHub

fix: xinference chat completion error (#952)

parent b3c2bf12
...@@ -46,7 +46,7 @@ class XinferenceLLM(Xinference):
                 return combined_text_output
             else:
                 completion = model.chat(prompt=prompt, generate_config=generate_config)
-                return completion["choices"][0]["text"]
+                return completion["choices"][0]["message"]["content"]
         elif isinstance(model, RESTfulGenerateModelHandle):
             generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})
...@@ -82,7 +82,7 @@ class XinferenceLLM(Xinference):
                 completion = combined_text_output
             else:
                 completion = model.chat(prompt=prompt, generate_config=generate_config)
-                completion = completion["choices"][0]["text"]
+                completion = completion["choices"][0]["message"]["content"]
             if stop is not None:
                 completion = enforce_stop_tokens(completion, stop)
......
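For context, a minimal sketch of why the key path changes: Xinference follows OpenAI-style response schemas, where generate() returns completion-style choices with a top-level "text" field, while chat() returns chat-style choices whose text is nested under message.content. The payloads below are illustrative assumptions, not taken from this commit.

# Illustrative sketch (hypothetical payloads, not from this commit) of the
# two OpenAI-style response shapes behind the one-line fix above.

# model.generate(...) returns a completion-style payload:
generate_response = {
    "choices": [
        {"index": 0, "text": "Hello!", "finish_reason": "stop"},
    ]
}

# model.chat(...) returns a chat-style payload; the text lives under
# message.content rather than a top-level "text" key:
chat_response = {
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "Hello!"},
            "finish_reason": "stop",
        },
    ]
}

# The old code read the completion-style path from a chat response and
# raised a KeyError; the fix reads the chat-style path instead:
assert chat_response["choices"][0]["message"]["content"] == "Hello!"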