Unverified commit 91ee62d1, authored by Garfield Dai, committed by GitHub

fix: huggingface and replicate. (#1888)

parent ede69b46
......@@ -154,6 +154,7 @@ class HuggingfaceHubLargeLanguageModel(_CommonHuggingfaceHub, LargeLanguageModel
content=chunk.token.text
)
if chunk.details:
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
......@@ -166,6 +167,16 @@ class HuggingfaceHubLargeLanguageModel(_CommonHuggingfaceHub, LargeLanguageModel
index=index,
message=assistant_prompt_message,
usage=usage,
finish_reason=chunk.details.finish_reason,
),
)
else:
yield LLMResultChunk(
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=index,
message=assistant_prompt_message,
),
)
......
......@@ -116,7 +116,7 @@ class ReplicateLargeLanguageModel(_CommonReplicate, LargeLanguageModel):
)
for key, value in input_properties:
if key not in ['system_prompt', 'prompt']:
if key not in ['system_prompt', 'prompt'] and 'stop' not in key:
value_type = value.get('type')
if not value_type:
......@@ -151,9 +151,17 @@ class ReplicateLargeLanguageModel(_CommonReplicate, LargeLanguageModel):
index = -1
current_completion: str = ""
stop_condition_reached = False
prediction_output_length = 10000
is_prediction_output_finished = False
for output in prediction.output_iterator():
current_completion += output
if not is_prediction_output_finished and prediction.status == 'succeeded':
prediction_output_length = len(prediction.output) - 1
is_prediction_output_finished = True
if stop:
for s in stop:
if s in current_completion:
......@@ -172,6 +180,16 @@ class ReplicateLargeLanguageModel(_CommonReplicate, LargeLanguageModel):
content=output if output else ''
)
if index < prediction_output_length:
yield LLMResultChunk(
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=index,
message=assistant_prompt_message
)
)
else:
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
......@@ -183,8 +201,8 @@ class ReplicateLargeLanguageModel(_CommonReplicate, LargeLanguageModel):
delta=LLMResultChunkDelta(
index=index,
message=assistant_prompt_message,
usage=usage,
),
usage=usage
)
)
def _handle_generate_response(self, model: str, credentials: dict, prediction: Prediction, stop: list[str],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment