Merge branch 'main' into feat/enterprise

fe2f3a0a · Yeuoly · a29c1f93 · 2f28afeb · fe2f3a0a · fe2f3a0a
Unverified Commit fe2f3a0a authored Mar 06, 2024 by Yeuoly
23 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -145,6 +145,9 @@ docker/volumes/db/data/*
 docker/volumes/redis/data/*
 docker/volumes/weaviate/*
 docker/volumes/qdrant/*
+docker/volumes/etcd/*
+docker/volumes/minio/*
+docker/volumes/milvus/*

 sdks/python-client/build
 sdks/python-client/dist

--- a/api/app.py
+++ b/api/app.py
@@ -26,6 +26,7 @@ from config import CloudEditionConfig, Config
 from extensions import (
    ext_celery,
    ext_code_based_extension,
+    ext_compress,
    ext_database,
    ext_hosting_provider,
    ext_login,
@@ -96,6 +97,7 @@ def create_app(test_config=None) -> Flask:
 def initialize_extensions(app):
    # Since the application instance is now created, pass it to each Flask
    # extension instance to bind it to the Flask application instance (app)
+    ext_compress.init_app(app)
    ext_code_based_extension.init()
    ext_database.init_app(app)
    ext_migrate.init(app, db)

--- a/api/config.py
+++ b/api/config.py
@@ -298,6 +298,8 @@ class Config:

        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')

+        self.API_COMPRESSION_ENABLED = get_bool_env('API_COMPRESSION_ENABLED')
+

 class CloudEditionConfig(Config):


--- a/api/core/features/assistant_cot_runner.py
+++ b/api/core/features/assistant_cot_runner.py
@@ -28,6 +28,9 @@ from models.model import Conversation, Message


 class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
+    _is_first_iteration = True
+    _ignore_observation_providers = ['wenxin']
+
    def run(self, conversation: Conversation,
        message: Message,
        query: str,
@@ -42,10 +45,8 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
        agent_scratchpad: list[AgentScratchpadUnit] = []
        self._init_agent_scratchpad(agent_scratchpad, self.history_prompt_messages)

-        # check model mode
-        if self.app_orchestration_config.model_config.mode == "completion":
-            # TODO: stop words
-            if 'Observation' not in app_orchestration_config.model_config.stop:
+        if 'Observation' not in app_orchestration_config.model_config.stop:
+            if app_orchestration_config.model_config.provider not in self._ignore_observation_providers:
                app_orchestration_config.model_config.stop.append('Observation')

        # override inputs
@@ -202,6 +203,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
                        )
                    )

+            scratchpad.thought = scratchpad.thought.strip() or 'I am thinking about how to help you'
            agent_scratchpad.append(scratchpad)
                        
            # get llm usage
@@ -255,9 +257,15 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
                        # invoke tool
                        error_response = None
                        try:
+                            if isinstance(tool_call_args, str):
+                                try:
+                                    tool_call_args = json.loads(tool_call_args)
+                                except json.JSONDecodeError:
+                                    pass
+                            
                            tool_response = tool_instance.invoke(
                                user_id=self.user_id, 
-                                tool_parameters=tool_call_args if isinstance(tool_call_args, dict) else json.loads(tool_call_args)
+                                tool_parameters=tool_call_args
                            )
                            # transform tool response to llm friendly response
                            tool_response = self.transform_tool_invoke_messages(tool_response)
@@ -466,7 +474,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
            if isinstance(message, AssistantPromptMessage):
                current_scratchpad = AgentScratchpadUnit(
                    agent_response=message.content,
-                    thought=message.content,
+                    thought=message.content or 'I am thinking about how to help you',
                    action_str='',
                    action=None,
                    observation=None,
@@ -546,7 +554,8 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):

        result = ''
        for scratchpad in agent_scratchpad:
-            result += scratchpad.thought + next_iteration.replace("{{observation}}", scratchpad.observation or '') + "\n"
+            result += (scratchpad.thought or '') + (scratchpad.action_str or '') + \
+                next_iteration.replace("{{observation}}", scratchpad.observation or 'It seems that no response is available')

        return result
    
@@ -621,21 +630,24 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
                ))

            # add assistant message
-            if len(agent_scratchpad) > 0:
+            if len(agent_scratchpad) > 0 and not self._is_first_iteration:
                prompt_messages.append(AssistantPromptMessage(
-                    content=(agent_scratchpad[-1].thought or '')
+                    content=(agent_scratchpad[-1].thought or '') + (agent_scratchpad[-1].action_str or ''),
                ))
            
            # add user message
-            if len(agent_scratchpad) > 0:
+            if len(agent_scratchpad) > 0 and not self._is_first_iteration:
                prompt_messages.append(UserPromptMessage(
-                    content=(agent_scratchpad[-1].observation or ''),
+                    content=(agent_scratchpad[-1].observation or 'It seems that no response is available'),
                ))

+            self._is_first_iteration = False
+
            return prompt_messages
        elif mode == "completion":
            # parse agent scratchpad
            agent_scratchpad_str = self._convert_scratchpad_list_to_str(agent_scratchpad)
+            self._is_first_iteration = False
            # parse prompt messages
            return [UserPromptMessage(
                content=first_prompt.replace("{{instruction}}", instruction)

--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -186,7 +186,7 @@ class IndexingRunner:
                first()

            index_type = dataset_document.doc_form
-            index_processor = IndexProcessorFactory(index_type, processing_rule.to_dict()).init_index_processor()
+            index_processor = IndexProcessorFactory(index_type).init_index_processor()
            self._load(
                index_processor=index_processor,
                dataset=dataset,

--- a/api/core/model_runtime/model_providers/mistralai/_assets/icon_s_en.png
+++ b/api/core/model_runtime/model_providers/mistralai/_assets/icon_s_en.png
--- a/api/core/model_runtime/model_providers/xinference/xinference_helper.py
+++ b/api/core/model_runtime/model_providers/xinference/xinference_helper.py
-from os import path
 from threading import Lock
 from time import time

 from requests.adapters import HTTPAdapter
 from requests.exceptions import ConnectionError, MissingSchema, Timeout
 from requests.sessions import Session
+from yarl import URL


 class XinferenceModelExtraParameter:
@@ -55,7 +55,10 @@ class XinferenceHelper:
            get xinference model extra parameter like model_format and model_handle_type
        """

-        url = path.join(server_url, 'v1/models', model_uid)
+        if not model_uid or not model_uid.strip() or not server_url or not server_url.strip():
+            raise RuntimeError('model_uid is empty')
+
+        url = str(URL(server_url) / 'v1' / 'models' / model_uid)

        # this method is surrounded by a lock, and default requests may hang forever, so we just set a Adapter with max_retries=3
        session = Session()
@@ -66,7 +69,6 @@ class XinferenceHelper:
            response = session.get(url, timeout=10)
        except (MissingSchema, ConnectionError, Timeout) as e:
            raise RuntimeError(f'get xinference model extra parameter failed, url: {url}, error: {e}')
-
        if response.status_code != 200:
            raise RuntimeError(f'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}')
        

--- a/api/core/rag/extractor/csv_extractor.py
+++ b/api/core/rag/extractor/csv_extractor.py
@@ -3,6 +3,7 @@ import csv
 from typing import Optional

 from core.rag.extractor.extractor_base import BaseExtractor
+from core.rag.extractor.helpers import detect_file_encodings
 from core.rag.models.document import Document


@@ -36,7 +37,7 @@ class CSVExtractor(BaseExtractor):
                docs = self._read_from_file(csvfile)
        except UnicodeDecodeError as e:
            if self._autodetect_encoding:
-                detected_encodings = detect_filze_encodings(self._file_path)
+                detected_encodings = detect_file_encodings(self._file_path)
                for encoding in detected_encodings:
                    try:
                        with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile:

--- a/api/core/tools/provider/_position.yaml
+++ b/api/core/tools/provider/_position.yaml
@@ -18,3 +18,4 @@
 - vectorizer
 - gaode
 - wecom
+- qrcode
--- a/api/core/tools/provider/builtin/qrcode/_assets/icon.svg
+++ b/api/core/tools/provider/builtin/qrcode/_assets/icon.svg
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
+<svg width="800px" height="800px" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
+    <g>
+        <path fill="none" d="M0 0h24v24H0z"/>
+        <path d="M16 17v-1h-3v-3h3v2h2v2h-1v2h-2v2h-2v-3h2v-1h1zm5 4h-4v-2h2v-2h2v4zM3 3h8v8H3V3zm2 2v4h4V5H5zm8-2h8v8h-8V3zm2 2v4h4V5h-4zM3 13h8v8H3v-8zm2 2v4h4v-4H5zm13-2h3v2h-3v-2zM6 6h2v2H6V6zm0 10h2v2H6v-2zM16 6h2v2h-2V6z"/>
+    </g>
+</svg>
\ No newline at end of file
--- a/api/core/tools/provider/builtin/qrcode/qrcode.py
+++ b/api/core/tools/provider/builtin/qrcode/qrcode.py
+from typing import Any
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin.qrcode.tools.qrcode_generator import QRCodeGeneratorTool
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class QRCodeProvider(BuiltinToolProviderController):
+    """Built-in tool provider for the QR code generator tool."""
+
+    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
+        """
+        Smoke-test the provider by invoking the QR code generator once.
+
+        ``credentials`` is not read here; the check only runs
+        ``QRCodeGeneratorTool`` against a fixed sample payload.
+
+        Raises:
+            ToolProviderCredentialValidationError: if the invocation raises.
+        """
+        try:
+            QRCodeGeneratorTool().invoke(
+                user_id='',
+                tool_parameters={'content': 'Dify 123 😊'},
+            )
+        except Exception as e:
+            # chain the cause so the original traceback is not lost
+            raise ToolProviderCredentialValidationError(str(e)) from e
--- a/api/core/tools/provider/builtin/qrcode/qrcode.yaml
+++ b/api/core/tools/provider/builtin/qrcode/qrcode.yaml
+identity:
+  author: Bowen Liang
+  name: qrcode
+  label:
+    en_US: QRCode
+    zh_Hans: 二维码工具
+    pt_BR: QRCode
+  description:
+    en_US: A tool for generating QR code (quick-response code) image.
+    zh_Hans: 一个二维码工具
+    pt_BR: A tool for generating QR code (quick-response code) image.
+  icon: icon.svg
--- a/api/core/tools/provider/builtin/qrcode/tools/qrcode_generator.py
+++ b/api/core/tools/provider/builtin/qrcode/tools/qrcode_generator.py
+import io
+import logging
+from typing import Any, Union
+
+import qrcode
+from qrcode.image.pure import PyPNGImage
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class QRCodeGeneratorTool(BuiltinTool):
+    """Built-in tool that renders text content as a PNG QR code image."""
+
+    def _invoke(self,
+                user_id: str,
+                tool_parameters: dict[str, Any],
+                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+            Generate a QR code from ``tool_parameters['content']``.
+
+            Returns a blob message carrying the PNG bytes on success, or a
+            text message when the content is missing/empty or generation fails.
+        """
+        # get the text to encode
+        content = tool_parameters.get('content', '')
+        if not content:
+            return self.create_text_message('Invalid parameter content')
+
+        try:
+            img = qrcode.make(data=content, image_factory=PyPNGImage)
+            byte_stream = io.BytesIO()
+            img.save(byte_stream)
+            byte_array = byte_stream.getvalue()
+            return self.create_blob_message(blob=byte_array,
+                                            meta={'mime_type': 'image/png'},
+                                            save_as=self.VARIABLE_KEY.IMAGE.value)
+        except Exception:
+            # best-effort: log with traceback and report the failure as text
+            logging.exception('Failed to generate QR code for content: %s', content)
+            return self.create_text_message('Failed to generate QR code')
--- a/api/core/tools/provider/builtin/qrcode/tools/qrcode_generator.yaml
+++ b/api/core/tools/provider/builtin/qrcode/tools/qrcode_generator.yaml
+identity:
+  name: qrcode_generator
+  author: Bowen Liang
+  label:
+    en_US: QR Code Generator
+    zh_Hans: 二维码生成器
+    pt_BR: QR Code Generator
+description:
+  human:
+    en_US: A tool for generating QR code image
+    zh_Hans: 一个用于生成二维码的工具
+    pt_BR: A tool for generating QR code image
+  llm: A tool for generating QR code image
+parameters:
+  - name: content
+    type: string
+    required: true
+    label:
+      en_US: content text for QR code
+      zh_Hans: 二维码文本内容
+      pt_BR: content text for QR code
+    human_description:
+      en_US: content text for QR code
+      zh_Hans: 二维码文本内容
+      pt_BR: content text for QR code
+    form: llm
--- a/api/core/tools/provider/builtin/twilio/_assets/icon.svg
+++ b/api/core/tools/provider/builtin/twilio/_assets/icon.svg
+<svg width="2500" height="2500" viewBox="0 0 256 256" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="xMidYMid"><g fill="#CF272D"><path d="M127.86 222.304c-52.005 0-94.164-42.159-94.164-94.163 0-52.005 42.159-94.163 94.164-94.163 52.004 0 94.162 42.158 94.162 94.163 0 52.004-42.158 94.163-94.162 94.163zm0-222.023C57.245.281 0 57.527 0 128.141 0 198.756 57.245 256 127.86 256c70.614 0 127.859-57.244 127.859-127.859 0-70.614-57.245-127.86-127.86-127.86z"/><path d="M133.116 96.297c0-14.682 11.903-26.585 26.586-26.585 14.683 0 26.585 11.903 26.585 26.585 0 14.684-11.902 26.586-26.585 26.586-14.683 0-26.586-11.902-26.586-26.586M133.116 159.983c0-14.682 11.903-26.586 26.586-26.586 14.683 0 26.585 11.904 26.585 26.586 0 14.683-11.902 26.586-26.585 26.586-14.683 0-26.586-11.903-26.586-26.586M69.431 159.983c0-14.682 11.904-26.586 26.586-26.586 14.683 0 26.586 11.904 26.586 26.586 0 14.683-11.903 26.586-26.586 26.586-14.682 0-26.586-11.903-26.586-26.586M69.431 96.298c0-14.683 11.904-26.585 26.586-26.585 14.683 0 26.586 11.902 26.586 26.585 0 14.684-11.903 26.586-26.586 26.586-14.682 0-26.586-11.902-26.586-26.586"/></g></svg>
\ No newline at end of file
--- a/api/core/tools/provider/builtin/twilio/tools/send_message.py
+++ b/api/core/tools/provider/builtin/twilio/tools/send_message.py
+from typing import Any, Union
+
+from langchain.utilities import TwilioAPIWrapper
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class SendMessageTool(BuiltinTool):
+    """
+    A tool for sending messages using Twilio API.
+    """
+
+    def _invoke(
+        self, user_id: str, tool_parameters: dict[str, Any]
+    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+        Send a message through Twilio.
+
+        Args:
+            user_id (str): The ID of the user invoking the tool.
+            tool_parameters (dict[str, Any]): Must contain "message" and
+                "to_number"; a "whatsapp:" prefix on "to_number" selects
+                the WhatsApp channel.
+
+        Returns:
+            Union[ToolInvokeMessage, list[ToolInvokeMessage]]: A text message
+            reporting that the message was sent.
+
+        Raises:
+            KeyError: if a required credential or tool parameter is missing.
+        """
+        account_sid = self.runtime.credentials["account_sid"]
+        auth_token = self.runtime.credentials["auth_token"]
+        from_number = self.runtime.credentials["from_number"]
+
+        message = tool_parameters["message"]
+        to_number = tool_parameters["to_number"]
+
+        # Sender and recipient must be on the same channel. The channel
+        # address is "whatsapp:<number>" with no space after the colon, and
+        # a sender that already carries the prefix must not get it twice.
+        if to_number.startswith("whatsapp:") and not from_number.startswith("whatsapp:"):
+            from_number = f"whatsapp:{from_number}"
+
+        twilio = TwilioAPIWrapper(
+            account_sid=account_sid, auth_token=auth_token, from_number=from_number
+        )
+
+        # Sending the message through Twilio
+        twilio.run(message, to_number)
+
+        return self.create_text_message(text="Message sent successfully.")
--- a/api/core/tools/provider/builtin/twilio/tools/send_message.yaml
+++ b/api/core/tools/provider/builtin/twilio/tools/send_message.yaml
+identity:
+  name: send_message
+  author: Yash Parmar
+  label:
+    en_US: SendMessage
+    zh_Hans: 发送消息
+    pt_BR: SendMessage
+description:
+  human:
+    en_US: Send SMS or Twilio Messaging Channels messages.
+    zh_Hans: 发送SMS或Twilio消息通道消息。
+    pt_BR: Send SMS or Twilio Messaging Channels messages.
+  llm: Send SMS or Twilio Messaging Channels messages. Supports different channels including WhatsApp.
+parameters:
+  - name: message
+    type: string
+    required: true
+    label:
+      en_US: Message
+      zh_Hans: 消息内容
+      pt_BR: Message
+    human_description:
+      en_US: The content of the message to be sent.
+      zh_Hans: 要发送的消息内容。
+      pt_BR: The content of the message to be sent.
+    llm_description: The content of the message to be sent.
+    form: llm
+  - name: to_number
+    type: string
+    required: true
+    label:
+      en_US: To Number
+      zh_Hans: 收信号码
+      pt_BR: Para Número
+    human_description:
+      en_US: The recipient's phone number. Prefix with 'whatsapp:' for WhatsApp messages, e.g., "whatsapp:+1234567890".
+      zh_Hans: 收件人的电话号码。WhatsApp消息前缀为'whatsapp:'，例如，"whatsapp:+1234567890"。
+      pt_BR: The recipient's phone number. Prefix with 'whatsapp:' for WhatsApp messages, e.g., "whatsapp:+1234567890".
+    llm_description: The recipient's phone number. Prefix with 'whatsapp:' for WhatsApp messages, e.g., "whatsapp:+1234567890".
+    form: llm
--- a/api/core/tools/provider/builtin/twilio/twilio.py
+++ b/api/core/tools/provider/builtin/twilio/twilio.py
+from typing import Any
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class TwilioProvider(BuiltinToolProviderController):
+    """Built-in tool provider for Twilio messaging."""
+
+    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
+        """
+        Validate the provider credentials.
+
+        NOTE: no validation is performed at present. The ``try`` body is
+        only an inert string literal (a disabled ``SendMessageTool``
+        invocation) followed by ``pass``, so ``credentials`` is never read
+        and the ``except`` branch cannot be reached.
+        """
+        try:
+            # Disabled check, kept as a string literal rather than live code.
+            # NOTE(review): presumably disabled so that validating credentials
+            # does not send a real message to the hard-coded number - confirm.
+            """
+            SendMessageTool().fork_tool_runtime(
+                meta={
+                    "credentials": credentials,
+                }
+            ).invoke(
+                user_id="",
+                tool_parameters={
+                    "message": "Credential validation message",
+                    "to_number": "+14846624384",
+                },
+            )
+            """
+            pass
+        except Exception as e:
+            raise ToolProviderCredentialValidationError(str(e))
--- a/api/core/tools/provider/builtin/twilio/twilio.yaml
+++ b/api/core/tools/provider/builtin/twilio/twilio.yaml
+identity:
+  author: Yash Parmar
+  name: twilio
+  label:
+    en_US: Twilio
+    zh_Hans: Twilio
+    pt_BR: Twilio
+  description:
+    en_US: Send messages through SMS or Twilio Messaging Channels.
+    zh_Hans: 通过SMS或Twilio消息通道发送消息。
+    pt_BR: Send messages through SMS or Twilio Messaging Channels.
+  icon: icon.svg
+credentials_for_provider:
+  account_sid:
+    type: secret-input
+    required: true
+    label:
+      en_US: Account SID
+      zh_Hans: 账户SID
+      pt_BR: Account SID
+    placeholder:
+      en_US: Please input your Twilio Account SID
+      zh_Hans: 请输入您的Twilio账户SID
+      pt_BR: Please input your Twilio Account SID
+  auth_token:
+    type: secret-input
+    required: true
+    label:
+      en_US: Auth Token
+      zh_Hans: 认证令牌
+      pt_BR: Auth Token
+    placeholder:
+      en_US: Please input your Twilio Auth Token
+      zh_Hans: 请输入您的Twilio认证令牌
+      pt_BR: Please input your Twilio Auth Token
+  from_number:
+    type: secret-input
+    required: true
+    label:
+      en_US: From Number
+      zh_Hans: 发信号码
+      pt_BR: De Número
+    placeholder:
+      en_US: Please input your Twilio phone number
+      zh_Hans: 请输入您的Twilio电话号码
+      pt_BR: Please input your Twilio phone number
--- a/api/core/tools/tool/tool.py
+++ b/api/core/tools/tool/tool.py
@@ -174,7 +174,18 @@ class Tool(BaseModel, ABC):

        return result

-    def invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
+    def invoke(self, user_id: str, tool_parameters: Union[dict[str, Any], str]) -> list[ToolInvokeMessage]:
+        # check if tool_parameters is a string
+        if isinstance(tool_parameters, str):
+            # check if this tool has only one parameter
+            parameters = [parameter for parameter in self.parameters if parameter.form == ToolParameter.ToolParameterForm.LLM]
+            if parameters and len(parameters) == 1:
+                tool_parameters = {
+                    parameters[0].name: tool_parameters
+                }
+            else:
+                raise ValueError(f"tool_parameters should be a dict, but got a string: {tool_parameters}")
+
        # update tool_parameters
        if self.runtime.runtime_parameters:
            tool_parameters.update(self.runtime.runtime_parameters)

--- a/api/extensions/ext_compress.py
+++ b/api/extensions/ext_compress.py
+from flask import Flask
+
+
+def init_app(app: Flask):
+    """Attach Flask-Compress to ``app`` when API_COMPRESSION_ENABLED is set."""
+    if not app.config.get('API_COMPRESSION_ENABLED', False):
+        return
+
+    # imported lazily so flask_compress is only loaded when compression is on
+    from flask_compress import Compress
+
+    Compress().init_app(app)
+
+
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -3,6 +3,7 @@ beautifulsoup4==4.12.2
 flask~=3.0.1
 Flask-SQLAlchemy~=3.0.5
 SQLAlchemy~=1.4.28
+Flask-Compress~=1.14
 flask-login~=0.6.3
 flask-migrate~=4.0.5
 flask-restful~=0.3.10
@@ -67,4 +68,7 @@ pydub~=0.25.1
 gmpy2~=2.1.5
 numexpr~=2.9.0
 duckduckgo-search==4.4.3
-arxiv==2.1.0
\ No newline at end of file
+arxiv==2.1.0
+yarl~=1.9.4
+twilio==9.0.0
+qrcode~=7.4.2
--- a/api/tests/integration_tests/model_runtime/__mock/xinference.py
+++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py
@@ -32,68 +32,70 @@ class MockXinferenceClass(object):
        response = Response()
        if 'v1/models/' in url:
            # get model uid
-            model_uid = url.split('/')[-1]
+            model_uid = url.split('/')[-1] or ''
            if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \
                model_uid not in ['generate', 'chat', 'embedding', 'rerank']:
                response.status_code = 404
+                response._content = b'{}'
                return response

            # check if url is valid
            if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url):
                response.status_code = 404
+                response._content = b'{}'
                return response
            
            if model_uid in ['generate', 'chat']:
                response.status_code = 200
                response._content = b'''{
-        "model_type": "LLM",
-        "address": "127.0.0.1:43877",
-        "accelerators": [
-            "0",
-            "1"
-        ],
-        "model_name": "chatglm3-6b",
-        "model_lang": [
-            "en"
-        ],
-        "model_ability": [
-            "generate",
-            "chat"
-        ],
-        "model_description": "latest chatglm3",
-        "model_format": "pytorch",
-        "model_size_in_billions": 7,
-        "quantization": "none",
-        "model_hub": "huggingface",
-        "revision": null,
-        "context_length": 2048,
-        "replica": 1
-    }'''
+                    "model_type": "LLM",
+                    "address": "127.0.0.1:43877",
+                    "accelerators": [
+                        "0",
+                        "1"
+                    ],
+                    "model_name": "chatglm3-6b",
+                    "model_lang": [
+                        "en"
+                    ],
+                    "model_ability": [
+                        "generate",
+                        "chat"
+                    ],
+                    "model_description": "latest chatglm3",
+                    "model_format": "pytorch",
+                    "model_size_in_billions": 7,
+                    "quantization": "none",
+                    "model_hub": "huggingface",
+                    "revision": null,
+                    "context_length": 2048,
+                    "replica": 1
+                }'''
                return response
            
            elif model_uid == 'embedding':
                response.status_code = 200
                response._content = b'''{
-        "model_type": "embedding",
-        "address": "127.0.0.1:43877",
-        "accelerators": [
-            "0",
-            "1"
-        ],
-        "model_name": "bge",
-        "model_lang": [
-            "en"
-        ],
-        "revision": null,
-        "max_tokens": 512
-}'''
+                    "model_type": "embedding",
+                    "address": "127.0.0.1:43877",
+                    "accelerators": [
+                        "0",
+                        "1"
+                    ],
+                    "model_name": "bge",
+                    "model_lang": [
+                        "en"
+                    ],
+                    "revision": null,
+                    "max_tokens": 512
+                }'''
                return response
            
        elif 'v1/cluster/auth' in url:
            response.status_code = 200
            response._content = b'''{
-    "auth": true
-}'''
+                "auth": true
+            }'''
            return response
        
    def _check_cluster_authenticated(self):