Unverified Commit fe2f3a0a authored by Yeuoly's avatar Yeuoly

Merge branch 'main' into feat/enterprise

parents a29c1f93 2f28afeb
...@@ -145,6 +145,9 @@ docker/volumes/db/data/* ...@@ -145,6 +145,9 @@ docker/volumes/db/data/*
docker/volumes/redis/data/* docker/volumes/redis/data/*
docker/volumes/weaviate/* docker/volumes/weaviate/*
docker/volumes/qdrant/* docker/volumes/qdrant/*
docker/volumes/etcd/*
docker/volumes/minio/*
docker/volumes/milvus/*
sdks/python-client/build sdks/python-client/build
sdks/python-client/dist sdks/python-client/dist
......
...@@ -26,6 +26,7 @@ from config import CloudEditionConfig, Config ...@@ -26,6 +26,7 @@ from config import CloudEditionConfig, Config
from extensions import ( from extensions import (
ext_celery, ext_celery,
ext_code_based_extension, ext_code_based_extension,
ext_compress,
ext_database, ext_database,
ext_hosting_provider, ext_hosting_provider,
ext_login, ext_login,
...@@ -96,6 +97,7 @@ def create_app(test_config=None) -> Flask: ...@@ -96,6 +97,7 @@ def create_app(test_config=None) -> Flask:
def initialize_extensions(app): def initialize_extensions(app):
# Since the application instance is now created, pass it to each Flask # Since the application instance is now created, pass it to each Flask
# extension instance to bind it to the Flask application instance (app) # extension instance to bind it to the Flask application instance (app)
ext_compress.init_app(app)
ext_code_based_extension.init() ext_code_based_extension.init()
ext_database.init_app(app) ext_database.init_app(app)
ext_migrate.init(app, db) ext_migrate.init(app, db)
......
...@@ -298,6 +298,8 @@ class Config: ...@@ -298,6 +298,8 @@ class Config:
self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT') self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
self.API_COMPRESSION_ENABLED = get_bool_env('API_COMPRESSION_ENABLED')
class CloudEditionConfig(Config): class CloudEditionConfig(Config):
......
...@@ -28,6 +28,9 @@ from models.model import Conversation, Message ...@@ -28,6 +28,9 @@ from models.model import Conversation, Message
class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
_is_first_iteration = True
_ignore_observation_providers = ['wenxin']
def run(self, conversation: Conversation, def run(self, conversation: Conversation,
message: Message, message: Message,
query: str, query: str,
...@@ -42,10 +45,8 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): ...@@ -42,10 +45,8 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
agent_scratchpad: list[AgentScratchpadUnit] = [] agent_scratchpad: list[AgentScratchpadUnit] = []
self._init_agent_scratchpad(agent_scratchpad, self.history_prompt_messages) self._init_agent_scratchpad(agent_scratchpad, self.history_prompt_messages)
# check model mode if 'Observation' not in app_orchestration_config.model_config.stop:
if self.app_orchestration_config.model_config.mode == "completion": if app_orchestration_config.model_config.provider not in self._ignore_observation_providers:
# TODO: stop words
if 'Observation' not in app_orchestration_config.model_config.stop:
app_orchestration_config.model_config.stop.append('Observation') app_orchestration_config.model_config.stop.append('Observation')
# override inputs # override inputs
...@@ -202,6 +203,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): ...@@ -202,6 +203,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
) )
) )
scratchpad.thought = scratchpad.thought.strip() or 'I am thinking about how to help you'
agent_scratchpad.append(scratchpad) agent_scratchpad.append(scratchpad)
# get llm usage # get llm usage
...@@ -255,9 +257,15 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): ...@@ -255,9 +257,15 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
# invoke tool # invoke tool
error_response = None error_response = None
try: try:
if isinstance(tool_call_args, str):
try:
tool_call_args = json.loads(tool_call_args)
except json.JSONDecodeError:
pass
tool_response = tool_instance.invoke( tool_response = tool_instance.invoke(
user_id=self.user_id, user_id=self.user_id,
tool_parameters=tool_call_args if isinstance(tool_call_args, dict) else json.loads(tool_call_args) tool_parameters=tool_call_args
) )
# transform tool response to llm friendly response # transform tool response to llm friendly response
tool_response = self.transform_tool_invoke_messages(tool_response) tool_response = self.transform_tool_invoke_messages(tool_response)
...@@ -466,7 +474,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): ...@@ -466,7 +474,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
if isinstance(message, AssistantPromptMessage): if isinstance(message, AssistantPromptMessage):
current_scratchpad = AgentScratchpadUnit( current_scratchpad = AgentScratchpadUnit(
agent_response=message.content, agent_response=message.content,
thought=message.content, thought=message.content or 'I am thinking about how to help you',
action_str='', action_str='',
action=None, action=None,
observation=None, observation=None,
...@@ -546,7 +554,8 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): ...@@ -546,7 +554,8 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
result = '' result = ''
for scratchpad in agent_scratchpad: for scratchpad in agent_scratchpad:
result += scratchpad.thought + next_iteration.replace("{{observation}}", scratchpad.observation or '') + "\n" result += (scratchpad.thought or '') + (scratchpad.action_str or '') + \
next_iteration.replace("{{observation}}", scratchpad.observation or 'It seems that no response is available')
return result return result
...@@ -621,21 +630,24 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner): ...@@ -621,21 +630,24 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
)) ))
# add assistant message # add assistant message
if len(agent_scratchpad) > 0: if len(agent_scratchpad) > 0 and not self._is_first_iteration:
prompt_messages.append(AssistantPromptMessage( prompt_messages.append(AssistantPromptMessage(
content=(agent_scratchpad[-1].thought or '') content=(agent_scratchpad[-1].thought or '') + (agent_scratchpad[-1].action_str or ''),
)) ))
# add user message # add user message
if len(agent_scratchpad) > 0: if len(agent_scratchpad) > 0 and not self._is_first_iteration:
prompt_messages.append(UserPromptMessage( prompt_messages.append(UserPromptMessage(
content=(agent_scratchpad[-1].observation or ''), content=(agent_scratchpad[-1].observation or 'It seems that no response is available'),
)) ))
self._is_first_iteration = False
return prompt_messages return prompt_messages
elif mode == "completion": elif mode == "completion":
# parse agent scratchpad # parse agent scratchpad
agent_scratchpad_str = self._convert_scratchpad_list_to_str(agent_scratchpad) agent_scratchpad_str = self._convert_scratchpad_list_to_str(agent_scratchpad)
self._is_first_iteration = False
# parse prompt messages # parse prompt messages
return [UserPromptMessage( return [UserPromptMessage(
content=first_prompt.replace("{{instruction}}", instruction) content=first_prompt.replace("{{instruction}}", instruction)
......
...@@ -186,7 +186,7 @@ class IndexingRunner: ...@@ -186,7 +186,7 @@ class IndexingRunner:
first() first()
index_type = dataset_document.doc_form index_type = dataset_document.doc_form
index_processor = IndexProcessorFactory(index_type, processing_rule.to_dict()).init_index_processor() index_processor = IndexProcessorFactory(index_type).init_index_processor()
self._load( self._load(
index_processor=index_processor, index_processor=index_processor,
dataset=dataset, dataset=dataset,
......
from os import path
from threading import Lock from threading import Lock
from time import time from time import time
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, MissingSchema, Timeout from requests.exceptions import ConnectionError, MissingSchema, Timeout
from requests.sessions import Session from requests.sessions import Session
from yarl import URL
class XinferenceModelExtraParameter: class XinferenceModelExtraParameter:
...@@ -55,7 +55,10 @@ class XinferenceHelper: ...@@ -55,7 +55,10 @@ class XinferenceHelper:
get xinference model extra parameter like model_format and model_handle_type get xinference model extra parameter like model_format and model_handle_type
""" """
url = path.join(server_url, 'v1/models', model_uid) if not model_uid or not model_uid.strip() or not server_url or not server_url.strip():
raise RuntimeError('model_uid is empty')
url = str(URL(server_url) / 'v1' / 'models' / model_uid)
# this method is surrounded by a lock, and default requests may hang forever, so we just set a Adapter with max_retries=3 # this method is surrounded by a lock, and default requests may hang forever, so we just set a Adapter with max_retries=3
session = Session() session = Session()
...@@ -66,7 +69,6 @@ class XinferenceHelper: ...@@ -66,7 +69,6 @@ class XinferenceHelper:
response = session.get(url, timeout=10) response = session.get(url, timeout=10)
except (MissingSchema, ConnectionError, Timeout) as e: except (MissingSchema, ConnectionError, Timeout) as e:
raise RuntimeError(f'get xinference model extra parameter failed, url: {url}, error: {e}') raise RuntimeError(f'get xinference model extra parameter failed, url: {url}, error: {e}')
if response.status_code != 200: if response.status_code != 200:
raise RuntimeError(f'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}') raise RuntimeError(f'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}')
......
...@@ -3,6 +3,7 @@ import csv ...@@ -3,6 +3,7 @@ import csv
from typing import Optional from typing import Optional
from core.rag.extractor.extractor_base import BaseExtractor from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.extractor.helpers import detect_file_encodings
from core.rag.models.document import Document from core.rag.models.document import Document
...@@ -36,7 +37,7 @@ class CSVExtractor(BaseExtractor): ...@@ -36,7 +37,7 @@ class CSVExtractor(BaseExtractor):
docs = self._read_from_file(csvfile) docs = self._read_from_file(csvfile)
except UnicodeDecodeError as e: except UnicodeDecodeError as e:
if self._autodetect_encoding: if self._autodetect_encoding:
detected_encodings = detect_filze_encodings(self._file_path) detected_encodings = detect_file_encodings(self._file_path)
for encoding in detected_encodings: for encoding in detected_encodings:
try: try:
with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile: with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile:
......
...@@ -18,3 +18,4 @@ ...@@ -18,3 +18,4 @@
- vectorizer - vectorizer
- gaode - gaode
- wecom - wecom
- qrcode
<?xml version="1.0" encoding="utf-8"?>
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg width="800px" height="800px" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<g>
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M16 17v-1h-3v-3h3v2h2v2h-1v2h-2v2h-2v-3h2v-1h1zm5 4h-4v-2h2v-2h2v4zM3 3h8v8H3V3zm2 2v4h4V5H5zm8-2h8v8h-8V3zm2 2v4h4V5h-4zM3 13h8v8H3v-8zm2 2v4h4v-4H5zm13-2h3v2h-3v-2zM6 6h2v2H6V6zm0 10h2v2H6v-2zM16 6h2v2h-2V6z"/>
</g>
</svg>
\ No newline at end of file
from typing import Any
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.qrcode.tools.qrcode_generator import QRCodeGeneratorTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class QRCodeProvider(BuiltinToolProviderController):
    """Built-in tool provider for the QR code generator tool."""

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """Smoke-test the provider by generating one sample QR code.

        The ``credentials`` argument is not read here; the check only invokes
        the generator tool once with a fixed sample payload.

        Raises:
            ToolProviderCredentialValidationError: if the sample invocation
                raises any exception.
        """
        try:
            QRCodeGeneratorTool().invoke(
                user_id='',
                tool_parameters={'content': 'Dify 123 😊'},
            )
        except Exception as e:
            # Chain explicitly so the underlying failure stays attached as the cause.
            raise ToolProviderCredentialValidationError(str(e)) from e
identity:
author: Bowen Liang
name: qrcode
label:
en_US: QRCode
zh_Hans: 二维码工具
pt_BR: QRCode
description:
en_US: A tool for generating QR code (quick-response code) image.
zh_Hans: 一个二维码工具
pt_BR: A tool for generating QR code (quick-response code) image.
icon: icon.svg
import io
import logging
from typing import Any, Union
import qrcode
from qrcode.image.pure import PyPNGImage
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class QRCodeGeneratorTool(BuiltinTool):
    """Tool that renders the ``content`` parameter as a PNG QR code image."""

    def _invoke(self,
                user_id: str,
                tool_parameters: dict[str, Any],
                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Generate a QR code from the ``content`` tool parameter.

        Returns a blob message carrying the PNG bytes on success, or a text
        message when ``content`` is missing/empty or generation fails.
        """
        # read the text to encode
        content = tool_parameters.get('content', '')
        if not content:
            return self.create_text_message('Invalid parameter content')

        try:
            # render into an in-memory buffer and hand the raw bytes back as a blob
            buffer = io.BytesIO()
            qrcode.make(data=content, image_factory=PyPNGImage).save(buffer)
            return self.create_blob_message(
                blob=buffer.getvalue(),
                meta={'mime_type': 'image/png'},
                save_as=self.VARIABLE_KEY.IMAGE.value,
            )
        except Exception:
            # best effort: log the failure and report it as a plain text message
            logging.exception(f'Failed to generate QR code for content: {content}')
            return self.create_text_message('Failed to generate QR code')
identity:
name: qrcode_generator
author: Bowen Liang
label:
en_US: QR Code Generator
zh_Hans: 二维码生成器
pt_BR: QR Code Generator
description:
human:
en_US: A tool for generating QR code image
zh_Hans: 一个用于生成二维码的工具
pt_BR: A tool for generating QR code image
llm: A tool for generating QR code image
parameters:
- name: content
type: string
required: true
label:
en_US: content text for QR code
zh_Hans: 二维码文本内容
pt_BR: content text for QR code
human_description:
en_US: content text for QR code
zh_Hans: 二维码文本内容
pt_BR: content text for QR code
form: llm
<svg width="2500" height="2500" viewBox="0 0 256 256" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="xMidYMid"><g fill="#CF272D"><path d="M127.86 222.304c-52.005 0-94.164-42.159-94.164-94.163 0-52.005 42.159-94.163 94.164-94.163 52.004 0 94.162 42.158 94.162 94.163 0 52.004-42.158 94.163-94.162 94.163zm0-222.023C57.245.281 0 57.527 0 128.141 0 198.756 57.245 256 127.86 256c70.614 0 127.859-57.244 127.859-127.859 0-70.614-57.245-127.86-127.86-127.86z"/><path d="M133.116 96.297c0-14.682 11.903-26.585 26.586-26.585 14.683 0 26.585 11.903 26.585 26.585 0 14.684-11.902 26.586-26.585 26.586-14.683 0-26.586-11.902-26.586-26.586M133.116 159.983c0-14.682 11.903-26.586 26.586-26.586 14.683 0 26.585 11.904 26.585 26.586 0 14.683-11.902 26.586-26.585 26.586-14.683 0-26.586-11.903-26.586-26.586M69.431 159.983c0-14.682 11.904-26.586 26.586-26.586 14.683 0 26.586 11.904 26.586 26.586 0 14.683-11.903 26.586-26.586 26.586-14.682 0-26.586-11.903-26.586-26.586M69.431 96.298c0-14.683 11.904-26.585 26.586-26.585 14.683 0 26.586 11.902 26.586 26.585 0 14.684-11.903 26.586-26.586 26.586-14.682 0-26.586-11.902-26.586-26.586"/></g></svg>
\ No newline at end of file
from typing import Any, Union
from langchain.utilities import TwilioAPIWrapper
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class SendMessageTool(BuiltinTool):
    """
    A tool for sending messages using Twilio API.
    """

    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Send ``message`` to ``to_number`` through Twilio.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (dict[str, Any]): Must contain ``message`` and
                ``to_number``.

        Returns:
            Union[ToolInvokeMessage, list[ToolInvokeMessage]]: A text message
            reporting that the message was sent.

        Raises:
            KeyError: If a required credential or tool parameter is missing.
        """
        account_sid = self.runtime.credentials["account_sid"]
        auth_token = self.runtime.credentials["auth_token"]
        from_number = self.runtime.credentials["from_number"]

        message = tool_parameters["message"]
        to_number = tool_parameters["to_number"]

        # For the WhatsApp channel the sender needs the same "whatsapp:" prefix
        # as the recipient, with no space after the colon. Skip the prefix when
        # the configured sender already carries it.
        if to_number.startswith("whatsapp:") and not from_number.startswith("whatsapp:"):
            from_number = f"whatsapp:{from_number}"

        twilio = TwilioAPIWrapper(
            account_sid=account_sid, auth_token=auth_token, from_number=from_number
        )

        # Sending the message through Twilio; the wrapper's return value is not used.
        twilio.run(message, to_number)

        return self.create_text_message(text="Message sent successfully.")
identity:
name: send_message
author: Yash Parmar
label:
en_US: SendMessage
zh_Hans: 发送消息
pt_BR: SendMessage
description:
human:
en_US: Send SMS or Twilio Messaging Channels messages.
zh_Hans: 发送SMS或Twilio消息通道消息。
pt_BR: Send SMS or Twilio Messaging Channels messages.
llm: Send SMS or Twilio Messaging Channels messages. Supports different channels including WhatsApp.
parameters:
- name: message
type: string
required: true
label:
en_US: Message
zh_Hans: 消息内容
pt_BR: Message
human_description:
en_US: The content of the message to be sent.
zh_Hans: 要发送的消息内容。
pt_BR: The content of the message to be sent.
llm_description: The content of the message to be sent.
form: llm
- name: to_number
type: string
required: true
label:
en_US: To Number
zh_Hans: 收信号码
pt_BR: Para Número
human_description:
en_US: The recipient's phone number. Prefix with 'whatsapp:' for WhatsApp messages, e.g., "whatsapp:+1234567890".
zh_Hans: 收件人的电话号码。WhatsApp消息前缀为'whatsapp:',例如,"whatsapp:+1234567890"。
pt_BR: The recipient's phone number. Prefix with 'whatsapp:' for WhatsApp messages, e.g., "whatsapp:+1234567890".
llm_description: The recipient's phone number. Prefix with 'whatsapp:' for WhatsApp messages, e.g., "whatsapp:+1234567890".
form: llm
from typing import Any
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class TwilioProvider(BuiltinToolProviderController):
    """Built-in tool provider for Twilio messaging."""

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """Validate Twilio credentials (currently a no-op).

        The only statements inside the ``try`` are a bare string literal and
        ``pass``, so ``credentials`` is never inspected and the ``except``
        branch cannot be reached as written.
        """
        try:
            # The string below holds a disabled validation call; as a bare
            # string expression it is never executed.
            # NOTE(review): presumably disabled to avoid sending a real message
            # to a hard-coded number on every validation — confirm. Re-enabling
            # it would also need SendMessageTool in scope, which is not among
            # the imports visible for this file.
            """
            SendMessageTool().fork_tool_runtime(
                meta={
                    "credentials": credentials,
                }
            ).invoke(
                user_id="",
                tool_parameters={
                    "message": "Credential validation message",
                    "to_number": "+14846624384",
                },
            )
            """
            pass
        except Exception as e:
            raise ToolProviderCredentialValidationError(str(e))
identity:
author: Yash Parmar
name: twilio
label:
en_US: Twilio
zh_Hans: Twilio
pt_BR: Twilio
description:
en_US: Send messages through SMS or Twilio Messaging Channels.
zh_Hans: 通过SMS或Twilio消息通道发送消息。
pt_BR: Send messages through SMS or Twilio Messaging Channels.
icon: icon.svg
credentials_for_provider:
account_sid:
type: secret-input
required: true
label:
en_US: Account SID
zh_Hans: 账户SID
pt_BR: Account SID
placeholder:
en_US: Please input your Twilio Account SID
zh_Hans: 请输入您的Twilio账户SID
pt_BR: Please input your Twilio Account SID
auth_token:
type: secret-input
required: true
label:
en_US: Auth Token
zh_Hans: 认证令牌
pt_BR: Auth Token
placeholder:
en_US: Please input your Twilio Auth Token
zh_Hans: 请输入您的Twilio认证令牌
pt_BR: Please input your Twilio Auth Token
from_number:
type: secret-input
required: true
label:
en_US: From Number
zh_Hans: 发信号码
pt_BR: De Número
placeholder:
en_US: Please input your Twilio phone number
zh_Hans: 请输入您的Twilio电话号码
pt_BR: Please input your Twilio phone number
...@@ -174,7 +174,18 @@ class Tool(BaseModel, ABC): ...@@ -174,7 +174,18 @@ class Tool(BaseModel, ABC):
return result return result
def invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]: def invoke(self, user_id: str, tool_parameters: Union[dict[str, Any], str]) -> list[ToolInvokeMessage]:
# check if tool_parameters is a string
if isinstance(tool_parameters, str):
# check if this tool has only one parameter
parameters = [parameter for parameter in self.parameters if parameter.form == ToolParameter.ToolParameterForm.LLM]
if parameters and len(parameters) == 1:
tool_parameters = {
parameters[0].name: tool_parameters
}
else:
raise ValueError(f"tool_parameters should be a dict, but got a string: {tool_parameters}")
# update tool_parameters # update tool_parameters
if self.runtime.runtime_parameters: if self.runtime.runtime_parameters:
tool_parameters.update(self.runtime.runtime_parameters) tool_parameters.update(self.runtime.runtime_parameters)
......
from flask import Flask
def init_app(app: Flask):
    """Attach Flask-Compress to ``app`` when compression is switched on.

    Does nothing unless the ``API_COMPRESSION_ENABLED`` config value is truthy.
    """
    if not app.config.get('API_COMPRESSION_ENABLED', False):
        return

    # Imported lazily so flask_compress is only loaded when the feature is enabled.
    from flask_compress import Compress

    Compress().init_app(app)
...@@ -3,6 +3,7 @@ beautifulsoup4==4.12.2 ...@@ -3,6 +3,7 @@ beautifulsoup4==4.12.2
flask~=3.0.1 flask~=3.0.1
Flask-SQLAlchemy~=3.0.5 Flask-SQLAlchemy~=3.0.5
SQLAlchemy~=1.4.28 SQLAlchemy~=1.4.28
Flask-Compress~=1.14
flask-login~=0.6.3 flask-login~=0.6.3
flask-migrate~=4.0.5 flask-migrate~=4.0.5
flask-restful~=0.3.10 flask-restful~=0.3.10
...@@ -67,4 +68,7 @@ pydub~=0.25.1 ...@@ -67,4 +68,7 @@ pydub~=0.25.1
gmpy2~=2.1.5 gmpy2~=2.1.5
numexpr~=2.9.0 numexpr~=2.9.0
duckduckgo-search==4.4.3 duckduckgo-search==4.4.3
arxiv==2.1.0 arxiv==2.1.0
\ No newline at end of file yarl~=1.9.4
twilio==9.0.0
qrcode~=7.4.2
...@@ -32,68 +32,70 @@ class MockXinferenceClass(object): ...@@ -32,68 +32,70 @@ class MockXinferenceClass(object):
response = Response() response = Response()
if 'v1/models/' in url: if 'v1/models/' in url:
# get model uid # get model uid
model_uid = url.split('/')[-1] model_uid = url.split('/')[-1] or ''
if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \ if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \
model_uid not in ['generate', 'chat', 'embedding', 'rerank']: model_uid not in ['generate', 'chat', 'embedding', 'rerank']:
response.status_code = 404 response.status_code = 404
response._content = b'{}'
return response return response
# check if url is valid # check if url is valid
if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url): if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url):
response.status_code = 404 response.status_code = 404
response._content = b'{}'
return response return response
if model_uid in ['generate', 'chat']: if model_uid in ['generate', 'chat']:
response.status_code = 200 response.status_code = 200
response._content = b'''{ response._content = b'''{
"model_type": "LLM", "model_type": "LLM",
"address": "127.0.0.1:43877", "address": "127.0.0.1:43877",
"accelerators": [ "accelerators": [
"0", "0",
"1" "1"
], ],
"model_name": "chatglm3-6b", "model_name": "chatglm3-6b",
"model_lang": [ "model_lang": [
"en" "en"
], ],
"model_ability": [ "model_ability": [
"generate", "generate",
"chat" "chat"
], ],
"model_description": "latest chatglm3", "model_description": "latest chatglm3",
"model_format": "pytorch", "model_format": "pytorch",
"model_size_in_billions": 7, "model_size_in_billions": 7,
"quantization": "none", "quantization": "none",
"model_hub": "huggingface", "model_hub": "huggingface",
"revision": null, "revision": null,
"context_length": 2048, "context_length": 2048,
"replica": 1 "replica": 1
}''' }'''
return response return response
elif model_uid == 'embedding': elif model_uid == 'embedding':
response.status_code = 200 response.status_code = 200
response._content = b'''{ response._content = b'''{
"model_type": "embedding", "model_type": "embedding",
"address": "127.0.0.1:43877", "address": "127.0.0.1:43877",
"accelerators": [ "accelerators": [
"0", "0",
"1" "1"
], ],
"model_name": "bge", "model_name": "bge",
"model_lang": [ "model_lang": [
"en" "en"
], ],
"revision": null, "revision": null,
"max_tokens": 512 "max_tokens": 512
}''' }'''
return response return response
elif 'v1/cluster/auth' in url: elif 'v1/cluster/auth' in url:
response.status_code = 200 response.status_code = 200
response._content = b'''{ response._content = b'''{
"auth": true "auth": true
}''' }'''
return response return response
def _check_cluster_authenticated(self): def _check_cluster_authenticated(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment