Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
95733796
Unverified
Commit
95733796
authored
Mar 05, 2024
by
Yeuoly
Committed by
GitHub
Mar 05, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: replace os.path.join with yarl (#2690)
parent
552f319b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
48 additions
and
43 deletions
+48
-43
xinference_helper.py
...l_runtime/model_providers/xinference/xinference_helper.py
+5
-3
requirements.txt
api/requirements.txt
+2
-1
xinference.py
...ests/integration_tests/model_runtime/__mock/xinference.py
+41
-39
No files found.
api/core/model_runtime/model_providers/xinference/xinference_helper.py
View file @
95733796
from
os
import
path
from
threading
import
Lock
from
time
import
time
from
requests.adapters
import
HTTPAdapter
from
requests.exceptions
import
ConnectionError
,
MissingSchema
,
Timeout
from
requests.sessions
import
Session
from
yarl
import
URL
class
XinferenceModelExtraParameter
:
...
...
@@ -55,7 +55,10 @@ class XinferenceHelper:
get xinference model extra parameter like model_format and model_handle_type
"""
url
=
path
.
join
(
server_url
,
'v1/models'
,
model_uid
)
if
not
model_uid
or
not
model_uid
.
strip
()
or
not
server_url
or
not
server_url
.
strip
():
raise
RuntimeError
(
'model_uid is empty'
)
url
=
str
(
URL
(
server_url
)
/
'v1'
/
'models'
/
model_uid
)
# this method is surrounded by a lock, and a default requests call may hang forever, so we just set an Adapter with max_retries=3
session
=
Session
()
...
...
@@ -66,7 +69,6 @@ class XinferenceHelper:
response
=
session
.
get
(
url
,
timeout
=
10
)
except
(
MissingSchema
,
ConnectionError
,
Timeout
)
as
e
:
raise
RuntimeError
(
f
'get xinference model extra parameter failed, url: {url}, error: {e}'
)
if
response
.
status_code
!=
200
:
raise
RuntimeError
(
f
'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}'
)
...
...
api/requirements.txt
View file @
95733796
...
...
@@ -68,4 +68,5 @@ pydub~=0.25.1
gmpy2~=2.1.5
numexpr~=2.9.0
duckduckgo-search==4.4.3
arxiv==2.1.0
\ No newline at end of file
arxiv==2.1.0
yarl~=1.9.4
\ No newline at end of file
api/tests/integration_tests/model_runtime/__mock/xinference.py
View file @
95733796
...
...
@@ -32,68 +32,70 @@ class MockXinferenceClass(object):
response
=
Response
()
if
'v1/models/'
in
url
:
# get model uid
model_uid
=
url
.
split
(
'/'
)[
-
1
]
model_uid
=
url
.
split
(
'/'
)[
-
1
]
or
''
if
not
re
.
match
(
r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
,
model_uid
)
and
\
model_uid
not
in
[
'generate'
,
'chat'
,
'embedding'
,
'rerank'
]:
response
.
status_code
=
404
response
.
_content
=
b
'{}'
return
response
# check if url is valid
if
not
re
.
match
(
r'^(https?):\/\/[^\s\/$.?#].[^\s]*$'
,
url
):
response
.
status_code
=
404
response
.
_content
=
b
'{}'
return
response
if
model_uid
in
[
'generate'
,
'chat'
]:
response
.
status_code
=
200
response
.
_content
=
b
'''{
"model_type": "LLM",
"address": "127.0.0.1:43877",
"accelerators": [
"0",
"1"
],
"model_name": "chatglm3-6b",
"model_lang": [
"en"
],
"model_ability": [
"generate",
"chat"
],
"model_description": "latest chatglm3",
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantization": "none",
"model_hub": "huggingface",
"revision": null,
"context_length": 2048,
"replica": 1
}'''
"model_type": "LLM",
"address": "127.0.0.1:43877",
"accelerators": [
"0",
"1"
],
"model_name": "chatglm3-6b",
"model_lang": [
"en"
],
"model_ability": [
"generate",
"chat"
],
"model_description": "latest chatglm3",
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantization": "none",
"model_hub": "huggingface",
"revision": null,
"context_length": 2048,
"replica": 1
}'''
return
response
elif
model_uid
==
'embedding'
:
response
.
status_code
=
200
response
.
_content
=
b
'''{
"model_type": "embedding",
"address": "127.0.0.1:43877",
"accelerators": [
"0",
"1"
],
"model_name": "bge",
"model_lang": [
"en"
],
"revision": null,
"max_tokens": 512
}'''
"model_type": "embedding",
"address": "127.0.0.1:43877",
"accelerators": [
"0",
"1"
],
"model_name": "bge",
"model_lang": [
"en"
],
"revision": null,
"max_tokens": 512
}'''
return
response
elif
'v1/cluster/auth'
in
url
:
response
.
status_code
=
200
response
.
_content
=
b
'''{
"auth": true
}'''
"auth": true
}'''
return
response
def
_check_cluster_authenticated
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment