Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
0c330fc0
Unverified
Commit
0c330fc0
authored
Feb 01, 2024
by
Yeuoly
Committed by
GitHub
Feb 01, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: add xinference llm context size (#2336)
parent
cfbb7bec
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
3 deletions
+13
-3
llm.py
api/core/model_runtime/model_providers/xinference/llm/llm.py
+5
-0
xinference_helper.py
...l_runtime/model_providers/xinference/xinference_helper.py
+8
-3
No files found.
api/core/model_runtime/model_providers/xinference/llm/llm.py
View file @
0c330fc0
...
...
@@ -75,6 +75,9 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
if
extra_param
.
support_function_call
:
credentials
[
'support_function_call'
]
=
True
if
extra_param
.
context_length
:
credentials
[
'context_length'
]
=
extra_param
.
context_length
except
RuntimeError
as
e
:
raise
CredentialsValidateFailedError
(
f
'Xinference credentials validate failed: {e}'
)
except
KeyError
as
e
:
...
...
@@ -296,6 +299,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
raise
ValueError
(
f
'xinference model ability {extra_args.model_ability} is not supported'
)
support_function_call
=
credentials
.
get
(
'support_function_call'
,
False
)
context_length
=
credentials
.
get
(
'context_length'
,
2048
)
entity
=
AIModelEntity
(
model
=
model
,
...
...
@@ -309,6 +313,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
]
if
support_function_call
else
[],
model_properties
=
{
ModelPropertyKey
.
MODE
:
completion_type
,
ModelPropertyKey
.
CONTEXT_SIZE
:
context_length
},
parameter_rules
=
rules
)
...
...
api/core/model_runtime/model_providers/xinference/xinference_helper.py
View file @
0c330fc0
...
...
@@ -14,15 +14,17 @@ class XinferenceModelExtraParameter(object):
model_handle_type
:
str
model_ability
:
List
[
str
]
max_tokens
:
int
=
512
context_length
:
int
=
2048
support_function_call
:
bool
=
False
def
__init__
(
self
,
model_format
:
str
,
model_handle_type
:
str
,
model_ability
:
List
[
str
],
support_function_call
:
bool
,
max_tokens
:
int
)
->
None
:
support_function_call
:
bool
,
max_tokens
:
int
,
context_length
:
int
)
->
None
:
self
.
model_format
=
model_format
self
.
model_handle_type
=
model_handle_type
self
.
model_ability
=
model_ability
self
.
support_function_call
=
support_function_call
self
.
max_tokens
=
max_tokens
self
.
context_length
=
context_length
cache
=
{}
cache_lock
=
Lock
()
...
...
@@ -57,7 +59,7 @@ class XinferenceHelper:
url
=
path
.
join
(
server_url
,
'v1/models'
,
model_uid
)
# this meth
i
d is surrounded by a lock, and default requests may hang forever, so we just set a Adapter with max_retries=3
# this meth
o
d is surrounded by a lock, and default requests may hang forever, so we just set an Adapter with max_retries=3
session
=
Session
()
session
.
mount
(
'http://'
,
HTTPAdapter
(
max_retries
=
3
))
session
.
mount
(
'https://'
,
HTTPAdapter
(
max_retries
=
3
))
...
...
@@ -88,11 +90,14 @@ class XinferenceHelper:
support_function_call
=
'tools'
in
model_ability
max_tokens
=
response_json
.
get
(
'max_tokens'
,
512
)
context_length
=
response_json
.
get
(
'context_length'
,
2048
)
return
XinferenceModelExtraParameter
(
model_format
=
model_format
,
model_handle_type
=
model_handle_type
,
model_ability
=
model_ability
,
support_function_call
=
support_function_call
,
max_tokens
=
max_tokens
max_tokens
=
max_tokens
,
context_length
=
context_length
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment