Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
ebd11e74
Unverified
Commit
ebd11e74
authored
Jan 10, 2024
by
Yeuoly
Committed by
GitHub
Jan 10, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: baichuan max chunks (#1990)
parent
94626487
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
85 additions
and
11 deletions
+85
-11
text_embedding.py
...model_providers/baichuan/text_embedding/text_embedding.py
+48
-11
test_text_embedding.py
...ation_tests/model_runtime/baichuan/test_text_embedding.py
+37
-0
No files found.
api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
View file @
ebd11e74
from
typing
import
Optional
from
typing
import
Optional
,
Tuple
from
core.model_runtime.entities.model_entities
import
PriceType
from
core.model_runtime.entities.text_embedding_entities
import
TextEmbeddingResult
,
EmbeddingUsage
...
...
@@ -38,6 +38,50 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
raise
ValueError
(
'Invalid model name'
)
if
not
api_key
:
raise
CredentialsValidateFailedError
(
'api_key is required'
)
# split into chunks of batch size 16
chunks
=
[]
for
i
in
range
(
0
,
len
(
texts
),
16
):
chunks
.
append
(
texts
[
i
:
i
+
16
])
embeddings
=
[]
token_usage
=
0
for
chunk
in
chunks
:
# embed the chunk
chunk_embeddings
,
chunk_usage
=
self
.
embedding
(
model
=
model
,
api_key
=
api_key
,
texts
=
chunk
,
user
=
user
)
embeddings
.
extend
(
chunk_embeddings
)
token_usage
+=
chunk_usage
result
=
TextEmbeddingResult
(
model
=
model
,
embeddings
=
embeddings
,
usage
=
self
.
_calc_response_usage
(
model
=
model
,
credentials
=
credentials
,
tokens
=
token_usage
)
)
return
result
def
embedding
(
self
,
model
:
str
,
api_key
,
texts
:
list
[
str
],
user
:
Optional
[
str
]
=
None
)
\
->
Tuple
[
list
[
list
[
float
]],
int
]:
"""
Embed given texts
:param model: model name
:param api_key: API key sent as the Bearer token
:param texts: texts to embed
:param user: unique user id
:return: tuple of (embeddings, total tokens used)
"""
url
=
self
.
api_base
headers
=
{
'Authorization'
:
'Bearer '
+
api_key
,
...
...
@@ -85,17 +129,10 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
except
Exception
as
e
:
raise
InternalServerError
(
f
"Failed to convert response to json: {e} with text: {response.text}"
)
usage
=
self
.
_calc_response_usage
(
model
=
model
,
credentials
=
credentials
,
tokens
=
usage
[
'total_tokens'
])
return
[
data
[
'embedding'
]
for
data
in
embeddings
],
usage
[
'total_tokens'
]
result
=
TextEmbeddingResult
(
model
=
model
,
embeddings
=
[[
float
(
data
)
for
data
in
x
[
'embedding'
]
]
for
x
in
embeddings
],
usage
=
usage
)
return
result
def
get_num_tokens
(
self
,
model
:
str
,
credentials
:
dict
,
texts
:
list
[
str
])
->
int
:
"""
...
...
api/tests/integration_tests/model_runtime/baichuan/test_text_embedding.py
View file @
ebd11e74
...
...
@@ -59,3 +59,40 @@ def test_get_num_tokens():
)
assert
num_tokens
==
2
def test_max_chunks():
    """Invoke the Baichuan embedding model with 22 texts in a single call.

    The input alternates "hello" and "world" eleven times; the test passes
    when the result is a ``TextEmbeddingResult`` holding one embedding per
    input text (22 in total).
    """
    embedding_model = BaichuanTextEmbeddingModel()

    # Eleven ("hello", "world") pairs -> 22 input texts.
    sample_texts = ["hello", "world"] * 11
    api_credentials = {
        'api_key': os.environ.get('BAICHUAN_API_KEY'),
    }

    result = embedding_model.invoke(
        model='baichuan-text-embedding',
        credentials=api_credentials,
        texts=sample_texts,
    )

    assert isinstance(result, TextEmbeddingResult)
    assert len(result.embeddings) == 22
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment