Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
6cf93379
Unverified
Commit
6cf93379
authored
Jan 25, 2024
by
takatost
Committed by
GitHub
Jan 25, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: split chunks return empty strings (#2197)
parent
8639abec
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
8 deletions
+13
-8
indexing_runner.py
api/core/indexing_runner.py
+3
-1
text_embedding.py
...l_providers/azure_openai/text_embedding/text_embedding.py
+3
-3
text_embedding.py
...e/model_providers/cohere/text_embedding/text_embedding.py
+4
-1
text_embedding.py
...e/model_providers/openai/text_embedding/text_embedding.py
+3
-3
No files found.
api/core/indexing_runner.py
View file @
6cf93379
...
@@ -655,7 +655,9 @@ class IndexingRunner:
...
@@ -655,7 +655,9 @@ class IndexingRunner:
else
:
else
:
page_content
=
page_content
page_content
=
page_content
document_node
.
page_content
=
page_content
document_node
.
page_content
=
page_content
split_documents
.
append
(
document_node
)
if
document_node
.
page_content
:
split_documents
.
append
(
document_node
)
all_documents
.
extend
(
split_documents
)
all_documents
.
extend
(
split_documents
)
# processing qa document
# processing qa document
if
document_form
==
'qa_model'
:
if
document_form
==
'qa_model'
:
...
...
api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
View file @
6cf93379
import
base64
import
base64
import
copy
import
copy
import
time
import
time
from
typing
import
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
,
Union
import
numpy
as
np
import
numpy
as
np
import
tiktoken
import
tiktoken
...
@@ -76,7 +76,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
...
@@ -76,7 +76,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
embeddings_batch
,
embedding_used_tokens
=
self
.
_embedding_invoke
(
embeddings_batch
,
embedding_used_tokens
=
self
.
_embedding_invoke
(
model
=
model
,
model
=
model
,
client
=
client
,
client
=
client
,
texts
=
[
""
]
,
texts
=
""
,
extra_model_kwargs
=
extra_model_kwargs
extra_model_kwargs
=
extra_model_kwargs
)
)
...
@@ -147,7 +147,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
...
@@ -147,7 +147,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
return
ai_model_entity
.
entity
return
ai_model_entity
.
entity
@
staticmethod
@
staticmethod
def
_embedding_invoke
(
model
:
str
,
client
:
AzureOpenAI
,
texts
:
list
[
str
],
def
_embedding_invoke
(
model
:
str
,
client
:
AzureOpenAI
,
texts
:
Union
[
list
[
str
],
str
],
extra_model_kwargs
:
dict
)
->
Tuple
[
list
[
list
[
float
]],
int
]:
extra_model_kwargs
:
dict
)
->
Tuple
[
list
[
list
[
float
]],
int
]:
response
=
client
.
embeddings
.
create
(
response
=
client
.
embeddings
.
create
(
input
=
texts
,
input
=
texts
,
...
...
api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
View file @
6cf93379
...
@@ -76,7 +76,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
...
@@ -76,7 +76,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
embeddings_batch
,
embedding_used_tokens
=
self
.
_embedding_invoke
(
embeddings_batch
,
embedding_used_tokens
=
self
.
_embedding_invoke
(
model
=
model
,
model
=
model
,
credentials
=
credentials
,
credentials
=
credentials
,
texts
=
[
""
]
texts
=
[
"
"
]
)
)
used_tokens
+=
embedding_used_tokens
used_tokens
+=
embedding_used_tokens
...
@@ -131,6 +131,9 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
...
@@ -131,6 +131,9 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param text: text to tokenize
:param text: text to tokenize
:return:
:return:
"""
"""
if
not
text
:
return
Tokens
([],
[],
{})
# initialize client
# initialize client
client
=
cohere
.
Client
(
credentials
.
get
(
'api_key'
))
client
=
cohere
.
Client
(
credentials
.
get
(
'api_key'
))
...
...
api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
View file @
6cf93379
import
base64
import
base64
import
time
import
time
from
typing
import
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
,
Union
import
numpy
as
np
import
numpy
as
np
import
tiktoken
import
tiktoken
...
@@ -89,7 +89,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
...
@@ -89,7 +89,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
embeddings_batch
,
embedding_used_tokens
=
self
.
_embedding_invoke
(
embeddings_batch
,
embedding_used_tokens
=
self
.
_embedding_invoke
(
model
=
model
,
model
=
model
,
client
=
client
,
client
=
client
,
texts
=
[
""
]
,
texts
=
""
,
extra_model_kwargs
=
extra_model_kwargs
extra_model_kwargs
=
extra_model_kwargs
)
)
...
@@ -160,7 +160,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
...
@@ -160,7 +160,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
except
Exception
as
ex
:
except
Exception
as
ex
:
raise
CredentialsValidateFailedError
(
str
(
ex
))
raise
CredentialsValidateFailedError
(
str
(
ex
))
def
_embedding_invoke
(
self
,
model
:
str
,
client
:
OpenAI
,
texts
:
list
[
str
],
def
_embedding_invoke
(
self
,
model
:
str
,
client
:
OpenAI
,
texts
:
Union
[
list
[
str
],
str
],
extra_model_kwargs
:
dict
)
->
Tuple
[
list
[
list
[
float
]],
int
]:
extra_model_kwargs
:
dict
)
->
Tuple
[
list
[
list
[
float
]],
int
]:
"""
"""
Invoke embedding model
Invoke embedding model
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment