Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
9bda030e
Commit
9bda030e
authored
Jul 27, 2023
by
jyong
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feat/milvus-support' into deploy/dev
parents
76569834
37f5bb3d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
15 deletions
+15
-15
indexing_runner.py
api/core/indexing_runner.py
+15
-15
No files found.
api/core/indexing_runner.py
View file @
9bda030e
...
...
@@ -496,8 +496,9 @@ class IndexingRunner:
# parse document to nodes
documents
=
splitter
.
split_documents
([
text_doc
])
split_documents
=
[]
def
format_document
(
flask_app
:
Flask
,
document_node
:
Document
,
split_documents
:
List
)
->
List
[
Document
]:
def
format_document
(
flask_app
:
Flask
,
document_node
:
Document
)
->
List
[
Document
]:
with
flask_app
.
app_context
():
print
(
"process:"
+
document_node
.
page_content
)
format_documents
=
[]
...
...
@@ -528,20 +529,19 @@ class IndexingRunner:
qa_documents
.
append
(
qa_document
)
format_documents
.
extend
(
qa_documents
)
split_documents
.
append
(
format_documents
)
split_documents
=
[]
threads
=
[]
for
doc
in
documents
:
document_format_thread
=
threading
.
Thread
(
target
=
format_document
,
kwargs
=
{
'flask_app'
:
current_app
.
_get_current_object
(),
'document_node'
:
doc
,
'split_documents'
:
split_documents
})
threads
.
append
(
document_format_thread
)
document_format_thread
.
start
()
for
thread
in
threads
:
thread
.
join
()
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
return
format_documents
# threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=format_document, kwargs={
# 'flask_app': current_app._get_current_object(), 'document_node': doc, 'split_documents': split_documents})
# threads.append(document_format_thread)
# document_format_thread.start()
# for thread in threads:
# thread.join()
with
ThreadPoolExecutor
(
max_workers
=
10
)
as
executor
:
future_to_doc
=
{
executor
.
submit
(
format_document
,
current_app
.
_get_current_object
(),
doc
):
doc
for
doc
in
documents
}
for
future
in
concurrent
.
futures
.
as_completed
(
future_to_doc
):
split_documents
.
extend
(
future
.
result
())
all_documents
.
extend
(
split_documents
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment