Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
ce157abb
Commit
ce157abb
authored
Jul 26, 2023
by
jyong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mutil thread
parent
9763fc28
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
9 deletions
+16
-9
indexing_runner.py
api/core/indexing_runner.py
+16
-9
No files found.
api/core/indexing_runner.py
View file @
ce157abb
...
...
@@ -3,6 +3,7 @@ import datetime
import
json
import
logging
import
re
import
threading
import
time
import
uuid
from
concurrent.futures
import
ThreadPoolExecutor
,
as_completed
...
...
@@ -496,9 +497,7 @@ class IndexingRunner:
# parse document to nodes
documents
=
splitter
.
split_documents
([
text_doc
])
split_documents
=
[]
def
format_document
(
flask_app
:
Flask
,
document_node
:
Document
)
->
List
[
Document
]:
def
format_document
(
flask_app
:
Flask
,
document_node
:
Document
,
split_documents
:
List
)
->
List
[
Document
]:
with
flask_app
.
app_context
():
print
(
"process:"
+
document_node
.
page_content
)
format_documents
=
[]
...
...
@@ -529,12 +528,20 @@ class IndexingRunner:
qa_documents
.
append
(
qa_document
)
format_documents
.
extend
(
qa_documents
)
return
format_documents
with
ThreadPoolExecutor
()
as
executor
:
future_to_doc
=
{
executor
.
submit
(
format_document
,
current_app
.
_get_current_object
(),
doc
):
doc
for
doc
in
documents
}
for
future
in
concurrent
.
futures
.
as_completed
(
future_to_doc
):
split_documents
.
extend
(
future
.
result
())
split_documents
.
append
(
format_documents
)
split_documents
=
[]
threads
=
[]
for
doc
in
documents
:
document_format_thread
=
threading
.
Thread
(
target
=
format_document
,
kwargs
=
{
'flask_app'
:
current_app
.
_get_current_object
(),
'document_node'
:
doc
,
'split_documents'
:
split_documents
})
threads
.
append
(
document_format_thread
)
document_format_thread
.
start
()
for
thread
in
threads
:
thread
.
join
()
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
all_documents
.
extend
(
split_documents
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment