Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
06ae4b4b
Commit
06ae4b4b
authored
Jul 29, 2023
by
jyong
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feat/add-thread-control' into deploy/dev
parents
a15348ef
7776a636
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
6 deletions
+13
-6
indexing_runner.py
api/core/indexing_runner.py
+13
-6
No files found.
api/core/indexing_runner.py
View file @
06ae4b4b
...
@@ -515,15 +515,22 @@ class IndexingRunner:
...
@@ -515,15 +515,22 @@ class IndexingRunner:
# document_format_thread.start()
# document_format_thread.start()
# for thread in threads:
# for thread in threads:
# thread.join()
# thread.join()
with
ThreadPoolExecutor
()
as
executor
:
future_to_doc
=
{
executor
.
submit
(
self
.
format_document
,
llm
,
doc
,
document_form
):
doc
for
doc
in
documents
}
def
worker
(
doc
):
for
future
in
concurrent
.
futures
.
as_completed
(
future_to_doc
):
return
self
.
format_document
(
llm
=
llm
,
document_node
=
doc
,
split_documents
=
split_documents
,
split_documents
.
extend
(
future
.
result
())
document_form
=
document_form
)
with
ThreadPoolExecutor
(
max_workers
=
10
)
as
executor
:
# max_workers 控制并发线程数
executor
.
map
(
worker
,
documents
)
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(self.format_document, llm, doc, document_form): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
all_documents
.
extend
(
split_documents
)
all_documents
.
extend
(
split_documents
)
return
all_documents
return
all_documents
def
format_document
(
self
,
llm
:
StreamableOpenAI
,
document_node
,
document_form
:
str
):
def
format_document
(
self
,
llm
:
StreamableOpenAI
,
document_node
,
split_documents
,
document_form
:
str
):
print
(
document_node
.
page_content
)
print
(
document_node
.
page_content
)
format_documents
=
[]
format_documents
=
[]
if
document_node
.
page_content
is
None
or
not
document_node
.
page_content
.
strip
():
if
document_node
.
page_content
is
None
or
not
document_node
.
page_content
.
strip
():
...
@@ -554,7 +561,7 @@ class IndexingRunner:
...
@@ -554,7 +561,7 @@ class IndexingRunner:
format_documents
.
extend
(
qa_documents
)
format_documents
.
extend
(
qa_documents
)
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
error
(
str
(
e
))
logging
.
error
(
str
(
e
))
return
format_documents
split_documents
.
extend
(
format_documents
)
def
_split_to_documents_for_estimate
(
self
,
text_docs
:
List
[
Document
],
splitter
:
TextSplitter
,
def
_split_to_documents_for_estimate
(
self
,
text_docs
:
List
[
Document
],
splitter
:
TextSplitter
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment