Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
b38115b4
Commit
b38115b4
authored
Jul 29, 2023
by
jyong
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feat/add-thread-control' into deploy/dev
parents
06ae4b4b
50faca40
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
19 deletions
+12
-19
indexing_runner.py
api/core/indexing_runner.py
+12
-19
No files found.
api/core/indexing_runner.py
View file @
b38115b4
...
...
@@ -507,25 +507,18 @@ class IndexingRunner:
model_name
=
'gpt-3.5-turbo'
,
max_tokens
=
2000
)
# threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
# threads.append(document_format_thread)
# document_format_thread.start()
# for thread in threads:
# thread.join()
def
worker
(
doc
):
return
self
.
format_document
(
llm
=
llm
,
document_node
=
doc
,
split_documents
=
split_documents
,
document_form
=
document_form
)
with
ThreadPoolExecutor
(
max_workers
=
10
)
as
executor
:
# max_workers 控制并发线程数
executor
.
map
(
worker
,
documents
)
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(self.format_document, llm, doc, document_form): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
for
i
in
range
(
0
,
len
(
documents
),
10
):
threads
=
[]
sub_documents
=
documents
[
i
:
i
+
10
]
for
doc
in
sub_documents
:
document_format_thread
=
threading
.
Thread
(
target
=
self
.
format_document
,
kwargs
=
{
'llm'
:
llm
,
'document_node'
:
doc
,
'split_documents'
:
split_documents
,
'document_form'
:
document_form
})
threads
.
append
(
document_format_thread
)
document_format_thread
.
start
()
for
thread
in
threads
:
thread
.
join
()
all_documents
.
extend
(
split_documents
)
return
all_documents
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment