Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
0e5ce218
Commit
0e5ce218
authored
Jul 28, 2023
by
jyong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
multi thread
parent
1073b964
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
37 deletions
+16
-37
indexing_runner.py
api/core/indexing_runner.py
+16
-37
No files found.
api/core/indexing_runner.py
View file @
0e5ce218
...
@@ -516,28 +516,6 @@ class IndexingRunner:
...
@@ -516,28 +516,6 @@ class IndexingRunner:
model_name
=
'gpt-3.5-turbo'
,
model_name
=
'gpt-3.5-turbo'
,
max_tokens
=
2000
max_tokens
=
2000
)
)
#threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
# threads.append(document_format_thread)
# document_format_thread.start()
# for thread in threads:
# thread.join()
#asyncio.run(self.format_document(llm, documents, split_documents, document_form))
# threads.append(task)
# await asyncio.gather(*threads)
# asyncio.run(main())
#await asyncio.gather(say('Hello', 2), say('World', 1))
# with Pool(5) as pool:
# for doc in documents:
# result = pool.apply_async(format_document, kwds={'flask_app': current_app._get_current_object(), 'document_node': doc, 'split_documents': split_documents})
# if result.ready():
# split_documents.extend(result.get())
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
self
.
format_document
(
llm
,
documents
,
split_documents
,
document_form
)
self
.
format_document
(
llm
,
documents
,
split_documents
,
document_form
)
all_documents
.
extend
(
split_documents
)
all_documents
.
extend
(
split_documents
)
...
@@ -545,7 +523,6 @@ class IndexingRunner:
...
@@ -545,7 +523,6 @@ class IndexingRunner:
def
format_document
(
self
,
llm
:
StreamableOpenAI
,
documents
:
List
[
Document
],
split_documents
:
List
,
document_form
:
str
):
def
format_document
(
self
,
llm
:
StreamableOpenAI
,
documents
:
List
[
Document
],
split_documents
:
List
,
document_form
:
str
):
for
document_node
in
documents
:
for
document_node
in
documents
:
print
(
"process:"
+
document_node
.
page_content
)
format_documents
=
[]
format_documents
=
[]
if
document_node
.
page_content
is
None
or
not
document_node
.
page_content
.
strip
():
if
document_node
.
page_content
is
None
or
not
document_node
.
page_content
.
strip
():
return
format_documents
return
format_documents
...
@@ -559,20 +536,22 @@ class IndexingRunner:
...
@@ -559,20 +536,22 @@ class IndexingRunner:
format_documents
.
append
(
document_node
)
format_documents
.
append
(
document_node
)
elif
document_form
==
'qa_model'
:
elif
document_form
==
'qa_model'
:
try
:
# qa model document
# qa model document
response
=
LLMGenerator
.
generate_qa_document_sync
(
llm
,
document_node
.
page_content
)
response
=
LLMGenerator
.
generate_qa_document_sync
(
llm
,
document_node
.
page_content
)
document_qa_list
=
self
.
format_split_text
(
response
)
document_qa_list
=
self
.
format_split_text
(
response
)
qa_documents
=
[]
qa_documents
=
[]
for
result
in
document_qa_list
:
for
result
in
document_qa_list
:
qa_document
=
Document
(
page_content
=
result
[
'question'
],
metadata
=
document_node
.
metadata
.
copy
())
qa_document
=
Document
(
page_content
=
result
[
'question'
],
metadata
=
document_node
.
metadata
.
copy
())
doc_id
=
str
(
uuid
.
uuid4
())
doc_id
=
str
(
uuid
.
uuid4
())
hash
=
helper
.
generate_text_hash
(
result
[
'question'
])
hash
=
helper
.
generate_text_hash
(
result
[
'question'
])
qa_document
.
metadata
[
'answer'
]
=
result
[
'answer'
]
qa_document
.
metadata
[
'answer'
]
=
result
[
'answer'
]
qa_document
.
metadata
[
'doc_id'
]
=
doc_id
qa_document
.
metadata
[
'doc_id'
]
=
doc_id
qa_document
.
metadata
[
'doc_hash'
]
=
hash
qa_document
.
metadata
[
'doc_hash'
]
=
hash
qa_documents
.
append
(
qa_document
)
qa_documents
.
append
(
qa_document
)
format_documents
.
extend
(
qa_documents
)
format_documents
.
extend
(
qa_documents
)
except
Exception
:
continue
split_documents
.
extend
(
format_documents
)
split_documents
.
extend
(
format_documents
)
def
_split_to_documents_for_estimate
(
self
,
text_docs
:
List
[
Document
],
splitter
:
TextSplitter
,
def
_split_to_documents_for_estimate
(
self
,
text_docs
:
List
[
Document
],
splitter
:
TextSplitter
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment