Commit ce157abb authored by jyong's avatar jyong

mutil thread

parent 9763fc28
......@@ -3,6 +3,7 @@ import datetime
import json
import logging
import re
import threading
import time
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
......@@ -496,9 +497,7 @@ class IndexingRunner:
# parse document to nodes
documents = splitter.split_documents([text_doc])
split_documents = []
def format_document(flask_app: Flask, document_node: Document) -> List[Document]:
def format_document(flask_app: Flask, document_node: Document, split_documents: List) -> List[Document]:
with flask_app.app_context():
print("process:"+document_node.page_content)
format_documents = []
......@@ -529,12 +528,20 @@ class IndexingRunner:
qa_documents.append(qa_document)
format_documents.extend(qa_documents)
return format_documents
with ThreadPoolExecutor() as executor:
future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
for future in concurrent.futures.as_completed(future_to_doc):
split_documents.extend(future.result())
split_documents.append(format_documents)
split_documents = []
threads = []
for doc in documents:
document_format_thread = threading.Thread(target=format_document, kwargs={
'flask_app': current_app._get_current_object(), 'document_node': doc, 'split_documents': split_documents})
threads.append(document_format_thread)
document_format_thread.start()
for thread in threads:
thread.join()
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
all_documents.extend(split_documents)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment