Commit ce157abb authored by jyong's avatar jyong

mutil thread

parent 9763fc28
...@@ -3,6 +3,7 @@ import datetime ...@@ -3,6 +3,7 @@ import datetime
import json import json
import logging import logging
import re import re
import threading
import time import time
import uuid import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
...@@ -496,9 +497,7 @@ class IndexingRunner: ...@@ -496,9 +497,7 @@ class IndexingRunner:
# parse document to nodes # parse document to nodes
documents = splitter.split_documents([text_doc]) documents = splitter.split_documents([text_doc])
split_documents = [] def format_document(flask_app: Flask, document_node: Document, split_documents: List) -> List[Document]:
def format_document(flask_app: Flask, document_node: Document) -> List[Document]:
with flask_app.app_context(): with flask_app.app_context():
print("process:"+document_node.page_content) print("process:"+document_node.page_content)
format_documents = [] format_documents = []
...@@ -529,12 +528,20 @@ class IndexingRunner: ...@@ -529,12 +528,20 @@ class IndexingRunner:
qa_documents.append(qa_document) qa_documents.append(qa_document)
format_documents.extend(qa_documents) format_documents.extend(qa_documents)
return format_documents split_documents.append(format_documents)
split_documents = []
with ThreadPoolExecutor() as executor: threads = []
future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents} for doc in documents:
for future in concurrent.futures.as_completed(future_to_doc): document_format_thread = threading.Thread(target=format_document, kwargs={
split_documents.extend(future.result()) 'flask_app': current_app._get_current_object(), 'document_node': doc, 'split_documents': split_documents})
threads.append(document_format_thread)
document_format_thread.start()
for thread in threads:
thread.join()
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
all_documents.extend(split_documents) all_documents.extend(split_documents)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment