Commit b38115b4 authored by jyong

Merge branch 'feat/add-thread-control' into deploy/dev

parents 06ae4b4b 50faca40
......@@ -507,25 +507,18 @@ class IndexingRunner:
model_name='gpt-3.5-turbo',
max_tokens=2000
)
# threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
# threads.append(document_format_thread)
# document_format_thread.start()
# for thread in threads:
# thread.join()
def worker(doc):
return self.format_document(llm=llm, document_node=doc, split_documents=split_documents,
document_form=document_form)
with ThreadPoolExecutor(max_workers=10) as executor: # max_workers 控制并发线程数
executor.map(worker, documents)
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(self.format_document, llm, doc, document_form): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
for i in range(0, len(documents), 10):
threads = []
sub_documents = documents[i:i + 10]
for doc in sub_documents:
document_format_thread = threading.Thread(target=self.format_document, kwargs={
'llm': llm, 'document_node': doc, 'split_documents': split_documents,
'document_form': document_form})
threads.append(document_format_thread)
document_format_thread.start()
for thread in threads:
thread.join()
all_documents.extend(split_documents)
return all_documents
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment