Commit b38115b4 authored by jyong

Merge branch 'feat/add-thread-control' into deploy/dev

parents 06ae4b4b 50faca40
...@@ -507,25 +507,18 @@ class IndexingRunner: ...@@ -507,25 +507,18 @@ class IndexingRunner:
model_name='gpt-3.5-turbo', model_name='gpt-3.5-turbo',
max_tokens=2000 max_tokens=2000
) )
# threads = [] for i in range(0, len(documents), 10):
# for doc in documents: threads = []
# document_format_thread = threading.Thread(target=self.format_document, kwargs={ sub_documents = documents[i:i + 10]
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form}) for doc in sub_documents:
# threads.append(document_format_thread) document_format_thread = threading.Thread(target=self.format_document, kwargs={
# document_format_thread.start() 'llm': llm, 'document_node': doc, 'split_documents': split_documents,
# for thread in threads: 'document_form': document_form})
# thread.join() threads.append(document_format_thread)
document_format_thread.start()
def worker(doc): for thread in threads:
return self.format_document(llm=llm, document_node=doc, split_documents=split_documents, thread.join()
document_form=document_form)
with ThreadPoolExecutor(max_workers=10) as executor: # max_workers 控制并发线程数
executor.map(worker, documents)
# with ThreadPoolExecutor() as executor:
# future_to_doc = {executor.submit(self.format_document, llm, doc, document_form): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
all_documents.extend(split_documents) all_documents.extend(split_documents)
return all_documents return all_documents
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment