Merge branch 'feat/add-thread-control' into deploy/dev

b38115b4 · jyong · 06ae4b4b · 50faca40 · b38115b4
Commit b38115b4 authored Jul 29, 2023 by jyong
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 19 deletions

indexing_runner.py api/core/indexing_runner.py +12 -19

No files found.
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -507,25 +507,18 @@ class IndexingRunner:
                model_name='gpt-3.5-turbo',
                max_tokens=2000
            )
-            # threads = []
+            for i in range(0, len(documents), 10):
-            # for doc in documents:
+                threads = []
-            #     document_format_thread = threading.Thread(target=self.format_document, kwargs={
+                sub_documents = documents[i:i + 10]
-            #         'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
+                for doc in sub_documents:
-            #     threads.append(document_format_thread)
+                    document_format_thread = threading.Thread(target=self.format_document, kwargs={
-            #     document_format_thread.start()
+                        'llm': llm, 'document_node': doc, 'split_documents': split_documents,
-            # for thread in threads:
+                        'document_form': document_form})
-            #     thread.join()
+                    threads.append(document_format_thread)
+                    document_format_thread.start()
-            def worker(doc):
+                for thread in threads:
-                return self.format_document(llm=llm, document_node=doc, split_documents=split_documents,
+                    thread.join()
-                                            document_form=document_form)
-            with ThreadPoolExecutor(max_workers=10) as executor:  # max_workers 控制并发线程数
-                executor.map(worker, documents)
-            # with ThreadPoolExecutor() as executor:
-            #     future_to_doc = {executor.submit(self.format_document, llm, doc, document_form): doc for doc in documents}
-            #     for future in concurrent.futures.as_completed(future_to_doc):
-            #         split_documents.extend(future.result())
            all_documents.extend(split_documents)
        return all_documents