Commit de5942da authored by jyong

Merge branch 'feat/milvus-support' into deploy/dev

parents 60f44f4e 1073b964
......@@ -16,7 +16,6 @@ from models.dataset import DocumentSegment
from libs.helper import TimestampField
from services.dataset_service import DatasetService, DocumentService, SegmentService
from tasks.test_task import test_task
from tasks.enable_segment_to_index_task import enable_segment_to_index_task
from tasks.remove_segment_from_index_task import remove_segment_from_index_task
......@@ -285,15 +284,6 @@ class DatasetDocumentSegmentUpdateApi(Resource):
}, 200
class DatasetDocumentTest(Resource):
    """Debug endpoint that enqueues the `test_task` Celery job.

    PATCH /datasets/test — fire-and-forget: the task is queued
    asynchronously and its result is not awaited.
    """

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self):
        # Enqueue the background task without waiting for its result.
        test_task.delay()
        # Return a JSON body plus an explicit status code. The original
        # `return 200` made Flask-RESTful serialize the bare integer 200
        # as the response *data* (with a default 200 status) instead of
        # returning an empty/success payload.
        return {'result': 'success'}, 200
api.add_resource(DatasetDocumentSegmentListApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments')
api.add_resource(DatasetDocumentSegmentApi,
......@@ -302,5 +292,4 @@ api.add_resource(DatasetDocumentSegmentAddApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment')
api.add_resource(DatasetDocumentSegmentUpdateApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>')
api.add_resource(DatasetDocumentTest,
'/datasets/test')
......@@ -516,7 +516,7 @@ class IndexingRunner:
model_name='gpt-3.5-turbo',
max_tokens=2000
)
threads = []
#threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
......@@ -524,7 +524,7 @@ class IndexingRunner:
# document_format_thread.start()
# for thread in threads:
# thread.join()
asyncio.run(self.format_document(llm, documents, split_documents, document_form))
#asyncio.run(self.format_document(llm, documents, split_documents, document_form))
# threads.append(task)
# await asyncio.gather(*threads)
# asyncio.run(main())
......@@ -538,12 +538,12 @@ class IndexingRunner:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
self.format_document(llm, documents, split_documents, document_form)
all_documents.extend(split_documents)
return all_documents
async def format_document(self, llm: StreamableOpenAI, documents: List[Document], split_documents: List, document_form: str):
def format_document(self, llm: StreamableOpenAI, documents: List[Document], split_documents: List, document_form: str):
for document_node in documents:
print("process:" + document_node.page_content)
format_documents = []
......@@ -561,7 +561,7 @@ class IndexingRunner:
elif document_form == 'qa_model':
# qa model document
response = await LLMGenerator.generate_qa_document(llm, document_node.page_content)
response = LLMGenerator.generate_qa_document_sync(llm, document_node.page_content)
document_qa_list = self.format_split_text(response)
qa_documents = []
for result in document_qa_list:
......
import threading
from time import sleep, ctime
from typing import List
from celery import shared_task
@shared_task
def test_task():
    """Smoke-test task for verifying the Celery worker and threading.

    Starts three pairs of worker threads — a fast "smoke" loop (1 s per
    iteration) and a slow "drink" loop (10 s per iteration) — that all
    append progress markers to one shared list, joins every thread, then
    prints the collected markers together with start/end timestamps.

    Usage: test_task.delay()

    NOTE(review): the previous docstring ("Clean dataset when dataset
    deleted", with dataset_id/tenant_id arguments) was copy-pasted from
    another task; this task takes no arguments and touches no datasets.
    """
    print('---开始---:%s' % ctime())

    def smoke(count: List):
        # Fast worker: three iterations, one second apart.
        for i in range(3):
            print("smoke...%d" % i)
            count.append("smoke...%d" % i)
            sleep(1)

    def drunk(count: List):
        # Slow worker: three iterations, ten seconds apart.
        for i in range(3):
            print("drink...%d" % i)
            count.append("drink...%d" % i)
            sleep(10)

    count = []
    threads = []
    # Launch three smoke/drunk pairs; all six threads share `count`.
    # list.append is thread-safe under the GIL, so no lock is needed here.
    for i in range(3):
        t1 = threading.Thread(target=smoke, kwargs={'count': count})
        t2 = threading.Thread(target=drunk, kwargs={'count': count})
        threads.append(t1)
        threads.append(t2)
        t1.start()
        t2.start()
    # Wait for every worker before reporting the combined output.
    for thread in threads:
        thread.join()
    print(str(count))
    print('---结束---:%s' % ctime())
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment