Commit de5942da authored by jyong's avatar jyong

Merge branch 'feat/milvus-support' into deploy/dev

parents 60f44f4e 1073b964
...@@ -16,7 +16,6 @@ from models.dataset import DocumentSegment ...@@ -16,7 +16,6 @@ from models.dataset import DocumentSegment
from libs.helper import TimestampField from libs.helper import TimestampField
from services.dataset_service import DatasetService, DocumentService, SegmentService from services.dataset_service import DatasetService, DocumentService, SegmentService
from tasks.test_task import test_task
from tasks.enable_segment_to_index_task import enable_segment_to_index_task from tasks.enable_segment_to_index_task import enable_segment_to_index_task
from tasks.remove_segment_from_index_task import remove_segment_from_index_task from tasks.remove_segment_from_index_task import remove_segment_from_index_task
...@@ -285,15 +284,6 @@ class DatasetDocumentSegmentUpdateApi(Resource): ...@@ -285,15 +284,6 @@ class DatasetDocumentSegmentUpdateApi(Resource):
}, 200 }, 200
class DatasetDocumentTest(Resource):
    """Debug-only endpoint that enqueues the Celery smoke-test job."""

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self):
        # Fire-and-forget: queue the task asynchronously and reply at once.
        test_task.delay()
        return 200
api.add_resource(DatasetDocumentSegmentListApi, api.add_resource(DatasetDocumentSegmentListApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments') '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments')
api.add_resource(DatasetDocumentSegmentApi, api.add_resource(DatasetDocumentSegmentApi,
...@@ -302,5 +292,4 @@ api.add_resource(DatasetDocumentSegmentAddApi, ...@@ -302,5 +292,4 @@ api.add_resource(DatasetDocumentSegmentAddApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment') '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment')
api.add_resource(DatasetDocumentSegmentUpdateApi, api.add_resource(DatasetDocumentSegmentUpdateApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>') '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>')
api.add_resource(DatasetDocumentTest,
'/datasets/test')
...@@ -516,7 +516,7 @@ class IndexingRunner: ...@@ -516,7 +516,7 @@ class IndexingRunner:
model_name='gpt-3.5-turbo', model_name='gpt-3.5-turbo',
max_tokens=2000 max_tokens=2000
) )
threads = [] #threads = []
# for doc in documents: # for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={ # document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form}) # 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
...@@ -524,7 +524,7 @@ class IndexingRunner: ...@@ -524,7 +524,7 @@ class IndexingRunner:
# document_format_thread.start() # document_format_thread.start()
# for thread in threads: # for thread in threads:
# thread.join() # thread.join()
asyncio.run(self.format_document(llm, documents, split_documents, document_form)) #asyncio.run(self.format_document(llm, documents, split_documents, document_form))
# threads.append(task) # threads.append(task)
# await asyncio.gather(*threads) # await asyncio.gather(*threads)
# asyncio.run(main()) # asyncio.run(main())
...@@ -538,12 +538,12 @@ class IndexingRunner: ...@@ -538,12 +538,12 @@ class IndexingRunner:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents} # future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc): # for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result()) # split_documents.extend(future.result())
self.format_document(llm, documents, split_documents, document_form)
all_documents.extend(split_documents) all_documents.extend(split_documents)
return all_documents return all_documents
async def format_document(self, llm: StreamableOpenAI, documents: List[Document], split_documents: List, document_form: str): def format_document(self, llm: StreamableOpenAI, documents: List[Document], split_documents: List, document_form: str):
for document_node in documents: for document_node in documents:
print("process:" + document_node.page_content) print("process:" + document_node.page_content)
format_documents = [] format_documents = []
...@@ -561,7 +561,7 @@ class IndexingRunner: ...@@ -561,7 +561,7 @@ class IndexingRunner:
elif document_form == 'qa_model': elif document_form == 'qa_model':
# qa model document # qa model document
response = await LLMGenerator.generate_qa_document(llm, document_node.page_content) response = LLMGenerator.generate_qa_document_sync(llm, document_node.page_content)
document_qa_list = self.format_split_text(response) document_qa_list = self.format_split_text(response)
qa_documents = [] qa_documents = []
for result in document_qa_list: for result in document_qa_list:
......
import threading
from time import sleep, ctime
from typing import List
from celery import shared_task
@shared_task
def test_task():
    """
    Celery smoke-test task: spawn interleaved worker threads that print
    and record progress messages, wait for all of them, then print the
    collected log.
    Usage: test_task.delay()
    """
    print('---开始---:%s' % ctime())

    def smoke(count: List):
        # Fast worker: three one-second steps.
        for step in range(3):
            entry = "smoke...%d" % step
            print(entry)
            count.append(entry)
            sleep(1)

    def drunk(count: List):
        # Slow worker: three ten-second steps.
        for step in range(3):
            entry = "drink...%d" % step
            print(entry)
            count.append(entry)
            sleep(10)

    log: List = []
    workers = []
    # Start three smoke/drunk pairs, preserving the smoke-then-drunk order.
    for _ in range(3):
        for target in (smoke, drunk):
            worker = threading.Thread(target=target, kwargs={'count': log})
            workers.append(worker)
            worker.start()

    # Block until every worker has finished before reporting.
    for worker in workers:
        worker.join()
    print(str(log))
    print('---结束---:%s' % ctime())
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment