Commit de5942da authored by jyong

Merge branch 'feat/milvus-support' into deploy/dev

parents 60f44f4e 1073b964
......@@ -16,7 +16,6 @@ from models.dataset import DocumentSegment
from libs.helper import TimestampField
from services.dataset_service import DatasetService, DocumentService, SegmentService
from tasks.test_task import test_task
from tasks.enable_segment_to_index_task import enable_segment_to_index_task
from tasks.remove_segment_from_index_task import remove_segment_from_index_task
......@@ -285,15 +284,6 @@ class DatasetDocumentSegmentUpdateApi(Resource):
}, 200
class DatasetDocumentTest(Resource):
    """Debug endpoint that enqueues the `test_task` Celery job.

    PATCH /datasets/test — fire-and-forget: the task is queued
    asynchronously and its result is not awaited.
    """

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self):
        # Enqueue the background task without waiting for its result.
        test_task.delay()
        # Return a JSON body plus an explicit status code. The original
        # `return 200` made Flask-RESTful serialize the bare integer 200
        # as the response *data* (with a default 200 status) instead of
        # returning an empty/success payload.
        return {'result': 'success'}, 200
api.add_resource(DatasetDocumentSegmentListApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments')
api.add_resource(DatasetDocumentSegmentApi,
......@@ -302,5 +292,4 @@ api.add_resource(DatasetDocumentSegmentAddApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment')
api.add_resource(DatasetDocumentSegmentUpdateApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>')
api.add_resource(DatasetDocumentTest,
'/datasets/test')
......@@ -516,7 +516,7 @@ class IndexingRunner:
model_name='gpt-3.5-turbo',
max_tokens=2000
)
threads = []
#threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
......@@ -524,7 +524,7 @@ class IndexingRunner:
# document_format_thread.start()
# for thread in threads:
# thread.join()
asyncio.run(self.format_document(llm, documents, split_documents, document_form))
#asyncio.run(self.format_document(llm, documents, split_documents, document_form))
# threads.append(task)
# await asyncio.gather(*threads)
# asyncio.run(main())
......@@ -538,12 +538,12 @@ class IndexingRunner:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
self.format_document(llm, documents, split_documents, document_form)
all_documents.extend(split_documents)
return all_documents
async def format_document(self, llm: StreamableOpenAI, documents: List[Document], split_documents: List, document_form: str):
def format_document(self, llm: StreamableOpenAI, documents: List[Document], split_documents: List, document_form: str):
for document_node in documents:
print("process:" + document_node.page_content)
format_documents = []
......@@ -561,7 +561,7 @@ class IndexingRunner:
elif document_form == 'qa_model':
# qa model document
response = await LLMGenerator.generate_qa_document(llm, document_node.page_content)
response = LLMGenerator.generate_qa_document_sync(llm, document_node.page_content)
document_qa_list = self.format_split_text(response)
qa_documents = []
for result in document_qa_list:
......
import threading
from time import sleep, ctime
from typing import List
from celery import shared_task
@shared_task
def test_task():
    """Smoke-test task for verifying the Celery worker and threading.

    Starts three pairs of worker threads — a fast "smoke" loop (1 s per
    iteration) and a slow "drink" loop (10 s per iteration) — that all
    append progress markers to one shared list, joins every thread, then
    prints the collected markers together with start/end timestamps.

    Usage: test_task.delay()

    NOTE(review): the previous docstring ("Clean dataset when dataset
    deleted", with dataset_id/tenant_id arguments) was copy-pasted from
    another task; this task takes no arguments and touches no datasets.
    """
    print('---开始---:%s' % ctime())

    def smoke(count: List):
        # Fast worker: three iterations, one second apart.
        for i in range(3):
            print("smoke...%d" % i)
            count.append("smoke...%d" % i)
            sleep(1)

    def drunk(count: List):
        # Slow worker: three iterations, ten seconds apart.
        for i in range(3):
            print("drink...%d" % i)
            count.append("drink...%d" % i)
            sleep(10)

    count = []
    threads = []
    # Launch three smoke/drunk pairs; all six threads share `count`.
    # list.append is thread-safe under the GIL, so no lock is needed here.
    for i in range(3):
        t1 = threading.Thread(target=smoke, kwargs={'count': count})
        t2 = threading.Thread(target=drunk, kwargs={'count': count})
        threads.append(t1)
        threads.append(t2)
        t1.start()
        t2.start()
    # Wait for every worker before reporting the combined output.
    for thread in threads:
        thread.join()
    print(str(count))
    print('---结束---:%s' % ctime())
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment