Commit f9c41df7 authored by jyong

Merge branch 'feat/milvus-support' into deploy/dev

parents 3f2bc97b 0a0960c1
@@ -220,6 +220,7 @@ class DatasetIndexingEstimateApi(Resource):
         parser = reqparse.RequestParser()
         parser.add_argument('info_list', type=dict, required=True, nullable=True, location='json')
         parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
+        parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
         args = parser.parse_args()
         # validate args
         DocumentService.estimate_args_validate(args)
@@ -234,12 +235,12 @@ class DatasetIndexingEstimateApi(Resource):
                 raise NotFound("File not found.")

             indexing_runner = IndexingRunner()
-            response = indexing_runner.file_indexing_estimate(file_details, args['process_rule'])
+            response = indexing_runner.file_indexing_estimate(file_details, args['process_rule'], args['doc_form'])
         elif args['info_list']['data_source_type'] == 'notion_import':
             indexing_runner = IndexingRunner()
             response = indexing_runner.notion_indexing_estimate(args['info_list']['notion_info_list'],
-                                                                args['process_rule'])
+                                                                args['process_rule'], args['doc_form'])
         else:
             raise ValueError('Data source type not support')

         return response, 200
...
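For context, after this change the indexing-estimate request body carries an optional doc_form field alongside the existing info_list and process_rule dicts. A minimal sketch of such a payload follows; only data_source_type, notion_info_list and doc_form appear in the diff, so every other key and value is an illustrative assumption rather than the endpoint's documented schema.

# Hypothetical request body for the dataset indexing estimate after this commit.
# Keys other than 'data_source_type', 'notion_info_list' and 'doc_form' are assumptions.
payload = {
    "info_list": {
        "data_source_type": "upload_file",   # or 'notion_import' (then supply 'notion_info_list')
    },
    "process_rule": {"mode": "automatic"},    # assumed shape of the processing rule dict
    "doc_form": "qa_model",                   # optional; omitting it now falls back to 'text_model'
}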
@@ -271,7 +271,7 @@ class DatasetDocumentListApi(Resource):
         parser.add_argument('process_rule', type=dict, required=False, location='json')
         parser.add_argument('duplicate', type=bool, nullable=False, location='json')
         parser.add_argument('original_document_id', type=str, required=False, location='json')
-        parser.add_argument('doc_form', type=str, default='text_model', required=True, nullable=False, location='json')
+        parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
         args = parser.parse_args()

         if not dataset.indexing_technique and not args['indexing_technique']:
@@ -316,7 +316,7 @@ class DatasetInitApi(Resource):
                                 nullable=False, location='json')
         parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
         parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
-        parser.add_argument('doc_form', type=str, default='text_model', required=True, nullable=False, location='json')
+        parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
         args = parser.parse_args()
         # validate args
...
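The controller-side change above is what makes doc_form optional: with required=False and default='text_model', a request that omits the field still parses cleanly instead of failing with a 400 error. A minimal, self-contained sketch of that behaviour, assuming Flask and Flask-RESTful are installed; the URL passed to the test request context is a placeholder, not the real route:

from flask import Flask
from flask_restful import reqparse

app = Flask(__name__)

parser = reqparse.RequestParser()
parser.add_argument('doc_form', type=str, default='text_model',
                    required=False, nullable=False, location='json')

# Field omitted: the default kicks in instead of a "missing required argument" error.
with app.test_request_context('/indexing-estimate', method='POST', json={}):
    print(parser.parse_args()['doc_form'])   # -> text_model

# Field supplied: the client's value is used as-is.
with app.test_request_context('/indexing-estimate', method='POST', json={'doc_form': 'qa_model'}):
    print(parser.parse_args()['doc_form'])   # -> qa_model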
@@ -229,7 +229,7 @@ class IndexingRunner:
         dataset_document.stopped_at = datetime.datetime.utcnow()
         db.session.commit()

-    def file_indexing_estimate(self, file_details: List[UploadFile], tmp_processing_rule: dict) -> dict:
+    def file_indexing_estimate(self, file_details: List[UploadFile], tmp_processing_rule: dict, doc_form: str = None) -> dict:
         """
         Estimate the indexing for the document.
         """
@@ -261,7 +261,19 @@ class IndexingRunner:
                 tokens += TokenCalculator.get_num_tokens(self.embedding_model_name,
                                                          self.filter_string(document.page_content))

+        if doc_form and doc_form == 'qa_model':
+            if len(preview_texts) > 0:
+                # qa model document
+                response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0])
+                document_qa_list = self.format_split_text(response)
+            return {
+                "total_segments": total_segments,
+                "tokens": total_segments * 2000,
+                "total_price": '{:f}'.format(TokenCalculator.get_token_price('gpt-3.5-turbo', tokens, 'completion')),
+                "currency": TokenCalculator.get_currency(self.embedding_model_name),
+                "qa_preview": document_qa_list,
+                "preview": preview_texts
+            }
         return {
             "total_segments": total_segments,
             "tokens": tokens,
@@ -270,7 +282,7 @@ class IndexingRunner:
             "preview": preview_texts
         }

-    def notion_indexing_estimate(self, notion_info_list: list, tmp_processing_rule: dict) -> dict:
+    def notion_indexing_estimate(self, notion_info_list: list, tmp_processing_rule: dict, doc_form: str = None) -> dict:
         """
         Estimate the indexing for the document.
         """
...
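The net effect of the indexing_runner change is that the estimate response gains an extra qa_preview key when doc_form is 'qa_model' (question/answer pairs generated from the first previewed segment), while the default text_model path is untouched. A hedged consumer-side sketch, based only on the keys visible in the diff; the inner structure of each qa_preview entry is not specified there and is left opaque here:

def print_estimate(response: dict) -> None:
    # Keys below are the ones returned by file_indexing_estimate in this commit.
    print(f"segments={response['total_segments']} "
          f"tokens={response['tokens']} "
          f"price={response['total_price']} {response['currency']}")

    for qa in response.get('qa_preview', []):
        # Present only when the estimate was requested with doc_form='qa_model'.
        print("Q/A preview:", qa)

    for text in response['preview']:
        print("Segment preview:", text[:80])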