Commit 148aa84f authored by jyong's avatar jyong

add doc-form at detail

parent c60c80c3
......@@ -220,6 +220,7 @@ class DatasetIndexingEstimateApi(Resource):
parser = reqparse.RequestParser()
parser.add_argument('info_list', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
args = parser.parse_args()
# validate args
DocumentService.estimate_args_validate(args)
......@@ -234,12 +235,12 @@ class DatasetIndexingEstimateApi(Resource):
raise NotFound("File not found.")
indexing_runner = IndexingRunner()
response = indexing_runner.file_indexing_estimate(file_details, args['process_rule'])
response = indexing_runner.file_indexing_estimate(file_details, args['process_rule'], args['process_rule'])
elif args['info_list']['data_source_type'] == 'notion_import':
indexing_runner = IndexingRunner()
response = indexing_runner.notion_indexing_estimate(args['info_list']['notion_info_list'],
args['process_rule'])
args['process_rule'], args['process_rule'])
else:
raise ValueError('Data source type not support')
return response, 200
......
......@@ -271,7 +271,7 @@ class DatasetDocumentListApi(Resource):
parser.add_argument('process_rule', type=dict, required=False, location='json')
parser.add_argument('duplicate', type=bool, nullable=False, location='json')
parser.add_argument('original_document_id', type=str, required=False, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=True, nullable=False, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
args = parser.parse_args()
if not dataset.indexing_technique and not args['indexing_technique']:
......@@ -316,7 +316,7 @@ class DatasetInitApi(Resource):
nullable=False, location='json')
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=True, nullable=False, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
args = parser.parse_args()
# validate args
......
......@@ -229,7 +229,7 @@ class IndexingRunner:
dataset_document.stopped_at = datetime.datetime.utcnow()
db.session.commit()
def file_indexing_estimate(self, file_details: List[UploadFile], tmp_processing_rule: dict) -> dict:
def file_indexing_estimate(self, file_details: List[UploadFile], tmp_processing_rule: dict, doc_from: str = None) -> dict:
"""
Estimate the indexing for the document.
"""
......@@ -261,7 +261,19 @@ class IndexingRunner:
tokens += TokenCalculator.get_num_tokens(self.embedding_model_name,
self.filter_string(document.page_content))
if doc_from and doc_from == 'qa_model':
if len(preview_texts) > 0:
# qa model document
response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0])
document_qa_list = self.format_split_text(response)
return {
"total_segments": total_segments,
"tokens": total_segments * 2000,
"total_price": '{:f}'.format(TokenCalculator.get_token_price('gpt-3.5-turbo', tokens, 'completion')),
"currency": TokenCalculator.get_currency(self.embedding_model_name),
"qa_preview": document_qa_list,
"preview": preview_texts
}
return {
"total_segments": total_segments,
"tokens": tokens,
......@@ -270,7 +282,7 @@ class IndexingRunner:
"preview": preview_texts
}
def notion_indexing_estimate(self, notion_info_list: list, tmp_processing_rule: dict) -> dict:
def notion_indexing_estimate(self, notion_info_list: list, tmp_processing_rule: dict, doc_from: str = None) -> dict:
"""
Estimate the indexing for the document.
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment