Commit 6cc4f47d authored by Jyong

add notion sync

parent a5a61197
...@@ -101,6 +101,7 @@ class DocumentResource(Resource): ...@@ -101,6 +101,7 @@ class DocumentResource(Resource):
return documents return documents
class GetProcessRuleApi(Resource): class GetProcessRuleApi(Resource):
@setup_required @setup_required
@login_required @login_required
...@@ -364,16 +365,11 @@ class DocumentIndexingStatusApi(DocumentResource): ...@@ -364,16 +365,11 @@ class DocumentIndexingStatusApi(DocumentResource):
documents = self.get_batch_documents(dataset_id, batch) documents = self.get_batch_documents(dataset_id, batch)
documents_status = [] documents_status = []
for document in documents: for document in documents:
completed_segments = DocumentSegment.query \ completed_segments = DocumentSegment.query.filter(DocumentSegment.completed_at.isnot(None),
.filter(DocumentSegment.completed_at.isnot(None),
DocumentSegment.document_id == str(document.id), DocumentSegment.document_id == str(document.id),
DocumentSegment.status != 're_segment') \ DocumentSegment.status != 're_segment').count()
) total_segments = DocumentSegment.query.filter(DocumentSegment.document_id == str(document.id),
.count() DocumentSegment.status != 're_segment').count()
total_segments = DocumentSegment.query \
.filter_by(document_id=str(document.id)) \
.count()
document.completed_segments = completed_segments document.completed_segments = completed_segments
document.total_segments = total_segments document.total_segments = total_segments
documents_status.append(marshal(document, self.document_status_fields)) documents_status.append(marshal(document, self.document_status_fields))
...@@ -447,7 +443,7 @@ class DocumentDetailApi(DocumentResource): ...@@ -447,7 +443,7 @@ class DocumentDetailApi(DocumentResource):
'created_at': document.created_at.timestamp(), 'created_at': document.created_at.timestamp(),
'tokens': document.tokens, 'tokens': document.tokens,
'indexing_status': document.indexing_status, 'indexing_status': document.indexing_status,
'completed_at': int(document.completed_at.timestamp())if document.completed_at else None, 'completed_at': int(document.completed_at.timestamp()) if document.completed_at else None,
'updated_at': int(document.updated_at.timestamp()) if document.updated_at else None, 'updated_at': int(document.updated_at.timestamp()) if document.updated_at else None,
'indexing_latency': document.indexing_latency, 'indexing_latency': document.indexing_latency,
'error': document.error, 'error': document.error,
......
...@@ -382,8 +382,11 @@ class DocumentService: ...@@ -382,8 +382,11 @@ class DocumentService:
if dataset.indexing_technique == 'high_quality': if dataset.indexing_technique == 'high_quality':
IndexBuilder.get_default_service_context(dataset.tenant_id) IndexBuilder.get_default_service_context(dataset.tenant_id)
documents = []
if 'original_document_id' in document_data and document_data["original_document_id"]: if 'original_document_id' in document_data and document_data["original_document_id"]:
document = DocumentService.update_document_with_dataset_id(dataset, document_data, account) document = DocumentService.update_document_with_dataset_id(dataset, document_data, account)
documents.append(document)
else:
# save process rule # save process rule
if not dataset_process_rule: if not dataset_process_rule:
process_rule = document_data["process_rule"] process_rule = document_data["process_rule"]
...@@ -406,7 +409,6 @@ class DocumentService: ...@@ -406,7 +409,6 @@ class DocumentService:
position = DocumentService.get_documents_position(dataset.id) position = DocumentService.get_documents_position(dataset.id)
batch = time.strftime('%Y%m%d%H%M%S') + str(random.randint(100000, 999999)) batch = time.strftime('%Y%m%d%H%M%S') + str(random.randint(100000, 999999))
document_ids = [] document_ids = []
documents = []
if document_data["data_source"]["type"] == "upload_file": if document_data["data_source"]["type"] == "upload_file":
upload_file_list = document_data["data_source"]["info"] upload_file_list = document_data["data_source"]["info"]
for upload_file in upload_file_list: for upload_file in upload_file_list:
...@@ -486,8 +488,8 @@ class DocumentService: ...@@ -486,8 +488,8 @@ class DocumentService:
) )
return document return document
@staticmethod @staticmethod
def update_document_with_dataset_id(dataset: Dataset, document_data: dict, def update_document_with_dataset_id(dataset: Dataset, document_data: dict,
account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None,
created_from: str = 'web'): created_from: str = 'web'):
document = DocumentService.get_document(dataset.id, document_data["original_document_id"]) document = DocumentService.get_document(dataset.id, document_data["original_document_id"])
...@@ -518,7 +520,9 @@ def update_document_with_dataset_id(dataset: Dataset, document_data: dict, ...@@ -518,7 +520,9 @@ def update_document_with_dataset_id(dataset: Dataset, document_data: dict,
file_name = '' file_name = ''
data_source_info = {} data_source_info = {}
if document_data["data_source"]["type"] == "upload_file": if document_data["data_source"]["type"] == "upload_file":
file_id = document_data["data_source"]["info"] upload_file_list = document_data["data_source"]["info"]
for upload_file in upload_file_list:
file_id = upload_file["upload_file_id"]
file = db.session.query(UploadFile).filter( file = db.session.query(UploadFile).filter(
UploadFile.tenant_id == dataset.tenant_id, UploadFile.tenant_id == dataset.tenant_id,
UploadFile.id == file_id UploadFile.id == file_id
...@@ -532,6 +536,25 @@ def update_document_with_dataset_id(dataset: Dataset, document_data: dict, ...@@ -532,6 +536,25 @@ def update_document_with_dataset_id(dataset: Dataset, document_data: dict,
data_source_info = { data_source_info = {
"upload_file_id": file_id, "upload_file_id": file_id,
} }
elif document_data["data_source"]["type"] == "notion_import":
notion_info_list = document_data["data_source"]['info']
for notion_info in notion_info_list:
workspace_id = notion_info['workspace_id']
data_source_binding = DataSourceBinding.query.filter(
db.and_(
DataSourceBinding.tenant_id == current_user.current_tenant_id,
DataSourceBinding.provider == 'notion',
DataSourceBinding.disabled == False,
DataSourceBinding.source_info['workspace_id'] == f'"{workspace_id}"'
)
).first()
if not data_source_binding:
raise ValueError('Data source binding not found.')
for page in notion_info['pages']:
data_source_info = {
"notion_workspace_id": workspace_id,
"notion_page_id": page['page_id'],
}
document.data_source_type = document_data["data_source"]["type"] document.data_source_type = document_data["data_source"]["type"]
document.data_source_info = json.dumps(data_source_info) document.data_source_info = json.dumps(data_source_info)
document.name = file_name document.name = file_name
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment