Commit d8d27a49 authored by jyong

add notion parent id and icon support

parent 3b2b8199
......@@ -218,7 +218,7 @@ class DatasetIndexingEstimateApi(Resource):
@account_initialization_required
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('info_list', type=list, required=True, nullable=True, location='json')
parser.add_argument('info_list', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
args = parser.parse_args()
# validate args
......
......@@ -357,6 +357,9 @@ class DocumentIndexingStatusApi(DocumentResource):
'total_segments': fields.Integer,
}
document_status_fields_list = {
'data': fields.List(fields.Nested(document_status_fields))
}
@setup_required
@login_required
@account_initialization_required
......@@ -374,8 +377,10 @@ class DocumentIndexingStatusApi(DocumentResource):
document.completed_segments = completed_segments
document.total_segments = total_segments
documents_status.append(marshal(document, self.document_status_fields))
return documents_status
data = {
'data': documents_status
}
return data
class DocumentDetailApi(DocumentResource):
......
......@@ -231,29 +231,30 @@ class IndexingRunner:
raise ValueError('Data source binding not found.')
reader = NotionPageReader(integration_token=data_source_binding.access_token)
for page in notion_info['pages']:
page_ids = [page['page_id']]
documents = reader.load_data_as_documents(page_ids=page_ids)
if page['type'] == 'page':
page_ids = [page['page_id']]
documents = reader.load_data_as_documents(page_ids=page_ids)
processing_rule = DatasetProcessRule(
mode=tmp_processing_rule["mode"],
rules=json.dumps(tmp_processing_rule["rules"])
)
processing_rule = DatasetProcessRule(
mode=tmp_processing_rule["mode"],
rules=json.dumps(tmp_processing_rule["rules"])
)
# get node parser for splitting
node_parser = self._get_node_parser(processing_rule)
# get node parser for splitting
node_parser = self._get_node_parser(processing_rule)
# split to nodes
nodes = self._split_to_nodes(
text_docs=documents,
node_parser=node_parser,
processing_rule=processing_rule
)
total_segments += len(nodes)
for node in nodes:
if len(preview_texts) < 5:
preview_texts.append(node.get_text())
# split to nodes
nodes = self._split_to_nodes(
text_docs=documents,
node_parser=node_parser,
processing_rule=processing_rule
)
total_segments += len(nodes)
for node in nodes:
if len(preview_texts) < 5:
preview_texts.append(node.get_text())
tokens += TokenCalculator.get_num_tokens(self.embedding_model_name, node.get_text())
tokens += TokenCalculator.get_num_tokens(self.embedding_model_name, node.get_text())
return {
"total_segments": total_segments,
......
......@@ -109,6 +109,68 @@ class NotionOAuth(OAuthDataSource):
def get_authorized_pages(self, access_token: str):
    """Return every Notion page and database visible to ``access_token``.

    The results of ``notion_page_search`` and ``notion_database_search``
    are flattened into a single list of dicts with the keys ``page_id``,
    ``page_name``, ``page_icon``, ``parent_id`` and ``type`` (``'page'`` or
    ``'database'``). Pages are listed first, then databases, each in the
    order the corresponding search returned them.

    Args:
        access_token: Token forwarded unchanged to both search helpers.

    Returns:
        list[dict]: One entry per page result and per database result.
    """

    def resolve_parent_id(parent):
        # Items that live directly in the workspace get the synthetic
        # id 'root'; otherwise the id is stored under the key named by
        # the parent's own ``type`` field.
        parent_type = parent['type']
        if parent_type == 'workspace':
            return 'root'
        return parent[parent_type]

    def first_plain_text(rich_text):
        # An empty (or missing) rich-text list means the item has no title.
        if rich_text:
            return rich_text[0]['plain_text']
        return 'Untitled'

    def page_title(properties):
        # Preferred lookups, in the original priority order. Checking for
        # the 'title' key avoids a KeyError when a property is merely
        # *called* 'Name' but is not the title property.
        for key in ('Name', 'title'):
            prop = properties.get(key)
            if isinstance(prop, dict) and 'title' in prop:
                return first_plain_text(prop['title'])
        # Fallback for pages whose title column was renamed: use whichever
        # property declares itself as title-typed.
        for prop in properties.values():
            if isinstance(prop, dict) and prop.get('type') == 'title':
                return first_plain_text(prop.get('title'))
        return 'Untitled'

    pages = []

    # get page detail
    for page_result in self.notion_page_search(access_token):
        page_icon = page_result['icon']
        # Unwrap the icon payload stored under the key named by its type.
        icon = page_icon[page_icon['type']] if page_icon else None
        pages.append({
            'page_id': page_result['id'],
            'page_name': page_title(page_result['properties']),
            'page_icon': icon,
            'parent_id': resolve_parent_id(page_result['parent']),
            'type': 'page'
        })

    # get database detail
    for database_result in self.notion_database_search(access_token):
        pages.append({
            'page_id': database_result['id'],
            'page_name': first_plain_text(database_result['title']),
            # NOTE(review): unlike pages, the database icon is passed
            # through as the raw icon object rather than unwrapped. Kept
            # as-is so the return shape does not change; confirm which
            # shape consumers expect.
            'page_icon': database_result['icon'],
            'parent_id': resolve_parent_id(database_result['parent']),
            'type': 'database'
        })

    return pages
def notion_page_search(self, access_token: str):
data = {
'filter': {
"value": "page",
......@@ -123,25 +185,21 @@ class NotionOAuth(OAuthDataSource):
response = requests.post(url=self._NOTION_PAGE_SEARCH, json=data, headers=headers)
response_json = response.json()
results = response_json['results']
for result in results:
page_id = result['id']
if 'Name' in result['properties']:
if len(result['properties']['Name']['title']) > 0:
page_name = result['properties']['Name']['title'][0]['plain_text']
else:
page_name = 'Untitled'
elif 'title' in result['properties']:
if len(result['properties']['title']['title']) > 0:
page_name = result['properties']['title']['title'][0]['plain_text']
else:
page_name = 'Untitled'
else:
page_name = 'Untitled'
page_icon = result['icon']
page = {
'page_id': page_id,
'page_name': page_name,
'page_icon': page_icon
return results
def notion_database_search(self, access_token: str):
    """Search Notion for the databases reachable with ``access_token``.

    Sends a POST to ``self._NOTION_PAGE_SEARCH`` with a filter that
    restricts results to objects whose ``object`` property is
    ``database``.

    Args:
        access_token: Sent as the Bearer token in the Authorization header.

    Returns:
        The ``results`` entry of the JSON response body.

    Raises:
        KeyError: If the response JSON has no ``results`` key.
    """
    data = {
        'filter': {
            "value": "database",
            "property": "object"
        }
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f"Bearer {access_token}",
        'Notion-Version': '2022-06-28',
    }
    response = requests.post(url=self._NOTION_PAGE_SEARCH, json=data, headers=headers)
    # NOTE(review): only this single response is read; if the endpoint
    # paginates, later result pages are not fetched — confirm against the
    # API documentation.
    response_json = response.json()
    return response_json['results']
......@@ -652,9 +652,15 @@ class DocumentService:
if args['data_source']['type'] not in Document.DATA_SOURCES:
raise ValueError("Data source type is invalid")
if 'info_list' not in args['data_source'] or not args['data_source']['info_list']:
raise ValueError("Data source info is required")
if args['data_source']['type'] == 'upload_file':
if 'info' not in args['data_source'] or not args['data_source']['info']:
raise ValueError("Data source info is required")
if 'file_info_list' not in args['data_source']['info_list'] or not args['data_source']['info_list']['file_info_list']:
raise ValueError("File source info is required")
if args['data_source']['type'] == 'notion_import':
if 'notion_info_list' not in args['data_source']['info_list'] or not args['data_source']['info_list']['notion_info_list']:
raise ValueError("Notion source info is required")
@classmethod
def process_rule_args_validate(cls, args: dict):
......@@ -731,7 +737,7 @@ class DocumentService:
raise ValueError("Data source info is required")
if not isinstance(args['info_list'], dict):
raise ValueError("Notion info is invalid")
raise ValueError("Data info is invalid")
if 'process_rule' not in args or not args['process_rule']:
raise ValueError("Process rule is required")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment