Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
d8d27a49
Commit
d8d27a49
authored
Jun 08, 2023
by
jyong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add notion parent id and icon support
parent
3b2b8199
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
116 additions
and
46 deletions
+116
-46
datasets.py
api/controllers/console/datasets/datasets.py
+1
-1
datasets_document.py
api/controllers/console/datasets/datasets_document.py
+7
-2
indexing_runner.py
api/core/indexing_runner.py
+20
-19
oauth_data_source.py
api/libs/oauth_data_source.py
+79
-21
dataset_service.py
api/services/dataset_service.py
+9
-3
No files found.
api/controllers/console/datasets/datasets.py
View file @
d8d27a49
...
...
@@ -218,7 +218,7 @@ class DatasetIndexingEstimateApi(Resource):
@
account_initialization_required
def
post
(
self
):
parser
=
reqparse
.
RequestParser
()
parser
.
add_argument
(
'info_list'
,
type
=
lis
t
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
parser
.
add_argument
(
'info_list'
,
type
=
dic
t
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
parser
.
add_argument
(
'process_rule'
,
type
=
dict
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
args
=
parser
.
parse_args
()
# validate args
...
...
api/controllers/console/datasets/datasets_document.py
View file @
d8d27a49
...
...
@@ -357,6 +357,9 @@ class DocumentIndexingStatusApi(DocumentResource):
'total_segments'
:
fields
.
Integer
,
}
document_status_fields_list
=
{
'data'
:
fields
.
List
(
fields
.
Nested
(
document_status_fields
))
}
@
setup_required
@
login_required
@
account_initialization_required
...
...
@@ -374,8 +377,10 @@ class DocumentIndexingStatusApi(DocumentResource):
document
.
completed_segments
=
completed_segments
document
.
total_segments
=
total_segments
documents_status
.
append
(
marshal
(
document
,
self
.
document_status_fields
))
return
documents_status
data
=
{
'data'
:
documents_status
}
return
data
class
DocumentDetailApi
(
DocumentResource
):
...
...
api/core/indexing_runner.py
View file @
d8d27a49
...
...
@@ -231,29 +231,30 @@ class IndexingRunner:
raise
ValueError
(
'Data source binding not found.'
)
reader
=
NotionPageReader
(
integration_token
=
data_source_binding
.
access_token
)
for
page
in
notion_info
[
'pages'
]:
page_ids
=
[
page
[
'page_id'
]]
documents
=
reader
.
load_data_as_documents
(
page_ids
=
page_ids
)
if
page
[
'type'
]
==
'page'
:
page_ids
=
[
page
[
'page_id'
]]
documents
=
reader
.
load_data_as_documents
(
page_ids
=
page_ids
)
processing_rule
=
DatasetProcessRule
(
mode
=
tmp_processing_rule
[
"mode"
],
rules
=
json
.
dumps
(
tmp_processing_rule
[
"rules"
])
)
processing_rule
=
DatasetProcessRule
(
mode
=
tmp_processing_rule
[
"mode"
],
rules
=
json
.
dumps
(
tmp_processing_rule
[
"rules"
])
)
# get node parser for splitting
node_parser
=
self
.
_get_node_parser
(
processing_rule
)
# get node parser for splitting
node_parser
=
self
.
_get_node_parser
(
processing_rule
)
# split to nodes
nodes
=
self
.
_split_to_nodes
(
text_docs
=
documents
,
node_parser
=
node_parser
,
processing_rule
=
processing_rule
)
total_segments
+=
len
(
nodes
)
for
node
in
nodes
:
if
len
(
preview_texts
)
<
5
:
preview_texts
.
append
(
node
.
get_text
())
# split to nodes
nodes
=
self
.
_split_to_nodes
(
text_docs
=
documents
,
node_parser
=
node_parser
,
processing_rule
=
processing_rule
)
total_segments
+=
len
(
nodes
)
for
node
in
nodes
:
if
len
(
preview_texts
)
<
5
:
preview_texts
.
append
(
node
.
get_text
())
tokens
+=
TokenCalculator
.
get_num_tokens
(
self
.
embedding_model_name
,
node
.
get_text
())
tokens
+=
TokenCalculator
.
get_num_tokens
(
self
.
embedding_model_name
,
node
.
get_text
())
return
{
"total_segments"
:
total_segments
,
...
...
api/libs/oauth_data_source.py
View file @
d8d27a49
...
...
@@ -109,6 +109,68 @@ class NotionOAuth(OAuthDataSource):
def get_authorized_pages(self, access_token: str):
    """Collect the Notion pages and databases reachable with ``access_token``.

    Calls ``self.notion_page_search`` and then
    ``self.notion_database_search`` and flattens both result lists into a
    single list of dicts. Page entries come first, database entries after.

    Each entry has the keys:

    * ``page_id``   - the result's ``id``.
    * ``page_name`` - ``plain_text`` of the first title fragment, or
      ``'Untitled'`` when there is no title property or it is empty.
    * ``page_icon`` - the value stored under the icon's own ``type`` key,
      or ``None`` when the result has no icon.
    * ``parent_id`` - ``'root'`` when the parent type is ``'workspace'``,
      otherwise the value stored under the parent's own ``type`` key.
    * ``type``      - ``'page'`` or ``'database'``.

    Args:
        access_token: Token forwarded unchanged to both search helpers.

    Returns:
        list[dict]: One entry per search result, in the order described.

    Raises:
        KeyError: If a result lacks ``id``, ``parent``, or the title
            container (``properties`` for pages, ``title`` for databases).
    """
    # NOTE(review): both search helpers are assumed to return a list of raw
    # Notion result dicts - confirm against their implementations.

    def _first_plain_text(rich_text):
        # An empty title array means the item was never given a title.
        return rich_text[0]['plain_text'] if rich_text else 'Untitled'

    def _page_title(result):
        properties = result['properties']
        # 'Name' takes precedence over 'title' when both are present.
        for key in ('Name', 'title'):
            if key in properties:
                return _first_plain_text(properties[key]['title'])
        return 'Untitled'

    def _database_title(result):
        return _first_plain_text(result['title'])

    def _icon(result):
        # Unwrap the icon the same way for pages and databases so that
        # 'page_icon' has one shape; a missing or null icon yields None.
        raw_icon = result.get('icon')
        return raw_icon[raw_icon['type']] if raw_icon else None

    def _parent_id(result):
        parent = result['parent']
        parent_type = parent['type']
        # Workspace-level items have no real parent id; use a sentinel.
        return 'root' if parent_type == 'workspace' else parent[parent_type]

    def _entry(result, name, kind):
        return {
            'page_id': result['id'],
            'page_name': name,
            'page_icon': _icon(result),
            'parent_id': _parent_id(result),
            'type': kind,
        }

    page_results = self.notion_page_search(access_token)
    database_results = self.notion_database_search(access_token)

    pages = [
        _entry(result, _page_title(result), 'page')
        for result in page_results
    ]
    pages.extend(
        _entry(result, _database_title(result), 'database')
        for result in database_results
    )
    return pages
def
notion_page_search
(
self
,
access_token
:
str
):
data
=
{
'filter'
:
{
"value"
:
"page"
,
...
...
@@ -123,25 +185,21 @@ class NotionOAuth(OAuthDataSource):
response
=
requests
.
post
(
url
=
self
.
_NOTION_PAGE_SEARCH
,
json
=
data
,
headers
=
headers
)
response_json
=
response
.
json
()
results
=
response_json
[
'results'
]
for
result
in
results
:
page_id
=
result
[
'id'
]
if
'Name'
in
result
[
'properties'
]:
if
len
(
result
[
'properties'
][
'Name'
][
'title'
])
>
0
:
page_name
=
result
[
'properties'
][
'Name'
][
'title'
][
0
][
'plain_text'
]
else
:
page_name
=
'Untitled'
elif
'title'
in
result
[
'properties'
]:
if
len
(
result
[
'properties'
][
'title'
][
'title'
])
>
0
:
page_name
=
result
[
'properties'
][
'title'
][
'title'
][
0
][
'plain_text'
]
else
:
page_name
=
'Untitled'
else
:
page_name
=
'Untitled'
page_icon
=
result
[
'icon'
]
page
=
{
'page_id'
:
page_id
,
'page_name'
:
page_name
,
'page_icon'
:
page_icon
return
results
def
notion_database_search
(
self
,
access_token
:
str
):
data
=
{
'filter'
:
{
"value"
:
"database"
,
"property"
:
"object"
}
pages
.
append
(
page
)
return
pages
}
headers
=
{
'Content-Type'
:
'application/json'
,
'Authorization'
:
f
"Bearer {access_token}"
,
'Notion-Version'
:
'2022-06-28'
,
}
response
=
requests
.
post
(
url
=
self
.
_NOTION_PAGE_SEARCH
,
json
=
data
,
headers
=
headers
)
response_json
=
response
.
json
()
results
=
response_json
[
'results'
]
return
results
api/services/dataset_service.py
View file @
d8d27a49
...
...
@@ -652,9 +652,15 @@ class DocumentService:
if
args
[
'data_source'
][
'type'
]
not
in
Document
.
DATA_SOURCES
:
raise
ValueError
(
"Data source type is invalid"
)
if
'info_list'
not
in
args
[
'data_source'
]
or
not
args
[
'data_source'
][
'info_list'
]:
raise
ValueError
(
"Data source info is required"
)
if
args
[
'data_source'
][
'type'
]
==
'upload_file'
:
if
'info'
not
in
args
[
'data_source'
]
or
not
args
[
'data_source'
][
'info'
]:
raise
ValueError
(
"Data source info is required"
)
if
'file_info_list'
not
in
args
[
'data_source'
][
'info_list'
]
or
not
args
[
'data_source'
][
'info_list'
][
'file_info_list'
]:
raise
ValueError
(
"File source info is required"
)
if
args
[
'data_source'
][
'type'
]
==
'notion_import'
:
if
'notion_info_list'
not
in
args
[
'data_source'
][
'info_list'
]
or
not
args
[
'data_source'
][
'info_list'
][
'notion_info_list'
]:
raise
ValueError
(
"Notion source info is required"
)
@
classmethod
def
process_rule_args_validate
(
cls
,
args
:
dict
):
...
...
@@ -731,7 +737,7 @@ class DocumentService:
raise
ValueError
(
"Data source info is required"
)
if
not
isinstance
(
args
[
'info_list'
],
dict
):
raise
ValueError
(
"
Notion
info is invalid"
)
raise
ValueError
(
"
Data
info is invalid"
)
if
'process_rule'
not
in
args
or
not
args
[
'process_rule'
]:
raise
ValueError
(
"Process rule is required"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment