Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
d6c08ca6
Commit
d6c08ca6
authored
Jun 19, 2023
by
StyleZhang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
merge main
parent
586c90c9
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
0 additions
and
133 deletions
+0
-133
data_source_oauth.py
api/controllers/console/auth/data_source_oauth.py
+0
-6
data_source.py
api/controllers/console/datasets/data_source.py
+0
-12
file.py
api/controllers/console/datasets/file.py
+0
-4
notion.py
api/core/data_source/notion.py
+0
-32
indexing_runner.py
api/core/indexing_runner.py
+0
-43
oauth_data_source.py
api/libs/oauth_data_source.py
+0
-20
dataset_service.py
api/services/dataset_service.py
+0
-16
No files found.
api/controllers/console/auth/data_source_oauth.py
View file @
d6c08ca6
...
@@ -39,11 +39,6 @@ class OAuthDataSource(Resource):
...
@@ -39,11 +39,6 @@ class OAuthDataSource(Resource):
print
(
vars
(
oauth_provider
))
print
(
vars
(
oauth_provider
))
if
not
oauth_provider
:
if
not
oauth_provider
:
return
{
'error'
:
'Invalid provider'
},
400
return
{
'error'
:
'Invalid provider'
},
400
<<<<<<<
HEAD
auth_url
=
oauth_provider
.
get_authorization_url
()
return
redirect
(
auth_url
)
=======
if
current_app
.
config
.
get
(
'NOTION_INTEGRATION_TYPE'
)
==
'internal'
:
if
current_app
.
config
.
get
(
'NOTION_INTEGRATION_TYPE'
)
==
'internal'
:
internal_secret
=
current_app
.
config
.
get
(
'NOTION_INTERNAL_SECRET'
)
internal_secret
=
current_app
.
config
.
get
(
'NOTION_INTERNAL_SECRET'
)
oauth_provider
.
save_internal_access_token
(
internal_secret
)
oauth_provider
.
save_internal_access_token
(
internal_secret
)
...
@@ -53,7 +48,6 @@ class OAuthDataSource(Resource):
...
@@ -53,7 +48,6 @@ class OAuthDataSource(Resource):
return
redirect
(
auth_url
)
return
redirect
(
auth_url
)
>>>>>>>
main
class
OAuthDataSourceCallback
(
Resource
):
class
OAuthDataSourceCallback
(
Resource
):
...
...
api/controllers/console/datasets/data_source.py
View file @
d6c08ca6
...
@@ -219,11 +219,7 @@ class DataSourceNotionApi(Resource):
...
@@ -219,11 +219,7 @@ class DataSourceNotionApi(Resource):
@
setup_required
@
setup_required
@
login_required
@
login_required
@
account_initialization_required
@
account_initialization_required
<<<<<<<
HEAD
def
get
(
self
,
workspace_id
,
page_id
):
=======
def
get
(
self
,
workspace_id
,
page_id
,
page_type
):
def
get
(
self
,
workspace_id
,
page_id
,
page_type
):
>>>>>>>
main
workspace_id
=
str
(
workspace_id
)
workspace_id
=
str
(
workspace_id
)
page_id
=
str
(
page_id
)
page_id
=
str
(
page_id
)
data_source_binding
=
DataSourceBinding
.
query
.
filter
(
data_source_binding
=
DataSourceBinding
.
query
.
filter
(
...
@@ -237,16 +233,12 @@ class DataSourceNotionApi(Resource):
...
@@ -237,16 +233,12 @@ class DataSourceNotionApi(Resource):
if
not
data_source_binding
:
if
not
data_source_binding
:
raise
NotFound
(
'Data source binding not found.'
)
raise
NotFound
(
'Data source binding not found.'
)
reader
=
NotionPageReader
(
integration_token
=
data_source_binding
.
access_token
)
reader
=
NotionPageReader
(
integration_token
=
data_source_binding
.
access_token
)
<<<<<<<
HEAD
page_content
=
reader
.
read_page
(
page_id
)
=======
if
page_type
==
'page'
:
if
page_type
==
'page'
:
page_content
=
reader
.
read_page
(
page_id
)
page_content
=
reader
.
read_page
(
page_id
)
elif
page_type
==
'database'
:
elif
page_type
==
'database'
:
page_content
=
reader
.
query_database_data
(
page_id
)
page_content
=
reader
.
query_database_data
(
page_id
)
else
:
else
:
page_content
=
""
page_content
=
""
>>>>>>>
main
return
{
return
{
'content'
:
page_content
'content'
:
page_content
},
200
},
200
...
@@ -304,12 +296,8 @@ class DataSourceNotionDocumentSyncApi(Resource):
...
@@ -304,12 +296,8 @@ class DataSourceNotionDocumentSyncApi(Resource):
api
.
add_resource
(
DataSourceApi
,
'/data-source/integrates'
,
'/data-source/integrates/<uuid:binding_id>/<string:action>'
)
api
.
add_resource
(
DataSourceApi
,
'/data-source/integrates'
,
'/data-source/integrates/<uuid:binding_id>/<string:action>'
)
api
.
add_resource
(
DataSourceNotionListApi
,
'/notion/pre-import/pages'
)
api
.
add_resource
(
DataSourceNotionListApi
,
'/notion/pre-import/pages'
)
<<<<<<<
HEAD
api
.
add_resource
(
DataSourceNotionApi
,
'/notion/workspaces/<uuid:workspace_id>/pages/<uuid:page_id>/preview'
,
=======
api
.
add_resource
(
DataSourceNotionApi
,
api
.
add_resource
(
DataSourceNotionApi
,
'/notion/workspaces/<uuid:workspace_id>/pages/<uuid:page_id>/<string:page_type>/preview'
,
'/notion/workspaces/<uuid:workspace_id>/pages/<uuid:page_id>/<string:page_type>/preview'
,
>>>>>>>
main
'/datasets/notion-indexing-estimate'
)
'/datasets/notion-indexing-estimate'
)
api
.
add_resource
(
DataSourceNotionDatasetSyncApi
,
'/datasets/<uuid:dataset_id>/notion/sync'
)
api
.
add_resource
(
DataSourceNotionDatasetSyncApi
,
'/datasets/<uuid:dataset_id>/notion/sync'
)
api
.
add_resource
(
DataSourceNotionDocumentSyncApi
,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/notion/sync'
)
api
.
add_resource
(
DataSourceNotionDocumentSyncApi
,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/notion/sync'
)
api/controllers/console/datasets/file.py
View file @
d6c08ca6
...
@@ -143,14 +143,10 @@ class FilePreviewApi(Resource):
...
@@ -143,14 +143,10 @@ class FilePreviewApi(Resource):
with
open
(
filepath
,
"rb"
)
as
fp
:
with
open
(
filepath
,
"rb"
)
as
fp
:
data
=
fp
.
read
()
data
=
fp
.
read
()
encoding
=
chardet
.
detect
(
data
)[
'encoding'
]
encoding
=
chardet
.
detect
(
data
)[
'encoding'
]
<<<<<<<
HEAD
text
=
data
.
decode
(
encoding
=
encoding
)
.
strip
()
if
data
else
''
=======
if
encoding
:
if
encoding
:
text
=
data
.
decode
(
encoding
=
encoding
)
.
strip
()
if
data
else
''
text
=
data
.
decode
(
encoding
=
encoding
)
.
strip
()
if
data
else
''
else
:
else
:
text
=
data
.
decode
(
encoding
=
'utf-8'
)
.
strip
()
if
data
else
''
text
=
data
.
decode
(
encoding
=
'utf-8'
)
.
strip
()
if
data
else
''
>>>>>>>
main
text
=
text
[
0
:
PREVIEW_WORDS_LIMIT
]
if
text
else
''
text
=
text
[
0
:
PREVIEW_WORDS_LIMIT
]
if
text
else
''
return
{
'content'
:
text
}
return
{
'content'
:
text
}
...
...
api/core/data_source/notion.py
View file @
d6c08ca6
"""Notion reader."""
"""Notion reader."""
<<<<<<<
HEAD
=======
import
json
import
json
>>>>>>>
main
import
logging
import
logging
import
os
import
os
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -18,10 +15,7 @@ BLOCK_CHILD_URL_TMPL = "https://api.notion.com/v1/blocks/{block_id}/children"
...
@@ -18,10 +15,7 @@ BLOCK_CHILD_URL_TMPL = "https://api.notion.com/v1/blocks/{block_id}/children"
DATABASE_URL_TMPL
=
"https://api.notion.com/v1/databases/{database_id}/query"
DATABASE_URL_TMPL
=
"https://api.notion.com/v1/databases/{database_id}/query"
SEARCH_URL
=
"https://api.notion.com/v1/search"
SEARCH_URL
=
"https://api.notion.com/v1/search"
RETRIEVE_PAGE_URL_TMPL
=
"https://api.notion.com/v1/pages/{page_id}"
RETRIEVE_PAGE_URL_TMPL
=
"https://api.notion.com/v1/pages/{page_id}"
<<<<<<<
HEAD
=======
RETRIEVE_DATABASE_URL_TMPL
=
"https://api.notion.com/v1/databases/{database_id}"
RETRIEVE_DATABASE_URL_TMPL
=
"https://api.notion.com/v1/databases/{database_id}"
>>>>>>>
main
HEADING_TYPE
=
[
'heading_1'
,
'heading_2'
,
'heading_3'
]
HEADING_TYPE
=
[
'heading_1'
,
'heading_2'
,
'heading_3'
]
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -66,11 +60,7 @@ class NotionPageReader(BaseReader):
...
@@ -66,11 +60,7 @@ class NotionPageReader(BaseReader):
"GET"
,
block_url
,
headers
=
self
.
headers
,
json
=
query_dict
"GET"
,
block_url
,
headers
=
self
.
headers
,
json
=
query_dict
)
)
data
=
res
.
json
()
data
=
res
.
json
()
<<<<<<<
HEAD
if
data
[
"results"
]
is
None
:
=======
if
'results'
not
in
data
or
data
[
"results"
]
is
None
:
if
'results'
not
in
data
or
data
[
"results"
]
is
None
:
>>>>>>>
main
done
=
True
done
=
True
break
break
heading
=
''
heading
=
''
...
@@ -94,12 +84,8 @@ class NotionPageReader(BaseReader):
...
@@ -94,12 +84,8 @@ class NotionPageReader(BaseReader):
heading
=
text
heading
=
text
result_block_id
=
result
[
"id"
]
result_block_id
=
result
[
"id"
]
has_children
=
result
[
"has_children"
]
has_children
=
result
[
"has_children"
]
<<<<<<<
HEAD
if
has_children
:
=======
block_type
=
result
[
"type"
]
block_type
=
result
[
"type"
]
if
has_children
and
block_type
!=
'child_page'
:
if
has_children
and
block_type
!=
'child_page'
:
>>>>>>>
main
children_text
=
self
.
_read_block
(
children_text
=
self
.
_read_block
(
result_block_id
,
num_tabs
=
num_tabs
+
1
result_block_id
,
num_tabs
=
num_tabs
+
1
)
)
...
@@ -199,12 +185,8 @@ class NotionPageReader(BaseReader):
...
@@ -199,12 +185,8 @@ class NotionPageReader(BaseReader):
result_block_id
=
result
[
"id"
]
result_block_id
=
result
[
"id"
]
has_children
=
result
[
"has_children"
]
has_children
=
result
[
"has_children"
]
<<<<<<<
HEAD
if
has_children
:
=======
block_type
=
result
[
"type"
]
block_type
=
result
[
"type"
]
if
has_children
and
block_type
!=
'child_page'
:
if
has_children
and
block_type
!=
'child_page'
:
>>>>>>>
main
children_text
=
self
.
_read_block
(
children_text
=
self
.
_read_block
(
result_block_id
,
num_tabs
=
num_tabs
+
1
result_block_id
,
num_tabs
=
num_tabs
+
1
)
)
...
@@ -232,8 +214,6 @@ class NotionPageReader(BaseReader):
...
@@ -232,8 +214,6 @@ class NotionPageReader(BaseReader):
"""Read a page as documents."""
"""Read a page as documents."""
return
self
.
_read_parent_blocks
(
page_id
)
return
self
.
_read_parent_blocks
(
page_id
)
<<<<<<<
HEAD
=======
def
query_database_data
(
def
query_database_data
(
self
,
database_id
:
str
,
query_dict
:
Dict
[
str
,
Any
]
=
{}
self
,
database_id
:
str
,
query_dict
:
Dict
[
str
,
Any
]
=
{}
)
->
str
:
)
->
str
:
...
@@ -275,7 +255,6 @@ class NotionPageReader(BaseReader):
...
@@ -275,7 +255,6 @@ class NotionPageReader(BaseReader):
return
"
\n\n
"
.
join
(
database_content_list
)
return
"
\n\n
"
.
join
(
database_content_list
)
>>>>>>>
main
def
query_database
(
def
query_database
(
self
,
database_id
:
str
,
query_dict
:
Dict
[
str
,
Any
]
=
{}
self
,
database_id
:
str
,
query_dict
:
Dict
[
str
,
Any
]
=
{}
)
->
List
[
str
]:
)
->
List
[
str
]:
...
@@ -354,15 +333,8 @@ class NotionPageReader(BaseReader):
...
@@ -354,15 +333,8 @@ class NotionPageReader(BaseReader):
docs
=
[]
docs
=
[]
if
database_id
is
not
None
:
if
database_id
is
not
None
:
# get all the pages in the database
# get all the pages in the database
<<<<<<<
HEAD
page_ids
=
self
.
query_database
(
database_id
)
for
page_id
in
page_ids
:
page_text
=
self
.
read_page
(
page_id
)
docs
.
append
(
Document
(
page_text
))
=======
page_text
=
self
.
query_database_data
(
database_id
)
page_text
=
self
.
query_database_data
(
database_id
)
docs
.
append
(
Document
(
page_text
))
docs
.
append
(
Document
(
page_text
))
>>>>>>>
main
else
:
else
:
for
page_id
in
page_ids
:
for
page_id
in
page_ids
:
page_text_list
=
self
.
read_page_as_documents
(
page_id
)
page_text_list
=
self
.
read_page_as_documents
(
page_id
)
...
@@ -379,9 +351,6 @@ class NotionPageReader(BaseReader):
...
@@ -379,9 +351,6 @@ class NotionPageReader(BaseReader):
"GET"
,
retrieve_page_url
,
headers
=
self
.
headers
,
json
=
query_dict
"GET"
,
retrieve_page_url
,
headers
=
self
.
headers
,
json
=
query_dict
)
)
data
=
res
.
json
()
data
=
res
.
json
()
<<<<<<<
HEAD
# last_edited_time = datetime.fromisoformat(data["last_edited_time"])
=======
return
data
[
"last_edited_time"
]
return
data
[
"last_edited_time"
]
def
get_database_last_edited_time
(
self
,
database_id
:
str
)
->
str
:
def
get_database_last_edited_time
(
self
,
database_id
:
str
)
->
str
:
...
@@ -392,7 +361,6 @@ class NotionPageReader(BaseReader):
...
@@ -392,7 +361,6 @@ class NotionPageReader(BaseReader):
"GET"
,
retrieve_page_url
,
headers
=
self
.
headers
,
json
=
query_dict
"GET"
,
retrieve_page_url
,
headers
=
self
.
headers
,
json
=
query_dict
)
)
data
=
res
.
json
()
data
=
res
.
json
()
>>>>>>>
main
return
data
[
"last_edited_time"
]
return
data
[
"last_edited_time"
]
...
...
api/core/indexing_runner.py
View file @
d6c08ca6
...
@@ -235,29 +235,6 @@ class IndexingRunner:
...
@@ -235,29 +235,6 @@ class IndexingRunner:
if
page
[
'type'
]
==
'page'
:
if
page
[
'type'
]
==
'page'
:
page_ids
=
[
page
[
'page_id'
]]
page_ids
=
[
page
[
'page_id'
]]
documents
=
reader
.
load_data_as_documents
(
page_ids
=
page_ids
)
documents
=
reader
.
load_data_as_documents
(
page_ids
=
page_ids
)
<<<<<<<
HEAD
processing_rule
=
DatasetProcessRule
(
mode
=
tmp_processing_rule
[
"mode"
],
rules
=
json
.
dumps
(
tmp_processing_rule
[
"rules"
])
)
# get node parser for splitting
node_parser
=
self
.
_get_node_parser
(
processing_rule
)
# split to nodes
nodes
=
self
.
_split_to_nodes
(
text_docs
=
documents
,
node_parser
=
node_parser
,
processing_rule
=
processing_rule
)
total_segments
+=
len
(
nodes
)
for
node
in
nodes
:
if
len
(
preview_texts
)
<
5
:
preview_texts
.
append
(
node
.
get_text
())
tokens
+=
TokenCalculator
.
get_num_tokens
(
self
.
embedding_model_name
,
node
.
get_text
())
=======
elif
page
[
'type'
]
==
'database'
:
elif
page
[
'type'
]
==
'database'
:
documents
=
reader
.
load_data_as_documents
(
database_id
=
page
[
'page_id'
])
documents
=
reader
.
load_data_as_documents
(
database_id
=
page
[
'page_id'
])
else
:
else
:
...
@@ -282,7 +259,6 @@ class IndexingRunner:
...
@@ -282,7 +259,6 @@ class IndexingRunner:
preview_texts
.
append
(
node
.
get_text
())
preview_texts
.
append
(
node
.
get_text
())
tokens
+=
TokenCalculator
.
get_num_tokens
(
self
.
embedding_model_name
,
node
.
get_text
())
tokens
+=
TokenCalculator
.
get_num_tokens
(
self
.
embedding_model_name
,
node
.
get_text
())
>>>>>>>
main
return
{
return
{
"total_segments"
:
total_segments
,
"total_segments"
:
total_segments
,
...
@@ -314,10 +290,7 @@ class IndexingRunner:
...
@@ -314,10 +290,7 @@ class IndexingRunner:
raise
ValueError
(
"no notion page found"
)
raise
ValueError
(
"no notion page found"
)
workspace_id
=
data_source_info
[
'notion_workspace_id'
]
workspace_id
=
data_source_info
[
'notion_workspace_id'
]
page_id
=
data_source_info
[
'notion_page_id'
]
page_id
=
data_source_info
[
'notion_page_id'
]
<<<<<<<
HEAD
=======
page_type
=
data_source_info
[
'type'
]
page_type
=
data_source_info
[
'type'
]
>>>>>>>
main
data_source_binding
=
DataSourceBinding
.
query
.
filter
(
data_source_binding
=
DataSourceBinding
.
query
.
filter
(
db
.
and_
(
db
.
and_
(
DataSourceBinding
.
tenant_id
==
document
.
tenant_id
,
DataSourceBinding
.
tenant_id
==
document
.
tenant_id
,
...
@@ -328,11 +301,6 @@ class IndexingRunner:
...
@@ -328,11 +301,6 @@ class IndexingRunner:
)
.
first
()
)
.
first
()
if
not
data_source_binding
:
if
not
data_source_binding
:
raise
ValueError
(
'Data source binding not found.'
)
raise
ValueError
(
'Data source binding not found.'
)
<<<<<<<
HEAD
# add page last_edited_time to data_source_info
self
.
_get_notion_page_last_edited_time
(
page_id
,
data_source_binding
.
access_token
,
document
)
text_docs
=
self
.
_load_data_from_notion
(
page_id
,
data_source_binding
.
access_token
)
=======
if
page_type
==
'page'
:
if
page_type
==
'page'
:
# add page last_edited_time to data_source_info
# add page last_edited_time to data_source_info
self
.
_get_notion_page_last_edited_time
(
page_id
,
data_source_binding
.
access_token
,
document
)
self
.
_get_notion_page_last_edited_time
(
page_id
,
data_source_binding
.
access_token
,
document
)
...
@@ -341,7 +309,6 @@ class IndexingRunner:
...
@@ -341,7 +309,6 @@ class IndexingRunner:
# add page last_edited_time to data_source_info
# add page last_edited_time to data_source_info
self
.
_get_notion_database_last_edited_time
(
page_id
,
data_source_binding
.
access_token
,
document
)
self
.
_get_notion_database_last_edited_time
(
page_id
,
data_source_binding
.
access_token
,
document
)
text_docs
=
self
.
_load_database_data_from_notion
(
page_id
,
data_source_binding
.
access_token
)
text_docs
=
self
.
_load_database_data_from_notion
(
page_id
,
data_source_binding
.
access_token
)
>>>>>>>
main
# update document status to splitting
# update document status to splitting
self
.
_update_document_index_status
(
self
.
_update_document_index_status
(
document_id
=
document
.
id
,
document_id
=
document
.
id
,
...
@@ -383,24 +350,17 @@ class IndexingRunner:
...
@@ -383,24 +350,17 @@ class IndexingRunner:
return
text_docs
return
text_docs
<<<<<<<
HEAD
def
_load_data_from_notion
(
self
,
page_id
:
str
,
access_token
:
str
)
->
List
[
Document
]:
=======
def
_load_page_data_from_notion
(
self
,
page_id
:
str
,
access_token
:
str
)
->
List
[
Document
]:
def
_load_page_data_from_notion
(
self
,
page_id
:
str
,
access_token
:
str
)
->
List
[
Document
]:
>>>>>>>
main
page_ids
=
[
page_id
]
page_ids
=
[
page_id
]
reader
=
NotionPageReader
(
integration_token
=
access_token
)
reader
=
NotionPageReader
(
integration_token
=
access_token
)
text_docs
=
reader
.
load_data_as_documents
(
page_ids
=
page_ids
)
text_docs
=
reader
.
load_data_as_documents
(
page_ids
=
page_ids
)
return
text_docs
return
text_docs
<<<<<<<
HEAD
=======
def
_load_database_data_from_notion
(
self
,
database_id
:
str
,
access_token
:
str
)
->
List
[
Document
]:
def
_load_database_data_from_notion
(
self
,
database_id
:
str
,
access_token
:
str
)
->
List
[
Document
]:
reader
=
NotionPageReader
(
integration_token
=
access_token
)
reader
=
NotionPageReader
(
integration_token
=
access_token
)
text_docs
=
reader
.
load_data_as_documents
(
database_id
=
database_id
)
text_docs
=
reader
.
load_data_as_documents
(
database_id
=
database_id
)
return
text_docs
return
text_docs
>>>>>>>
main
def
_get_notion_page_last_edited_time
(
self
,
page_id
:
str
,
access_token
:
str
,
document
:
Document
):
def
_get_notion_page_last_edited_time
(
self
,
page_id
:
str
,
access_token
:
str
,
document
:
Document
):
reader
=
NotionPageReader
(
integration_token
=
access_token
)
reader
=
NotionPageReader
(
integration_token
=
access_token
)
last_edited_time
=
reader
.
get_page_last_edited_time
(
page_id
)
last_edited_time
=
reader
.
get_page_last_edited_time
(
page_id
)
...
@@ -413,8 +373,6 @@ class IndexingRunner:
...
@@ -413,8 +373,6 @@ class IndexingRunner:
Document
.
query
.
filter_by
(
id
=
document
.
id
)
.
update
(
update_params
)
Document
.
query
.
filter_by
(
id
=
document
.
id
)
.
update
(
update_params
)
db
.
session
.
commit
()
db
.
session
.
commit
()
<<<<<<<
HEAD
=======
def
_get_notion_database_last_edited_time
(
self
,
page_id
:
str
,
access_token
:
str
,
document
:
Document
):
def
_get_notion_database_last_edited_time
(
self
,
page_id
:
str
,
access_token
:
str
,
document
:
Document
):
reader
=
NotionPageReader
(
integration_token
=
access_token
)
reader
=
NotionPageReader
(
integration_token
=
access_token
)
last_edited_time
=
reader
.
get_database_last_edited_time
(
page_id
)
last_edited_time
=
reader
.
get_database_last_edited_time
(
page_id
)
...
@@ -427,7 +385,6 @@ class IndexingRunner:
...
@@ -427,7 +385,6 @@ class IndexingRunner:
Document
.
query
.
filter_by
(
id
=
document
.
id
)
.
update
(
update_params
)
Document
.
query
.
filter_by
(
id
=
document
.
id
)
.
update
(
update_params
)
db
.
session
.
commit
()
db
.
session
.
commit
()
>>>>>>>
main
def
_get_node_parser
(
self
,
processing_rule
:
DatasetProcessRule
)
->
NodeParser
:
def
_get_node_parser
(
self
,
processing_rule
:
DatasetProcessRule
)
->
NodeParser
:
"""
"""
Get the NodeParser object according to the processing rule.
Get the NodeParser object according to the processing rule.
...
...
api/libs/oauth_data_source.py
View file @
d6c08ca6
...
@@ -26,10 +26,7 @@ class NotionOAuth(OAuthDataSource):
...
@@ -26,10 +26,7 @@ class NotionOAuth(OAuthDataSource):
_TOKEN_URL
=
'https://api.notion.com/v1/oauth/token'
_TOKEN_URL
=
'https://api.notion.com/v1/oauth/token'
_NOTION_PAGE_SEARCH
=
"https://api.notion.com/v1/search"
_NOTION_PAGE_SEARCH
=
"https://api.notion.com/v1/search"
_NOTION_BLOCK_SEARCH
=
"https://api.notion.com/v1/blocks"
_NOTION_BLOCK_SEARCH
=
"https://api.notion.com/v1/blocks"
<<<<<<<
HEAD
=======
_NOTION_BOT_USER
=
"https://api.notion.com/v1/users/me"
_NOTION_BOT_USER
=
"https://api.notion.com/v1/users/me"
>>>>>>>
main
def
get_authorization_url
(
self
):
def
get_authorization_url
(
self
):
params
=
{
params
=
{
...
@@ -88,8 +85,6 @@ class NotionOAuth(OAuthDataSource):
...
@@ -88,8 +85,6 @@ class NotionOAuth(OAuthDataSource):
db
.
session
.
add
(
new_data_source_binding
)
db
.
session
.
add
(
new_data_source_binding
)
db
.
session
.
commit
()
db
.
session
.
commit
()
<<<<<<<
HEAD
=======
def
save_internal_access_token
(
self
,
access_token
:
str
):
def
save_internal_access_token
(
self
,
access_token
:
str
):
workspace_name
=
self
.
notion_workspace_name
(
access_token
)
workspace_name
=
self
.
notion_workspace_name
(
access_token
)
workspace_icon
=
None
workspace_icon
=
None
...
@@ -125,7 +120,6 @@ class NotionOAuth(OAuthDataSource):
...
@@ -125,7 +120,6 @@ class NotionOAuth(OAuthDataSource):
db
.
session
.
add
(
new_data_source_binding
)
db
.
session
.
add
(
new_data_source_binding
)
db
.
session
.
commit
()
db
.
session
.
commit
()
>>>>>>>
main
def
sync_data_source
(
self
,
binding_id
:
str
):
def
sync_data_source
(
self
,
binding_id
:
str
):
# save data source binding
# save data source binding
data_source_binding
=
DataSourceBinding
.
query
.
filter
(
data_source_binding
=
DataSourceBinding
.
query
.
filter
(
...
@@ -170,14 +164,11 @@ class NotionOAuth(OAuthDataSource):
...
@@ -170,14 +164,11 @@ class NotionOAuth(OAuthDataSource):
page_name
=
page_result
[
'properties'
][
'title'
][
'title'
][
0
][
'plain_text'
]
page_name
=
page_result
[
'properties'
][
'title'
][
'title'
][
0
][
'plain_text'
]
else
:
else
:
page_name
=
'Untitled'
page_name
=
'Untitled'
<<<<<<<
HEAD
=======
elif
'Title'
in
page_result
[
'properties'
]:
elif
'Title'
in
page_result
[
'properties'
]:
if
len
(
page_result
[
'properties'
][
'Title'
][
'title'
])
>
0
:
if
len
(
page_result
[
'properties'
][
'Title'
][
'title'
])
>
0
:
page_name
=
page_result
[
'properties'
][
'Title'
][
'title'
][
0
][
'plain_text'
]
page_name
=
page_result
[
'properties'
][
'Title'
][
'title'
][
0
][
'plain_text'
]
else
:
else
:
page_name
=
'Untitled'
page_name
=
'Untitled'
>>>>>>>
main
else
:
else
:
page_name
=
'Untitled'
page_name
=
'Untitled'
page_icon
=
page_result
[
'icon'
]
page_icon
=
page_result
[
'icon'
]
...
@@ -267,14 +258,10 @@ class NotionOAuth(OAuthDataSource):
...
@@ -267,14 +258,10 @@ class NotionOAuth(OAuthDataSource):
}
}
response
=
requests
.
post
(
url
=
self
.
_NOTION_PAGE_SEARCH
,
json
=
data
,
headers
=
headers
)
response
=
requests
.
post
(
url
=
self
.
_NOTION_PAGE_SEARCH
,
json
=
data
,
headers
=
headers
)
response_json
=
response
.
json
()
response_json
=
response
.
json
()
<<<<<<<
HEAD
results
=
response_json
[
'results'
]
=======
if
'results'
in
response_json
:
if
'results'
in
response_json
:
results
=
response_json
[
'results'
]
results
=
response_json
[
'results'
]
else
:
else
:
results
=
[]
results
=
[]
>>>>>>>
main
return
results
return
results
def
notion_block_parent_page_id
(
self
,
access_token
:
str
,
block_id
:
str
):
def
notion_block_parent_page_id
(
self
,
access_token
:
str
,
block_id
:
str
):
...
@@ -290,8 +277,6 @@ class NotionOAuth(OAuthDataSource):
...
@@ -290,8 +277,6 @@ class NotionOAuth(OAuthDataSource):
return
self
.
notion_block_parent_page_id
(
access_token
,
parent
[
parent_type
])
return
self
.
notion_block_parent_page_id
(
access_token
,
parent
[
parent_type
])
return
parent
[
parent_type
]
return
parent
[
parent_type
]
<<<<<<<
HEAD
=======
def
notion_workspace_name
(
self
,
access_token
:
str
):
def
notion_workspace_name
(
self
,
access_token
:
str
):
headers
=
{
headers
=
{
'Authorization'
:
f
"Bearer {access_token}"
,
'Authorization'
:
f
"Bearer {access_token}"
,
...
@@ -306,7 +291,6 @@ class NotionOAuth(OAuthDataSource):
...
@@ -306,7 +291,6 @@ class NotionOAuth(OAuthDataSource):
return
user_info
[
'workspace_name'
]
return
user_info
[
'workspace_name'
]
return
'workspace'
return
'workspace'
>>>>>>>
main
def
notion_database_search
(
self
,
access_token
:
str
):
def
notion_database_search
(
self
,
access_token
:
str
):
data
=
{
data
=
{
'filter'
:
{
'filter'
:
{
...
@@ -321,12 +305,8 @@ class NotionOAuth(OAuthDataSource):
...
@@ -321,12 +305,8 @@ class NotionOAuth(OAuthDataSource):
}
}
response
=
requests
.
post
(
url
=
self
.
_NOTION_PAGE_SEARCH
,
json
=
data
,
headers
=
headers
)
response
=
requests
.
post
(
url
=
self
.
_NOTION_PAGE_SEARCH
,
json
=
data
,
headers
=
headers
)
response_json
=
response
.
json
()
response_json
=
response
.
json
()
<<<<<<<
HEAD
results
=
response_json
[
'results'
]
=======
if
'results'
in
response_json
:
if
'results'
in
response_json
:
results
=
response_json
[
'results'
]
results
=
response_json
[
'results'
]
else
:
else
:
results
=
[]
results
=
[]
>>>>>>>
main
return
results
return
results
api/services/dataset_service.py
View file @
d6c08ca6
...
@@ -479,21 +479,6 @@ class DocumentService:
...
@@ -479,21 +479,6 @@ class DocumentService:
document_data
[
"data_source"
][
"type"
],
document_data
[
"data_source"
][
"type"
],
data_source_info
,
created_from
,
position
,
data_source_info
,
created_from
,
position
,
account
,
page
[
'page_name'
],
batch
)
account
,
page
[
'page_name'
],
batch
)
<<<<<<<
HEAD
if
page
[
'type'
]
==
'database'
:
document
.
splitting_completed_at
=
datetime
.
datetime
.
utcnow
()
document
.
cleaning_completed_at
=
datetime
.
datetime
.
utcnow
()
document
.
parsing_completed_at
=
datetime
.
datetime
.
utcnow
()
document
.
completed_at
=
datetime
.
datetime
.
utcnow
()
document
.
indexing_status
=
'completed'
document
.
word_count
=
0
document
.
tokens
=
0
document
.
indexing_latency
=
0
db
.
session
.
add
(
document
)
db
.
session
.
flush
()
if
page
[
'type'
]
!=
'database'
:
document_ids
.
append
(
document
.
id
)
=======
# if page['type'] == 'database':
# if page['type'] == 'database':
# document.splitting_completed_at = datetime.datetime.utcnow()
# document.splitting_completed_at = datetime.datetime.utcnow()
# document.cleaning_completed_at = datetime.datetime.utcnow()
# document.cleaning_completed_at = datetime.datetime.utcnow()
...
@@ -507,7 +492,6 @@ class DocumentService:
...
@@ -507,7 +492,6 @@ class DocumentService:
db
.
session
.
flush
()
db
.
session
.
flush
()
# if page['type'] != 'database':
# if page['type'] != 'database':
document_ids
.
append
(
document
.
id
)
document_ids
.
append
(
document
.
id
)
>>>>>>>
main
documents
.
append
(
document
)
documents
.
append
(
document
)
position
+=
1
position
+=
1
else
:
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment