ai-tech / dify · Commit a199fa63 (unverified)
Authored Sep 01, 2023 by takatost, committed by GitHub on Sep 01, 2023
feat: optimize high load sql query of document segment (#1078)
Parent: 4c8608dc
Showing 1 changed file with 8 additions and 5 deletions (+8 -5)
api/core/index/keyword_table_index/keyword_table_index.py
@@ -25,7 +25,7 @@ class KeywordTableIndex(BaseIndex):
         keyword_table = {}
         for text in texts:
             keywords = keyword_table_handler.extract_keywords(text.page_content, self._config.max_keywords_per_chunk)
-            self._update_segment_keywords(text.metadata['doc_id'], list(keywords))
+            self._update_segment_keywords(self.dataset.id, text.metadata['doc_id'], list(keywords))
             keyword_table = self._add_text_to_keyword_table(keyword_table, text.metadata['doc_id'], list(keywords))

         dataset_keyword_table = DatasetKeywordTable(
@@ -52,7 +52,7 @@ class KeywordTableIndex(BaseIndex):
         keyword_table = self._get_dataset_keyword_table()
         for text in texts:
             keywords = keyword_table_handler.extract_keywords(text.page_content, self._config.max_keywords_per_chunk)
-            self._update_segment_keywords(text.metadata['doc_id'], list(keywords))
+            self._update_segment_keywords(self.dataset.id, text.metadata['doc_id'], list(keywords))
             keyword_table = self._add_text_to_keyword_table(keyword_table, text.metadata['doc_id'], list(keywords))

         self._save_dataset_keyword_table(keyword_table)
@@ -199,15 +199,18 @@ class KeywordTableIndex(BaseIndex):
         return sorted_chunk_indices[:k]

-    def _update_segment_keywords(self, node_id: str, keywords: List[str]):
-        document_segment = db.session.query(DocumentSegment).filter(DocumentSegment.index_node_id == node_id).first()
+    def _update_segment_keywords(self, dataset_id: str, node_id: str, keywords: List[str]):
+        document_segment = db.session.query(DocumentSegment).filter(
+            DocumentSegment.dataset_id == dataset_id,
+            DocumentSegment.index_node_id == node_id
+        ).first()
         if document_segment:
             document_segment.keywords = keywords
             db.session.commit()

     def create_segment_keywords(self, node_id: str, keywords: List[str]):
         keyword_table = self._get_dataset_keyword_table()
-        self._update_segment_keywords(node_id, keywords)
+        self._update_segment_keywords(self.dataset.id, node_id, keywords)
         keyword_table = self._add_text_to_keyword_table(keyword_table, node_id, keywords)
         self._save_dataset_keyword_table(keyword_table)
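A minimal sketch of the query shape before and after this change, for readers who want to see what the extra filter does. Everything below (the standalone DocumentSegment model, its columns, the composite index name, and the in-memory SQLite engine) is an illustrative assumption for demonstration, not dify's actual schema; only the two-column filter mirrors the diff above. The idea, presumably, is that constraining the lookup to one dataset lets the database match against a dataset-scoped index instead of resolving index_node_id across every dataset's segments, which is the "high load" cost the commit title refers to.

# sketch_segment_query.py -- hypothetical, self-contained demo (SQLAlchemy 1.4+)
from sqlalchemy import Column, Index, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class DocumentSegment(Base):
    # Simplified stand-in for the real document_segments model.
    __tablename__ = 'document_segments'
    id = Column(String, primary_key=True)
    dataset_id = Column(String, nullable=False)
    index_node_id = Column(String, nullable=False)
    keywords = Column(String)  # simplified; assume a serialized keyword list
    __table_args__ = (
        # Hypothetical composite index the narrower filter could use.
        Index('idx_segment_dataset_node', 'dataset_id', 'index_node_id'),
    )

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)

with Session(engine) as session:
    # Before: look up a segment by index_node_id alone.
    old_query = session.query(DocumentSegment).filter(
        DocumentSegment.index_node_id == 'node-1'
    )
    # After: also constrain by dataset_id, as _update_segment_keywords now does.
    new_query = session.query(DocumentSegment).filter(
        DocumentSegment.dataset_id == 'dataset-1',
        DocumentSegment.index_node_id == 'node-1'
    )
    # Print the compiled SQL to compare the WHERE clauses.
    print(old_query.statement.compile(engine))
    print(new_query.statement.compile(engine))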