ai-tech / dify · Commits

Commit 98a42e68, authored Jun 25, 2023 by John Wang
Parent: a2d498f0

fix: dataset switch indexing_technique invalid

Showing 2 changed files with 36 additions and 31 deletions:
  api/core/index/index.py                       +2   -2
  api/tasks/deal_dataset_vector_index_task.py   +34  -29
api/core/index/index.py

@@ -10,9 +10,9 @@ from models.dataset import Dataset

 class IndexBuilder:
     @classmethod
-    def get_index(cls, dataset: Dataset, indexing_technique: str):
+    def get_index(cls, dataset: Dataset, indexing_technique: str, ignore_high_quality_check: bool = False):
         if indexing_technique == "high_quality":
-            if dataset.indexing_technique != 'high_quality':
+            if not ignore_high_quality_check and dataset.indexing_technique != 'high_quality':
                 return None

             model_credentials = LLMBuilder.get_model_credentials(
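For context, a minimal caller-side sketch of what the new flag enables. It is not part of the commit: the helper function name is hypothetical, the import paths are inferred from the file layout shown in this diff, and the 'economy' value is assumed to be the non-high-quality indexing technique.

# Hedged sketch, not from the commit: using the new ignore_high_quality_check flag.
from core.index.index import IndexBuilder   # path inferred from api/core/index/index.py
from models.dataset import Dataset

def drop_stale_high_quality_index(dataset: Dataset) -> None:
    # Before this commit, get_index(dataset, 'high_quality') returned None as soon as
    # dataset.indexing_technique had been switched away from 'high_quality', so the
    # old vector index could never be obtained for cleanup.
    # With ignore_high_quality_check=True the guard is bypassed and an index handle
    # is still returned, allowing the stale vectors to be deleted.
    index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
    if index:
        index.delete()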
api/tasks/deal_dataset_vector_index_task.py

@@ -26,18 +26,26 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
         dataset = Dataset.query.filter_by(id=dataset_id).first()

         if not dataset:
             raise Exception('Dataset not found')

-        dataset_documents = DatasetDocument.query.filter_by(dataset_id=dataset_id).all()
+        if action == "remove":
+            index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
+            index.delete()
+        elif action == "add":
+            dataset_documents = db.session.query(DatasetDocument).filter(
+                DatasetDocument.dataset_id == dataset_id,
+                DatasetDocument.indexing_status == 'completed',
+                DatasetDocument.enabled == True,
+                DatasetDocument.archived == False,
+            ).all()

             if dataset_documents:
                 # save vector index
-                index = IndexBuilder.get_index(dataset, 'high_quality')
-                if index:
+                index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
                 for dataset_document in dataset_documents:
-                    # delete from vector index
-                    if action == "remove":
-                        index.delete_by_document_id(dataset_document.id)
-                    elif action == "add":
-                        segments = db.session.query(DocumentSegment).filter(
-                            DocumentSegment.document_id == dataset_document.id,
-                            DocumentSegment.enabled == True
+                    segments = db.session.query(DocumentSegment).filter(
+                        DocumentSegment.document_id == dataset_document.id,
+                        DocumentSegment.enabled == True

@@ -58,10 +66,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
                         documents.append(document)

                     # save vector index
-                    index.add_texts(documents, duplicate_check=True)
+                    index.add_texts(documents)

         end_at = time.perf_counter()
         logging.info(
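For reference, a hedged sketch of how this task would be dispatched when a dataset's indexing_technique is switched. It assumes deal_dataset_vector_index_task is registered as a Celery task (its decorator sits outside the lines shown in this diff); the wrapper function name is illustrative only, and "add" / "remove" are the only action values visible in the changed code.

# Hypothetical caller sketch; import path inferred from api/tasks/deal_dataset_vector_index_task.py.
from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task

def on_indexing_technique_switched(dataset_id: str, new_technique: str) -> None:
    if new_technique == 'high_quality':
        # Rebuild the vector index from documents whose indexing has completed.
        deal_dataset_vector_index_task.delay(dataset_id, 'add')
    else:
        # Drop the now-stale high-quality vector index; the new
        # ignore_high_quality_check=True path makes this deletion work even though
        # dataset.indexing_technique no longer reads 'high_quality'.
        deal_dataset_vector_index_task.delay(dataset_id, 'remove')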