Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
98a42e68
Commit
98a42e68
authored
Jun 25, 2023
by
John Wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: dataset switch indexing_technique invalid
parent
a2d498f0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
31 deletions
+36
-31
index.py
api/core/index/index.py
+2
-2
deal_dataset_vector_index_task.py
api/tasks/deal_dataset_vector_index_task.py
+34
-29
No files found.
api/core/index/index.py
View file @
98a42e68
...
@@ -10,9 +10,9 @@ from models.dataset import Dataset
...
@@ -10,9 +10,9 @@ from models.dataset import Dataset
class
IndexBuilder
:
class
IndexBuilder
:
@
classmethod
@
classmethod
def
get_index
(
cls
,
dataset
:
Dataset
,
indexing_technique
:
str
):
def
get_index
(
cls
,
dataset
:
Dataset
,
indexing_technique
:
str
,
ignore_high_quality_check
:
bool
=
False
):
if
indexing_technique
==
"high_quality"
:
if
indexing_technique
==
"high_quality"
:
if
dataset
.
indexing_technique
!=
'high_quality'
:
if
not
ignore_high_quality_check
and
dataset
.
indexing_technique
!=
'high_quality'
:
return
None
return
None
model_credentials
=
LLMBuilder
.
get_model_credentials
(
model_credentials
=
LLMBuilder
.
get_model_credentials
(
...
...
api/tasks/deal_dataset_vector_index_task.py
View file @
98a42e68
...
@@ -26,42 +26,47 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
...
@@ -26,42 +26,47 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
dataset
=
Dataset
.
query
.
filter_by
(
dataset
=
Dataset
.
query
.
filter_by
(
id
=
dataset_id
id
=
dataset_id
)
.
first
()
)
.
first
()
if
not
dataset
:
if
not
dataset
:
raise
Exception
(
'Dataset not found'
)
raise
Exception
(
'Dataset not found'
)
dataset_documents
=
DatasetDocument
.
query
.
filter_by
(
dataset_id
=
dataset_id
)
.
all
()
if
dataset_documents
:
if
action
==
"remove"
:
# save vector index
index
=
IndexBuilder
.
get_index
(
dataset
,
'high_quality'
,
ignore_high_quality_check
=
True
)
index
=
IndexBuilder
.
get_index
(
dataset
,
'high_quality'
)
index
.
delete
()
if
index
:
elif
action
==
"add"
:
dataset_documents
=
db
.
session
.
query
(
DatasetDocument
)
.
filter
(
DatasetDocument
.
dataset_id
==
dataset_id
,
DatasetDocument
.
indexing_status
==
'completed'
,
DatasetDocument
.
enabled
==
True
,
DatasetDocument
.
archived
==
False
,
)
.
all
()
if
dataset_documents
:
# save vector index
index
=
IndexBuilder
.
get_index
(
dataset
,
'high_quality'
,
ignore_high_quality_check
=
True
)
for
dataset_document
in
dataset_documents
:
for
dataset_document
in
dataset_documents
:
# delete from vector index
# delete from vector index
if
action
==
"remove"
:
segments
=
db
.
session
.
query
(
DocumentSegment
)
.
filter
(
index
.
delete_by_document_id
(
dataset_document
.
id
)
DocumentSegment
.
document_id
==
dataset_document
.
id
,
elif
action
==
"add"
:
DocumentSegment
.
enabled
==
True
segments
=
db
.
session
.
query
(
DocumentSegment
)
.
filter
(
)
.
order_by
(
DocumentSegment
.
position
.
asc
())
.
all
()
DocumentSegment
.
document_id
==
dataset_document
.
id
,
DocumentSegment
.
enabled
==
True
)
.
order_by
(
DocumentSegment
.
position
.
asc
())
.
all
()
documents
=
[]
documents
=
[]
for
segment
in
segments
:
for
segment
in
segments
:
document
=
Document
(
document
=
Document
(
page_content
=
segment
.
content
,
page_content
=
segment
.
content
,
metadata
=
{
metadata
=
{
"doc_id"
:
segment
.
index_node_id
,
"doc_id"
:
segment
.
index_node_id
,
"doc_hash"
:
segment
.
index_node_hash
,
"doc_hash"
:
segment
.
index_node_hash
,
"document_id"
:
segment
.
document_id
,
"document_id"
:
segment
.
document_id
,
"dataset_id"
:
segment
.
dataset_id
,
"dataset_id"
:
segment
.
dataset_id
,
}
}
)
)
documents
.
append
(
document
)
documents
.
append
(
document
)
# save vector index
# save vector index
index
.
add_texts
(
index
.
add_texts
(
documents
)
documents
,
duplicate_check
=
True
)
end_at
=
time
.
perf_counter
()
end_at
=
time
.
perf_counter
()
logging
.
info
(
logging
.
info
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment