Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
9284bb9b
Commit
9284bb9b
authored
Jun 20, 2023
by
John Wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: add delete dataset index
parent
ee8ce1d3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
32 additions
and
15 deletions
+32
-15
base.py
api/core/index/base.py
+3
-0
keyword_table_index.py
api/core/index/keyword_table_index/keyword_table_index.py
+6
-0
base.py
api/core/index/vector_index/base.py
+7
-1
qdrant_vector_store.py
api/core/vector_store/qdrant_vector_store.py
+5
-0
weaviate_vector_store.py
api/core/vector_store/weaviate_vector_store.py
+3
-0
clean_dataset_task.py
api/tasks/clean_dataset_task.py
+8
-14
No files found.
api/core/index/base.py
View file @
9284bb9b
...
...
@@ -37,6 +37,9 @@ class BaseIndex(ABC):
)
->
List
[
Document
]:
raise
NotImplementedError
def delete(self) -> None:
    """Remove the entire index.

    Base-class stub: concrete index implementations must override this.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
def
_filter_duplicate_texts
(
self
,
texts
:
list
[
Document
])
->
list
[
Document
]:
for
text
in
texts
:
doc_id
=
text
.
metadata
[
'doc_id'
]
...
...
api/core/index/keyword_table_index/keyword_table_index.py
View file @
9284bb9b
...
...
@@ -114,6 +114,12 @@ class KeywordTableIndex(BaseIndex):
return
documents
def delete(self) -> None:
    """Delete the persisted keyword table row for this dataset.

    No-op when the dataset has no associated keyword table.
    """
    keyword_table = self._dataset.dataset_keyword_table
    if not keyword_table:
        return
    db.session.delete(keyword_table)
    db.session.commit()
def
_save_dataset_keyword_table
(
self
,
keyword_table
):
keyword_table_dict
=
{
'__type__'
:
'keyword_table'
,
...
...
api/core/index/vector_index/base.py
View file @
9284bb9b
...
...
@@ -89,4 +89,10 @@ class BaseVectorIndex(BaseIndex):
vector_store
=
cast
(
self
.
_get_vector_store_class
(),
vector_store
)
for
node_id
in
ids
:
vector_store
.
del_text
(
node_id
)
\ No newline at end of file
vector_store
.
del_text
(
node_id
)
def delete(self) -> None:
    """Delete the whole vector collection backing this index."""
    store = self._get_vector_store()
    # Narrow to the concrete store type so `.delete()` resolves on it.
    store = cast(self._get_vector_store_class(), store)
    store.delete()
api/core/vector_store/qdrant_vector_store.py
View file @
9284bb9b
...
...
@@ -39,6 +39,11 @@ class QdrantVectorStore(Qdrant):
return
len
(
response
)
>
0
def delete(self):
    """Drop the whole Qdrant collection behind this store."""
    # Presumably refreshes local-mode clients before use — see _reload_if_needed.
    self._reload_if_needed()
    client = self.client
    client.delete_collection(collection_name=self.collection_name)
def
_reload_if_needed
(
self
):
if
isinstance
(
self
.
client
,
QdrantLocal
):
self
.
client
=
cast
(
QdrantLocal
,
self
.
client
)
...
...
api/core/vector_store/weaviate_vector_store.py
View file @
9284bb9b
...
...
@@ -33,3 +33,6 @@ class WeaviateVectorStore(Weaviate):
return
False
return
True
def delete(self):
    """Delete this store's Weaviate schema class."""
    schema = self._client.schema
    schema.delete_class(self._index_name)
api/tasks/clean_dataset_task.py
View file @
9284bb9b
...
...
@@ -33,28 +33,23 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
)
documents
=
db
.
session
.
query
(
DocumentSegment
)
.
filter
(
DocumentSegment
.
dataset_id
==
dataset_id
)
.
all
()
index_doc_ids
=
[
document
.
id
for
document
in
documents
]
segments
=
db
.
session
.
query
(
DocumentSegment
)
.
filter
(
DocumentSegment
.
dataset_id
==
dataset_id
)
.
all
()
index_node_ids
=
[
segment
.
index_node_id
for
segment
in
segments
]
vector_index
=
IndexBuilder
.
get_index
(
dataset
,
'high_quality'
)
kw_index
=
IndexBuilder
.
get_index
(
dataset
,
'economy'
)
# delete from vector index
if
vector_index
:
for
index_doc_id
in
index_doc_ids
:
try
:
vector_index
.
delete_by_document_id
(
index_doc_id
)
except
Exception
:
logging
.
exception
(
"Delete doc index failed when dataset deleted."
)
continue
# delete from keyword index
if
index_node_ids
:
try
:
kw_index
.
delete_by_ids
(
index_node_ids
)
vector_index
.
delete
(
)
except
Exception
:
logging
.
exception
(
"Delete nodes index failed when dataset deleted."
)
logging
.
exception
(
"Delete doc index failed when dataset deleted."
)
# delete from keyword index
try
:
kw_index
.
delete
()
except
Exception
:
logging
.
exception
(
"Delete nodes index failed when dataset deleted."
)
for
document
in
documents
:
db
.
session
.
delete
(
document
)
...
...
@@ -62,7 +57,6 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
for
segment
in
segments
:
db
.
session
.
delete
(
segment
)
db
.
session
.
query
(
DatasetKeywordTable
)
.
filter
(
DatasetKeywordTable
.
dataset_id
==
dataset_id
)
.
delete
()
db
.
session
.
query
(
DatasetProcessRule
)
.
filter
(
DatasetProcessRule
.
dataset_id
==
dataset_id
)
.
delete
()
db
.
session
.
query
(
DatasetQuery
)
.
filter
(
DatasetQuery
.
dataset_id
==
dataset_id
)
.
delete
()
db
.
session
.
query
(
AppDatasetJoin
)
.
filter
(
AppDatasetJoin
.
dataset_id
==
dataset_id
)
.
delete
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment