Commit 9284bb9b authored by John Wang's avatar John Wang

feat: add delete dataset index

parent ee8ce1d3
......@@ -37,6 +37,9 @@ class BaseIndex(ABC):
) -> List[Document]:
raise NotImplementedError
def delete(self) -> None:
    """Delete the entire index for this dataset.

    Abstract hook: the base class always raises; concrete subclasses
    (keyword / vector indexes) override it with real cleanup.
    """
    raise NotImplementedError
def _filter_duplicate_texts(self, texts: list[Document]) -> list[Document]:
for text in texts:
doc_id = text.metadata['doc_id']
......
......@@ -114,6 +114,12 @@ class KeywordTableIndex(BaseIndex):
return documents
def delete(self) -> None:
    """Remove this dataset's keyword-table row from the database, if one exists."""
    keyword_table_row = self._dataset.dataset_keyword_table
    if not keyword_table_row:
        return
    db.session.delete(keyword_table_row)
    db.session.commit()
def _save_dataset_keyword_table(self, keyword_table):
keyword_table_dict = {
'__type__': 'keyword_table',
......
......@@ -90,3 +90,9 @@ class BaseVectorIndex(BaseIndex):
for node_id in ids:
vector_store.del_text(node_id)
def delete(self) -> None:
    """Drop the whole vector index backing this dataset."""
    # Narrow the store to its concrete class before delegating the drop.
    store = cast(self._get_vector_store_class(), self._get_vector_store())
    store.delete()
......@@ -39,6 +39,11 @@ class QdrantVectorStore(Qdrant):
return len(response) > 0
def delete(self):
    """Delete the entire Qdrant collection behind this vector store."""
    # A local Qdrant client may need re-creation first (see _reload_if_needed).
    self._reload_if_needed()
    client = self.client
    client.delete_collection(collection_name=self.collection_name)
def _reload_if_needed(self):
if isinstance(self.client, QdrantLocal):
self.client = cast(QdrantLocal, self.client)
......
......@@ -33,3 +33,6 @@ class WeaviateVectorStore(Weaviate):
return False
return True
def delete(self):
    """Drop the Weaviate class (and every object stored under it) for this index."""
    schema = self._client.schema
    schema.delete_class(self._index_name)
......@@ -33,26 +33,21 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
)
documents = db.session.query(DocumentSegment).filter(DocumentSegment.dataset_id == dataset_id).all()
index_doc_ids = [document.id for document in documents]
segments = db.session.query(DocumentSegment).filter(DocumentSegment.dataset_id == dataset_id).all()
index_node_ids = [segment.index_node_id for segment in segments]
vector_index = IndexBuilder.get_index(dataset, 'high_quality')
kw_index = IndexBuilder.get_index(dataset, 'economy')
# delete from vector index
if vector_index:
for index_doc_id in index_doc_ids:
try:
vector_index.delete_by_document_id(index_doc_id)
vector_index.delete()
except Exception:
logging.exception("Delete doc index failed when dataset deleted.")
continue
# delete from keyword index
if index_node_ids:
try:
kw_index.delete_by_ids(index_node_ids)
kw_index.delete()
except Exception:
logging.exception("Delete nodes index failed when dataset deleted.")
......@@ -62,7 +57,6 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
for segment in segments:
db.session.delete(segment)
db.session.query(DatasetKeywordTable).filter(DatasetKeywordTable.dataset_id == dataset_id).delete()
db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete()
db.session.query(DatasetQuery).filter(DatasetQuery.dataset_id == dataset_id).delete()
db.session.query(AppDatasetJoin).filter(AppDatasetJoin.dataset_id == dataset_id).delete()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment