Commit 82b3bb62 authored by jyong

add clean unused dataset command

parent 30373e1a
...@@ -221,31 +221,21 @@ def clean_unused_dataset_indexes(): ...@@ -221,31 +221,21 @@ def clean_unused_dataset_indexes():
Document.indexing_status == 'completed', Document.indexing_status == 'completed',
Document.enabled == True, Document.enabled == True,
Document.archived == False, Document.archived == False,
Document.updated_at < thirty_days_ago Document.updated_at > thirty_days_ago
).all() ).all()
if documents: if not documents:
for document in documents:
click.style('Start clean document segments from index: {}'.format(document.id),
fg='green')
document.enabled = False
db.session.commit()
try: try:
# remove index # remove index
vector_index = IndexBuilder.get_index(dataset, 'high_quality') vector_index = IndexBuilder.get_index(dataset, 'high_quality')
kw_index = IndexBuilder.get_index(dataset, 'economy') kw_index = IndexBuilder.get_index(dataset, 'economy')
# delete from vector index # delete from vector index
if vector_index: if vector_index:
vector_index.delete_by_document_id(document.id) vector_index.delete()
kw_index.delete()
# delete from keyword index except Exception as e:
segments = db.session.query(DocumentSegment).filter( click.echo(
DocumentSegment.document_id == document.id).all() click.style('clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
index_node_ids = [segment.index_node_id for segment in segments] fg='red'))
if index_node_ids:
kw_index.delete_by_ids(index_node_ids)
except Exception:
logging.exception("clean document from index failed: {}".format(document.id))
end_at = time.perf_counter() end_at = time.perf_counter()
click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green')) click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green'))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment