Commit e148aad0 authored by jyong

Merge branch 'feat/clean-vector-dataset' into deploy/dev

parents 77f7131e 43aaac36
...@@ -221,31 +221,27 @@ def clean_unused_dataset_indexes(): ...@@ -221,31 +221,27 @@ def clean_unused_dataset_indexes():
Document.indexing_status == 'completed', Document.indexing_status == 'completed',
Document.enabled == True, Document.enabled == True,
Document.archived == False, Document.archived == False,
Document.updated_at < thirty_days_ago Document.updated_at > thirty_days_ago
).all() ).all()
if documents: if not documents and len(documents) > 0:
for document in documents: try:
click.style('Start clean document segments from index: {}'.format(document.id), update_params = {
fg='green') Document.enabled: False
document.enabled = False }
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
db.session.commit() db.session.commit()
try: # remove index
# remove index vector_index = IndexBuilder.get_index(dataset, 'high_quality')
vector_index = IndexBuilder.get_index(dataset, 'high_quality') kw_index = IndexBuilder.get_index(dataset, 'economy')
kw_index = IndexBuilder.get_index(dataset, 'economy') # delete from vector index
if vector_index:
# delete from vector index vector_index.delete()
if vector_index: kw_index.delete()
vector_index.delete_by_document_id(document.id) except Exception as e:
click.echo(
# delete from keyword index click.style('clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
segments = db.session.query(DocumentSegment).filter( fg='red'))
DocumentSegment.document_id == document.id).all()
index_node_ids = [segment.index_node_id for segment in segments]
if index_node_ids:
kw_index.delete_by_ids(index_node_ids)
except Exception:
logging.exception("clean document from index failed: {}".format(document.id))
end_at = time.perf_counter() end_at = time.perf_counter()
click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green')) click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green'))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment