Commit 98a42e68 authored by John Wang

fix: dataset switch indexing_technique invalid

parent a2d498f0
@@ -10,9 +10,9 @@ from models.dataset import Dataset
 class IndexBuilder:
     @classmethod
-    def get_index(cls, dataset: Dataset, indexing_technique: str):
+    def get_index(cls, dataset: Dataset, indexing_technique: str, ignore_high_quality_check: bool = False):
         if indexing_technique == "high_quality":
-            if dataset.indexing_technique != 'high_quality':
+            if not ignore_high_quality_check and dataset.indexing_technique != 'high_quality':
                 return None
 
             model_credentials = LLMBuilder.get_model_credentials(
...
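The new ignore_high_quality_check flag lets maintenance code reach a dataset's high-quality vector index even when dataset.indexing_technique no longer reads 'high_quality', which is exactly the state a dataset is in while its indexing technique is being switched; that mismatch is what made the old guard misfire. A minimal sketch of the two call shapes (illustrative only, not part of this commit):

    # Default: returns None when dataset.indexing_technique != 'high_quality'.
    index = IndexBuilder.get_index(dataset, 'high_quality')

    # With the new flag the guard is bypassed, so a dataset mid-switch can
    # still locate its vector index (e.g. to delete stale vectors).
    index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)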
@@ -26,42 +26,47 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
     dataset = Dataset.query.filter_by(
         id=dataset_id
     ).first()
 
     if not dataset:
         raise Exception('Dataset not found')
 
-    dataset_documents = DatasetDocument.query.filter_by(dataset_id=dataset_id).all()
-    if dataset_documents:
-        # save vector index
-        index = IndexBuilder.get_index(dataset, 'high_quality')
-        if index:
+    if action == "remove":
+        index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
+        index.delete()
+    elif action == "add":
+        dataset_documents = db.session.query(DatasetDocument).filter(
+            DatasetDocument.dataset_id == dataset_id,
+            DatasetDocument.indexing_status == 'completed',
+            DatasetDocument.enabled == True,
+            DatasetDocument.archived == False,
+        ).all()
+
+        if dataset_documents:
+            # save vector index
+            index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
             for dataset_document in dataset_documents:
                 # delete from vector index
-                if action == "remove":
-                    index.delete_by_document_id(dataset_document.id)
-                elif action == "add":
-                    segments = db.session.query(DocumentSegment).filter(
-                        DocumentSegment.document_id == dataset_document.id,
-                        DocumentSegment.enabled == True
-                    ).order_by(DocumentSegment.position.asc()).all()
+                segments = db.session.query(DocumentSegment).filter(
+                    DocumentSegment.document_id == dataset_document.id,
+                    DocumentSegment.enabled == True
+                ).order_by(DocumentSegment.position.asc()).all()
 
-                    documents = []
-                    for segment in segments:
-                        document = Document(
-                            page_content=segment.content,
-                            metadata={
-                                "doc_id": segment.index_node_id,
-                                "doc_hash": segment.index_node_hash,
-                                "document_id": segment.document_id,
-                                "dataset_id": segment.dataset_id,
-                            }
-                        )
+                documents = []
+                for segment in segments:
+                    document = Document(
+                        page_content=segment.content,
+                        metadata={
+                            "doc_id": segment.index_node_id,
+                            "doc_hash": segment.index_node_hash,
+                            "document_id": segment.document_id,
+                            "dataset_id": segment.dataset_id,
+                        }
+                    )
 
-                        documents.append(document)
+                    documents.append(document)
 
-                    # save vector index
-                    index.add_texts(
-                        documents,
-                        duplicate_check=True
-                    )
+                # save vector index
+                index.add_texts(documents)
 
     end_at = time.perf_counter()
     logging.info(
...
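For context, a hedged sketch of how a caller might dispatch this task when a dataset's indexing_technique is toggled. Only the task name comes from the hunk header above; the module path, the Celery-style .delay() dispatch, and the switch_indexing_technique helper are assumptions for illustration, not shown in this commit.

    # Assumed module path; the real location is not visible in this diff.
    from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task

    def switch_indexing_technique(dataset, new_technique: str):
        # Hypothetical caller: drop the whole vector index when leaving
        # 'high_quality', rebuild it from completed documents when entering it.
        if dataset.indexing_technique == 'high_quality' and new_technique != 'high_quality':
            deal_dataset_vector_index_task.delay(dataset.id, 'remove')
        elif dataset.indexing_technique != 'high_quality' and new_technique == 'high_quality':
            deal_dataset_vector_index_task.delay(dataset.id, 'add')
        dataset.indexing_technique = new_technique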