Commit 7e2d45d9 authored by John Wang

Merge branch 'feat/upgrade-langchain' into deploy/dev

parents 48552a5b 98a42e68
...@@ -10,9 +10,9 @@ from models.dataset import Dataset ...@@ -10,9 +10,9 @@ from models.dataset import Dataset
class IndexBuilder: class IndexBuilder:
@classmethod @classmethod
def get_index(cls, dataset: Dataset, indexing_technique: str): def get_index(cls, dataset: Dataset, indexing_technique: str, ignore_high_quality_check: bool = False):
if indexing_technique == "high_quality": if indexing_technique == "high_quality":
if dataset.indexing_technique != 'high_quality': if not ignore_high_quality_check and dataset.indexing_technique != 'high_quality':
return None return None
model_credentials = LLMBuilder.get_model_credentials( model_credentials = LLMBuilder.get_model_credentials(
......
...@@ -26,18 +26,26 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): ...@@ -26,18 +26,26 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
dataset = Dataset.query.filter_by( dataset = Dataset.query.filter_by(
id=dataset_id id=dataset_id
).first() ).first()
if not dataset: if not dataset:
raise Exception('Dataset not found') raise Exception('Dataset not found')
dataset_documents = DatasetDocument.query.filter_by(dataset_id=dataset_id).all()
if action == "remove":
index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
index.delete()
elif action == "add":
dataset_documents = db.session.query(DatasetDocument).filter(
DatasetDocument.dataset_id == dataset_id,
DatasetDocument.indexing_status == 'completed',
DatasetDocument.enabled == True,
DatasetDocument.archived == False,
).all()
if dataset_documents: if dataset_documents:
# save vector index # save vector index
index = IndexBuilder.get_index(dataset, 'high_quality') index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
if index:
for dataset_document in dataset_documents: for dataset_document in dataset_documents:
# delete from vector index # delete from vector index
if action == "remove":
index.delete_by_document_id(dataset_document.id)
elif action == "add":
segments = db.session.query(DocumentSegment).filter( segments = db.session.query(DocumentSegment).filter(
DocumentSegment.document_id == dataset_document.id, DocumentSegment.document_id == dataset_document.id,
DocumentSegment.enabled == True DocumentSegment.enabled == True
...@@ -58,10 +66,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): ...@@ -58,10 +66,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
documents.append(document) documents.append(document)
# save vector index # save vector index
index.add_texts( index.add_texts(documents)
documents,
duplicate_check=True
)
end_at = time.perf_counter() end_at = time.perf_counter()
logging.info( logging.info(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment