Unverified commit 31070ffb, authored by Jyong, committed by GitHub

fix qa index processor tenant id is None error (#2713)

Co-authored-by: jyong <jyong@dify.ai>
parent 7f3dec7b
...@@ -62,7 +62,8 @@ class IndexingRunner: ...@@ -62,7 +62,8 @@ class IndexingRunner:
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict()) text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
# transform # transform
documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict()) documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
processing_rule.to_dict())
# save segment # save segment
self._load_segments(dataset, dataset_document, documents) self._load_segments(dataset, dataset_document, documents)
...@@ -120,7 +121,8 @@ class IndexingRunner: ...@@ -120,7 +121,8 @@ class IndexingRunner:
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict()) text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
# transform # transform
documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict()) documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
processing_rule.to_dict())
# save segment # save segment
self._load_segments(dataset, dataset_document, documents) self._load_segments(dataset, dataset_document, documents)
...@@ -750,7 +752,7 @@ class IndexingRunner: ...@@ -750,7 +752,7 @@ class IndexingRunner:
index_processor.load(dataset, documents) index_processor.load(dataset, documents)
def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset, def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset,
text_docs: list[Document], process_rule: dict) -> list[Document]: text_docs: list[Document], doc_language: str, process_rule: dict) -> list[Document]:
# get embedding model instance # get embedding model instance
embedding_model_instance = None embedding_model_instance = None
if dataset.indexing_technique == 'high_quality': if dataset.indexing_technique == 'high_quality':
...@@ -768,7 +770,8 @@ class IndexingRunner: ...@@ -768,7 +770,8 @@ class IndexingRunner:
) )
documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance, documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance,
process_rule=process_rule) process_rule=process_rule, tenant_id=dataset.tenant_id,
doc_language=doc_language)
return documents return documents
......
...@@ -7,7 +7,6 @@ from typing import Optional ...@@ -7,7 +7,6 @@ from typing import Optional
import pandas as pd import pandas as pd
from flask import Flask, current_app from flask import Flask, current_app
from flask_login import current_user
from werkzeug.datastructures import FileStorage from werkzeug.datastructures import FileStorage
from core.generator.llm_generator import LLMGenerator from core.generator.llm_generator import LLMGenerator
...@@ -31,7 +30,7 @@ class QAIndexProcessor(BaseIndexProcessor): ...@@ -31,7 +30,7 @@ class QAIndexProcessor(BaseIndexProcessor):
def transform(self, documents: list[Document], **kwargs) -> list[Document]: def transform(self, documents: list[Document], **kwargs) -> list[Document]:
splitter = self._get_splitter(processing_rule=kwargs.get('process_rule'), splitter = self._get_splitter(processing_rule=kwargs.get('process_rule'),
embedding_model_instance=None) embedding_model_instance=kwargs.get('embedding_model_instance'))
# Split the text documents into nodes. # Split the text documents into nodes.
all_documents = [] all_documents = []
...@@ -66,10 +65,10 @@ class QAIndexProcessor(BaseIndexProcessor): ...@@ -66,10 +65,10 @@ class QAIndexProcessor(BaseIndexProcessor):
for doc in sub_documents: for doc in sub_documents:
document_format_thread = threading.Thread(target=self._format_qa_document, kwargs={ document_format_thread = threading.Thread(target=self._format_qa_document, kwargs={
'flask_app': current_app._get_current_object(), 'flask_app': current_app._get_current_object(),
'tenant_id': current_user.current_tenant.id, 'tenant_id': kwargs.get('tenant_id'),
'document_node': doc, 'document_node': doc,
'all_qa_documents': all_qa_documents, 'all_qa_documents': all_qa_documents,
'document_language': kwargs.get('document_language', 'English')}) 'document_language': kwargs.get('doc_language', 'English')})
threads.append(document_format_thread) threads.append(document_format_thread)
document_format_thread.start() document_format_thread.start()
for thread in threads: for thread in threads:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.