ai-tech / dify · Commit 31070ffb (unverified)

Authored Mar 06, 2024 by Jyong; committed via GitHub on Mar 06, 2024.

    fix qa index processor tenant id is None error (#2713)

    Co-authored-by: jyong <jyong@dify.ai>

Parent: 7f3dec7b

Showing 2 changed files with 10 additions and 8 deletions:

    api/core/indexing_runner.py                                      +7 -4
    api/core/rag/index_processor/processor/qa_index_processor.py     +3 -4
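The bug: `QAIndexProcessor.transform` read `current_user.current_tenant.id` inside its worker threads, but flask_login's `current_user` is a request-bound proxy that resolves to nothing outside the spawning request context (and the indexing pipeline can also run in background tasks with no logged-in user at all), so the tenant id came back as None. The fix threads the value through explicitly: the runner passes `tenant_id=dataset.tenant_id` into `transform`, which hands it to each formatting thread. A minimal self-contained sketch of the failure mode and the fix, using a stand-in `_Proxy` class and a hypothetical tenant id rather than flask_login itself:

import threading

class _Proxy:
    """Stand-in for a request-bound proxy: it resolves through thread-local
    state, so a freshly spawned worker thread sees nothing."""
    _store = threading.local()

    @property
    def tenant_id(self):
        return getattr(self._store, 'tenant_id', None)

current_user = _Proxy()
current_user._store.tenant_id = 'tenant-123'   # set on the "request" thread

def broken_worker():
    # Reads the proxy from a new thread: its thread-local slot is empty,
    # so this prints None -- the error this commit fixes.
    print(current_user.tenant_id)

def fixed_worker(tenant_id):
    # Receives a plain value resolved on the spawning thread: 'tenant-123'.
    print(tenant_id)

threading.Thread(target=broken_worker).start()
threading.Thread(target=fixed_worker,
                 kwargs={'tenant_id': current_user.tenant_id}).start()

Plain values survive the `Thread(...)` hop; context-bound proxies (`current_user`, `request`, `g`) do not.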
api/core/indexing_runner.py

@@ -62,7 +62,8 @@ class IndexingRunner:
         text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())

         # transform
-        documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
+        documents = self._transform(index_processor, dataset, text_docs,
+                                    dataset_document.doc_language, processing_rule.to_dict())

         # save segment
         self._load_segments(dataset, dataset_document, documents)

@@ -120,7 +121,8 @@ class IndexingRunner:
         text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())

         # transform
-        documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
+        documents = self._transform(index_processor, dataset, text_docs,
+                                    dataset_document.doc_language, processing_rule.to_dict())

         # save segment
         self._load_segments(dataset, dataset_document, documents)

@@ -750,7 +752,7 @@ class IndexingRunner:
         index_processor.load(dataset, documents)

     def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset,
-                   text_docs: list[Document], process_rule: dict) -> list[Document]:
+                   text_docs: list[Document], doc_language: str, process_rule: dict) -> list[Document]:
         # get embedding model instance
         embedding_model_instance = None
         if dataset.indexing_technique == 'high_quality':

@@ -768,7 +770,8 @@ class IndexingRunner:
         )

         documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance,
-                                              process_rule=process_rule)
+                                              process_rule=process_rule, tenant_id=dataset.tenant_id,
+                                              doc_language=doc_language)
         return documents
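Both call sites in the runner now pass the document language down, and `_transform` forwards the tenant from the dataset itself rather than from any ambient user. Roughly, the kwargs contract after this commit looks like the following sketch (`MyIndexProcessor` and the sample values are hypothetical, not Dify code):

class MyIndexProcessor:
    def transform(self, text_docs: list, **kwargs) -> list:
        tenant_id = kwargs.get('tenant_id')                    # from dataset.tenant_id
        doc_language = kwargs.get('doc_language', 'English')   # from dataset_document.doc_language
        process_rule = kwargs.get('process_rule')
        # A real processor would split and embed here; this just tags the docs.
        return [{'text': d, 'tenant_id': tenant_id,
                 'language': doc_language, 'rule': process_rule} for d in text_docs]

processor = MyIndexProcessor()
print(processor.transform(['some text'],
                          embedding_model_instance=None,
                          process_rule={'mode': 'automatic'},
                          tenant_id='tenant-123',
                          doc_language='English'))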
api/core/rag/index_processor/processor/qa_index_processor.py

@@ -7,7 +7,6 @@ from typing import Optional

 import pandas as pd
 from flask import Flask, current_app
-from flask_login import current_user
 from werkzeug.datastructures import FileStorage

 from core.generator.llm_generator import LLMGenerator

@@ -31,7 +30,7 @@ class QAIndexProcessor(BaseIndexProcessor):

     def transform(self, documents: list[Document], **kwargs) -> list[Document]:
         splitter = self._get_splitter(processing_rule=kwargs.get('process_rule'),
-                                      embedding_model_instance=None)
+                                      embedding_model_instance=kwargs.get('embedding_model_instance'))

         # Split the text documents into nodes.
         all_documents = []

@@ -66,10 +65,10 @@ class QAIndexProcessor(BaseIndexProcessor):
             for doc in sub_documents:
                 document_format_thread = threading.Thread(target=self._format_qa_document, kwargs={
                     'flask_app': current_app._get_current_object(),
-                    'tenant_id': current_user.current_tenant.id,
+                    'tenant_id': kwargs.get('tenant_id'),
                     'document_node': doc,
                     'all_qa_documents': all_qa_documents,
-                    'document_language': kwargs.get('document_language', 'English')})
+                    'document_language': kwargs.get('doc_language', 'English')})
                 threads.append(document_format_thread)
                 document_format_thread.start()
             for thread in threads:
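`_format_qa_document` already receives the concrete app object (`current_app._get_current_object()`) so each thread can push its own app context; with this commit the tenant id arrives the same way, as a plain kwarg resolved before the thread starts, and the lookup key is corrected from `document_language` to `doc_language` to match what the runner now sends. A sketch of that thread pattern, with a hypothetical worker body:

import threading

from flask import Flask

app = Flask(__name__)

def format_qa_document(flask_app, tenant_id, document_node, results):
    # Hypothetical worker body: push an app context explicitly inside the
    # thread instead of relying on request-bound state, which never crosses
    # thread boundaries.
    with flask_app.app_context():
        results.append({'tenant_id': tenant_id, 'doc': document_node})

results = []
threads = []
for doc in ['chunk-1', 'chunk-2']:                  # hypothetical chunks
    t = threading.Thread(target=format_qa_document, kwargs={
        'flask_app': app,                           # the real app object, not a proxy
        'tenant_id': 'tenant-123',                  # resolved before spawning
        'document_node': doc,
        'results': results,
    })
    threads.append(t)
    t.start()
for t in threads:
    t.join()
print(results)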