Unverified commit f207e180, authored by Jyong, committed by GitHub

fix multi thread app context (#868)

Co-authored-by: jyong <jyong@dify.ai>
parent 948d64bb
...@@ -7,6 +7,7 @@ import time ...@@ -7,6 +7,7 @@ import time
import uuid import uuid
from typing import Optional, List, cast from typing import Optional, List, cast
from flask import current_app, Flask
from flask_login import current_user from flask_login import current_user
from langchain.schema import Document from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
...@@ -522,7 +523,8 @@ class IndexingRunner: ...@@ -522,7 +523,8 @@ class IndexingRunner:
sub_documents = all_documents[i:i + 10] sub_documents = all_documents[i:i + 10]
for doc in sub_documents: for doc in sub_documents:
document_format_thread = threading.Thread(target=self.format_qa_document, kwargs={ document_format_thread = threading.Thread(target=self.format_qa_document, kwargs={
'tenant_id': tenant_id, 'document_node': doc, 'all_qa_documents': all_qa_documents}) 'flask_app': current_app._get_current_object(), 'tenant_id': tenant_id, 'document_node': doc,
'all_qa_documents': all_qa_documents})
threads.append(document_format_thread) threads.append(document_format_thread)
document_format_thread.start() document_format_thread.start()
for thread in threads: for thread in threads:
...@@ -530,28 +532,29 @@ class IndexingRunner: ...@@ -530,28 +532,29 @@ class IndexingRunner:
return all_qa_documents return all_qa_documents
return all_documents return all_documents
def format_qa_document(self, tenant_id: str, document_node, all_qa_documents): def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents):
format_documents = [] format_documents = []
if document_node.page_content is None or not document_node.page_content.strip(): if document_node.page_content is None or not document_node.page_content.strip():
return return
try: with flask_app.app_context():
# qa model document try:
response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content) # qa model document
document_qa_list = self.format_split_text(response) response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content)
qa_documents = [] document_qa_list = self.format_split_text(response)
for result in document_qa_list: qa_documents = []
qa_document = Document(page_content=result['question'], metadata=document_node.metadata.copy()) for result in document_qa_list:
doc_id = str(uuid.uuid4()) qa_document = Document(page_content=result['question'], metadata=document_node.metadata.copy())
hash = helper.generate_text_hash(result['question']) doc_id = str(uuid.uuid4())
qa_document.metadata['answer'] = result['answer'] hash = helper.generate_text_hash(result['question'])
qa_document.metadata['doc_id'] = doc_id qa_document.metadata['answer'] = result['answer']
qa_document.metadata['doc_hash'] = hash qa_document.metadata['doc_id'] = doc_id
qa_documents.append(qa_document) qa_document.metadata['doc_hash'] = hash
format_documents.extend(qa_documents) qa_documents.append(qa_document)
except Exception as e: format_documents.extend(qa_documents)
logging.exception(e) except Exception as e:
logging.exception(e)
all_qa_documents.extend(format_documents) all_qa_documents.extend(format_documents)
def _split_to_documents_for_estimate(self, text_docs: List[Document], splitter: TextSplitter, def _split_to_documents_for_estimate(self, text_docs: List[Document], splitter: TextSplitter,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment