Commit 5b165df8 authored by jyong

Merge branch 'feat/milvus-support' into deploy/dev

# Conflicts:
#	web/app/components/base/icons/src/vender/line/general/index.ts
parents 229b0e0e 3b374a46
......@@ -10,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional, List, cast
import openai
from billiard.pool import Pool
from flask import current_app, Flask
from flask_login import current_user
from langchain.embeddings import OpenAIEmbeddings
......@@ -538,6 +539,11 @@ class IndexingRunner:
# document_format_thread.start()
# for thread in threads:
# thread.join()
# with Pool(5) as pool:
# for doc in documents:
# result = pool.apply_async(format_document, kwds={'flask_app': current_app._get_current_object(), 'document_node': doc, 'split_documents': split_documents})
# if result.ready():
# split_documents.extend(result.get())
with ThreadPoolExecutor(max_workers=10) as executor:
future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
for future in concurrent.futures.as_completed(future_to_doc):
......
......@@ -12,7 +12,7 @@ from core.index.keyword_table_index.keyword_table_index import KeywordTableIndex
from core.index.vector_index.vector_index import VectorIndex
from core.llm.llm_builder import LLMBuilder
from extensions.ext_database import db
from models.dataset import Dataset
from models.dataset import Dataset, DocumentSegment
class DatasetRetrieverToolInput(BaseModel):
......@@ -68,6 +68,7 @@ class DatasetRetrieverTool(BaseTool):
)
documents = kw_table_index.search(query, search_kwargs={'k': self.k})
return str("\n".join([document.page_content for document in documents]))
else:
model_credentials = LLMBuilder.get_model_credentials(
tenant_id=dataset.tenant_id,
......@@ -98,8 +99,22 @@ class DatasetRetrieverTool(BaseTool):
hit_callback = DatasetIndexToolCallbackHandler(dataset.id)
hit_callback.on_tool_end(documents)
document_context_list = []
index_node_ids = [document.metadata['doc_id'] for document in documents]
segments = DocumentSegment.query.filter(DocumentSegment.completed_at.isnot(None),
DocumentSegment.status == 'completed',
DocumentSegment.enabled == True,
DocumentSegment.index_node_id.in_(index_node_ids)
).all()
if segments:
for segment in segments:
if segment.answer:
document_context_list.append(segment.answer)
else:
document_context_list.append(segment.content)
return str("\n".join([document.page_content for document in documents]))
return str("\n".join(document_context_list))
# Asynchronous entry point of the tool; no async implementation is provided here.
async def _arun(self, tool_input: str) -> str:
# Unconditionally raises, so awaiting this coroutine never produces a result.
raise NotImplementedError()
export { default as Check } from './Check'
export { default as LinkExternal02 } from './LinkExternal02'
<<<<<<<<< Temporary merge branch 1
export { default as Edit03 } from './Edit03'
export { default as Hash02 } from './Hash02'
export { default as LinkExternal02 } from './LinkExternal02'
export { default as Loading02 } from './Loading02'
export { default as LogOut01 } from './LogOut01'
export { default as Trash03 } from './Trash03'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment