Commit f90eb9e1 authored by jyong

fix merge

parent ef04df29
...
@@ -11,14 +11,14 @@ from werkzeug.exceptions import NotFound
 from core.index.index import IndexBuilder
 from core.indexing_runner import IndexingRunner
-from core.llm.token_calculator import TokenCalculator
+from core.model_providers.model_factory import ModelFactory
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from libs import helper
 from models.dataset import DocumentSegment, Dataset, Document


-@shared_task
+@shared_task(queue='dataset')
 def batch_create_segment_to_index_task(job_id: str, content: List, dataset_id: str, document_id: str,
                                        tenant_id: str, user_id: str):
     """
@@ -54,8 +54,12 @@ def batch_create_segment_to_index_task(job_id: str, content: List, dataset_id: s
         answer = segment['answer']
         doc_id = str(uuid.uuid4())
         segment_hash = helper.generate_text_hash(content)
+        embedding_model = ModelFactory.get_embedding_model(
+            tenant_id=dataset.tenant_id
+        )
         # calc embedding use tokens
-        tokens = TokenCalculator.get_num_tokens('text-embedding-ada-002', content)
+        tokens = embedding_model.get_num_tokens(content)
         max_position = db.session.query(func.max(DocumentSegment.position)).filter(
             DocumentSegment.document_id == dataset_document.id
         ).scalar()
...
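Note on the hunk above: token usage is no longer computed by a tokenizer hard-wired to 'text-embedding-ada-002'; it now comes from whichever embedding model ModelFactory resolves for the dataset's tenant. A minimal sketch of the resulting call pattern, assuming only the ModelFactory.get_embedding_model and get_num_tokens calls shown in the diff; the count_segment_tokens helper is hypothetical:

```python
# Hypothetical helper illustrating the call pattern this diff moves to;
# only ModelFactory.get_embedding_model and get_num_tokens come from the diff.
from core.model_providers.model_factory import ModelFactory


def count_segment_tokens(tenant_id: str, content: str) -> int:
    # Resolve the tenant's configured embedding model instead of
    # hard-coding 'text-embedding-ada-002'.
    embedding_model = ModelFactory.get_embedding_model(tenant_id=tenant_id)
    # The token count now comes from that model's own tokenizer.
    return embedding_model.get_num_tokens(content)
```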
...
@@ -11,7 +11,7 @@ from extensions.ext_redis import redis_client
 from models.dataset import DocumentSegment, Dataset, Document


-@shared_task
+@shared_task(queue='dataset')
 def delete_segment_from_index_task(segment_id: str, index_node_id: str, dataset_id: str, document_id: str):
     """
     Async Remove segment from index
...
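Both tasks are also moved onto a dedicated Celery queue via @shared_task(queue='dataset'). A minimal sketch of how that routing behaves under a standard Celery setup; the app module path and task body below are illustrative assumptions, not taken from this repository:

```python
from celery import shared_task


@shared_task(queue='dataset')
def example_dataset_task(segment_id: str) -> None:
    # Declaring queue='dataset' publishes this task to the 'dataset' queue,
    # so it only runs on workers subscribed to that queue, e.g.:
    #   celery -A app.celery worker -Q dataset
    print(f'processing segment {segment_id}')
```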