Unverified commit 2d604d93, authored by Jyong and committed by GitHub

Fix/filter empty segment (#1004)

Co-authored-by: jyong <jyong@dify.ai>
parent e7199826
...@@ -525,12 +525,13 @@ class IndexingRunner: ...@@ -525,12 +525,13 @@ class IndexingRunner:
documents = splitter.split_documents([text_doc]) documents = splitter.split_documents([text_doc])
split_documents = [] split_documents = []
for document_node in documents: for document_node in documents:
doc_id = str(uuid.uuid4())
hash = helper.generate_text_hash(document_node.page_content)
document_node.metadata['doc_id'] = doc_id
document_node.metadata['doc_hash'] = hash
split_documents.append(document_node) if document_node.page_content.strip():
doc_id = str(uuid.uuid4())
hash = helper.generate_text_hash(document_node.page_content)
document_node.metadata['doc_id'] = doc_id
document_node.metadata['doc_hash'] = hash
split_documents.append(document_node)
all_documents.extend(split_documents) all_documents.extend(split_documents)
# processing qa document # processing qa document
if document_form == 'qa_model': if document_form == 'qa_model':
......
...@@ -891,6 +891,10 @@ class SegmentService: ...@@ -891,6 +891,10 @@ class SegmentService:
if document.doc_form == 'qa_model': if document.doc_form == 'qa_model':
if 'answer' not in args or not args['answer']: if 'answer' not in args or not args['answer']:
raise ValueError("Answer is required") raise ValueError("Answer is required")
if not args['answer'].strip():
raise ValueError("Answer is empty")
if 'content' not in args or not args['content'] or not args['content'].strip():
raise ValueError("Content is empty")
@classmethod @classmethod
def create_segment(cls, args: dict, document: Document, dataset: Dataset): def create_segment(cls, args: dict, document: Document, dataset: Dataset):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment