Unverified Commit d0e1ea8f authored by crazywoola's avatar crazywoola Committed by GitHub

1506 remove duplicated code (#1511)

parent f3b9647b
...@@ -89,22 +89,6 @@ class IndexingRunner: ...@@ -89,22 +89,6 @@ class IndexingRunner:
dataset_document.stopped_at = datetime.datetime.utcnow() dataset_document.stopped_at = datetime.datetime.utcnow()
db.session.commit() db.session.commit()
def format_split_text(self, text):
    """Parse ``Q<n>: ... A<n>: ...`` formatted text into question/answer pairs.

    A question is the single-line text following a ``Q<digits>:`` marker; its
    answer is everything following the matching ``A<digits>:`` marker up to
    the next ``Q<digits>:`` marker or the end of the text, so answers may
    span several lines.

    :param text: raw text containing numbered Q/A pairs.
    :return: list of ``{"question": str, "answer": str}`` dicts, in order of
        appearance. Pairs with an empty question or answer are skipped.
    """
    if not text:
        return []

    # No re.MULTILINE here: with it, `$` matches at every line end and the
    # lazy answer group would stop after the first line of each answer.
    # The lookahead requires a full "Q<digits>:" marker so that an answer
    # which merely contains (or starts with) the letter "Q" is not cut short.
    regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)"
    matches = re.findall(regex, text)

    return [
        {
            "question": q,
            # Collapse the indentation that follows each line break.
            "answer": re.sub(r"\n\s*", "\n", a.strip())
        }
        for q, a in matches
        if q and a
    ]
def run_in_splitting_status(self, dataset_document: DatasetDocument): def run_in_splitting_status(self, dataset_document: DatasetDocument):
"""Run the indexing process when the index_status is splitting.""" """Run the indexing process when the index_status is splitting."""
try: try:
...@@ -647,21 +631,16 @@ class IndexingRunner: ...@@ -647,21 +631,16 @@ class IndexingRunner:
return text return text
def format_split_text(self, text): def format_split_text(self, text):
regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)" # 匹配Q和A的正则表达式 regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
matches = re.findall(regex, text, re.MULTILINE) # 获取所有匹配到的结果 matches = re.findall(regex, text, re.MULTILINE)
result = [] # 存储最终的结果 return [
for match in matches: {
q = match[0]
a = match[1]
if q and a:
# 如果Q和A都存在,就将其添加到结果中
result.append({
"question": q, "question": q,
"answer": re.sub(r"\n\s*", "\n", a.strip()) "answer": re.sub(r"\n\s*", "\n", a.strip())
}) }
for q, a in matches if q and a
return result ]
def _build_index(self, dataset: Dataset, dataset_document: DatasetDocument, documents: List[Document]) -> None: def _build_index(self, dataset: Dataset, dataset_document: DatasetDocument, documents: List[Document]) -> None:
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment