1506 remove duplicated code (#1511)

d0e1ea8f · crazywoola · GitHub · f3b9647b · d0e1ea8f
Unverified Commit d0e1ea8f authored Nov 13, 2023 by crazywoola Committed by GitHub Nov 13, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 10 additions and 31 deletions

indexing_runner.py api/core/indexing_runner.py +10 -31

No files found.
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -89,22 +89,6 @@ class IndexingRunner:
                dataset_document.stopped_at = datetime.datetime.utcnow()
                db.session.commit()

-    def format_split_text(self, text):
-        regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
-        matches = re.findall(regex, text, re.MULTILINE)
-
-        result = []
-        for match in matches:
-            q = match[0]
-            a = match[1]
-            if q and a:
-                result.append({
-                    "question": q,
-                    "answer": re.sub(r"\n\s*", "\n", a.strip())
-                })
-
-        return result
-
    def run_in_splitting_status(self, dataset_document: DatasetDocument):
        """Run the indexing process when the index_status is splitting."""
        try:
@@ -647,21 +631,16 @@ class IndexingRunner:
        return text

    def format_split_text(self, text):
-        regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"  # 匹配Q和A的正则表达式
-        matches = re.findall(regex, text, re.MULTILINE)  # 获取所有匹配到的结果
-
-        result = []  # 存储最终的结果
-        for match in matches:
-            q = match[0]
-            a = match[1]
-            if q and a:
-                # 如果Q和A都存在，就将其添加到结果中
-                result.append({
-                    "question": q,
-                    "answer": re.sub(r"\n\s*", "\n", a.strip())
-                })
-
-        return result
+        regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
+        matches = re.findall(regex, text, re.MULTILINE)
+
+        return [
+            {
+                "question": q,
+                "answer": re.sub(r"\n\s*", "\n", a.strip())
+            }
+            for q, a in matches if q and a
+        ]

    def _build_index(self, dataset: Dataset, dataset_document: DatasetDocument, documents: List[Document]) -> None:
        """