Commit e6891945 authored by JzoNg

Merge branch 'feat/milvus-support' into deploy/dev

parents d2f1b6fa da455e40
......@@ -7,7 +7,7 @@ import { XMarkIcon } from '@heroicons/react/20/solid'
import cn from 'classnames'
import Link from 'next/link'
import { groupBy } from 'lodash-es'
import PreviewItem from './preview-item'
import PreviewItem, { PreviewType } from './preview-item'
import s from './index.module.css'
import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
......@@ -97,6 +97,7 @@ const StepTwo = ({
const [docForm, setDocForm] = useState<DocForm | string>(
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
)
const [previewSwitched, setPreviewSwitched] = useState(false)
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
......@@ -154,9 +155,9 @@ const StepTwo = ({
}
}
const fetchFileIndexingEstimate = async () => {
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
// eslint-disable-next-line @typescript-eslint/no-use-before-define
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams())
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm))
if (segmentationType === SegmentType.CUSTOM)
setCustomFileIndexingEstimate(res)
......@@ -214,8 +215,7 @@ const StepTwo = ({
}) as NotionInfo[]
}
// TODO
const getFileIndexingEstimateParams = () => {
const getFileIndexingEstimateParams = (docForm: DocForm) => {
let params
if (dataSourceType === DataSourceType.FILE) {
params = {
......@@ -227,6 +227,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
}
}
if (dataSourceType === DataSourceType.NOTION) {
......@@ -237,6 +238,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
}
}
return params
......@@ -337,13 +339,22 @@ const StepTwo = ({
}
}
const handleCheck = (state: boolean) => {
// Maps the switch state onto the document form: on selects DocForm.QA,
// off selects DocForm.TEXT.
const handleSwitch = (state: boolean) => {
  setDocForm(state ? DocForm.QA : DocForm.TEXT)
}
// Switches the preview to Q&A: marks the switch as done, clears the stored
// estimate for the active segmentation type, then re-fetches the estimate
// with DocForm.QA.
const previewSwitch = async () => {
  setPreviewSwitched(true)
  // Only the estimate that belongs to the current segmentation type is reset.
  const clearEstimate = segmentationType === SegmentType.AUTO
    ? setAutomaticFileIndexingEstimate
    : setCustomFileIndexingEstimate
  clearEstimate(null)
  await fetchFileIndexingEstimate(DocForm.QA)
}
useEffect(() => {
// fetch rules
if (!isSetting) {
......@@ -390,10 +401,12 @@ const StepTwo = ({
setAutomaticFileIndexingEstimate(null)
setShowPreview()
fetchFileIndexingEstimate()
setPreviewSwitched(false)
}
else {
hidePreview()
setCustomFileIndexingEstimate(null)
setPreviewSwitched(false)
}
}, [segmentationType, indexType])
......@@ -563,7 +576,7 @@ const StepTwo = ({
<div className='shrink-0'>
<Switch
defaultValue={docForm === DocForm.QA}
onChange={handleCheck}
onChange={handleSwitch}
size='md'
/>
</div>
......@@ -644,24 +657,45 @@ const StepTwo = ({
{(showPreview)
? (
<div ref={previewScrollRef} className={cn(s.previewWrap, 'relativeh-full overflow-y-scroll border-l border-[#F2F4F7]')}>
{/* TODO preview switch */}
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`, ' flex items-center justify-between px-8')}>
<span>{t('datasetCreation.stepTwo.previewTitle')}</span>
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
<XMarkIcon className='h-4 w-4'></XMarkIcon>
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`)}>
<div className='flex items-center justify-between px-8'>
<div className='grow flex items-center'>
<div>{t('datasetCreation.stepTwo.previewTitle')}</div>
{docForm === DocForm.QA && !previewSwitched && (
<Button className='ml-2 !h-[26px] !py-[3px] !px-2 !text-xs !font-medium !text-primary-600' onClick={previewSwitch}>{t('datasetCreation.stepTwo.previewButton')}</Button>
)}
</div>
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
<XMarkIcon className='h-4 w-4'></XMarkIcon>
</div>
</div>
{docForm === DocForm.QA && !previewSwitched && (
<div className='px-8 pr-12 text-xs text-gray-500'>
<span>{t('datasetCreation.stepTwo.previewSwitchTipStart')}</span>
<span className='text-amber-600'>{t('datasetCreation.stepTwo.previewSwitchTipEnd')}</span>
</div>
)}
</div>
<div className='my-4 px-8 space-y-4'>
{fileIndexingEstimate?.preview
? (
<>
{fileIndexingEstimate?.preview.map((item, index) => (
<PreviewItem key={item} content={item} index={index + 1} />
))}
</>
)
: <div className='flex items-center justify-center h-[200px]'><Loading type='area'></Loading></div>
}
{previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && (
<>
{fileIndexingEstimate?.qa_preview.map((item, index) => (
<PreviewItem type={PreviewType.QA} key={item.question} qa={item} index={index + 1} />
))}
</>
)}
{(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && (
<>
{fileIndexingEstimate?.preview.map((item, index) => (
<PreviewItem type={PreviewType.TEXT} key={item} content={item} index={index + 1} />
))}
</>
)}
{!fileIndexingEstimate?.preview && !fileIndexingEstimate?.qa_preview && (
<div className='flex items-center justify-center h-[200px]'>
<Loading type='area' />
</div>
)}
</div>
</div>
)
......
'use client'
import React, { FC } from 'react'
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
export interface IPreviewItemProps {
export type IPreviewItemProps = {
type: string
index: number
content: string
content?: string
qa?: {
answer: string
question: string
}
}
/**
 * Kinds of content a preview item can display.
 * NOTE(review): the two values use different casing ('text' vs 'QA'); they are
 * runtime strings, so confirm nothing compares against them before normalizing.
 */
export enum PreviewType {
// Plain text content.
TEXT = 'text',
// Question/answer pair.
QA = 'QA',
}
const sharpIcon = (
......@@ -21,12 +32,16 @@ const textIcon = (
)
const PreviewItem: FC<IPreviewItemProps> = ({
type = PreviewType.TEXT,
index,
content,
qa,
}) => {
const { t } = useTranslation()
const charNums = (content || '').length
const formatedIndex = (() => (index + '').padStart(3, '0'))()
const charNums = type === PreviewType.TEXT
? (content || '').length
: (qa?.answer || '').length + (qa?.question || '').length
const formatedIndex = (() => String(index).padStart(3, '0'))()
return (
<div className='p-4 rounded-xl bg-gray-50'>
......@@ -41,7 +56,21 @@ const PreviewItem: FC<IPreviewItemProps> = ({
</div>
</div>
<div className='mt-2 max-h-[120px] line-clamp-6 overflow-hidden text-sm text-gray-800'>
<div style={{ whiteSpace: 'pre-line'}}>{content}</div>
{type === PreviewType.TEXT && (
<div style={{ whiteSpace: 'pre-line' }}>{content}</div>
)}
{type === PreviewType.QA && (
<div style={{ whiteSpace: 'pre-line' }}>
<div className='flex'>
<div className='shrink-0 mr-2 text-medium text-gray-400'>Q</div>
<div style={{ whiteSpace: 'pre-line' }}>{qa?.question}</div>
</div>
<div className='flex'>
<div className='shrink-0 mr-2 text-medium text-gray-400'>A</div>
<div style={{ whiteSpace: 'pre-line' }}>{qa?.answer}</div>
</div>
</div>
)}
</div>
</div>
)
......
......@@ -94,6 +94,9 @@ const translation = {
sideTipP3: 'Cleaning removes unnecessary characters and formats, making datasets cleaner and easier to parse.',
sideTipP4: 'Proper segmentation and cleaning improve model performance, providing more accurate and valuable results.',
previewTitle: 'Preview',
previewButton: 'Switching to Q&A format',
previewSwitchTipStart: 'The current segment preview is in text format, switching to a question-and-answer format preview will',
previewSwitchTipEnd: ' consume additional tokens',
characters: 'characters',
indexSettedTip: 'To change the index method, please go to the ',
datasetSettingLink: 'dataset settings.',
......
......@@ -94,6 +94,9 @@ const translation = {
sideTipP3: '清洗则是对文本进行预处理,删除不必要的字符、符号或格式,使数据集更加干净、整洁,便于模型解析。',
sideTipP4: '通过对数据集进行适当的分段和清洗,可以提高模型在实际应用中的表现,从而为用户提供更准确、更有价值的结果。',
previewTitle: '分段预览',
previewButton: '切换至 Q&A 形式',
previewSwitchTipStart: '当前分段预览是文本模式,切换到 Q&A 模式将会',
previewSwitchTipEnd: '消耗额外的 token',
characters: '字符',
indexSettedTip: '要更改索引方法,请转到',
datasetSettingLink: '数据集设置。',
......
......@@ -42,12 +42,18 @@ export type DataSetListResponse = {
total: number
}
/** A single question/answer pair. */
export type QA = {
question: string
answer: string
}
/**
 * Shape of an indexing estimate result.
 * NOTE(review): field meanings below are inferred from their names only —
 * confirm against the API that produces this payload.
 */
export type IndexingEstimateResponse = {
tokens: number
total_price: number
// presumably the currency `total_price` is expressed in — TODO confirm
currency: string
total_segments: number
// Preview entries as plain strings.
preview: string[]
// Optional list of question/answer pairs; may be absent from the payload.
qa_preview?: QA[]
}
export type FileIndexingEstimateResponse = {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment