Commit 44fcd1c1 authored by JzoNg's avatar JzoNg

feat: embedding process

parent 47b299f5
<svg width="12" height="12" viewBox="0 0 12 12" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M5.75 4.5C6.7165 4.5 7.5 3.7165 7.5 2.75C7.5 1.7835 6.7165 1 5.75 1C4.7835 1 4 1.7835 4 2.75C4 3.7165 4.7835 4.5 5.75 4.5Z" fill="#444CE7"/>
<path d="M3.48775 4.314C3.36842 4.14172 3.30875 4.05558 3.24448 4.02712C3.18679 4.00157 3.12605 3.99844 3.06603 4.01794C2.99918 4.03965 2.94661 4.10099 2.84146 4.22367C2.41951 4.71598 2.13172 5.32705 2.03543 6.00009H2C1.72386 6.00009 1.5 5.77623 1.5 5.50009C1.5 5.31565 1.59961 5.15388 1.75036 5.06668C1.98939 4.9284 2.07107 4.62254 1.9328 4.38351C1.79453 4.14448 1.48867 4.0628 1.24964 4.20107C0.802591 4.45967 0.5 4.94425 0.5 5.50009C0.5 6.32852 1.17157 7.00009 2 7.00009H2.03545C2.14342 7.75422 2.49192 8.43113 2.99997 8.94961L2.99997 10.1117C2.99994 10.1712 2.99992 10.2424 3.00504 10.305C3.01097 10.3776 3.02619 10.4816 3.08171 10.5906C3.15362 10.7317 3.26835 10.8465 3.40948 10.9184C3.51845 10.9739 3.62245 10.9891 3.69505 10.9951C3.7577 11.0002 3.82881 11.0001 3.88836 11.0001H4.86154C4.92109 11.0001 4.99224 11.0002 5.05488 10.9951C5.12749 10.9891 5.23149 10.9739 5.34046 10.9184C5.48158 10.8465 5.59632 10.7317 5.66822 10.5906C5.72375 10.4816 5.73897 10.3776 5.7449 10.305C5.75002 10.2424 5.75 10.1712 5.74997 10.1117L5.74997 10.0001H6.24998L6.24997 10.1115C6.24995 10.1711 6.24992 10.2422 6.25504 10.3048C6.26097 10.3775 6.2762 10.4815 6.33172 10.5904C6.40363 10.7315 6.51836 10.8463 6.65948 10.9182C6.76846 10.9737 6.87245 10.9889 6.94506 10.9949C7.0077 11 7.0788 11 7.13835 10.9999H8.11159C8.17113 11 8.24229 11 8.30493 10.9949C8.37753 10.9889 8.48153 10.9737 8.5905 10.9182C8.73162 10.8463 8.84636 10.7315 8.91827 10.5904C8.97379 10.4815 8.98901 10.3775 8.99494 10.3048C9.00006 10.2422 9.00004 10.1711 9.00001 10.1115L9.00001 9.66299C9.55312 9.40029 10.0258 8.99721 10.3726 8.49993L10.6116 8.49994C10.6711 8.49996 10.7423 8.49999 10.8049 8.49487C10.8775 8.48893 10.9815 8.47371 11.0905 8.41819C11.2316 8.34628 11.3464 8.23155 11.4183 8.09043C11.4738 7.98145 11.489 7.87746 11.4949 7.80485C11.5001 7.74221 11.5 7.67109 11.5 7.61154V5.88181C11.5 5.82509 11.5001 5.75735 11.4954 5.69761C11.49 5.62851 11.4763 5.5294 11.4257 5.42448C11.352 5.27143 11.2285 5.14794 11.0755 5.07422C10.9705 5.02369 10.8714 5.00992 10.8023 5.00454C10.7577 5.00106 10.7087 5.0002 10.6631 4.99999C10.4953 4.64662 10.2702 4.32616 10 4.05044L10 3.51615C10 3.43874 10.0001 3.35111 9.99335 3.27574C9.98593 3.19252 9.96656 3.06385 9.88754 2.93633C9.78902 2.77733 9.63465 2.66089 9.4547 2.60984C9.31038 2.56889 9.18134 2.58561 9.09929 2.60134C9.02497 2.61559 8.94073 2.63969 8.8663 2.66098L8.78839 2.68324C8.6859 2.71252 8.63465 2.72716 8.59861 2.75356C8.5638 2.77904 8.54252 2.80415 8.52309 2.84266C8.50297 2.88255 8.49603 2.94339 8.48215 3.06506C8.32585 4.43546 7.16224 5.5 5.75 5.5C4.81225 5.5 3.98413 5.03063 3.48775 4.314Z" fill="#444CE7"/>
</svg>
<svg width="12" height="12" viewBox="0 0 12 12" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M6 0.5C6.27614 0.5 6.5 0.723858 6.5 1V2C6.5 2.27614 6.27614 2.5 6 2.5C5.72386 2.5 5.5 2.27614 5.5 2V1C5.5 0.723858 5.72386 0.5 6 0.5Z" fill="#FB6514"/>
<path d="M2.81791 2.11092C2.62265 1.91566 2.30606 1.91566 2.1108 2.11092C1.91554 2.30619 1.91554 2.62277 2.1108 2.81803L2.81791 3.52514C3.01317 3.7204 3.32975 3.7204 3.52502 3.52514C3.72028 3.32988 3.72028 3.01329 3.52502 2.81803L2.81791 2.11092Z" fill="#FB6514"/>
<path d="M0.5 6C0.5 5.72386 0.723858 5.5 1 5.5H2C2.27614 5.5 2.5 5.72386 2.5 6C2.5 6.27614 2.27614 6.5 2 6.5H1C0.723858 6.5 0.5 6.27614 0.5 6Z" fill="#FB6514"/>
<path d="M10 5.5C9.72386 5.5 9.5 5.72386 9.5 6C9.5 6.27614 9.72386 6.5 10 6.5H11C11.2761 6.5 11.5 6.27614 11.5 6C11.5 5.72386 11.2761 5.5 11 5.5H10Z" fill="#FB6514"/>
<path d="M9.18192 8.47482C8.98666 8.27955 8.67008 8.27955 8.47482 8.47482C8.27955 8.67008 8.27955 8.98666 8.47482 9.18192L9.18192 9.88903C9.37718 10.0843 9.69377 10.0843 9.88903 9.88903C10.0843 9.69377 10.0843 9.37718 9.88903 9.18192L9.18192 8.47482Z" fill="#FB6514"/>
<path d="M9.88903 2.81803C10.0843 2.62277 10.0843 2.30619 9.88903 2.11092C9.69377 1.91566 9.37718 1.91566 9.18192 2.11092L8.47482 2.81803C8.27955 3.01329 8.27955 3.32988 8.47482 3.52514C8.67008 3.7204 8.98666 3.7204 9.18192 3.52514L9.88903 2.81803Z" fill="#FB6514"/>
<path d="M6 9.5C6.27614 9.5 6.5 9.72386 6.5 10V11C6.5 11.2761 6.27614 11.5 6 11.5C5.72386 11.5 5.5 11.2761 5.5 11V10C5.5 9.72386 5.72386 9.5 6 9.5Z" fill="#FB6514"/>
<path d="M3.52502 9.18192C3.72028 8.98666 3.72028 8.67008 3.52502 8.47482C3.32975 8.27955 3.01317 8.27955 2.81791 8.47482L2.1108 9.18192C1.91554 9.37718 1.91554 9.69377 2.1108 9.88903C2.30606 10.0843 2.62265 10.0843 2.81791 9.88903L3.52502 9.18192Z" fill="#FB6514"/>
<path d="M6.44837 3.27869C6.36413 3.10804 6.19032 3 6.00001 3C5.8097 3 5.6359 3.10804 5.55166 3.27869L4.89538 4.60823L3.4277 4.82276C3.23942 4.85028 3.08308 4.98228 3.02439 5.16328C2.9657 5.34429 3.01484 5.54291 3.15115 5.67568L4.21275 6.70968L3.96221 8.17048C3.93004 8.35807 4.00716 8.54766 4.16115 8.65953C4.31514 8.77139 4.51928 8.78613 4.68774 8.69754L6.00001 8.00742L7.31229 8.69754C7.48075 8.78613 7.68489 8.77139 7.83888 8.65953C7.99287 8.54766 8.06999 8.35807 8.03782 8.17048L7.78728 6.70968L8.84888 5.67568C8.98519 5.54291 9.03433 5.34429 8.97564 5.16328C8.91695 4.98228 8.76061 4.85028 8.57233 4.82276L7.10465 4.60823L6.44837 3.27869Z" fill="#FB6514"/>
</svg>
.progressBar {
@apply absolute top-0 h-4;
}
.barPaused {
background: linear-gradient(
270deg,
rgba(208, 213, 221, 0.8) -2.21%,
rgba(208, 213, 221, 0.5) 100%
);
}
.barProcessing {
background: linear-gradient(
90deg,
rgba(41, 112, 255, 0.9) 0%,
rgba(21, 94, 239, 0.9) 100%
);
}
.progressContainer {
@apply relative flex mb-2 h-4 rounded-md w-full;
}
.progressBgItem {
@apply flex-1 border-r border-r-white first:rounded-l-md;
}
.progressBgItem:nth-last-child(2) {
@apply rounded-r-md;
}
.cost {
@apply w-full flex justify-between items-center text-xs text-gray-700;
}
.embeddingStatus {
@apply flex items-center justify-between text-gray-900 font-medium text-sm mr-2;
}
.commonIcon {
@apply w-3 h-3 mr-1 inline-block align-middle;
}
.highIcon {
mask-image: url(/../assets/star.svg);
@apply bg-orange-500;
}
.economyIcon {
background-color: #444ce7;
mask-image: url(/../assets/normal.svg);
}
.tokens {
@apply text-xs font-medium px-1;
}
.price {
color: #f79009;
@apply text-xs font-medium;
}
import type { FC } from 'react'
import React, { useCallback, useEffect, useMemo } from 'react'
import useSWR from 'swr'
import { useRouter } from 'next/navigation'
import { useTranslation } from 'react-i18next'
import { omit } from 'lodash-es'
import { ArrowRightIcon } from '@heroicons/react/24/solid'
import { useGetState } from 'ahooks'
import cn from 'classnames'
import s from './index.module.css'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import Button from '@/app/components/base/button'
import type { FullDocumentDetail, ProcessRuleResponse } from '@/models/datasets'
import { formatNumber } from '@/utils/format'
import { fetchIndexingStatus as doFetchIndexingStatus, fetchIndexingEstimate, fetchProcessRule } from '@/service/datasets'
type Props = {
datasetId: string
batchId: string
documents?: FullDocumentDetail[]
indexingType?: string
}
const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => {
const { t } = useTranslation()
const segmentationRuleMap = {
mode: t('datasetDocuments.embedding.mode'),
segmentLength: t('datasetDocuments.embedding.segmentLength'),
textCleaning: t('datasetDocuments.embedding.textCleaning'),
}
const getRuleName = (key: string) => {
if (key === 'remove_extra_spaces')
return t('datasetCreation.stepTwo.removeExtraSpaces')
if (key === 'remove_urls_emails')
return t('datasetCreation.stepTwo.removeUrlEmails')
if (key === 'remove_stopwords')
return t('datasetCreation.stepTwo.removeStopwords')
}
const getValue = useCallback((field: string) => {
let value: string | number | undefined = '-'
switch (field) {
case 'mode':
value = sourceData?.mode === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string)
break
case 'segmentLength':
value = sourceData?.rules?.segmentation?.max_tokens
break
default:
value = sourceData?.mode === 'automatic'
? (t('datasetDocuments.embedding.automatic') as string)
// eslint-disable-next-line array-callback-return
: sourceData?.rules?.pre_processing_rules?.map((rule) => {
if (rule.enabled)
return getRuleName(rule.id)
}).filter(Boolean).join(';')
break
}
return value
}, [sourceData])
return <div className='flex flex-col pt-8 pb-10 first:mt-0'>
{Object.keys(segmentationRuleMap).map((field) => {
return <FieldInfo
key={field}
label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
displayedValue={String(getValue(field))}
/>
})}
</div>
}
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType }) => {
const { t } = useTranslation()
const getFirstDocument = documents[0]
const [indexingStatusDetail, setIndexingStatusDetail, getIndexingStatusDetail] = useGetState<any>(null)
const fetchIndexingStatus = async () => {
const status = await doFetchIndexingStatus({ datasetId, documentId: getFirstDocument.id })
setIndexingStatusDetail(status)
}
const [runId, setRunId, getRunId] = useGetState<any>(null)
const stopQueryStatus = () => {
clearInterval(getRunId())
}
const startQueryStatus = () => {
const runId = setInterval(() => {
const indexingStatusDetail = getIndexingStatusDetail()
if (indexingStatusDetail?.indexing_status === 'completed') {
stopQueryStatus()
return
}
fetchIndexingStatus()
}, 2500)
setRunId(runId)
}
useEffect(() => {
fetchIndexingStatus()
startQueryStatus()
return () => {
stopQueryStatus()
}
}, [])
// get rule
// get cost
// get index status
const { data: indexingEstimateDetail, error: indexingEstimateErr } = useSWR({
action: 'fetchIndexingEstimate',
datasetId,
documentId: getFirstDocument.id,
}, apiParams => fetchIndexingEstimate(omit(apiParams, 'action')), {
revalidateOnFocus: false,
})
const { data: ruleDetail, error: ruleError } = useSWR({
action: 'fetchProcessRule',
params: { documentId: getFirstDocument.id },
}, apiParams => fetchProcessRule(omit(apiParams, 'action')), {
revalidateOnFocus: false,
})
const router = useRouter()
const navToDocumentList = () => {
router.push(`/datasets/${datasetId}/documents`)
}
const isEmbedding = useMemo(() => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail])
const isEmbeddingCompleted = useMemo(() => ['completed'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail])
const isEmbeddingPaused = useMemo(() => ['paused'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail])
const isEmbeddingError = useMemo(() => ['error'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail])
const percent = useMemo(() => {
const completedCount = indexingStatusDetail?.completed_segments || 0
const totalCount = indexingStatusDetail?.total_segments || 0
if (totalCount === 0)
return 0
const percent = Math.round(completedCount * 100 / totalCount)
return percent > 100 ? 100 : percent
}, [indexingStatusDetail])
return (
<>
<div className='h-5 flex justify-between items-center mb-5'>
<div className={s.embeddingStatus}>
{isEmbedding && t('datasetDocuments.embedding.processing')}
{isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
{isEmbeddingPaused && t('datasetDocuments.embedding.paused')}
{isEmbeddingError && t('datasetDocuments.embedding.error')}
</div>
<div className={s.cost}>
{indexingType === 'high_quaility' && (
<div className='flex items-center'>
<div className={cn(s.commonIcon, s.highIcon)} />
{t('datasetDocuments.embedding.highQuality')} · {t('datasetDocuments.embedding.estimate')}
<span className={s.tokens}>{formatNumber(indexingEstimateDetail?.tokens || 0)}</span>tokens
(<span className={s.price}>${formatNumber(indexingEstimateDetail?.total_price || 0)}</span>)
</div>
)}
{indexingType === 'economy' && (
<div className='flex items-center'>
<div className={cn(s.commonIcon, s.economyIcon)} />
{t('datasetDocuments.embedding.economy')} · {t('datasetDocuments.embedding.estimate')}
<span className={s.tokens}>0</span>tokens
</div>
)}
</div>
</div>
{/* TODO progress bar */}
<div className={s.progressContainer}>
{new Array(10).fill('').map((_, idx) => <div
key={idx}
className={cn(s.progressBgItem, isEmbedding ? 'bg-primary-50' : 'bg-gray-100')}
/>)}
<div
className={cn(
'rounded-l-md',
s.progressBar,
(isEmbedding || isEmbeddingCompleted) && s.barProcessing,
(isEmbeddingPaused || isEmbeddingError) && s.barPaused,
indexingStatusDetail?.indexing_status === 'completed' && 'rounded-r-md',
)}
style={{ width: `${percent}%` }}
/>
</div>
<RuleDetail sourceData={ruleDetail} />
<div className='flex items-center gap-2 mt-10'>
<Button className='w-fit' type='primary' onClick={navToDocumentList}>
<span>{t('datasetCreation.stepThree.navTo')}</span>
<ArrowRightIcon className='h-4 w-4 ml-2 stroke-current stroke-1' />
</Button>
</div>
</>
)
}
export default EmbeddingProcess
......@@ -34,7 +34,6 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
const [result, setResult] = useState<createDocumentResponse | undefined>()
const [hasError, setHasError] = useState(false)
// TODO
const [notionPages, setNotionPages] = useState<Page[]>([])
const updateNotionPages = (value: Page[]) => {
setNotionPages(value)
......
......@@ -2,7 +2,7 @@
import React from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import EmbeddingDetail from '../../documents/detail/embedding'
import EmbeddingProcess from '../embedding-process'
import s from './index.module.css'
import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets'
......@@ -38,13 +38,11 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
<div className={s.content}>{`${t('datasetCreation.stepThree.additionP1')} ${datasetName || creationCache?.dataset?.name} ${t('datasetCreation.stepThree.additionP2')}`}</div>
</div>
)}
{/* TODO multi doc display */}
<EmbeddingDetail
datasetId={datasetId || creationCache?.dataset?.id}
documentId={creationCache?.documents[0].id}
<EmbeddingProcess
datasetId={datasetId || creationCache?.dataset?.id || ''}
batchId={creationCache?.batch || ''}
documents={creationCache?.documents as FullDocumentDetail[]}
indexingType={indexingType || creationCache?.dataset?.indexing_technique}
stopPosition='bottom'
detail={creationCache?.documents[0] as FullDocumentDetail}
/>
</div>
</div>
......
......@@ -14,9 +14,26 @@
}
.fixed {
padding-top: 12px;
font-size: 12px;
line-height: 18px;
background: rgba(255, 255, 255, 0.9);
border-bottom: 0.5px solid #EAECF0;
backdrop-filter: blur(4px);
animation: fix 0.5s;
}
@keyframes fix {
from {
padding-top: 42px;
font-size: 18px;
line-height: 28px;
}
to {
padding-top: 12px;
font-size: 12px;
line-height: 18px;
}
}
.form {
......
......@@ -311,6 +311,7 @@ const StepTwo = ({
})
updateIndexingTypeCache && updateIndexingTypeCache(indexType)
updateResultCache && updateResultCache({
batch: res.batch,
documents: [res],
})
}
......
......@@ -130,6 +130,7 @@ export type DataSourceInfo = {
export type InitialDocumentDetail = {
id: string
batch: string
position: number
dataset_id: string
data_source_type: 'upload_file'
......@@ -195,6 +196,7 @@ export type ProcessRule = {
export type createDocumentResponse = {
dataset?: DataSet
batch: string
documents: InitialDocumentDetail[]
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment