Commit 9763fc28 authored by jyong's avatar jyong

Merge remote-tracking branch 'origin/feat/milvus-support' into feat/milvus-support

parents b87e2ff5 c76b38b5
<svg width="12" height="12" viewBox="0 0 12 12" fill="none" xmlns="http://www.w3.org/2000/svg">
<g id="hash-02">
<path id="Icon" d="M4.74999 1.5L3.24999 10.5M8.74998 1.5L7.24998 10.5M10.25 4H1.75M9.75 8H1.25" stroke="#98A2B3" stroke-linecap="round" stroke-linejoin="round"/>
</g>
</svg>
{
"icon": {
"type": "element",
"isRootNode": true,
"name": "svg",
"attributes": {
"width": "12",
"height": "12",
"viewBox": "0 0 12 12",
"fill": "none",
"xmlns": "http://www.w3.org/2000/svg"
},
"children": [
{
"type": "element",
"name": "g",
"attributes": {
"id": "hash-02"
},
"children": [
{
"type": "element",
"name": "path",
"attributes": {
"id": "Icon",
"d": "M4.74999 1.5L3.24999 10.5M8.74998 1.5L7.24998 10.5M10.25 4H1.75M9.75 8H1.25",
"stroke": "currentColor",
"stroke-linecap": "round",
"stroke-linejoin": "round"
},
"children": []
}
]
}
]
},
"name": "Hash02"
}
\ No newline at end of file
// GENERATED BY script
// DO NOT EDIT IT MANUALLY
import * as React from 'react'
import data from './Hash02.json'
import IconBase from '@/app/components/base/icons/IconBase'
import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'
/**
 * Hash02 icon component.
 * Forwards every received prop and the ref to `IconBase`, supplying the
 * icon definition imported from `./Hash02.json` as `data`.
 */
const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>(
  (iconProps, forwardedRef) => {
    // The JSON import is untyped, so it is cast to the shape IconBase expects.
    const iconData = data as IconData
    return <IconBase {...iconProps} ref={forwardedRef} data={iconData} />
  },
)
export default Icon
export { default as Check } from './Check'
export { default as Edit03 } from './Edit03'
export { default as Hash02 } from './Hash02'
export { default as Loading02 } from './Loading02'
export { default as LogOut01 } from './LogOut01'
export { default as Trash03 } from './Trash03'
......
......@@ -7,7 +7,7 @@ import { XMarkIcon } from '@heroicons/react/20/solid'
import cn from 'classnames'
import Link from 'next/link'
import { groupBy } from 'lodash-es'
import PreviewItem from './preview-item'
import PreviewItem, { PreviewType } from './preview-item'
import s from './index.module.css'
import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
......@@ -97,6 +97,7 @@ const StepTwo = ({
const [docForm, setDocForm] = useState<DocForm | string>(
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
)
const [previewSwitched, setPreviewSwitched] = useState(false)
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
......@@ -154,9 +155,9 @@ const StepTwo = ({
}
}
const fetchFileIndexingEstimate = async () => {
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
// eslint-disable-next-line @typescript-eslint/no-use-before-define
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams())
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm))
if (segmentationType === SegmentType.CUSTOM)
setCustomFileIndexingEstimate(res)
......@@ -214,8 +215,7 @@ const StepTwo = ({
}) as NotionInfo[]
}
// TODO
const getFileIndexingEstimateParams = () => {
const getFileIndexingEstimateParams = (docForm: DocForm) => {
let params
if (dataSourceType === DataSourceType.FILE) {
params = {
......@@ -227,6 +227,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
}
}
if (dataSourceType === DataSourceType.NOTION) {
......@@ -237,6 +238,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
}
}
return params
......@@ -337,13 +339,22 @@ const StepTwo = ({
}
}
const handleCheck = (state: boolean) => {
const handleSwitch = (state: boolean) => {
if (state)
setDocForm(DocForm.QA)
else
setDocForm(DocForm.TEXT)
}
/**
 * Switch the segment preview to the Q&A form.
 * Marks the preview as switched, clears the estimate that belongs to the
 * current segmentation type, then requests a new estimate with `DocForm.QA`.
 */
const previewSwitch = async () => {
  setPreviewSwitched(true)
  // Only the estimate of the active segmentation type is cleared.
  const clearActiveEstimate = segmentationType === SegmentType.AUTO
    ? setAutomaticFileIndexingEstimate
    : setCustomFileIndexingEstimate
  clearActiveEstimate(null)
  await fetchFileIndexingEstimate(DocForm.QA)
}
useEffect(() => {
// fetch rules
if (!isSetting) {
......@@ -390,10 +401,12 @@ const StepTwo = ({
setAutomaticFileIndexingEstimate(null)
setShowPreview()
fetchFileIndexingEstimate()
setPreviewSwitched(false)
}
else {
hidePreview()
setCustomFileIndexingEstimate(null)
setPreviewSwitched(false)
}
}, [segmentationType, indexType])
......@@ -563,7 +576,7 @@ const StepTwo = ({
<div className='shrink-0'>
<Switch
defaultValue={docForm === DocForm.QA}
onChange={handleCheck}
onChange={handleSwitch}
size='md'
/>
</div>
......@@ -644,24 +657,45 @@ const StepTwo = ({
{(showPreview)
? (
<div ref={previewScrollRef} className={cn(s.previewWrap, 'relativeh-full overflow-y-scroll border-l border-[#F2F4F7]')}>
{/* TODO preview switch */}
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`, ' flex items-center justify-between px-8')}>
<span>{t('datasetCreation.stepTwo.previewTitle')}</span>
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
<XMarkIcon className='h-4 w-4'></XMarkIcon>
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`)}>
<div className='flex items-center justify-between px-8'>
<div className='grow flex items-center'>
<div>{t('datasetCreation.stepTwo.previewTitle')}</div>
{docForm === DocForm.QA && !previewSwitched && (
<Button className='ml-2 !h-[26px] !py-[3px] !px-2 !text-xs !font-medium !text-primary-600' onClick={previewSwitch}>{t('datasetCreation.stepTwo.previewButton')}</Button>
)}
</div>
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
<XMarkIcon className='h-4 w-4'></XMarkIcon>
</div>
</div>
{docForm === DocForm.QA && !previewSwitched && (
<div className='px-8 pr-12 text-xs text-gray-500'>
<span>{t('datasetCreation.stepTwo.previewSwitchTipStart')}</span>
<span className='text-amber-600'>{t('datasetCreation.stepTwo.previewSwitchTipEnd')}</span>
</div>
)}
</div>
<div className='my-4 px-8 space-y-4'>
{fileIndexingEstimate?.preview
? (
<>
{fileIndexingEstimate?.preview.map((item, index) => (
<PreviewItem key={item} content={item} index={index + 1} />
))}
</>
)
: <div className='flex items-center justify-center h-[200px]'><Loading type='area'></Loading></div>
}
{previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && (
<>
{fileIndexingEstimate?.qa_preview.map((item, index) => (
<PreviewItem type={PreviewType.QA} key={item.question} qa={item} index={index + 1} />
))}
</>
)}
{(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && (
<>
{fileIndexingEstimate?.preview.map((item, index) => (
<PreviewItem type={PreviewType.TEXT} key={item} content={item} index={index + 1} />
))}
</>
)}
{!fileIndexingEstimate?.preview && !fileIndexingEstimate?.qa_preview && (
<div className='flex items-center justify-center h-[200px]'>
<Loading type='area' />
</div>
)}
</div>
</div>
)
......
'use client'
import React, { FC } from 'react'
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
export interface IPreviewItemProps {
export type IPreviewItemProps = {
type: string
index: number
content: string
content?: string
qa?: {
answer: string
question: string
}
}
/** Display modes accepted by the `type` prop of `PreviewItem`. */
export enum PreviewType {
TEXT = 'text', // renders the plain `content` string
QA = 'QA', // renders the `qa` question/answer pair
}
const sharpIcon = (
......@@ -21,12 +32,16 @@ const textIcon = (
)
const PreviewItem: FC<IPreviewItemProps> = ({
type = PreviewType.TEXT,
index,
content,
qa,
}) => {
const { t } = useTranslation()
const charNums = (content || '').length
const formatedIndex = (() => (index + '').padStart(3, '0'))()
const charNums = type === PreviewType.TEXT
? (content || '').length
: (qa?.answer || '').length + (qa?.question || '').length
const formatedIndex = (() => String(index).padStart(3, '0'))()
return (
<div className='p-4 rounded-xl bg-gray-50'>
......@@ -41,7 +56,21 @@ const PreviewItem: FC<IPreviewItemProps> = ({
</div>
</div>
<div className='mt-2 max-h-[120px] line-clamp-6 overflow-hidden text-sm text-gray-800'>
<div style={{ whiteSpace: 'pre-line'}}>{content}</div>
{type === PreviewType.TEXT && (
<div style={{ whiteSpace: 'pre-line' }}>{content}</div>
)}
{type === PreviewType.QA && (
<div style={{ whiteSpace: 'pre-line' }}>
<div className='flex'>
<div className='shrink-0 mr-2 text-medium text-gray-400'>Q</div>
<div style={{ whiteSpace: 'pre-line' }}>{qa?.question}</div>
</div>
<div className='flex'>
<div className='shrink-0 mr-2 text-medium text-gray-400'>A</div>
<div style={{ whiteSpace: 'pre-line' }}>{qa?.answer}</div>
</div>
</div>
)}
</div>
</div>
)
......
......@@ -25,6 +25,7 @@ import type { CommonResponse } from '@/models/common'
import { Edit03, XClose } from '@/app/components/base/icons/src/vender/line/general'
import AutoHeightTextarea from '@/app/components/base/auto-height-textarea/common'
import Button from '@/app/components/base/button'
import NewSegmentModal from '@/app/components/datasets/documents/detail/new-segment-modal'
export const SegmentIndexTag: FC<{ positionId: string | number; className?: string }> = ({ positionId, className }) => {
const localPositionId = useMemo(() => {
......@@ -183,13 +184,15 @@ export const splitArray = (arr: any[], size = 3) => {
}
type ICompletedProps = {
showNewSegmentModal: boolean
onNewSegmentModalChange: (state: boolean) => void
// data: Array<{}> // all/part segments
}
/**
* Embedding done, show list of all segments
* Support search and filter
*/
const Completed: FC<ICompletedProps> = () => {
const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModalChange }) => {
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const { datasetId = '', documentId = '', docForm } = useContext(DocumentContext)
......@@ -329,6 +332,12 @@ const Completed: FC<ICompletedProps> = () => {
onCancel={onCloseModal}
/>
</Modal>
<NewSegmentModal
isShow={showNewSegmentModal}
docForm={docForm}
onCancel={() => onNewSegmentModalChange(false)}
onSave={() => getSegments(false)}
/>
</>
)
}
......
......@@ -54,6 +54,7 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
const { t } = useTranslation()
const router = useRouter()
const [showMetadata, setShowMetadata] = useState(true)
const [showNewSegmentModal, setShowNewSegmentModal] = useState(false)
const { data: documentDetail, error, mutate: detailMutate } = useSWR({
action: 'fetchDocumentDetail',
......@@ -100,10 +101,12 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
enabled: documentDetail?.enabled || false,
archived: documentDetail?.archived || false,
id: documentId,
doc_form: documentDetail?.doc_form || '',
}}
datasetId={datasetId}
onUpdate={handleOperate}
className='!w-[216px]'
showNewSegmentModal={() => setShowNewSegmentModal(true)}
/>
<button
className={cn(style.layoutRightIcon, showMetadata ? style.iconShow : style.iconClose)}
......@@ -114,7 +117,13 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
{isDetailLoading
? <Loading type='app' />
: <div className={`box-border h-full w-full overflow-y-scroll ${embedding ? 'py-12 px-16' : 'pb-[30px] pt-3 px-6'}`}>
{embedding ? <Embedding detail={documentDetail} detailUpdate={detailMutate} /> : <Completed />}
{embedding
? <Embedding detail={documentDetail} detailUpdate={detailMutate} />
: <Completed
showNewSegmentModal={showNewSegmentModal}
onNewSegmentModalChange={setShowNewSegmentModal}
/>
}
</div>
}
{showMetadata && <Metadata
......
import { memo, useState } from 'react'
import type { FC } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import { useParams } from 'next/navigation'
import Modal from '@/app/components/base/modal'
import Button from '@/app/components/base/button'
import AutoHeightTextarea from '@/app/components/base/auto-height-textarea/common'
import { Hash02, XClose } from '@/app/components/base/icons/src/vender/line/general'
import { ToastContext } from '@/app/components/base/toast'
import type { SegmentUpdator } from '@/models/datasets'
import { addSegment } from '@/service/datasets'
type NewSegmentModalProps = {
isShow: boolean
onCancel: () => void
docForm: string
onSave: () => void
}
/**
 * Modal dialog for manually adding a new segment to a document.
 *
 * When `docForm` is `'qa_model'` it shows separate QUESTION and ANSWER
 * textareas; otherwise it shows a single content textarea. The dataset and
 * document ids are read from the route params.
 *
 * The draft text is held in local state, so it is cleared explicitly every
 * time the modal closes. Without that, a parent that keeps this component
 * mounted and only toggles `isShow` would reopen the modal with the previous
 * text still filled in.
 *
 * @param isShow - whether the modal is visible
 * @param onCancel - called whenever the modal should close (close icon,
 *   cancel button, and after a successful save)
 * @param docForm - document form; `'qa_model'` enables the Q&A layout
 * @param onSave - called after the segment has been created
 */
const NewSegmentModal: FC<NewSegmentModalProps> = memo(({
  isShow,
  onCancel,
  docForm,
  onSave,
}) => {
  const { t } = useTranslation()
  const { notify } = useContext(ToastContext)
  const [question, setQuestion] = useState('')
  const [answer, setAnswer] = useState('')
  // True while the add request is in flight; blocks duplicate submissions.
  const [saving, setSaving] = useState(false)
  const { datasetId, documentId } = useParams()
  const isQaForm = docForm === 'qa_model'

  // Drop the draft before closing so the next opening starts empty.
  const handleCancel = () => {
    setQuestion('')
    setAnswer('')
    onCancel()
  }

  // Validate the draft, create the segment, then close and notify the parent.
  const handleSave = async () => {
    if (saving)
      return

    const params: SegmentUpdator = { content: '' }
    if (isQaForm) {
      if (!question.trim())
        return notify({ type: 'error', message: t('datasetDocuments.segment.questionEmpty') })
      if (!answer.trim())
        return notify({ type: 'error', message: t('datasetDocuments.segment.answerEmpty') })

      params.content = question
      params.answer = answer
    }
    else {
      // In text mode the `question` state holds the segment content.
      if (!question.trim())
        return notify({ type: 'error', message: t('datasetDocuments.segment.contentEmpty') })

      params.content = question
    }

    setSaving(true)
    try {
      await addSegment({ datasetId, documentId, body: params })
      notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
      handleCancel()
      onSave()
    }
    finally {
      // Always release the guard, even when the request rejects.
      setSaving(false)
    }
  }

  const renderContent = () => {
    if (isQaForm) {
      return (
        <>
          <div className='mb-1 text-xs font-medium text-gray-500'>QUESTION</div>
          <AutoHeightTextarea
            outerClassName='mb-4'
            className='leading-6 text-md text-gray-800'
            value={question}
            placeholder={t('datasetDocuments.segment.questionPlaceholder') || ''}
            onChange={e => setQuestion(e.target.value)}
            autoFocus
          />
          <div className='mb-1 text-xs font-medium text-gray-500'>ANSWER</div>
          <AutoHeightTextarea
            outerClassName='mb-4'
            className='leading-6 text-md text-gray-800'
            value={answer}
            placeholder={t('datasetDocuments.segment.answerPlaceholder') || ''}
            onChange={e => setAnswer(e.target.value)}
          />
        </>
      )
    }

    return (
      <AutoHeightTextarea
        className='leading-6 text-md text-gray-800'
        value={question}
        placeholder={t('datasetDocuments.segment.contentPlaceholder') || ''}
        onChange={e => setQuestion(e.target.value)}
        autoFocus
      />
    )
  }

  return (
    <Modal isShow={isShow} onClose={() => {}} className='pt-8 px-8 pb-6 !max-w-[640px] !rounded-xl'>
      <div className={'flex flex-col relative'}>
        <div className='absolute right-0 -top-0.5 flex items-center h-6'>
          <div className='flex justify-center items-center w-6 h-6 cursor-pointer' onClick={handleCancel}>
            <XClose className='w-4 h-4 text-gray-500' />
          </div>
        </div>
        <div className='mb-[14px]'>
          <span className='inline-flex items-center px-1.5 h-5 border border-gray-200 rounded-md'>
            <Hash02 className='mr-0.5 w-3 h-3 text-gray-400' />
            <span className='text-[11px] font-medium text-gray-500 italic'>
              {
                isQaForm
                  ? t('datasetDocuments.segment.newQaSegment')
                  : t('datasetDocuments.segment.newTextSegment')
              }
            </span>
          </span>
        </div>
        <div className='mb-4 py-1.5 h-[420px] overflow-auto'>{renderContent()}</div>
        <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetDocuments.segment.keywords')}</div>
        <div className='mb-8'></div>
        <div className='flex justify-end'>
          <Button
            className='mr-2 !h-9 !px-4 !py-2 text-sm font-medium text-gray-700 !rounded-lg'
            onClick={handleCancel}>
            {t('common.operation.cancel')}
          </Button>
          <Button
            type='primary'
            className='!h-9 !px-4 !py-2 text-sm font-medium !rounded-lg'
            onClick={handleSave}>
            {t('common.operation.save')}
          </Button>
        </div>
      </div>
    </Modal>
  )
})
export default NewSegmentModal
......@@ -27,6 +27,7 @@ import NotionIcon from '@/app/components/base/notion-icon'
import ProgressBar from '@/app/components/base/progress-bar'
import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
import type { CommonResponse } from '@/models/common'
import { FilePlus02 } from '@/app/components/base/icons/src/vender/line/files'
export const SettingsIcon: FC<{ className?: string }> = ({ className }) => {
return <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
......@@ -94,12 +95,14 @@ export const OperationAction: FC<{
archived: boolean
id: string
data_source_type: string
doc_form: string
}
datasetId: string
onUpdate: (operationName?: string) => void
scene?: 'list' | 'detail'
className?: string
}> = ({ datasetId, detail, onUpdate, scene = 'list', className = '' }) => {
showNewSegmentModal?: () => void
}> = ({ datasetId, detail, onUpdate, scene = 'list', className = '', showNewSegmentModal }) => {
const { id, enabled = false, archived = false, data_source_type } = detail || {}
const [showModal, setShowModal] = useState(false)
const { notify } = useContext(ToastContext)
......@@ -185,6 +188,14 @@ export const OperationAction: FC<{
<SettingsIcon />
<span className={s.actionName}>{t('datasetDocuments.list.action.settings')}</span>
</div>
{
!isListScene && (
<div className={s.actionItem} onClick={showNewSegmentModal}>
<FilePlus02 className='w-4 h-4 text-gray-500' />
<span className={s.actionName}>{t('datasetDocuments.list.action.add')}</span>
</div>
)
}
{
data_source_type === 'notion_import' && (
<div className={s.actionItem} onClick={() => onOperate('sync')}>
......@@ -339,7 +350,7 @@ const DocumentList: FC<IDocumentListProps> = ({ documents = [], datasetId, onUpd
<td>
<OperationAction
datasetId={datasetId}
detail={pick(doc, ['enabled', 'archived', 'id', 'data_source_type'])}
detail={pick(doc, ['enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
onUpdate={onUpdate}
/>
</td>
......
......@@ -94,6 +94,9 @@ const translation = {
sideTipP3: 'Cleaning removes unnecessary characters and formats, making datasets cleaner and easier to parse.',
sideTipP4: 'Proper segmentation and cleaning improve model performance, providing more accurate and valuable results.',
previewTitle: 'Preview',
previewButton: 'Switching to Q&A format',
previewSwitchTipStart: 'The current segment preview is in text format, switching to a question-and-answer format preview will',
previewSwitchTipEnd: ' consume additional tokens',
characters: 'characters',
indexSettedTip: 'To change the index method, please go to the ',
datasetSettingLink: 'dataset settings.',
......
......@@ -94,6 +94,9 @@ const translation = {
sideTipP3: '清洗则是对文本进行预处理,删除不必要的字符、符号或格式,使数据集更加干净、整洁,便于模型解析。',
sideTipP4: '通过对数据集进行适当的分段和清洗,可以提高模型在实际应用中的表现,从而为用户提供更准确、更有价值的结果。',
previewTitle: '分段预览',
previewButton: '切换至 Q&A 形式',
previewSwitchTipStart: '当前分段预览是文本模式,切换到 Q&A 模式将会',
previewSwitchTipEnd: '消耗额外的 token',
characters: '字符',
indexSettedTip: '要更改索引方法,请转到',
datasetSettingLink: '数据集设置。',
......
......@@ -17,6 +17,7 @@ const translation = {
action: {
uploadFile: 'Upload new file',
settings: 'Segment settings',
add: 'Add new segment',
archive: 'Archive',
delete: 'Delete',
enableWarning: 'Archived file cannot be enabled',
......@@ -316,6 +317,8 @@ const translation = {
answerEmpty: 'Answer can not be empty',
contentPlaceholder: 'add content here',
contentEmpty: 'Content can not be empty',
newTextSegment: 'New Text Segment',
newQaSegment: 'New Q&A Segment',
},
}
......
......@@ -17,6 +17,7 @@ const translation = {
action: {
uploadFile: '上传新文件',
settings: '分段设置',
add: '添加新分段',
archive: '归档',
delete: '删除',
enableWarning: '归档的文件无法启用',
......@@ -315,6 +316,8 @@ const translation = {
answerEmpty: '答案不能为空',
contentPlaceholder: '在这里添加内容',
contentEmpty: '内容不能为空',
newTextSegment: '新文本分段',
newQaSegment: '新问答分段',
},
}
......
......@@ -42,12 +42,18 @@ export type DataSetListResponse = {
total: number
}
/** A single question/answer pair; the element type of `IndexingEstimateResponse.qa_preview`. */
export type QA = {
question: string
answer: string
}
/**
 * Shape of an indexing estimate response.
 * NOTE(review): the meaning and units of `tokens`, `total_price`, `currency`
 * and `total_segments` are not visible here — confirm against the backend API.
 */
export type IndexingEstimateResponse = {
tokens: number
total_price: number
currency: string
total_segments: number
// Preview entries as plain strings.
preview: string[]
// Optional question/answer preview entries.
// NOTE(review): presumably only returned for Q&A-form estimates — confirm.
qa_preview?: QA[]
}
export type FileIndexingEstimateResponse = {
......
......@@ -160,6 +160,10 @@ export const updateSegment: Fetcher<{ data: SegmentDetailModel; doc_form: string
return patch(`/datasets/${datasetId}/documents/${documentId}/segments/${segmentId}`, { body }) as Promise<{ data: SegmentDetailModel; doc_form: string }>
}
/**
 * Create a new segment under a document.
 * POSTs `body` to the document's `segment` endpoint and returns the
 * response typed as the segment data together with the document's `doc_form`.
 */
export const addSegment: Fetcher<{ data: SegmentDetailModel; doc_form: string }, { datasetId: string; documentId: string; body: SegmentUpdator }> = (params) => {
  const { datasetId, documentId, body } = params
  const url = `/datasets/${datasetId}/documents/${documentId}/segment`
  return post(url, { body }) as Promise<{ data: SegmentDetailModel; doc_form: string }>
}
// hit testing
export const hitTesting: Fetcher<HitTestingResponse, { datasetId: string; queryText: string }> = ({ datasetId, queryText }) => {
return post(`/datasets/${datasetId}/hit-testing`, { body: { query: queryText } }) as Promise<HitTestingResponse>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment