Unverified Commit 155a4733 authored by Matri, committed by GitHub

Feat/customizable file upload config (#818)

parent b7c29ea1
......@@ -62,7 +62,9 @@ DEFAULTS = {
'HOSTED_ANTHROPIC_PAID_ENABLED': 'False',
'HOSTED_ANTHROPIC_PAID_INCREASE_QUOTA': 1,
'TENANT_DOCUMENT_COUNT': 100,
'CLEAN_DAY_SETTING': 30
'CLEAN_DAY_SETTING': 30,
'UPLOAD_FILE_SIZE_LIMIT': 15,
'UPLOAD_FILE_BATCH_LIMIT': 5,
}
......@@ -244,6 +246,10 @@ class Config:
self.TENANT_DOCUMENT_COUNT = get_env('TENANT_DOCUMENT_COUNT')
self.CLEAN_DAY_SETTING = get_env('CLEAN_DAY_SETTING')
# uploading settings
self.UPLOAD_FILE_SIZE_LIMIT = int(get_env('UPLOAD_FILE_SIZE_LIMIT'))
self.UPLOAD_FILE_BATCH_LIMIT = int(get_env('UPLOAD_FILE_BATCH_LIMIT'))
class CloudEditionConfig(Config):
......
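The two new settings are read back as integers: UPLOAD_FILE_SIZE_LIMIT is a per-file limit in megabytes and UPLOAD_FILE_BATCH_LIMIT is the number of files uploaded per batch. A minimal sketch of the values the console receives when neither environment variable is overridden (the field names come from the endpoint added further down; the numbers are simply the defaults above):

// Upload limits as surfaced to the web console with the default settings.
const defaultUploadConfig = {
  file_size_limit: 15, // megabytes per file (UPLOAD_FILE_SIZE_LIMIT)
  batch_count_limit: 5, // files per batch (UPLOAD_FILE_BATCH_LIMIT)
}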
......@@ -21,10 +21,6 @@ from tasks.document_indexing_sync_task import document_indexing_sync_task
cache = TTLCache(maxsize=None, ttl=30)
FILE_SIZE_LIMIT = 15 * 1024 * 1024 # 15MB
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm']
PREVIEW_WORDS_LIMIT = 3000
class DataSourceApi(Resource):
integrate_icon_fields = {
......
......@@ -25,12 +25,28 @@ from models.model import UploadFile
cache = TTLCache(maxsize=None, ttl=30)
FILE_SIZE_LIMIT = 15 * 1024 * 1024 # 15MB
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx']
PREVIEW_WORDS_LIMIT = 3000
class FileApi(Resource):
upload_config_fields = {
'file_size_limit': fields.Integer,
'batch_count_limit': fields.Integer
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(upload_config_fields)
def get(self):
file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT")
batch_count_limit = current_app.config.get("UPLOAD_FILE_BATCH_LIMIT")
return {
'file_size_limit': file_size_limit,
'batch_count_limit': batch_count_limit
}, 200
file_fields = {
'id': fields.String,
'name': fields.String,
......@@ -60,8 +76,9 @@ class FileApi(Resource):
file_content = file.read()
file_size = len(file_content)
if file_size > FILE_SIZE_LIMIT:
message = "({file_size} > {FILE_SIZE_LIMIT})"
file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT") * 1024 * 1024
if file_size > file_size_limit:
message = "({file_size} > {file_size_limit})"
raise FileTooLargeError(message)
extension = file.filename.split('.')[-1]
......
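The new GET handler on FileApi exposes both limits, and the upload handler now derives its byte threshold from configuration: file_size_limit * 1024 * 1024, so the default of 15 becomes 15,728,640 bytes. A minimal client-side sketch of the same check, assuming a plain fetch against the console API base with authentication handled elsewhere (the web app instead goes through its service helpers, shown later in this diff):

// Fetch the upload limits and validate a browser File against them.
type UploadConfig = { file_size_limit: number; batch_count_limit: number }

async function isWithinSizeLimit(file: File, apiBase: string): Promise<boolean> {
  const res = await fetch(`${apiBase}/files/upload`, { credentials: 'include' })
  const config = (await res.json()) as UploadConfig
  // The backend compares the raw byte length against the MB limit times 1024 * 1024.
  return file.size <= config.file_size_limit * 1024 * 1024
}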
......@@ -4,7 +4,7 @@ import { useTranslation } from 'react-i18next'
import cn from 'classnames'
import { XMarkIcon } from '@heroicons/react/20/solid'
import s from './index.module.css'
import type { File } from '@/models/datasets'
import type { CustomFile as File } from '@/models/datasets'
import { fetchFilePreview } from '@/service/common'
type IProps = {
......@@ -37,7 +37,7 @@ const FilePreview = ({
}
useEffect(() => {
if (file) {
if (file?.id) {
setLoading(true)
getPreviewContent(file.id)
}
......
'use client'
import React, { useEffect, useRef, useState } from 'react'
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import cn from 'classnames'
import useSWR from 'swr'
import s from './index.module.css'
import type { File as FileEntity } from '@/models/datasets'
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { ToastContext } from '@/app/components/base/toast'
import { upload } from '@/service/base'
import { fetchFileUploadConfig } from '@/service/common'
type IFileUploaderProps = {
fileList: any[]
fileList: FileItem[]
titleClassName?: string
prepareFileList: (files: any[]) => void
onFileUpdate: (fileItem: any, progress: number, list: any[]) => void
prepareFileList: (files: FileItem[]) => void
onFileUpdate: (fileItem: FileItem, progress: number, list: FileItem[]) => void
onFileListUpdate?: (files: any) => void
onPreview: (file: FileEntity) => void
onPreview: (file: File) => void
}
const ACCEPTS = [
......@@ -30,9 +32,6 @@ const ACCEPTS = [
'.csv',
]
const MAX_SIZE = 15 * 1024 * 1024
const BATCH_COUNT = 5
const FileUploader = ({
fileList,
titleClassName,
......@@ -48,7 +47,13 @@ const FileUploader = ({
const dragRef = useRef<HTMLDivElement>(null)
const fileUploader = useRef<HTMLInputElement>(null)
const fileListRef = useRef<any>([])
const { data: fileUploadConfigResponse } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig)
const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
file_size_limit: 15,
batch_count_limit: 5,
}, [fileUploadConfigResponse])
const fileListRef = useRef<FileItem[]>([])
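Until the request resolves, fileUploadConfigResponse is undefined, so the ?? fallback inside useMemo keeps the previously hard-coded limits (15 MB, 5 files per batch) as a safe default. The same effect can be had with SWR's fallbackData option; a sketch under the assumption that the installed SWR version supports it (an alternative pattern, not what this diff does):

import useSWR from 'swr'
import { fetchFileUploadConfig } from '@/service/common'

// `data` starts as the fallback object and is replaced once /files/upload responds.
const useFileUploadConfig = () => {
  const { data } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig, {
    fallbackData: { file_size_limit: 15, batch_count_limit: 5 },
  })
  return data
}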
// utils
const getFileType = (currentFile: File) => {
......@@ -66,21 +71,21 @@ const FileUploader = ({
return `${(size / 1024 / 1024).toFixed(2)}MB`
}
const isValid = (file: File) => {
const isValid = useCallback((file: File) => {
const { size } = file
const ext = `.${getFileType(file)}`
const isValidType = ACCEPTS.includes(ext)
if (!isValidType)
notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.typeError') })
const isValidSize = size <= MAX_SIZE
const isValidSize = size <= fileUploadConfig.file_size_limit * 1024 * 1024
if (!isValidSize)
notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.size') })
notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.size', { size: fileUploadConfig.file_size_limit }) })
return isValidType && isValidSize
}
}, [fileUploadConfig, notify, t])
const fileUpload = async (fileItem: any) => {
const fileUpload = useCallback(async (fileItem: FileItem): Promise<FileItem> => {
const formData = new FormData()
formData.append('file', fileItem.file)
const onProgress = (e: ProgressEvent) => {
......@@ -90,19 +95,19 @@ const FileUploader = ({
}
}
const fileListCopy = fileListRef.current
return upload({
xhr: new XMLHttpRequest(),
data: formData,
onprogress: onProgress,
})
.then((res: FileEntity) => {
const fileListCopy = fileListRef.current
.then((res: File) => {
const completeFile = {
fileID: fileItem.fileID,
file: res,
progress: -1,
}
const index = fileListCopy.findIndex((item: any) => item.fileID === fileItem.fileID)
const index = fileListCopy.findIndex(item => item.fileID === fileItem.fileID)
fileListCopy[index] = completeFile
onFileUpdate(completeFile, 100, fileListCopy)
return Promise.resolve({ ...completeFile })
......@@ -113,42 +118,44 @@ const FileUploader = ({
return Promise.resolve({ ...fileItem })
})
.finally()
}
const uploadBatchFiles = (bFiles: any) => {
bFiles.forEach((bf: any) => (bf.progress = 0))
return Promise.all(bFiles.map((bFile: any) => fileUpload(bFile)))
}
const uploadMultipleFiles = async (files: any) => {
}, [fileListRef, notify, onFileUpdate, t])
const uploadBatchFiles = useCallback((bFiles: FileItem[]) => {
bFiles.forEach(bf => (bf.progress = 0))
return Promise.all(bFiles.map(fileUpload))
}, [fileUpload])
const uploadMultipleFiles = useCallback(async (files: FileItem[]) => {
const batchCountLimit = fileUploadConfig.batch_count_limit
const length = files.length
let start = 0
let end = 0
while (start < length) {
if (start + BATCH_COUNT > length)
if (start + batchCountLimit > length)
end = length
else
end = start + BATCH_COUNT
end = start + batchCountLimit
const bFiles = files.slice(start, end)
await uploadBatchFiles(bFiles)
start = end
}
}
const initialUpload = (files: any) => {
}, [fileUploadConfig, uploadBatchFiles])
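uploadMultipleFiles walks the selected files in windows of batch_count_limit, awaiting each Promise.all before starting the next, so at most batch_count_limit uploads run concurrently. The same slicing logic in isolation, with a generic worker in place of fileUpload (names here are illustrative):

// Run `worker` over `items` in fixed-size batches: items within a batch run in
// parallel, batches run one after another.
async function processInBatches<T, R>(
  items: T[],
  batchSize: number,
  worker: (item: T) => Promise<R>,
): Promise<R[]> {
  const results: R[] = []
  for (let start = 0; start < items.length; start += batchSize) {
    const batch = items.slice(start, start + batchSize)
    results.push(...await Promise.all(batch.map(worker)))
  }
  return results
}
// e.g. processInBatches(preparedFiles, fileUploadConfig.batch_count_limit, fileUpload)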
const initialUpload = useCallback((files: File[]) => {
if (!files.length)
return false
const preparedFiles = files.map((file: any, index: number) => {
const fileItem = {
fileID: `file${index}-${Date.now()}`,
file,
progress: -1,
}
return fileItem
})
const preparedFiles = files.map((file, index) => ({
fileID: `file${index}-${Date.now()}`,
file,
progress: -1,
}))
const newFiles = [...fileListRef.current, ...preparedFiles]
prepareFileList(newFiles)
fileListRef.current = newFiles
uploadMultipleFiles(preparedFiles)
}
}, [prepareFileList, uploadMultipleFiles])
const handleDragEnter = (e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
......@@ -164,18 +171,17 @@ const FileUploader = ({
e.target === dragRef.current && setDragging(false)
}
const handleDrop = (e: DragEvent) => {
const handleDrop = useCallback((e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
setDragging(false)
if (!e.dataTransfer)
return
const files = [...e.dataTransfer.files]
const validFiles = files.filter(file => isValid(file))
// fileUpload(files[0])
const files = [...e.dataTransfer.files] as File[]
const validFiles = files.filter(isValid)
initialUpload(validFiles)
}
}, [initialUpload, isValid])
const selectHandle = () => {
if (fileUploader.current)
......@@ -186,13 +192,13 @@ const FileUploader = ({
if (fileUploader.current)
fileUploader.current.value = ''
fileListRef.current = fileListRef.current.filter((item: any) => item.fileID !== fileID)
fileListRef.current = fileListRef.current.filter(item => item.fileID !== fileID)
onFileListUpdate?.([...fileListRef.current])
}
const fileChangeHandle = (e: React.ChangeEvent<HTMLInputElement>) => {
const files = [...(e.target.files ?? [])].filter(file => isValid(file))
initialUpload(files)
}
const fileChangeHandle = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
const files = [...(e.target.files ?? [])] as File[]
initialUpload(files.filter(isValid))
}, [isValid, initialUpload])
useEffect(() => {
dropRef.current?.addEventListener('dragenter', handleDragEnter)
......@@ -205,7 +211,7 @@ const FileUploader = ({
dropRef.current?.removeEventListener('dragleave', handleDragLeave)
dropRef.current?.removeEventListener('drop', handleDrop)
}
}, [])
}, [handleDrop])
return (
<div className={s.fileUploader}>
......@@ -225,7 +231,7 @@ const FileUploader = ({
<span>{t('datasetCreation.stepOne.uploader.button')}</span>
<label className={s.browse} onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.browse')}</label>
</div>
<div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip')}</div>
<div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip', { size: fileUploadConfig.file_size_limit })}</div>
{dragging && <div ref={dragRef} className={s.draggingCover}/>}
</div>
<div className={s.fileList}>
......
......@@ -8,7 +8,7 @@ import StepOne from './step-one'
import StepTwo from './step-two'
import StepThree from './step-three'
import { DataSourceType } from '@/models/datasets'
import type { DataSet, createDocumentResponse } from '@/models/datasets'
import type { DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
import { fetchDataSource, fetchTenantInfo } from '@/service/common'
import { fetchDataDetail } from '@/service/datasets'
import type { DataSourceNotionPage } from '@/models/common'
......@@ -30,7 +30,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
const [step, setStep] = useState(1)
const [indexingTypeCache, setIndexTypeCache] = useState('')
const [fileList, setFiles] = useState<any[]>([])
const [fileList, setFiles] = useState<FileItem[]>([])
const [result, setResult] = useState<createDocumentResponse | undefined>()
const [hasError, setHasError] = useState(false)
......@@ -39,12 +39,12 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
setNotionPages(value)
}
const updateFileList = (preparedFiles: any) => {
const updateFileList = (preparedFiles: FileItem[]) => {
setFiles(preparedFiles)
}
const updateFile = (fileItem: any, progress: number, list: any[]) => {
const targetIndex = list.findIndex((file: any) => file.fileID === fileItem.fileID)
const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
list[targetIndex] = {
...list[targetIndex],
progress,
......
......@@ -7,7 +7,7 @@ import FileUploader from '../file-uploader'
import NotionPagePreview from '../notion-page-preview'
import EmptyDatasetCreationModal from '../empty-dataset-creation-modal'
import s from './index.module.css'
import type { File } from '@/models/datasets'
import type { FileItem } from '@/models/datasets'
import type { DataSourceNotionPage } from '@/models/common'
import { DataSourceType } from '@/models/datasets'
import Button from '@/app/components/base/button'
......@@ -20,9 +20,9 @@ type IStepOneProps = {
dataSourceTypeDisable: Boolean
hasConnection: boolean
onSetting: () => void
files: any[]
updateFileList: (files: any[]) => void
updateFile: (fileItem: any, progress: number, list: any[]) => void
files: FileItem[]
updateFileList: (files: FileItem[]) => void
updateFile: (fileItem: FileItem, progress: number, list: FileItem[]) => void
notionPages?: any[]
updateNotionPages: (value: any[]) => void
onStepChange: () => void
......
......@@ -9,7 +9,7 @@ import Link from 'next/link'
import { groupBy } from 'lodash-es'
import PreviewItem, { PreviewType } from './preview-item'
import s from './index.module.css'
import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
createDocument,
createFirstDocument,
......@@ -39,7 +39,7 @@ type StepTwoProps = {
datasetId?: string
indexingType?: string
dataSourceType: DataSourceType
files: File[]
files: CustomFile[]
notionPages?: Page[]
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
......
......@@ -23,10 +23,10 @@ const translation = {
title: 'Upload text file',
button: 'Drag and drop file, or',
browse: 'Browse',
tip: 'Supports txt, html, markdown, xlsx, and pdf. Max 15MB each.',
tip: 'Supports txt, html, markdown, xlsx, and pdf. Max {{size}}MB each.',
validation: {
typeError: 'File type not supported',
size: 'File too large. Maximum is 15MB',
size: 'File too large. Maximum is {{size}}MB',
count: 'Multiple files not supported',
},
cancel: 'Cancel',
......
......@@ -23,10 +23,10 @@ const translation = {
title: '上传文本文件',
button: '拖拽文件至此,或者',
browse: '选择文件',
tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 15 MB。',
tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 {{size}}MB。',
validation: {
typeError: '文件类型不支持',
size: '文件太大了,不能超过 15MB',
size: '文件太大了,不能超过 {{size}}MB',
count: '暂不支持多个文件',
},
cancel: '取消',
......
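Both locale files now carry a {{size}} placeholder instead of a hard-coded 15, so the tip text and the size error track whatever limit the server reports. A minimal sketch of how the placeholder gets filled, assuming the app's react-i18next setup (the component name is illustrative):

import { useTranslation } from 'react-i18next'

// i18next substitutes {{size}} with the `size` option, so with the default
// config this renders "Supports txt, html, markdown, xlsx, and pdf. Max 15MB each."
const UploaderTip = ({ sizeLimitInMb }: { sizeLimitInMb: number }) => {
  const { t } = useTranslation()
  return <div>{t('datasetCreation.stepOne.uploader.tip', { size: sizeLimitInMb })}</div>
}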
......@@ -168,3 +168,8 @@ export type PluginProvider = {
api_key: string
} | null
}
export type FileUploadConfigResponse = {
file_size_limit: number
batch_count_limit: number
}
......@@ -24,14 +24,18 @@ export type DataSet = {
word_count: number
}
export type File = {
id: string
name: string
size: number
extension: string
mime_type: string
created_by: string
created_at: number
export type CustomFile = File & {
id?: string
extension?: string
mime_type?: string
created_by?: string
created_at?: number
}
export type FileItem = {
fileID: string
file: CustomFile
progress: number
}
export type DataSetListResponse = {
......
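CustomFile now extends the built-in browser File with the optional fields the server fills in after upload, and FileItem wraps it with the client-side tracking state used by the uploader. A small sketch of how an item is built, mirroring initialUpload earlier in the diff (the helper name is illustrative):

import type { FileItem } from '@/models/datasets'

// Wrap a freshly selected browser File for tracking: progress starts at -1 and
// is set to 0..100 while the upload runs; the optional CustomFile fields
// (id, extension, mime_type, ...) appear once the server responds.
const toFileItem = (file: File, index: number): FileItem => ({
  fileID: `file${index}-${Date.now()}`,
  file,
  progress: -1,
})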
......@@ -2,6 +2,7 @@ import type { Fetcher } from 'swr'
import { del, get, patch, post, put } from './base'
import type {
AccountIntegrate, CommonResponse, DataSourceNotion,
FileUploadConfigResponse,
ICurrentWorkspace,
IWorkspace, LangGeniusVersionResponse, Member,
OauthResponse, PluginProvider, Provider, ProviderAnthropicToken, ProviderAzureToken,
......@@ -178,3 +179,7 @@ export const updateDefaultModel: Fetcher<CommonResponse, { url: string; body: an
export const submitFreeQuota: Fetcher<{ type: string; redirect_url?: string; result?: string }, string> = (url) => {
return post(url) as Promise<{ type: string; redirect_url?: string; result?: string }>
}
export const fetchFileUploadConfig: Fetcher<FileUploadConfigResponse, { url: string }> = ({ url }) => {
return get(url) as Promise<FileUploadConfigResponse>
}
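fetchFileUploadConfig is a typed SWR fetcher, but it can also be awaited directly since it just wraps get(). A short usage sketch (the byte conversion mirrors the backend check; the function name is illustrative):

import { fetchFileUploadConfig } from '@/service/common'

// Resolves to { file_size_limit, batch_count_limit }; convert MB to bytes
// to get the per-file budget the backend will enforce.
const loadUploadByteLimit = async () => {
  const config = await fetchFileUploadConfig({ url: '/files/upload' })
  return config.file_size_limit * 1024 * 1024
}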