Unverified Commit 97fe8171 authored by Jyong, committed by GitHub

Fix/upload limit (#2521)

Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>

parent 52b12ed7
...@@ -130,3 +130,5 @@ UNSTRUCTURED_API_URL=
 SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
+
+BATCH_UPLOAD_LIMIT=10
\ No newline at end of file
...@@ -56,6 +56,7 @@ DEFAULTS = {
     'BILLING_ENABLED': 'False',
     'CAN_REPLACE_LOGO': 'False',
     'ETL_TYPE': 'dify',
+    'BATCH_UPLOAD_LIMIT': 20
 }
...@@ -285,6 +286,8 @@ class Config:
         self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
         self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
+        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
+

 class CloudEditionConfig(Config):
...
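Together, the two configuration hunks wire the new setting end to end: .env.example documents an explicit override of 10, while DEFAULTS supplies 20 whenever the variable is absent from the environment. Below is a minimal sketch of the presumed get_env fallback behavior; the helper itself is not part of this diff, so its body here is an assumption:

import os

# Assumed shape of config.py's get_env helper: process environment first,
# then the DEFAULTS table. Not shown in this commit's diff.
DEFAULTS = {'BATCH_UPLOAD_LIMIT': 20}

def get_env(key: str):
    return os.environ.get(key, DEFAULTS.get(key))

print(get_env('BATCH_UPLOAD_LIMIT'))  # 20 when unset; '10' with the .env override

Note that the default is the integer 20 while an environment override arrives as a string, which is consistent with every consumer below coercing via int(current_app.config['BATCH_UPLOAD_LIMIT']).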
...@@ -32,6 +32,7 @@ from models.dataset import Dataset, DatasetProcessRule, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from models.model import UploadFile
 from models.source import DataSourceBinding
+from services.feature_service import FeatureService


 class IndexingRunner:
...@@ -244,6 +245,14 @@ class IndexingRunner:
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(file_details)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
...@@ -361,6 +370,14 @@ class IndexingRunner:
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(notion_info_list)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
...
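Both estimate paths repeat the same guard and differ only in what they count: len(file_details) for file uploads and len(notion_info_list) for Notion imports. A hypothetical shared helper makes the shape of the check easier to see; the commit itself inlines the logic in each method, and the parameterized limit here is just for illustration:

def check_batch_upload_limit(count: int, batch_upload_limit: int) -> None:
    # Reject the batch up front, before any costly estimation work begins.
    if count > batch_upload_limit:
        raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")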
...@@ -10,6 +10,7 @@ from werkzeug.exceptions import NotFound
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
+from services.feature_service import FeatureService
 from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
 from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
 from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
...@@ -284,6 +285,12 @@ class AppAnnotationService:
                 result.append(content)
             if len(result) == 0:
                 raise ValueError("The CSV file is empty.")
+            # check annotation limit
+            features = FeatureService.get_features(current_user.current_tenant_id)
+            if features.billing.enabled:
+                annotation_quota_limit = features.annotation_quota_limit
+                if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
+                    raise ValueError("The number of annotations exceeds the limit of your subscription.")
             # async job
             job_id = str(uuid.uuid4())
             indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
...
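Unlike the batch-size caps elsewhere in this commit, the annotation guard is quota arithmetic: a CSV import passes only if the rows it adds, on top of the annotations already stored, stay within the plan. A sketch of the predicate, assuming annotation_quota_limit exposes limit (the plan ceiling) and size (current usage) as the hunk above suggests:

def csv_import_allowed(rows: int, size: int, limit: int) -> bool:
    # Mirrors the check above: reject when existing annotations plus
    # the new CSV rows would exceed the subscription limit.
    return limit >= rows + size

print(csv_import_allowed(rows=20, size=85, limit=100))  # False: 105 > 100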
...@@ -36,6 +36,7 @@ from services.errors.account import NoPermissionError
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
+from services.feature_service import FeatureService
 from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
...@@ -452,7 +453,9 @@ class DocumentService:
                       created_from: str = 'web'):
         # check document limit
-        if current_app.config['EDITION'] == 'CLOUD':
+        features = FeatureService.get_features(current_user.current_tenant_id)
+        if features.billing.enabled:
             if 'original_document_id' not in document_data or not document_data['original_document_id']:
                 count = 0
                 if document_data["data_source"]["type"] == "upload_file":
...@@ -462,6 +465,9 @@ class DocumentService:
                     notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                     for notion_info in notion_info_list:
                         count = count + len(notion_info['pages'])
+                batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+                if count > batch_upload_limit:
+                    raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
         # if dataset is empty, update dataset data_source_type
         if not dataset.data_source_type:
             dataset.data_source_type = document_data["data_source"]["type"]
...@@ -741,6 +747,9 @@ class DocumentService:
     @staticmethod
     def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
+        features = FeatureService.get_features(current_user.current_tenant_id)
+        if features.billing.enabled:
             count = 0
             if document_data["data_source"]["type"] == "upload_file":
                 upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
...@@ -749,6 +758,9 @@ class DocumentService:
                 notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                 for notion_info in notion_info_list:
                     count = count + len(notion_info['pages'])
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

         embedding_model = None
         dataset_collection_binding_id = None
...@@ -1139,7 +1151,7 @@ class SegmentService:
             segment.answer = args['answer']
         if 'keywords' in args and args['keywords']:
             segment.keywords = args['keywords']
-        if'enabled' in args and args['enabled'] is not None:
+        if 'enabled' in args and args['enabled'] is not None:
             segment.enabled = args['enabled']
         db.session.add(segment)
         db.session.commit()
...
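In DocumentService the count being limited depends on the data source: a file upload contributes one unit per file ID, while a Notion import contributes one per page summed across the selected workspaces. A standalone restatement of that counting follows; the 'notion_import' branch label is an assumption, since only the 'upload_file' comparison is visible in these hunks:

def batch_count(document_data: dict) -> int:
    source = document_data["data_source"]
    if source["type"] == "upload_file":
        # One unit per uploaded file.
        return len(source["info_list"]["file_info_list"]["file_ids"])
    if source["type"] == "notion_import":  # assumed type label
        # One unit per Notion page, summed over all selected workspaces.
        return sum(len(info["pages"]) for info in source["info_list"]["notion_info_list"])
    return 0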
...@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
+ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
-UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
-                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
+UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
+                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
 PREVIEW_WORDS_LIMIT = 3000
...
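This hunk narrows both document allow-lists by dropping the image extensions (IMAGE_EXTENSIONS itself stays, presumably for image-specific upload paths). Which list applies likely follows the ETL_TYPE setting seen in DEFAULTS earlier in this commit; the dispatch below is a sketch of that assumed selection, not code from the diff:

def allowed_extensions(etl_type: str) -> list[str]:
    # 'dify' is the default ETL_TYPE in this commit's DEFAULTS; the
    # 'Unstructured' branch and its exact label are assumptions here.
    base = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
    if etl_type == 'Unstructured':
        return base + ['eml', 'msg', 'pptx', 'ppt', 'xml']
    return base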
...@@ -4,10 +4,12 @@ import time

 import click
 from celery import shared_task
+from flask import current_app

 from core.indexing_runner import DocumentIsPausedException, IndexingRunner
 from extensions.ext_database import db
-from models.dataset import Document
+from models.dataset import Dataset, Document
+from services.feature_service import FeatureService


 @shared_task(queue='dataset')
...@@ -21,6 +23,35 @@ def document_indexing_task(dataset_id: str, document_ids: list):
     """
     documents = []
     start_at = time.perf_counter()
+
+    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+
+    # check document limit
+    features = FeatureService.get_features(dataset.tenant_id)
+    try:
+        if features.billing.enabled:
+            vector_space = features.vector_space
+            count = len(document_ids)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+            if 0 < vector_space.limit <= vector_space.size:
+                raise ValueError("Your total number of documents plus the number of uploads is over the limit of "
+                                 "your subscription.")
+    except Exception as e:
+        for document_id in document_ids:
+            document = db.session.query(Document).filter(
+                Document.id == document_id,
+                Document.dataset_id == dataset_id
+            ).first()
+            if document:
+                document.indexing_status = 'error'
+                document.error = str(e)
+                document.stopped_at = datetime.datetime.utcnow()
+                db.session.add(document)
+        db.session.commit()
+        return
+
     for document_id in document_ids:
         logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))
...
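The task now fails fast: if any pre-check raises, every queued document is flagged 'error' with the message persisted, and the task returns before indexing starts. The vector-space guard reads as 0 < limit <= size, so a limit of zero effectively means no cap. A small illustration of that predicate:

def vector_space_exhausted(limit: int, size: int) -> bool:
    # A limit of 0 disables the check; otherwise the quota is exhausted
    # once current usage reaches the limit.
    return 0 < limit <= size

print(vector_space_exhausted(limit=0, size=500))    # False: uncapped plan
print(vector_space_exhausted(limit=200, size=200))  # True: quota used up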
...@@ -14,6 +14,8 @@ import { fetchSupportFileTypes } from '@/service/datasets'
 import I18n from '@/context/i18n'
 import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'

+const FILES_NUMBER_LIMIT = 20
+
 type IFileUploaderProps = {
   fileList: FileItem[]
   titleClassName?: string
...@@ -176,6 +178,11 @@ const FileUploader = ({
     if (!files.length)
       return false

+    if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
+      return false
+    }
+
     const preparedFiles = files.map((file, index) => ({
       fileID: `file${index}-${Date.now()}`,
       file,
...@@ -185,7 +192,7 @@ const FileUploader = ({
     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }, [prepareFileList, uploadMultipleFiles])
+  }, [prepareFileList, uploadMultipleFiles, notify, t, fileList])

   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()
...
...@@ -28,6 +28,7 @@ const translation = {
       typeError: 'File type not supported',
       size: 'File too large. Maximum is {{size}}MB',
       count: 'Multiple files not supported',
+      filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
     },
     cancel: 'Cancel',
     change: 'Change',
...
...@@ -28,6 +28,7 @@ const translation = {
       typeError: 'Tipo de arquivo não suportado',
       size: 'Arquivo muito grande. Máximo é {{size}}MB',
       count: 'Vários arquivos não suportados',
+      filesNumber: 'Limite de upload em massa {{filesNumber}}.',
     },
     cancel: 'Cancelar',
     change: 'Alterar',
...
...@@ -28,6 +28,7 @@ const translation = {
      typeError: 'Тип файлу не підтримується',
      size: 'Файл занадто великий. Максимум – {{size}} МБ',
      count: 'Не підтримується завантаження кількох файлів',
+      filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
     },
     cancel: 'Скасувати',
     change: 'Змінити',
...
...@@ -28,6 +28,7 @@ const translation = {
       typeError: '文件类型不支持',
       size: '文件太大了，不能超过 {{size}}MB',
       count: '暂不支持多个文件',
+      filesNumber: '批量上传限制 {{filesNumber}}。',
     },
     cancel: '取消',
     change: '更改文件',
...