Unverified commit 20b932da, authored by Jyong, committed by GitHub

del doc support (#2494)

Co-authored-by: jyong <jyong@dify.ai>
parent 207080ba
...@@ -69,7 +69,7 @@ class FileExtractor: ...@@ -69,7 +69,7 @@ class FileExtractor:
else MarkdownLoader(file_path, autodetect_encoding=True) else MarkdownLoader(file_path, autodetect_encoding=True)
elif file_extension in ['.htm', '.html']: elif file_extension in ['.htm', '.html']:
loader = HTMLLoader(file_path) loader = HTMLLoader(file_path)
elif file_extension in ['.docx', '.doc']: elif file_extension in ['.docx']:
loader = Docx2txtLoader(file_path) loader = Docx2txtLoader(file_path)
elif file_extension == '.csv': elif file_extension == '.csv':
loader = CSVLoader(file_path, autodetect_encoding=True) loader = CSVLoader(file_path, autodetect_encoding=True)
...@@ -96,7 +96,7 @@ class FileExtractor: ...@@ -96,7 +96,7 @@ class FileExtractor:
loader = MarkdownLoader(file_path, autodetect_encoding=True) loader = MarkdownLoader(file_path, autodetect_encoding=True)
elif file_extension in ['.htm', '.html']: elif file_extension in ['.htm', '.html']:
loader = HTMLLoader(file_path) loader = HTMLLoader(file_path)
elif file_extension in ['.docx', '.doc']: elif file_extension in ['.docx']:
loader = Docx2txtLoader(file_path) loader = Docx2txtLoader(file_path)
elif file_extension == '.csv': elif file_extension == '.csv':
loader = CSVLoader(file_path, autodetect_encoding=True) loader = CSVLoader(file_path, autodetect_encoding=True)
......
...@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError ...@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS]) IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'doc', 'csv'] + IMAGE_EXTENSIONS ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
'docx', 'doc', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS 'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
PREVIEW_WORDS_LIMIT = 3000 PREVIEW_WORDS_LIMIT = 3000
...@@ -162,7 +162,7 @@ class FileService: ...@@ -162,7 +162,7 @@ class FileService:
generator = storage.load(upload_file.key, stream=True) generator = storage.load(upload_file.key, stream=True)
return generator, upload_file.mime_type return generator, upload_file.mime_type
@staticmethod @staticmethod
def get_public_image_preview(file_id: str) -> str: def get_public_image_preview(file_id: str) -> str:
upload_file = db.session.query(UploadFile) \ upload_file = db.session.query(UploadFile) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment