Unverified commit eeb2c285, authored by Columbus and committed by GitHub

Fix the issue of decoding a non-UTF-8 encoded file using UTF-8 encoding (#378)

parent 673288d5
import datetime import datetime
import hashlib import hashlib
import tempfile import tempfile
import chardet
import time import time
import uuid import uuid
from pathlib import Path from pathlib import Path
...@@ -141,7 +142,8 @@ class FilePreviewApi(Resource): ...@@ -141,7 +142,8 @@ class FilePreviewApi(Resource):
# ['txt', 'markdown', 'md'] # ['txt', 'markdown', 'md']
with open(filepath, "rb") as fp: with open(filepath, "rb") as fp:
data = fp.read() data = fp.read()
text = data.decode(encoding='utf-8').strip() if data else '' encoding = chardet.detect(data)['encoding']
text = data.decode(encoding=encoding).strip() if data else ''
text = text[0:PREVIEW_WORDS_LIMIT] if text else '' text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
return {'content': text} return {'content': text}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment