Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
8f27ea10
Commit
8f27ea10
authored
Jun 21, 2023
by
John Wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix: notion error
parent
7a7fb8c6
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
7 additions
and
14 deletions
+7
-14
csv.py
api/core/data_loader/loader/csv.py
+1
-1
excel.py
api/core/data_loader/loader/excel.py
+1
-2
html.py
api/core/data_loader/loader/html.py
+1
-2
markdown.py
api/core/data_loader/loader/markdown.py
+2
-3
notion.py
api/core/data_loader/loader/notion.py
+1
-1
pdf.py
api/core/data_loader/loader/pdf.py
+1
-4
requirements.txt
api/requirements.txt
+0
-1
No files found.
api/core/data_loader/loader/csv.py
View file @
8f27ea10
...
...
@@ -54,7 +54,7 @@ class CSVLoader(LCCSVLoader):
source
=
(
row
[
self
.
source_column
]
if
self
.
source_column
is
not
None
else
self
.
file_path
else
''
)
except
KeyError
:
raise
ValueError
(
...
...
api/core/data_loader/loader/excel.py
View file @
8f27ea10
...
...
@@ -40,5 +40,4 @@ class ExcelLoader(BaseLoader):
row_dict
=
{
k
:
v
for
k
,
v
in
row_dict
.
items
()
if
v
}
data
.
append
(
json
.
dumps
(
row_dict
,
ensure_ascii
=
False
))
metadata
=
{
"source"
:
self
.
_file_path
}
return
[
Document
(
page_content
=
'
\n\n
'
.
join
(
data
),
metadata
=
metadata
)]
return
[
Document
(
page_content
=
'
\n\n
'
.
join
(
data
))]
api/core/data_loader/loader/html.py
View file @
8f27ea10
...
...
@@ -24,8 +24,7 @@ class HTMLLoader(BaseLoader):
self
.
_file_path
=
file_path
def
load
(
self
)
->
List
[
Document
]:
metadata
=
{
"source"
:
self
.
_file_path
}
return
[
Document
(
page_content
=
self
.
_load_as_text
(),
metadata
=
metadata
)]
return
[
Document
(
page_content
=
self
.
_load_as_text
())]
def
_load_as_text
(
self
)
->
str
:
with
open
(
self
.
_file_path
,
"rb"
)
as
fp
:
...
...
api/core/data_loader/loader/markdown.py
View file @
8f27ea10
...
...
@@ -45,13 +45,12 @@ class MarkdownLoader(BaseLoader):
def
load
(
self
)
->
List
[
Document
]:
tups
=
self
.
parse_tups
(
self
.
_file_path
)
documents
=
[]
metadata
=
{
"source"
:
self
.
_file_path
}
for
header
,
value
in
tups
:
value
=
value
.
strip
()
if
header
is
None
:
documents
.
append
(
Document
(
page_content
=
value
,
metadata
=
metadata
))
documents
.
append
(
Document
(
page_content
=
value
))
else
:
documents
.
append
(
Document
(
page_content
=
f
"
\n\n
{header}
\n
{value}"
,
metadata
=
metadata
))
documents
.
append
(
Document
(
page_content
=
f
"
\n\n
{header}
\n
{value}"
))
return
documents
...
...
api/core/data_loader/loader/notion.py
View file @
8f27ea10
...
...
@@ -182,7 +182,7 @@ class NotionLoader(BaseLoader):
block_type
=
result
[
"type"
]
if
has_children
and
block_type
!=
'child_page'
:
children_text
=
self
.
_read_block
(
result_block_id
,
num_tabs
=
num_tabs
+
1
result_block_id
,
num_tabs
=
1
)
cur_result_text_arr
.
append
(
children_text
)
...
...
api/core/data_loader/loader/pdf.py
View file @
8f27ea10
...
...
@@ -4,7 +4,6 @@ from typing import List, Optional
from
langchain.document_loaders
import
PyPDFium2Loader
from
langchain.document_loaders.base
import
BaseLoader
from
langchain.schema
import
Document
from
pypdf
import
PdfReader
from
extensions.ext_storage
import
storage
from
models.model
import
UploadFile
...
...
@@ -39,8 +38,7 @@ class PdfLoader(BaseLoader):
try
:
text
=
storage
.
load
(
plaintext_file_key
)
.
decode
(
'utf-8'
)
plaintext_file_exists
=
True
metadata
=
{
"source"
:
self
.
_file_path
}
return
[
Document
(
page_content
=
text
,
metadata
=
metadata
)]
return
[
Document
(
page_content
=
text
)]
except
FileNotFoundError
:
pass
documents
=
PyPDFium2Loader
(
file_path
=
self
.
_file_path
)
.
load
()
...
...
@@ -53,6 +51,5 @@ class PdfLoader(BaseLoader):
if
not
plaintext_file_exists
and
plaintext_file_key
:
storage
.
save
(
plaintext_file_key
,
text
.
encode
(
'utf-8'
))
metadata
=
{
"source"
:
self
.
_file_path
}
return
documents
api/requirements.txt
View file @
8f27ea10
...
...
@@ -28,7 +28,6 @@ sentry-sdk[flask]~=1.21.1
jieba==0.42.1
celery==5.2.7
redis~=4.5.4
pypdf==3.8.1
openpyxl==3.1.2
chardet~=5.1.0
docx2txt==0.8
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment