Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
f9c41df7
Commit
f9c41df7
authored
Jul 25, 2023
by
jyong
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feat/milvus-support' into deploy/dev
parents
3f2bc97b
0a0960c1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
7 deletions
+20
-7
datasets.py
api/controllers/console/datasets/datasets.py
+3
-2
datasets_document.py
api/controllers/console/datasets/datasets_document.py
+2
-2
indexing_runner.py
api/core/indexing_runner.py
+15
-3
No files found.
api/controllers/console/datasets/datasets.py
View file @
f9c41df7
...
...
@@ -220,6 +220,7 @@ class DatasetIndexingEstimateApi(Resource):
parser
=
reqparse
.
RequestParser
()
parser
.
add_argument
(
'info_list'
,
type
=
dict
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
parser
.
add_argument
(
'process_rule'
,
type
=
dict
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
parser
.
add_argument
(
'doc_form'
,
type
=
str
,
default
=
'text_model'
,
required
=
False
,
nullable
=
False
,
location
=
'json'
)
args
=
parser
.
parse_args
()
# validate args
DocumentService
.
estimate_args_validate
(
args
)
...
...
@@ -234,12 +235,12 @@ class DatasetIndexingEstimateApi(Resource):
raise
NotFound
(
"File not found."
)
indexing_runner
=
IndexingRunner
()
response
=
indexing_runner
.
file_indexing_estimate
(
file_details
,
args
[
'process_rule'
])
response
=
indexing_runner
.
file_indexing_estimate
(
file_details
,
args
[
'process_rule'
]
,
args
[
'process_rule'
]
)
elif
args
[
'info_list'
][
'data_source_type'
]
==
'notion_import'
:
indexing_runner
=
IndexingRunner
()
response
=
indexing_runner
.
notion_indexing_estimate
(
args
[
'info_list'
][
'notion_info_list'
],
args
[
'process_rule'
])
args
[
'process_rule'
]
,
args
[
'process_rule'
]
)
else
:
raise
ValueError
(
'Data source type not support'
)
return
response
,
200
...
...
api/controllers/console/datasets/datasets_document.py
View file @
f9c41df7
...
...
@@ -271,7 +271,7 @@ class DatasetDocumentListApi(Resource):
parser
.
add_argument
(
'process_rule'
,
type
=
dict
,
required
=
False
,
location
=
'json'
)
parser
.
add_argument
(
'duplicate'
,
type
=
bool
,
nullable
=
False
,
location
=
'json'
)
parser
.
add_argument
(
'original_document_id'
,
type
=
str
,
required
=
False
,
location
=
'json'
)
parser
.
add_argument
(
'doc_form'
,
type
=
str
,
default
=
'text_model'
,
required
=
True
,
nullable
=
False
,
location
=
'json'
)
parser
.
add_argument
(
'doc_form'
,
type
=
str
,
default
=
'text_model'
,
required
=
False
,
nullable
=
False
,
location
=
'json'
)
args
=
parser
.
parse_args
()
if
not
dataset
.
indexing_technique
and
not
args
[
'indexing_technique'
]:
...
...
@@ -316,7 +316,7 @@ class DatasetInitApi(Resource):
nullable
=
False
,
location
=
'json'
)
parser
.
add_argument
(
'data_source'
,
type
=
dict
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
parser
.
add_argument
(
'process_rule'
,
type
=
dict
,
required
=
True
,
nullable
=
True
,
location
=
'json'
)
parser
.
add_argument
(
'doc_form'
,
type
=
str
,
default
=
'text_model'
,
required
=
True
,
nullable
=
False
,
location
=
'json'
)
parser
.
add_argument
(
'doc_form'
,
type
=
str
,
default
=
'text_model'
,
required
=
False
,
nullable
=
False
,
location
=
'json'
)
args
=
parser
.
parse_args
()
# validate args
...
...
api/core/indexing_runner.py
View file @
f9c41df7
...
...
@@ -229,7 +229,7 @@ class IndexingRunner:
dataset_document
.
stopped_at
=
datetime
.
datetime
.
utcnow
()
db
.
session
.
commit
()
def
file_indexing_estimate
(
self
,
file_details
:
List
[
UploadFile
],
tmp_processing_rule
:
dict
)
->
dict
:
def
file_indexing_estimate
(
self
,
file_details
:
List
[
UploadFile
],
tmp_processing_rule
:
dict
,
doc_from
:
str
=
None
)
->
dict
:
"""
Estimate the indexing for the document.
"""
...
...
@@ -261,7 +261,19 @@ class IndexingRunner:
tokens
+=
TokenCalculator
.
get_num_tokens
(
self
.
embedding_model_name
,
self
.
filter_string
(
document
.
page_content
))
if
doc_from
and
doc_from
==
'qa_model'
:
if
len
(
preview_texts
)
>
0
:
# qa model document
response
=
LLMGenerator
.
generate_qa_document
(
current_user
.
current_tenant_id
,
preview_texts
[
0
])
document_qa_list
=
self
.
format_split_text
(
response
)
return
{
"total_segments"
:
total_segments
,
"tokens"
:
total_segments
*
2000
,
"total_price"
:
'{:f}'
.
format
(
TokenCalculator
.
get_token_price
(
'gpt-3.5-turbo'
,
tokens
,
'completion'
)),
"currency"
:
TokenCalculator
.
get_currency
(
self
.
embedding_model_name
),
"qa_preview"
:
document_qa_list
,
"preview"
:
preview_texts
}
return
{
"total_segments"
:
total_segments
,
"tokens"
:
tokens
,
...
...
@@ -270,7 +282,7 @@ class IndexingRunner:
"preview"
:
preview_texts
}
def
notion_indexing_estimate
(
self
,
notion_info_list
:
list
,
tmp_processing_rule
:
dict
)
->
dict
:
def
notion_indexing_estimate
(
self
,
notion_info_list
:
list
,
tmp_processing_rule
:
dict
,
doc_from
:
str
=
None
)
->
dict
:
"""
Estimate the indexing for the document.
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment