Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
9e7efa45
Unverified
Commit
9e7efa45
authored
Oct 10, 2023
by
Charlie.Wei
Committed by
GitHub
Oct 10, 2023
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
document segmentApi Add get&update&delete operate (#1285)
Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM>
parent
8bf892b3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
640 additions
and
83 deletions
+640
-83
segment.py
api/controllers/service_api/dataset/segment.py
+145
-3
dataset_service.py
api/services/dataset_service.py
+2
-0
template.en.mdx
web/app/(commonLayout)/datasets/template/template.en.mdx
+244
-38
template.zh.mdx
web/app/(commonLayout)/datasets/template/template.zh.mdx
+249
-42
No files found.
api/controllers/service_api/dataset/segment.py
View file @
9e7efa45
from
flask_login
import
current_user
from
flask_login
import
current_user
from
flask_restful
import
reqparse
,
marshal
from
flask_restful
import
reqparse
,
marshal
from
werkzeug.exceptions
import
NotFound
from
werkzeug.exceptions
import
NotFound
from
controllers.service_api
import
api
from
controllers.service_api
import
api
from
controllers.service_api.app.error
import
ProviderNotInitializeError
from
controllers.service_api.app.error
import
ProviderNotInitializeError
from
controllers.service_api.wraps
import
DatasetApiResource
from
controllers.service_api.wraps
import
DatasetApiResource
...
@@ -9,8 +8,8 @@ from core.model_providers.error import ProviderTokenNotInitError, LLMBadRequestE
...
@@ -9,8 +8,8 @@ from core.model_providers.error import ProviderTokenNotInitError, LLMBadRequestE
from
core.model_providers.model_factory
import
ModelFactory
from
core.model_providers.model_factory
import
ModelFactory
from
extensions.ext_database
import
db
from
extensions.ext_database
import
db
from
fields.segment_fields
import
segment_fields
from
fields.segment_fields
import
segment_fields
from
models.dataset
import
Dataset
from
models.dataset
import
Dataset
,
DocumentSegment
from
services.dataset_service
import
DocumentService
,
SegmentService
from
services.dataset_service
import
D
atasetService
,
D
ocumentService
,
SegmentService
class
SegmentApi
(
DatasetApiResource
):
class
SegmentApi
(
DatasetApiResource
):
...
@@ -24,6 +23,8 @@ class SegmentApi(DatasetApiResource):
...
@@ -24,6 +23,8 @@ class SegmentApi(DatasetApiResource):
Dataset
.
tenant_id
==
tenant_id
,
Dataset
.
tenant_id
==
tenant_id
,
Dataset
.
id
==
dataset_id
Dataset
.
id
==
dataset_id
)
.
first
()
)
.
first
()
if
not
dataset
:
raise
NotFound
(
'Dataset not found.'
)
# check document
# check document
document_id
=
str
(
document_id
)
document_id
=
str
(
document_id
)
document
=
DocumentService
.
get_document
(
dataset
.
id
,
document_id
)
document
=
DocumentService
.
get_document
(
dataset
.
id
,
document_id
)
...
@@ -55,5 +56,146 @@ class SegmentApi(DatasetApiResource):
...
@@ -55,5 +56,146 @@ class SegmentApi(DatasetApiResource):
'doc_form'
:
document
.
doc_form
'doc_form'
:
document
.
doc_form
},
200
},
200
def get(self, tenant_id, dataset_id, document_id):
    """List the segments of a document, optionally filtered.

    Query-string arguments:
        status: may be repeated; when given, only segments whose status is
            one of the supplied values are returned.
        keyword: when given, only segments whose content matches
            ``%keyword%`` case-insensitively are returned.

    Returns:
        A ``(body, 200)`` tuple where ``body`` holds the marshalled segments
        under ``data`` (ordered by position), the document's ``doc_form``
        and the ``total`` number of matching segments.

    Raises:
        NotFound: the dataset or the document does not exist.
        ProviderNotInitializeError: the dataset uses the ``high_quality``
            indexing technique and its embedding model cannot be obtained.
    """
    tenant_id = str(tenant_id)
    dataset_id = str(dataset_id)
    document_id = str(document_id)

    # The dataset must belong to the tenant resolved for this request.
    dataset_lookup = db.session.query(Dataset).filter(
        Dataset.tenant_id == tenant_id,
        Dataset.id == dataset_id
    )
    dataset = dataset_lookup.first()
    if not dataset:
        raise NotFound('Dataset not found.')

    document = DocumentService.get_document(dataset.id, document_id)
    if not document:
        raise NotFound('Document not found.')

    # High-quality datasets need a usable embedding model; provider errors
    # are translated into the service API's own error type.
    if dataset.indexing_technique == 'high_quality':
        try:
            ModelFactory.get_embedding_model(
                tenant_id=current_user.current_tenant_id,
                model_provider_name=dataset.embedding_model_provider,
                model_name=dataset.embedding_model
            )
        except LLMBadRequestError:
            raise ProviderNotInitializeError(
                f"No Embedding Model available. Please configure a valid provider "
                f"in the Settings -> Model Provider.")
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)

    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument('status', type=str, action='append', default=[], location='args')
    arg_parser.add_argument('keyword', type=str, default=None, location='args')
    parsed = arg_parser.parse_args()

    wanted_statuses = parsed['status']
    keyword = parsed['keyword']

    # Mandatory criteria first, then the optional filters the caller asked for.
    criteria = [
        DocumentSegment.document_id == str(document_id),
        DocumentSegment.tenant_id == current_user.current_tenant_id,
    ]
    if wanted_statuses:
        criteria.append(DocumentSegment.status.in_(wanted_statuses))
    if keyword:
        criteria.append(DocumentSegment.content.ilike(f'%{keyword}%'))

    segment_query = DocumentSegment.query.filter(*criteria)

    total = segment_query.count()
    segments = segment_query.order_by(DocumentSegment.position).all()

    body = {
        'data': marshal(segments, segment_fields),
        'doc_form': document.doc_form,
        'total': total
    }
    return body, 200
class DatasetSegmentApi(DatasetApiResource):
    """Service-API resource for deleting or updating one document segment.

    Both handlers resolve the dataset, the document and the segment in that
    order and raise ``NotFound`` as soon as one of them is missing. The
    shared lookups live in private helpers so the two handlers cannot drift
    apart.
    """

    @staticmethod
    def _get_dataset(tenant_id, dataset_id):
        """Return the dataset with ``dataset_id`` owned by ``tenant_id`` or raise NotFound."""
        dataset = db.session.query(Dataset).filter(
            Dataset.tenant_id == tenant_id,
            Dataset.id == dataset_id
        ).first()
        if not dataset:
            raise NotFound('Dataset not found.')
        return dataset

    @staticmethod
    def _get_document(dataset_id, document_id):
        """Return the document looked up via DocumentService or raise NotFound."""
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound('Document not found.')
        return document

    @staticmethod
    def _get_segment(document_id, segment_id):
        """Return the segment with ``segment_id`` inside ``document_id`` or raise NotFound.

        The lookup is scoped to the document from the URL as well as to the
        current tenant. Without the document filter, a segment id belonging
        to another document of the same tenant would be accepted and then
        deleted/updated together with the wrong document and dataset.
        """
        segment = DocumentSegment.query.filter(
            DocumentSegment.id == str(segment_id),
            DocumentSegment.document_id == str(document_id),
            DocumentSegment.tenant_id == current_user.current_tenant_id
        ).first()
        if not segment:
            raise NotFound('Segment not found.')
        return segment

    @staticmethod
    def _check_embedding_model(dataset):
        """Ensure a ``high_quality`` dataset has a usable embedding model.

        Provider errors are translated into ``ProviderNotInitializeError``;
        datasets with any other indexing technique are not checked.
        """
        if dataset.indexing_technique != 'high_quality':
            return
        try:
            ModelFactory.get_embedding_model(
                tenant_id=current_user.current_tenant_id,
                model_provider_name=dataset.embedding_model_provider,
                model_name=dataset.embedding_model
            )
        except LLMBadRequestError:
            raise ProviderNotInitializeError(
                f"No Embedding Model available. Please configure a valid provider "
                f"in the Settings -> Model Provider.")
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)

    def delete(self, tenant_id, dataset_id, document_id, segment_id):
        """Delete one segment of a document.

        Returns:
            ``({'result': 'success'}, 200)`` once the segment was deleted.

        Raises:
            NotFound: the dataset, document or segment does not exist, or the
                segment does not belong to the given document.
        """
        # check dataset
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
        dataset = self._get_dataset(tenant_id, dataset_id)
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # check document
        document_id = str(document_id)
        document = self._get_document(dataset_id, document_id)
        # check segment
        segment = self._get_segment(document_id, segment_id)
        SegmentService.delete_segment(segment, document, dataset)
        return {'result': 'success'}, 200

    def post(self, tenant_id, dataset_id, document_id, segment_id):
        """Update one segment of a document from the JSON ``segments`` object.

        Returns:
            A ``(body, 200)`` tuple where ``body`` holds the marshalled,
            updated segment under ``data`` and the document's ``doc_form``.

        Raises:
            NotFound: the dataset, document or segment does not exist, or the
                segment does not belong to the given document.
            ProviderNotInitializeError: the dataset is ``high_quality`` and
                its embedding model cannot be obtained.
        """
        # check dataset
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
        dataset = self._get_dataset(tenant_id, dataset_id)
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)
        # check document
        document_id = str(document_id)
        document = self._get_document(dataset_id, document_id)
        # check embedding model setting
        self._check_embedding_model(dataset)
        # check segment
        segment = self._get_segment(document_id, segment_id)

        # validate args
        # NOTE(review): 'segments' is optional and nullable here, so None can
        # reach the validator below - confirm it handles a missing payload.
        parser = reqparse.RequestParser()
        parser.add_argument('segments', type=dict, required=False, nullable=True, location='json')
        args = parser.parse_args()

        SegmentService.segment_create_args_validate(args['segments'], document)
        segment = SegmentService.update_segment(args['segments'], segment, document, dataset)
        return {
            'data': marshal(segment, segment_fields),
            'doc_form': document.doc_form
        }, 200
# Route registration: SegmentApi serves the segment collection of a document.
# NOTE(review): this registration appears twice in this view - presumably the
# two sides of an unchanged line in a side-by-side diff rather than a real
# double registration; confirm against the actual file.
api
.
add_resource
(
SegmentApi
,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments'
)
api
.
add_resource
(
SegmentApi
,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments'
)
# Route registration: DatasetSegmentApi serves a single segment addressed by
# dataset, document and segment id.
api
.
add_resource
(
DatasetSegmentApi
,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>'
)
api/services/dataset_service.py
View file @
9e7efa45
...
@@ -1091,6 +1091,8 @@ class SegmentService:
...
@@ -1091,6 +1091,8 @@ class SegmentService:
segment
.
answer
=
args
[
'answer'
]
segment
.
answer
=
args
[
'answer'
]
if
args
[
'keywords'
]:
if
args
[
'keywords'
]:
segment
.
keywords
=
args
[
'keywords'
]
segment
.
keywords
=
args
[
'keywords'
]
if
args
[
'enabled'
]
is
not
None
:
segment
.
enabled
=
args
[
'enabled'
]
db
.
session
.
add
(
segment
)
db
.
session
.
add
(
segment
)
db
.
session
.
commit
()
db
.
session
.
commit
()
# update segment index task
# update segment index task
...
...
web/app/(commonLayout)/datasets/template/template.en.mdx
View file @
9e7efa45
...
@@ -89,7 +89,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -89,7 +89,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request GET '
https://api.dify.ai/v1
/datasets?page=1&limit=20' \
curl --location --request GET '
${props.apiBaseUrl}
/datasets?page=1&limit=20' \
--header 'Authorization: Bearer {api_key}'
--header 'Authorization: Bearer {api_key}'
```
```
</CodeGroup>
</CodeGroup>
...
@@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>id</code> (string) Unique identifier for the preprocessing rule
- <code>id</code> (string) Unique identifier for the preprocessing rule
- enumerate
- enumerate
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
...
@@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
label="/datasets/{dataset_id}/document/create_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/document/create_by_text' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/document/create_by_text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>id</code> (string) Unique identifier for the preprocessing rule
- <code>id</code> (string) Unique identifier for the preprocessing rule
- enumerate
- enumerate
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
...
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
label="/datasets/{dataset_id}/document/create_by_file"
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/document/create_by_file' \
curl --location POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/document/create_by_file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
--form 'file=@"/path/to/file"'
...
@@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>id</code> (string) Unique identifier for the preprocessing rule
- <code>id</code> (string) Unique identifier for the preprocessing rule
- enumerate
- enumerate
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
...
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
- <code>id</code> (string) Unique identifier for the preprocessing rule
- <code>id</code> (string) Unique identifier for the preprocessing rule
- enumerate
- enumerate
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
...
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/document/{document_id}/create_by_file' \
curl --location POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/document/{document_id}/create_by_file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
--form 'file=@"/path/to/file"'
...
@@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="GET"
tag="GET"
label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request GET '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{batch}/indexing-status' \
curl --location --request GET '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{batch}/indexing-status' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
```
```
</CodeGroup>
</CodeGroup>
...
@@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
{
{
"data":[{
"data":[{
"id": "",
"id": "",
"indexing_status": "indexing",
"indexing_status": "indexing",
"processing_started_at": 1681623462.0,
"processing_started_at": 1681623462.0,
"parsing_completed_at": 1681623462.0,
"parsing_completed_at": 1681623462.0,
"cleaning_completed_at": 1681623462.0,
"cleaning_completed_at": 1681623462.0,
...
@@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="DELETE"
tag="DELETE"
label="/datasets/{dataset_id}/documents/{document_id}"
label="/datasets/{dataset_id}/documents/{document_id}"
targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request DELETE '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{document_id}' \
curl --location --request DELETE '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{document_id}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
```
```
</CodeGroup>
</CodeGroup>
...
@@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="GET"
tag="GET"
label="/datasets/{dataset_id}/documents"
label="/datasets/{dataset_id}/documents"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request GET '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents' \
curl --location --request GET '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
```
```
</CodeGroup>
</CodeGroup>
...
@@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/segments"
label="/datasets/{dataset_id}/documents/{document_id}/segments"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{document_id}/segments' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{document_id}/segments' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -778,6 +778,212 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -778,6 +778,212 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='GET'
title='get documents segments'
name='#get_segment'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID
</Property>
<Property name='document_id' type='string' key='document_id'>
Document ID
</Property>
</Properties>
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
Keyword to search for in the segment content (optional)
</Property>
<Property name='status' type='string' key='status'>
Segment status to filter by, e.g. completed (optional)
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/segments"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"data": [{
"id": "",
"position": 1,
"document_id": "",
"content": "1",
"answer": "1",
"word_count": 25,
"tokens": 0,
"keywords": [
"a"
],
"index_node_id": "",
"index_node_hash": "",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "",
"created_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"stopped_at": null
}],
"doc_form": "text_model",
"total": 1
}
```
</CodeGroup>
</Col>
</Row>
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE'
title='delete document segment'
name='#delete_segment'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID
</Property>
<Property name='document_id' type='string' key='document_id'>
Document ID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
Document Segment ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="DELETE"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"result": "success"
}
```
</CodeGroup>
</Col>
</Row>
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST'
title='update document segment'
name='#update_segment'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID
</Property>
<Property name='document_id' type='string' key='document_id'>
Document ID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
Document Segment ID
</Property>
</Properties>
### Request Body
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) text content/question content,required
- <code>answer</code> (text) Answer content, not required, passed if the data set is in qa mode
- <code>keywords</code> (list) keyword, not required
- <code>enabled</code> (bool) false/true, not required
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segments\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
"segments": {
"content": "1",
"answer": "1",
"keywords": ["a"],
"enabled": false
}
}'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"data": {
"id": "",
"position": 1,
"document_id": "",
"content": "1",
"answer": "1",
"word_count": 25,
"tokens": 0,
"keywords": [
"a"
],
"index_node_id": "",
"index_node_hash": "",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "",
"created_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"stopped_at": null
},
"doc_form": "text_model"
}
```
</CodeGroup>
</Col>
</Row>
---
<Row>
<Row>
<Col>
<Col>
### Error message
### Error message
...
...
web/app/(commonLayout)/datasets/template/template.zh.mdx
View file @
9e7efa45
...
@@ -27,7 +27,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -27,7 +27,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -82,14 +82,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -82,14 +82,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets"
label="/datasets"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request GET '
https://api.dify.ai/v1
/datasets?page=1&limit=20' \
curl --location --request GET '
${props.apiBaseUrl}
/datasets?page=1&limit=20' \
--header 'Authorization: Bearer {api_key}'
--header 'Authorization: Bearer {api_key}'
```
```
</CodeGroup>
</CodeGroup>
...
@@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>id</code> (string) 预处理规则的唯一标识符
- <code>id</code> (string) 预处理规则的唯一标识符
- 枚举:
- 枚举:
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
...
@@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
label="/datasets/{dataset_id}/document/create_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/document/create_by_text' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/document/create_by_text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>id</code> (string) 预处理规则的唯一标识符
- <code>id</code> (string) 预处理规则的唯一标识符
- 枚举:
- 枚举:
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
...
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
label="/datasets/{dataset_id}/document/create_by_file"
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/document/create_by_file' \
curl --location POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/document/create_by_file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
--form 'file=@"/path/to/file"'
...
@@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>id</code> (string) 预处理规则的唯一标识符
- <code>id</code> (string) 预处理规则的唯一标识符
- 枚举:
- 枚举:
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
...
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>pre_processing_rules</code> (array[object]) 预处理规则
- <code>id</code> (string) 预处理规则的唯一标识符
- <code>id</code> (string) 预处理规则的唯一标识符
- 枚举:
- 枚举:
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>remove_urls_emails</code> 删除 URL、电子邮件地址
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
...
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/document/{document_id}/create_by_file' \
curl --location POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/document/{document_id}/create_by_file' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
--form 'file=@"/path/to/file"'
...
@@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="GET"
tag="GET"
label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request GET '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{batch}/indexing-status' \
curl --location --request GET '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{batch}/indexing-status' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
```
```
</CodeGroup>
</CodeGroup>
...
@@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
{
{
"data":[{
"data":[{
"id": "",
"id": "",
"indexing_status": "indexing",
"indexing_status": "indexing",
"processing_started_at": 1681623462.0,
"processing_started_at": 1681623462.0,
"parsing_completed_at": 1681623462.0,
"parsing_completed_at": 1681623462.0,
"cleaning_completed_at": 1681623462.0,
"cleaning_completed_at": 1681623462.0,
...
@@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="DELETE"
tag="DELETE"
label="/datasets/{dataset_id}/documents/{document_id}"
label="/datasets/{dataset_id}/documents/{document_id}"
targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request DELETE '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{document_id}' \
curl --location --request DELETE '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{document_id}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
```
```
</CodeGroup>
</CodeGroup>
...
@@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="GET"
tag="GET"
label="/datasets/{dataset_id}/documents"
label="/datasets/{dataset_id}/documents"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request GET '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents' \
curl --location --request GET '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
```
```
</CodeGroup>
</CodeGroup>
...
@@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Properties>
</Col>
</Col>
<Col sticky>
<Col sticky>
<CodeGroup
<CodeGroup
title="Request"
title="Request"
tag="POST"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/segments"
label="/datasets/{dataset_id}/documents/{document_id}/segments"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
>
>
```bash {{ title: 'cURL' }}
```bash {{ title: 'cURL' }}
curl --location --request POST '
https://api.dify.ai/v1
/datasets/{dataset_id}/documents/{document_id}/segments' \
curl --location --request POST '
${props.apiBaseUrl}
/datasets/{dataset_id}/documents/{document_id}/segments' \
--header 'Authorization: Bearer {api_key}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--header 'Content-Type: application/json' \
--data-raw '{
--data-raw '{
...
@@ -778,6 +778,213 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
...
@@ -778,6 +778,213 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='GET'
title='查询文档分段'
name='#get_segment'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
数据集 ID
</Property>
<Property name='document_id' type='string' key='document_id'>
文档 ID
</Property>
</Properties>
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
搜索关键词,可选
</Property>
<Property name='status' type='string' key='status'>
搜索状态,completed
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/segments"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"data": [{
"id": "",
"position": 1,
"document_id": "",
"content": "1",
"answer": "1",
"word_count": 25,
"tokens": 0,
"keywords": [
"a"
],
"index_node_id": "",
"index_node_hash": "",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "",
"created_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"stopped_at": null
}],
"doc_form": "text_model"
}
```
</CodeGroup>
</Col>
</Row>
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE'
title='删除文档分段'
name='#delete_segment'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
数据集 ID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
文档分段 ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="DELETE"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
>
```bash {{ title: 'cURL' }}
curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"result": "success"
}
```
</CodeGroup>
</Col>
</Row>
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST'
title='更新文档分段'
name='#update_segment'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
数据集 ID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
文档分段 ID
</Property>
</Properties>
### Request Body
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) 文本内容/问题内容,必填
- <code>answer</code> (text) 答案内容,非必填,如果数据集的模式为qa模式则传值
- <code>keywords</code> (list) 关键字,非必填
- <code>enabled</code> (bool) false/true,非必填
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segments\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
"segments": {
"content": "1",
"answer": "1",
"keywords": ["a"],
"enabled": false
}
}'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"data": [{
"id": "",
"position": 1,
"document_id": "",
"content": "1",
"answer": "1",
"word_count": 25,
"tokens": 0,
"keywords": [
"a"
],
"index_node_id": "",
"index_node_hash": "",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "",
"created_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"stopped_at": null
}],
"doc_form": "text_model"
}
```
</CodeGroup>
</Col>
</Row>
---
<Row>
<Row>
<Col>
<Col>
### 错误信息
### 错误信息
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment