Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
de5942da
Commit
de5942da
authored
Jul 28, 2023
by
jyong
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feat/milvus-support' into deploy/dev
parents
60f44f4e
1073b964
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
60 deletions
+6
-60
datasets_segments.py
api/controllers/console/datasets/datasets_segments.py
+1
-12
indexing_runner.py
api/core/indexing_runner.py
+5
-5
test_task.py
api/tasks/test_task.py
+0
-43
No files found.
api/controllers/console/datasets/datasets_segments.py
View file @
de5942da
...
...
@@ -16,7 +16,6 @@ from models.dataset import DocumentSegment
from
libs.helper
import
TimestampField
from
services.dataset_service
import
DatasetService
,
DocumentService
,
SegmentService
from
tasks.test_task
import
test_task
from
tasks.enable_segment_to_index_task
import
enable_segment_to_index_task
from
tasks.remove_segment_from_index_task
import
remove_segment_from_index_task
...
...
@@ -285,15 +284,6 @@ class DatasetDocumentSegmentUpdateApi(Resource):
},
200
class DatasetDocumentTest(Resource):
    """Ad-hoc console endpoint that enqueues the Celery smoke-test task."""

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self):
        # Fire-and-forget: enqueue asynchronously, never wait on the result.
        test_task.delay()
        return 200
# Segment listing for a document within a dataset.
api.add_resource(
    DatasetDocumentSegmentListApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments'
)
api
.
add_resource
(
DatasetDocumentSegmentApi
,
...
...
@@ -302,5 +292,4 @@ api.add_resource(DatasetDocumentSegmentAddApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment'
)
# Update a single segment of a document.
api.add_resource(
    DatasetDocumentSegmentUpdateApi,
    '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>'
)
# Ad-hoc smoke-test endpoint.
api.add_resource(
    DatasetDocumentTest,
    '/datasets/test'
)
api/core/indexing_runner.py
View file @
de5942da
...
...
@@ -516,7 +516,7 @@ class IndexingRunner:
model_name
=
'gpt-3.5-turbo'
,
max_tokens
=
2000
)
threads
=
[]
#
threads = []
# for doc in documents:
# document_format_thread = threading.Thread(target=self.format_document, kwargs={
# 'llm': llm, 'document_node': doc, 'split_documents': split_documents, 'document_form': document_form})
...
...
@@ -524,7 +524,7 @@ class IndexingRunner:
# document_format_thread.start()
# for thread in threads:
# thread.join()
asyncio
.
run
(
self
.
format_document
(
llm
,
documents
,
split_documents
,
document_form
))
#
asyncio.run(self.format_document(llm, documents, split_documents, document_form))
# threads.append(task)
# await asyncio.gather(*threads)
# asyncio.run(main())
...
...
@@ -538,12 +538,12 @@ class IndexingRunner:
# future_to_doc = {executor.submit(format_document, current_app._get_current_object(), doc): doc for doc in documents}
# for future in concurrent.futures.as_completed(future_to_doc):
# split_documents.extend(future.result())
self
.
format_document
(
llm
,
documents
,
split_documents
,
document_form
)
all_documents
.
extend
(
split_documents
)
return
all_documents
async
def
format_document
(
self
,
llm
:
StreamableOpenAI
,
documents
:
List
[
Document
],
split_documents
:
List
,
document_form
:
str
):
def
format_document
(
self
,
llm
:
StreamableOpenAI
,
documents
:
List
[
Document
],
split_documents
:
List
,
document_form
:
str
):
for
document_node
in
documents
:
print
(
"process:"
+
document_node
.
page_content
)
format_documents
=
[]
...
...
@@ -561,7 +561,7 @@ class IndexingRunner:
elif
document_form
==
'qa_model'
:
# qa model document
response
=
await
LLMGenerator
.
generate_qa_document
(
llm
,
document_node
.
page_content
)
response
=
LLMGenerator
.
generate_qa_document_sync
(
llm
,
document_node
.
page_content
)
document_qa_list
=
self
.
format_split_text
(
response
)
qa_documents
=
[]
for
result
in
document_qa_list
:
...
...
api/tasks/test_task.py
deleted
100644 → 0
View file @
60f44f4e
import
threading
from
time
import
sleep
,
ctime
from
typing
import
List
from
celery
import
shared_task
@shared_task
def test_task():
    """Throwaway smoke test for the Celery worker and Python threading.

    Launches three pairs of threads — a fast one sleeping 1s between
    steps and a slow one sleeping 10s — each appending three progress
    strings to a shared list, then joins them all and prints the
    collected output together with start/end timestamps.

    Usage: test_task.delay()
    """
    print('---开始---: %s' % ctime())

    def _worker(template: str, pause: int, sink: List):
        # Record three progress steps, pausing between each one.
        for step in range(3):
            print(template % step)
            sink.append(template % step)
            sleep(pause)

    shared: List = []
    workers = []
    for _ in range(3):
        pair = (
            threading.Thread(target=_worker, args=("smoke... %d", 1, shared)),
            threading.Thread(target=_worker, args=("drink... %d", 10, shared)),
        )
        for t in pair:
            workers.append(t)
            t.start()

    for t in workers:
        t.join()

    print(str(shared))
    print('---结束---: %s' % ctime())
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment