Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
4a45e94b
Commit
4a45e94b
authored
Jul 16, 2023
by
jyong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix clean dataset task
parent
5269b00d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
88 additions
and
2 deletions
+88
-2
app.py
api/app.py
+13
-0
llm_generator.py
api/core/generator/llm_generator.py
+2
-1
indexing_runner.py
api/core/indexing_runner.py
+49
-1
generate_test_task.py
api/tasks/generate_test_task.py
+24
-0
No files found.
api/app.py
View file @
4a45e94b
...
...
import os
from datetime import datetime

import requests

from tasks.generate_test_task import generate_test_task

# Apply gevent's cooperative monkey-patching unless running with DEBUG=true.
# A single environ lookup with a default replaces the original double
# `os.environ.get("DEBUG")` read; behavior is identical (unset, empty, or any
# value other than "true"/"True"/... all trigger patching).
# NOTE(review): gevent documentation recommends calling monkey.patch_all()
# before importing modules that use sockets (requests is imported above) —
# confirm this ordering is intentional.
if os.environ.get("DEBUG", "").lower() != 'true':
    from gevent import monkey
    monkey.patch_all()
...
...
@@ -199,6 +203,15 @@ def health():
}),
status
=
200
,
content_type
=
"application/json"
)
@app.route('/test')
def test():
    """Debug endpoint: enqueue the celery smoke-test task and probe OpenAI.

    Fires ``generate_test_task`` asynchronously, issues a best-effort POST to
    the OpenAI chat completions endpoint (no auth/body — presumably only used
    to check outbound connectivity; the response is printed, not inspected),
    and returns a static JSON ``{"status": "ok"}``.
    """
    generate_test_task.delay()
    # Fix: the original call had no timeout, so a stalled connection would
    # block this request handler indefinitely.
    res = requests.post('https://api.openai.com/v1/chat/completions',
                        timeout=10)
    print(res)
    return Response(json.dumps({
        'status': 'ok',
    }), status=200, content_type="application/json")
@
app
.
route
(
'/threads'
)
def
threads
():
num_threads
=
threading
.
active_count
()
...
...
api/core/generator/llm_generator.py
View file @
4a45e94b
...
...
@@ -27,7 +27,8 @@ class LLMGenerator:
llm
:
StreamableOpenAI
=
LLMBuilder
.
to_llm
(
tenant_id
=
tenant_id
,
model_name
=
'gpt-3.5-turbo'
,
max_tokens
=
50
max_tokens
=
50
,
timeout
=
600
)
if
isinstance
(
llm
,
BaseChatModel
):
...
...
api/core/indexing_runner.py
View file @
4a45e94b
...
...
@@ -6,6 +6,7 @@ import time
import
uuid
from
typing
import
Optional
,
List
,
cast
import
openai
from
flask
import
current_app
from
flask_login
import
current_user
from
langchain.embeddings
import
OpenAIEmbeddings
...
...
@@ -471,9 +472,41 @@ class IndexingRunner:
for
document
in
documents
:
if
document
.
page_content
is
None
or
not
document
.
page_content
.
strip
():
continue
#
response
=
LLMGenerator
.
generate_qa_document
(
tenant_id
,
document
.
page_content
)
document_qa_list
=
self
.
format_split_text
(
response
)
# CONVERSATION_PROMPT = (
# "你是出题人.\n"
# "用户会发送一段长文本.\n请一步一步思考"
# 'Step1:了解并总结这段文本的主要内容\n'
# 'Step2:这段文本提到了哪些关键信息或概念\n'
# 'Step3:可分解或结合多个信息与概念\n'
# 'Step4:将这些关键信息与概念生成 10 个问题与答案,问题描述清楚并且详细完整,答案详细完整.\n'
# "按格式回答: Q1:\nA1:\nQ2:\nA2:...\n"
# )
# openai.api_key = "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"
# response = openai.ChatCompletion.create(
# model='gpt-3.5-turbo',
# messages=[
# {
# 'role': 'system',
# 'content': CONVERSATION_PROMPT
# },
# {
# 'role': 'user',
# 'content': document.page_content
# }
# ],
# temperature=0,
# stream=False, # this time, we set stream=True
#
# n=1,
# top_p=1,
# frequency_penalty=0,
# presence_penalty=0
# )
# # response = LLMGenerator.generate_qa_document('84b2202c-c359-46b7-a810-bce50feaa4d1', doc.page_content)
# document_qa_list = self.format_split_text(response['choices'][0]['message']['content'])
qa_documents
=
[]
for
result
in
document_qa_list
:
document
=
Document
(
page_content
=
result
[
'question'
],
metadata
=
{
'source'
:
result
[
'answer'
]})
...
...
@@ -517,7 +550,22 @@ class IndexingRunner:
text
=
re
.
sub
(
pattern
,
''
,
text
)
return
text
def format_split_text(self, text):
    """Parse generated text of the form ``Q1: ... A1: ...`` into Q/A pairs.

    :param text: raw completion text containing numbered question/answer pairs
    :return: list of ``{"question": ..., "answer": ...}`` dicts; pairs where
        either side is empty are dropped
    """
    # One capture group per side of each numbered pair; the answer is matched
    # lazily up to the next "Q" or a line end (re.MULTILINE lets "$" match at
    # every newline).
    regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
    matches = re.findall(regex, text, re.MULTILINE)

    result = []
    for question, answer in matches:
        if question and answer:
            result.append({
                "question": question,
                # Collapse a newline plus following indentation into a single
                # newline inside the answer.
                "answer": re.sub(r"\n\s*", "\n", answer.strip()),
            })
    return result
def
_build_index
(
self
,
dataset
:
Dataset
,
dataset_document
:
DatasetDocument
,
documents
:
List
[
Document
])
->
None
:
"""
Build the index for the document.
...
...
api/tasks/generate_test_task.py
0 → 100644
View file @
4a45e94b
import
logging
import
time
import
click
import
requests
from
celery
import
shared_task
from
core.generator.llm_generator
import
LLMGenerator
@shared_task
def generate_test_task():
    """Celery smoke-test task: generate a conversation name and log latency.

    Fire-and-forget: any failure is logged with a traceback and swallowed so
    the broker never sees an error or retries the task.
    """
    logging.info(click.style('Start generate test', fg='green'))
    start_at = time.perf_counter()
    try:
        # Fix: removed a dead commented-out `requests.post(...)` probe that
        # was left in the original body.
        # NOTE(review): hard-coded tenant id and placeholder args — scratch
        # test data only; confirm before reusing elsewhere.
        answer = LLMGenerator.generate_conversation_name(
            '84b2202c-c359-46b7-a810-bce50feaa4d1', 'avb', 'ccc')
        print(f'answer: {answer}')
        end_at = time.perf_counter()
        logging.info(click.style(
            'Conversation test, latency: {}'.format(end_at - start_at),
            fg='green'))
    except Exception:
        logging.exception("generate test failed")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment