Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
dbecab70
Commit
dbecab70
authored
Jul 04, 2023
by
StyleZhang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add audio-to-text api
parent
fa55e1c1
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
184 additions
and
9 deletions
+184
-9
.gitignore
.gitignore
+2
-0
completion.py
api/controllers/console/explore/completion.py
+48
-1
completion.py
api/controllers/web/completion.py
+47
-2
audio_service.py
api/services/audio_service.py
+39
-0
index.tsx
web/app/components/app/chat/index.tsx
+12
-1
index.tsx
web/app/components/base/voice-input/index.tsx
+22
-4
index.tsx
web/app/components/share/chat/index.tsx
+2
-0
base.ts
web/service/base.ts
+8
-1
share.ts
web/service/share.ts
+4
-0
No files found.
.gitignore
View file @
dbecab70
...
...
@@ -147,3 +147,5 @@ docker/volumes/weaviate/*
sdks/python-client/build
sdks/python-client/dist
sdks/python-client/dify_client.egg-info
.vscode/
\ No newline at end of file
api/controllers/console/explore/completion.py
View file @
dbecab70
...
...
@@ -3,7 +3,7 @@ import json
import
logging
from
typing
import
Generator
,
Union
from
flask
import
Response
,
stream_with_context
from
flask
import
Response
,
stream_with_context
,
request
from
flask_login
import
current_user
from
flask_restful
import
reqparse
from
werkzeug.exceptions
import
InternalServerError
,
NotFound
...
...
@@ -19,6 +19,9 @@ from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthor
LLMRateLimitError
,
ProviderTokenNotInitError
,
QuotaExceededError
,
ModelCurrentlyNotSupportError
from
libs.helper
import
uuid_value
from
services.completion_service
import
CompletionService
from
services.completion_service
import
CompletionService
from
services.audio_service
import
AudioService
from
controllers.console.datasets.error
import
NoFileUploadedError
,
TooManyFilesError
# define completion api for user
...
...
@@ -138,6 +141,49 @@ class ChatStopApi(InstalledAppResource):
PubHandler
.
stop
(
current_user
,
task_id
)
return
{
'result'
:
'success'
},
200
class AudioApi(InstalledAppResource):
    """Audio-to-text endpoint for installed apps (console "explore" area)."""

    # NOTE(review): the base class and sibling handlers are not visible here;
    # confirm that InstalledAppResource really invokes post() with
    # (app_model, end_user) before relying on this signature.
    def post(self, app_model, end_user):
        """Transcribe the single uploaded audio file of a chat app.

        The request must carry exactly one multipart file under the form
        field ``file``. ``end_user`` is accepted but not used by this handler.

        Returns:
            Whatever ``AudioService.transcript`` returns.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NoFileUploadedError: no ``file`` part was uploaded.
            TooManyFilesError: more than one file part was uploaded.
            Service and provider errors are translated into the HTTP errors
            listed in the ``except`` clauses below.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        # Validate the upload BEFORE indexing request.files: looking up a
        # missing key fails on its own, which previously made
        # NoFileUploadedError unreachable.
        if 'file' not in request.files:
            raise NoFileUploadedError()

        if len(request.files) > 1:
            raise TooManyFilesError()

        file = request.files['file']

        try:
            response = AudioService.transcript(
                app_model=app_model,
                file=file,
            )

            return response
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            raise CompletionRequestError(str(e))
        except ValueError:
            # Propagate unchanged; a bare raise keeps the original traceback.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
def
compact_response
(
response
:
Union
[
dict
|
Generator
])
->
Response
:
...
...
@@ -178,3 +224,4 @@ api.add_resource(CompletionApi, '/installed-apps/<uuid:installed_app_id>/complet
# Route registrations for the installed-app (explore) completion, chat and
# audio resources.
api.add_resource(CompletionStopApi, '/installed-apps/<uuid:installed_app_id>/completion-messages/<string:task_id>/stop', endpoint='installed_app_stop_completion')
api.add_resource(ChatApi, '/installed-apps/<uuid:installed_app_id>/chat-messages', endpoint='installed_app_chat_completion')
api.add_resource(ChatStopApi, '/installed-apps/<uuid:installed_app_id>/chat-messages/<string:task_id>/stop', endpoint='installed_app_stop_chat_completion')
# NOTE(review): unlike the registrations above, this one passes no explicit
# endpoint= name — confirm the default endpoint cannot collide with another
# resource class of the same name.
api.add_resource(AudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text')
api/controllers/web/completion.py
View file @
dbecab70
...
...
@@ -3,7 +3,7 @@ import json
import
logging
from
typing
import
Generator
,
Union
from
flask
import
Response
,
stream_with_context
from
flask
import
Response
,
stream_with_context
,
request
from
flask_restful
import
reqparse
from
werkzeug.exceptions
import
InternalServerError
,
NotFound
...
...
@@ -18,7 +18,8 @@ from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthor
LLMRateLimitError
,
ProviderTokenNotInitError
,
QuotaExceededError
,
ModelCurrentlyNotSupportError
from
libs.helper
import
uuid_value
from
services.completion_service
import
CompletionService
from
services.audio_service
import
AudioService
from
controllers.console.datasets.error
import
NoFileUploadedError
,
TooManyFilesError
# define completion api for user
class
CompletionApi
(
WebApiResource
):
...
...
@@ -133,6 +134,49 @@ class ChatStopApi(WebApiResource):
PubHandler
.
stop
(
end_user
,
task_id
)
return
{
'result'
:
'success'
},
200
class AudioApi(WebApiResource):
    """Audio-to-text endpoint of the public web API."""

    def post(self, app_model, end_user):
        """Transcribe the single uploaded audio file of a chat app.

        The request must carry exactly one multipart file under the form
        field ``file``. ``end_user`` is accepted but not used by this handler.

        Returns:
            Whatever ``AudioService.transcript`` returns.

        Raises:
            NotChatAppError: the app's mode is not ``'chat'``.
            NoFileUploadedError: no ``file`` part was uploaded.
            TooManyFilesError: more than one file part was uploaded.
            Service and provider errors are translated into the HTTP errors
            listed in the ``except`` clauses below.
        """
        if app_model.mode != 'chat':
            raise NotChatAppError()

        # Validate the upload BEFORE indexing request.files: looking up a
        # missing key fails on its own, which previously made
        # NoFileUploadedError unreachable.
        if 'file' not in request.files:
            raise NoFileUploadedError()

        if len(request.files) > 1:
            raise TooManyFilesError()

        file = request.files['file']

        try:
            response = AudioService.transcript(
                app_model=app_model,
                file=file,
            )

            return response
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            raise CompletionRequestError(str(e))
        except ValueError:
            # Propagate unchanged; a bare raise keeps the original traceback.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
def
compact_response
(
response
:
Union
[
dict
|
Generator
])
->
Response
:
...
...
@@ -173,3 +217,4 @@ api.add_resource(CompletionApi, '/completion-messages')
# Route registrations for the web API completion, chat and audio resources.
api.add_resource(CompletionStopApi, '/completion-messages/<string:task_id>/stop')
api.add_resource(ChatApi, '/chat-messages')
api.add_resource(ChatStopApi, '/chat-messages/<string:task_id>/stop')
# Audio upload endpoint handled by AudioApi.
api.add_resource(AudioApi, '/audio-to-text')
api/services/audio_service.py
0 → 100644
View file @
dbecab70
import
openai
from
core.llm.llm_builder
import
LLMBuilder
from
core.llm.provider.llm_provider_service
import
LLMProviderService
from
models.model
import
App
from
controllers.console.datasets.error
import
FileTooLargeError
,
\
UnsupportedFileTypeError
FILE_SIZE_LIMIT = 25 * 1024 * 1024  # 25MB
ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']


class AudioService:
    """Speech-to-text helper that forwards uploads to OpenAI's `whisper-1`."""

    @classmethod
    def transcript(cls, app_model: App, file, **params):
        """Validate an uploaded audio file and transcribe it.

        Args:
            app_model: App whose ``tenant_id`` selects the provider credentials.
            file: Uploaded file object; it must offer ``read()`` and
                ``filename``.
            **params: Extra options forwarded to ``openai.Audio.transcribe``.

        Returns:
            The object returned by ``openai.Audio.transcribe``.

        Raises:
            FileTooLargeError: the payload exceeds ``FILE_SIZE_LIMIT`` bytes.
            UnsupportedFileTypeError: the filename extension is not in
                ``ALLOWED_EXTENSIONS``.
        """
        # Local import so this block does not depend on the file's import header.
        import io

        file_content = file.read()
        file_size = len(file_content)

        if file_size > FILE_SIZE_LIMIT:
            # f-string: the message used to be emitted with literal placeholders.
            message = f"({file_size} > {FILE_SIZE_LIMIT})"
            raise FileTooLargeError(message)

        # Tolerate a missing filename and upper-case extensions such as ".WAV".
        extension = (file.filename or '').rsplit('.', 1)[-1].lower()
        if extension not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()

        provider_name = LLMBuilder.get_default_provider(app_model.tenant_id)
        provider = LLMProviderService(app_model.tenant_id, provider_name)
        credentials = provider.get_credentials(provider_name)

        # file.read() above consumed the upload stream, so handing `file` on
        # would send an exhausted stream. Re-wrap the bytes in a fresh buffer.
        # NOTE(review): the openai client is understood to take the upload
        # filename from `file.name`; keep the validated extension there —
        # confirm against the pinned openai version.
        buffer = io.BytesIO(file_content)
        buffer.name = f'audio.{extension}'

        transcript = openai.Audio.transcribe(
            model='whisper-1',
            file=buffer,
            api_key=credentials.get('openai_api_key'),
            api_base=credentials.get('openai_api_base'),
            api_type=credentials.get('openai_api_type'),
            api_version=credentials.get('openai_api_version'),
            # Forward the options themselves, not one literal `params` field.
            **params
        )

        return transcript
\ No newline at end of file
web/app/components/app/chat/index.tsx
View file @
dbecab70
...
...
@@ -63,6 +63,8 @@ export type IChatProps = {
controlFocus
?:
number
isShowSuggestion
?:
boolean
suggestionList
?:
string
[]
isInstalledApp
:
boolean
installedAppId
:
string
}
export
type
MessageMore
=
{
...
...
@@ -425,6 +427,8 @@ const Chat: FC<IChatProps> = ({
controlFocus
,
isShowSuggestion
,
suggestionList
,
isInstalledApp
,
installedAppId
,
})
=>
{
const
{
t
}
=
useTranslation
()
const
{
notify
}
=
useContext
(
ToastContext
)
...
...
@@ -606,7 +610,14 @@ const Chat: FC<IChatProps> = ({
)
}
</
div
>
{
voiceInputShow
&&
<
VoiceInput
onCancel=
{
()
=>
setVoiceInputShow
(
false
)
}
onConverted=
{
()
=>
setVoiceInputShow
(
false
)
}
/>
voiceInputShow
&&
(
<
VoiceInput
isInstalledApp=
{
isInstalledApp
}
installedAppId=
{
installedAppId
}
onCancel=
{
()
=>
setVoiceInputShow
(
false
)
}
onConverted=
{
text
=>
setQuery
(
text
)
}
/>
)
}
</
div
>
</
div
>
...
...
web/app/components/base/voice-input/index.tsx
View file @
dbecab70
...
...
@@ -5,13 +5,18 @@ import Recorder from 'js-audio-recorder'
import
s
from
'./index.module.css'
import
{
StopCircle
}
from
'@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import
{
Loading02
,
XClose
}
from
'@/app/components/base/icons/src/vender/line/general'
import
{
audioToText
}
from
'@/service/share'
// Props accepted by the VoiceInput component.
type VoiceInputTypes = {
  // Forwarded unchanged to audioToText() together with installedAppId.
  isInstalledApp: boolean
  installedAppId: string
  // Receives the transcribed text, or '' when the audio-to-text request fails.
  onConverted: (text: string) => void
  // Invoked after every conversion attempt, whether it succeeded or failed.
  onCancel: () => void
}
const
VoiceInput
=
({
isInstalledApp
,
installedAppId
,
onCancel
,
onConverted
,
}:
VoiceInputTypes
)
=>
{
...
...
@@ -49,7 +54,7 @@ const VoiceInput = ({
}
ctx
.
closePath
()
},
[])
const
handleStopRecorder
=
useCallback
(()
=>
{
const
handleStopRecorder
=
useCallback
(
async
()
=>
{
setStartRecord
(
false
)
setStartConvert
(
true
)
recorder
.
current
.
stop
()
...
...
@@ -58,9 +63,22 @@ const VoiceInput = ({
const
canvas
=
canvasRef
.
current
!
const
ctx
=
ctxRef
.
current
!
ctx
.
clearRect
(
0
,
0
,
canvas
.
width
,
canvas
.
height
)
// const wavBlob = recorder.current.getWAVBlob()
// const wavFile = new File([wavBlob], 'audio.wav', { type: 'audio/wav' })
// onConverted('')
const
wavBlob
=
recorder
.
current
.
getWAVBlob
()
const
wavFile
=
new
File
([
wavBlob
],
'a.wav'
,
{
type
:
'audio/wav'
})
const
formData
=
new
FormData
()
formData
.
append
(
'file'
,
wavBlob
)
try
{
const
audioResponse
=
await
audioToText
(
isInstalledApp
,
installedAppId
,
formData
)
const
audioData
=
await
audioResponse
.
json
()
onConverted
(
audioData
.
text
)
}
catch
(
e
)
{
onConverted
(
''
)
}
finally
{
onCancel
()
}
},
[])
const
handleStartRecord
=
()
=>
{
setStartRecord
(
true
)
...
...
web/app/components/share/chat/index.tsx
View file @
dbecab70
...
...
@@ -617,6 +617,8 @@ const Main: FC<IMainProps> = ({
controlFocus=
{
controlFocus
}
isShowSuggestion=
{
doShowSuggestion
}
suggestionList=
{
suggestQuestions
}
isInstalledApp=
{
isInstalledApp
}
installedAppId=
{
installedAppInfo
?.
id
||
''
}
/>
</
div
>
</
div
>)
...
...
web/service/base.ts
View file @
dbecab70
...
...
@@ -35,7 +35,9 @@ export type IOnError = (msg: string) => void
type
IOtherOptions
=
{
isPublicAPI
?:
boolean
bodyStringify
?:
boolean
needAllResponseContent
?:
boolean
deleteContentType
?:
boolean
onData
?:
IOnData
// for stream
onError
?:
IOnError
onCompleted
?:
IOnCompleted
// for stream
...
...
@@ -132,7 +134,9 @@ const baseFetch = (
fetchOptions
:
any
,
{
isPublicAPI
=
false
,
bodyStringify
=
true
,
needAllResponseContent
,
deleteContentType
,
}:
IOtherOptions
,
)
=>
{
const
options
=
Object
.
assign
({},
baseOptions
,
fetchOptions
)
...
...
@@ -141,6 +145,9 @@ const baseFetch = (
options
.
headers
.
set
(
'Authorization'
,
`bearer
${
sharedToken
}
`
)
}
if
(
deleteContentType
)
options
.
headers
.
delete
(
'Content-Type'
)
const
urlPrefix
=
isPublicAPI
?
PUBLIC_API_PREFIX
:
API_PREFIX
let
urlWithPrefix
=
`
${
urlPrefix
}${
url
.
startsWith
(
'/'
)
?
url
:
`/
${
url
}
`
}
`
...
...
@@ -160,7 +167,7 @@ const baseFetch = (
delete
options
.
params
}
if
(
body
)
if
(
body
&&
bodyStringify
)
options
.
body
=
JSON
.
stringify
(
body
)
// Handle timeout
...
...
web/service/share.ts
View file @
dbecab70
...
...
@@ -114,3 +114,7 @@ export const removeMessage = (messageId: string, isInstalledApp: boolean, instal
/**
 * Issue a GET to `/messages/{messageId}/suggested-questions`.
 *
 * The request function and the final URL are both chosen from
 * `isInstalledApp` (and `installedAppId`) by getAction / getUrl.
 */
export const fetchSuggestedQuestions = (messageId: string, isInstalledApp: boolean, installedAppId = '') => {
  const sendGet = getAction('get', isInstalledApp)
  const endpoint = getUrl(`/messages/${messageId}/suggested-questions`, isInstalledApp, installedAppId)
  return sendGet(endpoint)
}
/**
 * POST a FormData payload to the `/audio-to-text` endpoint.
 *
 * `bodyStringify: false` keeps the FormData from being JSON-stringified and
 * `deleteContentType: true` removes the default Content-Type header —
 * presumably so the browser can supply the multipart boundary itself.
 */
export const audioToText = (isInstalledApp: boolean, installedAppId: string, body: FormData) => {
  const sendPost = getAction('post', isInstalledApp)
  const endpoint = getUrl('/audio-to-text', isInstalledApp, installedAppId)
  return sendPost(endpoint, { body }, { bodyStringify: false, deleteContentType: true })
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment