Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
397a92f2
Unverified
Commit
397a92f2
authored
Jul 12, 2023
by
zxhlyh
Committed by
GitHub
Jul 12, 2023
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
convert audio wav to mp3 (#552)
parent
b91e2260
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
64 additions
and
27 deletions
+64
-27
audio_service.py
api/services/audio_service.py
+5
-4
audio.py
api/services/errors/audio.py
+8
-18
index.tsx
web/app/components/base/voice-input/index.tsx
+10
-4
utils.ts
web/app/components/base/voice-input/utils.ts
+38
-0
global.d.ts
web/global.d.ts
+1
-0
package.json
web/package.json
+2
-1
No files found.
api/services/audio_service.py
View file @
397a92f2
...
@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServ
...
@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServ
from
core.llm.whisper
import
Whisper
from
core.llm.whisper
import
Whisper
from
models.provider
import
ProviderName
from
models.provider
import
ProviderName
FILE_SIZE_LIMIT
=
1
*
1024
*
1024
FILE_SIZE
=
15
FILE_SIZE_LIMIT
=
FILE_SIZE
*
1024
*
1024
ALLOWED_EXTENSIONS
=
[
'mp3'
,
'mp4'
,
'mpeg'
,
'mpga'
,
'm4a'
,
'wav'
,
'webm'
]
ALLOWED_EXTENSIONS
=
[
'mp3'
,
'mp4'
,
'mpeg'
,
'mpga'
,
'm4a'
,
'wav'
,
'webm'
]
class
AudioService
:
class
AudioService
:
...
@@ -23,17 +24,17 @@ class AudioService:
...
@@ -23,17 +24,17 @@ class AudioService:
file_size
=
len
(
file_content
)
file_size
=
len
(
file_content
)
if
file_size
>
FILE_SIZE_LIMIT
:
if
file_size
>
FILE_SIZE_LIMIT
:
message
=
f
"
({file_size} > {FILE_SIZE_LIMIT})
"
message
=
f
"
Audio size larger than {FILE_SIZE} mb
"
raise
AudioTooLargeServiceError
(
message
)
raise
AudioTooLargeServiceError
(
message
)
provider_name
=
LLMBuilder
.
get_default_provider
(
tenant_id
)
provider_name
=
LLMBuilder
.
get_default_provider
(
tenant_id
)
if
provider_name
!=
ProviderName
.
OPENAI
.
value
:
if
provider_name
!=
ProviderName
.
OPENAI
.
value
:
raise
ProviderNotSupportSpeechToTextServiceError
(
'haha'
)
raise
ProviderNotSupportSpeechToTextServiceError
()
provider_service
=
LLMProviderService
(
tenant_id
,
provider_name
)
provider_service
=
LLMProviderService
(
tenant_id
,
provider_name
)
buffer
=
io
.
BytesIO
(
file_content
)
buffer
=
io
.
BytesIO
(
file_content
)
buffer
.
name
=
'temp.
wav
'
buffer
.
name
=
'temp.
mp3
'
return
Whisper
(
provider_service
.
provider
)
.
transcribe
(
buffer
)
return
Whisper
(
provider_service
.
provider
)
.
transcribe
(
buffer
)
...
...
api/services/errors/audio.py
View file @
397a92f2
from
services.errors.base
import
BaseServiceError
class
NoAudioUploadedServiceError
(
Exception
):
pass
class
NoAudioUploadedServiceError
(
BaseServiceError
):
error_code
=
'no_audio_uploaded'
description
=
"Please upload your audio."
code
=
400
class
AudioTooLargeServiceError
(
Exception
):
pass
class
AudioTooLargeServiceError
(
BaseServiceError
):
error_code
=
'audio_too_large'
description
=
"Audio size exceeded. {message}"
code
=
413
class
UnsupportedAudioTypeServiceError
(
Exception
):
pass
class
UnsupportedAudioTypeServiceError
(
BaseServiceError
):
class
ProviderNotSupportSpeechToTextServiceError
(
Exception
):
error_code
=
'unsupported_audio_type'
pass
description
=
"Audio type not allowed."
\ No newline at end of file
code
=
415
class
ProviderNotSupportSpeechToTextServiceError
(
BaseServiceError
):
error_code
=
'provider_not_support_speech_to_text'
description
=
"Provider not support speech to text. {message}"
code
=
400
\ No newline at end of file
web/app/components/base/voice-input/index.tsx
View file @
397a92f2
...
@@ -4,6 +4,7 @@ import { useParams, usePathname } from 'next/navigation'
...
@@ -4,6 +4,7 @@ import { useParams, usePathname } from 'next/navigation'
import
cn
from
'classnames'
import
cn
from
'classnames'
import
Recorder
from
'js-audio-recorder'
import
Recorder
from
'js-audio-recorder'
import
{
useRafInterval
}
from
'ahooks'
import
{
useRafInterval
}
from
'ahooks'
import
{
convertToMp3
}
from
'./utils'
import
s
from
'./index.module.css'
import
s
from
'./index.module.css'
import
{
StopCircle
}
from
'@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import
{
StopCircle
}
from
'@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import
{
Loading02
,
XClose
}
from
'@/app/components/base/icons/src/vender/line/general'
import
{
Loading02
,
XClose
}
from
'@/app/components/base/icons/src/vender/line/general'
...
@@ -19,7 +20,12 @@ const VoiceInput = ({
...
@@ -19,7 +20,12 @@ const VoiceInput = ({
onConverted
,
onConverted
,
}:
VoiceInputTypes
)
=>
{
}:
VoiceInputTypes
)
=>
{
const
{
t
}
=
useTranslation
()
const
{
t
}
=
useTranslation
()
const
recorder
=
useRef
(
new
Recorder
())
const
recorder
=
useRef
(
new
Recorder
({
sampleBits
:
16
,
sampleRate
:
16000
,
numChannels
:
1
,
compiling
:
false
,
}))
const
canvasRef
=
useRef
<
HTMLCanvasElement
|
null
>
(
null
)
const
canvasRef
=
useRef
<
HTMLCanvasElement
|
null
>
(
null
)
const
ctxRef
=
useRef
<
CanvasRenderingContext2D
|
null
>
(
null
)
const
ctxRef
=
useRef
<
CanvasRenderingContext2D
|
null
>
(
null
)
const
drawRecordId
=
useRef
<
number
|
null
>
(
null
)
const
drawRecordId
=
useRef
<
number
|
null
>
(
null
)
...
@@ -75,10 +81,10 @@ const VoiceInput = ({
...
@@ -75,10 +81,10 @@ const VoiceInput = ({
const
canvas
=
canvasRef
.
current
!
const
canvas
=
canvasRef
.
current
!
const
ctx
=
ctxRef
.
current
!
const
ctx
=
ctxRef
.
current
!
ctx
.
clearRect
(
0
,
0
,
canvas
.
width
,
canvas
.
height
)
ctx
.
clearRect
(
0
,
0
,
canvas
.
width
,
canvas
.
height
)
const
wavBlob
=
recorder
.
current
.
getWAVBlob
(
)
const
mp3Blob
=
convertToMp3
(
recorder
.
current
)
const
wavFile
=
new
File
([
wavBlob
],
'a.wav'
,
{
type
:
'audio/wav
'
})
const
mp3File
=
new
File
([
mp3Blob
],
'temp.mp3'
,
{
type
:
'audio/mp3
'
})
const
formData
=
new
FormData
()
const
formData
=
new
FormData
()
formData
.
append
(
'file'
,
wav
File
)
formData
.
append
(
'file'
,
mp3
File
)
let
url
=
''
let
url
=
''
let
isPublic
=
false
let
isPublic
=
false
...
...
web/app/components/base/voice-input/utils.ts
0 → 100644
View file @
397a92f2
import
lamejs
from
'lamejs'
export
const
convertToMp3
=
(
recorder
:
any
)
=>
{
const
wav
=
lamejs
.
WavHeader
.
readHeader
(
recorder
.
getWAV
())
const
{
channels
,
sampleRate
}
=
wav
const
mp3enc
=
new
lamejs
.
Mp3Encoder
(
channels
,
sampleRate
,
128
)
const
result
=
recorder
.
getChannelData
()
const
buffer
=
[]
const
leftData
=
result
.
left
&&
new
Int16Array
(
result
.
left
.
buffer
,
0
,
result
.
left
.
byteLength
/
2
)
const
rightData
=
result
.
right
&&
new
Int16Array
(
result
.
right
.
buffer
,
0
,
result
.
right
.
byteLength
/
2
)
const
remaining
=
leftData
.
length
+
(
rightData
?
rightData
.
length
:
0
)
const
maxSamples
=
1152
for
(
let
i
=
0
;
i
<
remaining
;
i
+=
maxSamples
)
{
const
left
=
leftData
.
subarray
(
i
,
i
+
maxSamples
)
let
right
=
null
let
mp3buf
=
null
if
(
channels
===
2
)
{
right
=
rightData
.
subarray
(
i
,
i
+
maxSamples
)
mp3buf
=
mp3enc
.
encodeBuffer
(
left
,
right
)
}
else
{
mp3buf
=
mp3enc
.
encodeBuffer
(
left
)
}
if
(
mp3buf
.
length
>
0
)
buffer
.
push
(
mp3buf
)
}
const
enc
=
mp3enc
.
flush
()
if
(
enc
.
length
>
0
)
buffer
.
push
(
enc
)
return
new
Blob
(
buffer
,
{
type
:
'audio/mp3'
})
}
web/global.d.ts
0 → 100644
View file @
397a92f2
declare
module
'lamejs'
;
\ No newline at end of file
web/package.json
View file @
397a92f2
...
@@ -81,7 +81,8 @@
...
@@ -81,7 +81,8 @@
"swr"
:
"^2.1.0"
,
"swr"
:
"^2.1.0"
,
"tailwindcss"
:
"^3.2.7"
,
"tailwindcss"
:
"^3.2.7"
,
"typescript"
:
"4.9.5"
,
"typescript"
:
"4.9.5"
,
"use-context-selector"
:
"^1.4.1"
"use-context-selector"
:
"^1.4.1"
,
"lamejs"
:
"1.2.0"
},
},
"devDependencies"
:
{
"devDependencies"
:
{
"@antfu/eslint-config"
:
"^0.36.0"
,
"@antfu/eslint-config"
:
"^0.36.0"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment