Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dify
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ai-tech
dify
Commits
62b2bcdf
Commit
62b2bcdf
authored
Jul 12, 2023
by
StyleZhang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
convert audio wav to mp3
parent
da5782df
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
64 additions
and
27 deletions
+64
-27
audio_service.py
api/services/audio_service.py
+5
-4
audio.py
api/services/errors/audio.py
+8
-18
index.tsx
web/app/components/base/voice-input/index.tsx
+10
-4
utils.ts
web/app/components/base/voice-input/utils.ts
+38
-0
global.d.ts
web/global.d.ts
+1
-0
package.json
web/package.json
+2
-1
No files found.
api/services/audio_service.py
View file @
62b2bcdf
...
@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServ
...
@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServ
from
core.llm.whisper
import
Whisper
from
core.llm.whisper
import
Whisper
from
models.provider
import
ProviderName
from
models.provider
import
ProviderName
FILE_SIZE_LIMIT
=
1
*
1024
*
1024
FILE_SIZE
=
15
FILE_SIZE_LIMIT
=
FILE_SIZE
*
1024
*
1024
ALLOWED_EXTENSIONS
=
[
'mp3'
,
'mp4'
,
'mpeg'
,
'mpga'
,
'm4a'
,
'wav'
,
'webm'
]
ALLOWED_EXTENSIONS
=
[
'mp3'
,
'mp4'
,
'mpeg'
,
'mpga'
,
'm4a'
,
'wav'
,
'webm'
]
class
AudioService
:
class
AudioService
:
...
@@ -23,17 +24,17 @@ class AudioService:
...
@@ -23,17 +24,17 @@ class AudioService:
file_size
=
len
(
file_content
)
file_size
=
len
(
file_content
)
if
file_size
>
FILE_SIZE_LIMIT
:
if
file_size
>
FILE_SIZE_LIMIT
:
message
=
f
"
({file_size} > {FILE_SIZE_LIMIT})
"
message
=
f
"
Audio size larger than {FILE_SIZE} mb
"
raise
AudioTooLargeServiceError
(
message
)
raise
AudioTooLargeServiceError
(
message
)
provider_name
=
LLMBuilder
.
get_default_provider
(
tenant_id
)
provider_name
=
LLMBuilder
.
get_default_provider
(
tenant_id
)
if
provider_name
!=
ProviderName
.
OPENAI
.
value
:
if
provider_name
!=
ProviderName
.
OPENAI
.
value
:
raise
ProviderNotSupportSpeechToTextServiceError
(
'haha'
)
raise
ProviderNotSupportSpeechToTextServiceError
()
provider_service
=
LLMProviderService
(
tenant_id
,
provider_name
)
provider_service
=
LLMProviderService
(
tenant_id
,
provider_name
)
buffer
=
io
.
BytesIO
(
file_content
)
buffer
=
io
.
BytesIO
(
file_content
)
buffer
.
name
=
'temp.
wav
'
buffer
.
name
=
'temp.
mp3
'
return
Whisper
(
provider_service
.
provider
)
.
transcribe
(
buffer
)
return
Whisper
(
provider_service
.
provider
)
.
transcribe
(
buffer
)
...
...
api/services/errors/audio.py
View file @
62b2bcdf
from
services.errors.base
import
BaseServiceError
class
NoAudioUploadedServiceError
(
Exception
):
pass
class
NoAudioUploadedServiceError
(
BaseServiceError
):
error_code
=
'no_audio_uploaded'
description
=
"Please upload your audio."
code
=
400
class
AudioTooLargeServiceError
(
Exception
):
pass
class
AudioTooLargeServiceError
(
BaseServiceError
):
error_code
=
'audio_too_large'
description
=
"Audio size exceeded. {message}"
code
=
413
class
UnsupportedAudioTypeServiceError
(
Exception
):
pass
class
UnsupportedAudioTypeServiceError
(
BaseServiceError
):
class
ProviderNotSupportSpeechToTextServiceError
(
Exception
):
error_code
=
'unsupported_audio_type'
pass
description
=
"Audio type not allowed."
\ No newline at end of file
code
=
415
class
ProviderNotSupportSpeechToTextServiceError
(
BaseServiceError
):
error_code
=
'provider_not_support_speech_to_text'
description
=
"Provider not support speech to text. {message}"
code
=
400
\ No newline at end of file
web/app/components/base/voice-input/index.tsx
View file @
62b2bcdf
...
@@ -4,6 +4,7 @@ import { useParams, usePathname } from 'next/navigation'
...
@@ -4,6 +4,7 @@ import { useParams, usePathname } from 'next/navigation'
import
cn
from
'classnames'
import
cn
from
'classnames'
import
Recorder
from
'js-audio-recorder'
import
Recorder
from
'js-audio-recorder'
import
{
useRafInterval
}
from
'ahooks'
import
{
useRafInterval
}
from
'ahooks'
import
{
convertToMp3
}
from
'./utils'
import
s
from
'./index.module.css'
import
s
from
'./index.module.css'
import
{
StopCircle
}
from
'@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import
{
StopCircle
}
from
'@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import
{
Loading02
,
XClose
}
from
'@/app/components/base/icons/src/vender/line/general'
import
{
Loading02
,
XClose
}
from
'@/app/components/base/icons/src/vender/line/general'
...
@@ -19,7 +20,12 @@ const VoiceInput = ({
...
@@ -19,7 +20,12 @@ const VoiceInput = ({
onConverted
,
onConverted
,
}:
VoiceInputTypes
)
=>
{
}:
VoiceInputTypes
)
=>
{
const
{
t
}
=
useTranslation
()
const
{
t
}
=
useTranslation
()
const
recorder
=
useRef
(
new
Recorder
())
const
recorder
=
useRef
(
new
Recorder
({
sampleBits
:
16
,
sampleRate
:
16000
,
numChannels
:
1
,
compiling
:
false
,
}))
const
canvasRef
=
useRef
<
HTMLCanvasElement
|
null
>
(
null
)
const
canvasRef
=
useRef
<
HTMLCanvasElement
|
null
>
(
null
)
const
ctxRef
=
useRef
<
CanvasRenderingContext2D
|
null
>
(
null
)
const
ctxRef
=
useRef
<
CanvasRenderingContext2D
|
null
>
(
null
)
const
drawRecordId
=
useRef
<
number
|
null
>
(
null
)
const
drawRecordId
=
useRef
<
number
|
null
>
(
null
)
...
@@ -75,10 +81,10 @@ const VoiceInput = ({
...
@@ -75,10 +81,10 @@ const VoiceInput = ({
const
canvas
=
canvasRef
.
current
!
const
canvas
=
canvasRef
.
current
!
const
ctx
=
ctxRef
.
current
!
const
ctx
=
ctxRef
.
current
!
ctx
.
clearRect
(
0
,
0
,
canvas
.
width
,
canvas
.
height
)
ctx
.
clearRect
(
0
,
0
,
canvas
.
width
,
canvas
.
height
)
const
wavBlob
=
recorder
.
current
.
getWAVBlob
(
)
const
mp3Blob
=
convertToMp3
(
recorder
.
current
)
const
wavFile
=
new
File
([
wavBlob
],
'a.wav'
,
{
type
:
'audio/wav
'
})
const
mp3File
=
new
File
([
mp3Blob
],
'temp.mp3'
,
{
type
:
'audio/mp3
'
})
const
formData
=
new
FormData
()
const
formData
=
new
FormData
()
formData
.
append
(
'file'
,
wav
File
)
formData
.
append
(
'file'
,
mp3
File
)
let
url
=
''
let
url
=
''
let
isPublic
=
false
let
isPublic
=
false
...
...
web/app/components/base/voice-input/utils.ts
0 → 100644
View file @
62b2bcdf
import
lamejs
from
'lamejs'
export
const
convertToMp3
=
(
recorder
:
any
)
=>
{
const
wav
=
lamejs
.
WavHeader
.
readHeader
(
recorder
.
getWAV
())
const
{
channels
,
sampleRate
}
=
wav
const
mp3enc
=
new
lamejs
.
Mp3Encoder
(
channels
,
sampleRate
,
128
)
const
result
=
recorder
.
getChannelData
()
const
buffer
=
[]
const
leftData
=
result
.
left
&&
new
Int16Array
(
result
.
left
.
buffer
,
0
,
result
.
left
.
byteLength
/
2
)
const
rightData
=
result
.
right
&&
new
Int16Array
(
result
.
right
.
buffer
,
0
,
result
.
right
.
byteLength
/
2
)
const
remaining
=
leftData
.
length
+
(
rightData
?
rightData
.
length
:
0
)
const
maxSamples
=
1152
for
(
let
i
=
0
;
i
<
remaining
;
i
+=
maxSamples
)
{
const
left
=
leftData
.
subarray
(
i
,
i
+
maxSamples
)
let
right
=
null
let
mp3buf
=
null
if
(
channels
===
2
)
{
right
=
rightData
.
subarray
(
i
,
i
+
maxSamples
)
mp3buf
=
mp3enc
.
encodeBuffer
(
left
,
right
)
}
else
{
mp3buf
=
mp3enc
.
encodeBuffer
(
left
)
}
if
(
mp3buf
.
length
>
0
)
buffer
.
push
(
mp3buf
)
}
const
enc
=
mp3enc
.
flush
()
if
(
enc
.
length
>
0
)
buffer
.
push
(
enc
)
return
new
Blob
(
buffer
,
{
type
:
'audio/mp3'
})
}
web/global.d.ts
0 → 100644
View file @
62b2bcdf
declare
module
'lamejs'
;
\ No newline at end of file
web/package.json
View file @
62b2bcdf
...
@@ -81,7 +81,8 @@
...
@@ -81,7 +81,8 @@
"swr"
:
"^2.1.0"
,
"swr"
:
"^2.1.0"
,
"tailwindcss"
:
"^3.2.7"
,
"tailwindcss"
:
"^3.2.7"
,
"typescript"
:
"4.9.5"
,
"typescript"
:
"4.9.5"
,
"use-context-selector"
:
"^1.4.1"
"use-context-selector"
:
"^1.4.1"
,
"lamejs"
:
"1.2.0"
},
},
"devDependencies"
:
{
"devDependencies"
:
{
"@antfu/eslint-config"
:
"^0.36.0"
,
"@antfu/eslint-config"
:
"^0.36.0"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment