Commit 0b928a0a authored by StyleZhang

convert audio file to bytes

parent 66fa5537
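For context, the core of the backend change is that the uploaded audio is now read into bytes once, wrapped in an `io.BytesIO` whose `name` attribute carries a filename, and handed to `openai.Audio.transcribe` (the pre-1.0 OpenAI Python SDK call used in this codebase) instead of passing the raw request file object through. Below is a minimal, self-contained sketch of that technique; the helper name `transcribe_upload`, the plain `ValueError`s, and the explicit `api_key` argument are illustrative only, not the project's actual service interface.

```python
# Sketch of the bytes -> BytesIO -> Whisper flow this commit introduces.
# Assumes the pre-1.0 `openai` SDK and a werkzeug FileStorage upload.
import io

import openai
from werkzeug.datastructures import FileStorage

FILE_SIZE_LIMIT = 25 * 1024 * 1024  # 25MB, same cap as the service
ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']


def transcribe_upload(file: FileStorage, api_key: str) -> str:
    """Illustrative helper; error types differ from the real service."""
    file_content = file.read()  # consume the upload once, into bytes
    if len(file_content) > FILE_SIZE_LIMIT:
        raise ValueError(f'audio larger than {FILE_SIZE_LIMIT} bytes')

    if file.mimetype not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
        raise ValueError(f'unsupported mime type: {file.mimetype}')

    # Wrap the bytes in a named file-like object so the SDK can attach a
    # filename (and thus a format hint) to the multipart upload.
    buffer = io.BytesIO(file_content)
    buffer.name = 'temp.wav'

    transcript = openai.Audio.transcribe(
        model='whisper-1',
        file=buffer,
        api_key=api_key,
    )
    return transcript['text']
```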
@@ -142,7 +142,8 @@ class ChatStopApi(InstalledAppResource):
         return {'result': 'success'}, 200


 class AudioApi(InstalledAppResource):
-    def post(self, app_model, end_user):
+    def post(self, installed_app):
+        app_model = installed_app.app
         if app_model.mode != 'chat':
             raise NotChatAppError()
......
 import openai
+import io
+from werkzeug.datastructures import FileStorage
 from core.llm.llm_builder import LLMBuilder
 from core.llm.provider.llm_provider_service import LLMProviderService
 from models.model import App
-from controllers.console.datasets.error import FileTooLargeError, \
-    UnsupportedFileTypeError
+from controllers.console.datasets.error import FileTooLargeError, UnsupportedFileTypeError

 FILE_SIZE_LIMIT = 25 * 1024 * 1024 # 25MB
 ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']


 class AudioService:
     @classmethod
-    def transcript(cls, app_model: App, file, **params):
+    def transcript(cls, app_model: App, file: FileStorage, **params):
+        file_content = file.read()
+        file_size = len(file_content)
         if file_size > FILE_SIZE_LIMIT:
-            message = "({file_size} > {FILE_SIZE_LIMIT})"
+            message = f"({file_size} > {FILE_SIZE_LIMIT})"
             raise FileTooLargeError(message)

-        extension = file.filename.split('.')[-1]
-        if extension not in ALLOWED_EXTENSIONS:
+        extension = file.mimetype
+        if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
             raise UnsupportedFileTypeError()

         provider_name = LLMBuilder.get_default_provider(app_model.tenant_id)
         provider = LLMProviderService(app_model.tenant_id, provider_name)
         credentials = provider.get_credentials(provider_name)

+        buffer = io.BytesIO(file_content)
+        buffer.name = 'temp.wav'
         transcript = openai.Audio.transcribe(
             model='whisper-1',
-            file=file,
+            file=buffer,
             api_key=credentials.get('openai_api_key'),
             api_base=credentials.get('openai_api_base'),
             api_type=credentials.get('openai_api_type'),
......
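One practical note on the new validation: because the service now checks `file.mimetype` against `audio/<ext>` values, the client has to send a file with an explicit content type, which the TypeScript change below does by wrapping the recorded WAV blob in a named `File`. The snippet here is a rough sketch of exercising `AudioService.transcript` directly with a hand-built `FileStorage`, useful for testing the service without HTTP; the `wav_bytes` placeholder and `some_app` variable are made up for illustration.

```python
# Rough sketch: build the same kind of FileStorage Flask would hand the
# controller, so AudioService.transcript can be exercised without HTTP.
# `wav_bytes` and `some_app` are placeholders, not real project objects.
import io

from werkzeug.datastructures import FileStorage

wav_bytes = b'RIFF....WAVEfmt '  # stand-in; real WAV bytes in practice

storage = FileStorage(
    stream=io.BytesIO(wav_bytes),
    filename='a.wav',            # same name the frontend now attaches
    content_type='audio/wav',    # must pass the audio/<ext> mimetype check
)

# result = AudioService.transcript(app_model=some_app, file=storage)
```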
@@ -578,7 +578,7 @@ const Chat: FC<IChatProps> = ({
         {
           query
             ? (
-              <div className='flex justify-center items-center w-8 h-8 cursor-pointer' onClick={() => setQuery('')}>
+              <div className='flex justify-center items-center w-8 h-8 cursor-pointer hover:bg-gray-100 rounded-lg' onClick={() => setQuery('')}>
                 <XCircle className='w-4 h-4 text-[#98A2B3]' />
               </div>
             )
......
@@ -26,6 +26,7 @@ const VoiceInput = ({
   const ctxRef = useRef<CanvasRenderingContext2D | null>(null)
   const drawRecordId = useRef<number | null>(null)
   const [duration, setDuration] = useState('00:00')
+  const [originDuration, setOriginDuration] = useState(0)
   const [startRecord, setStartRecord] = useState(false)
   const [startConvert, setStartConvert] = useState(false)
   const drawRecord = useCallback(() => {
@@ -66,17 +67,15 @@ const VoiceInput = ({
     const wavBlob = recorder.current.getWAVBlob()
+    const wavFile = new File([wavBlob], 'a.wav', { type: 'audio/wav' })
     const formData = new FormData()
-    formData.append('file', wavBlob)
+    formData.append('file', wavFile)

     try {
       const audioResponse = await audioToText(isInstalledApp, installedAppId, formData)
-      const audioData = await audioResponse.json()
-      onConverted(audioData.text)
+      onConverted(audioResponse.text)
+      onCancel()
     }
     catch (e) {
       onConverted('')
-    }
-    finally {
       onCancel()
     }
   }, [])
@@ -86,10 +85,9 @@ const VoiceInput = ({
     recorder.current.start()
     recorder.current.onprogress = (params) => {
       const originDuration = params.duration
-      if (originDuration > 65) {
-        console.log('stop')
+      setOriginDuration(originDuration)
+      if (originDuration >= 120)
         handleStopRecorder()
-      }
       const minutes = parseInt(`${parseInt(`${originDuration}`) / 60}`)
       const seconds = parseInt(`${originDuration}`) % 60
       setDuration(`0${minutes.toFixed(0)}:${seconds >= 10 ? seconds : `0${seconds}`}`)
@@ -158,14 +156,14 @@ const VoiceInput = ({
       {
         startConvert && (
           <div
-            className='flex justify-center items-center mr-1 w-8 h-8 hover:bg-primary-100 rounded-lg cursor-pointer'
+            className='flex justify-center items-center mr-1 w-8 h-8 hover:bg-gray-200 rounded-lg cursor-pointer'
             onClick={onCancel}
           >
             <XClose className='w-4 h-4 text-gray-500' />
           </div>
         )
       }
-      <div className='w-[45px] pl-1 text-xs font-medium text-gray-700'>{duration}</div>
+      <div className={`w-[45px] pl-1 text-xs font-medium ${originDuration > 110 ? 'text-[#F04438]' : 'text-gray-700'}`}>{duration}</div>
     </div>
   </div>
 )
......
@@ -116,5 +116,5 @@ export const fetchSuggestedQuestions = (messageId: string, isInstalledApp: boolean
 }

 export const audioToText = (isInstalledApp: boolean, installedAppId: string, body: FormData) => {
-  return (getAction('post', isInstalledApp))(getUrl('/audio-to-text', isInstalledApp, installedAppId), { body }, { bodyStringify: false, deleteContentType: true })
+  return (getAction('post', isInstalledApp))(getUrl('/audio-to-text', isInstalledApp, installedAppId), { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ text: string }>
 }