Commit 0b928a0a authored by StyleZhang

convert audio file to bytes

parent 66fa5537
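For context, the core of the backend change is that the uploaded audio is now read into bytes once, wrapped in an `io.BytesIO` whose `name` attribute carries a filename, and handed to `openai.Audio.transcribe` (the pre-1.0 OpenAI Python SDK call used in this codebase) instead of passing the raw request file object through. Below is a minimal, self-contained sketch of that technique; the helper name `transcribe_upload`, the plain `ValueError`s, and the explicit `api_key` argument are illustrative only, not the project's actual service interface.

```python
# Sketch of the bytes -> BytesIO -> Whisper flow this commit introduces.
# Assumes the pre-1.0 `openai` SDK and a werkzeug FileStorage upload.
import io

import openai
from werkzeug.datastructures import FileStorage

FILE_SIZE_LIMIT = 25 * 1024 * 1024  # 25MB, same cap as the service
ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']


def transcribe_upload(file: FileStorage, api_key: str) -> str:
    """Illustrative helper; error types differ from the real service."""
    file_content = file.read()  # consume the upload once, into bytes
    if len(file_content) > FILE_SIZE_LIMIT:
        raise ValueError(f'audio larger than {FILE_SIZE_LIMIT} bytes')

    if file.mimetype not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
        raise ValueError(f'unsupported mime type: {file.mimetype}')

    # Wrap the bytes in a named file-like object so the SDK can attach a
    # filename (and thus a format hint) to the multipart upload.
    buffer = io.BytesIO(file_content)
    buffer.name = 'temp.wav'

    transcript = openai.Audio.transcribe(
        model='whisper-1',
        file=buffer,
        api_key=api_key,
    )
    return transcript['text']
```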
@@ -142,7 +142,8 @@ class ChatStopApi(InstalledAppResource):
         return {'result': 'success'}, 200


 class AudioApi(InstalledAppResource):
-    def post(self, app_model, end_user):
+    def post(self, installed_app):
+        app_model = installed_app.app
         if app_model.mode != 'chat':
             raise NotChatAppError()
......
 import openai
+import io
+from werkzeug.datastructures import FileStorage
 from core.llm.llm_builder import LLMBuilder
 from core.llm.provider.llm_provider_service import LLMProviderService
 from models.model import App
-from controllers.console.datasets.error import FileTooLargeError, \
-    UnsupportedFileTypeError
+from controllers.console.datasets.error import FileTooLargeError, UnsupportedFileTypeError

 FILE_SIZE_LIMIT = 25 * 1024 * 1024 # 25MB
 ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']


 class AudioService:
     @classmethod
-    def transcript(cls, app_model: App, file, **params):
+    def transcript(cls, app_model: App, file: FileStorage, **params):
+        file_content = file.read()
+        file_size = len(file_content)
         if file_size > FILE_SIZE_LIMIT:
-            message = "({file_size} > {FILE_SIZE_LIMIT})"
+            message = f"({file_size} > {FILE_SIZE_LIMIT})"
             raise FileTooLargeError(message)

-        extension = file.filename.split('.')[-1]
-        if extension not in ALLOWED_EXTENSIONS:
+        extension = file.mimetype
+        if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
             raise UnsupportedFileTypeError()

         provider_name = LLMBuilder.get_default_provider(app_model.tenant_id)
         provider = LLMProviderService(app_model.tenant_id, provider_name)
         credentials = provider.get_credentials(provider_name)

+        buffer = io.BytesIO(file_content)
+        buffer.name = 'temp.wav'
         transcript = openai.Audio.transcribe(
             model='whisper-1',
-            file=file,
+            file=buffer,
             api_key=credentials.get('openai_api_key'),
             api_base=credentials.get('openai_api_base'),
             api_type=credentials.get('openai_api_type'),
......
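One practical note on the new validation: because the service now checks `file.mimetype` against `audio/<ext>` values, the client has to send a file with an explicit content type, which the TypeScript change below does by wrapping the recorded WAV blob in a named `File`. The snippet here is a rough sketch of exercising `AudioService.transcript` directly with a hand-built `FileStorage`, useful for testing the service without HTTP; the `wav_bytes` placeholder and `some_app` variable are made up for illustration.

```python
# Rough sketch: build the same kind of FileStorage Flask would hand the
# controller, so AudioService.transcript can be exercised without HTTP.
# `wav_bytes` and `some_app` are placeholders, not real project objects.
import io

from werkzeug.datastructures import FileStorage

wav_bytes = b'RIFF....WAVEfmt '  # stand-in; real WAV bytes in practice

storage = FileStorage(
    stream=io.BytesIO(wav_bytes),
    filename='a.wav',            # same name the frontend now attaches
    content_type='audio/wav',    # must pass the audio/<ext> mimetype check
)

# result = AudioService.transcript(app_model=some_app, file=storage)
```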
@@ -578,7 +578,7 @@ const Chat: FC<IChatProps> = ({
         {
           query
             ? (
-              <div className='flex justify-center items-center w-8 h-8 cursor-pointer' onClick={() => setQuery('')}>
+              <div className='flex justify-center items-center w-8 h-8 cursor-pointer hover:bg-gray-100 rounded-lg' onClick={() => setQuery('')}>
                 <XCircle className='w-4 h-4 text-[#98A2B3]' />
               </div>
             )
......
@@ -26,6 +26,7 @@ const VoiceInput = ({
   const ctxRef = useRef<CanvasRenderingContext2D | null>(null)
   const drawRecordId = useRef<number | null>(null)
   const [duration, setDuration] = useState('00:00')
+  const [originDuration, setOriginDuration] = useState(0)
   const [startRecord, setStartRecord] = useState(false)
   const [startConvert, setStartConvert] = useState(false)
   const drawRecord = useCallback(() => {
@@ -66,17 +67,15 @@ const VoiceInput = ({
     const wavBlob = recorder.current.getWAVBlob()
+    const wavFile = new File([wavBlob], 'a.wav', { type: 'audio/wav' })
     const formData = new FormData()
-    formData.append('file', wavBlob)
+    formData.append('file', wavFile)

     try {
       const audioResponse = await audioToText(isInstalledApp, installedAppId, formData)
-      const audioData = await audioResponse.json()
-      onConverted(audioData.text)
+      onConverted(audioResponse.text)
+      onCancel()
     }
     catch (e) {
       onConverted('')
-    }
-    finally {
       onCancel()
     }
   }, [])
@@ -86,10 +85,9 @@ const VoiceInput = ({
     recorder.current.start()
     recorder.current.onprogress = (params) => {
       const originDuration = params.duration
-      if (originDuration > 65) {
-        console.log('stop')
+      setOriginDuration(originDuration)
+      if (originDuration >= 120)
         handleStopRecorder()
-      }
       const minutes = parseInt(`${parseInt(`${originDuration}`) / 60}`)
       const seconds = parseInt(`${originDuration}`) % 60
       setDuration(`0${minutes.toFixed(0)}:${seconds >= 10 ? seconds : `0${seconds}`}`)
@@ -158,14 +156,14 @@ const VoiceInput = ({
       {
         startConvert && (
           <div
-            className='flex justify-center items-center mr-1 w-8 h-8 hover:bg-primary-100 rounded-lg cursor-pointer'
+            className='flex justify-center items-center mr-1 w-8 h-8 hover:bg-gray-200 rounded-lg cursor-pointer'
             onClick={onCancel}
           >
             <XClose className='w-4 h-4 text-gray-500' />
           </div>
         )
       }
-      <div className='w-[45px] pl-1 text-xs font-medium text-gray-700'>{duration}</div>
+      <div className={`w-[45px] pl-1 text-xs font-medium ${originDuration > 110 ? 'text-[#F04438]' : 'text-gray-700'}`}>{duration}</div>
     </div>
   </div>
 )
......
@@ -116,5 +116,5 @@ export const fetchSuggestedQuestions = (messageId: string, isInstalledApp: boolean
 }

 export const audioToText = (isInstalledApp: boolean, installedAppId: string, body: FormData) => {
-  return (getAction('post', isInstalledApp))(getUrl('/audio-to-text', isInstalledApp, installedAppId), { body }, { bodyStringify: false, deleteContentType: true })
+  return (getAction('post', isInstalledApp))(getUrl('/audio-to-text', isInstalledApp, installedAppId), { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ text: string }>
 }