Commit 56c57963 authored by StyleZhang's avatar StyleZhang

Merge branch 'feat/chat-support-voice-input' into deploy/dev

parents 9a1bd85b 3f22fdd0
......@@ -147,3 +147,5 @@ docker/volumes/weaviate/*
sdks/python-client/build
sdks/python-client/dist
sdks/python-client/dify_client.egg-info
.vscode/
\ No newline at end of file
......@@ -3,7 +3,7 @@ import json
import logging
from typing import Generator, Union
from flask import Response, stream_with_context
from flask import Response, stream_with_context, request
from flask_login import current_user
from flask_restful import reqparse
from werkzeug.exceptions import InternalServerError, NotFound
......@@ -19,6 +19,8 @@ from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthor
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value
from services.completion_service import CompletionService
from services.completion_service import CompletionService
from controllers.console.datasets.error import NoFileUploadedError, TooManyFilesError
# define completion api for user
......@@ -138,6 +140,52 @@ class ChatStopApi(InstalledAppResource):
PubHandler.stop(current_user, task_id)
return {'result': 'success'}, 200
class AudioApi(InstalledAppResource):
    """Speech-to-text endpoint for an installed (explore) chat app.

    Accepts a single multipart file upload named ``file`` and returns the
    transcription produced by AudioService.
    """

    def post(self, installed_app):
        app_model = installed_app.app
        # Voice input is only wired up for chat-mode apps.
        if app_model.mode != 'chat':
            raise NotChatAppError()

        # Validate the upload BEFORE indexing request.files: the original
        # read request.files['file'] first, so a missing file raised a
        # generic 400 (BadRequestKeyError) and the NoFileUploadedError /
        # TooManyFilesError guards below were unreachable.
        if 'file' not in request.files:
            raise NoFileUploadedError()
        if len(request.files) > 1:
            raise TooManyFilesError()
        file = request.files['file']

        # Local import preserved from the original (presumably to avoid an
        # import cycle at module load time — TODO confirm).
        from services.audio_service import AudioService
        try:
            response = AudioService.transcript(
                app_model=app_model,
                file=file,
            )
            return response
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            # Surface provider/LLM failures as a single completion error.
            raise CompletionRequestError(str(e))
        except ValueError as e:
            # Validation errors (e.g. file too large / wrong type) propagate
            # unchanged so their specific HTTP codes are kept.
            raise e
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
def compact_response(response: Union[dict | Generator]) -> Response:
......@@ -178,3 +226,4 @@ api.add_resource(CompletionApi, '/installed-apps/<uuid:installed_app_id>/complet
api.add_resource(CompletionStopApi, '/installed-apps/<uuid:installed_app_id>/completion-messages/<string:task_id>/stop', endpoint='installed_app_stop_completion')
api.add_resource(ChatApi, '/installed-apps/<uuid:installed_app_id>/chat-messages', endpoint='installed_app_chat_completion')
api.add_resource(ChatStopApi, '/installed-apps/<uuid:installed_app_id>/chat-messages/<string:task_id>/stop', endpoint='installed_app_stop_chat_completion')
api.add_resource(AudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text')
......@@ -3,7 +3,7 @@ import json
import logging
from typing import Generator, Union
from flask import Response, stream_with_context
from flask import Response, stream_with_context, request
from flask_restful import reqparse
from werkzeug.exceptions import InternalServerError, NotFound
......@@ -18,7 +18,8 @@ from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthor
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value
from services.completion_service import CompletionService
from services.audio_service import AudioService
from controllers.console.datasets.error import NoFileUploadedError, TooManyFilesError
# define completion api for user
class CompletionApi(WebApiResource):
......@@ -133,6 +134,49 @@ class ChatStopApi(WebApiResource):
PubHandler.stop(end_user, task_id)
return {'result': 'success'}, 200
class AudioApi(WebApiResource):
    """Speech-to-text endpoint for the shared web app.

    Accepts a single multipart file upload named ``file`` and returns the
    transcription produced by AudioService.
    """

    def post(self, app_model, end_user):
        # Voice input is only wired up for chat-mode apps.
        if app_model.mode != 'chat':
            raise NotChatAppError()

        # Validate the upload BEFORE indexing request.files: the original
        # read request.files['file'] first, so a missing file raised a
        # generic 400 (BadRequestKeyError) and the NoFileUploadedError /
        # TooManyFilesError guards below were unreachable.
        if 'file' not in request.files:
            raise NoFileUploadedError()
        if len(request.files) > 1:
            raise TooManyFilesError()
        file = request.files['file']

        try:
            response = AudioService.transcript(
                app_model=app_model,
                file=file,
            )
            return response
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
                LLMRateLimitError, LLMAuthorizationError) as e:
            # Surface provider/LLM failures as a single completion error.
            raise CompletionRequestError(str(e))
        except ValueError as e:
            # Validation errors (e.g. file too large / wrong type) propagate
            # unchanged so their specific HTTP codes are kept.
            raise e
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
def compact_response(response: Union[dict | Generator]) -> Response:
......@@ -173,3 +217,4 @@ api.add_resource(CompletionApi, '/completion-messages')
api.add_resource(CompletionStopApi, '/completion-messages/<string:task_id>/stop')
api.add_resource(ChatApi, '/chat-messages')
api.add_resource(ChatStopApi, '/chat-messages/<string:task_id>/stop')
api.add_resource(AudioApi, '/audio-to-text')
import openai
import io
from werkzeug.datastructures import FileStorage
from core.llm.llm_builder import LLMBuilder
from core.llm.provider.llm_provider_service import LLMProviderService
from models.model import App
from controllers.console.datasets.error import FileTooLargeError, UnsupportedFileTypeError
FILE_SIZE_LIMIT = 25 * 1024 * 1024 # 25MB
ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
class AudioService:
    """Transcribes uploaded audio to text via OpenAI Whisper."""

    @classmethod
    def transcript(cls, app_model: App, file: FileStorage, **params):
        """Transcribe *file* using the tenant's default provider credentials.

        :param app_model: app whose tenant supplies the OpenAI credentials
        :param file: uploaded audio file (multipart FileStorage)
        :param params: extra keyword arguments forwarded to the Whisper API
                       (e.g. language, prompt)
        :raises FileTooLargeError: if the upload exceeds FILE_SIZE_LIMIT
        :raises UnsupportedFileTypeError: if the mimetype is not an allowed
                                          ``audio/*`` type
        :return: the transcription response from the API
        """
        file_content = file.read()
        file_size = len(file_content)

        if file_size > FILE_SIZE_LIMIT:
            message = f"({file_size} > {FILE_SIZE_LIMIT})"
            raise FileTooLargeError(message)

        # Type check relies on the client-supplied mimetype, e.g. 'audio/wav'.
        extension = file.mimetype
        if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
            raise UnsupportedFileTypeError()

        provider_name = LLMBuilder.get_default_provider(app_model.tenant_id)
        provider = LLMProviderService(app_model.tenant_id, provider_name)
        credentials = provider.get_credentials(provider_name)

        # Whisper requires a named file-like object to infer the format.
        buffer = io.BytesIO(file_content)
        buffer.name = 'temp.wav'

        transcript = openai.Audio.transcribe(
            model='whisper-1',
            file=buffer,
            api_key=credentials.get('openai_api_key'),
            api_base=credentials.get('openai_api_base'),
            api_type=credentials.get('openai_api_type'),
            api_version=credentials.get('openai_api_version'),
            # Fix: the original passed `params=params`, which sent the whole
            # dict as a single bogus `params` request field instead of
            # spreading the individual transcription options.
            **params,
        )
        return transcript
\ No newline at end of file
......@@ -63,6 +63,8 @@ export type IChatProps = {
controlFocus?: number
isShowSuggestion?: boolean
suggestionList?: string[]
isInstalledApp: boolean
installedAppId: string
}
export type MessageMore = {
......@@ -425,6 +427,8 @@ const Chat: FC<IChatProps> = ({
controlFocus,
isShowSuggestion,
suggestionList,
isInstalledApp,
installedAppId,
}) => {
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
......@@ -574,7 +578,7 @@ const Chat: FC<IChatProps> = ({
{
query
? (
<div className='flex justify-center items-center w-8 h-8 cursor-pointer' onClick={() => setQuery('')}>
<div className='flex justify-center items-center w-8 h-8 cursor-pointer hover:bg-gray-100 rounded-lg' onClick={() => setQuery('')}>
<XCircle className='w-4 h-4 text-[#98A2B3]' />
</div>
)
......@@ -606,7 +610,14 @@ const Chat: FC<IChatProps> = ({
)}
</div>
{
voiceInputShow && <VoiceInput onCancel={() => setVoiceInputShow(false)} onConverted={() => setVoiceInputShow(false)} />
voiceInputShow && (
<VoiceInput
isInstalledApp={isInstalledApp}
installedAppId={installedAppId}
onCancel={() => setVoiceInputShow(false)}
onConverted={text => setQuery(text)}
/>
)
}
</div>
</div>
......
......@@ -5,13 +5,18 @@ import Recorder from 'js-audio-recorder'
import s from './index.module.css'
import { StopCircle } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import { Loading02, XClose } from '@/app/components/base/icons/src/vender/line/general'
import { audioToText } from '@/service/share'
type VoiceInputTypes = {
isInstalledApp: boolean
installedAppId: string
onConverted: (text: string) => void
onCancel: () => void
}
const VoiceInput = ({
isInstalledApp,
installedAppId,
onCancel,
onConverted,
}: VoiceInputTypes) => {
......@@ -21,6 +26,7 @@ const VoiceInput = ({
const ctxRef = useRef<CanvasRenderingContext2D | null>(null)
const drawRecordId = useRef<number | null>(null)
const [duration, setDuration] = useState('00:00')
const [originDuration, setOriginDuration] = useState(0)
const [startRecord, setStartRecord] = useState(false)
const [startConvert, setStartConvert] = useState(false)
const drawRecord = useCallback(() => {
......@@ -49,7 +55,7 @@ const VoiceInput = ({
}
ctx.closePath()
}, [])
const handleStopRecorder = useCallback(() => {
const handleStopRecorder = useCallback(async () => {
setStartRecord(false)
setStartConvert(true)
recorder.current.stop()
......@@ -58,9 +64,20 @@ const VoiceInput = ({
const canvas = canvasRef.current!
const ctx = ctxRef.current!
ctx.clearRect(0, 0, canvas.width, canvas.height)
// const wavBlob = recorder.current.getWAVBlob()
// const wavFile = new File([wavBlob], 'audio.wav', { type: 'audio/wav' })
// onConverted('')
const wavBlob = recorder.current.getWAVBlob()
const wavFile = new File([wavBlob], 'a.wav', { type: 'audio/wav' })
const formData = new FormData()
formData.append('file', wavFile)
try {
const audioResponse = await audioToText(isInstalledApp, installedAppId, formData)
onConverted(audioResponse.text)
onCancel()
}
catch (e) {
onConverted('')
onCancel()
}
}, [])
const handleStartRecord = () => {
setStartRecord(true)
......@@ -68,10 +85,9 @@ const VoiceInput = ({
recorder.current.start()
recorder.current.onprogress = (params) => {
const originDuration = params.duration
if (originDuration > 65) {
console.log('stop')
setOriginDuration(originDuration)
if (originDuration >= 120)
handleStopRecorder()
}
const minutes = parseInt(`${parseInt(`${originDuration}`) / 60}`)
const seconds = parseInt(`${originDuration}`) % 60
setDuration(`0${minutes.toFixed(0)}:${seconds >= 10 ? seconds : `0${seconds}`}`)
......@@ -140,14 +156,14 @@ const VoiceInput = ({
{
startConvert && (
<div
className='flex justify-center items-center mr-1 w-8 h-8 hover:bg-primary-100 rounded-lg cursor-pointer'
className='flex justify-center items-center mr-1 w-8 h-8 hover:bg-gray-200 rounded-lg cursor-pointer'
onClick={onCancel}
>
<XClose className='w-4 h-4 text-gray-500' />
</div>
)
}
<div className='w-[45px] pl-1 text-xs font-medium text-gray-700'>{duration}</div>
<div className={`w-[45px] pl-1 text-xs font-medium ${originDuration > 110 ? 'text-[#F04438]' : 'text-gray-700'}`}>{duration}</div>
</div>
</div>
)
......
......@@ -620,6 +620,8 @@ const Main: FC<IMainProps> = ({
controlFocus={controlFocus}
isShowSuggestion={doShowSuggestion}
suggestionList={suggestQuestions}
isInstalledApp={isInstalledApp}
installedAppId={installedAppInfo?.id || ''}
/>
</div>
</div>)
......
......@@ -35,7 +35,9 @@ export type IOnError = (msg: string) => void
type IOtherOptions = {
isPublicAPI?: boolean
bodyStringify?: boolean
needAllResponseContent?: boolean
deleteContentType?: boolean
onData?: IOnData // for stream
onError?: IOnError
onCompleted?: IOnCompleted // for stream
......@@ -140,7 +142,9 @@ const baseFetch = (
fetchOptions: any,
{
isPublicAPI = false,
bodyStringify = true,
needAllResponseContent,
deleteContentType,
}: IOtherOptions,
) => {
const options = Object.assign({}, baseOptions, fetchOptions)
......@@ -149,6 +153,9 @@ const baseFetch = (
options.headers.set('Authorization', `bearer ${sharedToken}`)
}
if (deleteContentType)
options.headers.delete('Content-Type')
const urlPrefix = isPublicAPI ? PUBLIC_API_PREFIX : API_PREFIX
let urlWithPrefix = `${urlPrefix}${url.startsWith('/') ? url : `/${url}`}`
......@@ -168,7 +175,7 @@ const baseFetch = (
delete options.params
}
if (body)
if (body && bodyStringify)
options.body = JSON.stringify(body)
// Handle timeout
......
......@@ -114,3 +114,7 @@ export const removeMessage = (messageId: string, isInstalledApp: boolean, instal
export const fetchSuggestedQuestions = (messageId: string, isInstalledApp: boolean, installedAppId = '') => {
  // Resolve the app-scoped URL and the matching GET action, then fire the request.
  const url = getUrl(`/messages/${messageId}/suggested-questions`, isInstalledApp, installedAppId)
  const get = getAction('get', isInstalledApp)
  return get(url)
}
export const audioToText = (isInstalledApp: boolean, installedAppId: string, body: FormData) => {
  // POST raw FormData: disable JSON stringify and drop the Content-Type
  // header so the browser can set the multipart boundary itself.
  const url = getUrl('/audio-to-text', isInstalledApp, installedAppId)
  const post = getAction('post', isInstalledApp)
  return post(url, { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ text: string }>
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment