Commit c6e2900b (unverified), authored by Charlie.Wei, committed by GitHub

Display selected tts voice name (#2459)

Co-authored-by: luowei <[redacted: credential-like token was recorded here in place of an email address]>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
parent 963d9b60
...@@ -53,7 +53,7 @@ class TTSModel(AIModel): ...@@ -53,7 +53,7 @@ class TTSModel(AIModel):
""" """
raise NotImplementedError raise NotImplementedError
def get_tts_model_voices(self, model: str, credentials: dict, language: str) -> list: def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list:
""" """
Get voice for given tts model voices Get voice for given tts model voices
...@@ -66,7 +66,10 @@ class TTSModel(AIModel): ...@@ -66,7 +66,10 @@ class TTSModel(AIModel):
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties: if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
voices = model_schema.model_properties[ModelPropertyKey.VOICES] voices = model_schema.model_properties[ModelPropertyKey.VOICES]
if language:
return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')] return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
else:
return [{'name': d['name'], 'value': d['mode']} for d in voices]
def _get_model_default_voice(self, model: str, credentials: dict) -> any: def _get_model_default_voice(self, model: str, credentials: dict) -> any:
""" """
......
...@@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): ...@@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
""" """
# transform credentials to kwargs for model instance # transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials) credentials_kwargs = self._to_credential_kwargs(credentials)
if not voice: if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
voice = self._get_model_default_voice(model, credentials) voice = self._get_model_default_voice(model, credentials)
word_limit = self._get_model_word_limit(model, credentials) word_limit = self._get_model_word_limit(model, credentials)
audio_type = self._get_model_audio_type(model, credentials) audio_type = self._get_model_audio_type(model, credentials)
......
...@@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel): ...@@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
:return: text translated to audio file :return: text translated to audio file
""" """
audio_type = self._get_model_audio_type(model, credentials) audio_type = self._get_model_audio_type(model, credentials)
if not voice: if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
voice = self._get_model_default_voice(model, credentials) voice = self._get_model_default_voice(model, credentials)
if streaming: if streaming:
return Response(stream_with_context(self._tts_invoke_streaming(model=model, return Response(stream_with_context(self._tts_invoke_streaming(model=model,
......
...@@ -3,7 +3,7 @@ import type { FC } from 'react' ...@@ -3,7 +3,7 @@ import type { FC } from 'react'
import { memo, useState } from 'react' import { memo, useState } from 'react'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import cn from 'classnames' import cn from 'classnames'
import ParamConfigContent from './param-config-content' import VoiceParamConfig from './param-config-content'
import { Settings01 } from '@/app/components/base/icons/src/vender/line/general' import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
import { import {
PortalToFollowElem, PortalToFollowElem,
...@@ -27,12 +27,12 @@ const ParamsConfig: FC = () => { ...@@ -27,12 +27,12 @@ const ParamsConfig: FC = () => {
<PortalToFollowElemTrigger onClick={() => setOpen(v => !v)}> <PortalToFollowElemTrigger onClick={() => setOpen(v => !v)}>
<div className={cn('flex items-center rounded-md h-7 px-3 space-x-1 text-gray-700 cursor-pointer hover:bg-gray-200', open && 'bg-gray-200')}> <div className={cn('flex items-center rounded-md h-7 px-3 space-x-1 text-gray-700 cursor-pointer hover:bg-gray-200', open && 'bg-gray-200')}>
<Settings01 className='w-3.5 h-3.5 ' /> <Settings01 className='w-3.5 h-3.5 ' />
<div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.vision.settings')}</div> <div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.voice.settings')}</div>
</div> </div>
</PortalToFollowElemTrigger> </PortalToFollowElemTrigger>
<PortalToFollowElemContent style={{ zIndex: 50 }}> <PortalToFollowElemContent style={{ zIndex: 50 }}>
<div className='w-80 sm:w-[412px] p-4 bg-white rounded-lg border-[0.5px] border-gray-200 shadow-lg space-y-3'> <div className='w-80 sm:w-[412px] p-4 bg-white rounded-lg border-[0.5px] border-gray-200 shadow-lg space-y-3'>
<ParamConfigContent /> <VoiceParamConfig />
</div> </div>
</PortalToFollowElemContent> </PortalToFollowElemContent>
</PortalToFollowElem> </PortalToFollowElem>
......
...@@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => { ...@@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => {
const languageItem = languages.find(item => item.value === textToSpeechConfig.language) const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select') const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
const voiceItems = useSWR({ url: `/apps/${appId}/text-to-audio/voices?language=${languageItem ? languageItem.value : 'en-US'}` }, fetchAppVoices).data const language = languageItem?.value
const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice) const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select') const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
......
'use client' 'use client'
import useSWR from 'swr'
import React, { type FC } from 'react' import React, { type FC } from 'react'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector' import { useContext } from 'use-context-selector'
import { usePathname } from 'next/navigation'
import Panel from '@/app/components/app/configuration/base/feature-panel' import Panel from '@/app/components/app/configuration/base/feature-panel'
import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices' import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import ConfigContext from '@/context/debug-configuration' import ConfigContext from '@/context/debug-configuration'
import { languages } from '@/utils/language' import { languages } from '@/utils/language'
import { fetchAppVoices } from '@/service/apps'
const TextToSpeech: FC = () => { const TextToSpeech: FC = () => {
const { t } = useTranslation() const { t } = useTranslation()
const { const {
textToSpeechConfig, textToSpeechConfig,
} = useContext(ConfigContext) } = useContext(ConfigContext)
const pathname = usePathname()
const matched = pathname.match(/\/app\/([^/]+)/)
const appId = (matched?.length && matched[1]) ? matched[1] : ''
const language = textToSpeechConfig.language
const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
return ( return (
<Panel <Panel
title={ title={
...@@ -22,7 +32,7 @@ const TextToSpeech: FC = () => { ...@@ -22,7 +32,7 @@ const TextToSpeech: FC = () => {
headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />} headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
headerRight={ headerRight={
<div className='text-xs text-gray-500'> <div className='text-xs text-gray-500'>
{languages.find(i => i.value === textToSpeechConfig.language)?.name} {textToSpeechConfig.voice} {languages.find(i => i.value === textToSpeechConfig.language)?.name} - {voiceItem?.name ?? t('appDebug.voice.defaultDisplay')}
</div> </div>
} }
noBodySpacing noBodySpacing
......
...@@ -300,6 +300,7 @@ const translation = { ...@@ -300,6 +300,7 @@ const translation = {
}, },
voice: { voice: {
name: 'Voice', name: 'Voice',
defaultDisplay: 'Default Voice',
description: 'Text to speech voice Settings', description: 'Text to speech voice Settings',
settings: 'Settings', settings: 'Settings',
voiceSettings: { voiceSettings: {
......
...@@ -300,6 +300,7 @@ const translation = { ...@@ -300,6 +300,7 @@ const translation = {
}, },
voice: { voice: {
name: 'voz', name: 'voz',
defaultDisplay: 'Voz padrão',
description: 'Texto para configurações de timbre de voz', description: 'Texto para configurações de timbre de voz',
settings: 'As configurações', settings: 'As configurações',
voiceSettings: { voiceSettings: {
......
...@@ -296,6 +296,7 @@ const translation = { ...@@ -296,6 +296,7 @@ const translation = {
}, },
voice: { voice: {
name: '音色', name: '音色',
defaultDisplay: '缺省音色',
description: '文本转语音音色设置', description: '文本转语音音色设置',
settings: '设置', settings: '设置',
voiceSettings: { voiceSettings: {
......
...@@ -94,6 +94,6 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u ...@@ -94,6 +94,6 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u
return post<GenerationIntroductionResponse>(url, { body }) return post<GenerationIntroductionResponse>(url, { body })
} }
/**
 * Fetch the list of text-to-speech voices for an app.
 *
 * @param appId    Identifier of the app whose voices are requested.
 * @param language Optional language code used to filter the voices. When it is
 *                 omitted (or empty) no `language` query parameter is sent at all.
 * @returns The pending `get` request resolving to the voices list response.
 */
export const fetchAppVoices: Fetcher<AppVoicesListResponse, { appId: string; language?: string }> = ({ appId, language }) => {
  // `language` is optional. Interpolating it unconditionally would put the
  // literal text "undefined" into the URL, which the server would receive as a
  // real (and unknown) language value — presumably yielding an empty voice list
  // instead of the unfiltered one; TODO confirm against the API handler.
  const query = language ? `?language=${encodeURIComponent(language)}` : ''
  return get<AppVoicesListResponse>(`apps/${appId}/text-to-audio/voices${query}`)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment