{
"type": "object",
"properties": {
"text": {
"type": "string",
"description": "Text to convert to speech. Maximum 5000 characters. Use <#x#> between words to control pause duration (0.01-99.99s)"
},
"voice_id": {
"type": "string",
"enum": [
"Wise_Woman",
"Friendly_Person",
"Inspirational_girl",
"Deep_Voice_Man",
"Calm_Woman",
"Casual_Guy",
"Lively_Girl",
"Patient_Man",
"Young_Knight",
"Determined_Man",
"Lovely_Girl",
"Decent_Boy",
"Imposing_Manner",
"Elegant_Man",
"Abbess",
"Sweet_Girl_2",
"Exuberant_Girl"
],
"default": "Wise_Woman",
"description": "Voice ID for text-to-speech generation"
},
"pitch": {
"type": "number",
"default": 0,
"description": "Speech pitch (-12 to 12, default: 0)"
},
"speed": {
"type": "number",
"default": 1,
"description": "Speech speed multiplier (0.5 to 2, default: 1)"
},
"volume": {
"type": "number",
"default": 1,
"description": "Speech volume level (0 to 10, default: 1)"
},
"emotion": {
"type": "string",
"enum": [
"auto",
"neutral",
"happy",
"sad",
"angry",
"fearful",
"disgusted",
"surprised"
],
"default": "auto",
"description": "Emotion to apply to speech (default: auto)"
},
"sample_rate": {
"type": "number",
"default": 32000,
"description": "Audio sample rate (default: 32000)"
},
"language_boost": {
"type": "string",
"enum": [
"None",
"Automatic",
"Chinese",
"Chinese,Yue",
"English",
"Arabic",
"Russian",
"Spanish",
"French",
"Portuguese",
"German",
"Turkish",
"Dutch",
"Ukrainian",
"Vietnamese",
"Indonesian",
"Japanese",
"Italian",
"Korean",
"Thai",
"Polish",
"Romanian",
"Greek",
"Czech",
"Finnish",
"Hindi"
],
"default": "None",
"description": "Language enhancement for better pronunciation"
}
},
"required": [
"text"
]
}