diff --git a/fern/products/platform/pages/calling/voice/TTS/azure.mdx b/fern/products/platform/pages/calling/voice/TTS/azure.mdx index 6ad66c81db..cb359ca0ef 100644 --- a/fern/products/platform/pages/calling/voice/TTS/azure.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/azure.mdx @@ -1,6 +1,7 @@ --- id: 3d4a77e2-2277-4165-916c-21e9db487c31 title: Azure +subtitle: Text-to-speech description: Learn how to use Azure TTS voices on the SignalWire platform. slug: /voice/tts/azure --- diff --git a/fern/products/platform/pages/calling/voice/TTS/cartesia.mdx b/fern/products/platform/pages/calling/voice/TTS/cartesia.mdx index 78502a2484..415e6a2406 100644 --- a/fern/products/platform/pages/calling/voice/TTS/cartesia.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/cartesia.mdx @@ -1,6 +1,7 @@ --- id: 2b73a251-1dda-48f8-a437-08e076279e7f title: Cartesia +subtitle: Text-to-speech description: Learn how to use Cartesia TTS voices on the SignalWire platform. slug: /voice/tts/cartesia --- diff --git a/fern/products/platform/pages/calling/voice/TTS/deepgram.mdx b/fern/products/platform/pages/calling/voice/TTS/deepgram.mdx index 39d162445a..ffd214572c 100644 --- a/fern/products/platform/pages/calling/voice/TTS/deepgram.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/deepgram.mdx @@ -1,6 +1,7 @@ --- id: c2a4f703-3d37-4239-9c5c-a7a3498dd58d title: Deepgram +subtitle: Text-to-speech description: Learn how to use Deepgram TTS voices on the SignalWire platform. 
slug: /voice/tts/deepgram --- diff --git a/fern/products/platform/pages/calling/voice/TTS/elevenlabs.mdx b/fern/products/platform/pages/calling/voice/TTS/elevenlabs.mdx index d0c4a493d7..4dc43c2b1a 100644 --- a/fern/products/platform/pages/calling/voice/TTS/elevenlabs.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/elevenlabs.mdx @@ -1,6 +1,7 @@ --- id: 042baf57-35a8-47ce-93cb-8387cacff266 title: ElevenLabs +subtitle: Text-to-speech description: Learn how to use ElevenLabs TTS voices on the SignalWire platform. slug: /voice/tts/elevenlabs --- diff --git a/fern/products/platform/pages/calling/voice/TTS/google.mdx b/fern/products/platform/pages/calling/voice/TTS/google.mdx index 88e51b8ac8..2de55a4b17 100644 --- a/fern/products/platform/pages/calling/voice/TTS/google.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/google.mdx @@ -1,6 +1,7 @@ --- id: 2724fea7-d6f8-44e3-b397-a3940ac4e663 title: Google Cloud +subtitle: Text-to-speech description: Learn how to use Google Cloud TTS voices on the SignalWire platform. slug: /voice/tts/gcloud --- diff --git a/fern/products/platform/pages/calling/voice/TTS/index.mdx b/fern/products/platform/pages/calling/voice/TTS/index.mdx index 41a6549e98..975a7a893a 100644 --- a/fern/products/platform/pages/calling/voice/TTS/index.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/index.mdx @@ -18,6 +18,8 @@ description: Detailed list of all the TTS providers and voices SignalWire suppor [deepgram]: /docs/platform/voice/tts/deepgram [elevenlabs]: /docs/platform/voice/tts/elevenlabs [gcloud]: /docs/platform/voice/tts/gcloud +[inworld]: /docs/platform/voice/tts/inworld +[minimax]: /docs/platform/voice/tts/minimax [openai]: /docs/platform/voice/tts/openai [rime]: /docs/platform/voice/tts/rime @@ -43,6 +45,10 @@ English, Spanish, French, German, and Hindi (Arcana model only). English, Spanish, German, French, Dutch, Italian, and Japanese. 
- [Amazon Polly][polly], [Azure][azure], [Cartesia][cartesia], and [Google Cloud][gcloud] offer a wide range of supported languages. +- [Inworld][inworld] voices support English plus Arabic, Chinese (Mandarin), Dutch, French, +German, Hebrew, Hindi, Italian, Japanese, Korean, Polish, Portuguese, Russian, and Spanish. +- [MiniMax][minimax] voices span more than 20 languages, including English, Spanish, Portuguese, +French, German, Italian, Chinese, Japanese, Korean, and many more, with automatic language detection. - All [ElevenLabs][elevenlabs] and [OpenAI][openai] voices are fully multilingual. **SSML support:** Google Cloud and Amazon Polly support @@ -78,16 +84,18 @@ GCLOUD.EN-US-NEURAL2-A For detailed instructions for each provider, consult the voice ID references linked in the **Usage** column of the below table. -| TTS provider | Engine code | Sample voice ID string | Usage | -| :-------------- | :----------- | :---------------------------------------------- | ---------------------------------------------- | -| Amazon Polly | `amazon` | `amazon.Joanna-Neural` | [Reference](/docs/platform/voice/tts/amazon-polly#usage) | -| Azure | `azure` | `en-US-AvaNeural` | [Reference](/docs/platform/voice/tts/azure#usage) | -| Cartesia | `cartesia` | `cartesia.a167e0f3-df7e-4d52-a9c3-f949145efdab` | [Reference](/docs/platform/voice/tts/cartesia#usage) | -| Deepgram | `deepgram` | `deepgram.aura-asteria-en` | [Reference](/docs/platform/voice/tts/deepgram#usage) | -| ElevenLabs | `elevenlabs` | `elevenlabs.thomas` | [Reference](/docs/platform/voice/tts/elevenlabs#usage) | -| Google Cloud | `gcloud` | `gcloud.en-US-Casual-K` | [Reference](/docs/platform/voice/tts/gcloud#usage) | -| OpenAI | `openai` | `openai.alloy` | [Reference](/docs/platform/voice/tts/openai#usage) | -| Rime | `rime` | `rime.luna:arcana` | [Reference](/docs/platform/voice/tts/rime#voices) | +| TTS provider | Sample voice ID string | Usage | +| :-------------- | 
:---------------------------------------------- | ---------------------------------------------- | +| Amazon Polly | `amazon.Joanna-Neural` | [Reference](/docs/platform/voice/tts/amazon-polly#usage) | +| Azure | `azure.en-US-AvaNeural` | [Reference](/docs/platform/voice/tts/azure#usage) | +| Cartesia | `cartesia.a167e0f3-df7e-4d52-a9c3-f949145efdab` | [Reference](/docs/platform/voice/tts/cartesia#usage) | +| Deepgram | `deepgram.aura-asteria-en` | [Reference](/docs/platform/voice/tts/deepgram#usage) | +| ElevenLabs | `elevenlabs.thomas` | [Reference](/docs/platform/voice/tts/elevenlabs#usage) | +| Google Cloud | `gcloud.en-US-Casual-K` | [Reference](/docs/platform/voice/tts/gcloud#usage) | +| Inworld | `inworld.Lauren:inworld-tts-1.5-mini` | [Reference](/docs/platform/voice/tts/inworld#voices) | +| MiniMax | `minimax.English_CalmWoman:speech-2.6-turbo` | [Reference](/docs/platform/voice/tts/minimax#voices) | +| OpenAI | `openai.alloy` | [Reference](/docs/platform/voice/tts/openai#usage) | +| Rime | `rime.luna:arcana` | [Reference](/docs/platform/voice/tts/rime#voices) |
diff --git a/fern/products/platform/pages/calling/voice/TTS/inworld.mdx b/fern/products/platform/pages/calling/voice/TTS/inworld.mdx new file mode 100644 index 0000000000..10800bb434 --- /dev/null +++ b/fern/products/platform/pages/calling/voice/TTS/inworld.mdx @@ -0,0 +1,185 @@ +--- +title: Inworld +subtitle: Text-to-speech +slug: /voice/tts/inworld +description: Learn how to use Inworld's expressive, multilingual TTS models with SignalWire AI Voice applications. +max-toc-depth: 3 +--- + + +Inworld is a text-to-speech engine offering high-quality, expressive voices across many languages. + +## Models + +SignalWire supports two Inworld models. +See [Inworld's model documentation](https://docs.inworld.ai/tts/tts-models) for more detail. + +| Model | Description | +|-------|-------------| +| `inworld-tts-1.5-mini` | Faster, lower-cost model, suited for high-volume, latency-sensitive applications | +| `inworld-tts-1.5-max` | Higher-quality model, suited for expressive, character-driven, and premium reads | + + +The model defaults to `inworld-tts-1.5-mini` when used with [AI agents](/docs/swml/reference/ai); otherwise it +defaults to `inworld-tts-1.5-max`. Set a model explicitly to override this. + + +## Voices + +Inworld provides a large library of expressive voices across many languages. +A voice's name is its voice ID — for example, `Lauren` becomes `inworld.Lauren` in the +[voice string](#usage). + + +Only Inworld's built-in voices are supported. Cloned and custom voices do not work — you must use +one of the default voice IDs that Inworld provides. + + +Preview the voices and find their IDs in Inworld's documentation: + + + + Preview Inworld voices and models in the browser. + + + Reference for voices, models, and languages. 
+ + + +## Languages + +Inworld supports the following languages: + +| Language | Code | +| :------- | :--- | +| English | `en` | +| Arabic | `ar` | +| Chinese (Mandarin) | `zh` | +| Dutch | `nl` | +| French | `fr` | +| German | `de` | +| Hebrew | `he` | +| Hindi | `hi` | +| Italian | `it` | +| Japanese | `ja` | +| Korean | `ko` | +| Polish | `pl` | +| Portuguese | `pt` | +| Russian | `ru` | +| Spanish | `es` | + +Refer to the [Inworld TTS docs](https://docs.inworld.ai/tts/tts) +for the most up-to-date reference to supported languages and voices. + +## Usage + +A voice identifier string has three parts: the `inworld` engine code, a voice ID, and an optional model. + +**Format**: `inworld.<voiceId>:<model>` + +- `voiceId` (required): an Inworld voice name (for example, `Lauren`); see [Voices](#voices). +- `model` (optional): one of the [models](#models) above. If you omit it, a default model is used. + +**Examples**: +```text +inworld.Lauren:inworld-tts-1.5-mini +inworld.Brian:inworld-tts-1.5-max +inworld.Asuka:inworld-tts-1.5-mini +``` + +You can also set the model with the separate +[`model`](/docs/swml/reference/ai/languages#use-voice-strings) +parameter of the [`languages`](/docs/swml/reference/ai/languages) SWML method +instead of appending `:model` to the voice string: + +```yaml +languages: +- name: English + code: en-US + voice: inworld.Lauren + model: inworld-tts-1.5-mini +``` + +--- + +## Build with Inworld on SignalWire + + + +### Create a Space and add credit + +If you don't have one yet, you'll need to +[create a SignalWire Space](/docs/platform/signing-up-for-a-space). +Be sure to add some credit to test with. + +### Add a new Resource + + + +### Create a SWML Script + +From the Resources menu, select **SWML Script**. +Name it something fun and recognizable. +Ours is titled Inworld Wizard. 
+ +Next, paste the following starter script into the text box, and hit Save: + +```yaml +version: 1.0.0 +sections: + main: + - ai: + prompt: + text: | + You're Lauren, a voice from Inworld's TTS engine! + Introduce yourself, and have a conversation about programmable unified communications on the SignalWire platform. + languages: + - name: English + code: en-US + voice: inworld.Lauren:inworld-tts-1.5-mini +``` + +### Buy and assign a phone number + +Navigate to the **Phone Numbers** section of the Dashboard's left sidebar menu. + +Purchase a phone number and assign it to the desired SWML script. + + + ![A purchased phone number showing assignment to a specified Resource.](/assets/images/dashboard/phone-numbers/assign-resource-voice.webp) + + +### Give it a call! + +Call the number you just assigned to chat with your new AI voice application on the phone. + + + +## Next steps with SWML + +Now you've deployed your very first SignalWire voice AI application using Inworld voices. +Next, dive deeper into SWML to explore its capabilities! + + + + Documentation for all SWML methods + + + Build advanced AI applications using SignalWire Markup Language + + + SWML guides and demo applications + + diff --git a/fern/products/platform/pages/calling/voice/TTS/minimax.mdx b/fern/products/platform/pages/calling/voice/TTS/minimax.mdx new file mode 100644 index 0000000000..88de3efa00 --- /dev/null +++ b/fern/products/platform/pages/calling/voice/TTS/minimax.mdx @@ -0,0 +1,175 @@ +--- +title: MiniMax +subtitle: Text-to-speech +slug: /voice/tts/minimax +description: Learn how to use MiniMax's expressive, multilingual TTS voices with SignalWire AI Voice applications. +max-toc-depth: 3 +--- + + +MiniMax is a text-to-speech engine offering expressive voices across many languages, with controls +for emotion, speed, pitch, and volume. + +## Models + +SignalWire supports the following MiniMax models. Pick a `turbo` model for speed and cost, or an +`hd` model for the highest quality. 
If you don't specify a model, `speech-2.6-turbo` is used. + +| Model | Description | +|-------|-------------| +| `speech-2.6-turbo` | **Default.** Latest turbo model: faster and lower-cost | +| `speech-2.6-hd` | Latest HD model: higher quality | +| `speech-02-turbo` | Previous-generation turbo model | +| `speech-02-hd` | Previous-generation HD model | +| `speech-01-turbo` | First-generation turbo model | +| `speech-01-hd` | First-generation HD model | + +## Voices + +MiniMax provides a large library of system voices across many languages. +A voice's **Voice ID** is what you put in the [voice string](#usage): for example, +`English_CalmWoman` becomes `minimax.English_CalmWoman`. + + +Only MiniMax's built-in **system voices** are supported. Cloned and AI-generated voices do not +work — you must use one of the default voice IDs that MiniMax provides. + + + +Some voice IDs contain spaces or parentheses (for example, `Cantonese_ProfessionalHost (F)`). +Wrap the whole voice string in quotes when a voice ID isn't a plain word, for example +`voice: "minimax.Cantonese_ProfessionalHost (F):speech-2.6-turbo"`. + + +Browse the full list of voice IDs in MiniMax's documentation: + + + + Browse the full list of MiniMax system voice IDs. + + + Reference for models, voices, and parameters. + + + +## Languages + +MiniMax supports the following languages and automatically detects the language of your text: + +Arabic, Cantonese, Chinese (Mandarin), Czech, Dutch, English, Finnish, French, German, Greek, Hindi, Indonesian, Italian, Japanese, Korean, Polish, Portuguese, Romanian, Russian, Spanish, Thai, Turkish, Ukrainian, Vietnamese. + +Refer to the [MiniMax developer platform](https://platform.minimax.io/) +for the most up-to-date reference to supported languages and voices. + +## Usage + +A voice identifier string has three parts: the `minimax` engine code, a voice ID, and an optional model. 
+ +**Format**: `minimax.<voiceId>:<model>` + +- `voiceId` (required): a MiniMax system voice ID (for example, `English_CalmWoman`); see [Voices](#voices). +- `model` (optional): one of the [models](#models) above. Defaults to `speech-2.6-turbo`. + +**Examples**: +```text +minimax.English_CalmWoman:speech-2.6-turbo +minimax.English_Trustworth_Man:speech-2.6-hd +minimax.Japanese_KindLady:speech-2.6-turbo +``` + +You can also set the model with the separate +[`model`](/docs/swml/reference/ai/languages#use-voice-strings) +parameter of the [`languages`](/docs/swml/reference/ai/languages) SWML method +instead of appending `:model` to the voice string: + +```yaml +languages: +- name: English + code: en-US + voice: minimax.English_CalmWoman + model: speech-2.6-turbo +``` + +--- + +## Build with MiniMax on SignalWire + + + +### Create a Space and add credit + +If you don't have one yet, you'll need to +[create a SignalWire Space](/docs/platform/signing-up-for-a-space). +Be sure to add some credit to test with. + +### Add a new Resource + + + +### Create a SWML Script + +From the Resources menu, select **SWML Script**. +Name it something fun and recognizable. +Ours is titled MiniMax Wizard. + +Next, paste the following starter script into the text box, and hit Save: + +```yaml +version: 1.0.0 +sections: + main: + - ai: + prompt: + text: | + You're the Calm Woman, a voice from MiniMax's TTS engine! + Introduce yourself, and have a conversation about programmable unified communications on the SignalWire platform. + languages: + - name: English + code: en-US + voice: minimax.English_CalmWoman:speech-2.6-turbo +``` + +### Buy and assign a phone number + +Navigate to the **Phone Numbers** section of the Dashboard's left sidebar menu. + +Purchase a phone number and assign it to the desired SWML script. + + + ![A purchased phone number showing assignment to a specified Resource.](/assets/images/dashboard/phone-numbers/assign-resource-voice.webp) + + +### Give it a call! 
+ +Call the number you just assigned to chat with your new AI voice application on the phone. + + + +## Next steps with SWML + +Now you've deployed your very first SignalWire voice AI application using MiniMax voices. +Next, dive deeper into SWML to explore its capabilities! + + + + Documentation for all SWML methods + + + Build advanced AI applications using SignalWire Markup Language + + + SWML guides and demo applications + + diff --git a/fern/products/platform/pages/calling/voice/TTS/openai.mdx b/fern/products/platform/pages/calling/voice/TTS/openai.mdx index a3f8c80264..b3034a5cba 100644 --- a/fern/products/platform/pages/calling/voice/TTS/openai.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/openai.mdx @@ -1,6 +1,7 @@ --- id: 967bb43c-abe7-4038-8423-4ba5f7cb50ea title: OpenAI +subtitle: Text-to-speech description: Learn how to use OpenAI TTS voices on the SignalWire platform. slug: /voice/tts/openai --- diff --git a/fern/products/platform/pages/calling/voice/TTS/polly.mdx b/fern/products/platform/pages/calling/voice/TTS/polly.mdx index a499fda515..2a5e237714 100644 --- a/fern/products/platform/pages/calling/voice/TTS/polly.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/polly.mdx @@ -1,6 +1,7 @@ --- id: f7fbcf5a-022b-4b2e-ae2b-fdad62017204 title: Amazon Polly +subtitle: Text-to-speech description: Learn how to use Polly TTS voices on the SignalWire platform. slug: /voice/tts/amazon-polly --- diff --git a/fern/products/platform/pages/calling/voice/TTS/rime.mdx b/fern/products/platform/pages/calling/voice/TTS/rime.mdx index 064486efc1..60bff5d9e5 100644 --- a/fern/products/platform/pages/calling/voice/TTS/rime.mdx +++ b/fern/products/platform/pages/calling/voice/TTS/rime.mdx @@ -1,6 +1,6 @@ --- id: a3dbb231-d452-48ca-ba30-acd8ec499349 -title: The Rime TTS engine +title: Rime subtitle: Text-to-speech slug: /voice/tts/rime description: Learn how to use Rime's Arcana and Mist v2 TTS models with SignalWire AI Voice applications. 
diff --git a/fern/products/swml/pages/reference/methods/calling/ai/languages/index.mdx b/fern/products/swml/pages/reference/methods/calling/ai/languages/index.mdx index 9f009d7478..ef99bc81e7 100644 --- a/fern/products/swml/pages/reference/methods/calling/ai/languages/index.mdx +++ b/fern/products/swml/pages/reference/methods/calling/ai/languages/index.mdx @@ -35,15 +35,15 @@ Use `ai.languages` to configure the spoken language of your AI Agent, as well as String format: `.`. -Select engine from `gcloud`, `polly`, `elevenlabs`, or `deepgram`. Select voice from [TTS provider reference][tts-providers]. +Select engine from `gcloud`, `polly`, `elevenlabs`, `deepgram`, `cartesia`, `rime`, `inworld`, or `minimax`. Select voice from [TTS provider reference][tts-providers]. For example, `"gcloud.fr-FR-Neural2-B"`. See [`voice` usage](#use-voice-strings) for more details. - Enables emotion for the set TTS engine. This allows the AI to express emotions when speaking. A global emotion or specific emotions for certain topics can be set within the prompt of the AI. + Enables automatic emotion for the set TTS engine. This allows the AI to express emotions when speaking. A global emotion or specific emotions for certain topics can be set within the prompt of the AI. *Valid values:* `auto` -Only works with `Cartesia` TTS engine. +Only works with the `Cartesia` and `MiniMax` TTS engines. For a fixed MiniMax emotion, use [`params.emotion`](#paramsemotion) instead. @@ -88,6 +88,58 @@ See [`voice` usage](#use-voice-strings) for more details. Only works with the ElevenLabs TTS engine. + + + Adjusts how quickly the voice speaks. + Values below `1.0` slow the voice down; values above `1.0` speed it up. + Valid values range from `0.5` to `1.5`. + + Only works with the Inworld TTS engine. + + + + + Controls the randomness and expressiveness of the generated speech. + Lower values produce a more consistent, predictable delivery; higher values introduce more variation. 
+ Valid values range from `0.0` to `2.0`. + + Only works with the Inworld TTS engine. + + + + + How quickly the voice speaks. + Values below `1.0` slow the voice down; values above `1.0` speed it up. + Valid values range from `0.5` to `2.0`. + + Only works with the MiniMax TTS engine. + + + + + The speaking volume. Lower values are quieter. + Valid values range from `0.1` to `1.0`. + + Only works with the MiniMax TTS engine. + + + + + The pitch shift in semitones. Negative values lower the pitch; positive values raise it. + Valid values range from `-12` to `12`. + + Only works with the MiniMax TTS engine. + + + + + A fixed emotional tone for the generated speech. + Valid values are `happy`, `sad`, `angry`, `fearful`, `disgusted`, `surprised`, and `neutral`. + To vary the emotion automatically during a conversation, use [`languages[].emotion`](#languagesemotion) set to `auto` instead. + + Only works with the MiniMax TTS engine. + + @@ -105,7 +157,7 @@ See [`voice` usage](#use-voice-strings) for more details. Compose the `voice` string using the `.` syntax. -First, select your engine using the `gcloud`, `polly`, `elevenlabs`, or `deepgram` identifier. +First, select your engine using the `gcloud`, `polly`, `elevenlabs`, `deepgram`, `cartesia`, `rime`, `inworld`, or `minimax` identifier. Append a period (`.`), and then the specific voice ID (for example, `en-US-Casual-K`) from the TTS provider. Refer to SignalWire's [Supported Voices and Languages][tts-providers] for guides on configuring voice ID strings for each provider. diff --git a/specs/swml/calling/Methods/ai/ai_languages.tsp b/specs/swml/calling/Methods/ai/ai_languages.tsp index b16db7971d..a93ca5d2f9 100644 --- a/specs/swml/calling/Methods/ai/ai_languages.tsp +++ b/specs/swml/calling/Methods/ai/ai_languages.tsp @@ -21,7 +21,7 @@ model LanguagesBase { @doc(""" Voice to use for the language. String format: `.`. - Select engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, or `deepgram`. 
+ Select engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, `deepgram`, `rime`, `inworld`, or `minimax`. For example, `gcloud.fr-FR-Neural2-B`. """) @example("gcloud.fr-FR-Neural2-B") @@ -32,9 +32,10 @@ model LanguagesBase { `model`?: string; @doc(""" - Enables emotion detection for the set TTS engine. This allows the AI to express emotions when speaking. + Enables automatic emotion detection for the set TTS engine. This allows the AI to express emotions when speaking. A global emotion or specific emotions for certain topics can be set within the prompt of the AI. - IMPORTANT: Only works with [`Cartesia`](/docs/platform/voice/tts/cartesia) TTS engine. + IMPORTANT: Only works with the [`Cartesia`](/docs/platform/voice/tts/cartesia) and [`MiniMax`](/docs/platform/voice/tts/minimax) TTS engines. + For a fixed (non-automatic) MiniMax emotion, use [`params.emotion`](#languagesparams) instead. """) @example("auto") emotion?: "auto"; @@ -67,6 +68,39 @@ model LanguageParams { @minValue(0.0) @maxValue(1.0) similarity?: float | SWMLVar = 0.75; + + @doc("Adjusts how quickly the voice speaks. Values below `1.0` slow the voice down; values above `1.0` speed it up. IMPORTANT: Only works with the Inworld TTS engine.") + @minValue(0.5) + @maxValue(1.5) + speakingRate?: float | SWMLVar = 1.0; + + @doc("Controls the randomness and expressiveness of the generated speech. Lower values produce a more consistent, predictable delivery; higher values introduce more variation. IMPORTANT: Only works with the Inworld TTS engine.") + @minValue(0.0) + @maxValue(2.0) + temperature?: float | SWMLVar = 1.0; + + @doc("How quickly the voice speaks. Values below `1.0` slow the voice down; values above `1.0` speed it up. IMPORTANT: Only works with the MiniMax TTS engine.") + @minValue(0.5) + @maxValue(2.0) + speed?: float | SWMLVar = 1.0; + + @doc("The speaking volume. Lower values are quieter. 
IMPORTANT: Only works with the MiniMax TTS engine.") + @minValue(0.1) + @maxValue(1.0) + vol?: float | SWMLVar = 1.0; + + @doc("The pitch shift in semitones. Negative values lower the pitch; positive values raise it. IMPORTANT: Only works with the MiniMax TTS engine.") + @minValue(-12) + @maxValue(12) + pitch?: int32 | SWMLVar = 0; + + @doc(""" + A fixed emotional tone for the generated speech. + To vary the emotion automatically during a conversation, use [`languages[].emotion`](#languagesemotion) set to `auto` instead. + IMPORTANT: Only works with the MiniMax TTS engine. + """) + @example("happy") + emotion?: "happy" | "sad" | "angry" | "fearful" | "disgusted" | "surprised" | "neutral"; } @summary("Language with Fillers (Deprecated)") diff --git a/specs/swml/calling/tsp-output/@typespec/json-schema/SWMLObject.json b/specs/swml/calling/tsp-output/@typespec/json-schema/SWMLObject.json index 6b85dc1103..316826ec74 100644 --- a/specs/swml/calling/tsp-output/@typespec/json-schema/SWMLObject.json +++ b/specs/swml/calling/tsp-output/@typespec/json-schema/SWMLObject.json @@ -7671,7 +7671,7 @@ "examples": [ "gcloud.fr-FR-Neural2-B" ], - "description": "Voice to use for the language. String format: `.`.\nSelect engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, or `deepgram`.\nFor example, `gcloud.fr-FR-Neural2-B`." + "description": "Voice to use for the language. String format: `.`.\nSelect engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, `deepgram`, `rime`, `inworld`, or `minimax`.\nFor example, `gcloud.fr-FR-Neural2-B`." }, "model": { "type": "string", @@ -7686,7 +7686,7 @@ "examples": [ "auto" ], - "description": "Enables emotion detection for the set TTS engine. This allows the AI to express emotions when speaking.\nA global emotion or specific emotions for certain topics can be set within the prompt of the AI.\nIMPORTANT: Only works with [`Cartesia`](/docs/platform/voice/tts/cartesia) TTS engine." 
+ "description": "Enables automatic emotion detection for the set TTS engine. This allows the AI to express emotions when speaking.\nA global emotion or specific emotions for certain topics can be set within the prompt of the AI.\nIMPORTANT: Only works with the [`Cartesia`](/docs/platform/voice/tts/cartesia) and [`MiniMax`](/docs/platform/voice/tts/minimax) TTS engines.\nFor a fixed (non-automatic) MiniMax emotion, use [`params.emotion`](#languagesparams) instead." }, "speed": { "type": "string", @@ -7755,7 +7755,7 @@ "examples": [ "gcloud.fr-FR-Neural2-B" ], - "description": "Voice to use for the language. String format: `.`.\nSelect engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, or `deepgram`.\nFor example, `gcloud.fr-FR-Neural2-B`." + "description": "Voice to use for the language. String format: `.`.\nSelect engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, `deepgram`, `rime`, `inworld`, or `minimax`.\nFor example, `gcloud.fr-FR-Neural2-B`." }, "model": { "type": "string", @@ -7770,7 +7770,7 @@ "examples": [ "auto" ], - "description": "Enables emotion detection for the set TTS engine. This allows the AI to express emotions when speaking.\nA global emotion or specific emotions for certain topics can be set within the prompt of the AI.\nIMPORTANT: Only works with [`Cartesia`](/docs/platform/voice/tts/cartesia) TTS engine." + "description": "Enables automatic emotion detection for the set TTS engine. This allows the AI to express emotions when speaking.\nA global emotion or specific emotions for certain topics can be set within the prompt of the AI.\nIMPORTANT: Only works with the [`Cartesia`](/docs/platform/voice/tts/cartesia) and [`MiniMax`](/docs/platform/voice/tts/minimax) TTS engines.\nFor a fixed (non-automatic) MiniMax emotion, use [`params.emotion`](#languagesparams) instead." 
}, "speed": { "type": "string", @@ -9391,6 +9391,114 @@ "minimum": 0, "maximum": 1, "description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine." + }, + "speakingRate": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 1, + "minimum": 0.5, + "maximum": 1.5, + "description": "Adjusts how quickly the voice speaks. Values below `1.0` slow the voice down; values above `1.0` speed it up. IMPORTANT: Only works with the Inworld TTS engine." + }, + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 1, + "minimum": 0, + "maximum": 2, + "description": "Controls the randomness and expressiveness of the generated speech. Lower values produce a more consistent, predictable delivery; higher values introduce more variation. IMPORTANT: Only works with the Inworld TTS engine." + }, + "speed": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 1, + "minimum": 0.5, + "maximum": 2, + "description": "How quickly the voice speaks. Values below `1.0` slow the voice down; values above `1.0` speed it up. IMPORTANT: Only works with the MiniMax TTS engine." + }, + "vol": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 1, + "minimum": 0.1, + "maximum": 1, + "description": "The speaking volume. Lower values are quieter. IMPORTANT: Only works with the MiniMax TTS engine." + }, + "pitch": { + "anyOf": [ + { + "type": "integer", + "minimum": -2147483648, + "maximum": 2147483647 + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 0, + "minimum": -12, + "maximum": 12, + "description": "The pitch shift in semitones. Negative values lower the pitch; positive values raise it. 
IMPORTANT: Only works with the MiniMax TTS engine." + }, + "emotion": { + "anyOf": [ + { + "type": "string", + "const": "happy" + }, + { + "type": "string", + "const": "sad" + }, + { + "type": "string", + "const": "angry" + }, + { + "type": "string", + "const": "fearful" + }, + { + "type": "string", + "const": "disgusted" + }, + { + "type": "string", + "const": "surprised" + }, + { + "type": "string", + "const": "neutral" + } + ], + "examples": [ + "happy" + ], + "description": "A fixed emotional tone for the generated speech.\nTo vary the emotion automatically during a conversation, use [`languages[].emotion`](#languagesemotion) set to `auto` instead.\nIMPORTANT: Only works with the MiniMax TTS engine." } }, "unevaluatedProperties": {