From ab5a65a1a887a39449aab8ea864143915de53d9d Mon Sep 17 00:00:00 2001 From: James Walker Date: Tue, 27 Jan 2026 10:24:11 +0000 Subject: [PATCH 1/2] Add new field to finals and partials --- spec/realtime.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/spec/realtime.yaml b/spec/realtime.yaml index 58be4291..9c89fb14 100644 --- a/spec/realtime.yaml +++ b/spec/realtime.yaml @@ -483,6 +483,10 @@ components: type: string description: | The channel identifier to which the audio belongs. This field is only seen in multichannel. + force_end_of_utterance: + type: boolean + description: | + Indicates whether this message was triggered as a result of a ForceEndOfUtterance request. required: - message - metadata @@ -506,6 +510,10 @@ components: type: string description: | The channel identifier to which the audio belongs. This field is only seen in multichannel. + force_end_of_utterance: + type: boolean + description: | + Indicates whether this message was triggered as a result of a ForceEndOfUtterance request. required: - message - metadata From 2a62bce7bbc7edacdc68bccb42f5e24c5f722c42 Mon Sep 17 00:00:00 2001 From: James Walker Date: Thu, 29 Jan 2026 14:15:04 +0000 Subject: [PATCH 2/2] Change name and add forced mention in turn detection --- docs/speech-to-text/realtime/turn-detection.mdx | 2 +- spec/realtime.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/speech-to-text/realtime/turn-detection.mdx b/docs/speech-to-text/realtime/turn-detection.mdx index 396993e9..d96fcf44 100644 --- a/docs/speech-to-text/realtime/turn-detection.mdx +++ b/docs/speech-to-text/realtime/turn-detection.mdx @@ -120,7 +120,7 @@ You can also use `ForceEndOfUtterance` with multi-channel diarization: } ``` -When this message is received, the server will send an [AddTranscript](../../api-ref/realtime-transcription-websocket#addtranscript) message, followed by an [EndOfUtterance](../../api-ref/realtime-transcription-websocket#endofutterance) message. 
+When this message is received, the server will send an [AddTranscript](../../api-ref/realtime-transcription-websocket#addtranscript) message, with a `forced` field to indicate it came from a ForceEndOfUtterance request, followed by an [EndOfUtterance](../../api-ref/realtime-transcription-websocket#endofutterance) message. ## Semantic turn detection diff --git a/spec/realtime.yaml b/spec/realtime.yaml index 9c89fb14..424d8406 100644 --- a/spec/realtime.yaml +++ b/spec/realtime.yaml @@ -483,7 +483,7 @@ components: type: string description: | The channel identifier to which the audio belongs. This field is only seen in multichannel. - force_end_of_utterance: + forced: type: boolean description: | Indicates whether this message was triggered as a result of a ForceEndOfUtterance request. @@ -510,7 +510,7 @@ components: type: string description: | The channel identifier to which the audio belongs. This field is only seen in multichannel. - force_end_of_utterance: + forced: type: boolean description: | Indicates whether this message was triggered as a result of a ForceEndOfUtterance request.