livekit · hiroshihorie · Feb 11, 2026 · Feb 11, 2026
diff --git a/.changes/raw-bytes-audio-renderer b/.changes/raw-bytes-audio-renderer
@@ -0,0 +1 @@
+patch type="performance" "Send raw PCM bytes in audio renderer instead of boxed int arrays"
diff --git a/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt b/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt
@@ -105,175 +105,153 @@ class AudioRenderer(
     }
   }
 
+  /**
+   * Converts audio data to raw interleaved bytes in the target sample format.
+   *
+   * If source and target channel counts and sample widths match, data is copied directly.
+   * If target requests fewer channels, the first channels of each frame are kept and interleaved.
+   * If target requests more channels, the source channel count is used instead.
+   * Sends raw byte arrays instead of boxed sample lists.
+   */
   private fun convertAudioData(
     audioData: ByteBuffer,
     bitsPerSample: Int,
     sampleRate: Int,
     numberOfChannels: Int,
     numberOfFrames: Int
   ): Map<String, Any> {
-    // Create result similar to iOS implementation
+    require(bitsPerSample == 16 || bitsPerSample == 32) {
+      "Unsupported bitsPerSample: $bitsPerSample"
+    }
+    require(numberOfChannels > 0) {
+      "Invalid numberOfChannels: $numberOfChannels"
+    }
+
+    val outChannels = targetFormat.numberOfChannels.coerceAtMost(numberOfChannels)
+
     val result = mutableMapOf<String, Any>(
       "sampleRate" to sampleRate,
-      "channels" to numberOfChannels,
-      "frameLength" to numberOfFrames
+      "channels" to outChannels,
     )
 
-    // Convert based on target format
+    val buffer = audioData.duplicate()
+    buffer.order(ByteOrder.LITTLE_ENDIAN)
+    buffer.rewind()
+
     when (targetFormat.commonFormat) {
       "int16" -> {
         result["commonFormat"] = "int16"
-        result["data"] =
-          convertToInt16(audioData, bitsPerSample, numberOfChannels, numberOfFrames)
+        result["data"] = extractAsInt16Bytes(buffer, bitsPerSample, numberOfChannels, outChannels, numberOfFrames)
       }
-
       "float32" -> {
         result["commonFormat"] = "float32"
-        result["data"] =
-          convertToFloat32(audioData, bitsPerSample, numberOfChannels, numberOfFrames)
+        result["data"] = extractAsFloat32Bytes(buffer, bitsPerSample, numberOfChannels, outChannels, numberOfFrames)
       }
-
       else -> {
-        result["commonFormat"] = "int16" // Default fallback
-        result["data"] =
-          convertToInt16(audioData, bitsPerSample, numberOfChannels, numberOfFrames)
+        result["commonFormat"] = "int16"
+        result["data"] = extractAsInt16Bytes(buffer, bitsPerSample, numberOfChannels, outChannels, numberOfFrames)
       }
     }
 
     return result
   }
 
-  private fun convertToInt16(
-    audioData: ByteBuffer,
+  private fun extractAsInt16Bytes(
+    buffer: ByteBuffer,
     bitsPerSample: Int,
-    numberOfChannels: Int,
+    srcChannels: Int,
+    outChannels: Int,
     numberOfFrames: Int
-  ): List<List<Int>> {
-    val channelsData = mutableListOf<List<Int>>()
+  ): ByteArray {
+    // Fast path for int16 with matching channel count.
+    if (bitsPerSample == 16 && srcChannels == outChannels) {
+      val totalBytes = numberOfFrames * outChannels * 2
+      val out = ByteArray(totalBytes)
+      buffer.get(out, 0, totalBytes.coerceAtMost(buffer.remaining()))
+      return out
+    }
 
-    // Prepare buffer for reading
-    val buffer = audioData.duplicate()
-    buffer.order(ByteOrder.LITTLE_ENDIAN)
-    buffer.rewind()
+    val out = ByteArray(numberOfFrames * outChannels * 2)
+    val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
 
     when (bitsPerSample) {
       16 -> {
-        // Already 16-bit, just reformat by channels
-        for (channel in 0 until numberOfChannels) {
-          val channelData = mutableListOf<Int>()
-          buffer.position(0) // Start from beginning for each channel
-
-          for (frame in 0 until numberOfFrames) {
-            val sampleIndex = frame * numberOfChannels + channel
-            val byteIndex = sampleIndex * 2
-
+        for (frame in 0 until numberOfFrames) {
+          val srcOffset = frame * srcChannels * 2
+          for (ch in 0 until outChannels) {
+            val byteIndex = srcOffset + ch * 2
             if (byteIndex + 1 < buffer.capacity()) {
               buffer.position(byteIndex)
-              val sample = buffer.short.toInt()
-              channelData.add(sample)
+              outBuf.putShort((frame * outChannels + ch) * 2, buffer.short)
             }
           }
-          channelsData.add(channelData)
         }
       }
-
       32 -> {
-        // Convert from 32-bit to 16-bit
-        for (channel in 0 until numberOfChannels) {
-          val channelData = mutableListOf<Int>()
-          buffer.position(0)
-
-          for (frame in 0 until numberOfFrames) {
-            val sampleIndex = frame * numberOfChannels + channel
-            val byteIndex = sampleIndex * 4
-
+        for (frame in 0 until numberOfFrames) {
+          val srcOffset = frame * srcChannels * 4
+          for (ch in 0 until outChannels) {
+            val byteIndex = srcOffset + ch * 4
             if (byteIndex + 3 < buffer.capacity()) {
               buffer.position(byteIndex)
-              val sample32 = buffer.int
-              // Convert 32-bit to 16-bit by right-shifting
-              val sample16 = (sample32 shr 16).toShort().toInt()
-              channelData.add(sample16)
+              val sample16 = (buffer.int shr 16).toShort()
+              outBuf.putShort((frame * outChannels + ch) * 2, sample16)
             }
           }
-          channelsData.add(channelData)
-        }
-      }
-
-      else -> {
-        // Unsupported format, return empty data
-        repeat(numberOfChannels) {
-          channelsData.add(emptyList())
         }
       }
     }
 
-    return channelsData
+    return out
   }
 
-  private fun convertToFloat32(
-    audioData: ByteBuffer,
+  private fun extractAsFloat32Bytes(
+    buffer: ByteBuffer,
     bitsPerSample: Int,
-    numberOfChannels: Int,
+    srcChannels: Int,
+    outChannels: Int,
     numberOfFrames: Int
-  ): List<List<Float>> {
-    val channelsData = mutableListOf<List<Float>>()
+  ): ByteArray {
+    // Fast path for float32 with matching channel count.
+    if (bitsPerSample == 32 && srcChannels == outChannels) {
+      val totalBytes = numberOfFrames * outChannels * 4
+      val out = ByteArray(totalBytes)
+      buffer.get(out, 0, totalBytes.coerceAtMost(buffer.remaining()))
+      return out
+    }
 
-    val buffer = audioData.duplicate()
-    buffer.order(ByteOrder.LITTLE_ENDIAN)
-    buffer.rewind()
+    val out = ByteArray(numberOfFrames * outChannels * 4)
+    val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
 
     when (bitsPerSample) {
       16 -> {
-        // Convert from 16-bit to float32
-        for (channel in 0 until numberOfChannels) {
-          val channelData = mutableListOf<Float>()
-          buffer.position(0)
-
-          for (frame in 0 until numberOfFrames) {
-            val sampleIndex = frame * numberOfChannels + channel
-            val byteIndex = sampleIndex * 2
-
+        for (frame in 0 until numberOfFrames) {
+          val srcOffset = frame * srcChannels * 2
+          for (ch in 0 until outChannels) {
+            val byteIndex = srcOffset + ch * 2
             if (byteIndex + 1 < buffer.capacity()) {
               buffer.position(byteIndex)
-              val sample16 = buffer.short
-              // Convert to float (-1.0 to 1.0)
-              val sampleFloat = sample16.toFloat() / Short.MAX_VALUE
-              channelData.add(sampleFloat)
+              val sampleFloat = buffer.short.toFloat() / Short.MAX_VALUE
+              outBuf.putFloat((frame * outChannels + ch) * 4, sampleFloat)
             }
           }
-          channelsData.add(channelData)
         }
       }
-
       32 -> {
-        // Assume 32-bit float input
-        for (channel in 0 until numberOfChannels) {
-          val channelData = mutableListOf<Float>()
-          buffer.position(0)
-
-          for (frame in 0 until numberOfFrames) {
-            val sampleIndex = frame * numberOfChannels + channel
-            val byteIndex = sampleIndex * 4
-
+        for (frame in 0 until numberOfFrames) {
+          val srcOffset = frame * srcChannels * 4
+          for (ch in 0 until outChannels) {
+            val byteIndex = srcOffset + ch * 4
             if (byteIndex + 3 < buffer.capacity()) {
               buffer.position(byteIndex)
-              val sampleFloat = buffer.float
-              channelData.add(sampleFloat)
+              outBuf.putFloat((frame * outChannels + ch) * 4, buffer.float)
             }
           }
-          channelsData.add(channelData)
-        }
-      }
-
-      else -> {
-        // Unsupported format
-        repeat(numberOfChannels) {
-          channelsData.add(emptyList())
         }
       }
     }
 
-    return channelsData
+    return out
   }
 }
 

diff --git a/lib/src/preconnect/pre_connect_audio_buffer.dart b/lib/src/preconnect/pre_connect_audio_buffer.dart
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 import 'dart:async';
-import 'dart:typed_data';
 
 import 'package:flutter/services.dart';
 
@@ -41,8 +40,8 @@ typedef PreConnectOnError = void Function(Object error);
 /// still connecting and dispatching an agent, then the buffered audio is sent
 /// once the agent becomes active.
 ///
-/// Audio is buffered in memory (bounded by [defaultMaxSize]); if it overflows,
-/// the oldest audio is dropped until the agent is ready.
+/// Audio is buffered in memory and bounded by [defaultMaxSize].
+/// If it overflows, the oldest audio is dropped until the agent is ready.
 class PreConnectAudioBuffer {
   /// Topic used to send the buffered audio stream to agents.
   static const String dataTopic = 'lk.agent.pre-connect-audio-buffer';
@@ -66,6 +65,7 @@ class PreConnectAudioBuffer {
   PreConnectOnError? _onError;
   final int _requestSampleRate;
   int? _renderedSampleRate;
+  int? _renderedChannels;
 
   bool _nativeRecordingStarted = false;
   bool _hasLoggedOverflow = false;
@@ -107,8 +107,8 @@ class PreConnectAudioBuffer {
   /// [agentReadyFuture] completes with an error and callers should [reset] the
   /// buffer.
   ///
-  /// - Note: Ensure microphone permissions are granted before calling this, or
-  ///   audio capture may fail depending on platform.
+  /// Ensure microphone permissions are granted before calling this.
+  /// Audio capture may fail without permissions.
   Future<void> startRecording({
     Duration timeout = const Duration(seconds: 20),
   }) async {
@@ -161,13 +161,11 @@ class PreConnectAudioBuffer {
       }
 
       try {
-        // Actual sample rate of the audio data, can differ from the request sample rate
+        // Audio format can differ from what was requested.
         _renderedSampleRate = event['sampleRate'] as int;
-        final dataChannels = event['data'] as List<dynamic>;
-        final monoData = dataChannels[0].cast<int>();
-        // Convert Int16 values to bytes using typed data view
-        final int16List = Int16List.fromList(monoData);
-        final bytes = int16List.buffer.asUint8List();
+        _renderedChannels = event['channels'] as int;
+        // Native sends raw interleaved PCM bytes.
+        final Uint8List bytes = event['data'] as Uint8List;
 
         final didOverflow = _buffer.write(bytes);
         if (didOverflow && !_hasLoggedOverflow) {
@@ -181,7 +179,7 @@ class PreConnectAudioBuffer {
       }
     });
 
-    // Listen for agent readiness; when active, attempt to send buffer once.
+    // Listen for agent readiness and send the buffer when active.
     _participantStateListener = _room.events.on<ParticipantStateUpdatedEvent>(
         filter: (event) => event.participant.kind == ParticipantKind.AGENT && event.state == ParticipantState.active,
         (event) async {
@@ -260,7 +258,7 @@ class PreConnectAudioBuffer {
     _participantStateListener = null;
     _buffer.clear();
 
-    // Don't stop the local track - it will continue to be used by the Room
+    // Drop our reference without stopping the local track; the Room still uses it.
     _localTrack = null;
 
     _agentReadyManager.reset();
@@ -269,6 +267,7 @@ class PreConnectAudioBuffer {
     // Reset the _isSent flag to allow data sending on next use
     _isBufferSent = false;
     _hasLoggedOverflow = false;
+    _renderedChannels = null;
 
     logger.info('[Preconnect audio] reset');
   }
@@ -281,8 +280,8 @@ class PreConnectAudioBuffer {
 
   /// Sends the currently buffered audio to one or more agent identities.
   ///
-  /// This is a one-shot operation; repeated calls are ignored after the buffer
-  /// has been sent.
+  /// This is a one-shot operation.
+  /// Repeated calls are ignored after the buffer has been sent.
   ///
   /// The stream is written to [topic] (default: [dataTopic]) and includes
   /// attributes that help the agent interpret the raw audio payload.
@@ -294,10 +293,15 @@ class PreConnectAudioBuffer {
     if (agents.isEmpty) return;
 
     final sampleRate = _renderedSampleRate;
+    final rawChannels = _renderedChannels ?? 1;
+    final channels = rawChannels > 0 ? rawChannels : 1;
     if (sampleRate == null) {
       logger.severe('[Preconnect audio] renderedSampleRate is null');
       return;
     }
+    if (rawChannels <= 0) {
+      logger.warning('[Preconnect audio] Invalid rendered channels: $rawChannels. Falling back to mono.');
+    }
 
     // Wait for local track published event
     final localTrackPublishedEvent = await _localTrackPublishedEvent;
@@ -320,7 +324,7 @@ class PreConnectAudioBuffer {
       topic: topic,
       attributes: {
         'sampleRate': sampleRate.toString(),
-        'channels': '1',
+        'channels': channels.toString(),
         'trackId': localTrackSid,
       },
       totalSize: data.length,
@@ -334,9 +338,10 @@ class PreConnectAudioBuffer {
     await writer.close();
 
     // Compute seconds of audio data sent
-    final int bytesPerSample = 2; // Assuming 16-bit audio
-    final int totalSamples = data.length ~/ bytesPerSample;
-    final double secondsOfAudio = totalSamples / sampleRate;
+    final int bytesPerSample = 2; // 16-bit audio
+    final int bytesPerFrame = bytesPerSample * channels;
+    final int totalFrames = data.length ~/ bytesPerFrame;
+    final double secondsOfAudio = totalFrames / sampleRate;
 
     logger.info(
         '[Preconnect audio] sent ${(data.length / 1024).toStringAsFixed(1)}KB of audio (${secondsOfAudio.toStringAsFixed(2)} seconds) to ${agents} agent(s)');
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		patch type="performance" "Send raw PCM bytes in audio renderer instead of boxed int arrays"