From 1e34dedff12f6a1013ba3234932203139e1ba0cf Mon Sep 17 00:00:00 2001 From: DjDeveloperr Date: Sun, 31 May 2026 16:46:34 -0400 Subject: [PATCH] feat: switch live stream client to WebRTC transport --- README.md | 2 +- docs/api/rest.md | 13 +- docs/guide/video.md | 15 + packages/client/src/app/AppShell.tsx | 21 +- .../src/features/simulators/SimulatorMenu.tsx | 12 + .../client/src/features/stream/streamTypes.ts | 1 + .../src/features/stream/streamWorkerClient.ts | 101 ++- .../src/features/stream/useLiveStream.ts | 8 + .../client/src/features/toolbar/Toolbar.tsx | 6 + packages/client/src/styles/components.css | 16 + packages/server/build.rs | 2 + .../server/native/bridge/XCWNativeBridge.h | 12 + .../server/native/bridge/XCWNativeBridge.m | 564 +++++++++++++ packages/server/native_stubs.c | 37 + packages/server/src/android.rs | 1 - packages/server/src/native/ffi.rs | 27 + packages/server/src/transport/webrtc.rs | 749 +++++++++++++++++- 17 files changed, 1572 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 8b1a7942..759befc3 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ view inside the editor. ## Features -- Supports streaming both iOS simulators and Android emulators +- Supports streaming iOS simulators and Android emulators, including WebRTC audio - Full simulator control & inspection using private iOS accessibility APIs and Android UIAutomator - available using `simdeck` CLI - Real-time screen `describe` command using accessibility view tree - available in token-efficient format for agents - Profiling built-in: CPU, memory, disk writes, network throughput, hang signals, and stack sampling diff --git a/docs/api/rest.md b/docs/api/rest.md index 5e5828fa..16493de2 100644 --- a/docs/api/rest.md +++ b/docs/api/rest.md @@ -173,7 +173,7 @@ Performance query parameters: | `GET` | `/api/simulators/{udid}/control` | Alias for input control WebSocket | | `POST` | `/api/simulators/{udid}/refresh` | Request a fresh frame or keyframe | -For normal clients, copy the browser behavior instead of hand-coding a raw decoder. The UI supports WebRTC first and H.264 WebSocket fallback. +For normal clients, copy the browser behavior instead of hand-coding a raw decoder. The UI supports WebRTC first and H.264 WebSocket fallback. WebRTC carries H.264 video and, when the offer includes an audio receiver, a PCMU simulator-audio track sourced from the selected simulator or emulator process tree. The H.264 WebSocket fallback is video-only. Minimal WebRTC request: @@ -194,7 +194,16 @@ Response: ```json { "type": "answer", - "sdp": "v=0..." + "sdp": "v=0...", + "audio": { + "codec": "PCMU", + "sampleRate": 8000, + "channels": 1 + }, + "video": { + "width": 1179, + "height": 2556 + } } ``` diff --git a/docs/guide/video.md b/docs/guide/video.md index 372ca1b4..8f8c9c83 100644 --- a/docs/guide/video.md +++ b/docs/guide/video.md @@ -4,6 +4,10 @@ SimDeck streams live device video to the browser. Local sessions default to high iOS simulator H.264 uses VideoToolbox for hardware encoding and x264 for software encoding. +WebRTC streams also include simulator audio. The browser menu exposes a Sound +toggle so viewers can keep playback muted until they want to hear the device. +H.264 WebSocket fallback remains video-only. + ## When encoding runs SimDeck starts encoding when a browser stream needs H.264 frames. The server @@ -73,6 +77,17 @@ simdeck service restart --video-codec software --low-latency The browser tries WebRTC first. If WebRTC cannot render a frame, the UI can fall back to H.264 over WebSocket when the browser supports WebCodecs. +Audio is carried on the WebRTC path using a browser-compatible PCMU track. On +macOS 14.2 and newer, SimDeck uses Core Audio process taps over the selected +simulator or emulator process tree, then routes that tap through a private +aggregate device into the WebRTC audio track. If macOS has not granted system +audio recording access, video still streams and the server logs the +audio-capture failure. While the tap is being read, Core Audio mutes the tapped +simulator process at the hardware output; browser playback is controlled by the +Sound toggle. Android emulators launched by SimDeck are started with host audio +enabled, so restart older no-audio emulator processes before testing Android +sound. + Force a mode while debugging: ```text diff --git a/packages/client/src/app/AppShell.tsx b/packages/client/src/app/AppShell.tsx index a670cea6..a685842d 100644 --- a/packages/client/src/app/AppShell.tsx +++ b/packages/client/src/app/AppShell.tsx @@ -54,7 +54,10 @@ import { simulatorUsesInsetChromeButtons, } from "../features/simulators/simulatorDisplay"; import { useSimulatorList } from "../features/simulators/useSimulatorList"; -import { sendWebRtcControlMessage } from "../features/stream/streamWorkerClient"; +import { + sendWebRtcControlMessage, + setActiveStreamAudioMuted, +} from "../features/stream/streamWorkerClient"; import type { StreamConfig, StreamEncoder, @@ -560,6 +563,8 @@ export function AppShell({ const [streamTransport, setStreamTransport] = useState( initialStreamTransportRef.current, ); + const [streamAudioMuted, setStreamAudioMuted] = useState(true); + const streamAudioMutedRef = useRef(streamAudioMuted); const [streamConfigApplyKey, setStreamConfigApplyKey] = useState(0); const [streamConfigReady, setStreamConfigReady] = useState(false); const [touchIndicators, setTouchIndicators] = useState([]); @@ -812,6 +817,7 @@ export function AppShell({ streamBackend, streamCanvasKey, } = useLiveStream({ + audioMuted: streamAudioMuted, canvasElement: streamCanvasElement, paused: !streamConfigReady, remote: remoteStream, @@ -877,6 +883,17 @@ export function AppShell({ [remoteStream], ); + const toggleStreamAudioMuted = useCallback(() => { + const next = !streamAudioMutedRef.current; + streamAudioMutedRef.current = next; + setActiveStreamAudioMuted(next); + setStreamAudioMuted(next); + }, []); + + useEffect(() => { + streamAudioMutedRef.current = streamAudioMuted; + }, [streamAudioMuted]); + useEffect(() => { if ( !selectedSimulator || @@ -2931,6 +2948,7 @@ export function AppShell({ onStreamFpsChange={updateStreamFps} onStreamQualityChange={updateStreamQuality} onStreamTransportChange={updateStreamTransport} + onToggleStreamAudioMuted={toggleStreamAudioMuted} onShutdown={() => { if (!selectedSimulator) { return; @@ -2989,6 +3007,7 @@ export function AppShell({ !selectedSimulatorTransitionKind, )} streamConfig={streamConfig} + streamAudioMuted={streamAudioMuted} streamTransport={streamTransport} simulatorMenuOpen={simulatorMenuOpen} simulatorMenuRef={simulatorMenuRef} diff --git a/packages/client/src/features/simulators/SimulatorMenu.tsx b/packages/client/src/features/simulators/SimulatorMenu.tsx index 0347f7bf..c6c35e49 100644 --- a/packages/client/src/features/simulators/SimulatorMenu.tsx +++ b/packages/client/src/features/simulators/SimulatorMenu.tsx @@ -34,6 +34,7 @@ interface SimulatorMenuProps { onStreamFpsChange: (fps: StreamFps) => void; onStreamQualityChange: (quality: StreamQualityPreset) => void; onStreamTransportChange: (transport: StreamTransport) => void; + onToggleStreamAudioMuted: () => void; onToggleAppearance: () => void; onToggleDebug: () => void; onToggleMenu: () => void; @@ -47,6 +48,7 @@ interface SimulatorMenuProps { showBootButton: boolean; showStopButton: boolean; streamConfig: StreamConfig; + streamAudioMuted: boolean; streamTransport: StreamTransport; touchOverlayVisible: boolean; } @@ -74,6 +76,7 @@ export function SimulatorMenu({ onStreamFpsChange, onStreamQualityChange, onStreamTransportChange, + onToggleStreamAudioMuted, onToggleAppearance, onToggleDebug, onToggleMenu, @@ -87,6 +90,7 @@ export function SimulatorMenu({ showBootButton, showStopButton, streamConfig, + streamAudioMuted, streamTransport, touchOverlayVisible, }: SimulatorMenuProps) { @@ -200,6 +204,14 @@ export function SimulatorMenu({ )} +
diff --git a/packages/client/src/features/stream/streamTypes.ts b/packages/client/src/features/stream/streamTypes.ts index 4e9b6099..acee1ae2 100644 --- a/packages/client/src/features/stream/streamTypes.ts +++ b/packages/client/src/features/stream/streamTypes.ts @@ -1,6 +1,7 @@ import type { Size } from "../viewport/types"; export interface StreamConnectTarget { + audioMuted?: boolean; clientId?: string; platform?: string; remote?: boolean; diff --git a/packages/client/src/features/stream/streamWorkerClient.ts b/packages/client/src/features/stream/streamWorkerClient.ts index b49842a4..9aa73b70 100644 --- a/packages/client/src/features/stream/streamWorkerClient.ts +++ b/packages/client/src/features/stream/streamWorkerClient.ts @@ -97,6 +97,10 @@ export function sendWebRtcStreamControl(options: { ); } +export function setActiveStreamAudioMuted(muted: boolean) { + activeStreamClient?.setAudioMuted(muted); +} + function sendStreamQualityConfig(config: StreamConfig): boolean { const encoded = JSON.stringify({ config: streamQualityPayload(config), @@ -230,6 +234,7 @@ function compareVideoToImage( export function buildStreamTarget( udid: string, options: { + audioMuted?: boolean; clientId?: string; platform?: string; remote?: boolean; @@ -238,6 +243,7 @@ export function buildStreamTarget( } = {}, ): StreamConnectTarget { return { + audioMuted: options.audioMuted, clientId: options.clientId, platform: options.platform, remote: options.remote, @@ -290,6 +296,7 @@ interface StreamClientBackend { disconnect(): void; applyStreamConfig?(config?: StreamConfig): void | Promise; sendControl?(payload: unknown): boolean; + setAudioMuted?(muted: boolean): void; } export interface VisualArtifactSample { @@ -389,6 +396,11 @@ interface WebCodecsVideoDecoderConstructor { } interface WebRtcAnswerPayload extends RTCSessionDescriptionInit { + audio?: { + channels?: number; + codec?: string; + sampleRate?: number; + }; video?: { height?: number; width?: number; @@ -1295,6 +1307,8 @@ function hexByte(byte: number): string { } class WebRtcStreamClient implements StreamClientBackend { + private audioElement: HTMLAudioElement | null = null; + private audioMuted = true; private animationFrame = 0; private canvas: HTMLCanvasElement | null = null; private canvasContext: CanvasRenderingContext2D | null = null; @@ -1408,6 +1422,7 @@ class WebRtcStreamClient implements StreamClientBackend { this.shouldReconnect = true; this.remoteMode = Boolean(target.remote); this.streamTarget = target; + this.audioMuted = target.audioMuted ?? true; if (!wasReconnecting) { this.reconnectDelayMs = WEBRTC_RECONNECT_BASE_DELAY_MS; } @@ -1435,6 +1450,14 @@ class WebRtcStreamClient implements StreamClientBackend { const useRgbaTransport = shouldUseLocalAndroidRgbaWebRtc(target); this.rgbaMode = useRgbaTransport; this.attachDiagnostics(peerConnection, target, generation); + const audioTransceiver = peerConnection.addTransceiver("audio", { + direction: "recvonly", + }); + configureAudioReceiverCodecPreferences(audioTransceiver); + configureLowLatencyReceiver( + audioTransceiver.receiver, + receiverBufferSeconds(target), + ); if (!useRgbaTransport) { this.startReceiverStatsPolling(peerConnection, target, generation); const transceiver = peerConnection.addTransceiver("video", { @@ -1485,17 +1508,21 @@ class WebRtcStreamClient implements StreamClientBackend { }; peerConnection.ontrack = (event) => { - if (useRgbaTransport) { + if (generation !== this.connectGeneration) { return; } - if (generation !== this.connectGeneration) { + if (event.track.kind === "audio") { + this.attachAudioTrack(event.track, generation); + return; + } + if (useRgbaTransport || event.track.kind !== "video") { return; } event.track.contentHint = "motion"; for (const receiver of peerConnection.getReceivers()) { configureLowLatencyReceiver(receiver, receiverBufferSeconds(target)); } - const stream = event.streams[0] ?? new MediaStream([event.track]); + const stream = new MediaStream([event.track]); const video = document.createElement("video"); video.autoplay = true; video.className = "stream-video"; @@ -1606,6 +1633,19 @@ class WebRtcStreamClient implements StreamClientBackend { return sendDataChannelMessage(this.controlChannel, JSON.stringify(payload)); } + setAudioMuted(muted: boolean) { + this.audioMuted = muted; + if (!this.audioElement) { + return; + } + this.audioElement.muted = muted; + if (!muted) { + void this.audioElement.play().catch(() => { + // Some browsers require the menu click that unmutes to happen in the page. + }); + } + } + async applyStreamConfig(config?: StreamConfig) { if (!config) { return; @@ -1703,6 +1743,12 @@ class WebRtcStreamClient implements StreamClientBackend { this.video.remove(); } this.video = null; + this.audioElement?.pause(); + if (this.audioElement) { + this.audioElement.srcObject = null; + this.audioElement.remove(); + } + this.audioElement = null; this.reportedVideoHeight = 0; this.reportedVideoWidth = 0; this.controlChannel?.close(); @@ -2122,6 +2168,33 @@ class WebRtcStreamClient implements StreamClientBackend { } } + private attachAudioTrack(track: MediaStreamTrack, generation: number) { + this.audioElement?.pause(); + if (this.audioElement) { + this.audioElement.srcObject = null; + this.audioElement.remove(); + } + const audio = document.createElement("audio"); + audio.autoplay = true; + audio.muted = this.audioMuted; + audio.preload = "auto"; + audio.srcObject = new MediaStream([track]); + audio.style.display = "none"; + document.body.appendChild(audio); + this.audioElement = audio; + const startPlayback = () => { + if (generation !== this.connectGeneration || audio !== this.audioElement) { + return; + } + void audio.play().catch(() => { + // Muted autoplay is best effort; unmuting from the menu retries playback. + }); + }; + audio.addEventListener("loadedmetadata", startPlayback); + audio.addEventListener("canplay", startPlayback); + startPlayback(); + } + private attachRgbaDataChannel(channel: RTCDataChannel, generation: number) { this.rgbaChannel?.close(); this.rgbaChannel = channel; @@ -2756,6 +2829,24 @@ function configureReceiverCodecPreferences(transceiver: RTCRtpTransceiver) { ]); } +function configureAudioReceiverCodecPreferences(transceiver: RTCRtpTransceiver) { + if (!transceiver.setCodecPreferences) { + return; + } + const capabilities = RTCRtpReceiver.getCapabilities("audio"); + const codecs = capabilities?.codecs ?? []; + const preferred = codecs.filter( + (codec) => codec.mimeType.toLowerCase() === "audio/pcmu", + ); + if (preferred.length === 0) { + return; + } + transceiver.setCodecPreferences([ + ...preferred, + ...codecs.filter((codec) => codec.mimeType.toLowerCase() !== "audio/pcmu"), + ]); +} + function safariBaselineH264Offer( offer: RTCSessionDescriptionInit, ): RTCSessionDescriptionInit { @@ -3033,6 +3124,10 @@ export class StreamWorkerClient { ); } + setAudioMuted(muted: boolean) { + this.backend?.setAudioMuted?.(muted); + } + applyStreamConfig(config?: StreamConfig) { try { const result = this.backend?.applyStreamConfig?.(config); diff --git a/packages/client/src/features/stream/useLiveStream.ts b/packages/client/src/features/stream/useLiveStream.ts index 77990731..350c4f53 100644 --- a/packages/client/src/features/stream/useLiveStream.ts +++ b/packages/client/src/features/stream/useLiveStream.ts @@ -28,6 +28,7 @@ const CLIENT_TELEMETRY_ID_STORAGE_KEY = "simdeck.streamClientId"; const VISUAL_ARTIFACT_TELEMETRY_INTERVAL_MS = 30000; interface UseLiveStreamOptions { + audioMuted?: boolean; canvasElement: HTMLCanvasElement | null; paused?: boolean; remote?: boolean; @@ -108,6 +109,7 @@ function isViewerForeground(canvasVisible: boolean): boolean { } export function useLiveStream({ + audioMuted = true, canvasElement, paused = false, remote = false, @@ -370,6 +372,7 @@ export function useLiveStream({ workerClient.connect( buildStreamTarget(simulator.udid, { clientId: clientTelemetryIdRef.current, + audioMuted, platform: simulator.platform, remote, streamConfig, @@ -387,8 +390,13 @@ export function useLiveStream({ paused, remote, streamTransport, + audioMuted, ]); + useEffect(() => { + workerClientRef.current?.setAudioMuted(audioMuted); + }, [audioMuted]); + useEffect(() => { if (!simulator?.udid || paused) { return; diff --git a/packages/client/src/features/toolbar/Toolbar.tsx b/packages/client/src/features/toolbar/Toolbar.tsx index 128eb85a..ad72b4d9 100644 --- a/packages/client/src/features/toolbar/Toolbar.tsx +++ b/packages/client/src/features/toolbar/Toolbar.tsx @@ -51,6 +51,7 @@ interface ToolbarProps { onStreamFpsChange: (fps: StreamFps) => void; onStreamQualityChange: (quality: StreamQualityPreset) => void; onStreamTransportChange: (transport: StreamTransport) => void; + onToggleStreamAudioMuted: () => void; onToggleAppearance: () => void; onToggleDebug: () => void; onToggleDevTools: () => void; @@ -71,6 +72,7 @@ interface ToolbarProps { showBootButton: boolean; showStopButton: boolean; streamConfig: StreamConfig; + streamAudioMuted: boolean; streamTransport: StreamTransport; touchOverlayVisible: boolean; menuOpen: boolean; @@ -114,6 +116,7 @@ export function Toolbar({ onStreamFpsChange, onStreamQualityChange, onStreamTransportChange, + onToggleStreamAudioMuted, onToggleAppearance, onToggleDebug, onToggleDevTools, @@ -133,6 +136,7 @@ export function Toolbar({ showBootButton, showStopButton, streamConfig, + streamAudioMuted, streamTransport, simulatorMenuOpen, simulatorMenuRef, @@ -194,6 +198,7 @@ export function Toolbar({ onStreamFpsChange={onStreamFpsChange} onStreamQualityChange={onStreamQualityChange} onStreamTransportChange={onStreamTransportChange} + onToggleStreamAudioMuted={onToggleStreamAudioMuted} onToggleAppearance={onToggleAppearance} onToggleDebug={onToggleDebug} onToggleMenu={onToggleMenu} @@ -208,6 +213,7 @@ export function Toolbar({ showStopButton={showStopButton} canInstallApp={canInstallApp} streamConfig={streamConfig} + streamAudioMuted={streamAudioMuted} streamTransport={streamTransport} touchOverlayVisible={touchOverlayVisible} /> diff --git a/packages/client/src/styles/components.css b/packages/client/src/styles/components.css index 7fb17266..2c7ca799 100644 --- a/packages/client/src/styles/components.css +++ b/packages/client/src/styles/components.css @@ -413,6 +413,22 @@ text-transform: uppercase; } +.menu-toggle { + display: flex; + align-items: center; + gap: 8px; + min-height: 30px; + color: var(--text); + font-size: 12px; +} + +.menu-toggle input { + width: 15px; + height: 15px; + flex: 0 0 auto; + accent-color: var(--accent); +} + .menu-select { width: 100%; height: 32px; diff --git a/packages/server/build.rs b/packages/server/build.rs index 7606854e..7b763821 100644 --- a/packages/server/build.rs +++ b/packages/server/build.rs @@ -101,7 +101,9 @@ fn main() { "Foundation", "Accelerate", "AppKit", + "AudioToolbox", "AVFoundation", + "CoreAudio", "CoreImage", "CoreGraphics", "CoreMedia", diff --git a/packages/server/native/bridge/XCWNativeBridge.h b/packages/server/native/bridge/XCWNativeBridge.h index 35ffc647..cd6d6fca 100644 --- a/packages/server/native/bridge/XCWNativeBridge.h +++ b/packages/server/native/bridge/XCWNativeBridge.h @@ -28,7 +28,15 @@ typedef struct xcw_native_frame { xcw_native_shared_bytes data; } xcw_native_frame; +typedef struct xcw_native_audio_sample { + uint64_t timestamp_us; + uint32_t sample_rate; + uint16_t channels; + xcw_native_shared_bytes data; +} xcw_native_audio_sample; + typedef void (*xcw_native_frame_callback)(const xcw_native_frame * _Nonnull frame, void * _Nullable user_data); +typedef void (*xcw_native_audio_callback)(const xcw_native_audio_sample * _Nonnull sample, void * _Nullable user_data); void xcw_native_initialize_app(void); void xcw_native_run_main_loop_slice(double duration_seconds); @@ -109,6 +117,10 @@ void xcw_native_h264_encoder_destroy(void * _Nullable handle); bool xcw_native_h264_encoder_encode_rgba(void * _Nonnull handle, const uint8_t * _Nonnull rgba, size_t length, uint32_t width, uint32_t height, uint64_t timestamp_us, char * _Nullable * _Nullable error_message); void xcw_native_h264_encoder_request_keyframe(void * _Nonnull handle); +void * _Nullable xcw_native_audio_capture_create(const int32_t * _Nullable process_ids, size_t process_count, xcw_native_audio_callback _Nullable callback, void * _Nullable user_data, char * _Nullable * _Nullable error_message); +bool xcw_native_audio_capture_update_processes(void * _Nonnull handle, const int32_t * _Nullable process_ids, size_t process_count, char * _Nullable * _Nullable error_message); +void xcw_native_audio_capture_destroy(void * _Nullable handle); + void xcw_native_free_string(char * _Nullable value); void xcw_native_free_bytes(xcw_native_owned_bytes bytes); void xcw_native_release_shared_bytes(xcw_native_shared_bytes bytes); diff --git a/packages/server/native/bridge/XCWNativeBridge.m b/packages/server/native/bridge/XCWNativeBridge.m index 1c2746d7..5cccd444 100644 --- a/packages/server/native/bridge/XCWNativeBridge.m +++ b/packages/server/native/bridge/XCWNativeBridge.m @@ -8,8 +8,15 @@ #import "XCWSimctl.h" #import +#import +#import +#import +#import #import +#import #import +#include +#include #include #include @@ -315,6 +322,523 @@ - (void)invalidate { @end +static NSString *XCWAudioDictionaryKey(const char *key) { + return [NSString stringWithUTF8String:key] ?: @""; +} + +static NSString *XCWAudioOSStatusString(OSStatus status) { + UInt32 code = CFSwapInt32HostToBig((UInt32)status); + char text[5] = {0}; + memcpy(text, &code, 4); + BOOL printable = YES; + for (NSUInteger index = 0; index < 4; index++) { + if (text[index] < 32 || text[index] > 126) { + printable = NO; + break; + } + } + if (printable) { + return [NSString stringWithFormat:@"%d ('%s')", (int)status, text]; + } + return [NSString stringWithFormat:@"%d", (int)status]; +} + +static NSError *XCWAudioCaptureError(NSInteger code, NSString *description) { + return [NSError errorWithDomain:@"SimDeck.AudioCapture" + code:code + userInfo:@{ NSLocalizedDescriptionKey: description ?: @"Audio capture failed." }]; +} + +static NSError *XCWAudioCaptureStatusError(NSInteger code, NSString *operation, OSStatus status) { + return XCWAudioCaptureError(code, [NSString stringWithFormat:@"%@ failed with OSStatus %@.", operation, XCWAudioOSStatusString(status)]); +} + +static int16_t XCWClampPCM16(double value) { + if (!isfinite(value)) { + return 0; + } + if (value <= -1.0) { + return INT16_MIN; + } + if (value >= 1.0) { + return INT16_MAX; + } + return (int16_t)lrint(value * 32767.0); +} + +static int16_t XCWReadPCM16Sample(const AudioBufferList *bufferList, + const AudioStreamBasicDescription *asbd, + NSUInteger frame, + NSUInteger channel) { + if (bufferList == NULL || asbd == NULL || bufferList->mNumberBuffers == 0) { + return 0; + } + + const UInt32 bitsPerChannel = asbd->mBitsPerChannel; + const NSUInteger bytesPerSample = MAX((NSUInteger)bitsPerChannel / 8, 1); + const BOOL nonInterleaved = (asbd->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0; + const BOOL isFloat = (asbd->mFormatFlags & kAudioFormatFlagIsFloat) != 0; + const BOOL isSigned = (asbd->mFormatFlags & kAudioFormatFlagIsSignedInteger) != 0; + const BOOL isBigEndian = (asbd->mFormatFlags & kAudioFormatFlagIsBigEndian) != 0; + const NSUInteger sourceChannels = MAX((NSUInteger)asbd->mChannelsPerFrame, 1); + const NSUInteger bufferIndex = nonInterleaved + ? MIN(channel, (NSUInteger)bufferList->mNumberBuffers - 1) + : 0; + const NSUInteger channelInBuffer = nonInterleaved ? 0 : MIN(channel, sourceChannels - 1); + const AudioBuffer buffer = bufferList->mBuffers[bufferIndex]; + if (buffer.mData == NULL || buffer.mDataByteSize == 0) { + return 0; + } + + const NSUInteger fallbackBytesPerFrame = bytesPerSample * (nonInterleaved ? 1 : sourceChannels); + const NSUInteger bytesPerFrame = MAX((NSUInteger)asbd->mBytesPerFrame, fallbackBytesPerFrame); + const NSUInteger offset = frame * bytesPerFrame + channelInBuffer * bytesPerSample; + if (offset + bytesPerSample > buffer.mDataByteSize) { + return 0; + } + + const uint8_t *sample = (const uint8_t *)buffer.mData + offset; + if (isFloat && bytesPerSample == sizeof(float)) { + float value = 0.0f; + memcpy(&value, sample, sizeof(value)); + return XCWClampPCM16((double)value); + } + if (isFloat && bytesPerSample == sizeof(double)) { + double value = 0.0; + memcpy(&value, sample, sizeof(value)); + return XCWClampPCM16(value); + } + if (bytesPerSample == sizeof(int16_t)) { + uint16_t raw = 0; + memcpy(&raw, sample, sizeof(raw)); + if (isBigEndian) { + raw = CFSwapInt16BigToHost(raw); + } + return (int16_t)raw; + } + if (bytesPerSample == sizeof(int32_t)) { + uint32_t raw = 0; + memcpy(&raw, sample, sizeof(raw)); + if (isBigEndian) { + raw = CFSwapInt32BigToHost(raw); + } + return (int16_t)(((int32_t)raw) >> 16); + } + if (bytesPerSample == sizeof(uint8_t)) { + if (isSigned) { + return (int16_t)(((int8_t)sample[0]) << 8); + } + return (int16_t)(((int)sample[0] - 128) << 8); + } + + return 0; +} + +static NSUInteger XCWAudioFrameCount(const AudioBufferList *bufferList, + const AudioStreamBasicDescription *asbd) { + if (bufferList == NULL || asbd == NULL || bufferList->mNumberBuffers == 0) { + return 0; + } + const AudioBuffer buffer = bufferList->mBuffers[0]; + if (buffer.mData == NULL || buffer.mDataByteSize == 0) { + return 0; + } + const NSUInteger bytesPerSample = MAX((NSUInteger)asbd->mBitsPerChannel / 8, 1); + const BOOL nonInterleaved = (asbd->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0; + const NSUInteger sourceChannels = MAX((NSUInteger)asbd->mChannelsPerFrame, 1); + const NSUInteger fallbackBytesPerFrame = bytesPerSample * (nonInterleaved ? 1 : sourceChannels); + const NSUInteger bytesPerFrame = MAX((NSUInteger)asbd->mBytesPerFrame, fallbackBytesPerFrame); + if (bytesPerFrame == 0) { + return 0; + } + return (NSUInteger)buffer.mDataByteSize / bytesPerFrame; +} + +static NSData *XCWPCM16InterleavedDataFromAudioBufferList(const AudioBufferList *bufferList, + const AudioStreamBasicDescription *asbd, + uint32_t *sampleRate, + uint16_t *channels) { + if (bufferList == NULL || asbd == NULL || asbd->mFormatID != kAudioFormatLinearPCM) { + return nil; + } + const NSUInteger frameCount = XCWAudioFrameCount(bufferList, asbd); + const NSUInteger sourceChannels = MAX((NSUInteger)asbd->mChannelsPerFrame, 1); + const NSUInteger outputChannels = MIN(sourceChannels, (NSUInteger)2); + if (frameCount == 0 || outputChannels == 0) { + return nil; + } + + NSMutableData *output = [NSMutableData dataWithLength:frameCount * outputChannels * sizeof(int16_t)]; + int16_t *outputSamples = (int16_t *)output.mutableBytes; + for (NSUInteger frame = 0; frame < frameCount; frame++) { + for (NSUInteger channel = 0; channel < outputChannels; channel++) { + outputSamples[frame * outputChannels + channel] = XCWReadPCM16Sample(bufferList, asbd, frame, channel); + } + } + + if (sampleRate != NULL) { + *sampleRate = (uint32_t)llround(asbd->mSampleRate > 0 ? asbd->mSampleRate : 48000.0); + } + if (channels != NULL) { + *channels = (uint16_t)outputChannels; + } + return output; +} + +static uint64_t XCWAudioTimestampUS(const AudioTimeStamp *timeStamp) { + if (timeStamp != NULL && (timeStamp->mFlags & kAudioTimeStampHostTimeValid) != 0 && timeStamp->mHostTime != 0) { + return AudioConvertHostTimeToNanos(timeStamp->mHostTime) / 1000; + } + return (uint64_t)llround([[NSDate date] timeIntervalSince1970] * 1000000.0); +} + +static AudioObjectID XCWAudioProcessObjectIDForPID(pid_t pid) { + if (pid <= 0) { + return kAudioObjectUnknown; + } + AudioObjectPropertyAddress address = { + .mSelector = kAudioHardwarePropertyTranslatePIDToProcessObject, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMain, + }; + AudioObjectID processObjectID = kAudioObjectUnknown; + UInt32 size = sizeof(processObjectID); + OSStatus status = AudioObjectGetPropertyData(kAudioObjectSystemObject, + &address, + sizeof(pid), + &pid, + &size, + &processObjectID); + if (status != noErr) { + return kAudioObjectUnknown; + } + return processObjectID; +} + +static NSArray *XCWAudioProcessObjectIDsForProcessIDs(const int32_t *processIDs, + size_t processCount) { + NSMutableSet *seen = [NSMutableSet set]; + NSMutableArray *objects = [NSMutableArray array]; + for (size_t index = 0; index < processCount; index++) { + pid_t pid = (pid_t)processIDs[index]; + if (pid <= 0) { + continue; + } + AudioObjectID objectID = XCWAudioProcessObjectIDForPID(pid); + if (objectID == kAudioObjectUnknown) { + continue; + } + NSNumber *boxed = @(objectID); + if ([seen containsObject:boxed]) { + continue; + } + [seen addObject:boxed]; + [objects addObject:boxed]; + } + [objects sortUsingSelector:@selector(compare:)]; + return objects; +} + +static CATapDescription *XCWAudioTapDescription(NSArray *processObjectIDs) API_AVAILABLE(macos(14.2)) { + CATapDescription *description = [[CATapDescription alloc] initStereoMixdownOfProcesses:processObjectIDs]; + description.name = @"SimDeck Simulator Audio"; + description.privateTap = YES; + description.muteBehavior = CATapMutedWhenTapped; + description.mixdown = YES; + description.mono = NO; + description.exclusive = NO; + return description; +} + +static NSString *XCWAudioTapUID(AudioObjectID tapID, NSError * _Nullable __autoreleasing *error) { + AudioObjectPropertyAddress address = { + .mSelector = kAudioTapPropertyUID, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMain, + }; + CFStringRef uid = NULL; + UInt32 size = sizeof(uid); + OSStatus status = AudioObjectGetPropertyData(tapID, &address, 0, NULL, &size, &uid); + if (status != noErr || uid == NULL) { + if (error != NULL) { + *error = XCWAudioCaptureStatusError(22, @"Read Core Audio tap UID", status); + } + return nil; + } + return CFBridgingRelease(uid); +} + +static BOOL XCWAudioGetObjectStreamFormat(AudioObjectID objectID, + AudioObjectPropertySelector selector, + AudioObjectPropertyScope scope, + AudioStreamBasicDescription *asbd) { + if (asbd == NULL || objectID == kAudioObjectUnknown) { + return NO; + } + AudioObjectPropertyAddress address = { + .mSelector = selector, + .mScope = scope, + .mElement = kAudioObjectPropertyElementMain, + }; + UInt32 size = sizeof(*asbd); + OSStatus status = AudioObjectGetPropertyData(objectID, &address, 0, NULL, &size, asbd); + return status == noErr && asbd->mSampleRate > 0 && asbd->mChannelsPerFrame > 0; +} + +@class XCWNativeAudioCapture; +static OSStatus XCWNativeAudioDeviceIOProc(AudioObjectID inDevice, + const AudioTimeStamp *inNow, + const AudioBufferList *inInputData, + const AudioTimeStamp *inInputTime, + AudioBufferList *outOutputData, + const AudioTimeStamp *inOutputTime, + void *inClientData); + +@interface XCWNativeAudioCapture : NSObject + +- (instancetype)initWithAudioCallback:(xcw_native_audio_callback)callback + userData:(void *)userData; +- (BOOL)startWithProcessIDs:(const int32_t *)processIDs + count:(size_t)processCount + error:(NSError * _Nullable __autoreleasing *)error; +- (BOOL)updateProcessIDs:(const int32_t *)processIDs + count:(size_t)processCount + error:(NSError * _Nullable __autoreleasing *)error; +- (void)invalidate; +- (void)handleInputData:(const AudioBufferList *)inputData + inputTime:(const AudioTimeStamp *)inputTime; + +@end + +@implementation XCWNativeAudioCapture { + xcw_native_audio_callback _callback; + void *_callbackUserData; + BOOL _invalidated; + AudioObjectID _tapID; + AudioObjectID _aggregateDeviceID; + AudioDeviceIOProcID _ioProcID; + AudioStreamBasicDescription _streamDescription; + NSArray *_processObjectIDs; +} + +- (instancetype)initWithAudioCallback:(xcw_native_audio_callback)callback + userData:(void *)userData { + self = [super init]; + if (self == nil) { + return nil; + } + _callback = callback; + _callbackUserData = userData; + _tapID = kAudioObjectUnknown; + _aggregateDeviceID = kAudioObjectUnknown; + _ioProcID = NULL; + _processObjectIDs = @[]; + return self; +} + +- (void)dealloc { + [self invalidate]; +} + +- (BOOL)startWithProcessIDs:(const int32_t *)processIDs + count:(size_t)processCount + error:(NSError * _Nullable __autoreleasing *)error { + return [self rebuildWithProcessIDs:processIDs count:processCount requireProcesses:YES error:error]; +} + +- (BOOL)updateProcessIDs:(const int32_t *)processIDs + count:(size_t)processCount + error:(NSError * _Nullable __autoreleasing *)error { + return [self rebuildWithProcessIDs:processIDs count:processCount requireProcesses:NO error:error]; +} + +- (BOOL)rebuildWithProcessIDs:(const int32_t *)processIDs + count:(size_t)processCount + requireProcesses:(BOOL)requireProcesses + error:(NSError * _Nullable __autoreleasing *)error { + if (@available(macOS 14.2, *)) { + NSArray *processObjectIDs = XCWAudioProcessObjectIDsForProcessIDs(processIDs, processCount); + if (processObjectIDs.count == 0) { + [self stopGraph]; + if (requireProcesses && error != NULL) { + *error = XCWAudioCaptureError(20, @"No simulator audio processes are currently connected to Core Audio."); + } + return !requireProcesses; + } + if (_aggregateDeviceID != kAudioObjectUnknown && [_processObjectIDs isEqualToArray:processObjectIDs]) { + return YES; + } + [self stopGraph]; + return [self startGraphWithProcessObjectIDs:processObjectIDs error:error]; + } + + if (error != NULL) { + *error = XCWAudioCaptureError(21, @"Per-simulator audio capture requires macOS 14.2 or newer."); + } + return NO; +} + +- (BOOL)startGraphWithProcessObjectIDs:(NSArray *)processObjectIDs + error:(NSError * _Nullable __autoreleasing *)error API_AVAILABLE(macos(14.2)) { + CATapDescription *tapDescription = XCWAudioTapDescription(processObjectIDs); + OSStatus status = AudioHardwareCreateProcessTap(tapDescription, &_tapID); + if (status != noErr || _tapID == kAudioObjectUnknown) { + if (error != NULL) { + *error = XCWAudioCaptureStatusError(23, @"Create Core Audio process tap", status); + } + _tapID = kAudioObjectUnknown; + return NO; + } + + NSError *tapUIDError = nil; + NSString *tapUID = XCWAudioTapUID(_tapID, &tapUIDError); + if (tapUID.length == 0) { + if (error != NULL) { + *error = tapUIDError ?: XCWAudioCaptureError(24, @"Core Audio process tap did not expose a UID."); + } + [self stopGraph]; + return NO; + } + + NSString *aggregateUID = [NSString stringWithFormat:@"dev.simdeck.audio.%@", NSUUID.UUID.UUIDString]; + NSDictionary *aggregateDescription = @{ + XCWAudioDictionaryKey(kAudioAggregateDeviceNameKey): @"SimDeck Simulator Audio", + XCWAudioDictionaryKey(kAudioAggregateDeviceUIDKey): aggregateUID, + XCWAudioDictionaryKey(kAudioAggregateDeviceIsPrivateKey): @YES, + XCWAudioDictionaryKey(kAudioAggregateDeviceTapListKey): @[ + @{ XCWAudioDictionaryKey(kAudioSubTapUIDKey): tapUID } + ], + }; + status = AudioHardwareCreateAggregateDevice((__bridge CFDictionaryRef)aggregateDescription, &_aggregateDeviceID); + if (status != noErr || _aggregateDeviceID == kAudioObjectUnknown) { + if (error != NULL) { + *error = XCWAudioCaptureStatusError(25, @"Create Core Audio aggregate device", status); + } + [self stopGraph]; + return NO; + } + + CFArrayRef tapList = (__bridge CFArrayRef)@[ tapUID ]; + AudioObjectPropertyAddress tapListAddress = { + .mSelector = kAudioAggregateDevicePropertyTapList, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMain, + }; + status = AudioObjectSetPropertyData(_aggregateDeviceID, + &tapListAddress, + 0, + NULL, + sizeof(tapList), + &tapList); + if (status != noErr) { + if (error != NULL) { + *error = XCWAudioCaptureStatusError(26, @"Attach Core Audio tap to aggregate device", status); + } + [self stopGraph]; + return NO; + } + + memset(&_streamDescription, 0, sizeof(_streamDescription)); + if (!XCWAudioGetObjectStreamFormat(_aggregateDeviceID, kAudioDevicePropertyStreamFormat, kAudioObjectPropertyScopeInput, &_streamDescription) && + !XCWAudioGetObjectStreamFormat(_tapID, kAudioTapPropertyFormat, kAudioObjectPropertyScopeGlobal, &_streamDescription)) { + if (error != NULL) { + *error = XCWAudioCaptureError(27, @"Core Audio tap did not expose a readable linear PCM format."); + } + [self stopGraph]; + return NO; + } + + status = AudioDeviceCreateIOProcID(_aggregateDeviceID, + XCWNativeAudioDeviceIOProc, + (__bridge void *)self, + &_ioProcID); + if (status != noErr || _ioProcID == NULL) { + if (error != NULL) { + *error = XCWAudioCaptureStatusError(28, @"Create Core Audio tap IOProc", status); + } + [self stopGraph]; + return NO; + } + + status = AudioDeviceStart(_aggregateDeviceID, _ioProcID); + if (status != noErr) { + if (error != NULL) { + *error = XCWAudioCaptureStatusError(29, @"Start Core Audio tap device", status); + } + [self stopGraph]; + return NO; + } + + _processObjectIDs = [processObjectIDs copy]; + return YES; +} + +- (void)stopGraph { + if (_aggregateDeviceID != kAudioObjectUnknown && _ioProcID != NULL) { + AudioDeviceStop(_aggregateDeviceID, _ioProcID); + AudioDeviceDestroyIOProcID(_aggregateDeviceID, _ioProcID); + _ioProcID = NULL; + } + if (_aggregateDeviceID != kAudioObjectUnknown) { + AudioHardwareDestroyAggregateDevice(_aggregateDeviceID); + _aggregateDeviceID = kAudioObjectUnknown; + } + if (_tapID != kAudioObjectUnknown) { + AudioHardwareDestroyProcessTap(_tapID); + _tapID = kAudioObjectUnknown; + } + _processObjectIDs = @[]; + memset(&_streamDescription, 0, sizeof(_streamDescription)); +} + +- (void)invalidate { + _invalidated = YES; + [self stopGraph]; +} + +- (void)handleInputData:(const AudioBufferList *)inputData + inputTime:(const AudioTimeStamp *)inputTime { + if (_invalidated || _callback == NULL || inputData == NULL) { + return; + } + + AudioStreamBasicDescription streamDescription = _streamDescription; + uint32_t sampleRate = 0; + uint16_t channels = 0; + NSData *pcm = XCWPCM16InterleavedDataFromAudioBufferList(inputData, &streamDescription, &sampleRate, &channels); + if (pcm.length == 0 || sampleRate == 0 || channels == 0) { + return; + } + + xcw_native_audio_sample sample = { + .timestamp_us = XCWAudioTimestampUS(inputTime), + .sample_rate = sampleRate, + .channels = channels, + .data = XCWSharedBytesFromData(pcm), + }; + _callback(&sample, _callbackUserData); +} + +@end + +static OSStatus XCWNativeAudioDeviceIOProc(AudioObjectID inDevice, + const AudioTimeStamp *inNow, + const AudioBufferList *inInputData, + const AudioTimeStamp *inInputTime, + AudioBufferList *outOutputData, + const AudioTimeStamp *inOutputTime, + void *inClientData) { + (void)inDevice; + (void)inNow; + (void)outOutputData; + (void)inOutputTime; + @autoreleasepool { + XCWNativeAudioCapture *capture = (__bridge XCWNativeAudioCapture *)inClientData; + [capture handleInputData:inInputData inputTime:inInputTime]; + } + return noErr; +} + static XCWNativeH264Encoder *XCWNativeH264EncoderFromHandle(void *handle) { return (__bridge XCWNativeH264Encoder *)handle; } @@ -1367,6 +1891,46 @@ void xcw_native_h264_encoder_request_keyframe(void *handle) { } } +void *xcw_native_audio_capture_create(const int32_t *process_ids, size_t process_count, xcw_native_audio_callback callback, void *user_data, char **error_message) { + @autoreleasepool { + XCWNativeAudioCapture *capture = [[XCWNativeAudioCapture alloc] initWithAudioCallback:callback + userData:user_data]; + NSError *error = nil; + BOOL ok = [capture startWithProcessIDs:process_ids count:process_count error:&error]; + if (!ok) { + XCWSetErrorMessage(error_message, error); + return NULL; + } + return (__bridge_retained void *)capture; + } +} + +bool xcw_native_audio_capture_update_processes(void *handle, const int32_t *process_ids, size_t process_count, char **error_message) { + if (handle == NULL) { + XCWSetErrorMessage(error_message, XCWAudioCaptureError(30, @"Audio capture handle is null.")); + return false; + } + @autoreleasepool { + XCWNativeAudioCapture *capture = (__bridge XCWNativeAudioCapture *)handle; + NSError *error = nil; + BOOL ok = [capture updateProcessIDs:process_ids count:process_count error:&error]; + if (!ok) { + XCWSetErrorMessage(error_message, error); + } + return ok; + } +} + +void xcw_native_audio_capture_destroy(void *handle) { + if (handle == NULL) { + return; + } + @autoreleasepool { + XCWNativeAudioCapture *capture = CFBridgingRelease(handle); + [capture invalidate]; + } +} + void xcw_native_free_string(char *value) { if (value != NULL) { free(value); diff --git a/packages/server/native_stubs.c b/packages/server/native_stubs.c index 861881a6..da28535d 100644 --- a/packages/server/native_stubs.c +++ b/packages/server/native_stubs.c @@ -26,8 +26,17 @@ typedef struct { xcw_native_shared_bytes data; } xcw_native_frame; +typedef struct { + uint64_t timestamp_us; + uint32_t sample_rate; + uint16_t channels; + xcw_native_shared_bytes data; +} xcw_native_audio_sample; + typedef void (*xcw_native_frame_callback)(const xcw_native_frame *frame, void *user_data); +typedef void (*xcw_native_audio_callback)(const xcw_native_audio_sample *sample, + void *user_data); static char *xcw_strdup(const char *value) { if (value == NULL) { @@ -577,6 +586,34 @@ bool xcw_native_h264_encoder_encode_rgba(void *handle, const uint8_t *rgba, void xcw_native_h264_encoder_request_keyframe(void *handle) { (void)handle; } +void *xcw_native_audio_capture_create(const int32_t *process_ids, + uintptr_t process_count, + xcw_native_audio_callback callback, + void *user_data, + char **error_message) { + (void)process_ids; + (void)process_count; + (void)callback; + (void)user_data; + xcw_set_error(error_message, + "Audio capture is only available in the macOS native bridge."); + return NULL; +} + +bool xcw_native_audio_capture_update_processes(void *handle, + const int32_t *process_ids, + uintptr_t process_count, + char **error_message) { + (void)handle; + (void)process_ids; + (void)process_count; + xcw_set_error(error_message, + "Audio capture is only available in the macOS native bridge."); + return false; +} + +void xcw_native_audio_capture_destroy(void *handle) { (void)handle; } + void xcw_native_free_string(char *value) { free(value); } void xcw_native_free_bytes(xcw_native_owned_bytes bytes) { free(bytes.data); } diff --git a/packages/server/src/android.rs b/packages/server/src/android.rs index 0688bf43..1f9a667d 100644 --- a/packages/server/src/android.rs +++ b/packages/server/src/android.rs @@ -324,7 +324,6 @@ impl AndroidBridge { "-avd", &avd_name, "-no-window", - "-no-audio", "-gpu", "swiftshader_indirect", "-grpc", diff --git a/packages/server/src/native/ffi.rs b/packages/server/src/native/ffi.rs index 8a8a47b5..043df61c 100644 --- a/packages/server/src/native/ffi.rs +++ b/packages/server/src/native/ffi.rs @@ -26,10 +26,22 @@ pub struct xcw_native_frame { pub data: xcw_native_shared_bytes, } +#[repr(C)] +pub struct xcw_native_audio_sample { + pub timestamp_us: u64, + pub sample_rate: u32, + pub channels: u16, + pub data: xcw_native_shared_bytes, +} + #[allow(non_camel_case_types)] pub type xcw_native_frame_callback = unsafe extern "C" fn(frame: *const xcw_native_frame, user_data: *mut c_void); +#[allow(non_camel_case_types)] +pub type xcw_native_audio_callback = + unsafe extern "C" fn(sample: *const xcw_native_audio_sample, user_data: *mut c_void); + unsafe extern "C" { pub fn simdeck_camera_list_webcams_json(error_message: *mut *mut c_char) -> *mut c_char; pub fn simdeck_camera_start( @@ -351,6 +363,21 @@ unsafe extern "C" { ) -> bool; pub fn xcw_native_h264_encoder_request_keyframe(handle: *mut c_void); + pub fn xcw_native_audio_capture_create( + process_ids: *const i32, + process_count: usize, + callback: Option, + user_data: *mut c_void, + error_message: *mut *mut c_char, + ) -> *mut c_void; + pub fn xcw_native_audio_capture_update_processes( + handle: *mut c_void, + process_ids: *const i32, + process_count: usize, + error_message: *mut *mut c_char, + ) -> bool; + pub fn xcw_native_audio_capture_destroy(handle: *mut c_void); + pub fn xcw_native_free_string(value: *mut c_char); pub fn xcw_native_free_bytes(bytes: xcw_native_owned_bytes); pub fn xcw_native_release_shared_bytes(bytes: xcw_native_shared_bytes); diff --git a/packages/server/src/transport/webrtc.rs b/packages/server/src/transport/webrtc.rs index 2391ceda..88ac8fd7 100644 --- a/packages/server/src/transport/webrtc.rs +++ b/packages/server/src/transport/webrtc.rs @@ -11,8 +11,9 @@ use crate::native::ffi; use crate::transport::packet::{FramePacket, SharedFrame}; use bytes::{BufMut, Bytes, BytesMut}; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, VecDeque}; +use std::collections::{BTreeSet, HashMap, VecDeque}; use std::ffi::{c_void, CStr}; +use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak}; use std::time::Duration; @@ -21,7 +22,7 @@ use tokio::task; use tokio::time::{self, Instant}; use tracing::{info, warn}; use webrtc::api::interceptor_registry::register_default_interceptors; -use webrtc::api::media_engine::{MediaEngine, MIME_TYPE_H264}; +use webrtc::api::media_engine::{MediaEngine, MIME_TYPE_H264, MIME_TYPE_PCMU}; use webrtc::api::APIBuilder; use webrtc::data_channel::data_channel_init::RTCDataChannelInit; use webrtc::data_channel::data_channel_message::DataChannelMessage; @@ -29,6 +30,7 @@ use webrtc::data_channel::data_channel_state::RTCDataChannelState; use webrtc::data_channel::RTCDataChannel; use webrtc::ice_transport::ice_server::RTCIceServer; use webrtc::interceptor::registry::Registry; +use webrtc::media::Sample as WebRtcSample; use webrtc::peer_connection::configuration::RTCConfiguration; use webrtc::peer_connection::peer_connection_state::RTCPeerConnectionState; use webrtc::peer_connection::policy::ice_transport_policy::RTCIceTransportPolicy; @@ -44,6 +46,7 @@ use webrtc::rtp_transceiver::rtp_codec::{ }; use webrtc::rtp_transceiver::RTCPFeedback; use webrtc::track::track_local::track_local_static_rtp::TrackLocalStaticRTP; +use webrtc::track::track_local::track_local_static_sample::TrackLocalStaticSample; use webrtc::track::track_local::TrackLocal; use webrtc::track::track_local::TrackLocalWriter; @@ -57,6 +60,7 @@ const WEBRTC_MAX_LOCAL_STREAM_FPS: u32 = 240; const WEBRTC_WRITE_TIMEOUT: Duration = Duration::from_millis(120); const WEBRTC_REALTIME_WRITE_TIMEOUT: Duration = Duration::from_millis(45); const WEBRTC_REALTIME_KEYFRAME_WRITE_TIMEOUT: Duration = Duration::from_millis(90); +const WEBRTC_AUDIO_WRITE_TIMEOUT: Duration = Duration::from_millis(120); const WEBRTC_INITIAL_KEYFRAME_TIMEOUT: Duration = Duration::from_secs(5); const WEBRTC_FAST_ICE_GATHER_TIMEOUT: Duration = Duration::from_millis(250); const WEBRTC_FULL_ICE_GATHER_TIMEOUT: Duration = Duration::from_secs(3); @@ -72,6 +76,11 @@ const ANDROID_WEBRTC_RGBA_VERSION: u8 = 1; const ANDROID_WEBRTC_RGBA_FORMAT_RGBA8888: u8 = 1; const ANDROID_WEBRTC_RGBA_BUFFERED_FRAME_LIMIT: usize = 2; const ANDROID_WEBRTC_FPS: u64 = 30; +const WEBRTC_AUDIO_PROCESS_REFRESH_INTERVAL: Duration = Duration::from_secs(1); +const WEBRTC_AUDIO_SAMPLE_RATE: u32 = 8_000; +const WEBRTC_AUDIO_CHANNELS: u16 = 1; +const WEBRTC_AUDIO_FRAME_SAMPLES: usize = 160; +const WEBRTC_AUDIO_FRAME_DURATION: Duration = Duration::from_millis(20); static WEBRTC_MEDIA_STREAMS: OnceLock>>> = OnceLock::new(); const MAX_WEBRTC_MEDIA_STREAMS_PER_UDID: usize = 16; @@ -100,9 +109,19 @@ pub struct WebRtcAnswerPayload { pub sdp: String, #[serde(rename = "type")] pub kind: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio: Option, pub video: WebRtcVideoMetadata, } +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct WebRtcAudioMetadata { + pub channels: u16, + pub codec: String, + pub sample_rate: u32, +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct WebRtcVideoMetadata { @@ -206,6 +225,7 @@ pub async fn create_answer( "WebRTC preview requires H.264. Restart SimDeck with `--video-codec auto`, `hardware`, or `software`.", )); } + let wants_audio = sdp_has_media_type(&payload.sdp, "audio"); let h264_fmtp_line = h264_sdp_fmtp_line(&codec, &payload.sdp); let mut media_engine = MediaEngine::default(); @@ -225,6 +245,9 @@ pub async fn create_answer( RTPCodecType::Video, ) .map_err(|error| AppError::internal(format!("register WebRTC H.264 codec: {error}")))?; + if wants_audio { + register_pcmu_audio_codec(&mut media_engine)?; + } let mut registry = Registry::new(); registry = register_default_interceptors(registry, &mut media_engine) .map_err(|error| AppError::internal(format!("register WebRTC interceptors: {error}")))?; @@ -277,6 +300,21 @@ pub async fn create_answer( .add_track(video_track.clone() as Arc) .await .map_err(|error| AppError::internal(format!("add WebRTC video track: {error}")))?; + let audio_track = if wants_audio { + let track = Arc::new(TrackLocalStaticSample::new( + pcmu_audio_codec_capability(), + "simdeck-audio".to_owned(), + "simdeck".to_owned(), + )); + let audio_sender = peer_connection + .add_track(track.clone() as Arc) + .await + .map_err(|error| AppError::internal(format!("add WebRTC audio track: {error}")))?; + tokio::spawn(async move { while audio_sender.read_rtcp().await.is_ok() {} }); + Some(track) + } else { + None + }; let rtcp_source = source.clone(); let rtcp_udid = udid.clone(); tokio::spawn(async move { @@ -346,6 +384,7 @@ pub async fn create_answer( first_frame, peer_connection, video_track, + audio_track, cancellation_token, cancellation, stream_control_rx, @@ -356,6 +395,11 @@ pub async fn create_answer( Ok(WebRtcAnswerPayload { sdp: local_description.sdp, kind: "answer".to_owned(), + audio: wants_audio.then(|| WebRtcAudioMetadata { + channels: WEBRTC_AUDIO_CHANNELS, + codec: "PCMU".to_owned(), + sample_rate: WEBRTC_AUDIO_SAMPLE_RATE, + }), video: WebRtcVideoMetadata { width: first_frame_width, height: first_frame_height, @@ -368,6 +412,7 @@ async fn create_android_rgba_answer( udid: String, payload: WebRtcOfferPayload, ) -> Result { + let wants_audio = sdp_has_media_type(&payload.sdp, "audio"); let source = AndroidWebRtcSource::start( state.android.clone(), state.metrics.clone(), @@ -387,7 +432,22 @@ async fn create_android_rgba_answer( ice_transport_policy_label() ); - let api = APIBuilder::new().build(); + let api = if wants_audio { + let mut media_engine = MediaEngine::default(); + register_pcmu_audio_codec(&mut media_engine)?; + let mut registry = Registry::new(); + registry = register_default_interceptors(registry, &mut media_engine).map_err(|error| { + AppError::internal(format!( + "register Android RGBA WebRTC interceptors: {error}" + )) + })?; + APIBuilder::new() + .with_media_engine(media_engine) + .with_interceptor_registry(registry) + .build() + } else { + APIBuilder::new().build() + }; let peer_connection = Arc::new( api.new_peer_connection(RTCConfiguration { ice_servers: ice_servers(), @@ -417,6 +477,23 @@ async fn create_android_rgba_answer( ) .await .map_err(|error| AppError::internal(format!("create RGBA WebRTC data channel: {error}")))?; + let audio_track = if wants_audio { + let track = Arc::new(TrackLocalStaticSample::new( + pcmu_audio_codec_capability(), + "simdeck-audio".to_owned(), + "simdeck".to_owned(), + )); + let audio_sender = peer_connection + .add_track(track.clone() as Arc) + .await + .map_err(|error| { + AppError::internal(format!("add Android RGBA WebRTC audio track: {error}")) + })?; + tokio::spawn(async move { while audio_sender.read_rtcp().await.is_ok() {} }); + Some(track) + } else { + None + }; let fast_gather = has_sdp_candidate_type(&payload.sdp, "host") && ice_transport_policy_label() == "all"; @@ -467,6 +544,7 @@ async fn create_android_rgba_answer( source, peer_connection, rgba_channel, + audio_track, cancellation_token, cancellation, stream_control_rx, @@ -477,6 +555,11 @@ async fn create_android_rgba_answer( Ok(WebRtcAnswerPayload { sdp: local_description.sdp, kind: "answer".to_owned(), + audio: wants_audio.then(|| WebRtcAudioMetadata { + channels: WEBRTC_AUDIO_CHANNELS, + codec: "PCMU".to_owned(), + sample_rate: WEBRTC_AUDIO_SAMPLE_RATE, + }), video: WebRtcVideoMetadata { width: 0, height: 0, @@ -582,6 +665,13 @@ fn summarize_sdp_candidate_types(sdp: &str) -> String { format!("host={host},srflx={srflx},prflx={prflx},relay={relay},other={other}") } +fn sdp_has_media_type(sdp: &str, media_type: &str) -> bool { + let prefix = format!("m={media_type} "); + sdp.lines() + .map(str::trim_start) + .any(|line| line.starts_with(&prefix)) +} + fn redact_candidate_address(address: &str) -> String { if address.is_empty() { return String::new(); @@ -1259,6 +1349,29 @@ fn h264_rtcp_feedback() -> Vec { ] } +fn pcmu_audio_codec_capability() -> RTCRtpCodecCapability { + RTCRtpCodecCapability { + mime_type: MIME_TYPE_PCMU.to_owned(), + clock_rate: WEBRTC_AUDIO_SAMPLE_RATE, + channels: WEBRTC_AUDIO_CHANNELS, + sdp_fmtp_line: String::new(), + rtcp_feedback: Vec::new(), + } +} + +fn register_pcmu_audio_codec(media_engine: &mut MediaEngine) -> Result<(), AppError> { + media_engine + .register_codec( + RTCRtpCodecParameters { + capability: pcmu_audio_codec_capability(), + payload_type: 0, + ..Default::default() + }, + RTPCodecType::Audio, + ) + .map_err(|error| AppError::internal(format!("register WebRTC PCMU codec: {error}"))) +} + fn rtcp_packet_requests_keyframe(packet: &(dyn RtcpPacket + Send + Sync)) -> bool { packet.as_any().is::() || packet.as_any().is::() } @@ -1425,6 +1538,503 @@ fn ice_transport_policy() -> RTCIceTransportPolicy { } } +#[derive(Clone)] +struct SimulatorAudioCapture { + inner: Arc, +} + +struct SimulatorAudioCaptureInner { + handle: AtomicUsize, + callback_user_data: AtomicUsize, + sender: mpsc::UnboundedSender, +} + +#[derive(Debug)] +struct AudioPcmSample { + sample_rate: u32, + channels: u16, + data: Bytes, +} + +type SharedAudioPcmSample = Arc; + +impl SimulatorAudioCapture { + fn start( + process_ids: &[i32], + sender: mpsc::UnboundedSender, + ) -> Result { + if process_ids.is_empty() { + return Err(AppError::native( + "No simulator audio process IDs were available.", + )); + } + let inner = Arc::new(SimulatorAudioCaptureInner { + handle: AtomicUsize::new(0), + callback_user_data: AtomicUsize::new(0), + sender, + }); + let user_data = Weak::into_raw(Arc::downgrade(&inner)) as *mut c_void; + let mut error = std::ptr::null_mut(); + let handle = unsafe { + ffi::xcw_native_audio_capture_create( + process_ids.as_ptr(), + process_ids.len(), + Some(host_audio_capture_callback), + user_data, + &mut error, + ) + }; + if handle.is_null() { + unsafe { + let _ = Weak::from_raw(user_data as *const SimulatorAudioCaptureInner); + } + return Err(unsafe { take_native_error(error) } + .unwrap_or_else(|| AppError::native("Unable to start simulator audio capture."))); + } + inner.handle.store(handle as usize, Ordering::Release); + inner + .callback_user_data + .store(user_data as usize, Ordering::Release); + Ok(Self { inner }) + } + + fn update_processes(&self, process_ids: &[i32]) -> Result<(), AppError> { + if process_ids.is_empty() { + return Ok(()); + } + let handle = self.inner.handle.load(Ordering::Acquire); + if handle == 0 { + return Err(AppError::native( + "Simulator audio capture handle was already closed.", + )); + } + let mut error = std::ptr::null_mut(); + let ok = unsafe { + ffi::xcw_native_audio_capture_update_processes( + handle as *mut c_void, + process_ids.as_ptr(), + process_ids.len(), + &mut error, + ) + }; + if ok { + Ok(()) + } else { + Err(unsafe { take_native_error(error) }.unwrap_or_else(|| { + AppError::native("Unable to update simulator audio capture processes.") + })) + } + } +} + +impl Drop for SimulatorAudioCaptureInner { + fn drop(&mut self) { + let handle = self.handle.load(Ordering::Acquire); + let callback_user_data = self.callback_user_data.load(Ordering::Acquire); + unsafe { + if handle != 0 { + ffi::xcw_native_audio_capture_destroy(handle as *mut c_void); + } + if callback_user_data != 0 { + let _ = Weak::from_raw(callback_user_data as *const SimulatorAudioCaptureInner); + } + } + } +} + +unsafe extern "C" fn host_audio_capture_callback( + sample: *const ffi::xcw_native_audio_sample, + user_data: *mut c_void, +) { + if sample.is_null() || user_data.is_null() { + return; + } + + let weak = unsafe { Weak::from_raw(user_data as *const SimulatorAudioCaptureInner) }; + if let Some(inner) = weak.upgrade() { + unsafe { + inner.handle_audio_sample(&*sample); + } + } + let _ = Weak::into_raw(weak); +} + +impl SimulatorAudioCaptureInner { + unsafe fn handle_audio_sample(&self, sample: &ffi::xcw_native_audio_sample) { + if sample.sample_rate == 0 || sample.channels == 0 { + unsafe { + ffi::xcw_native_release_shared_bytes(sample.data); + } + return; + } + let Some(data) = (unsafe { copy_native_shared_bytes(sample.data) }) else { + return; + }; + if data.is_empty() { + return; + } + let packet = Arc::new(AudioPcmSample { + sample_rate: sample.sample_rate, + channels: sample.channels, + data, + }); + let _ = self.sender.send(packet); + } +} + +fn spawn_simulator_audio_stream( + state: AppState, + udid: String, + audio_track: Arc, + mut cancellation: broadcast::Receiver<()>, +) { + tokio::spawn(async move { + let (sample_tx, mut sample_rx) = mpsc::unbounded_channel(); + let mut capture: Option = None; + let mut refresh = time::interval(WEBRTC_AUDIO_PROCESS_REFRESH_INTERVAL); + let mut packetizer = PcmuAudioPacketizer::new(); + loop { + tokio::select! { + _ = cancellation.recv() => break, + _ = refresh.tick() => { + let process_ids = match resolve_simulator_audio_process_ids(state.clone(), &udid).await { + Ok(process_ids) => process_ids, + Err(error) => { + warn!("WebRTC audio process discovery failed for {udid}: {error}"); + continue; + } + }; + if process_ids.is_empty() { + if capture.take().is_some() { + packetizer.reset(); + } + continue; + } + if let Some(active_capture) = capture.as_ref().cloned() { + let update_process_ids = process_ids.clone(); + let update_result = task::spawn_blocking(move || { + active_capture.update_processes(&update_process_ids) + }).await; + let update_result = match update_result { + Ok(result) => result, + Err(error) => Err(AppError::internal(format!( + "Failed to join audio capture update task: {error}" + ))), + }; + if let Err(error) = update_result { + warn!("WebRTC audio capture update failed for {udid}: {error}"); + capture = None; + packetizer.reset(); + } + continue; + } + let tx = sample_tx.clone(); + match task::spawn_blocking(move || SimulatorAudioCapture::start(&process_ids, tx)).await { + Ok(Ok(new_capture)) => { + capture = Some(new_capture); + packetizer.reset(); + } + Ok(Err(error)) => { + warn!("WebRTC audio capture unavailable for {udid}: {error}"); + } + Err(error) => { + warn!("WebRTC audio capture task failed for {udid}: {error}"); + } + } + } + sample = sample_rx.recv() => { + let Some(sample) = sample else { + break; + }; + for packet in packetizer.push(&sample) { + let sample = WebRtcSample { + data: packet, + duration: WEBRTC_AUDIO_FRAME_DURATION, + ..Default::default() + }; + match time::timeout(WEBRTC_AUDIO_WRITE_TIMEOUT, audio_track.write_sample(&sample)).await { + Ok(Ok(())) => {} + Ok(Err(error)) => { + warn!("WebRTC audio write failed for {udid}: {error}"); + return; + } + Err(_) => { + packetizer.reset(); + } + } + } + } + } + } + }); +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct HostAudioProcess { + pid: i32, + parent_pid: i32, + command: String, +} + +async fn resolve_simulator_audio_process_ids( + state: AppState, + udid: &str, +) -> Result, AppError> { + let udid = udid.to_owned(); + task::spawn_blocking(move || simulator_audio_process_ids_blocking(&state, &udid)) + .await + .map_err(|error| { + AppError::internal(format!( + "Failed to join audio process discovery task: {error}" + )) + })? +} + +fn simulator_audio_process_ids_blocking( + state: &AppState, + udid: &str, +) -> Result, AppError> { + let processes = list_host_audio_processes()?; + let root_processes = if android::is_android_id(udid) { + android_audio_root_process_ids(udid, &processes)? + } else { + let bridge = state.registry.bridge().clone(); + let simulator = bridge + .simulator(udid)? + .ok_or_else(|| AppError::not_found(format!("Unknown simulator `{udid}`.")))?; + let data_path = simulator + .data_path + .as_str() + .filter(|value| !value.trim().is_empty()) + .map(ToOwned::to_owned); + ios_simulator_audio_root_process_ids(udid, data_path.as_deref(), &processes) + }; + Ok(process_tree_process_ids(&processes, root_processes)) +} + +fn list_host_audio_processes() -> Result, AppError> { + let output = Command::new("ps") + .args(["-axo", "pid=,ppid=,command="]) + .output() + .map_err(|error| AppError::native(format!("Unable to list host processes: {error}")))?; + if !output.status.success() { + return Err(AppError::native("Unable to list host processes.")); + } + Ok(String::from_utf8_lossy(&output.stdout) + .lines() + .filter_map(parse_host_audio_process_line) + .collect()) +} + +fn parse_host_audio_process_line(line: &str) -> Option { + let trimmed = line.trim(); + if trimmed.is_empty() { + return None; + } + let mut parts = trimmed.split_whitespace(); + let pid = parts.next()?.parse::().ok()?; + let parent_pid = parts.next()?.parse::().ok()?; + let command = parts.collect::>().join(" "); + if command.is_empty() { + return None; + } + Some(HostAudioProcess { + pid, + parent_pid, + command, + }) +} + +fn ios_simulator_audio_root_process_ids( + udid: &str, + data_path: Option<&str>, + processes: &[HostAudioProcess], +) -> BTreeSet { + let device_path = data_path + .and_then(|path| path.strip_suffix("/data")) + .filter(|path| !path.is_empty()); + processes + .iter() + .filter(|process| { + !is_simulator_audio_probe_process(&process.command) + && (process.command.contains(udid) + || data_path.is_some_and(|path| process.command.contains(path)) + || device_path.is_some_and(|path| process.command.contains(path))) + }) + .map(|process| process.pid) + .collect() +} + +fn android_audio_root_process_ids( + udid: &str, + processes: &[HostAudioProcess], +) -> Result, AppError> { + let avd_name = android::avd_from_id(udid)?; + let avd_arg = format!("-avd {avd_name}"); + let avd_at_arg = format!("@{avd_name}"); + let avd_dir = format!(".android/avd/{avd_name}.avd"); + Ok(processes + .iter() + .filter(|process| { + let command = process.command.as_str(); + !is_simulator_audio_probe_process(command) + && (command.contains(&avd_arg) + || command.contains(&avd_at_arg) + || command.contains(&avd_dir)) + }) + .map(|process| process.pid) + .collect()) +} + +fn process_tree_process_ids(processes: &[HostAudioProcess], roots: BTreeSet) -> Vec { + let mut by_parent: HashMap> = HashMap::new(); + for process in processes { + by_parent + .entry(process.parent_pid) + .or_default() + .push(process.pid); + } + + let mut selected = roots; + let mut stack = selected.iter().copied().collect::>(); + while let Some(parent_pid) = stack.pop() { + if let Some(children) = by_parent.get(&parent_pid) { + for child_pid in children { + if selected.insert(*child_pid) { + stack.push(*child_pid); + } + } + } + } + selected.into_iter().collect() +} + +fn is_simulator_audio_probe_process(command: &str) -> bool { + let executable = command + .split_whitespace() + .next() + .and_then(|value| value.rsplit('/').next()) + .unwrap_or_default(); + executable == "simctl" + || executable == "xcrun" && command.contains(" simctl ") + || executable == "ps" +} + +struct PcmuAudioPacketizer { + sample_rate: u32, + channels: u16, + source_position: f64, + mono_samples: Vec, + encoded_samples: Vec, +} + +impl PcmuAudioPacketizer { + fn new() -> Self { + Self { + sample_rate: 0, + channels: 0, + source_position: 0.0, + mono_samples: Vec::new(), + encoded_samples: Vec::new(), + } + } + + fn reset(&mut self) { + self.sample_rate = 0; + self.channels = 0; + self.source_position = 0.0; + self.mono_samples.clear(); + self.encoded_samples.clear(); + } + + fn push(&mut self, sample: &AudioPcmSample) -> Vec { + if sample.sample_rate == 0 || sample.channels == 0 { + return Vec::new(); + } + if self.sample_rate != sample.sample_rate || self.channels != sample.channels { + self.reset(); + self.sample_rate = sample.sample_rate; + self.channels = sample.channels; + } + + self.append_mono_samples(sample); + self.encode_available_samples(); + self.drain_audio_packets() + } + + fn append_mono_samples(&mut self, sample: &AudioPcmSample) { + let channels = usize::from(sample.channels); + let bytes_per_frame = channels * 2; + if bytes_per_frame == 0 { + return; + } + for frame in sample.data.chunks_exact(bytes_per_frame) { + let mut sum = 0i32; + for channel in 0..channels { + let offset = channel * 2; + sum += i16::from_le_bytes([frame[offset], frame[offset + 1]]) as i32; + } + self.mono_samples.push((sum / channels as i32) as i16); + } + } + + fn encode_available_samples(&mut self) { + if self.sample_rate == 0 || self.mono_samples.is_empty() { + return; + } + let step = f64::from(self.sample_rate) / f64::from(WEBRTC_AUDIO_SAMPLE_RATE); + let len = self.mono_samples.len() as f64; + while self.source_position < len { + let index = self.source_position.floor() as usize; + let Some(sample) = self.mono_samples.get(index).copied() else { + break; + }; + self.encoded_samples.push(linear_pcm_to_mulaw(sample)); + self.source_position += step; + } + + let consumed = (self.source_position.floor() as usize).min(self.mono_samples.len()); + if consumed > 0 { + self.mono_samples.drain(0..consumed); + self.source_position -= consumed as f64; + } + } + + fn drain_audio_packets(&mut self) -> Vec { + let mut packets = Vec::new(); + while self.encoded_samples.len() >= WEBRTC_AUDIO_FRAME_SAMPLES { + let packet = + Bytes::copy_from_slice(&self.encoded_samples[..WEBRTC_AUDIO_FRAME_SAMPLES]); + self.encoded_samples.drain(0..WEBRTC_AUDIO_FRAME_SAMPLES); + packets.push(packet); + } + packets + } +} + +fn linear_pcm_to_mulaw(sample: i16) -> u8 { + const BIAS: i32 = 0x84; + const CLIP: i32 = 32635; + + let mut pcm = i32::from(sample); + let sign = if pcm < 0 { + pcm = -pcm; + 0x80 + } else { + 0x00 + }; + pcm = pcm.min(CLIP) + BIAS; + + let mut exponent = 7; + let mut mask = 0x4000; + while exponent > 0 && (pcm & mask) == 0 { + exponent -= 1; + mask >>= 1; + } + let mantissa = (pcm >> (exponent + 3)) & 0x0f; + (!(sign | (exponent << 4) | mantissa) & 0xff) as u8 +} + #[derive(Clone)] pub(crate) struct AndroidWebRtcSource { inner: Arc, @@ -1904,6 +2514,7 @@ struct WebRtcMediaStream { first_frame: SharedFrame, peer_connection: Arc, video_track: Arc, + audio_track: Option>, cancellation_token: broadcast::Sender<()>, cancellation: broadcast::Receiver<()>, stream_control_rx: mpsc::UnboundedReceiver, @@ -1915,6 +2526,7 @@ struct WebRtcRgbaStream { udid: String, peer_connection: Arc, rgba_channel: Arc, + audio_track: Option>, cancellation_token: broadcast::Sender<()>, cancellation: broadcast::Receiver<()>, stream_control_rx: mpsc::UnboundedReceiver, @@ -1928,6 +2540,7 @@ impl WebRtcRgbaStream { udid, peer_connection, rgba_channel, + audio_track, cancellation_token, mut cancellation, mut stream_control_rx, @@ -1937,6 +2550,14 @@ impl WebRtcRgbaStream { let mut peer_disconnected_since: Option = None; let mut sequence = 0u64; let _guard = WebRtcMetricsGuard::new(state.metrics.clone()); + if let Some(audio_track) = audio_track { + spawn_simulator_audio_stream( + state.clone(), + udid.clone(), + audio_track, + cancellation_token.subscribe(), + ); + } rgba_channel.on_open(Box::new({ let udid = udid.clone(); move || { @@ -2048,6 +2669,7 @@ impl WebRtcMediaStream { first_frame, peer_connection, video_track, + audio_track, cancellation_token, mut cancellation, mut stream_control_rx, @@ -2067,6 +2689,14 @@ impl WebRtcMediaStream { let mut waiting_for_keyframe = false; let mut peer_disconnected_since: Option = None; let _guard = WebRtcMetricsGuard::new(state.metrics.clone()); + if let Some(audio_track) = audio_track { + spawn_simulator_audio_stream( + state.clone(), + udid.clone(), + audio_track, + cancellation_token.subscribe(), + ); + } let first_frame_duration = send_timing.duration_for(&first_frame, realtime_stream); match write_frame_sample_with_timeout( @@ -2645,10 +3275,11 @@ mod tests { use super::{ android_rgba_webrtc_frame_chunks, append_avcc_parameter_sets, append_length_prefixed_nalus, h264_annex_b_sample, h264_frame_has_idr, h264_frame_is_decoder_sync, h264_sdp_fmtp_line, - is_annex_b, is_h264_codec, rtcp_packet_requests_keyframe, rtp_packet_pacing, - WebRtcMetricsGuard, WebRtcSendTiming, ANDROID_WEBRTC_RGBA_CHUNK_BYTES, - ANDROID_WEBRTC_RGBA_CHUNK_HEADER_BYTES, ANDROID_WEBRTC_RGBA_CHUNK_MAGIC, - ANDROID_WEBRTC_RGBA_FORMAT_RGBA8888, ANDROID_WEBRTC_RGBA_VERSION, ANNEX_B_START_CODE, + is_annex_b, is_h264_codec, linear_pcm_to_mulaw, rtcp_packet_requests_keyframe, + rtp_packet_pacing, sdp_has_media_type, PcmuAudioPacketizer, WebRtcMetricsGuard, + WebRtcSendTiming, ANDROID_WEBRTC_RGBA_CHUNK_BYTES, ANDROID_WEBRTC_RGBA_CHUNK_HEADER_BYTES, + ANDROID_WEBRTC_RGBA_CHUNK_MAGIC, ANDROID_WEBRTC_RGBA_FORMAT_RGBA8888, + ANDROID_WEBRTC_RGBA_VERSION, ANNEX_B_START_CODE, WEBRTC_AUDIO_FRAME_SAMPLES, }; use crate::android; use crate::metrics::counters::Metrics; @@ -2705,6 +3336,110 @@ mod tests { assert!(!rtcp_packet_requests_keyframe(&SenderReport::default())); } + #[test] + fn detects_audio_m_lines_in_browser_offers() { + assert!(sdp_has_media_type( + "v=0\r\nm=audio 9 UDP/TLS/RTP/SAVPF 0\r\nm=video 9 UDP/TLS/RTP/SAVPF 96\r\n", + "audio" + )); + assert!(!sdp_has_media_type( + "v=0\r\nm=video 9 UDP/TLS/RTP/SAVPF 96\r\n", + "audio" + )); + } + + #[test] + fn pcmu_packetizer_outputs_twenty_ms_silence_frames() { + let mut packetizer = PcmuAudioPacketizer::new(); + let pcm = vec![0_u8; 960 * 2 * 2]; + let packets = packetizer.push(&super::AudioPcmSample { + sample_rate: 48_000, + channels: 2, + data: Bytes::from(pcm), + }); + + assert_eq!(packets.len(), 1); + assert_eq!(packets[0].len(), WEBRTC_AUDIO_FRAME_SAMPLES); + assert!(packets[0] + .iter() + .all(|sample| *sample == linear_pcm_to_mulaw(0))); + } + + #[test] + fn parses_host_audio_process_lines_with_commands_containing_spaces() { + assert_eq!( + super::parse_host_audio_process_line(" 42 1 /tmp/My App.app/My App --flag value"), + Some(super::HostAudioProcess { + pid: 42, + parent_pid: 1, + command: "/tmp/My App.app/My App --flag value".to_owned(), + }) + ); + } + + #[test] + fn ios_audio_process_discovery_includes_device_descendants() { + let processes = vec![ + super::HostAudioProcess { + pid: 10, + parent_pid: 1, + command: "/Library/Developer/CoreSimulator/Profiles/Runtimes/iOS.simruntime/Contents/Resources/RuntimeRoot/usr/libexec/launchd_sim /Users/me/Library/Developer/CoreSimulator/Devices/UDID-1/data" + .to_owned(), + }, + super::HostAudioProcess { + pid: 11, + parent_pid: 10, + command: "/Applications/Fixture.app/Fixture".to_owned(), + }, + super::HostAudioProcess { + pid: 12, + parent_pid: 1, + command: "/usr/bin/xcrun simctl spawn UDID-1 launchctl print user/501" + .to_owned(), + }, + ]; + let roots = super::ios_simulator_audio_root_process_ids( + "UDID-1", + Some("/Users/me/Library/Developer/CoreSimulator/Devices/UDID-1/data"), + &processes, + ); + + assert_eq!( + super::process_tree_process_ids(&processes, roots), + vec![10, 11] + ); + } + + #[test] + fn android_audio_process_discovery_includes_emulator_descendants() { + let processes = vec![ + super::HostAudioProcess { + pid: 20, + parent_pid: 1, + command: + "/Users/me/Library/Android/sdk/emulator/emulator -avd Pixel_8_API_36 -no-window" + .to_owned(), + }, + super::HostAudioProcess { + pid: 21, + parent_pid: 20, + command: "qemu-system-aarch64 -some-child-arg".to_owned(), + }, + super::HostAudioProcess { + pid: 22, + parent_pid: 1, + command: "/Users/me/Library/Android/sdk/emulator/emulator -avd Other".to_owned(), + }, + ]; + let roots = + super::android_audio_root_process_ids("android:Pixel_8_API_36", &processes).unwrap(); + + assert_eq!( + super::process_tree_process_ids(&processes, roots), + vec![20, 21] + ); + } + #[test] fn realtime_h264_advertises_retransmission_feedback() { let feedback = super::h264_rtcp_feedback();