diff --git a/.gitignore b/.gitignore index bb421d2d..2e7916f9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,8 @@ **/*.rs.bk .idea Cargo.lock +.vscode +*.h264 +*.png +*.html +*.ivf \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index eae8ce28..fdd4cf3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [workspace] resolver = "2" -members = ["sip/*", "media/*", "examples"] +members = ["sip/*", "media/*", "media-video/*", "examples"] [workspace.package] authors = ["kbalt"] @@ -24,12 +24,19 @@ stun-types = { package = "ezk-stun-types", version = "0.3.0", path = "media/stun rtc = { package = "ezk-rtc", version = "0.1.0", path = "media/rtc" } srtp = { package = "ezk-srtp", version = "0.1.0", path = "media/srtp" } +h264 = { package = "ezk-h264", version = "0.1.0", path = "media-video/h264" } +libva = { package = "ezk-libva", version = "0.1.0", path = "media-video/libva" } +vulkan = { package = "ezk-vulkan", version = "0.1.0", path = "media-video/vulkan" } +capture = { package = "ezk-capture", version = "0.1.0", path = "media-video/capture" } + rustls-pki-types = { version = "1", features = ["std"] } tokio-native-tls = { version = "0.3" } tokio-rustls = { version = "0.26", default-features = false } log = "0.4" +ezk-image = { version = "0.4", default-features = false } + [workspace.lints.rust] unreachable_pub = "warn" diff --git a/media-video/av1/Cargo.toml b/media-video/av1/Cargo.toml new file mode 100644 index 00000000..2e82e264 --- /dev/null +++ b/media-video/av1/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "ezk-av1" +version = "0.1.0" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lints] +workspace = true + +[features] +default = ["vulkan"] + +vulkan = ["dep:vulkan"] + +[dependencies] +bytes = "1.10.0" +log = "0.4" +thiserror = "2.0.11" +vulkan = { workspace = true, optional = true } +smallvec = "1" + + +[dev-dependencies] +env_logger = "0.11" +capture.workspace = true +tokio = { version = "1", features = ["sync", "macros", "rt"] } +ivf = "0.1.4" diff --git a/media-video/av1/src/encoder/backends/mod.rs b/media-video/av1/src/encoder/backends/mod.rs new file mode 100644 index 00000000..5e378922 --- /dev/null +++ b/media-video/av1/src/encoder/backends/mod.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "vulkan")] +pub mod vulkan; diff --git a/media-video/av1/src/encoder/backends/vulkan/mod.rs b/media-video/av1/src/encoder/backends/vulkan/mod.rs new file mode 100644 index 00000000..8aa523bf --- /dev/null +++ b/media-video/av1/src/encoder/backends/vulkan/mod.rs @@ -0,0 +1,597 @@ +use smallvec::SmallVec; +use std::{collections::VecDeque, ffi::c_void, pin::Pin, ptr::null, time::Instant}; +use vulkan::{ + Device, PhysicalDevice, VulkanError, + ash::vk, + encoder::{ + RateControlInfos, VulkanEncodeFrameError, VulkanEncodeSlot, VulkanEncoder, + VulkanEncoderConfig, VulkanEncoderImplConfig, + capabilities::{VulkanEncoderCapabilities, VulkanEncoderCapabilitiesError}, + codec::AV1, + input::InputData, + }, +}; + +use crate::{ + AV1Framerate, AV1Level, AV1Profile, + encoder::util::{AV1EncoderState, AV1FramePattern, FrameEncodeInfo}, +}; + +#[derive(Debug, Clone, Copy)] +pub struct VulkanAV1EncoderConfig { + pub encoder: VulkanEncoderConfig, + pub profile: AV1Profile, + pub level: AV1Level, + pub frame_pattern: AV1FramePattern, + pub rate_control: VulkanAV1RateControlConfig, +} + +#[derive(Debug, Clone, Copy)] +pub struct VulkanAV1RateControlConfig { + /// Rate control mode for the AV1 encoder + pub mode: 
VulkanAV1RateControlMode,
+
+    /// Expected framerate of the video stream. Defaults to 60 frames per second
+    pub framerate: Option<AV1Framerate>,
+
+    /// Minimum quality index. 0 is the highest quality & 255 is the lowest quality
+    ///
+    /// Must be equal or smaller than max_q_index
+    pub min_q_index: Option<u32>,
+
+    /// Maximum quality index. 0 is the highest quality & 255 is the lowest quality
+    ///
+    /// Must be equal or greater than min_q_index
+    pub max_q_index: Option<u32>,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum VulkanAV1RateControlMode {
+    Default,
+    ConstantBitrate {
+        bitrate: u32,
+    },
+    VariableBitrate {
+        average_bitrate: u32,
+        max_bitrate: u32,
+    },
+    ConstantQuality {
+        q_index: u32,
+    },
+}
+
+#[derive(Debug)]
+pub struct VkAV1Encoder {
+    config: VulkanAV1EncoderConfig,
+    state: AV1EncoderState,
+    encoder: VulkanEncoder<AV1>,
+
+    caps: VulkanEncoderCapabilities<AV1>,
+
+    free_dpb_slots: Vec<DpbSlot>,
+    active_dpb_slots: VecDeque<DpbSlot>,
+}
+
+#[derive(Debug, Clone, Copy)]
+struct DpbSlot {
+    index: usize,
+    order_hint: u8,
+}
+
+impl VkAV1Encoder {
+    pub fn capabilities(
+        physical_device: &PhysicalDevice,
+        profile: AV1Profile,
+    ) -> Result<VulkanEncoderCapabilities<AV1>, VulkanEncoderCapabilitiesError> {
+        let av1_profile_info =
+            vk::VideoEncodeAV1ProfileInfoKHR::default().std_profile(match profile {
+                AV1Profile::Main => vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_MAIN,
+                AV1Profile::High => vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_HIGH,
+                AV1Profile::Professional => {
+                    vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_PROFESSIONAL
+                }
+            });
+
+        let capabilities =
+            VulkanEncoderCapabilities::<AV1>::new(physical_device, av1_profile_info)?;
+
+        Ok(capabilities)
+    }
+
+    pub fn new(
+        device: &Device,
+        caps: &VulkanEncoderCapabilities<AV1>,
+        config: VulkanAV1EncoderConfig,
+    ) -> Result<Self, VulkanError> {
+        let profile = match config.profile {
+            AV1Profile::Main => vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_MAIN,
+            AV1Profile::High => vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_HIGH,
+            AV1Profile::Professional => {
+                vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_PROFESSIONAL
+            }
+        };
+
+        let std_sequence_header = vk::native::StdVideoAV1SequenceHeader {
+            flags: vk::native::StdVideoAV1SequenceHeaderFlags {
+                _bitfield_align_1: [],
+                _bitfield_1: {
+                    vk::native::StdVideoAV1SequenceHeaderFlags::new_bitfield_1(
+                        0, // still_picture
+                        0, // reduced_still_picture_header
+                        0, // use_128x128_superblock
+                        0, // enable_filter_intra
+                        0, // enable_intra_edge_filter
+                        0, // enable_interintra_compound
+                        0, // enable_masked_compound
+                        0, // enable_warped_motion
+                        0, // enable_dual_filter
+                        1, // enable_order_hint
+                        0, // enable_jnt_comp
+                        0, // enable_ref_frame_mvs
+                        0, // frame_id_numbers_present_flag
+                        0, // enable_superres
+                        0, // enable_cdef
+                        1, // enable_restoration
+                        0, // film_grain_params_present
+                        0, // timing_info_present_flag
+                        0, // initial_display_delay_present_flag
+                        0, // reserved
+                    )
+                },
+            },
+            seq_profile: profile,
+            frame_width_bits_minus_1: 11, // 4096x4096 is the maximum for now
+            frame_height_bits_minus_1: 11,
+            max_frame_width_minus_1: (config.encoder.max_encode_resolution.width - 1) as u16,
+            max_frame_height_minus_1: (config.encoder.max_encode_resolution.height - 1) as u16,
+            delta_frame_id_length_minus_2: 0,
+            additional_frame_id_length_minus_1: 0,
+            order_hint_bits_minus_1: 7, // 8 bits for order hint
+            seq_force_integer_mv: 0,
+            seq_force_screen_content_tools: 0,
+            reserved1: [0u8; 5],
+            pColorConfig: null(),
+            pTimingInfo: null(),
+        };
+
+        let video_encode_av1_session_parameters_create_info =
+            vk::VideoEncodeAV1SessionParametersCreateInfoKHR::default()
+                .std_sequence_header(&std_sequence_header);
+
+        let encoder_config = VulkanEncoderImplConfig {
+            user: config.encoder,
+            num_encode_slots: 4,
+            max_active_references: 7,
+            num_dpb_slots: 8,
+        };
+
+        let av1_profile_info = vk::VideoEncodeAV1ProfileInfoKHR::default().std_profile(profile);
+
+        let encoder = caps.create_encoder(
+            device,
+            encoder_config,
+            av1_profile_info,
+            vk::VideoEncodeAV1SessionCreateInfoKHR::default()
+                .max_level(map_level(config.level))
+                .use_max_level(true),
+            video_encode_av1_session_parameters_create_info,
+            Some(rate_control_from_config(&config, caps)),
+        )?;
+
+        let free_dpb_slots = (0..8)
+            .map(|index| DpbSlot {
+                index,
+                order_hint: 0,
+            })
+            .rev()
+            .collect();
+
+        Ok(VkAV1Encoder {
+            config,
+            state: AV1EncoderState::new(config.frame_pattern),
+            encoder,
+            caps: caps.clone(),
+            free_dpb_slots,
+            active_dpb_slots: VecDeque::new(),
+        })
+    }
+
+    /// Request the next frame to be encoded as a key frame
+    pub fn request_idr(&mut self) {
+        // TODO: this totally blows up if b-frames are currently queued
+        self.state.request_keyframe();
+    }
+
+    /// Update the encoder's rate control config
+    pub fn update_rate_control(&mut self, rate_control: VulkanAV1RateControlConfig) {
+        unsafe {
+            self.config.rate_control = rate_control;
+
+            self.encoder
+                .update_rc(rate_control_from_config(&self.config, &self.caps));
+        }
+    }
+
+    pub fn poll_result(&mut self) -> Result<Option<(Instant, Vec<u8>)>, VulkanError> {
+        self.encoder.poll_result()
+    }
+
+    pub fn wait_result(&mut self) -> Result<Option<(Instant, Vec<u8>)>, VulkanError> {
+        self.encoder.wait_result()
+    }
+
+    pub fn encode_frame(&mut self, input: InputData<'_>) -> Result<(), VulkanEncodeFrameError> {
+        let frame_info = self.state.next();
+        log::debug!("Encode {frame_info:?}");
+
+        let mut encode_slot = self
+            .encoder
+            .pop_encode_slot()?
+ .expect("encoder must have enough encode_slots for the given ip_period configuration"); + + self.encoder + .set_input_of_encode_slot(&mut encode_slot, input)?; + + if frame_info.is_key { + self.free_dpb_slots.extend(self.active_dpb_slots.drain(..)); + } + + self.encode_slot(frame_info, encode_slot)?; + + Ok(()) + } + + fn encode_slot( + &mut self, + frame_info: FrameEncodeInfo, + encode_slot: VulkanEncodeSlot, + ) -> Result<(), VulkanEncodeFrameError> { + // Reference Frame Name indices + + const LAST_FRAME: u8 = 0; + const LAST2_FRAME: u8 = 1; + const LAST3_FRAME: u8 = 2; + const GOLDEN_FRAME: u8 = 3; + // const BWDREF_FRAME: u8 = 4; + // const ALTREF2_FRAME: u8 = 5; + // const ALTREF_FRAME: u8 = 6; + + let mut setup_dpb_slot = if let Some(dpb_slot) = self.free_dpb_slots.pop() { + dpb_slot + } else if let Some(dpb_slot) = self.active_dpb_slots.pop_back() { + dpb_slot + } else { + unreachable!() + }; + + setup_dpb_slot.order_hint = frame_info.order_hint; + + let frame_type = if frame_info.is_key { + vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_KEY + } else { + vk::native::StdVideoAV1FrameType_STD_VIDEO_AV1_FRAME_TYPE_INTER + }; + + let caps = &self.caps.codec; + + let (prediction_mode, max_reference_frames, name_mask) = if frame_info.is_key { + // INTRA Frame for Keyframes + (vk::VideoEncodeAV1PredictionModeKHR::INTRA_ONLY, 0, 0) + } else if self.active_dpb_slots.len() >= 2 + && caps.max_unidirectional_compound_group1_reference_count >= 2 + { + // When 2 or more references are active & UNIDIRECTIONAL_COMPOUND allows for 2 or more + ( + vk::VideoEncodeAV1PredictionModeKHR::UNIDIRECTIONAL_COMPOUND, + caps.max_unidirectional_compound_group1_reference_count, + caps.unidirectional_compound_reference_name_mask, + ) + } else if self.active_dpb_slots.len() == 1 && caps.single_reference_name_mask == 1 { + ( + vk::VideoEncodeAV1PredictionModeKHR::SINGLE_REFERENCE, + 1, + caps.single_reference_name_mask, + ) + } else { + panic!("Failed to identify prediction mode"); + }; + + let reference_slots: SmallVec<[_; 8]> = self + .active_dpb_slots + .iter() + .take(max_reference_frames as usize) + .take(name_mask.count_ones() as usize) + .collect(); + + log::trace!("\tUsing setup slot {}", setup_dpb_slot.index); + + let ref_frame_idx = { + let mut iter = reference_slots.iter().map(|x| x.index as i8); + + let mut ref_frame_idx = [-1; 7]; + + if name_mask & (1 << LAST_FRAME) != 0 { + ref_frame_idx[LAST_FRAME as usize] = iter.next().unwrap_or(-1); + } + if name_mask & (1 << LAST2_FRAME) != 0 { + ref_frame_idx[LAST2_FRAME as usize] = iter.next().unwrap_or(-1); + } + if name_mask & (1 << LAST3_FRAME) != 0 { + ref_frame_idx[LAST3_FRAME as usize] = iter.next().unwrap_or(-1); + } + if name_mask & (1 << GOLDEN_FRAME) != 0 { + ref_frame_idx[GOLDEN_FRAME as usize] = iter.next().unwrap_or(-1); + } + + assert!(iter.next().is_none()); + + ref_frame_idx + }; + + let reference_name_slot_indices = ref_frame_idx.map(i32::from); + + let ref_order_hint = { + let mut ref_order_hint = [0; 8]; + + for dpb_slot in &self.active_dpb_slots { + ref_order_hint[dpb_slot.index] = dpb_slot.order_hint; + } + + ref_order_hint + }; + + log::trace!("\treference_name_slot_indices {reference_name_slot_indices:?}"); + log::trace!("\tref_frame_idx {ref_frame_idx:?}"); + log::trace!("\tref_order_hint {ref_order_hint:?}"); + + let loop_restoration = vk::native::StdVideoAV1LoopRestoration { + FrameRestorationType: [vk::native::StdVideoAV1FrameRestorationType_STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_SGRPROJ; 3], + LoopRestorationSize: [64; 
3], + }; + + let setup_std_reference_info = vk::native::StdVideoEncodeAV1ReferenceInfo { + flags: vk::native::StdVideoEncodeAV1ReferenceInfoFlags { + _bitfield_align_1: [0; 0], + _bitfield_1: vk::native::StdVideoEncodeAV1ReferenceInfoFlags::new_bitfield_1( + 0, // disable_frame_end_update_cdf, + 0, // segmentation_enabled, + 0, // reserved, + ), + }, + RefFrameId: frame_info.current_frame_id, + frame_type, + OrderHint: frame_info.order_hint, + reserved1: [0; 3], + pExtensionHeader: null(), + }; + + let std_picture_info = vk::native::StdVideoEncodeAV1PictureInfo { + flags: vk::native::StdVideoEncodeAV1PictureInfoFlags { + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoEncodeAV1PictureInfoFlags::new_bitfield_1( + 0, // error_resilient_mode, + 0, // disable_cdf_update, + 0, // use_superres, + 0, // render_and_frame_size_different, + 0, // allow_screen_content_tools, + 0, // is_filter_switchable, + 0, // force_integer_mv, + 0, // frame_size_override_flag,TODO + 0, // buffer_removal_time_present_flag, + 1, // allow_intrabc, + 0, // frame_refs_short_signaling, TODO?? + 0, // allow_high_precision_mv, + 1, // is_motion_mode_switchable, + 1, // use_ref_frame_mvs, + 0, // disable_frame_end_update_cdf, + 0, // allow_warped_motion, + 0, // reduced_tx_set, TODO? + 0, // skip_mode_present, + 0, // delta_q_present, + 0, // delta_lf_present, + 0, // delta_lf_multi, + 0, // segmentation_enabled, + 0, // segmentation_update_map, + 0, // segmentation_temporal_update, + 0, // segmentation_update_data, + 1, // UsesLr, + 1, // usesChromaLr, + 1, // show_frame + 0, // showable_frame, + 0, // reserved, + ), + }, + frame_type, + frame_presentation_time: 0, + current_frame_id: frame_info.current_frame_id, + order_hint: frame_info.order_hint, + primary_ref_frame: 7, + refresh_frame_flags: if frame_info.is_key { 0xFF } else { 1 << setup_dpb_slot.index }, + coded_denom: 0, + render_width_minus_1: (self.encoder.current_extent().width - 1) as u16, + render_height_minus_1: (self.encoder.current_extent().height - 1) as u16, + interpolation_filter: vk::native::StdVideoAV1InterpolationFilter_STD_VIDEO_AV1_INTERPOLATION_FILTER_EIGHTTAP, + TxMode: vk::native::StdVideoAV1TxMode_STD_VIDEO_AV1_TX_MODE_LARGEST, + delta_q_res: 0, + delta_lf_res: 0, + ref_order_hint, + ref_frame_idx, + reserved1: [0u8; 3], + delta_frame_id_minus_1: [0; 7], + pTileInfo: null(), + pQuantization: null(), + pSegmentation: null(), + pLoopFilter: null(), + pCDEF: null(), + pLoopRestoration: &raw const loop_restoration, + pGlobalMotion: null(), + pExtensionHeader: null(), + pBufferRemovalTimes: null(), + }; + + let rate_control_group = if frame_info.is_key { + vk::VideoEncodeAV1RateControlGroupKHR::INTRA + } else { + vk::VideoEncodeAV1RateControlGroupKHR::PREDICTIVE + }; + + let mut picture_info = vk::VideoEncodeAV1PictureInfoKHR::default() + .std_picture_info(&std_picture_info) + .prediction_mode(prediction_mode) + .reference_name_slot_indices(reference_name_slot_indices) + .rate_control_group(rate_control_group) + .primary_reference_cdf_only(false) + .generate_obu_extension_header(false); + + if let VulkanAV1RateControlMode::ConstantQuality { q_index } = self.config.rate_control.mode + { + picture_info = picture_info.constant_q_index(q_index); + } + + self.encoder.submit_encode_slot( + encode_slot, + reference_slots.into_iter().map(|slot| slot.index).collect(), + setup_dpb_slot.index, + setup_std_reference_info, + picture_info, + frame_info.is_key, + )?; + + self.active_dpb_slots.push_front(setup_dpb_slot); + + Ok(()) + } +} + +fn 
map_level(level: AV1Level) -> vk::native::StdVideoAV1Level {
+    match level {
+        AV1Level::Level_2_0 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_2_0,
+        AV1Level::Level_2_1 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_2_1,
+        AV1Level::Level_2_2 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_2_2,
+        AV1Level::Level_2_3 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_2_3,
+        AV1Level::Level_3_0 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_3_0,
+        AV1Level::Level_3_1 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_3_1,
+        AV1Level::Level_3_2 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_3_2,
+        AV1Level::Level_3_3 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_3_3,
+        AV1Level::Level_4_0 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_4_0,
+        AV1Level::Level_4_1 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_4_1,
+        AV1Level::Level_4_2 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_4_2,
+        AV1Level::Level_4_3 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_4_3,
+        AV1Level::Level_5_0 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_5_0,
+        AV1Level::Level_5_1 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_5_1,
+        AV1Level::Level_5_2 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_5_2,
+        AV1Level::Level_5_3 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_5_3,
+        AV1Level::Level_6_0 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_6_0,
+        AV1Level::Level_6_1 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_6_1,
+        AV1Level::Level_6_2 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_6_2,
+        AV1Level::Level_6_3 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_6_3,
+        AV1Level::Level_7_0 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_7_0,
+        AV1Level::Level_7_1 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_7_1,
+        AV1Level::Level_7_2 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_7_2,
+        AV1Level::Level_7_3 => vk::native::StdVideoAV1Level_STD_VIDEO_AV1_LEVEL_7_3,
+    }
+}
+
+fn rate_control_from_config(
+    config: &VulkanAV1EncoderConfig,
+    caps: &VulkanEncoderCapabilities<AV1>,
+) -> Pin<Box<RateControlInfos<AV1>>> {
+    let mut this = Box::pin(RateControlInfos::<AV1> {
+        codec_layer: vk::VideoEncodeAV1RateControlLayerInfoKHR::default(),
+        layer: vk::VideoEncodeRateControlLayerInfoKHR::default(),
+        codec_info: vk::VideoEncodeAV1RateControlInfoKHR::default(),
+        info: vk::VideoEncodeRateControlInfoKHR::default(),
+    });
+
+    this.layer.p_next = (&raw const this.codec_layer) as *const c_void;
+    this.info.p_next = (&raw const this.codec_info) as *const c_void;
+    this.info.p_layers = &raw const this.layer;
+    this.info.layer_count = 1;
+
+    this.codec_info.key_frame_period = config.frame_pattern.keyframe_interval.into();
+    this.codec_info.gop_frame_count = config.frame_pattern.keyframe_interval.into();
+    this.codec_info.consecutive_bipredictive_frame_count = 0; // TODO: BIPRED not supported atm
+    this.codec_info.temporal_layer_count = 1;
+    this.codec_info.flags |= vk::VideoEncodeAV1RateControlFlagsKHR::REGULAR_GOP; // TODO: BIPRED not supported atm
+
+    // TODO: magic value
+    this.info.virtual_buffer_size_in_ms = 1000;
+    this.info.initial_virtual_buffer_size_in_ms = 1000;
+
+    if let Some(AV1Framerate { num, denom }) = config.rate_control.framerate {
+        this.layer.frame_rate_numerator = num;
+        this.layer.frame_rate_denominator = denom;
+    } else {
+        this.layer.frame_rate_numerator = 60;
+        this.layer.frame_rate_denominator = 1;
+    }
+
+    let cap_min_q_index = caps.codec.min_q_index;
+    let cap_max_q_index = caps.codec.max_q_index;
+
+    // TODO: RADV doesn't seem to
care about rate control unless min & max qp are enabled? + let min_q_index = Some( + config + .rate_control + .min_q_index + .map_or(cap_min_q_index, |i| { + i.clamp(cap_min_q_index, cap_max_q_index) + }), + ); + let max_q_index = Some( + config + .rate_control + .max_q_index + .map_or(cap_max_q_index, |i| { + i.clamp(cap_min_q_index, cap_max_q_index) + }), + ); + + if let Some(min_q_index) = min_q_index { + this.codec_layer.min_q_index = vk::VideoEncodeAV1QIndexKHR { + intra_q_index: min_q_index, + predictive_q_index: min_q_index, + bipredictive_q_index: min_q_index, + }; + + this.codec_layer.use_min_q_index = vk::TRUE; + } else { + this.codec_layer.use_min_q_index = vk::FALSE; + } + + if let Some(max_q_index) = max_q_index { + this.codec_layer.max_q_index = vk::VideoEncodeAV1QIndexKHR { + intra_q_index: max_q_index, + predictive_q_index: max_q_index, + bipredictive_q_index: max_q_index, + }; + + this.codec_layer.use_max_q_index = vk::TRUE; + } else { + this.codec_layer.use_max_q_index = vk::FALSE; + } + + match config.rate_control.mode { + VulkanAV1RateControlMode::Default => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::DEFAULT; + } + VulkanAV1RateControlMode::ConstantBitrate { bitrate } => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::CBR; + this.layer.average_bitrate = bitrate.into(); + this.layer.max_bitrate = bitrate.into(); + } + VulkanAV1RateControlMode::VariableBitrate { + average_bitrate, + max_bitrate, + } => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::VBR; + this.layer.average_bitrate = average_bitrate.into(); + this.layer.max_bitrate = max_bitrate.into(); + } + VulkanAV1RateControlMode::ConstantQuality { .. } => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::DISABLED; + } + } + + this +} diff --git a/media-video/av1/src/encoder/mod.rs b/media-video/av1/src/encoder/mod.rs new file mode 100644 index 00000000..27736699 --- /dev/null +++ b/media-video/av1/src/encoder/mod.rs @@ -0,0 +1,4 @@ +pub mod backends; +pub(crate) mod util; + +pub use util::AV1FramePattern; diff --git a/media-video/av1/src/encoder/util.rs b/media-video/av1/src/encoder/util.rs new file mode 100644 index 00000000..2f1e6ec4 --- /dev/null +++ b/media-video/av1/src/encoder/util.rs @@ -0,0 +1,55 @@ +#[derive(Debug, Clone, Copy)] +pub struct AV1FramePattern { + pub keyframe_interval: u16, +} + +#[derive(Debug)] +pub(crate) struct AV1EncoderState { + frame_pattern: AV1FramePattern, + keyframe_index: u16, + current_frame_id: u16, +} + +impl AV1EncoderState { + pub(crate) fn new(frame_pattern: AV1FramePattern) -> Self { + AV1EncoderState { + frame_pattern, + keyframe_index: 0, + current_frame_id: 0, + } + } + + pub(crate) fn request_keyframe(&mut self) { + self.keyframe_index = 0; + } + + pub(crate) fn next(&mut self) -> FrameEncodeInfo { + let mut is_key = false; + + if self + .keyframe_index + .is_multiple_of(self.frame_pattern.keyframe_interval) + { + self.keyframe_index = 0; + is_key = true; + } + + let info = FrameEncodeInfo { + is_key, + current_frame_id: self.current_frame_id.into(), + order_hint: (self.current_frame_id & 0xFF) as u8, + }; + + self.current_frame_id = self.current_frame_id.wrapping_add(1); + self.keyframe_index = self.keyframe_index.wrapping_add(1); + + info + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct FrameEncodeInfo { + pub(crate) is_key: bool, + pub(crate) current_frame_id: u32, + pub(crate) order_hint: u8, +} diff --git a/media-video/av1/src/lib.rs 
b/media-video/av1/src/lib.rs
new file mode 100644
index 00000000..bad4f112
--- /dev/null
+++ b/media-video/av1/src/lib.rs
@@ -0,0 +1,55 @@
+#![allow(unsafe_op_in_unsafe_fn)]
+
+pub mod encoder;
+
+mod rtp;
+
+pub use rtp::{AV1DePayloader, AV1Payloader};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum AV1Profile {
+    Main,
+    High,
+    Professional,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[allow(non_camel_case_types)]
+pub enum AV1Level {
+    Level_2_0,
+    Level_2_1,
+    Level_2_2,
+    Level_2_3,
+    Level_3_0,
+    Level_3_1,
+    Level_3_2,
+    Level_3_3,
+    Level_4_0,
+    Level_4_1,
+    Level_4_2,
+    Level_4_3,
+    Level_5_0,
+    Level_5_1,
+    Level_5_2,
+    Level_5_3,
+    Level_6_0,
+    Level_6_1,
+    Level_6_2,
+    Level_6_3,
+    Level_7_0,
+    Level_7_1,
+    Level_7_2,
+    Level_7_3,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct AV1Framerate {
+    pub num: u32,
+    pub denom: u32,
+}
+
+impl AV1Framerate {
+    pub const fn from_fps(fps: u32) -> Self {
+        Self { num: fps, denom: 1 }
+    }
+}
diff --git a/media-video/av1/src/rtp/leb128.rs b/media-video/av1/src/rtp/leb128.rs
new file mode 100644
index 00000000..5c07ae52
--- /dev/null
+++ b/media-video/av1/src/rtp/leb128.rs
@@ -0,0 +1,56 @@
+use bytes::BufMut;
+
+pub(super) fn expected_size(v: u32) -> usize {
+    // Each leb128 byte carries 7 bits of payload
+    if v <= 0x7F {
+        1
+    } else if v <= 0x3F_FF {
+        2
+    } else if v <= 0x1F_FF_FF {
+        3
+    } else if v <= 0x0F_FF_FF_FF {
+        4
+    } else {
+        5
+    }
+}
+
+pub(super) fn read_leb128(bytes: &[u8]) -> Option<(usize, u32)> {
+    let mut value: u64 = 0;
+
+    for (i, leb128_byte) in bytes.iter().take(8).enumerate() {
+        value |= (u64::from(*leb128_byte) & 0x7F) << (i * 7);
+
+        if leb128_byte & 0x80 == 0 {
+            // The AV1 spec allows up to 8 leb128 bytes, but the value itself must fit into 32 bits
+            return Some((i + 1, u32::try_from(value).ok()?));
+        }
+    }
+
+    None
+}
+
+pub(super) fn write_leb128(mut buf: impl BufMut, mut value: u32) {
+    while {
+        let mut byte = (value & 0x7F) as u8;
+        value >>= 7;
+        let more_bytes = value != 0;
+
+        byte |= (more_bytes as u8) << 7;
+        buf.put_u8(byte);
+
+        more_bytes
+    } {}
+}
+
+#[test]
+fn write_and_parse_the_world() {
+    fn write_and_parse(num: u32) {
+        let mut buf = Vec::new();
+
+        write_leb128(&mut buf, num);
+        assert_eq!(read_leb128(&buf).unwrap().1, num);
+    }
+
+    for i in (0..u32::MAX).step_by(100) {
+        write_and_parse(i);
+    }
+}
diff --git a/media-video/av1/src/rtp/mod.rs b/media-video/av1/src/rtp/mod.rs
new file mode 100644
index 00000000..45b91f5e
--- /dev/null
+++ b/media-video/av1/src/rtp/mod.rs
@@ -0,0 +1,308 @@
+use std::{cmp, mem::take};
+
+use bytes::Bytes;
+use smallvec::{SmallVec, smallvec};
+
+mod leb128;
+
+const OBU_SEQUENCE_HEADER: u8 = 1;
+const OBU_TEMPORAL_DELIMITER: u8 = 2;
+// const OBU_FRAME_HEADER: u8 = 3;
+// const OBU_TILE_GROUP: u8 = 4;
+// const OBU_METADATA: u8 = 5;
+// const OBU_FRAME: u8 = 6;
+// const OBU_REDUNDANT_FRAME_HEADER: u8 = 7;
+const OBU_TILE_LIST: u8 = 8;
+
+// Reference https://aomediacodec.github.io/av1-rtp-spec/v1.0.0.html
+
+// AV1 Aggregation Header
+//  0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |Z|Y| W |N|-|-|-|
+// +-+-+-+-+-+-+-+-+
+// Z: MUST be set to 1 if the first OBU element is an OBU fragment that is a continuation of an OBU fragment from the previous packet,
+//    and MUST be set to 0 otherwise.
+// Y: MUST be set to 1 if the last OBU element is an OBU fragment that will continue in the next packet, and MUST be set to 0 otherwise.
+// N: MUST be set to 1 if the packet is the first packet of a coded video sequence, and MUST be set to 0 otherwise.
+// W: two bit field that describes the number of OBU elements in the packet.
+//    This field MUST be set equal to 0 or equal to the number of OBU elements contained in the packet.
+//    If set to 0, each OBU element MUST be preceded by a length field.
+//    If not set to 0 (i.e., W = 1, 2 or 3) the last OBU element MUST NOT be preceded by a length field.
+//    Instead, the length of the last OBU element contained in the packet can be calculated as follows:
+
+// open_bitstream_unit( sz ) {
+//     obu_header()
+//     if ( obu_has_size_field ) {
+//         obu_size    leb128()
+//     } else {
+//         obu_size = sz - 1 - obu_extension_flag
+//     }
+//     ...
+
+// obu_header() {
+//     obu_forbidden_bit     f(1)
+//     obu_type              f(4)
+//     obu_extension_flag    f(1)
+//     obu_has_size_field    f(1)
+//     obu_reserved_1bit     f(1)
+//     if ( obu_extension_flag == 1 )
+//         obu_extension_header()
+// }
+
+// obu_extension_header() {
+//     temporal_id                         f(3)
+//     spatial_id                          f(2)
+//     extension_header_reserved_3bits     f(3)
+// }
+
+#[derive(Debug)]
+struct ObuHeaderAndSize {
+    type_: u8,
+    extension: Option<u8>,
+    content_offset: usize,
+    size: usize,
+}
+
+impl ObuHeaderAndSize {
+    fn parse(bytes: &[u8]) -> Option<ObuHeaderAndSize> {
+        let mut bytes = bytes.iter();
+
+        let header = bytes.next()?;
+
+        let type_ = header >> 3 & 0x0F;
+        let has_extension = header & 0b100 != 0;
+        let has_size = header & 0b10 != 0;
+        if !has_size {
+            return None;
+        }
+
+        let extension = if has_extension {
+            Some(*bytes.next()?)
+        } else {
+            None
+        };
+
+        let bytes = bytes.as_slice();
+
+        let (size_length, size) = leb128::read_leb128(bytes)?;
+
+        let content_offset = 1 + (has_extension as usize) + size_length;
+
+        Some(ObuHeaderAndSize {
+            type_,
+            extension,
+            content_offset,
+            size: content_offset + size as usize,
+        })
+    }
+
+    fn header(&self) -> u8 {
+        (self.type_ << 3) | ((self.extension.is_some() as u8) << 2)
+    }
+}
+
+pub struct AV1Payloader {
+    _priv: (),
+}
+
+impl AV1Payloader {
+    pub fn new() -> AV1Payloader {
+        AV1Payloader { _priv: () }
+    }
+
+    pub fn payload(&mut self, mut to_payload: Bytes, max_size: usize) -> Vec<Vec<u8>> {
+        let mut payloads = Vec::new();
+
+        let mut current_payload = Vec::with_capacity(max_size);
+        current_payload.push(0); // Aggregation header
+
+        while !to_payload.is_empty() {
+            let header_and_size = ObuHeaderAndSize::parse(&to_payload).unwrap();
+            let mut obu_bytes = to_payload.split_to(header_and_size.size);
+
+            if matches!(
+                header_and_size.type_,
+                OBU_TEMPORAL_DELIMITER | OBU_TILE_LIST
+            ) {
+                // drop
+                continue;
+            }
+
+            if matches!(header_and_size.type_, OBU_SEQUENCE_HEADER) {
+                // TODO: this is probably wrong. Currently always setting the N bit when encountering a SEQ header
+                current_payload[0] |= 1 << 3;
+            }
+
+            // TODO: remove obu_size field from OBU
+            // let new_header_len = 1 + header_and_size.extension.is_some() as usize;
+            // let new_obu_len = (obu_bytes.len() - header_and_size.content_offset) + new_header_len;
+
+            // let is_last_obu_in_packet = new_obu_len >= remaining_space;
+
+            // packet.push(header_and_size.header());
+            // if let Some(extension) = header_and_size.extension {
+            //     packet.push(extension);
+            // }
+
+            while !obu_bytes.is_empty() {
+                let remaining_space = max_size - current_payload.len();
+
+                let obu_fragment_size = cmp::min(
+                    remaining_space.saturating_sub(leb128::expected_size(remaining_space as u32)),
+                    obu_bytes.len(),
+                );
+
+                if obu_fragment_size == 0 {
+                    // mark fragment if the current obu is already partially in the current_payload
+                    current_payload[0] |= ((obu_bytes.len() < header_and_size.size) as u8) << 6;
+
+                    payloads.push(current_payload);
+
+                    current_payload = Vec::with_capacity(max_size);
+                    // the first OBU element is an OBU fragment that is a continuation of an OBU fragment from the previous packet
+                    current_payload.push(1 << 7);
+
+                    continue;
+                }
+
+                let to_write = obu_bytes.split_to(obu_fragment_size);
+                leb128::write_leb128(&mut current_payload, to_write.len() as u32);
+
+                current_payload.extend_from_slice(&to_write);
+
+                if !obu_bytes.is_empty() {
+                    // last OBU element is an OBU fragment that will continue in the next packet
+                    current_payload[0] |= 1 << 6;
+
+                    payloads.push(current_payload);
+
+                    current_payload = Vec::with_capacity(max_size);
+                    // the first OBU element is an OBU fragment that is a continuation of an OBU fragment from the previous packet
+                    current_payload.push(1 << 7);
+                }
+            }
+        }
+
+        if current_payload.len() > 1 {
+            payloads.push(current_payload);
+        }
+
+        payloads
+    }
+}
+
+impl Default for AV1Payloader {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum AV1DePayloadError {
+    #[error("Unexpected end of packet")]
+    UnexpectedEndOfPacket,
+    #[error("Got 0 length for OBU packet")]
+    ZeroLengthOBU,
+    #[error("Received OBU exceeded maximum allowed size")]
+    FragmentedObuTooLarge,
+}
+
+pub struct AV1DePayloader {
+    current_obu: Vec<u8>,
+}
+
+impl AV1DePayloader {
+    pub fn new() -> AV1DePayloader {
+        AV1DePayloader {
+            current_obu: Vec::new(),
+        }
+    }
+
+    pub fn depayload(
+        &mut self,
+        mut packet: &[u8],
+    ) -> Result<SmallVec<[Vec<u8>; 3]>, AV1DePayloadError> {
+        if packet.is_empty() {
+            return Ok(smallvec![]);
+        }
+
+        let mut obus = smallvec![];
+
+        let aggregation_header = packet[0];
+        packet = &packet[1..];
+
+        let mut continues_fragment = (aggregation_header & 1 << 7) != 0;
+        let contains_fragment = (aggregation_header & 1 << 6) != 0;
+        let num_remaining_obus = (aggregation_header >> 4) & 0x3;
+        let mut num_remaining_obus = (num_remaining_obus > 0).then_some(num_remaining_obus);
+
+        while !packet.is_empty() {
+            // Check if the OBU has a length prefix
+            // If there's an OBU count in the header then the last OBU has no length prefix
+            let has_length = if let Some(remaining_obu) = &mut num_remaining_obus {
+                *remaining_obu -= 1;
+                *remaining_obu > 0
+            } else {
+                // No count specified, always a length prefix
+                true
+            };
+
+            let (consumed, len) = if has_length {
+                leb128::read_leb128(packet).ok_or(AV1DePayloadError::UnexpectedEndOfPacket)?
+            } else {
+                (0, packet.len() as u32)
+            };
+
+            if len == 0 {
+                return Err(AV1DePayloadError::ZeroLengthOBU);
+            }
+
+            let (obu_bytes, remaining) = packet[consumed..]
+                .split_at_checked(len as usize)
+                .ok_or(AV1DePayloadError::UnexpectedEndOfPacket)?;
+
+            packet = remaining;
+
+            if continues_fragment {
+                continues_fragment = false;
+
+                if self.current_obu.is_empty() {
+                    // Continued fragment but there isn't anything in current_obu, probably packet loss, ignore it
+                    continue;
+                }
+
+                self.current_obu.extend_from_slice(obu_bytes);
+
+                // When contains_fragment is not set for the packet and `packet` contains no more bytes to consume, consider this fragmented OBU as complete
+                if !contains_fragment && packet.is_empty() {
+                    obus.push(take(&mut self.current_obu));
+                }
+            } else if packet.is_empty() && contains_fragment {
+                self.current_obu.extend_from_slice(obu_bytes);
+            } else {
+                obus.push(obu_bytes.to_vec());
+            }
+
+            // Cap the maximum OBU size somewhere to avoid allocating infinite memory
+            if self.current_obu.len() > 100_000_000 {
+                self.current_obu = Vec::new();
+                return Err(AV1DePayloadError::FragmentedObuTooLarge);
+            }
+        }
+
+        Ok(obus)
+    }
+
+    /// Reset the depayloader to its initial state
+    ///
+    /// Must be called when encountering packet loss to avoid aggregating broken OBUs
+    pub fn reset(&mut self) {
+        self.current_obu.clear();
+    }
+}
+
+impl Default for AV1DePayloader {
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/media-video/av1/tests/vkenc_pw_capture.rs b/media-video/av1/tests/vkenc_pw_capture.rs
new file mode 100644
index 00000000..46c87b18
--- /dev/null
+++ b/media-video/av1/tests/vkenc_pw_capture.rs
@@ -0,0 +1,273 @@
+use capture::wayland::{
+    BitFlag, CapturedDmaBufferSync, CapturedFrameBuffer, DmaPlane, DmaUsageOptions, PersistMode,
+    PipewireOptions, PixelFormat, RgbaSwizzle, ScreenCaptureOptions, SourceType,
+};
+use ezk_av1::{
+    AV1DePayloader, AV1Framerate, AV1Level, AV1Payloader, AV1Profile,
+    encoder::{
+        AV1FramePattern,
+        backends::vulkan::{
+            VkAV1Encoder, VulkanAV1EncoderConfig, VulkanAV1RateControlConfig,
+            VulkanAV1RateControlMode,
+        },
+    },
+};
+use std::{fs::OpenOptions, io::Write, time::Instant};
+use tokio::sync::mpsc;
+use vulkan::{
+    DrmPlane, Semaphore,
+    ash::vk,
+    encoder::{
+        VulkanEncoderConfig,
+        input::{InputData, InputPixelFormat, InputSync, VulkanImageInput},
+    },
+};
+
+#[tokio::test]
+async fn vk_encode_dma() {
+    vk_encode_dma_inner().await;
+}
+
+async fn vk_encode_dma_inner() {
+    env_logger::builder().is_test(true).init();
+
+    let entry = unsafe { vulkan::ash::Entry::load().unwrap() };
+    let instance = vulkan::Instance::create(entry, &[]).unwrap();
+    let mut physical_devices: Vec<vulkan::PhysicalDevice> = instance.physical_devices().unwrap();
+    let physical_device = &mut physical_devices[0];
+
+    let drm_modifiers: Vec<u64> = physical_device
+        .supported_drm_modifier(vk::Format::R8G8B8A8_UNORM)
+        .into_iter()
+        .map(|m| m.modifier)
+        .collect();
+
+    let width = 2560;
+    let height = 1440;
+
+    let capabilities = VkAV1Encoder::capabilities(physical_device, AV1Profile::Main).unwrap();
+
+    let device = vulkan::Device::create(physical_device, &[]).unwrap();
+
+    let (tx, mut rx) = mpsc::channel(8);
+
+    let options = ScreenCaptureOptions {
+        show_cursor: true,
+        source_types: SourceType::all(),
+        persist_mode: PersistMode::DoNot,
+        restore_token: None,
+        pipewire: PipewireOptions {
+            max_framerate: 30,
+            pixel_formats: vec![PixelFormat::RGBA(RgbaSwizzle::BGRA)],
+            dma_usage: Some(DmaUsageOptions {
+                request_sync_obj: false,
+                num_buffers: 16,
+                supported_modifier: drm_modifiers,
+            }),
+        },
+    };
+
+    let device_ = device.clone();
+    capture::wayland::start_screen_capture(options, move |frame| {
+        let buffer = match frame.buffer {
CapturedFrameBuffer::Dma(buffer) => buffer, + _ => { + panic!("Test requires DMA buffers") + } + }; + + let mut sync = buffer.sync.map( + |CapturedDmaBufferSync { + acquire_point, + release_point, + acquire_fd, + release_fd, + }| { + ( + Some(InputSync { + semaphore: unsafe { + Semaphore::import_timeline_fd(&device_, acquire_fd).unwrap() + }, + timeline_point: Some(acquire_point), + }), + Some(InputSync { + semaphore: unsafe { + Semaphore::import_timeline_fd(&device_, release_fd).unwrap() + }, + timeline_point: Some(release_point), + }), + ) + }, + ); + + let swizzle = match frame.format { + PixelFormat::RGBA(swizzle) => swizzle, + _ => unreachable!(), + }; + + let image = unsafe { + vulkan::Image::import_dma_fd( + &device_, + frame.width, + frame.height, + buffer + .planes + .into_iter() + .map(|DmaPlane { fd, offset, stride }| DrmPlane { fd, offset, stride }) + .collect(), + buffer.modifier, + vk::Format::R8G8B8A8_UNORM, + vk::ImageUsageFlags::SAMPLED, + ) + } + .unwrap(); + + let components = match swizzle { + capture::wayland::RgbaSwizzle::RGBA => vk::ComponentMapping::default(), + capture::wayland::RgbaSwizzle::BGRA => vk::ComponentMapping { + r: vk::ComponentSwizzle::B, + g: vk::ComponentSwizzle::G, + b: vk::ComponentSwizzle::R, + a: vk::ComponentSwizzle::A, + }, + capture::wayland::RgbaSwizzle::ARGB => vk::ComponentMapping { + r: vk::ComponentSwizzle::G, + g: vk::ComponentSwizzle::B, + b: vk::ComponentSwizzle::A, + a: vk::ComponentSwizzle::R, + }, + capture::wayland::RgbaSwizzle::ABGR => vk::ComponentMapping { + r: vk::ComponentSwizzle::A, + g: vk::ComponentSwizzle::B, + b: vk::ComponentSwizzle::G, + a: vk::ComponentSwizzle::R, + }, + }; + + let view = unsafe { + vulkan::ImageView::create( + &image, + &vk::ImageViewCreateInfo::default() + .image(image.handle()) + .components(components) + .format(vk::Format::R8G8B8A8_UNORM) + .view_type(vk::ImageViewType::TYPE_2D) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }), + ) + .unwrap() + }; + + tx.blocking_send(VulkanImageInput { + view, + extent: vk::Extent2D { + width: frame.width, + height: frame.height, + }, + acquire: sync.as_mut().and_then(|(acquire, _release)| acquire.take()), + release: sync.as_mut().and_then(|(_acquire, release)| release.take()), + }) + .is_ok() + }) + .await + .unwrap(); + + let mut encoder = VkAV1Encoder::new( + &device, + &capabilities, + VulkanAV1EncoderConfig { + encoder: VulkanEncoderConfig { + max_encode_resolution: vk::Extent2D { + width: 608, + height: 1080, + }, + initial_encode_resolution: vk::Extent2D { + width: 608, + height: 1080, + }, + max_input_resolution: vk::Extent2D { width, height }, + input_as_vulkan_image: true, + input_pixel_format: InputPixelFormat::RGBA { + primaries: vulkan::encoder::input::Primaries::BT709, + }, + usage_hints: vk::VideoEncodeUsageFlagsKHR::DEFAULT, + content_hints: vk::VideoEncodeContentFlagsKHR::DEFAULT, + tuning_mode: vk::VideoEncodeTuningModeKHR::DEFAULT, + }, + profile: AV1Profile::Main, + level: AV1Level::Level_6_0, + frame_pattern: AV1FramePattern { + keyframe_interval: u16::MAX, + }, + rate_control: VulkanAV1RateControlConfig { + mode: VulkanAV1RateControlMode::VariableBitrate { + average_bitrate: 10_000_000, + max_bitrate: 12_000_000, + }, + framerate: Some(AV1Framerate::from_fps(240)), + min_q_index: None, //Some(0), + max_q_index: None, //Some(255), + }, + }, + ) + .unwrap(); + + let mut file = OpenOptions::new() + .truncate(true) + 
.create(true) + .write(true) + .open("../../test-av1.ivf") + .unwrap(); + + ivf::write_ivf_header(&mut file, width as usize, height as usize, 1000, 1); + + let epoch = Instant::now(); + + let mut depayloader = AV1DePayloader::new(); + + for _ in 0..200 { + let input = rx.recv().await.unwrap(); + + let start = Instant::now(); + encoder + .encode_frame(InputData::VulkanImage(input)) + .inspect_err(|e| println!("{e}")) + .unwrap(); + println!("Took: {:?}", start.elapsed()); + + while let Some((ts, buf)) = encoder.poll_result().unwrap() { + println!("buf: {}", buf.len()); + + ivf::write_ivf_frame(&mut file, (ts - epoch).as_millis() as _, &buf); + + let packets = AV1Payloader::new().payload(buf.clone().into(), 1000); + + for packet in packets { + for depayloaded in depayloader.depayload(&packet).unwrap() { + assert!(depayloaded.len() == buf.len()); + println!("Depayloaded OBU: {}", depayloaded.len()); + } + } + } + } + + while let Some((ts, buf)) = encoder.wait_result().unwrap() { + println!("buf: {}", buf.len()); + + ivf::write_ivf_frame(&mut file, (ts - epoch).as_millis() as _, &buf); + let packets = AV1Payloader::new().payload(buf.clone().into(), 1000); + for packet in packets { + for depayloaded in depayloader.depayload(&packet).unwrap() { + assert!(depayloaded.len() == buf.len()); + println!("Depayloaded OBU: {}", depayloaded.len()); + } + } + } + + file.flush().unwrap(); +} diff --git a/media-video/capture/Cargo.toml b/media-video/capture/Cargo.toml new file mode 100644 index 00000000..78b58d83 --- /dev/null +++ b/media-video/capture/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "ezk-capture" +version = "0.1.0" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +log = "0.4" +thiserror = "2" +tokio = { version = "1", features = ["sync"] } +smallvec = "1" +slotmap = "1" + +[target.'cfg(target_family = "unix")'.dependencies] +ashpd = { version = "0.12", default-features = false, features = ["async-std"] } +pipewire = { version = "0.9", features = ["v0_3_33", "v0_3_44"] } + +[lints] +workspace = true diff --git a/media-video/capture/src/lib.rs b/media-video/capture/src/lib.rs new file mode 100644 index 00000000..7a6833d5 --- /dev/null +++ b/media-video/capture/src/lib.rs @@ -0,0 +1,4 @@ +#[cfg(target_family = "unix")] +pub mod pipewire; +#[cfg(target_family = "unix")] +pub mod wayland; diff --git a/media-video/capture/src/pipewire/caps.rs b/media-video/capture/src/pipewire/caps.rs new file mode 100644 index 00000000..05b39790 --- /dev/null +++ b/media-video/capture/src/pipewire/caps.rs @@ -0,0 +1,169 @@ +use pipewire::spa::{ + param::{ParamType, audio::AudioFormat}, + pod::{ChoiceValue, Object, Property, Value}, + sys, + utils::{self, Choice, ChoiceEnum, ChoiceFlags, Id, SpaTypes}, +}; +use std::ops::RangeBounds; + +#[derive(Debug, Default)] +pub struct AudioCaps { + pub(super) format: Option, + pub(super) rate: Option, + pub(super) channels: Option, +} + +unsafe impl Send for AudioCaps {} + +impl AudioCaps { + pub fn new() -> AudioCaps { + AudioCaps::default() + } + + pub fn format(mut self, format: AudioFormat) -> Self { + self.format = Some(Property::new( + sys::SPA_FORMAT_AUDIO_format, + Value::Id(utils::Id(format.as_raw())), + )); + self + } + + pub fn format_choice(mut self, formats: impl IntoIterator) -> Self { + let mut formats = formats.into_iter(); + + self.format = Some(Property::new( + sys::SPA_FORMAT_AUDIO_format, + Value::Choice(ChoiceValue::Id(Choice( + ChoiceFlags::empty(), + ChoiceEnum::Enum { + 
default: Id(formats + .next() + .expect("must not pass empty iterator") + .as_raw()), + alternatives: formats.map(|f| Id(f.as_raw())).collect(), + }, + ))), + )); + + self + } + + pub fn rate(mut self, rate: u32) -> Self { + self.rate = Some(Property::new( + sys::SPA_FORMAT_AUDIO_rate, + Value::Int(rate as i32), + )); + + self + } + + pub fn rate_choice(mut self, rates: impl IntoIterator) -> Self { + let mut rates = rates.into_iter(); + + self.rate = Some(Property::new( + sys::SPA_FORMAT_AUDIO_rate, + Value::Choice(ChoiceValue::Int(Choice( + ChoiceFlags::empty(), + ChoiceEnum::Enum { + default: rates.next().expect("must not pass empty iterator") as i32, + alternatives: rates.map(|rate| rate as i32).collect(), + }, + ))), + )); + + self + } + + pub fn channels(mut self, channels: u32) -> Self { + self.channels = Some(Property::new( + sys::SPA_FORMAT_AUDIO_channels, + Value::Int(channels as i32), + )); + + self + } + + pub fn channels_choice(mut self, channels: impl IntoIterator) -> Self { + let mut channels = channels.into_iter(); + + self.channels = Some(Property::new( + sys::SPA_FORMAT_AUDIO_channels, + Value::Choice(ChoiceValue::Int(Choice( + ChoiceFlags::empty(), + ChoiceEnum::Enum { + default: channels.next().expect("must not pass empty iterator") as i32, + alternatives: channels.map(|channels| channels as i32).collect(), + }, + ))), + )); + + self + } + + pub fn channels_range(mut self, range: impl RangeBounds) -> Self { + let start = match range.start_bound() { + std::ops::Bound::Included(v) => *v, + std::ops::Bound::Excluded(v) => v.saturating_sub(1), + std::ops::Bound::Unbounded => 1, + } + .max(1); + + let end = match range.end_bound() { + std::ops::Bound::Included(v) => *v, + std::ops::Bound::Excluded(v) => v.saturating_sub(1), + std::ops::Bound::Unbounded => 1, + } + .max(start); + + self.channels = Some(Property::new( + sys::SPA_FORMAT_AUDIO_channels, + Value::Choice(ChoiceValue::Int(Choice( + ChoiceFlags::empty(), + ChoiceEnum::Range { + default: 0, + min: start as i32, + max: end as i32, + }, + ))), + )); + + self + } + + pub(super) fn into_object(self) -> Object { + let Self { + format, + rate, + channels, + } = self; + + let mut properties = vec![]; + + properties.push(Property::new( + sys::SPA_FORMAT_mediaType, + Value::Id(utils::Id(sys::SPA_MEDIA_TYPE_audio)), + )); + properties.push(Property::new( + sys::SPA_FORMAT_mediaSubtype, + Value::Id(utils::Id(sys::SPA_MEDIA_SUBTYPE_raw)), + )); + + if let Some(format) = format { + properties.push(format); + } + + if let Some(rate) = rate { + properties.push(rate); + } + + if let Some(channels) = channels { + properties.push(channels); + } + + Object { + type_: SpaTypes::ObjectParamFormat.as_raw(), + id: ParamType::EnumFormat.as_raw(), + properties, + } + } +} diff --git a/media-video/capture/src/pipewire/mod.rs b/media-video/capture/src/pipewire/mod.rs new file mode 100644 index 00000000..2b411cad --- /dev/null +++ b/media-video/capture/src/pipewire/mod.rs @@ -0,0 +1,360 @@ +use crate::pipewire::streams::StreamState; +use pipewire::{ + Error, channel, + context::ContextRc, + core::CoreRc, + keys::{APP_NAME, MEDIA_CLASS, NODE_DESCRIPTION, NODE_NAME, NODE_NICK, OBJECT_SERIAL}, + main_loop::MainLoopRc, + properties::properties, + registry::{self, RegistryRc}, + spa::{ + pod::{Pod, Value, serialize::PodSerializer}, + utils::Direction, + }, + stream::{StreamFlags, StreamListener, StreamRc}, + types::ObjectType, +}; +use slotmap::{SlotMap, new_key_type}; +use std::{cell::RefCell, io::Cursor, rc::Rc, sync::Arc, thread}; +use 
tokio::sync::oneshot; + +mod caps; +mod streams; + +pub use caps::AudioCaps; +pub use pipewire::spa::param::audio::AudioFormat; + +new_key_type! { + pub struct StreamId; +} + +pub trait NodeListener: Send + 'static { + fn node_added(&mut self, info: NodeInfo); + fn node_removed(&mut self, id: u32); +} + +#[derive(Debug)] +pub struct NodeInfo { + pub id: u32, + pub object_serial: String, + pub media_class: MediaClass, + pub node_name: Option, + pub node_nick: Option, + pub node_description: Option, + pub app_name: Option, +} + +#[derive(Debug, Clone, Copy)] +pub enum MediaClass { + /// Microphones + AudioSource, + /// Speakers + AudioSink, + /// Applications that produce audio + AudioStreamOutput, +} + +pub trait AudioConsumer: Send + 'static { + fn set_format(&mut self, sample_rate: u32, channels: u32, format: AudioFormat); + fn on_frame(&mut self, data: &[u8]) -> bool; +} + +#[derive(Clone)] +pub struct PipeWireAudioCapture { + sender: Arc>, +} + +impl Drop for PipeWireAudioCapture { + fn drop(&mut self) { + if Arc::strong_count(&self.sender) == 1 { + let _ = self.sender.send(Command::Destroy); + } + } +} + +#[derive(Debug, thiserror::Error)] +#[error("Pipewire thread is unexpectedly gone")] +pub struct PipeWireThreadGone; + +#[derive(Debug, thiserror::Error)] +pub enum PipeWireConnectError { + #[error(transparent)] + Gone(#[from] PipeWireThreadGone), + #[error(transparent)] + PipeWire(#[from] Error), +} + +impl PipeWireAudioCapture { + pub async fn spawn() -> Option { + let (result_tx, result_rx) = oneshot::channel(); + let (sender, receiver) = channel::channel(); + + let handle = PipeWireAudioCapture { + sender: Arc::new(sender.clone()), + }; + + thread::spawn(move || PipeWireThread::run(sender, receiver, result_tx)); + + result_rx.await.ok().map(|_| handle) + } + + pub fn add_listener(&self, listener: impl NodeListener) -> Result<(), PipeWireThreadGone> { + self.sender + .send(Command::AddListener(Box::new(listener))) + .map_err(|_| PipeWireThreadGone) + } + + pub async fn connect( + &self, + target_object: Option, + consumer: impl AudioConsumer, + audio_caps: AudioCaps, + dont_reconnect: bool, + ) -> Result { + let (tx, rx) = oneshot::channel(); + + self.sender + .send(Command::Connect { + target_object, + consumer: Box::new(consumer), + audio_caps, + dont_reconnect, + ret: tx, + }) + .map_err(|_| PipeWireThreadGone)?; + + rx.await + .map_err(|_| PipeWireThreadGone)? 
+ .map_err(|e| e.into()) + } + + pub fn update_caps( + &self, + stream_id: StreamId, + audio_caps: AudioCaps, + ) -> Result<(), PipeWireThreadGone> { + self.sender + .send(Command::UpdateCaps(stream_id, audio_caps)) + .map_err(|_| PipeWireThreadGone) + } +} + +#[allow(clippy::large_enum_variant)] +enum Command { + AddListener(Box), + Connect { + target_object: Option, + consumer: Box, + audio_caps: AudioCaps, + dont_reconnect: bool, + ret: oneshot::Sender>, + }, + UpdateCaps(StreamId, AudioCaps), + RemoveStream(StreamId), + Destroy, +} + +struct PipeWireThread { + main_loop: MainLoopRc, + core: CoreRc, + registry: RegistryRc, + + sender: channel::Sender, + + registry_listener: Vec, + streams: SlotMap)>, +} + +impl PipeWireThread { + fn create() -> Result<(MainLoopRc, CoreRc, RegistryRc), Error> { + let main_loop = MainLoopRc::new(None)?; + let context = ContextRc::new(&main_loop, None)?; + let core = context.connect_rc(None)?; + let registry = core.get_registry_rc()?; + + Ok((main_loop, core, registry)) + } + + fn run( + sender: channel::Sender, + receiver: channel::Receiver, + result_tx: oneshot::Sender>, + ) { + let (main_loop, core, registry) = match Self::create() { + Ok(v) => { + let _ = result_tx.send(Ok(())); + v + } + Err(e) => { + log::warn!("Failed to create pipewire thread {e}"); + let _ = result_tx.send(Err(e)); + return; + } + }; + + let this = RefCell::new(PipeWireThread { + main_loop: main_loop.clone(), + core, + registry, + registry_listener: Vec::new(), + streams: SlotMap::default(), + sender, + }); + + let _attached = receiver.attach(main_loop.loop_(), move |command| match command { + Command::AddListener(listener) => { + this.borrow_mut().add_listener(listener); + } + Command::Connect { + target_object, + consumer, + audio_caps, + dont_reconnect, + ret, + } => { + let result = + this.borrow_mut() + .connect(target_object, consumer, audio_caps, dont_reconnect); + + let _ = ret.send(result); + } + Command::UpdateCaps(key, audio_caps) => { + if let Some((stream, _listener)) = this.borrow_mut().streams.get_mut(key) { + let params = audio_caps.into_object(); + let params: Vec = + PodSerializer::serialize(Cursor::new(Vec::new()), &Value::Object(params)) + .expect("PodSerializer into Cursor> must not fail") + .0 + .into_inner(); + + let mut params = [Pod::from_bytes(¶ms) + .expect("Data is data produced by the PodSerializer")]; + + if let Err(e) = stream.set_active(false) { + log::error!("Failed to pause stream: {e}"); + } + + if let Err(e) = stream.update_params(&mut params) { + log::error!("Failed to update audio caps: {e}"); + } + + if let Err(e) = stream.set_active(true) { + log::error!("Failed to unpause stream: {e}"); + } + } + } + Command::RemoveStream(key) => { + if let Some((stream, listener)) = this.borrow_mut().streams.remove(key) { + if let Err(e) = stream.set_active(false) { + log::warn!("Failed to set stream to inactive, {e}"); + } + if let Err(e) = stream.disconnect() { + log::warn!("Failed to disconnect stream, {e}"); + }; + + listener.unregister(); + } + } + Command::Destroy => { + this.borrow_mut().main_loop.quit(); + } + }); + + main_loop.run(); + + log::info!("PipeWireThread Main Loop stopped running, exiting thread"); + } + + fn add_listener(&mut self, listener: Box) { + let listener = Rc::new(RefCell::new(listener)); + + let mut builder = self.registry.add_listener_local(); + + let l = listener.clone(); + builder = builder.global(move |obj| { + if obj.type_ != ObjectType::Node { + return; + } + + let Some(props) = obj.props else { return }; + + let 
Some(object_serial) = props.get(*OBJECT_SERIAL) else { + return; + }; + + let media_class = match props.get(*MEDIA_CLASS) { + Some("Stream/Output/Audio") => MediaClass::AudioStreamOutput, + Some("Audio/Source") => MediaClass::AudioSource, + Some("Audio/Sink") => MediaClass::AudioSink, + _ => return, + }; + + let info = NodeInfo { + id: obj.id, + object_serial: object_serial.into(), + media_class, + node_name: props.get(*NODE_NAME).map(Into::into), + node_nick: props.get(*NODE_NICK).map(Into::into), + node_description: props.get(*NODE_DESCRIPTION).map(Into::into), + app_name: props.get(*APP_NAME).map(Into::into), + }; + + l.borrow_mut().node_added(info); + }); + + builder = builder.global_remove(move |id| { + listener.borrow_mut().node_removed(id); + }); + + self.registry_listener.push(builder.register()); + } + + fn connect( + &mut self, + target_object: Option, + consumer: Box, + audio_caps: AudioCaps, + dont_reconnect: bool, + ) -> Result { + let mut stream_properties = properties! { + *pipewire::keys::MEDIA_TYPE => "Audio", + *pipewire::keys::MEDIA_CATEGORY => "Capture", + *pipewire::keys::MEDIA_ROLE => "Communication", + }; + + if let Some(object_serial) = target_object { + stream_properties.insert(*pipewire::keys::TARGET_OBJECT, object_serial); + } + + let stream = StreamRc::new(self.core.clone(), "capture", stream_properties)?; + + let stream_id = self + .streams + .try_insert_with_key(|stream_id| -> Result<_, Error> { + let listener = StreamState::new(&stream, stream_id, consumer, self.sender.clone())?; + + Ok((stream.clone(), listener)) + })?; + + let params = audio_caps.into_object(); + let params: Vec = + PodSerializer::serialize(Cursor::new(Vec::new()), &Value::Object(params)) + .expect("PodSerializer into Cursor> must not fail") + .0 + .into_inner(); + + let mut params = + [Pod::from_bytes(¶ms).expect("Data is data produced by the PodSerializer")]; + + let mut flags = + StreamFlags::MAP_BUFFERS | StreamFlags::RT_PROCESS | StreamFlags::AUTOCONNECT; + + if dont_reconnect { + flags.insert(StreamFlags::DONT_RECONNECT); + } + + stream.connect(Direction::Input, None, flags, &mut params)?; + + Ok(stream_id) + } +} diff --git a/media-video/capture/src/pipewire/streams.rs b/media-video/capture/src/pipewire/streams.rs new file mode 100644 index 00000000..83998465 --- /dev/null +++ b/media-video/capture/src/pipewire/streams.rs @@ -0,0 +1,101 @@ +use crate::pipewire::{AudioConsumer, Command, StreamId}; +use pipewire::{ + Error, channel, + spa::{ + param::{ + ParamType, + audio::AudioInfoRaw, + format::{MediaSubtype, MediaType}, + }, + pod::Pod, + }, + stream::{Stream, StreamListener}, +}; + +pub(super) struct StreamState { + id: StreamId, + format: AudioInfoRaw, + consumer: Box, + sender: channel::Sender, +} + +impl StreamState { + pub(super) fn new( + stream: &Stream, + key: StreamId, + consumer: Box, + sender: channel::Sender, + ) -> Result, Error> { + let user_data = StreamState { + id: key, + format: AudioInfoRaw::new(), + consumer, + sender, + }; + + let listener = stream + .add_local_listener_with_user_data(user_data) + .state_changed(|_stream, _user_data, old, new| { + log::debug!("StreamState Changed: {:?} {old:?}, {new:?}", _user_data.id); + }) + .param_changed(|stream, user_data, id, param| { + user_data.handle_param_changed(stream, id, param); + }) + .process(|stream, user_data| { + user_data.handle_process(stream); + }) + .register()?; + + Ok(listener) + } + + fn handle_param_changed(&mut self, _stream: &Stream, id: u32, param: Option<&Pod>) { + let Some(param) = param else { + 
return; + }; + + if id != ParamType::Format.as_raw() { + return; + } + + let (media_type, media_subtype) = + match pipewire::spa::param::format_utils::parse_format(param) { + Ok(v) => v, + Err(_) => return, + }; + + if media_type != MediaType::Audio || media_subtype != MediaSubtype::Raw { + return; + } + + self.format + .parse(param) + .expect("Failed to parse param changed to AudioInfoRaw"); + + self.consumer.set_format( + self.format.rate(), + self.format.channels(), + self.format.format(), + ); + } + + fn handle_process(&mut self, stream: &Stream) { + while let Some(mut buffer) = stream.dequeue_buffer() { + let data = &mut buffer.datas_mut()[0]; + + let offset = data.chunk().offset() as usize; + let size = data.chunk().size() as usize; + + let Some(data) = data.data() else { + continue; + }; + + let run = self.consumer.on_frame(&data[offset..(offset + size)]); + + if !run { + let _ = self.sender.send(Command::RemoveStream(self.id)); + break; + } + } + } +} diff --git a/media-video/capture/src/wayland/mod.rs b/media-video/capture/src/wayland/mod.rs new file mode 100644 index 00000000..a866563a --- /dev/null +++ b/media-video/capture/src/wayland/mod.rs @@ -0,0 +1,336 @@ +use ashpd::{ + desktop::{ + Session, + screencast::{CursorMode, Screencast}, + }, + enumflags2::BitFlags, +}; +use smallvec::SmallVec; +use std::{os::fd::OwnedFd, thread}; +use tokio::sync::oneshot; + +mod stream; + +pub use ashpd::{ + desktop::{PersistMode, screencast::SourceType}, + enumflags2::BitFlag, +}; + +/// Options for configuring a Wayland/Pipewire capture session +#[derive(Debug)] +pub struct ScreenCaptureOptions { + /// Embed the cursor in the video + pub show_cursor: bool, + + /// Which sources to captures + pub source_types: BitFlags, + + /// Screen capture permission persistence + pub persist_mode: PersistMode, + + /// Restore token to restore previous capture + pub restore_token: Option, + + /// Pipewire specific options + pub pipewire: PipewireOptions, +} + +impl Default for ScreenCaptureOptions { + fn default() -> Self { + ScreenCaptureOptions { + show_cursor: true, + source_types: SourceType::all(), + persist_mode: PersistMode::DoNot, + restore_token: None, + pipewire: PipewireOptions::default(), + } + } +} + +#[derive(Debug, Clone)] +pub struct PipewireOptions { + /// Maximum framerate to negotiate in the pipewire stream + /// + /// > Note: This does not guarantee that the framerate is exceeded and a proper frame limit can only be achieved by + /// > blocking the frame callback. + pub max_framerate: u32, + + /// Set the supported pixel formats, only they will be negotiated + pub pixel_formats: Vec, + + /// Configure usage of DMA buffers + pub dma_usage: Option, +} + +impl Default for PipewireOptions { + fn default() -> Self { + PipewireOptions { + max_framerate: 30, + pixel_formats: vec![ + PixelFormat::NV12, + PixelFormat::I420, + PixelFormat::RGBA(RgbaSwizzle::RGBA), + PixelFormat::RGBA(RgbaSwizzle::BGRA), + PixelFormat::RGBA(RgbaSwizzle::ARGB), + PixelFormat::RGBA(RgbaSwizzle::ABGR), + ], + dma_usage: None, + } + } +} + +/// Options for configuring usage of DMA Buffers +#[derive(Debug, Clone)] +pub struct DmaUsageOptions { + /// Request sync objects for explicit DMA buffer synchronization + pub request_sync_obj: bool, + + /// Number of buffers to allocate for the session + /// + /// This must be set to a high enough value to avoid deadlocking in certain scenarios + /// + /// E.g. 
+    /// as the encoder will hold onto these buffers until they can be encoded out of order, releasing them all at once.
+    pub num_buffers: u32,
+
+    /// Supported DRM modifiers
+    pub supported_modifier: Vec<u64>,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum PixelFormat {
+    /// 2 Plane YUV with 4:2:0 subsampling
+    NV12,
+    /// 3 Plane YUV with 4:2:0 subsampling
+    I420,
+    /// Any form of 4 component RGB
+    RGBA(RgbaSwizzle),
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum RgbaSwizzle {
+    RGBA,
+    BGRA,
+    ARGB,
+    ABGR,
+}
+
+#[derive(Debug)]
+pub struct CapturedFrame {
+    pub width: u32,
+    pub height: u32,
+    pub format: PixelFormat,
+    pub buffer: CapturedFrameBuffer,
+    pub crop: Option<CapturedFrameCrop>,
+}
+
+/// Captured buffer type, contents are defined by [`PixelFormat`]
+#[derive(Debug)]
+pub enum CapturedFrameBuffer {
+    Mem(CapturedMemBuffer),
+    Dma(CapturedDmaBuffer),
+}
+
+pub struct CapturedMemBuffer {
+    pub memory: Vec<u8>,
+    pub planes: SmallVec<[MemPlane; 3]>,
+}
+
+impl std::fmt::Debug for CapturedMemBuffer {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("CapturedMemBuffer")
+            .field("memory(len)", &self.memory.len())
+            .field("planes", &self.planes)
+            .finish()
+    }
+}
+
+#[derive(Debug)]
+pub struct MemPlane {
+    pub offset: usize,
+    pub stride: usize,
+}
+
+#[derive(Debug)]
+pub struct CapturedDmaBuffer {
+    pub modifier: u64,
+    pub planes: SmallVec<[DmaPlane; 4]>,
+    pub sync: Option<CapturedDmaBufferSync>,
+}
+
+#[derive(Debug)]
+pub struct DmaPlane {
+    pub fd: OwnedFd,
+    pub offset: usize,
+    pub stride: usize,
+}
+
+#[derive(Debug)]
+pub struct CapturedDmaBufferSync {
+    pub acquire_point: u64,
+    pub release_point: u64,
+
+    pub acquire_fd: OwnedFd,
+    pub release_fd: OwnedFd,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct CapturedFrameCrop {
+    pub x: i32,
+    pub y: i32,
+    pub width: u32,
+    pub height: u32,
+}
+
+#[derive(Debug, thiserror::Error)]
+#[error("Stream has been closed")]
+pub struct StreamClosedError;
+
+/// Handle to a current capture stream
+///
+/// Dropping it does **not** end the stream.
+///
+/// To properly close a capture stream call [`StreamHandle::close`].
+pub struct StreamHandle {
+    session: Session<'static, Screencast<'static>>,
+    restore_token: Option<String>,
+    sender: pipewire::channel::Sender<stream::Command>,
+}
+
+impl StreamHandle {
+    /// Continue playing the stream.
+    ///
+    /// Should only be called after pausing the stream - newly created captures are automatically playing
+    pub fn play(&self) -> Result<(), StreamClosedError> {
+        self.sender
+            .send(stream::Command::Play)
+            .map_err(|_| StreamClosedError)
+    }
+
+    /// Pause the stream, can be unpaused using [`StreamHandle::play`].
+    pub fn pause(&self) -> Result<(), StreamClosedError> {
+        self.sender
+            .send(stream::Command::Pause)
+            .map_err(|_| StreamClosedError)
+    }
+
+    /// Gracefully close the pipewire stream and close the dbus connection.
+    pub async fn close(&self) -> Result<(), StreamClosedError> {
+        if let Err(e) = self.session.close().await {
+            log::warn!("Failed to close xdg session properly: {e}");
+        }
+
+        self.sender
+            .send(stream::Command::Close)
+            .map_err(|_| StreamClosedError)
+    }
+
+    /// Renegotiate the stream without the given DRM modifier
+    ///
+    /// All future renegotiations will not include this modifier.
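+    ///
+    /// Typically called when e.g. a DMA-buf import with the negotiated modifier fails on the consumer side.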
+    pub fn remove_modifier(&self, modifier: u64) -> Result<(), StreamClosedError> {
+        self.sender
+            .send(stream::Command::RemoveModifier(modifier))
+            .map_err(|_| StreamClosedError)
+    }
+
+    /// Get the restore token
+    pub fn restore_token(&self) -> Option<&str> {
+        self.restore_token.as_deref()
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum StartCaptureError {
+    #[error("Config contains an empty list of pixel formats")]
+    NoPixelFormats,
+    #[error(transparent)]
+    DesktopPortal(#[from] ashpd::Error),
+    #[error("no streams were selected")]
+    NoStreamSelected,
+    #[error("capture thread panicked while creating stream")]
+    CaptureThreadPanicked,
+    #[error("Failed to create pipewire stream: {0}")]
+    Pipewire(#[from] pipewire::Error),
+}
+
+/// Start a screen capture thread with the given options
+///
+/// Calls `on_frame` until either the screen capture is cancelled or `on_frame` returns false
+pub async fn start_screen_capture<F>(
+    options: ScreenCaptureOptions,
+    on_frame: F,
+) -> Result<StreamHandle, StartCaptureError>
+where
+    F: FnMut(CapturedFrame) -> bool + Send + 'static,
+{
+    start_screen_capture_boxed(options, Box::new(on_frame)).await
+}
+
+async fn start_screen_capture_boxed(
+    options: ScreenCaptureOptions,
+    on_frame: Box<dyn FnMut(CapturedFrame) -> bool + Send>,
+) -> Result<StreamHandle, StartCaptureError> {
+    if options.pipewire.pixel_formats.is_empty() {
+        return Err(StartCaptureError::NoPixelFormats);
+    }
+
+    let proxy = Screencast::new().await?;
+
+    let session = proxy.create_session().await?;
+
+    let cursor_mode = if options.show_cursor {
+        CursorMode::Embedded
+    } else {
+        CursorMode::Hidden
+    };
+
+    proxy
+        .select_sources(
+            &session,
+            cursor_mode,
+            options.source_types,
+            false,
+            options.restore_token.as_deref(),
+            options.persist_mode,
+        )
+        .await?;
+
+    let response = proxy.start(&session, None).await?.response()?;
+
+    let restore_token = response.restore_token();
+
+    let stream = response
+        .streams()
+        .first()
+        .ok_or(StartCaptureError::NoStreamSelected)?;
+
+    let node_id = stream.pipe_wire_node_id();
+    let fd = proxy.open_pipe_wire_remote(&session).await?;
+
+    let (result_tx, result_rx) = oneshot::channel();
+
+    thread::Builder::new()
+        .name("pipewire-video-capture".into())
+        .spawn(move || {
+            stream::start(
+                Some(node_id),
+                fd,
+                options.pipewire,
+                "Screen",
+                on_frame,
+                result_tx,
+            );
+        })
+        .expect("Thread creation failed");
+
+    let sender = result_rx
+        .await
+        .map_err(|_| StartCaptureError::CaptureThreadPanicked)??;
+
+    Ok(StreamHandle {
+        session,
+        restore_token: restore_token.map(|s| s.to_owned()),
+        sender,
+    })
+}
diff --git a/media-video/capture/src/wayland/stream.rs b/media-video/capture/src/wayland/stream.rs
new file mode 100644
index 00000000..77b09e72
--- /dev/null
+++ b/media-video/capture/src/wayland/stream.rs
@@ -0,0 +1,854 @@
+use crate::wayland::{
+    CapturedDmaBuffer, CapturedDmaBufferSync, CapturedFrame, CapturedFrameBuffer,
+    CapturedFrameCrop, CapturedMemBuffer, DmaPlane, MemPlane, PipewireOptions, PixelFormat,
+    RgbaSwizzle,
+};
+use pipewire::{
+    context::ContextRc,
+    main_loop::{MainLoopRc, MainLoopWeak},
+    properties::properties,
+    spa::{
+        self,
+        param::{
+            ParamType,
+            format::{FormatProperties, MediaSubtype, MediaType},
+            video::{VideoFormat, VideoInfoRaw},
+        },
+        pod::{
+            ChoiceValue, Object, Pod, Property, PropertyFlags, Value, object, property,
+            serialize::PodSerializer,
+        },
+        utils::{Choice, ChoiceEnum, ChoiceFlags, Direction, Fraction, Id, Rectangle, SpaTypes},
+    },
+    stream::{Stream, StreamFlags, StreamListener, StreamRc, StreamState},
+};
+use smallvec::{SmallVec, smallvec};
+use std::{
+    cell::RefCell,
io::Cursor, + os::fd::{BorrowedFd, OwnedFd, RawFd}, + ptr::null, + rc::Rc, + slice::from_raw_parts, +}; +use tokio::sync::oneshot; + +struct BufferGuard<'a> { + stream: &'a Stream, + pw_buffer: *mut pipewire::sys::pw_buffer, +} + +impl Drop for BufferGuard<'_> { + fn drop(&mut self) { + unsafe { self.stream.queue_raw_buffer(self.pw_buffer) }; + } +} + +struct UserStreamState { + main_loop: MainLoopWeak, + options: PipewireOptions, + has_video_modifier: bool, + + format: VideoInfoRaw, + on_frame: Box bool + Send>, +} + +impl UserStreamState { + fn update_params(&mut self, stream: &Stream) { + if let Some(dma_options) = &self.options.dma_usage + && self.has_video_modifier + { + let crop_region_params = serialize_object(crop_region_param()); + let dma_buffer_with_sync_params = serialize_object(dma_buffer_with_sync_params()); + let dma_buffer_without_sync_params = serialize_object(dma_buffer_without_sync_params()); + let sync_obj_params = serialize_object(sync_obj_params()); + + let mut update_params: SmallVec<[&Pod; 2]> = smallvec::SmallVec::new(); + + update_params.push(pod(&crop_region_params)); + + if dma_options.request_sync_obj { + update_params.push(pod(&dma_buffer_with_sync_params)); + update_params.push(pod(&sync_obj_params)); + } + + update_params.push(pod(&dma_buffer_without_sync_params)); + + if let Err(e) = stream.update_params(&mut update_params) { + log::error!("Failed to update stream params: {e}"); + } + } else { + let mem_buffer_params = serialize_object(mem_buffer_params()); + let crop_region_params = serialize_object(crop_region_param()); + + let mut update_params = [pod(&mem_buffer_params), pod(&crop_region_params)]; + + if let Err(e) = stream.update_params(&mut update_params) { + log::error!("Failed to update stream params: {e}"); + } + } + } + + fn handle_state_changed(&mut self, _stream: &Stream, old: StreamState, new: StreamState) { + log::debug!("stream changed: {old:?} -> {new:?}"); + + if matches!(new, StreamState::Unconnected | StreamState::Error(..)) + && let Some(main_loop) = self.main_loop.upgrade() + { + main_loop.quit(); + } + } + + fn handle_param_changed(&mut self, stream: &Stream, id: u32, param: Option<&Pod>) { + let Some(param) = param else { + return; + }; + + if id != ParamType::Format.as_raw() { + return; + } + + let (media_type, media_subtype) = + match pipewire::spa::param::format_utils::parse_format(param) { + Ok(v) => v, + Err(_) => return, + }; + + if media_type != MediaType::Video || media_subtype != MediaSubtype::Raw { + return; + } + + self.format + .parse(param) + .expect("Failed to parse param changed to VideoInfoRaw"); + + log::debug!( + "Stream format changed to {:?}, resolution={}x{}, framerate={}, max_framerate={}, modifier={}", + self.format.format(), + self.format.size().width, + self.format.size().height, + (self.format.framerate().num as f32) / (self.format.framerate().denom.max(1) as f32), + (self.format.max_framerate().num as f32) + / (self.format.max_framerate().denom.max(1) as f32), + self.format.modifier() + ); + + // Check explicitly if the Video modifier property has been set + self.has_video_modifier = unsafe { + let prop = spa::sys::spa_pod_find_prop( + param.as_raw_ptr(), + null(), + spa::sys::SPA_FORMAT_VIDEO_modifier, + ); + + !prop.is_null() + }; + + self.update_params(stream); + } + + fn handle_process(&mut self, stream: &Stream) { + let pw_buffer: *mut pipewire::sys::pw_buffer = unsafe { stream.dequeue_raw_buffer() }; + + let Some(buffer) = (unsafe { pw_buffer.as_ref() }) else { + return; + }; + + let defer_enqueue = 
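+        // RAII guard: `BufferGuard` re-queues the dequeued buffer on drop,
+        // so every early `return` below still hands the buffer back to pipewire.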
BufferGuard { stream, pw_buffer };
+
+        let Some(buffer) = (unsafe { buffer.buffer.as_ref() }) else {
+            return;
+        };
+
+        let spa::sys::spa_rectangle { width, height } = self.format.size();
+        let width = width as usize;
+        let height = height as usize;
+
+        let metas = unsafe { from_raw_parts(buffer.metas, buffer.n_metas as usize) };
+        let datas = unsafe { from_raw_parts(buffer.datas, buffer.n_datas as usize) };
+
+        // First check if memory buffers were sent
+        let mem_data: SmallVec<[_; 3]> = datas
+            .iter()
+            .filter(|data| {
+                matches!(
+                    data.type_,
+                    spa::sys::SPA_DATA_MemFd | spa::sys::SPA_DATA_MemPtr
+                )
+            })
+            .collect();
+
+        let frame = if !mem_data.is_empty() {
+            self.handle_mem_data(width, height, &mem_data)
+        } else {
+            let dma_data: SmallVec<[_; 4]> = datas
+                .iter()
+                .filter(|data| data.type_ == spa::sys::SPA_DATA_DmaBuf)
+                .collect();
+
+            if dma_data.is_empty() {
+                log::warn!("Got neither MemPtr nor DmaBuf data");
+                return;
+            }
+
+            Some(self.handle_dma_data(metas, datas, dma_data))
+        };
+
+        let Some(mut frame) = frame else { return };
+
+        frame.crop = metas.iter().find_map(|meta| {
+            if meta.type_ == spa::sys::SPA_META_VideoCrop {
+                let meta = unsafe {
+                    meta.data
+                        .cast::<spa::sys::spa_meta_region>()
+                        .read_unaligned()
+                };
+
+                Some(CapturedFrameCrop {
+                    x: meta.region.position.x,
+                    y: meta.region.position.y,
+                    width: meta.region.size.width,
+                    height: meta.region.size.height,
+                })
+            } else {
+                None
+            }
+        });
+
+        if !(self.on_frame)(frame) {
+            // on_frame returned false, exit the main loop
+
+            if let Some(main_loop) = self.main_loop.upgrade() {
+                main_loop.quit();
+            }
+        }
+
+        drop(defer_enqueue);
+    }
+
+    fn handle_dma_data(
+        &mut self,
+        metas: &[spa::sys::spa_meta],
+        datas: &[spa::sys::spa_data],
+        dma_data: SmallVec<[&spa::sys::spa_data; 4]>,
+    ) -> CapturedFrame {
+        fn clone_fd(fd: RawFd) -> OwnedFd {
+            unsafe {
+                BorrowedFd::borrow_raw(fd)
+                    .try_clone_to_owned()
+                    .expect("fd received from pipewire must be cloneable")
+            }
+        }
+
+        let planes = dma_data
+            .into_iter()
+            .map(|data| {
+                let chunk = unsafe { data.chunk.read_unaligned() };
+
+                DmaPlane {
+                    fd: clone_fd(data.fd as RawFd),
+                    offset: chunk.offset as usize,
+                    stride: chunk.stride as usize,
+                }
+            })
+            .collect();
+
+        let format = match self.format.format() {
+            VideoFormat::NV12 => PixelFormat::NV12,
+            VideoFormat::I420 => PixelFormat::I420,
+            VideoFormat::RGBA
+            | VideoFormat::RGBx
+            | VideoFormat::BGRA
+            | VideoFormat::BGRx
+            | VideoFormat::ARGB
+            | VideoFormat::xRGB
+            | VideoFormat::ABGR
+            | VideoFormat::xBGR => {
+                let swizzle = match self.format.format() {
+                    VideoFormat::RGBA | VideoFormat::RGBx => RgbaSwizzle::RGBA,
+                    VideoFormat::BGRA | VideoFormat::BGRx => RgbaSwizzle::BGRA,
+                    VideoFormat::ARGB | VideoFormat::xRGB => RgbaSwizzle::ARGB,
+                    VideoFormat::ABGR | VideoFormat::xBGR => RgbaSwizzle::ABGR,
+                    _ => unreachable!(),
+                };
+
+                PixelFormat::RGBA(swizzle)
+            }
+            _ => unreachable!(),
+        };
+
+        let sync_timeline = metas
+            .iter()
+            .find(|m| m.type_ == spa::sys::SPA_META_SyncTimeline);
+
+        let sync = if let Some(sync_timeline) = sync_timeline {
+            let sync_timeline = unsafe {
+                sync_timeline
+                    .data
+                    .cast::<spa::sys::spa_meta_sync_timeline>()
+                    .read_unaligned()
+            };
+
+            let sync_objs: SmallVec<[_; 2]> = datas
+                .iter()
+                .filter(|d| d.type_ == spa::sys::SPA_DATA_SyncObj)
+                .collect();
+
+            let acquire_sync_obj = clone_fd(sync_objs[0].fd as RawFd);
+            let release_sync_obj = clone_fd(sync_objs[1].fd as RawFd);
+
+            Some(CapturedDmaBufferSync {
+                acquire_point: sync_timeline.acquire_point,
+                release_point: sync_timeline.release_point,
+                acquire_fd: acquire_sync_obj,
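+                // Explicit sync: consumers are expected to wait on the acquire
+                // point before reading and to signal the release point when done.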
release_fd: release_sync_obj, + }) + } else { + None + }; + + CapturedFrame { + width: self.format.size().width, + height: self.format.size().height, + format, + buffer: CapturedFrameBuffer::Dma(CapturedDmaBuffer { + modifier: self.format.modifier(), + planes, + sync, + }), + crop: None, + } + } + + fn handle_mem_data( + &mut self, + width: usize, + height: usize, + data: &SmallVec<[&spa::sys::spa_data; 3]>, + ) -> Option { + match self.format.format() { + VideoFormat::NV12 => { + let mut memory = vec![0u8; (width * height * 12).div_ceil(8)]; + + let (y_plane, uv_plane) = memory.split_at_mut(width * height); + + copy_plane(data[0], y_plane, height, width); + copy_plane(data[1], uv_plane, height / 2, width); + + let width = width as u32; + let height = height as u32; + + Some(CapturedFrame { + width, + height, + format: PixelFormat::NV12, + buffer: CapturedFrameBuffer::Mem(CapturedMemBuffer { + memory, + planes: smallvec![ + MemPlane { + offset: 0, + stride: width as usize, + }, + MemPlane { + offset: (width * height) as usize, + stride: width as usize, + } + ], + }), + crop: None, + }) + } + VideoFormat::I420 => { + let mut memory = vec![0u8; (width * height * 12).div_ceil(8)]; + + let (y_plane, uv_plane) = memory.split_at_mut(width * height); + let (u_plane, v_plane) = uv_plane.split_at_mut((width * height) / 4); + + copy_plane(data[0], y_plane, height, width); + copy_plane(data[1], u_plane, height / 2, width / 2); + copy_plane(data[2], v_plane, height / 2, width / 2); + + let width = width as u32; + let height = height as u32; + + let u_offset = width * height; + let v_offset = u_offset + (width * height) / 4; + + Some(CapturedFrame { + width, + height, + format: PixelFormat::I420, + buffer: CapturedFrameBuffer::Mem(CapturedMemBuffer { + memory, + planes: smallvec![ + MemPlane { + offset: 0, + stride: width as usize + }, + MemPlane { + offset: u_offset as usize, + stride: width as usize / 2, + }, + MemPlane { + offset: v_offset as usize, + stride: width as usize / 2, + } + ], + }), + crop: None, + }) + } + VideoFormat::RGBA + | VideoFormat::RGBx + | VideoFormat::BGRA + | VideoFormat::BGRx + | VideoFormat::ARGB + | VideoFormat::xRGB + | VideoFormat::ABGR + | VideoFormat::xBGR => { + let swizzle = match self.format.format() { + VideoFormat::RGBA | VideoFormat::RGBx => RgbaSwizzle::RGBA, + VideoFormat::BGRA | VideoFormat::BGRx => RgbaSwizzle::BGRA, + VideoFormat::ARGB | VideoFormat::xRGB => RgbaSwizzle::ARGB, + VideoFormat::ABGR | VideoFormat::xBGR => RgbaSwizzle::ABGR, + _ => unreachable!(), + }; + + let mut memory = vec![0u8; width * height * 4]; + + // Single plane + copy_plane(data[0], &mut memory, height, width * 4)?; + + let width = width as u32; + let height = height as u32; + + Some(CapturedFrame { + width, + height, + format: PixelFormat::RGBA(swizzle), + buffer: CapturedFrameBuffer::Mem(CapturedMemBuffer { + memory, + planes: smallvec![MemPlane { + offset: 0, + stride: width as usize * 4, + }], + }), + crop: None, + }) + } + _ => unreachable!("Received unexpected video format"), + } + } +} + +fn copy_plane( + spa_data: &spa::sys::spa_data, + buffer: &mut [u8], + height: usize, + buffer_stride: usize, +) -> Option<()> { + let data_slice = unsafe { + from_raw_parts( + spa_data.data.cast::(), + spa_data + .maxsize + .try_into() + .expect("maxsize must fit into usize"), + ) + }; + + let chunk = unsafe { spa_data.chunk.read_unaligned() }; + + if chunk.size == 0 { + return None; + } + + let chunk_offset = (chunk.offset % spa_data.maxsize) as usize; + let chunk_size = chunk.size as 
usize;
+    let chunk_stride = chunk.stride as usize;
+    let chunk_slice = &data_slice[chunk_offset..chunk_offset + chunk_size];
+
+    if chunk_stride == buffer_stride {
+        buffer.copy_from_slice(chunk_slice);
+    } else {
+        // Copy per row
+        for y in 0..height {
+            let chunk_index = y * chunk_stride;
+            let buffer_index = y * buffer_stride;
+
+            let src_slice = &chunk_slice[chunk_index..chunk_index + buffer_stride];
+            let dst_slice = &mut buffer[buffer_index..buffer_index + buffer_stride];
+
+            dst_slice.copy_from_slice(src_slice);
+        }
+    }
+
+    Some(())
+}
+
+pub(super) enum Command {
+    Play,
+    Pause,
+    Close,
+    RemoveModifier(u64),
+}
+
+pub(super) fn start(
+    node_id: Option<u32>,
+    fd: OwnedFd,
+    options: PipewireOptions,
+    role: &'static str,
+    on_frame: Box<dyn FnMut(CapturedFrame) -> bool + Send>,
+    result_tx: oneshot::Sender<Result<pipewire::channel::Sender<Command>, pipewire::Error>>,
+) {
+    pipewire::init();
+
+    let mainloop = match MainLoopRc::new(None) {
+        Ok(mainloop) => mainloop,
+        Err(e) => {
+            let _ = result_tx.send(Err(e));
+            return;
+        }
+    };
+
+    let (tx, rx) = pipewire::channel::channel();
+
+    let data = match build_stream(&mainloop, node_id, fd, options, role, on_frame) {
+        Ok(data_to_not_drop) => data_to_not_drop,
+        Err(e) => {
+            let _ = result_tx.send(Err(e));
+            return;
+        }
+    };
+
+    let _attach_guard = rx.attach(mainloop.loop_(), move |command| match command {
+        Command::Play => {
+            if let Err(e) = data.stream.set_active(true) {
+                log::warn!("Failed to handle Play command: {e}");
+            }
+        }
+        Command::Pause => {
+            if let Err(e) = data.stream.set_active(false) {
+                log::warn!("Failed to handle Pause command: {e}");
+            }
+        }
+        Command::Close => {
+            if let Err(e) = data.stream.disconnect() {
+                log::warn!("Failed to handle Close command: {e}");
+            }
+        }
+        Command::RemoveModifier(modifier) => {
+            let mut user_data = data.user_data.borrow_mut();
+
+            if let Some(dma_usage) = &mut user_data.options.dma_usage {
+                let prev_modifier_len = dma_usage.supported_modifier.len();
+                dma_usage.supported_modifier.retain(|m| *m != modifier);
+
+                if prev_modifier_len != dma_usage.supported_modifier.len() {
+                    if let Err(e) = data.stream.set_active(false) {
+                        log::error!("Failed to pause stream to remove DRM modifier: {e}");
+                    }
+
+                    user_data.update_params(&data.stream);
+
+                    if let Err(e) = data.stream.set_active(true) {
+                        log::error!("Failed to unpause stream to remove DRM modifier: {e}");
+                    }
+                }
+            }
+        }
+    });
+
+    if result_tx.send(Ok(tx)).is_err() {
+        return;
+    }
+
+    mainloop.run();
+}
+
+struct StreamData {
+    stream: StreamRc,
+    // This is just a guard object that must be kept alive
+    #[expect(dead_code)]
+    listener: StreamListener<Rc<RefCell<UserStreamState>>>,
+    user_data: Rc<RefCell<UserStreamState>>,
+}
+
+fn build_stream(
+    mainloop: &MainLoopRc,
+    node_id: Option<u32>,
+    fd: OwnedFd,
+    options: PipewireOptions,
+    role: &'static str,
+    on_frame: Box<dyn FnMut(CapturedFrame) -> bool + Send>,
+) -> Result<StreamData, pipewire::Error> {
+    let context = ContextRc::new(mainloop, None)?;
+    let core = context.connect_fd_rc(fd, None)?;
+    let user_data = Rc::new(RefCell::new(UserStreamState {
+        format: Default::default(),
+        main_loop: mainloop.downgrade(),
+        on_frame,
+        options: options.clone(),
+        has_video_modifier: false,
+    }));
+
+    let stream = StreamRc::new(
+        core,
+        "capture",
+        properties!
{ + *pipewire::keys::MEDIA_TYPE => "Video", + *pipewire::keys::MEDIA_CATEGORY => "Capture", + *pipewire::keys::MEDIA_ROLE => role, + }, + )?; + + let listener = stream + .add_local_listener_with_user_data(user_data.clone()) + .state_changed(|stream, user_data, old, new| { + user_data + .borrow_mut() + .handle_state_changed(stream, old, new); + }) + .param_changed(|stream, user_data, id, param| { + user_data + .borrow_mut() + .handle_param_changed(stream, id, param); + }) + .process(move |stream, user_data| { + user_data.borrow_mut().handle_process(stream); + }) + .register()?; + + let mut connect_params: SmallVec<[_; 2]> = SmallVec::new(); + + // Add the format params with the video drm modifier property first if dma buffers are to be used + if let Some(dma_usage) = options.dma_usage { + let mut format_params = format_params(&options.pixel_formats, options.max_framerate); + format_params + .properties + .push(drm_modifier_property(&dma_usage.supported_modifier)); + connect_params.push(serialize_object(format_params)); + } + + // Add format without video drm modifier property + connect_params.push(serialize_object(format_params( + &options.pixel_formats, + options.max_framerate, + ))); + + let mut connect_params: SmallVec<[&Pod; 2]> = + connect_params.iter().map(|param| pod(param)).collect(); + + stream.connect( + Direction::Input, + node_id, + StreamFlags::AUTOCONNECT | StreamFlags::MAP_BUFFERS, + &mut connect_params, + )?; + + Ok(StreamData { + stream, + listener, + user_data, + }) +} + +/// Build the video format capabilities which will be used to negotiate a video stream with pipewire +fn format_params(pixel_formats: &[PixelFormat], max_framerate: u32) -> Object { + fn map(p: PixelFormat) -> &'static [VideoFormat] { + match p { + PixelFormat::NV12 => &[VideoFormat::NV12], + PixelFormat::I420 => &[VideoFormat::I420], + PixelFormat::RGBA(rgba_swizzle) => match rgba_swizzle { + RgbaSwizzle::RGBA => &[VideoFormat::RGBA, VideoFormat::RGBx], + RgbaSwizzle::BGRA => &[VideoFormat::BGRA, VideoFormat::BGRx], + RgbaSwizzle::ARGB => &[VideoFormat::ARGB, VideoFormat::xRGB], + RgbaSwizzle::ABGR => &[VideoFormat::ABGR, VideoFormat::xBGR], + }, + } + } + + let video_formats = Value::Choice(ChoiceValue::Id(Choice( + ChoiceFlags::empty(), + ChoiceEnum::Enum { + default: Id(map(pixel_formats[0])[0].0), + alternatives: pixel_formats + .iter() + .flat_map(|p| map(*p).iter().copied()) + .map(|video_format| Id(video_format.as_raw())) + .collect(), + }, + ))); + + let video_formats_property = Property { + key: FormatProperties::VideoFormat.as_raw(), + flags: PropertyFlags::empty(), + value: video_formats, + }; + + object!( + SpaTypes::ObjectParamFormat, + ParamType::EnumFormat, + property!(FormatProperties::MediaType, Id, MediaType::Video), + property!(FormatProperties::MediaSubtype, Id, MediaSubtype::Raw), + video_formats_property, + property!( + FormatProperties::VideoSize, + Choice, + Range, + Rectangle, + // Default + Rectangle { + width: 320, + height: 240 + }, + // Min + Rectangle { + width: 16, + height: 16 + }, + // Max + Rectangle { + width: 32768, + height: 32768 + } + ), + property!( + FormatProperties::VideoFramerate, + Choice, + Range, + Fraction, + // Default + Fraction { + num: max_framerate, + denom: 1 + }, + // Min + Fraction { num: 0, denom: 1 }, + // Max + Fraction { + num: max_framerate, + denom: 1 + } + ), + ) +} + +fn mem_buffer_params() -> Object { + let mut params = object!(SpaTypes::ObjectParamBuffers, ParamType::Buffers,); + + params.properties.push(Property { + key: 
spa::sys::SPA_PARAM_BUFFERS_dataType,
+        flags: PropertyFlags::empty(),
+        value: Value::Choice(ChoiceValue::Int(Choice(
+            ChoiceFlags::empty(),
+            ChoiceEnum::Flags {
+                default: 1 << spa::sys::SPA_DATA_MemFd,
+                flags: vec![
+                    1 << spa::sys::SPA_DATA_MemFd,
+                    1 << spa::sys::SPA_DATA_MemPtr,
+                ],
+            },
+        ))),
+    });
+
+    params
+}
+
+fn dma_buffer_with_sync_params() -> Object {
+    let mut params = object!(SpaTypes::ObjectParamBuffers, ParamType::Buffers,);
+
+    params.properties.push(Property {
+        key: spa::sys::SPA_PARAM_BUFFERS_dataType,
+        flags: PropertyFlags::empty(),
+        value: Value::Int(1 << spa::sys::SPA_DATA_DmaBuf),
+    });
+
+    params.properties.push(Property {
+        key: spa::sys::SPA_PARAM_BUFFERS_metaType,
+        flags: PropertyFlags::MANDATORY,
+        value: Value::Int(1 << spa::sys::SPA_META_SyncTimeline),
+    });
+
+    params
+}
+
+fn dma_buffer_without_sync_params() -> Object {
+    let mut params = object!(SpaTypes::ObjectParamBuffers, ParamType::Buffers,);
+
+    params.properties.push(Property {
+        key: spa::sys::SPA_PARAM_BUFFERS_dataType,
+        flags: PropertyFlags::empty(),
+        value: Value::Int(1 << spa::sys::SPA_DATA_DmaBuf),
+    });
+
+    params
+}
+
+fn drm_modifier_property(drm_modifier: &[u64]) -> Property {
+    let default = drm_modifier[0].cast_signed();
+    let alternatives = drm_modifier.iter().copied().map(u64::cast_signed).collect();
+
+    Property {
+        key: FormatProperties::VideoModifier.as_raw(),
+        flags: PropertyFlags::MANDATORY | PropertyFlags::DONT_FIXATE,
+        value: Value::Choice(ChoiceValue::Long(Choice(
+            ChoiceFlags::empty(),
+            ChoiceEnum::Enum {
+                default,
+                alternatives,
+            },
+        ))),
+    }
+}
+
+fn sync_obj_params() -> Object {
+    Object {
+        type_: spa::sys::SPA_TYPE_OBJECT_ParamMeta,
+        id: spa::sys::SPA_PARAM_Meta,
+        properties: [
+            Property {
+                key: spa::sys::SPA_PARAM_META_type,
+                flags: PropertyFlags::empty(),
+                value: Value::Id(Id(spa::sys::SPA_META_SyncTimeline)),
+            },
+            Property {
+                key: spa::sys::SPA_PARAM_META_size,
+                flags: PropertyFlags::empty(),
+                value: Value::Int(size_of::<spa::sys::spa_meta_sync_timeline>() as i32),
+            },
+        ]
+        .into(),
+    }
+}
+
+fn crop_region_param() -> Object {
+    Object {
+        type_: spa::sys::SPA_TYPE_OBJECT_ParamMeta,
+        id: spa::sys::SPA_PARAM_Meta,
+        properties: [
+            Property {
+                key: spa::sys::SPA_PARAM_META_type,
+                flags: PropertyFlags::empty(),
+                value: Value::Id(Id(spa::sys::SPA_META_VideoCrop)),
+            },
+            Property {
+                key: spa::sys::SPA_PARAM_META_size,
+                flags: PropertyFlags::empty(),
+                value: Value::Int(size_of::<spa::sys::spa_meta_region>() as i32),
+            },
+        ]
+        .into(),
+    }
+}
+
+fn serialize_object(object: Object) -> Vec<u8> {
+    PodSerializer::serialize(Cursor::new(Vec::new()), &Value::Object(object))
+        .expect("objects must be serializable")
+        .0
+        .into_inner()
+}
+
+fn pod(pod: &[u8]) -> &Pod {
+    Pod::from_bytes(pod).expect("Object was serialized as pod")
+}
diff --git a/media-video/h264/Cargo.toml b/media-video/h264/Cargo.toml
new file mode 100644
index 00000000..ab72a4a2
--- /dev/null
+++ b/media-video/h264/Cargo.toml
@@ -0,0 +1,36 @@
+[package]
+name = "ezk-h264"
+version = "0.1.0"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[features]
+default = ["openh264", "libva", "vulkan"]
+
+openh264 = ["dep:openh264", "dep:openh264-sys2"]
+libva = ["dep:libva"]
+vulkan = ["dep:vulkan"]
+
+[dependencies]
+bitstream-io = "4"
+bytes = "1.10.0"
+ezk-image = { workspace = true, features = ["NV12", "I420"] }
+log = "0.4"
+openh264 = { version = "0.9", optional = true }
+openh264-sys2 = { version = "0.9", optional = true }
+thiserror = "2.0.11"
+vulkan = { workspace = true, optional = true }
+smallvec = "1"
+
+[target.'cfg(target_os = "linux")'.dependencies]
+libva = { workspace = true, optional = true }
+
+[dev-dependencies]
+env_logger = "0.11"
+capture.workspace = true
+tokio = { version = "1", features = ["sync", "macros", "rt"] }
diff --git a/media-video/h264/src/encoder/backends/libva/bitstream.rs b/media-video/h264/src/encoder/backends/libva/bitstream.rs
new file mode 100644
index 00000000..b72c2ddf
--- /dev/null
+++ b/media-video/h264/src/encoder/backends/libva/bitstream.rs
@@ -0,0 +1,466 @@
+use bitstream_io::{BigEndian, BitWrite, BitWriter};
+use libva::ffi;
+
+use crate::{H264Profile, encoder::backends::libva::VaH264EncoderConfig};
+
+const SLICE_TYPE_P: u8 = 0;
+const SLICE_TYPE_B: u8 = 1;
+const SLICE_TYPE_I: u8 = 2;
+
+const NAL_REF_IDC_NONE: u8 = 0;
+const NAL_REF_IDC_LOW: u8 = 1;
+const NAL_REF_IDC_MEDIUM: u8 = 2;
+const NAL_REF_IDC_HIGH: u8 = 3;
+
+const NAL_NON_IDR: u8 = 1;
+const NAL_IDR: u8 = 5;
+const NAL_SPS: u8 = 7;
+const NAL_PPS: u8 = 8;
+// const NAL_SEI: u8 = 6;
+
+struct H264BitStreamWriter {
+    buf: BitWriter<Vec<u8>, BigEndian>,
+}
+
+impl H264BitStreamWriter {
+    fn new() -> Self {
+        Self {
+            buf: BitWriter::new(Vec::new()),
+        }
+    }
+
+    fn write_bytes(&mut self, bytes: &[u8]) {
+        self.buf.writer().unwrap().extend_from_slice(bytes);
+    }
+
+    fn write_bits<const BITS: u32>(&mut self, value: impl Into<u32>) {
+        self.buf.write::<BITS, u32>(value.into()).unwrap();
+    }
+
+    fn write_bits_var(&mut self, bits: u32, value: u32) {
+        self.buf.write_var(bits, value).unwrap();
+    }
+
+    // exponential golomb coding: code_num = val + 1 is written as
+    // (bit_length(code_num) - 1) zero bits followed by code_num itself,
+    // e.g. val = 3 -> code_num = 0b100 -> "00" then "100"
+    fn write_ue(&mut self, val: u32) {
+        let val = val + 1;
+        let len = 32 - val.leading_zeros(); // bit length of code_num
+
+        if len > 1 {
+            self.write_bits_var(len - 1, 0);
+        }
+
+        self.write_bits_var(len, val);
+    }
+
+    fn write_se(&mut self, val: i32) {
+        let val = if val <= 0 { -2 * val } else { 2 * val - 1 };
+
+        self.write_ue(val.cast_unsigned());
+    }
+
+    fn write_nal_start_code_prefix(&mut self) {
+        self.write_bytes(&[0, 0, 0, 1]);
+    }
+
+    fn write_nal_header(&mut self, nal_ref_idc: u8, nal_unit_type: u8) {
+        // forbidden zero bit
+        self.write_bits::<1>(0u32);
+        self.write_bits::<2>(nal_ref_idc);
+        self.write_bits::<5>(nal_unit_type);
+    }
+
+    fn rbsp_trailing_bits(&mut self) {
+        self.write_bits::<1>(1u8);
+        self.buf.byte_align().unwrap();
+    }
+}
+
+/// Returns the encoded buffer
+pub(super) fn write_sps_rbsp(
+    encode_config: &VaH264EncoderConfig,
+    seq_param: &ffi::VAEncSequenceParameterBufferH264,
+) -> Vec<u8> {
+    let seq_fields = unsafe { &seq_param.seq_fields.bits };
+
+    let mut writer = H264BitStreamWriter::new();
+
+    writer.write_nal_start_code_prefix();
+    writer.write_nal_header(NAL_REF_IDC_HIGH, NAL_SPS);
+
+    writer.write_bits::<8>(encode_config.profile.profile_idc());
+    writer.write_bits::<8>(encode_config.profile.profile_iop());
+    writer.write_bits::<8>(encode_config.level.level_idc());
+
+    writer.write_ue(seq_param.seq_parameter_set_id as u32);
+
+    if matches!(
+        encode_config.profile,
+        H264Profile::High
+            | H264Profile::High10
+            | H264Profile::High422
+            | H264Profile::High444Intra
+            | H264Profile::High444Predictive
+    ) {
+        writer.write_ue(1); // TODO: YUV420 - THIS IS WRONG for every non-yuv420 entrypoint
+        // TODO: if chroma_format_idc == 3 put separate_colour_plane_flag
+        writer.write_ue(0); // bit_depth_luma_minus8 - TODO: also wrong, for High10
+        writer.write_ue(0); // bit_depth_chroma_minus8 - TODO: also wrong, for High10
+        writer.write_bits::<1>(0u32); // qpprime_y_zero_transform_bypass_flag
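+        // seq_scaling_matrix_present_flag is written as 0 below; emitting
+        // scaling_list() syntax is not implemented in this writer.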
writer.write_bits::<1>(0u32); // seq_scaling_matrix_present_flag + } + + writer.write_ue(seq_fields.log2_max_frame_num_minus4()); + writer.write_ue(seq_fields.pic_order_cnt_type()); + + if seq_fields.pic_order_cnt_type() == 0 { + writer.write_ue(seq_fields.log2_max_pic_order_cnt_lsb_minus4()); + } else { + panic!( + "unimplemented pic_order_cnt_type {}", + seq_fields.pic_order_cnt_type() + ); + } + + writer.write_ue(seq_param.max_num_ref_frames); + writer.write_bits::<1>(0u8); /* gaps_in_frame_num_value_allowed_flag */ + + writer.write_ue(seq_param.picture_width_in_mbs as u32 - 1); + writer.write_ue(seq_param.picture_height_in_mbs as u32 - 1); + writer.write_bits::<1>(seq_fields.frame_mbs_only_flag()); + + assert_ne!( + seq_fields.frame_mbs_only_flag(), + 0, + "Interlaced encoding not supported" + ); + + writer.write_bits::<1>(seq_fields.direct_8x8_inference_flag()); + writer.write_bits::<1>(seq_param.frame_cropping_flag); + + if seq_param.frame_cropping_flag != 0 { + writer.write_ue(seq_param.frame_crop_left_offset); + writer.write_ue(seq_param.frame_crop_right_offset); + writer.write_ue(seq_param.frame_crop_top_offset); + writer.write_ue(seq_param.frame_crop_bottom_offset); + } + + // TODO: vui parameters, currently always setting it to 0 + writer.write_bits::<1>(0u32); + + writer.rbsp_trailing_bits(); + + writer.buf.into_writer() +} + +/// Returns the encoded buffer with the bit length +pub(super) fn write_pps_rbsp(pic_param: &ffi::VAEncPictureParameterBufferH264) -> Vec { + let pic_fields = unsafe { &pic_param.pic_fields.bits }; + + let mut writer = H264BitStreamWriter::new(); + + writer.write_nal_start_code_prefix(); + writer.write_nal_header(NAL_REF_IDC_HIGH, NAL_PPS); + + // pic_parameter_set_id ue(v) + writer.write_ue(pic_param.pic_parameter_set_id.into()); + // seq_parameter_set_id ue(v) + writer.write_ue(pic_param.seq_parameter_set_id.into()); + + // entropy_coding_mode_flag u(1) + writer.write_bits::<1>(pic_fields.entropy_coding_mode_flag()); + + // bottom_field_pic_order_in_frame_present_flag u(1) + writer.write_bits::<1>(pic_fields.pic_order_present_flag()); + + // num_slice_groups_minus1 ue(v) + writer.write_ue(0); + + // if ( num_slice_groups_minus1 > 0 ) { + // slice_group_map_type ue(v) + // if ( slice_group_map_type == 0 ) + // for( iGroup = 0; iGroup <= num_slice_groups_minus1; iGroup++ ) + // run_length_minus1[ iGroup ] ue(v) + // else if ( slice_group_map_type == 2 ) + // for( iGroup = 0; iGroup < num_slice_groups_minus1; iGroup++ ) { + // top_left[ iGroup ] ue(v) + // bottom_right[ iGroup ] ue(v) + // } + // else if ( slice_group_map_type == 3 | | + // slice_group_map_type == 4 | | + // slice_group_map_type == 5 ) { + // slice_group_change_direction_flag u(1) + // slice_group_change_rate_minus1 ue(v) + // } else if ( slice_group_map_type == 6 ) { + // pic_size_in_map_units_minus1 ue(v) + // for( i = 0; i <= pic_size_in_map_units_minus1; i++ ) + // slice_group_id[ i ] u(v) + // } + // } + + // num_ref_idx_l0_default_active_minus1 ue(v) + writer.write_ue(pic_param.num_ref_idx_l0_active_minus1.into()); + // num_ref_idx_l1_default_active_minus1 ue(v) + writer.write_ue(pic_param.num_ref_idx_l1_active_minus1.into()); + + // weighted_pred_flag u(1) + writer.write_bits::<1>(pic_fields.weighted_pred_flag()); + // weighted_bipred_idc u(2) + writer.write_bits::<2>(pic_fields.weighted_bipred_idc()); + + // pic_init_qp_minus26 se(v) + writer.write_se(pic_param.pic_init_qp as i32 - 26); // pic_init_qp_minus26 + // pic_init_qs_minus26 se(v) + writer.write_se(0); + // 
chroma_qp_index_offset se(v) + writer.write_se(0); + + // deblocking_filter_control_present_flag u(1) + writer.write_bits::<1>(pic_fields.deblocking_filter_control_present_flag()); + // constrained_intra_pred_flag u(1) + writer.write_bits::<1>(pic_fields.constrained_intra_pred_flag()); + // redundant_pic_cnt_present_flag 1 u(1) + writer.write_bits::<1>(pic_fields.redundant_pic_cnt_present_flag()); + + // if ( more_rbsp_data( ) ) { // true + if false { + // transform_8x8_mode_flag 1 u(1) + writer.write_bits::<1>(pic_fields.transform_8x8_mode_flag()); + + // pic_scaling_matrix_present_flag 1 u(1) + writer.write_bits::<1>(pic_fields.pic_scaling_matrix_present_flag()); + + // if ( pic_scaling_matrix_present_flag ) + // for( i = 0; + // i < 6 + ( ( chroma_format_idc != 3 ) ? 2 : 6 ) * transform_8x8_mode_flag; + // i++ ) { + // pic_scaling_list_present_flag[ i ] 1 u(1) + // if ( pic_scaling_list_present_flag[ i ] ) + // if ( i < 6 ) + // scaling_list( ScalingList4x4[ i ], 16, UseDefaultScalingMatrix4x4Flag[ i ] ) + // else + // scaling_list( ScalingList8x8[ i − 6 ], 64, UseDefaultScalingMatrix8x8Flag[ i − 6 ] ) + // } + if pic_fields.pic_scaling_matrix_present_flag() != 0 { + panic!("pic_scaling_matrix_present_flag is not implemented") + } + + // second_chroma_qp_index_offset 1 se(v) + writer.write_se(pic_param.second_chroma_qp_index_offset.into()); + + // } // more rbsp_data + } + + writer.rbsp_trailing_bits(); + + writer.buf.into_writer() +} + +/// Returns the encoded buffer with the bit length +pub(super) fn write_slice_header( + seq_param: &ffi::VAEncSequenceParameterBufferH264, + pic_param: &ffi::VAEncPictureParameterBufferH264, + slice_param: &ffi::VAEncSliceParameterBufferH264, +) -> Vec { + let seq_fields = unsafe { &seq_param.seq_fields.bits }; + let pic_fields = unsafe { &pic_param.pic_fields.bits }; + + let is_idr = pic_fields.idr_pic_flag() != 0; + let is_ref = pic_fields.reference_pic_flag() != 0; + + let (nal_ref_idc, nal_unit_type) = match slice_param.slice_type { + SLICE_TYPE_I => (NAL_REF_IDC_HIGH, if is_idr { NAL_IDR } else { NAL_NON_IDR }), + SLICE_TYPE_P => (NAL_REF_IDC_MEDIUM, NAL_NON_IDR), + SLICE_TYPE_B => ( + if is_ref { + NAL_REF_IDC_LOW + } else { + NAL_REF_IDC_NONE + }, + NAL_NON_IDR, + ), + _ => panic!("Unknown slice_type: {}", slice_param.slice_type), + }; + + let mut writer = H264BitStreamWriter::new(); + writer.write_nal_start_code_prefix(); + writer.write_nal_header(nal_ref_idc, nal_unit_type); + + // first_mb_in_slice ue(v)) + writer.write_ue(slice_param.macroblock_address); + // slice_type ue(v)) + writer.write_ue(slice_param.slice_type.into()); + // pic_parameter_set_id ue(v)) + writer.write_ue(slice_param.pic_parameter_set_id.into()); + + // if ( separate_colour_plane_flag == 1 ) + // colour_plane_id u(2) + + // frame_num u(v) + writer.write_bits_var( + seq_fields.log2_max_frame_num_minus4() + 4, + pic_param.frame_num as u32, + ); + + // if ( !frame_mbs_only_flag ) { + // field_pic_flag u(1) + // if ( field_pic_flag ) + // bottom_field_flag u(1) + // } + if seq_fields.frame_mbs_only_flag() == 0 { + panic!("Interlaced encoding is not supported"); + } + + // if ( IdrPicFlag ) + // idr_pic_id ue(v) + if pic_fields.idr_pic_flag() != 0 { + writer.write_ue(slice_param.idr_pic_id.into()); + } + + // if ( pic_order_cnt_type == 0 ) { + // pic_order_cnt_lsb u(v) + // if ( bottom_field_pic_order_in_frame_present_flag && !field_pic_flag ) + // delta_pic_order_cnt_bottom se(v) + // } + // if ( pic_order_cnt_type == 1 && !delta_pic_order_always_zero_flag ) { + // 
delta_pic_order_cnt[ 0 ] se(v) + // if ( bottom_field_pic_order_in_frame_present_flag && !field_pic_flag ) + // delta_pic_order_cnt[ 1 ] se(v) + // } + if seq_fields.pic_order_cnt_type() == 0 { + writer.write_bits_var( + seq_fields.log2_max_pic_order_cnt_lsb_minus4() + 4, + pic_param.CurrPic.TopFieldOrderCnt as u32, + ); + } else { + panic!("only pic_order_cnt_type 0 is implemented",); + } + + // if ( redundant_pic_cnt_present_flag ) + // redundant_pic_cnt ue(v)) + assert_eq!(pic_fields.redundant_pic_cnt_present_flag(), 0); + + // if ( slice_type == B ) + // direct_spatial_mv_pred_flag u(1) + if slice_param.slice_type == SLICE_TYPE_B { + writer.write_bits::<1>(slice_param.direct_spatial_mv_pred_flag); + } + + // if ( slice_type == P | | slice_type == SP | | slice_type == B ) { + // num_ref_idx_active_override_flag u(1) + // if ( num_ref_idx_active_override_flag ) { + // num_ref_idx_l0_active_minus1 ue(v)) + // if ( slice_type == B ) + // num_ref_idx_l1_active_minus1 ue(v)) + // } + // } + if matches!(slice_param.slice_type, SLICE_TYPE_P | SLICE_TYPE_B) { + writer.write_bits::<1>(slice_param.num_ref_idx_active_override_flag); + + if slice_param.num_ref_idx_active_override_flag != 0 { + writer.write_ue(slice_param.num_ref_idx_l0_active_minus1.into()); + + if slice_param.slice_type == SLICE_TYPE_B { + writer.write_ue(slice_param.num_ref_idx_l1_active_minus1.into()); + } + } + } + + // if ( nal_unit_type == 20 | | nal_unit_type == 21 ) + // ref_pic_list_mvc_modification( ) /* specified in Annex G */ 2 + // else + // ref_pic_list_modification( ) + // ref_pic_list_modification() and ref_pic_list_mvc_modification() are treated the same here + // see H.264 2024.08 G.7.3.3.1.1 + if slice_param.slice_type % 5 != 2 && slice_param.slice_type % 5 != 4 { + // ref_pic_list_modification_flag_l0 u(1) + writer.write_bits::<1>(0u32); + } + if slice_param.slice_type % 5 == 1 { + // ref_pic_list_modification_flag_l1 u(1) + writer.write_bits::<1>(0u32); + } + + // if ( ( weighted_pred_flag && ( slice_type == P | | slice_type == SP ) ) | | + // ( weighted_bipred_idc == 1 && slice_type == B ) ) + // pred_weight_table( ) + + // if ( nal_ref_idc != 0 ) + // dec_ref_pic_marking( ) + if nal_ref_idc != 0 { + // dec_ref_pic_marking( ) { + // if ( IdrPicFlag ) { + // no_output_of_prior_pics_flag u(1) + // long_term_reference_flag u(1) + // } else { + // adaptive_ref_pic_marking_mode_flag u(1) + // if ( adaptive_ref_pic_marking_mode_flag ) + // do { + // memory_management_control_operation ue(v) + // if ( memory_management_control_operation == 1 | | + // memory_management_control_operation == 3 ) + // difference_of_pic_nums_minus1 ue(v) + // if (memory_management_control_operation == 2 ) + // long_term_pic_num ue(v) + // if ( memory_management_control_operation == 3 | | + // memory_management_control_operation == 6 ) + // long_term_frame_idx ue(v) + // if ( memory_management_control_operation == 4 ) + // max_long_term_frame_idx_plus1 ue(v) + // } while( memory_management_control_operation != 0 ) + // } + // } + + if is_idr { + writer.write_bits::<1>(0u32); + writer.write_bits::<1>(0u32); + } else { + writer.write_bits::<1>(0u32); + } + } + + // if ( entropy_coding_mode_flag && slice_type != I && slice_type != SI ) + // cabac_init_idc ue(v)) + if pic_fields.entropy_coding_mode_flag() != 0 && slice_param.slice_type != SLICE_TYPE_I { + writer.write_ue(slice_param.cabac_init_idc.into()); + } + + // slice_qp_delta se(v) + writer.write_se(slice_param.slice_qp_delta.into()); + + // if ( slice_type == SP | | slice_type == SI ) { 
+    //     if ( slice_type == SP )
+    //         sp_for_switch_flag u(1)
+    //     slice_qs_delta se(v)
+    // }
+
+    // if ( deblocking_filter_control_present_flag ) {
+    //     disable_deblocking_filter_idc ue(v))
+    //     if ( disable_deblocking_filter_idc != 1 ) {
+    //         slice_alpha_c0_offset_div2 se(v)
+    //         slice_beta_offset_div2 se(v)
+    //     }
+    // }
+    if pic_fields.deblocking_filter_control_present_flag() != 0 {
+        writer.write_ue(slice_param.disable_deblocking_filter_idc.into());
+
+        if slice_param.disable_deblocking_filter_idc != 1 {
+            writer.write_se(slice_param.slice_alpha_c0_offset_div2.into());
+            writer.write_se(slice_param.slice_beta_offset_div2.into());
+        }
+    }
+
+    // if ( num_slice_groups_minus1 > 0 &&
+    //     slice_group_map_type >= 3 && slice_group_map_type <= 5)
+    //     slice_group_change_cycle u(v)
+
+    // Copied from libva:
+    if pic_fields.entropy_coding_mode_flag() != 0 {
+        while !writer.buf.byte_aligned() {
+            writer.write_bits::<1>(1u32);
+        }
+    }
+
+    writer.buf.byte_align().unwrap();
+    writer.buf.into_writer()
+}
diff --git a/media-video/h264/src/encoder/backends/libva/mod.rs b/media-video/h264/src/encoder/backends/libva/mod.rs
new file mode 100644
index 00000000..8da5368b
--- /dev/null
+++ b/media-video/h264/src/encoder/backends/libva/mod.rs
@@ -0,0 +1,661 @@
+use crate::{
+    H264Level, H264Profile,
+    encoder::{
+        config::{FramePattern, H264FrameType, SliceMode},
+        util::{FrameEncodeInfo, H264EncoderState},
+    },
+};
+use ezk_image::ImageRef;
+use libva::{
+    Buffer, Display, RtFormat, VaError,
+    encoder::{
+        VaEncodeFrameError, VaEncodeSlot, VaEncoder, VaEncoderCapabilities,
+        VaEncoderCapabilitiesError, VaEncoderConfig, VaEncoderCreateError, VaEncoderImplConfig,
+        VaEncoderRateControlMode,
+    },
+    ffi,
+};
+use std::{
+    cmp,
+    collections::VecDeque,
+    mem::{take, zeroed},
+};
+
+mod bitstream;
+
+#[derive(Debug, Clone, Copy)]
+pub struct VaH264EncoderConfig {
+    pub encoder: VaEncoderConfig,
+    pub profile: H264Profile,
+    pub level: H264Level,
+    pub frame_pattern: FramePattern,
+    pub slice_mode: SliceMode,
+}
+
+pub struct VaH264Encoder {
+    config: VaH264EncoderConfig,
+    state: H264EncoderState,
+    encoder: VaEncoder,
+
+    max_l0_references: usize,
+    max_l1_references: usize,
+
+    backlogged_b_frames: Vec<(FrameEncodeInfo, VaEncodeSlot)>,
+    free_dpb_slots: Vec<DpbSlot>,
+    active_dpb_slots: VecDeque<DpbSlot>,
+}
+
+struct DpbSlot {
+    index: usize,
+    picture: ffi::VAPictureH264,
+}
+
+impl VaH264Encoder {
+    pub fn profiles(display: &Display) -> Result<Vec<H264Profile>, VaError> {
+        let mut profiles = Vec::new();
+
+        for va_profile in display.profiles()? {
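+            // Keep only VA profiles that map to a known H264Profile and
+            // expose an encode entrypoint.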
+            let profile = match va_profile {
+                ffi::VAProfile_VAProfileH264Baseline => H264Profile::Baseline,
+                ffi::VAProfile_VAProfileH264ConstrainedBaseline => H264Profile::ConstrainedBaseline,
+                ffi::VAProfile_VAProfileH264High => H264Profile::High,
+                ffi::VAProfile_VAProfileH264High10 => H264Profile::High10,
+                ffi::VAProfile_VAProfileH264Main => H264Profile::Main,
+                _ => continue,
+            };
+
+            let entrypoints = display.entrypoints(va_profile)?;
+
+            let supports_encode = entrypoints.contains(&ffi::VAEntrypoint_VAEntrypointEncSlice)
+                || entrypoints.contains(&ffi::VAEntrypoint_VAEntrypointEncSliceLP);
+
+            if supports_encode {
+                profiles.push(profile);
+            }
+        }
+
+        Ok(profiles)
+    }
+
+    pub fn capabilities(
+        display: &Display,
+        profile: H264Profile,
+    ) -> Result<VaEncoderCapabilities, VaEncoderCapabilitiesError> {
+        let va_profile = profile_to_va_profile(profile)
+            .expect("Passed profile which was not returned by VaH264Encoder::profiles");
+
+        VaEncoderCapabilities::new(display, va_profile)
+    }
+
+    pub fn new(
+        capabilities: &VaEncoderCapabilities,
+        mut config: VaH264EncoderConfig,
+    ) -> Result<Self, VaEncoderCreateError> {
+        if !config.profile.support_b_frames() {
+            config.frame_pattern.ip_period = 1;
+        }
+
+        let va_profile = profile_to_va_profile(config.profile)
+            .expect("Profile in config must be returned by VaH264Encoder::profiles");
+
+        assert_eq!(
+            va_profile,
+            capabilities.profile(),
+            "Profile must be the same as the one the capabilities were queried for"
+        );
+
+        config.encoder.max_encode_resolution[0] =
+            config.encoder.max_encode_resolution[0].next_multiple_of(16);
+        config.encoder.max_encode_resolution[1] =
+            config.encoder.max_encode_resolution[1].next_multiple_of(16);
+
+        let contains = |rt_format| {
+            capabilities
+                .rt_formats
+                .contains(rt_format)
+                .then_some(rt_format)
+        };
+
+        // compile_error!("Investigate input image formats for 10/12 bit RT FORMATS");
+
+        let va_rt_format = contains(RtFormat::YUV420)
+            .or(contains(RtFormat::YUV422))
+            .or(contains(RtFormat::YUV444))
+            .or(contains(RtFormat::YUV420_10))
+            .or(contains(RtFormat::YUV422_10))
+            .or(contains(RtFormat::YUV444_10))
+            .or(contains(RtFormat::YUV420_12))
+            .or(contains(RtFormat::YUV422_12))
+            .or(contains(RtFormat::YUV444_12))
+            .unwrap();
+
+        let num_dpb_slots = 16;
+
+        let encoder = capabilities.create_encoder(VaEncoderImplConfig {
+            user: config.encoder,
+            va_rt_format,
+            num_dpb_slots: 16,
+            num_encode_slots: cmp::max(16, u32::from(config.frame_pattern.ip_period) + 1),
+        })?;
+
+        let (max_l0_references, max_l1_references) = {
+            let [b0, b1, b2, b3] = capabilities.max_reference_frames.to_ne_bytes();
+
+            (u16::from_ne_bytes([b0, b1]), u16::from_ne_bytes([b2, b3]))
+        };
+
+        let free_dpb_slots = (0..num_dpb_slots)
+            .map(|index| DpbSlot {
+                index,
+                picture: ffi::VAPictureH264 {
+                    picture_id: ffi::VA_INVALID_SURFACE,
+                    flags: ffi::VA_PICTURE_H264_INVALID,
+                    ..unsafe { zeroed() }
+                },
+            })
+            .collect();
+
+        Ok(VaH264Encoder {
+            config,
+            state: H264EncoderState::new(config.frame_pattern),
+            encoder,
+            max_l0_references: max_l0_references as usize,
+            max_l1_references: max_l1_references as usize,
+            backlogged_b_frames: Vec::new(),
+            free_dpb_slots,
+            active_dpb_slots: VecDeque::new(),
+        })
+    }
+
+    pub fn request_idr(&mut self) {
+        // TODO: this blows up when B-Frames are queued
+        self.state.begin_new_gop();
+    }
+
+    pub fn poll_result(&mut self) -> Result<Option<Vec<u8>>, VaError> {
+        self.encoder.poll_result()
+    }
+
+    pub fn wait_result(&mut self) -> Result<Option<Vec<u8>>, VaError> {
+        self.encoder.wait_result()
+    }
+
+    pub fn encode_frame(&mut self, image: &dyn ImageRef) -> Result<(), VaEncodeFrameError> {
+        let frame_info =
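+        // Advances the GOP state machine, assigning this frame's type
+        // (IDR/I/P/B), frame_num and picture order count.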
self.state.next(); + + log::debug!("Encode {frame_info:?}"); + + let mut encode_slot = self + .encoder + .pop_encode_slot()? + .expect("Invalid VaEncoder configuration, not enough encode slots"); + + self.encoder + .copy_image_to_encode_slot(&mut encode_slot, image)?; + + // B-Frames are not encoded immediately, they are queued until after an I or P-frame is encoded + if frame_info.frame_type == H264FrameType::B { + self.backlogged_b_frames.push((frame_info, encode_slot)); + return Ok(()); + } + + if frame_info.frame_type == H264FrameType::Idr { + assert!(self.backlogged_b_frames.is_empty()); + // Just encoded an IDR frame, put all reference surfaces back into the surface pool, + self.free_dpb_slots.extend(self.active_dpb_slots.drain(..)); + } + + self.encode_slot(frame_info, encode_slot)?; + + if matches!( + frame_info.frame_type, + H264FrameType::Idr | H264FrameType::I | H264FrameType::P + ) { + let backlogged_b_frames = take(&mut self.backlogged_b_frames); + + // Process backlogged B-Frames + for (frame_info, encode_slot) in backlogged_b_frames { + self.encode_slot(frame_info, encode_slot)?; + } + } + + Ok(()) + } + + fn encode_slot( + &mut self, + frame_info: FrameEncodeInfo, + encode_slot: VaEncodeSlot, + ) -> Result<(), VaError> { + let mut setup_dpb_slot = if let Some(dpb_slot) = self.free_dpb_slots.pop() { + dpb_slot + } else if let Some(dpb_slot) = self.active_dpb_slots.pop_back() { + dpb_slot + } else { + unreachable!() + }; + + log::trace!("\tUsing setup slot {}", setup_dpb_slot.index); + + setup_dpb_slot.picture.picture_id = + self.encoder.dpb_slot_surface(setup_dpb_slot.index).id(); + setup_dpb_slot.picture.frame_idx = frame_info.picture_order_count.into(); + setup_dpb_slot.picture.TopFieldOrderCnt = frame_info.picture_order_count.into(); + setup_dpb_slot.picture.BottomFieldOrderCnt = frame_info.picture_order_count.into(); + setup_dpb_slot.picture.flags = if matches!( + frame_info.frame_type, + H264FrameType::Idr | H264FrameType::I | H264FrameType::P + ) { + ffi::VA_PICTURE_H264_SHORT_TERM_REFERENCE + } else { + 0 + }; + + let l0_references = self + .active_dpb_slots + .iter() + .filter(|dpb_slot| dpb_slot.picture.frame_idx < setup_dpb_slot.picture.frame_idx); + + let l1_references = self + .active_dpb_slots + .iter() + .rev() + .filter(|dpb_slot| dpb_slot.picture.frame_idx > setup_dpb_slot.picture.frame_idx); + + let (l0_references, l1_references) = match frame_info.frame_type { + H264FrameType::P => (l0_references.take(self.max_l0_references).collect(), vec![]), + H264FrameType::B => ( + l0_references.take(self.max_l0_references).collect(), + l1_references.take(self.max_l1_references).collect(), + ), + H264FrameType::I | H264FrameType::Idr => (vec![], vec![]), + }; + + let encode_params = self.build_encode_params( + frame_info, + &encode_slot, + &setup_dpb_slot, + l0_references, + l1_references, + )?; + + if frame_info.frame_type == H264FrameType::B { + self.free_dpb_slots.insert(0, setup_dpb_slot); + } else { + self.active_dpb_slots.push_front(setup_dpb_slot); + } + self.encoder + .submit_encode_slot(encode_slot, encode_params)?; + + Ok(()) + } + + fn build_encode_params( + &self, + frame_info: FrameEncodeInfo, + encode_slot: &VaEncodeSlot, + setup_dpb_slot: &DpbSlot, + l0_references: Vec<&DpbSlot>, + l1_references: Vec<&DpbSlot>, + ) -> Result, VaError> { + let mut encode_params = Vec::new(); + + let seq_param = self.create_seq_params(); + let pic_param = self.create_picture_params( + &frame_info, + setup_dpb_slot, + &l0_references, + &l1_references, + 
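+            // coded output buffer the driver writes the resulting bitstream into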
encode_slot.output_buffer(), + ); + + if frame_info.frame_type == H264FrameType::Idr { + // Render sequence params + encode_params.push(self.encoder.context().create_buffer_with_data( + ffi::VABufferType_VAEncSequenceParameterBufferType, + &seq_param, + )?); + encode_params.push(self.encoder.create_rate_control_params()?); + encode_params.push(self.encoder.create_quality_params()?); + + // Render packed sequence + if self.encoder.support_packed_header_sequence { + let packed_sequence_param = bitstream::write_sps_rbsp(&self.config, &seq_param); + + self.encoder.create_packed_param( + ffi::VAEncPackedHeaderTypeH264_VAEncPackedHeaderH264_SPS, + &packed_sequence_param, + &mut encode_params, + )?; + } + + // Render packed picture + if self.encoder.support_packed_header_picture { + let packed_picture_param = bitstream::write_pps_rbsp(&pic_param); + self.encoder.create_packed_param( + ffi::VAEncPackedHeaderTypeH264_VAEncPackedHeaderH264_PPS, + &packed_picture_param, + &mut encode_params, + )?; + } + } + + encode_params.push(self.encoder.context().create_buffer_with_data( + ffi::VABufferType_VAEncPictureParameterBufferType, + &pic_param, + )?); + + let current_resolution = self.encoder.current_encode_resolution(); + let total_macroblocks = (current_resolution[0] / 16) * (current_resolution[1] / 16); + + match self.config.slice_mode { + SliceMode::Picture => { + self.build_encode_slice_params( + frame_info, + &l0_references, + &l1_references, + &mut encode_params, + &seq_param, + &pic_param, + 0, + total_macroblocks, + )?; + } + SliceMode::Rows(num_rows) => { + let num_macroblocks = (current_resolution[0] / 16) * num_rows.get(); + + for row in (0..current_resolution[1] / 16).step_by(num_rows.get() as usize) { + let first_macroblock = (current_resolution[1] / 16) * row; + let num_macroblocks = + (num_macroblocks).min(total_macroblocks - first_macroblock); + + self.build_encode_slice_params( + frame_info, + &l0_references, + &l1_references, + &mut encode_params, + &seq_param, + &pic_param, + first_macroblock, + num_macroblocks, + )?; + } + } + SliceMode::MacroBlocks(config_num_mbs) => { + for first_macroblock in + (0..total_macroblocks).step_by(config_num_mbs.get() as usize) + { + let num_macroblocks = + (config_num_mbs.get()).min(total_macroblocks - first_macroblock); + + self.build_encode_slice_params( + frame_info, + &l0_references, + &l1_references, + &mut encode_params, + &seq_param, + &pic_param, + first_macroblock, + num_macroblocks, + )?; + } + } + } + + Ok(encode_params) + } + + #[allow(clippy::too_many_arguments)] + fn build_encode_slice_params( + &self, + frame_info: FrameEncodeInfo, + l0_references: &Vec<&DpbSlot>, + l1_references: &Vec<&DpbSlot>, + encode_params: &mut Vec, + seq_param: &ffi::_VAEncSequenceParameterBufferH264, + pic_param: &ffi::_VAEncPictureParameterBufferH264, + first_macroblock: u32, + num_macroblocks: u32, + ) -> Result<(), VaError> { + let slice_param = self.create_slice_params( + &frame_info, + l0_references, + l1_references, + first_macroblock, + num_macroblocks, + ); + + if self.encoder.support_packed_header_slice { + let packed_slice_params = + bitstream::write_slice_header(seq_param, pic_param, &slice_param); + + self.encoder.create_packed_param( + ffi::VAEncPackedHeaderTypeH264_VAEncPackedHeaderH264_Slice, + &packed_slice_params, + encode_params, + )?; + } + + encode_params.push(self.encoder.context().create_buffer_with_data( + ffi::VABufferType_VAEncSliceParameterBufferType, + &slice_param, + )?); + + Ok(()) + } +} + +impl VaH264Encoder { + fn 
create_seq_params(&self) -> ffi::VAEncSequenceParameterBufferH264 {
+        let [width, height] = self.encoder.current_encode_resolution();
+        let [width_mbaligned, height_mbaligned] = self
+            .encoder
+            .current_encode_resolution()
+            .map(|v| v.next_multiple_of(16));
+
+        unsafe {
+            let mut seq_param = zeroed::<ffi::VAEncSequenceParameterBufferH264>();
+
+            seq_param.level_idc = self.config.level.level_idc();
+            seq_param.picture_width_in_mbs = (width_mbaligned / 16) as u16;
+            seq_param.picture_height_in_mbs = (height_mbaligned / 16) as u16;
+
+            seq_param.intra_idr_period = self.config.frame_pattern.intra_idr_period.into();
+            seq_param.intra_period = self.config.frame_pattern.intra_period.into();
+            seq_param.ip_period = self.config.frame_pattern.ip_period.into();
+
+            seq_param.max_num_ref_frames = self.max_l0_references as u32
+                + if self.config.frame_pattern.ip_period > 1 {
+                    self.max_l1_references as u32
+                } else {
+                    0
+                };
+
+            seq_param.time_scale = 900; // TODO: configurable
+            seq_param.num_units_in_tick = 15; // TODO: configurable
+
+            let seq_fields = &mut seq_param.seq_fields.bits;
+
+            seq_fields.set_log2_max_pic_order_cnt_lsb_minus4(
+                (self.state.log2_max_pic_order_cnt_lsb - 4).into(),
+            );
+            seq_fields.set_log2_max_frame_num_minus4((self.state.log2_max_frame_num - 4).into());
+
+            seq_fields.set_frame_mbs_only_flag(1);
+            seq_fields.set_chroma_format_idc(1); // TODO: configurable, this is currently hardcoded to yuv420
+            seq_fields.set_direct_8x8_inference_flag(1);
+
+            if width != width_mbaligned || height != height_mbaligned {
+                seq_param.frame_cropping_flag = 1;
+                seq_param.frame_crop_right_offset = (width_mbaligned - width) / 2;
+                seq_param.frame_crop_bottom_offset = (height_mbaligned - height) / 2;
+            }
+
+            seq_param
+        }
+    }
+
+    fn create_picture_params(
+        &self,
+        frame_info: &FrameEncodeInfo,
+        setup_dpb_slot: &DpbSlot,
+        l0_references: &[&DpbSlot],
+        l1_references: &[&DpbSlot],
+        output: &Buffer,
+    ) -> ffi::VAEncPictureParameterBufferH264 {
+        unsafe {
+            let mut pic_param = zeroed::<ffi::VAEncPictureParameterBufferH264>();
+
+            pic_param.frame_num = frame_info.frame_num;
+            pic_param.CurrPic = setup_dpb_slot.picture;
+
+            match frame_info.frame_type {
+                H264FrameType::P | H264FrameType::B => {
+                    let iter = l0_references.iter().chain(l1_references).copied();
+
+                    fill_pic_list(&mut pic_param.ReferenceFrames, iter);
+                }
+                H264FrameType::I | H264FrameType::Idr => {
+                    // No references to add
+                }
+            }
+
+            log::trace!(
+                "\tpic_params.ReferenceFrames = {:?}",
+                debug_pic_list(&pic_param.ReferenceFrames)
+            );
+
+            pic_param
+                .pic_fields
+                .bits
+                .set_idr_pic_flag((frame_info.frame_type == H264FrameType::Idr) as u32);
+            pic_param
+                .pic_fields
+                .bits
+                .set_reference_pic_flag((frame_info.frame_type != H264FrameType::B) as u32);
+            pic_param.pic_fields.bits.set_entropy_coding_mode_flag(
+                self.config.profile.support_entropy_coding_mode().into(),
+            );
+            pic_param.pic_fields.bits.set_transform_8x8_mode_flag(
+                self.config.profile.support_transform_8x8_mode_flag().into(),
+            );
+            pic_param
+                .pic_fields
+                .bits
+                .set_deblocking_filter_control_present_flag(1);
+
+            pic_param.coded_buf = output.id();
+            pic_param.last_picture = 0; // TODO: set on flush
+
+            if self
+                .config
+                .encoder
+                .rate_control
+                .mode
+                .contains(VaEncoderRateControlMode::CQP)
+            {
+                pic_param.pic_init_qp = self.config.encoder.rate_control.initial_qp;
+            }
+
+            pic_param
+        }
+    }
+
+    fn create_slice_params(
+        &self,
+        frame_info: &FrameEncodeInfo,
+        l0_references: &[&DpbSlot],
+        l1_references: &[&DpbSlot],
+        first_macroblock: u32,
+        num_macroblocks: u32,
+    ) -> ffi::VAEncSliceParameterBufferH264 {
+        unsafe {
mut slice_params = zeroed::<ffi::VAEncSliceParameterBufferH264>(); + + slice_params.macroblock_address = first_macroblock; + slice_params.num_macroblocks = num_macroblocks; + slice_params.slice_type = match frame_info.frame_type { + H264FrameType::P => 0, + H264FrameType::B => 1, + H264FrameType::Idr | H264FrameType::I => 2, + }; + + match frame_info.frame_type { + H264FrameType::P => { + fill_pic_list(&mut slice_params.RefPicList0, l0_references.iter().copied()); + fill_pic_list(&mut slice_params.RefPicList1, None); + } + H264FrameType::B => { + fill_pic_list(&mut slice_params.RefPicList0, l0_references.iter().copied()); + fill_pic_list(&mut slice_params.RefPicList1, l1_references.iter().copied()); + } + H264FrameType::I => { + fill_pic_list(&mut slice_params.RefPicList0, None); + fill_pic_list(&mut slice_params.RefPicList1, None); + } + H264FrameType::Idr => { + fill_pic_list(&mut slice_params.RefPicList0, None); + fill_pic_list(&mut slice_params.RefPicList1, None); + + slice_params.idr_pic_id = frame_info.idr_pic_id; + } + } + + log::trace!( + "\tslice_params.RefPicList0 = {:?}", + debug_pic_list(&slice_params.RefPicList0) + ); + + log::trace!( + "\tslice_params.RefPicList1 = {:?}", + debug_pic_list(&slice_params.RefPicList1) + ); + + slice_params.slice_alpha_c0_offset_div2 = 0; + slice_params.slice_beta_offset_div2 = 0; + + slice_params.direct_spatial_mv_pred_flag = 1; + slice_params.pic_order_cnt_lsb = frame_info.picture_order_count; + + slice_params + } + } +} + +fn debug_pic_list(list: &[ffi::VAPictureH264]) -> Vec<u32> { + list.iter() + .take_while(|p| p.flags != ffi::VA_PICTURE_H264_INVALID) + .map(|p| p.frame_idx) + .collect::<Vec<_>>() +} + +fn fill_pic_list<'a>(list: &mut [ffi::VAPictureH264], iter: impl IntoIterator<Item = &'a DpbSlot>) { + let mut iter = iter.into_iter(); + for dst_picture in list { + if let Some(DpbSlot { picture, index: _ }) = iter.next() { + *dst_picture = *picture; + } else { + dst_picture.picture_id = ffi::VA_INVALID_SURFACE; + dst_picture.flags = ffi::VA_PICTURE_H264_INVALID; + } + } +} + +fn profile_to_va_profile(profile: crate::H264Profile) -> Option<ffi::VAProfile> { + let profile = match profile { + crate::H264Profile::Baseline => ffi::VAProfile_VAProfileH264Baseline, + crate::H264Profile::ConstrainedBaseline => ffi::VAProfile_VAProfileH264ConstrainedBaseline, + crate::H264Profile::Main => ffi::VAProfile_VAProfileH264Main, + crate::H264Profile::Extended => return None, + crate::H264Profile::High => ffi::VAProfile_VAProfileH264High, + crate::H264Profile::High10 => ffi::VAProfile_VAProfileH264High10, + crate::H264Profile::High422 => ffi::VAProfile_VAProfileH264High, + crate::H264Profile::High444Predictive => ffi::VAProfile_VAProfileH264High, + crate::H264Profile::High10Intra => ffi::VAProfile_VAProfileH264High10, + crate::H264Profile::High422Intra => ffi::VAProfile_VAProfileH264High, + crate::H264Profile::High444Intra => ffi::VAProfile_VAProfileH264High, + crate::H264Profile::CAVLC444Intra => return None, + }; + + Some(profile) +} diff --git a/media-video/h264/src/encoder/backends/mod.rs b/media-video/h264/src/encoder/backends/mod.rs new file mode 100644 index 00000000..fac74e9f --- /dev/null +++ b/media-video/h264/src/encoder/backends/mod.rs @@ -0,0 +1,6 @@ +#[cfg(all(target_os = "linux", feature = "libva"))] +pub mod libva; +#[cfg(feature = "openh264")] +pub mod openh264; +#[cfg(feature = "vulkan")] +pub mod vulkan; diff --git a/media-video/h264/src/encoder/backends/openh264.rs b/media-video/h264/src/encoder/backends/openh264.rs new file mode 100644 index 00000000..1c0c17ce --- /dev/null +++ 
b/media-video/h264/src/encoder/backends/openh264.rs @@ -0,0 +1,283 @@ +//! Utility functions for openh264 + +use crate::{ + H264FmtpOptions, H264Level, H264PacketizationMode, H264Profile, + encoder::config::{Framerate, H264EncoderConfig, H264RateControlConfig}, + profile_level_id::ProfileLevelId, +}; +use ezk_image::{ + ColorInfo, ColorSpace, Image, ImageRef, ImageRefExt, PixelFormat, YuvColorInfo, + convert_multi_thread, +}; +use openh264::{ + encoder::{BitRate, Encoder, FrameRate, IntraFramePeriod, QpRange, RateControlMode}, + formats::YUVSlices, +}; +use openh264_sys2::API as _; +use std::{collections::VecDeque, mem::MaybeUninit, time::Instant}; + +pub struct OpenH264Encoder { + encoder: Encoder, + scratch: Vec<u8>, + output: VecDeque<Vec<u8>>, + init: Option<Instant>, +} + +impl OpenH264Encoder { + pub fn new(config: H264EncoderConfig) -> Result<Self, openh264::Error> { + let config = openh264_encoder_config(config); + + let encoder = Encoder::with_api_config(openh264::OpenH264API::from_source(), config)?; + + Ok(OpenH264Encoder { + encoder, + scratch: Vec::new(), + output: VecDeque::new(), + init: None, + }) + } + + pub fn request_idr(&mut self) { + self.encoder.force_intra_frame(); + } + + pub fn encode_frame(&mut self, image: &dyn ImageRef) -> Result<(), openh264::Error> { + let init = self.init.get_or_insert_with(Instant::now); + let timestamp = openh264::Timestamp::from_millis(init.elapsed().as_millis() as u64); + + let image = image.crop_even().map_err(|e| { + openh264::Error::msg_string(format!( + "Failed to crop input image to an even resolution: {e:?}" + )) + })?; + + let bitstream = if image.format() == PixelFormat::I420 { + let mut planes = image.planes(); + + let (y_plane, y_stride) = planes + .next() + .ok_or_else(|| openh264::Error::msg("Missing Y plane"))?; + let (u_plane, u_stride) = planes + .next() + .ok_or_else(|| openh264::Error::msg("Missing U plane"))?; + let (v_plane, v_stride) = planes + .next() + .ok_or_else(|| openh264::Error::msg("Missing V plane"))?; + + let input = YUVSlices::new( + (y_plane, u_plane, v_plane), + (image.width(), image.height()), + (y_stride, u_stride, v_stride), + ); + + self.encoder.encode_at(&input, timestamp)? + } else { + self.scratch.resize( + PixelFormat::I420.buffer_size(image.width(), image.height()), + 0, + ); + + let dst_color = match image.color() { + ColorInfo::RGB(rgb_color_info) => YuvColorInfo { + transfer: rgb_color_info.transfer, + primaries: rgb_color_info.primaries, + space: ColorSpace::BT709, + full_range: true, + }, + ColorInfo::YUV(yuv_color_info) => yuv_color_info, + }; + + let mut dst = Image::from_buffer( + PixelFormat::I420, + self.scratch.as_mut_slice(), + None, + image.width(), + image.height(), + dst_color.into(), + ) + .map_err(|e| { + openh264::Error::msg_string(format!( + "Failed to create convert destination image: {e:?}" + )) + })?; + + convert_multi_thread(&image, &mut dst).map_err(|e| { + openh264::Error::msg_string(format!("Failed to convert input image to I420: {e:?}")) + })?; + + let mut planes = dst.planes(); + + let (y_plane, y_stride) = planes + .next() + .ok_or_else(|| openh264::Error::msg("Missing Y plane"))?; + let (u_plane, u_stride) = planes + .next() + .ok_or_else(|| openh264::Error::msg("Missing U plane"))?; + let (v_plane, v_stride) = planes + .next() + .ok_or_else(|| openh264::Error::msg("Missing V plane"))?; + + let input = YUVSlices::new( + (y_plane, u_plane, v_plane), + (image.width(), image.height()), + (y_stride, u_stride, v_stride), + ); + + self.encoder.encode_at(&input, timestamp)? 
+ }; + + match bitstream.frame_type() { + openh264::encoder::FrameType::Invalid + | openh264::encoder::FrameType::Skip + | openh264::encoder::FrameType::IPMixed => { + log::warn!("Ignoring unsupported frame type: {:?}", bitstream.frame_type()); + return Ok(()); + } + openh264::encoder::FrameType::IDR => {} + openh264::encoder::FrameType::I => {} + openh264::encoder::FrameType::P => {} + } + + self.output.push_back(bitstream.to_vec()); + + Ok(()) + } + + pub fn poll_result(&mut self) -> Option<Vec<u8>> { + self.output.pop_front() + } + + pub fn wait_result(&mut self) -> Option<Vec<u8>> { + self.output.pop_front() + } +} + +fn map_profile(profile: H264Profile) -> openh264::encoder::Profile { + use H264Profile::*; + + match profile { + ConstrainedBaseline | Baseline => openh264::encoder::Profile::Baseline, + Main => openh264::encoder::Profile::Main, + Extended => openh264::encoder::Profile::Extended, + High => openh264::encoder::Profile::High, + High10 | High10Intra => openh264::encoder::Profile::High10, + High422 | High422Intra => openh264::encoder::Profile::High422, + High444Predictive | High444Intra => openh264::encoder::Profile::High444, + CAVLC444Intra => openh264::encoder::Profile::CAVLC444, + } +} + +fn map_level(level: H264Level) -> openh264::encoder::Level { + match level { + H264Level::Level_1_0 => openh264::encoder::Level::Level_1_0, + H264Level::Level_1_B => openh264::encoder::Level::Level_1_B, + H264Level::Level_1_1 => openh264::encoder::Level::Level_1_1, + H264Level::Level_1_2 => openh264::encoder::Level::Level_1_2, + H264Level::Level_1_3 => openh264::encoder::Level::Level_1_3, + H264Level::Level_2_0 => openh264::encoder::Level::Level_2_0, + H264Level::Level_2_1 => openh264::encoder::Level::Level_2_1, + H264Level::Level_2_2 => openh264::encoder::Level::Level_2_2, + H264Level::Level_3_0 => openh264::encoder::Level::Level_3_0, + H264Level::Level_3_1 => openh264::encoder::Level::Level_3_1, + H264Level::Level_3_2 => openh264::encoder::Level::Level_3_2, + H264Level::Level_4_0 => openh264::encoder::Level::Level_4_0, + H264Level::Level_4_1 => openh264::encoder::Level::Level_4_1, + H264Level::Level_4_2 => openh264::encoder::Level::Level_4_2, + H264Level::Level_5_0 => openh264::encoder::Level::Level_5_0, + H264Level::Level_5_1 => openh264::encoder::Level::Level_5_1, + H264Level::Level_5_2 => openh264::encoder::Level::Level_5_2, + // Level 6+ is not supported by openh264 - use 5.2 + H264Level::Level_6_0 => openh264::encoder::Level::Level_5_2, + H264Level::Level_6_1 => openh264::encoder::Level::Level_5_2, + H264Level::Level_6_2 => openh264::encoder::Level::Level_5_2, + } +} + +/// Create an openh264 encoder config from the generic [`H264EncoderConfig`] +fn openh264_encoder_config(c: H264EncoderConfig) -> openh264::encoder::EncoderConfig { + let mut config = openh264::encoder::EncoderConfig::new() + .profile(map_profile(c.profile)) + .level(map_level(c.level)); + + if let Some(Framerate { num, denom }) = c.framerate { + config = config.max_frame_rate(FrameRate::from_hz(num as f32 / denom as f32)); + } + + if let Some((qmin, qmax)) = c.qp { + config = config.qp(QpRange::new(qmin, qmax)); + } + + config = config.intra_frame_period(IntraFramePeriod::from_num_frames( + c.frame_pattern.intra_idr_period.into(), + )); + + match c.rate_control { + H264RateControlConfig::ConstantBitRate { bitrate } => { + config = config + .rate_control_mode(RateControlMode::Quality) + .bitrate(BitRate::from_bps(bitrate)); + } + H264RateControlConfig::VariableBitRate { + average_bitrate, + max_bitrate, + } => { + // TODO: make the distinction 
between max & target bitrate in openh264 + let _ = average_bitrate; + config = config + .rate_control_mode(RateControlMode::Bitrate) + .bitrate(BitRate::from_bps(max_bitrate)); + } + H264RateControlConfig::ConstantQuality { + const_qp, + max_bitrate, + } => { + config = config + .rate_control_mode(RateControlMode::Quality) + .qp(QpRange::new(const_qp, const_qp)); + + if let Some(max_bitrate) = max_bitrate { + config = config.bitrate(BitRate::from_bps(max_bitrate)); + } + } + } + + if let Some(max_slice_len) = c.slice_max_len { + config = config.max_slice_len(max_slice_len as u32); + } + + config +} + +/// Create [`H264FmtpOptions`] from openh264's decoder capabilities. +/// +/// Should be used when offering to receive H.264 in a SDP negotiation. +pub fn openh264_decoder_fmtp(api: &openh264::OpenH264API) -> H264FmtpOptions { + let capability = unsafe { + let mut capability = MaybeUninit::uninit(); + + assert_eq!( + api.WelsGetDecoderCapability(capability.as_mut_ptr()), + 0, + "openh264 WelsGetDecoderCapability failed" + ); + + capability.assume_init() + }; + + H264FmtpOptions { + profile_level_id: ProfileLevelId::from_bytes( + capability.iProfileIdc as u8, + capability.iProfileIop as u8, + capability.iLevelIdc as u8, + ) + .expect("openh264 should not return unknown capabilities"), + level_asymmetry_allowed: true, + packetization_mode: H264PacketizationMode::NonInterleavedMode, + max_mbps: Some(capability.iMaxMbps as u32), + max_fs: Some(capability.iMaxFs as u32), + max_cpb: Some(capability.iMaxCpb as u32), + max_dpb: Some(capability.iMaxDpb as u32), + max_br: Some(capability.iMaxBr as u32), + redundant_pic_cap: capability.bRedPicCap, + } +} diff --git a/media-video/h264/src/encoder/backends/vulkan/mod.rs b/media-video/h264/src/encoder/backends/vulkan/mod.rs new file mode 100644 index 00000000..336fd981 --- /dev/null +++ b/media-video/h264/src/encoder/backends/vulkan/mod.rs @@ -0,0 +1,700 @@ +use crate::{ + H264Level, H264Profile, + encoder::{ + config::{FramePattern, Framerate, H264FrameType, SliceMode}, + util::{FrameEncodeInfo, H264EncoderState}, + }, + profile_iop_consts::{ + CONSTRAINT_SET0_FLAG, CONSTRAINT_SET1_FLAG, CONSTRAINT_SET2_FLAG, CONSTRAINT_SET3_FLAG, + CONSTRAINT_SET4_FLAG, CONSTRAINT_SET5_FLAG, + }, +}; +use smallvec::SmallVec; +use std::{ + cmp, + collections::VecDeque, + ffi::c_void, + mem::{take, zeroed}, + pin::Pin, + ptr::null, + time::Instant, +}; +use vulkan::{ + Device, PhysicalDevice, VulkanError, + ash::vk, + encoder::{ + RateControlInfos, VulkanEncodeFrameError, VulkanEncodeSlot, VulkanEncoder, + VulkanEncoderConfig, VulkanEncoderImplConfig, + capabilities::{VulkanEncoderCapabilities, VulkanEncoderCapabilitiesError}, + codec::H264, + input::InputData, + }, +}; + +#[derive(Debug, Clone, Copy)] +pub struct VulkanH264EncoderConfig { + pub encoder: VulkanEncoderConfig, + pub profile: H264Profile, + pub level: H264Level, + pub frame_pattern: FramePattern, + pub rate_control: VulkanH264RateControlConfig, + pub slice_mode: SliceMode, +} + +#[derive(Debug, Clone, Copy)] +pub struct VulkanH264RateControlConfig { + pub mode: VulkanH264RateControlMode, + pub framerate: Option<Framerate>, + pub min_qp: Option<u8>, + pub max_qp: Option<u8>, +} + +#[derive(Debug, Clone, Copy)] +pub enum VulkanH264RateControlMode { + Default, + ConstantBitrate { + bitrate: u32, + }, + VariableBitrate { + average_bitrate: u32, + max_bitrate: u32, + }, + ConstantQuality { + qp: u8, + }, +} + +#[derive(Debug)] +pub struct VkH264Encoder { + config: VulkanH264EncoderConfig, + state: H264EncoderState, + encoder: 
VulkanEncoder<H264>, + + seq_params: vk::native::StdVideoH264SequenceParameterSet, + pic_params: vk::native::StdVideoH264PictureParameterSet, + + max_l0_p_ref_images: usize, + max_l0_b_ref_images: usize, + max_l1_b_ref_images: usize, + + backlogged_b_frames: Vec<(FrameEncodeInfo, VulkanEncodeSlot)>, + free_dpb_slots: Vec<DpbSlot>, + active_dpb_slots: VecDeque<DpbSlot>, +} + +#[derive(Debug, Clone, Copy)] +struct DpbSlot { + index: usize, + display_index: u16, +} + +impl VkH264Encoder { + pub fn capabilities( + physical_device: &PhysicalDevice, + profile: H264Profile, + ) -> Result<VulkanEncoderCapabilities<H264>, VulkanEncoderCapabilitiesError> { + let h264_profile_info = vk::VideoEncodeH264ProfileInfoKHR::default() + .std_profile_idc(profile.profile_idc().into()); + + let capabilities = + VulkanEncoderCapabilities::<H264>::new(physical_device, h264_profile_info)?; + + Ok(capabilities) + } + + pub fn new( + device: &Device, + capabilities: &VulkanEncoderCapabilities<H264>, + config: VulkanH264EncoderConfig, + ) -> Result<Self, VulkanError> { + let state = H264EncoderState::new(config.frame_pattern); + + let caps = capabilities.video; + let h264_caps = capabilities.codec; + let max_references = cmp::max( + h264_caps.max_p_picture_l0_reference_count, + h264_caps.max_b_picture_l0_reference_count + h264_caps.max_l1_reference_count, + ); + let max_active_references = cmp::min(max_references, caps.max_active_reference_pictures); + + // Make only as many DPB slots as can be actively referenced, plus 1 for the setup reference + let max_dpb_slots = cmp::min(caps.max_dpb_slots, max_active_references + 1); + + let vk::Extent2D { width, height } = config.encoder.initial_encode_resolution; + + let width_mbaligned = width.next_multiple_of(16); + let height_mbaligned = height.next_multiple_of(16); + + let profile_idc = config.profile.profile_idc(); + let profile_iop = config.profile.profile_iop(); + + let seq_params = vk::native::StdVideoH264SequenceParameterSet { + flags: vk::native::StdVideoH264SpsFlags { + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoH264SpsFlags::new_bitfield_1( + ((profile_iop & CONSTRAINT_SET0_FLAG) != 0) as u32, // constraint_set0_flag + ((profile_iop & CONSTRAINT_SET1_FLAG) != 0) as u32, // constraint_set1_flag + ((profile_iop & CONSTRAINT_SET2_FLAG) != 0) as u32, // constraint_set2_flag + ((profile_iop & CONSTRAINT_SET3_FLAG) != 0) as u32, // constraint_set3_flag + ((profile_iop & CONSTRAINT_SET4_FLAG) != 0) as u32, // constraint_set4_flag + ((profile_iop & CONSTRAINT_SET5_FLAG) != 0) as u32, // constraint_set5_flag + 1, // direct_8x8_inference_flag + 0, // mb_adaptive_frame_field_flag, + 1, // frame_mbs_only_flag, + 0, // delta_pic_order_always_zero_flag, + 0, // separate_colour_plane_flag, + 0, // gaps_in_frame_num_value_allowed_flag, + 0, // qpprime_y_zero_transform_bypass_flag, + (width != width_mbaligned || height != height_mbaligned).into(), // frame_cropping_flag, + 0, // seq_scaling_matrix_present_flag, + 0, // vui_parameters_present_flag, + ), + __bindgen_padding_0: 0, + }, + profile_idc: profile_idc.into(), + level_idc: map_level(config.level), + chroma_format_idc: + vk::native::StdVideoH264ChromaFormatIdc_STD_VIDEO_H264_CHROMA_FORMAT_IDC_420, + seq_parameter_set_id: 0, + bit_depth_luma_minus8: 0, + bit_depth_chroma_minus8: 0, + log2_max_frame_num_minus4: state.log2_max_frame_num - 4, + pic_order_cnt_type: 0, + offset_for_non_ref_pic: 0, + offset_for_top_to_bottom_field: 0, + log2_max_pic_order_cnt_lsb_minus4: state.log2_max_pic_order_cnt_lsb - 4, + num_ref_frames_in_pic_order_cnt_cycle: 0, + max_num_ref_frames: max_active_references as u8, + reserved1: 0, + pic_width_in_mbs_minus1: (width_mbaligned / 16) - 1, + pic_height_in_map_units_minus1: (height_mbaligned / 16) - 1, + frame_crop_left_offset: 0, + frame_crop_right_offset: 
(width_mbaligned - width) / 2, + frame_crop_top_offset: 0, + frame_crop_bottom_offset: (height_mbaligned - height) / 2, + reserved2: 0, + pOffsetForRefFrame: null(), + pScalingLists: null(), + pSequenceParameterSetVui: null(), + }; + + let pic_params = vk::native::StdVideoH264PictureParameterSet { + flags: vk::native::StdVideoH264PpsFlags { + _bitfield_align_1: [], + _bitfield_1: vk::native::StdVideoH264PpsFlags::new_bitfield_1( + config.profile.support_transform_8x8_mode_flag().into(), // transform_8x8_mode_flag, + 0, // redundant_pic_cnt_present_flag, + 0, // constrained_intra_pred_flag, + 0, // deblocking_filter_control_present_flag, + 0, // weighted_pred_flag, + 0, // bottom_field_pic_order_in_frame_present_flag, + config.profile.support_entropy_coding_mode().into(), // entropy_coding_mode_flag, + 0, // pic_scaling_matrix_present_flag, + ), + __bindgen_padding_0: [0; _], + }, + seq_parameter_set_id: 0, + pic_parameter_set_id: 0, + num_ref_idx_l0_default_active_minus1: 0, + num_ref_idx_l1_default_active_minus1: 0, + weighted_bipred_idc: 0, + pic_init_qp_minus26: 0, + pic_init_qs_minus26: 0, + chroma_qp_index_offset: 0, + second_chroma_qp_index_offset: 0, + pScalingLists: null(), + }; + + let std_sp_ss = [seq_params]; + let std_pp_ss = [pic_params]; + let video_encode_h264_session_parameters_add_info = + vk::VideoEncodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(&std_sp_ss) + .std_pp_ss(&std_pp_ss); + + let video_encode_h264_session_parameters_create_info = + vk::VideoEncodeH264SessionParametersCreateInfoKHR::default() + .max_std_sps_count(u32::MAX) + .max_std_pps_count(u32::MAX) + .parameters_add_info(&video_encode_h264_session_parameters_add_info); + + let encoder_config = VulkanEncoderImplConfig { + user: config.encoder, + // Set number of encode slots to (num_b_frames + 1) and at least 4 + num_encode_slots: cmp::max(4, u32::from(config.frame_pattern.ip_period) + 1), + max_active_references, + num_dpb_slots: max_dpb_slots, + }; + + let h264_profile_info = vk::VideoEncodeH264ProfileInfoKHR::default() + .std_profile_idc(config.profile.profile_idc().into()); + + let encoder = capabilities.create_encoder( + device, + encoder_config, + h264_profile_info, + vk::VideoEncodeH264SessionCreateInfoKHR::default() + .use_max_level_idc(true) + .max_level_idc(map_level(config.level)), + video_encode_h264_session_parameters_create_info, + Some(rate_control_from_config(&config)), + )?; + + let free_dpb_slots = (0..max_dpb_slots as usize) + .map(|index| DpbSlot { + index, + display_index: 0, + }) + .rev() + .collect(); + + Ok(VkH264Encoder { + config, + state, + encoder, + seq_params, + pic_params, + max_l0_p_ref_images: h264_caps.max_p_picture_l0_reference_count as usize, + max_l0_b_ref_images: h264_caps.max_b_picture_l0_reference_count as usize, + max_l1_b_ref_images: h264_caps.max_l1_reference_count as usize, + backlogged_b_frames: Vec::new(), + free_dpb_slots, + active_dpb_slots: VecDeque::new(), + }) + } + + /// Request the next frame to be an IDR frame + pub fn request_idr(&mut self) { + // TODO: this totally blows up if b-frames are currently queued + self.state.begin_new_gop(); + } + + /// Update the encoder's rate control config
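 + /// + /// A `no_run` sketch (editor's example; the concrete values are illustrative, + /// not recommendations): + /// + /// ```no_run + /// # fn demo(encoder: &mut ezk_h264::encoder::backends::vulkan::VkH264Encoder) { + /// use ezk_h264::encoder::backends::vulkan::{ + /// VulkanH264RateControlConfig, VulkanH264RateControlMode, + /// }; + /// use ezk_h264::encoder::config::Framerate; + /// + /// encoder.update_rate_control(VulkanH264RateControlConfig { + /// mode: VulkanH264RateControlMode::VariableBitrate { + /// average_bitrate: 1_500_000, + /// max_bitrate: 3_000_000, + /// }, + /// framerate: Some(Framerate::from_fps(30)), + /// min_qp: Some(18), + /// max_qp: Some(40), + /// }); + /// # } + /// ```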
 + pub fn update_rate_control(&mut self, rate_control: VulkanH264RateControlConfig) { + unsafe { + self.config.rate_control = rate_control; + + self.encoder + .update_rc(rate_control_from_config(&self.config)); + } + } + + /// Change the output resolution of the encoder + pub fn update_output_extent( + &mut self, + new_extent: vk::Extent2D, + ) -> Result<(), VulkanEncodeFrameError> { + if new_extent == self.encoder.current_extent() { + return Ok(()); + } + + // First drain all backlogged B-Frames since we're going to emit an IDR frame next + let mut backlogged_b_frames = take(&mut self.backlogged_b_frames); + + // Encode the last frame as a P frame + if let Some((mut frame_info, encode_slot)) = backlogged_b_frames.pop() { + frame_info.frame_type = H264FrameType::P; + self.encode_slot(frame_info, encode_slot)?; + } + + // Then encode all other frames as B-Frames + for (frame_info, encode_slot) in backlogged_b_frames { + self.encode_slot(frame_info, encode_slot)?; + } + + self.state.begin_new_gop(); + + // Update the encoder + let vk::Extent2D { width, height } = new_extent; + + let width_mbaligned = width.next_multiple_of(16); + let height_mbaligned = height.next_multiple_of(16); + + self.seq_params.flags.set_frame_cropping_flag( + (width != width_mbaligned || height != height_mbaligned).into(), + ); + + self.seq_params.seq_parameter_set_id += 1; + self.pic_params.seq_parameter_set_id = self.seq_params.seq_parameter_set_id; + self.pic_params.pic_parameter_set_id += 1; + + self.seq_params.pic_width_in_mbs_minus1 = (width_mbaligned / 16) - 1; + self.seq_params.pic_height_in_map_units_minus1 = (height_mbaligned / 16) - 1; + + self.seq_params.frame_crop_right_offset = (width_mbaligned - width) / 2; + self.seq_params.frame_crop_bottom_offset = (height_mbaligned - height) / 2; + + let mut parameters = vk::VideoEncodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(std::slice::from_ref(&self.seq_params)) + .std_pp_ss(std::slice::from_ref(&self.pic_params)); + + self.encoder + .update_current_extent(new_extent, &mut parameters)?; + + Ok(()) + } + + pub fn poll_result(&mut self) -> Result<Option<(Instant, Vec<u8>)>, VulkanError> { + self.encoder.poll_result() + } + + pub fn wait_result(&mut self) -> Result<Option<(Instant, Vec<u8>)>, VulkanError> { + self.encoder.wait_result() + } + + pub fn encode_frame(&mut self, input: InputData<'_>) -> Result<(), VulkanEncodeFrameError> { + let frame_info = self.state.next(); + + log::debug!("Encode {frame_info:?}"); + + let mut encode_slot = self + .encoder + .pop_encode_slot()? 
 + .expect("encoder must have enough encode_slots for the given ip_period configuration"); + + self.encoder + .set_input_of_encode_slot(&mut encode_slot, input)?; + + // B-Frames are not encoded immediately, they are queued until after an I or P-frame is encoded + // (e.g. with ip_period = 4, display order B1 B2 B3 P4 is submitted to the hardware as P4 B1 B2 B3) + if frame_info.frame_type == H264FrameType::B { + self.backlogged_b_frames.push((frame_info, encode_slot)); + return Ok(()); + } + + if frame_info.frame_type == H264FrameType::Idr { + assert!(self.backlogged_b_frames.is_empty()); + // About to encode an IDR frame, so put all reference surfaces back into the surface pool + self.free_dpb_slots.extend(self.active_dpb_slots.drain(..)); + } + + self.encode_slot(frame_info, encode_slot)?; + + if matches!( + frame_info.frame_type, + H264FrameType::Idr | H264FrameType::I | H264FrameType::P + ) { + let backlogged_b_frames = take(&mut self.backlogged_b_frames); + + // Process backlogged B-Frames + for (frame_info, encode_slot) in backlogged_b_frames { + self.encode_slot(frame_info, encode_slot)?; + } + } + + Ok(()) + } + + fn encode_slot( + &mut self, + frame_info: FrameEncodeInfo, + encode_slot: VulkanEncodeSlot, + ) -> Result<(), VulkanEncodeFrameError> { + let mut setup_dpb_slot = if let Some(dpb_slot) = self.free_dpb_slots.pop() { + dpb_slot + } else if let Some(dpb_slot) = self.active_dpb_slots.pop_back() { + dpb_slot + } else { + unreachable!() + }; + + log::trace!("\tUsing setup slot {}", setup_dpb_slot.index); + + setup_dpb_slot.display_index = frame_info.picture_order_count; + + let l0_references = self + .active_dpb_slots + .iter() + .filter(|dpb_slot| dpb_slot.display_index < frame_info.picture_order_count) + .map(|dpb_slot| dpb_slot.index); + + let l1_references = self + .active_dpb_slots + .iter() + .rev() + .filter(|dpb_slot| dpb_slot.display_index > frame_info.picture_order_count) + .map(|dpb_slot| dpb_slot.index); + + let (l0_references, l1_references): (SmallVec<[_; 8]>, SmallVec<[_; 1]>) = match frame_info + .frame_type + { + H264FrameType::P => ( + l0_references.take(self.max_l0_p_ref_images).collect(), + smallvec::smallvec![], + ), + H264FrameType::B => ( + l0_references.take(self.max_l0_b_ref_images).collect(), + l1_references.take(self.max_l1_b_ref_images).collect(), + ), + H264FrameType::I | H264FrameType::Idr => (smallvec::smallvec![], smallvec::smallvec![]), + }; + + let primary_pic_type = match frame_info.frame_type { + H264FrameType::P => vk::native::StdVideoH264PictureType_STD_VIDEO_H264_PICTURE_TYPE_P, + H264FrameType::B => vk::native::StdVideoH264PictureType_STD_VIDEO_H264_PICTURE_TYPE_B, + H264FrameType::I => vk::native::StdVideoH264PictureType_STD_VIDEO_H264_PICTURE_TYPE_I, + H264FrameType::Idr => { + vk::native::StdVideoH264PictureType_STD_VIDEO_H264_PICTURE_TYPE_IDR + } + }; + + let setup_std_reference_info = vk::native::StdVideoEncodeH264ReferenceInfo { + flags: vk::native::StdVideoEncodeH264ReferenceInfoFlags { + _bitfield_align_1: [0; 0], + _bitfield_1: vk::native::StdVideoEncodeH264ReferenceInfoFlags::new_bitfield_1( + 0, // used_for_long_term_reference + 0, // reserved + ), + }, + primary_pic_type, + FrameNum: frame_info.frame_num.into(), + PicOrderCnt: frame_info.picture_order_count.into(), + long_term_pic_num: 0, + long_term_frame_idx: 0, + temporal_id: 0, + }; + + let mut std_slice_headers = vec![]; + + let vk::Extent2D { width, height } = self.encoder.current_extent(); + + let total_macroblocks = (width / 16) * (height / 16); + + match self.config.slice_mode { + SliceMode::Picture => { + std_slice_headers.push(slice_header(&frame_info, 0)); + } + 
SliceMode::Rows(num_rows) => { + for row in (0..height / 16).step_by(num_rows.get() as usize) { + let first_macroblock = (width / 16) * row; + + std_slice_headers.push(slice_header(&frame_info, first_macroblock)); + } + } + SliceMode::MacroBlocks(config_num_mbs) => { + for first_macroblock in + (0..total_macroblocks).step_by(config_num_mbs.get() as usize) + { + std_slice_headers.push(slice_header(&frame_info, first_macroblock)); + } + } + } + + let mut nalu_slices: SmallVec<[_; 1]> = std_slice_headers + .iter() + .map(|std_slice_header| { + vk::VideoEncodeH264NaluSliceInfoKHR::default().std_slice_header(std_slice_header) + }) + .collect(); + + if let VulkanH264RateControlMode::ConstantQuality { qp } = &self.config.rate_control.mode { + for nalu_slice in &mut nalu_slices { + nalu_slice.constant_qp = (*qp).into(); + } + } + + let mut ref_lists = unsafe { zeroed::<vk::native::StdVideoEncodeH264ReferenceListsInfo>() }; + + let mut l0_iter = l0_references.iter().map(|index| *index as u8); + ref_lists + .RefPicList0 + .fill_with(|| l0_iter.next().unwrap_or(0xFF)); + + let mut l1_iter = l1_references.iter().map(|index| *index as u8); + ref_lists + .RefPicList1 + .fill_with(|| l1_iter.next().unwrap_or(0xFF)); + + ref_lists.num_ref_idx_l0_active_minus1 = l0_references.len().saturating_sub(1) as u8; + ref_lists.num_ref_idx_l1_active_minus1 = l1_references.len().saturating_sub(1) as u8; + + log::trace!("\tRefPicList0: {}", debug_list(&ref_lists.RefPicList0)); + log::trace!("\tRefPicList1: {}", debug_list(&ref_lists.RefPicList1)); + + let std_picture_info = vk::native::StdVideoEncodeH264PictureInfo { + flags: vk::native::StdVideoEncodeH264PictureInfoFlags { + _bitfield_align_1: [0; 0], + _bitfield_1: vk::native::StdVideoEncodeH264PictureInfoFlags::new_bitfield_1( + (frame_info.frame_type == H264FrameType::Idr) as u32, // IdrPicFlag + (frame_info.frame_type != H264FrameType::B) as u32, // is_reference + 0, // no_output_of_prior_pics_flag + 0, // long_term_reference_flag + 0, // adaptive_ref_pic_marking_mode_flag + 0, // reserved + ), + }, + seq_parameter_set_id: self.seq_params.seq_parameter_set_id, + pic_parameter_set_id: self.pic_params.pic_parameter_set_id, + idr_pic_id: frame_info.idr_pic_id, + primary_pic_type, + frame_num: frame_info.frame_num.into(), + PicOrderCnt: frame_info.picture_order_count.into(), + temporal_id: 0, + reserved1: [0; 3], + pRefLists: &raw const ref_lists, + }; + + let picture_info = vk::VideoEncodeH264PictureInfoKHR::default() + .generate_prefix_nalu(false) + .nalu_slice_entries(&nalu_slices) + .std_picture_info(&std_picture_info); + + self.encoder.submit_encode_slot( + encode_slot, + l0_references + .iter() + .chain(l1_references.iter()) + .copied() + .collect(), + setup_dpb_slot.index, + setup_std_reference_info, + picture_info, + frame_info.frame_type == H264FrameType::Idr, + )?; + + if frame_info.frame_type == H264FrameType::B { + self.free_dpb_slots.push(setup_dpb_slot); + } else { + self.active_dpb_slots.push_front(setup_dpb_slot); + } + + Ok(()) + } +}
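 + +// Editor's note (worked example): a 1280x720 picture has (1280 / 16) * (720 / 16) = +// 80 * 45 = 3600 macroblocks. `SliceMode::Rows(9)` starts a new slice every 9 +// macroblock rows (5 slices of 720 macroblocks each), while +// `SliceMode::MacroBlocks(720)` starts one every 720 macroblocks.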
 + +fn slice_header( + frame_info: &FrameEncodeInfo, + first_mb_in_slice: u32, +) -> vk::native::StdVideoEncodeH264SliceHeader { + vk::native::StdVideoEncodeH264SliceHeader { + flags: vk::native::StdVideoEncodeH264SliceHeaderFlags { + _bitfield_align_1: [0; 0], + _bitfield_1: vk::native::StdVideoEncodeH264SliceHeaderFlags::new_bitfield_1( + 1, // direct_spatial_mv_pred_flag + 1, // num_ref_idx_active_override_flag + 0, // reserved + ), + }, + first_mb_in_slice, + slice_type: match frame_info.frame_type { + H264FrameType::P => vk::native::StdVideoH264SliceType_STD_VIDEO_H264_SLICE_TYPE_P, + H264FrameType::B => vk::native::StdVideoH264SliceType_STD_VIDEO_H264_SLICE_TYPE_B, + H264FrameType::I => vk::native::StdVideoH264SliceType_STD_VIDEO_H264_SLICE_TYPE_I, + H264FrameType::Idr => vk::native::StdVideoH264SliceType_STD_VIDEO_H264_SLICE_TYPE_I, + }, + slice_alpha_c0_offset_div2: 0, + slice_beta_offset_div2: 0, + slice_qp_delta: 0, + reserved1: 0, + cabac_init_idc: vk::native::StdVideoH264CabacInitIdc_STD_VIDEO_H264_CABAC_INIT_IDC_0, + disable_deblocking_filter_idc: vk::native::StdVideoH264DisableDeblockingFilterIdc_STD_VIDEO_H264_DISABLE_DEBLOCKING_FILTER_IDC_DISABLED, + pWeightTable: null(), + } +} + +fn debug_list(list: &[u8]) -> String { + format!( + "{:?}", + list.iter().take_while(|x| **x != 0xFF).collect::<Vec<_>>() + ) +} + +fn map_level(level: H264Level) -> vk::native::StdVideoH264LevelIdc { + match level { + H264Level::Level_1_0 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_0, + // TODO: not super excited about silently discarding the B here, just hoping no one is actually using this + H264Level::Level_1_B => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_0, + H264Level::Level_1_1 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_1, + H264Level::Level_1_2 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_2, + H264Level::Level_1_3 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_1_3, + H264Level::Level_2_0 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_0, + H264Level::Level_2_1 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_1, + H264Level::Level_2_2 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_2_2, + H264Level::Level_3_0 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_0, + H264Level::Level_3_1 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_1, + H264Level::Level_3_2 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_3_2, + H264Level::Level_4_0 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_0, + H264Level::Level_4_1 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_1, + H264Level::Level_4_2 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_2, + H264Level::Level_5_0 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_0, + H264Level::Level_5_1 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_1, + H264Level::Level_5_2 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_5_2, + H264Level::Level_6_0 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_0, + H264Level::Level_6_1 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_1, + H264Level::Level_6_2 => vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_6_2, + } +} + +fn rate_control_from_config(config: &VulkanH264EncoderConfig) -> Pin<Box<RateControlInfos<H264>>> { + let mut this = Box::pin(RateControlInfos::<H264> { + codec_layer: vk::VideoEncodeH264RateControlLayerInfoKHR::default(), + layer: vk::VideoEncodeRateControlLayerInfoKHR::default(), + codec_info: vk::VideoEncodeH264RateControlInfoKHR::default(), + info: vk::VideoEncodeRateControlInfoKHR::default(), + }); + + this.layer.p_next = (&raw const this.codec_layer) as *const c_void; + this.info.p_next = (&raw const this.codec_info) as *const c_void; + this.info.p_layers = &raw const this.layer; + + // TODO: magic value + this.codec_info.idr_period = config.frame_pattern.intra_idr_period.into(); + this.codec_info.gop_frame_count = config.frame_pattern.intra_period.into(); + 
this.info.virtual_buffer_size_in_ms = 100; + this.info.layer_count = 1; + + if let Some(Framerate { num, denom }) = config.rate_control.framerate { + this.layer.frame_rate_numerator = num; + this.layer.frame_rate_denominator = denom; + } else { + this.layer.frame_rate_numerator = 1; + this.layer.frame_rate_denominator = 1; + } + + if let Some(min_qp) = config.rate_control.min_qp { + this.codec_layer.min_qp = vk::VideoEncodeH264QpKHR { + qp_i: min_qp.into(), + qp_p: min_qp.into(), + qp_b: min_qp.into(), + }; + + this.codec_layer.use_min_qp = vk::TRUE; + } else { + this.codec_layer.use_min_qp = vk::FALSE; + } + + if let Some(max_qp) = config.rate_control.max_qp { + this.codec_layer.max_qp = vk::VideoEncodeH264QpKHR { + qp_i: max_qp.into(), + qp_p: max_qp.into(), + qp_b: max_qp.into(), + }; + + this.codec_layer.use_max_qp = vk::TRUE; + } else { + this.codec_layer.use_max_qp = vk::FALSE; + } + + match config.rate_control.mode { + VulkanH264RateControlMode::Default => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::DEFAULT; + } + VulkanH264RateControlMode::ConstantBitrate { bitrate } => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::CBR; + this.layer.average_bitrate = bitrate.into(); + this.layer.max_bitrate = bitrate.into(); + } + VulkanH264RateControlMode::VariableBitrate { + average_bitrate, + max_bitrate, + } => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::VBR; + this.layer.average_bitrate = average_bitrate.into(); + this.layer.max_bitrate = max_bitrate.into(); + } + VulkanH264RateControlMode::ConstantQuality { .. } => { + this.info.rate_control_mode = vk::VideoEncodeRateControlModeFlagsKHR::DISABLED; + } + } + + this +} diff --git a/media-video/h264/src/encoder/config.rs b/media-video/h264/src/encoder/config.rs new file mode 100644 index 00000000..fa241742 --- /dev/null +++ b/media-video/h264/src/encoder/config.rs @@ -0,0 +1,193 @@ +use crate::{H264Level, H264Profile}; +use std::num::NonZeroU32; + +/// Generic H.264 encoder config
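 +/// +/// A minimal construction sketch (editor's example; the concrete values are +/// illustrative, not recommendations): +/// +/// ```rust +/// use ezk_h264::{H264Level, H264Profile}; +/// use ezk_h264::encoder::config::{ +/// FramePattern, Framerate, H264EncoderConfig, H264RateControlConfig, SliceMode, +/// }; +/// +/// let config = H264EncoderConfig { +/// profile: H264Profile::ConstrainedBaseline, +/// level: H264Level::Level_3_1, +/// resolution: (1280, 720), +/// framerate: Some(Framerate::from_fps(30)), +/// qp: Some((17, 28)), +/// frame_pattern: FramePattern::default(), +/// rate_control: H264RateControlConfig::ConstantBitRate { bitrate: 2_000_000 }, +/// slice_max_len: None, +/// slice_mode: SliceMode::Picture, +/// quality_level: 0, // assumption: 0 selects the backend default +/// }; +/// # let _ = config; +/// ```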
 +#[derive(Debug, Clone, Copy)] +pub struct H264EncoderConfig { + /// H.264 encoding profile to use. Defines the feature-set the encoder may use. + pub profile: H264Profile, + + /// H.264 encoding level. Defines default constraints like frame size, fps and more. + pub level: H264Level, + + /// Maximum width & height of the image to be encoded. + /// + /// This value is only used for the initialization and should represent the largest allowed resolution. + /// Some encoders will not be able to handle larger resolutions later without being reinitialized. + pub resolution: (u32, u32), + + /// Expected (maximum) framerate of the video stream + pub framerate: Option<Framerate>, + + /// Define the range of QP values the encoder is allowed to use. + /// + /// Allowed values range from 0 to 51, where 0 is the best quality and 51 the worst with the most compression. + /// + /// Default should be (17..=28) but manual tuning is recommended! + /// + /// Ignored when `rate_control` is `ConstantQuality` + pub qp: Option<(u8, u8)>, + + /// Pattern of frames to emit + pub frame_pattern: FramePattern, + + /// Rate control configuration + pub rate_control: H264RateControlConfig, + + /// Limit the output slice size. + /// + /// Required if the packetization mode is SingleNAL which doesn't support fragmentation units. + pub slice_max_len: Option<usize>, + + /// How slices should be created + pub slice_mode: SliceMode, + + /// Quality level + pub quality_level: u32, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum H264FrameType { + // Uses previous frames as reference + P, + // Uses previous and future frames as reference + B, + // Intra frame, standalone complete picture, no references + I, + // Intra Frame preceded by a SPS/PPS set. Clears all reference frames + Idr, +} + +/// Describes the pattern in which frames are created +/// +/// # Examples +/// +/// ```rust +/// # use ezk_h264::encoder::config::{H264FrameType, H264FrameType::*, FramePattern}; +/// # fn eval<const N: usize>(pattern: FramePattern) -> [H264FrameType; N] { +/// # let mut ret = [P; N]; +/// # let mut n = 0; +/// # while n < N { +/// # ret[n] = pattern.frame_type_of_nth_frame(n as _); +/// # n += 1; +/// # } +/// # ret +/// # } +/// // Only create I Frames +/// let pattern = FramePattern { intra_idr_period: 32, intra_period: 1, ip_period: 1 }; +/// assert_eq!(eval(pattern), [Idr, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I]); +/// +/// // Create I & P Frames +/// let pattern = FramePattern { intra_idr_period: 32, intra_period: 4, ip_period: 1 }; +/// assert_eq!(eval(pattern), [Idr, P, P, P, I, P, P, P, I, P, P, P, I, P, P, P]); +/// +/// // Insert some IDR frames, required for livestream or video conferences +/// let pattern = FramePattern { intra_idr_period: 8, intra_period: 4, ip_period: 1 }; +/// assert_eq!(eval(pattern), [Idr, P, P, P, I, P, P, P, Idr, P, P, P, I, P, P, P]); +/// +/// // B frames are only created if `ip_period` is larger than 1 +/// let pattern = FramePattern { intra_idr_period: 32, intra_period: 8, ip_period: 4 }; +/// assert_eq!(eval(pattern), [Idr, B, B, B, P, B, B, B, I, B, B, B, P, B, B, B]); +/// +/// // Some more IDR frames... +/// let pattern = FramePattern { intra_idr_period: 8, intra_period: 8, ip_period: 4 }; +/// assert_eq!(eval(pattern), [Idr, B, B, B, P, B, B, P, Idr, B, B, B, P, B, B]); +/// ``` +#[derive(Debug, Clone, Copy)] +pub struct FramePattern { + /// Period in which to create IDR-Frames + /// + /// Must be a multiple of `intra_period` (or `ip_period`) if set + pub intra_idr_period: u16, + + /// Period in which to create I-Frames + /// + /// Must be a multiple of `ip_period` if set + pub intra_period: u16, + + /// Period in which to create P-Frames. All other frames are created as B-Frames + /// + /// B-Frames are not inserted if this is set to 1 + pub ip_period: u16, +} + +impl Default for FramePattern { + fn default() -> Self { + Self { + intra_idr_period: 120, + intra_period: 60, + ip_period: 1, + } + } +} + +impl FramePattern { + // public for doc test + #[doc(hidden)] + pub fn frame_type_of_nth_frame(&self, n: u64) -> H264FrameType { + // Emit IDR frame every intra_idr_period frames + if n.is_multiple_of(self.intra_idr_period.into()) { + return H264FrameType::Idr; + } + + // Emit I frame every intra_period frames + if n.is_multiple_of(self.intra_period.into()) { + return H264FrameType::I; + } + + // Emit P frame every ip_period frames + if n.is_multiple_of(self.ip_period.into()) { + H264FrameType::P + } else if (n + 1).is_multiple_of(self.intra_idr_period.into()) { + // This should have been a B-Frame, but the next one is an IDR Frame. + // Since B-Frames cannot be used as references for other B-Frames (yet), emit a P-Frame instead. 
 + H264FrameType::P + } else { + H264FrameType::B + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum H264RateControlConfig { + /// CBR (Constant Bit Rate) + ConstantBitRate { bitrate: u32 }, + + /// VBR (Variable Bit Rate) + VariableBitRate { + average_bitrate: u32, + max_bitrate: u32, + }, + + /// Constant Quality + ConstantQuality { + const_qp: u8, + max_bitrate: Option<u32>, + }, +} + +#[derive(Debug, Clone, Copy)] +pub struct Framerate { + pub num: u32, + pub denom: u32, +} + +impl Framerate { + pub const fn from_fps(fps: u32) -> Self { + Self { num: fps, denom: 1 } + } +} + +/// Defines how slices should be created for a single picture +#[derive(Default, Debug, Clone, Copy)] +pub enum SliceMode { + #[default] + /// A single slice per picture + Picture, + + /// Number of rows per slice + Rows(NonZeroU32), + + /// Number of macro blocks per slice + MacroBlocks(NonZeroU32), +} diff --git a/media-video/h264/src/encoder/mod.rs b/media-video/h264/src/encoder/mod.rs new file mode 100644 index 00000000..f4b1ff7f --- /dev/null +++ b/media-video/h264/src/encoder/mod.rs @@ -0,0 +1,3 @@ +pub mod backends; +pub mod config; +pub(crate) mod util; diff --git a/media-video/h264/src/encoder/util.rs b/media-video/h264/src/encoder/util.rs new file mode 100644 index 00000000..79ef9d44 --- /dev/null +++ b/media-video/h264/src/encoder/util.rs @@ -0,0 +1,87 @@ +use crate::encoder::config::{FramePattern, H264FrameType}; + +#[derive(Debug)] +pub(crate) struct H264EncoderState { + frame_pattern: FramePattern, + + /// Number of bits to use for picture_order_count_lsb + pub(crate) log2_max_pic_order_cnt_lsb: u8, + /// Number of bits to use for frame_num + pub(crate) log2_max_frame_num: u8, + + /// Number of frames that have been submitted to the encoder (but not necessarily encoded) + num_submitted_frames: u64, + + /// Display index (nth submitted frame) of the last IDR frame + current_idr_display: u64, + + /// ID of the last IDR frame (incremented with each IDR frame) + idr_pic_id: u16, + + /// Frame index in the current GOP, not incremented for B Frames + current_frame_num: u16, +} + +impl H264EncoderState { + pub(crate) fn new(frame_pattern: FramePattern) -> Self { + let max_frame_num = frame_pattern.intra_idr_period / frame_pattern.ip_period; + let log2_max_frame_num = ((max_frame_num as f32).log2().ceil() as u8).clamp(4, 16); + + let max_pic_order_cnt_lsb = frame_pattern.intra_idr_period; + let log2_max_pic_order_cnt_lsb = + ((max_pic_order_cnt_lsb as f32).log2().ceil() as u8).clamp(4, 16); + + H264EncoderState { + frame_pattern, + log2_max_pic_order_cnt_lsb, + log2_max_frame_num, + num_submitted_frames: 0, + current_idr_display: 0, + idr_pic_id: 0, + current_frame_num: 0, + } + }
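 + + /// Force the next submitted frame to be an IDR frame by rounding the + /// submitted-frame counter up to the next multiple of `intra_idr_period`. + /// + /// Editor's sketch: with `intra_idr_period = 120` and 130 frames submitted so + /// far, the counter jumps to 240, and `frame_type_of_nth_frame(240)` yields `Idr`.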
 + pub(crate) fn begin_new_gop(&mut self) { + self.num_submitted_frames = self + .num_submitted_frames + .next_multiple_of(self.frame_pattern.intra_idr_period.into()); + } + + pub(crate) fn next(&mut self) -> FrameEncodeInfo { + let frame_type = self + .frame_pattern + .frame_type_of_nth_frame(self.num_submitted_frames); + + if frame_type == H264FrameType::Idr { + self.current_frame_num = 0; + self.current_idr_display = self.num_submitted_frames; + self.idr_pic_id = self.idr_pic_id.wrapping_add(1); + } + + let picture_order_count = self.num_submitted_frames - self.current_idr_display; + + let info = FrameEncodeInfo { + frame_type, + frame_num: self.current_frame_num, + picture_order_count: picture_order_count.try_into().unwrap(), + idr_pic_id: self.idr_pic_id - 1, // idr_pic_id is always incremented once at start + }; + + if frame_type != H264FrameType::B { + self.current_frame_num = self.current_frame_num.wrapping_add(1); + } + + self.num_submitted_frames += 1; + + info + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct FrameEncodeInfo { + pub(crate) frame_type: H264FrameType, + pub(crate) frame_num: u16, + pub(crate) picture_order_count: u16, + pub(crate) idr_pic_id: u16, +} diff --git a/media-video/h264/src/fmtp.rs b/media-video/h264/src/fmtp.rs new file mode 100644 index 00000000..080e42b0 --- /dev/null +++ b/media-video/h264/src/fmtp.rs @@ -0,0 +1,283 @@ +use crate::profile_level_id::{ParseProfileLevelIdError, ProfileLevelId}; +use std::{fmt, num::ParseIntError, str::FromStr}; + +/// Specifies the RTP packetization mode +#[derive(Default, Debug, Clone, Copy, PartialEq, PartialOrd)] +pub enum H264PacketizationMode { + /// Each RTP packet contains exactly one H.264 NAL unit. + /// This mode is the default and best suited for low latency applications like video conferencing + /// + /// Encoders must have their NAL unit size limited to the MTU. + #[default] + SingleNAL = 0, + + /// Multiple NAL units can be combined into a single RTP packet. + /// + /// Uses fragmentation units (FU-A) to split large NAL units across multiple RTP packets + NonInterleavedMode = 1, + + /// NAL units can be transmitted out of order and reassembled at the receiver. + /// This mode is designed for environments with higher packet loss and jitter, providing better error resilience. + /// + /// Uses Fragmentation Units (FU-A and FU-B) and Aggregation Packets (STAP-B and MTAP) to manage NAL units. + InterleavedMode = 2, +} + +/// H.264 specific format parameters used in SDP negotiation +#[derive(Debug, Default)] +pub struct H264FmtpOptions { + /// Indicates the profile and level used for encoding the video stream + pub profile_level_id: ProfileLevelId, + /// Whether level asymmetry, i.e., sending media encoded at a + /// different level in the offerer-to-answerer direction than the + /// level in the answerer-to-offerer direction, is allowed + pub level_asymmetry_allowed: bool, + /// RTP packetization mode + pub packetization_mode: H264PacketizationMode, + /// Maximum macroblock processing rate in macroblocks per second + pub max_mbps: Option<u32>, + /// Maximum frame size in macroblocks + pub max_fs: Option<u32>, + /// Maximum coded picture buffer size + pub max_cpb: Option<u32>, + /// Maximum decoded picture buffer size in frames + pub max_dpb: Option<u32>, + /// Maximum video bitrate in kilobits per second + pub max_br: Option<u32>, + /// Whether redundant pictures are present in the stream + pub redundant_pic_cap: bool, +} + +impl H264FmtpOptions { + /// Returns the maximum resolution for the given aspect ratio
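 + /// + /// Editor's doctest sketch: with `max-fs` 3600 (the level 3.1 limit) a 16:9 + /// request yields 1280x720, the largest 16:9 frame with at most + /// 3600 macroblocks (3600 * 256 = 921600 pixels). + /// + /// ```rust + /// # use ezk_h264::H264FmtpOptions; + /// let fmtp = H264FmtpOptions { max_fs: Some(3600), ..Default::default() }; + /// assert_eq!(fmtp.max_resolution(16, 9), (1280, 720)); + /// ```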
 + pub fn max_resolution(&self, num: u32, denom: u32) -> (u32, u32) { + let max_fs = self + .max_fs + .unwrap_or_else(|| self.profile_level_id.level.max_fs()); + + resolution_from_max_fs(num, denom, max_fs) + } + + /// Returns the maximum resolution with the given fps and aspect ratio num/denom + pub fn max_resolution_for_fps(&self, num: u32, denom: u32, fps: u32) -> (u32, u32) { + let max_mbps = self + .max_mbps + .unwrap_or_else(|| self.profile_level_id.level.max_mbps()); + + let max_fs = max_mbps / fps.max(1); + + resolution_from_max_fs(num, denom, max_fs) + } + + /// Returns the maximum supported FPS using the maximum supported resolution + pub fn max_fps_for_max_resolution(&self) -> u32 { + let max_fs = self + .max_fs + .unwrap_or_else(|| self.profile_level_id.level.max_fs()); + + let max_mbps = self + .max_mbps + .unwrap_or_else(|| self.profile_level_id.level.max_mbps()); + + max_mbps / max_fs.max(1) + } + + /// Returns the maximum supported FPS for the given resolution + pub fn max_fps_for_resolution(&self, width: u32, height: u32) -> u32 { + let max_mbps = self + .max_mbps + .unwrap_or_else(|| self.profile_level_id.level.max_mbps()); + + let frame_size = (width * height) / 256; + + max_mbps / frame_size.max(1) + } + + /// Returns the maximum bitrate in bit/s + pub fn max_bitrate(&self) -> u32 { + self.max_br + .unwrap_or_else(|| self.profile_level_id.level.max_br()) + .saturating_mul(1000) + } +} + +fn resolution_from_max_fs(num: u32, denom: u32, max_fs: u32) -> (u32, u32) { + const MAX_FS_BOUND: u32 = 0x7FFFFF; + + fn greatest_common_divisor(mut a: u32, mut b: u32) -> u32 { + while b != 0 { + let tmp = b; + b = a % b; + a = tmp; + } + + a + } + + // Limit max FS to avoid integer overflows + let max_fs = max_fs.min(MAX_FS_BOUND); + let max_pixels = max_fs.saturating_mul(256); + let divisor = greatest_common_divisor(num.max(1), denom.max(1)); + let num = num / divisor; + let denom = denom / divisor; + + // Search for the best resolution by testing them all + for i in 1.. { + let width = num * i; + let height = denom * i; + + if width * height > max_pixels { + let width = num * (i - 1); + let height = denom * (i - 1); + return (width, height); + } + } + + unreachable!() +} + +/// Failed to parse H.264 fmtp line +#[derive(Debug, thiserror::Error)] +pub enum ParseH264FmtpOptionsError { + #[error(transparent)] + InvalidProfileId(#[from] ParseProfileLevelIdError), + #[error("encountered non integer value {0}")] + InvalidValue(#[from] ParseIntError), +} + +impl FromStr for H264FmtpOptions { + type Err = ParseH264FmtpOptionsError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let mut options = Self::default(); + + fn parse_u32(i: &str) -> Result<u32, ParseIntError> { + Ok(i.parse::<u32>()?.clamp(1, 8_388_607)) + } + + for (key, value) in s.split(';').filter_map(|e| e.split_once('=')) { + let value = value.trim(); + match key { + "profile-level-id" => options.profile_level_id = value.parse()?, + "level-asymmetry-allowed" => options.level_asymmetry_allowed = value == "1", + "packetization-mode" => { + options.packetization_mode = match value { + "0" => H264PacketizationMode::SingleNAL, + "1" => H264PacketizationMode::NonInterleavedMode, + "2" => H264PacketizationMode::InterleavedMode, + _ => continue, + }; + } + "max-mbps" => options.max_mbps = Some(parse_u32(value)?), + "max-fs" => options.max_fs = Some(parse_u32(value)?), + "max-cpb" => options.max_cpb = Some(parse_u32(value)?), + "max-dpb" => options.max_dpb = Some(parse_u32(value)?), + "max-br" => options.max_br = Some(parse_u32(value)?), + "redundant-pic-cap" => options.redundant_pic_cap = value == "1", + _ => continue, + } + } + + Ok(options) + } +} + +impl fmt::Display for H264FmtpOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self { + profile_level_id, + level_asymmetry_allowed, + packetization_mode, + max_mbps, + max_fs, + max_cpb, + max_dpb, + max_br, + redundant_pic_cap, + } = self; + + write!(f, "profile-level-id={profile_level_id}")?; + + if *level_asymmetry_allowed { + write!(f, ";level-asymmetry-allowed=1")?; + } + + write!(f, ";packetization-mode={}", *packetization_mode as u8)?; + + if let Some(max_mbps) = max_mbps { + write!(f, ";max-mbps={max_mbps}")?; + } + + if let Some(max_fs) = max_fs { + write!(f, ";max-fs={max_fs}")?; + } + + if let Some(max_cpb) = max_cpb { + write!(f, ";max-cpb={max_cpb}")?; + } + + if let Some(max_dpb) = max_dpb { + write!(f, 
";max-dpb={max_dpb}")?; + } + + if let Some(max_br) = max_br { + write!(f, ";max-br={max_br}")?; + } + + if *redundant_pic_cap { + write!(f, ";redundant-pic-cap=1")?; + } + + Ok(()) + } +} + +#[test] +fn no_panics() { + let fmtp = H264FmtpOptions { + profile_level_id: ProfileLevelId::default(), + level_asymmetry_allowed: true, + packetization_mode: H264PacketizationMode::SingleNAL, + max_mbps: Some(u32::MAX), + max_fs: Some(u32::MAX), + max_cpb: Some(u32::MAX), + max_dpb: Some(u32::MAX), + max_br: Some(u32::MAX), + redundant_pic_cap: false, + }; + + for i in 1..100 { + for j in 1..100 { + println!("{:?}", fmtp.max_resolution(i, j)); + } + } + println!("{:?}", fmtp.max_resolution_for_fps(16, 9, 30)); + println!("{:?}", fmtp.max_fps_for_max_resolution()); + println!("{:?}", fmtp.max_fps_for_resolution(1920, 1080)); + println!("{:?}", fmtp.max_bitrate()); +} + +#[test] +fn no_divide_by_zero() { + let fmtp = H264FmtpOptions { + profile_level_id: ProfileLevelId::default(), + level_asymmetry_allowed: true, + packetization_mode: H264PacketizationMode::SingleNAL, + max_mbps: Some(0), + max_fs: Some(0), + max_cpb: Some(0), + max_dpb: Some(0), + max_br: Some(0), + redundant_pic_cap: false, + }; + + for i in 1..100 { + for j in 1..100 { + println!("{:?}", fmtp.max_resolution(i, j)); + } + } + println!("{:?}", fmtp.max_resolution_for_fps(16, 9, 30)); + println!("{:?}", fmtp.max_fps_for_max_resolution()); + println!("{:?}", fmtp.max_fps_for_resolution(1920, 1080)); + println!("{:?}", fmtp.max_bitrate()); +} diff --git a/media-video/h264/src/level.rs b/media-video/h264/src/level.rs new file mode 100644 index 00000000..5da7cbd3 --- /dev/null +++ b/media-video/h264/src/level.rs @@ -0,0 +1,127 @@ +/// H.264 encoding levels with their corresponding capabilities. +#[derive(Debug, Clone, Copy)] +#[allow(non_camel_case_types)] +pub enum H264Level { + /// Level 1.0: Max resolution 176x144 (QCIF), 15 fps, 64 kbps (Main), 80 kbps (High) + Level_1_0, + /// Level 1.B: Specialized low-complexity baseline level. + Level_1_B, + /// Level 1.1: Max resolution 176x144 (QCIF), 30 fps, 192 kbps (Main), 240 kbps (High) + Level_1_1, + /// Level 1.2: Max resolution 320x240 (QVGA), 30 fps, 384 kbps (Main), 480 kbps (High) + Level_1_2, + /// Level 1.3: Max resolution 352x288 (CIF), 30 fps, 768 kbps (Main), 960 kbps (High) 
Level_1_3, + + /// Level 2.0: Max resolution 352x288 (CIF), 30 fps, 2 Mbps (Main), 2.5 Mbps (High) + Level_2_0, + /// Level 2.1: Max resolution 352x288 (CIF), 30 fps, 4 Mbps (Main), 5 Mbps (High) + Level_2_1, + /// Level 2.2: Max resolution 352x288 (CIF), 30 fps, 4 Mbps (Main), 5 Mbps (High) + Level_2_2, + + /// Level 3.0: Max resolution 720x576 (SD), 30 fps, 10 Mbps (Main), 12.5 Mbps (High) + Level_3_0, + /// Level 3.1: Max resolution 1280x720 (HD), 30 fps, 14 Mbps (Main), 17.5 Mbps (High) + Level_3_1, + /// Level 3.2: Max resolution 1280x720 (HD), 60 fps, 20 Mbps (Main), 25 Mbps (High) + Level_3_2, + + /// Level 4.0: Max resolution 1920x1080 (Full HD), 30 fps, 20 Mbps (Main), 25 Mbps (High) + Level_4_0, + /// Level 4.1: Max resolution 1920x1080 (Full HD), 30 fps, 50 Mbps (Main), 62.5 Mbps (High) + Level_4_1, + /// Level 4.2: Max resolution 1920x1080 (Full HD), 60 fps, 50 Mbps (Main), 62.5 Mbps (High) + Level_4_2, + + /// Level 5.0: Max resolution 2560x1920, 30 fps, 135 Mbps (Main), 168.75 Mbps (High) + Level_5_0, + /// Level 5.1: Max resolution 3840x2160 (4K), 30 fps, 240 Mbps (Main), 300 Mbps (High) + Level_5_1, + /// Level 5.2: Max resolution 4096x2160 (4K Cinema), 60 fps, 240 Mbps (Main), 300 Mbps (High) + Level_5_2, + + /// Level 6.0: Max resolution 8192x4320 (8K UHD), 30 fps, 240 Mbps (Main), 300 Mbps (High) + Level_6_0, + /// Level 6.1: Max resolution 8192x4320 (8K UHD), 60 fps, 480 Mbps (Main), 600 Mbps (High) + Level_6_1, + /// Level 6.2: Max resolution 8192x4320 (8K UHD), 120 fps, 800 Mbps (Main), 1000 Mbps (High) + Level_6_2, +} + +impl H264Level { + /// Returns the level idc as specified in H.264 for this level + /// + /// Note that level 1.1 & 1.b have the same value + pub fn level_idc(self) -> u8 { + match self { + H264Level::Level_1_0 => 10, + H264Level::Level_1_B => 11, + H264Level::Level_1_1 => 11, + H264Level::Level_1_2 => 12, + H264Level::Level_1_3 => 13, + H264Level::Level_2_0 => 20, + H264Level::Level_2_1 => 21, + H264Level::Level_2_2 => 22, + H264Level::Level_3_0 => 30, + H264Level::Level_3_1 => 31, + H264Level::Level_3_2 => 32, + H264Level::Level_4_0 => 40, + H264Level::Level_4_1 => 41, + H264Level::Level_4_2 => 42, + H264Level::Level_5_0 => 50, + H264Level::Level_5_1 => 51, + H264Level::Level_5_2 => 52, + H264Level::Level_6_0 => 60, + H264Level::Level_6_1 => 61, + H264Level::Level_6_2 => 62, + } + } + + pub fn max_mbps(self) -> u32 { + self.limits().0 + } + + pub fn max_fs(self) -> u32 { + self.limits().1 + } + + pub fn max_br(self) -> u32 { + self.limits().3 + } + + /// ITU-T H.264 Table A-1 Level Limits + /// + /// 0 - Max macroblock processing rate MaxMBPS (MB/s) + /// 1 - Max frame size MaxFS (MBs) + /// 2 - Max decoded picture buffer size MaxDpbMbs (MBs) + /// 3 - Max video bit rate MaxBR (1000 bits/s, 1200 bits/s, cpbBrVclFactor bits/s, or cpbBrNalFactor bits/s) + /// 4 - Max CPB size MaxCPB (1000 bits, 1200 bits, cpbBrVclFactor bits, or cpbBrNalFactor bits) + /// 5 - Vertical MV component limit MaxVmvR (luma frame samples) + /// 6 - Min compression ratio MinCR + /// 7 - Max number of motion vectors per two consecutive MBs MaxMvsPer2Mb + fn limits(self) -> (u32, u32, u32, u32, u32, u32, u32, Option<u32>) { + match self { + H264Level::Level_1_0 => (1485, 99, 396, 64, 175, 64, 2, None), + H264Level::Level_1_B => (1485, 99, 396, 128, 350, 64, 2, None), + H264Level::Level_1_1 => (3000, 396, 900, 192, 500, 128, 2, None), + H264Level::Level_1_2 => (6000, 396, 2376, 384, 1000, 128, 2, None), + H264Level::Level_1_3 => (11880, 396, 2376, 768, 2000, 128, 2, None), + H264Level::Level_2_0 => (11880, 396, 2376, 2000, 2000, 128, 2, None), + H264Level::Level_2_1 => (19800, 792, 4752, 4000, 4000, 256, 2, None), + H264Level::Level_2_2 => (20250, 1620, 8100, 4000, 4000, 256, 2, None), + H264Level::Level_3_0 => (40500, 1620, 8100, 10000, 10000, 256, 2, Some(32)), + H264Level::Level_3_1 => (108000, 3600, 18000, 14000, 14000, 512, 4, Some(16)), + H264Level::Level_3_2 => (216000, 5120, 20480, 20000, 20000, 512, 4, Some(16)), + H264Level::Level_4_0 => (245760, 8192, 32768, 20000, 25000, 512, 4, Some(16)), + H264Level::Level_4_1 => (245760, 8192, 32768, 50000, 62500, 512, 2, Some(16)), + H264Level::Level_4_2 => (522240, 8704, 34816, 50000, 62500, 512, 2, Some(16)), + H264Level::Level_5_0 => (589824, 22080, 110400, 135000, 135000, 512, 2, Some(16)), + H264Level::Level_5_1 => (983040, 36864, 184320, 240000, 240000, 512, 2, Some(16)), + H264Level::Level_5_2 => (2073600, 36864, 184320, 240000, 240000, 512, 2, Some(16)), + H264Level::Level_6_0 => (4177920, 139264, 696320, 240000, 240000, 8192, 2, Some(16)), + H264Level::Level_6_1 => (8355840, 139264, 696320, 480000, 480000, 8192, 2, Some(16)), + H264Level::Level_6_2 => (16711680, 139264, 696320, 800000, 800000, 8192, 2, Some(16)), + } + } +}
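 + +// Editor's addition: spot-check a few of the Table A-1 values exposed through the accessors. +#[test] +fn level_limits_sanity() { + assert_eq!(H264Level::Level_3_1.max_fs(), 3600); + assert_eq!(H264Level::Level_4_1.max_br(), 50000); + assert_eq!(H264Level::Level_1_0.max_mbps(), 1485); +}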
None), + H264Level::Level_2_0 => (11880, 396, 2376, 2000, 2000, 128, 2, None), + H264Level::Level_2_1 => (19800, 792, 4752, 4000, 4000, 256, 2, None), + H264Level::Level_2_2 => (20250, 1620, 8100, 4000, 4000, 256, 2, None), + H264Level::Level_3_0 => (40500, 1620, 8100, 10000, 10000, 256, 2, Some(32)), + H264Level::Level_3_1 => (108000, 3600, 18000, 14000, 14000, 512, 4, Some(16)), + H264Level::Level_3_2 => (216000, 5120, 20480, 20000, 20000, 512, 4, Some(16)), + H264Level::Level_4_0 => (245760, 8192, 32768, 20000, 25000, 512, 4, Some(16)), + H264Level::Level_4_1 => (245760, 8192, 32768, 50000, 62500, 512, 2, Some(16)), + H264Level::Level_4_2 => (522240, 8704, 34816, 50000, 62500, 512, 2, Some(16)), + H264Level::Level_5_0 => (589824, 22080, 110400, 135000, 135000, 512, 2, Some(16)), + H264Level::Level_5_1 => (983040, 36864, 184320, 240000, 240000, 512, 2, Some(16)), + H264Level::Level_5_2 => (2073600, 36864, 184320, 240000, 240000, 512, 2, Some(16)), + H264Level::Level_6_0 => (4177920, 139264, 696320, 240000, 240000, 8192, 2, Some(16)), + H264Level::Level_6_1 => (8355840, 139264, 696320, 480000, 480000, 8192, 2, Some(16)), + H264Level::Level_6_2 => (16711680, 139264, 696320, 800000, 800000, 8192, 2, Some(16)), + } + } +} diff --git a/media-video/h264/src/lib.rs b/media-video/h264/src/lib.rs new file mode 100644 index 00000000..35676fd9 --- /dev/null +++ b/media-video/h264/src/lib.rs @@ -0,0 +1,29 @@ +//! H.264 tools for use with SDP & RTP + +#![allow(unsafe_op_in_unsafe_fn)] + +mod fmtp; +mod level; +mod payload; +mod profile; +pub mod profile_level_id; + +pub mod encoder; + +pub use fmtp::{H264FmtpOptions, H264PacketizationMode, ParseH264FmtpOptionsError}; +pub use level::H264Level; +pub use payload::{ + H264DePayloadError, H264DePayloader, H264DePayloaderOutputFormat, H264Payloader, +}; +pub use profile::H264Profile; + +mod profile_iop_consts { + #![allow(unused)] + + pub(crate) const CONSTRAINT_SET0_FLAG: u8 = 1 << 7; + pub(crate) const CONSTRAINT_SET1_FLAG: u8 = 1 << 6; + pub(crate) const CONSTRAINT_SET2_FLAG: u8 = 1 << 5; + pub(crate) const CONSTRAINT_SET3_FLAG: u8 = 1 << 4; + pub(crate) const CONSTRAINT_SET4_FLAG: u8 = 1 << 3; + pub(crate) const CONSTRAINT_SET5_FLAG: u8 = 1 << 2; +} diff --git a/media/h264/src/payload.rs b/media-video/h264/src/payload.rs similarity index 96% rename from media/h264/src/payload.rs rename to media-video/h264/src/payload.rs index f5ef00e1..f3c1518a 100644 --- a/media/h264/src/payload.rs +++ b/media-video/h264/src/payload.rs @@ -1,4 +1,4 @@ -use crate::PacketizationMode; +use crate::H264PacketizationMode; use bytes::{Buf, BufMut, Bytes, BytesMut}; use std::{iter::from_fn, mem::take}; @@ -26,13 +26,13 @@ const FUA_START_BIT: u8 = 1 << 7; /// Convert H.264 NAL unit as received from encoders or found in media formats to RTP payload format pub struct H264Payloader { - mode: PacketizationMode, + mode: H264PacketizationMode, sps: Option, pps: Option, } impl H264Payloader { - pub fn new(mode: PacketizationMode) -> Self { + pub fn new(mode: H264PacketizationMode) -> Self { Self { mode, sps: None, @@ -45,7 +45,7 @@ impl H264Payloader { return vec![]; } - if self.mode == PacketizationMode::SingleNAL { + if self.mode == H264PacketizationMode::SingleNAL { return nal_units(bytes).collect(); } @@ -121,15 +121,15 @@ impl H264Payloader { nal_unit.advance(1); let chunk_size = max_size - FUA_HEADER_LEN; - let chunks = nal_unit.len() / chunk_size; - for (i, chunk) in nal_unit[..].chunks(chunk_size).enumerate() { + let mut chunks = 
nal_unit[..].chunks(chunk_size).enumerate().peekable(); + while let Some((i, chunk)) = chunks.next() { let mut fua = Vec::with_capacity(chunk.len() + FUA_HEADER_LEN); fua.push(NAL_UNIT_FU_A | nal_unit_ref_idc); if i == 0 { fua.push(nal_unit_type | FUA_START_BIT) - } else if i == chunks - 1 { + } else if chunks.peek().is_none() { fua.push(nal_unit_type | FUA_END_BIT) } else { fua.push(nal_unit_type); @@ -223,6 +223,10 @@ impl H264DePayloader { Self { format, fua: None } } + pub fn reset(&mut self) { + self.fua = None; + } + pub fn depayload( &mut self, packet: &[u8], @@ -276,11 +280,17 @@ impl H264DePayloader { let fua = self.fua.get_or_insert_with(BytesMut::new); + let b1 = packet[1]; + + // Check if this is the first FU-A package + if b1 & FUA_START_BIT != 0 { + fua.clear(); + } + // Append the received package to the FU-A buffer fua.extend_from_slice(&packet[FUA_HEADER_LEN..]); // Check if this is the last FU-A package - let b1 = packet[1]; if b1 & FUA_END_BIT == 0 { return Ok(()); } @@ -328,7 +338,7 @@ mod test { &[0x1c, 0x40, 0x13, 0x14, 0x15], ]; - let mut pck = H264Payloader::new(PacketizationMode::NonInterleavedMode); + let mut pck = H264Payloader::new(H264PacketizationMode::NonInterleavedMode); // Positive MTU, empty payload let result = pck.payload(empty, 1); @@ -494,7 +504,7 @@ mod test { #[test] fn test_h264_packetizer_payload_sps_and_pps_handling() { - let mut pck = H264Payloader::new(PacketizationMode::NonInterleavedMode); + let mut pck = H264Payloader::new(H264PacketizationMode::NonInterleavedMode); let expected: Vec<&[u8]> = vec![ &[ 0x78, 0x00, 0x03, 0x07, 0x00, 0x01, 0x00, 0x03, 0x08, 0x02, 0x03, diff --git a/media-video/h264/src/profile.rs b/media-video/h264/src/profile.rs new file mode 100644 index 00000000..3fcb172b --- /dev/null +++ b/media-video/h264/src/profile.rs @@ -0,0 +1,100 @@ +/// H.264 encoding profile +#[derive(Debug, Clone, Copy)] +pub enum H264Profile { + Baseline, + ConstrainedBaseline, + Main, + Extended, + High, + High10, + High422, + High444Predictive, + High10Intra, + High422Intra, + High444Intra, + CAVLC444Intra, +} + +impl H264Profile { + pub fn profile_idc(self) -> u8 { + match self { + H264Profile::Baseline | H264Profile::ConstrainedBaseline => 66, + H264Profile::Main => 77, + H264Profile::Extended => 88, + H264Profile::High => 100, + H264Profile::High10 | H264Profile::High10Intra => 110, + H264Profile::High422 | H264Profile::High422Intra => 122, + H264Profile::High444Predictive | H264Profile::High444Intra => 244, + H264Profile::CAVLC444Intra => 44, + } + } + + pub fn profile_iop(self) -> u8 { + use crate::profile_iop_consts::*; + + match self { + H264Profile::Baseline => CONSTRAINT_SET0_FLAG, + H264Profile::ConstrainedBaseline => CONSTRAINT_SET0_FLAG | CONSTRAINT_SET1_FLAG, + H264Profile::Main => CONSTRAINT_SET1_FLAG, + H264Profile::Extended => CONSTRAINT_SET2_FLAG, + H264Profile::High => 0, + H264Profile::High10 => 0, + H264Profile::High422 => 0, + H264Profile::High444Predictive => 0, + H264Profile::High10Intra => CONSTRAINT_SET3_FLAG, + H264Profile::High422Intra => CONSTRAINT_SET3_FLAG, + H264Profile::High444Intra => CONSTRAINT_SET3_FLAG, + H264Profile::CAVLC444Intra => 0, + } + } + + pub(crate) fn support_b_frames(&self) -> bool { + match self { + H264Profile::Baseline | H264Profile::ConstrainedBaseline => false, + H264Profile::Main + | H264Profile::Extended + | H264Profile::High + | H264Profile::High10 + | H264Profile::High422 + | H264Profile::High444Predictive + | H264Profile::High10Intra + | H264Profile::High422Intra + | 
H264Profile::High444Intra + | H264Profile::CAVLC444Intra => true, + } + } + + pub(crate) fn support_entropy_coding_mode(&self) -> bool { + match self { + H264Profile::Baseline + | H264Profile::ConstrainedBaseline + | H264Profile::Extended + | H264Profile::CAVLC444Intra => false, + H264Profile::Main + | H264Profile::High + | H264Profile::High10 + | H264Profile::High422 + | H264Profile::High444Predictive + | H264Profile::High10Intra + | H264Profile::High422Intra + | H264Profile::High444Intra => true, + } + } + + pub(crate) fn support_transform_8x8_mode_flag(&self) -> bool { + match self { + H264Profile::Baseline + | H264Profile::ConstrainedBaseline + | H264Profile::Main + | H264Profile::Extended => false, + H264Profile::High + | H264Profile::High10 + | H264Profile::High422 + | H264Profile::High444Predictive + | H264Profile::High10Intra + | H264Profile::High422Intra + | H264Profile::High444Intra + | H264Profile::CAVLC444Intra => true, + } + } +} diff --git a/media/h264/src/profile_level_id.rs b/media-video/h264/src/profile_level_id.rs similarity index 61% rename from media/h264/src/profile_level_id.rs rename to media-video/h264/src/profile_level_id.rs index 4295c3b8..cde4d54a 100644 --- a/media/h264/src/profile_level_id.rs +++ b/media-video/h264/src/profile_level_id.rs @@ -1,29 +1,20 @@ -use crate::{Level, Profile}; +use crate::{H264Level, H264Profile, profile_iop_consts::CONSTRAINT_SET3_FLAG}; use std::{fmt, num::ParseIntError, str::FromStr}; -pub mod profile_iop_consts { - pub const CONSTRAINT_SET0_FLAG: u8 = 1 << 7; - pub const CONSTRAINT_SET1_FLAG: u8 = 1 << 6; - pub const CONSTRAINT_SET2_FLAG: u8 = 1 << 5; - pub const CONSTRAINT_SET3_FLAG: u8 = 1 << 4; - pub const CONSTRAINT_SET4_FLAG: u8 = 1 << 3; - pub const CONSTRAINT_SET5_FLAG: u8 = 1 << 2; -} - /// H.264 specific parameter which specifies the H.264 encoding profile and level /// /// Represented in fmtp as 3 hex bytes e.g. 
(42E020) #[derive(Debug, Clone, Copy)] pub struct ProfileLevelId { - pub profile: Profile, - pub level: Level, + pub profile: H264Profile, + pub level: H264Level, } impl Default for ProfileLevelId { fn default() -> Self { Self { - profile: Profile::Baseline, - level: Level::Level_3_1, + profile: H264Profile::Baseline, + level: H264Level::Level_3_1, } } } @@ -45,32 +36,32 @@ impl ProfileLevelId { #[rustfmt::skip] let table = const { [ // Constrained baseline - (0x42, const { bitpattern("?1??_0000") }, Profile::ConstrainedBaseline), - (0x4D, const { bitpattern("1???_0000") }, Profile::ConstrainedBaseline), - (0x58, const { bitpattern("11??_0000") }, Profile::ConstrainedBaseline), + (0x42, const { bitpattern("?1??_0000") }, H264Profile::ConstrainedBaseline), + (0x4D, const { bitpattern("1???_0000") }, H264Profile::ConstrainedBaseline), + (0x58, const { bitpattern("11??_0000") }, H264Profile::ConstrainedBaseline), // Baseline - (0x42, const { bitpattern("?0??_0000") }, Profile::Baseline), - (0x58, const { bitpattern("10??_0000") }, Profile::Baseline), + (0x42, const { bitpattern("?0??_0000") }, H264Profile::Baseline), + (0x58, const { bitpattern("10??_0000") }, H264Profile::Baseline), // Main - (0x4D, const { bitpattern("0?0?_0000") }, Profile::Main), + (0x4D, const { bitpattern("0?0?_0000") }, H264Profile::Main), // Extended - (0x58, const { bitpattern("00??_0000") }, Profile::Extended), + (0x58, const { bitpattern("00??_0000") }, H264Profile::Extended), // High - (0x64, const { bitpattern("0000_0000") }, Profile::High), + (0x64, const { bitpattern("0000_0000") }, H264Profile::High), // High10 - (0x6E, const { bitpattern("0000_0000") }, Profile::High10), + (0x6E, const { bitpattern("0000_0000") }, H264Profile::High10), // High422 - (0x7A, const { bitpattern("0000_0000") }, Profile::High422), + (0x7A, const { bitpattern("0000_0000") }, H264Profile::High422), // High444Predictive - (0xF4, const { bitpattern("0000_0000") }, Profile::High444Predictive), + (0xF4, const { bitpattern("0000_0000") }, H264Profile::High444Predictive), // High10 Intra - (0x6E, const { bitpattern("0001_0000") }, Profile::High10Intra), + (0x6E, const { bitpattern("0001_0000") }, H264Profile::High10Intra), // High422 Intra - (0x7A, const { bitpattern("0001_0000") }, Profile::High422Intra), + (0x7A, const { bitpattern("0001_0000") }, H264Profile::High422Intra), // High444 Intra - (0xF4, const { bitpattern("0001_0000") }, Profile::High444Intra), + (0xF4, const { bitpattern("0001_0000") }, H264Profile::High444Intra), // CAVLC444 Intra - (0x2C, const { bitpattern("0001_0000") }, Profile::CAVLC444Intra), + (0x2C, const { bitpattern("0001_0000") }, H264Profile::CAVLC444Intra), ] }; let profile = table @@ -85,31 +76,31 @@ impl ProfileLevelId { .ok_or(ProfileLevelIdFromBytesError::UnknownProfileIdc(profile_idc))?; let level = match level_idc { - 10 => Level::Level_1_0, + 10 => H264Level::Level_1_0, 11 => { - if profile_iop & profile_iop_consts::CONSTRAINT_SET3_FLAG != 0 { - Level::Level_1_B + if profile_iop & CONSTRAINT_SET3_FLAG != 0 { + H264Level::Level_1_B } else { - Level::Level_1_1 + H264Level::Level_1_1 } } - 12 => Level::Level_1_2, - 13 => Level::Level_1_3, - 20 => Level::Level_2_0, - 21 => Level::Level_2_1, - 22 => Level::Level_2_2, - 30 => Level::Level_3_0, - 31 => Level::Level_3_1, - 32 => Level::Level_3_2, - 40 => Level::Level_4_0, - 41 => Level::Level_4_1, - 42 => Level::Level_4_2, - 50 => Level::Level_5_0, - 51 => Level::Level_5_1, - 52 => Level::Level_5_2, - 60 => Level::Level_6_0, - 61 => Level::Level_6_1, - 62 => 
Level::Level_6_2,
+            12 => H264Level::Level_1_2,
+            13 => H264Level::Level_1_3,
+            20 => H264Level::Level_2_0,
+            21 => H264Level::Level_2_1,
+            22 => H264Level::Level_2_2,
+            30 => H264Level::Level_3_0,
+            31 => H264Level::Level_3_1,
+            32 => H264Level::Level_3_2,
+            40 => H264Level::Level_4_0,
+            41 => H264Level::Level_4_1,
+            42 => H264Level::Level_4_2,
+            50 => H264Level::Level_5_0,
+            51 => H264Level::Level_5_1,
+            52 => H264Level::Level_5_2,
+            60 => H264Level::Level_6_0,
+            61 => H264Level::Level_6_1,
+            62 => H264Level::Level_6_2,
             _ => return Err(ProfileLevelIdFromBytesError::UnknownLevelIdc(level_idc)),
         };
@@ -148,8 +139,8 @@ impl fmt::Display for ProfileLevelId {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let mut profile_iop = self.profile.profile_iop();
 
-        if matches!(self.level, Level::Level_1_B) {
-            profile_iop |= profile_iop_consts::CONSTRAINT_SET3_FLAG;
+        if matches!(self.level, H264Level::Level_1_B) {
+            profile_iop |= CONSTRAINT_SET3_FLAG;
         }
 
         write!(
diff --git a/media-video/h264/tests/vaenc_pw_capture.rs b/media-video/h264/tests/vaenc_pw_capture.rs
new file mode 100644
index 00000000..7c06fac8
--- /dev/null
+++ b/media-video/h264/tests/vaenc_pw_capture.rs
@@ -0,0 +1,127 @@
+use capture::wayland::{
+    BitFlag, CapturedFrameBuffer, PersistMode, PipewireOptions, PixelFormat, RgbaSwizzle,
+    ScreenCaptureOptions, SourceType,
+};
+use ezk_h264::{
+    H264Level, H264Profile,
+    encoder::{
+        backends::libva::{VaH264Encoder, VaH264EncoderConfig},
+        config::{FramePattern, SliceMode},
+    },
+};
+use ezk_image::ImageRef;
+use libva::{
+    Display,
+    encoder::{VaEncoderConfig, VaEncoderRateControlConfig, VaEncoderRateControlMode},
+};
+use std::{fs::OpenOptions, io::Write};
+use tokio::sync::mpsc;
+
+#[tokio::test]
+async fn va_encode_memory() {
+    va_encode_memory_inner().await;
+}
+
+async fn va_encode_memory_inner() {
+    env_logger::builder().is_test(true).init();
+
+    let mut devices = Display::enumerate_drm().unwrap();
+    let device = &mut devices[0];
+
+    let (tx, mut rx) = mpsc::channel(8);
+
+    let options = ScreenCaptureOptions {
+        show_cursor: true,
+        source_types: SourceType::all(),
+        persist_mode: PersistMode::DoNot,
+        restore_token: None,
+        pipewire: PipewireOptions {
+            max_framerate: 30,
+            pixel_formats: vec![PixelFormat::RGBA(RgbaSwizzle::BGRA)],
+            dma_usage: None,
+        },
+    };
+
+    capture::wayland::start_screen_capture(options, move |frame| {
+        let buffer = match frame.buffer {
+            CapturedFrameBuffer::Mem(buffer) => buffer,
+            _ => {
+                panic!("Test requires memory buffers")
+            }
+        };
+
+        let image = ezk_image::Image::from_buffer(
+            ezk_image::PixelFormat::BGRA,
+            buffer.memory,
+            None,
+            frame.width as usize,
+            frame.height as usize,
+            ezk_image::ColorInfo::RGB(ezk_image::RgbColorInfo {
+                transfer: ezk_image::ColorTransfer::Linear,
+                primaries: ezk_image::ColorPrimaries::BT709,
+            }),
+        )
+        .unwrap();
+
+        tx.blocking_send(image).is_ok()
+    })
+    .await
+    .unwrap();
+
+    let first_image = rx.recv().await.unwrap();
+
+    let width = first_image.width() as u32;
+    let height = first_image.height() as u32;
+
+    let capabilities =
+        VaH264Encoder::capabilities(device, H264Profile::ConstrainedBaseline).unwrap();
+
+    let mut encoder = VaH264Encoder::new(
+        &capabilities,
+        VaH264EncoderConfig {
+            encoder: VaEncoderConfig {
+                max_encode_resolution: [width, height],
+                initial_encode_resolution: [width, height],
+                rate_control: VaEncoderRateControlConfig {
+                    mode: VaEncoderRateControlMode::CBR,
+                    window_size: 1000,
+                    initial_qp: 24,
+                    min_qp: 0,
+                    max_qp: 51,
+                    bitrate: 6_000_000,
+                    target_percentage: 100,
+                },
+            },
+            profile: H264Profile::ConstrainedBaseline,
+            level: H264Level::Level_6_2,
+            frame_pattern: FramePattern {
+                intra_idr_period: 60,
+                intra_period: 30,
+                ip_period: 1,
+            },
+            slice_mode: SliceMode::Picture,
+        },
+    )
+    .unwrap();
+
+    let mut file = OpenOptions::new()
+        .truncate(true)
+        .create(true)
+        .write(true)
+        .open("../../test.h264")
+        .unwrap();
+
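+    // Note on the drain protocol used below (descriptive only): `encode_frame`
+    // merely submits work to the hardware, `poll_result` returns finished
+    // bitstream chunks without blocking, and the trailing `wait_result` loop
+    // flushes frames that are still in flight once capturing stops.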
+    for _ in 0..1000 {
+        let image = rx.recv().await.unwrap();
+
+        encoder.encode_frame(&image).unwrap();
+
+        while let Some(buf) = encoder.poll_result().unwrap() {
+            file.write_all(&buf).unwrap();
+        }
+    }
+
+    while let Some(buf) = encoder.wait_result().unwrap() {
+        file.write_all(&buf).unwrap();
+    }
+}
diff --git a/media-video/h264/tests/vkenc_pw_capture.rs b/media-video/h264/tests/vkenc_pw_capture.rs
new file mode 100644
index 00000000..dd235820
--- /dev/null
+++ b/media-video/h264/tests/vkenc_pw_capture.rs
@@ -0,0 +1,373 @@
+use capture::wayland::{
+    BitFlag, CapturedDmaBufferSync, CapturedFrameBuffer, DmaPlane, DmaUsageOptions, PersistMode,
+    PipewireOptions, PixelFormat, RgbaSwizzle, ScreenCaptureOptions, SourceType,
+};
+use ezk_h264::{
+    H264Level, H264Profile,
+    encoder::{
+        backends::vulkan::{
+            VkH264Encoder, VulkanH264EncoderConfig, VulkanH264RateControlConfig,
+            VulkanH264RateControlMode,
+        },
+        config::{FramePattern, Framerate, SliceMode},
+    },
+};
+use ezk_image::ImageRef;
+use std::{fs::OpenOptions, io::Write, time::Instant};
+use tokio::sync::mpsc;
+use vulkan::{
+    DrmPlane, Semaphore,
+    ash::vk,
+    encoder::{
+        VulkanEncoderConfig,
+        input::{InputData, InputPixelFormat, InputSync, VulkanImageInput},
+    },
+};
+
+#[tokio::test]
+async fn vk_encode_dma() {
+    vk_encode_dma_inner().await;
+}
+
+async fn vk_encode_dma_inner() {
+    env_logger::builder().is_test(true).init();
+
+    let entry = unsafe { vulkan::ash::Entry::load().unwrap() };
+    let instance = vulkan::Instance::create(entry, &[]).unwrap();
+    let mut physical_devices: Vec<vulkan::PhysicalDevice> = instance.physical_devices().unwrap();
+    let physical_device = &mut physical_devices[0];
+
+    let drm_modifier: Vec<u64> = physical_device
+        .supported_drm_modifier(vk::Format::R8G8B8A8_UNORM)
+        .into_iter()
+        .map(|m| m.modifier)
+        .collect();
+
+    let width = 2560;
+    let height = 1440;
+
+    let capabilities = VkH264Encoder::capabilities(physical_device, H264Profile::Baseline).unwrap();
+
+    let device = vulkan::Device::create(physical_device, &[]).unwrap();
+
+    let (tx, mut rx) = mpsc::channel(8);
+
+    let options = ScreenCaptureOptions {
+        show_cursor: true,
+        source_types: SourceType::all(),
+        persist_mode: PersistMode::DoNot,
+        restore_token: None,
+        pipewire: PipewireOptions {
+            max_framerate: 30,
+            pixel_formats: vec![PixelFormat::RGBA(RgbaSwizzle::BGRA)],
+            dma_usage: Some(DmaUsageOptions {
+                request_sync_obj: true,
+                num_buffers: 16,
+                supported_modifier: drm_modifier,
+            }),
+        },
+    };
+
+    let device_ = device.clone();
+    capture::wayland::start_screen_capture(options, move |frame| {
+        let buffer = match frame.buffer {
+            CapturedFrameBuffer::Dma(buffer) => buffer,
+            _ => {
+                panic!("Test requires DMA buffers")
+            }
+        };
+
+        let mut sync = buffer.sync.map(
+            |CapturedDmaBufferSync {
+                 acquire_point,
+                 release_point,
+                 acquire_fd,
+                 release_fd,
+             }| {
+                (
+                    Some(InputSync {
+                        semaphore: unsafe {
+                            Semaphore::import_timeline_fd(&device_, acquire_fd).unwrap()
+                        },
+                        timeline_point: Some(acquire_point),
+                    }),
+                    Some(InputSync {
+                        semaphore: unsafe {
+                            Semaphore::import_timeline_fd(&device_, release_fd).unwrap()
+                        },
+                        timeline_point: Some(release_point),
+                    }),
+                )
+            },
+        );
+
+        let swizzle = match frame.format
{ + PixelFormat::RGBA(swizzle) => swizzle, + _ => unreachable!(), + }; + + let image = unsafe { + vulkan::Image::import_dma_fd( + &device_, + frame.width, + frame.height, + buffer + .planes + .into_iter() + .map(|DmaPlane { fd, offset, stride }| DrmPlane { fd, offset, stride }) + .collect(), + buffer.modifier, + vk::Format::R8G8B8A8_UNORM, + vk::ImageUsageFlags::SAMPLED, + ) + } + .unwrap(); + + let components = match swizzle { + capture::wayland::RgbaSwizzle::RGBA => vk::ComponentMapping::default(), + capture::wayland::RgbaSwizzle::BGRA => vk::ComponentMapping { + r: vk::ComponentSwizzle::B, + g: vk::ComponentSwizzle::G, + b: vk::ComponentSwizzle::R, + a: vk::ComponentSwizzle::A, + }, + capture::wayland::RgbaSwizzle::ARGB => vk::ComponentMapping { + r: vk::ComponentSwizzle::G, + g: vk::ComponentSwizzle::B, + b: vk::ComponentSwizzle::A, + a: vk::ComponentSwizzle::R, + }, + capture::wayland::RgbaSwizzle::ABGR => vk::ComponentMapping { + r: vk::ComponentSwizzle::A, + g: vk::ComponentSwizzle::B, + b: vk::ComponentSwizzle::G, + a: vk::ComponentSwizzle::R, + }, + }; + + let view = unsafe { + vulkan::ImageView::create( + &image, + &vk::ImageViewCreateInfo::default() + .image(image.handle()) + .components(components) + .format(vk::Format::R8G8B8A8_UNORM) + .view_type(vk::ImageViewType::TYPE_2D) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }), + ) + .unwrap() + }; + + tx.blocking_send(VulkanImageInput { + view, + extent: vk::Extent2D { + width: frame.width, + height: frame.height, + }, + acquire: sync.as_mut().and_then(|(acquire, _release)| acquire.take()), + release: sync.as_mut().and_then(|(_acquire, release)| release.take()), + }) + .is_ok() + }) + .await + .unwrap(); + + let mut encoder = VkH264Encoder::new( + &device, + &capabilities, + VulkanH264EncoderConfig { + encoder: VulkanEncoderConfig { + max_encode_resolution: vk::Extent2D { width, height }, + initial_encode_resolution: vk::Extent2D { width, height }, + max_input_resolution: vk::Extent2D { width, height }, + input_as_vulkan_image: true, + input_pixel_format: InputPixelFormat::RGBA { + primaries: vulkan::encoder::input::Primaries::BT709, + }, + usage_hints: vk::VideoEncodeUsageFlagsKHR::DEFAULT, + content_hints: vk::VideoEncodeContentFlagsKHR::DEFAULT, + tuning_mode: vk::VideoEncodeTuningModeKHR::DEFAULT, + }, + profile: H264Profile::Main, + level: H264Level::Level_6_0, + frame_pattern: FramePattern { + intra_idr_period: u16::MAX, + intra_period: u16::MAX, + ip_period: 1, + }, + rate_control: VulkanH264RateControlConfig { + mode: VulkanH264RateControlMode::VariableBitrate { + average_bitrate: 500_000, + max_bitrate: 1_000_000, + }, + framerate: Some(Framerate::from_fps(240)), + min_qp: None, + max_qp: None, + }, + slice_mode: SliceMode::Picture, + }, + ) + .unwrap(); + + let mut file = OpenOptions::new() + .truncate(true) + .create(true) + .write(true) + .open("../../test.h264") + .unwrap(); + + for _ in 0..1000 { + let input = rx.recv().await.unwrap(); + + let start = Instant::now(); + encoder + .encode_frame(InputData::VulkanImage(input)) + .inspect_err(|e| println!("{e}")) + .unwrap(); + println!("Took: {:?}", start.elapsed()); + + while let Some((_, buf)) = encoder.poll_result().unwrap() { + println!("buf: {}", buf.len()); + + file.write_all(&buf).unwrap(); + } + } + + while let Some((_, buf)) = encoder.wait_result().unwrap() { + file.write_all(&buf).unwrap(); + } +} + +#[tokio::test] +async fn 
vk_encode_memory() {
+    vk_encode_memory_inner().await;
+}
+
+async fn vk_encode_memory_inner() {
+    env_logger::builder().is_test(true).init();
+
+    let entry = unsafe { vulkan::ash::Entry::load().unwrap() };
+    let instance = vulkan::Instance::create(entry, &[]).unwrap();
+    let mut physical_devices: Vec<vulkan::PhysicalDevice> = instance.physical_devices().unwrap();
+    let physical_device = &mut physical_devices[0];
+
+    let (tx, mut rx) = mpsc::channel(8);
+
+    let options = ScreenCaptureOptions {
+        show_cursor: true,
+        source_types: SourceType::all(),
+        persist_mode: PersistMode::DoNot,
+        restore_token: None,
+        pipewire: PipewireOptions {
+            max_framerate: 30,
+            pixel_formats: vec![PixelFormat::RGBA(RgbaSwizzle::BGRA)],
+            dma_usage: None,
+        },
+    };
+
+    capture::wayland::start_screen_capture(options, move |frame| {
+        let buffer = match frame.buffer {
+            CapturedFrameBuffer::Mem(buffer) => buffer,
+            _ => {
+                panic!("Test requires memory buffers")
+            }
+        };
+
+        println!("{:?}", frame.format);
+
+        let image = ezk_image::Image::from_buffer(
+            ezk_image::PixelFormat::BGRA,
+            buffer.memory,
+            None,
+            frame.width as usize,
+            frame.height as usize,
+            ezk_image::ColorInfo::RGB(ezk_image::RgbColorInfo {
+                transfer: ezk_image::ColorTransfer::Linear,
+                primaries: ezk_image::ColorPrimaries::BT709,
+            }),
+        )
+        .unwrap();
+
+        tx.blocking_send(image).is_ok()
+    })
+    .await
+    .unwrap();
+
+    let first_image = rx.recv().await.unwrap();
+
+    let width = first_image.width() as u32;
+    let height = first_image.height() as u32;
+
+    println!("{width}x{height}");
+
+    let capabilities = VkH264Encoder::capabilities(physical_device, H264Profile::Baseline).unwrap();
+    let device = vulkan::Device::create(physical_device, &[]).unwrap();
+
+    let mut encoder = VkH264Encoder::new(
+        &device,
+        &capabilities,
+        VulkanH264EncoderConfig {
+            encoder: VulkanEncoderConfig {
+                max_encode_resolution: vk::Extent2D { width, height },
+                initial_encode_resolution: vk::Extent2D {
+                    width,
+                    height: height / 2,
+                },
+                max_input_resolution: vk::Extent2D {
+                    width: width * 2,
+                    height,
+                },
+                input_as_vulkan_image: false,
+                input_pixel_format: InputPixelFormat::RGBA {
+                    primaries: vulkan::encoder::input::Primaries::BT709,
+                },
+                usage_hints: vk::VideoEncodeUsageFlagsKHR::DEFAULT,
+                content_hints: vk::VideoEncodeContentFlagsKHR::DEFAULT,
+                tuning_mode: vk::VideoEncodeTuningModeKHR::DEFAULT,
+            },
+            profile: H264Profile::Baseline,
+            level: H264Level::Level_6_2,
+            frame_pattern: FramePattern {
+                intra_idr_period: 60,
+                intra_period: 30,
+                ip_period: 1,
+            },
+            rate_control: VulkanH264RateControlConfig {
+                mode: VulkanH264RateControlMode::ConstantBitrate { bitrate: 6_000_000 },
+                framerate: None,
+                min_qp: None,
+                max_qp: None,
+            },
+            slice_mode: SliceMode::Picture,
+        },
+    )
+    .unwrap();
+
+    let mut file = OpenOptions::new()
+        .truncate(true)
+        .create(true)
+        .write(true)
+        .open("../../test.h264")
+        .unwrap();
+
+    for _ in 0..100 {
+        let image = rx.recv().await.unwrap();
+
+        let start = Instant::now();
+        encoder.encode_frame(InputData::Image(&image)).unwrap();
+        println!("Took: {:?}", start.elapsed());
+        while let Some((_, buf)) = encoder.poll_result().unwrap() {
+            file.write_all(&buf).unwrap();
+        }
+    }
+
+    while let Some((_, buf)) = encoder.wait_result().unwrap() {
+        file.write_all(&buf).unwrap();
+    }
+}
diff --git a/media-video/libva/Cargo.toml b/media-video/libva/Cargo.toml
new file mode 100644
index 00000000..925c7223
--- /dev/null
+++ b/media-video/libva/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "ezk-libva"
+version = "0.1.0"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+
+[lints]
+workspace = true
+
+[dependencies]
+thiserror = "2"
+log = "0.4"
+ezk-image = { workspace = true, features = [
+    "NV12",
+    "RGBA",
+    "BGRA",
+    "I420",
+    "I422",
+    "I444",
+    "RGB",
+    "BGR",
+    "I010",
+] }
+bitflags = "2"
+
+[build-dependencies]
+bindgen = "0.72"
+pkg-config = "0.3"
diff --git a/media-video/libva/build.rs b/media-video/libva/build.rs
new file mode 100644
index 00000000..c475ac90
--- /dev/null
+++ b/media-video/libva/build.rs
@@ -0,0 +1,37 @@
+use std::env;
+
+fn main() {
+    if cfg!(not(target_os = "linux")) {
+        return;
+    }
+
+    let libva = pkg_config::probe_library("libva").unwrap();
+    let libva_drm = pkg_config::probe_library("libva-drm").unwrap();
+
+    for lib in libva.libs.into_iter().chain(libva_drm.libs) {
+        println!("cargo:rustc-link-lib={lib}");
+    }
+
+    let mut bindgen = bindgen::Builder::default();
+
+    for include_path in libva
+        .include_paths
+        .into_iter()
+        .chain(libva_drm.include_paths)
+    {
+        bindgen = bindgen.clang_arg(format!("-I{}", include_path.to_string_lossy()));
+    }
+
+    bindgen
+        .header("wrapper.h")
+        .allowlist_function("(va|VA).*")
+        .allowlist_type("(va|VA).*")
+        .allowlist_var("(va|VA).*")
+        .derive_partialeq(true)
+        .derive_eq(true)
+        .derive_debug(true)
+        .generate()
+        .unwrap()
+        .write_to_file(format!("{}/bindings.rs", env::var("OUT_DIR").unwrap()))
+        .unwrap();
+}
diff --git a/media-video/libva/src/buffer.rs b/media-video/libva/src/buffer.rs
new file mode 100644
index 00000000..8c55ec72
--- /dev/null
+++ b/media-video/libva/src/buffer.rs
@@ -0,0 +1,70 @@
+use std::{ffi::c_void, ptr::null_mut, sync::Arc};
+
+use crate::{Handle, VaError, ffi};
+
+#[must_use]
+pub struct Buffer {
+    pub(crate) display: Arc<Handle>,
+    pub(crate) buf_id: ffi::VABufferID,
+}
+
+impl Buffer {
+    pub fn id(&self) -> ffi::VABufferID {
+        self.buf_id
+    }
+
+    pub fn map(&mut self) -> Result<MappedBuffer<'_>, VaError> {
+        unsafe {
+            let mut mapped = null_mut();
+
+            VaError::try_(ffi::vaMapBuffer(
+                self.display.dpy,
+                self.buf_id,
+                &raw mut mapped,
+            ))?;
+
+            Ok(MappedBuffer {
+                encoded_buffer: self,
+                mapped,
+            })
+        }
+    }
+}
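+
+// Illustrative use of the mapping guard above (assumes `buffer: &mut Buffer`
+// and a known payload length `len`; not part of this change):
+//
+//     let mut mapped = buffer.map()?;
+//     let bytes = unsafe { std::slice::from_raw_parts(mapped.data().cast::<u8>(), len) };
+//     // dropping `mapped` unmaps the buffer again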
+
+impl Drop for Buffer {
+    fn drop(&mut self) {
+        unsafe {
+            if let Err(e) = VaError::try_(ffi::vaDestroyBuffer(self.display.dpy, self.buf_id)) {
+                log::error!("Failed to destroy VABuffer {}, {}", self.buf_id, e)
+            }
+        }
+    }
+}
+
+pub struct MappedBuffer<'a> {
+    encoded_buffer: &'a mut Buffer,
+    mapped: *mut std::ffi::c_void,
+}
+
+impl MappedBuffer<'_> {
+    pub fn data(&mut self) -> *mut c_void {
+        self.mapped
+    }
+}
+
+impl Drop for MappedBuffer<'_> {
+    fn drop(&mut self) {
+        unsafe {
+            if let Err(e) = VaError::try_(ffi::vaUnmapBuffer(
+                self.encoded_buffer.display.dpy,
+                self.encoded_buffer.buf_id,
+            )) {
+                log::error!(
+                    "Failed to unmap VABuffer {}, {}",
+                    self.encoded_buffer.buf_id,
+                    e
+                )
+            }
+        }
+    }
+}
diff --git a/media-video/libva/src/config.rs b/media-video/libva/src/config.rs
new file mode 100644
index 00000000..0e0caa4f
--- /dev/null
+++ b/media-video/libva/src/config.rs
@@ -0,0 +1,18 @@
+use std::sync::Arc;
+
+use crate::{Handle, VaError, ffi};
+
+pub struct Config {
+    pub(crate) display: Arc<Handle>,
+    pub(crate) config_id: ffi::VAConfigID,
+}
+
+impl Drop for Config {
+    fn drop(&mut self) {
+        unsafe {
+            if let Err(e) = VaError::try_(ffi::vaDestroyConfig(self.display.dpy, self.config_id)) {
+                log::error!("Failed to destroy VAConfig: {e}");
+            }
+        }
+    }
+}
diff --git a/media-video/libva/src/context.rs b/media-video/libva/src/context.rs
new file mode 100644
index 00000000..10554089
--- /dev/null
+++ b/media-video/libva/src/context.rs
@@ -0,0 +1,162 @@
+use std::{ffi::c_void, ptr::null_mut, sync::Arc};
+
+use crate::{Handle, Surface, VaError, buffer::Buffer, ffi};
+
+pub struct Context {
+    pub(crate) display: Arc<Handle>,
+    pub(crate) context_id: ffi::VAContextID,
+}
+
+impl Context {
+    pub fn create_buffer_empty(
+        &self,
+        type_: ffi::VABufferType,
+        size: usize,
+    ) -> Result<Buffer, VaError> {
+        unsafe {
+            let mut buf_id = ffi::VA_INVALID_ID;
+
+            VaError::try_(ffi::vaCreateBuffer(
+                self.display.dpy,
+                self.context_id,
+                type_,
+                size as _,
+                1,
+                null_mut(),
+                &raw mut buf_id,
+            ))?;
+
+            Ok(Buffer {
+                display: self.display.clone(),
+                buf_id,
+            })
+        }
+    }
+
+    pub fn create_buffer_with_data<T>(
+        &self,
+        type_: ffi::VABufferType,
+        data: &T,
+    ) -> Result<Buffer, VaError> {
+        unsafe {
+            let mut buf_id = ffi::VA_INVALID_ID;
+
+            VaError::try_(ffi::vaCreateBuffer(
+                self.display.dpy,
+                self.context_id,
+                type_,
+                size_of::<T>() as _,
+                1,
+                data as *const T as *mut c_void,
+                &raw mut buf_id,
+            ))?;
+
+            Ok(Buffer {
+                display: self.display.clone(),
+                buf_id,
+            })
+        }
+    }
+
+    pub fn create_buffer_from_bytes(
+        &self,
+        type_: ffi::VABufferType,
+        bytes: &[u8],
+    ) -> Result<Buffer, VaError> {
+        unsafe {
+            let mut buf_id = ffi::VA_INVALID_ID;
+
+            VaError::try_(ffi::vaCreateBuffer(
+                self.display.dpy,
+                self.context_id,
+                type_,
+                bytes.len() as _,
+                1,
+                bytes.as_ptr().cast_mut().cast(),
+                &raw mut buf_id,
+            ))?;
+
+            Ok(Buffer {
+                display: self.display.clone(),
+                buf_id,
+            })
+        }
+    }
+
+    pub fn begin_picture(&self, render_target: &Surface) -> Result<Pipeline<'_>, VaError> {
+        debug_assert!(Arc::ptr_eq(&self.display, &render_target.display));
+
+        unsafe {
+            VaError::try_(ffi::vaBeginPicture(
+                self.display.dpy,
+                self.context_id,
+                render_target.surface_id,
+            ))?;
+        }
+
+        Ok(Pipeline {
+            context: self,
+            completed: false,
+        })
+    }
+}
+
+impl Drop for Context {
+    fn drop(&mut self) {
+        unsafe {
+            if let Err(e) = VaError::try_(ffi::vaDestroyContext(self.display.dpy, self.context_id))
+            {
+                log::error!("Failed to destroy VAContext {}, {}", self.context_id, e)
+            }
+        }
+    }
+}
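+
+// Illustrative encode submission through the guard below (buffer creation
+// elided; names match this module):
+//
+//     let pipeline = context.begin_picture(&surface)?;
+//     pipeline.render_picture(&buffers)?;
+//     pipeline.end_picture()?;
+//
+// If `end_picture` is never reached, `Pipeline`'s Drop impl still calls
+// vaEndPicture so the context is not left mid-picture.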
+
+pub struct Pipeline<'a> {
+    context: &'a Context,
+    completed: bool,
+}
+
+impl<'a> Pipeline<'a> {
+    pub fn render_picture(
+        &self,
+        buffers: impl IntoIterator<Item = &'a Buffer>,
+    ) -> Result<(), VaError> {
+        unsafe {
+            let buffers: Vec<ffi::VABufferID> = buffers.into_iter().map(|b| b.buf_id).collect();
+
+            VaError::try_(ffi::vaRenderPicture(
+                self.context.display.dpy,
+                self.context.context_id,
+                buffers.as_ptr().cast_mut(),
+                buffers.len() as _,
+            ))
+        }
+    }
+
+    pub fn end_picture(mut self) -> Result<(), VaError> {
+        self.completed = true;
+        self.end_picture_ref()
+    }
+
+    fn end_picture_ref(&mut self) -> Result<(), VaError> {
+        unsafe {
+            VaError::try_(ffi::vaEndPicture(
+                self.context.display.dpy,
+                self.context.context_id,
+            ))
+        }
+    }
+}
+
+impl Drop for Pipeline<'_> {
+    fn drop(&mut self) {
+        if self.completed {
+            return;
+        }
+
+        if let Err(e) = self.end_picture_ref() {
+            log::error!("vaEndPicture failed: {e:?}");
+        }
+    }
+}
diff --git a/media-video/libva/src/display.rs b/media-video/libva/src/display.rs
new file mode 100644
index 00000000..dc7c5bbb
--- /dev/null
+++ b/media-video/libva/src/display.rs
@@ -0,0 +1,392 @@
+use std::{
+    ffi::{CStr, c_int},
+    fmt,
+    fs::OpenOptions,
+    io,
+    mem::{MaybeUninit, zeroed},
+    os::fd::AsRawFd,
+    path::{Path, PathBuf},
+    ptr::null_mut,
+    sync::Arc,
+};
+
+use crate::{
+    Config, Context, Handle, Image, Surface, VaError,
+    ffi::{self, vaQueryImageFormats},
+};
+
+#[derive(Debug, thiserror::Error)]
+pub enum DisplayOpenDrmError {
+    #[error("IO error {0}")]
+    Io(#[from] io::Error),
+    #[error("Call to vaGetDisplayDRM failed")]
+    GetDisplayDRM,
+    #[error("Failed to initialize the va library")]
+    Initialize(#[source] VaError),
+}
+
+#[derive(Clone)]
+pub struct Display {
+    handle: Arc<Handle>,
+}
+
+impl Display {
+    /// Enumerate all DRM displays
+    pub fn enumerate_drm() -> Result<Vec<Display>, DisplayOpenDrmError> {
+        let read_dir = std::fs::read_dir("/dev/dri")?;
+
+        let mut devices = Vec::new();
+
+        for entry in read_dir {
+            let entry = entry?;
+
+            if !entry.file_name().as_encoded_bytes().starts_with(b"renderD") {
+                continue;
+            }
+
+            let display = Self::open_drm(entry.path())?;
+
+            devices.push(display);
+        }
+
+        devices.sort_by(|l, r| l.drm_path().cmp(r.drm_path()));
+
+        Ok(devices)
+    }
+
+    /// Open a DRM display
+    ///
+    /// Path should be something like `/dev/dri/renderD128` or `/dev/dri/renderD129`
+    pub fn open_drm<P: AsRef<Path>>(path: P) -> Result<Self, DisplayOpenDrmError> {
+        let drm_file = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .open(path.as_ref())?;
+
+        unsafe {
+            let dpy = ffi::vaGetDisplayDRM(drm_file.as_raw_fd());
+
+            if dpy.is_null() {
+                return Err(DisplayOpenDrmError::GetDisplayDRM);
+            }
+
+            let mut major = ffi::VA_MAJOR_VERSION as i32;
+            let mut minor = ffi::VA_MINOR_VERSION as i32;
+
+            VaError::try_(ffi::vaInitialize(dpy, &mut major, &mut minor))
+                .map_err(DisplayOpenDrmError::Initialize)?;
+
+            // Query display attributes
+            let mut attributes = [ffi::VADisplayAttribute {
+                type_: ffi::VADisplayAttribType_VADisplayPCIID,
+                ..zeroed()
+            }];
+            let mut num_attributes = attributes.len() as c_int;
+
+            VaError::try_(ffi::vaQueryDisplayAttributes(
+                dpy,
+                attributes.as_mut_ptr(),
+                &raw mut num_attributes,
+            ))
+            .map_err(DisplayOpenDrmError::Initialize)?;
+
+            let [b0, b1, b2, b3] = attributes[0].value.to_ne_bytes();
+            let device_id = u16::from_ne_bytes([b0, b1]);
+            let vendor_id = u16::from_ne_bytes([b2, b3]);
+
+            Ok(Self {
+                handle: Arc::new(Handle {
+                    _drm_file: drm_file,
+                    drm_path: path.as_ref().into(),
+                    vendor_id,
+                    device_id,
+                    dpy,
+                }),
+            })
+        }
+    }
+
+    pub fn drm_path(&self) -> &PathBuf {
+        &self.handle.drm_path
+    }
+
+    pub fn vendor_id(&self) -> u16 {
+        self.handle.vendor_id
+    }
+
+    pub fn device_id(&self) -> u16 {
+        self.handle.device_id
+    }
+
+    pub fn vendor(&self) -> Option<&'static CStr> {
+        unsafe {
+            let char_ptr = ffi::vaQueryVendorString(self.handle.dpy);
+
+            if char_ptr.is_null() {
+                None
+            } else {
+                Some(CStr::from_ptr(char_ptr))
+            }
+        }
+    }
+
+    /// Query all available profiles
+    pub fn profiles(&self) -> Result<Vec<ffi::VAProfile>, VaError> {
+        let mut num_profiles = unsafe { ffi::vaMaxNumProfiles(self.handle.dpy) };
+
+        let mut profiles: Vec<ffi::VAProfile> = vec![0; num_profiles as usize];
+
+        VaError::try_(unsafe {
+            ffi::vaQueryConfigProfiles(
+                self.handle.dpy,
+                profiles.as_mut_ptr().cast(),
+                &raw mut num_profiles,
+            )
+        })?;
+
+        profiles.truncate(num_profiles as usize);
+
+        Ok(profiles)
+    }
+
+    /// Query all available entrypoints for the given profile
+    pub fn entrypoints(&self, profile: ffi::VAProfile) -> Result<Vec<ffi::VAEntrypoint>, VaError> {
+        let mut num_entrypoint = unsafe { ffi::vaMaxNumEntrypoints(self.handle.dpy) };
+
+        let mut entrypoints: Vec<ffi::VAEntrypoint> = vec![0; num_entrypoint as usize];
+
+        VaError::try_(unsafe {
+            ffi::vaQueryConfigEntrypoints(
+                self.handle.dpy,
+                profile,
+                entrypoints.as_mut_ptr().cast(),
+                &raw mut num_entrypoint,
+            )
+        })?;
+
+        entrypoints.truncate(num_entrypoint as usize);
+
+        Ok(entrypoints)
+    }
+
+    /// Query all supported image formats
+    pub fn image_formats(&self) -> Result<Vec<ffi::VAImageFormat>, VaError> {
+        unsafe {
+            let mut len = ffi::vaMaxNumImageFormats(self.handle.dpy);
+
+            let mut formats = vec![zeroed();
len as usize];
+
+            VaError::try_(vaQueryImageFormats(
+                self.handle.dpy,
+                formats.as_mut_ptr(),
+                &raw mut len,
+            ))?;
+
+            formats.truncate(len as usize);
+
+            Ok(formats)
+        }
+    }
+
+    pub fn get_config_attributes(
+        &self,
+        profile: ffi::VAProfile,
+        entrypoint: ffi::VAEntrypoint,
+    ) -> Result<Vec<ffi::VAConfigAttrib>, VaError> {
+        unsafe {
+            const MAX_ATTRIBUTES: usize = ffi::VAConfigAttribType_VAConfigAttribTypeMax as usize;
+
+            let mut attrib_list: Vec<ffi::VAConfigAttrib> = (0..MAX_ATTRIBUTES)
+                .map(|i| ffi::VAConfigAttrib {
+                    type_: i as _,
+                    value: 0,
+                })
+                .collect();
+
+            VaError::try_(ffi::vaGetConfigAttributes(
+                self.handle.dpy,
+                profile,
+                entrypoint,
+                attrib_list.as_mut_ptr(),
+                MAX_ATTRIBUTES as _,
+            ))?;
+
+            attrib_list.set_len(MAX_ATTRIBUTES);
+
+            Ok(attrib_list)
+        }
+    }
+
+    pub fn create_config(
+        &self,
+        profile: ffi::VAProfile,
+        entrypoint: ffi::VAEntrypoint,
+        attributes: &[ffi::VAConfigAttrib],
+    ) -> Result<Config, VaError> {
+        let mut config_id = ffi::VA_INVALID_ID;
+
+        VaError::try_(unsafe {
+            ffi::vaCreateConfig(
+                self.handle.dpy,
+                profile,
+                entrypoint,
+                attributes.as_ptr().cast_mut(),
+                attributes.len() as _,
+                &raw mut config_id,
+            )
+        })?;
+
+        Ok(Config {
+            display: self.handle.clone(),
+            config_id,
+        })
+    }
+
+    pub fn query_surface_attributes(
+        &self,
+        config: &Config,
+    ) -> Result<Vec<ffi::VASurfaceAttrib>, VaError> {
+        unsafe {
+            let mut num = 0;
+
+            VaError::try_(ffi::vaQuerySurfaceAttributes(
+                self.handle.dpy,
+                config.config_id,
+                null_mut(),
+                &raw mut num,
+            ))?;
+
+            let mut attrib_list = Vec::with_capacity(num as usize);
+
+            VaError::try_(ffi::vaQuerySurfaceAttributes(
+                self.handle.dpy,
+                config.config_id,
+                attrib_list.as_mut_ptr(),
+                &raw mut num,
+            ))?;
+
+            attrib_list.set_len(num as usize);
+
+            Ok(attrib_list)
+        }
+    }
+
+    pub fn create_surfaces(
+        &self,
+        format: u32,
+        width: u32,
+        height: u32,
+        num: u32,
+        attributes: &[ffi::VASurfaceAttrib],
+    ) -> Result<Vec<Surface>, VaError> {
+        unsafe {
+            let mut surfaces: Vec<ffi::VASurfaceID> = vec![ffi::VA_INVALID_ID; num as usize];
+
+            VaError::try_(ffi::vaCreateSurfaces(
+                self.handle.dpy,
+                format,
+                width,
+                height,
+                surfaces.as_mut_ptr(),
+                num as _,
+                attributes.as_ptr().cast_mut(),
+                attributes.len() as _,
+            ))?;
+
+            let surfaces = surfaces
+                .into_iter()
+                .map(|surface_id| Surface {
+                    display: self.handle.clone(),
+                    surface_id,
+                })
+                .collect();
+
+            Ok(surfaces)
+        }
+    }
+
+    pub fn create_context<'a>(
+        &self,
+        config: &Config,
+        picture_width: i32,
+        picture_height: i32,
+        flag: i32,
+        render_targets: impl IntoIterator<Item = &'a Surface>,
+    ) -> Result<Context, VaError> {
+        unsafe {
+            let mut render_targets: Vec<ffi::VASurfaceID> =
+                render_targets.into_iter().map(|c| c.surface_id).collect();
+            let mut context_id = ffi::VA_INVALID_ID;
+
+            VaError::try_(ffi::vaCreateContext(
+                self.handle.dpy,
+                config.config_id,
+                picture_width,
+                picture_height,
+                flag,
+                render_targets.as_mut_ptr(),
+                render_targets.len() as _,
+                &raw mut context_id,
+            ))?;
+
+            Ok(Context {
+                display: self.handle.clone(),
+                context_id,
+            })
+        }
+    }
+
+    pub fn query_image_formats(&self) -> Result<Vec<ffi::VAImageFormat>, VaError> {
+        unsafe {
+            let mut num_formats = ffi::vaMaxNumImageFormats(self.handle.dpy);
+
+            let mut formats = Vec::with_capacity(num_formats as usize);
+
+            VaError::try_(ffi::vaQueryImageFormats(
+                self.handle.dpy,
+                formats.as_mut_ptr(),
+                &raw mut num_formats,
+            ))?;
+
+            formats.set_len(num_formats as usize);
+
+            Ok(formats)
+        }
+    }
+
+    pub fn create_image(
+        &self,
+        mut format: ffi::VAImageFormat,
+        width: i32,
+        height: i32,
+    ) -> Result<Image, VaError> {
+        unsafe {
+            let mut image = MaybeUninit::uninit();
+
+            VaError::try_(ffi::vaCreateImage(
+                self.handle.dpy,
+                &raw mut format,
+                width,
height, + image.as_mut_ptr(), + ))?; + + Ok(Image { + display: self.handle.clone(), + image: image.assume_init(), + }) + } + } +} + +impl fmt::Debug for Display { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Display") + .field("dpy", &self.handle.dpy) + .field("drm_path", &self.handle.drm_path) + .field("vendor_id", &self.handle.vendor_id) + .field("device_id", &self.handle.device_id) + .field("vendor", &self.vendor()) + .finish() + } +} diff --git a/media-video/libva/src/encoder.rs b/media-video/libva/src/encoder.rs new file mode 100644 index 00000000..6b5d97c8 --- /dev/null +++ b/media-video/libva/src/encoder.rs @@ -0,0 +1,695 @@ +use ezk_image::{ + ColorInfo, ColorSpace, ConvertError, Image, ImageError, ImageRef, PixelFormat, YuvColorInfo, + convert_multi_thread, +}; + +use crate::{Buffer, Context, Display, FourCC, RtFormat, Surface, VaError, ffi, map_pixel_format}; +use std::{ + collections::VecDeque, + mem::zeroed, + slice::{from_raw_parts, from_raw_parts_mut}, +}; + +#[derive(Debug, Clone, Copy)] +pub struct VaEncoderImplConfig { + pub user: VaEncoderConfig, + pub va_rt_format: RtFormat, + pub num_dpb_slots: u32, + pub num_encode_slots: u32, +} + +#[derive(Debug, Clone, Copy)] +pub struct VaEncoderConfig { + pub max_encode_resolution: [u32; 2], + pub initial_encode_resolution: [u32; 2], + pub rate_control: VaEncoderRateControlConfig, +} + +#[derive(Debug, Clone, Copy)] +pub struct VaEncoderRateControlConfig { + pub mode: VaEncoderRateControlMode, + pub window_size: u32, + pub initial_qp: u8, + pub min_qp: u8, + pub max_qp: u8, + pub bitrate: u32, + pub target_percentage: u32, +} + +bitflags::bitflags! { + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct VaEncoderRateControlMode: u32 { + const NONE = ffi::VA_RC_NONE; + const CBR = ffi::VA_RC_CBR; + const VBR = ffi::VA_RC_VBR; + const VCM = ffi::VA_RC_VCM; + const CQP = ffi::VA_RC_CQP; + const VBR_CONSTRAINED = ffi::VA_RC_VBR_CONSTRAINED; + const ICQ = ffi::VA_RC_ICQ; + const MB = ffi::VA_RC_MB; + const CFS = ffi::VA_RC_CFS; + const PARALLEL = ffi::VA_RC_PARALLEL; + const QVBR = ffi::VA_RC_QVBR; + const AVBR = ffi::VA_RC_AVBR; + const TCBRC = ffi::VA_RC_TCBRC; + } +} + +#[derive(Debug, thiserror::Error)] +pub enum VaEncoderCapabilitiesError { + #[error("Profile {0:?} is not supported")] + UnsupportedProfile(ffi::VAProfile), + #[error("Failed to get entrypoints for profile {0:?}")] + FailedToGetEntrypoints(#[source] VaError), + #[error("No encode entrypoint for profile {0:?}")] + UnsupportedEncodeProfile(ffi::VAProfile), + #[error("Failed to get config attributes {0}")] + FailedToGetConfigAttributes(#[source] VaError), + #[error("Failed to get image formats {0}")] + FailedToGetImageFormats(#[source] VaError), +} + +#[derive(Debug, thiserror::Error)] +pub enum VaEncoderCreateError { + #[error("Failed to create va config")] + FailedToCreateConfig(#[source] VaError), + #[error("Failed to create va surfaces")] + FailedToCreateSurfaces(#[source] VaError), + #[error("Failed to create va context")] + FailedToCreateContext(#[source] VaError), + #[error("Failed to create coded buffer")] + FailedToCreateCodedBuffer(#[source] VaError), +} + +#[derive(Debug, thiserror::Error)] +pub enum VaEncodeFrameError { + #[error("Failed to create destination image from VAImage")] + FailedToCreateDestinationImage(#[from] ImageError), + + #[error("Failed to convert/copy input image to VAImage")] + FailedToConvert(#[from] ConvertError), + + #[error(transparent)] + Va(#[from] VaError), +} + 
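+/// Snapshot of what the driver supports for one (display, profile) pair.
+///
+/// Illustrative flow, assuming `va_profile` and `impl_config` are built as in
+/// the tests above (error types differ per step, hence the `ignore`):
+///
+/// ```ignore
+/// let display = Display::enumerate_drm()?.remove(0);
+/// let caps = VaEncoderCapabilities::new(&display, va_profile)?;
+/// let mut encoder = caps.create_encoder(impl_config)?;
+/// ```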
+#[derive(Debug)]
+pub struct VaEncoderCapabilities {
+    display: Display,
+
+    va_profile: ffi::VAProfile,
+    va_entrypoint: ffi::VAEntrypoint,
+
+    support_packed_headers: bool,
+    support_packed_header_sequence: bool,
+    support_packed_header_picture: bool,
+    support_packed_header_slice: bool,
+
+    // e.g. ffi::VA_RT_FORMAT_YUV420
+    pub rt_formats: RtFormat,
+
+    pub rc_modes: VaEncoderRateControlMode,
+
+    pub max_reference_frames: u32,
+
+    pub max_width: u32,
+    pub max_height: u32,
+
+    pub max_quality_level: Option<u32>,
+
+    // Slice structures
+    pub slice_structure_support_power_of_two_rows: bool,
+    pub slice_structure_support_arbitrary_macroblocks: bool,
+    pub slice_structure_support_equal_rows: bool,
+    pub slice_structure_support_max_slice_size: bool,
+    pub slice_structure_support_arbitrary_rows: bool,
+    pub slice_structure_support_equal_multi_rows: bool,
+
+    pub image_formats: Vec<FourCC>,
+}
+
+impl VaEncoderCapabilities {
+    pub fn new(
+        display: &Display,
+        va_profile: ffi::VAProfile,
+    ) -> Result<Self, VaEncoderCapabilitiesError> {
+        type E = VaEncoderCapabilitiesError;
+
+        let va_entrypoint = display
+            .entrypoints(va_profile)
+            .map_err(E::FailedToGetEntrypoints)?
+            .into_iter()
+            .find(|&e| {
+                e == ffi::VAEntrypoint_VAEntrypointEncSlice
+                    || e == ffi::VAEntrypoint_VAEntrypointEncSliceLP
+            })
+            .ok_or(E::UnsupportedEncodeProfile(va_profile))?;
+
+        let attrs = display
+            .get_config_attributes(va_profile, va_entrypoint)
+            .map_err(E::FailedToGetConfigAttributes)?;
+
+        let mut support_packed_headers = false;
+        let mut support_packed_header_sequence = false;
+        let mut support_packed_header_picture = false;
+        let mut support_packed_header_slice = false;
+
+        {
+            let value =
+                attrs[ffi::VAConfigAttribType_VAConfigAttribEncPackedHeaders as usize].value;
+
+            if value != ffi::VA_ATTRIB_NOT_SUPPORTED {
+                support_packed_headers = true;
+                support_packed_header_sequence = (value & ffi::VA_ENC_PACKED_HEADER_SEQUENCE) != 0;
+                support_packed_header_picture = (value & ffi::VA_ENC_PACKED_HEADER_PICTURE) != 0;
+                support_packed_header_slice = (value & ffi::VA_ENC_PACKED_HEADER_SLICE) != 0;
+            }
+        }
+
+        let rc_modes = attrs[ffi::VAConfigAttribType_VAConfigAttribRateControl as usize].value;
+        let rt_formats = attrs[ffi::VAConfigAttribType_VAConfigAttribRTFormat as usize].value;
+        let max_reference_frames =
+            attrs[ffi::VAConfigAttribType_VAConfigAttribEncMaxRefFrames as usize].value;
+        let max_width = attrs[ffi::VAConfigAttribType_VAConfigAttribMaxPictureWidth as usize].value;
+        let max_height =
+            attrs[ffi::VAConfigAttribType_VAConfigAttribMaxPictureHeight as usize].value;
+
+        let max_quality_level = {
+            let value = attrs[ffi::VAConfigAttribType_VAConfigAttribEncQualityRange as usize].value;
+
+            if value == ffi::VA_ATTRIB_NOT_SUPPORTED {
+                None
+            } else {
+                Some(value)
+            }
+        };
+
+        // EncSliceStructure
+        let enc_slice_structures =
+            attrs[ffi::VAConfigAttribType_VAConfigAttribEncSliceStructure as usize].value;
+
+        let mut slice_structure_support_power_of_two_rows = false;
+        let mut slice_structure_support_arbitrary_macroblocks = false;
+        let mut slice_structure_support_equal_rows = false;
+        let mut slice_structure_support_max_slice_size = false;
+        let mut slice_structure_support_arbitrary_rows = false;
+        let mut slice_structure_support_equal_multi_rows = false;
+
+        if enc_slice_structures != ffi::VA_ATTRIB_NOT_SUPPORTED {
+            slice_structure_support_power_of_two_rows =
+                enc_slice_structures & ffi::VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS != 0;
+            slice_structure_support_arbitrary_macroblocks =
+                enc_slice_structures &
ffi::VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS != 0;
+            slice_structure_support_equal_rows =
+                enc_slice_structures & ffi::VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS != 0;
+            slice_structure_support_max_slice_size =
+                enc_slice_structures & ffi::VA_ENC_SLICE_STRUCTURE_MAX_SLICE_SIZE != 0;
+            slice_structure_support_arbitrary_rows =
+                enc_slice_structures & ffi::VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS != 0;
+            slice_structure_support_equal_multi_rows =
+                enc_slice_structures & ffi::VA_ENC_SLICE_STRUCTURE_EQUAL_MULTI_ROWS != 0;
+        }
+
+        let image_formats = display
+            .image_formats()
+            .map_err(E::FailedToGetImageFormats)?
+            .into_iter()
+            .map(|image_format| FourCC::from_bits_retain(image_format.fourcc))
+            .collect();
+
+        Ok(VaEncoderCapabilities {
+            display: display.clone(),
+            va_profile,
+            va_entrypoint,
+            support_packed_headers,
+            support_packed_header_sequence,
+            support_packed_header_picture,
+            support_packed_header_slice,
+            rt_formats: RtFormat::from_bits_retain(rt_formats),
+            rc_modes: VaEncoderRateControlMode::from_bits_retain(rc_modes),
+            max_reference_frames,
+            max_width,
+            max_height,
+            max_quality_level,
+            slice_structure_support_power_of_two_rows,
+            slice_structure_support_arbitrary_macroblocks,
+            slice_structure_support_equal_rows,
+            slice_structure_support_max_slice_size,
+            slice_structure_support_arbitrary_rows,
+            slice_structure_support_equal_multi_rows,
+            image_formats,
+        })
+    }
+
+    pub fn profile(&self) -> ffi::VAProfile {
+        self.va_profile
+    }
+
+    pub fn supported_pixel_formats(&self) -> Vec<PixelFormat> {
+        self.image_formats
+            .iter()
+            .copied()
+            .filter_map(map_pixel_format)
+            .collect()
+    }
+
+    pub fn create_encoder(
+        &self,
+        config: VaEncoderImplConfig,
+    ) -> Result<VaEncoder, VaEncoderCreateError> {
+        type E = VaEncoderCreateError;
+
+        let mut config_attributes = Vec::new();
+        config_attributes.push(ffi::VAConfigAttrib {
+            type_: ffi::VAConfigAttribType_VAConfigAttribRTFormat,
+            value: config.va_rt_format.bits(),
+        });
+
+        if self.support_packed_headers {
+            let mut value = 0;
+
+            if self.support_packed_header_sequence {
+                value |= ffi::VA_ENC_PACKED_HEADER_SEQUENCE
+            }
+
+            if self.support_packed_header_picture {
+                value |= ffi::VA_ENC_PACKED_HEADER_PICTURE
+            }
+
+            if self.support_packed_header_slice {
+                value |= ffi::VA_ENC_PACKED_HEADER_SLICE
+            }
+
+            config_attributes.push(ffi::VAConfigAttrib {
+                type_: ffi::VAConfigAttribType_VAConfigAttribEncPackedHeaders,
+                value,
+            });
+        }
+
+        config_attributes.push(ffi::VAConfigAttrib {
+            type_: ffi::VAConfigAttribType_VAConfigAttribRateControl,
+            value: config.user.rate_control.mode.bits(),
+        });
+
+        let va_config = self
+            .display
+            .create_config(self.va_profile, self.va_entrypoint, &config_attributes)
+            .map_err(E::FailedToCreateConfig)?;
+
+        let input_surfaces = self
+            .display
+            .create_surfaces(
+                config.va_rt_format.bits(),
+                config.user.max_encode_resolution[0],
+                config.user.max_encode_resolution[1],
+                config.num_encode_slots,
+                &[],
+            )
+            .map_err(E::FailedToCreateSurfaces)?;
+
+        let reference_surfaces = self
+            .display
+            .create_surfaces(
+                config.va_rt_format.bits(),
+                config.user.max_encode_resolution[0],
+                config.user.max_encode_resolution[1],
+                config.num_dpb_slots,
+                &[],
+            )
+            .map_err(E::FailedToCreateSurfaces)?;
+
+        let context = self
+            .display
+            .create_context(
+                &va_config,
+                config.user.max_encode_resolution[0] as i32,
+                config.user.max_encode_resolution[1] as i32,
+                ffi::VA_PROGRESSIVE as _,
+                input_surfaces.iter().chain(reference_surfaces.iter()),
+            )
+            .map_err(E::FailedToCreateContext)?;
+
+        // The coded buffer size is estimated from the input
image resolution. Currently using a higher value to ensure
+        // proper output even with worst case input
+        let output_buffer_size = (config.user.max_encode_resolution[0] as f64
+            * config.user.max_encode_resolution[1] as f64
+            * 1.5) as usize;
+
+        let encode_slots = input_surfaces
+            .into_iter()
+            .map(|surface| -> Result<VaEncodeSlot, VaError> {
+                let output = context.create_buffer_empty(
+                    ffi::VABufferType_VAEncCodedBufferType,
+                    output_buffer_size,
+                )?;
+
+                Ok(VaEncodeSlot { surface, output })
+            })
+            .collect::<Result<Vec<_>, VaError>>()
+            .map_err(E::FailedToCreateCodedBuffer)?;
+
+        Ok(VaEncoder {
+            context,
+            max_encode_resolution: config.user.max_encode_resolution,
+            current_encode_resolution: config.user.initial_encode_resolution,
+            support_packed_header_sequence: self.support_packed_header_sequence,
+            support_packed_header_picture: self.support_packed_header_picture,
+            support_packed_header_slice: self.support_packed_header_slice, // TODO quality level
+            rate_control: config.user.rate_control,
+            quality_level: 0,
+            encode_slots,
+            in_flight: VecDeque::new(),
+            dpb_slots: reference_surfaces,
+            output: VecDeque::new(),
+        })
+    }
+}
+
+pub struct VaEncoder {
+    context: Context,
+
+    max_encode_resolution: [u32; 2],
+    current_encode_resolution: [u32; 2],
+
+    pub support_packed_header_sequence: bool,
+    pub support_packed_header_picture: bool,
+    pub support_packed_header_slice: bool,
+
+    rate_control: VaEncoderRateControlConfig,
+    quality_level: u32,
+
+    encode_slots: Vec<VaEncodeSlot>,
+    in_flight: VecDeque<VaEncodeSlot>,
+
+    dpb_slots: Vec<Surface>,
+
+    output: VecDeque<Vec<u8>>,
+}
+
+pub struct VaEncodeSlot {
+    surface: Surface,
+    output: Buffer,
+}
+
+impl VaEncodeSlot {
+    pub fn output_buffer(&self) -> &Buffer {
+        &self.output
+    }
+}
+
+impl VaEncoder {
+    pub fn context(&self) -> &Context {
+        &self.context
+    }
+
+    pub fn max_encode_resolution(&self) -> [u32; 2] {
+        self.max_encode_resolution
+    }
+
+    pub fn current_encode_resolution(&self) -> [u32; 2] {
+        self.current_encode_resolution
+    }
+
+    pub fn dpb_slot_surface(&self, dpb_slot_index: usize) -> &Surface {
+        &self.dpb_slots[dpb_slot_index]
+    }
+
+    fn read_out_encode_slot(&mut self, encode_slot: &mut VaEncodeSlot) -> Result<(), VaError> {
+        let mut codec_buffer_mapped = encode_slot.output.map()?;
+        let mut ptr = codec_buffer_mapped.data();
+
+        while !ptr.is_null() {
+            let segment = unsafe { ptr.cast::<ffi::VACodedBufferSegment>().read() };
+            ptr = segment.next;
+
+            let buf = segment.buf.cast::<u8>().cast_const();
+            let buf = unsafe { from_raw_parts(buf, segment.size as usize) };
+
+            self.output.push_back(buf.to_vec());
+        }
+
+        Ok(())
+    }
+
+    pub fn pop_encode_slot(&mut self) -> Result<Option<VaEncodeSlot>, VaError> {
+        if let Some(encode_slot) = self.encode_slots.pop() {
+            return Ok(Some(encode_slot));
+        }
+
+        let Some(mut encode_slot) = self.in_flight.pop_front() else {
+            return Ok(None);
+        };
+
+        encode_slot.surface.sync()?;
+        self.read_out_encode_slot(&mut encode_slot)?;
+
+        Ok(Some(encode_slot))
+    }
+
+    pub fn poll_result(&mut self) -> Result<Option<Vec<u8>>, VaError> {
+        if let Some(output) = self.output.pop_front() {
+            return Ok(Some(output));
+        }
+
+        if let Some(encode_slot) = self.in_flight.front_mut() {
+            let completed = encode_slot.surface.try_sync()?;
+            if !completed {
+                return Ok(None);
+            }
+
+            let mut encode_slot = self.in_flight.pop_front().unwrap();
+            self.read_out_encode_slot(&mut encode_slot)?;
+            self.encode_slots.push(encode_slot);
+        }
+
+        Ok(self.output.pop_front())
+    }
+
+    pub fn wait_result(&mut self) -> Result<Option<Vec<u8>>, VaError> {
+        if let Some(output) = self.output.pop_front() {
+            return Ok(Some(output));
+        }
+
+        if let Some(mut encode_slot) =
self.in_flight.pop_front() {
+            encode_slot.surface.sync()?;
+            self.read_out_encode_slot(&mut encode_slot)?;
+            self.encode_slots.push(encode_slot);
+        }
+
+        Ok(self.output.pop_front())
+    }
+
+    pub fn copy_image_to_encode_slot(
+        &mut self,
+        encode_slot: &mut VaEncodeSlot,
+        image: &dyn ImageRef,
+    ) -> Result<(), VaEncodeFrameError> {
+        let mut dst = encode_slot.surface.derive_image()?;
+        let dst_img = *dst.ffi();
+        let dst_pixel_format = map_pixel_format(FourCC::from_bits_truncate(dst_img.format.fourcc))
+            .expect("Unknown FourCC in input surface");
+
+        // Safety: the mapped image must live for this entire scope
+        unsafe {
+            let mut mapped = dst.map()?;
+
+            let mut planes = vec![];
+            let mut strides = vec![];
+
+            strides.push(dst_img.pitches[0] as usize);
+            planes.push(from_raw_parts_mut(
+                mapped.data().add(dst_img.offsets[0] as usize),
+                (dst_img.offsets[1] - dst_img.offsets[0]) as usize,
+            ));
+
+            if dst_img.num_planes >= 2 {
+                let next_start = if dst_img.num_planes == 2 {
+                    dst_img.data_size
+                } else {
+                    dst_img.offsets[2]
+                };
+
+                strides.push(dst_img.pitches[1] as usize);
+                planes.push(from_raw_parts_mut(
+                    mapped.data().add(dst_img.offsets[1] as usize),
+                    (next_start - dst_img.offsets[1]) as usize,
+                ));
+            }
+
+            if dst_img.num_planes == 3 {
+                strides.push(dst_img.pitches[2] as usize);
+                planes.push(from_raw_parts_mut(
+                    mapped.data().add(dst_img.offsets[2] as usize),
+                    (dst_img.data_size - dst_img.offsets[2]) as usize,
+                ));
+            }
+
+            let dst_color = match image.color() {
+                ColorInfo::RGB(rgb_color_info) => YuvColorInfo {
+                    transfer: rgb_color_info.transfer,
+                    primaries: rgb_color_info.primaries,
+                    space: ColorSpace::BT709,
+                    full_range: true,
+                },
+                ColorInfo::YUV(yuv_color_info) => yuv_color_info,
+            };
+
+            let mut dst_image = Image::from_planes(
+                dst_pixel_format,
+                planes,
+                Some(strides),
+                image.width(),
+                image.height(),
+                dst_color.into(),
+            )?;
+
+            convert_multi_thread(image, &mut dst_image)?;
+        }
+
+        Ok(())
+    }
+
+    pub fn create_quality_params(&self) -> Result<Buffer, VaError> {
+        unsafe {
+            let mut quality_params_buffer = self.context.create_buffer_empty(
+                ffi::VABufferType_VAEncMiscParameterBufferType,
+                size_of::<ffi::VAEncMiscParameterBuffer>()
+                    + size_of::<ffi::VAEncMiscParameterBufferQualityLevel>(),
+            )?;
+            let mut mapped = quality_params_buffer.map()?;
+            let misc_param = &mut *mapped.data().cast::<ffi::VAEncMiscParameterBuffer>();
+            misc_param.type_ = ffi::VAEncMiscParameterType_VAEncMiscParameterTypeEncQuality;
+
+            let enc_quality_params = &mut *misc_param
+                .data
+                .as_mut_ptr()
+                .cast::<ffi::VAEncMiscParameterBufferQualityLevel>();
+
+            *enc_quality_params = zeroed();
+
+            enc_quality_params.quality_level = self.quality_level;
+
+            drop(mapped);
+
+            Ok(quality_params_buffer)
+        }
+    }
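+
+    // The VAEncMiscParameter* builders here all follow the same pattern: one
+    // VA buffer holds a VAEncMiscParameterBuffer header immediately followed
+    // by the payload struct, i.e. (illustrative layout)
+    //
+    //     | VAEncMiscParameterBuffer (type_) | payload, e.g. ...RateControl |
+    //
+    // which is why the payload is written through `misc_param.data` in the
+    // mapped buffer instead of being built on the stack and copied.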
rate_control_params.initial_qp = initial_qp.into(); + rate_control_params.min_qp = min_qp.into(); + rate_control_params.max_qp = max_qp.into(); + rate_control_params.target_percentage = target_percentage; + rate_control_params.bits_per_second = bitrate; + + drop(mapped); + + Ok(rate_control_params_buffer) + } + } + + pub fn create_max_slice_size_params(&self, max_slice_size: u32) -> Result { + unsafe { + let mut quality_params_buffer = self.context.create_buffer_empty( + ffi::VABufferType_VAEncMiscParameterBufferType, + size_of::() + + size_of::(), + )?; + let mut mapped = quality_params_buffer.map()?; + let misc_param = &mut *mapped.data().cast::(); + misc_param.type_ = ffi::VAEncMiscParameterType_VAEncMiscParameterTypeMaxSliceSize; + + let enc_max_slice_size_params = &mut *misc_param + .data + .as_mut_ptr() + .cast::(); + + *enc_max_slice_size_params = zeroed(); + + enc_max_slice_size_params.max_slice_size = max_slice_size; + + drop(mapped); + + Ok(quality_params_buffer) + } + } + + pub fn create_packed_param( + &self, + type_: u32, + buf: &[u8], + bufs: &mut Vec, + ) -> Result<(), VaError> { + let params = ffi::VAEncPackedHeaderParameterBuffer { + type_, + bit_length: (buf.len() * 8) as _, + has_emulation_bytes: 0, + va_reserved: Default::default(), + }; + + let packed_header_params = self.context.create_buffer_with_data( + ffi::VABufferType_VAEncPackedHeaderParameterBufferType, + ¶ms, + )?; + + let b = self + .context + .create_buffer_from_bytes(ffi::VABufferType_VAEncPackedHeaderDataBufferType, buf)?; + + bufs.push(packed_header_params); + bufs.push(b); + + Ok(()) + } + + pub fn submit_encode_slot( + &mut self, + encode_slot: VaEncodeSlot, + encode_params: Vec, + ) -> Result<(), VaError> { + let begin_picture_result = self.context.begin_picture(&encode_slot.surface); + + self.in_flight.push_back(encode_slot); + + let pipeline = begin_picture_result?; + + pipeline.render_picture(&encode_params)?; + pipeline.end_picture()?; + + drop(encode_params); + + Ok(()) + } +} diff --git a/media-video/libva/src/image.rs b/media-video/libva/src/image.rs new file mode 100644 index 00000000..22cdfb14 --- /dev/null +++ b/media-video/libva/src/image.rs @@ -0,0 +1,66 @@ +use std::{ptr::null_mut, sync::Arc}; + +use crate::{Handle, VaError, ffi}; + +pub struct Image { + pub(crate) display: Arc, + pub(crate) image: ffi::VAImage, +} + +impl Image { + pub fn ffi(&self) -> &ffi::VAImage { + &self.image + } + + pub fn map(&mut self) -> Result, VaError> { + unsafe { + let mut mapped = null_mut(); + + VaError::try_(ffi::vaMapBuffer( + self.display.dpy, + self.image.buf, + &raw mut mapped, + ))?; + + Ok(MappedImage { + image: self, + mapped, + }) + } + } +} + +impl Drop for Image { + fn drop(&mut self) { + unsafe { + if let Err(e) = + VaError::try_(ffi::vaDestroyImage(self.display.dpy, self.image.image_id)) + { + log::error!("Failed to destroy VAImage {}, {}", self.image.image_id, e) + } + } + } +} + +pub struct MappedImage<'a> { + image: &'a mut Image, + mapped: *mut std::ffi::c_void, +} +impl MappedImage<'_> { + pub fn data(&mut self) -> *mut u8 { + self.mapped.cast() + } +} + +impl Drop for MappedImage<'_> { + fn drop(&mut self) { + unsafe { + if let Err(e) = VaError::try_(ffi::vaUnmapBuffer( + self.image.display.dpy, + self.image.image.buf, + )) { + log::error!("Failed to unmap VABuffer {}, {}", self.image.image.buf, e) + } + } + } +} diff --git a/media-video/libva/src/lib.rs b/media-video/libva/src/lib.rs new file mode 100644 index 00000000..4a53b66c --- /dev/null +++ b/media-video/libva/src/lib.rs @@ -0,0 
+1,192 @@ +#![cfg(target_os = "linux")] + +use ezk_image::PixelFormat; +use std::{ + backtrace::{Backtrace, BacktraceStatus}, + error::Error, + ffi::{CStr, c_void}, + fmt, + fs::File, + path::PathBuf, +}; + +pub mod ffi { + #![allow(unreachable_pub, dead_code, nonstandard_style, unsafe_op_in_unsafe_fn)] + + include!(concat!(env!("OUT_DIR"), "/bindings.rs")); +} + +mod buffer; +mod config; +mod context; +mod display; +pub mod encoder; +mod image; +mod surface; + +pub use buffer::{Buffer, MappedBuffer}; +pub use config::Config; +pub use context::Context; +pub use display::{Display, DisplayOpenDrmError}; +pub use image::{Image, MappedImage}; +pub use surface::Surface; + +struct Handle { + _drm_file: File, + drm_path: PathBuf, + vendor_id: u16, + device_id: u16, + dpy: *mut c_void, +} + +unsafe impl Send for Handle {} +unsafe impl Sync for Handle {} + +#[derive(Debug)] +pub struct VaError { + status: ffi::VAStatus, + text: Option<&'static CStr>, + backtrace: Backtrace, +} + +impl VaError { + #[track_caller] + fn try_(status: ffi::VAStatus) -> Result<(), Self> { + if status == ffi::VA_STATUS_SUCCESS as ffi::VAStatus { + Ok(()) + } else { + let error_str = unsafe { ffi::vaErrorStr(status) }; + + let text = if error_str.is_null() { + None + } else { + Some(unsafe { CStr::from_ptr(error_str) }) + }; + + let backtrace = Backtrace::capture(); + + Err(Self { + status, + text, + backtrace, + }) + } + } +} + +impl fmt::Display for VaError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(text) = self.text { + write!(f, " description={:?}", text)?; + } + + if self.backtrace.status() != BacktraceStatus::Disabled { + write!(f, " backtrace={}", self.backtrace)?; + } + + Ok(()) + } +} + +impl Error for VaError {} + +fn map_pixel_format(fourcc: FourCC) -> Option { + // Make sure to update used pixel formats in the ezk-image dependency + match fourcc.bits() { + ffi::VA_FOURCC_NV12 => Some(PixelFormat::NV12), + ffi::VA_FOURCC_RGBA => Some(PixelFormat::RGBA), + ffi::VA_FOURCC_RGBX => Some(PixelFormat::RGBA), + ffi::VA_FOURCC_BGRA => Some(PixelFormat::BGRA), + ffi::VA_FOURCC_BGRX => Some(PixelFormat::BGRA), + ffi::VA_FOURCC_I420 => Some(PixelFormat::I420), + ffi::VA_FOURCC_422H => Some(PixelFormat::I422), + ffi::VA_FOURCC_444P => Some(PixelFormat::I444), + ffi::VA_FOURCC_RGBP => Some(PixelFormat::RGB), + ffi::VA_FOURCC_BGRP => Some(PixelFormat::BGR), + ffi::VA_FOURCC_I010 => Some(PixelFormat::I010), + _ => None, + } +} + +bitflags::bitflags! 
{ + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct FourCC: u32 { + const _NV12 = ffi::VA_FOURCC_NV12; + const _NV21 = ffi::VA_FOURCC_NV21; + const _AI44 = ffi::VA_FOURCC_AI44; + const _RGBA = ffi::VA_FOURCC_RGBA; + const _RGBX = ffi::VA_FOURCC_RGBX; + const _BGRA = ffi::VA_FOURCC_BGRA; + const _BGRX = ffi::VA_FOURCC_BGRX; + const _ARGB = ffi::VA_FOURCC_ARGB; + const _XRGB = ffi::VA_FOURCC_XRGB; + const _ABGR = ffi::VA_FOURCC_ABGR; + const _XBGR = ffi::VA_FOURCC_XBGR; + const _UYVY = ffi::VA_FOURCC_UYVY; + const _YUY2 = ffi::VA_FOURCC_YUY2; + const _AYUV = ffi::VA_FOURCC_AYUV; + const _NV11 = ffi::VA_FOURCC_NV11; + const _YV12 = ffi::VA_FOURCC_YV12; + const _P208 = ffi::VA_FOURCC_P208; + const _I420 = ffi::VA_FOURCC_I420; + const _YV24 = ffi::VA_FOURCC_YV24; + const _YV32 = ffi::VA_FOURCC_YV32; + const _Y800 = ffi::VA_FOURCC_Y800; + const _IMC3 = ffi::VA_FOURCC_IMC3; + const _411P = ffi::VA_FOURCC_411P; + const _411R = ffi::VA_FOURCC_411R; + const _422H = ffi::VA_FOURCC_422H; + const _422V = ffi::VA_FOURCC_422V; + const _444P = ffi::VA_FOURCC_444P; + const _RGBP = ffi::VA_FOURCC_RGBP; + const _BGRP = ffi::VA_FOURCC_BGRP; + const _RGB565 = ffi::VA_FOURCC_RGB565; + const _BGR565 = ffi::VA_FOURCC_BGR565; + const _Y210 = ffi::VA_FOURCC_Y210; + const _Y212 = ffi::VA_FOURCC_Y212; + const _Y216 = ffi::VA_FOURCC_Y216; + const _Y410 = ffi::VA_FOURCC_Y410; + const _Y412 = ffi::VA_FOURCC_Y412; + const _Y416 = ffi::VA_FOURCC_Y416; + const _YV16 = ffi::VA_FOURCC_YV16; + const _P010 = ffi::VA_FOURCC_P010; + const _P012 = ffi::VA_FOURCC_P012; + const _P016 = ffi::VA_FOURCC_P016; + const _I010 = ffi::VA_FOURCC_I010; + const _IYUV = ffi::VA_FOURCC_IYUV; + const _A2R10G10B10 = ffi::VA_FOURCC_A2R10G10B10; + const _A2B10G10R10 = ffi::VA_FOURCC_A2B10G10R10; + const _X2R10G10B10 = ffi::VA_FOURCC_X2R10G10B10; + const _X2B10G10R10 = ffi::VA_FOURCC_X2B10G10R10; + const _Y8 = ffi::VA_FOURCC_Y8; + const _Y16 = ffi::VA_FOURCC_Y16; + const _VYUY = ffi::VA_FOURCC_VYUY; + const _YVYU = ffi::VA_FOURCC_YVYU; + const _ARGB64 = ffi::VA_FOURCC_ARGB64; + const _ABGR64 = ffi::VA_FOURCC_ABGR64; + const _XYUV = ffi::VA_FOURCC_XYUV; + const _Q416 = ffi::VA_FOURCC_Q416; + } +} + +bitflags::bitflags! 
{ + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct RtFormat: u32 { + const YUV420 = ffi::VA_RT_FORMAT_YUV420; + const YUV422 = ffi::VA_RT_FORMAT_YUV422; + const YUV444 = ffi::VA_RT_FORMAT_YUV444; + const YUV411 = ffi::VA_RT_FORMAT_YUV411; + const YUV400 = ffi::VA_RT_FORMAT_YUV400; + const YUV420_10 = ffi::VA_RT_FORMAT_YUV420_10; + const YUV422_10 = ffi::VA_RT_FORMAT_YUV422_10; + const YUV444_10 = ffi::VA_RT_FORMAT_YUV444_10; + const YUV420_12 = ffi::VA_RT_FORMAT_YUV420_12; + const YUV422_12 = ffi::VA_RT_FORMAT_YUV422_12; + const YUV444_12 = ffi::VA_RT_FORMAT_YUV444_12; + const RGB16 = ffi::VA_RT_FORMAT_RGB16; + const RGB32 = ffi::VA_RT_FORMAT_RGB32; + const RGBP = ffi::VA_RT_FORMAT_RGBP; + const RGB32_10 = ffi::VA_RT_FORMAT_RGB32_10; + const PROTECTED = ffi::VA_RT_FORMAT_PROTECTED; + } +} diff --git a/media-video/libva/src/surface.rs b/media-video/libva/src/surface.rs new file mode 100644 index 00000000..da72ca2b --- /dev/null +++ b/media-video/libva/src/surface.rs @@ -0,0 +1,87 @@ +use crate::{Handle, Image, VaError, ffi}; +use std::{ + mem::MaybeUninit, + ops::{Deref, DerefMut}, + sync::Arc, +}; + +pub struct Surface { + pub(crate) display: Arc, + pub(crate) surface_id: ffi::VASurfaceID, +} + +impl Surface { + pub fn id(&self) -> ffi::VASurfaceID { + self.surface_id + } + + pub fn derive_image(&mut self) -> Result, VaError> { + unsafe { + let mut image = MaybeUninit::uninit(); + + VaError::try_(ffi::vaDeriveImage( + self.display.dpy, + self.surface_id, + image.as_mut_ptr(), + ))?; + + let image = Image { + display: self.display.clone(), + image: image.assume_init(), + }; + + Ok(SurfaceImage { + _surface: self, + image, + }) + } + } + + pub fn sync(&mut self) -> Result<(), VaError> { + unsafe { VaError::try_(ffi::vaSyncSurface(self.display.dpy, self.surface_id)) } + } + + pub fn try_sync(&mut self) -> Result { + unsafe { + match VaError::try_(ffi::vaSyncSurface2(self.display.dpy, self.surface_id, 0)) { + Ok(_) => Ok(true), + Err(e) if e.status == ffi::VA_STATUS_ERROR_TIMEDOUT as ffi::VAStatus => Ok(false), + Err(e) => Err(e), + } + } + } +} + +impl Drop for Surface { + fn drop(&mut self) { + unsafe { + if let Err(e) = VaError::try_(ffi::vaDestroySurfaces( + self.display.dpy, + &raw mut self.surface_id, + 1, + )) { + log::error!("Failed to destroy VASurface {}, {}", self.surface_id, e) + } + } + } +} + +/// [`Image`] derives from a [`Surface`]. 
Holds a lifetime since it may not outlive the `Surface` +pub struct SurfaceImage<'a> { + _surface: &'a mut Surface, + image: Image, +} + +impl Deref for SurfaceImage<'_> { + type Target = Image; + + fn deref(&self) -> &Self::Target { + &self.image + } +} + +impl DerefMut for SurfaceImage<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.image + } +} diff --git a/media-video/libva/wrapper.h b/media-video/libva/wrapper.h new file mode 100644 index 00000000..b83243c9 --- /dev/null +++ b/media-video/libva/wrapper.h @@ -0,0 +1,3 @@ + +#include +#include \ No newline at end of file diff --git a/media-video/vulkan/Cargo.toml b/media-video/vulkan/Cargo.toml new file mode 100644 index 00000000..ba79c687 --- /dev/null +++ b/media-video/vulkan/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "ezk-vulkan" +version = "0.1.0" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +log = "0.4" +ash-stable = { package = "ash", version = "0.38.0" } +ash = { package = "ash", git = "https://github.com/ash-rs/ash.git", rev = "20cdb2597fcaca2aa45eb184de22221bdad8d28c" } +naga = { version = "27", features = ["wgsl-in", "spv-out"] } +thiserror = "2" +smallvec = "1" +ezk-image = { workspace = true, features = [ + "multi-thread", + "NV12", + "RGBA", + "BGRA", + "ARGB", + "ABGR", +] } + +wgpu = { version = "27", default-features = false, features = ["vulkan"] } +anyhow = "1" + +[dev-dependencies] +image = { version = "0.25", default-features = false, features = ["png"] } + +[lints] +workspace = true diff --git a/media-video/vulkan/src/buffer.rs b/media-video/vulkan/src/buffer.rs new file mode 100644 index 00000000..53525bd3 --- /dev/null +++ b/media-video/vulkan/src/buffer.rs @@ -0,0 +1,130 @@ +use std::{ + marker::PhantomData, + slice::{from_raw_parts, from_raw_parts_mut}, +}; + +use ash::vk; + +use crate::{Device, VulkanError}; + +#[derive(Debug)] +pub struct Buffer { + device: Device, + buffer: vk::Buffer, + memory: vk::DeviceMemory, + capacity: usize, + _m: PhantomData, +} + +impl Buffer { + pub unsafe fn create( + device: &Device, + create_info: &vk::BufferCreateInfo<'_>, + ) -> Result { + if !create_info + .size + .is_multiple_of(size_of::() as vk::DeviceSize) + { + return Err(VulkanError::InvalidArgument { + message: "Buffer size is not a multiple of T", + }); + } + + let capacity = create_info.size as usize / size_of::(); + + let buffer = device.ash().create_buffer(create_info, None)?; + + let memory_requirements = device.ash().get_buffer_memory_requirements(buffer); + + let output_alloc_info = vk::MemoryAllocateInfo::default() + .allocation_size(memory_requirements.size) + .memory_type_index(device.find_memory_type( + memory_requirements.memory_type_bits, + vk::MemoryPropertyFlags::HOST_VISIBLE | vk::MemoryPropertyFlags::HOST_COHERENT, + )?); + + let memory = device.ash().allocate_memory(&output_alloc_info, None)?; + + device.ash().bind_buffer_memory(buffer, memory, 0)?; + + Ok(Self { + device: device.clone(), + buffer, + memory, + capacity, + _m: PhantomData, + }) + } + + pub unsafe fn buffer(&self) -> vk::Buffer { + self.buffer + } + + #[allow(clippy::len_without_is_empty)] + pub fn capacity(&self) -> usize { + self.capacity + } + + pub fn map(&mut self, len: usize) -> Result, VulkanError> { + if len == 0 { + return Err(VulkanError::InvalidArgument { + message: "Cannot map buffer with size 0", + }); + } + + if len > self.capacity { + return Err(VulkanError::InvalidArgument { + message: "Tried to map buffer with size 
larger than buffer size", + }); + } + + let ptr = unsafe { + self.device.ash().map_memory( + self.memory, + 0, + (size_of::() * len) as vk::DeviceSize, + vk::MemoryMapFlags::empty(), + )? + }; + + Ok(MappedBuffer { + buffer: self, + ptr: ptr.cast::(), + len, + }) + } +} + +impl Drop for Buffer { + fn drop(&mut self) { + unsafe { + self.device.ash().destroy_buffer(self.buffer, None); + self.device.ash().free_memory(self.memory, None); + } + } +} + +#[derive(Debug)] +pub struct MappedBuffer<'a, T> { + buffer: &'a mut Buffer, + ptr: *mut T, + len: usize, +} + +impl<'a, T> MappedBuffer<'a, T> { + pub fn data(&self) -> &'a [T] { + unsafe { from_raw_parts(self.ptr.cast(), self.len) } + } + + pub fn data_mut(&mut self) -> &'a mut [T] { + unsafe { from_raw_parts_mut(self.ptr.cast(), self.len) } + } +} + +impl Drop for MappedBuffer<'_, T> { + fn drop(&mut self) { + unsafe { + self.buffer.device.ash().unmap_memory(self.buffer.memory); + } + } +} diff --git a/media-video/vulkan/src/command_buffer.rs b/media-video/vulkan/src/command_buffer.rs new file mode 100644 index 00000000..aec10aa3 --- /dev/null +++ b/media-video/vulkan/src/command_buffer.rs @@ -0,0 +1,134 @@ +use ash::vk::{self}; +use std::sync::Arc; + +use crate::{Device, VulkanError}; + +#[derive(Debug)] +pub struct CommandBuffer { + inner: Arc, + command_buffer: vk::CommandBuffer, +} + +#[derive(Debug)] +struct Inner { + device: Device, + pool: vk::CommandPool, +} + +impl CommandBuffer { + pub fn create( + device: &Device, + queue_family_index: u32, + command_buffer_count: u32, + ) -> Result, VulkanError> { + let pool_create_info = vk::CommandPoolCreateInfo::default() + .queue_family_index(queue_family_index) + .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER); + let pool = unsafe { device.ash().create_command_pool(&pool_create_info, None)? }; + + let command_buffer_create_info = vk::CommandBufferAllocateInfo::default() + .command_buffer_count(command_buffer_count) + .command_pool(pool) + .level(vk::CommandBufferLevel::PRIMARY); + + let command_buffers = unsafe { + device + .ash() + .allocate_command_buffers(&command_buffer_create_info)? 
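Stepping back to the typed `Buffer<T>` in buffer.rs above: its size check and element-count bookkeeping are easiest to see at a call site. A usage sketch (hypothetical function name; assumes a `Device` is already set up):

```rust
// Hypothetical usage of Buffer<T>: allocate room for 256 u32 values in a
// host-visible buffer, fill them through a typed mapping, and read them back.
fn read_back_example(device: &Device) -> Result<Vec<u32>, VulkanError> {
    let create_info = vk::BufferCreateInfo::default()
        .size((256 * size_of::<u32>()) as vk::DeviceSize)
        .usage(vk::BufferUsageFlags::TRANSFER_DST)
        .sharing_mode(vk::SharingMode::EXCLUSIVE);

    // The size is a multiple of size_of::<u32>(), so `create` accepts it and
    // capacity() reports elements, not bytes.
    let mut buffer: Buffer<u32> = unsafe { Buffer::create(device, &create_info)? };
    assert_eq!(buffer.capacity(), 256);

    // map() validates `len` against the capacity and hands out typed slices.
    let mut mapped = buffer.map(256)?;
    mapped.data_mut().fill(0xDEAD_BEEF);
    Ok(mapped.data().to_vec())
}
```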
+ }; + + let inner = Arc::new(Inner { + device: device.clone(), + pool, + }); + + let command_buffers = command_buffers + .into_iter() + .map(|command_buffer| CommandBuffer { + inner: inner.clone(), + command_buffer, + }) + .collect(); + + Ok(command_buffers) + } + + pub(crate) fn device(&self) -> &Device { + &self.inner.device + } + + pub unsafe fn handle(&self) -> vk::CommandBuffer { + self.command_buffer + } + + pub unsafe fn begin( + &self, + begin_info: &vk::CommandBufferBeginInfo, + ) -> Result, vk::Result> { + self.device() + .ash() + .begin_command_buffer(self.handle(), begin_info)?; + + Ok(RecordingCommandBuffer { + command_buffer: self, + ended: false, + }) + } +} + +impl Drop for Inner { + fn drop(&mut self) { + unsafe { + self.device.ash().destroy_command_pool(self.pool, None); + } + } +} + +impl Drop for CommandBuffer { + fn drop(&mut self) { + unsafe { + self.inner + .device + .ash() + .free_command_buffers(self.inner.pool, &[self.command_buffer]); + } + } +} + +#[derive(Debug)] +pub struct RecordingCommandBuffer<'a> { + command_buffer: &'a CommandBuffer, + ended: bool, +} + +impl RecordingCommandBuffer<'_> { + pub unsafe fn command_buffer(&self) -> vk::CommandBuffer { + self.command_buffer.handle() + } + + pub fn end(mut self) -> Result<(), vk::Result> { + self.ended = true; + self.end_ref() + } + + fn end_ref(&mut self) -> Result<(), vk::Result> { + unsafe { + self.command_buffer + .device() + .ash() + .end_command_buffer(self.command_buffer()) + } + } +} + +impl Drop for RecordingCommandBuffer<'_> { + fn drop(&mut self) { + if self.ended { + return; + } + + if let Err(e) = self.end_ref() { + log::error!("Failed to end command buffer: {e}"); + } + } +} diff --git a/media-video/vulkan/src/descriptor_set.rs b/media-video/vulkan/src/descriptor_set.rs new file mode 100644 index 00000000..6ff123e0 --- /dev/null +++ b/media-video/vulkan/src/descriptor_set.rs @@ -0,0 +1,126 @@ +use crate::{Device, VulkanError}; +use ash::vk; +use std::sync::Arc; + +#[derive(Debug, Clone)] +pub struct DescriptorSetLayout { + inner: Arc, +} + +#[derive(Debug)] +struct DescriptorSetLayoutInner { + device: Device, + descriptor_set_layout: vk::DescriptorSetLayout, +} + +impl DescriptorSetLayout { + pub fn create( + device: &Device, + bindings: &[vk::DescriptorSetLayoutBinding<'_>], + ) -> Result { + let descriptor_set_layout = unsafe { + let create_info = vk::DescriptorSetLayoutCreateInfo::default().bindings(bindings); + + device + .ash() + .create_descriptor_set_layout(&create_info, None)? 
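The command_buffer.rs wrapper above pairs pool-backed allocation with an RAII recording guard, so a command buffer is ended even on an early return. The intended call pattern, as a sketch (hypothetical function name, using only the APIs defined above):

```rust
// Hypothetical usage of the RAII recording guard: allocate one primary
// command buffer, record into it, and close it via end() (Drop would also
// end it, logging any error).
fn record_nothing(
    device: &Device,
    queue_family_index: u32,
) -> Result<CommandBuffer, VulkanError> {
    let mut buffers = CommandBuffer::create(device, queue_family_index, 1)?;
    let command_buffer = buffers.pop().expect("requested exactly one buffer");

    unsafe {
        let recording = command_buffer.begin(&vk::CommandBufferBeginInfo::default())?;
        // vkCmd* calls would go here, via recording.command_buffer().
        recording.end()?;
    }

    Ok(command_buffer)
}
```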
+ }; + + Ok(DescriptorSetLayout { + inner: Arc::new(DescriptorSetLayoutInner { + device: device.clone(), + descriptor_set_layout, + }), + }) + } + + pub(crate) fn device(&self) -> &Device { + &self.inner.device + } + + pub(crate) unsafe fn descriptor_set_layout(&self) -> vk::DescriptorSetLayout { + self.inner.descriptor_set_layout + } +} + +impl Drop for DescriptorSetLayoutInner { + fn drop(&mut self) { + unsafe { + self.device + .ash() + .destroy_descriptor_set_layout(self.descriptor_set_layout, None); + } + } +} + +#[derive(Debug)] +pub struct DescriptorSet { + _inner: Arc, + descriptor_set: vk::DescriptorSet, +} + +#[derive(Debug)] +struct DescriptorSetInner { + layout: DescriptorSetLayout, + pool: vk::DescriptorPool, +} + +impl DescriptorSet { + pub fn create( + device: &Device, + pool_sizes: &[vk::DescriptorPoolSize], + layout: &DescriptorSetLayout, + num_sets: u32, + ) -> Result, VulkanError> { + let create_info = vk::DescriptorPoolCreateInfo::default() + .pool_sizes(pool_sizes) + .max_sets(num_sets) + .flags(vk::DescriptorPoolCreateFlags::empty()); + + let descriptor_pool = unsafe { device.ash().create_descriptor_pool(&create_info, None)? }; + + let set_layouts = vec![unsafe { layout.descriptor_set_layout() }; num_sets as usize]; + + let alloc_info = vk::DescriptorSetAllocateInfo::default() + .descriptor_pool(descriptor_pool) + .set_layouts(&set_layouts); + + let descriptor_sets = match unsafe { device.ash().allocate_descriptor_sets(&alloc_info) } { + Ok(descriptor_sets) => descriptor_sets, + Err(e) => { + unsafe { device.ash().destroy_descriptor_pool(descriptor_pool, None) }; + + return Err(VulkanError::from(e)); + } + }; + + let inner = Arc::new(DescriptorSetInner { + layout: layout.clone(), + pool: descriptor_pool, + }); + + Ok(descriptor_sets + .into_iter() + .map(|descriptor_set| DescriptorSet { + _inner: inner.clone(), + descriptor_set, + }) + .collect()) + } + + pub unsafe fn handle(&self) -> vk::DescriptorSet { + self.descriptor_set + } +} + +impl Drop for DescriptorSetInner { + fn drop(&mut self) { + unsafe { + self.layout + .inner + .device + .ash() + .destroy_descriptor_pool(self.pool, None); + } + } +} diff --git a/media-video/vulkan/src/device.rs b/media-video/vulkan/src/device.rs new file mode 100644 index 00000000..a2533060 --- /dev/null +++ b/media-video/vulkan/src/device.rs @@ -0,0 +1,516 @@ +use super::Instance; +use crate::{PhysicalDevice, VulkanError}; +use anyhow::Context; +use ash::{ + khr::{video_encode_queue, video_queue}, + vk::{self, Handle, TaggedStructure}, +}; +use ash_stable::vk::Handle as _; +use std::{ffi::CStr, fmt, mem::transmute, sync::Arc}; + +#[derive(Clone)] +pub struct Device { + inner: Arc, +} + +struct Inner { + instance: Instance, + physical_device: PhysicalDevice, + physical_device_memory_properties: vk::PhysicalDeviceMemoryProperties, + + device: ash::Device, + + device_extensions: DeviceVideoExtensions, + + video_queue_device: video_queue::Device, + video_encode_queue_device: video_encode_queue::Device, + + graphics_queue_family_index: u32, + encode_queue_family_index: u32, + + graphics_queue: vk::Queue, + encode_queue: vk::Queue, +} + +impl fmt::Debug for Device { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Device") + .field(&self.inner.device.handle()) + .finish() + } +} + +/// All relevant extensions for video encoding +#[derive(Debug)] +pub struct DeviceVideoExtensions { + pub video_queue: bool, + pub video_encode_queue: bool, + pub video_encode_h264: bool, + pub video_encode_h265: bool, + pub 
video_encode_av1: bool, + pub video_decode_queue: bool, + pub video_decode_h264: bool, + pub video_decode_h265: bool, + pub video_decode_av1: bool, + pub external_memory_fd: bool, + pub external_memory_dma_buf: bool, + pub image_drm_format_modifier: bool, + pub timeline_semaphore: bool, + pub external_semaphore_fd: bool, + pub queue_family_foreign: bool, +} + +impl Device { + /// Create a new device from a WGPU Instance + /// + /// This will let WGPU create the vulkan device, making sure that everything is setup just as wgpu wants it, + /// any extensions and features for video will be added to WGPU's device creation process. + pub fn create_wgpu( + instance: &wgpu::Instance, + adapter: &wgpu::Adapter, + ) -> Result<(Device, wgpu::Device, wgpu::Queue), anyhow::Error> { + unsafe { + let vk_adapter = adapter.as_hal::().unwrap(); + + // Query all available device extensions + let props = vk_adapter + .shared_instance() + .raw_instance() + .enumerate_device_extension_properties(vk_adapter.raw_physical_device())?; + + let mut extensions = vec![]; + + // Add all desired device extensions if they are available + let device_extensions = DeviceVideoExtensions { + video_queue: add2(&props, ash::khr::video_queue::NAME, &mut extensions), + video_encode_queue: add2( + &props, + ash::khr::video_encode_queue::NAME, + &mut extensions, + ), + video_encode_h264: add2(&props, ash::khr::video_encode_h264::NAME, &mut extensions), + video_encode_h265: add2(&props, ash::khr::video_encode_h265::NAME, &mut extensions), + video_encode_av1: add2(&props, ash::khr::video_encode_av1::NAME, &mut extensions), + video_decode_queue: add2( + &props, + ash::khr::video_decode_queue::NAME, + &mut extensions, + ), + video_decode_h264: add2(&props, ash::khr::video_decode_h264::NAME, &mut extensions), + video_decode_h265: add2(&props, ash::khr::video_decode_h265::NAME, &mut extensions), + video_decode_av1: add2(&props, ash::khr::video_decode_av1::NAME, &mut extensions), + external_memory_fd: add2( + &props, + ash::khr::external_memory_fd::NAME, + &mut extensions, + ), + external_memory_dma_buf: add2( + &props, + ash::ext::external_memory_dma_buf::NAME, + &mut extensions, + ), + image_drm_format_modifier: add2( + &props, + ash::ext::image_drm_format_modifier::NAME, + &mut extensions, + ), + timeline_semaphore: add2( + &props, + ash::khr::timeline_semaphore::NAME, + &mut extensions, + ), + external_semaphore_fd: add2( + &props, + ash::khr::external_semaphore_fd::NAME, + &mut extensions, + ), + queue_family_foreign: add2( + &props, + ash::ext::queue_family_foreign::NAME, + &mut extensions, + ), + }; + + // Query all available queues families + let queue_family_properties = vk_adapter + .shared_instance() + .raw_instance() + .get_physical_device_queue_family_properties(vk_adapter.raw_physical_device()); + + let mut separate_encode_queue_family_index = None; + + let mut encode_av1_feature = vk::PhysicalDeviceVideoEncodeAV1FeaturesKHR::default() + .video_encode_av1(device_extensions.video_encode_av1); + + // Always enabling these features since they are always required + let mut synchronization2_features = + ash_stable::vk::PhysicalDeviceSynchronization2Features::default() + .synchronization2(true); + + // FIXME: Cannot use push_next here since these are from two different ash versions + synchronization2_features.p_next = (&raw mut encode_av1_feature).cast(); + + let device = vk_adapter + .open_with_callback( + adapter.features(), + &wgpu::MemoryHints::default(), + Some(Box::new(|args| { + // Add all desired extensions + 
args.extensions.extend(extensions); + + // Add all required features + *args.create_info = + args.create_info.push_next(&mut synchronization2_features); + + // Find the encode queue and request it + // TODO: currently forcing a different queue for encode operations + let graphics_queue_family_index = + args.queue_create_infos[0].queue_family_index; + let encode_queue_family_index = queue_family_properties + .iter() + .enumerate() + .position(|(i, properties)| { + i as u32 != graphics_queue_family_index + && properties + .queue_flags + .contains(ash_stable::vk::QueueFlags::VIDEO_ENCODE_KHR) + }); + + // If there's a (separate) encode queue, request that + if let Some(index) = encode_queue_family_index { + separate_encode_queue_family_index = Some(index as u32); + + args.queue_create_infos.push( + ash_stable::vk::DeviceQueueCreateInfo::default() + .queue_family_index(index as u32) + .queue_priorities(&[1.0]), + ); + } + })), + ) + .context("Failed to open WGPU device")?; + + let graphics_queue_family_index = device.device.queue_family_index(); + let encode_queue_family_index = separate_encode_queue_family_index + .context("Device does not have a encode queue")?; + + let (wgpu_device, wgpu_queue) = adapter + .create_device_from_hal( + device, + &wgpu::DeviceDescriptor { + label: None, + required_features: adapter.features(), + required_limits: adapter.limits(), + experimental_features: wgpu::ExperimentalFeatures::disabled(), + memory_hints: wgpu::MemoryHints::default(), + trace: wgpu::Trace::default(), + }, + ) + .context("Failed to create wgpu Device & Queue pair from hal device")?; + + let vk_device = wgpu_device + .as_hal::() + .expect("Just created a vulkan device"); + + let vk_physical_device = vk_device.raw_physical_device(); + + let instance = Instance::from_wgpu(instance.clone()); + + let vk_device = ash::Device::load( + instance.ash().fp_v1_0(), + vk::Device::from_raw(vk_device.raw_device().handle().as_raw()), + ); + let graphics_queue = vk_device.get_device_queue(graphics_queue_family_index, 0); + let encode_queue = vk_device.get_device_queue(encode_queue_family_index, 0); + + let video_queue_device = + ash::khr::video_queue::Device::load(instance.ash(), &vk_device); + + let video_encode_queue_device = + video_encode_queue::Device::load(instance.ash(), &vk_device); + + let physical_device_memory_properties = vk_adapter + .shared_instance() + .raw_instance() + .get_physical_device_memory_properties(vk_adapter.raw_physical_device()); + + let physical_device = PhysicalDevice::new( + instance.clone(), + vk::PhysicalDevice::from_raw(vk_physical_device.as_raw()), + ); + + let device = Device { + inner: Arc::new(Inner { + instance, + physical_device, + physical_device_memory_properties: transmute::< + ash_stable::vk::PhysicalDeviceMemoryProperties, + ash::vk::PhysicalDeviceMemoryProperties, + >( + physical_device_memory_properties + ), + device: vk_device, + device_extensions, + video_queue_device, + video_encode_queue_device, + graphics_queue_family_index, + encode_queue_family_index, + graphics_queue, + encode_queue, + }), + }; + + Ok((device, wgpu_device, wgpu_queue)) + } + } + + pub fn create( + physical_device: &PhysicalDevice, + additional_extensions: &[&'static CStr], + ) -> Result { + let instance = physical_device.instance(); + + // Set up queues + let queue_family_properties = physical_device.queue_family_properties(); + + let graphics_queue_family_index = queue_family_properties + .iter() + .position(|properties| { + properties.queue_flags.contains( + vk::QueueFlags::GRAPHICS | 
vk::QueueFlags::COMPUTE | vk::QueueFlags::TRANSFER, + ) + }) + .unwrap() as u32; + + let encode_queue_family_index = queue_family_properties + .iter() + .enumerate() + .position(|(i, properties)| { + i as u32 != graphics_queue_family_index + && properties + .queue_flags + .contains(vk::QueueFlags::VIDEO_ENCODE_KHR) + }) + .unwrap() as u32; + + // Set up extensions + let props = unsafe { + instance + .ash() + .enumerate_device_extension_properties(physical_device.handle())? + }; + + let mut extensions = vec![]; + + let device_extensions = DeviceVideoExtensions { + video_queue: add(&props, ash::khr::video_queue::NAME, &mut extensions), + video_encode_queue: add(&props, ash::khr::video_encode_queue::NAME, &mut extensions), + video_encode_h264: add(&props, ash::khr::video_encode_h264::NAME, &mut extensions), + video_encode_h265: add(&props, ash::khr::video_encode_h265::NAME, &mut extensions), + video_encode_av1: add(&props, ash::khr::video_encode_av1::NAME, &mut extensions), + video_decode_queue: add(&props, ash::khr::video_decode_queue::NAME, &mut extensions), + video_decode_h264: add(&props, ash::khr::video_decode_h264::NAME, &mut extensions), + video_decode_h265: add(&props, ash::khr::video_decode_h265::NAME, &mut extensions), + video_decode_av1: add(&props, ash::khr::video_decode_av1::NAME, &mut extensions), + external_memory_fd: add(&props, ash::khr::external_memory_fd::NAME, &mut extensions), + external_memory_dma_buf: add( + &props, + ash::ext::external_memory_dma_buf::NAME, + &mut extensions, + ), + image_drm_format_modifier: add( + &props, + ash::ext::image_drm_format_modifier::NAME, + &mut extensions, + ), + timeline_semaphore: add(&props, ash::khr::timeline_semaphore::NAME, &mut extensions), + external_semaphore_fd: add( + &props, + ash::khr::external_semaphore_fd::NAME, + &mut extensions, + ), + queue_family_foreign: add( + &props, + ash::ext::queue_family_foreign::NAME, + &mut extensions, + ), + }; + + for extension in additional_extensions { + add(&props, extension, &mut extensions); + } + + // Always enabling these features since they are always required + let mut synchronization2_features = + vk::PhysicalDeviceSynchronization2Features::default().synchronization2(true); + let mut timeline_sempahore_feature = + vk::PhysicalDeviceTimelineSemaphoreFeatures::default().timeline_semaphore(true); + + let mut encode_av1_feature = vk::PhysicalDeviceVideoEncodeAV1FeaturesKHR::default() + .video_encode_av1(device_extensions.video_encode_av1); + + // Currently always creating two queues + let queue_create_infos = [ + vk::DeviceQueueCreateInfo::default() + .queue_family_index(graphics_queue_family_index) + .queue_priorities(&[1.0]), + vk::DeviceQueueCreateInfo::default() + .queue_family_index(encode_queue_family_index) + .queue_priorities(&[1.0]), + ]; + + let create_device_info = vk::DeviceCreateInfo::default() + .enabled_extension_names(&extensions) + .queue_create_infos(&queue_create_infos) + .push(&mut synchronization2_features) + .push(&mut timeline_sempahore_feature) + .push(&mut encode_av1_feature); + + let device = unsafe { + instance + .ash() + .create_device(physical_device.handle(), &create_device_info, None)? 
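The queue selection in `Device::create` above insists on an encode queue family distinct from the graphics one (and `Device::create_wgpu` does the same via its callback). The probe, isolated as a standalone sketch with the same logic (hypothetical function name):

```rust
// Standalone version of the queue-family probe used above: pick the first
// graphics+compute+transfer family, then the first *different* family that
// advertises VIDEO_ENCODE_KHR. Returns None where Device::create currently
// unwraps, i.e. on devices without a separate encode queue.
fn pick_queue_families(
    queue_family_properties: &[vk::QueueFamilyProperties],
) -> Option<(u32, u32)> {
    let graphics = queue_family_properties.iter().position(|p| {
        p.queue_flags.contains(
            vk::QueueFlags::GRAPHICS | vk::QueueFlags::COMPUTE | vk::QueueFlags::TRANSFER,
        )
    })? as u32;

    let encode = queue_family_properties.iter().enumerate().position(|(i, p)| {
        i as u32 != graphics && p.queue_flags.contains(vk::QueueFlags::VIDEO_ENCODE_KHR)
    })? as u32;

    Some((graphics, encode))
}
```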
+ }; + + let video_queue_device = ash::khr::video_queue::Device::load(instance.ash(), &device); + let video_encode_queue_device = video_encode_queue::Device::load(instance.ash(), &device); + + let physical_device_memory_properties = unsafe { + instance + .ash() + .get_physical_device_memory_properties(physical_device.handle()) + }; + + let (graphics_queue, encode_queue) = unsafe { + ( + device.get_device_queue(graphics_queue_family_index, 0), + device.get_device_queue(encode_queue_family_index, 0), + ) + }; + + Ok(Device { + inner: Arc::new(Inner { + instance: instance.clone(), + physical_device: physical_device.clone(), + physical_device_memory_properties, + device, + device_extensions, + video_queue_device, + video_encode_queue_device, + graphics_queue_family_index, + encode_queue_family_index, + graphics_queue, + encode_queue, + }), + }) + } + + pub(crate) fn find_memory_type( + &self, + memory_type_bits: u32, + properties: vk::MemoryPropertyFlags, + ) -> Result { + for (i, memory_type) in self + .inner + .physical_device_memory_properties + .memory_types + .iter() + .enumerate() + { + let type_supported = (memory_type_bits & (1 << i)) != 0; + let has_properties = memory_type.property_flags.contains(properties); + if type_supported && has_properties { + return Ok(i as u32); + } + } + + Err(VulkanError::CannotFindMemoryType { + memory_type_bits, + properties, + }) + } + + pub fn instance(&self) -> &Instance { + &self.inner.instance + } + + pub fn physical_device(&self) -> &PhysicalDevice { + &self.inner.physical_device + } + + pub fn ash(&self) -> &ash::Device { + &self.inner.device + } + + pub fn ash_video_queue_device(&self) -> &video_queue::Device { + &self.inner.video_queue_device + } + + pub fn ash_video_encode_queue_device(&self) -> &video_encode_queue::Device { + &self.inner.video_encode_queue_device + } + + pub fn graphics_queue_family_index(&self) -> u32 { + self.inner.graphics_queue_family_index + } + + pub fn encode_queue_family_index(&self) -> u32 { + self.inner.encode_queue_family_index + } + + pub fn graphics_queue(&self) -> vk::Queue { + self.inner.graphics_queue + } + + pub fn encode_queue(&self) -> vk::Queue { + self.inner.encode_queue + } + + pub fn enabled_extensions(&self) -> &DeviceVideoExtensions { + &self.inner.device_extensions + } +} + +impl Drop for Inner { + fn drop(&mut self) { + unsafe { + if let Err(e) = self.device.device_wait_idle() { + log::warn!("device_wait_idle failed: {e:?}"); + } + + self.device.destroy_device(None); + } + } +} + +fn add( + properties: &[vk::ExtensionProperties], + extension: &'static CStr, + extensions: &mut Vec<*const i8>, +) -> bool { + let is_supported = properties + .iter() + .any(|x| unsafe { CStr::from_ptr(x.extension_name.as_ptr()) } == extension); + + if is_supported { + extensions.push(extension.as_ptr()); + } + + is_supported +} + +fn add2( + properties: &[ash_stable::vk::ExtensionProperties], + extension: &'static CStr, + extensions: &mut Vec<&'static CStr>, +) -> bool { + let is_supported = properties + .iter() + .any(|x| unsafe { CStr::from_ptr(x.extension_name.as_ptr()) } == extension); + + if is_supported { + extensions.push(extension); + } + + is_supported +} diff --git a/media-video/vulkan/src/dpb.rs b/media-video/vulkan/src/dpb.rs new file mode 100644 index 00000000..68cb173e --- /dev/null +++ b/media-video/vulkan/src/dpb.rs @@ -0,0 +1,127 @@ +use crate::{Device, Image, ImageView, VulkanError}; +use ash::vk::{self, TaggedStructure}; + +pub(crate) fn create_dpb( + device: &Device, + video_profile_info: 
&vk::VideoProfileInfoKHR<'_>, + num_slots: u32, + extent: vk::Extent2D, + usage: vk::ImageUsageFlags, + separate_images: bool, +) -> Result, VulkanError> { + if separate_images { + create_dpb_separate_images(device, video_profile_info, num_slots, extent, usage) + } else { + create_dpb_layers(device, video_profile_info, num_slots, extent, usage) + } +} + +fn create_dpb_layers( + device: &Device, + video_profile_info: &vk::VideoProfileInfoKHR<'_>, + num_slots: u32, + extent: vk::Extent2D, + usage: vk::ImageUsageFlags, +) -> Result, VulkanError> { + let mut video_profile_list_info = + vk::VideoProfileListInfoKHR::default().profiles(std::slice::from_ref(video_profile_info)); + let input_image_info = vk::ImageCreateInfo::default() + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .extent(vk::Extent3D { + width: extent.width, + height: extent.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(num_slots) + .tiling(vk::ImageTiling::OPTIMAL) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED) + .samples(vk::SampleCountFlags::TYPE_1) + .usage(usage) + .push(&mut video_profile_list_info); + + let image = unsafe { Image::create(device, &input_image_info)? }; + + let mut slots = Vec::with_capacity(num_slots as usize); + + for array_layer in 0..num_slots { + let mut view_usage_create_info = vk::ImageViewUsageCreateInfo::default().usage(usage); + + let create_info = vk::ImageViewCreateInfo::default() + .image(unsafe { image.handle() }) + .view_type(vk::ImageViewType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .components(vk::ComponentMapping::default()) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: array_layer, + layer_count: 1, + }) + .push(&mut view_usage_create_info); + + let image_view = unsafe { ImageView::create(&image, &create_info)? }; + + slots.push(image_view) + } + + Ok(slots) +} + +fn create_dpb_separate_images( + device: &Device, + video_profile_info: &vk::VideoProfileInfoKHR<'_>, + num_slots: u32, + extent: vk::Extent2D, + usage: vk::ImageUsageFlags, +) -> Result, VulkanError> { + let mut slots = Vec::with_capacity(num_slots as usize); + + for _ in 0..num_slots { + let mut video_profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(video_profile_info)); + let input_image_info = vk::ImageCreateInfo::default() + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .extent(vk::Extent3D { + width: extent.width, + height: extent.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(1) + .tiling(vk::ImageTiling::OPTIMAL) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED) + .samples(vk::SampleCountFlags::TYPE_1) + .usage(usage) + .push(&mut video_profile_list_info); + + let image = unsafe { Image::create(device, &input_image_info)? 
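Which of the two DPB layouts `create_dpb` builds — array layers of one image versus one image per slot — is decided by the caller from the device's reported video capabilities; the encoder in capabilities.rs later in this diff forwards `VideoCapabilityFlagsKHR::SEPARATE_REFERENCE_IMAGES` as `separate_images`. Roughly (hypothetical wrapper, assuming a profile and queried capabilities are already in hand):

```rust
// Sketch of the call site: implementations that cannot back all DPB slots
// with layers of a single image report SEPARATE_REFERENCE_IMAGES, and that
// flag selects the per-slot-image path.
fn allocate_dpb(
    device: &Device,
    video_profile_info: &vk::VideoProfileInfoKHR<'_>,
    video_caps: &vk::VideoCapabilitiesKHR<'_>,
    num_slots: u32,
    extent: vk::Extent2D,
) -> Result<Vec<ImageView>, VulkanError> {
    create_dpb(
        device,
        video_profile_info,
        num_slots,
        extent,
        vk::ImageUsageFlags::VIDEO_ENCODE_DPB_KHR,
        video_caps
            .flags
            .contains(vk::VideoCapabilityFlagsKHR::SEPARATE_REFERENCE_IMAGES),
    )
}
```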
}; + + let mut view_usage_create_info = vk::ImageViewUsageCreateInfo::default().usage(usage); + + let create_info = vk::ImageViewCreateInfo::default() + .image(unsafe { image.handle() }) + .view_type(vk::ImageViewType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .components(vk::ComponentMapping::default()) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }) + .push(&mut view_usage_create_info); + + let image_view = unsafe { ImageView::create(&image, &create_info)? }; + + slots.push(image_view) + } + + Ok(slots) +} diff --git a/media-video/vulkan/src/encoder/capabilities.rs b/media-video/vulkan/src/encoder/capabilities.rs new file mode 100644 index 00000000..ac13a440 --- /dev/null +++ b/media-video/vulkan/src/encoder/capabilities.rs @@ -0,0 +1,269 @@ +use crate::{ + Buffer, CommandBuffer, Device, Fence, PhysicalDevice, Semaphore, VideoFeedbackQueryPool, + VideoSession, VideoSessionParameters, VulkanError, create_dpb, + encoder::{ + DpbSlot, Input, RateControlInfos, VulkanEncodeSlot, VulkanEncodeSlotSeparateQueueData, + VulkanEncoder, VulkanEncoderImplConfig, VulkanEncoderSeparateQueueData, + codec::VulkanEncCodec, + }, + image::ImageMemoryBarrier, +}; +use ash::vk::{self, TaggedStructure as _}; +use std::{collections::VecDeque, mem::zeroed, pin::Pin, time::Instant}; + +#[derive(Debug, thiserror::Error)] +pub enum VulkanEncoderCapabilitiesError { + #[error("Failed to find a transfer | compute | graphics queue")] + FailedToFindMainQueue, + #[error("Failed to find a encode queue")] + FailedToFindEncodeQueue, + #[error(transparent)] + VideoCapabilities(VulkanError), +} + +#[derive(Debug, Clone)] +pub struct VulkanEncoderCapabilities { + pub physical_device: PhysicalDevice, + pub video: vk::VideoCapabilitiesKHR<'static>, + pub encode: vk::VideoEncodeCapabilitiesKHR<'static>, + pub codec: C::Capabilities<'static>, +} + +impl VulkanEncoderCapabilities { + pub fn new( + physical_device: &PhysicalDevice, + codec_profile_info: C::ProfileInfo<'_>, + ) -> Result, VulkanEncoderCapabilitiesError> { + let video_profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(C::ENCODE_OPERATION) + .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420); + + let mut tmp = video_profile_info; + tmp.p_next = (&raw const codec_profile_info).cast(); + + let (video, encode, codec) = physical_device + .video_capabilities::(tmp) + .map_err(|e| VulkanEncoderCapabilitiesError::VideoCapabilities(e.into()))?; + + Ok(VulkanEncoderCapabilities { + physical_device: physical_device.clone(), + video, + encode, + codec, + }) + } + + pub fn create_encoder( + &self, + device: &Device, + config: VulkanEncoderImplConfig, + codec_profile_info: C::ProfileInfo<'_>, + session: C::SessionCreateInfo<'_>, + parameters: C::ParametersCreateInfo<'_>, + rate_control: Option>>>, + ) -> Result, VulkanError> { + let mut video_encode_usage_info = vk::VideoEncodeUsageInfoKHR::default() + .video_usage_hints(config.user.usage_hints) + .video_content_hints(config.user.content_hints) + .tuning_mode(config.user.tuning_mode); + + let mut video_profile_info = vk::VideoProfileInfoKHR::default() + .video_codec_operation(C::ENCODE_OPERATION) + .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8) + 
.chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420); + + video_encode_usage_info.p_next = (&raw const codec_profile_info).cast(); + video_profile_info.p_next = (&raw const video_encode_usage_info).cast(); + + let graphics_queue_family_index = device.graphics_queue_family_index(); + let encode_queue_family_index = device.encode_queue_family_index(); + + let graphics_queue = device.graphics_queue(); + let encode_queue = device.encode_queue(); + + // Create video session + let mut create_info = vk::VideoSessionCreateInfoKHR::default() + .max_coded_extent(config.user.max_encode_resolution) + .queue_family_index(encode_queue_family_index) + .max_active_reference_pictures(config.max_active_references) + .max_dpb_slots(config.num_dpb_slots) + .picture_format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .reference_picture_format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .video_profile(&video_profile_info) + .std_header_version(&self.video.std_header_version); + + create_info.p_next = (&raw const session).cast(); + + let video_session = unsafe { VideoSession::create(device, &create_info)? }; + + // Create video session parameters + let video_session_parameters = + VideoSessionParameters::create::(&video_session, ¶meters)?; + + // Create command buffers + let mut command_buffers = + CommandBuffer::create(device, graphics_queue_family_index, config.num_encode_slots)?; + + let mut separate_encode_command_buffers = + if graphics_queue_family_index == encode_queue_family_index { + None + } else { + Some(CommandBuffer::create( + device, + encode_queue_family_index, + config.num_encode_slots, + )?) + }; + + let mut inputs = Input::create( + device, + &video_profile_info, + config.user.input_as_vulkan_image, + config.user.input_pixel_format, + config.user.max_input_resolution, + config.user.max_encode_resolution, + config.num_encode_slots, + )?; + + let output_buffer_size: u64 = (config.user.max_encode_resolution.width as u64 + * config.user.max_encode_resolution.height as u64) + .next_multiple_of(self.video.min_bitstream_buffer_size_alignment); + let mut encode_slots = vec![]; + + for index in 0..config.num_encode_slots { + let output_buffer = { + let mut video_profile_list_info = vk::VideoProfileListInfoKHR::default() + .profiles(std::slice::from_ref(&video_profile_info)); + + let create_info = vk::BufferCreateInfo::default() + .size(output_buffer_size) + .usage( + vk::BufferUsageFlags::VIDEO_ENCODE_DST_KHR + | vk::BufferUsageFlags::TRANSFER_SRC, + ) + .push(&mut video_profile_list_info); + + unsafe { Buffer::create(device, &create_info)? 
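The bitstream-buffer sizing a few lines up budgets one byte per pixel of the maximum encode resolution and then rounds up to the implementation's `min_bitstream_buffer_size_alignment`. A worked example with assumed, illustrative numbers:

```rust
// Worked example of the output-buffer sizing above: 1920x1080 at one byte
// per pixel, with the implementation reporting a 4096-byte alignment.
fn example_output_buffer_size() -> u64 {
    let min_bitstream_buffer_size_alignment: u64 = 4096;
    let size = (1920u64 * 1080u64).next_multiple_of(min_bitstream_buffer_size_alignment);
    assert_eq!(size, 2_076_672); // 2_073_600 rounded up to 507 * 4096
    size
}
```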
} + }; + + let separate_queue_data = match &mut separate_encode_command_buffers { + Some(separate_encode_command_buffers) => { + let command_buffer = separate_encode_command_buffers.pop().unwrap(); + let semaphore = Semaphore::create(device)?; + + Some(VulkanEncodeSlotSeparateQueueData { + semaphore, + command_buffer, + }) + } + None => None, + }; + + let completion_fence = Fence::create(device)?; + + encode_slots.push(VulkanEncodeSlot { + index, + emit_parameters: false, + // Fake placeholder value + submitted_at: Instant::now(), + input: inputs.pop().unwrap(), + output_buffer, + command_buffer: command_buffers.pop().unwrap(), + separate_queue_data, + completion_fence, + }); + } + + let dpb_views = create_dpb( + device, + &video_profile_info, + config.num_dpb_slots, + config.user.max_encode_resolution, + vk::ImageUsageFlags::VIDEO_ENCODE_DPB_KHR, + self.video + .flags + .contains(vk::VideoCapabilityFlagsKHR::SEPARATE_REFERENCE_IMAGES), + )?; + + let dpb_slots: Vec> = dpb_views + .into_iter() + .map(|image_view| DpbSlot { + image_view, + std_reference_info: unsafe { zeroed() }, + }) + .collect(); + + let encode_slot = &mut encode_slots[0]; + + // Prepare layouts + unsafe { + let fence = Fence::create(device)?; + + let command_buffer = match &mut encode_slot.separate_queue_data { + Some(separate_queue_data) => &mut separate_queue_data.command_buffer, + None => &mut encode_slot.command_buffer, + }; + + let recording = command_buffer.begin(&vk::CommandBufferBeginInfo::default())?; + + // Transition all dpb slots to the correct layout + for dpb_slot in &dpb_slots { + dpb_slot.image_view.image().cmd_memory_barrier( + &recording, + ImageMemoryBarrier::dst( + vk::ImageLayout::VIDEO_ENCODE_DPB_KHR, + vk::PipelineStageFlags2::VIDEO_ENCODE_KHR, + vk::AccessFlags2::VIDEO_ENCODE_WRITE_KHR, + ), + dpb_slot.image_view.subresource_range().base_array_layer, + ); + } + + recording.end()?; + + let command_buffers = [command_buffer.handle()]; + let submit_info = vk::SubmitInfo::default().command_buffers(&command_buffers); + + device + .ash() + .queue_submit(encode_queue, &[submit_info], fence.handle())?; + + fence.wait(u64::MAX)?; + }; + + let video_feedback_query_pool = + VideoFeedbackQueryPool::create(device, config.num_encode_slots, &video_profile_info)?; + + let separate_queue_data = if graphics_queue_family_index == encode_queue_family_index { + None + } else { + Some(VulkanEncoderSeparateQueueData { + encode_queue_family_index, + encode_queue, + }) + }; + + Ok(VulkanEncoder { + max_input_extent: config.user.max_input_resolution, + max_encode_extent: config.user.max_encode_resolution, + current_encode_extent: config.user.initial_encode_resolution, + output_buffer_size, + video_session, + video_session_parameters, + video_session_is_uninitialized: true, + video_feedback_query_pool, + graphics_queue_family_index, + graphics_queue, + separate_queue_data, + current_rc: None, + next_rc: rate_control, + encode_slots, + in_flight: VecDeque::new(), + dpb_slots, + output: VecDeque::new(), + }) + } +} diff --git a/media-video/vulkan/src/encoder/codec.rs b/media-video/vulkan/src/encoder/codec.rs new file mode 100644 index 00000000..97baaec3 --- /dev/null +++ b/media-video/vulkan/src/encoder/codec.rs @@ -0,0 +1,177 @@ +use crate::{VideoSessionParameters, VulkanError}; +use ash::vk; +use std::{ffi::CStr, fmt}; + +pub trait VulkanEncCodec: Copy { + const ENCODE_OPERATION: vk::VideoCodecOperationFlagsKHR; + const EXTENSION: &'static CStr; + + type ProfileInfo<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + 
fmt::Debug + + Copy; + type Capabilities<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + Default + + fmt::Debug + + Copy; + + type SessionCreateInfo<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; + + type ParametersCreateInfo<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; + + type StdReferenceInfo: fmt::Debug + Copy; + type DpbSlotInfo<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; + + fn slot_info_from_std(std_reference_info: &Self::StdReferenceInfo) -> Self::DpbSlotInfo<'_>; + + type PictureInfo<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; + + type RateControlInfo<'a>: vk::Extends> + + vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; + type RateControlLayerInfo<'a>: fmt::Debug + + vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; + + #[allow(private_interfaces)] + fn get_encoded_video_session_parameters( + video_session_parameters: &VideoSessionParameters, + ) -> Result, VulkanError>; +} + +pub trait VulkanEncCodecUpdate: VulkanEncCodec { + type ParametersAddInfo<'a>: vk::Extends> + + vk::TaggedStructure<'a> + + fmt::Debug + + Copy; +} + +#[derive(Debug, Clone, Copy)] +pub struct H264; + +impl VulkanEncCodec for H264 { + const ENCODE_OPERATION: vk::VideoCodecOperationFlagsKHR = + vk::VideoCodecOperationFlagsKHR::ENCODE_H264; + const EXTENSION: &'static CStr = ash::khr::video_encode_h264::NAME; + type ProfileInfo<'a> = vk::VideoEncodeH264ProfileInfoKHR<'a>; + type Capabilities<'a> = vk::VideoEncodeH264CapabilitiesKHR<'a>; + type SessionCreateInfo<'a> = vk::VideoEncodeH264SessionCreateInfoKHR<'a>; + type ParametersCreateInfo<'a> = vk::VideoEncodeH264SessionParametersCreateInfoKHR<'a>; + + type StdReferenceInfo = vk::native::StdVideoEncodeH264ReferenceInfo; + type DpbSlotInfo<'a> = vk::VideoEncodeH264DpbSlotInfoKHR<'a>; + + fn slot_info_from_std(std_reference_info: &Self::StdReferenceInfo) -> Self::DpbSlotInfo<'_> { + vk::VideoEncodeH264DpbSlotInfoKHR::default().std_reference_info(std_reference_info) + } + + type PictureInfo<'a> = vk::VideoEncodeH264PictureInfoKHR<'a>; + + type RateControlInfo<'a> = vk::VideoEncodeH264RateControlInfoKHR<'a>; + type RateControlLayerInfo<'a> = vk::VideoEncodeH264RateControlLayerInfoKHR<'a>; + + #[allow(private_interfaces)] + fn get_encoded_video_session_parameters( + video_session_parameters: &VideoSessionParameters, + ) -> Result, VulkanError> { + let mut info = vk::VideoEncodeH264SessionParametersGetInfoKHR::default() + .write_std_sps(true) + .write_std_pps(true); + + unsafe { video_session_parameters.get_encoded_video_session_parameters(Some(&mut info)) } + } +} + +impl VulkanEncCodecUpdate for H264 { + type ParametersAddInfo<'a> = vk::VideoEncodeH264SessionParametersAddInfoKHR<'a>; +} + +#[derive(Debug, Clone, Copy)] +pub struct H265; + +impl VulkanEncCodec for H265 { + const ENCODE_OPERATION: vk::VideoCodecOperationFlagsKHR = + vk::VideoCodecOperationFlagsKHR::ENCODE_H265; + const EXTENSION: &'static CStr = ash::khr::video_encode_h265::NAME; + type ProfileInfo<'a> = vk::VideoEncodeH265ProfileInfoKHR<'a>; + type Capabilities<'a> = vk::VideoEncodeH265CapabilitiesKHR<'a>; + type SessionCreateInfo<'a> = vk::VideoEncodeH265SessionCreateInfoKHR<'a>; + type ParametersCreateInfo<'a> = vk::VideoEncodeH265SessionParametersCreateInfoKHR<'a>; + type DpbSlotInfo<'a> = vk::VideoEncodeH265DpbSlotInfoKHR<'a>; + + type StdReferenceInfo = vk::native::StdVideoEncodeH265ReferenceInfo; + + fn slot_info_from_std(std_reference_info: 
&Self::StdReferenceInfo) -> Self::DpbSlotInfo<'_> { + vk::VideoEncodeH265DpbSlotInfoKHR::default().std_reference_info(std_reference_info) + } + + type PictureInfo<'a> = vk::VideoEncodeH265PictureInfoKHR<'a>; + + type RateControlInfo<'a> = vk::VideoEncodeH265RateControlInfoKHR<'a>; + type RateControlLayerInfo<'a> = vk::VideoEncodeH265RateControlLayerInfoKHR<'a>; + + #[allow(private_interfaces)] + fn get_encoded_video_session_parameters( + video_session_parameters: &VideoSessionParameters, + ) -> Result, VulkanError> { + let mut info = vk::VideoEncodeH265SessionParametersGetInfoKHR::default() + .write_std_sps(true) + .write_std_pps(true) + .write_std_vps(true); + + unsafe { video_session_parameters.get_encoded_video_session_parameters(Some(&mut info)) } + } +} + +impl VulkanEncCodecUpdate for H265 { + type ParametersAddInfo<'a> = vk::VideoEncodeH265SessionParametersAddInfoKHR<'a>; +} + +#[derive(Debug, Clone, Copy)] +pub struct AV1; + +impl VulkanEncCodec for AV1 { + const ENCODE_OPERATION: vk::VideoCodecOperationFlagsKHR = + vk::VideoCodecOperationFlagsKHR::ENCODE_AV1; + const EXTENSION: &'static CStr = ash::khr::video_encode_av1::NAME; + type ProfileInfo<'a> = vk::VideoEncodeAV1ProfileInfoKHR<'a>; + type Capabilities<'a> = vk::VideoEncodeAV1CapabilitiesKHR<'a>; + type SessionCreateInfo<'a> = vk::VideoEncodeAV1SessionCreateInfoKHR<'a>; + type ParametersCreateInfo<'a> = vk::VideoEncodeAV1SessionParametersCreateInfoKHR<'a>; + type DpbSlotInfo<'a> = vk::VideoEncodeAV1DpbSlotInfoKHR<'a>; + + type StdReferenceInfo = vk::native::StdVideoEncodeAV1ReferenceInfo; + + fn slot_info_from_std(std_reference_info: &Self::StdReferenceInfo) -> Self::DpbSlotInfo<'_> { + vk::VideoEncodeAV1DpbSlotInfoKHR::default().std_reference_info(std_reference_info) + } + + type PictureInfo<'a> = vk::VideoEncodeAV1PictureInfoKHR<'a>; + + type RateControlInfo<'a> = vk::VideoEncodeAV1RateControlInfoKHR<'a>; + type RateControlLayerInfo<'a> = vk::VideoEncodeAV1RateControlLayerInfoKHR<'a>; + + #[allow(private_interfaces)] + fn get_encoded_video_session_parameters( + video_session_parameters: &VideoSessionParameters, + ) -> Result, VulkanError> { + unsafe { video_session_parameters.get_encoded_video_session_parameters2() } + } +} diff --git a/media-video/vulkan/src/encoder/input/mod.rs b/media-video/vulkan/src/encoder/input/mod.rs new file mode 100644 index 00000000..93aaca4e --- /dev/null +++ b/media-video/vulkan/src/encoder/input/mod.rs @@ -0,0 +1,612 @@ +use crate::{ + Buffer, Device, Image, ImageView, RecordingCommandBuffer, Semaphore, VulkanError, + encoder::input::rgb_to_nv12::RgbToNV12Converter, image::ImageMemoryBarrier, +}; +use ash::vk::{self, TaggedStructure}; +use ezk_image::ImageRef; +use smallvec::SmallVec; + +mod rgb_to_nv12; +pub use rgb_to_nv12::Primaries; + +#[derive(Debug, Clone, Copy)] +pub enum InputPixelFormat { + /// 2 Plane YUV with 4:2:0 subsampling + NV12, + /// 1 Plane RGBA + RGBA { + /// Primaries to use when converting RGB to YUV for encoding + primaries: Primaries, + }, +} + +#[allow(missing_debug_implementations)] +pub enum InputData<'a> { + /// Host memory image + Image(&'a dyn ImageRef), + + /// Externally provided vulkan image view + /// + /// Must have usage SAMPLED and represent an RGB(A) image + VulkanImage(VulkanImageInput), +} + +#[derive(Debug)] +pub struct VulkanImageInput { + pub view: ImageView, + pub extent: vk::Extent2D, + + pub acquire: Option, + pub release: Option, +} + +#[derive(Debug, Clone)] +pub struct InputSync { + pub semaphore: Semaphore, + pub timeline_point: Option, +} + 
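The `H264`/`H265`/`AV1` markers in codec.rs exist so the encoder plumbing can be written once against `VulkanEncCodec`: the profile struct, capability struct, and extension name all come from the trait impl. A sketch of codec-generic code (hypothetical helper, assuming `VulkanEncoderCapabilities` is generic over the codec marker as it is used elsewhere in this diff):

```rust
// Hypothetical codec-generic probe: the same body serves H264, H265 and AV1,
// since ProfileInfo is an associated type of the codec marker.
fn supports_encode<C: VulkanEncCodec>(
    device: &Device,
    profile_info: C::ProfileInfo<'_>,
) -> bool {
    VulkanEncoderCapabilities::<C>::new(device.physical_device(), profile_info).is_ok()
}
```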
+impl InputData<'_> { + pub fn extent(&self) -> vk::Extent2D { + match self { + InputData::Image(image_ref) => vk::Extent2D { + width: image_ref.width() as u32, + height: image_ref.height() as u32, + }, + InputData::VulkanImage(image) => image.extent, + } + } +} + +/// Encoder input +/// +/// NV12 (host-memory) -> staging-buffer -> encode-input-image +/// RGBA (host-memory) -> staging-buffer -> rgb-image -> convert -> encode-input-image +/// +/// NV12 (device-memory) = encode-input-image +/// RGBA (device-memory) -> convert -> encode-input-image +#[derive(Debug)] +pub(super) enum Input { + /// Input is NV12 copied from Host to staging buffer, staging buffer has image set when recording command buffer + HostNV12 { + staging_buffer: Buffer, + nv12_image: ImageView, + // TODO: nv12 scaler + }, + /// Input is RGBA copied from Host to staging buffer, then converted to NV12 + HostRGBA { + /// RGBA staging buffer + staging_buffer: Buffer, + /// Extent of the image inside the staging buffer + staging_extent: Option, + + /// RGBA image created from the staging buffer + rgb_image: ImageView, + rgb_image_extent: vk::Extent2D, + + /// RGB -> YUV converter + converter: RgbToNV12Converter, + /// Final NV12 output image + nv12_image: ImageView, + }, + ImportedRGBA { + /// Imported RGBA image + rgb_image: Option, + /// RGB -> YUV converter + converter: RgbToNV12Converter, + /// final output image + nv12_image: ImageView, + }, +} + +impl Input { + pub(super) fn create( + device: &Device, + video_profile_info: &vk::VideoProfileInfoKHR<'_>, + input_as_vulkan_image: bool, + pixel_format: InputPixelFormat, + input_extent: vk::Extent2D, + encode_extent: vk::Extent2D, + num: u32, + ) -> Result, VulkanError> { + if input_as_vulkan_image { + Self::new_from_vulkan_image( + device, + video_profile_info, + pixel_format, + input_extent, + encode_extent, + num, + ) + } else { + Self::new_from_host( + device, + video_profile_info, + pixel_format, + input_extent, + encode_extent, + num, + ) + } + } + + fn new_from_host( + device: &Device, + video_profile_info: &vk::VideoProfileInfoKHR<'_>, + pixel_format: InputPixelFormat, + input_extent: vk::Extent2D, + encode_extent: vk::Extent2D, + num: u32, + ) -> Result, VulkanError> { + use InputPixelFormat::*; + + match pixel_format { + NV12 => { + let staging_buffer_size = + (input_extent.width as u64 * input_extent.height as u64 * 12) / 8; + + (0..num) + .map(|_| -> Result { + Ok(Input::HostNV12 { + staging_buffer: create_staging_buffer(device, staging_buffer_size)?, + nv12_image: create_nv12_image( + device, + video_profile_info, + encode_extent, + )?, + }) + }) + .collect() + } + RGBA { primaries } => { + let staging_buffer_size = + input_extent.width as u64 * input_extent.height as u64 * 4; + + let mut converter: Vec = + RgbToNV12Converter::create(device, primaries, encode_extent, num)?; + + (0..num) + .map(|_| -> Result { + // Staging buffer containing the host image data + let staging_buffer = create_staging_buffer(device, staging_buffer_size)?; + + // Staging buffer copy destination and if the resolution matches the encoder's: input to the RGB->Yuv converter + let rgb_image = create_rgba_image( + device, + input_extent, + vk::ImageUsageFlags::SAMPLED + | vk::ImageUsageFlags::TRANSFER_DST + | vk::ImageUsageFlags::TRANSFER_SRC, + )?; + + // Destination of the RGB->YUV converter + let nv12_image = + create_nv12_image(device, video_profile_info, encode_extent)?; + + Ok(Input::HostRGBA { + staging_buffer, + staging_extent: None, + rgb_image, + rgb_image_extent: input_extent, + 
converter: converter.pop().unwrap(), + nv12_image, + }) + }) + .collect() + } + } + } + + fn new_from_vulkan_image( + device: &Device, + video_profile_info: &vk::VideoProfileInfoKHR<'_>, + pixel_format: InputPixelFormat, + #[expect(unused_variables)] input_extent: vk::Extent2D, + encode_extent: vk::Extent2D, + num: u32, + ) -> Result, VulkanError> { + use InputPixelFormat::*; + + match pixel_format { + NV12 => Err(VulkanError::InvalidArgument { + message: "NV12 Vulkan Image Input to VulkanEncoder is currently not supported", + }), + RGBA { primaries } => { + let mut converter: Vec = + RgbToNV12Converter::create(device, primaries, encode_extent, num)?; + + (0..num) + .map(|_| -> Result { + // Destination of the RGB->YUV converter + let nv12_image = + create_nv12_image(device, video_profile_info, encode_extent)?; + + Ok(Input::ImportedRGBA { + rgb_image: None, + converter: converter.pop().unwrap(), + nv12_image, + }) + }) + .collect() + } + } + } + + pub(super) fn submit_graphics_queue_add_semaphores( + &mut self, + wait_semaphores: &mut SmallVec<[vk::SemaphoreSubmitInfo; 2]>, + signal_semaphores: &mut SmallVec<[vk::SemaphoreSubmitInfo; 2]>, + ) { + if let Input::ImportedRGBA { + rgb_image: Some(rgb_image), + .. + } = &self + { + if let Some(InputSync { + semaphore, + timeline_point, + }) = &rgb_image.acquire + { + let mut semaphore_info = + vk::SemaphoreSubmitInfo::default().semaphore(unsafe { semaphore.handle() }); + + if let Some(timeline_point) = timeline_point { + semaphore_info = semaphore_info.value(*timeline_point); + }; + + wait_semaphores.push(semaphore_info); + } + + if let Some(InputSync { + semaphore, + timeline_point, + }) = &rgb_image.release + { + let mut semaphore_info = + vk::SemaphoreSubmitInfo::default().semaphore(unsafe { semaphore.handle() }); + + if let Some(timeline_point) = timeline_point { + semaphore_info = semaphore_info.value(*timeline_point); + }; + + signal_semaphores.push(semaphore_info); + } + } + } + + /// Destroy any references to external resources + pub(super) fn drop_borrowed_resources(&mut self) { + if let Input::ImportedRGBA { rgb_image, .. } = self { + *rgb_image = None; + } + } + + /// Process input, depending on the input type and data given + /// + /// Copies image data from staging buffers, converts RGB to YUV etc.. 
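+    ///
+    /// Per input variant this records:
+    /// - `HostNV12`: a buffer-to-image copy of both planes from the staging buffer
+    /// - `HostRGBA`: a staging copy into the RGBA image, followed by the compute RGB -> NV12 conversion
+    /// - `ImportedRGBA`: the compute conversion directly on the imported image
+    ///
+    /// In every case the resulting NV12 image is then released to the encode queue family.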
+ pub(super) unsafe fn prepare_input_image( + &mut self, + device: &Device, + queue_family_index: u32, + encode_queue_family_index: u32, + command_buffer: &RecordingCommandBuffer<'_>, + nv12_extent: vk::Extent2D, + ) -> Result<(), VulkanError> { + match self { + Input::HostNV12 { + staging_buffer, + nv12_image, + } => { + nv12_image.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + vk::PipelineStageFlags2::TRANSFER, + vk::AccessFlags2::TRANSFER_WRITE, + ) + .src( + vk::ImageLayout::UNDEFINED, + vk::PipelineStageFlags2::NONE, + vk::AccessFlags2::NONE, + ), + 0, + ); + + copy_nv12_staging_buffer_to_image( + device, + command_buffer, + staging_buffer, + nv12_image.image(), + nv12_extent, + ); + release_and_prepare_for_encode_queue( + queue_family_index, + encode_queue_family_index, + command_buffer, + nv12_image.image(), + ); + } + Input::HostRGBA { + staging_buffer, + staging_extent, + rgb_image, + rgb_image_extent, + converter, + nv12_image, + } => { + let rgb_image_content_extent = staging_extent.expect("staging_extent must be set"); + + rgb_image.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + vk::PipelineStageFlags2::TRANSFER, + vk::AccessFlags2::TRANSFER_WRITE, + ) + .src( + vk::ImageLayout::UNDEFINED, + vk::PipelineStageFlags2::BLIT, + vk::AccessFlags2::TRANSFER_READ, + ), + 0, + ); + + copy_rgba_staging_buffer_to_image( + device, + command_buffer, + staging_buffer, + rgb_image.image(), + rgb_image_content_extent, + ); + + converter.record_rgba_to_nv12( + command_buffer, + *rgb_image_extent, + rgb_image_content_extent, + nv12_extent, + rgb_image, + nv12_image.image(), + )?; + release_and_prepare_for_encode_queue( + queue_family_index, + encode_queue_family_index, + command_buffer, + nv12_image.image(), + ); + } + Input::ImportedRGBA { + rgb_image, + converter, + nv12_image, + } => { + let rgb_image = rgb_image + .as_ref() + .expect("device rgba-image view not set on submitted encode slot"); + + converter.record_rgba_to_nv12( + command_buffer, + rgb_image.extent, + rgb_image.extent, + nv12_extent, + &rgb_image.view, + nv12_image.image(), + )?; + release_and_prepare_for_encode_queue( + queue_family_index, + encode_queue_family_index, + command_buffer, + nv12_image.image(), + ); + } + } + + Ok(()) + } + + pub(super) unsafe fn acquire_input_image( + &self, + graphics_queue_family_index: u32, + encode_queue_family_index: u32, + command_buffer: &RecordingCommandBuffer<'_>, + ) -> &ImageView { + let nv12_image = match self { + Input::HostNV12 { nv12_image, .. } => nv12_image, + Input::HostRGBA { nv12_image, .. } => nv12_image, + Input::ImportedRGBA { nv12_image, .. 
} => nv12_image, + }; + + nv12_image.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::VIDEO_ENCODE_SRC_KHR, + vk::PipelineStageFlags2::VIDEO_ENCODE_KHR, + vk::AccessFlags2::VIDEO_ENCODE_READ_KHR, + ) + .queue_family_indices(graphics_queue_family_index, encode_queue_family_index), + 0, + ); + + nv12_image + } +} + +fn create_staging_buffer(device: &Device, size: u64) -> Result { + let create_info = vk::BufferCreateInfo::default() + .size(size) + .usage(vk::BufferUsageFlags::TRANSFER_SRC) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + unsafe { Buffer::create(device, &create_info) } +} + +fn create_nv12_image( + device: &Device, + video_profile_info: &vk::VideoProfileInfoKHR<'_>, + extent: vk::Extent2D, +) -> Result { + let mut video_profile_list_info = + vk::VideoProfileListInfoKHR::default().profiles(std::slice::from_ref(video_profile_info)); + let create_info = vk::ImageCreateInfo::default() + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .extent(vk::Extent3D { + width: extent.width, + height: extent.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(1) + .tiling(vk::ImageTiling::OPTIMAL) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED) + .samples(vk::SampleCountFlags::TYPE_1) + .usage(vk::ImageUsageFlags::VIDEO_ENCODE_SRC_KHR | vk::ImageUsageFlags::TRANSFER_DST) + .push(&mut video_profile_list_info); + + let image = unsafe { Image::create(device, &create_info)? }; + + let create_info = vk::ImageViewCreateInfo::default() + .image(unsafe { image.handle() }) + .view_type(vk::ImageViewType::TYPE_2D) + .format(vk::Format::G8_B8R8_2PLANE_420_UNORM) + .components(vk::ComponentMapping::default()) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }); + + unsafe { ImageView::create(&image, &create_info) } +} + +fn create_rgba_image( + device: &Device, + extent: vk::Extent2D, + usage: vk::ImageUsageFlags, +) -> Result { + let create_info = vk::ImageCreateInfo::default() + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::R8G8B8A8_UNORM) + .extent(vk::Extent3D { + width: extent.width, + height: extent.height, + depth: 1, + }) + .mip_levels(1) + .array_layers(1) + .tiling(vk::ImageTiling::OPTIMAL) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED) + .samples(vk::SampleCountFlags::TYPE_1) + .usage(usage); + + let image = unsafe { Image::create(device, &create_info)? 
}; + + let create_info = vk::ImageViewCreateInfo::default() + .image(unsafe { image.handle() }) + .view_type(vk::ImageViewType::TYPE_2D) + .format(vk::Format::R8G8B8A8_UNORM) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }); + + unsafe { ImageView::create(&image, &create_info) } +} + +unsafe fn copy_nv12_staging_buffer_to_image( + device: &Device, + recording: &RecordingCommandBuffer<'_>, + staging_buffer: &Buffer, + image: &Image, + extent: vk::Extent2D, +) { + device.ash().cmd_copy_buffer_to_image( + recording.command_buffer(), + staging_buffer.buffer(), + image.handle(), + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + &[ + // Plane 0 + vk::BufferImageCopy::default() + .image_subresource( + vk::ImageSubresourceLayers::default() + .aspect_mask(vk::ImageAspectFlags::PLANE_0) + .layer_count(1), + ) + .image_extent(vk::Extent3D { + width: extent.width, + height: extent.height, + depth: 1, + }), + // Plane 1 + vk::BufferImageCopy::default() + .buffer_offset(extent.width as vk::DeviceSize * extent.height as vk::DeviceSize) + .image_subresource( + vk::ImageSubresourceLayers::default() + .aspect_mask(vk::ImageAspectFlags::PLANE_1) + .layer_count(1), + ) + .image_extent(vk::Extent3D { + width: extent.width / 2, + height: extent.height / 2, + depth: 1, + }), + ], + ); +} + +unsafe fn copy_rgba_staging_buffer_to_image( + device: &Device, + command_buffer: &RecordingCommandBuffer<'_>, + staging_buffer: &Buffer, + image: &Image, + extent: vk::Extent2D, +) { + device.ash().cmd_copy_buffer_to_image( + command_buffer.command_buffer(), + staging_buffer.buffer(), + image.handle(), + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + &[vk::BufferImageCopy { + buffer_offset: 0, + buffer_row_length: extent.width, + buffer_image_height: extent.height, + image_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + image_offset: vk::Offset3D::default(), + image_extent: vk::Extent3D { + width: extent.width, + height: extent.height, + depth: 1, + }, + }], + ); +} + +unsafe fn release_and_prepare_for_encode_queue( + queue_family_index: u32, + encode_queue_family_index: u32, + command_buffer: &RecordingCommandBuffer<'_>, + nv12: &Image, +) { + nv12.cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::VIDEO_ENCODE_SRC_KHR, + vk::PipelineStageFlags2::NONE, + vk::AccessFlags2::NONE, + ) + .queue_family_indices(queue_family_index, encode_queue_family_index), + 0, + ); +} diff --git a/media-video/vulkan/src/encoder/input/rgb_to_nv12.rs b/media-video/vulkan/src/encoder/input/rgb_to_nv12.rs new file mode 100644 index 00000000..5c69a32a --- /dev/null +++ b/media-video/vulkan/src/encoder/input/rgb_to_nv12.rs @@ -0,0 +1,533 @@ +use crate::{ + Buffer, DescriptorSet, DescriptorSetLayout, Device, Image, ImageView, Pipeline, PipelineLayout, + RecordingCommandBuffer, Sampler, ShaderModule, VulkanError, image::ImageMemoryBarrier, +}; +use ash::vk::{self}; + +use std::{ + slice, + sync::{Arc, OnceLock}, +}; + +static SHADER: &str = include_str!("rgb_to_nv12.wgsl"); +static COMPILED: OnceLock> = OnceLock::new(); + +#[derive(Debug, Clone, Copy)] +pub enum Primaries { + BT601, + BT709, + BT2020, +} + +impl Primaries { + fn primaries(&self) -> [f32; 3] { + match self { + Primaries::BT601 => [0.299, 0.587, 0.114], + Primaries::BT709 => [0.2126, 0.7152, 0.0722], + Primaries::BT2020 => [0.2627, 0.6780, 
0.0593], + } + } +} + +/// RGB to YUV converter using a compute shader +/// +/// Since some drivers don't support binding Vulkan YUV images to compute shaders it uses intermediate +/// R8_UNORM & R8G8_UNORM images for the YUV planes +#[derive(Debug)] +pub(crate) struct RgbToNV12Converter { + device: Device, + + compute_pipeline: Pipeline, + descriptor_set: DescriptorSet, + + rgb_sampler: Sampler, + intermediate_y: ImageView, + intermediate_uv: ImageView, + + primaries_uniform: Arc>, + scale_uniform: Buffer<[f32; 2]>, + current_scale: [f32; 2], +} + +impl RgbToNV12Converter { + pub(super) fn create( + device: &Device, + primaries: Primaries, + max_extent: vk::Extent2D, + num: u32, + ) -> Result, VulkanError> { + let spv = COMPILED.get_or_init(|| ShaderModule::compile_wgsl_to_spv(SHADER)); + + let compute_shader_module = ShaderModule::from_spv(device, spv)?; + + let bindings = [ + // Input RGB image + vk::DescriptorSetLayoutBinding::default() + .binding(0) + .descriptor_type(vk::DescriptorType::SAMPLED_IMAGE) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE), + // Input RGB image sampler + vk::DescriptorSetLayoutBinding::default() + .binding(1) + .descriptor_type(vk::DescriptorType::SAMPLER) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE), + // Output Y plane + vk::DescriptorSetLayoutBinding::default() + .binding(2) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE), + // Output UV plane + vk::DescriptorSetLayoutBinding::default() + .binding(3) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE), + // Primaries + vk::DescriptorSetLayoutBinding::default() + .binding(4) + .descriptor_type(vk::DescriptorType::UNIFORM_BUFFER) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE), + // Scale + vk::DescriptorSetLayoutBinding::default() + .binding(5) + .descriptor_type(vk::DescriptorType::UNIFORM_BUFFER) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE), + ]; + + let descriptor_set_layout = DescriptorSetLayout::create(device, &bindings)?; + let pipeline_layout = PipelineLayout::create(device, &descriptor_set_layout)?; + + let mut compute_pipelines = Pipeline::create( + device, + pipeline_layout, + compute_shader_module, + vk::ShaderStageFlags::COMPUTE, + c"main", + num, + )?; + + let pool_sizes = [ + vk::DescriptorPoolSize { + ty: vk::DescriptorType::SAMPLED_IMAGE, + descriptor_count: num, // 1 sampled image binding + }, + vk::DescriptorPoolSize { + ty: vk::DescriptorType::SAMPLER, + descriptor_count: num, // 1 sampler binding + }, + vk::DescriptorPoolSize { + ty: vk::DescriptorType::STORAGE_IMAGE, + descriptor_count: 2 * num, // 2 image storage bindings + }, + vk::DescriptorPoolSize { + ty: vk::DescriptorType::UNIFORM_BUFFER, + descriptor_count: num * 2, // 2 uniform buffer bindings + }, + ]; + + let mut descriptor_sets = + DescriptorSet::create(device, &pool_sizes, &descriptor_set_layout, num)?; + + let primaries_uniform = unsafe { + let primaries = primaries.primaries(); + + let create_info = vk::BufferCreateInfo::default() + .size(size_of_val(&primaries) as vk::DeviceSize) + .usage(vk::BufferUsageFlags::UNIFORM_BUFFER); + + let mut buffer = Buffer::<[f32; 3]>::create(device, &create_info)?; + + let mut data = buffer.map(1)?; + data.data_mut()[0] = primaries; + drop(data); + + Arc::new(buffer) + }; + + let mut converter = Vec::with_capacity(num as usize); + + for _ in 0..num { + let 
intermediate_y = unsafe {
+                let intermediate_y = {
+                    let create_info = vk::ImageCreateInfo::default()
+                        .image_type(vk::ImageType::TYPE_2D)
+                        .format(vk::Format::R8_UNORM)
+                        .extent(vk::Extent3D {
+                            width: max_extent.width,
+                            height: max_extent.height,
+                            depth: 1,
+                        })
+                        .mip_levels(1)
+                        .array_layers(1)
+                        .sharing_mode(vk::SharingMode::EXCLUSIVE)
+                        .tiling(vk::ImageTiling::OPTIMAL)
+                        .samples(vk::SampleCountFlags::TYPE_1)
+                        .usage(vk::ImageUsageFlags::TRANSFER_SRC | vk::ImageUsageFlags::STORAGE)
+                        .initial_layout(vk::ImageLayout::UNDEFINED);
+
+                    Image::create(device, &create_info)
+                }?;
+
+                let create_info = vk::ImageViewCreateInfo::default()
+                    .image(intermediate_y.handle())
+                    .format(vk::Format::R8_UNORM)
+                    .view_type(vk::ImageViewType::TYPE_2D)
+                    .subresource_range(vk::ImageSubresourceRange {
+                        aspect_mask: vk::ImageAspectFlags::COLOR,
+                        base_mip_level: 0,
+                        level_count: 1,
+                        base_array_layer: 0,
+                        layer_count: 1,
+                    });
+
+                ImageView::create(&intermediate_y, &create_info)?
+            };
+
+            let intermediate_uv = unsafe {
+                let intermediate_uv = {
+                    let create_info = vk::ImageCreateInfo::default()
+                        .image_type(vk::ImageType::TYPE_2D)
+                        .format(vk::Format::R8G8_UNORM)
+                        .extent(vk::Extent3D {
+                            width: max_extent.width / 2,
+                            height: max_extent.height / 2,
+                            depth: 1,
+                        })
+                        .mip_levels(1)
+                        .array_layers(1)
+                        .sharing_mode(vk::SharingMode::EXCLUSIVE)
+                        .tiling(vk::ImageTiling::OPTIMAL)
+                        .samples(vk::SampleCountFlags::TYPE_1)
+                        .usage(vk::ImageUsageFlags::TRANSFER_SRC | vk::ImageUsageFlags::STORAGE)
+                        .initial_layout(vk::ImageLayout::UNDEFINED);
+
+                    Image::create(device, &create_info)
+                }?;
+
+                let create_info = vk::ImageViewCreateInfo::default()
+                    .image(intermediate_uv.handle())
+                    .format(vk::Format::R8G8_UNORM)
+                    .view_type(vk::ImageViewType::TYPE_2D)
+                    .subresource_range(vk::ImageSubresourceRange {
+                        aspect_mask: vk::ImageAspectFlags::COLOR,
+                        base_mip_level: 0,
+                        level_count: 1,
+                        base_array_layer: 0,
+                        layer_count: 1,
+                    });
+
+                ImageView::create(&intermediate_uv, &create_info)?
+            };
+
+            let rgb_sampler = unsafe {
+                let sampler_create_info = vk::SamplerCreateInfo::default()
+                    .mag_filter(vk::Filter::LINEAR)
+                    .min_filter(vk::Filter::LINEAR)
+                    .address_mode_u(vk::SamplerAddressMode::CLAMP_TO_EDGE)
+                    .address_mode_v(vk::SamplerAddressMode::CLAMP_TO_EDGE)
+                    .address_mode_w(vk::SamplerAddressMode::CLAMP_TO_EDGE)
+                    .mipmap_mode(vk::SamplerMipmapMode::LINEAR);
+
+                Sampler::create(device, &sampler_create_info)?
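+                // Note: linear filtering with CLAMP_TO_EDGE also performs the resampling when
+                // the input extent differs from the encode extent (see the `scale` uniform).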
+ }; + + let (scale_uniform, current_scale) = unsafe { + let scale = [1.0f32; 2]; + + let create_info = vk::BufferCreateInfo::default() + .size(size_of_val(&scale) as vk::DeviceSize) + .usage(vk::BufferUsageFlags::UNIFORM_BUFFER); + + let mut buffer = Buffer::<[f32; 2]>::create(device, &create_info)?; + + let mut data = buffer.map(1)?; + data.data_mut()[0] = scale; + drop(data); + + (buffer, scale) + }; + + converter.push(RgbToNV12Converter { + device: device.clone(), + compute_pipeline: compute_pipelines.pop().unwrap(), + descriptor_set: descriptor_sets.pop().unwrap(), + rgb_sampler, + intermediate_y, + intermediate_uv, + primaries_uniform: primaries_uniform.clone(), + scale_uniform, + current_scale, + }); + } + + Ok(converter) + } + + /// Convert `input_rgb` into `output_nv12` + pub(super) unsafe fn record_rgba_to_nv12( + &mut self, + command_buffer: &RecordingCommandBuffer<'_>, + rgb_image_extent: vk::Extent2D, + rgb_image_content_extent: vk::Extent2D, + nv12_extent: vk::Extent2D, + input_rgb: &ImageView, + output_nv12: &Image, + ) -> Result<(), VulkanError> { + let barrier = ImageMemoryBarrier::dst( + vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL, + vk::PipelineStageFlags2::COMPUTE_SHADER, + vk::AccessFlags2::SHADER_SAMPLED_READ, + ); + + let barrier = if input_rgb.image().is_foreign() { + barrier.queue_family_indices( + vk::QUEUE_FAMILY_FOREIGN_EXT, + self.device.graphics_queue_family_index(), + ) + } else { + barrier + }; + + input_rgb + .image() + .cmd_memory_barrier(command_buffer, barrier, 0); + + output_nv12.cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + vk::PipelineStageFlags2::TRANSFER, + vk::AccessFlags2::TRANSFER_WRITE, + ) + .src( + vk::ImageLayout::UNDEFINED, + vk::PipelineStageFlags2::NONE, + vk::AccessFlags2::NONE, + ), + 0, + ); + + self.intermediate_y.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::GENERAL, + vk::PipelineStageFlags2::COMPUTE_SHADER, + vk::AccessFlags2::SHADER_STORAGE_WRITE, + ), + 0, + ); + + self.intermediate_uv.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::GENERAL, + vk::PipelineStageFlags2::COMPUTE_SHADER, + vk::AccessFlags2::SHADER_STORAGE_WRITE, + ), + 0, + ); + + self.record_compute_shader( + command_buffer, + rgb_image_extent, + rgb_image_content_extent, + nv12_extent, + input_rgb, + )?; + + self.intermediate_y.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + vk::PipelineStageFlags2::TRANSFER, + vk::AccessFlags2::TRANSFER_READ, + ), + 0, + ); + + self.intermediate_uv.image().cmd_memory_barrier( + command_buffer, + ImageMemoryBarrier::dst( + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + vk::PipelineStageFlags2::TRANSFER, + vk::AccessFlags2::TRANSFER_READ, + ), + 0, + ); + + self.device.ash().cmd_copy_image( + command_buffer.command_buffer(), + self.intermediate_y.image().handle(), + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + output_nv12.handle(), + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + &[vk::ImageCopy { + src_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + src_offset: Default::default(), + dst_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::PLANE_0, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + dst_offset: Default::default(), + extent: vk::Extent3D { + width: nv12_extent.width, + height: 
nv12_extent.height, + depth: 1, + }, + }], + ); + + self.device.ash().cmd_copy_image( + command_buffer.command_buffer(), + self.intermediate_uv.image().handle(), + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + output_nv12.handle(), + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + &[vk::ImageCopy { + src_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + src_offset: Default::default(), + dst_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::PLANE_1, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + dst_offset: Default::default(), + extent: vk::Extent3D { + width: nv12_extent.width / 2, + height: nv12_extent.height / 2, + depth: 1, + }, + }], + ); + + Ok(()) + } + + unsafe fn record_compute_shader( + &mut self, + recording: &RecordingCommandBuffer<'_>, + rgb_image_extent: vk::Extent2D, + rgb_image_content_extent: vk::Extent2D, + nv12_extent: vk::Extent2D, + input_rgb: &ImageView, + ) -> Result<(), VulkanError> { + let scale = [ + rgb_image_content_extent.width as f32 + / rgb_image_extent.width as f32 + / nv12_extent.width as f32, + rgb_image_content_extent.height as f32 + / rgb_image_extent.height as f32 + / nv12_extent.height as f32, + ]; + + if self.current_scale != scale { + self.current_scale = scale; + let mut mapped = self.scale_uniform.map(1)?; + mapped.data_mut()[0] = scale; + } + + // Update descriptor set + let image_infos = [ + vk::DescriptorImageInfo::default() + .image_view(unsafe { input_rgb.handle() }) + .image_layout(vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL), + vk::DescriptorImageInfo::default().sampler(self.rgb_sampler.sampler()), + vk::DescriptorImageInfo::default() + .image_view(unsafe { self.intermediate_y.handle() }) + .image_layout(vk::ImageLayout::GENERAL), + vk::DescriptorImageInfo::default() + .image_view(unsafe { self.intermediate_uv.handle() }) + .image_layout(vk::ImageLayout::GENERAL), + ]; + + let primaries_buffer_info = vk::DescriptorBufferInfo::default() + .buffer(unsafe { self.primaries_uniform.buffer() }) + .offset(0) + .range(size_of::<[f32; 3]>() as u64); + + let scale_buffer_info = vk::DescriptorBufferInfo::default() + .buffer(unsafe { self.scale_uniform.buffer() }) + .offset(0) + .range(size_of::<[f32; 2]>() as u64); + + let descriptor_writes = [ + vk::WriteDescriptorSet::default() + .dst_set(unsafe { self.descriptor_set.handle() }) + .dst_binding(0) + .descriptor_type(vk::DescriptorType::SAMPLED_IMAGE) + .image_info(slice::from_ref(&image_infos[0])), + vk::WriteDescriptorSet::default() + .dst_set(unsafe { self.descriptor_set.handle() }) + .dst_binding(1) + .descriptor_type(vk::DescriptorType::SAMPLER) + .image_info(slice::from_ref(&image_infos[1])), + vk::WriteDescriptorSet::default() + .dst_set(unsafe { self.descriptor_set.handle() }) + .dst_binding(2) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) + .image_info(slice::from_ref(&image_infos[2])), + vk::WriteDescriptorSet::default() + .dst_set(unsafe { self.descriptor_set.handle() }) + .dst_binding(3) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) + .image_info(slice::from_ref(&image_infos[3])), + vk::WriteDescriptorSet::default() + .dst_set(unsafe { self.descriptor_set.handle() }) + .dst_binding(4) + .descriptor_type(vk::DescriptorType::UNIFORM_BUFFER) + .buffer_info(slice::from_ref(&primaries_buffer_info)), + vk::WriteDescriptorSet::default() + .dst_set(unsafe { self.descriptor_set.handle() }) + .dst_binding(5) + 
.descriptor_type(vk::DescriptorType::UNIFORM_BUFFER) + .buffer_info(slice::from_ref(&scale_buffer_info)), + ]; + + self.device + .ash() + .update_descriptor_sets(&descriptor_writes, &[]); + + // Bind pipeline and dispatch + self.device.ash().cmd_bind_pipeline( + recording.command_buffer(), + vk::PipelineBindPoint::COMPUTE, + self.compute_pipeline.pipeline(), + ); + + self.device.ash().cmd_bind_descriptor_sets( + recording.command_buffer(), + vk::PipelineBindPoint::COMPUTE, + self.compute_pipeline.pipeline_layout(), + 0, + &[self.descriptor_set.handle()], + &[], + ); + + self.device.ash().cmd_dispatch( + recording.command_buffer(), + nv12_extent.width.div_ceil(16), + nv12_extent.height.div_ceil(16), + 1, + ); + + Ok(()) + } +} diff --git a/media-video/vulkan/src/encoder/input/rgb_to_nv12.wgsl b/media-video/vulkan/src/encoder/input/rgb_to_nv12.wgsl new file mode 100644 index 00000000..10a6b4cf --- /dev/null +++ b/media-video/vulkan/src/encoder/input/rgb_to_nv12.wgsl @@ -0,0 +1,68 @@ +@group(0) @binding(0) var input_image: texture_2d; +@group(0) @binding(1) var input_sampler: sampler; +@group(0) @binding(2) var output_y: texture_storage_2d; +@group(0) @binding(3) var output_uv: texture_storage_2d; +@group(0) @binding(4) var colorspace: vec3; +@group(0) @binding(5) var scale: vec2; + +fn rgb_to_yuv(rgb: vec3) -> vec3 { + let kr = colorspace.r; + let kg = colorspace.g; + let kb = colorspace.b; + + let y = kr * rgb.r + kg * rgb.g + kb * rgb.b; + let u = (rgb.b - y) / (2.0 * (1.0 - kb)) + 0.5; + let v = (rgb.r - y) / (2.0 * (1.0 - kr)) + 0.5; + + return vec3(y, u, v); +} + +/// Write out U & V into the UV plane +fn write_uv( + pos: vec2, + yuv00: vec3, + yuv01: vec3, + yuv10: vec3, + yuv11: vec3, +) { + let u = (yuv00.y + yuv10.y + yuv01.y + yuv11.y) * 0.25; + let v = (yuv00.z + yuv10.z + yuv01.z + yuv11.z) * 0.25; + + textureStore(output_uv, pos / 2u, vec4(u, v, 0.0, 0.0)); +} + +fn to_logical(physical_pos: vec2) -> vec2 { + return (vec2(physical_pos) + 0.5) * scale; +} + +fn sample_input(pos: vec2) -> vec4 { + return textureSampleLevel(input_image, input_sampler, pos, 0.0); +} + +@compute @workgroup_size(16, 16, 1) +fn main(@builtin(global_invocation_id) global_id: vec3) { + let physical_pos = global_id.xy; + let image_size = textureDimensions(output_y); + + if physical_pos.x >= image_size.x || physical_pos.y >= image_size.y { + return; + } + + let yuv00_pos = to_logical(physical_pos); + + let yuv00 = rgb_to_yuv(sample_input(yuv00_pos).rgb); + + textureStore(output_y, physical_pos, vec4(yuv00.x, 0.0, 0.0, 0.0)); + + if (physical_pos.x % 2u) == 0 && (physical_pos.y % 2u) == 0 { + let yuv10_pos = to_logical(physical_pos + vec2(1, 0)); + let yuv01_pos = to_logical(physical_pos + vec2(0, 1)); + let yuv11_pos = to_logical(physical_pos + vec2(1, 1)); + + let yuv10 = rgb_to_yuv(vec3(sample_input(yuv10_pos).rgb)); + let yuv01 = rgb_to_yuv(vec3(sample_input(yuv01_pos).rgb)); + let yuv11 = rgb_to_yuv(vec3(sample_input(yuv11_pos).rgb)); + + write_uv(physical_pos, yuv00, yuv01, yuv10, yuv11); + } +} diff --git a/media-video/vulkan/src/encoder/mod.rs b/media-video/vulkan/src/encoder/mod.rs new file mode 100644 index 00000000..62c6dd84 --- /dev/null +++ b/media-video/vulkan/src/encoder/mod.rs @@ -0,0 +1,731 @@ +use crate::{ + Buffer, CommandBuffer, Fence, ImageView, RecordingCommandBuffer, Semaphore, + VideoFeedbackQueryPool, VideoSession, VideoSessionParameters, VulkanError, + encoder::{ + codec::{VulkanEncCodec, VulkanEncCodecUpdate}, + input::{Input, InputData, InputPixelFormat}, + }, + 
image::ImageMemoryBarrier,
+};
+use ash::vk::{self, TaggedStructure};
+use ezk_image::{ColorInfo, ColorSpace, ImageRef, PixelFormat, YuvColorInfo};
+use smallvec::SmallVec;
+use std::{collections::VecDeque, pin::Pin, time::Instant};
+
+pub mod capabilities;
+pub mod codec;
+pub mod input;
+
+#[derive(Debug, thiserror::Error)]
+pub enum VulkanEncodeFrameError {
+    #[error(
+        "Input image is larger than the configured maximum size, got={got:?} maximum={maximum:?}"
+    )]
+    InputExtentTooLarge { got: [u32; 2], maximum: [u32; 2] },
+
+    #[error("Invalid input type, expected: {expected}")]
+    InvalidInputType { expected: &'static str },
+
+    #[error(transparent)]
+    Other(#[from] VulkanError),
+}
+
+/// Configuration for [`VulkanEncoder`] set by a codec implementation
+#[derive(Debug)]
+pub struct VulkanEncoderImplConfig {
+    /// Configuration provided by the user of an encoder
+    pub user: VulkanEncoderConfig,
+
+    /// Number of encode slots.
+    ///
+    /// Must be at least 1, or the number of out-of-order frames + 1.
+    ///
+    /// E.g. H.264 uses B-frames specified by an `ip_interval`, where `num_encode_slots` must be at least `ip_interval + 1`
+    pub num_encode_slots: u32,
+
+    /// Maximum number of active references kept by the encoder
+    pub max_active_references: u32,
+
+    /// Number of DPB slots
+    pub num_dpb_slots: u32,
+}
+
+/// Configuration for [`VulkanEncoder`] set by the user of an encoder
+#[derive(Debug, Clone, Copy)]
+pub struct VulkanEncoderConfig {
+    /// Maximum resolution of the encoded video
+    pub max_encode_resolution: vk::Extent2D,
+
+    /// The initial resolution of the encoded video
+    pub initial_encode_resolution: vk::Extent2D,
+
+    /// Maximum input resolution. Input is always resized to fit the current encoder resolution.
+    pub max_input_resolution: vk::Extent2D,
+
+    /// Input is a new Vulkan image set for every frame, instead of using a staging buffer to copy image data from
+    /// host memory.
+    pub input_as_vulkan_image: bool,
+
+    /// Pixel format of the input, cannot be changed later.
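+    ///
+    /// NV12 host input is staged and copied directly into the encode image, while RGBA input
+    /// goes through the compute-shader RGB -> NV12 conversion first (see the `input` module).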
+ pub input_pixel_format: InputPixelFormat, + + /// Vulkan encoder usage flags, zero or more bits can be set + pub usage_hints: vk::VideoEncodeUsageFlagsKHR, + + /// Vulkan encoder content flags, zero or more bits can be set + pub content_hints: vk::VideoEncodeContentFlagsKHR, + + /// Vulkan tuning mode can be set to value + pub tuning_mode: vk::VideoEncodeTuningModeKHR, +} + +#[derive(Debug)] +pub struct VulkanEncoder { + max_input_extent: vk::Extent2D, + max_encode_extent: vk::Extent2D, + current_encode_extent: vk::Extent2D, + + output_buffer_size: u64, + + video_session: VideoSession, + video_session_parameters: VideoSessionParameters, + video_session_is_uninitialized: bool, + + video_feedback_query_pool: VideoFeedbackQueryPool, + + graphics_queue_family_index: u32, + graphics_queue: vk::Queue, + + // Data required when there's a dedicated encode queue + separate_queue_data: Option, + + // boxed so pointers in structures are stable + current_rc: Option>>>, + next_rc: Option>>>, + + encode_slots: Vec, + in_flight: VecDeque, + + dpb_slots: Vec>, + + output: VecDeque<(Instant, Vec)>, +} + +#[derive(Debug)] +struct VulkanEncoderSeparateQueueData { + encode_queue_family_index: u32, + encode_queue: vk::Queue, +} + +#[derive(Debug)] +pub struct VulkanEncodeSlot { + /// Index used for the video feedback query pool + index: u32, + + emit_parameters: bool, + submitted_at: Instant, + input: input::Input, + + output_buffer: Buffer, + + command_buffer: CommandBuffer, + + // Data required when there's a dedicated encode queue + separate_queue_data: Option, + + completion_fence: Fence, +} + +impl VulkanEncodeSlot { + pub fn submitted_at(&self) -> Instant { + self.submitted_at + } +} + +#[derive(Debug)] +struct VulkanEncodeSlotSeparateQueueData { + semaphore: Semaphore, + command_buffer: CommandBuffer, +} + +#[derive(Debug)] +struct DpbSlot { + image_view: ImageView, + std_reference_info: C::StdReferenceInfo, +} + +impl VulkanEncoder { + /// Maximum configured extent, cannot be changed without re-creating the encoder + pub fn max_extent(&self) -> vk::Extent2D { + self.max_encode_extent + } + + /// The extent the encoder is currently configured for, input must match this exactly, to change the current extent + /// see [`Self::update_current_extent`] + pub fn current_extent(&self) -> vk::Extent2D { + self.current_encode_extent + } + + /// Set the new extent of the encoder and updates vulkan's VideoSessionParameters. The given `parameters` must + /// match the given `extent`. + /// + /// # Panics + /// + /// If the given extent is larger than [`Self::max_extent`] + pub fn update_current_extent<'a>( + &mut self, + extent: vk::Extent2D, + parameters: &'a mut C::ParametersAddInfo<'a>, + ) -> Result<(), VulkanError> + where + C: VulkanEncCodecUpdate, + { + assert!(extent.width <= self.max_encode_extent.width); + assert!(extent.height <= self.max_encode_extent.height); + + self.current_encode_extent = extent; + + self.video_session_parameters.update(parameters)?; + + Ok(()) + } + + /// Update the current rate control settings + /// + /// # Safety + /// + /// 1. [`RateControlInfos`] is self referential and all pointers must be valid until the whole thing is dropped + /// 2. 
[`RateControlInfos`] must contain valid parameters for the session + pub unsafe fn update_rc(&mut self, rate_control: Pin>>) { + self.next_rc = Some(rate_control); + } + + fn wait_encode_slot(&mut self, encode_slot: &mut VulkanEncodeSlot) -> Result<(), VulkanError> { + encode_slot.completion_fence.wait(u64::MAX)?; + encode_slot.completion_fence.reset()?; + + Ok(()) + } + + fn read_out_encode_slot( + &mut self, + encode_slot: &mut VulkanEncodeSlot, + ) -> Result<(), VulkanError> { + if encode_slot.emit_parameters { + let parameters = + C::get_encoded_video_session_parameters(&self.video_session_parameters)?; + + self.output + .push_back((encode_slot.submitted_at, parameters)); + } + + unsafe { + let bytes_written = self + .video_feedback_query_pool + .get_bytes_written(encode_slot.index)?; + + let mapped_buffer = encode_slot.output_buffer.map(bytes_written as usize)?; + + self.output + .push_back((encode_slot.submitted_at, mapped_buffer.data().to_vec())); + } + + encode_slot.input.drop_borrowed_resources(); + + Ok(()) + } + + /// Try to get an available encode slot for a new frame, if this ever returns `Ok(None)` the encoder was not properly + /// configured for the use case + /// (e.g. if the number of B-Frames used is larger than the number of available encode slots) + pub fn pop_encode_slot(&mut self) -> Result, VulkanError> { + if let Some(encode_slot) = self.encode_slots.pop() { + return Ok(Some(encode_slot)); + } + + let Some(mut encode_slot) = self.in_flight.pop_front() else { + return Ok(None); + }; + + self.wait_encode_slot(&mut encode_slot)?; + self.read_out_encode_slot(&mut encode_slot)?; + + Ok(Some(encode_slot)) + } + + /// Poll for encoder results, returns `None` immediately if there's no in-flight encodings or all of them are still + /// in progress. + pub fn poll_result(&mut self) -> Result)>, VulkanError> { + if let Some(output) = self.output.pop_front() { + return Ok(Some(output)); + } + + if let Some(encode_slot) = self.in_flight.front_mut() { + let completed = encode_slot.completion_fence.wait(0)?; + if !completed { + return Ok(None); + } + + encode_slot.completion_fence.reset()?; + + let mut encode_slot = self + .in_flight + .pop_front() + .expect("just peeked with front_mut"); + + self.read_out_encode_slot(&mut encode_slot)?; + self.encode_slots.push(encode_slot); + } + + Ok(self.output.pop_front()) + } + + /// Blocks until an encoding slot has finished, returns `None` if no slots are in-flight. 
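+    ///
+    /// A minimal drain loop as a sketch (an already set up `encoder` and a `writer`
+    /// implementing `std::io::Write` are assumed here):
+    ///
+    /// ```ignore
+    /// // Blocks on the oldest in-flight slot until all submitted frames are read out.
+    /// while let Some((_submitted_at, bitstream)) = encoder.wait_result()? {
+    ///     writer.write_all(&bitstream)?;
+    /// }
+    /// ```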
+ pub fn wait_result(&mut self) -> Result)>, VulkanError> { + if let Some(output) = self.output.pop_front() { + return Ok(Some(output)); + } + + if let Some(mut encode_slot) = self.in_flight.pop_front() { + self.wait_encode_slot(&mut encode_slot)?; + self.read_out_encode_slot(&mut encode_slot)?; + self.encode_slots.push(encode_slot); + } + + Ok(self.output.pop_front()) + } + + /// Set the input image of an encode slot + pub fn set_input_of_encode_slot( + &mut self, + encode_slot: &mut VulkanEncodeSlot, + input_data: InputData<'_>, + ) -> Result<(), VulkanEncodeFrameError> { + let width = input_data.extent().width; + let height = input_data.extent().height; + + if width > self.max_input_extent.width || height > self.max_input_extent.height { + return Err(VulkanEncodeFrameError::InputExtentTooLarge { + got: [width, height], + maximum: [self.max_input_extent.width, self.max_input_extent.height], + }); + } + + encode_slot.submitted_at = Instant::now(); + + match input_data { + InputData::Image(image) => self.copy_image_to_encode_slot(encode_slot, image), + InputData::VulkanImage(vulkan_image_input) => match &mut encode_slot.input { + Input::ImportedRGBA { rgb_image, .. } => { + *rgb_image = Some(vulkan_image_input); + Ok(()) + } + _ => Err(VulkanEncodeFrameError::InvalidInputType { + expected: "InputData::Image", + }), + }, + } + } + + fn copy_image_to_encode_slot( + &mut self, + encode_slot: &mut VulkanEncodeSlot, + image: &dyn ImageRef, + ) -> Result<(), VulkanEncodeFrameError> { + let (dst_format, staging_buffer) = match &mut encode_slot.input { + Input::HostNV12 { staging_buffer, .. } => (PixelFormat::NV12, staging_buffer), + Input::HostRGBA { + staging_buffer, + staging_extent, + .. + } => { + *staging_extent = Some(vk::Extent2D { + width: image.width() as u32, + height: image.height() as u32, + }); + + (PixelFormat::RGBA, staging_buffer) + } + _ => { + return Err(VulkanEncodeFrameError::InvalidInputType { + expected: "InputData::Image", + }); + } + }; + + let mut mapped_buffer = staging_buffer.map(staging_buffer.capacity())?; + + let dst_color = match image.color() { + ColorInfo::RGB(rgb_color_info) => YuvColorInfo { + transfer: rgb_color_info.transfer, + primaries: rgb_color_info.primaries, + space: ColorSpace::BT709, + full_range: true, + }, + ColorInfo::YUV(yuv_color_info) => yuv_color_info, + }; + + let mut dst = ezk_image::Image::from_buffer( + dst_format, + mapped_buffer.data_mut(), + None, + image.width(), + image.height(), + dst_color.into(), + ) + .unwrap(); + + ezk_image::convert_multi_thread(image, &mut dst).unwrap(); + + drop(mapped_buffer); + + Ok(()) + } + + /// Submit an slot to be encoded + pub fn submit_encode_slot( + &mut self, + mut encode_slot: VulkanEncodeSlot, + reference_indices: SmallVec<[usize; 8]>, + setup_reference: usize, + setup_std_reference_info: C::StdReferenceInfo, + picture_info: C::PictureInfo<'_>, + emit_parameters: bool, + ) -> Result<(), VulkanError> { + encode_slot.emit_parameters = emit_parameters; + + log::trace!( + "Submit encode slot: references: {reference_indices:?}, setup_reference: {setup_reference}, emit_parameters: {emit_parameters}" + ); + + unsafe { + let mut recording = encode_slot + .command_buffer + .begin(&vk::CommandBufferBeginInfo::default())?; + + let encode_queue_family_index = self + .separate_queue_data + .as_ref() + .map(|x| x.encode_queue_family_index) + .unwrap_or(self.graphics_queue_family_index); + + encode_slot.input.prepare_input_image( + self.video_session.device(), + self.graphics_queue_family_index, + 
encode_queue_family_index, + &recording, + self.current_encode_extent, + )?; + + // When using a separate queue release the ownership and submit the first queue + if let Some(slot_separate_queue_data) = &encode_slot.separate_queue_data { + recording.end()?; + + let mut wait_semaphores = smallvec::smallvec![]; + let mut signal_semaphores = smallvec::smallvec![ + vk::SemaphoreSubmitInfo::default() + .semaphore(slot_separate_queue_data.semaphore.handle()) + .stage_mask(vk::PipelineStageFlags2::ALL_COMMANDS), + ]; + + let command_buffers = [vk::CommandBufferSubmitInfo::default() + .command_buffer(encode_slot.command_buffer.handle())]; + + encode_slot.input.submit_graphics_queue_add_semaphores( + &mut wait_semaphores, + &mut signal_semaphores, + ); + + let submit_info = vk::SubmitInfo2::default() + .command_buffer_infos(&command_buffers) + .wait_semaphore_infos(&wait_semaphores) + .signal_semaphore_infos(&signal_semaphores); + + self.video_session.device().ash().queue_submit2( + self.graphics_queue, + &[submit_info], + vk::Fence::null(), + )?; + + // Begin recording the encode command buffer + recording = slot_separate_queue_data + .command_buffer + .begin(&vk::CommandBufferBeginInfo::default())?; + } + + self.record_encode_queue( + &encode_slot, + &recording, + reference_indices, + setup_reference, + setup_std_reference_info, + picture_info, + ); + + let command_buffer = recording.command_buffer(); + + // Finish up everything + recording.end()?; + + let command_buffer_infos = + [vk::CommandBufferSubmitInfo::default().command_buffer(command_buffer)]; + + let wait_semaphore_infos: SmallVec<[vk::SemaphoreSubmitInfo; 1]> = + if let Some(slot_separate_queue_data) = &encode_slot.separate_queue_data { + smallvec::smallvec![ + vk::SemaphoreSubmitInfo::default() + .semaphore(slot_separate_queue_data.semaphore.handle()) + .stage_mask(vk::PipelineStageFlags2::ALL_COMMANDS), + ] + } else { + smallvec::smallvec![] + }; + + let submit_info = vk::SubmitInfo2::default() + .command_buffer_infos(&command_buffer_infos) + .wait_semaphore_infos(&wait_semaphore_infos); + + self.video_session.device().ash().queue_submit2( + self.separate_queue_data + .as_ref() + .map(|d| d.encode_queue) + .unwrap_or(self.graphics_queue), + &[submit_info], + encode_slot.completion_fence.handle(), + )?; + + self.in_flight.push_back(encode_slot); + } + + Ok(()) + } + + unsafe fn record_encode_queue( + &mut self, + encode_slot: &VulkanEncodeSlot, + recording: &RecordingCommandBuffer<'_>, + reference_indices: SmallVec<[usize; 8]>, + setup_reference_index: usize, + setup_std_reference_info: C::StdReferenceInfo, + picture_info: C::PictureInfo<'_>, + ) { + let device = self.video_session.device(); + + // Reset query for this encode + self.video_feedback_query_pool + .cmd_reset_query(recording.command_buffer(), encode_slot.index); + + let encode_queue_family_index = self + .separate_queue_data + .as_ref() + .map(|x| x.encode_queue_family_index) + .unwrap_or(self.graphics_queue_family_index); + + let input_image = encode_slot.input.acquire_input_image( + self.graphics_queue_family_index, + encode_queue_family_index, + recording, + ); + + // Barrier the setup dpb slot + self.dpb_slots[setup_reference_index].std_reference_info = setup_std_reference_info; + let setup_reference = &self.dpb_slots[setup_reference_index]; + + setup_reference.image_view.image().cmd_memory_barrier( + recording, + ImageMemoryBarrier::dst( + vk::ImageLayout::VIDEO_ENCODE_DPB_KHR, + vk::PipelineStageFlags2::VIDEO_ENCODE_KHR, + vk::AccessFlags2::VIDEO_ENCODE_WRITE_KHR, + 
), + setup_reference + .image_view + .subresource_range() + .base_array_layer, + ); + + let setup_reference_picture_resource_info = vk::VideoPictureResourceInfoKHR::default() + .image_view_binding(setup_reference.image_view.handle()) + .coded_extent(self.current_encode_extent); + let mut setup_reference_dpb_slot_info = + C::slot_info_from_std(&setup_reference.std_reference_info); + let setup_reference_slot_info = vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(&setup_reference_picture_resource_info) + .slot_index(setup_reference_index as i32) + .push(&mut setup_reference_dpb_slot_info); + + // Barrier the active reference dpb slots + for dpb_slot in &reference_indices { + let dpb_slot = &self.dpb_slots[*dpb_slot]; + + dpb_slot.image_view.image().cmd_memory_barrier( + recording, + ImageMemoryBarrier::dst( + vk::ImageLayout::VIDEO_ENCODE_DPB_KHR, + vk::PipelineStageFlags2::VIDEO_ENCODE_KHR, + vk::AccessFlags2::VIDEO_ENCODE_READ_KHR, + ), + setup_reference + .image_view + .subresource_range() + .base_array_layer, + ); + } + + let mut reference_slots_resources: Vec<_> = reference_indices + .iter() + .map(|index| { + let slot = &self.dpb_slots[*index]; + + let dpb_slot_info = C::slot_info_from_std(&slot.std_reference_info); + + let picture_resource_info = vk::VideoPictureResourceInfoKHR::default() + .image_view_binding(slot.image_view.handle()) + .coded_extent(self.current_encode_extent); + + (*index, picture_resource_info, dpb_slot_info) + }) + .collect(); + + let mut reference_slots: Vec<_> = reference_slots_resources + .iter_mut() + .map(|(slot_index, picture_resource, dpb_slot_info)| { + let mut info = vk::VideoReferenceSlotInfoKHR::default() + .picture_resource(picture_resource) + .slot_index(*slot_index as i32); + + info.p_next = (dpb_slot_info as *mut C::DpbSlotInfo<'_>) + .cast_const() + .cast(); + + info + }) + .collect(); + + reference_slots.push(setup_reference_slot_info); + reference_slots.last_mut().unwrap().slot_index = -1; + + log::trace!( + "Begin reference_slots: {:?}", + reference_slots + .iter() + .map(|slot| slot.slot_index) + .collect::>() + ); + + { + let mut begin_info = vk::VideoBeginCodingInfoKHR::default() + .video_session(self.video_session.video_session()) + .video_session_parameters(self.video_session_parameters.video_session_parameters()) + .reference_slots(&reference_slots); + + if let Some(rc) = &self.current_rc { + begin_info.p_next = (&raw const rc.info).cast(); + } + + // Issue the begin video coding command + let cmd_begin_video_coding = device + .ash_video_queue_device() + .fp() + .cmd_begin_video_coding_khr; + (cmd_begin_video_coding)(recording.command_buffer(), &raw const begin_info); + } + + if self.video_session_is_uninitialized || self.next_rc.is_some() { + // Update the rate control configs after begin_video_coding, so the rate control passed reflects the current + // state of the video session. 
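+            // The first control command after session creation also carries the RESET flag,
+            // and any rate-control update queued via `update_rc` is applied in the same command.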
+ self.current_rc = self.next_rc.take(); + + self.control_video_coding(recording, self.video_session_is_uninitialized); + + self.video_session_is_uninitialized = false; + } + + let src_picture_resource_info = vk::VideoPictureResourceInfoKHR::default() + .image_view_binding(input_image.handle()) + .coded_offset(vk::Offset2D { x: 0, y: 0 }) + .coded_extent(self.current_encode_extent) + .base_array_layer(0); + + // Do not include the setup reference in the vk::VideoEncodeInfoKHR::reference_slots + let _setup_slot = reference_slots.pop(); + + { + let mut encode_info = vk::VideoEncodeInfoKHR::default() + .src_picture_resource(src_picture_resource_info) + .dst_buffer(encode_slot.output_buffer.buffer()) + .dst_buffer_range(self.output_buffer_size) + .reference_slots(&reference_slots) + .flags(vk::VideoEncodeFlagsKHR::empty()) + .setup_reference_slot(&setup_reference_slot_info); + + encode_info.p_next = (&raw const picture_info).cast(); + + self.video_feedback_query_pool + .cmd_begin_query(recording.command_buffer(), encode_slot.index); + + let cmd_encode_video = device + .ash_video_encode_queue_device() + .fp() + .cmd_encode_video_khr; + (cmd_encode_video)(recording.command_buffer(), &raw const encode_info); + } + + self.video_feedback_query_pool + .cmd_end_query(recording.command_buffer(), encode_slot.index); + + let end_video_coding_info = vk::VideoEndCodingInfoKHR::default(); + let cmd_end_video_coding = device + .ash_video_queue_device() + .fp() + .cmd_end_video_coding_khr; + cmd_end_video_coding(recording.command_buffer(), &raw const end_video_coding_info); + } + + unsafe fn control_video_coding( + &self, + command_buffer: &RecordingCommandBuffer<'_>, + reset: bool, + ) { + let mut video_coding_control_info = vk::VideoCodingControlInfoKHR::default(); + + if reset { + video_coding_control_info.flags |= vk::VideoCodingControlFlagsKHR::RESET; + }; + + if let Some(rc) = &self.current_rc { + video_coding_control_info.flags |= vk::VideoCodingControlFlagsKHR::ENCODE_RATE_CONTROL; + video_coding_control_info.p_next = (&raw const rc.info).cast(); + } + + let cmd_control_video_coding = self + .video_session + .device() + .ash_video_queue_device() + .fp() + .cmd_control_video_coding_khr; + + (cmd_control_video_coding)( + command_buffer.command_buffer(), + &raw const video_coding_control_info, + ); + } +} + +impl Drop for VulkanEncoder { + fn drop(&mut self) { + // Wait for all encode operations to complete + while let Ok(Some(..)) = self.wait_result() {} + } +} + +/// Rate control parameters +/// +/// See [`VulkanEncoder::update_rc`] +#[derive(Debug)] +pub struct RateControlInfos { + pub codec_layer: C::RateControlLayerInfo<'static>, + pub layer: vk::VideoEncodeRateControlLayerInfoKHR<'static>, + pub codec_info: C::RateControlInfo<'static>, + pub info: vk::VideoEncodeRateControlInfoKHR<'static>, +} diff --git a/media-video/vulkan/src/error.rs b/media-video/vulkan/src/error.rs new file mode 100644 index 00000000..a59d2553 --- /dev/null +++ b/media-video/vulkan/src/error.rs @@ -0,0 +1,67 @@ +use std::{backtrace::Backtrace, error::Error, fmt}; + +use ash::vk; + +#[derive(Debug)] +pub enum VulkanError { + Native { + backtrace: Backtrace, + result: vk::Result, + }, + + MissingExtension(&'static str), + + CannotFindMemoryType { + memory_type_bits: u32, + properties: vk::MemoryPropertyFlags, + }, + + QueryFailed { + status: vk::QueryResultStatusKHR, + }, + + InvalidArgument { + message: &'static str, + }, +} + +impl From for VulkanError { + #[track_caller] + fn from(result: vk::Result) -> Self { + 
VulkanError::Native {
+            backtrace: Backtrace::capture(),
+            result,
+        }
+    }
+}
+
+impl fmt::Display for VulkanError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            VulkanError::Native { backtrace, result } => {
+                write!(
+                    f,
+                    "Vulkan call failed with result={result}, backtrace={backtrace}"
+                )
+            }
+            VulkanError::MissingExtension(extension) => {
+                write!(f, "Missing required extension {extension}")
+            }
+            VulkanError::CannotFindMemoryType {
+                memory_type_bits,
+                properties,
+            } => write!(
+                f,
+                "Failed to find a memory type that satisfies the constraints memory_type_bits={memory_type_bits:b}, properties={properties:?}"
+            ),
+            VulkanError::QueryFailed { status } => {
+                write!(f, "Query failed, status={status:?}")
+            }
+            VulkanError::InvalidArgument { message } => {
+                write!(f, "Invalid argument, {message}")
+            }
+        }
+    }
+}
+
+impl Error for VulkanError {}
diff --git a/media-video/vulkan/src/fence.rs b/media-video/vulkan/src/fence.rs
new file mode 100644
index 00000000..a4420603
--- /dev/null
+++ b/media-video/vulkan/src/fence.rs
@@ -0,0 +1,53 @@
+use crate::{Device, VulkanError};
+use ash::vk;
+
+#[derive(Debug)]
+pub struct Fence {
+    device: Device,
+    fence: vk::Fence,
+}
+
+impl Fence {
+    pub fn create(device: &Device) -> Result<Self, VulkanError> {
+        unsafe {
+            let fence = device
+                .ash()
+                .create_fence(&vk::FenceCreateInfo::default(), None)?;
+
+            Ok(Self {
+                device: device.clone(),
+                fence,
+            })
+        }
+    }
+
+    pub unsafe fn handle(&self) -> vk::Fence {
+        self.fence
+    }
+
+    pub fn wait(&self, timeout: u64) -> Result<bool, VulkanError> {
+        unsafe {
+            match self
+                .device
+                .ash()
+                .wait_for_fences(&[self.fence], true, timeout)
+            {
+                Ok(()) => Ok(true),
+                Err(result) if result == vk::Result::TIMEOUT => Ok(false),
+                Err(e) => Err(e.into()),
+            }
+        }
+    }
+
+    pub fn reset(&self) -> Result<(), VulkanError> {
+        unsafe { Ok(self.device.ash().reset_fences(&[self.fence])?)
} + } +} + +impl Drop for Fence { + fn drop(&mut self) { + unsafe { + self.device.ash().destroy_fence(self.fence, None); + } + } +} diff --git a/media-video/vulkan/src/image.rs b/media-video/vulkan/src/image.rs new file mode 100644 index 00000000..475c2ddd --- /dev/null +++ b/media-video/vulkan/src/image.rs @@ -0,0 +1,565 @@ +use crate::{Device, RecordingCommandBuffer, VulkanError}; +use ash::vk::{self, Handle, TaggedStructure}; +use smallvec::{SmallVec, smallvec}; +use std::{ + os::fd::{AsRawFd, OwnedFd}, + sync::{Arc, Mutex}, +}; + +#[derive(Debug, Clone)] +pub struct Image { + inner: Arc, +} + +#[derive(Debug)] +struct Inner { + device: Device, + image: vk::Image, + memory: SmallVec<[vk::DeviceMemory; 1]>, + extent: vk::Extent3D, + usage: vk::ImageUsageFlags, + foreign: bool, + + state: Mutex>, +} + +#[derive(Debug, Clone)] +struct State { + current_layout: vk::ImageLayout, + last_access: vk::AccessFlags2, + last_stage: vk::PipelineStageFlags2, +} + +#[derive(Debug)] +pub struct DrmPlane { + pub fd: OwnedFd, + pub offset: usize, + pub stride: usize, +} + +impl Image { + pub unsafe fn create( + device: &Device, + create_info: &vk::ImageCreateInfo<'_>, + ) -> Result { + let image = device.ash().create_image(create_info, None)?; + let memory_requirements = device.ash().get_image_memory_requirements(image); + + let alloc_info = vk::MemoryAllocateInfo::default() + .allocation_size(memory_requirements.size) + .memory_type_index(device.find_memory_type( + memory_requirements.memory_type_bits, + vk::MemoryPropertyFlags::DEVICE_LOCAL, + )?); + + let memory = device.ash().allocate_memory(&alloc_info, None)?; + device.ash().bind_image_memory(image, memory, 0)?; + + Ok(Self { + inner: Arc::new(Inner { + device: device.clone(), + image, + memory: smallvec![memory], + extent: create_info.extent, + usage: create_info.usage, + foreign: false, + state: Mutex::new(smallvec::smallvec![ + State { + current_layout: create_info.initial_layout, + last_access: vk::AccessFlags2::NONE, + last_stage: vk::PipelineStageFlags2::TOP_OF_PIPE, + }; + create_info.array_layers as usize + ]), + }), + }) + } + + #[allow(clippy::too_many_arguments)] + pub unsafe fn import_dma_fd( + device: &Device, + width: u32, + height: u32, + mut planes: SmallVec<[DrmPlane; 4]>, + modifier: u64, + format: vk::Format, + usage: vk::ImageUsageFlags, + ) -> Result { + // Define the plane layout of the image inside the dma buffer + let plane_layouts: SmallVec<[vk::SubresourceLayout; 4]> = planes + .iter() + .map(|plane| { + vk::SubresourceLayout::default() + .offset(plane.offset as vk::DeviceSize) + .row_pitch(plane.stride as vk::DeviceSize) + }) + .collect(); + + let mut drm_modifier_info = vk::ImageDrmFormatModifierExplicitCreateInfoEXT::default() + .drm_format_modifier(modifier) + .plane_layouts(&plane_layouts); + + // Set the DMA_BUF_EXT handle for image creation + let mut external_memory_image_info = vk::ExternalMemoryImageCreateInfo::default() + .handle_types(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT); + + let extent = vk::Extent3D { + width, + height, + depth: 1, + }; + + let image_create_info = vk::ImageCreateInfo::default() + .flags(vk::ImageCreateFlags::empty()) + .image_type(vk::ImageType::TYPE_2D) + .format(format) + .extent(extent) + .mip_levels(1) + .array_layers(1) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(vk::ImageTiling::DRM_FORMAT_MODIFIER_EXT) + .usage(usage) + .initial_layout(vk::ImageLayout::UNDEFINED) + .push(&mut external_memory_image_info) + .push(&mut drm_modifier_info); + + // Create the image + let 
image = unsafe { device.ash().create_image(&image_create_info, None)? }; + + let memory_requirements_info = vk::ImageMemoryRequirementsInfo2::default().image(image); + + let mut memory_requirements = vk::MemoryRequirements2::default(); + + // Bind external dma buf memory to the image + unsafe { + device + .ash() + .get_image_memory_requirements2(&memory_requirements_info, &mut memory_requirements) + }; + + let mut memory_requirements = memory_requirements.memory_requirements; + + let mut memory_fd_properties = vk::MemoryFdPropertiesKHR::default(); + ash::khr::external_memory_fd::Device::load(device.instance().ash(), device.ash()) + .get_memory_fd_properties( + vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT, + planes[0].fd.as_raw_fd(), + &mut memory_fd_properties, + )?; + + memory_requirements.memory_type_bits &= memory_fd_properties.memory_type_bits; + + let memory_type_index = device.find_memory_type( + memory_requirements.memory_type_bits, + vk::MemoryPropertyFlags::DEVICE_LOCAL, + )?; + + let mut dedicated = vk::MemoryDedicatedAllocateInfo::default().image(image); + + let mut import_fd_info = vk::ImportMemoryFdInfoKHR::default() + .handle_type(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT) + .fd(planes[0].fd.as_raw_fd()); + + let allocate_info = vk::MemoryAllocateInfo::default() + .allocation_size(memory_requirements.size) + .memory_type_index(memory_type_index) + .push(&mut import_fd_info) + .push(&mut dedicated); + + // Create vulkan memory using the dma buf fd + let memory = unsafe { device.ash().allocate_memory(&allocate_info, None)? }; + + // Finally bind the image memory, when this call succeeds the fd ownership is transferred to vulkan + let bind_result = unsafe { device.ash().bind_image_memory(image, memory, 0) }; + + match bind_result { + Ok(()) => { + std::mem::forget(planes.remove(0)); + } + Err(e) => { + device.ash().destroy_image(image, None); + device.ash().free_memory(memory, None); + + return Err(e.into()); + } + } + + Ok(Self { + inner: Arc::new(Inner { + device: device.clone(), + image, + memory: smallvec![memory], + extent, + usage, + foreign: true, + state: Mutex::new(smallvec::smallvec![State { + current_layout: vk::ImageLayout::UNDEFINED, + last_access: vk::AccessFlags2::NONE, + last_stage: vk::PipelineStageFlags2::NONE, + }]), + }), + }) + } + + #[allow(clippy::too_many_arguments)] + pub unsafe fn import_planar_dma_fd( + device: &Device, + width: u32, + height: u32, + planes: SmallVec<[DrmPlane; 4]>, + modifier: u64, + format: vk::Format, + usage: vk::ImageUsageFlags, + ) -> Result { + // Define the plane layout of the image inside the dma buffer + let plane_layouts: SmallVec<[vk::SubresourceLayout; 4]> = planes + .iter() + .map(|plane| { + vk::SubresourceLayout::default() + .offset(plane.offset as vk::DeviceSize) + .row_pitch(plane.stride as vk::DeviceSize) + }) + .collect(); + + let mut drm_modifier_info = vk::ImageDrmFormatModifierExplicitCreateInfoEXT::default() + .drm_format_modifier(modifier) + .plane_layouts(&plane_layouts); + + // Set the DMA_BUF_EXT handle for image creation + let mut external_memory_image_info = vk::ExternalMemoryImageCreateInfo::default() + .handle_types(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT); + + let extent = vk::Extent3D { + width, + height, + depth: 1, + }; + + let image_create_info = vk::ImageCreateInfo::default() + .flags(vk::ImageCreateFlags::DISJOINT) + .image_type(vk::ImageType::TYPE_2D) + .format(format) + .extent(extent) + .mip_levels(1) + .array_layers(1) + .samples(vk::SampleCountFlags::TYPE_1) + 
+            .tiling(vk::ImageTiling::DRM_FORMAT_MODIFIER_EXT)
+            .usage(usage)
+            .sharing_mode(vk::SharingMode::EXCLUSIVE)
+            .initial_layout(vk::ImageLayout::UNDEFINED)
+            .push(&mut external_memory_image_info)
+            .push(&mut drm_modifier_info);
+
+        // Create the image
+        let image = unsafe { device.ash().create_image(&image_create_info, None)? };
+
+        let mut allocated_memory = smallvec![];
+        let mut plane_bind_infos: SmallVec<[vk::BindImagePlaneMemoryInfo; 4]> = smallvec![];
+
+        for (i, plane) in planes.iter().enumerate() {
+            let plane_aspect = match i {
+                0 => vk::ImageAspectFlags::MEMORY_PLANE_0_EXT,
+                1 => vk::ImageAspectFlags::MEMORY_PLANE_1_EXT,
+                2 => vk::ImageAspectFlags::MEMORY_PLANE_2_EXT,
+                3 => vk::ImageAspectFlags::MEMORY_PLANE_3_EXT,
+                _ => {
+                    return Err(VulkanError::InvalidArgument {
+                        message: "too many planes",
+                    });
+                }
+            };
+
+            let mut plane_memory_requirements =
+                vk::ImagePlaneMemoryRequirementsInfo::default().plane_aspect(plane_aspect);
+
+            let memory_requirements_info = vk::ImageMemoryRequirementsInfo2::default()
+                .image(image)
+                .push(&mut plane_memory_requirements);
+
+            let mut memory_requirements = vk::MemoryRequirements2::default();
+
+            // Query the memory requirements of this plane
+            unsafe {
+                device.ash().get_image_memory_requirements2(
+                    &memory_requirements_info,
+                    &mut memory_requirements,
+                )
+            };
+
+            let memory_requirements = memory_requirements.memory_requirements;
+
+            let memory_type_index = device.find_memory_type(
+                memory_requirements.memory_type_bits,
+                vk::MemoryPropertyFlags::empty(),
+            )?;
+
+            let mut dedicated = vk::MemoryDedicatedAllocateInfo::default().image(image);
+
+            let mut import_fd_info = vk::ImportMemoryFdInfoKHR::default()
+                .handle_type(vk::ExternalMemoryHandleTypeFlags::DMA_BUF_EXT)
+                .fd(plane.fd.as_raw_fd());
+
+            let allocate_info = vk::MemoryAllocateInfo::default()
+                .allocation_size(memory_requirements.size)
+                .memory_type_index(memory_type_index)
+                .push(&mut import_fd_info)
+                .push(&mut dedicated);
+
+            // Create vulkan memory using the dma buf fd
+            let memory = unsafe { device.ash().allocate_memory(&allocate_info, None)? };
+
+            allocated_memory.push(memory);
+            plane_bind_infos
+                .push(vk::BindImagePlaneMemoryInfo::default().plane_aspect(plane_aspect));
+        }
+
+        let bind_infos: SmallVec<[_; 4]> = plane_bind_infos
+            .iter_mut()
+            .zip(allocated_memory.iter())
+            .map(|(plane, memory)| {
+                vk::BindImageMemoryInfo::default()
+                    .image(image)
+                    .memory(*memory)
+                    .push(plane)
+            })
+            .collect();
+
+        // Finally bind the image memory; once this call succeeds, ownership of the fds has been
+        // transferred to Vulkan
+        let bind_result = unsafe { device.ash().bind_image_memory2(&bind_infos) };
+
+        match bind_result {
+            Ok(()) => {
+                for plane in planes {
+                    std::mem::forget(plane.fd);
+                }
+            }
+            Err(e) => {
+                device.ash().destroy_image(image, None);
+
+                for memory in allocated_memory {
+                    device.ash().free_memory(memory, None);
+                }
+
+                return Err(e.into());
+            }
+        }
+
+        Ok(Self {
+            inner: Arc::new(Inner {
+                device: device.clone(),
+                image,
+                memory: allocated_memory,
+                extent,
+                usage,
+                foreign: true,
+                state: Mutex::new(smallvec::smallvec![State {
+                    current_layout: vk::ImageLayout::UNDEFINED,
+                    last_access: vk::AccessFlags2::NONE,
+                    last_stage: vk::PipelineStageFlags2::NONE,
+                }]),
+            }),
+        })
+    }
+
+    pub(crate) fn device(&self) -> &Device {
+        &self.inner.device
+    }
+
+    pub unsafe fn handle(&self) -> vk::Image {
+        self.inner.image
+    }
+
+    pub fn is_foreign(&self) -> bool {
+        self.inner.foreign
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn cmd_memory_barrier(
+        &self,
+        command_buffer: &RecordingCommandBuffer<'_>,
+        info: ImageMemoryBarrier,
+        base_array_layer: u32,
+    ) {
+        let mut state = self.inner.state.lock().unwrap();
+        let state = &mut state[base_array_layer as usize];
+
+        let (old_layout, src_stage_mask, src_access_mask) = match info.src {
+            Some(src) => src,
+            None => (state.current_layout, state.last_stage, state.last_access),
+        };
+
+        let (new_layout, dst_stage_mask, dst_access_mask) = info.dst;
+
+        let barrier = vk::ImageMemoryBarrier2::default()
+            .image(unsafe { self.handle() })
+            .old_layout(old_layout)
+            .new_layout(new_layout)
+            .src_queue_family_index(info.src_queue_family_index)
+            .dst_queue_family_index(info.dst_queue_family_index)
+            .src_stage_mask(src_stage_mask)
+            .src_access_mask(src_access_mask)
+            .dst_stage_mask(dst_stage_mask)
+            .dst_access_mask(dst_access_mask)
+            .subresource_range(vk::ImageSubresourceRange {
+                aspect_mask: vk::ImageAspectFlags::COLOR,
+                base_mip_level: 0,
+                level_count: 1,
+                base_array_layer,
+                layer_count: 1,
+            });
+
+        state.current_layout = new_layout;
+        state.last_stage = dst_stage_mask;
+        state.last_access = dst_access_mask;
+
+        let barriers = [barrier];
+        let dependency_info = vk::DependencyInfoKHR::default().image_memory_barriers(&barriers);
+
+        unsafe {
+            self.inner
+                .device
+                .ash()
+                .cmd_pipeline_barrier2(command_buffer.command_buffer(), &dependency_info);
+        }
+    }
+
+    /// Create a [`wgpu::Texture`] handle from this Image
+    ///
+    /// # Safety
+    ///
+    /// - Image must be created from the same Device as the one passed in as parameter
+    /// - Image must be format `R8G8B8A8_UNORM`
+    /// - Image must have one mip level
+    /// - Image must have a sample count of 1
+    /// - Image must be 2D
+    pub unsafe fn to_rgba8_wgpu_texture(&self, device: &wgpu::Device) -> wgpu::Texture {
+        let size = wgpu::Extent3d {
+            width: self.inner.extent.width,
+            height: self.inner.extent.height,
+            depth_or_array_layers: self.inner.extent.depth,
+        };
+
+        let this = self.clone();
+        let hal_texture = device
+            .as_hal::<wgpu::hal::api::Vulkan>()
+            .unwrap()
+            .texture_from_raw(
+                ash_stable::vk::Handle::from_raw(self.inner.image.as_raw()),
+                &wgpu::hal::TextureDescriptor {
+                    label: None,
+                    size,
+                    mip_level_count: 1,
+                    sample_count: 1,
+                    dimension: wgpu::TextureDimension::D2,
+                    format: wgpu::TextureFormat::Rgba8Unorm,
+                    usage: wgpu::TextureUses::UNKNOWN,
+                    memory_flags: wgpu::hal::MemoryFlags::empty(),
+                    view_formats: vec![],
+                },
+                Some(Box::new(|| drop(this))),
+            );
+
+        let mut usage = wgpu::TextureUsages::empty();
+
+        if self.inner.usage.contains(vk::ImageUsageFlags::TRANSFER_SRC) {
+            usage.insert(wgpu::TextureUsages::COPY_SRC);
+        }
+
+        if self.inner.usage.contains(vk::ImageUsageFlags::TRANSFER_DST) {
+            usage.insert(wgpu::TextureUsages::COPY_DST);
+        }
+
+        if self.inner.usage.contains(vk::ImageUsageFlags::SAMPLED) {
+            usage.insert(wgpu::TextureUsages::TEXTURE_BINDING);
+        }
+
+        if self.inner.usage.contains(vk::ImageUsageFlags::STORAGE) {
+            usage.insert(wgpu::TextureUsages::STORAGE_BINDING);
+        }
+
+        if self
+            .inner
+            .usage
+            .contains(vk::ImageUsageFlags::COLOR_ATTACHMENT)
+        {
+            usage.insert(wgpu::TextureUsages::RENDER_ATTACHMENT);
+        }
+
+        device.create_texture_from_hal::<wgpu::hal::api::Vulkan>(
+            hal_texture,
+            &wgpu::TextureDescriptor {
+                label: None,
+                size,
+                mip_level_count: 1,
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+                format: wgpu::TextureFormat::Rgba8Unorm,
+                usage,
+                view_formats: &[],
+            },
+        )
+    }
+}
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        unsafe {
+            self.device.ash().destroy_image(self.image, None);
+
+            for memory in &self.memory {
+                self.device.ash().free_memory(*memory, None);
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct ImageMemoryBarrier {
+    dst: (vk::ImageLayout, vk::PipelineStageFlags2, vk::AccessFlags2),
+    src: Option<(vk::ImageLayout, vk::PipelineStageFlags2, vk::AccessFlags2)>,
+
+    src_queue_family_index: u32,
+    dst_queue_family_index: u32,
+}
+
+impl ImageMemoryBarrier {
+    pub fn dst(
+        new_layout: vk::ImageLayout,
+        dst_stage_mask: vk::PipelineStageFlags2,
+        dst_access_flags: vk::AccessFlags2,
+    ) -> Self {
+        ImageMemoryBarrier {
+            dst: (new_layout, dst_stage_mask, dst_access_flags),
+            src: None,
+            src_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
+            dst_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
+        }
+    }
+
+    pub fn src(
+        mut self,
+        old_layout: vk::ImageLayout,
+        src_stage_mask: vk::PipelineStageFlags2,
+        src_access_flags: vk::AccessFlags2,
+    ) -> Self {
+        self.src = Some((old_layout, src_stage_mask, src_access_flags));
+        self
+    }
+
+    pub fn queue_family_indices(
+        mut self,
+        src_queue_family_index: u32,
+        dst_queue_family_index: u32,
+    ) -> Self {
+        self.src_queue_family_index = src_queue_family_index;
+        self.dst_queue_family_index = dst_queue_family_index;
+        self
+    }
+}
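A short sketch of the barrier API above: because `Image` tracks the last layout, stage and access per array layer, a transition only needs its destination state. The command buffer here is an assumption; it would come from the crate's `CommandBuffer` recording flow.

```rust
use vulkan::{ash::vk, Image, ImageMemoryBarrier, RecordingCommandBuffer};

fn transition_for_sampling(image: &Image, cmd: &RecordingCommandBuffer<'_>) {
    // src is omitted, so the old layout/stage/access are taken from the state
    // recorded by the previous cmd_memory_barrier call on layer 0.
    let barrier = ImageMemoryBarrier::dst(
        vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL,
        vk::PipelineStageFlags2::FRAGMENT_SHADER,
        vk::AccessFlags2::SHADER_SAMPLED_READ,
    );

    image.cmd_memory_barrier(cmd, barrier, 0);
}
```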
diff --git a/media-video/vulkan/src/image_view.rs b/media-video/vulkan/src/image_view.rs
new file mode 100644
index 00000000..19090216
--- /dev/null
+++ b/media-video/vulkan/src/image_view.rs
@@ -0,0 +1,56 @@
+use crate::{Image, VulkanError};
+use ash::vk;
+use std::sync::Arc;
+
+#[derive(Debug, Clone)]
+pub struct ImageView {
+    inner: Arc<Inner>,
+}
+
+#[derive(Debug)]
+struct Inner {
+    image: Image,
+    handle: vk::ImageView,
+    subresource_range: vk::ImageSubresourceRange,
+}
+
+impl ImageView {
+    pub unsafe fn create(
+        image: &Image,
+        create_info: &vk::ImageViewCreateInfo<'_>,
+    ) -> Result<Self, VulkanError> {
+        let device = image.device();
+
+        let handle = device.ash().create_image_view(create_info, None)?;
+
+        Ok(Self {
+            inner: Arc::new(Inner {
+                image: image.clone(),
+                handle,
+                subresource_range: create_info.subresource_range,
+            }),
+        })
+    }
+
+    pub fn image(&self) -> &Image {
+        &self.inner.image
+    }
+
+    pub unsafe fn handle(&self) -> vk::ImageView {
+        self.inner.handle
+    }
+
+    pub(crate) fn subresource_range(&self) -> &vk::ImageSubresourceRange {
+        &self.inner.subresource_range
+    }
+}
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        unsafe {
+            let device = self.image.device();
+
+            device.ash().destroy_image_view(self.handle, None);
+        }
+    }
+}
diff --git a/media-video/vulkan/src/instance.rs b/media-video/vulkan/src/instance.rs
new file mode 100644
index 00000000..65fe1769
--- /dev/null
+++ b/media-video/vulkan/src/instance.rs
@@ -0,0 +1,240 @@
+use ash::{
+    ext::debug_utils,
+    khr::video_queue,
+    vk::{self, Handle as _, TaggedStructure},
+};
+use ash_stable::vk::Handle;
+use std::{ffi::CStr, fmt, sync::Arc};
+
+use crate::{PhysicalDevice, VulkanError};
+
+const INSTANCE_API_VERSION: u32 = vk::make_api_version(0, 1, 3, 316);
+
+#[derive(Clone)]
+pub struct Instance {
+    inner: Arc<Inner>,
+}
+
+struct Inner {
+    _entry: ash::Entry,
+    _stable_entry: Option<ash_stable::Entry>,
+    instance: ash::Instance,
+    video_queue_instance: video_queue::Instance,
+
+    // This instance was created by wgpu, so hold a reference and don't destroy it on drop
+    wgpu: Option<wgpu::Instance>,
+
+    // enabled_extensions: Vec<&'static CStr>,
+    debug_messenger: Option<vk::DebugUtilsMessengerEXT>,
+}
+
+impl fmt::Debug for Instance {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Instance")
+            .field(&self.inner.instance.handle())
+            .finish_non_exhaustive()
+    }
+}
+
+impl Instance {
+    pub const INSTANCE_VERSION: u32 = vk::make_api_version(0, 1, 3, 316);
+
+    pub unsafe fn from_wgpu(wgpu: wgpu::Instance) -> Instance {
+        let vk_instance = wgpu.as_hal::<wgpu::hal::api::Vulkan>().unwrap();
+
+        let stable_entry = vk_instance.shared_instance().entry().clone();
+        let stable_instance = vk_instance.shared_instance().raw_instance().clone();
+
+        let entry = ash::Entry::load().unwrap();
+        let instance = ash::vk::Instance::from_raw(stable_instance.handle().as_raw());
+        let instance = ash::Instance::load(entry.static_fn(), instance);
+
+        let video_queue_instance = video_queue::Instance::load(&entry, &instance);
+
+        Instance {
+            inner: Arc::new(Inner {
+                _entry: entry,
+                _stable_entry: Some(stable_entry),
+                instance,
+                video_queue_instance,
+                wgpu: Some(wgpu),
+                debug_messenger: None,
+            }),
+        }
+    }
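A hedged sketch of standalone use (without wgpu): load the Vulkan entry points, create an `Instance`, and list the physical devices by name. The error-boxing is an assumption for brevity.

```rust
use vulkan::Instance;

fn list_devices() -> Result<(), Box<dyn std::error::Error>> {
    // Load the Vulkan loader; no additional instance extensions requested.
    let entry = unsafe { vulkan::ash::Entry::load()? };
    let instance = Instance::create(entry, &[])?;

    for physical_device in instance.physical_devices()? {
        println!("{}", physical_device.name());
    }

    Ok(())
}
```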
+
+    pub fn create(
+        entry: ash::Entry,
+        additional_extensions: &[&'static CStr],
+    ) -> Result<Self, VulkanError> {
+        unsafe {
+            let app_info = vk::ApplicationInfo {
+                api_version: INSTANCE_API_VERSION,
+                ..Default::default()
+            };
+
+            let instance_layers = [
+                #[cfg(debug_assertions)]
+                c"VK_LAYER_KHRONOS_validation".as_ptr(),
+            ];
+
+            let mut instance_extensions = vec![
+                #[cfg(debug_assertions)]
+                ash::ext::debug_utils::NAME.as_ptr(),
+            ];
+
+            for extension in additional_extensions {
+                instance_extensions.push(extension.as_ptr());
+            }
+
+            let enabled = [
+                // vk::ValidationFeatureEnableEXT::BEST_PRACTICES, // TODO: SEGFAULT under RADV
+                vk::ValidationFeatureEnableEXT::SYNCHRONIZATION_VALIDATION,
+            ];
+            let mut validation_features =
+                vk::ValidationFeaturesEXT::default().enabled_validation_features(&enabled);
+
+            let mut create_info = vk::InstanceCreateInfo {
+                p_application_info: &app_info,
+                ..Default::default()
+            }
+            .enabled_layer_names(&instance_layers)
+            .enabled_extension_names(&instance_extensions);
+
+            if cfg!(debug_assertions) {
+                create_info = create_info.push(&mut validation_features);
+            }
+
+            let instance = entry.create_instance(&create_info, None)?;
+
+            let debug_messenger = if cfg!(debug_assertions) {
+                Some(
+                    debug_utils::Instance::load(&entry, &instance).create_debug_utils_messenger(
+                        &vk::DebugUtilsMessengerCreateInfoEXT::default()
+                            .message_severity(
+                                vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE
+                                    | vk::DebugUtilsMessageSeverityFlagsEXT::WARNING
+                                    | vk::DebugUtilsMessageSeverityFlagsEXT::INFO
+                                    | vk::DebugUtilsMessageSeverityFlagsEXT::ERROR,
+                            )
+                            .message_type(
+                                vk::DebugUtilsMessageTypeFlagsEXT::GENERAL
+                                    | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION
+                                    | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE,
+                            )
+                            .pfn_user_callback(Some(debug_utils_callback)),
+                        None,
+                    )?,
+                )
+            } else {
+                None
+            };
+
+            let video_queue_instance = video_queue::Instance::load(&entry, &instance);
+
+            Ok(Self {
+                inner: Arc::new(Inner {
+                    _entry: entry,
+                    _stable_entry: None,
+                    instance,
+                    // enabled_extensions: instance_extensions
+                    //     .into_iter()
+                    //     .map(|c| CStr::from_ptr(c))
+                    //     .collect(),
+                    video_queue_instance,
+                    wgpu: None,
+                    debug_messenger,
+                }),
+            })
+        }
+    }
+
+    // pub fn to_wgpu(&self) -> Result<wgpu::Instance, wgpu::hal::InstanceError> {
+    //     let this = self.clone();
+
+    //     unsafe {
+    //         let hal_instance = wgpu::hal::vulkan::Instance::from_raw(
+    //             self.inner._entry.clone(),
+    //             self.inner.instance.clone(),
+    //             INSTANCE_API_VERSION,
+    //             0,
+    //             None,
+    //             self.inner.enabled_extensions.clone(),
+    //             wgpu::InstanceFlags::default(),
+    //             Default::default(),
+    //             false,
+    //             Some(Box::new(|| drop(this))),
+    //         )?;
+
+    //         Ok(wgpu::Instance::from_hal::<wgpu::hal::api::Vulkan>(
+    //             hal_instance,
+    //         ))
+    //     }
+    // }
+
+    pub fn ash(&self) -> &ash::Instance {
+        &self.inner.instance
+    }
+
+    pub fn video_queue_instance(&self) -> &video_queue::Instance {
+        &self.inner.video_queue_instance
+    }
+
+    pub fn physical_devices(&self) -> Result<Vec<PhysicalDevice>, vk::Result> {
+        unsafe {
+            let physical_devices = self
+                .ash()
+                .enumerate_physical_devices()?
+                .into_iter()
+                .map(|physical_device| PhysicalDevice::new(self.clone(), physical_device))
+                .collect();
+
+            Ok(physical_devices)
+        }
+    }
+}
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        unsafe {
+            if self.wgpu.is_none() {
+                if let Some(debug_messenger) = self.debug_messenger.take() {
+                    debug_utils::Instance::load(&self._entry, &self.instance)
+                        .destroy_debug_utils_messenger(debug_messenger, None);
+                }
+
+                self.instance.destroy_instance(None);
+            }
+        }
+    }
+}
+
+unsafe extern "system" fn debug_utils_callback(
+    message_severity: vk::DebugUtilsMessageSeverityFlagsEXT,
+    message_types: vk::DebugUtilsMessageTypeFlagsEXT,
+    p_callback_data: *const vk::DebugUtilsMessengerCallbackDataEXT<'_>,
+    _p_user_data: *mut std::ffi::c_void,
+) -> vk::Bool32 {
+    use std::ffi::CStr;
+
+    let data = &*p_callback_data;
+    match message_severity {
+        vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => {
+            log::error!(target: "vulkan", "{message_types:?}: {:?}", CStr::from_ptr(data.p_message))
+        }
+        vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => {
+            log::warn!(target: "vulkan", "{message_types:?}: {:?}", CStr::from_ptr(data.p_message))
+        }
+        vk::DebugUtilsMessageSeverityFlagsEXT::INFO => {
+            log::info!(target: "vulkan", "{message_types:?}: {:?}", CStr::from_ptr(data.p_message))
+        }
+        vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => {
+            log::debug!(target: "vulkan", "{message_types:?}: {:?}", CStr::from_ptr(data.p_message))
+        }
+        _ => {
+            log::error!(target: "vulkan", "{message_severity:?} - {message_types:?}: {:?}", CStr::from_ptr(data.p_message))
+        }
+    }
+
+    vk::FALSE
+}
diff --git a/media-video/vulkan/src/lib.rs b/media-video/vulkan/src/lib.rs
new file mode 100644
index 00000000..faec669f
--- /dev/null
+++ b/media-video/vulkan/src/lib.rs
@@ -0,0 +1,53 @@
+//! Some convenience types for working with vulkan, not intended for use outside of ezk's own use
+
+#![allow(
+    unsafe_op_in_unsafe_fn,
+    clippy::missing_safety_doc,
+    clippy::upper_case_acronyms
+)]
+#![warn(missing_debug_implementations)]
+
+pub mod encoder;
+
+mod buffer;
+mod command_buffer;
+mod descriptor_set;
+mod device;
+mod dpb;
+mod error;
+mod fence;
+mod image;
+mod image_view;
+mod instance;
+mod physical_device;
+mod pipeline;
+mod sampler;
+mod semaphore;
+mod shader_module;
+mod video_feedback_query_pool;
+mod video_session;
+mod video_session_parameters;
+
+pub use buffer::Buffer;
+pub use command_buffer::{CommandBuffer, RecordingCommandBuffer};
+pub use descriptor_set::{DescriptorSet, DescriptorSetLayout};
+pub use device::Device;
+pub use error::VulkanError;
+pub use fence::Fence;
+pub use image::{DrmPlane, Image, ImageMemoryBarrier};
+pub use image_view::ImageView;
+pub use instance::Instance;
+pub use physical_device::PhysicalDevice;
+pub use pipeline::{Pipeline, PipelineLayout};
+pub use sampler::Sampler;
+pub use semaphore::Semaphore;
+pub use shader_module::ShaderModule;
+
+// reexport ash for convenience
+pub use ash;
+
+pub(crate) use dpb::create_dpb;
+
+pub(crate) use video_feedback_query_pool::VideoFeedbackQueryPool;
+pub(crate) use video_session::VideoSession;
+pub(crate) use video_session_parameters::VideoSessionParameters;
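Tying the instance pieces together, a hedged sketch of the wgpu-sharing path: reuse wgpu's VkInstance for this crate instead of creating a second one, then map each physical device back to a wgpu adapter. The `wgpu::Instance::new` call reflects recent wgpu versions and is an assumption.

```rust
use vulkan::Instance;

fn shared_instance() -> anyhow::Result<()> {
    // Assumption: a Vulkan-backed wgpu instance created by the application.
    let wgpu_instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
        backends: wgpu::Backends::VULKAN,
        ..Default::default()
    });

    // Safety: the instance is guaranteed to be Vulkan-backed (see `backends` above).
    let instance = unsafe { Instance::from_wgpu(wgpu_instance.clone()) };

    for physical_device in instance.physical_devices()? {
        // Find the wgpu adapter wrapping the same VkPhysicalDevice.
        let adapter = unsafe { physical_device.to_wgpu(&wgpu_instance)? };
        println!("{:?}", adapter.get_info());
    }

    Ok(())
}
```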
diff --git a/media-video/vulkan/src/physical_device.rs b/media-video/vulkan/src/physical_device.rs
new file mode 100644
index 00000000..011f243a
--- /dev/null
+++ b/media-video/vulkan/src/physical_device.rs
@@ -0,0 +1,235 @@
+use std::{ffi::CStr, fmt, ptr};
+
+use crate::{Instance, encoder::codec::VulkanEncCodec};
+use anyhow::Context as _;
+use ash::vk::{self, Handle, PhysicalDeviceProperties, TaggedStructure};
+use ash_stable::vk::Handle as _;
+
+#[derive(Debug, Clone, Copy)]
+pub struct DrmModifier {
+    pub modifier: u64,
+    pub plane_count: u32,
+    pub tiling_features: vk::FormatFeatureFlags2,
+}
+
+#[derive(Clone)]
+pub struct PhysicalDevice {
+    instance: Instance,
+    physical_device: vk::PhysicalDevice,
+}
+
+impl PhysicalDevice {
+    pub(crate) fn new(instance: Instance, physical_device: vk::PhysicalDevice) -> Self {
+        PhysicalDevice {
+            instance,
+            physical_device,
+        }
+    }
+
+    pub fn instance(&self) -> &Instance {
+        &self.instance
+    }
+
+    pub fn handle(&self) -> vk::PhysicalDevice {
+        self.physical_device
+    }
+
+    pub fn properties(&self) -> vk::PhysicalDeviceProperties {
+        unsafe {
+            self.instance
+                .ash()
+                .get_physical_device_properties(self.physical_device)
+        }
+    }
+
+    pub fn queue_family_properties(&self) -> Vec<vk::QueueFamilyProperties> {
+        unsafe {
+            self.instance
+                .ash()
+                .get_physical_device_queue_family_properties(self.physical_device)
+        }
+    }
+
+    pub fn video_format_properties(
+        &self,
+        video_profile_infos: &[vk::VideoProfileInfoKHR<'_>],
+    ) -> Result<Vec<vk::VideoFormatPropertiesKHR<'static>>, vk::Result> {
+        let mut video_profile_list_info =
+            vk::VideoProfileListInfoKHR::default().profiles(video_profile_infos);
+        let physical_device_video_format_info = vk::PhysicalDeviceVideoFormatInfoKHR::default()
+            .image_usage(vk::ImageUsageFlags::VIDEO_ENCODE_SRC_KHR)
+            .push(&mut video_profile_list_info);
+
+        let get_physical_device_video_format_properties = self
+            .instance
+            .video_queue_instance()
+            .fp()
+            .get_physical_device_video_format_properties_khr;
+
+        // First call queries the number of entries, second call fills the allocated buffer
+        let mut len = 0;
+        unsafe {
+            (get_physical_device_video_format_properties)(
+                self.physical_device,
+                &raw const physical_device_video_format_info,
+                &raw mut len,
+                ptr::null_mut(),
+            )
+            .result()?
+        };
+
+        let mut video_format_properties =
+            vec![vk::VideoFormatPropertiesKHR::default(); len as usize];
+        unsafe {
+            (get_physical_device_video_format_properties)(
+                self.physical_device,
+                &raw const physical_device_video_format_info,
+                &raw mut len,
+                video_format_properties.as_mut_ptr(),
+            )
+            .result()?
+        };
+
+        Ok(video_format_properties)
+    }
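A hedged sketch of querying encode input formats with the method above: build a video profile for 8-bit 4:2:0 H.264 encoding and print what the implementation accepts. In real use the profile's `p_next` chain would also carry codec-specific profile info; the flag names assume ash's promoted video bindings.

```rust
use vulkan::{ash::vk, PhysicalDevice};

fn encode_src_formats(physical_device: &PhysicalDevice) -> Result<(), vk::Result> {
    let profile = vk::VideoProfileInfoKHR::default()
        .video_codec_operation(vk::VideoCodecOperationFlagsKHR::ENCODE_H264)
        .chroma_subsampling(vk::VideoChromaSubsamplingFlagsKHR::TYPE_420)
        .luma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8)
        .chroma_bit_depth(vk::VideoComponentBitDepthFlagsKHR::TYPE_8);

    for props in physical_device.video_format_properties(&[profile])? {
        println!("{:?}", props.format);
    }

    Ok(())
}
```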
+
+    pub fn video_capabilities<'a, C: VulkanEncCodec>(
+        &self,
+        video_profile_info: vk::VideoProfileInfoKHR<'a>,
+    ) -> Result<
+        (
+            vk::VideoCapabilitiesKHR<'static>,
+            vk::VideoEncodeCapabilitiesKHR<'static>,
+            C::Capabilities<'static>,
+        ),
+        vk::Result,
+    > {
+        let mut codec_caps = C::Capabilities::default();
+        let mut encode_caps = vk::VideoEncodeCapabilitiesKHR {
+            p_next: (&raw mut codec_caps).cast(),
+            ..Default::default()
+        };
+        let mut caps = vk::VideoCapabilitiesKHR {
+            p_next: (&raw mut encode_caps).cast(),
+            ..Default::default()
+        };
+
+        let get_physical_device_video_capabilities = self
+            .instance()
+            .video_queue_instance()
+            .fp()
+            .get_physical_device_video_capabilities_khr;
+
+        unsafe {
+            (get_physical_device_video_capabilities)(
+                self.physical_device,
+                &raw const video_profile_info,
+                &raw mut caps,
+            )
+            .result()?;
+        }
+
+        Ok((caps, encode_caps, codec_caps))
+    }
+
+    pub fn supported_drm_modifier(&self, format: vk::Format) -> Vec<DrmModifier> {
+        unsafe {
+            let mut modifier_list = vk::DrmFormatModifierPropertiesList2EXT::default();
+            let mut format_properties = vk::FormatProperties2::default().push(&mut modifier_list);
+
+            self.instance()
+                .ash()
+                .get_physical_device_format_properties2(
+                    self.handle(),
+                    format,
+                    &mut format_properties,
+                );
+
+            let mut properties = vec![
+                vk::DrmFormatModifierProperties2EXT::default();
+                modifier_list.drm_format_modifier_count as usize
+            ];
+
+            let mut modifier_list = vk::DrmFormatModifierPropertiesList2EXT::default()
+                .drm_format_modifier_properties(&mut properties);
+            let mut format_properties = vk::FormatProperties2::default().push(&mut modifier_list);
+
+            self.instance()
+                .ash()
+                .get_physical_device_format_properties2(
+                    self.handle(),
+                    format,
+                    &mut format_properties,
+                );
+
+            properties
+                .into_iter()
+                .map(|props| DrmModifier {
+                    modifier: props.drm_format_modifier,
+                    plane_count: props.drm_format_modifier_plane_count,
+                    tiling_features: props.drm_format_modifier_tiling_features,
+                })
+                .collect()
+        }
+    }
+
+    pub unsafe fn to_wgpu(&self, instance: &wgpu::Instance) -> anyhow::Result<wgpu::Adapter> {
+        instance
+            .enumerate_adapters(wgpu::Backends::VULKAN)
+            .into_iter()
+            .find(|a| {
+                let raw = a
+                    .as_hal::<wgpu::hal::api::Vulkan>()
+                    .unwrap()
+                    .raw_physical_device();
+
+                self.handle().as_raw() == raw.as_raw()
+            })
+            .context("Failed to find adapter when enumerating vulkan adapters")
+    }
+
+    pub fn name(&self) -> String {
+        let mut device_name = self.properties().device_name;
+        device_name[vk::MAX_PHYSICAL_DEVICE_NAME_SIZE - 1] = 0; // you never know
+        let device_name = unsafe { CStr::from_ptr(device_name.as_ptr()) };
+        device_name.to_string_lossy().into_owned()
+    }
+}
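A small sketch of the DRM modifier query: list the modifiers usable for NV12 dma-buf import. The format choice is an assumption; capture code would intersect this list with the modifiers advertised by the buffer producer.

```rust
use vulkan::{ash::vk, PhysicalDevice};

fn print_nv12_modifiers(physical_device: &PhysicalDevice) {
    for m in physical_device.supported_drm_modifier(vk::Format::G8_B8R8_2PLANE_420_UNORM) {
        println!(
            "modifier={:#x} planes={} features={:?}",
            m.modifier, m.plane_count, m.tiling_features
        );
    }
}
```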
+
+impl fmt::Debug for PhysicalDevice {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let PhysicalDeviceProperties {
+            api_version,
+            driver_version,
+            vendor_id,
+            device_id,
+            device_type,
+            mut device_name,
+            ..
+        } = self.properties();
+
+        let api_version = (
+            vk::api_version_major(api_version),
+            vk::api_version_minor(api_version),
+            vk::api_version_patch(api_version),
+        );
+
+        let driver_version = (
+            vk::api_version_major(driver_version),
+            vk::api_version_minor(driver_version),
+            vk::api_version_patch(driver_version),
+        );
+
+        device_name[vk::MAX_PHYSICAL_DEVICE_NAME_SIZE - 1] = 0; // you never know
+        let device_name = unsafe { CStr::from_ptr(device_name.as_ptr()) };
+
+        f.debug_struct("PhysicalDevice")
+            .field("physical_device", &self.physical_device)
+            .field("api_version", &api_version)
+            .field("driver_version", &driver_version)
+            .field("vendor_id", &vendor_id)
+            .field("device_id", &device_id)
+            .field("device_type", &device_type)
+            .field("device_name", &device_name)
+            .finish()
+    }
+}
diff --git a/media-video/vulkan/src/pipeline.rs b/media-video/vulkan/src/pipeline.rs
new file mode 100644
index 00000000..3312d3b0
--- /dev/null
+++ b/media-video/vulkan/src/pipeline.rs
@@ -0,0 +1,124 @@
+use std::{ffi::CStr, sync::Arc};
+
+use crate::{DescriptorSetLayout, Device, ShaderModule, VulkanError};
+use ash::vk;
+
+#[derive(Debug, Clone)]
+pub struct PipelineLayout {
+    inner: Arc<PipelineLayoutInner>,
+}
+
+#[derive(Debug)]
+struct PipelineLayoutInner {
+    descriptor_set_layout: DescriptorSetLayout,
+    pipeline_layout: vk::PipelineLayout,
+}
+
+impl PipelineLayout {
+    pub fn create(
+        device: &Device,
+        descriptor_set_layout: &DescriptorSetLayout,
+    ) -> Result<Self, VulkanError> {
+        let set_layouts = [unsafe { descriptor_set_layout.descriptor_set_layout() }];
+        let create_info = vk::PipelineLayoutCreateInfo::default().set_layouts(&set_layouts);
+        let pipeline_layout = unsafe { device.ash().create_pipeline_layout(&create_info, None)? };
+
+        Ok(PipelineLayout {
+            inner: Arc::new(PipelineLayoutInner {
+                descriptor_set_layout: descriptor_set_layout.clone(),
+                pipeline_layout,
+            }),
+        })
+    }
+
+    pub unsafe fn pipeline_layout(&self) -> vk::PipelineLayout {
+        self.inner.pipeline_layout
+    }
+}
+
+impl Drop for PipelineLayoutInner {
+    fn drop(&mut self) {
+        unsafe {
+            self.descriptor_set_layout
+                .device()
+                .ash()
+                .destroy_pipeline_layout(self.pipeline_layout, None);
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Pipeline {
+    _shader_module: ShaderModule,
+    layout: PipelineLayout,
+    pipeline: vk::Pipeline,
+}
+
+impl Pipeline {
+    pub fn create(
+        device: &Device,
+        layout: PipelineLayout,
+        shader_module: ShaderModule,
+        stage: vk::ShaderStageFlags,
+        name: &CStr,
+        num: u32,
+    ) -> Result<Vec<Pipeline>, VulkanError> {
+        let stage_info = vk::PipelineShaderStageCreateInfo::default()
+            .stage(stage)
+            .module(unsafe { shader_module.shader_module() })
+            .name(name);
+
+        let create_info = vk::ComputePipelineCreateInfo::default()
+            .stage(stage_info)
+            .layout(unsafe { layout.pipeline_layout() });
+
+        let create_result = unsafe {
+            device.ash().create_compute_pipelines(
+                vk::PipelineCache::null(),
+                &vec![create_info; num as usize],
+                None,
+            )
+        };
+
+        let pipelines = match create_result {
+            Ok(pipelines) => pipelines,
+            Err((pipelines, result)) => {
+                for pipeline in pipelines {
+                    unsafe { device.ash().destroy_pipeline(pipeline, None) };
+                }
+
+                return Err(VulkanError::from(result));
+            }
+        };
+
+        Ok(pipelines
+            .into_iter()
+            .map(|pipeline| Pipeline {
+                _shader_module: shader_module.clone(),
+                layout: layout.clone(),
+                pipeline,
+            })
+            .collect())
+    }
+
+    pub unsafe fn pipeline_layout(&self) -> vk::PipelineLayout {
+        self.layout.pipeline_layout()
+    }
+
+    pub unsafe fn pipeline(&self) -> vk::Pipeline {
+        self.pipeline
+    }
+}
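A hedged sketch of the compute pipeline flow: compile WGSL, wrap it in a `ShaderModule`, and build one pipeline. The descriptor set layout is assumed to exist; `num > 1` clones the same create-info to get several identical pipelines in one call.

```rust
use vulkan::{ash::vk, DescriptorSetLayout, Device, Pipeline, PipelineLayout, ShaderModule, VulkanError};

fn build_pipeline(
    device: &Device,
    descriptor_set_layout: &DescriptorSetLayout,
) -> Result<Pipeline, VulkanError> {
    // Stand-in shader; real shaders ship with the crate.
    let spv = ShaderModule::compile_wgsl_to_spv("@compute @workgroup_size(1) fn main() {}");
    let module = ShaderModule::from_spv(device, &spv)?;

    let layout = PipelineLayout::create(device, descriptor_set_layout)?;

    // Entry point name must match the WGSL function above.
    let mut pipelines =
        Pipeline::create(device, layout, module, vk::ShaderStageFlags::COMPUTE, c"main", 1)?;

    Ok(pipelines.remove(0))
}
```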
+
+impl Drop for Pipeline {
+    fn drop(&mut self) {
+        unsafe {
+            self.layout
+                .inner
+                .descriptor_set_layout
+                .device()
+                .ash()
+                .destroy_pipeline(self.pipeline, None);
+        }
+    }
+}
diff --git a/media-video/vulkan/src/sampler.rs b/media-video/vulkan/src/sampler.rs
new file mode 100644
index 00000000..78ed0e4f
--- /dev/null
+++ b/media-video/vulkan/src/sampler.rs
@@ -0,0 +1,34 @@
+use crate::{Device, VulkanError};
+use ash::vk;
+
+#[derive(Debug)]
+pub struct Sampler {
+    device: Device,
+    sampler: vk::Sampler,
+}
+
+impl Sampler {
+    pub unsafe fn create(
+        device: &Device,
+        create_info: &vk::SamplerCreateInfo,
+    ) -> Result<Self, VulkanError> {
+        let sampler = device.ash().create_sampler(create_info, None)?;
+
+        Ok(Sampler {
+            device: device.clone(),
+            sampler,
+        })
+    }
+
+    pub unsafe fn sampler(&self) -> vk::Sampler {
+        self.sampler
+    }
+}
+
+impl Drop for Sampler {
+    fn drop(&mut self) {
+        unsafe {
+            self.device.ash().destroy_sampler(self.sampler, None);
+        }
+    }
+}
diff --git a/media-video/vulkan/src/semaphore.rs b/media-video/vulkan/src/semaphore.rs
new file mode 100644
index 00000000..a0bf6511
--- /dev/null
+++ b/media-video/vulkan/src/semaphore.rs
@@ -0,0 +1,101 @@
+use crate::{Device, VulkanError};
+use ash::vk::{self, TaggedStructure};
+use std::{
+    os::fd::{AsRawFd, OwnedFd},
+    sync::Arc,
+};
+
+#[derive(Debug, Clone)]
+pub struct Semaphore {
+    inner: Arc<Inner>,
+}
+
+#[derive(Debug)]
+struct Inner {
+    device: Device,
+    handle: vk::Semaphore,
+}
+
+impl Semaphore {
+    pub fn create(device: &Device) -> Result<Self, VulkanError> {
+        unsafe {
+            let handle = device
+                .ash()
+                .create_semaphore(&vk::SemaphoreCreateInfo::default(), None)?;
+
+            Ok(Semaphore {
+                inner: Arc::new(Inner {
+                    device: device.clone(),
+                    handle,
+                }),
+            })
+        }
+    }
+
+    pub fn create_timeline(device: &Device) -> Result<Self, VulkanError> {
+        if !device.enabled_extensions().timeline_semaphore {
+            return Err(VulkanError::MissingExtension("timeline_semaphore"));
+        }
+
+        unsafe {
+            let mut type_create_info =
+                vk::SemaphoreTypeCreateInfo::default().semaphore_type(vk::SemaphoreType::TIMELINE);
+            let create_info = vk::SemaphoreCreateInfo::default().push(&mut type_create_info);
+
+            let handle = device.ash().create_semaphore(&create_info, None)?;
+
+            Ok(Semaphore {
+                inner: Arc::new(Inner {
+                    device: device.clone(),
+                    handle,
+                }),
+            })
+        }
+    }
+
+    pub unsafe fn import_timeline_fd(device: &Device, fd: OwnedFd) -> Result<Self, VulkanError> {
+        if !device.enabled_extensions().timeline_semaphore {
+            return Err(VulkanError::MissingExtension("timeline_semaphore"));
+        }
+
+        if !device.enabled_extensions().external_semaphore_fd {
+            return Err(VulkanError::MissingExtension("external_semaphore_fd"));
+        }
+
+        let mut type_create_info =
+            vk::SemaphoreTypeCreateInfo::default().semaphore_type(vk::SemaphoreType::TIMELINE);
+        let create_info = vk::SemaphoreCreateInfo::default().push(&mut type_create_info);
+
+        let handle = device.ash().create_semaphore(&create_info, None)?;
+
+        let import_semaphore_fd_info = vk::ImportSemaphoreFdInfoKHR::default()
+            .semaphore(handle)
+            .handle_type(vk::ExternalSemaphoreHandleTypeFlags::OPAQUE_FD)
+            .fd(fd.as_raw_fd());
+
+        ash::khr::external_semaphore_fd::Device::load(device.instance().ash(), device.ash())
+            .import_semaphore_fd(&import_semaphore_fd_info)?;
+
+        // Ownership of the fd transferred to the vulkan driver, forget about it
+        std::mem::forget(fd);
+
+        Ok(Semaphore {
+            inner: Arc::new(Inner {
+                device: device.clone(),
+                handle,
+            }),
+        })
+    }
+
+    pub unsafe fn handle(&self) -> vk::Semaphore {
+        self.inner.handle
+    }
+}
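A brief sketch of adopting an exported timeline semaphore, e.g. one received from another process over a Unix socket. The fd's origin is an assumption; on success the driver owns the fd, and the call fails early with `MissingExtension` if the device lacks the required extensions.

```rust
use std::os::fd::OwnedFd;
use vulkan::{Device, Semaphore, VulkanError};

// Hypothetical helper wrapping the unsafe import.
unsafe fn adopt_timeline(device: &Device, fd: OwnedFd) -> Result<Semaphore, VulkanError> {
    // Requires timeline_semaphore and external_semaphore_fd to be enabled on the device.
    unsafe { Semaphore::import_timeline_fd(device, fd) }
}
```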
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        unsafe {
+            self.device.ash().destroy_semaphore(self.handle, None);
+        }
+    }
+}
diff --git a/media-video/vulkan/src/shader_module.rs b/media-video/vulkan/src/shader_module.rs
new file mode 100644
index 00000000..d9246bf7
--- /dev/null
+++ b/media-video/vulkan/src/shader_module.rs
@@ -0,0 +1,84 @@
+use crate::{Device, VulkanError};
+use ash::vk;
+use naga::{
+    back::spv,
+    front::wgsl,
+    valid::{Capabilities, ShaderStages, SubgroupOperationSet, ValidationFlags, Validator},
+};
+use std::sync::Arc;
+
+#[derive(Debug, Clone)]
+pub struct ShaderModule {
+    inner: Arc<Inner>,
+}
+
+#[derive(Debug)]
+struct Inner {
+    device: Device,
+    shader_module: vk::ShaderModule,
+}
+
+impl ShaderModule {
+    pub fn from_spv(device: &Device, spv: &[u32]) -> Result<Self, VulkanError> {
+        unsafe {
+            let create_info = vk::ShaderModuleCreateInfo::default().code(spv);
+
+            let shader_module = device.ash().create_shader_module(&create_info, None)?;
+
+            Ok(Self {
+                inner: Arc::new(Inner {
+                    device: device.clone(),
+                    shader_module,
+                }),
+            })
+        }
+    }
+
+    pub fn compile_wgsl_to_spv(source: &str) -> Vec<u32> {
+        let module = match wgsl::parse_str(source) {
+            Ok(module) => module,
+            Err(e) => {
+                panic!("{}", e.emit_to_string(source))
+            }
+        };
+
+        let module_info = match Validator::new(ValidationFlags::all(), Capabilities::all())
+            .subgroup_stages(ShaderStages::COMPUTE)
+            .subgroup_operations(SubgroupOperationSet::all())
+            .validate(&module)
+        {
+            Ok(module_info) => module_info,
+            Err(e) => {
+                panic!("{}", e.emit_to_string(source));
+            }
+        };
+
+        let mut spv = Vec::new();
+
+        if let Err(e) = spv::Writer::new(&spv::Options::default()).unwrap().write(
+            &module,
+            &module_info,
+            None,
+            &None,
+            &mut spv,
+        ) {
+            panic!("{e}")
+        }
+
+        spv
+    }
+
+    pub unsafe fn shader_module(&self) -> vk::ShaderModule {
+        self.inner.shader_module
+    }
+}
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        unsafe {
+            self.device
+                .ash()
+                .destroy_shader_module(self.shader_module, None);
+        }
+    }
+}
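Note that `compile_wgsl_to_spv` panics on invalid WGSL instead of returning an error, which suits its role of compiling the crate's own embedded shaders at startup. A hedged sketch of guarding that property with a test (the shader source is a stand-in):

```rust
#[cfg(test)]
mod tests {
    use super::ShaderModule;

    const WGSL: &str = "@compute @workgroup_size(64) fn main() {}";

    #[test]
    fn wgsl_compiles_to_spv() {
        let spv = ShaderModule::compile_wgsl_to_spv(WGSL);
        // SPIR-V modules start with the magic number 0x0723_0203
        assert_eq!(spv[0], 0x0723_0203);
    }
}
```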
diff --git a/media-video/vulkan/src/video_feedback_query_pool.rs b/media-video/vulkan/src/video_feedback_query_pool.rs
new file mode 100644
index 00000000..3aefb314
--- /dev/null
+++ b/media-video/vulkan/src/video_feedback_query_pool.rs
@@ -0,0 +1,102 @@
+use crate::{Device, VulkanError};
+use ash::vk;
+
+#[derive(Debug)]
+pub(crate) struct VideoFeedbackQueryPool {
+    device: Device,
+    query_pool: vk::QueryPool,
+}
+
+impl VideoFeedbackQueryPool {
+    pub(crate) fn create(
+        device: &Device,
+        query_count: u32,
+        video_profile_info: &vk::VideoProfileInfoKHR<'_>,
+    ) -> Result<Self, VulkanError> {
+        unsafe {
+            let mut query_pool_video_encode_feedback_create_info =
+                vk::QueryPoolVideoEncodeFeedbackCreateInfoKHR::default().encode_feedback_flags(
+                    vk::VideoEncodeFeedbackFlagsKHR::BITSTREAM_BYTES_WRITTEN
+                        | vk::VideoEncodeFeedbackFlagsKHR::BITSTREAM_BUFFER_OFFSET,
+                );
+
+            let mut query_create_info = vk::QueryPoolCreateInfo::default()
+                .query_type(vk::QueryType::VIDEO_ENCODE_FEEDBACK_KHR)
+                .query_count(query_count);
+
+            query_pool_video_encode_feedback_create_info.p_next =
+                (video_profile_info as *const vk::VideoProfileInfoKHR<'_>).cast();
+            query_create_info.p_next =
+                (&raw const query_pool_video_encode_feedback_create_info).cast();
+
+            let query_pool = device.ash().create_query_pool(&query_create_info, None)?;
+
+            Ok(Self {
+                device: device.clone(),
+                query_pool,
+            })
+        }
+    }
+
+    pub(crate) unsafe fn get_bytes_written(&mut self, index: u32) -> Result<u32, VulkanError> {
+        let mut feedback = [EncodeFeedback {
+            offset: 0,
+            bytes_written: 0,
+            status: vk::QueryResultStatusKHR::NOT_READY,
+        }];
+
+        self.device.ash().get_query_pool_results(
+            self.query_pool,
+            index,
+            &mut feedback,
+            vk::QueryResultFlags::WITH_STATUS_KHR | vk::QueryResultFlags::WAIT,
+        )?;
+
+        let [feedback] = feedback;
+
+        if feedback.status != vk::QueryResultStatusKHR::COMPLETE {
+            return Err(VulkanError::QueryFailed {
+                status: feedback.status,
+            });
+        }
+
+        Ok(feedback.bytes_written)
+    }
+
+    pub(crate) unsafe fn cmd_reset_query(&mut self, command_buffer: vk::CommandBuffer, index: u32) {
+        self.device
+            .ash()
+            .cmd_reset_query_pool(command_buffer, self.query_pool, index, 1);
+    }
+
+    pub(crate) unsafe fn cmd_begin_query(&mut self, command_buffer: vk::CommandBuffer, index: u32) {
+        self.device.ash().cmd_begin_query(
+            command_buffer,
+            self.query_pool,
+            index,
+            vk::QueryControlFlags::empty(),
+        );
+    }
+
+    pub(crate) unsafe fn cmd_end_query(&mut self, command_buffer: vk::CommandBuffer, index: u32) {
+        self.device
+            .ash()
+            .cmd_end_query(command_buffer, self.query_pool, index);
+    }
+}
+
+impl Drop for VideoFeedbackQueryPool {
+    fn drop(&mut self) {
+        unsafe {
+            self.device.ash().destroy_query_pool(self.query_pool, None);
+        }
+    }
+}
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+struct EncodeFeedback {
+    offset: u32,
+    bytes_written: u32,
+    status: vk::QueryResultStatusKHR,
+}
diff --git a/media-video/vulkan/src/video_session.rs b/media-video/vulkan/src/video_session.rs
new file mode 100644
index 00000000..00b85dc2
--- /dev/null
+++ b/media-video/vulkan/src/video_session.rs
@@ -0,0 +1,140 @@
+use crate::{Device, VulkanError};
+use ash::vk;
+use std::{
+    ptr::{null, null_mut},
+    sync::Arc,
+};
+
+#[derive(Debug, Clone)]
+pub(crate) struct VideoSession {
+    inner: Arc<Inner>,
+}
+
+#[derive(Debug)]
+struct Inner {
+    device: Device,
+    video_session: vk::VideoSessionKHR,
+    video_session_memory: Vec<vk::DeviceMemory>,
+}
+
+impl VideoSession {
+    pub(crate) unsafe fn create(
+        device: &Device,
+        create_info: &vk::VideoSessionCreateInfoKHR,
+    ) -> Result<Self, VulkanError> {
+        let create_video_session = device
+            .ash_video_queue_device()
+            .fp()
+            .create_video_session_khr;
+        let get_video_session_memory_requirements = device
+            .ash_video_queue_device()
+            .fp()
+            .get_video_session_memory_requirements_khr;
+        let bind_video_session_memory = device
+            .ash_video_queue_device()
+            .fp()
+            .bind_video_session_memory_khr;
+
+        let mut video_session = vk::VideoSessionKHR::null();
+        (create_video_session)(
+            device.ash().handle(),
+            &raw const *create_info,
+            null(),
+            &raw mut video_session,
+        )
+        .result()?;
+
+        let mut len = 0;
+        (get_video_session_memory_requirements)(
+            device.ash().handle(),
+            video_session,
+            &raw mut len,
+            null_mut(),
+        )
+        .result()?;
+
+        let mut video_session_memory_requirements =
+            vec![vk::VideoSessionMemoryRequirementsKHR::default(); len as usize];
+
+        (get_video_session_memory_requirements)(
+            device.ash().handle(),
+            video_session,
+            &raw mut len,
+            video_session_memory_requirements.as_mut_ptr(),
+        )
+        .result()?;
+
+        let mut bind_session_memory_infos = vec![];
+        let mut video_session_memory = vec![];
+
+        for video_session_memory_requirement in video_session_memory_requirements {
+            let memory_type_index = device.find_memory_type(
+                video_session_memory_requirement
+                    .memory_requirements
+                    .memory_type_bits,
+                vk::MemoryPropertyFlags::empty(),
+            )?;
+
+            let allocate_info = vk::MemoryAllocateInfo::default()
+                .memory_type_index(memory_type_index)
+                .allocation_size(video_session_memory_requirement.memory_requirements.size);
+
+            let memory = device.ash().allocate_memory(&allocate_info, None)?;
+
+            let bind_session_memory_info = vk::BindVideoSessionMemoryInfoKHR::default()
+                .memory(memory)
+                .memory_bind_index(video_session_memory_requirement.memory_bind_index)
+                .memory_size(video_session_memory_requirement.memory_requirements.size);
+
+            video_session_memory.push(memory);
+            bind_session_memory_infos.push(bind_session_memory_info);
+        }
+
+        bind_video_session_memory(
+            device.ash().handle(),
+            video_session,
+            len,
+            bind_session_memory_infos.as_ptr(),
+        )
+        .result()?;
+
+        Ok(Self {
+            inner: Arc::new(Inner {
+                device: device.clone(),
+                video_session,
+                video_session_memory,
+            }),
+        })
+    }
+
+    pub(crate) fn device(&self) -> &Device {
+        &self.inner.device
+    }
+
+    pub(crate) unsafe fn video_session(&self) -> vk::VideoSessionKHR {
+        self.inner.video_session
+    }
+}
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        unsafe {
+            let destroy_video_session = self
+                .device
+                .ash_video_queue_device()
+                .fp()
+                .destroy_video_session_khr;
+
+            (destroy_video_session)(self.device.ash().handle(), self.video_session, null());
+
+            for memory in &self.video_session_memory {
+                self.device.ash().free_memory(*memory, None);
+            }
+        }
+    }
+}
diff --git a/media-video/vulkan/src/video_session_parameters.rs b/media-video/vulkan/src/video_session_parameters.rs
new file mode 100644
index 00000000..81d059fa
--- /dev/null
+++ b/media-video/vulkan/src/video_session_parameters.rs
@@ -0,0 +1,181 @@
+use crate::{VideoSession, VulkanError, encoder::codec::VulkanEncCodec};
+use ash::vk::{self, TaggedStructure};
+use std::ptr::{null, null_mut};
+
+#[derive(Debug)]
+pub(crate) struct VideoSessionParameters {
+    video_session: VideoSession,
+    update_count: u32,
+    video_session_parameters: vk::VideoSessionParametersKHR,
+}
+
+impl VideoSessionParameters {
+    pub(crate) fn create<C: VulkanEncCodec>(
+        video_session: &VideoSession,
+        parameters: &C::ParametersCreateInfo<'_>,
+    ) -> Result<Self, VulkanError> {
+        let device = video_session.device();
+
+        let mut create_info = vk::VideoSessionParametersCreateInfoKHR::default()
+            .video_session(unsafe { video_session.video_session() });
+        create_info.p_next = (parameters as *const C::ParametersCreateInfo<'_>).cast();
+
+        let mut video_session_parameters = vk::VideoSessionParametersKHR::null();
+
+        let create_video_session_parameters = device
+            .ash_video_queue_device()
+            .fp()
+            .create_video_session_parameters_khr;
+
+        unsafe {
+            (create_video_session_parameters)(
+                device.ash().handle(),
+                &raw const create_info,
+                null_mut(),
+                &raw mut video_session_parameters,
+            )
+            .result()?;
+        }
+
+        Ok(Self {
+            video_session: video_session.clone(),
+            update_count: 0,
+            video_session_parameters,
+        })
+    }
+
+    pub(crate) fn update<'a, P>(&mut self, parameters: &'a mut P) -> Result<(), vk::Result>
+    where
+        P: vk::Extends<vk::VideoSessionParametersUpdateInfoKHR<'a>> + vk::TaggedStructure<'a>,
+    {
+        self.update_count += 1;
+
+        let device = self.video_session().device();
+
+        let update_info = vk::VideoSessionParametersUpdateInfoKHR::default()
+            .update_sequence_count(self.update_count)
+            .push(parameters);
+
+        let update_video_session_parameters = device
+            .ash_video_queue_device()
+            .fp()
+            .update_video_session_parameters_khr;
+
+        unsafe {
+            update_video_session_parameters(
+                device.ash().handle(),
+                self.video_session_parameters,
+                &raw const update_info,
+            )
+            .result()
+        }
+    }
+
+    pub(crate) unsafe fn get_encoded_video_session_parameters<'a, T>(
+        &self,
+        ext: Option<&'a mut T>,
+    ) -> Result<Vec<u8>, VulkanError>
+    where
+        T: vk::TaggedStructure<'a>,
+        T: vk::Extends<vk::VideoEncodeSessionParametersGetInfoKHR<'a>>,
+    {
+        let device = self.video_session.device();
+
+        let mut session_parameters_info = vk::VideoEncodeSessionParametersGetInfoKHR::default()
+            .video_session_parameters(self.video_session_parameters);
+
+        if let Some(ext) = ext {
+            session_parameters_info = session_parameters_info.push(ext);
+        }
+
+        let get_encoded_video_session_parameters = device
+            .ash_video_encode_queue_device()
+            .fp()
+            .get_encoded_video_session_parameters_khr;
+
+        let mut len = 0;
+        (get_encoded_video_session_parameters)(
+            device.ash().handle(),
+            &session_parameters_info,
+            null_mut(),
+            &raw mut len,
+            null_mut(),
+        )
+        .result()?;
+
+        let mut buf = vec![0u8; len];
+        (get_encoded_video_session_parameters)(
+            device.ash().handle(),
+            &session_parameters_info,
+            null_mut(),
+            &raw mut len,
+            buf.as_mut_ptr().cast(),
+        )
+        .result()?;
+
+        Ok(buf)
+    }
+
+    pub(crate) unsafe fn get_encoded_video_session_parameters2(
+        &self,
+    ) -> Result<Vec<u8>, VulkanError> {
+        let device = self.video_session.device();
+
+        let session_parameters_info = vk::VideoEncodeSessionParametersGetInfoKHR::default()
+            .video_session_parameters(self.video_session_parameters);
+
+        let get_encoded_video_session_parameters = device
+            .ash_video_encode_queue_device()
+            .fp()
+            .get_encoded_video_session_parameters_khr;
+
+        let mut len = 0;
+        (get_encoded_video_session_parameters)(
+            device.ash().handle(),
+            &session_parameters_info,
+            null_mut(),
+            &raw mut len,
+            null_mut(),
+        )
+        .result()?;
+
+        let mut buf = vec![0u8; len];
+        (get_encoded_video_session_parameters)(
+            device.ash().handle(),
+            &session_parameters_info,
+            null_mut(),
+            &raw mut len,
+            buf.as_mut_ptr().cast(),
+        )
+        .result()?;
+
+        Ok(buf)
+    }
+
+    pub(crate) fn video_session(&self) -> &VideoSession {
+        &self.video_session
+    }
+
+    pub(crate) unsafe fn video_session_parameters(&self) -> vk::VideoSessionParametersKHR {
+        self.video_session_parameters
+    }
+}
+
+impl Drop for VideoSessionParameters {
+    fn drop(&mut self) {
+        let device = self.video_session.device();
+
+        unsafe {
+            let destroy_video_session_parameters_khr = device
+                .ash_video_queue_device()
+                .fp()
+                .destroy_video_session_parameters_khr;
+
+            destroy_video_session_parameters_khr(
+                device.ash().handle(),
+                self.video_session_parameters,
+                null(),
+            );
+        }
+    }
+}
diff --git a/media/h264/Cargo.toml b/media/h264/Cargo.toml
deleted file mode 100644
index 528d77d4..00000000
--- a/media/h264/Cargo.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-[package]
-name = "ezk-h264"
-version = "0.1.0"
-authors.workspace = true
-edition.workspace = true
-license.workspace = true
-repository.workspace = true
-
-[dependencies]
-bytes = "1.10.0"
-thiserror = "2.0.11"
-
-openh264 = { version = "0.8", optional = true }
-openh264-sys2 = { version = "0.8", optional = true }
-
-[features]
-default = ["openh264"]
-openh264 = ["dep:openh264", "dep:openh264-sys2"]
-
-[lints]
-workspace = true
diff --git a/media/h264/src/lib.rs b/media/h264/src/lib.rs
deleted file mode 100644
index a47d703f..00000000
--- a/media/h264/src/lib.rs
+++ /dev/null
@@ -1,532 +0,0 @@
-//! H.264 tools for use with SDP & RTP
-
-use profile_level_id::{ParseProfileLevelIdError, ProfileLevelId};
-use std::{fmt, num::ParseIntError, str::FromStr};
-
-#[cfg(feature = "openh264")]
-pub mod openh264;
-mod payload;
-pub mod profile_level_id;
-
-pub use payload::{
-    H264DePayloadError, H264DePayloader, H264DePayloaderOutputFormat, H264Payloader,
-};
-
-/// Generic H.264 encoder config
-pub struct H264EncoderConfig {
-    /// H.264 encoding profile to use. Defines the feature-set the encoder may use.
- pub profile: Profile, - - /// H264 encoding level. Defines default constraints like frame size, fps and more. - pub level: Level, - - /// width & height of the image to be encoded. - /// - /// This value is only used for the initialization and should represent to largest allowed resolution. - /// Some encoders will not be able to handle larger resolutions later without being reinitialized. - pub resolution: (u32, u32), - - /// Define the range of QP values the encoder is allowed use. - /// - /// Allowed values range from 0 to 51, where 0 is the best quality and 51 the worst with the most compression. - /// - /// Default is (17..=28) but manual tuning is recommended! - pub qp: Option<(u32, u32)>, - - /// Keyframe interval in frames. - pub gop: Option, - - /// Target bitrate in bits/s - pub bitrate: Option, - - /// Override the level's maximum bitrate in bits/s - pub max_bitrate: Option, - - /// Limit the output slice size. - /// - /// Required if the packetization mode is SingleNAL which doesn't support fragmentation units. - pub max_slice_len: Option, -} - -impl H264EncoderConfig { - /// Create a encoder config from the peer's H.264 decoder capabilities, communicated through SDP's fmtp attribute - pub fn from_fmtp(fmtp: FmtpOptions, mtu: usize) -> Self { - Self { - profile: fmtp.profile_level_id.profile, - level: fmtp.profile_level_id.level, - resolution: fmtp.max_resolution(1, 1), - qp: None, - gop: None, - bitrate: None, - max_bitrate: Some(fmtp.max_bitrate()), - max_slice_len: { - match fmtp.packetization_mode { - PacketizationMode::SingleNAL => Some(mtu), - PacketizationMode::NonInterleavedMode | PacketizationMode::InterleavedMode => { - None - } - } - }, - } - } -} - -/// Specifies the RTP packetization mode -#[derive(Default, Debug, Clone, Copy, PartialEq, PartialOrd)] -pub enum PacketizationMode { - /// Each RTP packet contains exactly one H.264 NAL unit. - /// This mode is the default and best suited for low latency applications like video conferencing - /// - /// Encoders must have their NAL unit size limited to the MTU. - #[default] - SingleNAL = 0, - - /// Multiple NAL units can be combined into a single RTP packet. - /// - /// Uses fragmentation units (FU-A) to split large NAL units across multiple RTP packets - NonInterleavedMode = 1, - - /// NAL units can be transmitted out of order and reassembled at the receiver. - /// This mode is designed for environments with higher packet loss and jitter, providing better error resilience. - /// - /// Uses Fragmentation Units (FU-A and FU-B) and Aggregation Packets (STAP-B and MTAP) to manage NAL units. 
- InterleavedMode = 2, -} - -/// H.264 specific format parameters used in SDP negotiation -#[derive(Debug, Default)] -pub struct FmtpOptions { - /// Indicates the profile and level used for encoding the video stream - pub profile_level_id: ProfileLevelId, - /// Whether level asymmetry, i.e., sending media encoded at a - /// different level in the offerer-to-answerer direction than the - /// level in the answerer-to-offerer direction, is allowed - pub level_asymmetry_allowed: bool, - /// RTP packetization mode - pub packetization_mode: PacketizationMode, - /// Maximum macroblock processing rate in macroblocks per second - pub max_mbps: Option, - /// Maximum frame size in macroblocks - pub max_fs: Option, - /// Maximum codec picture buffer size - pub max_cbp: Option, - /// Maximum decoded picture buffer size in frames - pub max_dpb: Option, - /// Maximum video bitrate in kilobits per second - pub max_br: Option, - /// Whether redundant pictures are present in the stream - pub redundant_pic_cap: bool, -} - -impl FmtpOptions { - /// Returns the maximum resolution for the given aspect ration - pub fn max_resolution(&self, num: u32, denom: u32) -> (u32, u32) { - let max_fs = self - .max_fs - .unwrap_or_else(|| self.profile_level_id.level.max_fs()); - - resolution_from_max_fs(num, denom, max_fs) - } - - /// Returns the maximum resolution with the given fps and aspect ratio num/denom - pub fn max_resolution_for_fps(&self, num: u32, denom: u32, fps: u32) -> (u32, u32) { - let max_mbps = self - .max_mbps - .unwrap_or_else(|| self.profile_level_id.level.max_mbps()); - - let max_fs = max_mbps / fps.max(1); - - resolution_from_max_fs(num, denom, max_fs) - } - - /// Returns the maximum supported FPS using the maximum supported resolution - pub fn max_fps_for_max_resolution(&self) -> u32 { - let max_fs = self - .max_fs - .unwrap_or_else(|| self.profile_level_id.level.max_fs()); - - let max_mbps = self - .max_mbps - .unwrap_or_else(|| self.profile_level_id.level.max_mbps()); - - max_mbps / max_fs.max(1) - } - - /// Returns the maximum supported FPS for the given resolution - pub fn max_fps_for_resolution(&self, width: u32, height: u32) -> u32 { - let max_mbps = self - .max_mbps - .unwrap_or_else(|| self.profile_level_id.level.max_mbps()); - - let frame_size = (width * height) / 256; - - max_mbps / frame_size.max(1) - } - - /// Returns the maximum bitrate in bit/s - pub fn max_bitrate(&self) -> u32 { - self.max_br - .unwrap_or_else(|| self.profile_level_id.level.max_br()) - .saturating_mul(1000) - } -} - -fn resolution_from_max_fs(num: u32, denom: u32, max_fs: u32) -> (u32, u32) { - const MAX_FS_BOUND: u32 = 0x7FFFFF; - - fn greatest_common_divisor(mut a: u32, mut b: u32) -> u32 { - while b != 0 { - let tmp = b; - b = a % b; - a = tmp; - } - - a - } - - // Limit max FS to avoid integer overflows - let max_fs = max_fs.min(MAX_FS_BOUND); - let max_pixels = max_fs.saturating_mul(256); - let divisor = greatest_common_divisor(num.max(1), denom.max(1)); - let num = num / divisor; - let denom = denom / divisor; - - // Search for the best resolution by testing them all - for i in 1.. 
{ - let width = num * i; - let height = denom * i; - - if width * height > max_pixels { - let width = num * (i - 1); - let height = denom * (i - 1); - return (width, height); - } - } - - unreachable!() -} - -/// Failed to parse H.264 fmtp line -#[derive(Debug, thiserror::Error)] -pub enum ParseFmtpOptionsError { - #[error(transparent)] - InvalidProfileId(#[from] ParseProfileLevelIdError), - #[error("encountered non integer value {0}")] - InvalidValue(#[from] ParseIntError), -} - -impl FromStr for FmtpOptions { - type Err = ParseFmtpOptionsError; - - fn from_str(s: &str) -> Result { - let mut options = Self::default(); - - fn parse_u32(i: &str) -> Result { - Ok(i.parse::()?.clamp(1, 8_388_607)) - } - - for (key, value) in s.split(';').filter_map(|e| e.split_once('=')) { - let value = value.trim(); - match key { - "profile-level-id" => options.profile_level_id = value.parse()?, - "level-asymmetry-allowed" => options.level_asymmetry_allowed = value == "1", - "packetization-mode" => { - options.packetization_mode = match value { - "0" => PacketizationMode::SingleNAL, - "1" => PacketizationMode::NonInterleavedMode, - "2" => PacketizationMode::InterleavedMode, - _ => continue, - }; - } - "max-mbps" => options.max_mbps = Some(parse_u32(value)?), - "max-fs" => options.max_fs = Some(parse_u32(value)?), - "max-cbp" => options.max_cbp = Some(parse_u32(value)?), - "max-dpb" => options.max_dpb = Some(parse_u32(value)?), - "max-br" => options.max_br = Some(parse_u32(value)?), - "redundant-pic-cap" => options.redundant_pic_cap = value == "1", - _ => continue, - } - } - - Ok(options) - } -} - -impl fmt::Display for FmtpOptions { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { - profile_level_id, - level_asymmetry_allowed, - packetization_mode, - max_mbps, - max_fs, - max_cbp, - max_dpb, - max_br, - redundant_pic_cap, - } = self; - - write!(f, "profile-level-id={profile_level_id}")?; - - if *level_asymmetry_allowed { - write!(f, ";level-asymmetry-allowed=1")?; - } - - write!(f, ";packetization-mode={}", *packetization_mode as u8)?; - - if let Some(max_mbps) = max_mbps { - write!(f, ";max-mbps={max_mbps}")?; - } - - if let Some(max_fs) = max_fs { - write!(f, ";max-fs={max_fs}")?; - } - - if let Some(max_cbp) = max_cbp { - write!(f, ";max-cbp={max_cbp}")?; - } - - if let Some(max_dpb) = max_dpb { - write!(f, ";max-dbp={max_dpb}")?; - } - - if let Some(max_br) = max_br { - write!(f, ";max-br={max_br}")?; - } - - if *redundant_pic_cap { - write!(f, ";redundant-pic-cap=1")?; - } - - Ok(()) - } -} - -/// H.264 encoding profile -#[derive(Debug, Clone, Copy)] -pub enum Profile { - Baseline, - ConstrainedBaseline, - Main, - Extended, - High, - High10, - High422, - High444Predictive, - High10Intra, - High422Intra, - High444Intra, - CAVLC444Intra, -} - -impl Profile { - pub fn profile_idc(self) -> u8 { - match self { - Profile::Baseline | Profile::ConstrainedBaseline => 66, - Profile::Main => 77, - Profile::Extended => 88, - Profile::High => 100, - Profile::High10 | Profile::High10Intra => 110, - Profile::High422 | Profile::High422Intra => 122, - Profile::High444Predictive | Profile::High444Intra => 244, - Profile::CAVLC444Intra => 44, - } - } - - pub fn profile_iop(self) -> u8 { - use profile_level_id::profile_iop_consts::*; - - match self { - Profile::Baseline => 0, - Profile::ConstrainedBaseline => CONSTRAINT_SET1_FLAG, - Profile::Main => 0, - Profile::Extended => 0, - Profile::High => 0, - Profile::High10 => 0, - Profile::High422 => 0, - Profile::High444Predictive => 0, - 
Profile::High10Intra => CONSTRAINT_SET3_FLAG, - Profile::High422Intra => CONSTRAINT_SET3_FLAG, - Profile::High444Intra => CONSTRAINT_SET3_FLAG, - Profile::CAVLC444Intra => 0, - } - } -} - -/// H.264 encoding levels with their corresponding capabilities. -#[derive(Debug, Clone, Copy)] -#[allow(non_camel_case_types)] -pub enum Level { - /// Level 1.0: Max resolution 176x144 (QCIF), 15 fps, 64 kbps (Main), 80 kbps (High) - Level_1_0, - /// Level 1.B: Specialized low-complexity baseline level. - Level_1_B, - /// Level 1.1: Max resolution 176x144 (QCIF), 30 fps, 192 kbps (Main), 240 kbps (High) - Level_1_1, - /// Level 1.2: Max resolution 320x240 (QVGA), 30 fps, 384 kbps (Main), 480 kbps (High) - Level_1_2, - /// Level 1.3: Reserved in standard, similar to Level 2.0. - Level_1_3, - - /// Level 2.0: Max resolution 352x288 (CIF), 30 fps, 2 Mbps (Main), 2.5 Mbps (High) - Level_2_0, - /// Level 2.1: Max resolution 352x288 (CIF), 30 fps, 4 Mbps (Main), 5 Mbps (High) - Level_2_1, - /// Level 2.2: Max resolution 352x288 (CIF), 30 fps, 10 Mbps (Main), 12.5 Mbps (High) - Level_2_2, - - /// Level 3.0: Max resolution 720x576 (SD), 30 fps, 10 Mbps (Main), 12.5 Mbps (High) - Level_3_0, - /// Level 3.1: Max resolution 1280x720 (HD), 30 fps, 14 Mbps (Main), 17.5 Mbps (High) - Level_3_1, - /// Level 3.2: Max resolution 1280x720 (HD), 60 fps, 20 Mbps (Main), 25 Mbps (High) - Level_3_2, - - /// Level 4.0: Max resolution 1920x1080 (Full HD), 30 fps, 20 Mbps (Main), 25 Mbps (High) - Level_4_0, - /// Level 4.1: Max resolution 1920x1080 (Full HD), 60 fps, 50 Mbps (Main), 62.5 Mbps (High) - Level_4_1, - /// Level 4.2: Max resolution 1920x1080 (Full HD), 120 fps, 100 Mbps (Main), 125 Mbps (High) - Level_4_2, - - /// Level 5.0: Max resolution 3840x2160 (4K), 30 fps, 135 Mbps (Main), 168.75 Mbps (High) - Level_5_0, - /// Level 5.1: Max resolution 3840x2160 (4K), 60 fps, 240 Mbps (Main), 300 Mbps (High) - Level_5_1, - /// Level 5.2: Max resolution 4096x2160 (4K Cinema), 60 fps, 480 Mbps (Main), 600 Mbps (High) - Level_5_2, - - /// Level 6.0: Max resolution 8192x4320 (8K UHD), 30 fps, 240 Mbps (Main), 240 Mbps (High) - Level_6_0, - /// Level 6.1: Max resolution 8192x4320 (8K UHD), 60 fps, 480 Mbps (Main), 480 Mbps (High) - Level_6_1, - /// Level 6.2: Max resolution 8192x4320 (8K UHD), 120 fps, 800 Mbps (Main), 800 Mbps (High) - Level_6_2, -} - -impl Level { - /// Returns the level idc as specified in H.264 for this level - /// - /// Note that level 1.1 & 1.b have the same value - pub fn level_idc(self) -> u8 { - match self { - Level::Level_1_0 => 10, - Level::Level_1_B => 11, - Level::Level_1_1 => 11, - Level::Level_1_2 => 12, - Level::Level_1_3 => 13, - Level::Level_2_0 => 20, - Level::Level_2_1 => 21, - Level::Level_2_2 => 22, - Level::Level_3_0 => 30, - Level::Level_3_1 => 31, - Level::Level_3_2 => 32, - Level::Level_4_0 => 40, - Level::Level_4_1 => 41, - Level::Level_4_2 => 42, - Level::Level_5_0 => 50, - Level::Level_5_1 => 51, - Level::Level_5_2 => 52, - Level::Level_6_0 => 60, - Level::Level_6_1 => 61, - Level::Level_6_2 => 62, - } - } - - fn max_mbps(self) -> u32 { - self.limits().0 - } - - fn max_fs(self) -> u32 { - self.limits().1 - } - - fn max_br(self) -> u32 { - self.limits().3 - } - - /// ITU-T H.264 Table A-1 Level Limits - /// - /// 0 - Max macroblock processing rate MaxMBPS (MB/s) - /// 1 - Max frame size MaxFS (MBs) - /// 2 - Max decoded picture buffer size MaxDpbMbs (MBs) - /// 3 - Max video bit rate MaxBR (1000 bits/s, 1200 bits/s, cpbBrVclFactor bits/s, or cpbBrNalFactor bits/s) - /// 4 - Max CPB size 
diff --git a/media/h264/src/openh264.rs b/media/h264/src/openh264.rs
deleted file mode 100644
index 93b9f68c..00000000
--- a/media/h264/src/openh264.rs
+++ /dev/null
@@ -1,112 +0,0 @@
-//! Utility functions for openh264
-
-use crate::{
-    FmtpOptions, H264EncoderConfig, Level, PacketizationMode, Profile,
-    profile_level_id::ProfileLevelId,
-};
-use openh264::encoder::{BitRate, IntraFramePeriod, QpRange};
-use openh264_sys2::API as _;
-use std::mem::MaybeUninit;
-
-fn map_profile(profile: Profile) -> openh264::encoder::Profile {
-    use Profile::*;
-
-    match profile {
-        ConstrainedBaseline | Baseline => openh264::encoder::Profile::Baseline,
-        Main => openh264::encoder::Profile::Main,
-        Extended => openh264::encoder::Profile::Extended,
-        High => openh264::encoder::Profile::High,
-        High10 | High10Intra => openh264::encoder::Profile::High10,
-        High422 | High422Intra => openh264::encoder::Profile::High422,
-        High444Predictive | High444Intra => openh264::encoder::Profile::High444,
-        CAVLC444Intra => openh264::encoder::Profile::CAVLC444,
-    }
-}
-
-fn map_level(level: Level) -> openh264::encoder::Level {
-    match level {
-        Level::Level_1_0 => openh264::encoder::Level::Level_1_0,
-        Level::Level_1_B => openh264::encoder::Level::Level_1_B,
-        Level::Level_1_1 => openh264::encoder::Level::Level_1_1,
-        Level::Level_1_2 => openh264::encoder::Level::Level_1_2,
-        Level::Level_1_3 => openh264::encoder::Level::Level_1_3,
-        Level::Level_2_0 => openh264::encoder::Level::Level_2_0,
-        Level::Level_2_1 => openh264::encoder::Level::Level_2_1,
-        Level::Level_2_2 => openh264::encoder::Level::Level_2_2,
-        Level::Level_3_0 => openh264::encoder::Level::Level_3_0,
-        Level::Level_3_1 => openh264::encoder::Level::Level_3_1,
-        Level::Level_3_2 => openh264::encoder::Level::Level_3_2,
-        Level::Level_4_0 => openh264::encoder::Level::Level_4_0,
-        Level::Level_4_1 => openh264::encoder::Level::Level_4_1,
-        Level::Level_4_2 => openh264::encoder::Level::Level_4_2,
-        Level::Level_5_0 => openh264::encoder::Level::Level_5_0,
-        Level::Level_5_1 => openh264::encoder::Level::Level_5_1,
-        Level::Level_5_2 => openh264::encoder::Level::Level_5_2,
-        // Level 6+ is not supported by openh264 - use 5.2
-        Level::Level_6_0 => openh264::encoder::Level::Level_5_2,
-        Level::Level_6_1 => openh264::encoder::Level::Level_5_2,
-        Level::Level_6_2 => openh264::encoder::Level::Level_5_2,
-    }
-}
-
-/// Create a openh264 encoder config from the parsed [`FmtpOptions`]
-pub fn openh264_encoder_config(c: H264EncoderConfig) -> openh264::encoder::EncoderConfig {
-    let mut config = openh264::encoder::EncoderConfig::new()
-        .profile(map_profile(c.profile))
-        .level(map_level(c.level));
-
-    if let Some((qmin, qmax)) = c.qp {
-        config = config.qp(QpRange::new(
-            qmin.try_into().expect("qmin must be 0..=51"),
-            qmax.try_into().expect("qmax must be 0..=51"),
-        ));
-    }
-
-    if let Some(gop) = c.gop {
-        config = config.intra_frame_period(IntraFramePeriod::from_num_frames(gop));
-    }
-
-    if let Some(bitrate) = c.bitrate {
-        config = config.bitrate(BitRate::from_bps(bitrate))
-    }
-
-    if let Some(max_slice_len) = c.max_slice_len {
-        config = config.max_slice_len(max_slice_len as u32);
-    }
-
-    config
-}
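Callers losing this helper can drive the same openh264 builder directly. A minimal sketch follows, using only the builder calls the removed `openh264_encoder_config` itself relied on; the concrete profile, level, and rate values are placeholders, not recommendations.

```rust
use openh264::encoder::{BitRate, EncoderConfig, IntraFramePeriod, Level, Profile, QpRange};

fn example_encoder_config() -> EncoderConfig {
    EncoderConfig::new()
        .profile(Profile::Baseline)
        .level(Level::Level_3_1)
        // Clamp the quantizer to a mid-quality range (valid QPs are 0..=51).
        .qp(QpRange::new(18, 36))
        // Request a keyframe every 60 frames.
        .intra_frame_period(IntraFramePeriod::from_num_frames(60))
        // Target roughly 2 Mbit/s.
        .bitrate(BitRate::from_bps(2_000_000))
}
```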
-
-/// Create [`FmtpOptions`] from openh264's decoder capabilities.
-///
-/// Should be used when offering to receive H.264 in a SDP negotiation.
-pub fn openh264_decoder_fmtp(api: &openh264::OpenH264API) -> FmtpOptions {
-    let capability = unsafe {
-        let mut capability = MaybeUninit::uninit();
-
-        assert_eq!(
-            api.WelsGetDecoderCapability(capability.as_mut_ptr()),
-            0,
-            "openh264 WelsGetDecoderCapability failed"
-        );
-
-        capability.assume_init()
-    };
-
-    FmtpOptions {
-        profile_level_id: ProfileLevelId::from_bytes(
-            capability.iProfileIdc as u8,
-            capability.iProfileIop as u8,
-            capability.iLevelIdc as u8,
-        )
-        .expect("openh264 should not return unknown capabilities"),
-        level_asymmetry_allowed: true,
-        packetization_mode: PacketizationMode::NonInterleavedMode,
-        max_mbps: Some(capability.iMaxMbps as u32),
-        max_fs: Some(capability.iMaxFs as u32),
-        max_cbp: Some(capability.iMaxCpb as u32),
-        max_dpb: Some(capability.iMaxDpb as u32),
-        max_br: Some(capability.iMaxBr as u32),
-        redundant_pic_cap: capability.bRedPicCap,
-    }
-}
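The `MaybeUninit` dance in `openh264_decoder_fmtp` is the standard out-parameter pattern for C APIs that fill a caller-provided struct and signal success via a return code. A generic sketch of the pattern follows; the `query_capability` symbol and `Capability` layout are hypothetical stand-ins, so this will not link without a matching C implementation.

```rust
use std::mem::MaybeUninit;

/// Hypothetical C-side capability struct; the real API fills
/// openh264's SDecoderCapability instead.
#[repr(C)]
struct Capability {
    max_mbps: i32,
    max_fs: i32,
}

unsafe extern "C" {
    /// Hypothetical: fills `out` and returns 0 on success.
    fn query_capability(out: *mut Capability) -> i32;
}

fn capability() -> Capability {
    let mut cap = MaybeUninit::<Capability>::uninit();
    // SAFETY: by (assumed) contract, `query_capability` fully initializes
    // `out` whenever it returns 0, so `assume_init` is only reached on the
    // success path.
    unsafe {
        assert_eq!(query_capability(cap.as_mut_ptr()), 0, "query_capability failed");
        cap.assume_init()
    }
}
```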
diff --git a/media/rtc/src/tokio/mod.rs b/media/rtc/src/tokio/mod.rs
index 2386364a..dcf419be 100644
--- a/media/rtc/src/tokio/mod.rs
+++ b/media/rtc/src/tokio/mod.rs
@@ -192,13 +192,11 @@ impl TokioIoState {
             return Poll::Pending;
         }
 
-        // Polled without receiving data, so poll session once
-        if !received {
-            session.poll(now);
+        if received {
+            self.update_sleep(session, now, true);
         }
 
-        // Sleep must be updated after polling or receiving data
-        let mut update_sleep = true;
+        let mut polled = false;
 
         // Poll sleep until it returns pending, to register the sleep with the context
         while let Some(sleep) = &mut self.sleep
@@ -206,13 +204,16 @@
         {
             session.poll(now);
 
-            self.update_sleep(session, now);
+            self.update_sleep(session, now, false);
 
-            update_sleep = false;
+            polled = true;
         }
 
-        if update_sleep {
-            self.update_sleep(session, now);
+        // When nothing was received and sleep also didn't cause a poll, poll once anyway
+        // since this might be the first poll after handling a session event
+        if !received && !polled {
+            session.poll(now);
+            self.update_sleep(session, now, false);
         }
 
         if session.has_events() {
@@ -222,13 +223,15 @@
         }
     }
 
-    fn update_sleep(&mut self, session: &mut SdpSession, now: Instant) {
+    fn update_sleep(&mut self, session: &mut SdpSession, now: Instant, allow_zero: bool) {
         match session.timeout(now) {
             Some(duration) => {
-                debug_assert!(
-                    duration != Duration::ZERO,
-                    "SdpSession::timeout must not return Duration::ZERO after SdpSession::poll"
-                );
+                if !allow_zero {
+                    debug_assert!(
+                        duration != Duration::ZERO,
+                        "SdpSession::timeout must not return Duration::ZERO after SdpSession::poll"
+                    );
+                }
 
                 let deadline = tokio::time::Instant::from(now + duration);
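The `media/rtc` hunks above tighten when `update_sleep` may observe a `Duration::ZERO` timeout: right after receiving data, before `poll` has run, a zero timeout is legal, hence the new `allow_zero` flag. As a mental model, a simplified drive loop is sketched below; the `Session` trait is a stand-in mirroring only the `poll`/`timeout` calls visible in the diff, not the real `SdpSession` API.

```rust
use std::time::{Duration, Instant};

// Stand-in for SdpSession: `poll` advances internal timers, `timeout`
// reports how long the caller may sleep before polling again.
trait Session {
    fn poll(&mut self, now: Instant);
    fn timeout(&mut self, now: Instant) -> Option<Duration>;
}

async fn drive(session: &mut impl Session) {
    loop {
        let now = Instant::now();
        session.poll(now);

        // Re-query the timeout after *every* poll: handling one deadline can
        // arm the next one, and a stale sleep would wake up too late.
        match session.timeout(now) {
            Some(duration) => tokio::time::sleep_until((now + duration).into()).await,
            None => return, // nothing scheduled; a real loop would await I/O here
        }
    }
}
```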