Skip to content

Commit ca618eb

Browse files
feat: add scalability mode for AV1/VP9. (#1076)
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 3352aef commit ca618eb

9 files changed

Lines changed: 101 additions & 4 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
libwebrtc: patch
3+
livekit: patch
4+
livekit-ffi: patch
5+
---
6+
7+
feat: add scalability mode for AV1/VP9. - #1076 (@cloudwebrtc)

libwebrtc/src/native/rtp_parameters.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ impl From<sys_rp::ffi::RtpEncodingParameters> for RtpEncodingParameters {
7272
scale_resolution_down_by: value
7373
.has_scale_resolution_down_by
7474
.then_some(value.scale_resolution_down_by),
75+
scalability_mode: value.has_scalability_mode.then_some(value.scalability_mode),
7576
}
7677
}
7778
}
@@ -203,8 +204,8 @@ impl From<RtpEncodingParameters> for sys_rp::ffi::RtpEncodingParameters {
203204
min_bitrate_bps: 0,
204205
has_num_temporal_layers: false,
205206
num_temporal_layers: 0,
206-
has_scalability_mode: false,
207-
scalability_mode: "".to_string(),
207+
has_scalability_mode: value.scalability_mode.is_some(),
208+
scalability_mode: value.scalability_mode.unwrap_or_default(),
208209
has_ssrc: false,
209210
ssrc: 0,
210211
}

libwebrtc/src/rtp_parameters.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ pub struct RtpEncodingParameters {
5858
pub priority: Priority,
5959
pub rid: String,
6060
pub scale_resolution_down_by: Option<f64>,
61+
/// RTP scalability mode (e.g. "L3T3_KEY"). Required to enable true
62+
/// SVC for codecs that support it (VP9, AV1).
63+
pub scalability_mode: Option<String>,
6164
}
6265

6366
#[derive(Debug, Clone)]
@@ -89,6 +92,7 @@ impl Default for RtpEncodingParameters {
8992
priority: Priority::Low,
9093
rid: String::default(),
9194
scale_resolution_down_by: None,
95+
scalability_mode: None,
9296
}
9397
}
9498
}

livekit-ffi-node-bindings/proto/room_pb.d.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,6 +1770,15 @@ export declare class TrackPublishOptions extends Message<TrackPublishOptions> {
17701770
*/
17711771
packetTrailerFeatures: PacketTrailerFeature[];
17721772

1773+
/**
1774+
* RTP scalability mode (e.g. "L3T3_KEY"). When set, a single RTP
1775+
* encoding is produced with this mode, enabling true SVC for codecs
1776+
* that support it (VP9, AV1). Has no effect for VP8/H264.
1777+
*
1778+
* @generated from field: optional string scalability_mode = 11;
1779+
*/
1780+
scalabilityMode?: string;
1781+
17731782
constructor(data?: PartialMessage<TrackPublishOptions>);
17741783

17751784
static readonly runtime: typeof proto2;

livekit-ffi-node-bindings/proto/room_pb.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,7 @@ const TrackPublishOptions = /*@__PURE__*/ proto2.makeMessageType(
697697
{ no: 8, name: "stream", kind: "scalar", T: 9 /* ScalarType.STRING */, opt: true },
698698
{ no: 9, name: "preconnect_buffer", kind: "scalar", T: 8 /* ScalarType.BOOL */, opt: true },
699699
{ no: 10, name: "packet_trailer_features", kind: "enum", T: proto2.getEnumType(PacketTrailerFeature), repeated: true },
700+
{ no: 11, name: "scalability_mode", kind: "scalar", T: 9 /* ScalarType.STRING */, opt: true },
700701
],
701702
);
702703

livekit-ffi/protocol/room.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,10 @@ message TrackPublishOptions {
308308
optional string stream = 8;
309309
optional bool preconnect_buffer = 9;
310310
repeated PacketTrailerFeature packet_trailer_features = 10;
311+
// RTP scalability mode (e.g. "L3T3_KEY"). When set, a single RTP
312+
// encoding is produced with this mode, enabling true SVC for codecs
313+
// that support it (VP9, AV1). Has no effect for VP8/H264.
314+
optional string scalability_mode = 11;
311315
}
312316

313317
enum IceTransportType {

livekit-ffi/src/conversion/room.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ impl From<proto::TrackPublishOptions> for TrackPublishOptions {
316316
packet_trailer_features: packet_trailer_features_from_proto(
317317
opts.packet_trailer_features,
318318
),
319+
scalability_mode: opts.scalability_mode,
319320
}
320321
}
321322
}

livekit/src/room/options.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ pub struct TrackPublishOptions {
119119
pub stream: String,
120120
pub preconnect_buffer: bool,
121121
pub packet_trailer_features: PacketTrailerFeatures,
122+
/// RTP scalability mode (e.g. "L3T3_KEY"). When set, a single RTP
123+
/// encoding is produced and that mode is forwarded to libwebrtc to
124+
/// enable true SVC for VP9/AV1. Has no effect for VP8/H264.
125+
pub scalability_mode: Option<String>,
122126
}
123127

124128
impl Default for TrackPublishOptions {
@@ -135,6 +139,7 @@ impl Default for TrackPublishOptions {
135139
stream: "".to_string(),
136140
preconnect_buffer: false,
137141
packet_trailer_features: PacketTrailerFeatures::default(),
142+
scalability_mode: None,
138143
}
139144
}
140145
}
@@ -176,6 +181,16 @@ pub fn compute_video_encodings(
176181
},
177182
};
178183

184+
// SVC: when an explicit scalability_mode is set, emit a single encoding
185+
// and let libwebrtc produce the spatial/temporal layers internally.
186+
if let Some(mode) = options.scalability_mode.clone() {
187+
let mut encodings = into_rtp_encodings(width, height, &[initial_preset]);
188+
if let Some(first) = encodings.first_mut() {
189+
first.scalability_mode = Some(mode);
190+
}
191+
return encodings;
192+
}
193+
179194
if !options.simulcast {
180195
return into_rtp_encodings(width, height, &[initial_preset]);
181196
}
@@ -310,6 +325,19 @@ pub fn video_quality_for_rid(rid: &str) -> Option<proto::VideoQuality> {
310325
}
311326
}
312327

/// Parse the number of spatial layers from an RTP scalability mode string.
///
/// Standard modes start with `L<N>` where `<N>` is the spatial-layer count
/// (e.g. "L3T3_KEY" -> 3, "L2T3" -> 2, "L1T3" -> 1).
///
/// Returns `1` (a single spatial layer) whenever the string does not match
/// that shape: no leading `L`, no digits after the `L`, a count of zero, or a
/// count above the supported maximum of three layers.
///
/// The result is always in `1..=3`. Bounding it matters because the mode is a
/// free-form, caller-supplied string, and the layer count derived from it is
/// used for arithmetic such as bit shifts and buffer sizing; an unbounded
/// value like "L40T1" or "L4000000000" would overflow or over-allocate there.
pub fn spatial_layers_from_scalability_mode(mode: &str) -> u32 {
    // Highest spatial-layer count used by standard `L<N>T<M>` modes.
    const MAX_SPATIAL_LAYERS: u32 = 3;

    mode.strip_prefix('L')
        .and_then(|rest| {
            // Take the leading run of ASCII digits; `find` yields a byte
            // offset on a char boundary, so the slice below cannot panic.
            let digits_end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
            rest[..digits_end].parse::<u32>().ok()
        })
        // Zero or out-of-range counts are not valid modes: fall back to 1.
        .filter(|n| (1..=MAX_SPATIAL_LAYERS).contains(n))
        .unwrap_or(1)
}
340+
313341
pub fn video_layers_from_encodings(
314342
width: u32,
315343
height: u32,
@@ -326,6 +354,39 @@ pub fn video_layers_from_encodings(
326354
}];
327355
}
328356

357+
// SVC: a single RTP encoding carries multiple spatial layers internally.
358+
// Synthesise one VideoLayer per spatial layer so the SFU knows the track
359+
// has switchable quality tiers.
360+
if encodings.len() == 1 {
361+
if let Some(mode) = encodings[0].scalability_mode.as_ref() {
362+
let spatial = spatial_layers_from_scalability_mode(mode);
363+
if spatial > 1 {
364+
let total_bitrate = encodings[0].max_bitrate.unwrap_or(0);
365+
let mut layers = Vec::with_capacity(spatial as usize);
366+
// Highest spatial layer is the source resolution; each lower
367+
// layer is half on each axis (the libwebrtc default for
368+
// L2/L3 scalability modes).
369+
for i in 0..spatial {
370+
let scale = 1u32 << (spatial - 1 - i);
371+
let quality = match (spatial - 1 - i, spatial) {
372+
(0, _) => proto::VideoQuality::High,
373+
(1, _) => proto::VideoQuality::Medium,
374+
_ => proto::VideoQuality::Low,
375+
};
376+
layers.push(proto::VideoLayer {
377+
quality: quality as i32,
378+
width: width / scale,
379+
height: height / scale,
380+
bitrate: (total_bitrate / spatial as u64) as u32,
381+
ssrc: 0,
382+
..Default::default()
383+
});
384+
}
385+
return layers;
386+
}
387+
}
388+
}
389+
329390
let mut layers = Vec::with_capacity(encodings.len());
330391
for encoding in encodings {
331392
let scale = encoding.scale_resolution_down_by.unwrap_or(1.0);

livekit/src/room/participant/local_participant.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,17 @@ impl LocalParticipant {
336336
encodings = compute_video_encodings(req.width, req.height, &options);
337337
req.layers = video_layers_from_encodings(req.width, req.height, &encodings);
338338

339-
// Populate simulcast_codecs so the server knows this track is simulcasted
340-
if options.simulcast && encodings.len() > 1 {
339+
// Populate simulcast_codecs so the server knows this track has
340+
// multiple quality layers — either real simulcast (multiple
341+
// RTP encodings) or SVC (one encoding with several spatial
342+
// layers carried inside it).
343+
let is_svc_multilayer = encodings.len() == 1
344+
&& encodings
345+
.first()
346+
.and_then(|e| e.scalability_mode.as_ref())
347+
.map(|m| options::spatial_layers_from_scalability_mode(m) > 1)
348+
.unwrap_or(false);
349+
if (options.simulcast && encodings.len() > 1) || is_svc_multilayer {
341350
req.simulcast_codecs = vec![proto::SimulcastCodec {
342351
codec: options.video_codec.as_str().to_string(),
343352
cid: track.rtc_track().id(),

0 commit comments

Comments
 (0)