Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@ -0,0 +1,295 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/create_scalability_structure.h"
#include <memory>
#include "api/video_codecs/scalability_mode.h"
#include "modules/video_coding/svc/scalability_structure_full_svc.h"
#include "modules/video_coding/svc/scalability_structure_key_svc.h"
#include "modules/video_coding/svc/scalability_structure_l2t2_key_shift.h"
#include "modules/video_coding/svc/scalability_structure_simulcast.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
struct NamedStructureFactory {
ScalabilityMode name;
// Use function pointer to make NamedStructureFactory trivally destructable.
std::unique_ptr<ScalableVideoController> (*factory)();
ScalableVideoController::StreamLayersConfig config;
};
// Adapter around std::make_unique<T>() that returns the base-class pointer, so
// that every instantiation has the same function type.
template <typename T>
auto Create() -> std::unique_ptr<ScalableVideoController> {
  std::unique_ptr<ScalableVideoController> controller = std::make_unique<T>();
  return controller;
}
// Builds a T constructed with a T::ScalingFactor of num = 2, den = 3 and
// returns it as a base-class pointer.
// 1.5:1 scaling, see https://w3c.github.io/webrtc-svc/#scalabilitymodes*
template <typename T>
auto CreateH() -> std::unique_ptr<ScalableVideoController> {
  typename T::ScalingFactor three_to_two;
  three_to_two.num = 2;
  three_to_two.den = 3;
  std::unique_ptr<ScalableVideoController> controller =
      std::make_unique<T>(three_to_two);
  return controller;
}
// Static layer descriptions for the single-spatial-layer modes with 1, 2 and
// 3 temporal layers. Only the first three fields are spelled out; the
// remaining StreamLayersConfig members are left to their default
// initialization.
constexpr ScalableVideoController::StreamLayersConfig kConfigL1T1 = {
/*num_spatial_layers=*/1, /*num_temporal_layers=*/1,
/*uses_reference_scaling=*/false};
constexpr ScalableVideoController::StreamLayersConfig kConfigL1T2 = {
/*num_spatial_layers=*/1, /*num_temporal_layers=*/2,
/*uses_reference_scaling=*/false};
constexpr ScalableVideoController::StreamLayersConfig kConfigL1T3 = {
/*num_spatial_layers=*/1, /*num_temporal_layers=*/3,
/*uses_reference_scaling=*/false};
// Static layer descriptions for the L2* and L3* modes: 2 or 3 spatial layers,
// 1 to 3 temporal layers, uses_reference_scaling set to true. The constants
// with an `h` suffix carry the 2/3-based factors ({2, 1}/{3, 1} and
// {4, 2, 1}/{9, 3, 1}) instead of the 1/2-based ones.
// NOTE(review): the two trailing brace lists are presumably
// scaling_factor_num and scaling_factor_den, lowest spatial layer first; the
// values match what ScalabilityStructureFullSvc::StreamConfig() computes.
// Confirm against the StreamLayersConfig declaration.
constexpr ScalableVideoController::StreamLayersConfig kConfigL2T1 = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/true,
{1, 1},
{2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL2T1h = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/true,
{2, 1},
{3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL2T2 = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/true,
{1, 1},
{2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL2T2h = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/true,
{2, 1},
{3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL2T3 = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/true,
{1, 1},
{2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL2T3h = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/true,
{2, 1},
{3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL3T1 = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/true,
{1, 1, 1},
{4, 2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL3T1h = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/true,
{4, 2, 1},
{9, 3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL3T2 = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/true,
{1, 1, 1},
{4, 2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL3T2h = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/true,
{4, 2, 1},
{9, 3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL3T3 = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/true,
{1, 1, 1},
{4, 2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigL3T3h = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/true,
{4, 2, 1},
{9, 3, 1}};
// Static layer descriptions for the S2* and S3* modes. They use the same
// layer counts and brace lists as the L2*/L3* constants of the same shape,
// but uses_reference_scaling is false.
// NOTE(review): the two trailing brace lists are presumably
// scaling_factor_num and scaling_factor_den, lowest spatial layer first.
// Confirm against the StreamLayersConfig declaration.
constexpr ScalableVideoController::StreamLayersConfig kConfigS2T1 = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/false,
{1, 1},
{2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS2T1h = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/false,
{2, 1},
{3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS2T2 = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/false,
{1, 1},
{2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS2T2h = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/false,
{2, 1},
{3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS2T3 = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/false,
{1, 1},
{2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS2T3h = {
/*num_spatial_layers=*/2,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/false,
{2, 1},
{3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS3T1 = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/false,
{1, 1, 1},
{4, 2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS3T1h = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/1,
/*uses_reference_scaling=*/false,
{4, 2, 1},
{9, 3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS3T2 = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/false,
{1, 1, 1},
{4, 2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS3T2h = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/2,
/*uses_reference_scaling=*/false,
{4, 2, 1},
{9, 3, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS3T3 = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/false,
{1, 1, 1},
{4, 2, 1}};
constexpr ScalableVideoController::StreamLayersConfig kConfigS3T3h = {
/*num_spatial_layers=*/3,
/*num_temporal_layers=*/3,
/*uses_reference_scaling=*/false,
{4, 2, 1},
{9, 3, 1}};
// Lookup table with one entry per supported ScalabilityMode: the mode, the
// function that creates its controller, and its static layer configuration.
// The *_KEY and *_KEY_SHIFT entries reuse the configuration constant of the
// plain mode with the same layer counts. The `h` entries are built with
// CreateH<> and paired with the `h` configuration constants.
// CreateScalabilityStructure() and ScalabilityStructureConfig() scan this
// table linearly.
constexpr NamedStructureFactory kFactories[] = {
{ScalabilityMode::kL1T1, Create<ScalableVideoControllerNoLayering>,
kConfigL1T1},
{ScalabilityMode::kL1T2, Create<ScalabilityStructureL1T2>, kConfigL1T2},
{ScalabilityMode::kL1T3, Create<ScalabilityStructureL1T3>, kConfigL1T3},
{ScalabilityMode::kL2T1, Create<ScalabilityStructureL2T1>, kConfigL2T1},
{ScalabilityMode::kL2T1h, CreateH<ScalabilityStructureL2T1>, kConfigL2T1h},
{ScalabilityMode::kL2T1_KEY, Create<ScalabilityStructureL2T1Key>,
kConfigL2T1},
{ScalabilityMode::kL2T2, Create<ScalabilityStructureL2T2>, kConfigL2T2},
{ScalabilityMode::kL2T2h, CreateH<ScalabilityStructureL2T2>, kConfigL2T2h},
{ScalabilityMode::kL2T2_KEY, Create<ScalabilityStructureL2T2Key>,
kConfigL2T2},
{ScalabilityMode::kL2T2_KEY_SHIFT, Create<ScalabilityStructureL2T2KeyShift>,
kConfigL2T2},
{ScalabilityMode::kL2T3, Create<ScalabilityStructureL2T3>, kConfigL2T3},
{ScalabilityMode::kL2T3h, CreateH<ScalabilityStructureL2T3>, kConfigL2T3h},
{ScalabilityMode::kL2T3_KEY, Create<ScalabilityStructureL2T3Key>,
kConfigL2T3},
{ScalabilityMode::kL3T1, Create<ScalabilityStructureL3T1>, kConfigL3T1},
{ScalabilityMode::kL3T1h, CreateH<ScalabilityStructureL3T1>, kConfigL3T1h},
{ScalabilityMode::kL3T1_KEY, Create<ScalabilityStructureL3T1Key>,
kConfigL3T1},
{ScalabilityMode::kL3T2, Create<ScalabilityStructureL3T2>, kConfigL3T2},
{ScalabilityMode::kL3T2h, CreateH<ScalabilityStructureL3T2>, kConfigL3T2h},
{ScalabilityMode::kL3T2_KEY, Create<ScalabilityStructureL3T2Key>,
kConfigL3T2},
{ScalabilityMode::kL3T3, Create<ScalabilityStructureL3T3>, kConfigL3T3},
{ScalabilityMode::kL3T3h, CreateH<ScalabilityStructureL3T3>, kConfigL3T3h},
{ScalabilityMode::kL3T3_KEY, Create<ScalabilityStructureL3T3Key>,
kConfigL3T3},
{ScalabilityMode::kS2T1, Create<ScalabilityStructureS2T1>, kConfigS2T1},
{ScalabilityMode::kS2T1h, CreateH<ScalabilityStructureS2T1>, kConfigS2T1h},
{ScalabilityMode::kS2T2, Create<ScalabilityStructureS2T2>, kConfigS2T2},
{ScalabilityMode::kS2T2h, CreateH<ScalabilityStructureS2T2>, kConfigS2T2h},
{ScalabilityMode::kS2T3, Create<ScalabilityStructureS2T3>, kConfigS2T3},
{ScalabilityMode::kS2T3h, CreateH<ScalabilityStructureS2T3>, kConfigS2T3h},
{ScalabilityMode::kS3T1, Create<ScalabilityStructureS3T1>, kConfigS3T1},
{ScalabilityMode::kS3T1h, CreateH<ScalabilityStructureS3T1>, kConfigS3T1h},
{ScalabilityMode::kS3T2, Create<ScalabilityStructureS3T2>, kConfigS3T2},
{ScalabilityMode::kS3T2h, CreateH<ScalabilityStructureS3T2>, kConfigS3T2h},
{ScalabilityMode::kS3T3, Create<ScalabilityStructureS3T3>, kConfigS3T3},
{ScalabilityMode::kS3T3h, CreateH<ScalabilityStructureS3T3>, kConfigS3T3h},
};
} // namespace
// Builds the controller registered for `name` in kFactories.
// Returns nullptr when the table has no entry for `name`.
std::unique_ptr<ScalableVideoController> CreateScalabilityStructure(
    ScalabilityMode name) {
  for (const NamedStructureFactory& candidate : kFactories) {
    if (candidate.name != name) {
      continue;
    }
    return candidate.factory();
  }
  return nullptr;
}
// Looks up the static layer configuration registered for `name` in
// kFactories. Returns absl::nullopt when the table has no entry for `name`.
absl::optional<ScalableVideoController::StreamLayersConfig>
ScalabilityStructureConfig(ScalabilityMode name) {
  for (const NamedStructureFactory& candidate : kFactories) {
    if (candidate.name != name) {
      continue;
    }
    return candidate.config;
  }
  return absl::nullopt;
}
} // namespace webrtc

View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_CREATE_SCALABILITY_STRUCTURE_H_
#define MODULES_VIDEO_CODING_SVC_CREATE_SCALABILITY_STRUCTURE_H_
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Creates the scalable video controller for the scalability mode `name`,
// named according to
// https://w3c.github.io/webrtc-svc/#scalabilitymodes*
// Returns nullptr for unknown name.
std::unique_ptr<ScalableVideoController> CreateScalabilityStructure(
ScalabilityMode name);
// Returns the description (layer counts and scaling settings) of the
// scalability structure identified by `name`.
// Returns nullopt for unknown name.
absl::optional<ScalableVideoController::StreamLayersConfig>
ScalabilityStructureConfig(ScalabilityMode name);
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_CREATE_SCALABILITY_STRUCTURE_H_

View file

@ -0,0 +1,390 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalability_mode_util.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Parses the textual name of a scalability mode (e.g. "L1T2", "L2T2_KEY",
// "S3T3h"). The comparison is exact and case-sensitive; any string that is
// not in the table yields absl::nullopt.
absl::optional<ScalabilityMode> ScalabilityModeFromString(
    absl::string_view mode_string) {
  struct NamedMode {
    const char* text;
    ScalabilityMode mode;
  };
  static constexpr NamedMode kNamedModes[] = {
      {"L1T1", ScalabilityMode::kL1T1},
      {"L1T2", ScalabilityMode::kL1T2},
      {"L1T3", ScalabilityMode::kL1T3},
      {"L2T1", ScalabilityMode::kL2T1},
      {"L2T1h", ScalabilityMode::kL2T1h},
      {"L2T1_KEY", ScalabilityMode::kL2T1_KEY},
      {"L2T2", ScalabilityMode::kL2T2},
      {"L2T2h", ScalabilityMode::kL2T2h},
      {"L2T2_KEY", ScalabilityMode::kL2T2_KEY},
      {"L2T2_KEY_SHIFT", ScalabilityMode::kL2T2_KEY_SHIFT},
      {"L2T3", ScalabilityMode::kL2T3},
      {"L2T3h", ScalabilityMode::kL2T3h},
      {"L2T3_KEY", ScalabilityMode::kL2T3_KEY},
      {"L3T1", ScalabilityMode::kL3T1},
      {"L3T1h", ScalabilityMode::kL3T1h},
      {"L3T1_KEY", ScalabilityMode::kL3T1_KEY},
      {"L3T2", ScalabilityMode::kL3T2},
      {"L3T2h", ScalabilityMode::kL3T2h},
      {"L3T2_KEY", ScalabilityMode::kL3T2_KEY},
      {"L3T3", ScalabilityMode::kL3T3},
      {"L3T3h", ScalabilityMode::kL3T3h},
      {"L3T3_KEY", ScalabilityMode::kL3T3_KEY},
      {"S2T1", ScalabilityMode::kS2T1},
      {"S2T1h", ScalabilityMode::kS2T1h},
      {"S2T2", ScalabilityMode::kS2T2},
      {"S2T2h", ScalabilityMode::kS2T2h},
      {"S2T3", ScalabilityMode::kS2T3},
      {"S2T3h", ScalabilityMode::kS2T3h},
      {"S3T1", ScalabilityMode::kS3T1},
      {"S3T1h", ScalabilityMode::kS3T1h},
      {"S3T2", ScalabilityMode::kS3T2},
      {"S3T2h", ScalabilityMode::kS3T2h},
      {"S3T3", ScalabilityMode::kS3T3},
      {"S3T3h", ScalabilityMode::kS3T3h},
  };
  for (const NamedMode& candidate : kNamedModes) {
    if (mode_string == absl::string_view(candidate.text)) {
      return candidate.mode;
    }
  }
  return absl::nullopt;
}
// Maps a scalability mode to its inter-layer prediction mode:
//   * kOnKeyPic for every *_KEY and *_KEY_SHIFT mode,
//   * kOff for every S* mode,
//   * kOn for all remaining modes, including the single-layer L1T* ones.
InterLayerPredMode ScalabilityModeToInterLayerPredMode(
    ScalabilityMode scalability_mode) {
  switch (scalability_mode) {
    // L* modes without a KEY suffix.
    case ScalabilityMode::kL1T1:
    case ScalabilityMode::kL1T2:
    case ScalabilityMode::kL1T3:
    case ScalabilityMode::kL2T1:
    case ScalabilityMode::kL2T1h:
    case ScalabilityMode::kL2T2:
    case ScalabilityMode::kL2T2h:
    case ScalabilityMode::kL2T3:
    case ScalabilityMode::kL2T3h:
    case ScalabilityMode::kL3T1:
    case ScalabilityMode::kL3T1h:
    case ScalabilityMode::kL3T2:
    case ScalabilityMode::kL3T2h:
    case ScalabilityMode::kL3T3:
    case ScalabilityMode::kL3T3h:
      return InterLayerPredMode::kOn;

    // L* modes with a KEY suffix.
    case ScalabilityMode::kL2T1_KEY:
    case ScalabilityMode::kL2T2_KEY:
    case ScalabilityMode::kL2T2_KEY_SHIFT:
    case ScalabilityMode::kL2T3_KEY:
    case ScalabilityMode::kL3T1_KEY:
    case ScalabilityMode::kL3T2_KEY:
    case ScalabilityMode::kL3T3_KEY:
      return InterLayerPredMode::kOnKeyPic;

    // S* modes.
    case ScalabilityMode::kS2T1:
    case ScalabilityMode::kS2T1h:
    case ScalabilityMode::kS2T2:
    case ScalabilityMode::kS2T2h:
    case ScalabilityMode::kS2T3:
    case ScalabilityMode::kS2T3h:
    case ScalabilityMode::kS3T1:
    case ScalabilityMode::kS3T1h:
    case ScalabilityMode::kS3T2:
    case ScalabilityMode::kS3T2h:
    case ScalabilityMode::kS3T3:
    case ScalabilityMode::kS3T3h:
      return InterLayerPredMode::kOff;
  }
  RTC_CHECK_NOTREACHED();
}
// Returns the number of spatial layers of `scalability_mode`, i.e. the digit
// that follows the leading 'L' or 'S' in the mode name (1, 2 or 3).
int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode) {
  switch (scalability_mode) {
    case ScalabilityMode::kL1T1:
    case ScalabilityMode::kL1T2:
    case ScalabilityMode::kL1T3:
      return 1;

    case ScalabilityMode::kL2T1:
    case ScalabilityMode::kL2T1h:
    case ScalabilityMode::kL2T1_KEY:
    case ScalabilityMode::kL2T2:
    case ScalabilityMode::kL2T2h:
    case ScalabilityMode::kL2T2_KEY:
    case ScalabilityMode::kL2T2_KEY_SHIFT:
    case ScalabilityMode::kL2T3:
    case ScalabilityMode::kL2T3h:
    case ScalabilityMode::kL2T3_KEY:
    case ScalabilityMode::kS2T1:
    case ScalabilityMode::kS2T1h:
    case ScalabilityMode::kS2T2:
    case ScalabilityMode::kS2T2h:
    case ScalabilityMode::kS2T3:
    case ScalabilityMode::kS2T3h:
      return 2;

    case ScalabilityMode::kL3T1:
    case ScalabilityMode::kL3T1h:
    case ScalabilityMode::kL3T1_KEY:
    case ScalabilityMode::kL3T2:
    case ScalabilityMode::kL3T2h:
    case ScalabilityMode::kL3T2_KEY:
    case ScalabilityMode::kL3T3:
    case ScalabilityMode::kL3T3h:
    case ScalabilityMode::kL3T3_KEY:
    case ScalabilityMode::kS3T1:
    case ScalabilityMode::kS3T1h:
    case ScalabilityMode::kS3T2:
    case ScalabilityMode::kS3T2h:
    case ScalabilityMode::kS3T3:
    case ScalabilityMode::kS3T3h:
      return 3;
  }
  RTC_CHECK_NOTREACHED();
}
// Returns the number of temporal layers of `scalability_mode`, i.e. the digit
// that follows the 'T' in the mode name (1, 2 or 3).
int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode) {
  switch (scalability_mode) {
    case ScalabilityMode::kL1T1:
    case ScalabilityMode::kL2T1:
    case ScalabilityMode::kL2T1h:
    case ScalabilityMode::kL2T1_KEY:
    case ScalabilityMode::kL3T1:
    case ScalabilityMode::kL3T1h:
    case ScalabilityMode::kL3T1_KEY:
    case ScalabilityMode::kS2T1:
    case ScalabilityMode::kS2T1h:
    case ScalabilityMode::kS3T1:
    case ScalabilityMode::kS3T1h:
      return 1;

    case ScalabilityMode::kL1T2:
    case ScalabilityMode::kL2T2:
    case ScalabilityMode::kL2T2h:
    case ScalabilityMode::kL2T2_KEY:
    case ScalabilityMode::kL2T2_KEY_SHIFT:
    case ScalabilityMode::kL3T2:
    case ScalabilityMode::kL3T2h:
    case ScalabilityMode::kL3T2_KEY:
    case ScalabilityMode::kS2T2:
    case ScalabilityMode::kS2T2h:
    case ScalabilityMode::kS3T2:
    case ScalabilityMode::kS3T2h:
      return 2;

    case ScalabilityMode::kL1T3:
    case ScalabilityMode::kL2T3:
    case ScalabilityMode::kL2T3h:
    case ScalabilityMode::kL2T3_KEY:
    case ScalabilityMode::kL3T3:
    case ScalabilityMode::kL3T3h:
    case ScalabilityMode::kL3T3_KEY:
    case ScalabilityMode::kS2T3:
    case ScalabilityMode::kS2T3h:
    case ScalabilityMode::kS3T3:
    case ScalabilityMode::kS3T3h:
      return 3;
  }
  RTC_CHECK_NOTREACHED();
}
// Returns the resolution ratio between spatial layers of `scalability_mode`:
//   * absl::nullopt for the single-spatial-layer L1T* modes,
//   * kThreeToTwo for every mode whose name ends in 'h',
//   * kTwoToOne for every other multi-spatial-layer mode.
absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio(
    ScalabilityMode scalability_mode) {
  switch (scalability_mode) {
    case ScalabilityMode::kL1T1:
    case ScalabilityMode::kL1T2:
    case ScalabilityMode::kL1T3:
      return absl::nullopt;

    case ScalabilityMode::kL2T1h:
    case ScalabilityMode::kL2T2h:
    case ScalabilityMode::kL2T3h:
    case ScalabilityMode::kL3T1h:
    case ScalabilityMode::kL3T2h:
    case ScalabilityMode::kL3T3h:
    case ScalabilityMode::kS2T1h:
    case ScalabilityMode::kS2T2h:
    case ScalabilityMode::kS2T3h:
    case ScalabilityMode::kS3T1h:
    case ScalabilityMode::kS3T2h:
    case ScalabilityMode::kS3T3h:
      return ScalabilityModeResolutionRatio::kThreeToTwo;

    case ScalabilityMode::kL2T1:
    case ScalabilityMode::kL2T1_KEY:
    case ScalabilityMode::kL2T2:
    case ScalabilityMode::kL2T2_KEY:
    case ScalabilityMode::kL2T2_KEY_SHIFT:
    case ScalabilityMode::kL2T3:
    case ScalabilityMode::kL2T3_KEY:
    case ScalabilityMode::kL3T1:
    case ScalabilityMode::kL3T1_KEY:
    case ScalabilityMode::kL3T2:
    case ScalabilityMode::kL3T2_KEY:
    case ScalabilityMode::kL3T3:
    case ScalabilityMode::kL3T3_KEY:
    case ScalabilityMode::kS2T1:
    case ScalabilityMode::kS2T2:
    case ScalabilityMode::kS2T3:
    case ScalabilityMode::kS3T1:
    case ScalabilityMode::kS3T2:
    case ScalabilityMode::kS3T3:
      return ScalabilityModeResolutionRatio::kTwoToOne;
  }
  RTC_CHECK_NOTREACHED();
}
// Returns `scalability_mode` reduced to at most `max_spatial_layers` spatial
// layers, keeping its number of temporal layers.
//   * A mode that already fits is returned unchanged.
//   * A three-layer mode is reduced to the two-layer mode of the same family
//     (same L/S prefix, same h/_KEY suffix) when exactly two layers are
//     allowed, otherwise to the matching L1T* mode.
//   * Every other mode is reduced to the matching L1T* mode.
ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode,
                                      int max_spatial_layers) {
  if (ScalabilityModeToNumSpatialLayers(scalability_mode) <=
      max_spatial_layers) {
    return scalability_mode;
  }

  // From here on the mode has more spatial layers than allowed.
  const bool two_layers_allowed = max_spatial_layers == 2;
  switch (scalability_mode) {
    // One- and two-layer modes can only fall back to a single layer.
    case ScalabilityMode::kL1T1:
    case ScalabilityMode::kL2T1:
    case ScalabilityMode::kL2T1h:
    case ScalabilityMode::kL2T1_KEY:
    case ScalabilityMode::kS2T1:
    case ScalabilityMode::kS2T1h:
      return ScalabilityMode::kL1T1;

    case ScalabilityMode::kL1T2:
    case ScalabilityMode::kL2T2:
    case ScalabilityMode::kL2T2h:
    case ScalabilityMode::kL2T2_KEY:
    case ScalabilityMode::kL2T2_KEY_SHIFT:
    case ScalabilityMode::kS2T2:
    case ScalabilityMode::kS2T2h:
      return ScalabilityMode::kL1T2;

    case ScalabilityMode::kL1T3:
    case ScalabilityMode::kL2T3:
    case ScalabilityMode::kL2T3h:
    case ScalabilityMode::kL2T3_KEY:
    case ScalabilityMode::kS2T3:
    case ScalabilityMode::kS2T3h:
      return ScalabilityMode::kL1T3;

    // Three-layer modes drop to two layers if allowed, else to one.
    case ScalabilityMode::kL3T1:
      return two_layers_allowed ? ScalabilityMode::kL2T1
                                : ScalabilityMode::kL1T1;
    case ScalabilityMode::kL3T1h:
      return two_layers_allowed ? ScalabilityMode::kL2T1h
                                : ScalabilityMode::kL1T1;
    case ScalabilityMode::kL3T1_KEY:
      return two_layers_allowed ? ScalabilityMode::kL2T1_KEY
                                : ScalabilityMode::kL1T1;
    case ScalabilityMode::kS3T1:
      return two_layers_allowed ? ScalabilityMode::kS2T1
                                : ScalabilityMode::kL1T1;
    case ScalabilityMode::kS3T1h:
      return two_layers_allowed ? ScalabilityMode::kS2T1h
                                : ScalabilityMode::kL1T1;

    case ScalabilityMode::kL3T2:
      return two_layers_allowed ? ScalabilityMode::kL2T2
                                : ScalabilityMode::kL1T2;
    case ScalabilityMode::kL3T2h:
      return two_layers_allowed ? ScalabilityMode::kL2T2h
                                : ScalabilityMode::kL1T2;
    case ScalabilityMode::kL3T2_KEY:
      return two_layers_allowed ? ScalabilityMode::kL2T2_KEY
                                : ScalabilityMode::kL1T2;
    case ScalabilityMode::kS3T2:
      return two_layers_allowed ? ScalabilityMode::kS2T2
                                : ScalabilityMode::kL1T2;
    case ScalabilityMode::kS3T2h:
      return two_layers_allowed ? ScalabilityMode::kS2T2h
                                : ScalabilityMode::kL1T2;

    case ScalabilityMode::kL3T3:
      return two_layers_allowed ? ScalabilityMode::kL2T3
                                : ScalabilityMode::kL1T3;
    case ScalabilityMode::kL3T3h:
      return two_layers_allowed ? ScalabilityMode::kL2T3h
                                : ScalabilityMode::kL1T3;
    case ScalabilityMode::kL3T3_KEY:
      return two_layers_allowed ? ScalabilityMode::kL2T3_KEY
                                : ScalabilityMode::kL1T3;
    case ScalabilityMode::kS3T3:
      return two_layers_allowed ? ScalabilityMode::kS2T3
                                : ScalabilityMode::kL1T3;
    case ScalabilityMode::kS3T3h:
      return two_layers_allowed ? ScalabilityMode::kS2T3h
                                : ScalabilityMode::kL1T3;
  }
  RTC_CHECK_NOTREACHED();
}
} // namespace webrtc

View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_
#define MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_codec.h"
namespace webrtc {
// Resolution ratio between spatial layers of a scalability mode, as returned
// by ScalabilityModeToResolutionRatio().
enum class ScalabilityModeResolutionRatio {
kTwoToOne, // The resolution ratio between spatial layers is 2:1.
kThreeToTwo, // The resolution ratio between spatial layers is 1.5:1.
};
// String name ("L1T2") of the scalability mode treated as the default.
// NOTE(review): the users of this constant are outside this header; confirm
// where the default is applied.
static constexpr char kDefaultScalabilityModeStr[] = "L1T2";
// Parses a scalability mode name such as "L1T2" or "S3T3h". The match is
// exact; returns absl::nullopt for any unrecognized string.
absl::optional<ScalabilityMode> ScalabilityModeFromString(
absl::string_view scalability_mode_string);
// Returns the inter-layer prediction mode of `scalability_mode`: kOnKeyPic for
// the *_KEY and *_KEY_SHIFT modes, kOff for the S* modes, kOn otherwise.
InterLayerPredMode ScalabilityModeToInterLayerPredMode(
ScalabilityMode scalability_mode);
// Returns the number of spatial layers (1, 2 or 3) of `scalability_mode`.
int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode);
// Returns the number of temporal layers (1, 2 or 3) of `scalability_mode`.
int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode);
// Returns the resolution ratio between spatial layers, or absl::nullopt for
// modes with a single spatial layer.
absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio(
ScalabilityMode scalability_mode);
// Returns `scalability_mode` unchanged if it has at most `max_spatial_layers`
// spatial layers; otherwise returns a mode with fewer spatial layers and the
// same number of temporal layers.
ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode,
int max_spatial_layers);
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_

View file

@ -0,0 +1,444 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalability_structure_full_svc.h"
#include <utility>
#include <vector>
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
// Out-of-class definitions of static constexpr data members declared in
// ScalabilityStructureFullSvc.
// NOTE(review): presumably kept so that ODR-uses link on pre-C++17
// toolchains, where such members are not implicitly inline. Confirm before
// removing.
constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers;
constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers;
constexpr absl::string_view ScalabilityStructureFullSvc::kFramePatternNames[];
// Stores the layer counts and the resolution factor, and starts with all
// num_spatial_layers * num_temporal_layers decode targets marked active.
ScalabilityStructureFullSvc::ScalabilityStructureFullSvc(
int num_spatial_layers,
int num_temporal_layers,
ScalingFactor resolution_factor)
: num_spatial_layers_(num_spatial_layers),
num_temporal_layers_(num_temporal_layers),
resolution_factor_(resolution_factor),
// Bit mask with the lowest (spatial * temporal) bits set.
active_decode_targets_(
(uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
// Sanity checks: the layer counts must not exceed the class limits.
RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
}
// Defaulted destructor, defined out of line.
ScalabilityStructureFullSvc::~ScalabilityStructureFullSvc() = default;
// Describes the layers of this structure. The top spatial layer has a scaling
// factor of 1/1; each layer below it multiplies the factor of the layer above
// by resolution_factor_ (num/den). Reference scaling is reported whenever
// there is more than one spatial layer.
ScalabilityStructureFullSvc::StreamLayersConfig
ScalabilityStructureFullSvc::StreamConfig() const {
  StreamLayersConfig config;
  config.num_spatial_layers = num_spatial_layers_;
  config.num_temporal_layers = num_temporal_layers_;
  config.uses_reference_scaling = num_spatial_layers_ > 1;

  const int top = num_spatial_layers_ - 1;
  config.scaling_factor_num[top] = 1;
  config.scaling_factor_den[top] = 1;
  // Walk downwards, deriving each layer from the one directly above it.
  for (int lower = top - 1; lower >= 0; --lower) {
    const int upper = lower + 1;
    config.scaling_factor_num[lower] =
        resolution_factor_.num * config.scaling_factor_num[upper];
    config.scaling_factor_den[lower] =
        resolution_factor_.den * config.scaling_factor_den[upper];
  }
  return config;
}
// Returns true if temporal layer `tid` exists in this structure and its decode
// target is active for at least one spatial layer.
bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const {
  if (tid < num_temporal_layers_) {
    for (int spatial = 0; spatial < num_spatial_layers_; ++spatial) {
      if (DecodeTargetIsActive(spatial, tid)) {
        return true;
      }
    }
  }
  return false;
}
// Computes the decode target indication of the frame described by `config`
// for the decode target (sid, tid):
//   * kNotPresent if the target is below the frame's spatial or temporal id;
//   * for the frame's own spatial layer: kSwitch for tid 0, kDiscardable for
//     the frame's own temporal id, kSwitch for higher temporal ids;
//   * for higher spatial layers: kSwitch if the frame is a keyframe or belongs
//     to the kKey pattern, kRequired otherwise.
DecodeTargetIndication ScalabilityStructureFullSvc::Dti(
    int sid,
    int tid,
    const LayerFrameConfig& config) {
  const int frame_sid = config.SpatialId();
  const int frame_tid = config.TemporalId();

  if (sid < frame_sid || tid < frame_tid) {
    return DecodeTargetIndication::kNotPresent;
  }

  if (sid == frame_sid) {
    if (tid == 0) {
      RTC_DCHECK_EQ(frame_tid, 0);
      return DecodeTargetIndication::kSwitch;
    }
    if (tid == frame_tid) {
      return DecodeTargetIndication::kDiscardable;
    }
    // Only tid > frame_tid is left for this spatial layer.
    RTC_DCHECK_GT(tid, frame_tid);
    return DecodeTargetIndication::kSwitch;
  }

  RTC_DCHECK_GT(sid, frame_sid);
  RTC_DCHECK_GE(tid, frame_tid);
  const bool starts_chain = config.IsKeyframe() || config.Id() == kKey;
  return starts_chain ? DecodeTargetIndication::kSwitch
                      : DecodeTargetIndication::kRequired;
}
// Picks the frame pattern that follows last_pattern_, skipping temporal
// layers that are currently inactive:
//   kNone         -> kKey
//   kKey/kDeltaT0 -> kDeltaT2A if T2 is active, else kDeltaT1 if T1 is active,
//                    else kDeltaT0
//   kDeltaT2A     -> kDeltaT1 if T1 is active, else kDeltaT0
//   kDeltaT1      -> kDeltaT2B if T2 is active, else kDeltaT0
//   kDeltaT2B     -> kDeltaT0
ScalabilityStructureFullSvc::FramePattern
ScalabilityStructureFullSvc::NextPattern() const {
  switch (last_pattern_) {
    case kNone:
      return kKey;
    case kKey:
    case kDeltaT0:
      if (TemporalLayerIsActive(2)) {
        return kDeltaT2A;
      }
      return TemporalLayerIsActive(1) ? kDeltaT1 : kDeltaT0;
    case kDeltaT2A:
      return TemporalLayerIsActive(1) ? kDeltaT1 : kDeltaT0;
    case kDeltaT1:
      return TemporalLayerIsActive(2) ? kDeltaT2B : kDeltaT0;
    case kDeltaT2B:
      return kDeltaT0;
  }
  RTC_DCHECK_NOTREACHED();
  return kNone;
}
// Produces the layer frame configurations for the next temporal unit.
//
// Returns an empty vector (and forgets the last pattern) when no decode target
// is active. Otherwise picks the next frame pattern via NextPattern() and
// emits one LayerFrameConfig per spatial layer that is active for the
// pattern's temporal id and, for the T1/T2 patterns, that is allowed to
// reference its T0 frame. Within a temporal unit each emitted layer references
// the buffer of the previously emitted (lower) spatial layer.
//
// `restart` resets the pattern state so that the next pattern is kKey. If no
// configuration could be generated and `restart` was false, a warning is
// logged and the function retries once with restart=true.
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
std::vector<LayerFrameConfig> configs;
if (active_decode_targets_.none()) {
last_pattern_ = kNone;
return configs;
}
configs.reserve(num_spatial_layers_);
if (last_pattern_ == kNone || restart) {
can_reference_t0_frame_for_spatial_id_.reset();
last_pattern_ = kNone;
}
FramePattern current_pattern = NextPattern();
// Buffer of the most recently emitted spatial layer in this temporal unit;
// unset until the first (lowest active) layer has been emitted.
absl::optional<int> spatial_dependency_buffer_id;
switch (current_pattern) {
case kDeltaT0:
case kKey:
// Disallow temporal references cross T0 on higher temporal layers.
can_reference_t1_frame_for_spatial_id_.reset();
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
// Next frame from the spatial layer `sid` shouldn't depend on
// potentially old previous frame from the spatial layer `sid`.
can_reference_t0_frame_for_spatial_id_.reset(sid);
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(current_pattern).S(sid).T(0);
if (spatial_dependency_buffer_id) {
config.Reference(*spatial_dependency_buffer_id);
} else if (current_pattern == kKey) {
config.Keyframe();
}
if (can_reference_t0_frame_for_spatial_id_[sid]) {
config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
} else {
// TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame
// to ChainDiffCalculator
config.Update(BufferIndex(sid, /*tid=*/0));
}
spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
}
break;
case kDeltaT1:
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/1) ||
!can_reference_t0_frame_for_spatial_id_[sid]) {
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(current_pattern).S(sid).T(1);
// Temporal reference.
config.Reference(BufferIndex(sid, /*tid=*/0));
// Spatial reference unless this is the lowest active spatial layer.
if (spatial_dependency_buffer_id) {
config.Reference(*spatial_dependency_buffer_id);
}
// No frame reference top layer frame, so no need save it into a buffer.
if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
config.Update(BufferIndex(sid, /*tid=*/1));
}
spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
}
break;
case kDeltaT2A:
case kDeltaT2B:
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/2) ||
!can_reference_t0_frame_for_spatial_id_[sid]) {
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(current_pattern).S(sid).T(2);
// Temporal reference.
if (current_pattern == kDeltaT2B &&
can_reference_t1_frame_for_spatial_id_[sid]) {
config.Reference(BufferIndex(sid, /*tid=*/1));
} else {
config.Reference(BufferIndex(sid, /*tid=*/0));
}
// Spatial reference unless this is the lowest active spatial layer.
if (spatial_dependency_buffer_id) {
config.Reference(*spatial_dependency_buffer_id);
}
// No frame reference top layer frame, so no need save it into a buffer.
if (sid < num_spatial_layers_ - 1) {
config.Update(BufferIndex(sid, /*tid=*/2));
}
spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2);
}
break;
case kNone:
RTC_DCHECK_NOTREACHED();
break;
}
// Nothing could be produced for this pattern: log the state and start over
// from a key pattern. The retry passes restart=true, so it cannot recurse
// again.
if (configs.empty() && !restart) {
RTC_LOG(LS_WARNING) << "Failed to generate configuration for L"
<< num_spatial_layers_ << "T" << num_temporal_layers_
<< " with active decode targets "
<< active_decode_targets_.to_string('-').substr(
active_decode_targets_.size() -
num_spatial_layers_ * num_temporal_layers_)
<< " and transition from "
<< kFramePatternNames[last_pattern_] << " to "
<< kFramePatternNames[current_pattern]
<< ". Resetting.";
return NextFrameConfig(/*restart=*/true);
}
return configs;
}
// Records that the layer frame described by `config` was encoded and returns
// its generic frame description (layer ids, encoder buffers, decode target
// indications, chain membership and the active decode target mask).
GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone(
    const LayerFrameConfig& config) {
  // When encoder drops all frames for a temporal unit, it is better to reuse
  // old temporal pattern rather than switch to next one, thus switch to next
  // pattern deferred here from the `NextFrameConfig`.
  // In particular creating VP9 references rely on this behavior.
  last_pattern_ = static_cast<FramePattern>(config.Id());

  const int frame_sid = config.SpatialId();
  const int frame_tid = config.TemporalId();
  const bool is_base_temporal_layer = frame_tid == 0;

  // Remember which spatial layers now have a T0 / T1 frame to reference.
  if (is_base_temporal_layer) {
    can_reference_t0_frame_for_spatial_id_.set(frame_sid);
  }
  if (frame_tid == 1) {
    can_reference_t1_frame_for_spatial_id_.set(frame_sid);
  }

  GenericFrameInfo info;
  info.spatial_id = frame_sid;
  info.temporal_id = frame_tid;
  info.encoder_buffers = config.Buffers();

  // One indication per decode target, ordered by spatial then temporal id.
  info.decode_target_indications.reserve(num_spatial_layers_ *
                                         num_temporal_layers_);
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    for (int tid = 0; tid < num_temporal_layers_; ++tid) {
      info.decode_target_indications.push_back(Dti(sid, tid, config));
    }
  }

  // Only T0 frames are part of chains, and only of the chains for their own
  // and higher spatial layers.
  info.part_of_chain.assign(num_spatial_layers_, false);
  if (is_base_temporal_layer) {
    for (int sid = 0; sid < num_spatial_layers_; ++sid) {
      info.part_of_chain[sid] = sid >= frame_sid;
    }
  }

  info.active_decode_targets = active_decode_targets_;
  return info;
}
// Activates or deactivates decode targets according to `bitrates`.
//
// Spatial layers are handled independently of each other. Within a spatial
// layer a temporal layer is active only if it and every lower temporal layer
// were given a non-zero bitrate.
void ScalabilityStructureFullSvc::OnRatesUpdated(
    const VideoBitrateAllocation& bitrates) {
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    bool lower_layers_active = true;
    for (int tid = 0; tid < num_temporal_layers_; ++tid) {
      // Once one temporal layer is inactive, all higher ones are inactive too
      // and their bitrates are not consulted anymore.
      if (lower_layers_active && !(bitrates.GetBitrate(sid, tid) > 0)) {
        lower_layers_active = false;
      }
      SetDecodeTargetIsActive(sid, tid, lower_layers_active);
    }
  }
}
// Returns the frame dependency structure for L1T2 (1 spatial layer, 2 temporal
// layers): 2 decode targets, both protected by the single chain 0, and 3
// frame templates.
// Each Dtis() string holds one character per decode target.
// NOTE(review): the characters presumably mean '-' not present,
// 'D' discardable, 'S' switch and 'R' required - confirm against
// api/transport/rtp/dependency_descriptor.h.
FrameDependencyStructure ScalabilityStructureL1T2::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 2;
structure.num_chains = 1;
structure.decode_target_protected_by_chain = {0, 0};
structure.templates.resize(3);
// templates[0]: T0 frame without frame references.
structure.templates[0].T(0).Dtis("SS").ChainDiffs({0});
// templates[1]: T0 frame referencing the frame 2 positions earlier.
structure.templates[1].T(0).Dtis("SS").ChainDiffs({2}).FrameDiffs({2});
// templates[2]: T1 frame referencing the immediately preceding frame.
structure.templates[2].T(1).Dtis("-D").ChainDiffs({1}).FrameDiffs({1});
return structure;
}
// Returns the frame dependency structure for L1T3 (1 spatial layer, 3 temporal
// layers): 3 decode targets, all protected by the single chain 0, and 5
// frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 3;
structure.num_chains = 1;
structure.decode_target_protected_by_chain = {0, 0, 0};
structure.templates.resize(5);
// templates[0]: T0 frame without frame references.
structure.templates[0].T(0).Dtis("SSS").ChainDiffs({0});
// templates[1]: T0 frame referencing the frame 4 positions earlier.
structure.templates[1].T(0).Dtis("SSS").ChainDiffs({4}).FrameDiffs({4});
// templates[2]: T1 frame referencing the frame 2 positions earlier.
structure.templates[2].T(1).Dtis("-DS").ChainDiffs({2}).FrameDiffs({2});
// templates[3], templates[4]: the two T2 frames of a pattern; they differ only
// in their distance to the last frame of the chain (1 vs 3).
structure.templates[3].T(2).Dtis("--D").ChainDiffs({1}).FrameDiffs({1});
structure.templates[4].T(2).Dtis("--D").ChainDiffs({3}).FrameDiffs({1});
return structure;
}
// Returns the frame dependency structure for L2T1 (2 spatial layers, 1
// temporal layer): 2 decode targets, each protected by its own chain, and 4
// frame templates (two per spatial layer).
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL2T1::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 2;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 1};
structure.templates.resize(4);
// S0 templates: [0] references the frame 2 positions earlier, [1] has no
// frame references.
structure.templates[0].S(0).Dtis("SR").ChainDiffs({2, 1}).FrameDiffs({2});
structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0});
// S1 templates: [2] has two references (frame diffs 2 and 1), [3] references
// only the immediately preceding frame.
structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({2, 1});
structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1});
return structure;
}
// Returns the frame dependency structure for L2T2 (2 spatial layers, 2
// temporal layers): 4 decode targets, 2 chains (decode targets 0-1 protected
// by chain 0, decode targets 2-3 by chain 1) and 6 frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 4;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 0, 1, 1};
structure.templates.resize(6);
auto& templates = structure.templates;
// Templates are listed in array order, i.e. sorted by (spatial id,
// temporal id).
templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
templates[1].S(0).T(0).Dtis("SSRR").ChainDiffs({4, 3}).FrameDiffs({4});
templates[2].S(0).T(1).Dtis("-D-R").ChainDiffs({2, 1}).FrameDiffs({2});
templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({4, 1});
templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2, 1});
return structure;
}
// Returns the frame dependency structure for L2T3 (2 spatial layers, 3
// temporal layers): 6 decode targets, 2 chains (decode targets 0-2 protected
// by chain 0, decode targets 3-5 by chain 1) and 10 frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL2T3::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 6;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1};
auto& t = structure.templates;
t.resize(10);
// The assignments below are not in index order, but the indices used keep the
// `templates` array sorted by (spatial id, temporal id): [0-1] S0T0, [2] S0T1,
// [3-4] S0T2, [5-6] S1T0, [7] S1T1, [8-9] S1T2.
t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0});
t[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1});
t[3].S(0).T(2).Dtis("--D--R").ChainDiffs({2, 1}).FrameDiffs({2});
t[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2, 1});
t[2].S(0).T(1).Dtis("-DS-RR").ChainDiffs({4, 3}).FrameDiffs({4});
t[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4, 1});
t[4].S(0).T(2).Dtis("--D--R").ChainDiffs({6, 5}).FrameDiffs({2});
t[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2, 1});
t[0].S(0).T(0).Dtis("SSSRRR").ChainDiffs({8, 7}).FrameDiffs({8});
t[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({8, 1});
return structure;
}
// Returns the frame dependency structure for L3T1 (3 spatial layers, 1
// temporal layer): 3 decode targets, each protected by its own chain, and 6
// frame templates (two per spatial layer).
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL3T1::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 3;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 1, 2};
auto& templates = structure.templates;
templates.resize(6);
// Templates are listed in array order, i.e. sorted by spatial id.
templates[0].S(0).Dtis("SRR").ChainDiffs({3, 2, 1}).FrameDiffs({3});
templates[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0});
templates[2].S(1).Dtis("-SR").ChainDiffs({1, 1, 1}).FrameDiffs({3, 1});
templates[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
templates[4].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({3, 1});
templates[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1});
return structure;
}
// Returns the frame dependency structure for L3T2 (3 spatial layers, 2
// temporal layers): 6 decode targets, 3 chains (two decode targets per chain)
// and 9 frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL3T2::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 6;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2};
auto& t = structure.templates;
t.resize(9);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement.
t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0});
t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
t[2].S(0).T(1).Dtis("-D-R-R").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[5].S(1).T(1).Dtis("---D-R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1});
t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1});
t[0].S(0).T(0).Dtis("SSRRRR").ChainDiffs({6, 5, 4}).FrameDiffs({6});
t[3].S(1).T(0).Dtis("--SSRR").ChainDiffs({1, 1, 1}).FrameDiffs({6, 1});
t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({6, 1});
return structure;
}
// Returns the frame dependency structure for L3T3 (3 spatial layers, 3
// temporal layers): 9 decode targets, 3 chains (three decode targets per
// chain) and 15 frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 9;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
auto& t = structure.templates;
t.resize(15);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement. Indexes are written in hex for nicer alignment.
t[0x1].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
t[0x6].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
t[0x3].S(0).T(2).Dtis("--D--R--R").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[0x8].S(1).T(2).Dtis("-----D--R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1});
t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1});
t[0x2].S(0).T(1).Dtis("-DS-RR-RR").ChainDiffs({6, 5, 4}).FrameDiffs({6});
t[0x7].S(1).T(1).Dtis("----DS-RR").ChainDiffs({7, 6, 5}).FrameDiffs({6, 1});
t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6, 1});
t[0x4].S(0).T(2).Dtis("--D--R--R").ChainDiffs({9, 8, 7}).FrameDiffs({3});
t[0x9].S(1).T(2).Dtis("-----D--R").ChainDiffs({10, 9, 8}).FrameDiffs({3, 1});
t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3, 1});
t[0x0].S(0).T(0).Dtis("SSSRRRRRR").ChainDiffs({12, 11, 10}).FrameDiffs({12});
t[0x5].S(1).T(0).Dtis("---SSSRRR").ChainDiffs({1, 1, 1}).FrameDiffs({12, 1});
t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({12, 1});
return structure;
}
} // namespace webrtc

View file

@ -0,0 +1,190 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_FULL_SVC_H_
#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_FULL_SVC_H_
#include <bitset>
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Common implementation of the ScalableVideoController interface shared by
// the ScalabilityStructureL<S>T<T> classes, which only supply the layer
// counts and the matching DependencyStructure().
// Supports up to kMaxNumSpatialLayers spatial and kMaxNumTemporalLayers
// temporal layers.
class ScalabilityStructureFullSvc : public ScalableVideoController {
public:
// Resolution ratio expressed as num/den; defaults to 1/2.
// NOTE(review): presumably the ratio between adjacent spatial layers - confirm
// in StreamConfig().
struct ScalingFactor {
int num = 1;
int den = 2;
};
ScalabilityStructureFullSvc(int num_spatial_layers,
int num_temporal_layers,
ScalingFactor resolution_factor);
~ScalabilityStructureFullSvc() override;
StreamLayersConfig StreamConfig() const override;
// Returns the layer frame configurations for the next temporal unit;
// `restart` requests that the pattern starts over.
std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
// Must be reported for encoded frames: this is where the temporal pattern
// advances and where the frame description is produced.
GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
// Activates/deactivates decode targets based on the per-layer bitrates.
void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
private:
// Position in the repeating temporal pattern. The numeric value is stored as
// the id of each LayerFrameConfig and read back in OnEncodeDone.
enum FramePattern {
kNone,
kKey,
kDeltaT2A,
kDeltaT1,
kDeltaT2B,
kDeltaT0,
};
// Printable names indexed by FramePattern value; used for logging.
// NOTE(review): absl::string_view is used here, but this header does not
// include "absl/strings/string_view.h" directly - it relies on a transitive
// include.
static constexpr absl::string_view kFramePatternNames[] = {
"None", "Key", "DeltaT2A", "DeltaT1", "DeltaT2B", "DeltaT0"};
static constexpr int kMaxNumSpatialLayers = 3;
static constexpr int kMaxNumTemporalLayers = 3;
// Index of the buffer to store last frame for layer (`sid`, `tid`)
int BufferIndex(int sid, int tid) const {
return tid * num_spatial_layers_ + sid;
}
// Decode targets are stored in `active_decode_targets_` ordered by spatial id
// first, then temporal id.
bool DecodeTargetIsActive(int sid, int tid) const {
return active_decode_targets_[sid * num_temporal_layers_ + tid];
}
void SetDecodeTargetIsActive(int sid, int tid, bool value) {
active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
}
FramePattern NextPattern() const;
bool TemporalLayerIsActive(int tid) const;
// Decode target indication of `frame` for the decode target (`sid`, `tid`).
static DecodeTargetIndication Dti(int sid,
int tid,
const LayerFrameConfig& frame);
const int num_spatial_layers_;
const int num_temporal_layers_;
const ScalingFactor resolution_factor_;
// Pattern of the most recently encoded frame; kNone until a frame is encoded.
FramePattern last_pattern_ = kNone;
// Bit `sid` is set once a T0 (respectively T1) frame was encoded for that
// spatial layer, see OnEncodeDone.
std::bitset<kMaxNumSpatialLayers> can_reference_t0_frame_for_spatial_id_ = 0;
std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_ = 0;
// One bit per decode target, indexed as in DecodeTargetIsActive().
std::bitset<32> active_decode_targets_;
};
// Full-SVC structure with 1 spatial layer and 2 temporal layers.
// T1       0   0
//         /   /   / ...
// T0     0---0---0--
// Time-> 0 1 2 3 4
class ScalabilityStructureL1T2 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL1T2(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(1, 2, resolution_factor) {}
~ScalabilityStructureL1T2() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 1 spatial layer and 3 temporal layers.
// T2       0   0   0   0
//          |  /    |  /
// T1       / 0     / 0  ...
//         |_/     |_/
// T0     0-------0------
// Time-> 0 1 2 3 4 5 6 7
class ScalabilityStructureL1T3 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL1T3(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(1, 3, resolution_factor) {}
~ScalabilityStructureL1T3() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 2 spatial layers and 1 temporal layer.
// S1  0--0--0-
//     |  |  | ...
// S0  0--0--0-
class ScalabilityStructureL2T1 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL2T1(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(2, 1, resolution_factor) {}
~ScalabilityStructureL2T1() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 2 spatial layers and 2 temporal layers.
// S1T1     0   0
//         /|  /|  /
// S1T0   0-+-0-+-0
//        | | | | | ...
// S0T1   | 0 | 0 |
//        |/  |/  |/
// S0T0   0---0---0--
// Time-> 0 1 2 3 4
class ScalabilityStructureL2T2 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL2T2(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(2, 2, resolution_factor) {}
~ScalabilityStructureL2T2() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 2 spatial layers and 3 temporal layers.
// NOTE(review): the digits in the diagram presumably number the frames in
// stream order - confirm.
// S1T2      4    ,8
// S1T1    / |  6' |
// S1T0   2--+-'+--+-...
//        |  |  |  |
// S0T2   |  3  | ,7
// S0T1   | /   5'
// S0T0   1----'-----...
// Time-> 0  1  2  3
class ScalabilityStructureL2T3 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL2T3(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(2, 3, resolution_factor) {}
~ScalabilityStructureL2T3() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 3 spatial layers and 1 temporal layer.
// S2     0-0-0-
//        | | |
// S1     0-0-0-...
//        | | |
// S0     0-0-0-
// Time-> 0 1 2
class ScalabilityStructureL3T1 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL3T1(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(3, 1, resolution_factor) {}
~ScalabilityStructureL3T1() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 3 spatial layers and 2 temporal layers.
// https://www.w3.org/TR/webrtc-svc/#L3T2*
class ScalabilityStructureL3T2 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL3T2(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(3, 2, resolution_factor) {}
~ScalabilityStructureL3T2() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Full-SVC structure with 3 spatial layers and 3 temporal layers.
// https://www.w3.org/TR/webrtc-svc/#L3T3*
class ScalabilityStructureL3T3 : public ScalabilityStructureFullSvc {
public:
// `resolution_factor` defaults to ScalingFactor's default of 1/2.
explicit ScalabilityStructureL3T3(ScalingFactor resolution_factor = {})
: ScalabilityStructureFullSvc(3, 3, resolution_factor) {}
~ScalabilityStructureL3T3() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_FULL_SVC_H_

View file

@ -0,0 +1,427 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalability_structure_key_svc.h"
#include <bitset>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
// Out-of-class definitions of the static constexpr members declared in the
// header. Before C++17 these are required when the constants are odr-used
// (e.g. bound to a const reference).
constexpr int ScalabilityStructureKeySvc::kMaxNumSpatialLayers;
constexpr int ScalabilityStructureKeySvc::kMaxNumTemporalLayers;
// Creates a controller for `num_spatial_layers` x `num_temporal_layers`
// layers. All decode targets start out active: the lowest
// `num_spatial_layers * num_temporal_layers` bits of `active_decode_targets_`
// are set.
// In debug builds checks that more than one spatial layer is requested and
// that neither layer count exceeds its kMaxNum* limit.
ScalabilityStructureKeySvc::ScalabilityStructureKeySvc(int num_spatial_layers,
int num_temporal_layers)
: num_spatial_layers_(num_spatial_layers),
num_temporal_layers_(num_temporal_layers),
active_decode_targets_(
(uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
// There is no point to use this structure without spatial scalability.
RTC_DCHECK_GT(num_spatial_layers, 1);
RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
}
ScalabilityStructureKeySvc::~ScalabilityStructureKeySvc() = default;
// Describes the layer layout of the stream: the layer counts and, for every
// spatial layer, its resolution as a fraction of the top layer. The top
// spatial layer is 1/1 and each layer below has half the resolution of the
// layer above it.
ScalableVideoController::StreamLayersConfig
ScalabilityStructureKeySvc::StreamConfig() const {
  StreamLayersConfig stream;
  stream.num_spatial_layers = num_spatial_layers_;
  stream.num_temporal_layers = num_temporal_layers_;
  stream.uses_reference_scaling = true;

  // Walk from the top spatial layer down, doubling the denominator each step.
  int denominator = 1;
  for (int sid = num_spatial_layers_ - 1; sid >= 0; --sid) {
    stream.scaling_factor_num[sid] = 1;
    stream.scaling_factor_den[sid] = denominator;
    denominator *= 2;
  }
  return stream;
}
// Returns true when temporal layer `tid` exists in this structure and is
// active for at least one spatial layer.
bool ScalabilityStructureKeySvc::TemporalLayerIsActive(int tid) const {
  bool active_somewhere = false;
  if (tid < num_temporal_layers_) {
    // Stop scanning at the first spatial layer that has the layer active.
    for (int sid = 0; sid < num_spatial_layers_ && !active_somewhere; ++sid) {
      active_somewhere = DecodeTargetIsActive(sid, tid);
    }
  }
  return active_somewhere;
}
// Returns how the frame described by `config` relates to the decode target
// (`sid`, `tid`).
//
// Frames of the key temporal unit are a switch point for their own spatial
// layer and every layer above it. Any other frame only matters to decode
// targets of its own spatial layer at the same or a higher temporal layer;
// it is discardable for the decode target at exactly its own temporal layer
// unless that layer is T0.
DecodeTargetIndication ScalabilityStructureKeySvc::Dti(
    int sid,
    int tid,
    const LayerFrameConfig& config) {
  const auto frame_sid = config.SpatialId();
  const auto frame_tid = config.TemporalId();

  if (config.IsKeyframe() || config.Id() == kKey) {
    RTC_DCHECK_EQ(config.TemporalId(), 0);
    if (sid < frame_sid) {
      return DecodeTargetIndication::kNotPresent;
    }
    return DecodeTargetIndication::kSwitch;
  }

  const bool same_spatial_layer = (sid == frame_sid);
  if (!same_spatial_layer || tid < frame_tid) {
    return DecodeTargetIndication::kNotPresent;
  }
  const bool top_layer_of_target = (tid == frame_tid);
  return (top_layer_of_target && tid > 0)
             ? DecodeTargetIndication::kDiscardable
             : DecodeTargetIndication::kSwitch;
}
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::KeyframeConfig() {
std::vector<LayerFrameConfig> configs;
configs.reserve(num_spatial_layers_);
absl::optional<int> spatial_dependency_buffer_id;
spatial_id_is_enabled_.reset();
// Disallow temporal references cross T0 on higher temporal layers.
can_reference_t1_frame_for_spatial_id_.reset();
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(kKey).S(sid).T(0);
if (spatial_dependency_buffer_id) {
config.Reference(*spatial_dependency_buffer_id);
} else {
config.Keyframe();
}
config.Update(BufferIndex(sid, /*tid=*/0));
spatial_id_is_enabled_.set(sid);
spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
}
return configs;
}
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::T0Config() {
std::vector<LayerFrameConfig> configs;
configs.reserve(num_spatial_layers_);
// Disallow temporal references cross T0 on higher temporal layers.
can_reference_t1_frame_for_spatial_id_.reset();
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
spatial_id_is_enabled_.reset(sid);
continue;
}
configs.emplace_back();
configs.back().Id(kDeltaT0).S(sid).T(0).ReferenceAndUpdate(
BufferIndex(sid, /*tid=*/0));
}
return configs;
}
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::T1Config() {
std::vector<LayerFrameConfig> configs;
configs.reserve(num_spatial_layers_);
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/1)) {
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(kDeltaT1).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
if (num_temporal_layers_ > 2) {
config.Update(BufferIndex(sid, /*tid=*/1));
}
}
return configs;
}
// Builds the T2 frames of a temporal unit for `pattern` (kDeltaT2A or
// kDeltaT2B): one frame per spatial layer whose T2 decode target is active.
// Each frame references the T1 buffer of its spatial layer when a T1 frame
// may be referenced, and the T0 buffer otherwise. T2 frames are never stored.
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::T2Config(FramePattern pattern) {
  std::vector<LayerFrameConfig> layer_frames;
  layer_frames.reserve(num_spatial_layers_);
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    if (DecodeTargetIsActive(sid, /*tid=*/2)) {
      const int reference_tid =
          can_reference_t1_frame_for_spatial_id_[sid] ? 1 : 0;
      layer_frames.emplace_back();
      ScalableVideoController::LayerFrameConfig& frame = layer_frames.back();
      frame.Id(pattern);
      frame.S(sid);
      frame.T(2);
      frame.Reference(BufferIndex(sid, reference_tid));
    }
  }
  return layer_frames;
}
// Returns the pattern that follows `last_pattern`, skipping temporal layers
// that are not currently active. The full cycle is
// T0 -> T2A -> T1 -> T2B -> T0; kNone is always followed by kKey.
ScalabilityStructureKeySvc::FramePattern
ScalabilityStructureKeySvc::NextPattern(FramePattern last_pattern) const {
  switch (last_pattern) {
    case kNone:
      return kKey;
    case kKey:
    case kDeltaT0:
      if (TemporalLayerIsActive(2)) {
        return kDeltaT2A;
      }
      return TemporalLayerIsActive(1) ? kDeltaT1 : kDeltaT0;
    case kDeltaT2A:
      return TemporalLayerIsActive(1) ? kDeltaT1 : kDeltaT0;
    case kDeltaT1:
      return TemporalLayerIsActive(2) ? kDeltaT2B : kDeltaT0;
    case kDeltaT2B:
      return kDeltaT0;
  }
  RTC_DCHECK_NOTREACHED();
  return kNone;
}
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::NextFrameConfig(bool restart) {
if (active_decode_targets_.none()) {
last_pattern_ = kNone;
return {};
}
if (restart) {
last_pattern_ = kNone;
}
FramePattern current_pattern = NextPattern(last_pattern_);
switch (current_pattern) {
case kKey:
return KeyframeConfig();
case kDeltaT0:
return T0Config();
case kDeltaT1:
return T1Config();
case kDeltaT2A:
case kDeltaT2B:
return T2Config(current_pattern);
case kNone:
break;
}
RTC_DCHECK_NOTREACHED();
return {};
}
// Records that the layer frame described by `config` was encoded and returns
// the generic frame description for it.
//
// The temporal pattern is advanced here rather than in `NextFrameConfig`:
// when the encoder drops all frames of a temporal unit it is better to reuse
// the old pattern than to move on to the next one. Creating VP9 references in
// particular relies on this behavior.
GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
    const LayerFrameConfig& config) {
  const auto frame_sid = config.SpatialId();
  const auto frame_tid = config.TemporalId();

  last_pattern_ = static_cast<FramePattern>(config.Id());
  if (frame_tid == 1) {
    // A following T2 frame of this spatial layer may now reference T1.
    can_reference_t1_frame_for_spatial_id_.set(frame_sid);
  }

  GenericFrameInfo info;
  info.spatial_id = frame_sid;
  info.temporal_id = frame_tid;
  info.encoder_buffers = config.Buffers();
  info.active_decode_targets = active_decode_targets_;

  // One indication per decode target, ordered by spatial id, then temporal id.
  info.decode_target_indications.reserve(num_spatial_layers_ *
                                         num_temporal_layers_);
  for (int target_sid = 0; target_sid < num_spatial_layers_; ++target_sid) {
    for (int target_tid = 0; target_tid < num_temporal_layers_; ++target_tid) {
      info.decode_target_indications.push_back(
          Dti(target_sid, target_tid, config));
    }
  }

  info.part_of_chain.assign(num_spatial_layers_, false);
  const bool in_key_temporal_unit = config.IsKeyframe() || config.Id() == kKey;
  if (in_key_temporal_unit) {
    RTC_DCHECK_EQ(config.TemporalId(), 0);
    // Frames of the key temporal unit start the chain of their own spatial
    // layer and of all layers above.
    for (int chain = frame_sid; chain < num_spatial_layers_; ++chain) {
      info.part_of_chain[chain] = true;
    }
  } else if (frame_tid == 0) {
    // Delta T0 frames continue only the chain of their own spatial layer.
    info.part_of_chain[frame_sid] = true;
  }
  return info;
}
// Activates or deactivates decode targets according to `bitrates`.
//
// Spatial layers are handled independently of each other. Within a spatial
// layer a temporal layer is active only if it and every lower temporal layer
// were given a non-zero bitrate. Activating a spatial layer that is not
// currently enabled resets the pattern, because a key frame is required to
// re-enable a spatial layer.
void ScalabilityStructureKeySvc::OnRatesUpdated(
    const VideoBitrateAllocation& bitrates) {
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    bool lower_layers_active = bitrates.GetBitrate(sid, /*tid=*/0) > 0;
    SetDecodeTargetIsActive(sid, /*tid=*/0, lower_layers_active);
    if (lower_layers_active && !spatial_id_is_enabled_[sid]) {
      last_pattern_ = kNone;
    }
    for (int tid = 1; tid < num_temporal_layers_; ++tid) {
      // Once one temporal layer is inactive, all higher ones are inactive too
      // and their bitrates are not consulted anymore.
      if (lower_layers_active && !(bitrates.GetBitrate(sid, tid) > 0)) {
        lower_layers_active = false;
      }
      SetDecodeTargetIsActive(sid, tid, lower_layers_active);
    }
  }
}
ScalabilityStructureL2T1Key::~ScalabilityStructureL2T1Key() = default;
// Returns the frame dependency structure for the key-SVC variant of L2T1
// (2 spatial layers, 1 temporal layer): 2 decode targets, each protected by
// its own chain, and 4 frame templates (two per spatial layer).
// Each Dtis() string holds one character per decode target.
// NOTE(review): the characters presumably mean '-' not present,
// 'D' discardable and 'S' switch - confirm against
// api/transport/rtp/dependency_descriptor.h.
FrameDependencyStructure ScalabilityStructureL2T1Key::DependencyStructure()
const {
FrameDependencyStructure structure;
structure.num_decode_targets = 2;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 1};
structure.templates.resize(4);
// S0 templates: [0] references the frame 2 positions earlier, [1] has no
// frame references.
structure.templates[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2});
structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0});
// S1 templates: [2] references the frame 2 positions earlier, [3] references
// the immediately preceding frame.
structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2});
structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1});
return structure;
}
ScalabilityStructureL2T2Key::~ScalabilityStructureL2T2Key() = default;
// Returns the frame dependency structure for the key-SVC variant of L2T2
// (2 spatial layers, 2 temporal layers): 4 decode targets, 2 chains (decode
// targets 0-1 protected by chain 0, decode targets 2-3 by chain 1) and 6
// frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL2T2Key::DependencyStructure()
const {
FrameDependencyStructure structure;
structure.num_decode_targets = 4;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 0, 1, 1};
structure.templates.resize(6);
auto& templates = structure.templates;
// Templates are listed in array order, i.e. sorted by (spatial id,
// temporal id).
templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
templates[1].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4});
templates[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2});
templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4});
templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2});
return structure;
}
ScalabilityStructureL2T3Key::~ScalabilityStructureL2T3Key() = default;
// Returns the frame dependency structure for the key-SVC variant of L2T3
// (2 spatial layers, 3 temporal layers): 6 decode targets, 2 chains (decode
// targets 0-2 protected by chain 0, decode targets 3-5 by chain 1) and 10
// frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL2T3Key::DependencyStructure()
const {
FrameDependencyStructure structure;
structure.num_decode_targets = 6;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1};
auto& templates = structure.templates;
templates.resize(10);
// Templates are listed in array order, i.e. sorted by (spatial id,
// temporal id).
templates[0].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0});
templates[1].S(0).T(0).Dtis("SSS---").ChainDiffs({8, 7}).FrameDiffs({8});
templates[2].S(0).T(1).Dtis("-DS---").ChainDiffs({4, 3}).FrameDiffs({4});
templates[3].S(0).T(2).Dtis("--D---").ChainDiffs({2, 1}).FrameDiffs({2});
templates[4].S(0).T(2).Dtis("--D---").ChainDiffs({6, 5}).FrameDiffs({2});
templates[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1});
templates[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 8}).FrameDiffs({8});
templates[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4});
templates[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2});
templates[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2});
return structure;
}
ScalabilityStructureL3T1Key::~ScalabilityStructureL3T1Key() = default;
// Returns the frame dependency structure for the key-SVC variant of L3T1
// (3 spatial layers, 1 temporal layer): 3 decode targets, each protected by
// its own chain, and 6 frame templates (two per spatial layer).
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL3T1Key::DependencyStructure()
const {
FrameDependencyStructure structure;
structure.num_decode_targets = 3;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 1, 2};
auto& t = structure.templates;
t.resize(6);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement.
t[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0});
t[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
t[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1});
t[0].S(0).Dtis("S--").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[2].S(1).Dtis("-S-").ChainDiffs({1, 3, 2}).FrameDiffs({3});
t[4].S(2).Dtis("--S").ChainDiffs({2, 1, 3}).FrameDiffs({3});
return structure;
}
ScalabilityStructureL3T2Key::~ScalabilityStructureL3T2Key() = default;
// Returns the frame dependency structure for the key-SVC variant of L3T2
// (3 spatial layers, 2 temporal layers): 6 decode targets, 3 chains (two
// decode targets per chain) and 9 frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL3T2Key::DependencyStructure()
const {
FrameDependencyStructure structure;
structure.num_decode_targets = 6;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2};
auto& t = structure.templates;
t.resize(9);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement.
t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0});
t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
t[2].S(0).T(1).Dtis("-D----").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[5].S(1).T(1).Dtis("---D--").ChainDiffs({4, 3, 2}).FrameDiffs({3});
t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
t[0].S(0).T(0).Dtis("SS----").ChainDiffs({6, 5, 4}).FrameDiffs({6});
t[3].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 6, 5}).FrameDiffs({6});
t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 6}).FrameDiffs({6});
return structure;
}
ScalabilityStructureL3T3Key::~ScalabilityStructureL3T3Key() = default;
// Returns the frame dependency structure for the key-SVC variant of L3T3
// (3 spatial layers, 3 temporal layers): 9 decode targets, 3 chains (three
// decode targets per chain) and 15 frame templates.
// Each Dtis() string holds one character per decode target.
FrameDependencyStructure ScalabilityStructureL3T3Key::DependencyStructure()
const {
FrameDependencyStructure structure;
structure.num_decode_targets = 9;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
auto& t = structure.templates;
t.resize(15);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement. Indexes are written in hex for nicer alignment.
t[0x0].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
t[0x5].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3});
t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6});
t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6});
t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6});
t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3});
t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3});
t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3});
t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12});
t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12});
t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12});
return structure;
}
} // namespace webrtc

View file

@ -0,0 +1,138 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_KEY_SVC_H_
#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_KEY_SVC_H_
#include <bitset>
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Common implementation of the ScalableVideoController interface shared by
// the ScalabilityStructureL<S>T<T>Key classes, which only supply the layer
// counts and the matching DependencyStructure().
// Only the key temporal unit uses references between spatial layers; all
// other frames reference buffers of their own spatial layer.
// Supports up to kMaxNumSpatialLayers spatial and kMaxNumTemporalLayers
// temporal layers.
class ScalabilityStructureKeySvc : public ScalableVideoController {
public:
ScalabilityStructureKeySvc(int num_spatial_layers, int num_temporal_layers);
~ScalabilityStructureKeySvc() override;
StreamLayersConfig StreamConfig() const override;
// Returns the layer frame configurations for the next temporal unit;
// `restart` forces the pattern to start over with a key temporal unit.
std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
// Must be reported for encoded frames: this is where the temporal pattern
// advances and where the frame description is produced.
GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
// Activates/deactivates decode targets based on the per-layer bitrates.
void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
private:
// Position in the repeating temporal pattern. The numeric value is stored as
// the id of each LayerFrameConfig and read back in OnEncodeDone.
enum FramePattern : int {
kNone,
kKey,
kDeltaT0,
kDeltaT2A,
kDeltaT1,
kDeltaT2B,
};
static constexpr int kMaxNumSpatialLayers = 3;
static constexpr int kMaxNumTemporalLayers = 3;
// Index of the buffer to store last frame for layer (`sid`, `tid`)
int BufferIndex(int sid, int tid) const {
return tid * num_spatial_layers_ + sid;
}
// Decode targets are stored in `active_decode_targets_` ordered by spatial id
// first, then temporal id.
bool DecodeTargetIsActive(int sid, int tid) const {
return active_decode_targets_[sid * num_temporal_layers_ + tid];
}
void SetDecodeTargetIsActive(int sid, int tid, bool value) {
active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
}
// True if temporal layer `tid` is active for at least one spatial layer.
bool TemporalLayerIsActive(int tid) const;
// Decode target indication of `config` for the decode target (`sid`, `tid`).
static DecodeTargetIndication Dti(int sid,
int tid,
const LayerFrameConfig& config);
// Builders for the layer frames of one temporal unit, one per pattern.
std::vector<LayerFrameConfig> KeyframeConfig();
std::vector<LayerFrameConfig> T0Config();
std::vector<LayerFrameConfig> T1Config();
std::vector<LayerFrameConfig> T2Config(FramePattern pattern);
// Pattern that follows `last_pattern`, given the currently active layers.
FramePattern NextPattern(FramePattern last_pattern) const;
const int num_spatial_layers_;
const int num_temporal_layers_;
// Pattern of the most recently encoded frame; kNone requests a key temporal
// unit next.
FramePattern last_pattern_ = kNone;
// Bit `sid` is set by KeyframeConfig() for every spatial layer included in the
// key temporal unit and cleared by T0Config() when the layer is inactive.
std::bitset<kMaxNumSpatialLayers> spatial_id_is_enabled_;
// Bit `sid` is set once a T1 frame was encoded for that spatial layer; cleared
// whenever a new T0 (or key) temporal unit is configured.
std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_;
// One bit per decode target, indexed as in DecodeTargetIsActive().
std::bitset<32> active_decode_targets_;
};
// ScalabilityStructureKeySvc configured with 2 spatial layers and 1 temporal
// layer.
// S1 0--0--0-
// | ...
// S0 0--0--0-
class ScalabilityStructureL2T1Key : public ScalabilityStructureKeySvc {
public:
ScalabilityStructureL2T1Key() : ScalabilityStructureKeySvc(2, 1) {}
~ScalabilityStructureL2T1Key() override;
// Dependency structure matching the 2x1 layer layout.
FrameDependencyStructure DependencyStructure() const override;
};
// ScalabilityStructureKeySvc configured with 2 spatial layers and 2 temporal
// layers.
// S1T1 0 0
// / / /
// S1T0 0---0---0
// | ...
// S0T1 | 0 0
// |/ / /
// S0T0 0---0---0
// Time-> 0 1 2 3 4
class ScalabilityStructureL2T2Key : public ScalabilityStructureKeySvc {
public:
ScalabilityStructureL2T2Key() : ScalabilityStructureKeySvc(2, 2) {}
~ScalabilityStructureL2T2Key() override;
// Dependency structure matching the 2x2 layer layout.
FrameDependencyStructure DependencyStructure() const override;
};
// ScalabilityStructureKeySvc configured with 2 spatial layers and 3 temporal
// layers.
class ScalabilityStructureL2T3Key : public ScalabilityStructureKeySvc {
public:
ScalabilityStructureL2T3Key() : ScalabilityStructureKeySvc(2, 3) {}
~ScalabilityStructureL2T3Key() override;
// Dependency structure matching the 2x3 layer layout.
FrameDependencyStructure DependencyStructure() const override;
};
// ScalabilityStructureKeySvc configured with 3 spatial layers and 1 temporal
// layer.
class ScalabilityStructureL3T1Key : public ScalabilityStructureKeySvc {
public:
ScalabilityStructureL3T1Key() : ScalabilityStructureKeySvc(3, 1) {}
~ScalabilityStructureL3T1Key() override;
// Dependency structure matching the 3x1 layer layout.
FrameDependencyStructure DependencyStructure() const override;
};
// ScalabilityStructureKeySvc configured with 3 spatial layers and 2 temporal
// layers.
class ScalabilityStructureL3T2Key : public ScalabilityStructureKeySvc {
public:
ScalabilityStructureL3T2Key() : ScalabilityStructureKeySvc(3, 2) {}
~ScalabilityStructureL3T2Key() override;
// Dependency structure matching the 3x2 layer layout.
FrameDependencyStructure DependencyStructure() const override;
};
// ScalabilityStructureKeySvc configured with 3 spatial layers and 3 temporal
// layers.
class ScalabilityStructureL3T3Key : public ScalabilityStructureKeySvc {
public:
ScalabilityStructureL3T3Key() : ScalabilityStructureKeySvc(3, 3) {}
~ScalabilityStructureL3T3Key() override;
// Dependency structure matching the 3x3 layer layout.
FrameDependencyStructure DependencyStructure() const override;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_KEY_SVC_H_

View file

@ -0,0 +1,177 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalability_structure_l2t2_key_shift.h"
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace {
// Returns the indication that the frame described by `config` carries for the
// decode target (`sid`, `tid`):
//  - a key frame is a switch point for its own and all higher spatial layers
//    and is absent from lower ones;
//  - a delta frame is present only in decode targets of its own spatial layer
//    with a temporal id not below its own, and is discardable only for the
//    decode target whose temporal id equals its own non-zero temporal id.
DecodeTargetIndication
Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) {
  const int frame_sid = config.SpatialId();
  const int frame_tid = config.TemporalId();

  if (config.IsKeyframe()) {
    RTC_DCHECK_EQ(config.TemporalId(), 0);
    if (sid < frame_sid) {
      return DecodeTargetIndication::kNotPresent;
    }
    return DecodeTargetIndication::kSwitch;
  }

  const bool same_spatial_layer = (sid == frame_sid);
  if (!same_spatial_layer || tid < frame_tid) {
    return DecodeTargetIndication::kNotPresent;
  }

  const bool is_top_layer_of_target = (tid == frame_tid) && (tid > 0);
  return is_top_layer_of_target ? DecodeTargetIndication::kDiscardable
                                : DecodeTargetIndication::kSwitch;
}
} // namespace
// Namespace-scope definitions of the static constexpr members declared in the
// class.
// NOTE(review): since C++17 static constexpr data members are implicitly
// inline, so these are presumably kept for pre-C++17 toolchains - confirm
// before removing.
constexpr int ScalabilityStructureL2T2KeyShift::kNumSpatialLayers;
constexpr int ScalabilityStructureL2T2KeyShift::kNumTemporalLayers;
// Out-of-line defaulted destructor.
ScalabilityStructureL2T2KeyShift::~ScalabilityStructureL2T2KeyShift() = default;
// Reports a 2 spatial x 2 temporal layer stream that uses reference scaling.
// Only the scaling factor of spatial layer 0 is set (1/2); the entry for
// spatial layer 1 keeps whatever StreamLayersConfig default-initializes it to.
ScalableVideoController::StreamLayersConfig
ScalabilityStructureL2T2KeyShift::StreamConfig() const {
  StreamLayersConfig layers;
  layers.uses_reference_scaling = true;
  layers.num_spatial_layers = 2;
  layers.num_temporal_layers = 2;
  layers.scaling_factor_num[0] = 1;
  layers.scaling_factor_den[0] = 2;
  return layers;
}
// Builds the dependency descriptor structure: 4 decode targets (S0T0, S0T1,
// S1T0, S1T1), one chain per spatial layer, and 7 frame templates ordered by
// (spatial id, temporal id).
FrameDependencyStructure ScalabilityStructureL2T2KeyShift::DependencyStructure()
    const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 4;
  structure.num_chains = 2;
  structure.decode_target_protected_by_chain = {0, 0, 1, 1};

  auto& t = structure.templates;
  t.resize(7);
  // Spatial layer 0.
  t[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
  t[1].S(0).T(0).Dtis("SS--").ChainDiffs({2, 1}).FrameDiffs({2});
  t[2].S(0).T(0).Dtis("SS--").ChainDiffs({4, 1}).FrameDiffs({4});
  t[3].S(0).T(1).Dtis("-D--").ChainDiffs({2, 3}).FrameDiffs({2});
  // Spatial layer 1.
  t[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
  t[5].S(1).T(0).Dtis("--SS").ChainDiffs({3, 4}).FrameDiffs({4});
  t[6].S(1).T(1).Dtis("---D").ChainDiffs({1, 2}).FrameDiffs({2});
  return structure;
}
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureL2T2KeyShift::NextFrameConfig(bool restart) {
std::vector<LayerFrameConfig> configs;
configs.reserve(2);
if (restart) {
next_pattern_ = kKey;
}
// Buffer0 keeps latest S0T0 frame,
// Buffer1 keeps latest S1T0 frame.
switch (next_pattern_) {
case kKey:
if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
configs.emplace_back();
configs.back().S(0).T(0).Update(0).Keyframe();
}
if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/0)) {
configs.emplace_back();
configs.back().S(1).T(0).Update(1);
if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
configs.back().Reference(0);
} else {
configs.back().Keyframe();
}
}
next_pattern_ = kDelta0;
break;
case kDelta0:
if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
configs.emplace_back();
configs.back().S(0).T(0).ReferenceAndUpdate(0);
}
if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/1)) {
configs.emplace_back();
configs.back().S(1).T(1).Reference(1);
}
if (configs.empty() && DecodeTargetIsActive(/*sid=*/1, /*tid=*/0)) {
configs.emplace_back();
configs.back().S(1).T(0).ReferenceAndUpdate(1);
}
next_pattern_ = kDelta1;
break;
case kDelta1:
if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/1)) {
configs.emplace_back();
configs.back().S(0).T(1).Reference(0);
}
if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/0)) {
configs.emplace_back();
configs.back().S(1).T(0).ReferenceAndUpdate(1);
}
if (configs.empty() && DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) {
configs.emplace_back();
configs.back().S(0).T(0).ReferenceAndUpdate(0);
}
next_pattern_ = kDelta0;
break;
}
RTC_DCHECK(!configs.empty() || active_decode_targets_.none());
return configs;
}
// Translates the encoded frame's `config` into a GenericFrameInfo: layer ids,
// buffer usage, one decode target indication per (sid, tid) pair in
// spatial-major order, and chain membership (a key frame belongs to both
// chains, a T0 delta frame to the chain of its own spatial layer, any other
// frame to none).
GenericFrameInfo ScalabilityStructureL2T2KeyShift::OnEncodeDone(
    const LayerFrameConfig& config) {
  const int frame_sid = config.SpatialId();
  const int frame_tid = config.TemporalId();

  GenericFrameInfo frame_info;
  frame_info.spatial_id = frame_sid;
  frame_info.temporal_id = frame_tid;
  frame_info.encoder_buffers = config.Buffers();

  constexpr int kNumDecodeTargets = kNumSpatialLayers * kNumTemporalLayers;
  for (int target = 0; target < kNumDecodeTargets; ++target) {
    frame_info.decode_target_indications.push_back(
        Dti(/*sid=*/target / kNumTemporalLayers,
            /*tid=*/target % kNumTemporalLayers, config));
  }

  if (config.IsKeyframe()) {
    frame_info.part_of_chain = {true, true};
  } else {
    const bool on_base_temporal_layer = (frame_tid == 0);
    frame_info.part_of_chain = {on_base_temporal_layer && frame_sid == 0,
                                on_base_temporal_layer && frame_sid == 1};
  }
  return frame_info;
}
// Updates the set of active decode targets from `bitrates`. A spatial layer is
// active when its T0 bitrate is non-zero; its T1 target additionally needs a
// non-zero T1 bitrate.
void ScalabilityStructureL2T2KeyShift::OnRatesUpdated(
    const VideoBitrateAllocation& bitrates) {
  // Spatial layers are enabled/disabled independently of each other.
  for (int sid = 0; sid < kNumSpatialLayers; ++sid) {
    const bool was_active = DecodeTargetIsActive(sid, /*tid=*/0);
    const bool t0_active = bitrates.GetBitrate(sid, /*tid=*/0) > 0;
    if (t0_active && !was_active) {
      // A key frame is required to re-enable any spatial layer.
      next_pattern_ = kKey;
    }
    const bool t1_active =
        t0_active && bitrates.GetBitrate(sid, /*tid=*/1) > 0;
    SetDecodeTargetIsActive(sid, /*tid=*/0, t0_active);
    SetDecodeTargetIsActive(sid, /*tid=*/1, t1_active);
  }
}
} // namespace webrtc

View file

@ -0,0 +1,64 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_L2T2_KEY_SHIFT_H_
#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_L2T2_KEY_SHIFT_H_
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Scalable video controller with a fixed layout of 2 spatial and 2 temporal
// layers (see kNumSpatialLayers / kNumTemporalLayers).
// S1T1 0 0
// / / /
// S1T0 0---0---0
// | ...
// S0T1 | 0 0
// | / /
// S0T0 0-0---0--
// Time-> 0 1 2 3 4
class ScalabilityStructureL2T2KeyShift : public ScalableVideoController {
public:
~ScalabilityStructureL2T2KeyShift() override;
StreamLayersConfig StreamConfig() const override;
FrameDependencyStructure DependencyStructure() const override;
std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
private:
// Pattern of the temporal unit that NextFrameConfig() generates next.
enum FramePattern {
kKey,
kDelta0,
kDelta1,
};
static constexpr int kNumSpatialLayers = 2;
static constexpr int kNumTemporalLayers = 2;
// Decode targets are indexed spatial-major: bit
// `sid * kNumTemporalLayers + tid` of `active_decode_targets_`.
bool DecodeTargetIsActive(int sid, int tid) const {
return active_decode_targets_[sid * kNumTemporalLayers + tid];
}
// Marks decode target (`sid`, `tid`) as active (`value` == true) or inactive.
void SetDecodeTargetIsActive(int sid, int tid, bool value) {
active_decode_targets_.set(sid * kNumTemporalLayers + tid, value);
}
// The first temporal unit is generated with the key pattern.
FramePattern next_pattern_ = kKey;
// All 4 decode targets start active.
// NOTE(review): std::bitset is used here but <bitset> is not among the
// includes of this header - presumably provided transitively; confirm.
std::bitset<32> active_decode_targets_ = 0b1111;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_L2T2_KEY_SHIFT_H_

View file

@ -0,0 +1,353 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalability_structure_simulcast.h"
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace {
// Returns the indication that the frame described by `config` carries for the
// decode target (`sid`, `tid`). A frame is present only in decode targets of
// its own spatial layer whose temporal id is not below the frame's; it is
// discardable only for the target whose temporal id equals its own non-zero
// temporal id, and a switch point otherwise.
DecodeTargetIndication
Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) {
  const bool other_spatial_layer = (sid != config.SpatialId());
  if (other_spatial_layer || tid < config.TemporalId()) {
    return DecodeTargetIndication::kNotPresent;
  }

  if (tid != 0 && tid == config.TemporalId()) {
    return DecodeTargetIndication::kDiscardable;
  }

  // Remaining cases are all switch points; only the sanity check differs.
  if (tid == 0) {
    RTC_DCHECK_EQ(config.TemporalId(), 0);
  } else {
    RTC_DCHECK_GT(tid, config.TemporalId());
  }
  return DecodeTargetIndication::kSwitch;
}
} // namespace
// Namespace-scope definitions of the static constexpr members declared in the
// class.
// NOTE(review): presumably needed pre-C++17 because the RTC_DCHECK_LE calls in
// the constructor may ODR-use these constants; since C++17 such members are
// implicitly inline - confirm before removing.
constexpr int ScalabilityStructureSimulcast::kMaxNumSpatialLayers;
constexpr int ScalabilityStructureSimulcast::kMaxNumTemporalLayers;
// Creates a controller for `num_spatial_layers` streams with
// `num_temporal_layers` temporal layers each. `resolution_factor` is the
// ratio applied between consecutive spatial layers (see StreamConfig()).
// All `num_spatial_layers * num_temporal_layers` decode targets start active.
//
// Both layer counts must be in [1, kMaxNum*Layers]. The lower bound matters
// as much as the upper one: a non-positive count makes the shift below
// ill-defined and makes StreamConfig() index its scaling factor arrays at -1.
ScalabilityStructureSimulcast::ScalabilityStructureSimulcast(
    int num_spatial_layers,
    int num_temporal_layers,
    ScalingFactor resolution_factor)
    : num_spatial_layers_(num_spatial_layers),
      num_temporal_layers_(num_temporal_layers),
      resolution_factor_(resolution_factor),
      active_decode_targets_(
          (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
  RTC_DCHECK_GT(num_spatial_layers, 0);
  RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
  RTC_DCHECK_GT(num_temporal_layers, 0);
  RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
}

ScalabilityStructureSimulcast::~ScalabilityStructureSimulcast() = default;
// Reports the configured layer counts and per-layer scaling factors. The top
// spatial layer has factor 1/1; each lower layer's factor is the factor of the
// layer above multiplied by `resolution_factor_`. Reference scaling is not
// used.
ScalableVideoController::StreamLayersConfig
ScalabilityStructureSimulcast::StreamConfig() const {
  StreamLayersConfig result;
  result.num_spatial_layers = num_spatial_layers_;
  result.num_temporal_layers = num_temporal_layers_;
  result.uses_reference_scaling = false;

  const int top = num_spatial_layers_ - 1;
  result.scaling_factor_num[top] = 1;
  result.scaling_factor_den[top] = 1;
  // Walk downwards so that the layer above is always filled in already.
  for (int lower = top - 1; lower >= 0; --lower) {
    const int upper = lower + 1;
    result.scaling_factor_num[lower] =
        resolution_factor_.num * result.scaling_factor_num[upper];
    result.scaling_factor_den[lower] =
        resolution_factor_.den * result.scaling_factor_den[upper];
  }
  return result;
}
// Returns true when temporal layer `tid` exists and is active for at least one
// spatial layer.
bool ScalabilityStructureSimulcast::TemporalLayerIsActive(int tid) const {
  if (tid >= num_temporal_layers_) {
    return false;
  }
  // Advance to the first spatial layer that has `tid` active, if any.
  int sid = 0;
  while (sid < num_spatial_layers_ && !DecodeTargetIsActive(sid, tid)) {
    ++sid;
  }
  return sid < num_spatial_layers_;
}
// Picks the pattern that follows `last_pattern_`, skipping temporal layers that
// are currently inactive. The full cycle is T0 -> T2A -> T1 -> T2B -> T0.
ScalabilityStructureSimulcast::FramePattern
ScalabilityStructureSimulcast::NextPattern() const {
  switch (last_pattern_) {
    case kNone:
    case kDeltaT2B:
      return kDeltaT0;
    case kDeltaT2A:
      return TemporalLayerIsActive(1) ? kDeltaT1 : kDeltaT0;
    case kDeltaT1:
      return TemporalLayerIsActive(2) ? kDeltaT2B : kDeltaT0;
    case kDeltaT0:
      // Prefer the highest active temporal layer.
      if (TemporalLayerIsActive(2)) {
        return kDeltaT2A;
      }
      return TemporalLayerIsActive(1) ? kDeltaT1 : kDeltaT0;
  }
  RTC_DCHECK_NOTREACHED();
  return kDeltaT0;
}
// Returns the layer frames to encode for the next temporal unit, at most one
// per spatial layer. Returns an empty vector when no decode target is active.
// Each returned config carries the chosen pattern in its Id(); OnEncodeDone()
// reads it back to update `last_pattern_`.
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureSimulcast::NextFrameConfig(bool restart) {
std::vector<LayerFrameConfig> configs;
if (active_decode_targets_.none()) {
// Nothing to encode; start over from scratch once a layer is re-enabled.
last_pattern_ = kNone;
return configs;
}
configs.reserve(num_spatial_layers_);
if (last_pattern_ == kNone || restart) {
// Forget all T0 frames so that NextPattern() yields kDeltaT0 and every
// active spatial layer takes the Keyframe() branch below.
can_reference_t0_frame_for_spatial_id_.reset();
last_pattern_ = kNone;
}
FramePattern current_pattern = NextPattern();
switch (current_pattern) {
case kDeltaT0:
// Disallow temporal references cross T0 on higher temporal layers.
can_reference_t1_frame_for_spatial_id_.reset();
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
// Next frame from the spatial layer `sid` shouldn't depend on
// potentially old previous frame from the spatial layer `sid`.
can_reference_t0_frame_for_spatial_id_.reset(sid);
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(current_pattern).S(sid).T(0);
// Continue the layer from its previous T0 frame when there is one,
// otherwise start it with a key frame.
if (can_reference_t0_frame_for_spatial_id_[sid]) {
config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
} else {
config.Keyframe().Update(BufferIndex(sid, /*tid=*/0));
}
can_reference_t0_frame_for_spatial_id_.set(sid);
}
break;
case kDeltaT1:
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
// Skip layers without an active T1 target or without a T0 frame to
// reference.
if (!DecodeTargetIsActive(sid, /*tid=*/1) ||
!can_reference_t0_frame_for_spatial_id_[sid]) {
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(current_pattern)
.S(sid)
.T(1)
.Reference(BufferIndex(sid, /*tid=*/0));
// Save frame only if there is a higher temporal layer that may need it.
if (num_temporal_layers_ > 2) {
config.Update(BufferIndex(sid, /*tid=*/1));
}
}
break;
case kDeltaT2A:
case kDeltaT2B:
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
// Skip layers without an active T2 target or without a T0 frame to
// reference.
if (!DecodeTargetIsActive(sid, /*tid=*/2) ||
!can_reference_t0_frame_for_spatial_id_[sid]) {
continue;
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
config.Id(current_pattern).S(sid).T(2);
// Reference the T1 frame when one was encoded since the last T0 frame,
// otherwise fall back to the T0 frame. T2 frames are never stored.
if (can_reference_t1_frame_for_spatial_id_[sid]) {
config.Reference(BufferIndex(sid, /*tid=*/1));
} else {
config.Reference(BufferIndex(sid, /*tid=*/0));
}
}
break;
case kNone:
// NextPattern() never returns kNone.
RTC_DCHECK_NOTREACHED();
break;
}
return configs;
}
// Records that the frame described by `config` was encoded and converts it to
// a GenericFrameInfo. Side effects: `last_pattern_` is taken from the config's
// Id(), and a T1 frame makes its spatial layer's T1 buffer referenceable.
// Only a T0 frame is part of a chain, namely the chain of its own spatial
// layer.
GenericFrameInfo ScalabilityStructureSimulcast::OnEncodeDone(
    const LayerFrameConfig& config) {
  const int frame_sid = config.SpatialId();
  const int frame_tid = config.TemporalId();

  last_pattern_ = static_cast<FramePattern>(config.Id());
  if (frame_tid == 1) {
    can_reference_t1_frame_for_spatial_id_.set(frame_sid);
  }

  GenericFrameInfo frame_info;
  frame_info.spatial_id = frame_sid;
  frame_info.temporal_id = frame_tid;
  frame_info.encoder_buffers = config.Buffers();
  frame_info.active_decode_targets = active_decode_targets_;

  // One indication per decode target, in spatial-major order.
  frame_info.decode_target_indications.reserve(num_spatial_layers_ *
                                               num_temporal_layers_);
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    for (int tid = 0; tid < num_temporal_layers_; ++tid) {
      frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
    }
  }

  frame_info.part_of_chain.assign(num_spatial_layers_, false);
  if (frame_tid == 0) {
    frame_info.part_of_chain[frame_sid] = true;
  }
  return frame_info;
}
// Updates the set of active decode targets from `bitrates`. Decode target
// (sid, tid) is active only when every temporal layer 0..tid of spatial layer
// `sid` has a non-zero bitrate.
void ScalabilityStructureSimulcast::OnRatesUpdated(
    const VideoBitrateAllocation& bitrates) {
  // Spatial layers are enabled/disabled independently of each other.
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    bool lower_layers_have_bitrate = true;
    for (int tid = 0; tid < num_temporal_layers_; ++tid) {
      // Once one temporal layer has no bitrate, all higher ones stay off.
      if (lower_layers_have_bitrate &&
          !(bitrates.GetBitrate(sid, tid) > 0)) {
        lower_layers_have_bitrate = false;
      }
      SetDecodeTargetIsActive(sid, tid, lower_layers_have_bitrate);
    }
  }
}
// Dependency descriptor structure for 2 streams with a single temporal layer:
// 2 decode targets, each protected by its own chain, and 2 templates per
// stream (one with and one without a frame dependency).
FrameDependencyStructure ScalabilityStructureS2T1::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 2;
  structure.num_chains = 2;
  structure.decode_target_protected_by_chain = {0, 1};

  auto& t = structure.templates;
  t.resize(4);
  t[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2});
  t[1].S(0).Dtis("S-").ChainDiffs({0, 0});
  t[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2});
  t[3].S(1).Dtis("-S").ChainDiffs({1, 0});
  return structure;
}
// Dependency descriptor structure for 2 streams with 2 temporal layers each:
// 4 decode targets; targets 0-1 are protected by chain 0, targets 2-3 by
// chain 1.
FrameDependencyStructure ScalabilityStructureS2T2::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 4;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 0, 1, 1};
auto& t = structure.templates;
t.resize(6);
// The assignments below are not in index order, but the resulting array is
// ordered by (spatial id, temporal id): t[0..2] describe S0 frames and
// t[3..5] describe S1 frames. Entries without FrameDiffs() list no frame
// dependencies.
t[1].S(0).T(0).Dtis("SS--").ChainDiffs({0, 0});
t[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 0});
t[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2});
t[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2});
t[0].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4});
t[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4});
return structure;
}
// Dependency descriptor structure for 2 streams with 3 temporal layers each:
// 6 decode targets; targets 0-2 are protected by chain 0, targets 3-5 by
// chain 1.
FrameDependencyStructure ScalabilityStructureS2T3::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 6;
structure.num_chains = 2;
structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1};
auto& t = structure.templates;
t.resize(10);
// The assignments below are not in index order, but the resulting array is
// ordered by (spatial id, temporal id): t[0..4] describe S0 frames and
// t[5..9] describe S1 frames. Entries without FrameDiffs() list no frame
// dependencies.
t[1].S(0).T(0).Dtis("SSS---").ChainDiffs({0, 0});
t[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 0});
t[3].S(0).T(2).Dtis("--D---").ChainDiffs({2, 1}).FrameDiffs({2});
t[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2});
t[2].S(0).T(1).Dtis("-DS---").ChainDiffs({4, 3}).FrameDiffs({4});
t[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4});
t[4].S(0).T(2).Dtis("--D---").ChainDiffs({6, 5}).FrameDiffs({2});
t[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2});
t[0].S(0).T(0).Dtis("SSS---").ChainDiffs({8, 7}).FrameDiffs({8});
t[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 8}).FrameDiffs({8});
return structure;
}
// Dependency descriptor structure for 3 streams with a single temporal layer:
// 3 decode targets, each protected by its own chain.
FrameDependencyStructure ScalabilityStructureS3T1::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 3;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 1, 2};
auto& t = structure.templates;
t.resize(6);
// The assignments below are not in index order, but the resulting array is
// ordered by spatial id: t[0..1] describe S0 frames, t[2..3] S1 frames and
// t[4..5] S2 frames. Entries without FrameDiffs() list no frame dependencies.
t[1].S(0).T(0).Dtis("S--").ChainDiffs({0, 0, 0});
t[3].S(1).T(0).Dtis("-S-").ChainDiffs({1, 0, 0});
t[5].S(2).T(0).Dtis("--S").ChainDiffs({2, 1, 0});
t[0].S(0).T(0).Dtis("S--").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[2].S(1).T(0).Dtis("-S-").ChainDiffs({1, 3, 2}).FrameDiffs({3});
t[4].S(2).T(0).Dtis("--S").ChainDiffs({2, 1, 3}).FrameDiffs({3});
return structure;
}
// Dependency descriptor structure for 3 streams with 2 temporal layers each:
// 6 decode targets; each pair of targets belonging to one stream is protected
// by that stream's chain.
FrameDependencyStructure ScalabilityStructureS3T2::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 6;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2};
auto& t = structure.templates;
t.resize(9);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement.
t[1].S(0).T(0).Dtis("SS----").ChainDiffs({0, 0, 0});
t[4].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 0, 0});
t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 0});
t[2].S(0).T(1).Dtis("-D----").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[5].S(1).T(1).Dtis("---D--").ChainDiffs({4, 3, 2}).FrameDiffs({3});
t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
t[0].S(0).T(0).Dtis("SS----").ChainDiffs({6, 5, 4}).FrameDiffs({6});
t[3].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 6, 5}).FrameDiffs({6});
t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 6}).FrameDiffs({6});
return structure;
}
// Dependency descriptor structure for 3 streams with 3 temporal layers each:
// 9 decode targets; each triple of targets belonging to one stream is
// protected by that stream's chain.
FrameDependencyStructure ScalabilityStructureS3T3::DependencyStructure() const {
FrameDependencyStructure structure;
structure.num_decode_targets = 9;
structure.num_chains = 3;
structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
auto& t = structure.templates;
t.resize(15);
// Templates are shown in the order frames following them appear in the
// stream, but in `structure.templates` array templates are sorted by
// (`spatial_id`, `temporal_id`) since that is a dependency descriptor
// requirement. Indexes are written in hex for nicer alignment.
t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({0, 0, 0});
t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 0, 0});
t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 0});
t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3});
t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3});
t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6});
t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6});
t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6});
t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3});
t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3});
t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3});
t[0x0].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12});
t[0x5].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12});
t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12});
return structure;
}
} // namespace webrtc

View file

@ -0,0 +1,145 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_SIMULCAST_H_
#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_SIMULCAST_H_
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Scalability structure with multiple independent spatial layers each with the
// same temporal layering.
// Concrete ScalabilityStructureS*T* subclasses fix the layer counts and supply
// DependencyStructure().
class ScalabilityStructureSimulcast : public ScalableVideoController {
public:
// Ratio applied between consecutive spatial layers when StreamConfig()
// derives the per-layer scaling factors; defaults to 1/2.
struct ScalingFactor {
int num = 1;
int den = 2;
};
ScalabilityStructureSimulcast(int num_spatial_layers,
int num_temporal_layers,
ScalingFactor resolution_factor);
~ScalabilityStructureSimulcast() override;
StreamLayersConfig StreamConfig() const override;
std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
private:
// Pattern of a temporal unit. The value is stored in LayerFrameConfig::Id()
// by NextFrameConfig() and read back by OnEncodeDone().
enum FramePattern {
kNone,
kDeltaT2A,
kDeltaT1,
kDeltaT2B,
kDeltaT0,
};
// Upper bounds checked (RTC_DCHECK_LE) by the constructor.
static constexpr int kMaxNumSpatialLayers = 3;
static constexpr int kMaxNumTemporalLayers = 3;
// Index of the buffer to store last frame for layer (`sid`, `tid`).
// Buffers are grouped by temporal id: indexes for all spatial layers of one
// temporal layer are consecutive.
int BufferIndex(int sid, int tid) const {
return tid * num_spatial_layers_ + sid;
}
// Decode targets are indexed spatial-major: bit
// `sid * num_temporal_layers_ + tid` of `active_decode_targets_`.
bool DecodeTargetIsActive(int sid, int tid) const {
return active_decode_targets_[sid * num_temporal_layers_ + tid];
}
// Marks decode target (`sid`, `tid`) as active (`value` == true) or inactive.
void SetDecodeTargetIsActive(int sid, int tid, bool value) {
active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
}
// Pattern that follows `last_pattern_`, skipping inactive temporal layers.
FramePattern NextPattern() const;
// True when `tid` is active for at least one spatial layer.
bool TemporalLayerIsActive(int tid) const;
const int num_spatial_layers_;
const int num_temporal_layers_;
const ScalingFactor resolution_factor_;
FramePattern last_pattern_ = kNone;
// Per spatial layer: whether a T0 / T1 frame is available to be referenced.
// NOTE(review): std::bitset is used here but <bitset> is not among the
// includes of this header - presumably provided transitively; confirm.
std::bitset<kMaxNumSpatialLayers> can_reference_t0_frame_for_spatial_id_ = 0;
std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_ = 0;
// One bit per decode target, see DecodeTargetIsActive() for the indexing.
std::bitset<32> active_decode_targets_;
};
// Simulcast structure with 2 spatial layers and 1 temporal layer.
// S1 0--0--0-
// ...
// S0 0--0--0-
class ScalabilityStructureS2T1 : public ScalabilityStructureSimulcast {
public:
// `resolution_factor` defaults to ScalingFactor's default (1/2).
explicit ScalabilityStructureS2T1(ScalingFactor resolution_factor = {})
: ScalabilityStructureSimulcast(2, 1, resolution_factor) {}
~ScalabilityStructureS2T1() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Simulcast structure with 2 spatial layers and 2 temporal layers.
class ScalabilityStructureS2T2 : public ScalabilityStructureSimulcast {
public:
// `resolution_factor` defaults to ScalingFactor's default (1/2).
explicit ScalabilityStructureS2T2(ScalingFactor resolution_factor = {})
: ScalabilityStructureSimulcast(2, 2, resolution_factor) {}
~ScalabilityStructureS2T2() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Simulcast structure with 2 spatial layers and 3 temporal layers.
// S1T2 3 7
// | /
// S1T1 / 5
// |_/
// S1T0 1-------9...
//
// S0T2 2 6
// | /
// S0T1 / 4
// |_/
// S0T0 0-------8...
// Time-> 0 1 2 3 4
class ScalabilityStructureS2T3 : public ScalabilityStructureSimulcast {
public:
// `resolution_factor` defaults to ScalingFactor's default (1/2).
explicit ScalabilityStructureS2T3(ScalingFactor resolution_factor = {})
: ScalabilityStructureSimulcast(2, 3, resolution_factor) {}
~ScalabilityStructureS2T3() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Simulcast structure with 3 spatial layers and 1 temporal layer.
class ScalabilityStructureS3T1 : public ScalabilityStructureSimulcast {
public:
// `resolution_factor` defaults to ScalingFactor's default (1/2).
explicit ScalabilityStructureS3T1(ScalingFactor resolution_factor = {})
: ScalabilityStructureSimulcast(3, 1, resolution_factor) {}
~ScalabilityStructureS3T1() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Simulcast structure with 3 spatial layers and 2 temporal layers.
class ScalabilityStructureS3T2 : public ScalabilityStructureSimulcast {
public:
// `resolution_factor` defaults to ScalingFactor's default (1/2).
explicit ScalabilityStructureS3T2(ScalingFactor resolution_factor = {})
: ScalabilityStructureSimulcast(3, 2, resolution_factor) {}
~ScalabilityStructureS3T2() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
// Simulcast structure with 3 spatial layers and 3 temporal layers.
class ScalabilityStructureS3T3 : public ScalabilityStructureSimulcast {
public:
// `resolution_factor` defaults to ScalingFactor's default (1/2).
explicit ScalabilityStructureS3T3(ScalingFactor resolution_factor = {})
: ScalabilityStructureSimulcast(3, 3, resolution_factor) {}
~ScalabilityStructureS3T3() override = default;
FrameDependencyStructure DependencyStructure() const override;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_SIMULCAST_H_

View file

@ -0,0 +1,101 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalability_structure_test_helpers.h"
#include <stdint.h>
#include <utility>
#include <vector>
#include "api/array_view.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "modules/video_coding/chain_diff_calculator.h"
#include "modules/video_coding/frame_dependencies_calculator.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "test/gtest.h"
namespace webrtc {
// Builds a bitrate allocation in which spatial layer 0 has its first `s0`
// temporal layers set to a non-zero bitrate, spatial layer 1 its first `s1`
// and spatial layer 2 its first `s2`. All other entries are left untouched.
VideoBitrateAllocation EnableTemporalLayers(int s0, int s1, int s2) {
  const int temporal_layers_per_spatial_layer[] = {s0, s1, s2};

  VideoBitrateAllocation bitrate;
  int sid = 0;
  for (int num_temporal_layers : temporal_layers_per_spatial_layer) {
    for (int tid = 0; tid < num_temporal_layers; ++tid) {
      bitrate.SetBitrate(sid, tid, 1'000'000);
    }
    ++sid;
  }
  return bitrate;
}
void ScalabilityStructureWrapper::GenerateFrames(
int num_temporal_units,
std::vector<GenericFrameInfo>& frames) {
for (int i = 0; i < num_temporal_units; ++i) {
for (auto& layer_frame :
structure_controller_.NextFrameConfig(/*restart=*/false)) {
int64_t frame_id = ++frame_id_;
bool is_keyframe = layer_frame.IsKeyframe();
GenericFrameInfo frame_info =
structure_controller_.OnEncodeDone(layer_frame);
if (is_keyframe) {
chain_diff_calculator_.Reset(frame_info.part_of_chain);
}
frame_info.chain_diffs =
chain_diff_calculator_.From(frame_id, frame_info.part_of_chain);
for (int64_t base_frame_id : frame_deps_calculator_.FromBuffersUsage(
frame_id, frame_info.encoder_buffers)) {
frame_info.frame_diffs.push_back(frame_id - base_frame_id);
}
frames.push_back(std::move(frame_info));
}
}
}
// Checks buffer usage and frame diffs of `frames`, reporting every violation
// with ADD_FAILURE(). Returns true when none was found. Checked are:
//  - buffer ids are within [0, 8);
//  - a buffer is referenced only after some earlier usage updated it;
//  - every frame diff is positive and does not reach before frames[0].
bool ScalabilityStructureWrapper::FrameReferencesAreValid(
    rtc::ArrayView<const GenericFrameInfo> frames) const {
  // VP9 and AV1 support up to 8 buffers, so no buffer id beyond that range is
  // expected.
  constexpr int kMaxBuffers = 8;
  std::bitset<kMaxBuffers> buffer_contains_frame;
  bool valid = true;

  for (size_t i = 0; i < frames.size(); ++i) {
    const GenericFrameInfo& frame = frames[i];

    for (const CodecBufferUsage& buffer_usage : frame.encoder_buffers) {
      const bool id_in_range =
          buffer_usage.id >= 0 && buffer_usage.id < kMaxBuffers;
      if (!id_in_range) {
        ADD_FAILURE() << "Invalid buffer id " << buffer_usage.id
                      << " for frame#" << i
                      << ". Up to 8 buffers are supported.";
        valid = false;
        continue;
      }
      const bool buffer_filled = buffer_contains_frame[buffer_usage.id];
      if (buffer_usage.referenced && !buffer_filled) {
        ADD_FAILURE() << "buffer " << buffer_usage.id << " for frame#" << i
                      << " was reference before updated.";
        valid = false;
      }
      if (buffer_usage.updated) {
        buffer_contains_frame.set(buffer_usage.id);
      }
    }

    for (int fdiff : frame.frame_diffs) {
      // A diff larger than the frame's own index would point before frames[0].
      const bool within_range = fdiff > 0 && static_cast<size_t>(fdiff) <= i;
      if (!within_range) {
        ADD_FAILURE() << "Invalid frame diff " << fdiff << " for frame#" << i;
        valid = false;
      }
    }
  }
  return valid;
}
} // namespace webrtc

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_TEST_HELPERS_H_
#define MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_TEST_HELPERS_H_
#include <stdint.h>
#include <vector>
#include "api/array_view.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/chain_diff_calculator.h"
#include "modules/video_coding/frame_dependencies_calculator.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Creates bitrate allocation with non-zero bitrate for given number of temporal
// layers for each spatial layer.
VideoBitrateAllocation EnableTemporalLayers(int s0, int s1 = 0, int s2 = 0);
// Test helper that drives a ScalableVideoController and fills in the frame ids,
// chain diffs and frame diffs of the frames it produces.
// The wrapper keeps a reference to `structure`, so the controller must outlive
// the wrapper.
class ScalabilityStructureWrapper {
public:
explicit ScalabilityStructureWrapper(ScalableVideoController& structure)
: structure_controller_(structure) {}
// Generates `num_temporal_units` temporal units and returns their frames.
std::vector<GenericFrameInfo> GenerateFrames(int num_temporal_units) {
std::vector<GenericFrameInfo> frames;
GenerateFrames(num_temporal_units, frames);
return frames;
}
// Same as above, but appends the generated frames to `frames`.
void GenerateFrames(int num_temporal_units,
std::vector<GenericFrameInfo>& frames);
// Returns false and ADD_FAILUREs for frames with invalid references.
// In particular validates that no frame references a frame before frames[0].
// In error messages frames are indexed starting with 0.
bool FrameReferencesAreValid(
rtc::ArrayView<const GenericFrameInfo> frames) const;
private:
ScalableVideoController& structure_controller_;
FrameDependenciesCalculator frame_deps_calculator_;
ChainDiffCalculator chain_diff_calculator_;
// Id of the most recently generated frame; the first frame gets id 1.
int64_t frame_id_ = 0;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_STRUCTURE_TEST_HELPERS_H_

View file

@ -0,0 +1,395 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <ostream>
#include <string>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
#include "modules/video_coding/svc/scalability_structure_test_helpers.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
using ::testing::AllOf;
using ::testing::Contains;
using ::testing::Each;
using ::testing::ElementsAreArray;
using ::testing::Field;
using ::testing::Ge;
using ::testing::IsEmpty;
using ::testing::Le;
using ::testing::Lt;
using ::testing::Not;
using ::testing::NotNull;
using ::testing::SizeIs;
using ::testing::TestWithParam;
using ::testing::Values;
std::string FrameDependencyTemplateToString(const FrameDependencyTemplate& t) {
rtc::StringBuilder sb;
sb << "S" << t.spatial_id << "T" << t.temporal_id;
sb << ": dtis = ";
for (const auto dtis : t.decode_target_indications) {
switch (dtis) {
case DecodeTargetIndication::kNotPresent:
sb << "-";
break;
case DecodeTargetIndication::kDiscardable:
sb << "D";
break;
case DecodeTargetIndication::kSwitch:
sb << "S";
break;
case DecodeTargetIndication::kRequired:
sb << "R";
break;
default:
sb << "?";
break;
}
}
sb << ", frame diffs = { ";
for (int d : t.frame_diffs) {
sb << d << ", ";
}
sb << "}, chain diffs = { ";
for (int d : t.chain_diffs) {
sb << d << ", ";
}
sb << "}";
return sb.Release();
}
// Parameter for the ScalabilityStructureTest suite: the scalability mode,
// given by name, and how many temporal units the tests should generate.
struct SvcTestParam {
// Prints the mode name so that gtest output identifies the parameter.
friend std::ostream& operator<<(std::ostream& os, const SvcTestParam& param) {
return os << param.name;
}
// Converts `name` into a ScalabilityMode. RTC_CHECKs that the name is
// recognized by ScalabilityModeFromString.
ScalabilityMode GetScalabilityMode() const {
absl::optional<ScalabilityMode> scalability_mode =
ScalabilityModeFromString(name);
RTC_CHECK(scalability_mode.has_value());
return *scalability_mode;
}
// Scalability mode name, e.g. "L1T1".
std::string name;
// Number of temporal units passed to GenerateFrames by the tests.
int num_temporal_units;
};
// Fixture for tests that run once per SvcTestParam (see the
// INSTANTIATE_TEST_SUITE_P list for the covered scalability modes).
class ScalabilityStructureTest : public TestWithParam<SvcTestParam> {};
// Checks that the config returned by ScalabilityStructureConfig() for a mode
// agrees with StreamConfig() of the controller created for the same mode:
// same layer counts and same scaling factors for the used spatial layers.
TEST_P(ScalabilityStructureTest,
StaticConfigMatchesConfigReturnedByController) {
std::unique_ptr<ScalableVideoController> controller =
CreateScalabilityStructure(GetParam().GetScalabilityMode());
absl::optional<ScalableVideoController::StreamLayersConfig> static_config =
ScalabilityStructureConfig(GetParam().GetScalabilityMode());
// Both lookups must succeed before the configs can be compared.
ASSERT_THAT(controller, NotNull());
ASSERT_NE(static_config, absl::nullopt);
ScalableVideoController::StreamLayersConfig config =
controller->StreamConfig();
EXPECT_EQ(config.num_spatial_layers, static_config->num_spatial_layers);
EXPECT_EQ(config.num_temporal_layers, static_config->num_temporal_layers);
// Only the first `num_spatial_layers` entries of the scaling arrays are
// compared.
EXPECT_THAT(
rtc::MakeArrayView(config.scaling_factor_num, config.num_spatial_layers),
ElementsAreArray(static_config->scaling_factor_num,
static_config->num_spatial_layers));
EXPECT_THAT(
rtc::MakeArrayView(config.scaling_factor_den, config.num_spatial_layers),
ElementsAreArray(static_config->scaling_factor_den,
static_config->num_spatial_layers));
}
// Checks that the dependency structure stays within the limits declared by
// DependencyDescriptor and that its chain bookkeeping is self-consistent.
TEST_P(ScalabilityStructureTest,
NumberOfDecodeTargetsAndChainsAreInRangeAndConsistent) {
FrameDependencyStructure structure =
CreateScalabilityStructure(GetParam().GetScalabilityMode())
->DependencyStructure();
EXPECT_GT(structure.num_decode_targets, 0);
EXPECT_LE(structure.num_decode_targets,
DependencyDescriptor::kMaxDecodeTargets);
EXPECT_GE(structure.num_chains, 0);
EXPECT_LE(structure.num_chains, structure.num_decode_targets);
// Without chains there must be no chain mapping; with chains every decode
// target must map to a valid chain index.
if (structure.num_chains == 0) {
EXPECT_THAT(structure.decode_target_protected_by_chain, IsEmpty());
} else {
EXPECT_THAT(structure.decode_target_protected_by_chain,
AllOf(SizeIs(structure.num_decode_targets), Each(Ge(0)),
Each(Lt(structure.num_chains))));
}
// Note the strict bound: the number of templates must be below
// kMaxTemplates.
EXPECT_THAT(structure.templates,
SizeIs(Lt(size_t{DependencyDescriptor::kMaxTemplates})));
}
// Checks the ordering of templates: the first template is for layer (0,0) and
// every following template either stays on the same layer, moves to the next
// temporal layer of the same spatial layer, or starts the next spatial layer
// at temporal id 0.
TEST_P(ScalabilityStructureTest, TemplatesAreSortedByLayerId) {
  FrameDependencyStructure structure =
      CreateScalabilityStructure(GetParam().GetScalabilityMode())
          ->DependencyStructure();
  ASSERT_THAT(structure.templates, Not(IsEmpty()));
  const auto& first_templates = structure.templates.front();
  EXPECT_EQ(first_templates.spatial_id, 0);
  EXPECT_EQ(first_templates.temporal_id, 0);

  const size_t num_templates = structure.templates.size();
  for (size_t i = 1; i < num_templates; ++i) {
    const auto& previous = structure.templates[i - 1];
    const auto& current = structure.templates[i];

    const bool same_spatial_layer = current.spatial_id == previous.spatial_id;
    // Same layer, next_layer_idc == 0
    const bool stays_on_layer =
        same_spatial_layer && current.temporal_id == previous.temporal_id;
    // Next temporal layer, next_layer_idc == 1
    const bool moves_to_next_temporal =
        same_spatial_layer && current.temporal_id == previous.temporal_id + 1;
    // Next spatial layer, next_layer_idc == 2
    const bool moves_to_next_spatial =
        current.spatial_id == previous.spatial_id + 1 &&
        current.temporal_id == 0;

    if (stays_on_layer || moves_to_next_temporal || moves_to_next_spatial) {
      continue;
    }
    // Everything else is invalid.
    ADD_FAILURE() << "Invalid templates order. Template #" << i
                  << " with layer (" << current.spatial_id << ","
                  << current.temporal_id << ") follows template with layer ("
                  << previous.spatial_id << "," << previous.temporal_id
                  << ").";
  }
}
// Checks that every template carries exactly one decode target indication per
// decode target and exactly one chain diff per chain.
TEST_P(ScalabilityStructureTest, TemplatesMatchNumberOfDecodeTargetsAndChains) {
FrameDependencyStructure structure =
CreateScalabilityStructure(GetParam().GetScalabilityMode())
->DependencyStructure();
EXPECT_THAT(
structure.templates,
Each(AllOf(Field(&FrameDependencyTemplate::decode_target_indications,
SizeIs(structure.num_decode_targets)),
Field(&FrameDependencyTemplate::chain_diffs,
SizeIs(structure.num_chains)))));
}
// Checks that every generated frame has non-negative layer ids and that its
// per-decode-target and per-chain vectors have the sizes announced by the
// dependency structure.
TEST_P(ScalabilityStructureTest, FrameInfoMatchesFrameDependencyStructure) {
std::unique_ptr<ScalableVideoController> svc_controller =
CreateScalabilityStructure(GetParam().GetScalabilityMode());
FrameDependencyStructure structure = svc_controller->DependencyStructure();
std::vector<GenericFrameInfo> frame_infos =
ScalabilityStructureWrapper(*svc_controller)
.GenerateFrames(GetParam().num_temporal_units);
// `frame_id` here is the index into `frame_infos`, used in failure messages.
for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) {
const auto& frame = frame_infos[frame_id];
EXPECT_GE(frame.spatial_id, 0) << " for frame " << frame_id;
EXPECT_GE(frame.temporal_id, 0) << " for frame " << frame_id;
EXPECT_THAT(frame.decode_target_indications,
SizeIs(structure.num_decode_targets))
<< " for frame " << frame_id;
EXPECT_THAT(frame.part_of_chain, SizeIs(structure.num_chains))
<< " for frame " << frame_id;
}
}
// Checks that each generated frame is matched by one of the templates of the
// dependency structure. On failure the frame is printed in the same format as
// FrameDependencyTemplateToString produces.
TEST_P(ScalabilityStructureTest, ThereIsAPerfectTemplateForEachFrame) {
std::unique_ptr<ScalableVideoController> svc_controller =
CreateScalabilityStructure(GetParam().GetScalabilityMode());
FrameDependencyStructure structure = svc_controller->DependencyStructure();
std::vector<GenericFrameInfo> frame_infos =
ScalabilityStructureWrapper(*svc_controller)
.GenerateFrames(GetParam().num_temporal_units);
for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) {
EXPECT_THAT(structure.templates, Contains(frame_infos[frame_id]))
<< " for frame " << frame_id << ", Expected "
<< FrameDependencyTemplateToString(frame_infos[frame_id]);
}
}
// Checks that a frame only references frames whose spatial id and temporal id
// are both less than or equal to its own.
TEST_P(ScalabilityStructureTest, FrameDependsOnSameOrLowerLayer) {
  std::unique_ptr<ScalableVideoController> svc_controller =
      CreateScalabilityStructure(GetParam().GetScalabilityMode());
  std::vector<GenericFrameInfo> frame_infos =
      ScalabilityStructureWrapper(*svc_controller)
          .GenerateFrames(GetParam().num_temporal_units);
  int64_t num_frames = frame_infos.size();

  // `frame_id` is the index of the frame in `frame_infos`.
  for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) {
    const auto& frame = frame_infos[frame_id];
    for (int frame_diff : frame.frame_diffs) {
      int64_t base_frame_id = frame_id - frame_diff;
      // A faulty controller could yield a diff that points before the first
      // generated frame or not strictly backwards. Fail the test instead of
      // indexing `frame_infos` out of range.
      ASSERT_GE(base_frame_id, 0)
          << "Frame " << frame_id << " has frame diff " << frame_diff
          << " that refers to a frame before the first generated frame.";
      ASSERT_LT(base_frame_id, frame_id)
          << "Frame " << frame_id << " has non-positive frame diff "
          << frame_diff;
      const auto& base_frame = frame_infos[base_frame_id];
      EXPECT_GE(frame.spatial_id, base_frame.spatial_id)
          << "Frame " << frame_id << " depends on frame " << base_frame_id;
      EXPECT_GE(frame.temporal_id, base_frame.temporal_id)
          << "Frame " << frame_id << " depends on frame " << base_frame_id;
    }
  }
}
// Checks, for every decode target, that a frame which is part of the target
// never references a frame that is not present in, or discardable for, that
// same target.
TEST_P(ScalabilityStructureTest, NoFrameDependsOnDiscardableOrNotPresent) {
  std::unique_ptr<ScalableVideoController> svc_controller =
      CreateScalabilityStructure(GetParam().GetScalabilityMode());
  std::vector<GenericFrameInfo> frame_infos =
      ScalabilityStructureWrapper(*svc_controller)
          .GenerateFrames(GetParam().num_temporal_units);
  int64_t num_frames = frame_infos.size();
  FrameDependencyStructure structure = svc_controller->DependencyStructure();

  for (int dt = 0; dt < structure.num_decode_targets; ++dt) {
    for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) {
      const auto& frame = frame_infos[frame_id];
      // Frames outside of this decode target put no constraint on it.
      if (frame.decode_target_indications[dt] ==
          DecodeTargetIndication::kNotPresent) {
        continue;
      }
      for (int frame_diff : frame.frame_diffs) {
        int64_t base_frame_id = frame_id - frame_diff;
        // A faulty controller could yield a diff that points before the first
        // generated frame or not strictly backwards. Fail the test instead of
        // indexing `frame_infos` out of range.
        ASSERT_GE(base_frame_id, 0)
            << "Frame " << frame_id << " has frame diff " << frame_diff
            << " that refers to a frame before the first generated frame.";
        ASSERT_LT(base_frame_id, frame_id)
            << "Frame " << frame_id << " has non-positive frame diff "
            << frame_diff;
        const auto& base_frame = frame_infos[base_frame_id];
        EXPECT_NE(base_frame.decode_target_indications[dt],
                  DecodeTargetIndication::kNotPresent)
            << "Frame " << frame_id << " depends on frame " << base_frame_id
            << " that is not part of decode target#" << dt;
        EXPECT_NE(base_frame.decode_target_indications[dt],
                  DecodeTargetIndication::kDiscardable)
            << "Frame " << frame_id << " depends on frame " << base_frame_id
            << " that is discardable for decode target#" << dt;
      }
    }
  }
}
// Checks the switch indication: for each decode target, frames that follow a
// frame marked kSwitch may only reference earlier frames that the switch
// frame itself depends on, directly or indirectly.
TEST_P(ScalabilityStructureTest, NoFrameDependsThroughSwitchIndication) {
  std::unique_ptr<ScalableVideoController> svc_controller =
      CreateScalabilityStructure(GetParam().GetScalabilityMode());
  FrameDependencyStructure structure = svc_controller->DependencyStructure();
  std::vector<GenericFrameInfo> frame_infos =
      ScalabilityStructureWrapper(*svc_controller)
          .GenerateFrames(GetParam().num_temporal_units);
  int64_t num_frames = frame_infos.size();
  std::vector<std::set<int64_t>> full_deps(num_frames);

  // For each frame calculate set of all frames it depends on, both directly and
  // indirectly.
  for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) {
    std::set<int64_t> all_base_frames;
    for (int frame_diff : frame_infos[frame_id].frame_diffs) {
      int64_t base_frame_id = frame_id - frame_diff;
      // The transitive closure is built in frame order, so every reference
      // must point at an already processed frame. Fail the test instead of
      // indexing `full_deps` out of range on a faulty controller.
      ASSERT_GE(base_frame_id, 0)
          << "Frame " << frame_id << " has frame diff " << frame_diff
          << " that refers to a frame before the first generated frame.";
      ASSERT_LT(base_frame_id, frame_id)
          << "Frame " << frame_id << " has non-positive frame diff "
          << frame_diff;
      all_base_frames.insert(base_frame_id);
      const auto& indirect = full_deps[base_frame_id];
      all_base_frames.insert(indirect.begin(), indirect.end());
    }
    full_deps[frame_id] = std::move(all_base_frames);
  }

  // Now check the switch indication: frames after the switch indication mustn't
  // depend on any additional frames before the switch indication.
  for (int dt = 0; dt < structure.num_decode_targets; ++dt) {
    for (int64_t switch_frame_id = 0; switch_frame_id < num_frames;
         ++switch_frame_id) {
      if (frame_infos[switch_frame_id].decode_target_indications[dt] !=
          DecodeTargetIndication::kSwitch) {
        continue;
      }
      for (int64_t later_frame_id = switch_frame_id + 1;
           later_frame_id < num_frames; ++later_frame_id) {
        // Frames outside of this decode target are irrelevant for it.
        if (frame_infos[later_frame_id].decode_target_indications[dt] ==
            DecodeTargetIndication::kNotPresent) {
          continue;
        }
        for (int frame_diff : frame_infos[later_frame_id].frame_diffs) {
          int64_t early_frame_id = later_frame_id - frame_diff;
          // Only references that reach back past the switch frame matter.
          if (early_frame_id < switch_frame_id) {
            EXPECT_THAT(full_deps[switch_frame_id], Contains(early_frame_id))
                << "For decode target #" << dt << " frame " << later_frame_id
                << " depends on the frame " << early_frame_id
                << " that switch indication frame " << switch_frame_id
                << " doesn't directly on indirectly depend on.";
          }
        }
      }
    }
  }
}
// Checks that a layer produces frames while it has a non-zero bitrate and
// produces none once its bitrate is set to zero. Each (sid, tid) layer is
// disabled in turn while all other layers keep their bitrate.
TEST_P(ScalabilityStructureTest, ProduceNoFrameForDisabledLayers) {
std::unique_ptr<ScalableVideoController> svc_controller =
CreateScalabilityStructure(GetParam().GetScalabilityMode());
ScalableVideoController::StreamLayersConfig structure =
svc_controller->StreamConfig();
// Start with every layer enabled at the same bitrate.
VideoBitrateAllocation all_bitrates;
for (int sid = 0; sid < structure.num_spatial_layers; ++sid) {
for (int tid = 0; tid < structure.num_temporal_layers; ++tid) {
all_bitrates.SetBitrate(sid, tid, 100'000);
}
}
svc_controller->OnRatesUpdated(all_bitrates);
// The same wrapper (and controller) is reused for all GenerateFrames calls
// below.
ScalabilityStructureWrapper wrapper(*svc_controller);
std::vector<GenericFrameInfo> frames =
wrapper.GenerateFrames(GetParam().num_temporal_units);
for (int sid = 0; sid < structure.num_spatial_layers; ++sid) {
for (int tid = 0; tid < structure.num_temporal_layers; ++tid) {
// When all layers were enabled, expect there was a frame for each layer.
EXPECT_THAT(frames,
Contains(AllOf(Field(&GenericFrameInfo::spatial_id, sid),
Field(&GenericFrameInfo::temporal_id, tid))))
<< "For layer (" << sid << "," << tid << ")";
// Restore bitrates for all layers before disabling single layer.
VideoBitrateAllocation bitrates = all_bitrates;
bitrates.SetBitrate(sid, tid, 0);
svc_controller->OnRatesUpdated(bitrates);
// With layer (sid, tid) disabled, expect no frames are produced for it.
EXPECT_THAT(
wrapper.GenerateFrames(GetParam().num_temporal_units),
Not(Contains(AllOf(Field(&GenericFrameInfo::spatial_id, sid),
Field(&GenericFrameInfo::temporal_id, tid)))))
<< "For layer (" << sid << "," << tid << ")";
}
}
}
// Instantiates the suite for each listed scalability mode. The second field
// is the number of temporal units each test generates for that mode; in this
// list it is 3 for T1, 4 for T2 and 8 for T3 modes. The trailing lambda makes
// the mode name the test name suffix.
INSTANTIATE_TEST_SUITE_P(
Svc,
ScalabilityStructureTest,
Values(SvcTestParam{"L1T1", /*num_temporal_units=*/3},
SvcTestParam{"L1T2", /*num_temporal_units=*/4},
SvcTestParam{"L1T3", /*num_temporal_units=*/8},
SvcTestParam{"L2T1", /*num_temporal_units=*/3},
SvcTestParam{"L2T1_KEY", /*num_temporal_units=*/3},
SvcTestParam{"L3T1", /*num_temporal_units=*/3},
SvcTestParam{"L3T1_KEY", /*num_temporal_units=*/3},
SvcTestParam{"L3T3", /*num_temporal_units=*/8},
SvcTestParam{"S2T1", /*num_temporal_units=*/3},
SvcTestParam{"S2T2", /*num_temporal_units=*/4},
SvcTestParam{"S2T3", /*num_temporal_units=*/8},
SvcTestParam{"S3T1", /*num_temporal_units=*/3},
SvcTestParam{"S3T2", /*num_temporal_units=*/4},
SvcTestParam{"S3T3", /*num_temporal_units=*/8},
SvcTestParam{"L2T2", /*num_temporal_units=*/4},
SvcTestParam{"L2T2_KEY", /*num_temporal_units=*/4},
SvcTestParam{"L2T2_KEY_SHIFT", /*num_temporal_units=*/4},
SvcTestParam{"L2T3", /*num_temporal_units=*/8},
SvcTestParam{"L2T3_KEY", /*num_temporal_units=*/8},
SvcTestParam{"L3T2", /*num_temporal_units=*/4},
SvcTestParam{"L3T2_KEY", /*num_temporal_units=*/4},
SvcTestParam{"L3T3_KEY", /*num_temporal_units=*/8}),
[](const testing::TestParamInfo<SvcTestParam>& info) {
return info.param.name;
});
} // namespace
} // namespace webrtc

View file

@ -0,0 +1,139 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_H_
#define MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_H_
#include <vector>
#include "absl/container/inlined_vector.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
namespace webrtc {
// Controls how video should be encoded to be scalable. Outputs results as
// buffer usage configuration for encoder and enough details to communicate the
// scalability structure via dependency descriptor rtp header extension.
// Abstract interface implemented by every scalability structure. An encoder
// wrapper asks it for the per-frame buffer configuration (NextFrameConfig)
// and reports back encoded frames (OnEncodeDone) to get the frame metadata.
class ScalableVideoController {
public:
// Static description of the layers a structure produces.
struct StreamLayersConfig {
int num_spatial_layers = 1;
int num_temporal_layers = 1;
// Indicates if frames can reference frames of a different resolution.
bool uses_reference_scaling = true;
// Spatial layers scaling. Frames with spatial_id = i expected to be encoded
// with original_resolution * scaling_factor_num[i] / scaling_factor_den[i].
int scaling_factor_num[DependencyDescriptor::kMaxSpatialIds] = {1, 1, 1, 1};
int scaling_factor_den[DependencyDescriptor::kMaxSpatialIds] = {1, 1, 1, 1};
};
// Configuration of a single layer frame. Built with the chainable setters
// below, e.g. `Id(0).Keyframe().Update(0)`.
class LayerFrameConfig {
public:
// Builders/setters. Each returns *this so calls can be chained.
LayerFrameConfig& Id(int value);
LayerFrameConfig& Keyframe();
LayerFrameConfig& S(int value);
LayerFrameConfig& T(int value);
LayerFrameConfig& Reference(int buffer_id);
LayerFrameConfig& Update(int buffer_id);
LayerFrameConfig& ReferenceAndUpdate(int buffer_id);
// Getters.
int Id() const { return id_; }
bool IsKeyframe() const { return is_keyframe_; }
int SpatialId() const { return spatial_id_; }
int TemporalId() const { return temporal_id_; }
const absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers>& Buffers()
const {
return buffers_;
}
private:
// Id to match configuration returned by NextFrameConfig with
// (possibly modified) configuration passed back via OnEncodeDone.
// The meaning of the id is an implementation detail of
// the ScalableVideoController.
int id_ = 0;
// Indication frame should be encoded as a key frame. In particular when
// `is_keyframe=true` property `CodecBufferUsage::referenced` should be
// ignored and treated as false.
bool is_keyframe_ = false;
int spatial_id_ = 0;
int temporal_id_ = 0;
// Describes which buffers the encoder is allowed to reference and
// which buffers the encoder should update.
absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> buffers_;
};
virtual ~ScalableVideoController() = default;
// Returns video structure description for encoder to configure itself.
virtual StreamLayersConfig StreamConfig() const = 0;
// Returns video structure description in format compatible with
// dependency descriptor rtp header extension.
virtual FrameDependencyStructure DependencyStructure() const = 0;
// Notifies Controller with updated bitrates per layer. In particular notifies
// when certain layers should be disabled.
// Controller shouldn't produce LayerFrameConfig for disabled layers.
virtual void OnRatesUpdated(const VideoBitrateAllocation& bitrates) = 0;
// When `restart` is true, first `LayerFrameConfig` should have `is_keyframe`
// set to true.
// Returned vector shouldn't be empty.
virtual std::vector<LayerFrameConfig> NextFrameConfig(bool restart) = 0;
// Returns configuration to pass to EncoderCallback.
virtual GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) = 0;
};
// Below are implementation details.
// All LayerFrameConfig builders modify the object in place and return *this
// to allow chaining.

// Sets the id used to match this config with the one passed to OnEncodeDone.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::Id(int value) {
id_ = value;
return *this;
}
// Marks the frame as a key frame. There is no builder to clear the flag.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::Keyframe() {
is_keyframe_ = true;
return *this;
}
// Sets the spatial id of the frame.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::S(int value) {
spatial_id_ = value;
return *this;
}
// Sets the temporal id of the frame.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::T(int value) {
temporal_id_ = value;
return *this;
}
// Appends a usage entry: buffer `buffer_id` is referenced but not updated.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::Reference(int buffer_id) {
buffers_.emplace_back(buffer_id, /*referenced=*/true, /*updated=*/false);
return *this;
}
// Appends a usage entry: buffer `buffer_id` is updated but not referenced.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::Update(int buffer_id) {
buffers_.emplace_back(buffer_id, /*referenced=*/false, /*updated=*/true);
return *this;
}
// Appends a usage entry: buffer `buffer_id` is both referenced and updated.
inline ScalableVideoController::LayerFrameConfig&
ScalableVideoController::LayerFrameConfig::ReferenceAndUpdate(int buffer_id) {
buffers_.emplace_back(buffer_id, /*referenced=*/true, /*updated=*/true);
return *this;
}
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_H_

View file

@ -0,0 +1,88 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
#include <utility>
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Defaulted here, out of line, rather than in the class definition.
ScalableVideoControllerNoLayering::~ScalableVideoControllerNoLayering() =
default;
// Describes a stream with exactly one spatial and one temporal layer that
// never references frames of a different resolution.
ScalableVideoController::StreamLayersConfig
ScalableVideoControllerNoLayering::StreamConfig() const {
  StreamLayersConfig single_layer;
  single_layer.uses_reference_scaling = false;
  single_layer.num_temporal_layers = 1;
  single_layer.num_spatial_layers = 1;
  return single_layer;
}
// Builds the dependency structure for a non-layered stream: one decode target
// protected by one chain, and two templates - a key frame without references
// followed by a delta frame that references the previous frame.
FrameDependencyStructure
ScalableVideoControllerNoLayering::DependencyStructure() const {
  // Key frame template: no frame diffs, chain diff 0.
  FrameDependencyTemplate key_template;
  key_template.decode_target_indications = {DecodeTargetIndication::kSwitch};
  key_template.chain_diffs = {0};

  // Delta frame template: references the frame one step back.
  FrameDependencyTemplate delta_template;
  delta_template.decode_target_indications = {DecodeTargetIndication::kSwitch};
  delta_template.frame_diffs = {1};
  delta_template.chain_diffs = {1};

  FrameDependencyStructure result;
  result.num_decode_targets = 1;
  result.num_chains = 1;
  result.decode_target_protected_by_chain = {0};
  result.templates.push_back(std::move(key_template));
  result.templates.push_back(std::move(delta_template));
  return result;
}
std::vector<ScalableVideoController::LayerFrameConfig>
ScalableVideoControllerNoLayering::NextFrameConfig(bool restart) {
if (!enabled_) {
return {};
}
std::vector<LayerFrameConfig> result(1);
if (restart || start_) {
result[0].Id(0).Keyframe().Update(0);
} else {
result[0].Id(0).ReferenceAndUpdate(0);
}
start_ = false;
return result;
}
// Produces the frame info for an encoded frame: copies the buffer usage from
// `config` (clearing the `referenced` flags for key frames) and marks the
// frame as a switch point of the only decode target and part of the only
// chain.
GenericFrameInfo ScalableVideoControllerNoLayering::OnEncodeDone(
    const LayerFrameConfig& config) {
  RTC_DCHECK_EQ(config.Id(), 0);

  GenericFrameInfo info;
  info.part_of_chain = {true};
  info.decode_target_indications = {DecodeTargetIndication::kSwitch};
  info.encoder_buffers = config.Buffers();
  if (!config.IsKeyframe()) {
    return info;
  }
  // A key frame references nothing, whatever the config says.
  for (auto& usage : info.encoder_buffers) {
    usage.referenced = false;
  }
  return info;
}
// Enables frame production only while layer (0, 0) has a non-zero bitrate.
void ScalableVideoControllerNoLayering::OnRatesUpdated(
    const VideoBitrateAllocation& bitrates) {
  const bool has_bitrate = bitrates.GetBitrate(0, 0) > 0;
  enabled_ = has_bitrate;
}
} // namespace webrtc

View file

@ -0,0 +1,40 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
#define MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_bitrate_allocation.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// ScalableVideoController for a stream without layering: a single spatial
// and a single temporal layer.
class ScalableVideoControllerNoLayering : public ScalableVideoController {
public:
~ScalableVideoControllerNoLayering() override;
StreamLayersConfig StreamConfig() const override;
FrameDependencyStructure DependencyStructure() const override;
std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
GenericFrameInfo OnEncodeDone(const LayerFrameConfig& config) override;
void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
private:
// True until the first frame config has been produced; forces that first
// frame to be a key frame.
bool start_ = true;
// False while the bitrate of layer (0, 0) is zero; no frame configs are
// produced in that state.
bool enabled_ = true;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_

View file

@ -0,0 +1,451 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/svc_rate_allocator.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <vector>
#include "absl/container/inlined_vector.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Ratio passed to SplitBitrate when a bitrate is divided between spatial
// layers.
constexpr float kSpatialLayeringRateScalingFactor = 0.55f;
// Ratio passed to SplitBitrate when the bitrate of one spatial layer is
// divided between its temporal layers.
constexpr float kTemporalLayeringRateScalingFactor = 0.55f;
// Contiguous range of active spatial layers, as found by
// GetActiveSpatialLayers.
struct ActiveSpatialLayers {
// Index of the first active spatial layer.
size_t first = 0;
// Number of consecutive active layers starting at `first`.
size_t num = 0;
};
// Finds the first run of consecutive active spatial layers among the first
// `num_spatial_layers` layers of `codec`. If no layer is active, `first`
// equals `num_spatial_layers` and `num` is 0. Active layers that follow an
// inactive gap are not counted.
ActiveSpatialLayers GetActiveSpatialLayers(const VideoCodec& codec,
                                           size_t num_spatial_layers) {
  // Skip the leading inactive layers.
  size_t run_begin = 0;
  while (run_begin < num_spatial_layers &&
         !codec.spatialLayers[run_begin].active) {
    ++run_begin;
  }

  // Extend the run up to (not including) the next inactive layer.
  size_t run_end = run_begin;
  while (run_end < num_spatial_layers && codec.spatialLayers[run_end].active) {
    ++run_end;
  }

  ActiveSpatialLayers result;
  result.first = run_begin;
  result.num = run_end - run_begin;
  return result;
}
std::vector<DataRate> AdjustAndVerify(
const VideoCodec& codec,
size_t first_active_layer,
const std::vector<DataRate>& spatial_layer_rates) {
std::vector<DataRate> adjusted_spatial_layer_rates;
// Keep track of rate that couldn't be applied to the previous layer due to
// max bitrate constraint, try to pass it forward to the next one.
DataRate excess_rate = DataRate::Zero();
for (size_t sl_idx = 0; sl_idx < spatial_layer_rates.size(); ++sl_idx) {
DataRate min_rate = DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + sl_idx].minBitrate);
DataRate max_rate = DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + sl_idx].maxBitrate);
DataRate layer_rate = spatial_layer_rates[sl_idx] + excess_rate;
if (layer_rate < min_rate) {
// Not enough rate to reach min bitrate for desired number of layers,
// abort allocation.
if (spatial_layer_rates.size() == 1) {
return spatial_layer_rates;
}
return adjusted_spatial_layer_rates;
}
if (layer_rate <= max_rate) {
excess_rate = DataRate::Zero();
adjusted_spatial_layer_rates.push_back(layer_rate);
} else {
excess_rate = layer_rate - max_rate;
adjusted_spatial_layer_rates.push_back(max_rate);
}
}
return adjusted_spatial_layer_rates;
}
static std::vector<DataRate> SplitBitrate(size_t num_layers,
DataRate total_bitrate,
float rate_scaling_factor) {
std::vector<DataRate> bitrates;
double denominator = 0.0;
for (size_t layer_idx = 0; layer_idx < num_layers; ++layer_idx) {
denominator += std::pow(rate_scaling_factor, layer_idx);
}
double numerator = std::pow(rate_scaling_factor, num_layers - 1);
for (size_t layer_idx = 0; layer_idx < num_layers; ++layer_idx) {
bitrates.push_back(numerator * total_bitrate / denominator);
numerator /= rate_scaling_factor;
}
const DataRate sum =
std::accumulate(bitrates.begin(), bitrates.end(), DataRate::Zero());
// Keep the sum of split bitrates equal to the total bitrate by adding or
// subtracting bits, which were lost due to rounding, to the latest layer.
if (total_bitrate > sum) {
bitrates.back() += total_bitrate - sum;
} else if (total_bitrate < sum) {
bitrates.back() -= sum - total_bitrate;
}
return bitrates;
}
// Returns the minimum bitrate needed for `num_active_layers` spatial layers to
// become active using the configuration specified by `codec`.
// `first_active_layer` is the index of the first active spatial layer in
// `codec.spatialLayers`. The code assumes `num_active_layers` >= 1: with 0 the
// unsigned `num_active_layers - 1` expressions below would wrap around.
DataRate FindLayerTogglingThreshold(const VideoCodec& codec,
size_t first_active_layer,
size_t num_active_layers) {
if (num_active_layers == 1) {
// NOTE(review): this reads layer 0 rather than `first_active_layer`, unlike
// every other lookup in this function - confirm that this is intended when
// lower layers are deactivated.
return DataRate::KilobitsPerSec(codec.spatialLayers[0].minBitrate);
}
if (codec.mode == VideoCodecMode::kRealtimeVideo) {
// lower_bound: sum of min bitrates of all layers but the last one.
// upper_bound: sum of max bitrates of all layers but the last one, plus the
// min bitrate of the last layer.
DataRate lower_bound = DataRate::Zero();
DataRate upper_bound = DataRate::Zero();
// Always true here, because the single layer case returned above.
if (num_active_layers > 1) {
for (size_t i = 0; i < num_active_layers - 1; ++i) {
lower_bound += DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + i].minBitrate);
upper_bound += DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + i].maxBitrate);
}
}
upper_bound += DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + num_active_layers - 1]
.minBitrate);
// Do a binary search until upper and lower bound is the highest bitrate for
// `num_active_layers` - 1 layers and lowest bitrate for `num_active_layers`
// layers respectively.
while (upper_bound - lower_bound > DataRate::BitsPerSec(1)) {
DataRate try_rate = (lower_bound + upper_bound) / 2;
// AdjustAndVerify returns fewer layers than requested when `try_rate` is
// too low for all of them.
if (AdjustAndVerify(codec, first_active_layer,
SplitBitrate(num_active_layers, try_rate,
kSpatialLayeringRateScalingFactor))
.size() == num_active_layers) {
upper_bound = try_rate;
} else {
lower_bound = try_rate;
}
}
return upper_bound;
} else {
// Other modes: target bitrates of all layers but the last one, plus the min
// bitrate of the last layer.
DataRate toggling_rate = DataRate::Zero();
for (size_t i = 0; i < num_active_layers - 1; ++i) {
toggling_rate += DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + i].targetBitrate);
}
toggling_rate += DataRate::KilobitsPerSec(
codec.spatialLayers[first_active_layer + num_active_layers - 1]
.minBitrate);
return toggling_rate;
}
}
} // namespace
// Determines the number of spatial and temporal layers configured in `codec`.
// The scalability mode wins when it is set and a structure can be created for
// it; otherwise the VP9 specific settings are used for VP9, and a single
// spatial and temporal layer is assumed for any other codec.
SvcRateAllocator::NumLayers SvcRateAllocator::GetNumLayers(
    const VideoCodec& codec) {
  NumLayers layers;

  const absl::optional<ScalabilityMode> scalability_mode =
      codec.GetScalabilityMode();
  if (scalability_mode.has_value()) {
    const auto structure = CreateScalabilityStructure(*scalability_mode);
    if (structure != nullptr) {
      const ScalableVideoController::StreamLayersConfig stream_config =
          structure->StreamConfig();
      layers.spatial = stream_config.num_spatial_layers;
      layers.temporal = stream_config.num_temporal_layers;
      return layers;
    }
  }

  // No usable scalability mode, fall back to codec specific settings.
  if (codec.codecType != kVideoCodecVP9) {
    layers.spatial = 1;
    layers.temporal = 1;
    return layers;
  }
  layers.spatial = codec.VP9().numberOfSpatialLayers;
  layers.temporal = codec.VP9().numberOfTemporalLayers;
  return layers;
}
// Stores a copy of `codec`, derives the layer counts and the cumulative layer
// start bitrates from it, and reads the stable target rate experiment from
// the field trials. In debug builds also validates the layer counts and the
// bitrate limits of every active spatial layer.
SvcRateAllocator::SvcRateAllocator(const VideoCodec& codec)
: codec_(codec),
num_layers_(GetNumLayers(codec)),
experiment_settings_(StableTargetRateExperiment::ParseFromFieldTrials()),
cumulative_layer_start_bitrates_(GetLayerStartBitrates(codec)),
last_active_layer_count_(0) {
RTC_DCHECK_GT(num_layers_.spatial, 0);
RTC_DCHECK_LE(num_layers_.spatial, kMaxSpatialLayers);
RTC_DCHECK_GT(num_layers_.temporal, 0);
// At most 3 temporal layers are accepted.
RTC_DCHECK_LE(num_layers_.temporal, 3);
for (size_t layer_idx = 0; layer_idx < num_layers_.spatial; ++layer_idx) {
// Verify min <= target <= max.
// Inactive layers are not validated.
if (codec.spatialLayers[layer_idx].active) {
RTC_DCHECK_GT(codec.spatialLayers[layer_idx].maxBitrate, 0);
RTC_DCHECK_GE(codec.spatialLayers[layer_idx].maxBitrate,
codec.spatialLayers[layer_idx].minBitrate);
RTC_DCHECK_GE(codec.spatialLayers[layer_idx].targetBitrate,
codec.spatialLayers[layer_idx].minBitrate);
RTC_DCHECK_GE(codec.spatialLayers[layer_idx].maxBitrate,
codec.spatialLayers[layer_idx].targetBitrate);
}
}
}
// Distributes `parameters.total_bitrate` between spatial and temporal layers.
// Steps: cap the total by the codec max bitrate, decide how many spatial
// layers to enable (with hysteresis when the stable rate experiment is on),
// then delegate to the mode specific allocation. The result is flagged as
// bandwidth limited when fewer layers are enabled than are active.
VideoBitrateAllocation SvcRateAllocator::Allocate(
VideoBitrateAllocationParameters parameters) {
DataRate total_bitrate = parameters.total_bitrate;
// A max bitrate of 0 means no cap is applied.
if (codec_.maxBitrate != 0) {
total_bitrate =
std::min(total_bitrate, DataRate::KilobitsPerSec(codec_.maxBitrate));
}
if (codec_.spatialLayers[0].targetBitrate == 0) {
// Delegate rate distribution to encoder wrapper if bitrate thresholds
// are not set.
VideoBitrateAllocation bitrate_allocation;
bitrate_allocation.SetBitrate(0, 0, total_bitrate.bps());
return bitrate_allocation;
}
const ActiveSpatialLayers active_layers =
GetActiveSpatialLayers(codec_, num_layers_.spatial);
size_t num_spatial_layers = active_layers.num;
if (num_spatial_layers == 0) {
return VideoBitrateAllocation(); // All layers are deactivated.
}
// Figure out how many spatial layers should be active.
if (experiment_settings_.IsEnabled() &&
parameters.stable_bitrate > DataRate::Zero()) {
double hysteresis_factor;
if (codec_.mode == VideoCodecMode::kScreensharing) {
hysteresis_factor = experiment_settings_.GetScreenshareHysteresisFactor();
} else {
hysteresis_factor = experiment_settings_.GetVideoHysteresisFactor();
}
// The stable rate never exceeds the (capped) total bitrate.
DataRate stable_rate = std::min(total_bitrate, parameters.stable_bitrate);
// First check if bitrate has grown large enough to enable new layers.
size_t num_enabled_with_hysteresis =
FindNumEnabledLayers(stable_rate / hysteresis_factor);
if (num_enabled_with_hysteresis >= last_active_layer_count_) {
num_spatial_layers = num_enabled_with_hysteresis;
} else {
// We could not enable new layers, check if any should be disabled.
num_spatial_layers =
std::min(last_active_layer_count_, FindNumEnabledLayers(stable_rate));
}
} else {
num_spatial_layers = FindNumEnabledLayers(total_bitrate);
}
// Remembered for the hysteresis decision of the next call.
last_active_layer_count_ = num_spatial_layers;
VideoBitrateAllocation allocation;
if (codec_.mode == VideoCodecMode::kRealtimeVideo) {
allocation = GetAllocationNormalVideo(total_bitrate, active_layers.first,
num_spatial_layers);
} else {
allocation = GetAllocationScreenSharing(total_bitrate, active_layers.first,
num_spatial_layers);
}
allocation.set_bw_limited(num_spatial_layers < active_layers.num);
return allocation;
}
// Builds the allocation used for camera (non-screenshare) content.
//
// `total_bitrate` is split across `num_spatial_layers` spatial layers starting
// at index `first_active_layer`, and each spatial layer's share is split again
// across the configured temporal layers. When `num_spatial_layers` is zero a
// single layer is still allocated and given the whole rate.
VideoBitrateAllocation SvcRateAllocator::GetAllocationNormalVideo(
    DataRate total_bitrate,
    size_t first_active_layer,
    size_t num_spatial_layers) const {
  std::vector<DataRate> per_spatial_rate;
  if (num_spatial_layers > 0) {
    per_spatial_rate =
        AdjustAndVerify(codec_, first_active_layer,
                        SplitBitrate(num_spatial_layers, total_bitrate,
                                     kSpatialLayeringRateScalingFactor));
    RTC_DCHECK_EQ(per_spatial_rate.size(), num_spatial_layers);
  } else {
    // Not even the base layer fits; allocate it at the total rate regardless.
    num_spatial_layers = 1;
    per_spatial_rate.push_back(total_bitrate);
  }

  VideoBitrateAllocation result;
  for (size_t offset = 0; offset < num_spatial_layers; ++offset) {
    const size_t spatial_id = offset + first_active_layer;
    const std::vector<DataRate> per_temporal_rate =
        SplitBitrate(num_layers_.temporal, per_spatial_rate[offset],
                     kTemporalLayeringRateScalingFactor);

    // Spread the rate over the temporal layers. Lower layers get more bits
    // because higher layers are predicted from them and their references are
    // far apart.
    switch (num_layers_.temporal) {
      case 1:
        result.SetBitrate(spatial_id, 0, per_temporal_rate[0].bps());
        break;
      case 2:
        result.SetBitrate(spatial_id, 0, per_temporal_rate[1].bps());
        result.SetBitrate(spatial_id, 1, per_temporal_rate[0].bps());
        break;
      default:
        RTC_CHECK_EQ(num_layers_.temporal, 3);
        // With three temporal layers, a GOP (the span between two consecutive
        // low-layer frames) holds two high-layer frames and one middle-layer
        // frame. The high layer therefore needs more bits of its own
        // (excluding the layers below it) to stay on par in quality.
        result.SetBitrate(spatial_id, 0, per_temporal_rate[2].bps());
        result.SetBitrate(spatial_id, 1, per_temporal_rate[0].bps());
        result.SetBitrate(spatial_id, 2, per_temporal_rate[1].bps());
        break;
    }
  }
  return result;
}
// Screenshare allocation. Layers are filled bottom-up: every enabled layer
// below the top one receives exactly its target bitrate, while the highest
// enabled layer ends up somewhere between its min and max bitrate. If no layer
// is requested, or the rate is below the first active layer's minimum, that
// first active layer is still enabled and given the whole rate.
VideoBitrateAllocation SvcRateAllocator::GetAllocationScreenSharing(
    DataRate total_bitrate,
    size_t first_active_layer,
    size_t num_spatial_layers) const {
  VideoBitrateAllocation allocation;

  const bool base_layer_only =
      num_spatial_layers == 0 ||
      total_bitrate < DataRate::KilobitsPerSec(
                          codec_.spatialLayers[first_active_layer].minBitrate);
  if (base_layer_only) {
    // At least one layer is always enabled.
    allocation.SetBitrate(first_active_layer, 0, total_bitrate.bps());
    return allocation;
  }

  const size_t end_layer = first_active_layer + num_spatial_layers;
  DataRate spent = DataRate::Zero();
  DataRate last_layer_rate = DataRate::Zero();
  size_t layer = first_active_layer;
  while (layer < end_layer) {
    const DataRate layer_min =
        DataRate::KilobitsPerSec(codec_.spatialLayers[layer].minBitrate);
    const DataRate layer_target =
        DataRate::KilobitsPerSec(codec_.spatialLayers[layer].targetBitrate);
    if (spent + layer_min > total_bitrate) {
      // The remaining rate cannot cover this layer's minimum; stop here.
      break;
    }
    last_layer_rate = std::min(layer_target, total_bitrate - spent);
    allocation.SetBitrate(layer, 0, last_layer_rate.bps());
    spent += last_layer_rate;
    ++layer;
  }

  if (layer == 0) {
    return allocation;
  }

  const DataRate leftover = total_bitrate - spent;
  if (leftover > DataRate::Zero()) {
    // Give whatever is left to the last allocated layer, up to its maximum.
    const DataRate layer_max =
        DataRate::KilobitsPerSec(codec_.spatialLayers[layer - 1].maxBitrate);
    last_layer_rate = std::min(last_layer_rate + leftover, layer_max);
    allocation.SetBitrate(layer - 1, 0, last_layer_rate.bps());
  }
  return allocation;
}
// Counts how many layers can be enabled at `target_rate`.
//
// Returns 0 when there are no start bitrates at all. Otherwise the first layer
// always counts, and each following layer counts as long as its cumulative
// start bitrate does not exceed `target_rate`; counting stops at the first
// layer that does not fit.
size_t SvcRateAllocator::FindNumEnabledLayers(DataRate target_rate) const {
  const size_t layer_total = cumulative_layer_start_bitrates_.size();
  if (layer_total == 0) {
    return 0;
  }

  // The first layer is enabled unconditionally.
  size_t enabled = 1;
  while (enabled < layer_total &&
         cumulative_layer_start_bitrates_[enabled] <= target_rate) {
    ++enabled;
  }
  return enabled;
}
// Returns the sum of `maxBitrate` over all active spatial layers of `codec`,
// limited to `codec.maxBitrate` when that value is non-zero.
DataRate SvcRateAllocator::GetMaxBitrate(const VideoCodec& codec) {
  const ActiveSpatialLayers active =
      GetActiveSpatialLayers(codec, GetNumLayers(codec).spatial);

  DataRate layers_sum = DataRate::Zero();
  const size_t end_layer = active.first + active.num;
  for (size_t layer = active.first; layer < end_layer; ++layer) {
    layers_sum += DataRate::KilobitsPerSec(codec.spatialLayers[layer].maxBitrate);
  }

  if (codec.maxBitrate == 0) {
    // No codec-wide cap configured.
    return layers_sum;
  }
  return std::min(layers_sum, DataRate::KilobitsPerSec(codec.maxBitrate));
}
// Returns the start bitrate of the highest layer reported by
// GetLayerStartBitrates(), or zero when that list is empty (all layers
// deactivated).
DataRate SvcRateAllocator::GetPaddingBitrate(const VideoCodec& codec) {
  const auto layer_start_rates = GetLayerStartBitrates(codec);
  return layer_start_rates.empty() ? DataRate::Zero()
                                   : layer_start_rates.back();
}
// Returns one toggling threshold per active spatial layer of `codec`: entry
// k-1 is the value FindLayerTogglingThreshold() yields for k enabled layers
// counted from the first active one. The thresholds are expected to be
// non-decreasing (checked in debug builds).
absl::InlinedVector<DataRate, kMaxSpatialLayers>
SvcRateAllocator::GetLayerStartBitrates(const VideoCodec& codec) {
  const NumLayers configured = GetNumLayers(codec);
  const ActiveSpatialLayers active =
      GetActiveSpatialLayers(codec, configured.spatial);

  absl::InlinedVector<DataRate, kMaxSpatialLayers> thresholds;
  DataRate previous_threshold = DataRate::Zero();
  for (size_t layer_count = 1; layer_count <= active.num; ++layer_count) {
    const DataRate threshold =
        FindLayerTogglingThreshold(codec, active.first, layer_count);
    RTC_DCHECK_LE(previous_threshold, threshold);
    thresholds.push_back(threshold);
    previous_threshold = threshold;
  }
  return thresholds;
}
} // namespace webrtc

View file

@ -0,0 +1,69 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SVC_RATE_ALLOCATOR_H_
#define MODULES_VIDEO_CODING_SVC_SVC_RATE_ALLOCATOR_H_
#include <stddef.h>
#include <stdint.h>
#include "absl/container/inlined_vector.h"
#include "api/video/video_bitrate_allocation.h"
#include "api/video/video_bitrate_allocator.h"
#include "api/video/video_codec_constants.h"
#include "api/video_codecs/video_codec.h"
#include "rtc_base/experiments/stable_target_rate_experiment.h"
namespace webrtc {
// Bitrate allocator for codecs configured with spatial (and optionally
// temporal) layers. Splits a total target bitrate into per-layer bitrates and
// decides how many spatial layers can be enabled at that rate.
class SvcRateAllocator : public VideoBitrateAllocator {
public:
explicit SvcRateAllocator(const VideoCodec& codec);
// Returns the allocation for `parameters`. The total rate is capped by the
// codec's maxBitrate when that is non-zero. The number of enabled spatial
// layers chosen by a call is remembered and used by the next call when the
// stable-bitrate hysteresis path is taken, so this method is stateful.
VideoBitrateAllocation Allocate(
VideoBitrateAllocationParameters parameters) override;
// Sum of maxBitrate over the active spatial layers of `codec`, limited to
// `codec.maxBitrate` when that is non-zero.
static DataRate GetMaxBitrate(const VideoCodec& codec);
// Last entry of GetLayerStartBitrates(codec), or zero when all layers are
// deactivated.
static DataRate GetPaddingBitrate(const VideoCodec& codec);
// One toggling threshold per active spatial layer, in increasing layer-count
// order.
static absl::InlinedVector<DataRate, kMaxSpatialLayers> GetLayerStartBitrates(
const VideoCodec& codec);
private:
// Number of spatial and temporal layers; both default to a single layer.
struct NumLayers {
size_t spatial = 1;
size_t temporal = 1;
};
// NOTE(review): presumably derives the layer counts from the codec-specific
// settings; the definition is not in this part of the file - confirm.
static NumLayers GetNumLayers(const VideoCodec& codec);
// Allocation used when codec_.mode is kRealtimeVideo: splits the rate across
// `num_spatial_layers` spatial layers starting at `first_active_layer`, then
// across temporal layers.
VideoBitrateAllocation GetAllocationNormalVideo(
DataRate total_bitrate,
size_t first_active_layer,
size_t num_spatial_layers) const;
// Allocation used for every other mode: lower layers get their target
// bitrate and the top enabled layer gets the remainder, up to its maximum.
VideoBitrateAllocation GetAllocationScreenSharing(
DataRate total_bitrate,
size_t first_active_layer,
size_t num_spatial_layers) const;
// Returns the number of layers that are active and have enough bitrate to
// actually be enabled.
size_t FindNumEnabledLayers(DataRate target_rate) const;
// Copy of the codec configuration taken at construction.
const VideoCodec codec_;
const NumLayers num_layers_;
// Controls whether the stable target rate and hysteresis factors are used
// when choosing the number of enabled layers.
const StableTargetRateExperiment experiment_settings_;
// Thresholds from GetLayerStartBitrates(codec), computed once at
// construction; entry i is the rate needed to enable i + 1 layers.
const absl::InlinedVector<DataRate, kMaxSpatialLayers>
cumulative_layer_start_bitrates_;
// Number of spatial layers enabled by the most recent Allocate() call that
// reached layer selection; starts at 0.
size_t last_active_layer_count_;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SVC_RATE_ALLOCATOR_H_