Repo created

Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions


@@ -0,0 +1,401 @@
/*
* Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_video/h265/h265_bitstream_parser.h"
#include <stdlib.h>
#include <cstdint>
#include <vector>
#include "common_video/h265/h265_common.h"
#include "rtc_base/logging.h"
#include "common_video/h265/legacy_bit_buffer.h"
namespace {
const int kMaxAbsQpDeltaValue = 51;
const int kMinQpValue = 0;
const int kMaxQpValue = 51;
} // namespace
namespace webrtc {
#define RETURN_ON_FAIL(x, res) \
if (!(x)) { \
RTC_LOG_F(LS_ERROR) << "FAILED: " #x; \
return res; \
}
#define RETURN_INV_ON_FAIL(x) RETURN_ON_FAIL(x, kInvalidStream)
H265BitstreamParser::H265BitstreamParser() {}
H265BitstreamParser::~H265BitstreamParser() {}
H265BitstreamParser::Result H265BitstreamParser::ParseNonParameterSetNalu(
const uint8_t* source,
size_t source_length,
uint8_t nalu_type) {
if (!sps_ || !pps_)
return kInvalidStream;
last_slice_qp_delta_ = absl::nullopt;
const std::vector<uint8_t> slice_rbsp =
H265::ParseRbsp(source, source_length);
if (slice_rbsp.size() < H265::kNaluTypeSize)
return kInvalidStream;
rtc::BitBuffer slice_reader(slice_rbsp.data() + H265::kNaluTypeSize,
slice_rbsp.size() - H265::kNaluTypeSize);
// For IRAP pictures (kBlaWLp..kRsvIrapVcl23) there is an extra
// no_output_of_prior_pics_flag to parse before the PPS id.
uint32_t golomb_tmp;
uint32_t bits_tmp;
// first_slice_segment_in_pic_flag: u(1)
uint32_t first_slice_segment_in_pic_flag = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&first_slice_segment_in_pic_flag, 1));
if (H265::NaluType::kBlaWLp <= nalu_type &&
nalu_type <= H265::NaluType::kRsvIrapVcl23) {
// no_output_of_prior_pics_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
// slice_pic_parameter_set_id: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
uint32_t dependent_slice_segment_flag = 0;
if (first_slice_segment_in_pic_flag == 0) {
if (pps_->dependent_slice_segments_enabled_flag) {
// dependent_slice_segment_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&dependent_slice_segment_flag, 1));
}
// slice_segment_address: u(v)
int32_t log2_ctb_size_y = sps_->log2_min_luma_coding_block_size_minus3 + 3 + sps_->log2_diff_max_min_luma_coding_block_size;
uint32_t ctb_size_y = 1 << log2_ctb_size_y;
uint32_t pic_width_in_ctbs_y = sps_->pic_width_in_luma_samples / ctb_size_y;
if(sps_->pic_width_in_luma_samples % ctb_size_y)
pic_width_in_ctbs_y++;
uint32_t pic_height_in_ctbs_y = sps_->pic_height_in_luma_samples / ctb_size_y;
if(sps_->pic_height_in_luma_samples % ctb_size_y)
pic_height_in_ctbs_y++;
uint32_t slice_segment_address_bits = H265::Log2(pic_height_in_ctbs_y * pic_width_in_ctbs_y);
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, slice_segment_address_bits));
}
if (dependent_slice_segment_flag == 0) {
for (uint32_t i = 0; i < pps_->num_extra_slice_header_bits; i++) {
// slice_reserved_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
// slice_type: ue(v)
uint32_t slice_type = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&slice_type));
if (pps_->output_flag_present_flag) {
// pic_output_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (sps_->separate_colour_plane_flag) {
// colour_plane_id: u(2)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2));
}
uint32_t num_long_term_sps = 0;
uint32_t num_long_term_pics = 0;
std::vector<uint32_t> lt_idx_sps;
std::vector<uint32_t> used_by_curr_pic_lt_flag;
uint32_t short_term_ref_pic_set_sps_flag = 0;
uint32_t short_term_ref_pic_set_idx = 0;
H265SpsParser::ShortTermRefPicSet short_term_ref_pic_set;
uint32_t slice_temporal_mvp_enabled_flag = 0;
if (nalu_type != H265::NaluType::kIdrWRadl && nalu_type != H265::NaluType::kIdrNLp) {
// slice_pic_order_cnt_lsb: u(v)
uint32_t slice_pic_order_cnt_lsb_bits = sps_->log2_max_pic_order_cnt_lsb_minus4 + 4;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, slice_pic_order_cnt_lsb_bits));
// short_term_ref_pic_set_sps_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&short_term_ref_pic_set_sps_flag, 1));
if (!short_term_ref_pic_set_sps_flag) {
absl::optional<H265SpsParser::ShortTermRefPicSet> ref_pic_set
= H265SpsParser::ParseShortTermRefPicSet(sps_->num_short_term_ref_pic_sets,
sps_->num_short_term_ref_pic_sets, sps_->short_term_ref_pic_set, *sps_, &slice_reader);
if (ref_pic_set) {
short_term_ref_pic_set = *ref_pic_set;
} else {
return kInvalidStream;
}
} else if (sps_->num_short_term_ref_pic_sets > 1) {
// short_term_ref_pic_set_idx: u(v)
uint32_t short_term_ref_pic_set_idx_bits = H265::Log2(sps_->num_short_term_ref_pic_sets);
if ((1 << short_term_ref_pic_set_idx_bits) < sps_->num_short_term_ref_pic_sets) {
short_term_ref_pic_set_idx_bits++;
}
if (short_term_ref_pic_set_idx_bits > 0) {
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&short_term_ref_pic_set_idx, short_term_ref_pic_set_idx_bits));
}
}
if (sps_->long_term_ref_pics_present_flag) {
if (sps_->num_long_term_ref_pics_sps > 0) {
// num_long_term_sps: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_long_term_sps));
}
// num_long_term_pics: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_long_term_pics));
lt_idx_sps.resize(num_long_term_sps + num_long_term_pics, 0);
used_by_curr_pic_lt_flag.resize(num_long_term_sps + num_long_term_pics, 0);
for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) {
if (i < num_long_term_sps) {
if (sps_->num_long_term_ref_pics_sps > 1) {
// lt_idx_sps: u(v)
uint32_t lt_idx_sps_bits = H265::Log2(sps_->num_long_term_ref_pics_sps);
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&lt_idx_sps[i], lt_idx_sps_bits));
}
} else {
// poc_lsb_lt: u(v)
uint32_t poc_lsb_lt_bits = sps_->log2_max_pic_order_cnt_lsb_minus4 + 4;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, poc_lsb_lt_bits));
// used_by_curr_pic_lt_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&used_by_curr_pic_lt_flag[i], 1));
}
// delta_poc_msb_present_flag: u(1)
uint32_t delta_poc_msb_present_flag = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&delta_poc_msb_present_flag, 1));
if (delta_poc_msb_present_flag) {
// delta_poc_msb_cycle_lt: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
}
if (sps_->sps_temporal_mvp_enabled_flag) {
// slice_temporal_mvp_enabled_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&slice_temporal_mvp_enabled_flag, 1));
}
}
if (sps_->sample_adaptive_offset_enabled_flag) {
// slice_sao_luma_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
uint32_t chroma_array_type = sps_->separate_colour_plane_flag == 0 ? sps_->chroma_format_idc : 0;
if (chroma_array_type != 0) {
// slice_sao_chroma_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
}
if (slice_type == H265::SliceType::kP || slice_type == H265::SliceType::kB) {
// num_ref_idx_active_override_flag: u(1)
uint32_t num_ref_idx_active_override_flag = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
uint32_t num_ref_idx_l0_active_minus1 = pps_->num_ref_idx_l0_default_active_minus1;
uint32_t num_ref_idx_l1_active_minus1 = pps_->num_ref_idx_l1_default_active_minus1;
if (num_ref_idx_active_override_flag) {
// num_ref_idx_l0_active_minus1: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_ref_idx_l0_active_minus1));
if (slice_type == H265::SliceType::kB) {
// num_ref_idx_l1_active_minus1: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_ref_idx_l1_active_minus1));
}
}
uint32_t num_pic_total_curr = CalcNumPocTotalCurr(
num_long_term_sps, num_long_term_pics, lt_idx_sps,
used_by_curr_pic_lt_flag, short_term_ref_pic_set_sps_flag,
short_term_ref_pic_set_idx, short_term_ref_pic_set);
if (pps_->lists_modification_present_flag && num_pic_total_curr > 1) {
// ref_pic_lists_modification()
uint32_t list_entry_bits = H265::Log2(num_pic_total_curr);
if ((1 << list_entry_bits) < num_pic_total_curr) {
list_entry_bits++;
}
// ref_pic_list_modification_flag_l0: u(1)
uint32_t ref_pic_list_modification_flag_l0 = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&ref_pic_list_modification_flag_l0, 1));
if (ref_pic_list_modification_flag_l0) {
for (uint32_t i = 0; i < num_ref_idx_l0_active_minus1; i++) {
// list_entry_l0: u(v)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, list_entry_bits));
}
}
if (slice_type == H265::SliceType::kB) {
// ref_pic_list_modification_flag_l1: u(1)
uint32_t ref_pic_list_modification_flag_l1 = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&ref_pic_list_modification_flag_l1, 1));
if (ref_pic_list_modification_flag_l1) {
for (uint32_t i = 0; i < num_ref_idx_l1_active_minus1; i++) {
// list_entry_l1: u(v)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, list_entry_bits));
}
}
}
}
if (slice_type == H265::SliceType::kB) {
// mvd_l1_zero_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (pps_->cabac_init_present_flag) {
// cabac_init_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (slice_temporal_mvp_enabled_flag) {
uint32_t collocated_from_l0_flag = 0;
if (slice_type == H265::SliceType::kB) {
// collocated_from_l0_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&collocated_from_l0_flag, 1));
}
if ((collocated_from_l0_flag && num_ref_idx_l0_active_minus1 > 0)
|| (!collocated_from_l0_flag && num_ref_idx_l1_active_minus1 > 0)) {
// collocated_ref_idx: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
if ((pps_->weighted_pred_flag && slice_type == H265::SliceType::kP)
|| (pps_->weighted_bipred_flag && slice_type == H265::SliceType::kB)) {
// pred_weight_table()
// TODO(piasy): Do we need support for pred_weight_table()?
RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported.";
return kUnsupportedStream;
}
// five_minus_max_num_merge_cand: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
// TODO(piasy): motion_vector_resolution_control_idc?
}
}
// slice_qp_delta: se(v)
int32_t last_slice_qp_delta;
RETURN_INV_ON_FAIL(
slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta));
if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) {
// Something has gone wrong, and the parsed value is invalid.
RTC_LOG(LS_WARNING) << "Parsed QP value out of range.";
return kInvalidStream;
}
last_slice_qp_delta_ = last_slice_qp_delta;
return kOk;
}
uint32_t H265BitstreamParser::CalcNumPocTotalCurr(
uint32_t num_long_term_sps, uint32_t num_long_term_pics,
const std::vector<uint32_t> lt_idx_sps,
const std::vector<uint32_t> used_by_curr_pic_lt_flag,
uint32_t short_term_ref_pic_set_sps_flag,
uint32_t short_term_ref_pic_set_idx,
const H265SpsParser::ShortTermRefPicSet& short_term_ref_pic_set) {
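// Derives NumPicTotalCurr as described in the H.265 spec: the number of
// short-term reference pictures flagged as used by the current picture plus
// the number of long-term reference pictures flagged the same way.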
uint32_t num_poc_total_curr = 0;
uint32_t curr_sps_idx;
bool used_by_curr_pic_lt[16];
uint32_t num_long_term = num_long_term_sps + num_long_term_pics;
for (uint32_t i = 0; i < num_long_term; i++) {
if (i < num_long_term_sps) {
used_by_curr_pic_lt[i] = sps_->used_by_curr_pic_lt_sps_flag[lt_idx_sps[i]];
} else {
used_by_curr_pic_lt[i] = used_by_curr_pic_lt_flag[i];
}
}
if (short_term_ref_pic_set_sps_flag) {
curr_sps_idx = short_term_ref_pic_set_idx;
} else {
curr_sps_idx = sps_->num_short_term_ref_pic_sets;
}
if (sps_->short_term_ref_pic_set.size() <= curr_sps_idx) {
if (curr_sps_idx != 0 || short_term_ref_pic_set_sps_flag) {
return 0;
}
}
const H265SpsParser::ShortTermRefPicSet* ref_pic_set;
if (curr_sps_idx < sps_->short_term_ref_pic_set.size()) {
ref_pic_set = &(sps_->short_term_ref_pic_set[curr_sps_idx]);
} else {
ref_pic_set = &short_term_ref_pic_set;
}
for (uint32_t i = 0; i < ref_pic_set->num_negative_pics; i++) {
if (ref_pic_set->used_by_curr_pic_s0_flag[i]) {
num_poc_total_curr++;
}
}
for (uint32_t i = 0; i < ref_pic_set->num_positive_pics; i++) {
if (ref_pic_set->used_by_curr_pic_s1_flag[i]) {
num_poc_total_curr++;
}
}
for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) {
if (used_by_curr_pic_lt[i]) {
num_poc_total_curr++;
}
}
return num_poc_total_curr;
}
void H265BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
H265::NaluType nalu_type = H265::ParseNaluType(slice[0]);
if (nalu_type == H265::NaluType::kSps) {
sps_ = H265SpsParser::ParseSps(slice + H265::kNaluTypeSize,
length - H265::kNaluTypeSize);
if (!sps_) {
RTC_LOG(LS_WARNING) << "Unable to parse SPS from H265 bitstream.";
}
} else if (nalu_type == H265::NaluType::kPps) {
pps_ = H265PpsParser::ParsePps(slice + H265::kNaluTypeSize,
length - H265::kNaluTypeSize);
if (!pps_) {
RTC_LOG(LS_WARNING) << "Unable to parse PPS from H265 bitstream.";
}
} else if (nalu_type <= H265::NaluType::kRsvIrapVcl23) {
Result res = ParseNonParameterSetNalu(slice, length, nalu_type);
if (res != kOk) {
RTC_LOG(LS_INFO) << "Failed to parse bitstream. Error: " << res;
}
}
}
void H265BitstreamParser::ParseBitstream(const uint8_t* bitstream,
size_t length) {
std::vector<H265::NaluIndex> nalu_indices =
H265::FindNaluIndices(bitstream, length);
for (const H265::NaluIndex& index : nalu_indices)
ParseSlice(&bitstream[index.payload_start_offset], index.payload_size);
}
bool H265BitstreamParser::GetLastSliceQp(int* qp) const {
if (!last_slice_qp_delta_ || !pps_) {
return false;
}
const int parsed_qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
if (parsed_qp < kMinQpValue || parsed_qp > kMaxQpValue) {
RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream.";
return false;
}
*qp = parsed_qp;
return true;
}
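// Worked example with illustrative numbers (not from a real stream): with
// pic_init_qp_minus26 = 4 in the PPS and a parsed slice_qp_delta of -2, the
// reported QP is 26 + 4 + (-2) = 28, which lies inside [kMinQpValue,
// kMaxQpValue] and is therefore returned.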
void H265BitstreamParser::ParseBitstream(
rtc::ArrayView<const uint8_t> bitstream) {
ParseBitstream(bitstream.data(), bitstream.size());
}
absl::optional<int> H265BitstreamParser::GetLastSliceQp() const {
int qp;
bool success = GetLastSliceQp(&qp);
return success ? absl::optional<int>(qp) : absl::nullopt;
}
} // namespace webrtc


@@ -0,0 +1,72 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_
#define COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_
#include <stddef.h>
#include <stdint.h>
#include "absl/types/optional.h"
#include "api/video_codecs/bitstream_parser.h"
#include "common_video/h265/h265_pps_parser.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/h265_vps_parser.h"
namespace webrtc {
// Stateful H265 bitstream parser (due to SPS/PPS). Used to parse out QP values
// from the bitstream.
// TODO(pbos): Unify with RTP SPS parsing and only use one H265 parser.
// TODO(pbos): If/when this gets used on the receiver side, CHECKs must be
// removed and replaced with graceful aborts, as we have no control over
// receive-side bitstreams.
class H265BitstreamParser : public BitstreamParser {
public:
H265BitstreamParser();
~H265BitstreamParser() override;
// These are here for backwards compatibility for the time being.
void ParseBitstream(const uint8_t* bitstream, size_t length);
bool GetLastSliceQp(int* qp) const;
// New interface.
void ParseBitstream(rtc::ArrayView<const uint8_t> bitstream) override;
absl::optional<int> GetLastSliceQp() const override;
protected:
enum Result {
kOk,
kInvalidStream,
kUnsupportedStream,
};
void ParseSlice(const uint8_t* slice, size_t length);
Result ParseNonParameterSetNalu(const uint8_t* source,
size_t source_length,
uint8_t nalu_type);
uint32_t CalcNumPocTotalCurr(uint32_t num_long_term_sps,
uint32_t num_long_term_pics,
const std::vector<uint32_t> lt_idx_sps,
const std::vector<uint32_t> used_by_curr_pic_lt_flag,
uint32_t short_term_ref_pic_set_sps_flag,
uint32_t short_term_ref_pic_set_idx,
const H265SpsParser::ShortTermRefPicSet& short_term_ref_pic_set);
// SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices.
absl::optional<H265SpsParser::SpsState> sps_;
absl::optional<H265PpsParser::PpsState> pps_;
// Last parsed slice QP.
absl::optional<int32_t> last_slice_qp_delta_;
};
} // namespace webrtc
#endif // COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_
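
The parser declared above is stateful: SPS and PPS NALUs must be parsed before a slice header can be interpreted. A minimal usage sketch, assuming an Annex B buffer that already carries the parameter sets followed by a slice NALU (the ExtractQp helper and the buffer contents are illustrative, not part of this commit):

#include <cstdint>
#include <vector>

#include "api/array_view.h"
#include "common_video/h265/h265_bitstream_parser.h"

int ExtractQp(const std::vector<uint8_t>& annex_b_buffer) {
  webrtc::H265BitstreamParser parser;
  // ParseBitstream() splits the buffer into NALUs, updates the internal
  // SPS/PPS state and parses the slice headers it encounters.
  parser.ParseBitstream(rtc::ArrayView<const uint8_t>(annex_b_buffer));
  // GetLastSliceQp() reflects the most recently parsed slice.
  absl::optional<int> qp = parser.GetLastSliceQp();
  return qp ? *qp : -1;  // -1 if no slice QP could be parsed.
}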


@@ -0,0 +1,67 @@
/*
* Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_video/h265/h265_common.h"
#include "common_video/h264/h264_common.h"
namespace webrtc {
namespace H265 {
constexpr uint8_t kNaluTypeMask = 0x7E;
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
size_t buffer_size) {
std::vector<H264::NaluIndex> indices =
H264::FindNaluIndices(buffer, buffer_size);
std::vector<NaluIndex> results;
for (auto& index : indices) {
results.push_back(
{index.start_offset, index.payload_start_offset, index.payload_size});
}
return results;
}
NaluType ParseNaluType(uint8_t data) {
return static_cast<NaluType>((data & kNaluTypeMask) >> 1);
}
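// Worked example: an IDR_W_RADL NALU with nuh_layer_id 0 starts with the
// header byte 0x26, and (0x26 & 0x7E) >> 1 == 19 == NaluType::kIdrWRadl.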
std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length) {
return H264::ParseRbsp(data, length);
}
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) {
H264::WriteRbsp(bytes, length, destination);
}
uint32_t Log2Ceiling(uint32_t value) {
// When value == 0, we want the function to return -1.
// When value == 0, (value - 1) will underflow to 0xFFFFFFFF, which is
// why the statement below starts with (value ? 32 : -1).
return (value ? 32 : -1) - WebRtcVideo_CountLeadingZeros32(value - 1);
}
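// Worked examples: Log2Ceiling(1) = 32 - WebRtcVideo_CountLeadingZeros32(0)
// = 32 - 32 = 0, and Log2Ceiling(5) = 32 - WebRtcVideo_CountLeadingZeros32(4)
// = 32 - 29 = 3.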
uint32_t Log2(uint32_t value) {
uint32_t result = 0;
// If value is not a power of two an additional bit is required
// to account for the ceil() of log2() below.
if ((value & (value - 1)) != 0) {
++result;
}
while (value > 0) {
value >>= 1;
++result;
}
return result;
}
} // namespace H265
} // namespace webrtc


@@ -0,0 +1,115 @@
/*
* Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_VIDEO_H265_H265_COMMON_H_
#define COMMON_VIDEO_H265_H265_COMMON_H_
#include <memory>
#include <vector>
#include "common_video/h265/h265_inline.h"
#include "rtc_base/buffer.h"
namespace webrtc {
namespace H265 {
// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU
// of an access unit, and for SPS and PPS blocks.
constexpr size_t kNaluLongStartSequenceSize = 4;
// The size of a shortened NALU start sequence {0 0 1}, that may be used if
// not the first NALU of an access unit or an SPS or PPS block.
constexpr size_t kNaluShortStartSequenceSize = 3;
// The size of the NALU header, in bytes (2).
constexpr size_t kNaluHeaderSize = 2;
// The size of the NALU type field, in bytes (2).
const size_t kNaluTypeSize = 2;
// Type descriptions for 0-40 are defined in Table 7-1 of the H.265 spec.
// Type descriptions for 48-49 are defined in sections 4.4.2 and 4.4.3 of RFC 7798.
enum NaluType : uint8_t {
kTrailN = 0,
kTrailR = 1,
kTsaN = 2,
kTsaR = 3,
kStsaN = 4,
kStsaR = 5,
kRadlN = 6,
kRadlR = 7,
kBlaWLp = 16,
kBlaWRadl = 17,
kBlaNLp = 18,
kIdrWRadl = 19,
kIdrNLp = 20,
kCra = 21,
kRsvIrapVcl23 = 23,
kVps = 32,
kSps = 33,
kPps = 34,
kAud = 35,
kPrefixSei = 39,
kSuffixSei = 40,
// Aggregation packets, refer to section 4.4.2 in RFC 7798.
kAp = 48,
// Fragmentation units, refer to section 4.4.3 in RFC 7798.
kFu = 49,
// PACI packets, refer to section 4.4.4 in RFC 7798.
kPaci = 50
};
// Slice type definition. See table 7-7 of the H.265 spec
enum SliceType : uint8_t { kB = 0, kP = 1, kI = 2 };
struct NaluIndex {
// Start index of NALU, including start sequence.
size_t start_offset = 0;
// Start index of NALU payload, typically type header.
size_t payload_start_offset = 0;
// Length of NALU payload, in bytes, counting from payload_start_offset.
size_t payload_size = 0;
};
// Returns a vector of the NALU indices in the given buffer.
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
size_t buffer_size);
// Get the NAL type from the header byte immediately following start sequence.
NaluType ParseNaluType(uint8_t data);
// Methods for parsing and writing RBSP. See section 7.4.2 of the H.265 spec.
//
// The following sequences are illegal, and need to be escaped when encoding:
// 00 00 00 -> 00 00 03 00
// 00 00 01 -> 00 00 03 01
// 00 00 02 -> 00 00 03 02
// And things in the source that look like the emulation byte pattern (00 00 03)
// need to have an extra emulation byte added, so it's removed when decoding:
// 00 00 03 -> 00 00 03 03
//
// Decoding is simply a matter of finding any 00 00 03 sequence and removing
// the 03 emulation byte.
// Parse the given data and remove any emulation byte escaping.
std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length);
// Write the given data to the destination buffer, inserting emulation bytes
// in order to escape any data that could be interpreted as a start
// sequence.
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination);
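// Example of the escaping described above: the payload {0x00, 0x00, 0x01} is
// written as {0x00, 0x00, 0x03, 0x01}, and ParseRbsp() restores the original
// bytes by dropping the 0x03 emulation byte.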
uint32_t Log2Ceiling(uint32_t value);
uint32_t Log2(uint32_t value);
} // namespace H265
} // namespace webrtc
#endif // COMMON_VIDEO_H265_H265_COMMON_H_


@@ -0,0 +1,24 @@
/*
* Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_video/h265/h265_inline.h"
#include <stdint.h>
// Table used by WebRtcVideo_CountLeadingZeros32_NotBuiltin. For each uint32_t n
// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in
// n.
const int8_t kWebRtcVideo_CountLeadingZeros32_Table[64] = {
32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24,
4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9,
-1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12,
};


@@ -0,0 +1,47 @@
/*
* Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in H265 parser.
#ifndef COMMON_VIDEO_H265_H265_INLINE_H_
#define COMMON_VIDEO_H265_H265_INLINE_H_
#include <stdint.h>
#include "rtc_base/compile_assert_c.h"
extern const int8_t kWebRtcVideo_CountLeadingZeros32_Table[64];
static __inline int WebRtcVideo_CountLeadingZeros32_NotBuiltin(uint32_t n) {
// Normalize n by rounding up to the nearest number that is a sequence of 0
// bits followed by a sequence of 1 bits. This number has the same number of
// leading zeros as the original n. There are exactly 33 such values.
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
// Multiply the modified n with a constant selected (by exhaustive search)
// such that each of the 33 possible values of n give a product whose 6 most
// significant bits are unique. Then look up the answer in the table.
return kWebRtcVideo_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26];
}
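// Worked example: for n = 1 the bit-smearing leaves n = 1, the multiply yields
// 0x8c0b2891, and shifting right by 26 gives index 35, where the table stores
// 31, the number of leading zeros of a 32-bit 1. For n = 0 the index is 0 and
// the table returns 32 by convention.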
// Returns the number of leading zero bits in the argument.
static __inline int WebRtcVideo_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
return n == 0 ? 32 : __builtin_clz(n);
#else
return WebRtcVideo_CountLeadingZeros32_NotBuiltin(n);
#endif
}
#endif // COMMON_VIDEO_H265_H265_INLINE_H_


@@ -0,0 +1,217 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_video/h265/h265_pps_parser.h"
#include <memory>
#include <vector>
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/legacy_bit_buffer.h"
#include "rtc_base/logging.h"
#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return absl::nullopt; \
}
namespace {
const int kMaxPicInitQpDeltaValue = 25;
const int kMinPicInitQpDeltaValue = -26;
} // namespace
namespace webrtc {
// General note: this is based off the 06/2019 version of the H.265 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.265
absl::optional<H265PpsParser::PpsState> H265PpsParser::ParsePps(
const uint8_t* data,
size_t length) {
// First, parse out rbsp, which is basically the source buffer minus emulation
// bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
// section 7.3.1.1 of the H.265 standard.
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParseInternal(&bit_buffer);
}
bool H265PpsParser::ParsePpsIds(const uint8_t* data,
size_t length,
uint32_t* pps_id,
uint32_t* sps_id) {
RTC_DCHECK(pps_id);
RTC_DCHECK(sps_id);
// First, parse out rbsp, which is basically the source buffer minus emulation
// bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
// section 7.3.1.1 of the H.265 standard.
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParsePpsIdsInternal(&bit_buffer, pps_id, sps_id);
}
absl::optional<uint32_t> H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp(
const uint8_t* data,
size_t length,
uint8_t nalu_type) {
rtc::BitBuffer slice_reader(data, length);
// first_slice_segment_in_pic_flag: u(1)
uint32_t first_slice_segment_in_pic_flag = 0;
RETURN_EMPTY_ON_FAIL(
slice_reader.ReadBits(&first_slice_segment_in_pic_flag, 1));
if (nalu_type >= H265::NaluType::kBlaWLp &&
nalu_type <= H265::NaluType::kRsvIrapVcl23) {
// no_output_of_prior_pics_flag: u(1)
RETURN_EMPTY_ON_FAIL(slice_reader.ConsumeBits(1));
}
// slice_pic_parameter_set_id: ue(v)
uint32_t slice_pic_parameter_set_id = 0;
if (!slice_reader.ReadExponentialGolomb(&slice_pic_parameter_set_id))
return absl::nullopt;
return slice_pic_parameter_set_id;
}
absl::optional<H265PpsParser::PpsState> H265PpsParser::ParseInternal(
rtc::BitBuffer* bit_buffer) {
PpsState pps;
RETURN_EMPTY_ON_FAIL(ParsePpsIdsInternal(bit_buffer, &pps.id, &pps.sps_id));
uint32_t bits_tmp;
uint32_t golomb_ignored;
int32_t signed_golomb_ignored;
// dependent_slice_segments_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.dependent_slice_segments_enabled_flag, 1));
// output_flag_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.output_flag_present_flag, 1));
// num_extra_slice_header_bits: u(3)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.num_extra_slice_header_bits, 3));
// sign_data_hiding_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// cabac_init_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.cabac_init_present_flag, 1));
// num_ref_idx_l0_default_active_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.num_ref_idx_l0_default_active_minus1));
// num_ref_idx_l1_default_active_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.num_ref_idx_l1_default_active_minus1));
// init_qp_minus26: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&pps.pic_init_qp_minus26));
// Sanity-check parsed value
if (pps.pic_init_qp_minus26 > kMaxPicInitQpDeltaValue ||
pps.pic_init_qp_minus26 < kMinPicInitQpDeltaValue) {
RETURN_EMPTY_ON_FAIL(false);
}
// constrained_intra_pred_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// transform_skip_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// cu_qp_delta_enabled_flag: u(1)
uint32_t cu_qp_delta_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&cu_qp_delta_enabled_flag, 1));
if (cu_qp_delta_enabled_flag) {
// diff_cu_qp_delta_depth: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
}
// pps_cb_qp_offset: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
// pps_cr_qp_offset: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
// pps_slice_chroma_qp_offsets_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// weighted_pred_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_pred_flag, 1));
// weighted_bipred_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_bipred_flag, 1));
// transquant_bypass_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// tiles_enabled_flag: u(1)
uint32_t tiles_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&tiles_enabled_flag, 1));
// entropy_coding_sync_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
if (tiles_enabled_flag) {
// num_tile_columns_minus1: ue(v)
uint32_t num_tile_columns_minus1 = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&num_tile_columns_minus1));
// num_tile_rows_minus1: ue(v)
uint32_t num_tile_rows_minus1 = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&num_tile_rows_minus1));
// uniform_spacing_flag: u(1)
uint32_t uniform_spacing_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&uniform_spacing_flag, 1));
if (!uniform_spacing_flag) {
for (uint32_t i = 0; i < num_tile_columns_minus1; i++) {
// column_width_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
}
for (uint32_t i = 0; i < num_tile_rows_minus1; i++) {
// row_height_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
}
// loop_filter_across_tiles_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
}
}
// pps_loop_filter_across_slices_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// deblocking_filter_control_present_flag: u(1)
uint32_t deblocking_filter_control_present_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&deblocking_filter_control_present_flag, 1));
if (deblocking_filter_control_present_flag) {
// deblocking_filter_override_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// pps_deblocking_filter_disabled_flag: u(1)
uint32_t pps_deblocking_filter_disabled_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps_deblocking_filter_disabled_flag, 1));
if (!pps_deblocking_filter_disabled_flag) {
// pps_beta_offset_div2: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
// pps_tc_offset_div2: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
}
}
// pps_scaling_list_data_present_flag: u(1)
uint32_t pps_scaling_list_data_present_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps_scaling_list_data_present_flag, 1));
if (pps_scaling_list_data_present_flag) {
// scaling_list_data()
if (!H265SpsParser::ParseScalingListData(bit_buffer)) {
return absl::nullopt;
}
}
// lists_modification_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.lists_modification_present_flag, 1));
// log2_parallel_merge_level_minus2: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
// slice_segment_header_extension_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
return pps;
}
bool H265PpsParser::ParsePpsIdsInternal(rtc::BitBuffer* bit_buffer,
uint32_t* pps_id,
uint32_t* sps_id) {
// pic_parameter_set_id: ue(v)
if (!bit_buffer->ReadExponentialGolomb(pps_id))
return false;
// seq_parameter_set_id: ue(v)
if (!bit_buffer->ReadExponentialGolomb(sps_id))
return false;
return true;
}
} // namespace webrtc


@@ -0,0 +1,68 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_VIDEO_H265_PPS_PARSER_H_
#define COMMON_VIDEO_H265_PPS_PARSER_H_
#include "absl/types/optional.h"
namespace rtc {
class BitBuffer;
}
namespace webrtc {
// A class for parsing out picture parameter set (PPS) data from an H265 NALU.
class H265PpsParser {
public:
// The parsed state of the PPS. Only some select values are stored.
// Add more as they are actually needed.
struct PpsState {
PpsState() = default;
uint32_t dependent_slice_segments_enabled_flag = 0;
uint32_t cabac_init_present_flag = 0;
uint32_t output_flag_present_flag = 0;
uint32_t num_extra_slice_header_bits = 0;
uint32_t num_ref_idx_l0_default_active_minus1 = 0;
uint32_t num_ref_idx_l1_default_active_minus1 = 0;
int32_t pic_init_qp_minus26 = 0;
uint32_t weighted_pred_flag = 0;
uint32_t weighted_bipred_flag = 0;
uint32_t lists_modification_present_flag = 0;
uint32_t id = 0;
uint32_t sps_id = 0;
};
// Unpack RBSP and parse PPS state from the supplied buffer.
static absl::optional<PpsState> ParsePps(const uint8_t* data, size_t length);
static bool ParsePpsIds(const uint8_t* data,
size_t length,
uint32_t* pps_id,
uint32_t* sps_id);
static absl::optional<uint32_t> ParsePpsIdFromSliceSegmentLayerRbsp(
const uint8_t* data,
size_t length,
uint8_t nalu_type);
protected:
// Parse the PPS state, for a bit buffer where RBSP decoding has already been
// performed.
static absl::optional<PpsState> ParseInternal(rtc::BitBuffer* bit_buffer);
static bool ParsePpsIdsInternal(rtc::BitBuffer* bit_buffer,
uint32_t* pps_id,
uint32_t* sps_id);
};
} // namespace webrtc
#endif // COMMON_VIDEO_H265_PPS_PARSER_H_


@@ -0,0 +1,408 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <algorithm>
#include <memory>
#include <vector>
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/legacy_bit_buffer.h"
#include "rtc_base/logging.h"
namespace {
typedef absl::optional<webrtc::H265SpsParser::SpsState> OptionalSps;
typedef absl::optional<webrtc::H265SpsParser::ShortTermRefPicSet> OptionalShortTermRefPicSet;
#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return OptionalSps(); \
}
#define RETURN_FALSE_ON_FAIL(x) \
if (!(x)) { \
return false; \
}
#define RETURN_EMPTY2_ON_FAIL(x) \
if (!(x)) { \
return OptionalShortTermRefPicSet(); \
}
} // namespace
namespace webrtc {
H265SpsParser::SpsState::SpsState() = default;
H265SpsParser::ShortTermRefPicSet::ShortTermRefPicSet() = default;
// General note: this is based off the 06/2019 version of the H.265 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.265
// Unpack RBSP and parse SPS state from the supplied buffer.
absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSps(
const uint8_t* data,
size_t length) {
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParseSpsInternal(&bit_buffer);
}
bool H265SpsParser::ParseScalingListData(rtc::BitBuffer* buffer) {
uint32_t scaling_list_pred_mode_flag[4][6];
uint32_t scaling_list_pred_matrix_id_delta[4][6];
int32_t scaling_list_dc_coef_minus8[4][6];
int32_t scaling_list[4][6][64];
for (int size_id = 0; size_id < 4; size_id++) {
for (int matrix_id = 0; matrix_id < 6; matrix_id += (size_id == 3) ? 3 : 1) {
// scaling_list_pred_mode_flag: u(1)
RETURN_FALSE_ON_FAIL(buffer->ReadBits(&scaling_list_pred_mode_flag[size_id][matrix_id], 1));
if (!scaling_list_pred_mode_flag[size_id][matrix_id]) {
// scaling_list_pred_matrix_id_delta: ue(v)
RETURN_FALSE_ON_FAIL(buffer->ReadExponentialGolomb(&scaling_list_pred_matrix_id_delta[size_id][matrix_id]));
} else {
int32_t next_coef = 8;
uint32_t coef_num = std::min(64, 1 << (4 + (size_id << 1)));
if (size_id > 1) {
// scaling_list_dc_coef_minus8: se(v)
RETURN_FALSE_ON_FAIL(buffer->ReadSignedExponentialGolomb(&scaling_list_dc_coef_minus8[size_id - 2][matrix_id]));
next_coef = scaling_list_dc_coef_minus8[size_id - 2][matrix_id];
}
for (uint32_t i = 0; i < coef_num; i++) {
// scaling_list_delta_coef: se(v)
int32_t scaling_list_delta_coef = 0;
RETURN_FALSE_ON_FAIL(buffer->ReadSignedExponentialGolomb(&scaling_list_delta_coef));
next_coef = (next_coef + scaling_list_delta_coef + 256) % 256;
scaling_list[size_id][matrix_id][i] = next_coef;
}
}
}
}
return true;
}
absl::optional<H265SpsParser::ShortTermRefPicSet> H265SpsParser::ParseShortTermRefPicSet(
uint32_t st_rps_idx, uint32_t num_short_term_ref_pic_sets,
const std::vector<H265SpsParser::ShortTermRefPicSet>& short_term_ref_pic_set,
H265SpsParser::SpsState& sps, rtc::BitBuffer* buffer) {
H265SpsParser::ShortTermRefPicSet ref_pic_set;
uint32_t inter_ref_pic_set_prediction_flag = 0;
if (st_rps_idx != 0) {
// inter_ref_pic_set_prediction_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&inter_ref_pic_set_prediction_flag, 1));
}
if (inter_ref_pic_set_prediction_flag) {
uint32_t delta_idx_minus1 = 0;
if (st_rps_idx == num_short_term_ref_pic_sets) {
// delta_idx_minus1: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&delta_idx_minus1));
}
// delta_rps_sign: u(1)
uint32_t delta_rps_sign = 0;
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&delta_rps_sign, 1));
// abs_delta_rps_minus1: ue(v)
uint32_t abs_delta_rps_minus1 = 0;
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&abs_delta_rps_minus1));
uint32_t ref_rps_idx = st_rps_idx - (delta_idx_minus1 + 1);
uint32_t num_delta_pocs = 0;
if (short_term_ref_pic_set[ref_rps_idx].inter_ref_pic_set_prediction_flag) {
auto& used_by_curr_pic_flag = short_term_ref_pic_set[ref_rps_idx].used_by_curr_pic_flag;
auto& use_delta_flag = short_term_ref_pic_set[ref_rps_idx].use_delta_flag;
if (used_by_curr_pic_flag.size() != use_delta_flag.size()) {
return OptionalShortTermRefPicSet();
}
for (uint32_t i = 0; i < used_by_curr_pic_flag.size(); i++) {
if (used_by_curr_pic_flag[i] || use_delta_flag[i]) {
num_delta_pocs++;
}
}
} else {
num_delta_pocs = short_term_ref_pic_set[ref_rps_idx].num_negative_pics + short_term_ref_pic_set[ref_rps_idx].num_positive_pics;
}
ref_pic_set.used_by_curr_pic_flag.resize(num_delta_pocs + 1, 0);
ref_pic_set.use_delta_flag.resize(num_delta_pocs + 1, 1);
for (uint32_t j = 0; j <= num_delta_pocs; j++) {
// used_by_curr_pic_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.used_by_curr_pic_flag[j], 1));
if (!ref_pic_set.used_by_curr_pic_flag[j]) {
// use_delta_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.use_delta_flag[j], 1));
}
}
} else {
// num_negative_pics: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.num_negative_pics));
// num_positive_pics: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.num_positive_pics));
ref_pic_set.delta_poc_s0_minus1.resize(ref_pic_set.num_negative_pics, 0);
ref_pic_set.used_by_curr_pic_s0_flag.resize(ref_pic_set.num_negative_pics, 0);
for (uint32_t i = 0; i < ref_pic_set.num_negative_pics; i++) {
// delta_poc_s0_minus1: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.delta_poc_s0_minus1[i]));
// used_by_curr_pic_s0_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.used_by_curr_pic_s0_flag[i], 1));
}
ref_pic_set.delta_poc_s1_minus1.resize(ref_pic_set.num_positive_pics, 0);
ref_pic_set.used_by_curr_pic_s1_flag.resize(ref_pic_set.num_positive_pics, 0);
for (uint32_t i = 0; i < ref_pic_set.num_positive_pics; i++) {
// delta_poc_s1_minus1: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.delta_poc_s1_minus1[i]));
// used_by_curr_pic_s1_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.used_by_curr_pic_s1_flag[i], 1));
}
}
return OptionalShortTermRefPicSet(ref_pic_set);
}
absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSpsInternal(
rtc::BitBuffer* buffer) {
// Now, we need to use a bit buffer to parse through the actual HEVC SPS
// format. See Section 7.3.2.2.1 ("General sequence parameter set data
// syntax") of the H.265 standard for a complete description.
// Since we only care about resolution, we ignore the majority of fields, but
// we still have to actively parse through a lot of the data, since many of
// the fields have variable size.
// We're particularly interested in:
// chroma_format_idc -> affects crop units
// pic_{width,height}_in_luma_samples -> resolution of the frame, in luma samples
// conf_win_*_offset -> conformance window (crop) information
SpsState sps;
// Scratch variable for exp-Golomb values that are parsed but not otherwise used.
uint32_t golomb_ignored;
// sps_video_parameter_set_id: u(4)
uint32_t sps_video_parameter_set_id = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_video_parameter_set_id, 4));
// sps_max_sub_layers_minus1: u(3)
uint32_t sps_max_sub_layers_minus1 = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_max_sub_layers_minus1, 3));
sps.sps_max_sub_layers_minus1 = sps_max_sub_layers_minus1;
sps.sps_max_dec_pic_buffering_minus1.resize(sps_max_sub_layers_minus1 + 1, 0);
// sps_temporal_id_nesting_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// profile_tier_level(1, sps_max_sub_layers_minus1). We are actually not
// using it, so read/skip over it.
// general_profile_space+general_tier_flag+general_profile_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// general_profile_compatibility_flag[32]: u(32)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(4));
// general_progressive_source_flag + general_interlaced_source_flag +
// general_non_packed_constraint_flag + general_frame_only_constraint_flag: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// general_profile_idc decided flags or reserved. u(43)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(43));
// general_inbld_flag or reserved 0: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// general_level_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// if max_sub_layers_minus1 >=1, read the sublayer profile information
std::vector<uint32_t> sub_layer_profile_present_flags;
std::vector<uint32_t> sub_layer_level_present_flags;
uint32_t sub_layer_profile_present = 0;
uint32_t sub_layer_level_present = 0;
for (uint32_t i = 0; i < sps_max_sub_layers_minus1; i++) {
// sub_layer_profile_present_flag and sub_layer_level_present_flag: u(2)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sub_layer_profile_present, 1));
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sub_layer_level_present, 1));
sub_layer_profile_present_flags.push_back(sub_layer_profile_present);
sub_layer_level_present_flags.push_back(sub_layer_level_present);
}
if (sps_max_sub_layers_minus1 > 0) {
for (uint32_t j = sps_max_sub_layers_minus1; j < 8; j++) {
// reserved 2 bits: u(2)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(2));
}
}
for (uint32_t k = 0; k < sps_max_sub_layers_minus1; k++) {
if (sub_layer_profile_present_flags[k]) {
// sub_layer profile_space/tier_flag/profile_idc. ignored. u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// profile_compatibility_flag: u(32)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(4));
// sub_layer progressive_source_flag/interlaced_source_flag/
// non_packed_constraint_flag/frame_only_constraint_flag: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// following 43-bits are profile_idc specific. We simply read/skip it.
// u(43)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(43));
// 1-bit profile_idc specific inbld flag. We simply read/skip it. u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
}
if (sub_layer_level_present_flags[k]) {
// sub_layer_level_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
}
}
// sps_seq_parameter_set_id: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id));
// chroma_format_idc: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.chroma_format_idc));
if (sps.chroma_format_idc == 3) {
// separate_colour_plane_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
}
uint32_t pic_width_in_luma_samples = 0;
uint32_t pic_height_in_luma_samples = 0;
// pic_width_in_luma_samples: ue(v)
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&pic_width_in_luma_samples));
// pic_height_in_luma_samples: ue(v)
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&pic_height_in_luma_samples));
// conformance_window_flag: u(1)
uint32_t conformance_window_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&conformance_window_flag, 1));
uint32_t conf_win_left_offset = 0;
uint32_t conf_win_right_offset = 0;
uint32_t conf_win_top_offset = 0;
uint32_t conf_win_bottom_offset = 0;
if (conformance_window_flag) {
// conf_win_left_offset: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_left_offset));
// conf_win_right_offset: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_right_offset));
// conf_win_top_offset: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_top_offset));
// conf_win_bottom_offset: ue(v)
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&conf_win_bottom_offset));
}
// bit_depth_luma_minus8: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// bit_depth_chroma_minus8: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// log2_max_pic_order_cnt_lsb_minus4: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.log2_max_pic_order_cnt_lsb_minus4));
uint32_t sps_sub_layer_ordering_info_present_flag = 0;
// sps_sub_layer_ordering_info_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_sub_layer_ordering_info_present_flag, 1));
for (uint32_t i = (sps_sub_layer_ordering_info_present_flag != 0) ? 0 : sps_max_sub_layers_minus1;
i <= sps_max_sub_layers_minus1; i++) {
// sps_max_dec_pic_buffering_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.sps_max_dec_pic_buffering_minus1[i]));
// sps_max_num_reorder_pics: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// sps_max_latency_increase_plus1: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
}
// log2_min_luma_coding_block_size_minus3: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.log2_min_luma_coding_block_size_minus3));
// log2_diff_max_min_luma_coding_block_size: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.log2_diff_max_min_luma_coding_block_size));
// log2_min_luma_transform_block_size_minus2: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// log2_diff_max_min_luma_transform_block_size: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// max_transform_hierarchy_depth_inter: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// max_transform_hierarchy_depth_intra: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// scaling_list_enabled_flag: u(1)
uint32_t scaling_list_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&scaling_list_enabled_flag, 1));
if (scaling_list_enabled_flag) {
// sps_scaling_list_data_present_flag: u(1)
uint32_t sps_scaling_list_data_present_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_scaling_list_data_present_flag, 1));
if (sps_scaling_list_data_present_flag) {
// scaling_list_data()
if (!ParseScalingListData(buffer)) {
return OptionalSps();
}
}
}
// amp_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// sample_adaptive_offset_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.sample_adaptive_offset_enabled_flag, 1));
// pcm_enabled_flag: u(1)
uint32_t pcm_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&pcm_enabled_flag, 1));
if (pcm_enabled_flag) {
// pcm_sample_bit_depth_luma_minus1: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// pcm_sample_bit_depth_chroma_minus1: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// log2_min_pcm_luma_coding_block_size_minus3: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// log2_diff_max_min_pcm_luma_coding_block_size: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// pcm_loop_filter_disabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
}
// num_short_term_ref_pic_sets: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.num_short_term_ref_pic_sets));
sps.short_term_ref_pic_set.resize(sps.num_short_term_ref_pic_sets);
for (uint32_t st_rps_idx = 0; st_rps_idx < sps.num_short_term_ref_pic_sets; st_rps_idx++) {
// st_ref_pic_set()
OptionalShortTermRefPicSet ref_pic_set = ParseShortTermRefPicSet(
st_rps_idx, sps.num_short_term_ref_pic_sets, sps.short_term_ref_pic_set, sps, buffer);
if (ref_pic_set) {
sps.short_term_ref_pic_set[st_rps_idx] = *ref_pic_set;
} else {
return OptionalSps();
}
}
// long_term_ref_pics_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.long_term_ref_pics_present_flag, 1));
if (sps.long_term_ref_pics_present_flag) {
// num_long_term_ref_pics_sps: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.num_long_term_ref_pics_sps));
sps.used_by_curr_pic_lt_sps_flag.resize(sps.num_long_term_ref_pics_sps, 0);
for (uint32_t i = 0; i < sps.num_long_term_ref_pics_sps; i++) {
// lt_ref_pic_poc_lsb_sps: u(v)
uint32_t lt_ref_pic_poc_lsb_sps_bits = sps.log2_max_pic_order_cnt_lsb_minus4 + 4;
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(lt_ref_pic_poc_lsb_sps_bits));
// used_by_curr_pic_lt_sps_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.used_by_curr_pic_lt_sps_flag[i], 1));
}
}
// sps_temporal_mvp_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.sps_temporal_mvp_enabled_flag, 1));
// Far enough! We don't use the rest of the SPS.
sps.vps_id = sps_video_parameter_set_id;
sps.pic_width_in_luma_samples = pic_width_in_luma_samples;
sps.pic_height_in_luma_samples = pic_height_in_luma_samples;
// Start with the resolution determined by the pic_width/pic_height fields.
sps.width = pic_width_in_luma_samples;
sps.height = pic_height_in_luma_samples;
if (conformance_window_flag) {
int sub_width_c = ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) &&
(0 == sps.separate_colour_plane_flag)
? 2
: 1;
int sub_height_c =
(1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) ? 2 : 1;
// The offsets specify samples inside the conformance window, so no +1 is
// needed per the spec.
sps.width -= sub_width_c * (conf_win_right_offset + conf_win_left_offset);
sps.height -= sub_height_c * (conf_win_top_offset + conf_win_bottom_offset);
}
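// Worked example with illustrative numbers: a 1920x1088 coded picture with
// chroma_format_idc = 1 (4:2:0) and conf_win_bottom_offset = 4 gives
// sub_height_c = 2, so the reported height is 1088 - 2 * 4 = 1080.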
return OptionalSps(sps);
}
} // namespace webrtc


@@ -0,0 +1,86 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_VIDEO_H265_H265_SPS_PARSER_H_
#define COMMON_VIDEO_H265_H265_SPS_PARSER_H_
#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "absl/types/optional.h"
namespace rtc {
class BitBuffer;
}
namespace webrtc {
// A class for parsing out sequence parameter set (SPS) data from an H265 NALU.
class H265SpsParser {
public:
struct ShortTermRefPicSet {
ShortTermRefPicSet();
uint32_t inter_ref_pic_set_prediction_flag = 0;
std::vector<uint32_t> used_by_curr_pic_flag;
std::vector<uint32_t> use_delta_flag;
uint32_t num_negative_pics = 0;
uint32_t num_positive_pics = 0;
std::vector<uint32_t> delta_poc_s0_minus1;
std::vector<uint32_t> used_by_curr_pic_s0_flag;
std::vector<uint32_t> delta_poc_s1_minus1;
std::vector<uint32_t> used_by_curr_pic_s1_flag;
};
// The parsed state of the SPS. Only some select values are stored.
// Add more as they are actually needed.
struct SpsState {
SpsState();
uint32_t sps_max_sub_layers_minus1;
uint32_t chroma_format_idc = 0;
uint32_t separate_colour_plane_flag = 0;
uint32_t pic_width_in_luma_samples = 0;
uint32_t pic_height_in_luma_samples = 0;
uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0;
std::vector<uint32_t> sps_max_dec_pic_buffering_minus1;
uint32_t log2_min_luma_coding_block_size_minus3 = 0;
uint32_t log2_diff_max_min_luma_coding_block_size = 0;
uint32_t sample_adaptive_offset_enabled_flag = 0;
uint32_t num_short_term_ref_pic_sets = 0;
std::vector<H265SpsParser::ShortTermRefPicSet> short_term_ref_pic_set;
uint32_t long_term_ref_pics_present_flag = 0;
uint32_t num_long_term_ref_pics_sps = 0;
std::vector<uint32_t> used_by_curr_pic_lt_sps_flag;
uint32_t sps_temporal_mvp_enabled_flag = 0;
uint32_t width = 0;
uint32_t height = 0;
uint32_t id = 0;
uint32_t vps_id = 0;
};
// Unpack RBSP and parse SPS state from the supplied buffer.
static absl::optional<SpsState> ParseSps(const uint8_t* data, size_t length);
static bool ParseScalingListData(rtc::BitBuffer* buffer);
static absl::optional<ShortTermRefPicSet> ParseShortTermRefPicSet(
uint32_t st_rps_idx, uint32_t num_short_term_ref_pic_sets,
const std::vector<ShortTermRefPicSet>& ref_pic_sets,
SpsState& sps, rtc::BitBuffer* buffer);
protected:
// Parse the SPS state, for a bit buffer where RBSP decoding has already been
// performed.
static absl::optional<SpsState> ParseSpsInternal(rtc::BitBuffer* buffer);
};
} // namespace webrtc
#endif // COMMON_VIDEO_H265_H265_SPS_PARSER_H_
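
For callers that only need the coded resolution, the SPS parser declared above can also be used on its own. A minimal sketch, assuming the caller has already isolated a single SPS NALU (the GetResolution helper is illustrative, not part of this commit):

#include <cstddef>
#include <cstdint>

#include "absl/types/optional.h"
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_sps_parser.h"

bool GetResolution(const uint8_t* nalu, size_t length,
                   uint32_t* width, uint32_t* height) {
  if (length <= webrtc::H265::kNaluTypeSize)
    return false;
  // Skip the two-byte NAL unit header, mirroring what
  // H265BitstreamParser::ParseSlice() does for kSps NALUs.
  absl::optional<webrtc::H265SpsParser::SpsState> sps =
      webrtc::H265SpsParser::ParseSps(nalu + webrtc::H265::kNaluTypeSize,
                                      length - webrtc::H265::kNaluTypeSize);
  if (!sps)
    return false;
  *width = sps->width;    // Already cropped by the conformance window, if any.
  *height = sps->height;
  return true;
}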


@@ -0,0 +1,60 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory>
#include <vector>
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_vps_parser.h"
#include "common_video/h265/legacy_bit_buffer.h"
#include "rtc_base/logging.h"
namespace {
typedef absl::optional<webrtc::H265VpsParser::VpsState> OptionalVps;
#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return OptionalVps(); \
}
} // namespace
namespace webrtc {
H265VpsParser::VpsState::VpsState() = default;
// General note: this is based off the 06/2019 version of the H.265 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.265
// Unpack RBSP and parse VPS state from the supplied buffer.
absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseVps(
const uint8_t* data,
size_t length) {
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParseInternal(&bit_buffer);
}
absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseInternal(
rtc::BitBuffer* buffer) {
// Now, we need to use a bit buffer to parse through the actual HEVC VPS
// format. See Section 7.3.2.1 ("Video parameter set RBSP syntax") of the
// H.265 standard for a complete description.
VpsState vps;
// vps_video_parameter_set_id: u(4)
vps.id = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&vps.id, 4));
return OptionalVps(vps);
}
} // namespace webrtc

View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_VIDEO_H265_H265_VPS_PARSER_H_
#define COMMON_VIDEO_H265_H265_VPS_PARSER_H_
#include "absl/types/optional.h"
namespace rtc {
class BitBuffer;
}
namespace webrtc {
// A class for parsing out video parameter set (VPS) data from an H265 NALU.
class H265VpsParser {
public:
// The parsed state of the VPS. Only some select values are stored.
// Add more as they are actually needed.
struct VpsState {
VpsState();
uint32_t id = 0;
};
// Unpack RBSP and parse VPS state from the supplied buffer.
static absl::optional<VpsState> ParseVps(const uint8_t* data, size_t length);
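// A hypothetical call site (sketch only; |nalu_payload| and |payload_size| are
// illustrative names, and the buffer is assumed to start right after the
// two-byte NAL unit header, since ParseVps reads the 4-bit id immediately):
//   absl::optional<H265VpsParser::VpsState> vps =
//       H265VpsParser::ParseVps(nalu_payload, payload_size);
//   if (vps) {
//     uint32_t parsed_vps_id = vps->id;
//   }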
protected:
// Parse the VPS state, for a bit buffer where RBSP decoding has already been
// performed.
static absl::optional<VpsState> ParseInternal(rtc::BitBuffer* bit_buffer);
};
} // namespace webrtc
#endif // COMMON_VIDEO_H265_H265_VPS_PARSER_H_

View file

@ -0,0 +1,227 @@
/*
* Copyright 2015 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_video/h265/legacy_bit_buffer.h"
#include <algorithm>
#include <limits>
#include "rtc_base/checks.h"
namespace {
// Returns the lowest (right-most) |bit_count| bits in |byte|.
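// e.g. LowestBits(0b11010010, 3) == 0b010.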
uint8_t LowestBits(uint8_t byte, size_t bit_count) {
RTC_DCHECK_LE(bit_count, 8);
return byte & ((1 << bit_count) - 1);
}
// Returns the highest (left-most) |bit_count| bits in |byte|, shifted to the
// lowest bits (to the right).
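// e.g. HighestBits(0b11010010, 3) == 0b110.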
uint8_t HighestBits(uint8_t byte, size_t bit_count) {
RTC_DCHECK_LE(bit_count, 8);
uint8_t shift = 8 - static_cast<uint8_t>(bit_count);
uint8_t mask = 0xFF << shift;
return (byte & mask) >> shift;
}
// Counts the number of bits used in the binary representation of val.
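// e.g. CountBits(6) == 3 and CountBits(0) == 0.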
size_t CountBits(uint64_t val) {
size_t bit_count = 0;
while (val != 0) {
bit_count++;
val >>= 1;
}
return bit_count;
}
} // namespace
namespace rtc {
BitBuffer::BitBuffer(const uint8_t* bytes, size_t byte_count)
: bytes_(bytes), byte_count_(byte_count), byte_offset_(), bit_offset_() {
RTC_DCHECK(static_cast<uint64_t>(byte_count_) <=
std::numeric_limits<uint32_t>::max());
}
uint64_t BitBuffer::RemainingBitCount() const {
return (static_cast<uint64_t>(byte_count_) - byte_offset_) * 8 - bit_offset_;
}
bool BitBuffer::ReadUInt8(uint8_t* val) {
uint32_t bit_val;
if (!ReadBits(&bit_val, sizeof(uint8_t) * 8)) {
return false;
}
RTC_DCHECK(bit_val <= std::numeric_limits<uint8_t>::max());
*val = static_cast<uint8_t>(bit_val);
return true;
}
bool BitBuffer::ReadUInt16(uint16_t* val) {
uint32_t bit_val;
if (!ReadBits(&bit_val, sizeof(uint16_t) * 8)) {
return false;
}
RTC_DCHECK(bit_val <= std::numeric_limits<uint16_t>::max());
*val = static_cast<uint16_t>(bit_val);
return true;
}
bool BitBuffer::ReadUInt32(uint32_t* val) {
return ReadBits(val, sizeof(uint32_t) * 8);
}
bool BitBuffer::PeekBits(uint32_t* val, size_t bit_count) {
// TODO(nisse): Could allow bit_count == 0 and always return success. But
// current code reads one byte beyond end of buffer in the case that
// RemainingBitCount() == 0 and bit_count == 0.
RTC_DCHECK(bit_count > 0);
if (!val || bit_count > RemainingBitCount() || bit_count > 32) {
return false;
}
const uint8_t* bytes = bytes_ + byte_offset_;
size_t remaining_bits_in_current_byte = 8 - bit_offset_;
uint32_t bits = LowestBits(*bytes++, remaining_bits_in_current_byte);
// If we're reading fewer bits than what's left in the current byte, just
// return the portion of this byte that we need.
if (bit_count < remaining_bits_in_current_byte) {
*val = HighestBits(bits, bit_offset_ + bit_count);
return true;
}
// Otherwise, subtract what we've read from the bit count and read as many
// full bytes as we can into bits.
bit_count -= remaining_bits_in_current_byte;
while (bit_count >= 8) {
bits = (bits << 8) | *bytes++;
bit_count -= 8;
}
// Whatever we have left is smaller than a byte, so grab just the bits we need
// and shift them into the lowest bits.
if (bit_count > 0) {
bits <<= bit_count;
bits |= HighestBits(*bytes, bit_count);
}
*val = bits;
return true;
}
bool BitBuffer::ReadBits(uint32_t* val, size_t bit_count) {
return PeekBits(val, bit_count) && ConsumeBits(bit_count);
}
bool BitBuffer::ConsumeBytes(size_t byte_count) {
return ConsumeBits(byte_count * 8);
}
bool BitBuffer::ConsumeBits(size_t bit_count) {
if (bit_count > RemainingBitCount()) {
return false;
}
byte_offset_ += (bit_offset_ + bit_count) / 8;
bit_offset_ = (bit_offset_ + bit_count) % 8;
return true;
}
bool BitBuffer::ReadNonSymmetric(uint32_t* val, uint32_t num_values) {
RTC_DCHECK_GT(num_values, 0);
RTC_DCHECK_LE(num_values, uint32_t{1} << 31);
if (num_values == 1) {
// When there is only one possible value, it requires zero bits to store it.
// But ReadBits doesn't support reading zero bits.
*val = 0;
return true;
}
size_t count_bits = CountBits(num_values);
uint32_t num_min_bits_values = (uint32_t{1} << count_bits) - num_values;
if (!ReadBits(val, count_bits - 1)) {
return false;
}
if (*val < num_min_bits_values) {
return true;
}
uint32_t extra_bit;
if (!ReadBits(&extra_bit, /*bit_count=*/1)) {
return false;
}
*val = (*val << 1) + extra_bit - num_min_bits_values;
return true;
}
bool BitBuffer::ReadExponentialGolomb(uint32_t* val) {
if (!val) {
return false;
}
// Store off the current byte/bit offset, in case we want to restore them due
// to a failed parse.
size_t original_byte_offset = byte_offset_;
size_t original_bit_offset = bit_offset_;
// Count the number of leading 0 bits by peeking/consuming them one at a time.
size_t zero_bit_count = 0;
uint32_t peeked_bit;
while (PeekBits(&peeked_bit, 1) && peeked_bit == 0) {
zero_bit_count++;
ConsumeBits(1);
}
// We should either be at the end of the stream, or the next bit should be 1.
RTC_DCHECK(!PeekBits(&peeked_bit, 1) || peeked_bit == 1);
// The bit count of the value is the number of zeros + 1. Make sure that many
// bits fit in a uint32_t and that we have enough bits left for it, and then
// read the value.
size_t value_bit_count = zero_bit_count + 1;
if (value_bit_count > 32 || !ReadBits(val, value_bit_count)) {
RTC_CHECK(Seek(original_byte_offset, original_bit_offset));
return false;
}
*val -= 1;
return true;
}
bool BitBuffer::ReadSignedExponentialGolomb(int32_t* val) {
uint32_t unsigned_val;
if (!ReadExponentialGolomb(&unsigned_val)) {
return false;
}
if ((unsigned_val & 1) == 0) {
*val = -static_cast<int32_t>(unsigned_val / 2);
} else {
*val = (unsigned_val + 1) / 2;
}
return true;
}
void BitBuffer::GetCurrentOffset(size_t* out_byte_offset,
size_t* out_bit_offset) {
RTC_CHECK(out_byte_offset != nullptr);
RTC_CHECK(out_bit_offset != nullptr);
*out_byte_offset = byte_offset_;
*out_bit_offset = bit_offset_;
}
bool BitBuffer::Seek(size_t byte_offset, size_t bit_offset) {
if (byte_offset > byte_count_ || bit_offset > 7 ||
(byte_offset == byte_count_ && bit_offset > 0)) {
return false;
}
byte_offset_ = byte_offset;
bit_offset_ = bit_offset;
return true;
}
} // namespace rtc

View file

@ -0,0 +1,104 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_VIDEO_H265_LEGACY_BIT_BUFFER_H_
#define COMMON_VIDEO_H265_LEGACY_BIT_BUFFER_H_
#include <stddef.h> // For size_t.
#include <stdint.h> // For integer types.
namespace rtc {
// A class, similar to ByteBuffer, that can parse bit-sized data out of a set of
// bytes. Has a similar API to ByteBuffer, plus methods for reading bit-sized
// and exponential golomb encoded data. For a writable version, use
// BitBufferWriter. Unlike ByteBuffer, this class doesn't make a copy of the
// source bytes, so it can be used on read-only data.
// Sizes/counts specify bits/bytes, for clarity.
// Byte order is assumed big-endian/network.
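// Illustrative usage sketch (|payload| and |payload_size| are hypothetical
// names, not part of this API):
//   rtc::BitBuffer buffer(payload, payload_size);
//   uint32_t value = 0;
//   if (buffer.ReadBits(&value, 3) && buffer.ConsumeBits(5)) {
//     // |value| holds the first 3 bits; the offset is now at the next byte.
//   }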
class BitBuffer {
public:
BitBuffer(const uint8_t* bytes, size_t byte_count);
BitBuffer(const BitBuffer&) = delete;
BitBuffer& operator=(const BitBuffer&) = delete;
// Gets the current offset, in bytes/bits, from the start of the buffer. The
// bit offset is the offset into the current byte, in the range [0,7].
void GetCurrentOffset(size_t* out_byte_offset, size_t* out_bit_offset);
// The remaining bits in the byte buffer.
uint64_t RemainingBitCount() const;
// Reads byte-sized values from the buffer. Returns false if there isn't
// enough data left for the specified type.
bool ReadUInt8(uint8_t* val);
bool ReadUInt16(uint16_t* val);
bool ReadUInt32(uint32_t* val);
// Reads bit-sized values from the buffer. Returns false if there isn't enough
// data left for the specified bit count.
bool ReadBits(uint32_t* val, size_t bit_count);
// Peeks bit-sized values from the buffer. Returns false if there isn't enough
// data left for the specified number of bits. Doesn't move the current
// offset.
bool PeekBits(uint32_t* val, size_t bit_count);
// Reads value in range [0, num_values - 1].
// This encoding is similar to ReadBits(val, Ceil(Log2(num_values))), but
// reduces the waste incurred when encoding non-power-of-two value ranges.
// Non symmetric values are encoded as:
// 1) n = countbits(num_values)
// 2) k = (1 << n) - num_values
// Value v in range [0, k - 1] is encoded in (n-1) bits.
// Value v in range [k, num_values - 1] is encoded as (v+k) in n bits.
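// Example: with num_values = 6, n = 3 and k = (1 << 3) - 6 = 2, so values 0-1
// are written as-is in 2 bits ('00', '01') and values 2-5 are written as
// v + 2 in 3 bits ('100' through '111').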
// https://aomediacodec.github.io/av1-spec/#nsn
// Returns false if there isn't enough data left.
bool ReadNonSymmetric(uint32_t* val, uint32_t num_values);
// Reads the exponential golomb encoded value at the current offset.
// Exponential golomb values are encoded as:
// 1) x = source val + 1
// 2) In binary, write [countbits(x) - 1] 0s, then x
// To decode, we count the number of leading 0 bits, read that many + 1 bits,
// and decrement the result by 1.
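// Example: source value 3 becomes x = 4 = '100', written as '00100'; the
// decoder counts two leading zeros, reads the next 3 bits (4), and subtracts 1.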
// Returns false if there isn't enough data left for the specified type, or if
// the value wouldn't fit in a uint32_t.
bool ReadExponentialGolomb(uint32_t* val);
// Reads signed exponential golomb values at the current offset. Signed
// exponential golomb values are just the unsigned values mapped to the
// sequence 0, 1, -1, 2, -2, etc. in order.
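// e.g. unsigned values 0, 1, 2, 3, 4 decode to 0, 1, -1, 2, -2 respectively.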
bool ReadSignedExponentialGolomb(int32_t* val);
// Moves current position |byte_count| bytes forward. Returns false if
// there aren't enough bytes left in the buffer.
bool ConsumeBytes(size_t byte_count);
// Moves current position |bit_count| bits forward. Returns false if
// there aren't enough bits left in the buffer.
bool ConsumeBits(size_t bit_count);
// Sets the current offset to the provided byte/bit offsets. The bit
// offset is from the given byte, in the range [0,7].
bool Seek(size_t byte_offset, size_t bit_offset);
protected:
const uint8_t* const bytes_;
// The total size of |bytes_|.
size_t byte_count_;
// The current offset, in bytes, from the start of |bytes_|.
size_t byte_offset_;
// The current offset, in bits, into the current byte.
size_t bit_offset_;
};
} // namespace rtc
#endif // COMMON_VIDEO_H265_LEGACY_BIT_BUFFER_H_