OggCodecState.cpp - mozsearch

mozilla-central/dom/media/ogg/OggCodecState.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=2 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <string.h>

#include "mozilla/EndianUtils.h"

#include "mozilla/ScopeExit.h"

#include "mozilla/TextUtils.h"

#include "mozilla/Utf8.h"

#include <stdint.h>

#include <algorithm>

#include <opus/opus.h>

#include <opus/opus_multistream.h>

#include "OggCodecState.h"

#include "OggRLBox.h"

#include "OpusParser.h"

#include "VideoUtils.h"

#include "XiphExtradata.h"

#include "nsDebug.h"

namespace mozilla {

extern LazyLogModule gMediaDecoderLog;

#define LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg)

using media::TimeUnit;

/** Decoder base class for Ogg-encapsulated streams. */

// Factory for codec states. Looks at the magic bytes at the start of the
// given beginning-of-stream page body and instantiates the matching
// OggCodecState subclass, falling back to an inactive base-class state when
// no known magic is found. Returns null if the underlying ogg stream state
// cannot be initialized.
UniquePtr<OggCodecState> OggCodecState::Create(
    rlbox_sandbox_ogg* aSandbox, tainted_opaque_ogg<ogg_page*> aPage,
    uint32_t aSerial) {
  NS_ASSERTION(sandbox_invoke(*aSandbox, ogg_page_bos, aPage)
                   .unverified_safe_because(RLBOX_SAFE_DEBUG_ASSERTION),
               "Only call on BOS page!");

  const char codecReason[] =
      "These conditions set the type of codec. Since we are relying on "
      "ogg_page to determine the codec type, the library could lie about "
      "this. We allow this as it does not directly allow renderer "
      "vulnerabilities if this is incorrect.";

  tainted_ogg<ogg_page*> page = rlbox::from_opaque(aPage);
  const long bodyLen = page->body_len.unverified_safe_because(codecReason);

  UniquePtr<OggCodecState> codecState;

  // Each length test short-circuits the memcmp so that the comparison never
  // runs on a body that is too short to hold the magic.
  if (bodyLen > 6 && rlbox::memcmp(*aSandbox, page->body + 1, "theora", 6u)
                             .unverified_safe_because(codecReason) == 0) {
    codecState = MakeUnique<TheoraState>(aSandbox, aPage, aSerial);
  } else if (bodyLen > 6 &&
             rlbox::memcmp(*aSandbox, page->body + 1, "vorbis", 6u)
                     .unverified_safe_because(codecReason) == 0) {
    codecState = MakeUnique<VorbisState>(aSandbox, aPage, aSerial);
  } else if (bodyLen > 8 &&
             rlbox::memcmp(*aSandbox, page->body, "OpusHead", 8u)
                     .unverified_safe_because(codecReason) == 0) {
    codecState = MakeUnique<OpusState>(aSandbox, aPage, aSerial);
  } else if (bodyLen > 8 &&
             rlbox::memcmp(*aSandbox, page->body, "fishead\0", 8u)
                     .unverified_safe_because(codecReason) == 0) {
    codecState = MakeUnique<SkeletonState>(aSandbox, aPage, aSerial);
  } else if (bodyLen > 5 &&
             rlbox::memcmp(*aSandbox, page->body, "\177FLAC", 5u)
                     .unverified_safe_because(codecReason) == 0) {
    codecState = MakeUnique<FlacState>(aSandbox, aPage, aSerial);
  } else {
    // Can't use MakeUnique here, OggCodecState is protected.
    codecState.reset(new OggCodecState(aSandbox, aPage, aSerial, false));
  }

  // Explicitly call the base-class initializer, whatever subclass was built.
  const bool initialized = codecState->OggCodecState::InternalInit();
  if (!initialized) {
    codecState.reset();
  }
  return codecState;
}

// Builds the common codec state: records the serial and activity flags, then
// allocates a zero-filled ogg_stream_state inside the sandbox. Allocation
// failure is fatal (release assert).
OggCodecState::OggCodecState(rlbox_sandbox_ogg* aSandbox,
                             tainted_opaque_ogg<ogg_page*> aBosPage,
                             uint32_t aSerial, bool aActive)
    : mPacketCount(0),
      mSerial(aSerial),
      mActive(aActive),
      // An inactive stream has no headers to read.
      mDoneReadingHeaders(!aActive),
      mSandbox(aSandbox) {
  MOZ_COUNT_CTOR(OggCodecState);

  tainted_ogg<ogg_stream_state*> streamState =
      mSandbox->malloc_in_sandbox<ogg_stream_state>();
  MOZ_RELEASE_ASSERT(streamState != nullptr);

  rlbox::memset(*mSandbox, streamState, 0, sizeof(ogg_stream_state));
  mState = streamState.to_opaque();
}

// Tears down the stream: resets it, clears libogg's internal buffers, frees
// the sandboxed ogg_stream_state and nulls out the stored handle.
OggCodecState::~OggCodecState() {
  MOZ_COUNT_DTOR(OggCodecState);
  Reset();

  // The return value is only inspected by the assertion below, so it is only
  // captured in debug builds.
#ifdef DEBUG
  int clearStatus =
#endif
      sandbox_invoke(*mSandbox, ogg_stream_clear, mState)
          .unverified_safe_because(RLBOX_SAFE_DEBUG_ASSERTION);
  NS_ASSERTION(clearStatus == 0, "ogg_stream_clear failed");

  mSandbox->free_in_sandbox(rlbox::from_opaque(mState));

  tainted_ogg<ogg_stream_state*> nullState = nullptr;
  mState = nullState.to_opaque();
}

// Resets the sandboxed ogg stream and drops every queued and unstamped
// packet. Returns NS_ERROR_FAILURE (leaving the queues untouched) if
// ogg_stream_reset reports an error.
nsresult OggCodecState::Reset() {
  const bool resetFailed =
      sandbox_invoke(*mSandbox, ogg_stream_reset, mState)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) != 0;
  if (resetFailed) {
    return NS_ERROR_FAILURE;
  }

  mPackets.Erase();
  ClearUnstamped();
  return NS_OK;
}

// Discards all packets that are still waiting for a granulepos.
void OggCodecState::ClearUnstamped() {
  mUnstamped.Clear();
}

// Initializes the sandboxed ogg_stream_state for this stream's serial number.
// Returns true when ogg_stream_init reports success (0).
bool OggCodecState::InternalInit() {
  const int status =
      sandbox_invoke(*mSandbox, ogg_stream_init, mState, mSerial)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON);
  return status == 0;
}

// Returns true when every character of aName is allowed in a Vorbis comment
// tag name: ASCII 0x20 through 0x7D, excluding 0x3D '=' (the key/value
// separator). An empty name is accepted.
bool OggCodecState::IsValidVorbisTagName(nsCString& aName) {
  const char* cursor = aName.Data();
  const char* const end = cursor + aName.Length();

  for (; cursor != end; ++cursor) {
    const char ch = *cursor;
    const bool inAllowedRange = ch >= 0x20 && ch <= 0x7D;
    if (!inAllowedRange || ch == '=') {
      return false;
    }
  }
  return true;
}

// Parses one "KEY=value" Vorbis comment and stores it in aTags.
//
// aComment points at aLength bytes of comment data; it is not required to be
// NUL-terminated. The comment is skipped (and false returned) when it has no
// '=' separator, when the key contains characters that are not valid in a
// tag name, or when the value is not valid UTF-8. On success the key/value
// pair is inserted (replacing any existing entry for the key) and true is
// returned.
bool OggCodecState::AddVorbisComment(UniquePtr<MetadataTags>& aTags,
                                     const char* aComment, uint32_t aLength) {
  const char* div = static_cast<const char*>(memchr(aComment, '=', aLength));
  if (!div) {
    LOG(LogLevel::Debug, ("Skipping comment: no separator"));
    return false;
  }

  // memchr only searches [aComment, aComment + aLength), so keyLength is
  // strictly less than aLength.
  const uint32_t keyLength = static_cast<uint32_t>(div - aComment);
  nsCString key = nsCString(aComment, keyLength);
  if (!IsValidVorbisTagName(key)) {
    LOG(LogLevel::Debug, ("Skipping comment: invalid tag name"));
    return false;
  }

  // The value begins one byte past the separator, so its length excludes
  // both the key and the '=' itself. Forgetting the separator would read one
  // byte beyond the end of the comment buffer.
  const uint32_t valueLength = aLength - keyLength - 1;
  nsCString value = nsCString(div + 1, valueLength);
  if (!IsUtf8(value)) {
    LOG(LogLevel::Debug, ("Skipping comment: invalid UTF-8 in value"));
    return false;
  }

  aTags->InsertOrUpdate(key, value);
  return true;
}

// Packs the queued header packets into aBuffer as Xiph extradata for the
// decoder. On success the header queue is emptied and true is returned; on
// failure the queue is left as-is and false is returned.
bool OggCodecState::SetCodecSpecificConfig(MediaByteBuffer* aBuffer,
                                           OggPacketQueue& aHeaders) {
  nsTArray<const unsigned char*> packetData;
  nsTArray<size_t> packetSizes;

  const size_t headerCount = aHeaders.Length();
  for (size_t idx = 0; idx < headerCount; ++idx) {
    auto&& header = aHeaders[idx];
    packetData.AppendElement(header->packet);
    packetSizes.AppendElement(header->bytes);
  }

  // Save header packets for the decoder
  const bool packed = XiphHeadersToExtradata(aBuffer, packetData, packetSizes);
  if (!packed) {
    return false;
  }

  aHeaders.Erase();
  return true;
}

// Remembers the predicted sample count for aPacket. Compiles to a no-op
// unless VALIDATE_VORBIS_SAMPLE_CALCULATION is defined.
void VorbisState::RecordVorbisPacketSamples(ogg_packet* aPacket,
                                            long aSamples) {
#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
  mVorbisPacketSamples[aPacket] = aSamples;
#endif
}

// Asserts that the recorded sample prediction for aPacket equals aSamples,
// then forgets the record. Compiles to a no-op unless
// VALIDATE_VORBIS_SAMPLE_CALCULATION is defined.
void VorbisState::ValidateVorbisPacketSamples(ogg_packet* aPacket,
                                              long aSamples) {
#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
  NS_ASSERTION(mVorbisPacketSamples[aPacket] == aSamples,
               "Decoded samples for Vorbis packet don't match expected!");
  mVorbisPacketSamples.erase(aPacket);
#endif
}

// Asserts that exactly one sample-count record exists for aPacket. Compiles
// to a no-op unless VALIDATE_VORBIS_SAMPLE_CALCULATION is defined.
void VorbisState::AssertHasRecordedPacketSamples(ogg_packet* aPacket) {
#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
  NS_ASSERTION(mVorbisPacketSamples.count(aPacket) == 1,
               "Must have recorded packet samples");
#endif
}

// Copies a packet that lives in sandbox-accessible memory into a freshly
// allocated ogg_packet (header fields plus payload bytes) outside the
// sandbox, and hands ownership of the copy to the returned OggPacketPtr.
// A zero-length packet gets a null payload pointer.
static OggPacketPtr CloneOutOfSandbox(tainted_ogg<ogg_packet*> aPacket) {
  ogg_packet* cloned = aPacket.copy_and_verify(
      [](std::unique_ptr<tainted_ogg<ogg_packet>> sandboxed) {
        const char packetReason[] =
            "Packets have no guarantees on what data they hold. The renderer's "
            "safety is not compromised even if packets return garbage data.";

        ogg_packet* copy = new ogg_packet();
        copy->bytes = sandboxed->bytes.unverified_safe_because(packetReason);
        copy->b_o_s = sandboxed->b_o_s.unverified_safe_because(packetReason);
        copy->e_o_s = sandboxed->e_o_s.unverified_safe_because(packetReason);
        copy->granulepos =
            sandboxed->granulepos.unverified_safe_because(packetReason);
        copy->packetno =
            sandboxed->packetno.unverified_safe_because(packetReason);

        if (copy->bytes != 0) {
          // Copy the payload out of the sandbox and keep the raw buffer.
          copy->packet = sandboxed->packet.copy_and_verify_range(
              [](std::unique_ptr<unsigned char[]> payload) {
                return payload.release();
              },
              copy->bytes);
        } else {
          copy->packet = nullptr;
        }
        return copy;
      });

  return OggPacketPtr(cloned);
}

// Appends aPacket to the back of the queue; the queue takes over the raw
// pointer released from aPacket.
void OggPacketQueue::Append(OggPacketPtr aPacket) {
  ogg_packet* raw = aPacket.release();
  nsDeque::Push(raw);
}

// Returns true when at least one packet is queued for output.
bool OggCodecState::IsPacketReady() {
  const bool queueEmpty = mPackets.IsEmpty();
  return !queueEmpty;
}

// Removes and returns the packet at the front of the queue, or null when the
// queue is empty.
OggPacketPtr OggCodecState::PacketOut() {
  if (!mPackets.IsEmpty()) {
    return mPackets.PopFront();
  }
  return nullptr;
}

// Returns the packet at the front of the queue without removing it, or null
// when the queue is empty.
ogg_packet* OggCodecState::PacketPeek() {
  if (!mPackets.IsEmpty()) {
    return mPackets.PeekFront();
  }
  return nullptr;
}

// Drains aOther into the front of this state's packet queue. Packets are
// taken with aOther.Pop() and pushed to the front one at a time until aOther
// is empty.
void OggCodecState::PushFront(OggPacketQueue&& aOther) {
  for (; !aOther.IsEmpty();) {
    mPackets.PushFront(aOther.Pop());
  }
}

// Pops the next queued packet and wraps its payload in a MediaRawData sample
// with timecode, start time, duration, keyframe and end-of-stream flags
// filled in. Returns null when no packet is queued or when the sample buffer
// could not be allocated. An invalid or negative duration is replaced by a
// zero duration (with a warning).
already_AddRefed<MediaRawData> OggCodecState::PacketOutAsMediaRawData() {
  OggPacketPtr next = PacketOut();
  if (!next) {
    return nullptr;
  }
  ogg_packet* raw = next.get();

  NS_ASSERTION(
      !IsHeader(raw),
      "PacketOutAsMediaRawData can only be called on non-header packets");

  RefPtr<MediaRawData> sample = new MediaRawData(raw->packet, raw->bytes);
  if (raw->bytes && !sample->Data()) {
    // OOM.
    return nullptr;
  }

  // The granulepos marks the end of the packet; the start time is derived by
  // subtracting the duration.
  TimeUnit endTimestamp = Time(raw->granulepos);
  NS_ASSERTION(endTimestamp.IsPositiveOrZero(), "timestamp invalid");

  TimeUnit duration = PacketDuration(raw);
  const bool durationUsable =
      duration.IsValid() && duration.IsPositiveOrZero();
  if (!durationUsable) {
    NS_WARNING(
        nsPrintfCString("duration invalid! (%s)", duration.ToString().get())
            .get());
    duration = TimeUnit::Zero(endTimestamp);
  }

  sample->mTimecode = Time(raw->granulepos);
  sample->mTime = endTimestamp - duration;
  sample->mDuration = duration;
  sample->mKeyframe = IsKeyframe(raw);
  sample->mEOS = raw->e_o_s;

  return sample.forget();
}

// Feeds a page into the ogg stream and moves every complete packet it yields
// (cloned out of the sandbox) onto the packet queue. Inactive streams ignore
// the page and report success.
nsresult OggCodecState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) {
  if (!mActive) {
    return NS_OK;
  }
  NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke(
                    *mSandbox, ogg_page_serialno, aPage)) == mSerial)
                   .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON),
               "Page must be for this stream!");

  const bool pageRejected =
      sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1;
  if (pageRejected) {
    return NS_ERROR_FAILURE;
  }

  // Scratch packet in sandbox memory that ogg_stream_packetout fills in;
  // released on every exit path.
  tainted_ogg<ogg_packet*> scratch = mSandbox->malloc_in_sandbox<ogg_packet>();
  if (!scratch) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  auto freeScratch =
      MakeScopeExit([&] { mSandbox->free_in_sandbox(scratch); });

  // Keep pulling until libogg returns 0. Only a return of 1 yields a packet
  // to queue; any other non-zero return just continues the loop.
  for (;;) {
    const int status =
        sandbox_invoke(*mSandbox, ogg_stream_packetout, mState, scratch)
            .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON);
    if (status == 0) {
      break;
    }
    if (status == 1) {
      mPackets.Append(CloneOutOfSandbox(scratch));
    }
  }

  if (sandbox_invoke(*mSandbox, ogg_stream_check, mState)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) {
    NS_WARNING("Unrecoverable error in ogg_stream_packetout");
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}

// Pulls packets out of the ogg stream until either none are left or a data
// packet with a granulepos greater than zero is seen. Header packets are
// queued directly; data packets are parked in mUnstamped so their granulepos
// can be reconstructed later. aFoundGranulepos reports whether the last data
// packet pulled had a granulepos greater than zero.
nsresult OggCodecState::PacketOutUntilGranulepos(bool& aFoundGranulepos) {
  aFoundGranulepos = false;

  // Scratch packet in sandbox memory that ogg_stream_packetout fills in;
  // released on every exit path.
  tainted_ogg<ogg_packet*> scratch = mSandbox->malloc_in_sandbox<ogg_packet>();
  if (!scratch) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  auto freeScratch =
      MakeScopeExit([&] { mSandbox->free_in_sandbox(scratch); });

  for (;;) {
    tainted_ogg<int> result =
        sandbox_invoke(*mSandbox, ogg_stream_packetout, mState, scratch);

    if (result.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 1) {
      OggPacketPtr copy = CloneOutOfSandbox(scratch);
      if (!IsHeader(copy.get())) {
        // We buffer data packets until we encounter a granulepos. We'll
        // then use the granulepos to figure out the granulepos of the
        // preceding packets.
        aFoundGranulepos = copy.get()->granulepos > 0;
        mUnstamped.AppendElement(std::move(copy));
      } else {
        // Header packets go straight into the packet queue.
        mPackets.Append(std::move(copy));
      }
    }

    const bool drained =
        result.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 0;
    if (drained || aFoundGranulepos) {
      break;
    }
  }

  if (sandbox_invoke(*mSandbox, ogg_stream_check, mState)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) {
    NS_WARNING("Unrecoverable error in ogg_stream_packetout");
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}

// Creates an active Theora codec state with no setup info or decoder context
// yet, and initializes the libtheora info and comment structures.
TheoraState::TheoraState(rlbox_sandbox_ogg* aSandbox,
                         tainted_opaque_ogg<ogg_page*> aBosPage,
                         uint32_t aSerial)
    : OggCodecState(aSandbox, aBosPage, aSerial, /* aActive = */ true),
      mSetup(nullptr),
      mCtx(nullptr) {
  MOZ_COUNT_CTOR(TheoraState);

  th_info_init(&mTheoraInfo);
  th_comment_init(&mComment);
}

// Frees the libtheora setup info, decoder context, comment and info
// structures, then resets the stream state.
TheoraState::~TheoraState() {
  MOZ_COUNT_DTOR(TheoraState);

  th_setup_free(mSetup);
  th_decode_free(mCtx);
  th_comment_clear(&mComment);
  th_info_clear(&mTheoraInfo);

  Reset();
}

// Validates the video geometry read from the headers, allocates the Theora
// decoder context and fills in mInfo. Returns whether the stream is still
// active afterwards; any failure deactivates the stream.
bool TheoraState::Init() {
  if (!mActive) {
    return false;
  }

  // A zero numerator or denominator means "unspecified"; treat as square
  // pixels.
  const int64_t aspectNum = mTheoraInfo.aspect_numerator;
  const int64_t aspectDen = mTheoraInfo.aspect_denominator;
  float aspectRatio = 1.0f;
  if (aspectNum != 0 && aspectDen != 0) {
    aspectRatio =
        static_cast<float>(aspectNum) / static_cast<float>(aspectDen);
  }

  // Ensure the frame and picture regions aren't larger than our prescribed
  // maximum, or zero sized.
  gfx::IntSize frame(mTheoraInfo.frame_width, mTheoraInfo.frame_height);
  gfx::IntRect picture(mTheoraInfo.pic_x, mTheoraInfo.pic_y,
                       mTheoraInfo.pic_width, mTheoraInfo.pic_height);
  gfx::IntSize display(mTheoraInfo.pic_width, mTheoraInfo.pic_height);
  ScaleDisplayByAspectRatio(display, aspectRatio);
  if (!IsValidVideoRegion(frame, picture, display)) {
    mActive = false;
    return false;
  }

  mCtx = th_decode_alloc(&mTheoraInfo, mSetup);
  if (!mCtx) {
    mActive = false;
    return false;
  }

  // Video track's frame sizes will not overflow. Activate the video track.
  mInfo.mMimeType = "video/theora"_ns;
  mInfo.mDisplay = display;
  mInfo.mImage = frame;
  mInfo.SetImageRect(picture);

  mActive = SetCodecSpecificConfig(mInfo.mCodecSpecificConfig, mHeaders);
  return mActive;
}

// Drops any buffered header packets, then performs the base-class reset and
// returns its result.
nsresult TheoraState::Reset() {
  mHeaders.Erase();
  const nsresult baseResult = OggCodecState::Reset();
  return baseResult;
}

// Feeds one header packet to libtheora and tracks header progress. The
// packet is retained in mHeaders. Returns false on a decode error or when
// more than three header packets have been seen; otherwise true. Sets
// mDoneReadingHeaders once the third packet is a successfully decoded setup
// header.
bool TheoraState::DecodeHeader(OggPacketPtr aPacket) {
  // Keep a raw alias; ownership moves into mHeaders on the next line.
  ogg_packet* header = aPacket.get();
  mHeaders.Append(std::move(aPacket));
  mPacketCount++;

  const int status =
      th_decode_headerin(&mTheoraInfo, &mComment, &mSetup, header);

  // We must determine when we've read the last header packet.
  // th_decode_headerin() does not tell us when it's read the last header, so
  // we must keep track of the headers externally.
  //
  // There are 3 header packets, the Identification, Comment, and Setup
  // headers, which must be in that order. If they're out of order, the file
  // is invalid. If we've successfully read a header, and it's the setup
  // header, then we're done reading headers. The first byte of each packet
  // determines its type as follows:
  //    0x80 -> Identification header
  //    0x81 -> Comment header
  //    0x82 -> Setup header
  // See http://www.theora.org/doc/Theora.pdf Chapter 6, "Bitstream Headers",
  // for more details of the Ogg/Theora containment scheme.
  const bool isSetupHeader = header->bytes > 0 && header->packet[0] == 0x82;

  if (status < 0 || mPacketCount > 3) {
    // We've received an error, or the first three packets weren't valid
    // header packets. Assume bad input.
    // Our caller will deactivate the bitstream.
    return false;
  }

  const bool headersComplete =
      status > 0 && isSetupHeader && mPacketCount == 3;
  if (headersComplete) {
    // Successfully read the three header packets.
    mDoneReadingHeaders = true;
  }
  return true;
}

// Converts a granulepos to a time using this stream's Theora info. Returns
// an invalid TimeUnit when the stream is inactive.
TimeUnit TheoraState::Time(int64_t aGranulepos) {
  if (mActive) {
    return TheoraState::Time(&mTheoraInfo, aGranulepos);
  }
  return TimeUnit::Invalid();
}

// Reports whether libtheora classifies aPacket as a header packet
// (non-zero return from th_packet_isheader).
bool TheoraState::IsHeader(ogg_packet* aPacket) {
  const bool header = th_packet_isheader(aPacket);
  return header;
}

// Evaluates to true when the bitstream version stored in *_info is
// (_maj, _min, _sub) or later, comparing lexicographically: a larger major
// version wins outright, then a larger minor version, and only when both are
// equal is the subminor version compared.
#define TH_VERSION_CHECK(_info, _maj, _min, _sub)   \
  ((_info)->version_major > (_maj) ||               \
   ((_info)->version_major == (_maj) &&             \
    ((_info)->version_minor > (_min) ||             \
     ((_info)->version_minor == (_min) &&           \
      (_info)->version_subminor >= (_sub)))))

// Converts a Theora granulepos to the end time of the frame it denotes, in
// microseconds, using the frame rate stored in aInfo. Returns an invalid
// TimeUnit for a negative granulepos, a zero frame-rate numerator, or when
// the intermediate multiplication overflows.
TimeUnit TheoraState::Time(th_info* aInfo, int64_t aGranulepos) {
  const bool unusable = aGranulepos < 0 || aInfo->fps_numerator == 0;
  if (unusable) {
    return TimeUnit::Invalid();
  }

  // Implementation of th_granule_frame inlined here to operate
  // on the th_info structure instead of the theora_state.
  const int granuleShift = aInfo->keyframe_granule_shift;
  const ogg_int64_t keyframePart = aGranulepos >> granuleShift;
  const ogg_int64_t offsetPart = aGranulepos - (keyframePart << granuleShift);
  const int64_t frameNumber =
      keyframePart + offsetPart - TH_VERSION_CHECK(aInfo, 3, 2, 1);

  CheckedInt64 usecs =
      ((CheckedInt64(frameNumber) + 1) * USECS_PER_S) * aInfo->fps_denominator;
  if (!usecs.isValid()) {
    return TimeUnit::Invalid();
  }
  usecs /= aInfo->fps_numerator;

  // TODO -- use rationals here
  return TimeUnit::FromMicroseconds(usecs.value());
}

// Converts a granulepos to the start time of its frame, in microseconds.
// Returns an invalid TimeUnit for a negative granulepos, an inactive stream,
// a zero frame-rate numerator, or arithmetic overflow.
TimeUnit TheoraState::StartTime(int64_t aGranulepos) {
  const bool usable =
      aGranulepos >= 0 && mActive && mTheoraInfo.fps_numerator != 0;
  if (!usable) {
    return TimeUnit::Invalid();
  }

  const CheckedInt64 scaled =
      (CheckedInt64(th_granule_frame(mCtx, aGranulepos)) * USECS_PER_S) *
      mTheoraInfo.fps_denominator;
  if (!scaled.isValid()) {
    return TimeUnit::Invalid();
  }

  // TODO -- use rationals here
  return TimeUnit::FromMicroseconds(scaled.value() /
                                    mTheoraInfo.fps_numerator);
}

// Returns the duration of one frame in microseconds, computed from the
// stream's frame rate (aPacket itself is not inspected). Returns an invalid
// TimeUnit for an inactive stream, a zero frame-rate numerator, or overflow.
TimeUnit TheoraState::PacketDuration(ogg_packet* aPacket) {
  if (!mActive || mTheoraInfo.fps_numerator == 0) {
    return TimeUnit::Invalid();
  }

  const CheckedInt64 frameUsecs = SaferMultDiv(
      mTheoraInfo.fps_denominator, USECS_PER_S, mTheoraInfo.fps_numerator);
  if (!frameUsecs.isValid()) {
    return TimeUnit::Invalid();
  }
  return TimeUnit::FromMicroseconds(frameUsecs.value());
}

// Determines the maximum time in microseconds by which a key frame could be
// offset for the theora bitstream. Theora granulepos encode time as:
// ((key_frame_number << granule_shift) + frame_offset).
// Therefore the maximum possible time by which any frame could be offset
// from a keyframe is the duration of ((1 << granule_shift) - 1) frames.
//
// Returns an invalid TimeUnit when the frame-rate numerator is zero (which
// would otherwise divide by zero) or when the result does not fit in 64
// bits.
TimeUnit TheoraState::MaxKeyframeOffset() {
  if (mTheoraInfo.fps_numerator == 0) {
    return TimeUnit::Invalid();
  }

  // Max number of frames keyframe could possibly be offset. The shift is done
  // in 64 bits so that a granule shift of 31 does not overflow a 32-bit int.
  const int64_t keyframeDiff =
      (static_cast<int64_t>(1) << mTheoraInfo.keyframe_granule_shift) - 1;

  // Length of frame in usecs.
  const int64_t frameDuration =
      (mTheoraInfo.fps_denominator * USECS_PER_S) / mTheoraInfo.fps_numerator;

  // Total time in usecs keyframe can be offset from any given frame.
  const CheckedInt64 maxOffset = CheckedInt64(frameDuration) * keyframeDiff;
  if (!maxOffset.isValid()) {
    return TimeUnit::Invalid();
  }
  return TimeUnit::FromMicroseconds(maxOffset.value());
}

// Returns true when aPacket is non-empty and bit 0x40 of its first byte is
// clear.
bool TheoraState::IsKeyframe(ogg_packet* aPacket) {
  // first bit of packet is 1 for header, 0 for data
  // second bit of packet is 1 for inter frame, 0 for intra frame
  if (aPacket->bytes < 1) {
    return false;
  }
  const bool interFrameBitSet = (aPacket->packet[0] & 0x40) != 0x00;
  return !interFrameBitSet;
}

// Feeds a page into the ogg stream and extracts packets from it. Once a
// packet with a granulepos has been seen and the headers are fully read, the
// granulepos of all buffered data packets is reconstructed and they are moved
// onto the packet queue. Inactive streams ignore the page.
nsresult TheoraState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) {
  if (!mActive) {
    return NS_OK;
  }

  NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke(
                    *mSandbox, ogg_page_serialno, aPage)) == mSerial)
                   .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON),
               "Page must be for this stream!");

  const bool pageRejected =
      sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1;
  if (pageRejected) {
    return NS_ERROR_FAILURE;
  }

  bool sawGranulepos;
  const nsresult rv = PacketOutUntilGranulepos(sawGranulepos);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (!sawGranulepos || !mDoneReadingHeaders) {
    // Nothing can be stamped yet; keep buffering.
    return NS_OK;
  }

  // We've found a packet with a granulepos, and we've loaded our metadata
  // and initialized our decoder. Determine granulepos of buffered packets.
  ReconstructTheoraGranulepos();
  for (auto& slot : mUnstamped) {
    OggPacketPtr stamped = std::move(slot);
#ifdef DEBUG
    NS_ASSERTION(!IsHeader(stamped.get()),
                 "Don't try to recover header packet gp");
    NS_ASSERTION(stamped->granulepos != -1, "Packet must have gp by now");
#endif
    mPackets.Append(std::move(stamped));
  }
  mUnstamped.Clear();
  return NS_OK;
}

// Returns 1 if the Theora info struct is decoding a media of Theora
// version (maj,min,sub) or later, otherwise returns 0. Both versions are
// packed as (major << 16) + (minor << 8) + subminor and compared as single
// integers.
int TheoraVersion(th_info* info, unsigned char maj, unsigned char min,
                  unsigned char sub) {
  const ogg_uint32_t wanted = (maj << 16) + (min << 8) + sub;
  const ogg_uint32_t actual = (info->version_major << 16) +
                              (info->version_minor << 8) +
                              info->version_subminor;
  if (actual >= wanted) {
    return 1;
  }
  return 0;
}

// Assigns a granulepos to every packet in mUnstamped except the last one,

// whose granulepos is taken as the known reference. Frame numbers are

// counted back from the last packet, one per buffered packet, and each

// packet's granulepos is rebuilt from its frame number and the most recent

// keyframe seen in the buffer. Does nothing when mUnstamped is empty.

void TheoraState::ReconstructTheoraGranulepos() {

  if (mUnstamped.Length() == 0) {

    return;

  ogg_int64_t lastGranulepos = mUnstamped[mUnstamped.Length() - 1]->granulepos;

  NS_ASSERTION(lastGranulepos != -1, "Must know last granulepos");

  // Reconstruct the granulepos (and thus timestamps) of the decoded

  // frames. Granulepos are stored as ((keyframe<<shift)+offset). We

  // know the granulepos of the last frame in the list, so we can infer

  // the granulepos of the intermediate frames using their frame numbers.

  ogg_int64_t shift = mTheoraInfo.keyframe_granule_shift;

  // 1 for streams of Theora version 3.2.1 or later, 0 for older streams.

  ogg_int64_t version_3_2_1 = TheoraVersion(&mTheoraInfo, 3, 2, 1);

  ogg_int64_t lastFrame =

      th_granule_frame(mCtx, lastGranulepos) + version_3_2_1;

  // One frame per buffered packet, ending at lastFrame.

  ogg_int64_t firstFrame =

      AssertedCast<ogg_int64_t>(lastFrame - mUnstamped.Length() + 1);

  // Until we encounter a keyframe, we'll assume that the "keyframe"

  // segment of the granulepos is the first frame, or if that causes

  // the "offset" segment to overflow, we assume the required

  // keyframe is maximally offset. Until we encounter a keyframe

  // the granulepos will probably be wrong, but we can't decode the

  // frame anyway (since we don't have its keyframe) so it doesn't really

  // matter.

  ogg_int64_t keyframe = lastGranulepos >> shift;

  // The lastFrame, firstFrame, keyframe variables, as well as the frame

  // variable in the loop below, store the frame number for Theora

  // version >= 3.2.1 streams, and store the frame index for Theora

  // version < 3.2.1 streams.

  // The last packet is skipped: its granulepos is already known.

  for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) {

    ogg_int64_t frame = firstFrame + i;

    ogg_int64_t granulepos;

    auto& packet = mUnstamped[i];

    bool isKeyframe = th_packet_iskeyframe(packet.get()) == 1;

    if (isKeyframe) {

      // A keyframe has offset 0 and becomes the reference for later frames.

      granulepos = frame << shift;

      keyframe = frame;

    } else if (frame >= keyframe &&

               frame - keyframe < ((ogg_int64_t)1 << shift)) {

      // (frame - keyframe) won't overflow the "offset" segment of the

      // granulepos, so it's safe to calculate the granulepos.

      granulepos = (keyframe << shift) + (frame - keyframe);

    } else {

      // (frame - keyframeno) will overflow the "offset" segment of the

      // granulepos, so we take "keyframe" to be the max possible offset

      // frame instead.

      ogg_int64_t k =

          std::max(frame - (((ogg_int64_t)1 << shift) - 1), version_3_2_1);

      granulepos = (k << shift) + (frame - k);

    // Theora 3.2.1+ granulepos store frame number [1..N], so granulepos

    // should be > 0.

    // Theora 3.2.0 granulepos store the frame index [0..(N-1)], so

    // granulepos should be >= 0.

    NS_ASSERTION(granulepos >= version_3_2_1,

                 "Invalid granulepos for Theora version");

    // Check that the frame's granule number is one more than the

    // previous frame's.

    NS_ASSERTION(

        i == 0 || th_granule_frame(mCtx, granulepos) ==

                      th_granule_frame(mCtx, mUnstamped[i - 1]->granulepos) + 1,

        "Granulepos calculation is incorrect!");

    packet->granulepos = granulepos;

  // Check that the second to last frame's granule number is one less than

  // the last frame's (the known granule number). If not our granulepos

  // recovery missed a beat.

  NS_ASSERTION(mUnstamped.Length() < 2 ||

                   (th_granule_frame(

                        mCtx, mUnstamped[mUnstamped.Length() - 2]->granulepos) +

                    1) == th_granule_frame(mCtx, lastGranulepos),

               "Granulepos recovery should catch up with packet->granulepos!");

// Restarts Vorbis synthesis (when active), drops buffered headers, resets the
// base stream state and clears the granulepos tracking. Returns
// NS_ERROR_FAILURE if either the synthesis restart or the base reset fails;
// a failed base reset returns before the tracking fields are cleared.
nsresult VorbisState::Reset() {
  const bool restartFailed = mActive && vorbis_synthesis_restart(&mDsp) != 0;

  mHeaders.Erase();
  if (NS_FAILED(OggCodecState::Reset())) {
    return NS_ERROR_FAILURE;
  }

  mGranulepos = 0;
  mPrevVorbisBlockSize = 0;

  if (restartFailed) {
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}

// Creates an active Vorbis codec state: initializes the libvorbis info and
// comment structures and zero-fills the DSP and block state.
VorbisState::VorbisState(rlbox_sandbox_ogg* aSandbox,
                         tainted_opaque_ogg<ogg_page*> aBosPage,
                         uint32_t aSerial)
    : OggCodecState(aSandbox, aBosPage, aSerial, /* aActive = */ true),
      mPrevVorbisBlockSize(0),
      mGranulepos(0) {
  MOZ_COUNT_CTOR(VorbisState);

  vorbis_info_init(&mVorbisInfo);
  vorbis_comment_init(&mComment);

  memset(&mDsp, 0, sizeof(vorbis_dsp_state));
  memset(&mBlock, 0, sizeof(vorbis_block));
}

// Resets the stream, then releases the libvorbis block, DSP, info and
// comment state in that order.
VorbisState::~VorbisState() {
  MOZ_COUNT_DTOR(VorbisState);

  Reset();

  vorbis_block_clear(&mBlock);
  vorbis_dsp_clear(&mDsp);
  vorbis_info_clear(&mVorbisInfo);
  vorbis_comment_clear(&mComment);
}

// Feeds one header packet to libvorbis and tracks header progress. The packet
// is retained in mHeaders. Returns false on a decode error or when more than
// three header packets have been seen; otherwise true. Sets
// mDoneReadingHeaders once the third packet is a successfully decoded setup
// header.
bool VorbisState::DecodeHeader(OggPacketPtr aPacket) {
  // Keep a raw alias; ownership moves into mHeaders on the next line.
  ogg_packet* header = aPacket.get();
  mHeaders.Append(std::move(aPacket));
  mPacketCount++;

  const int status =
      vorbis_synthesis_headerin(&mVorbisInfo, &mComment, header);

  // We must determine when we've read the last header packet.
  // vorbis_synthesis_headerin() does not tell us when it's read the last
  // header, so we must keep track of the headers externally.
  //
  // There are 3 header packets, the Identification, Comment, and Setup
  // headers, which must be in that order. If they're out of order, the file
  // is invalid. If we've successfully read a header, and it's the setup
  // header, then we're done reading headers. The first byte of each packet
  // determines its type as follows:
  //    0x1 -> Identification header
  //    0x3 -> Comment header
  //    0x5 -> Setup header
  // For more details of the Vorbis/Ogg containment scheme, see the Vorbis I
  // Specification, Chapter 4, Codec Setup and Packet Decode:
  // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-580004
  const bool isSetupHeader = header->bytes > 0 && header->packet[0] == 0x5;

  if (status < 0 || mPacketCount > 3) {
    // We've received an error, or the first three packets weren't valid
    // header packets. Assume bad input. Our caller will deactivate the
    // bitstream.
    return false;
  }

  const bool headersComplete =
      status == 0 && isSetupHeader && mPacketCount == 3;
  if (headersComplete) {
    // Successfully read the three header packets.
    // The bitstream remains active.
    mDoneReadingHeaders = true;
  }
  return true;
}

bool VorbisState::Init() {

  if (!mActive) {

    return false;

  int ret = vorbis_synthesis_init(&mDsp, &mVorbisInfo);

  if (ret != 0) {

    NS_WARNING("vorbis_synthesis_init() failed initializing vorbis bitstream");

    return mActive = false;

  ret = vorbis_block_init(&mDsp, &mBlock);

  if (ret != 0) {

    NS_WARNING("vorbis_block_init() failed initializing vorbis bitstream");

    if (mActive) {

      vorbis_dsp_clear(&mDsp);

    return mActive = false;

  nsTArray<const unsigned char*> headers;

  nsTArray<size_t> headerLens;

  for (size_t i = 0; i < mHeaders.Length(); i++) {

    headers.AppendElement(mHeaders[i]->packet);

    headerLens.AppendElement(mHeaders[i]->bytes);

  // Save header packets for the decoder

  VorbisCodecSpecificData vorbisCodecSpecificData{};

  if (!XiphHeadersToExtradata(vorbisCodecSpecificData.mHeadersBinaryBlob,

                              headers, headerLens)) {

    return mActive = false;

  mHeaders.Erase();

  mInfo.mMimeType = "audio/vorbis"_ns;

  mInfo.mRate = mVorbisInfo.rate;

  mInfo.mChannels = mVorbisInfo.channels;

  mInfo.mBitDepth = 16;

  mInfo.mCodecSpecificConfig =

      AudioCodecSpecificVariant{std::move(vorbisCodecSpecificData)};

  return true;

// Converts a granulepos to a time using this stream's Vorbis info. Returns
// an invalid TimeUnit when the stream is inactive.
TimeUnit VorbisState::Time(int64_t aGranulepos) {
  if (mActive) {
    return VorbisState::Time(&mVorbisInfo, aGranulepos);
  }
  return TimeUnit::Invalid();
}

// Builds a TimeUnit of aGranulepos ticks at aInfo->rate. Returns an invalid
// TimeUnit when the granulepos is -1 (unknown) or the rate is zero.
TimeUnit VorbisState::Time(vorbis_info* aInfo, int64_t aGranulepos) {
  const bool known = aGranulepos != -1 && aInfo->rate != 0;
  if (!known) {
    return TimeUnit::Invalid();
  }
  return TimeUnit(aGranulepos, aInfo->rate);
}

// Returns the duration of aPacket, looked up from the sample count recorded
// for it in mVorbisPacketSamples. Returns an invalid TimeUnit when the stream
// is inactive, the packet has no granulepos, or no sample count was recorded
// for this packet.
TimeUnit VorbisState::PacketDuration(ogg_packet* aPacket) {
  if (!mActive || aPacket->granulepos == -1) {
    return TimeUnit::Invalid();
  }

  // @FIXME store these in a more stable place
  const bool recorded = mVorbisPacketSamples.count(aPacket) != 0;
  if (!recorded) {
    // We haven't seen this packet, don't know its size?
    return TimeUnit::Invalid();
  }

  const long sampleCount = mVorbisPacketSamples[aPacket];
  return Time(sampleCount);
}

// Returns true when aPacket is non-empty and the lowest bit of its first byte
// is set.
bool VorbisState::IsHeader(ogg_packet* aPacket) {
  // The first byte in each Vorbis header packet is either 0x01, 0x03, or 0x05,
  // i.e. its lowest bit is set. Audio data packets have that bit clear.
  // Any packet with that bit set cannot be a data packet, it's a
  // (possibly invalid) header packet.
  // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-610004.2.1
  if (aPacket->bytes <= 0) {
    return false;
  }
  return (aPacket->packet[0] & 0x1) != 0;
}

// Builds a tag table from the stream's Vorbis comments. Comments that fail
// to parse are skipped (AddVorbisComment's return value is ignored).
UniquePtr<MetadataTags> VorbisState::GetTags() {
  NS_ASSERTION(mComment.user_comments, "no vorbis comment strings!");
  NS_ASSERTION(mComment.comment_lengths, "no vorbis comment lengths!");

  auto tags = MakeUnique<MetadataTags>();
  const int commentCount = mComment.comments;
  for (int idx = 0; idx < commentCount; ++idx) {
    AddVorbisComment(tags, mComment.user_comments[idx],
                     mComment.comment_lengths[idx]);
  }
  return tags;
}

// Feeds a page into the ogg stream and extracts packets from it. Once a
// packet with a granulepos has been seen and the headers are fully read, the
// granulepos of all buffered data packets is reconstructed and they are moved
// onto the packet queue. Inactive streams ignore the page.
nsresult VorbisState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) {
  if (!mActive) {
    return NS_OK;
  }

  NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke(
                    *mSandbox, ogg_page_serialno, aPage)) == mSerial)
                   .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON),
               "Page must be for this stream!");

  const bool pageRejected =
      sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1;
  if (pageRejected) {
    return NS_ERROR_FAILURE;
  }

  bool sawGranulepos;
  const nsresult rv = PacketOutUntilGranulepos(sawGranulepos);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (!sawGranulepos || !mDoneReadingHeaders) {
    // Nothing can be stamped yet; keep buffering.
    return NS_OK;
  }

  // We've found a packet with a granulepos, and we've loaded our metadata
  // and initialized our decoder. Determine granulepos of buffered packets.
  ReconstructVorbisGranulepos();
  for (auto& slot : mUnstamped) {
    OggPacketPtr stamped = std::move(slot);
    AssertHasRecordedPacketSamples(stamped.get());
    NS_ASSERTION(!IsHeader(stamped.get()),
                 "Don't try to recover header packet gp");
    NS_ASSERTION(stamped->granulepos != -1, "Packet must have gp by now");
    mPackets.Append(std::move(stamped));
  }
  mUnstamped.Clear();
  return NS_OK;
}

// Assigns a granulepos to every packet buffered in mUnstamped, working from
// the per-packet block sizes reported by vorbis_packet_blocksize(). On exit
// mPrevVorbisBlockSize and mGranulepos describe the last packet processed so
// the next call can continue from there.
void VorbisState::ReconstructVorbisGranulepos() {
  // The number of samples in a Vorbis packet is:
  // window_blocksize(previous_packet)/4+window_blocksize(current_packet)/4
  // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-230001.3.2
  // So we maintain mPrevVorbisBlockSize, the block size of the last packet
  // encountered. We also maintain mGranulepos, which is the granulepos of
  // the last encountered packet. This enables us to give granulepos to
  // packets when the last packet in mUnstamped doesn't have a granulepos
  // (for example if the stream was truncated).
  //
  // We validate our prediction of the number of samples decoded when
  // VALIDATE_VORBIS_SAMPLE_CALCULATION is defined by recording the predicted
  // number of samples, and verifying we extract that many when decoding
  // each packet.
  NS_ASSERTION(mUnstamped.Length() > 0, "Length must be > 0");
  auto& last = mUnstamped.LastElement();
  NS_ASSERTION(last->e_o_s || last->granulepos >= 0,
               "Must know last granulepos!");
  // Single-packet case: derive the sample count from the previous block size
  // and this packet's block size, then return early.
  if (mUnstamped.Length() == 1) {
    auto& packet = mUnstamped[0];
    long blockSize = vorbis_packet_blocksize(&mVorbisInfo, packet.get());
    if (blockSize < 0) {
      // On failure vorbis_packet_blocksize returns < 0. If we've got
      // a bad packet, we just assume that decode will have to skip this
      // packet, i.e. assume 0 samples are decodable from this packet.
      blockSize = 0;
      mPrevVorbisBlockSize = 0;
    long samples = mPrevVorbisBlockSize / 4 + blockSize / 4;
    mPrevVorbisBlockSize = blockSize;
    if (packet->granulepos == -1) {
      packet->granulepos = mGranulepos + samples;
    // Account for a partial last frame
    if (packet->e_o_s && packet->granulepos >= mGranulepos) {
      samples = packet->granulepos - mGranulepos;
    mGranulepos = packet->granulepos;
    RecordVorbisPacketSamples(packet.get(), samples);
    return;
  // Multi-packet case. Remember whether the last packet lacks a granulepos
  // (-1); if so the values computed below are relative and get rebased later.
  bool unknownGranulepos = last->granulepos == -1;
  int64_t totalSamples = 0;
  // Walk backwards: each packet's granulepos minus its sample count gives the
  // granulepos of the packet before it.
  for (int32_t i = AssertedCast<int32_t>(mUnstamped.Length() - 1); i > 0; i--) {
    auto& packet = mUnstamped[i];
    auto& prev = mUnstamped[i - 1];
    ogg_int64_t granulepos = packet->granulepos;
    NS_ASSERTION(granulepos != -1, "Must know granulepos!");
    long prevBlockSize = vorbis_packet_blocksize(&mVorbisInfo, prev.get());
    long blockSize = vorbis_packet_blocksize(&mVorbisInfo, packet.get());
    if (blockSize < 0 || prevBlockSize < 0) {
      // On failure vorbis_packet_blocksize returns < 0. If we've got
      // a bad packet, we just assume that decode will have to skip this
      // packet, i.e. assume 0 samples are decodable from this packet.
      blockSize = 0;
      prevBlockSize = 0;
    long samples = prevBlockSize / 4 + blockSize / 4;
    totalSamples += samples;
    prev->granulepos = granulepos - samples;
    RecordVorbisPacketSamples(packet.get(), samples);
  if (unknownGranulepos) {
    // The backward pass started from -1, so the first packet ended up at
    // -1 - totalSamples. Adding mGranulepos + totalSamples + 1 to every packet
    // puts the first packet at mGranulepos and shifts the rest to match.
    for (uint32_t i = 0; i < mUnstamped.Length(); i++) {
      mUnstamped[i]->granulepos += mGranulepos + totalSamples + 1;
  // The first packet has no predecessor in mUnstamped, so its sample count is
  // computed from mPrevVorbisBlockSize (0 samples if that is unknown/zero).
  auto& first = mUnstamped[0];
  long blockSize = vorbis_packet_blocksize(&mVorbisInfo, first.get());
  if (blockSize < 0) {
    mPrevVorbisBlockSize = 0;
    blockSize = 0;
  long samples = (mPrevVorbisBlockSize == 0)
                     ? 0
                     : mPrevVorbisBlockSize / 4 + blockSize / 4;
  int64_t start = first->granulepos - samples;
  RecordVorbisPacketSamples(first.get(), samples);
  if (last->e_o_s && start < mGranulepos) {
    // We've calculated that there are more samples in this page than its
    // granulepos claims, and it's the last page in the stream. This is legal,
    // and we will need to prune the trailing samples when we come to decode it.
    // We must correct the timestamps so that they follow the last Vorbis page's
    // samples.
    int64_t pruned = mGranulepos - start;
    // Every packet except the last is shifted forward by the pruned amount.
    for (uint32_t i = 0; i < mUnstamped.Length() - 1; i++) {
      mUnstamped[i]->granulepos += pruned;
#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
    mVorbisPacketSamples[last.get()] -= pruned;
#endif
  // Carry state into the next call; a failed block-size lookup is clamped to 0.
  mPrevVorbisBlockSize = vorbis_packet_blocksize(&mVorbisInfo, last.get());
  mPrevVorbisBlockSize = std::max(static_cast<long>(0), mPrevVorbisBlockSize);
  mGranulepos = last->granulepos;

// Constructs the Opus stream state for the given BOS page. The parser and
// decoder start out null and both "previous granulepos" trackers start at 0.
OpusState::OpusState(rlbox_sandbox_ogg* aSandbox,
                     tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial)
    : OggCodecState(aSandbox, aBosPage, aSerial, true),
      mParser(nullptr),           // set by DecodeHeader on the first packet
      mDecoder(nullptr),          // set by Init
      mPrevPacketGranulepos(0),
      mPrevPageGranulepos(0) {
  MOZ_COUNT_CTOR(OpusState);
}

// Resets the stream state and then destroys the multistream decoder, if one
// was created.
OpusState::~OpusState() {
  MOZ_COUNT_DTOR(OpusState);
  Reset();

  if (!mDecoder) {
    return;
  }
  opus_multistream_decoder_destroy(mDecoder);
  mDecoder = nullptr;
}

// Convenience overload: forwards to Reset(bool) with aStart == false.
nsresult OpusState::Reset() {
  const bool atStart = false;
  return Reset(atStart);
}

// Resets the Opus decoder state (when active and a decoder exists) and then
// clears the queued data via OggCodecState::Reset().
//
// aStart: when true the previous page/packet granulepos trackers are set to 0;
//         otherwise they are set to -1.
// Returns NS_ERROR_FAILURE if the base-class reset fails, NS_OK otherwise.
nsresult OpusState::Reset(bool aStart) {
  if (mActive && mDecoder) {
    opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE);
    // 0 vs. -1 lets later code tell "the first page is also the last page"
    // apart from "the previous page was never processed" when the last page
    // is encountered.
    const int initialGranulepos = aStart ? 0 : -1;
    mPrevPageGranulepos = initialGranulepos;
    mPrevPacketGranulepos = initialGranulepos;
  }

  // Clear queued data.
  const nsresult baseResult = OggCodecState::Reset();
  if (NS_FAILED(baseResult)) {
    return NS_ERROR_FAILURE;
  }

  LOG(LogLevel::Debug, ("Opus decoder reset"));
  return NS_OK;
}

bool OpusState::Init(void) {

  if (!mActive) {

    return false;

  int error;

  NS_ASSERTION(mDecoder == nullptr, "leaking OpusDecoder");

  mDecoder = opus_multistream_decoder_create(

      mParser->mRate, mParser->mChannels, mParser->mStreams,

      mParser->mCoupledStreams, mParser->mMappingTable, &error);

  mInfo.mMimeType = "audio/opus"_ns;

  mInfo.mRate = mParser->mRate;

  mInfo.mChannels = mParser->mChannels;

  mInfo.mBitDepth = 16;

  // Save preskip & the first header packet for the Opus decoder

  OpusCodecSpecificData opusData;

  opusData.mContainerCodecDelayFrames = mParser->mPreSkip;

  if (!mHeaders.PeekFront()) {

    return false;

  opusData.mHeadersBinaryBlob->AppendElements(mHeaders.PeekFront()->packet,

                                              mHeaders.PeekFront()->bytes);

  mInfo.mCodecSpecificConfig = AudioCodecSpecificVariant{std::move(opusData)};

  mHeaders.Erase();

  LOG(LogLevel::Debug, ("Opus decoder init"));

  return error == OPUS_OK;

// Consumes one packet during header parsing, dispatching on how many packets
// have been seen so far (mPacketCount is post-incremented on every call).
// Returns false if the id header or the tags header fails to parse.
bool OpusState::DecodeHeader(OggPacketPtr aPacket) {
  const auto packetIndex = mPacketCount++;

  if (packetIndex == 0) {
    // Id header: create the parser, parse, and keep the packet in mHeaders.
    mParser = MakeUnique<OpusParser>();
    if (!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) {
      return false;
    }
    mHeaders.Append(std::move(aPacket));
    return true;
  }

  if (packetIndex == 1) {
    // Metadata (tags) header.
    return mParser->DecodeTags(aPacket->packet, aPacket->bytes);
  }

  // We made it to the first data packet (which includes reconstructing
  // timestamps for it in PageIn). Success! Put the packet back on the front
  // of the queue so it can be decoded.
  mDoneReadingHeaders = true;
  mPackets.PushFront(std::move(aPacket));
  return true;
}

/* Build a MetadataTags table from the tag strings held by the parser. */
UniquePtr<MetadataTags> OpusState::GetTags() {
  auto result = MakeUnique<MetadataTags>();
  for (const auto& tag : mParser->mTags) {
    AddVorbisComment(result, tag.Data(), tag.Length());
  }
  return result;
}

/* Convert a granulepos to a TimeUnit using the stream's pre-skip.
 * Yields an invalid TimeUnit when the stream is not active. */
TimeUnit OpusState::Time(int64_t aGranulepos) {
  return mActive ? Time(mParser->mPreSkip, aGranulepos) : TimeUnit::Invalid();
}

// Converts a granulepos to a TimeUnit after subtracting aPreSkip.
// A negative granulepos yields an invalid TimeUnit.
TimeUnit OpusState::Time(int aPreSkip, int64_t aGranulepos) {
  const bool haveGranulepos = aGranulepos >= 0;
  if (!haveGranulepos) {
    return TimeUnit::Invalid();
  }

  // Ogg Opus always runs at a granule rate of 48 kHz.
  const int64_t skipAdjusted = aGranulepos - aPreSkip;
  return TimeUnit(skipAdjusted, 48000);
}

// True when the packet holds at least 16 bytes and begins with the 8-byte
// magic "OpusHead" or "OpusTags".
bool OpusState::IsHeader(ogg_packet* aPacket) {
  if (aPacket->bytes < 16) {
    return false;
  }
  const bool isIdHeader = memcmp(aPacket->packet, "OpusHead", 8) == 0;
  return isIdHeader || memcmp(aPacket->packet, "OpusTags", 8) == 0;
}

// Feeds one Ogg page into this stream. Once a granulepos has been seen and at
// least two packets have been counted, the buffered packets have their
// granulepos reconstructed and are moved from mUnstamped to mPackets.
// Returns NS_ERROR_FAILURE if the page is rejected or reconstruction fails.
nsresult OpusState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) {
  // Inactive streams ignore incoming pages.
  if (!mActive) {
    return NS_OK;
  }
  NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke(
                    *mSandbox, ogg_page_serialno, aPage)) == mSerial)
                   .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON),
               "Page must be for this stream!");

  const bool pageRejected =
      sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1;
  if (pageRejected) {
    return NS_ERROR_FAILURE;
  }

  bool sawGranulepos;
  const nsresult status = PacketOutUntilGranulepos(sawGranulepos);
  if (NS_FAILED(status)) {
    return status;
  }
  // Nothing to stamp yet; hand back the (successful) status unchanged.
  if (!sawGranulepos || mPacketCount < 2) {
    return status;
  }

  if (!ReconstructOpusGranulepos()) {
    return NS_ERROR_FAILURE;
  }

  for (auto& slot : mUnstamped) {
    OggPacketPtr stamped = std::move(slot);
    NS_ASSERTION(!IsHeader(stamped.get()), "Don't try to play a header packet");
    NS_ASSERTION(stamped->granulepos != -1, "Packet should have a granulepos");
    mPackets.Append(std::move(stamped));
  }
  mUnstamped.Clear();
  return NS_OK;
}

// Returns how far an Opus packet advances the granule position (which is not
// the same as the number of decoded samples, since that depends on the
// decoder rate). The value is frames-per-packet times samples-per-frame at
// 48 kHz, so it works for multistream files, would keep working if decoding
// ever ran at another rate, and does not need an Opus decoder instance.
// Returns 0 (with a warning) when the frame count is not positive.
static int GetOpusDeltaGP(ogg_packet* packet) {
  const int frameCount = opus_packet_get_nb_frames(
      packet->packet, AssertedCast<int32_t>(packet->bytes));
  if (frameCount <= 0) {
    NS_WARNING("Invalid Opus packet.");
    return 0;
  }
  const int samplesPerFrame =
      opus_packet_get_samples_per_frame(packet->packet, 48000);
  return frameCount * samplesPerFrame;
}

// Duration of a packet: its granule-position delta expressed at 48 kHz.
TimeUnit OpusState::PacketDuration(ogg_packet* aPacket) {
  const int granuleDelta = GetOpusDeltaGP(aPacket);
  return TimeUnit(granuleDelta, 48000);
}

// Assigns a granulepos to every packet in mUnstamped.
//
// Two strategies are used:
//  - If the last packet is flagged e_o_s and a previous page granulepos is
//    known (mPrevPageGranulepos != -1), packets are stamped forwards from
//    mPrevPageGranulepos, and packets lying wholly beyond the final granulepos
//    are dropped (end trimming).
//  - Otherwise packets are stamped backwards from the last packet's
//    granulepos by subtracting each packet's GetOpusDeltaGP().
//
// Returns false when the stream must be rejected (only possible while
// mDoneReadingHeaders is still false); true otherwise. On success
// mPrevPageGranulepos is updated to the page's final granulepos.
bool OpusState::ReconstructOpusGranulepos(void) {
  NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets");
  NS_ASSERTION(mUnstamped.LastElement()->e_o_s ||
                   mUnstamped.LastElement()->granulepos > 0,
               "Must know last granulepos!");
  int64_t gp;
  // If this is the last page, and we've seen at least one previous page (or
  // this is the first page)...
  if (mUnstamped.LastElement()->e_o_s) {
    auto& last = mUnstamped.LastElement();
    if (mPrevPageGranulepos != -1) {
      // If this file only has one page and the final granule position is
      // smaller than the pre-skip amount, we MUST reject the stream.
      if (!mDoneReadingHeaders && last->granulepos < mParser->mPreSkip) {
        return false;
      // Copy the final granulepos now: the loop below may remove trailing
      // elements from mUnstamped, including the one `last` refers to.
      int64_t last_gp = last->granulepos;
      gp = mPrevPageGranulepos;
      // Loop through the packets forwards, adding the current packet's
      // duration to the previous granulepos to get the value for the
      // current packet.
      for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) {
        auto& packet = mUnstamped[i];
        int offset = GetOpusDeltaGP(packet.get());
        // Check for error (negative offset) and overflow.
        if (offset >= 0 && gp <= INT64_MAX - offset) {
          gp += offset;
          if (gp >= last_gp) {
            NS_WARNING("Opus end trimming removed more than a full packet.");
            // We were asked to remove a full packet's worth of data or more.
            // Encoders SHOULD NOT produce streams like this, but we'll handle
            // it for them anyway.
            gp = last_gp;
            // Drop everything after packet i. This shrinks Length() to i + 1,
            // so the loop condition fails on the next check.
            mUnstamped.RemoveLastElements(mUnstamped.Length() - (i + 1));
            packet->e_o_s = 1;
        packet->granulepos = gp;
      mPrevPageGranulepos = last_gp;
      return true;
    NS_WARNING("No previous granule position to use for Opus end trimming.");
    // If we don't have a previous granule position, fall through.
    // We simply won't trim any samples from the end.
    // TODO: Are we guaranteed to have seen a previous page if there is one?
  auto& last = mUnstamped.LastElement();
  gp = last->granulepos;
  // Loop through the packets backwards, subtracting the next
  // packet's duration from its granulepos to get the value
  // for the current packet.
  for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) {
    int offset = GetOpusDeltaGP(mUnstamped[i].get());
    // Check for error (negative offset) and overflow.
    if (offset >= 0) {
      if (offset <= gp) {
        gp -= offset;
      } else {
        // If the granule position of the first data page is smaller than the
        // number of decodable audio samples on that page, then we MUST reject
        // the stream.
        if (!mDoneReadingHeaders) return false;
        // It's too late to reject the stream.
        // If we get here, this almost certainly means the file has screwed-up
        // timestamps somewhere after the first page.
        NS_WARNING("Clamping negative Opus granulepos to zero.");
        gp = 0;
    mUnstamped[i - 1]->granulepos = gp;
  // Check to make sure the first granule position is at least as large as the
  // total number of samples decodable from the first page with completed
  // packets. This requires looking at the duration of the first packet, too.
  // We MUST reject such streams.
  if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0].get()) > gp) {
    return false;
  mPrevPageGranulepos = last->granulepos;
  return true;

// Pops the next packet as a MediaRawData via the base-class implementation
// and, for the end-of-stream packet, shortens its duration (end trimming)
// when the previous packet's granulepos is known.
// Returns nullptr if no packet is available or the base class returns none.
already_AddRefed<MediaRawData> OpusState::PacketOutAsMediaRawData() {
  ogg_packet* packet = PacketPeek();
  if (!packet) {
    return nullptr;
  // Read what we need from the packet before the base-class call below hands
  // it out.
  uint32_t frames = 0;
  const int64_t endFrame = packet->granulepos;
  if (packet->e_o_s) {
    frames = GetOpusDeltaGP(packet);
  RefPtr<MediaRawData> data = OggCodecState::PacketOutAsMediaRawData();
  if (!data) {
    return nullptr;
  if (data->mEOS && mPrevPacketGranulepos != -1) {
    // If this is the last packet, perform end trimming.
    int64_t startFrame = mPrevPacketGranulepos;
    // After this, `frames` is the number of frames to trim: the packet's
    // nominal frame count minus (endFrame - startFrame), with that difference
    // clamped to [0, frames].
    frames -= std::max<int64_t>(
        0, std::min(endFrame - startFrame, static_cast<int64_t>(frames)));
    TimeUnit toTrim = TimeUnit(frames, 48000);
    LOG(LogLevel::Debug,
        ("Trimming last opus packet: [%s, %s] to [%s, %s]",
         data->mTime.ToString().get(), data->GetEndTime().ToString().get(),
         data->mTime.ToString().get(),
         (data->mTime + data->mDuration - toTrim).ToString().get()));
    // Record the untrimmed interval before shortening the duration.
    data->mOriginalPresentationWindow =
        Some(media::TimeInterval{data->mTime, data->mTime + data->mDuration});
    data->mDuration -= toTrim;
    if (data->mDuration.IsNegative()) {
      data->mDuration = TimeUnit::Zero(data->mTime);
  // Save this packet's granule position in case we need to perform end
  // trimming on the next packet.
  mPrevPacketGranulepos = endFrame;
  return data.forget();

// Constructs the FLAC stream state; all setup is delegated to OggCodecState.
FlacState::FlacState(rlbox_sandbox_ogg* aSandbox,
                     tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial)
    : OggCodecState(aSandbox, aBosPage, aSerial, true) {
}

// Feeds one header packet to the FLAC parser. Returns false if the parser
// reports an error. Marks header reading as done once the parser reports it
// has the full metadata.
bool FlacState::DecodeHeader(OggPacketPtr aPacket) {
  const bool parseFailed =
      mParser.DecodeHeaderBlock(aPacket->packet, aPacket->bytes).isErr();
  if (parseFailed) {
    return false;
  }
  if (mParser.HasFullMetadata()) {
    mDoneReadingHeaders = true;
  }
  return true;
}

// Converts a granulepos to a TimeUnit at the stream's sample rate, or an
// invalid TimeUnit while the parsed stream info is not valid.
TimeUnit FlacState::Time(int64_t aGranulepos) {
  return mParser.mInfo.IsValid() ? TimeUnit(aGranulepos, mParser.mInfo.mRate)
                                 : TimeUnit::Invalid();
}

// Duration of a packet: the parser's block duration for the packet payload,
// expressed at the stream's sample rate.
TimeUnit FlacState::PacketDuration(ogg_packet* aPacket) {
  const auto blockTicks =
      mParser.BlockDuration(aPacket->packet, aPacket->bytes);
  return TimeUnit(blockTicks, mParser.mInfo.mRate);
}

// Asks the parser whether the packet is a header block; a parser error is
// treated as "not a header".
bool FlacState::IsHeader(ogg_packet* aPacket) {
  auto verdict = mParser.IsHeaderBlock(aPacket->packet, aPacket->bytes);
  if (!verdict.isOk()) {
    return false;
  }
  return verdict.unwrap();
}

// Feeds one Ogg page into this stream. Once a granulepos has been seen and
// the headers are complete, the buffered packets have their granulepos
// reconstructed and are moved from mUnstamped to mPackets.
nsresult FlacState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) {
  // Inactive streams ignore incoming pages.
  if (!mActive) {
    return NS_OK;
  }
  NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke(
                    *mSandbox, ogg_page_serialno, aPage)) == mSerial)
                   .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON),
               "Page must be for this stream!");

  const bool pageRejected =
      sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage)
          .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1;
  if (pageRejected) {
    return NS_ERROR_FAILURE;
  }

  bool sawGranulepos;
  const nsresult rv = PacketOutUntilGranulepos(sawGranulepos);
  if (NS_FAILED(rv)) {
    return rv;
  }
  // Nothing to stamp until a granulepos has been seen and the metadata is
  // loaded.
  if (!sawGranulepos || !mDoneReadingHeaders) {
    return NS_OK;
  }

  ReconstructFlacGranulepos();
  for (auto& slot : mUnstamped) {
    OggPacketPtr stamped = std::move(slot);
    NS_ASSERTION(!IsHeader(stamped.get()),
                 "Don't try to recover header packet gp");
    NS_ASSERTION(stamped->granulepos != -1, "Packet must have gp by now");
    mPackets.Append(std::move(stamped));
  }
  mUnstamped.Clear();
  return NS_OK;
}

// Returns the tag metadata produced by the FLAC parser.
UniquePtr<MetadataTags> FlacState::GetTags() {
  return mParser.GetTags();
}

// Returns a pointer to the track info owned by the FLAC parser.
const TrackInfo* FlacState::GetInfo() const {
  return &mParser.mInfo;
}

// Assigns a granulepos to every packet in mUnstamped by walking backwards
// from the last packet and subtracting each packet's block duration.
// Returns false if a duration exceeds the running granulepos while headers
// are still being read (the stream must be rejected); true otherwise.
bool FlacState::ReconstructFlacGranulepos(void) {
  NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets");
  auto& tail = mUnstamped.LastElement();
  NS_ASSERTION(tail->e_o_s || tail->granulepos > 0,
               "Must know last granulepos!");

  int64_t runningGp = tail->granulepos;

  // Packet idx - 1 ends where packet idx begins, i.e. at packet idx's
  // granulepos minus packet idx's duration.
  for (uint32_t idx = mUnstamped.Length() - 1; idx > 0; --idx) {
    const int64_t duration =
        mParser.BlockDuration(mUnstamped[idx]->packet, mUnstamped[idx]->bytes);

    // A negative duration signals an error; the running value is then left
    // untouched.
    if (duration >= 0 && duration > runningGp) {
      // If the granule position of the first data page is smaller than the
      // number of decodable audio samples on that page, then we MUST reject
      // the stream.
      if (!mDoneReadingHeaders) {
        return false;
      }
      // It's too late to reject the stream. Getting here almost certainly
      // means the file has broken timestamps somewhere after the first page.
      NS_WARNING("Clamping negative granulepos to zero.");
      runningGp = 0;
    } else if (duration >= 0) {
      runningGp -= duration;
    }

    mUnstamped[idx - 1]->granulepos = runningGp;
  }
  return true;
}

// Constructs the Skeleton stream state with version, presentation time and
// segment length zeroed; they are filled in when the header packet is decoded.
SkeletonState::SkeletonState(rlbox_sandbox_ogg* aSandbox,
                             tainted_opaque_ogg<ogg_page*> aBosPage,
                             uint32_t aSerial)
    : OggCodecState(aSandbox, aBosPage, aSerial, true),
      mVersion(0),
      mPresentationTime(0),
      mLength(0) {
  MOZ_COUNT_CTOR(SkeletonState);
}

// Only bookkeeping for leak counting; members clean themselves up.
SkeletonState::~SkeletonState() {
  MOZ_COUNT_DTOR(SkeletonState);
}

// Support for Ogg Skeleton 4.0, as per specification at:
// http://wiki.xiph.org/Ogg_Skeleton_4
//
// The constants below are packet lengths and byte offsets used when parsing
// Skeleton packets. All offsets are relative to the start of the packet
// payload.

// Minimum length in bytes of a Skeleton header packet (any version).
static const long SKELETON_MIN_HEADER_LEN = 28;
// Minimum length in bytes of a Skeleton 4.0 header packet; shorter 4.x
// headers are rejected.
static const long SKELETON_4_0_MIN_HEADER_LEN = 80;
// Minimum length in bytes of a Skeleton 4.0 index packet.
static const long SKELETON_4_0_MIN_INDEX_LEN = 42;
// Minimum length in bytes of a Skeleton 3.0/4.0 Fisbone packet.
static const long SKELETON_MIN_FISBONE_LEN = 52;
// Minimum possible size of a compressed index keypoint.
static const size_t MIN_KEY_POINT_SIZE = 2;
// Byte offset of the major and minor version numbers in the
// Ogg Skeleton 4.0 header packet.
static const size_t SKELETON_VERSION_MAJOR_OFFSET = 8;
static const size_t SKELETON_VERSION_MINOR_OFFSET = 10;
// Byte-offsets of the presentation time numerator and denominator
static const size_t SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET = 12;
static const size_t SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET = 20;
// Byte-offsets of the length of file field in the Skeleton 4.0 header packet.
static const size_t SKELETON_FILE_LENGTH_OFFSET = 64;
// Byte-offsets of the fields in the Skeleton index packet.
static const size_t INDEX_SERIALNO_OFFSET = 6;
static const size_t INDEX_NUM_KEYPOINTS_OFFSET = 10;
static const size_t INDEX_TIME_DENOM_OFFSET = 18;
static const size_t INDEX_FIRST_NUMER_OFFSET = 26;
static const size_t INDEX_LAST_NUMER_OFFSET = 34;
// Offset of the first keypoint; equals SKELETON_4_0_MIN_INDEX_LEN.
static const size_t INDEX_KEYPOINT_OFFSET = 42;
// Byte-offsets of the fields in the Skeleton Fisbone packet.
static const size_t FISBONE_MSG_FIELDS_OFFSET = 8;
static const size_t FISBONE_SERIALNO_OFFSET = 12;

// True when the packet is long enough to be a Skeleton header and its first
// 8 bytes equal "fishead" followed by the terminating NUL.
static bool IsSkeletonBOS(ogg_packet* aPacket) {
  static_assert(SKELETON_MIN_HEADER_LEN >= 8,
                "Minimum length of skeleton BOS header incorrect");
  if (aPacket->bytes < SKELETON_MIN_HEADER_LEN) {
    return false;
  }
  const char* magic = reinterpret_cast<char*>(aPacket->packet);
  return memcmp(magic, "fishead", 8) == 0;
}

// True when the packet is long enough to be a Skeleton 4.0 index packet and
// its first 5 bytes equal "index".
static bool IsSkeletonIndex(ogg_packet* aPacket) {
  static_assert(SKELETON_4_0_MIN_INDEX_LEN >= 5,
                "Minimum length of skeleton index header incorrect");
  if (aPacket->bytes < SKELETON_4_0_MIN_INDEX_LEN) {
    return false;
  }
  const char* magic = reinterpret_cast<char*>(aPacket->packet);
  return memcmp(magic, "index", 5) == 0;
}

// True when the packet is long enough to be a Fisbone packet and its first
// 8 bytes equal "fisbone" followed by the terminating NUL.
static bool IsSkeletonFisbone(ogg_packet* aPacket) {
  static_assert(SKELETON_MIN_FISBONE_LEN >= 8,
                "Minimum length of skeleton fisbone header incorrect");
  if (aPacket->bytes < SKELETON_MIN_FISBONE_LEN) {
    return false;
  }
  const char* magic = reinterpret_cast<char*>(aPacket->packet);
  return memcmp(magic, "fisbone", 8) == 0;
}

// Decodes a variable-length integer starting at p into n and returns the
// address just past the last byte consumed. Each byte contributes its low
// 7 bits, least-significant group first; a byte with the top bit (0x80) set
// ends the integer. Reading also stops at aLimit, or after nine bytes
// (63 bits) have been consumed.
static const unsigned char* ReadVariableLengthInt(const unsigned char* p,
                                                  const unsigned char* aLimit,
                                                  int64_t& n) {
  n = 0;
  for (int bitPos = 0; p < aLimit && bitPos < 57; bitPos += 7) {
    const int64_t current = static_cast<int64_t>(*p);
    ++p;
    n |= (current & 0x7f) << bitPos;
    if ((current & 0x80) == 0x80) {
      // Terminator bit seen: this was the final byte of the integer.
      break;
    }
  }
  return p;
}

// Parses a Skeleton index packet and, if it contains at least one keypoint,
// stores the resulting keyframe index in mIndex under the packet's serialno.
//
// The keypoints are stored as pairs of variable-length integers holding the
// delta of the byte offset and the delta of the timestamp numerator relative
// to the previous keypoint.
//
// Returns false if the state is inactive or the packet fails validation; in
// the validation-failure cases mActive is also set to false. Returns true
// otherwise.
bool SkeletonState::DecodeIndex(ogg_packet* aPacket) {
  NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN,
               "Index must be at least minimum size");
  if (!mActive) {
    return false;
  uint32_t serialno =
      LittleEndian::readUint32(aPacket->packet + INDEX_SERIALNO_OFFSET);
  int64_t numKeyPoints =
      LittleEndian::readInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET);
  TimeUnit endTime = TimeUnit::Zero();
  TimeUnit startTime = TimeUnit::Zero();
  const unsigned char* p = aPacket->packet;
  int64_t timeDenom =
      LittleEndian::readInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET);
  // A zero denominator would make every timestamp meaningless; deactivate.
  if (timeDenom == 0) {
    LOG(LogLevel::Debug, ("Ogg Skeleton Index packet for stream %u has 0 "
                          "timestamp denominator.",
                          serialno));
    return (mActive = false);
  // Extract the start time.
  int64_t timeRawInt = LittleEndian::readInt64(p + INDEX_FIRST_NUMER_OFFSET);
  startTime = TimeUnit(timeRawInt, timeDenom);
  // Extract the end time.
  timeRawInt = LittleEndian::readInt64(p + INDEX_LAST_NUMER_OFFSET);
  endTime = TimeUnit(timeRawInt, timeDenom);
  // Check the numKeyPoints value read, ensure we're not going to run out of
  // memory while trying to decode the index packet.
  CheckedInt64 minPacketSize =
      (CheckedInt64(numKeyPoints) * MIN_KEY_POINT_SIZE) + INDEX_KEYPOINT_OFFSET;
  if (!minPacketSize.isValid()) {
    return (mActive = false);
  // Upper bound on how many keypoints the bytes after the fixed header could
  // hold if each took the minimum size.
  int64_t sizeofIndex =
      AssertedCast<int64_t>(aPacket->bytes - INDEX_KEYPOINT_OFFSET);
  int64_t maxNumKeyPoints =
      AssertedCast<int64_t>(sizeofIndex / MIN_KEY_POINT_SIZE);
  if (aPacket->bytes < minPacketSize.value() ||
      numKeyPoints > maxNumKeyPoints || numKeyPoints < 0) {
    // Packet size is less than the theoretical minimum size, or the packet is
    // claiming to store more keypoints than it's capable of storing. This means
    // that the numKeyPoints field is too large or small for the packet to
    // possibly contain as many packets as it claims to, so the numKeyPoints
    // field is possibly malicious. Don't try decoding this index, we may run
    // out of memory.
    LOG(LogLevel::Debug, ("Possibly malicious number of key points reported "
                          "(%" PRId64 ") in index packet for stream %u.",
                          numKeyPoints, serialno));
    return (mActive = false);
  UniquePtr<nsKeyFrameIndex> keyPoints(new nsKeyFrameIndex(startTime, endTime));
  p = aPacket->packet + INDEX_KEYPOINT_OFFSET;
  const unsigned char* limit = aPacket->packet + aPacket->bytes;
  int64_t numKeyPointsRead = 0;
  // Running totals; each keypoint stores deltas relative to the previous one.
  CheckedInt64 offset = 0;
  TimeUnit time = TimeUnit::Zero();
  while (p < limit && numKeyPointsRead < numKeyPoints) {
    int64_t delta = 0;
    p = ReadVariableLengthInt(p, limit, delta);
    offset += delta;
    // Reject if the packet ended before the time delta could follow, or if
    // the accumulated offset overflowed or lies outside [0, mLength].
    if (p == limit || !offset.isValid() || offset.value() > mLength ||
        offset.value() < 0) {
      return (mActive = false);
    p = ReadVariableLengthInt(p, limit, delta);
    time += TimeUnit(delta, timeDenom);
    // Keypoint times must stay within the index's [startTime, endTime].
    if (!time.IsValid() || time > endTime || time < startTime) {
      return (mActive = false);
    keyPoints->Add(offset.value(), time);
    numKeyPointsRead++;
  // Only store the index if at least one keypoint was read.
  uint32_t keyPointsRead = keyPoints->Length();
  if (keyPointsRead > 0) {
    mIndex.InsertOrUpdate(serialno, std::move(keyPoints));
  LOG(LogLevel::Debug, ("Loaded %d keypoints for Skeleton on stream %u",
                        keyPointsRead, serialno));
  return true;

// Looks up, in the keyframe index of track aSerialno, the last keypoint whose
// time is not after aTarget and stores it in aResult.
// Fails if the track has no index, the index is empty, or aTarget lies
// outside the index's [mStartTime, mEndTime] range.
nsresult SkeletonState::IndexedSeekTargetForTrack(uint32_t aSerialno,
                                                  const TimeUnit& aTarget,
                                                  nsKeyPoint& aResult) {
  nsKeyFrameIndex* index = nullptr;
  mIndex.Get(aSerialno, &index);

  if (!index || index->Length() == 0 || aTarget < index->mStartTime ||
      aTarget > index->mEndTime) {
    return NS_ERROR_FAILURE;
  }

  // Binary search over [lo, hi]. The midpoint is rounded up so that `lo = mid`
  // always makes progress.
  uint32_t lo = 0;
  uint32_t hi = index->Length() - 1;
  while (hi > lo) {
    const uint32_t mid = lo + ((hi - lo + 1) >> 1);
    const TimeUnit midTime = index->Get(mid).mTime;
    if (midTime == aTarget) {
      // Exact hit.
      lo = mid;
      break;
    }
    if (midTime < aTarget) {
      lo = mid;
    } else {
      hi = mid - 1;
    }
  }

  aResult = index->Get(lo);
  NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target");
  return NS_OK;
}

// Finds the seek target for aTarget across all tracks in aTracks: the
// per-track keypoint with the lowest byte offset. Seeking to the lowest
// offset ensures a "keyframe" is passed on every track when decoding forwards
// to the target.
// Fails if the state is inactive, the Skeleton version is below 4.0, or no
// track produced a keypoint.
nsresult SkeletonState::IndexedSeekTarget(const TimeUnit& aTarget,
                                          nsTArray<uint32_t>& aTracks,
                                          nsSeekTarget& aResult) {
  const bool indexUsable = mActive && mVersion >= SKELETON_VERSION(4, 0);
  if (!indexUsable) {
    return NS_ERROR_FAILURE;
  }

  nsSeekTarget best;
  for (const uint32_t serial : aTracks) {
    nsKeyPoint candidate;
    if (NS_FAILED(IndexedSeekTargetForTrack(serial, aTarget, candidate))) {
      continue;
    }
    if (candidate.mOffset < best.mKeyPoint.mOffset) {
      best.mKeyPoint = candidate;
      best.mSerial = serial;
    }
  }

  if (best.IsNull()) {
    return NS_ERROR_FAILURE;
  }
  LOG(LogLevel::Debug, ("Indexed seek target for time %s is offset %" PRId64,
                        aTarget.ToString().get(), best.mKeyPoint.mOffset));
  aResult = best;
  return NS_OK;
}

// Computes the duration spanned by the indexes of the given tracks: the
// latest index end time minus the earliest index start time.
// Fails if the state is inactive, the version is below 4.0, there is no
// index, aTracks is empty, any track lacks an index, or the resulting
// duration is not valid.
nsresult SkeletonState::GetDuration(const nsTArray<uint32_t>& aTracks,
                                    TimeUnit& aDuration) {
  const bool usable = mActive && mVersion >= SKELETON_VERSION(4, 0) &&
                      HasIndex() && aTracks.Length() != 0;
  if (!usable) {
    return NS_ERROR_FAILURE;
  }

  TimeUnit latestEnd = TimeUnit::FromNegativeInfinity();
  TimeUnit earliestStart = TimeUnit::FromInfinity();
  for (const uint32_t serial : aTracks) {
    nsKeyFrameIndex* trackIndex = nullptr;
    mIndex.Get(serial, &trackIndex);
    if (!trackIndex) {
      // Can't get the timestamps for one of the required tracks, fail.
      return NS_ERROR_FAILURE;
    }
    if (trackIndex->mEndTime > latestEnd) {
      latestEnd = trackIndex->mEndTime;
    }
    if (trackIndex->mStartTime < earliestStart) {
      earliestStart = trackIndex->mStartTime;
    }
  }

  NS_ASSERTION(latestEnd > earliestStart, "Duration must be positive");
  aDuration = latestEnd - earliestStart;
  if (!aDuration.IsValid()) {
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}

// Parses the message header fields of a Skeleton Fisbone packet into a
// MessageField and stores it in mMsgFieldStore under the serialno read from
// the packet.
//
// Returns false if the packet is too short, the message-fields offset points
// past the end of the packet, a recognised field other than Content-Type
// appears before Content-Type has been parsed, or mMsgFieldStore already has
// an entry for this serialno. Returns true otherwise.
bool SkeletonState::DecodeFisbone(ogg_packet* aPacket) {
  if (aPacket->bytes < static_cast<long>(FISBONE_MSG_FIELDS_OFFSET + 4)) {
    return false;
  uint32_t offsetMsgField =
      LittleEndian::readUint32(aPacket->packet + FISBONE_MSG_FIELDS_OFFSET);
  if (aPacket->bytes < static_cast<long>(FISBONE_SERIALNO_OFFSET + 4)) {
    return false;
  uint32_t serialno =
      LittleEndian::readUint32(aPacket->packet + FISBONE_SERIALNO_OFFSET);
  // The stored offset is relative to the position of the offset field itself;
  // use checked arithmetic because it comes from the packet.
  CheckedUint32 checked_fields_pos =
      CheckedUint32(FISBONE_MSG_FIELDS_OFFSET) + offsetMsgField;
  if (!checked_fields_pos.isValid() ||
      aPacket->bytes < static_cast<int64_t>(checked_fields_pos.value())) {
    return false;
  // msgProbe scans forward looking for "\r\n"; msgHead marks the start of the
  // current line; msgLength counts the bytes remaining from msgProbe.
  int64_t msgLength = aPacket->bytes - checked_fields_pos.value();
  char* msgProbe = (char*)aPacket->packet + checked_fields_pos.value();
  char* msgHead = msgProbe;
  UniquePtr<MessageField> field(new MessageField());
  // Recognised header names. Index 0 (Content-Type) is special-cased below.
  const static FieldPatternType kFieldTypeMaps[] = {
      {"Content-Type:", eContentType},
      {"Role:", eRole},
      {"Name:", eName},
      {"Language:", eLanguage},
      {"Title:", eTitle},
      {"Display-hint:", eDisplayHint},
      {"Altitude:", eAltitude},
      {"TrackOrder:", eTrackOrder},
      {"Track dependencies:", eTrackDependencies}};
  bool isContentTypeParsed = false;
  // Needs at least two bytes left so that *(msgProbe + 1) is within the packet.
  while (msgLength > 1) {
    if (*msgProbe == '\r' && *(msgProbe + 1) == '\n') {
      nsAutoCString strMsg(msgHead, msgProbe - msgHead);
      for (size_t i = 0; i < ArrayLength(kFieldTypeMaps); i++) {
        // NOTE(review): Find() matches the field name anywhere in the line,
        // while the value extraction below assumes the name starts at msgHead
        // -- confirm whether a match at a non-zero position can occur and is
        // intended.
        if (strMsg.Find(kFieldTypeMaps[i].mPatternToRecognize) != -1) {
          // The content of message header fields follows [RFC2822], and the
          // mandatory message field must be encoded in US-ASCII, others
          // must be be encoded in UTF-8. "Content-Type" must come first
          // for all of message header fields.
          // See
          // http://svn.annodex.net/standards/draft-pfeiffer-oggskeleton-current.txt.
          if (i != 0 && !isContentTypeParsed) {
            return false;
          if ((i == 0 && IsAscii(strMsg)) || (i != 0 && IsUtf8(strMsg))) {
            EMsgHeaderType eHeaderType = kFieldTypeMaps[i].mMsgHeaderType;
            // Only the first occurrence of a header type is stored; the value
            // is the rest of the line after the field name.
            Unused << field->mValuesStore.LookupOrInsertWith(
                eHeaderType, [i, msgHead, msgProbe]() {
                  uint32_t nameLen =
                      strlen(kFieldTypeMaps[i].mPatternToRecognize);
                  return MakeUnique<nsCString>(msgHead + nameLen,
                                               msgProbe - msgHead - nameLen);
                });
            isContentTypeParsed = i == 0 ? true : isContentTypeParsed;
          break;
      // Skip the "\r\n" and start a new line.
      msgProbe += 2;
      msgLength -= 2;
      msgHead = msgProbe;
      continue;
    msgLength--;
    msgProbe++;
  // Store the parsed fields; a second Fisbone for the same serialno is an
  // error.
  return mMsgFieldStore.WithEntryHandle(serialno, [&](auto&& entry) {
    if (entry) {
      // mMsgFieldStore has an entry for serialno already.
      return false;
    entry.Insert(std::move(field));
    return true;
  });

// Handles one Skeleton header packet.
//
//  - BOS ("fishead") packet: reads the version, presentation time and, for
//    4.x streams, the segment length. Returns false for versions outside
//    [4.0, 5.0) or a 4.x header shorter than SKELETON_4_0_MIN_HEADER_LEN.
//  - Index packet (only once a 4.0+ version has been recorded): forwarded to
//    DecodeIndex.
//  - Fisbone packet: forwarded to DecodeFisbone.
//  - Anything else: accepted; an e_o_s packet marks header reading as done.
bool SkeletonState::DecodeHeader(OggPacketPtr aPacket) {
  if (IsSkeletonBOS(aPacket.get())) {
    uint16_t verMajor = LittleEndian::readUint16(aPacket->packet +
                                                 SKELETON_VERSION_MAJOR_OFFSET);
    uint16_t verMinor = LittleEndian::readUint16(aPacket->packet +
                                                 SKELETON_VERSION_MINOR_OFFSET);

    // Read the presentation time. We read this before the version check as the
    // presentation time exists in all versions.
    int64_t n = LittleEndian::readInt64(
        aPacket->packet + SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET);
    int64_t d = LittleEndian::readInt64(
        aPacket->packet + SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET);

    // Convert n/d seconds to microseconds. Scale to microseconds *before*
    // converting to an integer, so the fractional seconds are kept, and work
    // in double because float cannot represent microsecond values exactly
    // beyond a few seconds.
    mPresentationTime = 0;
    if (d != 0) {
      const double usecs = static_cast<double>(n) / static_cast<double>(d) *
                           static_cast<double>(USECS_PER_S);
      // n and d come from the file. Only convert values that fit in int64_t;
      // anything else is treated like a zero denominator.
      if (usecs >= static_cast<double>(INT64_MIN) &&
          usecs < static_cast<double>(INT64_MAX)) {
        mPresentationTime = static_cast<int64_t>(usecs);
      }
    }

    mVersion = SKELETON_VERSION(verMajor, verMinor);
    // We can only care to parse Skeleton version 4.0+.
    if (mVersion < SKELETON_VERSION(4, 0) ||
        mVersion >= SKELETON_VERSION(5, 0) ||
        aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) {
      return false;
    }

    // Extract the segment length.
    mLength =
        LittleEndian::readInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET);

    LOG(LogLevel::Debug, ("Skeleton segment length: %" PRId64, mLength));

    return true;
  }
  if (IsSkeletonIndex(aPacket.get()) && mVersion >= SKELETON_VERSION(4, 0)) {
    return DecodeIndex(aPacket.get());
  }
  if (IsSkeletonFisbone(aPacket.get())) {
    return DecodeFisbone(aPacket.get());
  }
  if (aPacket->e_o_s) {
    mDoneReadingHeaders = true;
  }
  return true;
}

#undef LOG

}  // namespace mozilla