DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (c68fe15a81fc)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MOZILLA_DYNAMIC_RESAMPLER_H_
#define MOZILLA_DYNAMIC_RESAMPLER_H_

#include "AudioRingBuffer.h"
#include "AudioSegment.h"

#include <speex/speex_resampler.h>

namespace mozilla {

// Channel count of a stereo layout. Also used in this header as the inline
// capacity of the per-channel AutoTArray members.
constexpr int STEREO = 2;

/**
 * DynamicResampler allows updating on the fly the output sample rate and the
 * number of channels. In addition to that, it maintains an internal buffer for
 * the input data and allows pre-buffering as well. The Resample() method
 * strives to provide the requested number of output frames by using the input
 * data including any pre-buffering. If this is not possible then it will not
 * attempt to resample and it will return failure.
 *
 * Input data buffering makes use of the AudioRingBuffer. The capacity of the
 * buffer is 100ms of float audio and it is pre-allocated at the constructor.
 * No extra allocations take place when the input is appended. In addition to
 * that, due to a special feature of AudioRingBuffer, no extra copies take
 * place when the input data is fed to the resampler.
 *
 * The sample format must be set before using any method. If the provided sample
 * format is of type short the pre-allocated capacity of the input buffer
 * becomes 200ms of short audio.
 *
 * The DynamicResampler is not thread-safe, so all the methods apart from the
 * constructor must be called on the same thread.
 */
class DynamicResampler final {
 public:
  /**
   * Provide the initial input and output rate and the amount of pre-buffering.
   * The channel count will be set to stereo. Memory allocation will take
   * place. The input buffer is non-interleaved.
   */
  DynamicResampler(int aInRate, int aOutRate, uint32_t aPreBufferFrames = 0);
  ~DynamicResampler();

  /**
   * Set the sample format type to float or short.
   */
  void SetSampleFormat(AudioSampleFormat aFormat);
  int GetOutRate() const { return mOutRate; }
  int GetChannels() const { return mChannels; }

  /**
   * Append `aInFrames` number of frames from `aInBuffer` to the internal input
   * buffer. Memory copy/move takes place.
   */
  void AppendInput(const nsTArray<const float*>& aInBuffer, uint32_t aInFrames);
  void AppendInput(const nsTArray<const int16_t*>& aInBuffer,
                   uint32_t aInFrames);
  /**
   * Append `aInFrames` number of frames of silence to the internal input
   * buffer. Memory copy/move takes place.
   */
  void AppendInputSilence(const uint32_t aInFrames);
  /**
   * Return the number of frames stored in the internal input buffer.
   */
  uint32_t InFramesBuffered(int aChannelIndex) const;

  /*
   * Resampler as much frame is needed from the internal input buffer to the
   * `aOutBuffer` in order to provide all `aOutFrames` and return true. If there
   * not enough input frames to provide the requested output frames no
   * resampling is attempted and false is returned.
   */
  bool Resample(float* aOutBuffer, uint32_t* aOutFrames, int aChannelIndex);
  bool Resample(int16_t* aOutBuffer, uint32_t* aOutFrames, int aChannelIndex);

  /**
   * Update the output rate or/and the channel count. If a value is not updated
   * compared to the current one nothing happens. Changing the `aOutRate`
   * results in recalculation in the resampler. Changing `aChannels` results in
   * the reallocation of the internal input buffer with the exception of
   * changes between mono to stereo and vice versa where no reallocation takes
   * place. A stereo internal input buffer is always maintained even if the
   * sound is mono.
   */
  void UpdateResampler(int aOutRate, int aChannels);

  /**
   * Returns true if the resampler has enough input data to provide to the
   * output of the `Resample()` method `aOutFrames` number of frames. This is a
   * way to know in advance if the `Resampler` method will return true or false
   * given that nothing changes in between.
   */
  bool CanResample(uint32_t aOutFrames) const;

 private:
  template <typename T>
  void AppendInputInternal(const nsTArray<const T*>& aInBuffer,
                           uint32_t aInFrames) {
    MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
    for (int i = 0; i < mChannels; ++i) {
      PushInFrames(aInBuffer[i], aInFrames, i);
    }
  }

  void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
                        float* aOutBuffer, uint32_t* aOutFrames,
                        int aChannelIndex);
  void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
                        int16_t* aOutBuffer, uint32_t* aOutFrames,
                        int aChannelIndex);

  template <typename T>
  bool ResampleInternal(T* aOutBuffer, uint32_t* aOutFrames,
                        int aChannelIndex) {
    MOZ_ASSERT(mInRate);
    MOZ_ASSERT(mOutRate);
    MOZ_ASSERT(mChannels);
    MOZ_ASSERT(aChannelIndex >= 0);
    MOZ_ASSERT(aChannelIndex <= mChannels);
    MOZ_ASSERT((uint32_t)aChannelIndex <= mInternalInBuffer.Length());
    MOZ_ASSERT(aOutFrames);
    MOZ_ASSERT(*aOutFrames);

    // Not enough input, don't do anything
    if (!EnoughInFrames(*aOutFrames, aChannelIndex)) {
      *aOutFrames = 0;
      return false;
    }

    if (mInRate == mOutRate) {
      mInternalInBuffer[aChannelIndex].Read(MakeSpan(aOutBuffer, *aOutFrames));
      // Workaround to avoid discontinuity when the speex resampler operates
      // again. Feed it with the last 20 frames to warm up the internal memory
      // of the resampler and then skip memory equals to resampler's input
      // latency.
      mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, *aOutFrames);
      return true;
    }

    uint32_t totalOutFramesNeeded = *aOutFrames;

    mInternalInBuffer[aChannelIndex].ReadNoCopy(
        [this, &aOutBuffer, &totalOutFramesNeeded,
         aChannelIndex](const Span<const T>& aInBuffer) -> int {
          if (!totalOutFramesNeeded) {
            return 0;
          }
          uint32_t outFramesResampled = totalOutFramesNeeded;
          uint32_t inFrames = aInBuffer.Length();
          ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer,
                           &outFramesResampled, aChannelIndex);
          aOutBuffer += outFramesResampled;
          totalOutFramesNeeded -= outFramesResampled;
          mInputTail[aChannelIndex].StoreTail<T>(aInBuffer);
          return inFrames;
        });

    MOZ_ASSERT(totalOutFramesNeeded == 0);
    return true;
  }

  bool EnoughInFrames(uint32_t aOutFrames, int aChannelIndex) const;

  template <typename T>
  void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
                    int aChannelIndex) {
    MOZ_ASSERT(aInBuffer);
    MOZ_ASSERT(aInFrames);
    MOZ_ASSERT(mChannels);
    MOZ_ASSERT(aChannelIndex >= 0);
    MOZ_ASSERT(aChannelIndex <= mChannels);
    MOZ_ASSERT((uint32_t)aChannelIndex <= mInternalInBuffer.Length());
    mInternalInBuffer[aChannelIndex].Write(MakeSpan(aInBuffer, aInFrames));
  }

  void WarmUpResampler(bool aSkipLatency);

 private:
  int mChannels = 0;
  const int mInRate;
  int mOutRate;

  AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;

  SpeexResamplerState* mResampler = nullptr;
  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
  const uint32_t mPreBufferFrames;

  class TailBuffer {
   public:
    template <typename T>
    T* Buffer() {
      return reinterpret_cast<T*>(mBuffer);
    }
    /* Store the MAXSIZE last elements of the buffer. */
    template <typename T>
    void StoreTail(const Span<const T>& aInBuffer) {
      StoreTail(aInBuffer.data(), aInBuffer.size());
    }
    template <typename T>
    void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
      if (aInFrames >= MAXSIZE) {
        PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE);
        mSize = MAXSIZE;
      } else {
        PodCopy(Buffer<T>(), aInBuffer, aInFrames);
        mSize = static_cast<int>(aInFrames);
      }
    }
    int Length() { return mSize; }
    static const int MAXSIZE = 20;

   private:
    float mBuffer[MAXSIZE] = {};
    int mSize = 0;
  };
  AutoTArray<TailBuffer, STEREO> mInputTail;
};

/**
 * AudioChunkList provides a way to have preallocated audio buffers in
 * AudioSegment. The idea is that all the AudioChunks are created in
 * advance. Each AudioChunk is able to hold a specific amount of audio
 * (capacity). The total capacity of AudioChunkList is specified by the number
 * of AudioChunks. The important aspect of the AudioChunkList is that it
 * preallocates everything and reuses the same chunks, similar to a ring
 * buffer.
 *
 * Why the whole AudioChunk is preallocated and not some raw memory buffer? This
 * is due to the limitations of MediaTrackGraph. The way that MTG works depends
 * on `AudioSegment`s to convey the actual audio data. An AudioSegment consists
 * of AudioChunks. The AudioChunk is built in a way, that owns and allocates the
 * audio buffers. Thus, since the use of AudioSegment is mandatory if the audio
 * data was in a different form, the only way to use it from the audio thread
 * would be to create the AudioChunk there. That would result in a copy
 * operation (not very important) and, most of all, an allocation of the audio
 * buffer in the audio thread. This happens in many places inside MTG; it is a
 * bad practice, though, and it is avoided here thanks to the AudioChunkList.
 *
 * After construction the sample format must be set, when it is available. It
 * can be set in the audio thread. Before the sample format is set, it is not
 * possible to use any method of AudioChunkList.
 *
 * Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128
 * frames of float audio. Nevertheless, the sample format is not available at
 * that point. Thus if the sample format is set to short, the capacity of each
 * chunk changes to 256 frames, and the total duration becomes twice as big.
 * There are methods to get the chunk capacity and total capacity in frames,
 * and they must always be used.
 *
 * Two things to note. First, when the channel count changes everything is
 * recreated, which means reallocations. Second, the total capacity might
 * differ from the requested total capacity for two reasons: first, if the
 * sample format is set to short, and second, because the number of chunks in
 * the list divides exactly the final total capacity. The corresponding method
 * must always be used to query the total capacity.
 */
class AudioChunkList {
 public:
  /**
   * Constructor, the final total duration might be different from the requested
   * `aTotalDuration`. Memory allocation takes place.
   */
  AudioChunkList(int aTotalDuration, int aChannels);
  AudioChunkList(const AudioChunkList&) = delete;
  AudioChunkList(AudioChunkList&&) = delete;
  ~AudioChunkList() = default;

  /**
   * Set sample format. It must be done before any other method being used.
   */
  void SetSampleFormat(AudioSampleFormat aFormat);
  /**
   * Get the next available AudioChunk. The duration of the chunk will be zero
   * and the volume 1.0. However, the buffers will be there ready to be written.
   * Please note, that a reference of the preallocated chunk is returned. Thus
   * it _must not be consumed_ directly. If the chunk needs to be consumed it
   * must be copied to a temporary chunk first. For example:
   * ```
   *   AudioChunk& chunk = audioChunklist.GetNext();
   *   // Set up the chunk
   *   AudioChunk tmp = chunk;
   *   audioSegment.AppendAndConsumeChunk(&tmp);
   * ```
   * This way no memory allocation or copy, takes place.
   */
  AudioChunk& GetNext();

  /**
   * Get the capacity of each individual AudioChunk in the list.
   */
  int ChunkCapacity() const {
    MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
               mSampleFormat == AUDIO_FORMAT_FLOAT32);
    return mChunkCapacity;
  }
  /**
   * Get the total capacity of AudioChunkList.
   */
  int TotalCapacity() const {
    MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
               mSampleFormat == AUDIO_FORMAT_FLOAT32);
    return CheckedInt<int>(mChunkCapacity * mChunks.Length()).value();
  }

  /**
   * Update the channel count of the AudioChunkList. Memory allocation is
   * taking place.
   */
  void Update(int aChannels);

 private:
  void IncrementIndex() {
    ++mIndex;
    mIndex = CheckedInt<int>(mIndex % mChunks.Length()).value();
  }
  void CreateChunks(int aNumOfChunks, int aChannels);
  void UpdateToMonoOrStereo(int aChannels);

 private:
  nsTArray<AudioChunk> mChunks;
  int mIndex = 0;
  int mChunkCapacity = 128;
  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
};

/**
 * Audio Resampler is a resampler able to change the output rate and channel
 * count on the fly. The API is simple and it is based on AudioSegment in order
 * to be used by MTG. All memory allocations, for input and output buffers,
 * happen in the constructor and when the channel count changes. The memory is
 * recycled in order to avoid reallocations. It also supports prebuffering of
 * silence. It consists of DynamicResampler and AudioChunkList so please read
 * their documentation if you are interested in more details.
 *
 * The output buffer is preallocated and returned in the form of AudioSegment.
 * The intention is to be used directly in a MediaTrack. Since an AudioChunk
 * must not be "shared" in order to be written, the AudioSegment returned by
 * the resampler method must be cleaned up so that the `AudioChunk`s it
 * consists of can be reused. For `MediaTrack::mSegment` this happens every
 * ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus
 * memory capacity of 100ms has been preallocated for internal input and output
 * buffering.
 */
class AudioResampler final {
 public:
  AudioResampler(int aInRate, int aOutRate, uint32_t aPreBufferFrames = 0);

  /**
   * Feed the input data of `aInSegment` to the internal buffer of the
   * resampler. The memory is copied/moved. In addition, the channel count
   * changes to follow the channel count of the chunks.
   */
  void AppendInput(const AudioSegment& aInSegment);
  /**
   * Duration, in frames, of the internal input buffer.
   */
  int InputDuration() const;

  /**
   * Request `aOutFrames` of audio in the output sample rate, produced from the
   * internally buffered input. When the buffered input is not enough for that
   * amount of output, an empty AudioSegment is returned.
   */
  AudioSegment Resample(uint32_t aOutFrames);

  /**
   * Set the output rate that the resampler will use; the channel count is
   * left as it is.
   */
  void UpdateOutRate(int aOutRate) {
    const int currentChannels = mResampler.GetChannels();
    Update(aOutRate, currentChannels);
  }

 private:
  // Set the channel count; the output rate is left as it is.
  void UpdateChannels(int aChannels) {
    const int currentOutRate = mResampler.GetOutRate();
    Update(currentOutRate, aChannels);
  }
  void Update(int aOutRate, int aChannels);

 private:
  DynamicResampler mResampler;
  AudioChunkList mOutputChunks;
  bool mIsSampleFormatSet = false;
};

}  // namespace mozilla

#endif  // MOZILLA_DYNAMIC_RESAMPLER_H_