TestAudioInputProcessing.cpp

mozilla-central/dom/media/gtest/TestAudioInputProcessing.cpp (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: Audio/Video

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this file,

 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "gmock/gmock.h"

#include "gtest/gtest.h"

#include "AudioGenerator.h"

#include "MediaEngineWebRTCAudio.h"

#include "MediaTrackGraphImpl.h"

#include "PrincipalHandle.h"

#include "mozilla/Attributes.h"

#include "mozilla/NullPrincipal.h"

#include "mozilla/UniquePtr.h"

#include "mozilla/Unused.h"

#include "nsContentUtils.h"

#include "nsTArray.h"

using namespace mozilla;

using testing::NiceMock;

using testing::Return;

class MockGraph : public MediaTrackGraphImpl {

 public:

  explicit MockGraph(TrackRate aRate)

      : MediaTrackGraphImpl(0, aRate, nullptr, AbstractThread::MainThread()) {

    ON_CALL(*this, OnGraphThread).WillByDefault(Return(true));

  void Init(uint32_t aChannels) {

    MediaTrackGraphImpl::Init(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aChannels);

    MonitorAutoLock lock(mMonitor);

    // We don't need a graph driver.  Advance to

    // LIFECYCLE_WAITING_FOR_TRACK_DESTRUCTION so that the driver never

    // starts.  Graph control messages run as in shutdown, synchronously.

    // This permits the main thread part of track initialization through

    // AudioProcessingTrack::Create().

    mLifecycleState = LIFECYCLE_WAITING_FOR_TRACK_DESTRUCTION;

#ifdef DEBUG

    mCanRunMessagesSynchronously = true;

#endif

    // Remove this graph's driver since it holds a ref. We are still kept

    // alive by the self-ref. Destroy() must be called to break that cycle if

    // no tracks are created and destroyed.

    mDriver = nullptr;

  MOCK_CONST_METHOD0(OnGraphThread, bool());

 protected:

  ~MockGraph() = default;

};

// AudioInputProcessing will put extra frames as pre-buffering data to avoid

// glitchs in non pass-through mode. The main goal of the test is to check how

// many frames left in the AudioInputProcessing's mSegment in various situations

// after input data has been processed.

TEST(TestAudioInputProcessing, Buffering)

  const TrackRate rate = 8000;  // So packet size is 80

  const uint32_t channels = 1;

  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);

  graph->Init(channels);

  RefPtr track = AudioProcessingTrack::Create(graph);

  auto aip = MakeRefPtr<AudioInputProcessing>(channels);

  const size_t frames = 72;

  AudioGenerator<AudioDataValue> generator(channels, rate);

  GraphTime processedTime;

  GraphTime nextTime;

  AudioSegment output;

  MediaEnginePrefs settings;

  settings.mChannels = channels;

  // pref "media.getusermedia.agc2_forced" defaults to true.

  // mAgc would need to be set to something other than kAdaptiveAnalog

  // for mobile, as asserted in AudioInputProcessing::ConfigForPrefs,

  // if gain_controller1 were used.

  settings.mAgc2Forced = true;

  // Toggle pass-through mode without starting

    EXPECT_EQ(aip->IsPassThrough(graph), true);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    settings.mAgcOn = true;

    aip->ApplySettings(graph, nullptr, settings);

    EXPECT_EQ(aip->IsPassThrough(graph), false);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    settings.mAgcOn = false;

    aip->ApplySettings(graph, nullptr, settings);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    // Need (nextTime - processedTime) = 128 - 0 = 128 frames this round.

    // aip has not started and set to processing mode yet, so output will be

    // filled with silence data directly.

    processedTime = 0;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

  // Set aip to processing/non-pass-through mode

  settings.mAgcOn = true;

  aip->ApplySettings(graph, nullptr, settings);

    // Need (nextTime - processedTime) = 256 - 128 = 128 frames this round.

    // aip has not started yet, so output will be filled with silence data

    // directly.

    processedTime = nextTime;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

  // aip has been set to processing mode and is started.

  aip->Start(graph);

    // Need (nextTime - processedTime) = 256 - 256 = 0 frames this round.

    // Process() will return early on 0 frames of input.

    // Pre-buffering is not triggered.

    processedTime = nextTime;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    // Need (nextTime - processedTime) = 384 - 256 = 128 frames this round.

    // On receipt of the these first frames, aip will insert 80 frames

    // into its internal buffer as pre-buffering.

    // Process() will take 128 frames from input, packetize and process

    // these frames into floor(128/80) = 1 80-frame packet (48 frames left in

    // packetizer), insert packets into aip's internal buffer, then move 128

    // frames the internal buffer to output, leaving 80 + 80 - 128 = 32 frames

    // in aip's internal buffer.

    processedTime = nextTime;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 32);

    // Need (nextTime - processedTime) = 384 - 384 = 0 frames this round.

    processedTime = nextTime;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(5 * frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 32);

    // Need (nextTime - processedTime) = 512 - 384 = 128 frames this round.

    // The Process() aip will take 128 frames from input, packetize and process

    // these frames into floor(128+48/80) = 2 80-frame packet (16 frames left in

    // packetizer), insert packets into aip's internal buffer, then move 128

    // frames the internal buffer to output, leaving 32 + 2*80 - 128 = 64 frames

    // in aip's internal buffer.

    processedTime = nextTime;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(6 * frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 64);

  // Set aip to pass-through mode

  settings.mAgcOn = false;

  aip->ApplySettings(graph, nullptr, settings);

    // Need (nextTime - processedTime) = 512 - 512 = 0 frames this round.

    // No buffering in pass-through mode

    processedTime = nextTime;

    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(7 * frames);

    AudioSegment input;

    generator.Generate(input, nextTime - processedTime);

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), processedTime);

    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

  aip->Stop(graph);

  track->Destroy();

TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals)

  const TrackRate rate = 48000;  // so # of output frames from packetizer is 480

  const uint32_t channels = 2;

  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);

  graph->Init(channels);

  RefPtr track = AudioProcessingTrack::Create(graph);

  auto aip = MakeRefPtr<AudioInputProcessing>(channels);

  AudioGenerator<AudioDataValue> generator(channels, rate);

  RefPtr<nsIPrincipal> dummy_principal =

      NullPrincipal::CreateWithoutOriginAttributes();

  const PrincipalHandle principal1 = MakePrincipalHandle(dummy_principal.get());

  const PrincipalHandle principal2 =

      MakePrincipalHandle(nsContentUtils::GetSystemPrincipal());

  // Total 4800 frames. It's easier to test with frames of multiples of 480.

  nsTArray<std::pair<TrackTime, PrincipalHandle>> framesWithPrincipal = {

      {100, principal1},

      {200, PRINCIPAL_HANDLE_NONE},

      {300, principal2},

      {400, principal1},

      {440, PRINCIPAL_HANDLE_NONE},

      // 3 packet-size above.

      {480, principal1},

      {480, principal2},

      {480, PRINCIPAL_HANDLE_NONE},

      // 3 packet-size above.

      {500, principal2},

      {490, principal1},

      {600, principal1},

      {330, principal1}

      // 4 packet-size above.

};

  // Generate 4800 frames of data with different principals.

  AudioSegment input;

    for (const auto& [duration, principal] : framesWithPrincipal) {

      AudioSegment data;

      generator.Generate(data, duration);

      for (AudioSegment::ChunkIterator it(data); !it.IsEnded(); it.Next()) {

        it->mPrincipalHandle = principal;

      input.AppendFrom(&data);

  auto verifyPrincipals = [&](const AudioSegment& data) {

    TrackTime start = 0;

    for (const auto& [duration, principal] : framesWithPrincipal) {

      const TrackTime end = start + duration;

      AudioSegment slice;

      slice.AppendSlice(data, start, end);

      start = end;

      for (AudioSegment::ChunkIterator it(slice); !it.IsEnded(); it.Next()) {

        EXPECT_EQ(it->mPrincipalHandle, principal);

};

  // Check the principals in audio-processing mode.

  MediaEnginePrefs settings;

  settings.mChannels = channels;

  settings.mAgcOn = true;

  settings.mAgc2Forced = true;

  aip->ApplySettings(graph, nullptr, settings);

  EXPECT_EQ(aip->IsPassThrough(graph), false);

  aip->Start(graph);

    AudioSegment output;

      AudioSegment data;

      aip->Process(track, 0, 4800, &input, &data);

      EXPECT_EQ(input.GetDuration(), 4800);

      EXPECT_EQ(data.GetDuration(), 4800);

      // Extract another 480 frames to account for delay from pre-buffering.

      EXPECT_EQ(aip->NumBufferedFrames(graph), 480);

      AudioSegment dummy;

      dummy.AppendNullData(480);

      aip->Process(track, 0, 480, &dummy, &data);

      EXPECT_EQ(dummy.GetDuration(), 480);

      EXPECT_EQ(data.GetDuration(), 480 + 4800);

      // Ignore the pre-buffering silence.

      output.AppendSlice(data, 480, 480 + 4800);

    verifyPrincipals(output);

  // Check the principals in pass-through mode.

  settings.mAgcOn = false;

  aip->ApplySettings(graph, nullptr, settings);

  EXPECT_EQ(aip->IsPassThrough(graph), true);

    AudioSegment output;

    aip->Process(track, 0, 4800, &input, &output);

    EXPECT_EQ(input.GetDuration(), 4800);

    EXPECT_EQ(output.GetDuration(), 4800);

    verifyPrincipals(output);

  aip->Stop(graph);

  track->Destroy();

TEST(TestAudioInputProcessing, Downmixing)

  const TrackRate rate = 44100;

  const uint32_t channels = 4;

  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate);

  graph->Init(channels);

  RefPtr track = AudioProcessingTrack::Create(graph);

  auto aip = MakeRefPtr<AudioInputProcessing>(channels);

  const size_t frames = 44100;

  AudioGenerator<AudioDataValue> generator(channels, rate);

  GraphTime processedTime;

  GraphTime nextTime;

  MediaEnginePrefs settings;

  settings.mChannels = channels;

  settings.mAgcOn = true;

  settings.mAgc2Forced = true;

  aip->ApplySettings(graph, nullptr, settings);

  EXPECT_EQ(aip->IsPassThrough(graph), false);

  aip->Start(graph);

  processedTime = 0;

  nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);

    AudioSegment input;

    AudioSegment output;

    generator.Generate(input, nextTime - processedTime);

    // Intentionally reduce the amplitude of the generated sine wave so there's

    // no chance the max amplitude reaches 1.0, but not enough so that 4

    // channels summed together won't clip.

    input.ApplyVolume(0.9);

    // Process is going to see that it has 4 channels of input, and is going to

    // downmix to mono, scaling the input by 1/4 in the process.

    // We can't compare the input and output signal because the sine is going to

    // be mangledui

    aip->Process(track, processedTime, nextTime, &input, &output);

    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

    EXPECT_EQ(output.GetDuration(), nextTime);

    EXPECT_EQ(output.MaxChannelCount(), 1u);

    // Verify that it doesn't clip: the input signal has likely been mangled by

    // the various processing passes, but at least it shouldn't clip. We know we

    // always have floating point audio here, regardless of the sample-type used

    // by Gecko.

    for (AudioSegment::ChunkIterator iterOutput(output); !iterOutput.IsEnded();

         iterOutput.Next()) {

      const float* const output = iterOutput->ChannelData<float>()[0];

      for (uint32_t i = 0; i < iterOutput->GetDuration(); i++) {

        // Very conservative here, it's likely that the AGC lowers the volume a

        // lot.

        EXPECT_LE(std::abs(output[i]), 0.95);

  // Now, repeat the test in pass-through mode, checking we get the unmodified

  // 4 channels.

  settings.mAgcOn = false;

  aip->ApplySettings(graph, nullptr, settings);

  EXPECT_EQ(aip->IsPassThrough(graph), true);

  AudioSegment input, output;

  processedTime = nextTime;

  nextTime += MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);

  generator.Generate(input, nextTime - processedTime);

  aip->Process(track, processedTime, nextTime, &input, &output);

  EXPECT_EQ(input.GetDuration(), nextTime - processedTime);

  EXPECT_EQ(output.GetDuration(), nextTime - processedTime);

  // This time, no downmix: 4 channels of input, 4 channels of output

  EXPECT_EQ(output.MaxChannelCount(), 4u);

  nsTArray<AudioDataValue> inputLinearized, outputLinearized;

  input.WriteToInterleavedBuffer(inputLinearized, input.MaxChannelCount());

  output.WriteToInterleavedBuffer(outputLinearized, output.MaxChannelCount());

  // The data should be passed through, and exactly equal.

  for (uint32_t i = 0; i < frames * channels; i++) {

    EXPECT_EQ(inputLinearized[i], outputLinearized[i]);

  aip->Stop(graph);

  track->Destroy();