TDStretch.cpp - mozsearch

mozilla-central/media/libsoundtouch/src/TDStretch.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

///////////////////////////////////////////////////////////////////////////////

///

/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo

/// while maintaining the original pitch by using a time domain WSOLA-like

/// method with several performance-increasing tweaks.

///

/// Notes : MMX optimized functions reside in a separate, platform-specific

/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.

///

/// This source file contains OpenMP optimizations that allow speeding up the

/// corss-correlation algorithm by executing it in several threads / CPU cores

/// in parallel. See the following article link for more detailed discussion

/// about SoundTouch OpenMP optimizations:

/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices

///

/// Author        : Copyright (c) Olli Parviainen

/// Author e-mail : oparviai 'at' iki.fi

/// SoundTouch WWW: http://www.surina.net/soundtouch

///

////////////////////////////////////////////////////////////////////////////////

//

// License :

//

//  SoundTouch audio processing library

//  Copyright (c) Olli Parviainen

//

//  This library is free software; you can redistribute it and/or

//  modify it under the terms of the GNU Lesser General Public

//  License as published by the Free Software Foundation; either

//  version 2.1 of the License, or (at your option) any later version.

//

//  This library is distributed in the hope that it will be useful,

//  but WITHOUT ANY WARRANTY; without even the implied warranty of

//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

//  Lesser General Public License for more details.

//

//  You should have received a copy of the GNU Lesser General Public

//  License along with this library; if not, write to the Free Software

//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

//

////////////////////////////////////////////////////////////////////////////////

#include <string.h>

#include <limits.h>

#include <assert.h>

#include <math.h>

#include <float.h>

#include "STTypes.h"

#include "cpu_detect.h"

#include "TDStretch.h"

using namespace soundtouch;

#define max(x, y) (((x) > (y)) ? (x) : (y))

/*****************************************************************************

 * Constant definitions

 *****************************************************************************/

// Table for the hierarchical mixing position seeking algorithm

const short _scanOffsets[5][24]={

    { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,

      868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},

    {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,

        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

    { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,

        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

    {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,

        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

    { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,

      116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};

/*****************************************************************************

 * Implementation of the class 'TDStretch'

 *****************************************************************************/

TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)

    bQuickSeek = false;

    channels = 2;

    pMidBuffer = NULL;

    pMidBufferUnaligned = NULL;

    overlapLength = 0;

    bAutoSeqSetting = true;

    bAutoSeekSetting = true;

    tempo = 1.0f;

    setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);

    setTempo(1.0f);

    clear();

TDStretch::~TDStretch()

    delete[] pMidBufferUnaligned;

// Sets routine control parameters. These control are certain time constants

// defining how the sound is stretched to the desired duration.

//

// 'sampleRate' = sample rate of the sound

// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)

// 'seekwindowMS' = seeking window length for scanning the best overlapping

//      position (default = 28 ms)

// 'overlapMS' = overlapping length (default = 12 ms)

void TDStretch::setParameters(int aSampleRate, int aSequenceMS,

                              int aSeekWindowMS, int aOverlapMS)

    // accept only positive parameter values - if zero or negative, use old values instead

    if (aSampleRate > 0)

        if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");

        this->sampleRate = aSampleRate;

    if (aOverlapMS > 0) this->overlapMs = aOverlapMS;

    if (aSequenceMS > 0)

        this->sequenceMs = aSequenceMS;

        bAutoSeqSetting = false;

    else if (aSequenceMS == 0)

        // if zero, use automatic setting

        bAutoSeqSetting = true;

    if (aSeekWindowMS > 0)

        this->seekWindowMs = aSeekWindowMS;

        bAutoSeekSetting = false;

    else if (aSeekWindowMS == 0)

        // if zero, use automatic setting

        bAutoSeekSetting = true;

    calcSeqParameters();

    calculateOverlapLength(overlapMs);

    // set tempo to recalculate 'sampleReq'

    setTempo(tempo);

/// Get routine control parameters, see setParameters() function.

/// Any of the parameters to this function can be NULL, in such case corresponding parameter

/// value isn't returned.

void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const

    if (pSampleRate)

        *pSampleRate = sampleRate;

    if (pSequenceMs)

        *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;

    if (pSeekWindowMs)

        *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;

    if (pOverlapMs)

        *pOverlapMs = overlapMs;

// Overlaps samples in 'midBuffer' with the samples in 'pInput'

void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const

    int i;

    SAMPLETYPE m1, m2;

    m1 = (SAMPLETYPE)0;

    m2 = (SAMPLETYPE)overlapLength;

    for (i = 0; i < overlapLength ; i ++)

        pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;

        m1 += 1;

        m2 -= 1;

void TDStretch::clearMidBuffer()

    memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);

void TDStretch::clearInput()

    inputBuffer.clear();

    clearMidBuffer();

    isBeginning = true;

    maxnorm = 0;

    maxnormf = 1e8;

    skipFract = 0;

// Clears the sample buffers

void TDStretch::clear()

    outputBuffer.clear();

    clearInput();

// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero

// to enable

void TDStretch::enableQuickSeek(bool enable)

    bQuickSeek = enable;

// Returns nonzero if the quick seeking algorithm is enabled.

bool TDStretch::isQuickSeekEnabled() const

    return bQuickSeek;

// Seeks for the optimal overlap-mixing position.

int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)

    if (bQuickSeek)

        return seekBestOverlapPositionQuick(refPos);

    else

        return seekBestOverlapPositionFull(refPos);

// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position

// of 'ovlPos'.

inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const

#ifndef USE_MULTICH_ALWAYS

    if (channels == 1)

        // mono sound.

        overlapMono(pOutput, pInput + ovlPos);

    else if (channels == 2)

        // stereo sound

        overlapStereo(pOutput, pInput + 2 * ovlPos);

    else

#endif // USE_MULTICH_ALWAYS

        assert(channels > 0);

        overlapMulti(pOutput, pInput + channels * ovlPos);

// Seeks for the optimal overlap-mixing position. The 'stereo' version of the

// routine

//

// The best position is determined as the position where the two overlapped

// sample sequences are 'most alike', in terms of the highest cross-correlation

// value over the overlapping period

int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)

    int bestOffs;

    double bestCorr;

    int i;

    double norm;

    bestCorr = -FLT_MAX;

    bestOffs = 0;

    // Scans for the best correlation value by testing each possible position

    // over the permitted range.

    bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);

    bestCorr = (bestCorr + 0.1) * 0.75;

    #pragma omp parallel for

    for (i = 1; i < seekLength; i ++)

        double corr;

        // Calculates correlation value for the mixing position corresponding to 'i'

#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)

        // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't

        // iterate the loop in sequential order

        // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions

        corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);

#else

        // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same

        // as "calcCrossCorr", but saves time by reusing & updating previously stored

        // "norm" value

        corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);

#endif

        // heuristic rule to slightly favour values close to mid of the range

        double tmp = (double)(2 * i - seekLength) / (double)seekLength;

        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));

        // Checks for the highest correlation value

        if (corr > bestCorr)

            // For optimal performance, enter critical section only in case that best value found.

            // in such case repeat 'if' condition as it's possible that parallel execution may have

            // updated the bestCorr value in the mean time

            #pragma omp critical

            if (corr > bestCorr)

                bestCorr = corr;

                bestOffs = i;

#ifdef SOUNDTOUCH_INTEGER_SAMPLES

    adaptNormalizer();

#endif

    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).

    clearCrossCorrState();

    return bestOffs;

// Quick seek algorithm for improved runtime-performance: First roughly scans through the

// correlation area, and then scan surroundings of two best preliminary correlation candidates

// with improved precision

//

// Based on testing:

// - This algorithm gives on average 99% as good match as the full algorithm

// - this quick seek algorithm finds the best match on ~90% of cases

// - on those 10% of cases when this algorithm doesn't find best match,

//   it still finds on average ~90% match vs. the best possible match

int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)

#define _MIN(a, b)   (((a) < (b)) ? (a) : (b))

#define SCANSTEP    16

#define SCANWIND    8

    int bestOffs;

    int i;

    int bestOffs2;

    float bestCorr, corr;

    float bestCorr2;

    double norm;

    // note: 'float' types used in this function in case that the platform would need to use software-fp

    bestCorr =

    bestCorr2 = -FLT_MAX;

    bestOffs =

    bestOffs2 = SCANWIND;

    // Scans for the best correlation value by testing each possible position

    // over the permitted range. Look for two best matches on the first pass to

    // increase possibility of ideal match.

//

    // Begin from "SCANSTEP" instead of SCANWIND to make the calculation

    // catch the 'middlepoint' of seekLength vector as that's the a-priori

    // expected best match position

//

    // Roughly:

    // - 15% of cases find best result directly on the first round,

    // - 75% cases find better match on 2nd round around the best match from 1st round

    // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round

    for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)

        // Calculates correlation value for the mixing position corresponding

        // to 'i'

        corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);

        // heuristic rule to slightly favour values close to mid of the seek range

        float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;

        corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));

        // Checks for the highest correlation value

        if (corr > bestCorr)

            // found new best match. keep the previous best as 2nd best match

            bestCorr2 = bestCorr;

            bestOffs2 = bestOffs;

            bestCorr = corr;

            bestOffs = i;

        else if (corr > bestCorr2)

            // not new best, but still new 2nd best match

            bestCorr2 = corr;

            bestOffs2 = i;

    // Scans surroundings of the found best match with small stepping

    int end = _MIN(bestOffs + SCANWIND + 1, seekLength);

    for (i = bestOffs - SCANWIND; i < end; i++)

        if (i == bestOffs) continue;    // this offset already calculated, thus skip

        // Calculates correlation value for the mixing position corresponding

        // to 'i'

        corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);

        // heuristic rule to slightly favour values close to mid of the range

        float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;

        corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));

        // Checks for the highest correlation value

        if (corr > bestCorr)

            bestCorr = corr;

            bestOffs = i;

    // Scans surroundings of the 2nd best match with small stepping

    end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);

    for (i = bestOffs2 - SCANWIND; i < end; i++)

        if (i == bestOffs2) continue;    // this offset already calculated, thus skip

        // Calculates correlation value for the mixing position corresponding

        // to 'i'

        corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);

        // heuristic rule to slightly favour values close to mid of the range

        float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;

        corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));

        // Checks for the highest correlation value

        if (corr > bestCorr)

            bestCorr = corr;

            bestOffs = i;

    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).

    clearCrossCorrState();

#ifdef SOUNDTOUCH_INTEGER_SAMPLES

    adaptNormalizer();

#endif

    return bestOffs;

/// For integer algorithm: adapt normalization factor divider with music so that

/// it'll not be pessimistically restrictive that can degrade quality on quieter sections

/// yet won't cause integer overflows either

void TDStretch::adaptNormalizer()

    // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to

    // too low values during pauses in music

    if ((maxnorm > 1000) || (maxnormf > 40000000))

        //norm averaging filter

        maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm;

        if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16))

            // large values, so increase divider

            overlapDividerBitsNorm++;

            if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase

        else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0))

            // extra small values, decrease divider

            overlapDividerBitsNorm--;

    maxnorm = 0;

/// clear cross correlation routine state if necessary

void TDStretch::clearCrossCorrState()

    // default implementation is empty.

/// Calculates processing sequence length according to tempo setting

void TDStretch::calcSeqParameters()

    // Adjust tempo param according to tempo, so that variating processing sequence length is used

    // at various tempo settings, between the given low...top limits

    #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)

    #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)

    // sequence-ms setting values at above low & top tempo

    #define AUTOSEQ_AT_MIN      90.0

    #define AUTOSEQ_AT_MAX      40.0

    #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))

    #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))

    // seek-window-ms setting values at above low & top tempoq

    #define AUTOSEEK_AT_MIN     20.0

    #define AUTOSEEK_AT_MAX     15.0

    #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))

    #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))

    #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))

    double seq, seek;

    if (bAutoSeqSetting)

        seq = AUTOSEQ_C + AUTOSEQ_K * tempo;

        seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);

        sequenceMs = (int)(seq + 0.5);

    if (bAutoSeekSetting)

        seek = AUTOSEEK_C + AUTOSEEK_K * tempo;

        seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);

        seekWindowMs = (int)(seek + 0.5);

    // Update seek window lengths

    seekWindowLength = (sampleRate * sequenceMs) / 1000;

    if (seekWindowLength < 2 * overlapLength)

        seekWindowLength = 2 * overlapLength;

    seekLength = (sampleRate * seekWindowMs) / 1000;

// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower

// tempo, larger faster tempo.

void TDStretch::setTempo(double newTempo)

    int intskip;

    tempo = newTempo;

    // Calculate new sequence duration

    calcSeqParameters();

    // Calculate ideal skip length (according to tempo value)

    nominalSkip = tempo * (seekWindowLength - overlapLength);

    intskip = (int)(nominalSkip + 0.5);

    // Calculate how many samples are needed in the 'inputBuffer' to

    // process another batch of samples

    //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;

    sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;

// Sets the number of channels, 1 = mono, 2 = stereo

void TDStretch::setChannels(int numChannels)

    if (!verifyNumberOfChannels(numChannels) ||

        (channels == numChannels)) return;

    channels = numChannels;

    inputBuffer.setChannels(channels);

    outputBuffer.setChannels(channels);

    // re-init overlap/buffer

    overlapLength=0;

    setParameters(sampleRate);

// nominal tempo, no need for processing, just pass the samples through

// to outputBuffer

/*

void TDStretch::processNominalTempo()

    assert(tempo == 1.0f);

    if (bMidBufferDirty)

        // If there are samples in pMidBuffer waiting for overlapping,

        // do a single sliding overlapping with them in order to prevent a

        // clicking distortion in the output sound

        if (inputBuffer.numSamples() < overlapLength)

            // wait until we've got overlapLength input samples

            return;

        // Mix the samples in the beginning of 'inputBuffer' with the

        // samples in 'midBuffer' using sliding overlapping

        overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);

        outputBuffer.putSamples(overlapLength);

        inputBuffer.receiveSamples(overlapLength);

        clearMidBuffer();

        // now we've caught the nominal sample flow and may switch to

        // bypass mode

    // Simply bypass samples from input to output

    outputBuffer.moveSamples(inputBuffer);

*/

// Processes as many processing frames of the samples 'inputBuffer', store

// the result into 'outputBuffer'

void TDStretch::processSamples()

    int ovlSkip;

    int offset = 0;

    int temp;

    /* Removed this small optimization - can introduce a click to sound when tempo setting

       crosses the nominal value

    if (tempo == 1.0f)

        // tempo not changed from the original, so bypass the processing

        processNominalTempo();

        return;

*/

    // Process samples as long as there are enough samples in 'inputBuffer'

    // to form a processing frame.

    while ((int)inputBuffer.numSamples() >= sampleReq)

        if (isBeginning == false)

            // apart from the very beginning of the track,

            // scan for the best overlapping position & do overlap-add

            offset = seekBestOverlapPosition(inputBuffer.ptrBegin());

            // Mix the samples in the 'inputBuffer' at position of 'offset' with the

            // samples in 'midBuffer' using sliding overlapping

            // ... first partially overlap with the end of the previous sequence

            // (that's in 'midBuffer')

            overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);

            outputBuffer.putSamples((uint)overlapLength);

            offset += overlapLength;

        else

            // Adjust processing offset at beginning of track by not perform initial overlapping

            // and compensating that in the 'input buffer skip' calculation

            isBeginning = false;

            int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);

            #ifdef ST_SIMD_AVOID_UNALIGNED

            // in SIMD mode, round the skip amount to value corresponding to aligned memory address

            if (channels == 1)

                skip &= -4;

            else if (channels == 2)

                skip &= -2;

            #endif

            skipFract -= skip;

            if (skipFract <= -nominalSkip)

                skipFract = -nominalSkip;

        // ... then copy sequence samples from 'inputBuffer' to output:

        // crosscheck that we don't have buffer overflow...

        if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))

            continue;    // just in case, shouldn't really happen

        // length of sequence

        temp = (seekWindowLength - 2 * overlapLength);

        outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);

        // Copies the end of the current sequence from 'inputBuffer' to

        // 'midBuffer' for being mixed with the beginning of the next

        // processing sequence and so on

        assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());

        memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),

            channels * sizeof(SAMPLETYPE) * overlapLength);

        // Remove the processed samples from the input buffer. Update

        // the difference between integer & nominal skip step to 'skipFract'

        // in order to prevent the error from accumulating over time.

        skipFract += nominalSkip;   // real skip size

        ovlSkip = (int)skipFract;   // rounded to integer skip

        skipFract -= ovlSkip;       // maintain the fraction part, i.e. real vs. integer skip

        inputBuffer.receiveSamples((uint)ovlSkip);

// Adds 'numsamples' pcs of samples from the 'samples' memory position into

// the input of the object.

void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)

    // Add the samples into the input buffer

    inputBuffer.putSamples(samples, nSamples);

    // Process the samples in input buffer

    processSamples();

/// Set new overlap length parameter & reallocate RefMidBuffer if necessary.

void TDStretch::acceptNewOverlapLength(int newOverlapLength)

    int prevOvl;

    assert(newOverlapLength >= 0);

    prevOvl = overlapLength;

    overlapLength = newOverlapLength;

    if (overlapLength > prevOvl)

        delete[] pMidBufferUnaligned;

        pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];

        // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency

        pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);

        clearMidBuffer();

// Operator 'new' is overloaded so that it automatically creates a suitable instance

// depending on if we've a MMX/SSE/etc-capable CPU available or not.

void * TDStretch::operator new(size_t s)

    // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!

    ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");

    return newInstance();

TDStretch * TDStretch::newInstance()

#if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)

    uint uExtensions;

    uExtensions = detectCPUextensions();

#endif

    // Check if MMX/SSE instruction set extensions supported by CPU

#ifdef SOUNDTOUCH_ALLOW_MMX

    // MMX routines available only with integer sample types

    if (uExtensions & SUPPORT_MMX)

        return ::new TDStretchMMX;

    else

#endif // SOUNDTOUCH_ALLOW_MMX

#ifdef SOUNDTOUCH_ALLOW_SSE

    if (uExtensions & SUPPORT_SSE)

        // SSE support

        return ::new TDStretchSSE;

    else

#endif // SOUNDTOUCH_ALLOW_SSE

        // ISA optimizations not supported, use plain C version

        return ::new TDStretch;

//////////////////////////////////////////////////////////////////////////////

//

// Integer arithmetic specific algorithm implementations.

//

//////////////////////////////////////////////////////////////////////////////

#ifdef SOUNDTOUCH_INTEGER_SAMPLES

// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'

// version of the routine.

void TDStretch::overlapStereo(short *poutput, const short *input) const

    int i;

    short temp;

    int cnt2;

    for (i = 0; i < overlapLength ; i ++)

        temp = (short)(overlapLength - i);

        cnt2 = 2 * i;

        poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;

        poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;

// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'

// version of the routine.

void TDStretch::overlapMulti(short *poutput, const short *input) const

    short m1;

    int i = 0;

    for (m1 = 0; m1 < overlapLength; m1 ++)

        short m2 = (short)(overlapLength - m1);

        for (int c = 0; c < channels; c ++)

            poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2)  / overlapLength;

            i++;

// Calculates the x having the closest 2^x value for the given value

static int _getClosest2Power(double value)

    return (int)(log(value) / log(2.0) + 0.5);

/// Calculates overlap period length in samples.

/// Integer version rounds overlap length to closest power of 2

/// for a divide scaling operation.

void TDStretch::calculateOverlapLength(int aoverlapMs)

    int newOvl;

    assert(aoverlapMs >= 0);

    // calculate overlap length so that it's power of 2 - thus it's easy to do

    // integer division by right-shifting. Term "-1" at end is to account for

    // the extra most significatnt bit left unused in result by signed multiplication

    overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;

    if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;

    if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;

    newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1);    // +1 => account for -1 above

    acceptNewOverlapLength(newOvl);

    overlapDividerBitsNorm = overlapDividerBitsPure;

    // calculate sloping divider so that crosscorrelation operation won't

    // overflow 32-bit register. Max. sum of the crosscorrelation sum without

    // divider would be 2^30*(N^3-N)/3, where N = overlap length

    slopingDivider = (newOvl * newOvl - 1) / 3;

double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)

    long corr;

    unsigned long lnorm;

    int i;

    #ifdef ST_SIMD_AVOID_UNALIGNED

        // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary

        if (((ulongptr)mixingPos) & 15) return -1e50;

    #endif

    // hint compiler autovectorization that loop length is divisible by 8

    int ilength = (channels * overlapLength) & -8;

    corr = lnorm = 0;

    // Same routine for stereo and mono

    for (i = 0; i < ilength; i += 2)

        corr += (mixingPos[i] * compare[i] +

                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;

        lnorm += (mixingPos[i] * mixingPos[i] +

                  mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;

        // do intermediate scalings to avoid integer overflow

    if (lnorm > maxnorm)

        // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode

        #pragma omp critical

        if (lnorm > maxnorm)

            maxnorm = lnorm;

    // Normalize result by dividing by sqrt(norm) - this step is easiest

    // done using floating point operation

    norm = (double)lnorm;

    return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);

/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value

double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)

    long corr;

    long lnorm;

    int i;

    // hint compiler autovectorization that loop length is divisible by 8

    int ilength = (channels * overlapLength) & -8;

    // cancel first normalizer tap from previous round

    lnorm = 0;

    for (i = 1; i <= channels; i ++)

        lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;

    corr = 0;

    // Same routine for stereo and mono.

    for (i = 0; i < ilength; i += 2)

        corr += (mixingPos[i] * compare[i] +

                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;

    // update normalizer with last samples of this round

    for (int j = 0; j < channels; j ++)

        i --;

        lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;

    norm += (double)lnorm;

    if (norm > maxnorm)

        maxnorm = (unsigned long)norm;

    // Normalize result by dividing by sqrt(norm) - this step is easiest

    // done using floating point operation

    return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);

#endif // SOUNDTOUCH_INTEGER_SAMPLES

//////////////////////////////////////////////////////////////////////////////

//

// Floating point arithmetic specific algorithm implementations.

//

#ifdef SOUNDTOUCH_FLOAT_SAMPLES

// Overlaps samples in 'midBuffer' with the samples in 'pInput'

void TDStretch::overlapStereo(float *pOutput, const float *pInput) const

    int i;

    float fScale;

    float f1;

    float f2;

    fScale = 1.0f / (float)overlapLength;

    f1 = 0;

    f2 = 1.0f;

    for (i = 0; i < 2 * (int)overlapLength ; i += 2)

        pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;

        pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;

        f1 += fScale;

        f2 -= fScale;

// Overlaps samples in 'midBuffer' with the samples in 'input'.

void TDStretch::overlapMulti(float *pOutput, const float *pInput) const

    int i;

    float fScale;

    float f1;

    float f2;

    fScale = 1.0f / (float)overlapLength;

    f1 = 0;

    f2 = 1.0f;

    i=0;

    for (int i2 = 0; i2 < overlapLength; i2 ++)

        // note: Could optimize this slightly by taking into account that always channels > 2

        for (int c = 0; c < channels; c ++)

            pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;

            i++;

        f1 += fScale;

        f2 -= fScale;

/// Calculates overlapInMsec period length in samples.

void TDStretch::calculateOverlapLength(int overlapInMsec)

    int newOvl;

    assert(overlapInMsec >= 0);

    newOvl = (sampleRate * overlapInMsec) / 1000;

    if (newOvl < 16) newOvl = 16;

    // must be divisible by 8

    newOvl -= newOvl % 8;

    acceptNewOverlapLength(newOvl);

/// Calculate cross-correlation

double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)

    float corr;

    float norm;

    int i;

    #ifdef ST_SIMD_AVOID_UNALIGNED

        // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary

        if (((ulongptr)mixingPos) & 15) return -1e50;

    #endif

    // hint compiler autovectorization that loop length is divisible by 8

    int ilength = (channels * overlapLength) & -8;

    corr = norm = 0;

    // Same routine for stereo and mono

    for (i = 0; i < ilength; i ++)

        corr += mixingPos[i] * compare[i];

        norm += mixingPos[i] * mixingPos[i];

    anorm = norm;

    return corr / sqrt((norm < 1e-9 ? 1.0 : norm));

/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value

double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)

    float corr;

    int i;

    corr = 0;

    // cancel first normalizer tap from previous round

    for (i = 1; i <= channels; i ++)

        norm -= mixingPos[-i] * mixingPos[-i];

    // hint compiler autovectorization that loop length is divisible by 8

    int ilength = (channels * overlapLength) & -8;

    // Same routine for stereo and mono

    for (i = 0; i < ilength; i ++)

        corr += mixingPos[i] * compare[i];

    // update normalizer with last samples of this round

    for (int j = 0; j < channels; j ++)

        i --;

        norm += mixingPos[i] * mixingPos[i];

    return corr / sqrt((norm < 1e-9 ? 1.0 : norm));

#endif // SOUNDTOUCH_FLOAT_SAMPLES