mpi_sparc.c - mozsearch

Enable keyboard shortcuts

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Multiplication performance enhancements for sparc v8+vis CPUs. */

#include "mpi-priv.h"

#include <stddef.h>

#include <sys/systeminfo.h>

#include <strings.h>

/*
 * Multiply-accumulate primitives, declared extern here.
 * For both functions below:
 *   - vector y must be 8-byte aligned, and n must be even
 *   - the return value is the carry out of the high order word of the result
 *   - maximum n is 256
 */

/* vector x += vector y * scalar a; where y is of length n words. */

extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);

/* vector z = vector x + vector y * scalar a; where y is of length n words. */

extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,

                        int n, mp_digit a);

/* v8 versions of these functions run on any Sparc v8 CPU. */

/* This trick works on Sparc V8 CPUs with the Workshop compilers. */

/*
 * MP_MUL_DxD(a, b, Phi, Plo): full-width product of two digits.
 *
 * Computes a * b in an unsigned long long, then stores the low digit of
 * the product in Plo and the bits above MP_DIGIT_BIT in Phi.
 * Phi and Plo must be assignable mp_digit lvalues.
 *
 * Every argument is parenthesized so expression arguments (e.g. x + 1)
 * multiply as a whole, and the body is wrapped in do { } while (0) so the
 * macro behaves as a single statement, including before an `else`.
 * Note: the temporary is named `product`; do not pass an argument that
 * refers to a caller variable of that name.
 */
#define MP_MUL_DxD(a, b, Phi, Plo)                                  \
    do {                                                            \
        unsigned long long product = (unsigned long long)(a) * (b); \
        (Plo) = (mp_digit)product;                                  \
        (Phi) = (mp_digit)(product >> MP_DIGIT_BIT);                \
    } while (0)

/*
 * c = a * b
 *
 * Multiplies the a_len digits at a by the single digit b.  The a_len low
 * digits of the product are written to c[0 .. a_len-1] and the final carry
 * is stored in c[a_len], so c must have room for a_len + 1 digits.
 *
 * Two implementations are selected at compile time:
 *   - with mp_word available, each step accumulates into an mp_word and is
 *     split with ACCUM / CARRYOUT (presumably the low and high digit of the
 *     word; both macros are defined outside this file);
 *   - with MP_NO_MP_WORD defined, MP_MUL_DxD yields the high/low digits and
 *     the carry is folded in by hand.
 */
static void
v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD)
    mp_digit d = 0;

    /* Inner product:  Digits of a */
    while (a_len--) {
        mp_word w = ((mp_word)b * *a++) + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }
    *c = d;
#else
    mp_digit carry = 0;

    while (a_len--) {
        mp_digit a_i = *a++;
        mp_digit a0b0, a1b1;

        MP_MUL_DxD(a_i, b, a1b1, a0b0);

        /* Add the incoming carry; unsigned wrap-around signals overflow. */
        a0b0 += carry;
        if (a0b0 < carry) {
            ++a1b1;
        }
        *c++ = a0b0;
        carry = a1b1;
    }
    *c = carry;
#endif
}

/*
 * c += a * b
 *
 * Multiplies the a_len digits at a by the single digit b and adds the
 * product into c[0 .. a_len-1].  The final carry is STORED (not added)
 * into c[a_len], overwriting whatever was there, so c must have room for
 * a_len + 1 digits.
 *
 * The mp_word variant relies on ACCUM / CARRYOUT (defined outside this
 * file; presumably the low and high digit of the word).  The MP_NO_MP_WORD
 * variant uses MP_MUL_DxD and detects each overflow through unsigned
 * wrap-around.
 */
static void
v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD)
    mp_digit d = 0;

    /* Inner product:  Digits of a */
    while (a_len--) {
        mp_word w = ((mp_word)b * *a++) + *c + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }
    *c = d;
#else
    mp_digit carry = 0;

    while (a_len--) {
        mp_digit a_i = *a++;
        mp_digit c_i;
        mp_digit a0b0, a1b1;

        MP_MUL_DxD(a_i, b, a1b1, a0b0);

        /* Fold in the carry from the previous digit. */
        a0b0 += carry;
        if (a0b0 < carry) {
            ++a1b1;
        }

        /* Fold in the digit already present in c. */
        c_i = *c;
        a0b0 += c_i;
        if (a0b0 < c_i) {
            ++a1b1;
        }

        *c++ = a0b0;
        carry = a1b1;
    }
    *c = carry;
#endif
}

/*
 * Presently, this is only used by the Montgomery arithmetic code.
 *
 * c += a * b, with carry propagation
 *
 * Multiplies the a_len digits at a by the single digit b and adds the
 * product into c[0 .. a_len-1].  Any carry left over is then ADDED into
 * c[a_len], c[a_len + 1], ... until it is absorbed.  Nothing in this
 * function bounds that second loop, so the caller must guarantee that c
 * has enough digits above a_len for the carry to die out.
 *
 * The mp_word variant relies on ACCUM / CARRYOUT (defined outside this
 * file; presumably the low and high digit of the word).  The MP_NO_MP_WORD
 * variant uses MP_MUL_DxD and detects each overflow through unsigned
 * wrap-around.
 */
static void
v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
#if !defined(MP_NO_MP_WORD)
    mp_digit d = 0;

    /* Inner product:  Digits of a */
    while (a_len--) {
        mp_word w = ((mp_word)b * *a++) + *c + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }

    /* Ripple the remaining carry through the higher digits of c. */
    while (d) {
        mp_word w = (mp_word)*c + d;
        *c++ = ACCUM(w);
        d = CARRYOUT(w);
    }
#else
    mp_digit carry = 0;

    while (a_len--) {
        mp_digit a_i = *a++;
        mp_digit c_i;
        mp_digit a0b0, a1b1;

        MP_MUL_DxD(a_i, b, a1b1, a0b0);

        /* Fold in the carry from the previous digit. */
        a0b0 += carry;
        if (a0b0 < carry) {
            ++a1b1;
        }

        /* Fold in the digit already present in c. */
        c_i = *c;
        a0b0 += c_i;
        if (a0b0 < c_i) {
            ++a1b1;
        }

        *c++ = a0b0;
        carry = a1b1;
    }

    /* Ripple the remaining carry through the higher digits of c. */
    while (carry) {
        mp_digit c_i = *c;

        carry += c_i;
        *c++ = carry;
        carry = carry < c_i; /* 1 if the addition wrapped, else 0 */
    }
#endif
}

/* These functions run only on v8plus+vis or v9+vis CPUs. */

/*
 * c = a * b
 *
 * Multiplies the a_len digits at a by the single digit b, writing
 * a_len + 1 digits to c (the carry lands in c[a_len]).
 *
 * For a_len <= 256 the work is done by mul_add_inp() on a zeroed c.
 * The input is first copied into the stack buffer x when any of these
 * holds:
 *   - a and c are the same pointer (c is cleared before a is read);
 *   - a is not 8-byte aligned;
 *   - a_len is odd (one zero digit is appended after the copy).
 * x has 258 entries: up to 256 digits, one slot that may be skipped to
 * reach 8-byte alignment (this assumes sizeof(mp_digit) == 4 -- TODO
 * confirm), and one slot for the zero pad.
 *
 * Longer inputs fall back to v8_mpv_mul_d().
 *
 * NOTE(review): for odd a_len the pad digit is added but mul_add_inp() is
 * still called with n == a_len, although its declaration comment says n
 * must be even -- confirm against the mul_add_inp implementation.
 */
void
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit d;
    mp_digit x[258];

    if (a_len <= 256) {
        if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
            mp_digit *px;

            /* Pick the first 8-byte aligned slot of x. */
            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
            memcpy(px, a, a_len * sizeof(*a));
            a = px;
            if (a_len & 1) {
                px[a_len] = 0;
            }
        }
        /* presumably zero-fills a_len + 1 digits of c; helper defined elsewhere */
        s_mp_setz(c, a_len + 1);
        d = mul_add_inp(c, a, a_len, b);
        c[a_len] = d;
    } else {
        v8_mpv_mul_d(a, a_len, b, c);
    }
}

/*
 * c += a * b, where a is a_len words long.
 *
 * Adds the product of the a_len digits at a and the single digit b into
 * c[0 .. a_len-1]; the resulting carry is STORED in c[a_len] (overwriting
 * it), so c must have room for a_len + 1 digits.
 *
 * For a_len <= 256 the work is done by mul_add_inp().  The input is first
 * copied into the stack buffer x when a is not 8-byte aligned or a_len is
 * odd; in the odd case one zero digit is appended after the copy.
 * x has 258 entries: up to 256 digits, one slot that may be skipped to
 * reach 8-byte alignment (this assumes sizeof(mp_digit) == 4 -- TODO
 * confirm), and one slot for the zero pad.
 *
 * Longer inputs fall back to v8_mpv_mul_d_add().
 *
 * NOTE(review): for odd a_len the pad digit is added but mul_add_inp() is
 * still called with n == a_len, although its declaration comment says n
 * must be even -- confirm against the mul_add_inp implementation.
 */
void
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit d;
    mp_digit x[258];

    if (a_len <= 256) {
        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
            mp_digit *px;

            /* Pick the first 8-byte aligned slot of x. */
            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
            memcpy(px, a, a_len * sizeof(*a));
            a = px;
            if (a_len & 1) {
                px[a_len] = 0;
            }
        }
        d = mul_add_inp(c, a, a_len, b);
        c[a_len] = d;
    } else {
        v8_mpv_mul_d_add(a, a_len, b, c);
    }
}

/*
 * c += a * b, where a is a_len words long; the carry is propagated.
 *
 * Adds the product of the a_len digits at a and the single digit b into
 * c[0 .. a_len-1].  A non-zero carry is then ADDED into c[a_len],
 * c[a_len + 1], ... until it is absorbed.  Nothing here bounds that loop,
 * so the caller must guarantee that c has enough digits above a_len.
 *
 * For a_len <= 256 the multiply-accumulate is done by mul_add_inp().  The
 * input is first copied into the stack buffer x when a is not 8-byte
 * aligned or a_len is odd; in the odd case one zero digit is appended
 * after the copy.  x has 258 entries: up to 256 digits, one slot that may
 * be skipped to reach 8-byte alignment (this assumes sizeof(mp_digit) == 4
 * -- TODO confirm), and one slot for the zero pad.
 *
 * Longer inputs fall back to v8_mpv_mul_d_add_prop().
 *
 * NOTE(review): for odd a_len the pad digit is added but mul_add_inp() is
 * still called with n == a_len, although its declaration comment says n
 * must be even -- confirm against the mul_add_inp implementation.
 */
void
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit d;
    mp_digit x[258];

    if (a_len <= 256) {
        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
            mp_digit *px;

            /* Pick the first 8-byte aligned slot of x. */
            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
            memcpy(px, a, a_len * sizeof(*a));
            a = px;
            if (a_len & 1) {
                px[a_len] = 0;
            }
        }
        d = mul_add_inp(c, a, a_len, b);
        if (d) {
            /* Ripple the carry through the digits above the product. */
            c += a_len;
            do {
                mp_digit sum = d + *c;

                *c++ = sum;
                d = sum < d; /* 1 if the addition wrapped, else 0 */
            } while (d);
        }
    } else {
        v8_mpv_mul_d_add_prop(a, a_len, b, c);
    }
}