Source code

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <regdef.h>
.set noreorder
.set noat
.section .text, 1, 0x00000006, 4, 4
.text:
.section .text
.ent s_mpv_mul_d_add
.globl s_mpv_mul_d_add
s_mpv_mul_d_add:
#/* c += a * b */
#void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit c0, c1; regs a6, a7
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
# if (a_len) {
beq a1,zero,.L.1
move t2,zero # cy = 0
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
dmultu a2,a4
# if (--a_len) {
addiu a1,a1,-1
beq a1,zero,.L.2
# while (a_len >= 2) {
sltiu t3,a1,2
bne t3,zero,.L.3
# a1 = a[1];
lwu a5,4(a0)
.L.4:
# a_len -= 2;
addiu a1,a1,-2
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
mflo t0
daddu t0,t0,t2
# w0 += c0;
daddu t0,t0,a6
# w1 = (mp_word)b * a1;
dmultu a2,a5 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# a0 = a[2];
lwu a4,8(a0)
# a += 2;
addiu a0,a0,8
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# w0 = (mp_word)b * a0;
dmultu a2,a4 #
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c[1] = ACCUM(w1);
sw t1,4(a3)
# c += 2;
addiu a3,a3,8
sltiu t3,a1,2
beq t3,zero,.L.4
# a1 = a[1];
lwu a5,4(a0)
# }
.L.3:
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
# if (a_len) {
mflo t0
beq a1,zero,.L.5
daddu t0,t0,t2
# w1 = (mp_word)b * a1;
dmultu a2,a5
# w0 += c0;
daddu t0,t0,a6 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# c[1] = ACCUM(w1);
sw t1,4(a3)
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c += 1;
b .L.6
addiu a3,a3,4
# } else {
.L.5:
# w0 += c0;
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
b .L.6
dsrl32 t2,t0,0
# }
# } else {
.L.2:
# c0 = c[0];
lwu a6,0(a3)
# w0 += c0;
mflo t0
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# }
.L.6:
# c[1] = cy;
jr ra
sw t2,4(a3)
# }
.L.1:
jr ra
nop
#}
#
.end s_mpv_mul_d_add
.ent s_mpv_mul_d_add_prop
.globl s_mpv_mul_d_add_prop
s_mpv_mul_d_add_prop:
#/* c += a * b */
#void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit c0, c1; regs a6, a7
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
# if (a_len) {
beq a1,zero,.M.1
move t2,zero # cy = 0
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
dmultu a2,a4
# if (--a_len) {
addiu a1,a1,-1
beq a1,zero,.M.2
# while (a_len >= 2) {
sltiu t3,a1,2
bne t3,zero,.M.3
# a1 = a[1];
lwu a5,4(a0)
.M.4:
# a_len -= 2;
addiu a1,a1,-2
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
mflo t0
daddu t0,t0,t2
# w0 += c0;
daddu t0,t0,a6
# w1 = (mp_word)b * a1;
dmultu a2,a5 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# a0 = a[2];
lwu a4,8(a0)
# a += 2;
addiu a0,a0,8
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# w0 = (mp_word)b * a0;
dmultu a2,a4 #
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c[1] = ACCUM(w1);
sw t1,4(a3)
# c += 2;
addiu a3,a3,8
sltiu t3,a1,2
beq t3,zero,.M.4
# a1 = a[1];
lwu a5,4(a0)
# }
.M.3:
# c0 = c[0];
lwu a6,0(a3)
# w0 += cy;
# if (a_len) {
mflo t0
beq a1,zero,.M.5
daddu t0,t0,t2
# w1 = (mp_word)b * a1;
dmultu a2,a5
# w0 += c0;
daddu t0,t0,a6 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# c1 = c[1];
lwu a7,4(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# w1 += c1;
daddu t1,t1,a7
# c[1] = ACCUM(w1);
sw t1,4(a3)
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c += 1;
b .M.6
addiu a3,a3,8
# } else {
.M.5:
# w0 += c0;
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
b .M.6
addiu a3,a3,4
# }
# } else {
.M.2:
# c0 = c[0];
lwu a6,0(a3)
# w0 += c0;
mflo t0
daddu t0,t0,a6
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
addiu a3,a3,4
# }
.M.6:
# while (cy) {
beq t2,zero,.M.1
nop
.M.7:
# mp_word w = (mp_word)*c + cy;
lwu a6,0(a3)
daddu t2,t2,a6
# *c++ = ACCUM(w);
sw t2,0(a3)
# cy = CARRYOUT(w);
dsrl32 t2,t2,0
bne t2,zero,.M.7
addiu a3,a3,4
# }
.M.1:
jr ra
nop
#}
#
.end s_mpv_mul_d_add_prop
.ent s_mpv_mul_d
.globl s_mpv_mul_d
s_mpv_mul_d:
#/* c = a * b */
#void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
# mp_digit *c)
#{
# mp_digit a0, a1; regs a4, a5
# mp_digit cy = 0; reg t2
# mp_word w0, w1; regs t0, t1
#
# if (a_len) {
beq a1,zero,.N.1
move t2,zero # cy = 0
dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
dsrl32 a2,a2,0 # This clears the upper 32 bits.
# a0 = a[0];
lwu a4,0(a0)
# w0 = ((mp_word)b * a0);
dmultu a2,a4
# if (--a_len) {
addiu a1,a1,-1
beq a1,zero,.N.2
# while (a_len >= 2) {
sltiu t3,a1,2
bne t3,zero,.N.3
# a1 = a[1];
lwu a5,4(a0)
.N.4:
# a_len -= 2;
addiu a1,a1,-2
# w0 += cy;
mflo t0
daddu t0,t0,t2
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# w1 = (mp_word)b * a1;
dmultu a2,a5
# c[0] = ACCUM(w0);
sw t0,0(a3)
# a0 = a[2];
lwu a4,8(a0)
# a += 2;
addiu a0,a0,8
# w1 += cy;
mflo t1
daddu t1,t1,t2
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# w0 = (mp_word)b * a0;
dmultu a2,a4
# c[1] = ACCUM(w1);
sw t1,4(a3)
# c += 2;
addiu a3,a3,8
sltiu t3,a1,2
beq t3,zero,.N.4
# a1 = a[1];
lwu a5,4(a0)
# }
.N.3:
# w0 += cy;
# if (a_len) {
mflo t0
beq a1,zero,.N.5
daddu t0,t0,t2
# w1 = (mp_word)b * a1;
dmultu a2,a5 #
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# w1 += cy;
mflo t1
daddu t1,t1,t2
# c[1] = ACCUM(w1);
sw t1,4(a3)
# cy = CARRYOUT(w1);
dsrl32 t2,t1,0
# c += 1;
b .N.6
addiu a3,a3,4
# } else {
.N.5:
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
b .N.6
dsrl32 t2,t0,0
# }
# } else {
.N.2:
mflo t0
# c[0] = ACCUM(w0);
sw t0,0(a3)
# cy = CARRYOUT(w0);
dsrl32 t2,t0,0
# }
.N.6:
# c[1] = cy;
jr ra
sw t2,4(a3)
# }
.N.1:
jr ra
nop
#}
#
.end s_mpv_mul_d
.ent s_mpv_sqr_add_prop
.globl s_mpv_sqr_add_prop
#void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
# registers
# a0 *a
# a1 a_len
# a2 *sqr
# a3 digit from *a, a_i
# a4 square of digit from a
# a5,a6 next 2 digits in sqr
# a7,t0 carry
s_mpv_sqr_add_prop:
move a7,zero
move t0,zero
lwu a3,0(a0)
addiu a1,a1,-1 # --a_len
dmultu a3,a3
beq a1,zero,.P.3 # jump if we've already done the only sqr
addiu a0,a0,4 # ++a
.P.2:
lwu a5,0(a2)
lwu a6,4(a2)
addiu a2,a2,8 # sqrs += 2;
dsll32 a6,a6,0
daddu a5,a5,a6
lwu a3,0(a0)
addiu a0,a0,4 # ++a
mflo a4
daddu a6,a5,a4
sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
dmultu a3,a3
daddu a4,a6,t0
sltu t0,a4,a6
add t0,t0,a7
sw a4,-8(a2)
addiu a1,a1,-1 # --a_len
dsrl32 a4,a4,0
bne a1,zero,.P.2 # loop if a_len > 0
sw a4,-4(a2)
.P.3:
lwu a5,0(a2)
lwu a6,4(a2)
addiu a2,a2,8 # sqrs += 2;
dsll32 a6,a6,0
daddu a5,a5,a6
mflo a4
daddu a6,a5,a4
sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
daddu a4,a6,t0
sltu t0,a4,a6
add t0,t0,a7
sw a4,-8(a2)
beq t0,zero,.P.9 # jump if no carry
dsrl32 a4,a4,0
.P.8:
sw a4,-4(a2)
/* propagate final carry */
lwu a5,0(a2)
daddu a6,a5,t0
sltu t0,a6,a5
bne t0,zero,.P.8 # loop if carry persists
addiu a2,a2,4 # sqrs++
.P.9:
jr ra
sw a4,-4(a2)
.end s_mpv_sqr_add_prop