2021-10-11 18:20:23 -03:00

278 lines
5.1 KiB
ArmAsm

! Version identification string for this file, placed in the data segment.
.seg "data"
.asciz "@(#)fmod.S 1.1 92/07/30 SMI"
#define LOCORE
#include <machine/asm_linkage.h>
! Copyright (c) 1988 by Sun Microsystems, Inc.
!
! double fmod(x,y)
! For the algorithm, see libm/C/fmod.c.
! Note: the inner loop is optimized and may look different from fmod.c.
!
! Scaling constants used by the fmod_tiny path. Each entry is one IEEE
! double written as two 32-bit words, high word first. two110 and twom110
! are the byte offsets of the entries from the label constant, and the
! table is 8-byte aligned so that each entry can be fetched with one ldd.
.seg "data"
.align 8
constant:
two110 = 0x0
.word 0x46d00000,0x0 ! 2^110 (biased exponent 0x46d = 1023+110)
twom110 = 0x8
.word 0x39100000,0x0 ! 2^-110 (biased exponent 0x391 = 1023-110)
! Local doubleword slots, given as offsets from %fp. The code uses them
! with std/ldd to move 64-bit values between the integer registers and
! the floating-point registers.
x = -0x8
y = -0x10
! local register usage
! i0 = high x i1 = low x
! i2 = high y i3 = low y
! o0,o1 = x0,x1
! o2,o3 = y0,y1
! o4,o5 = z0,z1
! l0 = constant
! l1 = 0x80000000
! l2 = 0x7ff00000
! l3 = 0x00100000
! l4 = ia
! l5 = ib
! l6 = n
! l7 = k = n&3
! i4,i5 = scratch registers
!
! double fmod(x,y)
! In (after the save): i0:i1 = x (high:low word), i2:i3 = y (high:low word)
! Out: f0 = result, loaded from the stack slot x just before returning
!
! fmod_in is a second label on the entry point, placed after the entry
! macro; the recursive calls made by fmod_tiny target fmod_in, not fmod.
! This section screens the operands: x inf or NaN, y zero, or y NaN all
! go to fmod_exception; y inf falls through to T0; everything else goes
! to T1.
.seg "text"
ENTRY(fmod)
fmod_in: ! label to avoid multi-profiling
save %sp,-128,%sp
set 0x80000000,%l1 ! l1 = sign-bit mask
andn %i0,%l1,%o0 ! o0 = high word of |x|
andn %i2,%l1,%o2 ! o2 = high word of |y|
set 0x7ff00000,%l2 ! l2 = exponent-field mask, also the high word of inf
! purge off exception values
cmp %o0,%l2
bge fmod_exception ! x is inf or NaN, goto exception
orcc %o2,%i3,%g0 ! (delay slot) Z set iff every bit of |y| is zero
be fmod_exception ! y is zero, goto exception
cmp %o2,%l2 ! (delay slot) high word of |y| against high word of inf
bl T1 ! y is finite and nonzero
nop
bg fmod_exception ! y is NaN, goto exception
tst %i3 ! (delay slot) high word equals inf: NaN iff low word is nonzero
bne fmod_exception ! y is NaN, goto exception
nop
! Return x unchanged. Reached by fall-through when y is inf and x is
! finite, and by branch from T1 when |x| < |y|.
T0:
std %i0,[%fp+x] ! spill x (i0:i1) to the stack slot
ba fmod_return
ldd [%fp+x],%f0 ! (delay slot) reload it as the floating-point result
! Order |x| against |y|. The high words have their sign bits cleared, so
! a signed compare is used for them; the low words are compared unsigned.
! Falls through to T2 when |x| > |y|.
T1:
cmp %o0,%o2
bg T2 ! high word of |x| is larger: reduce
nop
bl T0 ! high word of |x| is smaller: result is x
cmp %i1,%i3 ! (delay slot) high words equal: compare the low words
blu T0 ! |x| < |y|: result is x
nop
be fmod_zero ! |x| == |y|: result is zero
nop
! |x| > |y|. Very small y is diverted to fmod_tiny. Otherwise both
! operands are turned into 53-bit integers (explicit leading 1) held in
! o0:o1 and o2:o3, and the exponent difference n is split into n>>2 passes
! of the four-step loop plus k = n&3 leftover steps.
T2:
set 0x03600000,%i4
cmp %o2,%i4
bl fmod_tiny ! high word of |y| below 0x03600000, i.e. |y| < 2^-969
and %i0,%l2,%l4 ! (delay slot) l4 = ia = exponent field of x
and %i2,%l2,%l5 ! l5 = ib = exponent field of y
sub %l4,%l5,%l6
srl %l6,20,%l6 ! l6 = n = exponent of x minus exponent of y
set 0x00100000,%l3 ! l3 = position of the implicit leading mantissa bit
andn %o0,%l2,%o0 ! strip the exponent field from x0
andn %o2,%l2,%o2 ! strip the exponent field from y0
or %l3,%o0,%o0 ! make the leading 1 of x explicit
or %l3,%o2,%o2 ! make the leading 1 of y explicit
and %l6,3,%l7 ! l7 = k = n&3
srl %l6,2,%l6 ! n = n>>2, unroll loop 4 times
! now do fix point fmod
mov %i1,%o1 ! x1 = low word of x
mov %i3,%o3 ! y1 = low word of y
tst %l6
ble T3_end ! no full pass of four steps to run
subcc %o1,%o3,%o5 ! (delay slot) z1 = x1-y1 cc
! Main reduction loop: four shift-and-subtract steps per pass, one pass
! per count in l6. Every step starts with o5 = x1-y1 and the borrow of
! that subtraction in the carry flag. One step is:
!   z = x - y                 (64-bit, z0:z1 in o4:o5)
!   if z < 0        x = 2*x
!   else if z == 0  goto fmod_zero
!   else            x = 2*z
! The numeric labels 1, 2 and 3 are reused in every step: 1 completes the
! doubling of x on the z < 0 path, 2 and 3 are the points where the paths
! rejoin.
T3_loop:
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f ! z < 0: keep x and double it
addcc %o1,%o1,%o1 ! (annulled delay slot, taken path only) x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f ! low word of 2z is nonzero, so z != 0
addxcc %o4,%o4,%o0 ! (delay slot) x0 = z0+z0+c cc
bne 3f ! high word of 2z is nonzero, so z != 0
subcc %o1,%o3,%o5 ! (delay slot) z1 = x1-y1
ba fmod_zero ! z == 0: the remainder is exactly zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %o1,%o3,%o5 ! z1 = x1-y1
3:
! second step, same pattern
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f
addxcc %o4,%o4,%o0 ! x0 = z0+z0+c cc
bne 3f
subcc %o1,%o3,%o5 ! z1 = x1-y1
ba fmod_zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %o1,%o3,%o5 ! z1 = x1-y1
3:
! third step, same pattern
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f
addxcc %o4,%o4,%o0 ! x0 = z0+z0+c cc
bne 3f
subcc %o1,%o3,%o5 ! z1 = x1-y1
ba fmod_zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %o1,%o3,%o5 ! z1 = x1-y1
3:
! final section in loop
! Same step, but the rejoin points decrement the pass counter instead of
! recomputing z1; z1 is recomputed in the delay slot of the loop branch.
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f
addxcc %o4,%o4,%o0 ! x0 = z0+z0+c cc
bne 3f
subcc %l6,1,%l6 ! (delay slot) n = n-1 cc
ba fmod_zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %l6,1,%l6 ! n = n-1 cc
3: bg,a T3_loop ! passes remain
subcc %o1,%o3,%o5 ! (annulled delay slot, taken path only) z1 = x1-y1 cc
! Leftover steps: run the same shift-and-subtract step k = n&3 times
! (0 to 3). k is decremented before each of the first two steps and again
! before the third; as soon as it goes negative control leaves for T4.
! Nothing is tested after the third step because k is at most 3. Each
! branch to T4 recomputes z1 = x1-y1 in its delay slot.
T3_end:
! now sub 1 from k
subcc %l7,1,%l7
bl T4 ! k was 0: no leftover steps
subcc %o1,%o3,%o5 ! (delay slot) z1 = x1-y1 cc
! leftover step 1
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f
addxcc %o4,%o4,%o0 ! x0 = z0+z0+c cc
bne 3f
subcc %l7,1,%l7 ! (delay slot) k = k-1 cc
ba fmod_zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %l7,1,%l7 ! k = k-1 cc
3: bl T4 ! k was 1: done
subcc %o1,%o3,%o5 ! z1 = x1-y1
! leftover step 2
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f
addxcc %o4,%o4,%o0 ! x0 = z0+z0+c cc
bne 3f
subcc %l7,1,%l7 ! (delay slot) k = k-1 cc
ba fmod_zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %l7,1,%l7 ! k = k-1 cc
3: bl T4 ! k was 2: done
subcc %o1,%o3,%o5 ! z1 = x1-y1
! leftover step 3, falls through to T4
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl,a 1f
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addcc %o5,%o5,%o1 ! x1 = z1+z1 cc
bne 2f
addxcc %o4,%o4,%o0 ! x0 = z0+z0+c cc
bne 3f
subcc %o1,%o3,%o5 ! z1 = x1-y1
ba fmod_zero
nop
1: addx %o0,%o0,%o0 ! x0 = x0+x0+c
2: subcc %o1,%o3,%o5 ! z1 = x1-y1
3:
! Final conditional subtract without a shift: z = x - y, and x = z when
! z >= 0. On entry o5 = x1-y1 with its borrow in the carry flag. A zero
! result is returned from here with the sign of x; a nonzero result goes
! on to label 4 to be rebuilt as a double.
T4:
subxcc %o0,%o2,%o4 ! z0 = x0-y0-b
bl 3f ! z < 0: keep x
nop
mov %o4,%o0 ! x0 = z0
mov %o5,%o1 ! x1 = z1
3:
orcc %o0,%o1,%g0 ! Z set iff x0:x1 is zero
bne,a 4f ! nonzero remainder
nop
and %i0,%l1,%o0 ! o0 = sign bit of x; o1 is already 0 on this path
std %o0,[%fp+x]
ba fmod_return
ldd [%fp+x],%f0 ! (delay slot) f0 = zero with the sign of x
! Rebuild a double from the nonzero integer remainder in o0:o1. The value
! is shifted left until its leading 1 sits at bit 20 of the high word
! (mask l3), and the exponent field ib in l5 is lowered by one for each
! shift. The sign of x and the adjusted exponent are then merged in, and
! the result is loaded into f0. Falls through to fmod_return.
! NOTE(review): ib is not checked for underflow in the T5 loop; presumably
! the fmod_tiny diversion keeps the exponent of y large enough. Confirm.
4:
! convert back to floating point
andcc %o0,%l3,%g0
bne T6 ! leading bit already in place: no shifting needed
nop
T5:
addcc %o1,%o1,%o1 ! x1 = x1+x1 cc
addx %o0,%o0,%o0 ! x0 = x0+x0+c
andcc %o0,%l3,%g0
be T5 ! leading bit not yet at bit 20
sub %l5,%l3,%l5 ! (delay slot, runs once per shift) exponent field of ib minus 1
T6:
and %i0,%l1,%i4 ! i4 = sign(x)
or %l5,%i4,%l5 ! ib |= sign(x)
andn %o0,%l2,%o0 ! clear exponent field of x
or %l5,%o0,%o0 ! o0 = sign | exponent | high mantissa bits
std %o0,[%fp+x]
ldd [%fp+x],%f0 ! f0 = result
! Common exit. Every path that reaches this label has already loaded the
! result into f0.
fmod_return:
ret
restore ! (delay slot) undo the save done at entry
! Exit used when the remainder is exactly zero: build a zero that carries
! the sign of x in the stack slot x and return it in f0.
fmod_zero:
st %g0,[%fp+x+4] ! low word of the result = 0
and %i0,%l1,%o4 ! o4 = sign bit of x (l1 = 0x80000000)
st %o4,[%fp+x] ! high word of the result = sign bit only
ba fmod_return
ldd [%fp+x],%f0 ! (delay slot) f0 = zero with the sign of x
fmod_tiny:
! |y| < 2^-969, return 2^-110*fmod(2^110*fmod(x,2^110*y),2^110*y)
! Both inner fmod evaluations are recursive calls to fmod_in. Arguments
! are passed in o0:o1 (x) and o2:o3 (y); results come back in f0.
! On entry o2 still holds the high word of |y| computed at function entry.
mov %i0,%o0
mov %i1,%o1 ! o0:o1 = x, sign included
mov %i3,%o3 ! o2:o3 = |y|
set constant,%l0 ! l0 = base of the constant table
ldd [%l0+two110],%f2 ! f2 = 2^110
std %o2,[%fp+y]
ldd [%fp+y],%f0 ! f0 = |y|
fmuld %f0,%f2,%f0 ! f0 = 2^110*|y|
std %f0,[%fp+y] ! kept in slot y for the second call
ldd [%fp+y],%o2 ! 2^110*y
call fmod_in ! f0 = fmod(x,2^110*y)
nop
ldd [%l0+two110],%f2 ! reload 2^110 after the call
fmuld %f2,%f0,%f0 ! 2^110*fmod(x,2^110*y)
ldd [%fp+y],%o2 ! 2^110*y
std %f0,[%fp+x]
ldd [%fp+x],%o0 ! o0:o1 = scaled intermediate remainder
call fmod_in ! f0 = fmod(2^110*fmod(x,2^110*y),2^110*y)
nop
ldd [%l0+twom110],%f2 ! f2 = 2^-110
fmuld %f0,%f2,%f0 ! scale the result back down
ba fmod_return
nop
! Exit for invalid operands: reached from the entry checks when x is inf
! or NaN, or when y is zero or NaN. The result is produced arithmetically
! as (x*y)/(x*y) and returned in f0. Each operand is moved to the
! floating-point registers through its stack slot.
fmod_exception:
std %i2,[%fp+y]
ldd [%fp+y],%f2 ! f2 = y
std %i0,[%fp+x]
ldd [%fp+x],%f0 ! f0 = x
fmuld %f0,%f2,%f0 ! f0 = x*y
fdivd %f0,%f0,%f0 ! f0 = (x*y)/(x*y)
ba fmod_return
nop