Files
seta75D 2e8a93c394 Init
2021-10-11 18:20:23 -03:00

308 lines
7.7 KiB
ArmAsm

.seg "data"
.asciz "@(#)sqrt.S 1.1 92/07/30 SMI"
#define LOCORE
#include <machine/asm_linkage.h>
! Copyright (c) 1988 by Sun Microsystems, Inc.
!
! double sqrt(x)
! Double precision IEEE sqrt on Sunrise
! Code by K.C. Ng, March 9, 1987, revised on May 14,1988.
!
! Sqrt(x) returns the correctly rounded (according to the prevailing
! rounding mode) square root of x.
!
! Algorithm:
! 1. Shift and add to get 7.8 bits approximation of 1/sqrt(x).
! 2. Apply reciproot iteration 2 times, then switch to newton iteration
! once to get an approximation of sqrt(x) to one ulp.
! 3. Final adjustment using division to get correctly rounded sqrt(x).
!
.seg "data"
.align 8
constant:
twom57 = 0x0
.word 0x3c600000,0x0 ! 2**-57
two54 = 0x8
.word 0x43500000,0x0 ! = 2**54
twom27 = 0x10
.word 0x3e400000,0x0 ! = 2**-27
half = 0x18
.word 0x3fe00000,0x0 ! = 0.5
one = 0x20
.word 0x3ff00000,0x0 ! = 1.0
onehalf = 0x28
.word 0x3ff80000,0x0 ! = 1.5
onemulp = 0x30
.word 0x3fefffff,0xffffffff ! = 1-2**-53
table = 0x38
.word 0x1500,0x2ef8,0x4d67,0x6b02,0x87be,0xa395,0xbe7a,0xd866
.word 0xf14a,0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34
.word 0x17e5f,0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01
.word 0x1bfde,0x1c28d,0x1c2de,0x1c0db,0x1ba7e,0x1b11c,0x1a4b5
.word 0x1953d,0x18266,0x16be0,0x1683e,0x179d8,0x18a4d,0x19992
.word 0x1a789,0x1b445,0x1bf61,0x1c989,0x1d16d,0x1d77b,0x1dddf
.word 0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,0x1df2a,0x1d635
.word 0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,0x1527f
.word 0x1334a,0x11051,0xe951,0xbe01,0x8e0d,0x5924,0x1edd
! local variable using fp
x = -0x8
y = -0x10
tmp = -0x18
ofsr = -0x20
nfsr = -0x24
tfsr = -0x28
.seg "text"
.global _SVID_libm_err
ENTRY(sqrt)
insqrt:
save %sp,-144,%sp
set constant,%l0 ! l0 = address of constant
std %i0,[%fp+x] ! save x
sethi %hi(0x3ff00000),%o4 ! o4 = 0x3ff00000
subcc %i0,%o4,%o4
bne 1f
addcc %o4,1,%o4
! check whether x < 1+2**-27
sethi %hi(0x02000000),%o3
cmp %i1,%o3
bgu 2f
tst %i1
bne quickcrank
! x = 1, return x
nop
ba sqrt_return
ldd [%fp+x],%f0 ! load x
! x = 1 +- tiny with tiny < 2**-27
quickcrank:
ldd [%l0+twom57],%f2 ! f2 = 2**-57
sra %i1,1,%o2
st %o2,[%fp+x+4]
andcc %i1,1,%g0 ! x even?
bne odd
ldd [%fp+x],%f0
fnegs %f2,%f2 ! even
odd:
faddd %f0,%f2,%f0
ba sqrt_return
nop
1:
bne 2f
sethi %hi(0x7ff00000),%o4
sethi %hi(0xfc000000),%o3
cmp %i1,%o3
bgu quickcrank
nop
2:
sethi %hi(0x00100000),%o3
and %i0,%o4,%o2
cmp %o2,%o3
bg 1f
sethi %hi(0x80000000),%o3 ! o3 = 0x80000000
! x is 0 or very tiny, but may not be subnormal
andn %i0,%o3,%o2 ! purging signed zero
orcc %o2,%i1,%g0
be quickreturn ! x is +-zero, return x+x
tst %i0
bl NaN ! x < 0, return NaN
nop
! tiny positive x
ldd [%l0+two54],%f2 ! set f2 = 2**54
ldd [%fp+x],%f0 ! load x
fmuld %f0,%f2,%f0 ! scale up x by 2**54
std %f0,[%fp+tmp]
ldd [%fp+tmp],%o0 ! get High part of new x
call insqrt ! call sqrt again, avoid profiling
nop
ldd [%l0+twom27],%f2 ! f2 = 2**-(27)
fmuld %f0,%f2,%f0 ! scale down sqrt(x) by 2**27
ba sqrt_return
nop
1:
! finite x, 0.5*x will not underflow
tst %i0
bl NaN ! x < 0 return NaN
cmp %o2,%o4 ! here o4 = 0x7ff00000
be quickreturn ! x must be NaN or +INF; return x+x
nop
! now for reciproot algorithm ...
! save RD, NX flags; reset RD to RN; disable NX; save f4-f11
st %fsr,[%fp+ofsr] ! save fsr
ld [%fp+ofsr],%o2 ! o2 = fsr
sethi %hi(0xff800000),%o3 ! set mask to reset TEM,RP and RD
andn %o2,%o3,%o2 ! fsr with everything clear
sethi %hi(0x40000000),%o3 ! RD = RZ
or %o2,%o3,%o2 ! new fsr with RD = RZ
st %o2,[%fp+nfsr]
st %g0,[%fp+tfsr]
ld [%fp+tfsr],%fsr ! disable TEM, and set RD to RN
ldd [%fp+x],%f0
ldd [%l0+half],%f2
fmuld %f0,%f2,%f2 ! f2 = 0.5*x
! initial approximation for 1/sqrt(x)
srl %i0,1,%o0 ! high x >> 1
sethi %hi(0x5fe80000),%o1 ! offset constant
sub %o1,%o0,%o0 ! o0 = k = 0x5fe80000 - (hx >> 1)
add %l0,table,%o2 ! o2 = address of data table
srl %o0,12,%o1 ! o1 := 4*(k >> 14)
and %o1,0xfc,%o1 ! o1 := 4*[63&(o0>>14)]
add %o2,%o1,%o2 ! o2 = address of the adjusting data
ld [%o2],%o3 ! o3 = adjustment for the approximation
sub %o0,%o3,%o0 ! high y ~ 1/sqrt(x) to 7.8 bits
st %o0,[%fp+y] ! tmp = y
ldd [%fp+y],%f10 ! f10 = y
! reciproot iteration once
fmuld %f2,%f10,%f4 ! f4 = (0.5*x)*y
fmuld %f10,%f4,%f6 ! f6 = ((0.5*x)*y)*y)
ldd [%l0+onehalf],%f8 ! f8 = 1.5
fsubd %f8,%f6,%f6 ! f6 = 1.5-0.5*x*y*y
fmuld %f10,%f6,%f4 ! f4 = y' = y*(1.5-0.5*x*y*y)
! reciproot iteration twice
sethi %hi(0x00100000),%o4
fmuld %f0,%f4,%f6 ! f6 = x*y
std %f4,[%fp+tmp]
ld [%fp+tmp],%o3 ! o3 = Hy
sub %o3,%o4,%o3 ! o3 = H(0.5*y)
st %o3,[%fp+tmp]
ldd [%fp+tmp],%f8 ! f8 = 0.5*y
fmuld %f8,%f6,%f8 ! f8 = 0.5*x*y*y
ldd [%l0+onehalf],%f10 ! f10 = 1.5
fsubd %f10,%f8,%f8 ! f8 = 1.5-0.5*x*y*y
fmuld %f6,%f8,%f4 ! f4 = (x*y)*(1.5-0.5*x*y*y)
! now f4 is 29 bit to sqrt(x)
! Now use Newton iteration, at the same time determine RD
fdivd %f2,%f4,%f6 ! f6 = (0.5*x)/y rounded
ld [%fp+ofsr],%o4 ! o4 = old fsr
or %o4,0x21,%o0
st %o0,[%fp+tfsr] ! old fsr with inexact accured
srl %o4,30,%o3 ! o3 = RD
sethi %hi(0xf0000000),%o2 ! o2 = mask for RD and RP
andn %o4,%o2,%o4
sethi %hi(0x40000000),%o2
or %o4,%o2,%o4 ! old fsr with RD=RZ and RP=0
or %o4,0x20,%o4
xor %o4,0x20,%o4 ! with aexc (inexact)=0
st %o4,[%fp+nfsr]
std %f4,[%fp+tmp]
ld [%fp+tmp],%o0 ! o0 = Hy
sethi %hi(0x00100000),%o1
sub %o0,%o1,%o0 ! o0 = H(0.5*y)
st %o0,[%fp+tmp]
ldd [%fp+tmp],%f10 ! f10 = 0.5*y
faddd %f6,%f10,%f4 ! f4 = sqrt(x) to 1 ulp
! Twisted last bit to make sqrt(x) correctly rounded
ld [%fp+nfsr],%fsr ! restore INEXACT flags, RD=RZ
nop ! three nop after ldfsr
nop
nop
fdivd %f2,%f4,%f0 ! f6 = z = (0.5*x)/y chopped quotient
std %f4,[%fp+tmp] ! y is f4
ldd [%fp+tmp],%o0
sethi %hi(0x00100000),%o4
sub %o0,%o4,%o0 ! y = y/2
st %o0,[%fp+tmp]
ldd [%fp+tmp],%f2 ! f2 = y/2
addcc %o1,1,%o1
addx %o0,%g0,%o0 ! y/2+ulp
std %o0,[%fp+tmp] ! now tmp+4 = y/2+ulp
ldd [%l0+onemulp],%f4 ! f4 = 0.9999999.... chopped
set 0x20,%o4 ! o4 = inexact accrued mask
! o3 = RD from above
std %f0,[%fp+x] ! make sure div is finished
st %fsr,[%fp+nfsr]
fmuld %f0,%f4,%f4 ! f4 = Z-1
ld [%fp+nfsr],%o0 ! o0 = current fsr
andcc %o0,%o4,%o0 ! see if inexact flags was up
bne 1f
nop
! exact division, if z=y then exit
fcmpd %f0,%f2
nop
fbne 2f
nop
ld [%fp+ofsr],%fsr ! restore old fsr
nop ! three nop after ldfsr
nop
nop
faddd %f0,%f0,%f0
ba sqrt_return
nop
2:
! z!=y, set z=z-1
fmovs %f4,%f0
fmovs %f5,%f1
std %f4,[%fp+x]
1:
andcc %o3,1,%g0 ! test RD
bne 1f ! goto 1f if RD is RZ or RM
ld [%fp+x],%o4
ld [%fp+x+4],%o2
addcc %o2,1,%o2
addx %o4,%g0,%o4
st %o4,[%fp+x]
st %o2,[%fp+x+4] ! fp+x store z+ulp
andcc %o3,2,%g0
be 1f ! if RN goto 1f
ldd [%fp+x],%f0
ldd [%fp+tmp],%f2 ! RP: f2 <- y+ulp
1:
faddd %f0,%f2,%f0
nop
nop
ld [%fp+tfsr],%fsr
sqrt_return:
ret
restore
quickreturn:
ldd [%fp+x],%f0
ba sqrt_return
faddd %f0,%f0,%f0
NaN:
! reload x in %f0
ldd [%fp+x],%f0
! purge off NaN argument
ldd [%fp+x],%o0
set 0xfff00000,%o2
and %o0,%o2,%o3
cmp %o3,%o2
bne invalid
andn %o0,%o2,%o0
orcc %o0,%o1,%g0
be invalid
nop
! input is -NaN, just return x+x
ba sqrt_return
faddd %f0,%f0,%f0
! result is invalid
invalid:
! fsubd %f0,%f0,%f0
! fdivd %f0,%f0,%f0 ! 0/0 create invalid signal
!
! Call SVID_libm_err to create invalid signal
!
! set input parameter for system V error handling routine
ldd [%fp+x],%o0
mov %o0,%o2
mov %o1,%o3
set 26,%o4 ! private error type 1: sqrt(-ve)
! call system V error handling routine
call _SVID_libm_err
nop
ba sqrt_return
nop
!
! end of codes for System V
!