Files
seta75D 2e8a93c394 Init
2021-10-11 18:20:23 -03:00

1228 lines
27 KiB
ArmAsm

! @(#)crt.s 1.1 92/07/30 SMI
! Copyright (c) 1988 by Sun Microsystems, Inc.
.seg "text"
.align 4
#include <machine/asm_linkage.h>
#include <machine/trap.h>
/*
* C run time subroutines.
*/
/*
* procedure to perform a 32 by 32 multiply.
* pass the multiplier into %o0, and the multiplicand into %o1
* the least significant 32 bits of the result will be returned in %o0,
* and the most significant in %o1
*
* This code has an optimization built in for short (less than 13 bit)
* multipliers. Short multipliers require 26 or 27 instruction cycles, and
* long ones require 47 to 51 instruction cycles. For two positive numbers,
* the most common case, a long multiply takes 47 instruction cycles.
*
* This code indicates that overflow has occurred, by leaving the Z condition
* code clear. The following call sequence would be used if you wish to
* deal with overflow:
*
* call .mul
* nop ( or set up last parameter here )
* bnz overflow_code (or tnz to overflow handler)
*/
RTENTRY(.mul)
! Signed 32x32 -> 64 multiply built from mulscc multiply steps.
! Register roles, as used below:
!   %o0  in: multiplier          out: low 32 bits of the product
!   %o1  in: multiplicand        out: high 32 bits of the product
!   %o4  partial product (becomes the high word)
!   %o5  scratch, short path only
!   %y   loaded with the multiplier; the low product bits are read back
!        from it with rd once the steps are done
! Leaf routine: no register window is opened, return is through %o7.
mov %o0, %y ! multiplier to Y register
andncc %o0, 0xfff, %g0 ! mask out lower 12 bits
! Z is set only when no bit above bit 11 is set, i.e. the multiplier
! is in the range 0..4095.
be mul_shortway ! can do it the short way
andcc %g0, %g0, %o4 ! zero the partial product
! and clear N and V conditions
! (delay slot: executed on both the short and the long path)
!
! long multiply
!
mulscc %o4, %o1, %o4 ! first iteration of 33
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4 ! 32nd iteration
mulscc %o4, %g0, %o4 ! last iteration only shifts
!
! if %o0 (multiplier) was negative, the result is
! (%o0 * %o1) + %o1 * (2**32)
! we fix that here
!
tst %o0
! The rd below overwrites %o0 with the low product word; the bge that
! follows still sees the condition codes of the tst above, which
! looked at the original multiplier.
rd %y, %o0
bge 1f
tst %o0 ! for when we check for overflow
! (delay slot: executed on both paths; tests the low product word)
sub %o4, %o1, %o4 ! bit 33 and up of the product are in
! %o4, so we don't have to shift %o1
!
! We haven't overflowed if:
! low-order bits are positive and high-order bits are 0
! low-order bits are negative and high-order bits are -1
!
! if you are not interested in detecting overflow,
! replace the following code with:
!
! 1: jmp %o7+8
! mov %o4, %o1
!
1:
bge 2f ! if low-order bits were positive.
addcc %o4, %g0, %o1 ! return most sig. bits of prod and set
! Z appropriately (for positive product)
! (delay slot: %o1 is written on both paths)
jmp %o7+8
subcc %o4, -1, %g0 ! set Z if high order bits are -1 (for
! negative product)
2:
jmp %o7+8 ! leaf routine return
nop
!
! short multiply
!
! Reached only with a multiplier in 0..4095, so 12 multiply steps plus
! the final shift step are enough. The product is then spread over
! %o4 and the top 12 bits of %y and is reassembled by the shifts below.
mul_shortway:
mulscc %o4, %o1, %o4 ! first iteration of 13
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4 ! 12th iteration
mulscc %o4, %g0, %o4 ! last iteration only shifts
rd %y, %o5
sll %o4, 12, %o0 ! left shift middle bits by 12 bits
srl %o5, 20, %o5 ! right shift low bits by 20 bits
!
! We haven't overflowed if:
! low-order bits are positive and high-order bits are 0
! low-order bits are negative and high-order bits are -1
!
! if you are not interested in detecting overflow,
! replace the following code with:
!
! or %o5, %o4, %o0
! jmp %o7+8
! mov %o4, %o1
!
orcc %o5, %o0, %o0 ! merge for true product
bge 3f ! if low-order bits were positive.
sra %o4, 20, %o1 ! right shift high bits by 20 bits
! and put into %o1
! (delay slot: executed on both paths; sra leaves the condition
! codes alone)
jmp %o7+8
subcc %o1, -1, %g0 ! set Z if high order bits are -1 (for
! negative product)
3:
jmp %o7+8 ! leaf routine return
addcc %o1, %g0, %g0 ! set Z if high order bits are 0
/*
* procedure to perform a 32 by 32 unsigned multiply.
* pass the multiplier into %o0, and the multiplicand into %o1
* the least significant 32 bits of the result will be returned in %o0,
* and the most significant in %o1
*
* This code has an optimization built in for short (less than 13 bit)
* multiplies. Short multiplies require 25 instruction cycles, and long ones
* require 46 or 48 instruction cycles.
*
* This code indicates that overflow has occurred, by leaving the Z condition
* code clear. The following call sequence would be used if you wish to
* deal with overflow:
*
* call .umul
* nop ( or set up last parameter here )
* bnz overflow_code (or tnz to overflow handler)
*/
RTENTRY(.umul)
! Unsigned 32x32 -> 64 multiply built from mulscc multiply steps.
! Register roles, as used below:
!   %o0  in: multiplier          out: low 32 bits of the product
!   %o1  in: multiplicand        out: high 32 bits of the product
!   %o4  partial product (becomes the high word)
!   %o5  scratch
!   %y   loaded with the multiplier; the low product bits are read back
!        from it with rd once the steps are done
! Leaf routine: no register window is opened, return is through %o7.
or %o0, %o1, %o4 ! logical or of multiplier
! and multiplicand
mov %o0, %y ! multiplier to Y register
andncc %o4, 0xfff, %o5 ! mask out lower 12 bits
! Z is set only when neither operand has a bit above bit 11 set, so
! the short path is taken only when both operands are below 4096.
be umul_shortway ! can do it the short way
andcc %g0, %g0, %o4 ! zero the partial product
! and clear N and V conditions
! (delay slot: executed on both the short and the long path)
!
! long multiply
!
mulscc %o4, %o1, %o4 ! first iteration of 33
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4 ! 32nd iteration
mulscc %o4, %g0, %o4 ! last iteration only shifts
!
! Normally, with the shifty-add approach, if both numbers are positive,
! you get the correct result. With 32-bit twos-complement numbers,
! -x can be represented as ((2 - x/(2**32)) mod 2) * 2**32. To avoid
! a lot of 2**32's, we can just move the radix point up to be just
! to the left of the sign bit. So:
!
! x * y = (xy) mod 2
! -x * y = (2 - x) mod 2 * y = (2y - xy) mod 2
! x * -y = x * (2 - y) mod 2 = (2x - xy) mod 2
! -x * -y = (2 - x) * (2 - y) = (4 - 2x - 2y + xy) mod 2
!
! For signed multiplies, we subtract (2**32) * x from the partial
! product to fix this problem for negative multipliers (see multiply.s)
! Because of the way the shift into the partial product is calculated
! (N xor V), this term is automatically removed for the multiplicand,
! so we don't have to adjust.
!
! But for unsigned multiplies, the high order bit wasn't a sign bit,
! and the correction is wrong. So for unsigned multiplies where the
! high order bit is one, we end up with xy - (2**32) * y. To fix it
! we add y * (2**32).
!
! In the code below the test is on the multiplicand (%o1) and the value
! added to the high word is the multiplier: %o0 still holds the
! original multiplier here, it is only overwritten by the rd at 1:.
tst %o1
bge 1f
nop
add %o4, %o0, %o4
1:
rd %y, %o0 ! return least sig. bits of prod
jmp %o7+8 ! leaf routine return
addcc %o4, %g0, %o1 ! delay slot; return high bits and set
! zero bit appropriately
!
! short multiply
!
! Both operands are below 4096 here, so 12 multiply steps plus the
! final shift step are enough. The product is then spread over %o4 and
! the top 12 bits of %y and is reassembled by the shifts below.
umul_shortway:
mulscc %o4, %o1, %o4 ! first iteration of 13
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4
mulscc %o4, %o1, %o4 ! 12th iteration
mulscc %o4, %g0, %o4 ! last iteration only shifts
rd %y, %o5
sll %o4, 12, %o4 ! left shift partial product
! by 12 bits
srl %o5, 20, %o5 ! right shift product 20 bits
or %o5, %o4, %o0 ! merge for true product
!
! The delay instruction moves zero into %o1,
! sets the zero condition code, and clears the other conditions.
! This is the equivalent result to a long umultiply which doesn't
! overflow
!
jmp %o7+8 ! leaf routine return
addcc %g0, %g0, %o1
/*
* division/remainder
*
* Input is:
* dividend -- the thing being divided
* divisor -- how many ways to divide
* Important parameters:
* N -- how many bits per iteration we try to get
* as our current guess: 4
* WORDSIZE -- how many bits altogether we're talking about:
* obviously: 32
* A derived constant:
* TOPBITS -- how many bits are in the top "decade" of a number: 4
*
* Important variables are:
* Q -- the partial quotient under development -- initially 0
* R -- the remainder so far -- initially == the dividend
* ITER -- number of iterations of the main division loop that will
* be required. Equal to CEIL( lg2(quotient)/4 )
* Note that this is log_base_(2^4) of the quotient.
* V -- the current comparand -- initially divisor*2^(ITER*4-1)
* Cost:
* current estimate for non-large dividend is
* CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
* a large dividend is one greater than 2^(31-4 ) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
! .udiv / .div: 32-bit unsigned and signed integer divide.
! Register roles, as used below:
!   %o0  in: dividend            out: quotient
!   %o1  in: divisor (replaced by its absolute value on the signed path)
!   %o2  quotient under development
!   %o3  running remainder; may be left negative on exit, in which case
!        the true remainder is %o3 + divisor
!   %o4  iteration count for the 4-bit divide loop
!   %o5  comparand (scaled divisor)
!   %g1  sign of the result is kept in its sign bit
!   %g2  threshold constant 1<<(32-4-1)
!   %g3  number of single-bit steps on the large-dividend path
! Leaf routine: no register window is opened, return is through %o7.
RTENTRY(.udiv) ! UNSIGNED DIVIDE
b divide
mov 0,%g1 ! result always positive
RTENTRY(.div) ! SIGNED DIVIDE
orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
bge divide ! if not, skip this junk
xor %o1,%o0,%g1 ! record sign of result in sign of %g1
! (delay slot: executed on both paths)
tst %o1
bge 2f
tst %o0
! (delay slot: sets the condition codes used by the bge below)
! %o1 < 0
bge divide
neg %o1
! (delay slot: the divisor is negated whether or not the branch is
! taken; falling through means the dividend is negative as well)
2:
! %o0 < 0
neg %o0
! FALL THROUGH
divide:
! compute size of quotient, scale comparand
orcc %o1,%g0,%o5 ! movcc %o1,%o5
! trap when the orcc above found a zero divisor; ST_DIV0 is not
! defined in this file (presumably it comes from <machine/trap.h>)
te ST_DIV0 ! if %o1 = 0
mov %o0,%o3
cmp %o3,%o5
blu got_result ! if %o3<%o5 already, there's no point in continuing
mov 0,%o2
! (delay slot: quotient starts at 0 on both paths)
sethi %hi(1<<(32-4 -1)),%g2
cmp %o3,%g2
blu not_really_big
mov 0,%o4
!
! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
! our usual 4-at-a-shot divide step will cause overflow and havoc. The
! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
! the top decade: so don't even bother to compare to %o3.
1:
cmp %o5,%g2
bgeu 3f
mov 1,%g3
sll %o5,4,%o5
b 1b
inc %o4
! now compute %g3
2: addcc %o5,%o5,%o5
bcc not_too_big ! bcc not_too_big
add %g3,1,%g3
!
! here if the %o1 overflowed when Shifting
! this means that %o3 has the high-order bit set
! restore %o5 and subtract from %o3
sll %g2,4 ,%g2 ! high order bit
srl %o5,1,%o5 ! rest of %o5
add %o5,%g2,%o5
b do_single_div
sub %g3,1,%g3
not_too_big:
3: cmp %o5,%o3
blu 2b
nop
be do_single_div
nop
! %o5 > %o3: went too far: back up 1 step
! srl %o5,1,%o5
! dec %g3
! do single-bit divide steps
!
! we have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
do_single_div:
deccc %g3
bl end_regular_divide
nop
sub %o3,%o5,%o3
mov 1,%o2
! annulled unconditional branch: the sll that follows is not executed
b,a end_single_divloop
single_divloop:
sll %o2,1,%o2
! the bl tests the condition codes set by the tst in the delay slot
! of the loop-closing bge below
bl 1f
srl %o5,1,%o5
! %o3 >= 0
sub %o3,%o5,%o3
b 2f
inc %o2
1: ! %o3 < 0
add %o3,%o5,%o3
dec %o2
2:
end_single_divloop:
deccc %g3
bge single_divloop
tst %o3
b,a end_regular_divide
not_really_big:
1:
sll %o5,4,%o5
cmp %o5,%o3
bleu 1b
inccc %o4
! the be below tests Z as left by the inccc in the delay slot above
be got_result
dec %o4
do_regular_divide:
! do the main division iteration
tst %o3
! fall through into divide loop
!
! Each pass of divloop develops 4 quotient bits with a fully unrolled
! decision tree of non-restoring steps: subtract the halved comparand
! when the running remainder is non-negative, add it back when it is
! negative. Labels are named L.<depth>.<16 + bits accumulated so far>.
! Every leaf adds its accumulated value, doubled and adjusted by +1 or
! -1 for the last step, to the quotient and joins at 9:.
divloop:
sll %o2,4,%o2
!depth 1, accumulated bits 0
bl L.1.16
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 2, accumulated bits 1
bl L.2.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 3, accumulated bits 3
bl L.3.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits 7
bl L.4.23
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
L.4.23: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
L.3.19: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits 5
bl L.4.21
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
L.4.21: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
L.2.17: ! remainder is negative
addcc %o3,%o5,%o3
!depth 3, accumulated bits 1
bl L.3.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits 3
bl L.4.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
L.4.19: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
L.3.17: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits 1
bl L.4.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
L.4.17: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
L.1.16: ! remainder is negative
addcc %o3,%o5,%o3
!depth 2, accumulated bits -1
bl L.2.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 3, accumulated bits -1
bl L.3.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits -1
bl L.4.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
L.4.15: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
L.3.15: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits -3
bl L.4.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
L.4.13: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
L.2.15: ! remainder is negative
addcc %o3,%o5,%o3
!depth 3, accumulated bits -3
bl L.3.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits -5
bl L.4.11
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
L.4.11: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
L.3.13: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits -7
bl L.4.9
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
L.4.9: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
end_regular_divide:
deccc %o4
bge divloop
tst %o3
! (delay slot: its condition codes feed the first bl of divloop when
! the loop repeats, and the bl,a below when it does not)
! a negative final remainder means the quotient is one too large; the
! dec sits in an annulled delay slot and runs only in that case
bl,a got_result
dec %o2
got_result:
tst %g1
! annulled delay slot: the quotient is negated only when %g1 is negative
bl,a 1f
neg %o2 ! quotient <- -%o2
1:
retl
mov %o2,%o0 ! quotient <- %o2
! .urem / .rem: 32-bit unsigned and signed integer remainder.
! Same algorithm as the divide routines, but the value returned is the
! remainder, and on the signed path it takes the sign of the dividend.
! Register roles, as used below:
!   %o0  in: dividend            out: remainder
!   %o1  in: divisor (replaced by its absolute value on the signed path)
!   %o2  quotient under development (not returned)
!   %o3  running remainder
!   %o4  iteration count for the 4-bit divide loop
!   %o5  comparand (scaled divisor)
!   %g1  sign of the result is kept in its sign bit
!   %g2  threshold constant 1<<(32-4-1)
!   %g3  number of single-bit steps on the large-dividend path
! Leaf routine: no register window is opened, return is through %o7.
RTENTRY(.urem) ! UNSIGNED REMAINDER
b rem
mov 0,%g1 ! result always positive
RTENTRY(.rem) ! SIGNED REMAINDER
orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
bge rem ! if not, skip this junk
mov %o0,%g1 ! record sign of result in sign of %g1
! (delay slot: executed on both paths; the sign recorded is that of
! the dividend)
tst %o1
bge 2f
tst %o0
! (delay slot: sets the condition codes used by the bge below)
! %o1 < 0
bge rem
neg %o1
! (delay slot: the divisor is negated whether or not the branch is
! taken; falling through means the dividend is negative as well)
2:
! %o0 < 0
neg %o0
! FALL THROUGH
rem:
! compute size of quotient, scale comparand
orcc %o1,%g0,%o5 ! movcc %o1,%o5
! trap when the orcc above found a zero divisor; ST_DIV0 is not
! defined in this file (presumably it comes from <machine/trap.h>)
te ST_DIV0 ! if %o1 = 0
mov %o0,%o3
cmp %o3,%o5
blu rgot_result ! if %o3<%o5 already, there's no point in continuing
mov 0,%o2
sethi %hi(1<<(32-4 -1)),%g2
cmp %o3,%g2
blu rnot_really_big
mov 0,%o4
!
! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
! our usual 4-at-a-shot divide step will cause overflow and havoc. The
! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
! the top decade: so don't even bother to compare to %o3.
1:
cmp %o5,%g2
bgeu 3f
mov 1,%g3
sll %o5,4,%o5
b 1b
inc %o4
! now compute %g3
2: addcc %o5,%o5,%o5
bcc rnot_too_big ! bcc rnot_too_big
add %g3,1,%g3
!
! here if the %o1 overflowed when Shifting
! this means that %o3 has the high-order bit set
! restore %o5 and subtract from %o3
sll %g2,4 ,%g2 ! high order bit
srl %o5,1,%o5 ! rest of %o5
add %o5,%g2,%o5
b do_single_rem
sub %g3,1,%g3
rnot_too_big:
3: cmp %o5,%o3
blu 2b
nop
be do_single_rem
nop
! %o5 > %o3: went too far: back up 1 step
! srl %o5,1,%o5
! dec %g3
! do single-bit divide steps
!
! we have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
do_single_rem:
deccc %g3
bl end_regular_remainder
nop
sub %o3,%o5,%o3
mov 1,%o2
! annulled unconditional branch: the sll that follows is not executed
b,a end_single_remloop
single_remloop:
sll %o2,1,%o2
! the bl tests the condition codes set by the tst in the delay slot
! of the loop-closing bge below
bl 1f
srl %o5,1,%o5
! %o3 >= 0
sub %o3,%o5,%o3
b 2f
inc %o2
1: ! %o3 < 0
add %o3,%o5,%o3
dec %o2
2:
end_single_remloop:
deccc %g3
bge single_remloop
tst %o3
b,a end_regular_remainder
rnot_really_big:
1:
sll %o5,4,%o5
cmp %o5,%o3
bleu 1b
inccc %o4
! the be below tests Z as left by the inccc in the delay slot above
be rgot_result
dec %o4
do_regular_remainder:
! do the main division iteration
tst %o3
! fall through into divide loop
!
! Each pass of remloop performs 4 non-restoring steps through a fully
! unrolled decision tree: subtract the halved comparand when the
! running remainder is non-negative, add it back when it is negative.
! Labels are named Lr.<depth>.<16 + bits accumulated so far>.
remloop:
sll %o2,4,%o2
!depth 1, accumulated bits 0
bl Lr.1.16
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 2, accumulated bits 1
bl Lr.2.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 3, accumulated bits 3
bl Lr.3.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits 7
bl Lr.4.23
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
Lr.4.23: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
Lr.3.19: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits 5
bl Lr.4.21
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
Lr.4.21: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
Lr.2.17: ! remainder is negative
addcc %o3,%o5,%o3
!depth 3, accumulated bits 1
bl Lr.3.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits 3
bl Lr.4.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
Lr.4.19: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
Lr.3.17: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits 1
bl Lr.4.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
Lr.4.17: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
Lr.1.16: ! remainder is negative
addcc %o3,%o5,%o3
!depth 2, accumulated bits -1
bl Lr.2.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 3, accumulated bits -1
bl Lr.3.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits -1
bl Lr.4.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
Lr.4.15: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
Lr.3.15: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits -3
bl Lr.4.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
Lr.4.13: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
Lr.2.15: ! remainder is negative
addcc %o3,%o5,%o3
!depth 3, accumulated bits -3
bl Lr.3.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
!depth 4, accumulated bits -5
bl Lr.4.11
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
Lr.4.11: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
Lr.3.13: ! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits -7
bl Lr.4.9
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
Lr.4.9: ! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
end_regular_remainder:
deccc %o4
bge remloop
tst %o3
! (delay slot: its condition codes feed the first bl of remloop when
! the loop repeats, and the bl,a below when it does not)
! a negative final remainder is corrected by adding the divisor back;
! the add sits in an annulled delay slot and runs only in that case
bl,a rgot_result
add %o3,%o1,%o3
rgot_result:
tst %g1
! annulled delay slot: the remainder is negated only when %g1 is
! negative
bl,a 1f
neg %o3 ! remainder <- -%o3
1:
retl
mov %o3,%o0 ! remainder <- %o3
/*
* Structure return
*
* These routines are entered with a register window already open
* (they use %i7 and %fp and finish with ret/restore), with
* %o0 = address of the structure value to return
* %o1 = its size in bytes
* The word at %i7+8, where a plain ret would resume, is compared with
* a key built from the low 12 bits of the size. On a match the
* structure is copied to the address stored at [%fp + STRUCT_VAL_OFF]
* and the return address is bumped by 4 so that the key word is
* skipped. Otherwise nothing is copied and the routine just returns.
* Presumably the key word is the unimp instruction a caller places
* after a call that expects a structure value -- confirm against the
* compiler's calling convention.
*/
/* high bits of the key word; presumably the unimp opcode -- confirm */
#define UNIMP 0
/* low 12 bits of the size take part in the key comparison */
#define MASK 0x00000fff
/* offset from %fp of the slot holding the destination address */
#define STRUCT_VAL_OFF (16*4)
! .stret4 and .stret8 are two names for the same code: the copy below
! moves one 32-bit word at a time.
RTENTRY(.stret4)
RTENTRY(.stret8)
!
! see if key matches: if not, structure value not expected,
! so just return
!
ld [%i7 + 8], %o3
and %o1, MASK, %o4
sethi %hi(UNIMP), %o5
or %o4, %o5, %o5
cmp %o5, %o3
! annulled delay slot: the destination address is loaded only when the
! key matched
be,a 0f
ld [%fp + STRUCT_VAL_OFF], %i0 ! set expected return value
ret
restore
0: ! copy the struct
! Copies from the highest word down to offset 0. The offset is
! decremented before the first load, so the loop body always runs at
! least once; the size is presumably a positive multiple of 4 --
! confirm with callers.
subcc %o1, 4, %o1
ld [%o0 + %o1], %o4
! bg tests the subcc above; the store in the delay slot runs on every
! pass, including the last one
bg 0b
st %o4, [%i0 + %o1] ! delay slot
add %i7, 0x4, %i7 ! bump return address
ret
restore
! .stret2: structure return, copying 16 bits at a time.
! Entered with a register window already open (uses %i7 and %fp and
! finishes with ret/restore), %o0 = address of the structure value,
! %o1 = its size in bytes.
RTENTRY(.stret2)
!
! see if key matches: if not, structure value not expected,
! so just return
!
! The key is the word at %i7+8, where a plain ret would resume; it is
! compared with the low 12 bits of the size.
ld [%i7 + 8], %o3
and %o1, MASK, %o4
sethi %hi(UNIMP), %o5
or %o4, %o5, %o5
cmp %o5, %o3
! annulled delay slot: the destination address is loaded only when the
! key matched
be,a 0f
ld [%fp + STRUCT_VAL_OFF], %i0 ! set expected return value
ret
restore
0: ! copy the struct
! Copies from the highest halfword down to offset 0. The offset is
! decremented before the first load, so the loop body always runs at
! least once; the size is presumably a positive multiple of 2 --
! confirm with callers.
subcc %o1, 2, %o1
lduh [%o0 + %o1], %o4
! bg tests the subcc above; the store in the delay slot runs on every
! pass, including the last one
bg 0b
sth %o4, [%i0 + %o1] ! delay slot
add %i7, 0x4, %i7 ! bump return address
ret
restore
/*
* integer multiply __ip_umul, __ip_mul, __ip_umulcc, __ip_mulcc
* input: %i0 = rs1
* %i1 = rs2 or simm13
* %i2 = address for rd
* %i3 = address for Y-register
* %i4 = address for psr (or icc)
*
* perform a (signed or unsigned) multiplication of a 32 bit multiplier
* %i0 and a 32 bit multiplicand %i1 resulting in a 64-bit product. The
* low order 32-bits of the multiply are placed in [%i2], and the
* upper 32-bits of the multiply are placed in [%i3]. The condition
* codes are set as follows:
* unsigned: N set if product<31> is set
* Z set if product equals zero
* V set if product <63:32> is not zero
* C always cleared
* signed: N set if product<31> is set
* Z set if product equals zero
* V set if product <63:32> is not product<31>
* C always cleared
*
* Traps: (none)
*/
ENTRY(_ip_umul)
	! Unsigned multiply without condition codes:
	!   [%i2] <- low word, [%i3] <- high word of %i0 * %i1; returns 1.
	save	%sp, -96, %sp
	mov	%i1, %o1		! multiplicand
	call	.umul
	mov	%i0, %o0		! delay slot: multiplier
	st	%o0, [%i2]		! low 32 bits -> rd
	st	%o1, [%i3]		! high 32 bits -> Y
	ret
	restore	%g0, 1, %o0		! delay slot: return 1 (success)
ENTRY(_ip_mul)
	! Signed multiply without condition codes:
	!   [%i2] <- low word, [%i3] <- high word of %i0 * %i1; returns 1.
	save	%sp, -96, %sp
	mov	%i1, %o1		! multiplicand
	call	.mul
	mov	%i0, %o0		! delay slot: multiplier
	st	%o0, [%i2]		! low 32 bits -> rd
	st	%o1, [%i3]		! high 32 bits -> Y
	ret
	restore	%g0, 1, %o0		! delay slot: return 1 (success)
ENTRY(_ip_umulcc)
	! Unsigned multiply that also updates the icc field of the saved psr:
	!   [%i2] <- low word, [%i3] <- high word of %i0 * %i1.
	save	%sp,-96,%sp
	mov	%i0,%o0
	call	.umul
	mov	%i1,%o1			! delay slot
	st	%o0,[%i2]
	st	%o1,[%i3]
	!
	! Build N, Z and V in %l0 as bits 3, 2 and 1 (C, bit 0, stays clear)
	! from the 64-bit result in %o1:%o0. Also entered from _ip_udivcc
	! with the quotient in %o1:%o0. No separate clearing of %l0 is needed
	! because the sll below writes it unconditionally.
	!
unsigned_ccret:
	srl	%o0,31,%l1		! %l1 = product<31>
	sll	%l1,3,%l0		! N = product<31>
	orcc	%o0,%o1,%g0
	be,a	1f			! annulled: the or runs only if taken
	or	%l0,0x4,%l0		! set Z if product = 0
1:
	tst	%o1
	! 2: and 3: are in _ip_mulcc; the shift into icc position is done
	! there when V is set, and in the delay slot of the b otherwise.
	bnz,a	2f			! use common code in _ip_mulcc
	or	%l0,0x2,%l0		! set V if product<63:32> is not zero
	b	3f			! use common code in _ip_mulcc
	sll	%l0,20,%l0		! shift to the icc position in psr
ENTRY(_ip_mulcc)
	! Signed multiply that also updates the icc field of the saved psr:
	!   [%i2] <- low word, [%i3] <- high word of %i0 * %i1; returns 1.
	save	%sp,-96,%sp
	mov	%i0,%o0
	call	.mul
	mov	%i1,%o1			! delay slot
	st	%o0,[%i2]
	st	%o1,[%i3]
	!
	! Build N, Z and V in %l0 as bits 3, 2 and 1 (C, bit 0, stays clear)
	! from the 64-bit result in %o1:%o0. Also entered from _ip_divcc with
	! the quotient in %o1:%o0. No separate clearing of %l0 is needed
	! because the sll below writes it unconditionally.
	! NOTE(review): verify the Z and V rules against the SPARC V8 manual
	! definition of icc for UMULcc/SMULcc.
	!
signed_ccret:
	srl	%o0,31,%l1		! %l1 = result<31>
	sll	%l1,3,%l0		! N = result<31>
	orcc	%o0,%o1,%g0
	be,a	1f			! annulled: the or runs only if taken
	or	%l0,0x4,%l0		! set Z if result = 0
1:
	! result<31> + high word is zero exactly when the high word is the
	! sign extension of the low word (0 with bit 31 clear, -1 with it set)
	addcc	%l1,%o1,%g0
	bnz,a	2f
	or	%l0,0x2,%l0		! set V if result<63:32> is not result<31>
	! 2: and 3: are also branched to from unsigned_ccret
2:
	sll	%l0,20,%l0		! shift to the icc position in psr
3:
	ld	[%i4],%l1
	set	0x00f00000,%l2
	andn	%l1,%l2,%l1		! clear icc field
	or	%l1,%l0,%l1		! put in icc
	st	%l1,[%i4]		! store to psr
	mov	1, %i0			! success
	ret
	restore
/*
* integer divide __ip_udiv, __ip_div, __ip_udivcc, __ip_divcc
* input: %i0 = rs1 -- lower dividend
* %i1 = rs2 or simm13 -- divisor
* %i2 = address for rd -- quotient (lower 32-bits)
* %i3 = address for Y-register -- upper dividend
* %i4 = address for psr (or icc)
*
* When return, %i1 will also contain the upper 32-bits quotient
*
* perform a (signed or unsigned) division of a 64 bit dividend (lower
* 32-bits in %i0 and upper 32-bits in [%i3]) and a 32 bit divisor %i1
* resulting in a 32-bit quotient [%i2]. Overflow is set if the quotient
* cannot be represented in 32-bits. Here we will put the upper 32-bits
* quotient in %i1 when return.
* The condition codes are set as follows:
* N set if MSB of quotient is set
* Z set if quotient equals zero
* V set if division overflow
* C always cleared
*
* Traps: division by zero
*/
ENTRY(_ip_udiv)
	! Unsigned 64/32 divide without condition codes.
	! A zero divisor is reported through div_by_zero before any register
	! window is opened.
	tst	%o1			! divisor == 0 ?
	be	div_by_zero
	nop
	save	%sp, -96, %sp
	ld	[%i3], %o1		! H: upper dividend, from the Y slot
	mov	%i0, %o0		! L: lower dividend
	call	long_udiv
	mov	%i1, %o2		! delay slot: D, the divisor
	st	%o0, [%i2]		! lower 32 bits of the quotient -> rd
	mov	%o1, %i1		! upper 32 bits of the quotient
	ret
	restore	%g0, 1, %o0		! delay slot: return 1 (success)
ENTRY(_ip_div)
	! Signed 64/32 divide without condition codes.
	! A zero divisor is reported through div_by_zero before any register
	! window is opened.
	tst	%o1			! divisor == 0 ?
	be	div_by_zero
	nop
	save	%sp, -96, %sp
	ld	[%i3], %o1		! H: upper dividend, from the Y slot
	mov	%i0, %o0		! L: lower dividend
	call	long_div
	mov	%i1, %o2		! delay slot: D, the divisor
	st	%o0, [%i2]		! lower 32 bits of the quotient -> rd
	mov	%o1, %i1		! upper 32 bits of the quotient
	ret
	restore	%g0, 1, %o0		! delay slot: return 1 (success)
ENTRY(_ip_udivcc)
	! Unsigned 64/32 divide that also updates the icc field of the saved
	! psr. A zero divisor is reported through div_by_zero before any
	! register window is opened.
	tst	%o1			! divisor == 0 ?
	be	div_by_zero
	nop
	save	%sp, -96, %sp
	ld	[%i3], %o1		! H: upper dividend, from the Y slot
	mov	%i0, %o0		! L: lower dividend
	call	long_udiv
	mov	%i1, %o2		! delay slot: D, the divisor
	mov	%o1, %i1		! upper 32 bits of the quotient
	b	unsigned_ccret		! finish in the shared icc code
	st	%o0, [%i2]		! delay slot: lower quotient -> rd
ENTRY(_ip_divcc)
	! Signed 64/32 divide that also updates the icc field of the saved
	! psr. A zero divisor is reported through div_by_zero before any
	! register window is opened.
	tst	%o1			! divisor == 0 ?
	be	div_by_zero
	nop
	save	%sp, -96, %sp
	ld	[%i3], %o1		! H: upper dividend, from the Y slot
	mov	%i0, %o0		! L: lower dividend
	call	long_div
	mov	%i1, %o2		! delay slot: D, the divisor
	mov	%o1, %i1		! upper 32 bits of the quotient
	b	signed_ccret		! finish in the shared icc code
	st	%o0, [%i2]		! delay slot: lower quotient -> rd
div_by_zero:
	! Shared exit of the _ip_*div* entry points for a zero divisor. It is
	! reached before any save, so it returns as a leaf routine.
	jmp	%o7+8			! leaf routine return
	mov	-2, %o0			! divide by zero detected, see trap.c
/*
* long_udiv(L,H,D) unsigned 64/32 bits divided
* long_div (L,H,D) signed 64/32 bits divided
* int L,H,D;
*
* Input:
* L = %i0 -- least 32 bits of dividend
* H = %i1 -- most 32 bits of dividend
* D = %i2 -- divisor
* Output:
* %i0 -- least 32 bits of quotient
* %i1 -- most 32 bits of quotient
* %i2 -- remainder
*
* local register usage:
* %i0 least 32 bits dividend
* %i1 most 32 bits dividend
* %i2 divisor
* %i3 &QL
* %i4 &QH
* %i5 &R
* %l0 least 32 bits quotient
* %l1 most 32 bits quotient
* %l2 remainder
* %l3 sign
* %l4 counter
*
* undocumented usage of .udiv:
* .udiv return quotient in o0, remainder in o3; if o3 is
* negative, then replace o3 by o3 + divisor to get the correct
* remainder
*/
! long_udiv / long_div: 64-bit by 32-bit divide, unsigned and signed.
! Both entry points open a register window and share the code from
! ldivide on. Results are returned in the in registers, which the
! caller sees as its out registers after the restore:
!   %i0 = low quotient word, %i1 = high quotient word, %i2 = remainder.
long_udiv:
save %sp,-0x100,%sp
b ldivide
mov 0,%l3 ! sign is positive
long_div:
save %sp,-0x100,%sp
orcc %i2,%i1,%g0 ! dividend or divisor negative ?
bge ldivide ! if not, skip this
xor %i2,%i1,%l3 ! record sign of result
! (delay slot: executed on both paths)
tst %i2
bge 2f
tst %i1
! (delay slot: sets the condition codes used by the bge below)
! divisor < 0
bge ldivide
neg %i2
! (delay slot: the divisor is negated whether or not the branch is
! taken; falling through means the dividend is negative as well)
2:
! dividend < 0
! 64-bit negate of %i1:%i0: the subcc produces the borrow consumed by
! the subx
subcc %g0,%i0,%i0
b ldivide
subx %g0,%i1,%i1
ldivide:
cmp %i2,%i1 ! divisor
bgu,a leastquo ! if divisor > dividendh then goto leastquo
mov 0,%l1 ! and set quotienth = 0.
! quotienth is non-zero
mov %i1,%o0
call .udiv ! quotienth = dividendh/divisor
mov %i2,%o1
mov %o0,%l1 ! store result in quotienth
! .udiv leaves its running remainder in %o3; when that value is
! negative the divisor has to be added back (see the header comment)
tst %o3
bge,a leastquo
mov %o3,%i1 ! %o3 is the remainder, set %i1 = %o3
add %o3,%i2,%i1
leastquo:
! computing quotientl, the least sig. 32 bits of the quotient
tst %i1 ! if 0, then quotientl=dividendl/divisor
! annulled delay slot: %l0 is cleared only when the bit-by-bit loop at
! 1: is going to be used
bne,a 1f
mov 0,%l0 ! initialize quotientl
mov %i0,%o0
call .udiv
mov %i2,%o1
mov %o0,%i0
! NOTE(review): when the low dividend is smaller than the divisor,
! .udiv returns early with %o3 equal to the dividend; if that value has
! bit 31 set the test below takes it for a negative remainder and adds
! the divisor. Only the remainder in %i2 is affected -- confirm whether
! any caller reads it.
tst %o3
bge,a 2f
mov %o3,%i2
add %o3,%i2,%i2
2:
ba endldivide
mov %l1,%i1
1:
! 32 iterations of shift-and-subtract on the pair %i1:%i0, developing
! one bit of the low quotient word in %l0 per pass
mov 32,%l4 ! initialize counter
addcc %i0,%i0,%i0
loop:
addxcc %i1,%i1,%i1 ! dividend << 1
! a carry out of the shift means the shifted value exceeds 32 bits and
! is therefore larger than the divisor: subtract without comparing
bcs 2f
add %l0,%l0,%l0 ! quo << 1
cmp %i1,%i2
bcs,a 1f ! if dividend < divisor skip inc quotient
subcc %l4,1,%l4 ! counter -= 1
2:
sub %i1,%i2,%i1 ! dividendh -= divisor
inc %l0 ! quo += 1 when divisor <= dividendh
subcc %l4,1,%l4 ! counter -= 1
1:
! annulled delay slot: the next shift of the low word is started only
! when the loop repeats
bg,a loop
addcc %i0,%i0,%i0
mov %i1,%i2 ! remainder in %i2
mov %l1,%i1 ! most 32 bits of quotient in %i1
mov %l0,%i0 ! least 32 bits of quotient in %i0
endldivide:
tst %l3 ! check sign
bge 1f
nop
! NOTE(review): the remainder is negated under the same condition as
! the quotient (operand signs differ), not according to the sign of
! the dividend -- confirm that this is intended.
neg %i2
subcc %g0,%i0,%i0
subx %g0,%i1,%i1
1:
! store result to QL,QH and R
! mask off
! st %i0,[%i3]
! st %i1,[%i4]
! st %i2,[%i5]
ret
restore