/*	@(#)copy.s 1.1 94/10/31 SMI	*/

	.seg	"text"
	.align	4

/*
 * Copyright (c) 1987 by Sun Microsystems, Inc.
 */

#include <sys/param.h>
#include <sys/errno.h>
#include <machine/asm_linkage.h>
#include <machine/mmu.h>
#include <machine/pte.h>
#include "assym.s"

#ifdef sun4m
#define	PTE_CACHEABLEMASK	0x80	/* XXX- move to pte.h */
#endif sun4m

/*
 * Limit use of the bcopy buffer to transfers of at least this size;
 * if the transfer isn't at least two cache lines in size, forget it.
 */
#ifdef sun4m
#define	BCOPY_BUF
#define	BCPY_BLKSZ	0x20
#define	BCOPY_LIMIT	0x200	/* XXX- for now looks like best value */
#define	BCOPY_LIMIT_625	0x40	/* small limit for 625 */
#else
#define	BCOPY_LIMIT	0x40
#endif sun4m

/*
 * sun4m uses a different define for probing the mmu.  Set the correct
 * define in the case of the sun4m.
 */
#ifdef sun4m
#undef	ASI_PM
#define	ASI_PM	ASI_FLPR

/*
 * These constants may not be used on a Sun-4M in the
 * way that this file uses them.
 */
#undef	VME_D16
#undef	VME_D32
#endif sun4m

/*
 * FAST_BLKCOPY enables a specially scheduled set of ldd/std
 * that should be optimal for all page copies.
 * Unfortunately, it uses %l6, which needs to be maintained.
 *
 * #define FAST_BLKCOPY
 */

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok.
 *
 * int
 * kcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 */
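/*
 * Usage sketch (illustrative, not part of the original source; the
 * variable names are hypothetical):
 *
 *	int err;
 *
 *	if ((err = kcopy(src, dst, len)) != 0)
 *		return (err);		errno-style code from the fault
 */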
ENTRY(kcopy)
	set	copyerr, %o3		! copyerr is lofault value
do_copy:
	save	%sp, -SA(MINFRAME), %sp	! get another window
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	ld	[%g5+U_LOFAULT], %l6	! save u.u_lofault
	b	bcopy_cmn		! common code
	st	%i3, [%g5+U_LOFAULT]	! install new vector

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
copyerr:
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%l6, [%g5+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g1, 0, %o0

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * Registers: l6 - saved u.u_lofault
 *
 * bcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 */
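/*
 * Usage sketch (illustrative only): the regions must not overlap;
 * possibly overlapping moves go through ovbcopy() below.
 *
 *	bcopy(old, new, nbytes);	(old, new, nbytes hypothetical)
 */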
ENTRY(bcopy)
	save	%sp, -SA(MINFRAME), %sp	! get another window
	sethi	%hi(_uunix), %g5	! XXX - global u register?
	ld	[%g5 + %lo(_uunix)], %g5
	ld	[%g5+U_LOFAULT], %l6	! save u.u_lofault
bcopy_cmn:
	cmp	%i2, 12			! for small counts
	bl,a	dbytecp			! just copy bytes
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst
#ifdef VME_D16
	!
	! Check src and dest page types for vme 16 or 32 bit access
	!
	andn	%i0, 3, %g1		! word align src
	ldub	[%i0], %g0		! make sure page is in memory
	lda	[%g1]ASI_PM, %g2	! get its pte

	andn	%i1, 3, %g1		! word align dest
	ldub	[%i1], %g0		! make sure page is in memory
	lda	[%g1]ASI_PM, %g1	! get its pte

	or	%g1, %g2, %g1		! if either
	srl	%g1, PGT_SHIFT, %g1	! page type is vme space
	andcc	%g1, VME_D16|VME_D32, %g1
	bz	bcopy_obmem		! no, do 'on board' memory
	cmp	%g1, VME_D16		! which vme?
	be	bcopy_vme16
	nop
	b	bcopy_vme32		! next instruction needed in delay slot
	nop
#endif VME_D16
#ifdef SUN4M_690
	and	%i0, MMU_PAGEMASK, %i3
	or	%i3, FT_ALL<<8, %i3
	lda	[%i3]ASI_PM, %i4	! get source PTE

	and	%i4, 3, %i4
	cmp	%i4, MMU_ET_PTE
	bne,a	1f			! if not mapped in,
	ldub	[%i0], %g0		! force source page into memory
1:
	lda	[%i3]ASI_PM, %i4	! get source PTE again

	srl	%i4, 28, %i4		! convert pte to space

	! if source is vme D16, limit transfer width
	! to 16 bits. (space=0xA or 0xC)
	cmp	%i4, 0xA		! from user vme-d16
	be	bcopy_vme16
	cmp	%i4, 0xC		! from supv vme-d16
	be	bcopy_vme16

	! if source is vme D32, limit transfer width
	! to 32 bits. (space=0xB or 0xD)
	cmp	%i4, 0xB		! from user vme-d32
	be	bcopy_vme32
	cmp	%i4, 0xD		! from supv vme-d32
	be	bcopy_vme32

	and	%i1, MMU_PAGEMASK, %i3
	or	%i3, FT_ALL<<8, %i3
	lda	[%i3]ASI_PM, %i4	! get destination PTE

	and	%i4, 3, %i4
	cmp	%i4, MMU_ET_PTE
	bne,a	1f			! if not mapped in,
	ldub	[%i1], %g0		! force destination page into memory
1:
	lda	[%i3]ASI_PM, %i4	! get destination PTE again

	srl	%i4, 28, %i4		! convert pte to space

	! if destination is vme D16, limit transfer width
	! to 16 bits. (space=0xA or 0xC)
	cmp	%i4, 0xA		! to user vme-d16
	be	bcopy_vme16
	cmp	%i4, 0xC		! to supv vme-d16
	be	bcopy_vme16

	! if destination is vme D32, limit transfer width
	! to 32 bits. (space=0xB or 0xD)
	cmp	%i4, 0xB		! to user vme-d32
	be	bcopy_vme32
	cmp	%i4, 0xD		! to supv vme-d32
	be	bcopy_vme32
	nop
#endif
	!
	! use aligned transfers where possible
	!
bcopy_obmem:
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	aldoubcp		! can align on double boundary
	.empty				! assembler complains about label
#if defined(VME_D16) || defined(SUN4M_690)
bcopy_vme32:
	xor	%i0, %i1, %o4		! xor from and to address
#endif /* VME_D16 || SUN4M_690 */
	! (why is this label here?) bcopy_words:
	btst	3, %o4			! if lower two bits zero
	bz	alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
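	!
	! A rough C model of the steady-state merge below (illustrative
	! only, using the US/LS names from the comment above):
	!
	!	hold = 0;
	!	while (count >= 4) {
	!		w = *src++;			aligned word read
	!		*dst++ = hold | (w >> US);	old high + new low
	!		hold = w << LS;			save leftover bits
	!		count -= 4;
	!	}
	!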
	mov	32, %l5			! load an oft-needed constant
	bz	align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # req'd to fill dst w old src left
	bg	more_needed		! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	unalign_out		! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
unalign_out:
	tst	%l4			! any bytes leftover?
	bz	cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	cpdone			! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	1b			! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	dbytecp			! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is; if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	bl,a	alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	alignit			! copy bytes until aligned
	mov	7, %o0			! mask for double alignment

#ifdef BCOPY_BUF
bcp_bufchk:
	!
	! source and destination are now double-word aligned
	! test for bcopy buffer; on machines that don't have
	! a bcopy buffer the variable bcopy_res is initialized
	! to -1 which keeps anyone from using it
	!
	! the following commented out code is usable when different
	! machines have different sized bcopy buffers
	! it is overkill for now and not used, maybe later....
	!
	! sethi	%hi(_bcopy_res), %o2
	! ld	[%o2 + %lo(_bcopy_res)], %o3	! is hardware available
	! tst	%o3
	! bnz	bcp_nobuf		! reserved or not enabled
	! sethi	_vac_linesize, %o4
	! ld	[%o4 + %lo(_vac_linesize)], %o4	! cache line size
	! sll	%o4, 1, %o5		! if > 2 lines try to use hardware
	! cmp	%o2, %o5		! see if it is worth while
	! blt	bcp_nobuf		! if not more than 2 lines, punt
	! sub	%o4, 1, %o5		! create alignment mask
	! andcc	%o1, %o5, %o1
	! xor	%i0, %i1, %o1		! check alignment of src and dest
	! bnz	bcp_nobuf		! not aligned, can't use hardware
	! andcc	%i0, %o5, %o1		! check src alignment
	! bz	bcp_buf			! if src is aligned, then so is dest
	! nop				! if not, copy doubles until aligned
	!1:
	! ldd	[%i0], %o0		! align to cache
	! std	%o0, [%i1]
	! sub	%i2, 8, %i2		! update count
	! add	%i0, 8, %i0		! update source address
	! andcc	%i0, %o5, %o3		! later....
	! bnz	1b
	! add	%i1, 8, %i1		! update dest address
	!bcp_buf:
	!
	sethi	%hi(_bcopy_res), %o5
	ld	[%o5 + %lo(_bcopy_res)], %o3	! is hardware available
	mov	32, %o4			! interlock, set buffer size
	tst	%o3
	bnz	bcp_nobuf		! reserved or not enabled

#ifdef sun4m
	sethi	%hi(_ross625_flag), %l2
	ld	[%l2 + %lo(_ross625_flag)], %l2
	tst	%l2
	bnz,a	0f
	cmp	%i2, BCOPY_LIMIT_625
#endif sun4m
	cmp	%i2, BCOPY_LIMIT	! see if it is worth while
0:
	blt	bcp_nobuf		! if not more than 2 lines, punt

	xor	%i0, %i1, %o1		! check alignment of src and dest
	andcc	%o1, 0x1F, %o1
	bnz	bcp_nobuf		! not aligned, can't use hardware
	andcc	%i0, 0x1F, %o1		! check src alignment
	bz	bcp_buf			! if src is aligned, then so is dest
	nop				! if not, copy doubles until aligned
1:
	ldd	[%i0], %o0		! align to cache
	std	%o0, [%i1]
	sub	%i2, 8, %i2		! update count
	add	%i0, 8, %i0		! update source address
	andcc	%i0, 0x1F, %o3
	bnz	1b
	add	%i1, 8, %i1		! update dest address

	andn	%i2, 7, %i3
bcp_buf:
	ldstub	[%o5 + %lo(_bcopy_res)], %o3	! try to grab hardware
	tst	%o3
	bnz	bcp_nobuf		! hardware in use, use software
	nop
	!
	! XXX--Put in something to efficiently use the bcopy HW when the
	! src and dest addrs aren't aligned, and the size of the copy is large.
	!
	! Should only get here on sun4m when have viking/mxcc or hyperSPARC
#ifdef sun4m
	! sethi	%hi(_ross625_flag), %l2
	! ld	[%l2 + %lo(_ross625_flag)], %l2

	tst	%l2
	bz,a	not_hypersparc
	xor	%i0, %i1, %i5		! check alignment of src and dest

	! hyperSPARC is installed and we are aligned on a cache line
0:

	! Attempt store to destination address to force page fault or other
	! exceptions to be handled.
	!
	! This is needed for a Ross625 A0-A3 bug where it's possible
	! for the hardware bcopy to write into the destination if the
	! destination is write protected under some circumstances.
	stb	%o4, [%i1]		! Try to write into dest page

	sta	%i1, [%i0]ASI_BC
	add	%i0, BCPY_BLKSZ, %i0	! update src
	sub	%i2, BCPY_BLKSZ, %i2	! update cnt
	cmp	%i2, BCPY_BLKSZ
	bge	0b
	add	%i1, BCPY_BLKSZ, %i1	! update dest

	sethi	%hi(_bcopy_cnt), %o3	! count # times used buffer
	ld	[%o3 + %lo(_bcopy_cnt)], %o4
	st	%g0, [%o5 + %lo(_bcopy_res)]	! interlock, unlock hardware
	inc	%o4
	b	bytecp			! do remaining bytes, if any
	st	%o4, [%o3 + %lo(_bcopy_cnt)]

not_hypersparc:
	xor	%i0, %i1, %i5		! check alignment of src and dest
	btst	MMU_PAGEOFFSET, %i5
	bz,a	1f			! src and dest aligned for page copies
	mov	0x10, %l2		! set cacheable bit in upper word
	!
	! Case where source and dest are not aligned, but neither crosses a
	! page boundary.  Can still use HW in this case.
	! Happens quite a bit, so it's a big win.
	!
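	!
	! In C terms (illustrative only), the test applied below to each
	! of the source and destination addresses is:
	!
	!	if ((addr & MMU_PAGEOFFSET) + count > MMU_PAGESIZE)
	!		goto bcp_nobuf;		would cross a page: use SW
	!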
	and	%i0, MMU_PAGEOFFSET, %i5
	add	%i5, %i2, %i5
	set	MMU_PAGESIZE, %i4
	cmp	%i5, %i4
	bg,a	bcp_nobuf		! Use SW, src would exceed page boundary
	st	%g0, [%o5 + %lo(_bcopy_res)]

	and	%i1, MMU_PAGEOFFSET, %i5
	add	%i5, %i2, %i5
	cmp	%i5, %i4		! %i4 still contains MMU_PAGESIZE
	ble	1f			! src and dest won't cross page
	mov	0x10, %l2		! set cacheable bit in upper word
	! Use SW, dst would exceed page boundary
	b	bcp_nobuf		! zero out HW lock word
	st	%g0, [%o5 + %lo(_bcopy_res)]
1:
	set	MXCC_STRM_SRC, %l5	! addr of stream source reg
	set	MXCC_STRM_DEST, %l4	! addr of stream destination reg

	!
	! spl here because we don't want phys pages to be stolen from us
	! until we have completed the copy.
	!
	call	_splhigh		! Watch register usage on calls!
	ldub	[%i0], %o4		! Set R bit on src page
	mov	%o0, %l7		! must save if make any other calls
	stb	%o4, [%i1]		! Set RM bits in dest page
3:
	and	%i1, MMU_PAGEMASK, %l3	! destination virtual page
	or	%l3, FT_ALL<<8, %l3	! find destination phys addr
	lda	[%l3]ASI_PM, %l3	! get phys addr
	and	%l3, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	stb	%o4, [%i1]		! force target page into memory

	srl	%l3, 28, %i3		! check space
	tst	%i3
	bz	1f			! Obmem page
	btst	PTE_CACHEABLEMASK, %l3	! Double check, page marked cacheable?
	bz,a	2f			! Not obmem, and not cacheable
	mov	%i3, %l2

	b	2f			! Not obmem, but cacheable
	or	%l2, %i3, %l2
1:
	bnz	2f			! Yes, obmem and cacheable, go ahead.
	nop
	mov	%g0, %l2		! No, clear cacheable bit.
2:
	srl	%l3, 8, %l3		! remove non-page bits from pte
	sll	%l3, MMU_PAGESHIFT, %l3	! phys page minus space bits
	and	%i1, MMU_PAGEOFFSET, %i3	! page offset via virt addr
	or	%l3, %i3, %l3		! destination phys addr complete
	mov	0x10, %l0		! set cacheable bit in upper word
3:
	and	%i0, MMU_PAGEMASK, %l1	! source virtual page
	or	%l1, FT_ALL<<8, %l1	! find source phys addr
	lda	[%l1]ASI_PM, %l1	! get phys addr
	and	%l1, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	ldub	[%i0], %g0		! force target page into memory

	srl	%l1, 28, %i3		! check space
	tst	%i3
	bz	1f			! Obmem page
	btst	PTE_CACHEABLEMASK, %l1	! Double check, page marked cacheable?
	bz,a	2f			! Not obmem, and not cacheable
	mov	%i3, %l0

	b	2f			! Not obmem, but cacheable
	or	%l0, %i3, %l0
1:
	bnz	2f			! Yes, obmem and cacheable, go ahead.
	nop
	mov	%g0, %l0		! No, clear cacheable bit.
2:
	srl	%l1, 8, %l1		! remove non-page bits from pte
	sll	%l1, MMU_PAGESHIFT, %l1	! phys page minus space bits
	and	%i0, MMU_PAGEOFFSET, %i3	! page offset via virt addr
	or	%l1, %i3, %l1		! source phys addr complete

	add	%i3, %i2, %i4		! will we cross page boundary?
	set	MMU_PAGESIZE, %o4	! use for pagesize boundary check
	cmp	%i4, %o4		! Yes if %i3+%i2 > PAGESIZE
	ble,a	hw_fastbcopy		! No. Can copy all bytes and exit.
	mov	%i2, %o4		! %i2 has the requested transfer size
	! if yes, loop on page copies
	sub	%o4, %i3, %o4		! round transfer to PAGESIZE
	b	hw_pagecopy		! copy bytes in leading page
	sub	%i2, %o4, %i2		! decrease by what we will copy

hw_bcopyloop:
	mov	%l7, %o0		! %l7 holds results of splhigh
	call	_splx			! allow window for interrupts
	mov	0x10, %l0

	set	BCPY_BLKSZ, %i5		! Check if virt->phys translations
	cmp	%i2, %i5		! will be necessary
	bl	9f			! Not necessary. Exit,
	nop				! skipping the call to splx()

	call	_splhigh
	ldub	[%i0], %o4		! Set R bit in src page
	mov	%o0, %l7		! save return value of splhigh
3:
	mov	%i0, %l1
	or	%l1, FT_ALL<<8, %l1	! find new source phys addr
	lda	[%l1]ASI_PM, %l1	! get phys addr
	and	%l1, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	ldub	[%i0], %g0		! force target page into memory

	srl	%l1, 28, %i3		! check space
	tst	%i3
	bz	1f			! Obmem page
	btst	PTE_CACHEABLEMASK, %l1	! Double check, page marked cacheable?
	bz,a	2f			! Not obmem, and not cacheable
	mov	%i3, %l0

	b	2f			! Not obmem, but cacheable
	or	%l0, %i3, %l0
1:
	bnz	2f			! Yes, obmem and cacheable, go ahead.
	nop
	mov	%g0, %l0		! No, clear cacheable bit.
2:
	srl	%l1, 8, %l1		! remove non-page bits from pte
	sll	%l1, MMU_PAGESHIFT, %l1	! phys page minus space bits
	! We should be on page boundary
	mov	0x10, %l2
	stb	%o4, [%i1]		! Set RM bits in dest page
3:
	mov	%i1, %l3
	or	%l3, FT_ALL<<8, %l3	! find new dest phys addr
	lda	[%l3]ASI_PM, %l3	! get phys addr
	and	%l3, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	stb	%o4, [%i1]

	srl	%l3, 28, %i3		! check space
	tst	%i3
	bz	1f			! Obmem page
	btst	PTE_CACHEABLEMASK, %l3	! Double check, page marked cacheable?
	bz,a	2f			! Not obmem, and not cacheable
	mov	%i3, %l2

	b	2f			! Not obmem, but cacheable
	or	%l2, %i3, %l2
1:
	bnz	2f			! Yes, obmem and cacheable, go ahead
	nop
	mov	%g0, %l2		! No, clear cacheable bit.
2:
	srl	%l3, 8, %l3		! remove non-page bits from pte
	sll	%l3, MMU_PAGESHIFT, %l3	! phys page minus space bits
	! We should be on page boundary
	set	MMU_PAGESIZE, %o4
	cmp	%i2, %o4		! Do we have more than one page left?
	bg,a	hw_pagecopy		! Yes, call hw_pagecopy to loop
	sub	%i2, %o4, %i2		! decrease by what we will copy
	! Always copy PAGESIZE here
	b	hw_fastbcopy		! it's not
	mov	%i2, %o4

hw_fastbcopy:				! use for < PAGESIZE copies.
	stda	%l0, [%l5]ASI_MXCC
	stda	%l2, [%l4]ASI_MXCC
	dec	BCPY_BLKSZ, %o4		! decrement count
	inc	BCPY_BLKSZ, %i0		! increment source virt address
	inc	BCPY_BLKSZ, %i1		! increment dest virt address
	cmp	%o4, BCPY_BLKSZ		! still got more than BCPY_BLKSZ to go?
	bl,a	hw_bcopyexit		! No, done with HW
	mov	%o4, %i2		! what we need byteclr to finish up
	inc	BCPY_BLKSZ, %l1		! increment to next sublock
	b	hw_fastbcopy		! loop until finished
	inc	BCPY_BLKSZ, %l3		! increment to next sublock

hw_pagecopy:
	stda	%l0, [%l5]ASI_MXCC
	stda	%l2, [%l4]ASI_MXCC
	inc	BCPY_BLKSZ, %i0		! increment source virtual address
	deccc	BCPY_BLKSZ, %o4		! decrement count
	bz	hw_bcopyloop		! done
	inc	BCPY_BLKSZ, %i1		! increment dest virt address
	inc	BCPY_BLKSZ, %l1		! increment physaddr one sublock
	b	hw_pagecopy		! more to do
	inc	BCPY_BLKSZ, %l3		! increment to next sublock

hw_bcopyexit:
	mov	%l7, %o0		! %l7 holds results of splhigh call
	call	_splx
	.empty				! silence complaint about label
	!
	! Check Error Register to see if AE bit set, indicating problem with
	! the stream operation we just did.  Fatal, so we go down if a problem
	! is found.
	!
9:
	sethi	%hi(_bcopy_cnt), %o3	! count # times used buffer

chk_dst_copy:
	set	MXCC_STRM_DEST, %l4	! addr of stream dest reg
	ldda	[%l4]ASI_MXCC, %l4
	cmp	%l4, %g0
	bge	chk_dst_copy
	nop

	set	MXCC_ERROR, %l7		! Load addr of MXCC err reg
	ldda	[%l7]ASI_MXCC, %l4	! %l5 holds bits 0-31 of paddr
	set	MXCC_ERR_AE, %o4
	btst	%o4, %l4		! %l4 holds the status bits
	bz	1f			! If bit not set continue exit
	ld	[%o3 + %lo(_bcopy_cnt)], %o4

	set	MXCC_ERR_EV, %i4	! If error bit set...
	btst	%i4, %l4		! check if valid bit set
	bnz	6f
	.empty				! Silence complaint about label
1:
	st	%g0, [%o5 + %lo(_bcopy_res)]	! interlock, unlock hardware
	inc	%o4
	b	bytecp			! do remaining bytes, if any
	st	%o4, [%o3 + %lo(_bcopy_cnt)]
6:
	set	0f, %o0
	call	_panic
	nop
0:
	.asciz	"bcopy stream operation failed"
	.align	4

#else sun4m
	!
	! This is where we go if we are using HW bcopy
	! but it isn't a viking/mxcc
	!
1:	lda	[%i0]ASI_BC, %g0	! fill buffer
	sub	%i2, %o4, %i2		! update count
	add	%i0, %o4, %i0		! update source address
	cmp	%i2, %o4		! check if finished
	sta	%g0, [%i1]ASI_BC	! write buffer
	bge	1b			! loop until done
	add	%i1, %o4, %i1		! update dest address

	sethi	%hi(_bcopy_cnt), %o3	! count # times used buffer
	ld	[%o3 + %lo(_bcopy_cnt)], %o4
	st	%g0, [%o5 + %lo(_bcopy_res)]	! interlock, unlock hardware
	inc	%o4
	b	bytecp			! do remaining bytes, if any
	st	%o4, [%o3 + %lo(_bcopy_cnt)]

#endif sun4m

bcp_nobuf:
#endif BCOPY_BUF

	!
	! source and destination are now double-word aligned
	! see if transfer is large enough to gain by loop unrolling
	!
	cmp	%i2, 512		! if less than 512 bytes
	bge,a	blkcopy			! just copy double-words (overwrite i3)
	mov	0x100, %i3		! blk copy chunk size for unrolled loop
	!
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldd	[%i0+%i1], %o4		! read from address
	std	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bg	5b
	add	%i1, 8, %i1		! delay slot, inc to address
wcpchk:
	cmp	%i2, 4			! see if we can copy a word
	bl	dbytecp			! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bg	5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	dbytecp

	! we come here to align copies on word boundaries
alwordcp:
	call	alignit			! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
bytecp:	b	dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
dbytecp:
	deccc	%i2			! dec count
	bge,a	1b			! loop till done
	ldub	[%i0+%i1], %o4		! read from address
cpdone:
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%l6, [%g5+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3.
 */
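/*
 * A C model of alignit (illustrative only): copy bytes forward until the
 * source address has no mask bits set, then report how much can move in
 * aligned units:
 *
 *	while (from & mask) {
 *		*to++ = *from++;
 *		count--;
 *	}
 *	aligned_count = count & ~mask;
 */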
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes

/*
 * Copy a page of memory.
 * Assumes double word alignment and a count >= 256.
 *
 * pgcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 */
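/*
 * Usage sketch (illustrative): per the comment above, both addresses must
 * be doubleword aligned and count must be at least 256, e.g.
 *
 *	pgcopy(frompage, topage, MMU_PAGESIZE);
 *					(frompage, topage hypothetical)
 */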
ENTRY(pgcopy)
	save	%sp, -SA(MINFRAME), %sp	! get another window
	mov	0x100, %i3
	!
	! loops have been unrolled so that 64 instructions (16 cache lines)
	! are used; 256 bytes are moved each time through the loop
	! i0 - from; i1 - to; i2 - count; i3 - chunksize; o4,o5 - tmp
	!
	! We read a whole cache line and then we write it to
	! minimize thrashing.
	!
blkcopy:
#ifdef FAST_BLKCOPY
	!
	! This schedule is optimal for 4/60 and 4/2xx, and is
	! nearly optimal for 4/3xx.  Measurements should be
	! taken also on 4/1xx and 4/4xx products, and
	! eventually on all MBUS modules.

	ldd	[%i0+0xf8], %l0
	ldd	[%i0+0xf0], %l2 ; std	%l0, [%i1+0xf8]
	ldd	[%i0+0xe8], %l4 ; std	%l2, [%i1+0xf0]
	ldd	[%i0+0xe0], %l6 ; std	%l4, [%i1+0xe8]
	ldd	[%i0+0xd8], %l0 ; std	%l6, [%i1+0xe0]
	ldd	[%i0+0xd0], %l2 ; std	%l0, [%i1+0xd8]
	ldd	[%i0+0xc8], %l4 ; std	%l2, [%i1+0xd0]
	ldd	[%i0+0xc0], %l6 ; std	%l4, [%i1+0xc8]
	ldd	[%i0+0xb8], %l0 ; std	%l6, [%i1+0xc0]
	ldd	[%i0+0xb0], %l2 ; std	%l0, [%i1+0xb8]
	ldd	[%i0+0xa8], %l4 ; std	%l2, [%i1+0xb0]
	ldd	[%i0+0xa0], %l6 ; std	%l4, [%i1+0xa8]
	ldd	[%i0+0x98], %l0 ; std	%l6, [%i1+0xa0]
	ldd	[%i0+0x90], %l2 ; std	%l0, [%i1+0x98]
	ldd	[%i0+0x88], %l4 ; std	%l2, [%i1+0x90]
	ldd	[%i0+0x80], %l6 ; std	%l4, [%i1+0x88]
	ldd	[%i0+0x78], %l0 ; std	%l6, [%i1+0x80]
	ldd	[%i0+0x70], %l2 ; std	%l0, [%i1+0x78]
	ldd	[%i0+0x68], %l4 ; std	%l2, [%i1+0x70]
	ldd	[%i0+0x60], %l6 ; std	%l4, [%i1+0x68]
	ldd	[%i0+0x58], %l0 ; std	%l6, [%i1+0x60]
	ldd	[%i0+0x50], %l2 ; std	%l0, [%i1+0x58]
	ldd	[%i0+0x48], %l4 ; std	%l2, [%i1+0x50]
	ldd	[%i0+0x40], %l6 ; std	%l4, [%i1+0x48]
	ldd	[%i0+0x38], %l0 ; std	%l6, [%i1+0x40]
	ldd	[%i0+0x30], %l2 ; std	%l0, [%i1+0x38]
	ldd	[%i0+0x28], %l4 ; std	%l2, [%i1+0x30]
	ldd	[%i0+0x20], %l6 ; std	%l4, [%i1+0x28]
	ldd	[%i0+0x18], %l0 ; std	%l6, [%i1+0x20]
	ldd	[%i0+0x10], %l2 ; std	%l0, [%i1+0x18]
	ldd	[%i0+0x08], %l4 ; std	%l2, [%i1+0x10]
	ldd	[%i0+0x00], %l6 ; std	%l4, [%i1+0x08]
	b	instr ; std	%l6, [%i1+0x00]

#endif FAST_BLKCOPY
/*
 * This code was generated from the sun4m I/O perf work.
 */
#ifdef sun4m
	ldd	[%i0+0xf8], %l0		! 0xfc
	ldd	[%i0+0xf0], %l2
	ldd	[%i0+0xe8], %o0
	ldd	[%i0+0xe0], %o2
	std	%l0, [%i1+0xf8]
	std	%l2, [%i1+0xf0]
	std	%o0, [%i1+0xe8]
	std	%o2, [%i1+0xe0]

	ldd	[%i0+0xd8], %l0		! 0xdc
	ldd	[%i0+0xd0], %l2
	ldd	[%i0+0xc8], %o0
	ldd	[%i0+0xc0], %o2
	std	%l0, [%i1+0xd8]
	std	%l2, [%i1+0xd0]
	std	%o0, [%i1+0xc8]
	std	%o2, [%i1+0xc0]

	ldd	[%i0+0xb8], %l0		! 0xbc
	ldd	[%i0+0xb0], %l2
	ldd	[%i0+0xa8], %o0
	ldd	[%i0+0xa0], %o2
	std	%l0, [%i1+0xb8]
	std	%l2, [%i1+0xb0]
	std	%o0, [%i1+0xa8]
	std	%o2, [%i1+0xa0]

	ldd	[%i0+0x98], %l0		! 0x9c
	ldd	[%i0+0x90], %l2
	ldd	[%i0+0x88], %o0
	ldd	[%i0+0x80], %o2
	std	%l0, [%i1+0x98]
	std	%l2, [%i1+0x90]
	std	%o0, [%i1+0x88]
	std	%o2, [%i1+0x80]

	ldd	[%i0+0x78], %l0		! 0x7c
	ldd	[%i0+0x70], %l2
	ldd	[%i0+0x68], %o0
	ldd	[%i0+0x60], %o2
	std	%l0, [%i1+0x78]
	std	%l2, [%i1+0x70]
	std	%o0, [%i1+0x68]
	std	%o2, [%i1+0x60]

	ldd	[%i0+0x58], %l0		! 0x5c
	ldd	[%i0+0x50], %l2
	ldd	[%i0+0x48], %o0
	ldd	[%i0+0x40], %o2
	std	%l0, [%i1+0x58]
	std	%l2, [%i1+0x50]
	std	%o0, [%i1+0x48]
	std	%o2, [%i1+0x40]

	ldd	[%i0+0x38], %l0		! 0x3c
	ldd	[%i0+0x30], %l2
	ldd	[%i0+0x28], %o0
	ldd	[%i0+0x20], %o2
	std	%l0, [%i1+0x38]
	std	%l2, [%i1+0x30]
	std	%o0, [%i1+0x28]
	std	%o2, [%i1+0x20]

	ldd	[%i0+0x18], %l0		! 0x1c
	ldd	[%i0+0x10], %l2
	ldd	[%i0+0x8], %o0
	ldd	[%i0], %o2
	std	%l0, [%i1+0x18]
	std	%l2, [%i1+0x10]
	std	%o0, [%i1+0x8]
	std	%o2, [%i1]
instr:

	sub	%i2, %i3, %i2		! decrement count
	add	%i0, %i3, %i0		! increment from address
	cmp	%i2, 0x100		! enough to do another block?
	bge	blkcopy			! yes, do another chunk
	add	%i1, %i3, %i1		! increment to address
	tst	%i2			! all done yet?
	ble	cpdone			! yes, return
	cmp	%i2, 31			! can we do more cache lines
	bg,a	1f
	andn	%i2, 31, %i3		! %i3 bytes left, aligned (to 32 bytes)
	andn	%i2, 3, %i3		! %i3 bytes left, aligned to 4 bytes
	b	wcpchk
	sub	%i0, %i1, %i0		! create diff of src and dest addr
1:
	set	instr, %o5		! address of copy instructions
	sub	%o5, %i3, %o5		! jmp address relative to instr
	jmp	%o5
	nop

#else sun4m

	ldd	[%i0+0xf8], %l0		! 0xfc
	ldd	[%i0+0xf0], %l2
	std	%l0, [%i1+0xf8]
	std	%l2, [%i1+0xf0]

	ldd	[%i0+0xe8], %l0		! 0xec
	ldd	[%i0+0xe0], %l2
	std	%l0, [%i1+0xe8]
	std	%l2, [%i1+0xe0]

	ldd	[%i0+0xd8], %l0		! 0xdc
	ldd	[%i0+0xd0], %l2
	std	%l0, [%i1+0xd8]
	std	%l2, [%i1+0xd0]

	ldd	[%i0+0xc8], %l0		! 0xcc
	ldd	[%i0+0xc0], %l2
	std	%l0, [%i1+0xc8]
	std	%l2, [%i1+0xc0]

	ldd	[%i0+0xb8], %l0		! 0xbc
	ldd	[%i0+0xb0], %l2
	std	%l0, [%i1+0xb8]
	std	%l2, [%i1+0xb0]

	ldd	[%i0+0xa8], %l0		! 0xac
	ldd	[%i0+0xa0], %l2
	std	%l0, [%i1+0xa8]
	std	%l2, [%i1+0xa0]

	ldd	[%i0+0x98], %l0		! 0x9c
	ldd	[%i0+0x90], %l2
	std	%l0, [%i1+0x98]
	std	%l2, [%i1+0x90]

	ldd	[%i0+0x88], %l0		! 0x8c
	ldd	[%i0+0x80], %l2
	std	%l0, [%i1+0x88]
	std	%l2, [%i1+0x80]

	ldd	[%i0+0x78], %l0		! 0x7c
	ldd	[%i0+0x70], %l2
	std	%l0, [%i1+0x78]
	std	%l2, [%i1+0x70]

	ldd	[%i0+0x68], %l0		! 0x6c
	ldd	[%i0+0x60], %l2
	std	%l0, [%i1+0x68]
	std	%l2, [%i1+0x60]

	ldd	[%i0+0x58], %l0		! 0x5c
	ldd	[%i0+0x50], %l2
	std	%l0, [%i1+0x58]
	std	%l2, [%i1+0x50]

	ldd	[%i0+0x48], %l0		! 0x4c
	ldd	[%i0+0x40], %l2
	std	%l0, [%i1+0x48]
	std	%l2, [%i1+0x40]

	ldd	[%i0+0x38], %l0		! 0x3c
	ldd	[%i0+0x30], %l2
	std	%l0, [%i1+0x38]
	std	%l2, [%i1+0x30]

	ldd	[%i0+0x28], %l0		! 0x2c
	ldd	[%i0+0x20], %l2
	std	%l0, [%i1+0x28]
	std	%l2, [%i1+0x20]

	ldd	[%i0+0x18], %l0		! 0x1c
	ldd	[%i0+0x10], %l2
	std	%l0, [%i1+0x18]
	std	%l2, [%i1+0x10]

	ldd	[%i0+0x8], %l0		! 0x0c
	ldd	[%i0], %l2
	std	%l0, [%i1+0x8]
	std	%l2, [%i1]
instr:

	sub	%i2, %i3, %i2		! decrement count
	add	%i0, %i3, %i0		! increment from address
	cmp	%i2, 0x100		! enough to do another block?
	bge	blkcopy			! yes, do another chunk
	add	%i1, %i3, %i1		! increment to address
	tst	%i2			! all done yet?
	ble	cpdone			! yes, return
	cmp	%i2, 15			! can we do more cache lines
	bg,a	1f
	andn	%i2, 15, %i3		! %i3 bytes left, aligned (to 16 bytes)
	andn	%i2, 3, %i3		! %i3 bytes left, aligned to 4 bytes
	b	wcpchk
	sub	%i0, %i1, %i0		! create diff of src and dest addr
1:
	set	instr, %o5		! address of copy instructions
	sub	%o5, %i3, %o5		! jmp address relative to instr
	jmp	%o5
	nop
#endif sun4m
/*
 * Block copy with possibly overlapped operands.
 *
 * ovbcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 */
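/*
 * Usage sketch (illustrative): safe even when the regions overlap, e.g.
 * shifting a buffer down by one byte:
 *
 *	ovbcopy(buf + 1, buf, len - 1);		(buf, len hypothetical)
 */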
ENTRY(ovbcopy)
	tst	%o2			! check count
	bg,a	1f			! count > 0, go do the copy
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return; nothing to do or bad arguments
	nop
1:
	bneg,a	2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	ble	_bcopy			! if size <= abs(diff): use bcopy,
	.empty				! no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	ov_bkwd			! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bg	ov_fwd			! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bg	ov_bkwd			! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop

/*
 * Zero a block of storage, returning an error code if we
 * take a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok.
 *
 * int
 * kzero(addr, count)
 *	caddr_t addr;
 *	u_int count;
 */
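/*
 * Usage sketch (illustrative): like kcopy, a fault is reported as an
 * errno value rather than a panic (addr, len hypothetical):
 *
 *	int err;
 *
 *	if ((err = kzero(addr, len)) != 0)
 *		return (err);
 */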
ENTRY(kzero)
	save	%sp, -SA(MINFRAME), %sp	! get another window
	sethi	%hi(zeroerr), %i2
	b	do_zero
	or	%i2, %lo(zeroerr), %i2

/*
 * We got here because of a fault during kzero.
 * Errno value is in %g1.
 */
zeroerr:
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%i5, [%g5+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g1, 0, %o0

/*
 * Zero a block of storage.
 *
 * bzero(addr, length)
 *	caddr_t addr;
 *	u_int length;
 */
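/*
 * Usage sketch (illustrative):
 *
 *	struct buf b;
 *
 *	bzero((caddr_t)&b, sizeof (b));
 */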
ENTRY2(bzero,blkclr)

	save	%sp, -SA(MINFRAME), %sp	! get another window
	mov	0, %i2
do_zero:
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	ld	[%g5+U_LOFAULT], %i5	! save u.u_lofault
	cmp	%i1, 15			! check for small counts
	bl	byteclr			! just clear bytes
	st	%i2, [%g5+U_LOFAULT]	! install new vector

	!
	! Check for word alignment.
	!
	btst	3, %i0
	bz	bzero_probe
	mov	0x100, %i3		! constant size of main loop
	!
	! clear bytes until word aligned
	!
1:	clrb	[%i0]
	add	%i0, 1, %i0
	btst	3, %i0
	bnz	1b
	sub	%i1, 1, %i1
	!
	! Word aligned.
bzero_probe:
#ifdef VME_D16
	! Check page type to see if word/double access is
	! allowed (i.e. not VME_D16|VME_D32)
	! This assumes that the destination will not change to
	! VME_DXX during the bzero.  This will get a data fault with
	! be_vmeserr set if unsuccessful.
	!
	ldub	[%i0], %g0		! make sure page is in memory
	lda	[%i0]ASI_PM, %g1	! get src's pte
	srl	%g1, PGT_SHIFT, %g1	! shift and mask for vme
	andcc	%g1, VME_D32|VME_D16, %g1
	bz	bzero_obmem		! not vme, then do obio
	!
	! check which vme space and jump accordingly
	!
	cmp	%g1, VME_D16
	be	bzero_vme16
	nop
	b,a	bzero_vme32
#endif VME_D16
#ifdef SUN4M_690
	and	%i0, MMU_PAGEMASK, %g1
	or	%g1, FT_ALL<<8, %g1
	lda	[%g1]ASI_PM, %i2	! get src's pte
	and	%i2, 3, %i2
	cmp	%i2, MMU_ET_PTE
	bne,a	1f			! if not mapped in,
	ldub	[%i0], %g0		! force target page into memory
1:
	lda	[%g1]ASI_PM, %i2	! get target PTE again.
	srl	%i2, 28, %i2		! convert pte to space

	! if target is vme D16, limit transfer width
	! to 16 bits. (space=0xA or 0xC)
	cmp	%i2, 0xA		! from user vme-d16
	be	bzero_vme16
	cmp	%i2, 0xC		! from supv vme-d16
	be	bzero_vme16

	! if target is vme D32, limit transfer width
	! to 32 bits. (space=0xB or 0xD)
	cmp	%i2, 0xB		! from user vme-d32
	be	bzero_vme32
	cmp	%i2, 0xD		! from supv vme-d32
	be	bzero_vme32
	nop
#endif

#ifdef BCOPY_BUF
	!
	! HW version of routine.
	! obmem, if needed move a word to become double-word aligned.
	!
bzero_obmem:
	btst	7, %i0
	bz	bzero_bufchk		! is double aligned?
	clr	%g1			! clr g1 for second half of double %g0
	clr	[%i0]			! clr to double boundary
	sub	%i1, 4, %i1		! decrement count
	add	%i0, 4, %i0		! increment address
/*
 * The source and destination are now double-word aligned,
 * see if the bcopy buffer is available.
 */
bzero_bufchk:
	sethi	%hi(_bcopy_res), %l2
	ld	[%l2 + %lo(_bcopy_res)], %i2	! is the bcopy buffer available
	tst	%i2
	bnz	bzero_nobuf

#ifdef sun4m
	sethi	%hi(_ross625_flag), %l3
	ld	[%l3 + %lo(_ross625_flag)], %l3
	tst	%l3
	bnz,a	0f
	cmp	%i1, BCOPY_LIMIT_625
#endif sun4m
	cmp	%i1, BCOPY_LIMIT	! see if it is worth while
0:
	bl	bzero_nobuf
	!
	! set	_vac_linesize, %i4
	! ld	[%i4 + %lo(_vac_linesize)], %i4
	! sub	%i4, 1, %i3		! create line mask
	! andcc	%i0, %i3, %g0
	!
	mov	32, %i4
	andcc	%i0, 0x1F, %g0		! check if cache-line aligned
1:	bz,a	bzero_buf
	ldstub	[%l2 + %lo(_bcopy_res)], %i2	! try to grab bcopy buffer
	std	%g0, [%i0]
	add	%i0, 8, %i0
	sub	%i1, 8, %i1		! decrement count
	b	1b
	andcc	%i0, 0x1F, %g0		! check if aligned

bzero_buf:
	tst	%i2
	bnz	bzero_nobuf		! give up, hardware in use
	.empty				! the following set is ok in delay slot
	set	_zeros, %g2
	!
	! Should only get here on sun4m when have viking/mxcc
	! XXX--can play it safe and put in stuff to assure _zeros is page aligned
	!
#ifdef sun4m
	!
	! spl here because we don't want phys pages to be stolen from us
	! until we have completed the copy.
	!
	tst	%l3
	bz	not_hypersparc2
	nop
	! hyperSPARC is installed and we are aligned on a cache line
0:
	stda	%g0, [%i0]ASI_BF
	sub	%i1, BCPY_BLKSZ, %i1	! update cnt
	cmp	%i1, BCPY_BLKSZ
	bge	0b
	add	%i0, BCPY_BLKSZ, %i0	! update dst

	sethi	%hi(_bzero_cnt), %i2
	sethi	%hi(_bcopy_res), %l2
	ld	[%i2 + %lo(_bzero_cnt)], %i4
	stb	%g0, [%l2 + %lo(_bcopy_res)]	! interlock, unlock bcopy buffer
	inc	%i4
	b	byteclr
	st	%i4, [%i2 + %lo(_bzero_cnt)]

not_hypersparc2:

	call	_splhigh		! Watch register usage on calls!
	ldub	[%g2], %g0		! Set R bit in src page
	mov	%o0, %l7		! must save if make any other calls
3:
	or	%g2, FT_ALL<<8, %l1	! _zeros should be page aligned
	lda	[%l1]ASI_PM, %l1	! get phys addr
	and	%l1, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	ldub	[%g2], %g0		! force target page into memory
	! know that space is obmem here
	set	MXCC_STRM_SRC, %l5	! addr of stream source reg
	mov	0x10, %l0		! set cacheable bit in upper word
	srl	%l1, 8, %l1		! get rid of non-page bits
	sll	%l1, MMU_PAGESHIFT, %l1	! now have page aligned phys addr
	stda	%l0, [%l5]ASI_MXCC	! have finished stream read of zeros
	set	MXCC_STRM_DEST, %l5	! addr of stream destination reg

	stb	%g0, [%i0]		! Set RM bits in dest page
3:
	and	%i0, MMU_PAGEMASK, %l1
	or	%l1, FT_ALL<<8, %l1
	lda	[%l1]ASI_PM, %l1	! get phys addr
	and	%l1, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	stb	%g0, [%i0]		! force target page into memory

	srl	%l1, 28, %i3		! check space
	tst	%i3
	bz	1f			! Obmem page
	btst	PTE_CACHEABLEMASK, %l1	! Double check, page marked cacheable?
	bz,a	2f			! Not obmem, and not cacheable
	mov	%i3, %l0

	b	2f			! Not obmem, but cacheable
	or	%l0, %i3, %l0
1:
	bnz	2f			! Yes, obmem and cacheable, go ahead.
	nop
	mov	%g0, %l0		! No, clear cacheable bit.
2:
	srl	%l1, 8, %l1		! remove non-page bits from pte
	sll	%l1, MMU_PAGESHIFT, %l1	! phys page minus space bits
	and	%i0, MMU_PAGEOFFSET, %i3	! page offset via virt addr
	or	%l1, %i3, %l1		! phys addr complete

	add	%i3, %i1, %i2		! will we cross page boundary?
	set	MMU_PAGESIZE, %i4	! use for pagesize boundary check
	cmp	%i2, %i4		! Yes, if %i3+%i1 > PAGESIZE
	ble,a	hw_fastbzero		! No. Can copy all bytes and exit.
	mov	%i1, %i4		! %i1 has the requested transfer size

	! if yes, loop on page copies
	sub	%i4, %i3, %i4		! round transfer to PAGESIZE
	b	hw_pagezero		! copy bytes in leading page
	sub	%i1, %i4, %i1		! decrease by what we will copy

hw_bzeroloop:
	mov	%l7, %o0
	call	_splx
	mov	0x10, %l0

	set	BCPY_BLKSZ, %i4
	cmp	%i1, %i4
	bl	9f
	nop

	call	_splhigh
	stb	%g0, [%i0]		! set RM bits on dest page
	mov	%o0, %l7		! save return value of splhigh
3:
	mov	%i0, %l1
	or	%l1, FT_ALL<<8, %l1
	lda	[%l1]ASI_PM, %l1	! get phys addr
	and	%l1, 3, %i3
	cmp	%i3, MMU_ET_PTE
	bne,a	3b			! if not mapped in,
	stb	%g0, [%i0]		! force target page into memory

	srl	%l1, 28, %i3		! check space
	tst	%i3
	bz	1f			! Obmem page
	btst	PTE_CACHEABLEMASK, %l1	! Double check, page marked cacheable?
	bz,a	2f			! Not obmem, and not cacheable
	mov	%i3, %l0

	b	2f			! Not obmem, but cacheable
	or	%l0, %i3, %l0
1:
	bnz	2f			! Yes, obmem and cacheable, go ahead.
	nop
	mov	%g0, %l0		! No, clear cacheable bit.
2:
	srl	%l1, 8, %l1		! remove non-page bits from pte
	sll	%l1, MMU_PAGESHIFT, %l1	! phys page minus space bits
	! We should be on page boundary
	set	MMU_PAGESIZE, %i4
	cmp	%i1, %i4		! Do we have more than one page left?
	bg,a	hw_pagezero		! Yes, call hw_pagezero to loop
	sub	%i1, %i4, %i1		! decrease by what we will copy
	! Always copy PAGESIZE here
	b	hw_fastbzero		! No, can call hw_fastbzero to exit
	mov	%i1, %i4		! End of transfer <= PAGESIZE

hw_fastbzero:				! use for < PAGESIZE copies.
	stda	%l0, [%l5]ASI_MXCC
	dec	BCPY_BLKSZ, %i4		! decrement count
	inc	BCPY_BLKSZ, %i0		! increment address
	cmp	%i4, BCPY_BLKSZ		! still got more than BCPY_BLKSZ to go?
	bl,a	hw_bzeroexit		! No, done with HW
	mov	%i4, %i1		! what we need byteclr to finish up
	b	hw_fastbzero		! loop until finished
	inc	BCPY_BLKSZ, %l1		! increment to next sublock

hw_pagezero:
	stda	%l0, [%l5]ASI_MXCC
	deccc	BCPY_BLKSZ, %i4		! decrement count
	bz	hw_bzeroloop		! done
	inc	BCPY_BLKSZ, %i0		! increment virtual address
	b	hw_pagezero		! more to do
	inc	BCPY_BLKSZ, %l1		! increment physaddr one sublock

hw_bzeroexit:
	mov	%l7, %o0
	call	_splx
	.empty				! Silence complaint about label
	!
	! Check Error Register to see if AE bit set, indicating problem with
	! the stream operation we just did.  Fatal, so we go down if a problem
	! is found.
	!
9:
	sethi	%hi(_bzero_cnt), %i2	! count # times used buffer

chk_dst_zero:
	set	MXCC_STRM_DEST, %l4	! addr of stream dest reg
	ldda	[%l4]ASI_MXCC, %l4
	cmp	%l4, %g0
	bge	chk_dst_zero
	nop

	set	MXCC_ERROR, %l7		! Load addr of MXCC err reg
	ldda	[%l7]ASI_MXCC, %l4	! %l5 holds bits 0-31 of paddr
	set	MXCC_ERR_AE, %i4
	btst	%i4, %l4		! %l4 holds the status bits
	bz	1f			! if not set continue exiting
	ld	[%i2 + %lo(_bzero_cnt)], %i4

	set	MXCC_ERR_EV, %o4	! If error bit set....
	btst	%o4, %l4		! check if valid bit set
	bnz	6f
	.empty				! Silence complaint about label
1:
	stb	%g0, [%l2 + %lo(_bcopy_res)]	! interlock, unlock bcopy buffer
	inc	%i4
	b	byteclr
	st	%i4, [%i2 + %lo(_bzero_cnt)]
6:
	set	0f, %o0
	call	_panic
	nop
0:
	.asciz	"bzero stream operation failed"
	.align	4

#else sun4m

	lda	[%g2]ASI_BC, %g0	! load bcopy buffer with zeros
3:	sta	%g0, [%i0]ASI_BC	! zero a line
	sub	%i1, %i4, %i1		! decrement count
	cmp	%i1, %i4		! check if done
	bge	3b			! loop until done
	add	%i0, %i4, %i0		! increment address

	sethi	%hi(_bzero_cnt), %g2	! count # times used buffer
	ld	[%g2 + %lo(_bzero_cnt)], %i4
	stb	%g0, [%l2 + %lo(_bcopy_res)]	! interlock, unlock bcopy buffer
	inc	%i4
	b	byteclr
	st	%i4, [%g2 + %lo(_bzero_cnt)]

#endif sun4m
	!
	! Software version.
	! obmem, if needed move a word to become double-word aligned.
	!
bzero_swobmem:
	! We do the btst in the delay slot on branch here.
#else BCOPY_BUF

bzero_obmem:
	btst	7, %i0			! is double aligned?
#endif BCOPY_BUF
	bz	bzero_nobuf
	clr	%g1			! clr g1 for second half of double %g0
	clr	[%i0]			! clr to double boundary
	sub	%i1, 4, %i1		! decrement count
	b	bzero_nobuf
	add	%i0, 4, %i0		! increment address

	!!! std	%g0, [%i0+0xf8]		! done below in delay slot

bzero_blk:

	std	%g0, [%i0+0xf0]
	std	%g0, [%i0+0xe8]
	std	%g0, [%i0+0xe0]
	std	%g0, [%i0+0xd8]
	std	%g0, [%i0+0xd0]
	std	%g0, [%i0+0xc8]
	std	%g0, [%i0+0xc0]
	std	%g0, [%i0+0xb8]
	std	%g0, [%i0+0xb0]
	std	%g0, [%i0+0xa8]
	std	%g0, [%i0+0xa0]
	std	%g0, [%i0+0x98]
	std	%g0, [%i0+0x90]
	std	%g0, [%i0+0x88]
	std	%g0, [%i0+0x80]
	std	%g0, [%i0+0x78]
	std	%g0, [%i0+0x70]
	std	%g0, [%i0+0x68]
	std	%g0, [%i0+0x60]
	std	%g0, [%i0+0x58]
	std	%g0, [%i0+0x50]
	std	%g0, [%i0+0x48]
	std	%g0, [%i0+0x40]
	std	%g0, [%i0+0x38]
	std	%g0, [%i0+0x30]
	std	%g0, [%i0+0x28]
	std	%g0, [%i0+0x20]
	std	%g0, [%i0+0x18]
	std	%g0, [%i0+0x10]
	std	%g0, [%i0+0x08]
	std	%g0, [%i0+0x00]

zinst:
	add	%i0, %i3, %i0		! increment address
	sub	%i1, %i3, %i1		! decrement count
bzero_nobuf:
	cmp	%i1, 0x100		! can we do whole chunk?
	bge,a	bzero_blk
	std	%g0, [%i0+0xf8]		! do first double of chunk

	cmp	%i1, 7			! can we zero any more double words
	ble	byteclr			! too small, go zero bytes
	andn	%i1, 7, %i3		! %i3 bytes left, double-word aligned
	srl	%i3, 1, %i2		! using doubles, need 1 instr / 2 words
	set	zinst, %i4		! address of clr instructions
	sub	%i4, %i2, %i4		! jmp address relative to instr
	jmp	%i4
	nop
	!
	! do leftover bytes
	!
3:
	add	%i0, 1, %i0		! increment address
byteclr:
	subcc	%i1, 1, %i1		! decrement count
	bge,a	3b
	clrb	[%i0]			! zero a byte

	sethi	%hi(_uunix), %g5	! XXX - global u register?
	ld	[%g5 + %lo(_uunix)], %g5
	st	%i5, [%g5+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g1, 0, %o0

#if defined(VME_D16) || defined(SUN4M_690)
/*
 * bcopy_vme16(from, to, count)
 * Block copy to/from a 16 bit vme device.
 * There is little optimization because the VME is so slow.
 */
bcopy_vme16:
	xor	%i0, %i1, %i4		! test for mutually half word aligned
	btst	1, %i4
	bnz	bytecp			! misaligned, copy bytes
	.empty
	btst	1, %i0			! test for initial byte
	bz,a	1f
	andn	%i2, 1, %i3		! count of aligned bytes to copy

	ldub	[%i0], %i4		! copy initial byte
	add	%i0, 1, %i0
	stb	%i4, [%i1]
	add	%i1, 1, %i1
	sub	%i2, 1, %i2
	andn	%i2, 1, %i3		! count of aligned bytes to copy
1:
	and	%i2, 1, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference
2:
	lduh	[%i0+%i1], %i4		! read from address
	sth	%i4, [%i1]		! write at destination address
	subcc	%i3, 2, %i3		! dec count
	bg	2b
	add	%i1, 2, %i1		! delay slot, inc to address

	b,a	dbytecp			! copy remaining bytes, if any

/*
 * bzero_vme16(addr, length)
 * Zero a block of VME_D16 memory.
 * There is little optimization because the VME is so slow.
 * We come here word aligned.
 */
bzero_vme16:
	andn	%i1, 1, %i3		! count of aligned shorts to clear
	and	%i1, 1, %i1		! unaligned leftover count
	mov	%i3, %i4
1:	subcc	%i3, 2, %i3		! decrement count
	bg	1b
	clrh	[%i0+%i3]		! zero short

	b	byteclr			! zero remaining bytes, if any
	add	%i0, %i4, %i0		! increment address

/*
 * bzero_vme32(addr, length)
 * Zero a block of VME_D32 memory.
 * There is little optimization because the VME is so slow.
 * We come here word aligned.
 */
bzero_vme32:
	andn	%i1, 3, %i3		! count of aligned words to clear
	and	%i1, 3, %i1		! unaligned leftover byte count
	mov	%i3, %i4
1:
	subcc	%i3, 4, %i3		! decrement count
	bg	1b
	clr	[%i0+%i3]		! zero word

	b	byteclr			! zero remaining bytes, if any
	add	%i0, %i4, %i0		! increment address
#endif VME_D16 || SUN4M_690

/*
 * Copy a null terminated string from one point to another in
 * the kernel address space.
 * NOTE - don't use %o5 in this routine as copy{in,out}str uses it.
 *
 * copystr(from, to, maxlength, lencopied)
 *	caddr_t from, to;
 *	u_int maxlength, *lencopied;
 */
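/*
 * Usage sketch (illustrative): both pointers are kernel addresses; the
 * number of bytes moved (including the null) comes back via lencopied
 * (kname, namebuf hypothetical):
 *
 *	u_int len;
 *
 *	if (copystr(kname, namebuf, MAXNAMLEN, &len) == ENAMETOOLONG)
 *		...
 */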
ENTRY(copystr)
	mov	%o2, %o4		! save original count
	tst	%o2
	bg,a	1f
	sub	%o0, %o1, %o0		! o0 gets the difference of src and dst

	!
	! maxlength <= 0
	!
	bz	cs_out			! maxlength = 0
	mov	ENAMETOOLONG, %o0

	retl				! maxlength < 0
	mov	EFAULT, %o0		! return (EFAULT)

	!
	! Do a byte by byte loop.
	! We do this instead of a word by word copy because most strings
	! are small and this takes a small number of cache lines.
	!
0:
	stb	%g1, [%o1]		! store byte
	tst	%g1			! null byte?
	bnz	1f
	add	%o1, 1, %o1		! incr dst addr

	b	cs_out			! last byte in string
	mov	0, %o0			! ret code = 0
1:
	subcc	%o2, 1, %o2		! test count
	bge,a	0b
	ldub	[%o0+%o1], %g1		! delay slot, get source byte

	mov	0, %o2			! max number of bytes moved
	mov	ENAMETOOLONG, %o0	! ret code = ENAMETOOLONG
cs_out:
	tst	%o3			! want length?
	bz	2f
	.empty
	sub	%o4, %o2, %o4		! compute length and store it
	st	%o4, [%o3]
2:
	retl
	nop				! return (ret code)

/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 */

/*
 * Copy kernel data to user space.
 *
 * int
 * copyout(kaddr, uaddr, count)
 *	caddr_t kaddr, uaddr;
 *	u_int count;
 */
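/*
 * Usage sketch (illustrative): copyout (and copyin below) fail with
 * EFAULT if the user address is not below KERNELBASE or the access
 * faults (result, uap hypothetical):
 *
 *	if (copyout((caddr_t)&result, uap->addr, sizeof (result)) != 0)
 *		return (EFAULT);
 */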
ENTRY(copyout)
	sethi	%hi(KERNELBASE), %g1	! test uaddr < KERNELBASE
	cmp	%o1, %g1
	sethi	%hi(copyioerr), %o3	! copyioerr is lofault value
	bleu	do_copy			! common code
	or	%o3, %lo(copyioerr), %o3

	retl				! return (EFAULT)
	mov	EFAULT, %o0

/*
 * Copy user data to kernel space.
 *
 * int
 * copyin(uaddr, kaddr, count)
 *	caddr_t uaddr, kaddr;
 *	u_int count;
 */
ENTRY(copyin)
	sethi	%hi(KERNELBASE), %g1	! test uaddr < KERNELBASE
	cmp	%o0, %g1
	sethi	%hi(copyioerr), %o3	! copyioerr is lofault value
	bleu	do_copy			! common code
	or	%o3, %lo(copyioerr), %o3

	retl				! return (EFAULT)
	mov	EFAULT, %o0

/*
 * We got here because of a fault during copy{in,out}.
 * Errno value is in %g1.
 */
copyioerr:
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%l6, [%g5+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g1, 0, %o0

/*
 * Copy a null terminated string from the user address space into
 * the kernel address space.
 *
 * copyinstr(uaddr, kaddr, maxlength, lencopied)
 *	caddr_t uaddr, kaddr;
 *	u_int maxlength, *lencopied;
 */
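/*
 * Usage sketch (illustrative): copyinstr bounds the copy by maxlength
 * and, like copystr, reports the length actually moved; copyoutstr below
 * is the mirror image (uap, nbuf, len hypothetical):
 *
 *	err = copyinstr(uap->fname, nbuf, MAXPATHLEN, &len);
 */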
ENTRY(copyinstr)
	sethi	%hi(KERNELBASE), %g1	! test uaddr < KERNELBASE
	cmp	%o0, %g1
	bgeu	copystrerr
	mov	%o7, %o5		! save return address
cs_common:
	set	copystrerr, %g1
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	call	_copystr
	st	%g1, [%g5+U_LOFAULT]	! catch faults

	jmp	%o5 + 8			! return (results of copystr)
	clr	[%g5+U_LOFAULT]		! clear fault catcher

/*
 * Copy a null terminated string from the kernel
 * address space to the user address space.
 *
 * copyoutstr(kaddr, uaddr, maxlength, lencopied)
 *	caddr_t kaddr, uaddr;
 *	u_int maxlength, *lencopied;
 */
ENTRY(copyoutstr)
	sethi	%hi(KERNELBASE), %g1	! test uaddr < KERNELBASE
	cmp	%o1, %g1
	blu	cs_common
	mov	%o7, %o5		! save return address
	! fall through

/*
 * Fault while trying to move from or to user space.
 * Set and return error code.
 */
copystrerr:
	mov	EFAULT, %o0
	sethi	%hi(_uunix), %g5	! XXX - global u register?
	ld	[%g5 + %lo(_uunix)], %g5
	jmp	%o5 + 8			! return (EFAULT)
	clr	[%g5+U_LOFAULT]

/*
 * Fetch user (long) word.
 *
 * int
 * fuword(addr)
 *	caddr_t addr;
 */
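/*
 * Usage sketch (illustrative): fuword returns the fetched word, or -1 on
 * a bad or misaligned address (see fsuerr below), so a legitimately
 * stored -1 is indistinguishable from an error (uaddr hypothetical):
 *
 *	int v;
 *
 *	if ((v = fuword(uaddr)) == -1)
 *		...may be an error or a real -1...
 */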
ENTRY2(fuword,fuiword)
	sethi	%hi(KERNELBASE), %o3	! compare access addr to KERNELBASE
	cmp	%o0, %o3		! if (addr >= KERNELBASE) error
	bgeu	fsuerr
	btst	0x3, %o0		! test alignment
	bne	fsuerr
	.empty
	set	fsuerr, %o3		! set u.u_lofault to catch any fault
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%o3, [%g5+U_LOFAULT]
	ld	[%o0], %o0		! get the word
	retl
	clr	[%g5+U_LOFAULT]		! clear u.u_lofault

/*
 * Fetch user byte.
 *
 * int
 * fubyte(addr)
 *	caddr_t addr;
 */
ENTRY2(fubyte,fuibyte)
	sethi	%hi(KERNELBASE), %o3	! compare access addr to KERNELBASE
	cmp	%o0, %o3		! if (addr >= KERNELBASE) error
	bgeu	fsuerr
	.empty
	set	fsuerr, %o3		! set u.u_lofault to catch any fault
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%o3, [%g5+U_LOFAULT]
	ldub	[%o0], %o0		! get the byte
	retl
	clr	[%g5+U_LOFAULT]		! clear u.u_lofault

/*
 * Set user (long) word.
 *
 * int
 * suword(addr, value)
 *	caddr_t addr;
 *	int value;
 */
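/*
 * Usage sketch (illustrative): the set routines return 0 on success and
 * -1 on error (see suret and fsuerr below); uaddr, value hypothetical:
 *
 *	if (suword(uaddr, value) < 0)
 *		return (EFAULT);
 */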
ENTRY2(suword,suiword)
	sethi	%hi(KERNELBASE), %o3	! compare access addr to KERNELBASE
	cmp	%o0, %o3		! if (addr >= KERNELBASE) error
	bgeu	fsuerr
	btst	0x3, %o0		! test alignment
	bne	fsuerr
	.empty
	set	fsuerr, %o3		! set u.u_lofault to catch any fault
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%o3, [%g5+U_LOFAULT]
	b	suret
	st	%o1, [%o0]		! set the word

/*
 * Set user byte.
 *
 * int
 * subyte(addr, value)
 *	caddr_t addr;
 *	int value;
 */
ENTRY2(subyte,suibyte)
	sethi	%hi(KERNELBASE), %o3	! compare access addr to KERNELBASE
	cmp	%o0, %o3		! if (addr >= KERNELBASE) error
	bgeu	fsuerr
	.empty
	set	fsuerr, %o3		! set u.u_lofault to catch any fault
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%o3, [%g5+U_LOFAULT]
	stb	%o1, [%o0]		! set the byte
suret:
	mov	0, %o0			! indicate success
	retl
	clr	[%g5+U_LOFAULT]		! clear u.u_lofault

/*
 * Fetch user short (half) word.
 *
 * int
 * fusword(addr)
 *	caddr_t addr;
 */
ENTRY(fusword)
	sethi	%hi(KERNELBASE), %o3	! compare access addr to KERNELBASE
	cmp	%o0, %o3		! if (addr >= KERNELBASE) error
	bgeu	fsuerr
	btst	0x1, %o0		! test alignment
	bne	fsuerr
	.empty
	set	fsuerr, %o3		! set u.u_lofault to catch any fault
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%o3, [%g5+U_LOFAULT]
	lduh	[%o0], %o0		! get the half word
	retl
	clr	[%g5+U_LOFAULT]		! clear u.u_lofault

/*
 * Set user short word.
 *
 * int
 * susword(addr, value)
 *	caddr_t addr;
 *	int value;
 */
ENTRY(susword)
	sethi	%hi(KERNELBASE), %o3	! compare access addr to KERNELBASE
	cmp	%o0, %o3		! if (addr >= KERNELBASE) error
	bgeu	fsuerr
	btst	0x1, %o0		! test alignment
	bne	fsuerr
	.empty
	set	fsuerr, %o3		! set u.u_lofault to catch any fault
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	st	%o3, [%g5+U_LOFAULT]
	b	suret
	sth	%o1, [%o0]		! set the half word

fsuerr:
	mov	-1, %o0			! return error
	sethi	%hi(_uunix), %g5
	ld	[%g5 + %lo(_uunix)], %g5
	retl
	clr	[%g5+U_LOFAULT]		! clear u.u_lofault