/*
 * Archive-listing metadata (scraper residue, not part of the original source):
 *   Files: Arquivotheca.SunOS-4.1.3/sys/m68k/movc.s
 *   seta75D 2e8a93c394 Init -- 2021-10-11 18:20:23 -03:00
 *   665 lines, 18 KiB; auto-detected as "ArmAsm" (actually Motorola 68k asm)
 */

.data
.asciz "@(#)movc.s 1.1 92/07/30 SMI"
.even
.text
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#include <machine/asm_linkage.h>
#include <machine/param.h>
#include "assym.s"
#define UNROLLB /* unroll byte copy loop */
#define UNROLLL /* unroll long copy loop */
#ifdef BCOPY
#include <machine/mmu.h>
#include <machine/pte.h>
.data
bcopys: .byte 0 | semaphore for bcopy
ctx_save: .byte 0 | NOTE(review): not referenced in this file - confirm use
.even
/*
 * blockz labels the middle of 32 bytes of zeros.  The fast bzero path
 * rounds this address down to a 16-byte line boundary (see the masks
 * applied after "lea blockz,a2"), and whichever line that selects is
 * guaranteed to be all zeros.
 */
.long 0, 0, 0, 0
blockz:
.long 0, 0, 0, 0
.text
#endif BCOPY
/*
 * Stack offsets for {k/b}copy.
 */
#define count a6@(0x10) /* arg: byte count */
#define to a6@(0xc) /* arg: destination */
#define from a6@(0x8) /* arg: source */
#define slf a6@(-0x4) /* Saved u_lofault value */
#define lfv a6@(-0x8) /* Lofault vectors */
#define svd a6@(-0xc) /* Saved D2 */
/*
 * Lofault vector tables.  Layout (see uses of a2@(4) and a2@(8) below):
 *   offset 0: handler installed for the plain copy loops
 *             (copyerr for kcopy so faults return errno; 0 for bcopy,
 *             i.e. presumably default kernel fault handling)
 *   offset 4: handler that clears the bcopy semaphore (BCOPY machines)
 *   offset 8: handler that retries a faulted hardware block copy via
 *             the normal software loops (BCOPY machines)
 */
kclfv: .long copyerr
#ifdef BCOPY
.long copysem
.long copyfault
#endif BCOPY
bclfv: .long 0
#ifdef BCOPY
.long 0
.long copyfault
#endif BCOPY
/***********************************************************************
* The "dbra" Comment: (16mar88 limes)
*
* The mc68k "dbra" instruction only decrements the last 16 bits of
* the counter; this has bitten assembly programmers around here
* several times. The problem is that, when the last sixteen bits are
* all zero, the dbra sets them to FFFF but does not borrow from the
* top. The simple and clear way to recover is to clear the bottom
* half and do the decrement again; if the counter is negative after
* this, then we are done. Remember, the count is signed ...
*
* Thus, a loop to move <d0> bytes looks like:
*
* jra 2f | enter loop
* 1: movb a0@+,a1@+ | 68010 loop mode
* 2: dbra d0,1b | up to 65536 loops
* clrw d0 | undo last decrement
* subql #1,d0 | 32 bit decrement
* jpl 1b | branch if not all done yet
*/
/*
* Simple tight loop for copying longs.
* Guarantees long word bus cycles for long word addresses.
* Useful when you can't safely use bcopy due to destination
* size/alignment constraints. (eg. FPA)
*
* Usage: lcopy (from, to, count)
* caddr_t from, to; u_int count;
* count is the number of longs to transfer.
*/
ENTRY(lcopy)
link a6,#0 | no locals needed
movl count,d0 | d0 = number of LONGS to move (not bytes)
movl from,a0
movl to,a1
jra 2f | enter loop at the dbra
1: movl a0@+,a1@+ | two-byte move: 68010 loop mode candidate
2: dbra d0,1b | up to 65536 longs per inner pass
clrw d0 | see "dbra comment" above
subql #1,d0 |
jpl 1b |
unlk a6
rts
/*
* Copy a block of storage, returning an error code if `from' or
* `to' takes a kernel pagefault which cannot be resolved.
*
* Usage: int kcopy(from, to, count);
* caddr_t from, to; u_int count;
* returns Exxx on pagefault error, 0 if all ok
*/
ENTRY(kcopy)
link a6,#-0x20 | Build stack frame (locals + moveml save area)
movl #kclfv,lfv | Save pointer to lofault vector
jra bcopy_getaddrs | Join common code
/*
* Copy a block of storage - must not overlap (from + len <= to)
* Usage: bcopy(from, to, count)
* caddr_t from, to; u_int count;
*/
ENTRY(bcopy)
link a6,#-0x20 | Build stack frame (locals + moveml save area)
movl #bclfv,lfv | Save pointer to lofault vector
bcopy_getaddrs:
movl d2,svd | %%% save d2
movl _uunix, a1 | a1 = current u area
movl a1@(U_LOFAULT),slf | Save u_lofault
movl count,d0 | Get count
jle ret | Return if not positive (ccr set by movl)
movl lfv,a0 | Initialize u_lofault from
movl a0@,a1@(U_LOFAULT) | appropriate lofault vector
movl from,a0 | Get other parameters
movl to,a1
#if !defined(mc68020) || defined(SUN3_E)
/*
* If from address odd, move one byte to try to make things even
*/
movw a0,d1 | from
btst #0,d1 | test for odd bit in from
jeq 0f | even, skip
movb a0@+,a1@+ | move a byte
subql #1,d0 | adjust count
/* If to address is odd now, we have to do byte moves */
0: movl a1,d1 | low bit one if mutual oddness
btst #0,d1 | test low bit
jne bytes | do it slow and easy
#else mc68020 && !SUN3_E
/*
 * The mc68020 knows how to handle misaligned transfer. We still align
 * the source to a longword for efficiency, but writing a misaligned
 * long is faster than doing it byte-by-byte.
 *
 * However, as noted in bugid 1016954, doing this on a 3/E causes
 * incoming "rlogin" sessions to wedge when full packets start
 * getting sent over the net. Until we have time to track this
 * problem down, require 68010-style alignment if we are supporting
 * the Sun-3/E.
 */
0: movw a0,d1 | source addr
andl #3,d1 | check long alignment
jeq 1f | align OK, continue
movb a0@+,a1@+ | move a byte
subql #1,d0 | update count
jeq ret | copy may be complete
jmp 0b | check align again
1:
#endif mc68020 && !SUN3_E
/*
 * All aligned - now move longs
 */
movl d0,d1 | save count
lsrl #2,d0 | get # longs
#ifdef BCOPY
/*
 * WARNING - THIS CODE IS NOT REENTRANT
 * Fast copy routine uses the Block Copy commands of a BCOPY machine.
 * The Block Copy Read and Write commands copy BC_LINESIZE (16) bytes
 * at a time and avoid the displacement of any valid data from the cache.
 * We use Block Copy Read and Write until there are less than
 * BC_LINESIZE bytes to move. Here, a0 = from, a1 = to, (a0 and a1 are
 * in word boundary. d0 = count in long words, and d1 = count in bytes.
 * Isn't worth doing Block Copy Read/Write if 2 * BC_LINESIZE >= byte count
 * If we're running on the interrupt stack we can't use the bcopy hardware
 * because we might've interrupted trap() while processing a bus error but
 * before it had read the bus error register, and we might take a bus error
 * fault while doing the bcopy, thus destroying the bus error register before
 * the trap() routine that we interrupted had a chance to read it.
 */
tstl _bcenable | test if there is BCOPY
jeq outnr | bit is off, skip fast copy
cmpl #BC_MINBCOPY,d1 | d1 = byte count
jlt outnr | 2*BC_LINESIZE >= byte count
cmpl #eintstack,sp | on interrupt stack?
jls outnr | yes, skip
moveml d2-d4/a2/a4,sp@ | save d2-d4,a2,a4 in frame (link left room; sp unmoved)
movl _uunix, a4 | a4 has current u
tas bcopys | only one person at a time
jne out | busy, skip fast copy
movl lfv,a2 | Update lofault
movl a2@(4),a4@(U_LOFAULT) | from common vector (semaphore-clearing handler)
#ifdef sun3x
movl #ICACHE_ENABLE+DCACHE_CLEAR+DCACHE_ENABLE,d2
movc d2,cacr | inline vac_flush
#endif
/* Use normal bcopy if a0 and a1 are not aligned according to line size */
movl a0,d2
andl #0xf,d2 | get byte residual of line size of a0
movl a1,d3
andl #0xf,d3 | get byte residual of line size of a1
cmpl d2,d3
jne done | not line size aligned
cmpl #0,d2 | skip word move if in 16 byte boundary
jeq 3f
/*
 * a0 (from) and a1 (to) are aligned to each other in terms of line size (16).
 * Move a word (2 bytes) at a time until byte residual of "from" is zero.
 * (a0 and a1 are in word boundary when "fast" bcopy is entered.)
 */
movl #BC_LINESIZE,d2 | figure out number of words to move
subl d3,d2 | d2 = byte to move to align to 16
subl d2,d1 | update byte count
movl d1,d0 | updated long word count
lsrl #2,d0
lsrl #1,d2 | # of words to move in the residual
jra 2f | enter word move loop
1: movw a0@+,a1@+ | move a word in the residual part
2: dbra d2,1b | decr, br if >= 0
3:
movl lfv,a2 | install retry fault handler
movl a2@(8),a4@(U_LOFAULT) | a fault now falls back to the software loops
movl d0,d2 | save long word count
movl a0,d3 | save from
movl a1,a2 | save to
lsrl #BC_LINE_SHIFT,d0 | # of lines to move
#ifndef sun3x
/* Set up Block Copy R/W commands to a0 (from) and a1 (to). */
movl a0,d4 | to set up Block Copy R/W command
andl #BC_BLOCK_OFF,d4 | mask off cmd bits
orl #BC_BLOCK_CPCMD,d4 | set up "from"
movl d4,a0
movl a1,d4 | to set up Block Copy R/W command
andl #BC_BLOCK_OFF,d4 | mask off cmd bits
orl #BC_BLOCK_CPCMD,d4 | set up "to"
movl d4,a1
#endif sun3x
/* Block Copy R/W loop, one line at a time. */
jra 5f | enter block copy loop
4: movsl a0@,d4 | Block Copy Read, d4 is dummy
movsl d4,a1@ | Block Copy Write, d4 is dummy
addl #BC_LINESIZE,a0 | update "from"
addl #BC_LINESIZE,a1 | update "to"
5: dbra d0,4b | decr, br if >= 0
clrw d0 | clear low
subql #1,d0 | borrow from high half if != 0
jpl 4b | loop if high half was not 0
#ifndef sun3x
/* Mask off Block Copy R/W commands in a0 (from) and a1 (to). */
movl a0,d4 | to mask off Block Copy R/W command
andl #BC_BLOCK_OFF,d4 | mask off "from"
movl d4,a0
movl a1,d4 | to mask off Block Copy R/W command
andl #BC_BLOCK_OFF,d4 | mask off "to"
movl d4,a1
#endif sun3x
/*
 * Update long count in d0, since only the last two bits of d1 (byte count)
 * are to be used and the last two bits of d1 are not affected by the Block
 * Copy R/W, we don't have to update d1.
 */
movl d2,d0
andl #BC_LINE_RESIDU,d0 | update long word count
jra done
copyfault: | got here by faulting; redo everything in software
movl d2,d0 | restore count (longs)
movl d3,a0 | restore from
movl a2,a1 | restore to
done:
clrb bcopys | reset semaphore
out:
movl lfv,a2 | Update lofault
movl a2@,a4@(U_LOFAULT) | one more time
moveml sp@,a2/a4/d2-d4 | restore a2,a4 and d2, d3 & d4
outnr:
#endif BCOPY
/* MOVE A BUNCH OF LONGS: d0 is number of longs, d1 is bytes;
 * a0 is source, a1 is destination. only the last two bits of d1
 * are useful.
 */
longs:
#ifdef UNROLLL
movl d0,d2
lsrl #4,d0 | loop once per 16 longs
andl #15,d2 | do the leftovers first
lsll #1,d2 | "movl a0@+,a1@+" is two bytes long
negl d2 | make it negative offset
addl #34,d2 | relative to end of loop
/*
 * Computed entry (Duff's-device style): first pass executes d0 mod 16
 * movls, every later dbra pass does all 16.  34 = 32 bytes of loop body
 * plus 2, presumably the pc bias of the indexed jmp - do not change the
 * loop body without recomputing it.
 */
jmp pc@(0,d2) | dive into unrolled loop
6: movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
#else UNROLLL
jra 7f | enter long move loop
6: movl a0@+,a1@+ | move a long
#endif UNROLLL
7: dbra d0,6b | decr, br if >= 0
clrw d0 | clear low
subql #1,d0 | borrow from hi half, get F's in lower
jpl 6b | loop if top non-negative, still more
/* Now up to 3 bytes remain to be moved */
movl d1,d0 | restore count
andl #3,d0 | mod sizeof long
/* We have some bytes left over that could not be
 * moved as longs; this could be due to alignment
 * restrictions, or it could be up to three bytes
 * of data remaining after a longword transfer.
 */
bytes:
#ifdef UNROLLB
movl d0,d1
lsrl #4,d0 | loop once per 16 bytes
andl #15,d1 | short bunch in first loop
lsll #1,d1 | "movb a0@+,a1@+" is two bytes long
negl d1 | make it negative offset
addl #0x22,d1 | from end of loop (0x22 = 34, same trick as "longs")
jmp pc@(0,d1) | dive into unrolled loop
0: movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
movb a0@+,a1@+
#else UNROLLB
jra 1f
0: movb a0@+,a1@+ | loop mode byte moves
#endif UNROLLB
1: dbra d0,0b
clrw d0
subql #1,d0 | see "dbra" comment above
jpl 0b
ret: clrl d0 | ret code (not used for bcopy)
#ifdef BCOPY
jra copyerr | Join common code
/*
 * We got here because of a fault during the middle of the block
 * copy using the block copy hardware. Clear the semaphore and
 * restore preserved registers we've appropriated.
 */
copysem:
clrb bcopys
moveml sp@,a2/a4/d2-d4 | restore regs saved in frame by the fast path
#endif BCOPY
/*
 * If we got here because of fault failure (kcopy only) then
 * an errno value should have already been loaded into d0 by
 * trap code that returns here.
 */
copyerr:
movl _uunix, a1
movl slf,a1@(U_LOFAULT) | restore old lofault value
movl svd,d2 | restore D2 register
unlk a6
rts | all done
#undef count
#undef to
#undef from
#undef slf
#undef lfv
/* note: svd intentionally stays defined - the {k/b}zero code reuses it */
/* Block copy with possibly overlapped operands */
ENTRY(ovbcopy)
movl sp@(4),a0 | from
movl sp@(8),a1 | to
movl sp@(12),d0 | get count
jle 3f | return if not positive (ccr set by movl)
cmpl a0,a1 | check direction of copy
jgt bwd | to > from: go backwards so overlap is safe
/* Here if from >= to - copy bytes forward */
jra 2f
/* Loop mode byte copy */
1: movb a0@+,a1@+
2: dbra d0,1b
clrw d0
subql #1,d0 | see "dbra" comment above
jpl 1b
3:
rts
/* Here if from < to - copy bytes backwards, starting from the ends */
bwd: addl d0,a0 | get end of from area
addl d0,a1 | get end of to area
jra 2f | enter loop
/* Loop mode byte copy */
1: movb a0@-,a1@-
2: dbra d0,1b
clrw d0
subql #1,d0 | see "dbra" comment above
jpl 1b
rts | all done
/*
* Stack offsets for {k/b}zero.
*/
#define count a6@(0xc) /* arg: byte count */
#define base a6@(0x8) /* arg: area to clear */
#define oab a6@(0xb) /* low byte of base (big-endian): odd-address test */
#define slf a6@(-0x4) /* saved u_lofault (svd from above is still -0xc) */
#define lfv a6@(-0x8) /* lofault vector pointer */
/* Lofault vectors, same 0/4/8 layout as kclfv/bclfv above */
kzlfv: .long zeroerr
#ifdef BCOPY
.long zerosem
.long zerofault
#endif BCOPY
bzlfv: .long 0
#ifdef BCOPY
.long 0
.long zerofault
#endif BCOPY
/*
* Zero a block of storage, returning an error code if we
* take a kernel pagefault which cannot be resolved.
*
* Usage: int kzero(base, count);
* caddr_t base; u_int count;
* returns Exxx on pagefault error, 0 if all ok
*/
ENTRY(kzero)
link a6,#-0x1c | Build stack frame (locals + moveml save area)
movl #kzlfv,lfv | Initialize lofault vector
jra zerostart | Join common code
/*
* Zero block of storage
* Usage: bzero(base, count);
* caddr_t base; u_int count;
*/
ENTRY2(bzero,blkclr)
link a6,#-0x1c | Build stack frame (locals + moveml save area)
movl #bzlfv,lfv | Initialize lofault vector
zerostart:
movl d2,svd | save working register (svd define reused from bcopy)
movl _uunix, a1
movl a1@(U_LOFAULT),slf | Save lofault
/*
 * Guard against count <= 0, matching bcopy's "jle ret".  Without this,
 * a zero count with an odd base address cleared one byte and then
 * underflowed d0 to -1, turning the long count into ~1G longs.
 * u_lofault has not been modified yet, so exit through zeroerr
 * (it rewrites the same saved value back, harmlessly) with d0 = 0.
 */
movl count,d0 | length
jgt 0f | something to do?
clrl d0 | no: return 0 (kzero return code)
jra zeroerr | common exit; restores slf/svd
0: movl lfv,a0 | Get vector
movl a0@,a1@(U_LOFAULT) | and set initial value
movl base,a1 | address
clrl d1 | use zero register to avoid clr fetches
btst #0,oab | odd address?
jeq 1f | no, skip
movb d1,a1@+ | do one byte
subql #1,d0 | to adjust to even address
1: movl d0,a0 | save possibly adjusted count
lsrl #2,d0 | get count of longs
#ifdef BCOPY
/*
 * WARNING - THIS CODE IS NOT REENTRANT
 * Fast zero routine uses the Block Copy commands of a BCOPY machine.
 * The Block Copy Read and Write commands copy BC_LINESIZE bytes
 * at a time and avoid the displacement of any valid data from the cache.
 * We use Block Copy Read and Write until there are less than
 * BC_LINESIZE bytes to zero.
 * d0 = count in long words, d1 = 0, a0 = saved count in bytes, a1 = to.
 * Doesn't worth doing Block Copy Read/Write if 2 * BC_LINESIZE >= byte count
 *
 * NOTE(review): unlike the fast-copy path above, there is no
 * interrupt-stack (eintstack) check here - confirm whether zeroing
 * from the interrupt stack is impossible or just unhandled.
 */
tstl _bcenable | test if there is BCOPY
jeq zoutnr | bit is off, skip fast zero
cmpl #BC_MINBCOPY,a0 | a0 = byte count
jlt zoutnr | 2*BC_LINESIZE >= byte count
moveml d2/d3/a2/a4,sp@ | save d2-d3,a2,a4 in frame (link left room; sp unmoved)
movl _uunix, a4 | a4 = current u area
tas bcopys | only one person at a time
jne zout | busy, skip fast copy
movl lfv,a2 | Update lofault to
movl a2@(4),a4@(U_LOFAULT) | clear semaphore
#ifdef sun3x
movl #ICACHE_ENABLE+DCACHE_CLEAR+DCACHE_ENABLE,d2
movc d2,cacr | inline vac_flush
#endif
/* Align a1 to line size if not already there; a1 is in word boundary */
movl a1,d2
andl #0xf,d2 | get byte residual of line size of a1
jeq 2f | aligned to line size (16) already
movl #BC_LINESIZE,d3 | figure out number of words to move
subl d2,d3 | d3 = bytes to clear to align a1 to 16
subl d3,a0 | update byte count
movl a0,d0 | update long count
lsrl #2,d0
lsrl #1,d3 | # of words to clear to align a1 to 16
jra 1f | enter word clear loop
0: movw d1,a1@+ | clear a word
1: dbra d3,0b | decr, br if >= 0
2:
movl lfv,a2 | install retry fault handler
movl a2@(8),a4@(U_LOFAULT) | fault now falls back to software loops
movl d0,d2 | save long word count
movl a1,d1 | save to
lsrl #BC_LINE_SHIFT,d0 | # of lines to move
/*
 * Set up Block Copy R/W commands. blockz points to the middle of 32 bytes
 * of zeros. "from" (a2) is in 16 byte boundary and points to 16 bytes of
 * zero's.
 */
lea blockz,a2 | get address of zero block
movl a2,d3 | set up "from"
#ifdef sun3x
andl #0xFFFFFFF0,d3 | mask off lower 16 bytes
#else
andl #0x0FFFFFF0,d3 | mask off lower 16 bytes and <31,28>
orl #BC_BLOCK_CPCMD,d3
#endif sun3x
movl d3,a2
movsl a2@,d3 | Block Copy Read, d3 is dummy; loads the zero line once
#ifndef sun3x
movl a1,d3 | to set up Block Copy R/W command
andl #BC_BLOCK_OFF,d3 | mask off cmd bits
orl #BC_BLOCK_CPCMD,d3 | set up "to"
movl d3,a1
#endif sun3x
/* Block Copy R/W loop to zero BC_LINESIZE bytes at a time. */
jra 7f | enter block copy loop
6: movsl d3,a1@ | Block Copy Write, d3 is dummy
addl #BC_LINESIZE,a1 | update "to"
7: dbra d0,6b | decr, br if >= 0
clrw d0 | clear low
subql #1,d0 | borrow from high half if != 0
jpl 6b | loop if high half was not 0
#ifndef sun3x
/* Mask off Block Copy Write command in a1 (to). */
movl a1,d3 | to mask off Block Copy R/W command
andl #BC_BLOCK_OFF,d3 | mask off "to"
movl d3,a1
#endif sun3x
/*
 * Update long count in d0, since only the last two bits of a0 (byte count)
 * are to be used and the last two bits of a0 are not affected by the Block
 * Copy R/W, we don't have to update a0.
 */
movl d2,d0 | update long word count
andl #BC_LINE_RESIDU,d0
jra zdone
zerofault: | got here cause of a fault; redo in software
movl d2,d0 | restore long count
movl d1,a1 | restore to
zdone:
clrb bcopys | reset semaphore
zout:
clrl d1 | reclear, used to save to
movl lfv,a2 | Reset lofault to
movl a2@,a4@(U_LOFAULT) | simple error return
moveml sp@,d2/d3/a2/a4 | restore d2, d3, and a2, a4
zoutnr:
#endif BCOPY
#ifdef UNROLLL
movl d0,d2
lsrl #4,d0 | loop once per 16 longs
andl #15,d2 | short bunch in first loop
lsll #1,d2 | "movl d1,a1@+" is two bytes long
negl d2 | make it negative offset
addl #0x22,d2 | from end of loop (0x22 = 34, same trick as "longs")
jmp pc@(0,d2) | dive into unrolled loop; d1 is still zero here
8: movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
movl d1,a1@+
#else UNROLLL
jra 9f | go to loop test
/* Here is the fast inner loop - loop mode on 68010 */
8: movl d1,a1@+ | store long
#endif UNROLLL
9: dbra d0,8b | decr count; br until done
clrw d0 | clear low
subql #1,d0 | borrow from hi half, get F's in lower
jpl 8b | loop if top non-negative, still more
/* Now up to 3 bytes remain to be cleared */
movl a0,d0 | restore count
btst #1,d0 | need a short word?
jeq 0f | no, skip
movw d1,a1@+ | do a short
0: btst #0,d0 | need another byte
jeq 1f | no, skip
movb d1,a1@+ | do a byte
1:
clrl d0 | ret code (used for kzero only)
#ifdef BCOPY
jra zeroerr | Join common code
/*
 * We got here because of a fault failed. Clear semaphore and
 * restore preserved registers.
 */
zerosem:
clrb bcopys
moveml sp@,d2/d3/a2/a4 | restore regs saved in frame by the fast path
#endif BCOPY
/*
 * If we got here because of fault failure (kzero), the errno value
 * should have already been loaded into d0 by trap code that returns here.
 */
zeroerr:
movl _uunix, a1
movl slf,a1@(U_LOFAULT) | restore old U_LOFAULT value
movl svd,d2 | restore working register
unlk a6
rts
/* tear down the {k/b}zero stack-offset names (was "#undef oac" - typo) */
#undef count
#undef base
#undef oab
#undef slf
#undef lfv