1
0
mirror of https://github.com/prirun/p50em.git synced 2026-02-26 08:13:56 +00:00

PCL fixes, more perf tweaks: shift, prtn, add16

changed EXPCL macro to set and clear bits - not just set
changed shift instructions - made CLEARCL conditional;  this
  means keys are only updated once per shift, not twice
fixed prtn to update stack free pointer after values fetched
  in case a fault occurs and prtn is restarted
changed prtn to invalidate brp cache only if ring changes
changed prtn to inline, to avoid register save/restore
changed argt to only update rp word offset in sf header
added hack to pcl to correct wrapped RP for cpu.pcl case 42
added cgt inline proc, used in 2 places; generates better code
  (store to utempa is avoided altogether)
added pimh (also used for pima) inline proc to avoid stores
inlined invalidate_brp
changed add16 implementation after looking at the generated code;
  the new version ran faster according to Jeff's timers: 59/52 for old/new
This commit is contained in:
Jim
2007-09-15 00:00:00 -04:00
parent d99d16932d
commit 2fb2e1a348

170
em.c
View File

@@ -199,13 +199,11 @@ static void macheck (unsigned short p300vec, unsigned short chkvec, unsigned int
#define XCLEARC CLEARC
#define XSETC SETC
/* EXPCL sets both the C and L bits for shift instructions
NOTE: unlike EXPC, this doesn't clear anything - bits must be cleared
before executing these macros! */
/* EXPCL sets both the C and L bits for shift instructions */
#define EXPCL(onoff) \
if ((onoff)) crs[KEYS] |= 0120000
if ((onoff)) crs[KEYS] |= 0120000; \
else crs[KEYS] &= ~0120000
#define SETCL crs[KEYS] |= 0120000
#define CLEARCL crs[KEYS] &= ~0120000
@@ -549,11 +547,8 @@ static unsigned short physmem[MEMSIZE]; /* system's physical memory */
increment the whole thing.
DIAG cpu.pcl test 42 does check for segment wraparound, so -DFAST
will cause this test to fail.
Update: when cpuid=40 (6650), cpu.pcl test 42 *expects* 32-bit
increment on RP (segment gets incremented too)!
*/
will cause this test to fail (but, see hack in pcl which fixes it).
*/
#ifdef FAST
#define RPADD(n) (RP+n)
@@ -617,7 +612,7 @@ static struct {
/* invalidates all entries in the mapva supercache */
void invalidate_brp() {
void inline invalidate_brp() {
int i;
for (i=0; i < BRP_SIZE; i++)
@@ -2335,23 +2330,24 @@ static ea_t stex(unsigned int extsize) {
}
/* for PRTN, load values into temps first so that if any faults occur,
PRTN can be restarted
PRTN can be restarted. After all the temps are loaded, the stack
free pointer can be updated since no further faults can occur.
XXX: the order of this looks wrong - stack free pointer shouldn't
be updated if a fault occurs fetching base registers
*/
If changing rings, make sure to invalidate the brp supercache. */
static void prtn() {
static inline void prtn() {
unsigned short stackrootseg;
ea_t newrp,newsb,newlb;
unsigned short keys;
stackrootseg = get16(*(unsigned int *)(crs+SB)+1);
put32(*(unsigned int *)(crs+SB), MAKEVA(stackrootseg,0));
newrp = get32(*(unsigned int *)(crs+SB)+2);
newsb = get32(*(unsigned int *)(crs+SB)+4);
newlb = get32(*(unsigned int *)(crs+SB)+6);
keys = get16(*(unsigned int *)(crs+SB)+8);
put32(*(unsigned int *)(crs+SB), MAKEVA(stackrootseg,0));
if ((newrp ^ RP) & RINGMASK32)
invalidate_brp();
RP = newrp | (RP & RINGMASK32);
*(unsigned int *)(crs+SB) = newsb;
*(unsigned int *)(crs+LB) = newlb;
@@ -2534,11 +2530,16 @@ static argt() {
advance Y to the next arg displacement in the stack. Y
has to be advanced last because the PB store may fault.
If it does, the ARGT starts over, and this argument will
have to be transferred again. */
have to be transferred again.
The full 32-bit rp is incremented, which is technically
wrong but faster. However, because PCL DIAG 42 specifically
checks for segment wraparound, only the 16-bit word offset
is updated in the stack frame header. */
if (advancepb) {
rp += 2;
put32(rp, stackfp+2);
put16(rp & 0xffff, stackfp+3);
crs[XL] = lastarg;
}
if (advancey) {
@@ -2582,6 +2583,16 @@ static pcl (ea_t ecbea) {
}
#endif
/* this hack makes DIAG cpu.pcl happy: only the 16-bit word offset
of RP is supposed to be incremented, but we increment all 32 bits
for speed. If RPL == 0 during PCL, it means RP used to be
segno/177776 and was incremented to segno+1/0. So fiddle
RP here to pass diags. In practice, RP wraparound is
just stupid and slow. */
if (RPL == 0) /* did RP wrap? */
RP -= (1<<16); /* yes, subtract 1 from seg # */
/* get segment access; mapva ensures either read or gate */
pa = mapva(ecbea, RP, PACC, &access);
@@ -3617,7 +3628,6 @@ static inline arfa(int n, int val) {
static inline unsigned int lrs(unsigned int val, short scount) {
CLEARCL;
if (scount <= 32) {
EXPCL(val & (((unsigned int)0x80000000) >> (32-scount)));
return (*(int *)&val) >> scount;
@@ -3625,13 +3635,13 @@ static inline unsigned int lrs(unsigned int val, short scount) {
SETCL;
return 0xFFFFFFFF;
} else
CLEARCL;
return 0;
}
static inline unsigned int lls(unsigned int val, short scount) {
int templ;
CLEARCL;
if (scount < 32) {
templ = 0x80000000;
templ = templ >> scount; /* create mask */
@@ -3647,21 +3657,21 @@ static inline unsigned int lls(unsigned int val, short scount) {
static inline unsigned int lll(unsigned int val, short scount) {
CLEARCL;
if (scount <= 32) {
EXPCL(val & (((unsigned int)0x80000000) >> (scount-1)));
return val << scount;
} else
CLEARCL;
return 0;
}
static inline unsigned int lrl(unsigned int val, short scount) {
CLEARCL;
if (scount <= 32) {
EXPCL(val & (((unsigned int)0x80000000) >> (32-scount)));
return val >> scount;
} else
CLEARCL;
return 0;
}
@@ -3669,22 +3679,22 @@ static inline unsigned int lrl(unsigned int val, short scount) {
static inline unsigned short arl (unsigned short val, short scount) {
CLEARCL;
if (scount <= 16) {
EXPCL(val & (((unsigned short)0x8000) >> (16-scount)));
return val >> scount;
} else {
CLEARCL;
return 0;
}
}
static inline unsigned short all (unsigned short val, short scount) {
CLEARCL;
if (scount <= 16) {
EXPCL(val & (((unsigned short)0x8000) >> (scount-1)));
return val << scount;
} else {
CLEARCL;
return 0;
}
}
@@ -3693,7 +3703,6 @@ static inline unsigned short als (unsigned short val, short scount) {
short tempa;
CLEARCL;
if (scount <= 15) {
tempa = 0100000;
tempa = tempa >> scount; /* create mask */
@@ -3702,14 +3711,12 @@ static inline unsigned short als (unsigned short val, short scount) {
EXPCL(!(tempa == -1 || tempa == 0));
return val << scount;
}
if (val != 0)
SETCL;
EXPCL(val != 0);
return 0;
}
static inline unsigned short ars (unsigned short val, short scount) {
CLEARCL;
if (scount <= 16) {
EXPCL(val & (((unsigned short)0x8000) >> (16-scount)));
return (*(short *)&val) >> scount;
@@ -3717,6 +3724,7 @@ static inline unsigned short ars (unsigned short val, short scount) {
SETCL;
return 0xFFFF;
} else
CLEARCL;
return 0;
}
@@ -3724,7 +3732,6 @@ static inline unsigned short ars (unsigned short val, short scount) {
static inline unsigned int lrr(unsigned int val, short scount) {
CLEARCL;
scount = ((scount-1)%32)+1; /* make scount 1-32 */
EXPCL(val & (((unsigned int)0x80000000) >> (32-scount)));
return (val >> scount) | (val << (32-scount));
@@ -3732,7 +3739,6 @@ static inline unsigned int lrr(unsigned int val, short scount) {
static inline unsigned int llr(unsigned int val, short scount) {
CLEARCL;
scount = ((scount-1)%32)+1; /* make scount 1-32 */
EXPCL(val & (((unsigned int)0x80000000) >> (scount-1)));
return (val << scount) | (val >> (32-scount));
@@ -3742,7 +3748,6 @@ static inline unsigned int llr(unsigned int val, short scount) {
static inline unsigned int alr(unsigned short val, short scount) {
CLEARCL;
scount = ((scount-1)%16)+1; /* make scount 1-16 */
EXPCL(val & (((unsigned short)0x8000) >> (scount-1)));
return (val << scount) | (val >> (16-scount));
@@ -3750,7 +3755,6 @@ static inline unsigned int alr(unsigned short val, short scount) {
static inline unsigned int arr(unsigned short val, short scount) {
CLEARCL;
scount = ((scount-1)%16)+1; /* make scount 1-16 */
EXPCL(val & (((unsigned short)0x8000) >> (16-scount)));
return (val >> scount) | (val << (16-scount));
@@ -3824,36 +3828,31 @@ static int add32(unsigned int *a1, unsigned int a2, unsigned int a3, ea_t ea) {
static int add16(unsigned short *a1, unsigned short a2, unsigned short a3, ea_t ea) {
unsigned short uorig, uresult;
unsigned int utemp;
short link, eq, lt;
unsigned short uorig;
unsigned int uresult;
int keybits, oflow;
stopwatch_push(&sw_add16);
crs[KEYS] &= ~0120300;
link = eq = lt = 0;
uorig = *a1; /* save original for sign check */
utemp = uorig; /* expand to higher precision */
utemp += a2; /* double-precision add */
utemp += a3; /* again, for subtract */
uresult = utemp; /* truncate result to result size */
uresult = uorig; /* expand to higher precision */
uresult += a2; /* double-precision add */
uresult += a3; /* again, for subtract */
keybits = (uresult & 0x10000) >> 3; /* set L-bit if carry occurred */
uresult &= 0xFFFF; /* truncate result */
*a1 = uresult; /* store result */
if (utemp & 0x10000) /* set L-bit if carry occurred */
link = 020000;
if (uresult == 0) /* set EQ? */
eq = 0100;
if (((~uorig ^ a2) & (uorig ^ uresult) & 0x8000) == 0) { /* no overflow */
if (*(int *)&uresult < 0)
lt = 0200;
crs[KEYS] = crs[KEYS] | link | eq | lt;
} else {
if (*(int *)&uresult >= 0)
lt = 0200;
crs[KEYS] = crs[KEYS] | link | eq | lt;
keybits |= 0100;
oflow = (((~uorig ^ a2) & (uorig ^ uresult) & 0x8000) != 0); /* overflow! */
if (oflow)
uresult = ~uresult;
keybits |= (uresult & 0x8000) >> 8; /* set LT if result negative */
crs[KEYS] = crs[KEYS] & ~0120300 | keybits;
if (oflow)
mathexception('i', FC_INT_OFLOW, ea);
}
stopwatch_pop(&sw_add16);
}
static inline adlr(int dr) {
if (crs[KEYS] & 020000)
@@ -3864,6 +3863,33 @@ static inline adlr(int dr) {
}
}
/* CGT helper, shared by the two CGT instruction decoders.

   The word at RP holds a word count.  If n is in the range
   1 <= n < count, the word at RPADD(n) is fetched and stored into
   RPL; otherwise RP is advanced by count words (via RPADD).

   n: selector value supplied by the caller (a register value at
      both call sites).

   Declared void: the function produces no value, and an omitted
   return type (implicit int) is not valid C99 or later. */
static inline void cgt(unsigned short n) {
  unsigned short nwords;

  nwords = iget16(RP);            /* get number of words */
  if (1 <= n && n < nwords)
    RPL = iget16(RPADD(n));       /* in range: load new word offset */
  else
    RP = RPADD(nwords);           /* out of range: step past the words */
}
/* PIMH helper (also used for PIMA): swaps the two 16-bit halves of
   crsl[dr], then checks whether the original value was representable
   as a sign-extended 16-bit quantity.

   dr: index into crsl of the register to operate on.

   If the original value equals the sign extension of its low 16 bits,
   CLEARC is executed; otherwise an integer overflow math exception
   (FC_INT_OFLOW) is raised.

   Declared void: the function produces no value, and an omitted
   return type (implicit int) is not valid C99 or later. */
static inline void pimh(int dr) {
  int orig;

  orig = crsl[dr];

  /* NOTE: PIMH could be implemented as a left shift, but Prime DIAG
     tests require a swap - hence the "or" below */

  crsl[dr] = (crsl[dr] << 16) | (crsl[dr] >> 16);

  /* check that bits 1-16 were equal to bit 17 before PIMH.  The
     sign extension is done by truncating to a 16-bit short rather
     than with (orig << 16) >> 16, because left-shifting a signed
     int into or past the sign bit is undefined behavior. */

  if (orig == (int)(short)orig) {
    CLEARC;
  } else {
    mathexception('i', FC_INT_OFLOW, 0);
  }
}
/* NOTE: PMA manuals say the range for absolute RF addressing is
0-'377, but this does not allow addressing a machine with 8 user
register sets. The range should probably be an emulator config
@@ -4731,11 +4757,7 @@ d_iab: /* 000201 */
d_cgt: /* 001314 */
TRACE(T_FLOW, " CGT\n");
utempa = iget16(RP); /* get number of words */
if (1 <= crs[A] && crs[A] < utempa)
RPL = iget16(RPADD(crs[A]));
else
RP = RPADD(utempa);
cgt(crs[A]);
goto fetch;
d_pida: /* 000115 */
@@ -4753,13 +4775,7 @@ d_pidl: /* 000305 */
d_pima: /* 000015 */
TRACE(T_FLOW, " PIMA\n");
templ = *(int *)(crsl+GR2);
crsl[GR2] = (crsl[GR2] << 16) | (crsl[GR2] >> 16);
templ2 = (templ << 16) >> 16;
if (templ != templ2)
mathexception('i', FC_INT_OFLOW, 0);
else
CLEARC;
pimh(GR2);
goto fetch;
d_piml: /* 000301 */
@@ -6753,7 +6769,6 @@ d_gen1:
if (crs[KEYS] & 010000) { /* V/I mode */
crsl[GR2] = lrs(crsl[GR2], scount);
} else {
CLEARCL;
utempa = crs[B] & 0x8000; /* save B bit 1 */
if (scount <= 31) {
templ = (crs[A]<<16) | ((crs[B] & 0x7FFF)<<1);
@@ -6765,6 +6780,7 @@ d_gen1:
*(int *)(crs+A) = 0xFFFF7FFF | utempa;
SETCL;
} else {
CLEARCL;
*(int *)(crs+A) = utempa;
}
}
@@ -6822,7 +6838,6 @@ d_gen1:
if (crs[KEYS] & 010000) /* V/I mode */
crsl[GR2] = lls(crsl[GR2], scount);
else {
CLEARCL;
utempa = crs[B] & 0x8000; /* save B bit 1 */
if (scount < 31) {
utempl = (crs[A]<<16) | ((crs[B] & 0x7FFF)<<1);
@@ -7242,11 +7257,7 @@ imode:
case 0026:
TRACE(T_FLOW, " CGT\n");
utempa = iget16(RP); /* get number of words */
if (1 <= crs[dr*2] && crs[dr*2] < utempa)
RPL = iget16(INCVA(RP,crs[dr*2]));
else
RPL += utempa;
cgt(crs[dr*2]);
break;
case 0040:
@@ -7691,16 +7702,7 @@ imode:
case 0051:
TRACE(T_FLOW, " PIMH\n");
templ = crsl[dr];
/* NOTE: PIMH could be implemented as a left shift, but Prime DIAG
tests require a swap - hence the "or" below */
crsl[dr] = (crsl[dr] << 16) | (crsl[dr] >> 16);
/* check that bits 1-16 were equal to bit 17 before PIMH */
templ2 = (templ << 16) >> 16;
if (templ2 != templ)
mathexception('i', FC_INT_OFLOW, 0);
else
CLEARC;
pimh(dr);
break;
case 0133:
@@ -8522,7 +8524,7 @@ imode:
case 1:
imodepcl:
stopwatch_push(&sw_pcl);
TRACE(T_FLOW|T_PCL, "#%d %o/%o: PCL %o/%o %s\n", gvp->instcount, RPH, RPL-2, ea>>16, ea&0xFFFF, searchloadmap(ea, 'e'));
TRACE(T_FLOW|T_PCL, "#%d %o/%:0o: PCL %o/%o %s\n", gvp->instcount, RPH, RPL-2, ea>>16, ea&0xFFFF, searchloadmap(ea, 'e'));
if (gvp->numtraceprocs > 0 && TRACEUSER)
for (i=0; i<gvp->numtraceprocs; i++)
if (traceprocs[i].ecb == (ea & 0xFFFFFFF) && traceprocs[i].sb == -1) {