mirror of
https://github.com/prirun/p50em.git
synced 2026-02-26 08:13:56 +00:00
PCL fixes, more perf tweaks: shift, prtn, add16
- changed EXPCL macro to set and clear bits, not just set
- changed shift instructions: made CLEARCL conditional; this means keys are only updated once per shift, not twice
- fixed prtn to update the stack free pointer after the values are fetched, in case a fault occurs and prtn is restarted
- changed prtn to invalidate the brp cache only if the ring changes
- changed prtn to inline, to avoid register save/restore
- changed argt to only update the rp word offset in the sf header
- added hack to pcl to correct wrapped RP for cpu.pcl case 42
- added cgt inline proc, used in 2 places; generates better code (store to utempa is avoided altogether)
- added pimh (also used for pima) inline proc to avoid stores
- inlined invalidate_brp
- changed add16 implementation while looking at the generated code; ran faster according to Jeff's timers: 59/52 for old/new
This commit is contained in:
170
em.c
170
em.c
@@ -199,13 +199,11 @@ static void macheck (unsigned short p300vec, unsigned short chkvec, unsigned int
|
||||
#define XCLEARC CLEARC
|
||||
#define XSETC SETC
|
||||
|
||||
/* EXPCL sets both the C and L bits for shift instructions
|
||||
|
||||
NOTE: unlike EXPC, this doesn't clear anything - bits must be cleared
|
||||
before executing these macros! */
|
||||
/* EXPCL sets both the C and L bits for shift instructions */
|
||||
|
||||
#define EXPCL(onoff) \
|
||||
if ((onoff)) crs[KEYS] |= 0120000
|
||||
if ((onoff)) crs[KEYS] |= 0120000; \
|
||||
else crs[KEYS] &= ~0120000
|
||||
|
||||
#define SETCL crs[KEYS] |= 0120000
|
||||
#define CLEARCL crs[KEYS] &= ~0120000
|
||||
@@ -549,11 +547,8 @@ static unsigned short physmem[MEMSIZE]; /* system's physical memory */
|
||||
increment the whole thing.
|
||||
|
||||
DIAG cpu.pcl test 42 does check for segment wraparound, so -DFAST
|
||||
will cause this test to fail.
|
||||
|
||||
Update: when cpuid=40 (6650), cpu.pcl test 42 *expects* 32-bit
|
||||
increment on RP (segment gets incremented too)!
|
||||
*/
|
||||
will cause this test to fail (but, see hack in pcl which fixes it).
|
||||
*/
|
||||
|
||||
#ifdef FAST
|
||||
#define RPADD(n) (RP+n)
|
||||
@@ -617,7 +612,7 @@ static struct {
|
||||
|
||||
/* invalidates all entries in the mapva supercache */
|
||||
|
||||
void invalidate_brp() {
|
||||
void inline invalidate_brp() {
|
||||
int i;
|
||||
|
||||
for (i=0; i < BRP_SIZE; i++)
|
||||
@@ -2335,23 +2330,24 @@ static ea_t stex(unsigned int extsize) {
|
||||
}
|
||||
|
||||
/* for PRTN, load values into temps first so that if any faults occur,
|
||||
PRTN can be restarted
|
||||
PRTN can be restarted. After all the temps are loaded, the stack
|
||||
free pointer can be updated since no further faults can occur.
|
||||
|
||||
XXX: the order of this look wrong - stack free pointer shouldn't
|
||||
be updated if a fault occurs fetching base registers
|
||||
*/
|
||||
If changing rings, make sure to invalidate the brp supercache. */
|
||||
|
||||
static void prtn() {
|
||||
static inline void prtn() {
|
||||
unsigned short stackrootseg;
|
||||
ea_t newrp,newsb,newlb;
|
||||
unsigned short keys;
|
||||
|
||||
stackrootseg = get16(*(unsigned int *)(crs+SB)+1);
|
||||
put32(*(unsigned int *)(crs+SB), MAKEVA(stackrootseg,0));
|
||||
newrp = get32(*(unsigned int *)(crs+SB)+2);
|
||||
newsb = get32(*(unsigned int *)(crs+SB)+4);
|
||||
newlb = get32(*(unsigned int *)(crs+SB)+6);
|
||||
keys = get16(*(unsigned int *)(crs+SB)+8);
|
||||
put32(*(unsigned int *)(crs+SB), MAKEVA(stackrootseg,0));
|
||||
if ((newrp ^ RP) & RINGMASK32)
|
||||
invalidate_brp();
|
||||
RP = newrp | (RP & RINGMASK32);
|
||||
*(unsigned int *)(crs+SB) = newsb;
|
||||
*(unsigned int *)(crs+LB) = newlb;
|
||||
@@ -2534,11 +2530,16 @@ static argt() {
|
||||
advance Y to the next arg displacement in the stack. Y
|
||||
has to be advanced last because the PB store may fault.
|
||||
If it does, the ARGT starts over, and this argument will
|
||||
have to be transferred again. */
|
||||
have to be transferred again.
|
||||
|
||||
The full 32-bit rp is incremented, which is technically
|
||||
wrong but faster, but because PCL DIAG 42 specifically
|
||||
checks for segment wraparound, only update the 16-bit
|
||||
word offset in the stack frame header. */
|
||||
|
||||
if (advancepb) {
|
||||
rp += 2;
|
||||
put32(rp, stackfp+2);
|
||||
put16(rp & 0xffff, stackfp+3);
|
||||
crs[XL] = lastarg;
|
||||
}
|
||||
if (advancey) {
|
||||
@@ -2582,6 +2583,16 @@ static pcl (ea_t ecbea) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* this hack makes DIAG cpu.pcl happy: RP is only supposed to
|
||||
have the 16-bit word offset increment, but we do 32-bits
|
||||
for speed. If RPL == 0 during PCL, it means it used to be
|
||||
segno/177776 and was incremented to segno+1/0. So fiddle
|
||||
RP here to pass diags. In practice, RP wraparound is
|
||||
just stupid and slow. */
|
||||
|
||||
if (RPL == 0) /* did RP wrap? */
|
||||
RP -= (1<<16); /* yes, subtract 1 from seg # */
|
||||
|
||||
/* get segment access; mapva ensures either read or gate */
|
||||
|
||||
pa = mapva(ecbea, RP, PACC, &access);
|
||||
@@ -3617,7 +3628,6 @@ static inline arfa(int n, int val) {
|
||||
|
||||
static inline unsigned int lrs(unsigned int val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 32) {
|
||||
EXPCL(val & (((unsigned int)0x80000000) >> (32-scount)));
|
||||
return (*(int *)&val) >> scount;
|
||||
@@ -3625,13 +3635,13 @@ static inline unsigned int lrs(unsigned int val, short scount) {
|
||||
SETCL;
|
||||
return 0xFFFFFFFF;
|
||||
} else
|
||||
CLEARCL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned int lls(unsigned int val, short scount) {
|
||||
int templ;
|
||||
|
||||
CLEARCL;
|
||||
if (scount < 32) {
|
||||
templ = 0x80000000;
|
||||
templ = templ >> scount; /* create mask */
|
||||
@@ -3647,21 +3657,21 @@ static inline unsigned int lls(unsigned int val, short scount) {
|
||||
|
||||
static inline unsigned int lll(unsigned int val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 32) {
|
||||
EXPCL(val & (((unsigned int)0x80000000) >> (scount-1)));
|
||||
return val << scount;
|
||||
} else
|
||||
CLEARCL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned int lrl(unsigned int val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 32) {
|
||||
EXPCL(val & (((unsigned int)0x80000000) >> (32-scount)));
|
||||
return val >> scount;
|
||||
} else
|
||||
CLEARCL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3669,22 +3679,22 @@ static inline unsigned int lrl(unsigned int val, short scount) {
|
||||
|
||||
static inline unsigned short arl (unsigned short val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 16) {
|
||||
EXPCL(val & (((unsigned short)0x8000) >> (16-scount)));
|
||||
return val >> scount;
|
||||
} else {
|
||||
CLEARCL;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned short all (unsigned short val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 16) {
|
||||
EXPCL(val & (((unsigned short)0x8000) >> (scount-1)));
|
||||
return val << scount;
|
||||
} else {
|
||||
CLEARCL;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -3693,7 +3703,6 @@ static inline unsigned short als (unsigned short val, short scount) {
|
||||
|
||||
short tempa;
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 15) {
|
||||
tempa = 0100000;
|
||||
tempa = tempa >> scount; /* create mask */
|
||||
@@ -3702,14 +3711,12 @@ static inline unsigned short als (unsigned short val, short scount) {
|
||||
EXPCL(!(tempa == -1 || tempa == 0));
|
||||
return val << scount;
|
||||
}
|
||||
if (val != 0)
|
||||
SETCL;
|
||||
EXPCL(val != 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned short ars (unsigned short val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
if (scount <= 16) {
|
||||
EXPCL(val & (((unsigned short)0x8000) >> (16-scount)));
|
||||
return (*(short *)&val) >> scount;
|
||||
@@ -3717,6 +3724,7 @@ static inline unsigned short ars (unsigned short val, short scount) {
|
||||
SETCL;
|
||||
return 0xFFFF;
|
||||
} else
|
||||
CLEARCL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3724,7 +3732,6 @@ static inline unsigned short ars (unsigned short val, short scount) {
|
||||
|
||||
static inline unsigned int lrr(unsigned int val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
scount = ((scount-1)%32)+1; /* make scount 1-32 */
|
||||
EXPCL(val & (((unsigned int)0x80000000) >> (32-scount)));
|
||||
return (val >> scount) | (val << (32-scount));
|
||||
@@ -3732,7 +3739,6 @@ static inline unsigned int lrr(unsigned int val, short scount) {
|
||||
|
||||
static inline unsigned int llr(unsigned int val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
scount = ((scount-1)%32)+1; /* make scount 1-32 */
|
||||
EXPCL(val & (((unsigned int)0x80000000) >> (scount-1)));
|
||||
return (val << scount) | (val >> (32-scount));
|
||||
@@ -3742,7 +3748,6 @@ static inline unsigned int llr(unsigned int val, short scount) {
|
||||
|
||||
static inline unsigned int alr(unsigned short val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
scount = ((scount-1)%16)+1; /* make scount 1-16 */
|
||||
EXPCL(val & (((unsigned short)0x8000) >> (scount-1)));
|
||||
return (val << scount) | (val >> (16-scount));
|
||||
@@ -3750,7 +3755,6 @@ static inline unsigned int alr(unsigned short val, short scount) {
|
||||
|
||||
static inline unsigned int arr(unsigned short val, short scount) {
|
||||
|
||||
CLEARCL;
|
||||
scount = ((scount-1)%16)+1; /* make scount 1-16 */
|
||||
EXPCL(val & (((unsigned short)0x8000) >> (16-scount)));
|
||||
return (val >> scount) | (val << (16-scount));
|
||||
@@ -3824,36 +3828,31 @@ static int add32(unsigned int *a1, unsigned int a2, unsigned int a3, ea_t ea) {
|
||||
|
||||
static int add16(unsigned short *a1, unsigned short a2, unsigned short a3, ea_t ea) {
|
||||
|
||||
unsigned short uorig, uresult;
|
||||
unsigned int utemp;
|
||||
short link, eq, lt;
|
||||
unsigned short uorig;
|
||||
unsigned int uresult;
|
||||
int keybits, oflow;
|
||||
|
||||
stopwatch_push(&sw_add16);
|
||||
crs[KEYS] &= ~0120300;
|
||||
link = eq = lt = 0;
|
||||
uorig = *a1; /* save original for sign check */
|
||||
utemp = uorig; /* expand to higher precision */
|
||||
utemp += a2; /* double-precision add */
|
||||
utemp += a3; /* again, for subtract */
|
||||
uresult = utemp; /* truncate result to result size */
|
||||
uresult = uorig; /* expand to higher precision */
|
||||
uresult += a2; /* double-precision add */
|
||||
uresult += a3; /* again, for subtract */
|
||||
keybits = (uresult & 0x10000) >> 3; /* set L-bit if carry occurred */
|
||||
uresult &= 0xFFFF; /* truncate result */
|
||||
*a1 = uresult; /* store result */
|
||||
if (utemp & 0x10000) /* set L-bit if carry occurred */
|
||||
link = 020000;
|
||||
if (uresult == 0) /* set EQ? */
|
||||
eq = 0100;
|
||||
if (((~uorig ^ a2) & (uorig ^ uresult) & 0x8000) == 0) { /* no overflow */
|
||||
if (*(int *)&uresult < 0)
|
||||
lt = 0200;
|
||||
crs[KEYS] = crs[KEYS] | link | eq | lt;
|
||||
} else {
|
||||
if (*(int *)&uresult >= 0)
|
||||
lt = 0200;
|
||||
crs[KEYS] = crs[KEYS] | link | eq | lt;
|
||||
keybits |= 0100;
|
||||
oflow = (((~uorig ^ a2) & (uorig ^ uresult) & 0x8000) != 0); /* overflow! */
|
||||
if (oflow)
|
||||
uresult = ~uresult;
|
||||
keybits |= (uresult & 0x8000) >> 8; /* set LT if result negative */
|
||||
crs[KEYS] = crs[KEYS] & ~0120300 | keybits;
|
||||
if (oflow)
|
||||
mathexception('i', FC_INT_OFLOW, ea);
|
||||
}
|
||||
stopwatch_pop(&sw_add16);
|
||||
}
|
||||
|
||||
|
||||
static inline adlr(int dr) {
|
||||
|
||||
if (crs[KEYS] & 020000)
|
||||
@@ -3864,6 +3863,33 @@ static inline adlr(int dr) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline cgt(unsigned short n) {
|
||||
unsigned short utempa;
|
||||
|
||||
utempa = iget16(RP); /* get number of words */
|
||||
if (1 <= n && n < utempa)
|
||||
RPL = iget16(RPADD(n));
|
||||
else
|
||||
RP = RPADD(utempa);
|
||||
}
|
||||
|
||||
|
||||
static inline pimh(int dr) {
|
||||
int templ, templ2;
|
||||
|
||||
templ = crsl[dr];
|
||||
/* NOTE: PIMH could be implemented as a left shift, but Prime DIAG
|
||||
tests require a swap - hence the "or" below */
|
||||
crsl[dr] = (crsl[dr] << 16) | (crsl[dr] >> 16);
|
||||
/* check that bits 1-16 were equal to bit 17 before PIMH */
|
||||
templ2 = (templ << 16) >> 16;
|
||||
if (templ2 == templ)
|
||||
CLEARC;
|
||||
else
|
||||
mathexception('i', FC_INT_OFLOW, 0);
|
||||
}
|
||||
|
||||
/* NOTE: PMA manuals say the range for absolute RF addressing is
|
||||
0-'377, but this does not allow addressing a machine with 8 user
|
||||
register sets. The range should probably be an emulator config
|
||||
@@ -4731,11 +4757,7 @@ d_iab: /* 000201 */
|
||||
|
||||
d_cgt: /* 001314 */
|
||||
TRACE(T_FLOW, " CGT\n");
|
||||
utempa = iget16(RP); /* get number of words */
|
||||
if (1 <= crs[A] && crs[A] < utempa)
|
||||
RPL = iget16(RPADD(crs[A]));
|
||||
else
|
||||
RP = RPADD(utempa);
|
||||
cgt(crs[A]);
|
||||
goto fetch;
|
||||
|
||||
d_pida: /* 000115 */
|
||||
@@ -4753,13 +4775,7 @@ d_pidl: /* 000305 */
|
||||
|
||||
d_pima: /* 000015 */
|
||||
TRACE(T_FLOW, " PIMA\n");
|
||||
templ = *(int *)(crsl+GR2);
|
||||
crsl[GR2] = (crsl[GR2] << 16) | (crsl[GR2] >> 16);
|
||||
templ2 = (templ << 16) >> 16;
|
||||
if (templ != templ2)
|
||||
mathexception('i', FC_INT_OFLOW, 0);
|
||||
else
|
||||
CLEARC;
|
||||
pimh(GR2);
|
||||
goto fetch;
|
||||
|
||||
d_piml: /* 000301 */
|
||||
@@ -6753,7 +6769,6 @@ d_gen1:
|
||||
if (crs[KEYS] & 010000) { /* V/I mode */
|
||||
crsl[GR2] = lrs(crsl[GR2], scount);
|
||||
} else {
|
||||
CLEARCL;
|
||||
utempa = crs[B] & 0x8000; /* save B bit 1 */
|
||||
if (scount <= 31) {
|
||||
templ = (crs[A]<<16) | ((crs[B] & 0x7FFF)<<1);
|
||||
@@ -6765,6 +6780,7 @@ d_gen1:
|
||||
*(int *)(crs+A) = 0xFFFF7FFF | utempa;
|
||||
SETCL;
|
||||
} else {
|
||||
CLEARCL;
|
||||
*(int *)(crs+A) = utempa;
|
||||
}
|
||||
}
|
||||
@@ -6822,7 +6838,6 @@ d_gen1:
|
||||
if (crs[KEYS] & 010000) /* V/I mode */
|
||||
crsl[GR2] = lls(crsl[GR2], scount);
|
||||
else {
|
||||
CLEARCL;
|
||||
utempa = crs[B] & 0x8000; /* save B bit 1 */
|
||||
if (scount < 31) {
|
||||
utempl = (crs[A]<<16) | ((crs[B] & 0x7FFF)<<1);
|
||||
@@ -7242,11 +7257,7 @@ imode:
|
||||
|
||||
case 0026:
|
||||
TRACE(T_FLOW, " CGT\n");
|
||||
utempa = iget16(RP); /* get number of words */
|
||||
if (1 <= crs[dr*2] && crs[dr*2] < utempa)
|
||||
RPL = iget16(INCVA(RP,crs[dr*2]));
|
||||
else
|
||||
RPL += utempa;
|
||||
cgt(crs[dr*2]);
|
||||
break;
|
||||
|
||||
case 0040:
|
||||
@@ -7691,16 +7702,7 @@ imode:
|
||||
|
||||
case 0051:
|
||||
TRACE(T_FLOW, " PIMH\n");
|
||||
templ = crsl[dr];
|
||||
/* NOTE: PIMH could be implemented as a left shift, but Prime DIAG
|
||||
tests require a swap - hence the "or" below */
|
||||
crsl[dr] = (crsl[dr] << 16) | (crsl[dr] >> 16);
|
||||
/* check that bits 1-16 were equal to bit 17 before PIMH */
|
||||
templ2 = (templ << 16) >> 16;
|
||||
if (templ2 != templ)
|
||||
mathexception('i', FC_INT_OFLOW, 0);
|
||||
else
|
||||
CLEARC;
|
||||
pimh(dr);
|
||||
break;
|
||||
|
||||
case 0133:
|
||||
@@ -8522,7 +8524,7 @@ imode:
|
||||
case 1:
|
||||
imodepcl:
|
||||
stopwatch_push(&sw_pcl);
|
||||
TRACE(T_FLOW|T_PCL, "#%d %o/%o: PCL %o/%o %s\n", gvp->instcount, RPH, RPL-2, ea>>16, ea&0xFFFF, searchloadmap(ea, 'e'));
|
||||
TRACE(T_FLOW|T_PCL, "#%d %o/%:0o: PCL %o/%o %s\n", gvp->instcount, RPH, RPL-2, ea>>16, ea&0xFFFF, searchloadmap(ea, 'e'));
|
||||
if (gvp->numtraceprocs > 0 && TRACEUSER)
|
||||
for (i=0; i<gvp->numtraceprocs; i++)
|
||||
if (traceprocs[i].ecb == (ea & 0xFFFFFFF) && traceprocs[i].sb == -1) {
|
||||
|
||||
Reference in New Issue
Block a user