diff --git a/em.c b/em.c index 69fdc6f..6a2afd0 100644 --- a/em.c +++ b/em.c @@ -199,13 +199,11 @@ static void macheck (unsigned short p300vec, unsigned short chkvec, unsigned int #define XCLEARC CLEARC #define XSETC SETC -/* EXPCL sets both the C and L bits for shift instructions - - NOTE: unlike EXPC, this doesn't clear anything - bits must be cleared - before executing these macros! */ +/* EXPCL sets both the C and L bits for shift instructions */ #define EXPCL(onoff) \ - if ((onoff)) crs[KEYS] |= 0120000 + if ((onoff)) crs[KEYS] |= 0120000; \ + else crs[KEYS] &= ~0120000 #define SETCL crs[KEYS] |= 0120000 #define CLEARCL crs[KEYS] &= ~0120000 @@ -549,11 +547,8 @@ static unsigned short physmem[MEMSIZE]; /* system's physical memory */ increment the whole thing. DIAG cpu.pcl test 42 does check for segment wraparound, so -DFAST - will cause this test to fail. - - Update: when cpuid=40 (6650), cpu.pcl test 42 *expects* 32-bit - increment on RP (segment gets incremented too)! - */ + will cause this test to fail (but, see hack in pcl which fixes it). +*/ #ifdef FAST #define RPADD(n) (RP+n) @@ -617,7 +612,7 @@ static struct { /* invalidates all entries in the mapva supercache */ -void invalidate_brp() { +void inline invalidate_brp() { int i; for (i=0; i < BRP_SIZE; i++) @@ -2335,23 +2330,24 @@ static ea_t stex(unsigned int extsize) { } /* for PRTN, load values into temps first so that if any faults occur, - PRTN can be restarted + PRTN can be restarted. After all the temps are loaded, the stack + free pointer can be updated since no further faults can occur. - XXX: the order of this look wrong - stack free pointer shouldn't - be updated if a fault occurs fetching base registers - */ + If changing rings, make sure to invalidate the brp supercache. */ -static void prtn() { +static inline void prtn() { unsigned short stackrootseg; ea_t newrp,newsb,newlb; unsigned short keys; stackrootseg = get16(*(unsigned int *)(crs+SB)+1); - put32(*(unsigned int *)(crs+SB), MAKEVA(stackrootseg,0)); newrp = get32(*(unsigned int *)(crs+SB)+2); newsb = get32(*(unsigned int *)(crs+SB)+4); newlb = get32(*(unsigned int *)(crs+SB)+6); keys = get16(*(unsigned int *)(crs+SB)+8); + put32(*(unsigned int *)(crs+SB), MAKEVA(stackrootseg,0)); + if ((newrp ^ RP) & RINGMASK32) + invalidate_brp(); RP = newrp | (RP & RINGMASK32); *(unsigned int *)(crs+SB) = newsb; *(unsigned int *)(crs+LB) = newlb; @@ -2534,11 +2530,16 @@ static argt() { advance Y to the next arg displacement in the stack. Y has to be advanced last because the PB store may fault. If it does, the ARGT starts over, and this argument will - have to be transferred again. */ + have to be transferred again. + + The full 32-bit rp is incremented, which is technically + wrong but faster, but because PCL DIAG 42 specifically + checks for segment wraparound, only update the 16-bit + word offset in the stack frame header. */ if (advancepb) { rp += 2; - put32(rp, stackfp+2); + put16(rp & 0xffff, stackfp+3); crs[XL] = lastarg; } if (advancey) { @@ -2582,6 +2583,16 @@ static pcl (ea_t ecbea) { } #endif + /* this hack makes DIAG cpu.pcl happy: RP is only supposed to + have the 16-bit word offset increment, but we do 32-bits + for speed. If RPL == 0 during PCL, it means it used to be + segno/177776 and was incremented to segno+1/0. So fiddle + RP here to pass diags. In practice, RP wraparound is + just stupid and slow. */ + + if (RPL == 0) /* did RP wrap? */ + RP -= (1<<16); /* yes, subtract 1 from seg # */ + /* get segment access; mapva ensures either read or gate */ pa = mapva(ecbea, RP, PACC, &access); @@ -3617,7 +3628,6 @@ static inline arfa(int n, int val) { static inline unsigned int lrs(unsigned int val, short scount) { - CLEARCL; if (scount <= 32) { EXPCL(val & (((unsigned int)0x80000000) >> (32-scount))); return (*(int *)&val) >> scount; @@ -3625,13 +3635,13 @@ static inline unsigned int lrs(unsigned int val, short scount) { SETCL; return 0xFFFFFFFF; } else + CLEARCL; return 0; } static inline unsigned int lls(unsigned int val, short scount) { int templ; - CLEARCL; if (scount < 32) { templ = 0x80000000; templ = templ >> scount; /* create mask */ @@ -3647,21 +3657,21 @@ static inline unsigned int lls(unsigned int val, short scount) { static inline unsigned int lll(unsigned int val, short scount) { - CLEARCL; if (scount <= 32) { EXPCL(val & (((unsigned int)0x80000000) >> (scount-1))); return val << scount; } else + CLEARCL; return 0; } static inline unsigned int lrl(unsigned int val, short scount) { - CLEARCL; if (scount <= 32) { EXPCL(val & (((unsigned int)0x80000000) >> (32-scount))); return val >> scount; } else + CLEARCL; return 0; } @@ -3669,22 +3679,22 @@ static inline unsigned int lrl(unsigned int val, short scount) { static inline unsigned short arl (unsigned short val, short scount) { - CLEARCL; if (scount <= 16) { EXPCL(val & (((unsigned short)0x8000) >> (16-scount))); return val >> scount; } else { + CLEARCL; return 0; } } static inline unsigned short all (unsigned short val, short scount) { - CLEARCL; if (scount <= 16) { EXPCL(val & (((unsigned short)0x8000) >> (scount-1))); return val << scount; } else { + CLEARCL; return 0; } } @@ -3693,7 +3703,6 @@ static inline unsigned short als (unsigned short val, short scount) { short tempa; - CLEARCL; if (scount <= 15) { tempa = 0100000; tempa = tempa >> scount; /* create mask */ @@ -3702,14 +3711,12 @@ static inline unsigned short als (unsigned short val, short scount) { EXPCL(!(tempa == -1 || tempa == 0)); return val << scount; } - if (val != 0) - SETCL; + EXPCL(val != 0); return 0; } static inline unsigned short ars (unsigned short val, short scount) { - CLEARCL; if (scount <= 16) { EXPCL(val & (((unsigned short)0x8000) >> (16-scount))); return (*(short *)&val) >> scount; @@ -3717,6 +3724,7 @@ static inline unsigned short ars (unsigned short val, short scount) { SETCL; return 0xFFFF; } else + CLEARCL; return 0; } @@ -3724,7 +3732,6 @@ static inline unsigned short ars (unsigned short val, short scount) { static inline unsigned int lrr(unsigned int val, short scount) { - CLEARCL; scount = ((scount-1)%32)+1; /* make scount 1-32 */ EXPCL(val & (((unsigned int)0x80000000) >> (32-scount))); return (val >> scount) | (val << (32-scount)); @@ -3732,7 +3739,6 @@ static inline unsigned int lrr(unsigned int val, short scount) { static inline unsigned int llr(unsigned int val, short scount) { - CLEARCL; scount = ((scount-1)%32)+1; /* make scount 1-32 */ EXPCL(val & (((unsigned int)0x80000000) >> (scount-1))); return (val << scount) | (val >> (32-scount)); @@ -3742,7 +3748,6 @@ static inline unsigned int llr(unsigned int val, short scount) { static inline unsigned int alr(unsigned short val, short scount) { - CLEARCL; scount = ((scount-1)%16)+1; /* make scount 1-16 */ EXPCL(val & (((unsigned short)0x8000) >> (scount-1))); return (val << scount) | (val >> (16-scount)); @@ -3750,7 +3755,6 @@ static inline unsigned int alr(unsigned short val, short scount) { static inline unsigned int arr(unsigned short val, short scount) { - CLEARCL; scount = ((scount-1)%16)+1; /* make scount 1-16 */ EXPCL(val & (((unsigned short)0x8000) >> (16-scount))); return (val >> scount) | (val << (16-scount)); @@ -3824,36 +3828,31 @@ static int add32(unsigned int *a1, unsigned int a2, unsigned int a3, ea_t ea) { static int add16(unsigned short *a1, unsigned short a2, unsigned short a3, ea_t ea) { - unsigned short uorig, uresult; - unsigned int utemp; - short link, eq, lt; + unsigned short uorig; + unsigned int uresult; + int keybits, oflow; stopwatch_push(&sw_add16); - crs[KEYS] &= ~0120300; - link = eq = lt = 0; uorig = *a1; /* save original for sign check */ - utemp = uorig; /* expand to higher precision */ - utemp += a2; /* double-precision add */ - utemp += a3; /* again, for subtract */ - uresult = utemp; /* truncate result to result size */ + uresult = uorig; /* expand to higher precision */ + uresult += a2; /* double-precision add */ + uresult += a3; /* again, for subtract */ + keybits = (uresult & 0x10000) >> 3; /* set L-bit if carry occurred */ + uresult &= 0xFFFF; /* truncate result */ *a1 = uresult; /* store result */ - if (utemp & 0x10000) /* set L-bit if carry occurred */ - link = 020000; if (uresult == 0) /* set EQ? */ - eq = 0100; - if (((~uorig ^ a2) & (uorig ^ uresult) & 0x8000) == 0) { /* no overflow */ - if (*(int *)&uresult < 0) - lt = 0200; - crs[KEYS] = crs[KEYS] | link | eq | lt; - } else { - if (*(int *)&uresult >= 0) - lt = 0200; - crs[KEYS] = crs[KEYS] | link | eq | lt; + keybits |= 0100; + oflow = (((~uorig ^ a2) & (uorig ^ uresult) & 0x8000) != 0); /* overflow! */ + if (oflow) + uresult = ~uresult; + keybits |= (uresult & 0x8000) >> 8; /* set LT if result negative */ + crs[KEYS] = crs[KEYS] & ~0120300 | keybits; + if (oflow) mathexception('i', FC_INT_OFLOW, ea); - } stopwatch_pop(&sw_add16); } + static inline adlr(int dr) { if (crs[KEYS] & 020000) @@ -3864,6 +3863,33 @@ static inline adlr(int dr) { } } + +static inline cgt(unsigned short n) { + unsigned short utempa; + + utempa = iget16(RP); /* get number of words */ + if (1 <= n && n < utempa) + RPL = iget16(RPADD(n)); + else + RP = RPADD(utempa); +} + + +static inline pimh(int dr) { + int templ, templ2; + + templ = crsl[dr]; + /* NOTE: PIMH could be implemented as a left shift, but Prime DIAG + tests require a swap - hence the "or" below */ + crsl[dr] = (crsl[dr] << 16) | (crsl[dr] >> 16); + /* check that bits 1-16 were equal to bit 17 before PIMH */ + templ2 = (templ << 16) >> 16; + if (templ2 == templ) + CLEARC; + else + mathexception('i', FC_INT_OFLOW, 0); +} + /* NOTE: PMA manuals say the range for absolute RF addressing is 0-'377, but this does not allow addressing a machine with 8 user register sets. The range should probably be an emulator config @@ -4731,11 +4757,7 @@ d_iab: /* 000201 */ d_cgt: /* 001314 */ TRACE(T_FLOW, " CGT\n"); - utempa = iget16(RP); /* get number of words */ - if (1 <= crs[A] && crs[A] < utempa) - RPL = iget16(RPADD(crs[A])); - else - RP = RPADD(utempa); + cgt(crs[A]); goto fetch; d_pida: /* 000115 */ @@ -4753,13 +4775,7 @@ d_pidl: /* 000305 */ d_pima: /* 000015 */ TRACE(T_FLOW, " PIMA\n"); - templ = *(int *)(crsl+GR2); - crsl[GR2] = (crsl[GR2] << 16) | (crsl[GR2] >> 16); - templ2 = (templ << 16) >> 16; - if (templ != templ2) - mathexception('i', FC_INT_OFLOW, 0); - else - CLEARC; + pimh(GR2); goto fetch; d_piml: /* 000301 */ @@ -6753,7 +6769,6 @@ d_gen1: if (crs[KEYS] & 010000) { /* V/I mode */ crsl[GR2] = lrs(crsl[GR2], scount); } else { - CLEARCL; utempa = crs[B] & 0x8000; /* save B bit 1 */ if (scount <= 31) { templ = (crs[A]<<16) | ((crs[B] & 0x7FFF)<<1); @@ -6765,6 +6780,7 @@ d_gen1: *(int *)(crs+A) = 0xFFFF7FFF | utempa; SETCL; } else { + CLEARCL; *(int *)(crs+A) = utempa; } } @@ -6822,7 +6838,6 @@ d_gen1: if (crs[KEYS] & 010000) /* V/I mode */ crsl[GR2] = lls(crsl[GR2], scount); else { - CLEARCL; utempa = crs[B] & 0x8000; /* save B bit 1 */ if (scount < 31) { utempl = (crs[A]<<16) | ((crs[B] & 0x7FFF)<<1); @@ -7242,11 +7257,7 @@ imode: case 0026: TRACE(T_FLOW, " CGT\n"); - utempa = iget16(RP); /* get number of words */ - if (1 <= crs[dr*2] && crs[dr*2] < utempa) - RPL = iget16(INCVA(RP,crs[dr*2])); - else - RPL += utempa; + cgt(crs[dr*2]); break; case 0040: @@ -7691,16 +7702,7 @@ imode: case 0051: TRACE(T_FLOW, " PIMH\n"); - templ = crsl[dr]; - /* NOTE: PIMH could be implemented as a left shift, but Prime DIAG - tests require a swap - hence the "or" below */ - crsl[dr] = (crsl[dr] << 16) | (crsl[dr] >> 16); - /* check that bits 1-16 were equal to bit 17 before PIMH */ - templ2 = (templ << 16) >> 16; - if (templ2 != templ) - mathexception('i', FC_INT_OFLOW, 0); - else - CLEARC; + pimh(dr); break; case 0133: @@ -8522,7 +8524,7 @@ imode: case 1: imodepcl: stopwatch_push(&sw_pcl); - TRACE(T_FLOW|T_PCL, "#%d %o/%o: PCL %o/%o %s\n", gvp->instcount, RPH, RPL-2, ea>>16, ea&0xFFFF, searchloadmap(ea, 'e')); + TRACE(T_FLOW|T_PCL, "#%d %o/%:0o: PCL %o/%o %s\n", gvp->instcount, RPH, RPL-2, ea>>16, ea&0xFFFF, searchloadmap(ea, 'e')); if (gvp->numtraceprocs > 0 && TRACEUSER) for (i=0; inumtraceprocs; i++) if (traceprocs[i].ecb == (ea & 0xFFFFFFF) && traceprocs[i].sb == -1) {