/*
 * Files
 * Arquivotheca.SunOS-4.1.4/sys/sun4/memerr.c
 * seta75D ff309bfe1c Init
 * 2021-10-11 18:37:13 -03:00
 *
 * 1035 lines
 * 24 KiB
 * C
 */

#ifndef lint
static char sccsid[] = "@(#)memerr.c 1.1 94/10/31 SMI";
#endif
/*
* Copyright (c) 1989 by Sun Microsystems, Inc.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/user.h>
#include <machine/cpu.h>
#include <machine/pte.h>
#include <machine/mmu.h>
#include <machine/memerr.h>
#include <machine/eccreg.h>
#include <machine/enable.h>
#include <machine/intreg.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/seg.h>
/*
 * Running count of hard parity errors: retry() bumps it when a
 * write-then-read test still fails, and user_parity_recover() panics
 * once too many have been seen.  Declared with an explicit type, like
 * the other int globals in this file, instead of relying on implicit int.
 */
int parity_hard_error = 0;
/*
 * Memory error handling for various sun4s
 *
 * memerr_init: run the memory-error setup routine that belongs to the
 * machine type held in `cpu'.  A machine type that is not listed here
 * gets no setup at all.
 */
memerr_init()
{
	if (cpu == CPU_SUN4_260)
		memerr_init260();
	else if (cpu == CPU_SUN4_470)
		memerr_init470();
	else if (cpu == CPU_SUN4_330)
		memerr_init330();
	else if (cpu == CPU_SUN4_110)
		memerr_init_parity();
}
/*
* Handle parity/ECC memory errors.
* XXX - Fix ECC handling
*/
memerr()
{
u_int err, vaddr;
err = MEMERR->me_err; /* read the error bits */
switch (cpu) {
case CPU_SUN4_110:
parity_error(err);
break;
case CPU_SUN4_330:
vaddr = MEMERR->me_vaddr;
memlog_330((int) vaddr, err);
clear_parity_err((addr_t) vaddr);
panic("Aynchronous parity error - DVMA operation");
/* NOTREACHED */
case CPU_SUN4_260:
case CPU_SUN4_470:
ecc_error(err);
break;
default:
printf("memory error handler: unknown CPU\n");
break;
}
}
#if defined(SUN4_110)
/*
 * Report a parity error and panic (SUN4_110 configurations).
 *
 * per: error register value passed by the caller.  It is replaced right
 *      away by a fresh read of MEMERR->me_err, so the argument value
 *      itself is never used.
 *
 * Prints the error register, its DVMA and context fields and the recorded
 * virtual address, then switches to the context named in the register to
 * look up the pte and print the physical address.  The previous context is
 * restored before the address register is cleared and panic() is called.
 */
parity_error(per)
u_int per;
{
char *mess = 0;
struct ctx *ctx;
struct pte pme;
extern struct ctx *ctxs;
per = MEMERR->me_err;
/*
* Since we are going down in
* flames, disable further
* memory error interrupts to
* prevent confusion.
*/
MEMERR->me_err &= ~ER_INTENA;
if ((per & PER_ERR) != 0) {
printf("Parity Error Register %b\n", per, PARERR_BITS);
mess = "parity error";
}
/* no parity bit set: fall back to a generic message */
if (!mess) {
printf("Memory Error Register %b\n", per, ECCERR_BITS);
mess = "unknown memory error";
}
printf("DVMA = %x, context = %x, virtual address = %x\n",
(per & PER_DVMA) >> PER_DVMA_SHIFT,
(per & PER_CTX) >> PER_CTX_SHIFT,
MEMERR->me_vaddr);
/* remember the current context; the pte lookup runs in the faulting one */
ctx = mmu_getctx();
mmu_setctx(&ctxs[(per & PER_CTX) >> PER_CTX_SHIFT]);
mmu_getpte((caddr_t) MEMERR->me_vaddr, &pme);
printf("pme = %x, physical address = %x\n", *(int *) &pme,
ptob(((struct pte *) & pme)->pg_pfnum)
+ (MEMERR->me_vaddr & PGOFSET));
/*
* print the U-number of the
* failure
*/
if (cpu == CPU_SUN4_110)
pr_u_num110((caddr_t) MEMERR->me_vaddr, per);
/* back to the context we were entered in */
mmu_setctx(ctx);
/*
* Clear the latching by
* writing to the top nibble of
* the memory address register
*/
MEMERR->me_vaddr = 0;
panic(mess);
/* NOTREACHED */
}
/*
 * Parity setup for SUN4_110 configurations: load the parity error
 * register with the check and interrupt-enable bits.
 */
memerr_init_parity()
{
	MEMERR->me_per = PER_CHECK | PER_INTENA;
}
#else
/*
 * Empty stand-ins used when SUN4_110 is not configured, so that
 * memerr_init() and memerr() can still reference these names.
 */
/* Parity setup: nothing to do without SUN4_110 support. */
memerr_init_parity()
{
}
/* Parity error handler: does nothing without SUN4_110 support. */
parity_error(per)
u_int per;
{
}
#endif /* SUN4_110 */
#ifdef SUN4_110
/*
 * Print the U-number (SIMM socket label) of the failing memory location.
 *
 * virt_addr: virtual address of the failure, handed to bank_num().
 * per:       parity error register value; its PER_ERR bits select one of
 *            the four SIMMs within the bank.
 *
 * When bank_num() cannot determine a bank, a "cannot be calculated"
 * notice is printed instead.
 */
pr_u_num110(virt_addr, per)
	caddr_t virt_addr;
	u_int per;
{
	static char *u_num[] = {
		"U1503", "U1502", "U1501", "U1500",
		"U1507", "U1506", "U1505", "U1504",
		"U1511", "U1510", "U1509", "U1508",
		"U1515", "U1514", "U1513", "U1512",
		"U1603", "U1602", "U1601", "U1600",
		"U1607", "U1606", "U1605", "U1604",
		"U1611", "U1610", "U1609", "U1608",
		"U1615", "U1614", "U1613", "U1612"
	};
	int which_bank;
	int which_simm;
	int table_base;
	int total_mem;

	which_bank = bank_num(virt_addr);
	if (which_bank == -1) {
		printf("\nNo U-number can be calculated for this ");
		printf("memory configuration.\n\n");
		return;
	}

	/* 4MB and 16MB machines start at the second half of the table */
	total_mem = *romp->v_memorysize;
	table_base = (total_mem == 4 * 1024 * 1024 ||
	    total_mem == 16 * 1024 * 1024) ? 16 : 0;

	/* error bits shifted right once; anything above 2 maps to slot 3 */
	which_simm = (per & PER_ERR) >> 1;
	if (which_simm > 2)
		which_simm = 3;

	printf(" simm = %s\n", u_num[4 * which_bank + which_simm + table_base]);
}
/*
 * Work out which memory bank backs virt_addr.
 *
 * The page map entry for the address is fetched with map_getpgmap() and
 * a few of its low bits are inverted to form the bank number; which bits
 * are used depends on the installed memory size in megabytes.
 *
 * Returns the bank number, or -1 when the memory size is not one of
 * 8, 16, 20 or 32 megabytes.
 */
bank_num(virt_addr)
	caddr_t virt_addr;
{
	u_int map_getpgmap();
	u_long pme;
	int mbytes;

	mbytes = *romp->v_memorysize;
	mbytes >>= 20;
	pme = map_getpgmap((addr_t) virt_addr);

	if (mbytes == 16)
		return ((pme & 0x3) ^ 0x3);
	if (mbytes == 8 || mbytes == 32)
		return ((pme & 0x7) ^ 0x7);
	if (mbytes == 20) {
		/* need bits 14:13 and 24 */
		return ((((pme >> 9) & 0x4) | (pme & 0x3)) ^ 0x7);
	}
	/* no U-number can be calculated for this memory size */
	return (-1);
}
#else
/*
 * Empty stand-in used when SUN4_110 is not configured: no U-number
 * is printed.
 */
/*ARGSUSED*/
pr_u_num110(virt_addr, per)
caddr_t virt_addr;
u_int per;
{
}
#endif /* SUN4_110 */
#if defined(SUN4_260) || defined(SUN4_470)
/*
* Since there is no implied ordering of the memory cards, we store
* a zero terminated list of pointers to eccreg's that are active so
* that we only look at existent memory cards during softecc() handling.
*/
/* zero-terminated list of responding cards, filled in by memerr_init260/470 */
union eccreg *ecc_alive[MAX_ECC + 1];
/* non-zero: softecc_260()/softecc_470() log each correctable error */
int prtsoftecc = 1;
/* set to 1 around the soft-error log calls ("not on the console") */
extern int noprintf;
/* multiplied by hz to give the timeout() delay before CE reporting resumes */
int memintvl = MEMINTVL;
/*
 * Handle an ECC memory error (SUN4_260 / SUN4_470 configurations).
 *
 * err: memory error register value read by the caller.
 *
 * A correctable error (EER_ERR field equal to EER_CE) is passed to the
 * per-machine soft-error routine and the function returns.  Every other
 * case is fatal: the error register, DVMA/context fields, virtual
 * address, pte and physical address are printed and the system panics.
 */
ecc_error(err)
u_int err;
{
char *mess = 0;
struct ctx *ctx;
struct pte pme;
extern struct ctx *ctxs;
if ((err & EER_ERR) == EER_CE) {
/* rewrite the register with the CE-enable bit dropped */
MEMERR->me_err = ~EER_CE_ENA & 0xff;
switch (cpu) {
case CPU_SUN4_260:
softecc_260();
break;
case CPU_SUN4_470:
softecc_470();
break;
default:
printf("ecc handler: unknown CPU\n");
}
return;
}
/*
* Since we are going down in
* flames, disable further
* memory error interrupts to
* prevent confusion.
*/
MEMERR->me_err &= ~ER_INTENA;
/*
* Pick the panic message.  The tests are not exclusive, so when several
* bits are set the last matching one (writeback) wins.
*/
if ((err & EER_ERR) != 0) {
printf("Memory Error Register %b\n", err, ECCERR_BITS);
if (err & EER_TIMEOUT)
mess = "memory timeout error";
if (err & EER_UE)
mess = "uncorrectable ECC error";
if (err & EER_WBACKERR)
mess = "writeback error";
}
/*
* NOTE(review): if EER_ERR bits were set but none of the three tests
* above matched, the register is printed a second time here.
*/
if (!mess) {
printf("Memory Error Register %b\n", err, ECCERR_BITS);
mess = "unknown memory error";
}
/* the fields below come from a fresh register read, not from `err' */
printf("DVMA = %x, context = %x, virtual address = %x\n",
(MEMERR->me_err & EER_DVMA) >> EER_DVMA_SHIFT,
(MEMERR->me_err & EER_CTX) >> EER_CTX_SHIFT,
MEMERR->me_vaddr);
/* look up the pte in the context named by the register, then switch back */
ctx = mmu_getctx();
mmu_setctx(&ctxs[(MEMERR->me_err & EER_CTX) >> EER_CTX_SHIFT]);
mmu_getpte((caddr_t) MEMERR->me_vaddr, &pme);
printf("pme = %x, physical address = %x\n", *(int *) &pme,
ptob(((struct pte *) & pme)->pg_pfnum)
+ (MEMERR->me_vaddr & PGOFSET));
mmu_setctx(ctx);
/*
* Clear the latching by
* writing to the top nibble of
* the memory address register
*/
MEMERR->me_vaddr = 0;
/*
* turn on interrupts, else sync will not go through
*/
set_intreg(IR_ENA_INT, 1);
panic(mess);
/* NOTREACHED */
}
#else
/*
 * Empty stand-in used when neither SUN4_260 nor SUN4_470 is configured:
 * ECC errors are ignored.
 */
/*ARGSUSED*/
ecc_error(err)
u_int err;
{
}
#endif /* SUN4_260 || SUN4_470 */
#if defined(SUN4_260)
/*
 * Syndrome lookup table used by memlog_260().
 *
 * m_syndrome is the syndrome value taken from a card's syndrome register;
 * m_bit is a two-digit string ("00" through "71") naming the failing bit.
 * memlog_260() searches the table linearly, prints m_bit, and converts it
 * with the local atoi() to choose the U-number row.
 */
struct {
u_char m_syndrome;
char m_bit[3];
} memlogtab[] = {
0x01, "64", 0x02, "65", 0x04, "66", 0x08, "67", 0x0B, "30", 0x0E, "31",
0x10, "68", 0x13, "29", 0x15, "28", 0x16, "27", 0x19, "26", 0x1A, "25",
0x1C, "24", 0x20, "69", 0x23, "07", 0x25, "06", 0x26, "05", 0x29, "04",
0x2A, "03", 0x2C, "02", 0x31, "01", 0x34, "00", 0x40, "70", 0x4A, "46",
0x4F, "47", 0x52, "45", 0x54, "44", 0x57, "43", 0x58, "42", 0x5B, "41",
0x5D, "40", 0x62, "55", 0x64, "54", 0x67, "53", 0x68, "52", 0x6B, "51",
0x6D, "50", 0x70, "49", 0x75, "48", 0x80, "71", 0x8A, "62", 0x8F, "63",
0x92, "61", 0x94, "60", 0x97, "59", 0x98, "58", 0x9B, "57", 0x9D, "56",
0xA2, "39", 0xA4, "38", 0xA7, "37", 0xA8, "36", 0xAB, "35", 0xAD, "34",
0xB0, "33", 0xB5, "32", 0xCB, "14", 0xCE, "15", 0xD3, "13", 0xD5, "12",
0xD6, "11", 0xD9, "10", 0xDA, "09", 0xDC, "08", 0xE3, "23", 0xE5, "22",
0xE6, "21", 0xE9, "20", 0xEA, "19", 0xEC, "18", 0xF1, "17", 0xF4, "16",
};
/*
 * Memory error setup for SUN4_260 configurations.
 *
 * Maps the ECC register page, probes each of the MAX_ECC possible card
 * register sets with peekc(), initializes the ones that respond and
 * records them in the zero-terminated ecc_alive[] list.
 */
memerr_init260()
{
register union eccreg **ecc_nxt;
register union eccreg *ecc;
/*
* Map in ECC error registers.
*/
map_setpgmap((addr_t) ECCREG,
PG_V | PG_KW | PGT_OBIO | btop(OBIO_ECCREG0_ADDR));
/*
* Go probe for all memory cards and perform initialization.
* The address of the cards found is stashed in ecc_alive[].
* We assume that the cards are already enabled and the base
* addresses have been set correctly by the monitor.
* Memory error interrupts will not be enabled until we take
* over the console (consconfig -> stop_mon_clock).
*/
ecc_nxt = ecc_alive;
for (ecc = ECCREG; ecc < &ECCREG[MAX_ECC]; ecc++) {
/* a probe result of -1 means no card answers at this slot */
if (peekc((char *) ecc) == -1)
continue;
MEMERR->me_err = 0; /* clear intr from mem register */
ecc->ecc_s.syn |= SY_CE_MASK; /* clear syndrome fields */
ecc->ecc_s.ena |= ENA_SCRUB_MASK;
ecc->ecc_s.ena |= ENA_BUSENA_MASK;
*ecc_nxt++ = ecc;
}
*ecc_nxt = (union eccreg *) 0; /* terminate list */
}
/*
 * Routine to turn on correctable error reporting.
 *
 * Sets the ENA_BUSENA_MASK bits in the card's enable register again.
 * softecc_260() clears those bits after a correctable error and
 * schedules this routine through timeout() to restore them later.
 *
 * ecc: register set of the card to re-enable.
 */
ce_enable(ecc)
register union eccreg *ecc;
{
ecc->ecc_s.ena |= ENA_BUSENA_MASK;
}
/*
* Probe memory cards to find which one(s) had ecc error(s).
* If prtsoftecc is non-zero, log messages regarding the failing
* syndrome. Then clear the latching on the memory card.
*/
softecc_260()
{
register union eccreg **ecc_nxt, *ecc;
for (ecc_nxt = ecc_alive; *ecc_nxt != (union eccreg *) 0; ecc_nxt++) {
ecc = *ecc_nxt;
if (ecc->ecc_s.syn & SY_CE_MASK) {
if (prtsoftecc) {
noprintf = 1; /* (not on the console) */
memlog_260(ecc); /* log the error */
noprintf = 0;
}
ecc->ecc_s.syn |= SY_CE_MASK; /* clear latching */
/*
* disable
* board
*/
ecc->ecc_s.ena &= ~ENA_BUSENA_MASK;
timeout(ce_enable, (caddr_t) ecc, memintvl * hz);
}
}
}
/*
 * Log a correctable ECC error latched on one SUN4_260 memory card.
 *
 * ecc: register set of the card that reported the error.
 *
 * Prints the card index, the board-relative error address and the
 * syndrome.  If the syndrome is found in memlogtab[], the failing bit and
 * the U-number of the part are printed as well; otherwise "No bit
 * information" is printed.
 *
 * The U-number row is chosen from address bits of the error and the board
 * type.  When those bits match none of the known rows, "U??" is printed
 * in place of the row number; previously an uninitialized value was
 * printed in that case.
 */
memlog_260(ecc)
	register union eccreg *ecc;
{
	register int i;
	register u_char syn;
	register u_int err_addr;
	int unum = 0;		/* 0 means "no row matched" */

	syn = (ecc->ecc_s.syn & SY_SYND_MASK) >> SY_SYND_SHIFT;
	/*
	 * Compute board offset of error by subtracting the board base
	 * address from the physical error address and then masking off
	 * the extra bits.
	 */
	err_addr = ((ecc->ecc_s.syn & SY_ADDR_MASK) << SY_ADDR_SHIFT);
	if (ecc->ecc_s.ena & ENA_TYPE_MASK)
		err_addr -=
		    (ecc->ecc_s.ena & ENA_ADDR_MASKL) << ENA_ADDR_SHIFTL;
	else
		err_addr -= (ecc->ecc_s.ena & ENA_ADDR_MASK) << ENA_ADDR_SHIFT;
	if ((ecc->ecc_s.ena & ENA_BDSIZE_MASK) == ENA_BDSIZE_8MB)
		err_addr &= MEG8;	/* mask to full address */
	if ((ecc->ecc_s.ena & ENA_BDSIZE_MASK) == ENA_BDSIZE_16MB)
		err_addr &= MEG16;	/* mask to full address */
	if ((ecc->ecc_s.ena & ENA_BDSIZE_MASK) == ENA_BDSIZE_32MB)
		err_addr &= MEG32;	/* mask to full address */
	printf("mem%d: soft ecc addr %x syn %b ",
	    ecc - ECCREG, err_addr, syn, SYNDERR_BITS);

	/* linear search for the syndrome */
	for (i = 0; i < (sizeof memlogtab / sizeof memlogtab[0]); i++)
		if (memlogtab[i].m_syndrome == syn)
			break;
	if (i < (sizeof memlogtab / sizeof memlogtab[0])) {
		printf("%s ", memlogtab[i].m_bit);
		/*
		 * Compute U number on board, first figure out which
		 * half is it.
		 */
		if (atoi(memlogtab[i].m_bit) >= LOWER) {
			if ((ecc->ecc_s.ena & ENA_TYPE_MASK) == TYPE0) {
				switch (err_addr & ECC_BITS) {
				case U14XX:
					unum = 14;
					break;
				case U16XX:
					unum = 16;
					break;
				case U18XX:
					unum = 18;
					break;
				case U20XX:
					unum = 20;
					break;
				}
			} else {
				switch (err_addr & PEG_ECC_BITS) {
				case U14XX:
					unum = 14;
					break;
				case U16XX:
					unum = 16;
					break;
				case PEG_U18XX:
					unum = 18;
					break;
				case PEG_U20XX:
					unum = 20;
					break;
				}
			}
		} else {
			if ((ecc->ecc_s.ena & ENA_TYPE_MASK) == TYPE0) {
				switch (err_addr & ECC_BITS) {
				case U15XX:
					unum = 15;
					break;
				case U17XX:
					unum = 17;
					break;
				case U19XX:
					unum = 19;
					break;
				case U21XX:
					unum = 21;
					break;
				}
			} else {
				switch (err_addr & PEG_ECC_BITS) {
				case U15XX:
					unum = 15;
					break;
				case U17XX:
					unum = 17;
					break;
				case PEG_U19XX:
					unum = 19;
					break;
				case PEG_U21XX:
					unum = 21;
					break;
				}
			}
		}
		if (unum != 0)
			printf("U%d%s\n", unum, memlogtab[i].m_bit);
		else
			printf("U??%s\n", memlogtab[i].m_bit);
	} else
		printf("No bit information\n");
}
/*
 * Convert a two-digit decimal string such as the m_bit entries of
 * memlogtab[] ("00" .. "71") to its integer value.
 *
 * This is not the C library atoi(): exactly two characters are examined,
 * and only the low four bits of each are used, so no validation of the
 * digits takes place.  An empty string yields 0 rather than reading the
 * byte after its terminator.
 *
 * num_str: pointer to the two-digit string.
 * Returns (first digit * 10) + second digit.
 */
int
atoi(num_str)
	register char *num_str;
{
	register int num;

	if (*num_str == '\0')
		return (0);
	num = (*num_str++ & 0xf) * 10;
	num = num + (*num_str & 0xf);
	return (num);
}
#else
/*
 * Empty stand-ins used when SUN4_260 is not configured.
 *
 * NOTE(review): this softecc_260() stub declares an `err' parameter, while
 * the real routine takes none and ecc_error() calls it without arguments.
 * Confirm the parameter can be dropped.
 */
/*ARGSUSED*/
softecc_260(err)
u_int err;
{
}
/* Nothing to initialize without SUN4_260 support. */
memerr_init260()
{
}
#endif /* SUN4_260 */
#ifdef SUN4_470
/*
* Moonshine memory boards have 4 banks of 72 bits per line,
* what follows is the mapping of the syndrome bits to U numbers
* for bank 0. The 64 data bits d[0:63] are followed by the 8
* syndrome/check bits s[0:7] or SX, S0, S1, S2, S4, S8, S16, S32.
*
* The U numbers for other banks can be calculated by multiplying the bank
* number by 200 and adding to the bank zero U numbers given below:
*/
/*
 * bit number to u number mapping
 *
 * Indexed by the bit position found in syntab[] (0-63 data bits, 64-71
 * check bits).  memlog_470() prints 2000 + u470[bit] + bank * 200 as the
 * U-number.
 */
unsigned char u470[72] = {
4, 5, 6, 7, 8, 9, 10, 11, /* bits 0-7 */
12, 13, 14, 15, 16, 17, 18, 19, /* bits 8-15 */
104, 105, 106, 107, 108, 109, 110, 111, /* bits 16-23 */
112, 113, 114, 115, 116, 117, 118, 119, /* bits 24-31 */
20, 21, 22, 23, 24, 25, 26, 27, /* bits 32-39 */
28, 29, 30, 31, 32, 33, 34, 35, /* bits 40-47 */
120, 121, 122, 123, 124, 125, 126, 127, /* bits 48-55 */
128, 129, 130, 131, 132, 133, 134, 135, /* bits 56-63 */
0, 1, 2, 3, 100, 101, 102, 103 /* check bits */
};
/*
 * syndrome to bit number mapping
 *
 * memlog_470() searches this table linearly for the syndrome byte; the
 * index of the match is the failing bit number, which is then used to
 * index u470[].
 *
 * NOTE(review): two entries look suspicious and should be checked against
 * the hardware documentation.  Every other value here also appears as a
 * syndrome in memlogtab[], but 0x69 (bits 24-31 row) and 0x45 (bits 40-47
 * row) do not, while memlogtab's 0xE9 and 0xAD have no counterpart here.
 * 0x69 is also the only entry with an even number of set bits.  They may
 * be typos for 0xe9 and 0xad.
 */
unsigned char syntab[] = {
0xce, 0xcb, 0xd3, 0xd5, 0xd6, 0xd9, 0xda, 0xdc, /* bits 0-7 */
0x23, 0x25, 0x26, 0x29, 0x2a, 0x2c, 0x31, 0x34, /* bits 8-15 */
0x0e, 0x0b, 0x13, 0x15, 0x16, 0x19, 0x1a, 0x1c, /* bits 16-23 */
0xe3, 0xe5, 0xe6, 0x69, 0xea, 0xec, 0xf1, 0xf4, /* bits 24-31 */
0x4f, 0x4a, 0x52, 0x54, 0x57, 0x58, 0x5b, 0x5d, /* bits 32-39 */
0xa2, 0xa4, 0xa7, 0xa8, 0xab, 0x45, 0xb0, 0xb5, /* bits 40-47 */
0x8f, 0x8a, 0x92, 0x94, 0x97, 0x98, 0x9b, 0x9d, /* bits 48-55 */
0x62, 0x64, 0x67, 0x68, 0x6b, 0x6d, 0x70, 0x75, /* bits 56-63 */
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 /* check bits */
};
/*
 * Memory error setup for SUN4_470 configurations.
 *
 * Maps the ECC register page, probes each of the MAX_ECC possible card
 * register sets with peekc(), resets the error state of the ones that
 * respond and records them in the zero-terminated ecc_alive[] list.
 * Finally the memory error register is cleared and loaded with the
 * interrupt-enable and correctable-error-enable bits.
 */
memerr_init470()
{
register union eccreg **ecc_nxt;
register union eccreg *ecc;
/*
* Map in ECC error registers.
*/
map_setpgmap((addr_t) ECCREG,
PG_V | PG_KW | PGT_OBIO | btop(OBIO_ECCREG1_ADDR));
ecc_nxt = ecc_alive;
for (ecc = ECCREG; ecc < &ECCREG[MAX_ECC]; ecc++) {
/* the error register is cleared before every probe */
MEMERR->me_err = 0;
/* a probe result of -1 means no card answers at this slot */
if (peekc((char *)ecc) == -1)
continue;
ecc->ecc_m.err = 0; /* clear error latching */
ecc->ecc_m.err = 1; /* clear CE led */
ecc->ecc_m.err = 2; /* clear UE led */
ecc->ecc_m.syn = 0; /* clear syndrome fields */
/*
* this is correct ordering for first time, the
* prom has already done this, paranoia
*/
ecc->ecc_m.ena |= MMB_ENA_ECC|MMB_ENA_BOARD|MMB_ENA_SCRUB;
*ecc_nxt++ = ecc;
}
*ecc_nxt = (union eccreg *) 0; /* terminate list */
MEMERR->me_err = 0;
MEMERR->me_err = EER_INTENA | EER_CE_ENA;
}
/*
 * turn on reporting of correctable ecc errors
 *
 * Scheduled through timeout() by softecc_470() after it has switched
 * correctable-error reporting off.  When prtsoftecc is set, a note is
 * printed with noprintf raised, the same way the soft-error log is.
 *
 * ecc: register set of the card whose latch is reset.
 */
enable_ce_470(ecc)
register union eccreg *ecc;
{
if (prtsoftecc) {
noprintf = 1;
printf("resetting ecc handling\n");
noprintf = 0;
}
ecc->ecc_m.err = 1; /* reset latching, turn off CE led */
MEMERR->me_err = EER_INTENA|EER_CE_ENA; /* reenable ecc reporting */
}
softecc_470()
{
register union eccreg **ecc_nxt, *ecc;
for (ecc_nxt = ecc_alive; *ecc_nxt != (union eccreg *) 0; ecc_nxt++) {
ecc = *ecc_nxt;
if (ecc->ecc_m.err & MMB_CE_ERROR) {
if (prtsoftecc) {
noprintf = 1; /* not on the console */
memlog_470(ecc); /* log the error */
noprintf = 0;
}
/*
* reset ecc errors in memory board
* 0 - enables latching of next ecc error
* 1 - enables latching of next ecc error and
* clears CE led
* 2 - enables latching of next ecc error and
* clears UE led
*/
ecc->ecc_m.err = 0;
/* disable ecc error reporting temporarily */
MEMERR->me_err &= ~(EER_CE_ENA);
/* turn ecc error reporting at a later time */
timeout(enable_ce_470, (caddr_t)ecc, memintvl*hz);
}
}
}
/*
 * Log a correctable ECC error latched on one SUN4_470 memory card.
 *
 * ecc: register set of the card that reported the error.
 *
 * The bank is taken from the lowest set MMB_BANK bit of the card's error
 * register and selects which byte of the syndrome register to decode.
 * The error address is extracted with a mask chosen by the board size
 * field.  The syndrome is then looked up in syntab[] to find the failing
 * bit, and u470[] gives the U-number.
 *
 * ffs() returns 0 when no bank bit is set, which would make bank -1 and
 * lead to a shift by a negative count.  That case is reported and
 * skipped.
 */
static
memlog_470(ecc)
	register union eccreg *ecc;
{
	register int bit;
	register u_char syn;
	register u_int err_addr;
	register int bank;
	register int mask;

	bank = ffs((long)ecc->ecc_m.err & MMB_BANK) - 1;
	if (bank < 0) {
		printf("mem%d: soft ecc, no bank latched\n", ecc - ECCREG);
		return;
	}
	/* one syndrome byte per bank */
	syn = (ecc->ecc_m.syn & (0xff << (8*bank))) >> (8*bank);
	switch ((ecc->ecc_m.ena & MMB_BDSIZ) >> MMB_BDSIZ_SHIFT) {
	case 1:
		mask = MMB_PA_128;
		break;
	default:
	case 0:
		mask = MMB_PA_32;
		break;
	}
	err_addr = (ecc->ecc_m.err & mask) >> MMB_PA_SHIFT;
	printf("mem%d: soft ecc addr %x syn %b ",
	    ecc - ECCREG, err_addr, syn, SYNDERR_BITS);
	/* the index of the matching syndrome is the failing bit number */
	for (bit = 0; bit < sizeof syntab; bit++)
		if (syntab[bit] == syn)
			break;
	if (bit < sizeof syntab) {
		printf("bit %d ", bit);
		printf("U%d\n", 2000 + u470[bit] + bank*200);
	} else
		printf("No bit information\n");
}
#else
/*
 * Empty stand-ins used when SUN4_470 is not configured.
 */
/* Nothing to initialize without SUN4_470 support. */
memerr_init470()
{
}
/* Correctable ECC errors are ignored without SUN4_470 support. */
softecc_470()
{
}
#endif /* SUN4_470 */
/*
 * Parity setup for the CPU_SUN4_330 case of memerr_init(): load the
 * parity error register with the check and interrupt-enable bits.
 */
memerr_init330()
{
	MEMERR->me_per = PER_CHECK | PER_INTENA;
}
#ifdef SUN4_330
int kill_on_parity = 1;
user_parity_recover(addr, errreg, rw)
register addr_t addr;
register unsigned errreg;
register enum seg_rw rw;
{
struct pte pte;
memlog_330((int) addr, errreg);
mmu_getpte(addr, &pte);
if (retry(addr, pte, 1) != -1) { /* re-read succeeded */
printf(" soft error, recovered \n"); /* continue */
return (1);
}
/*
* parity error still exists,
* try recovery
*/
if (!(pte.pg_v)) { /* paranoia checks */
panic("synchrnous parity error - user invalid page ");
}
if (!(pte.pg_m) && (u.u_exdata.ux_mag == ZMAGIC)) {
if (page_giveup(addr, pte)) {
/*
* fault in the
* page right
* away
*/
if (pagefault(addr, F_INVAL, rw, 0) == 0)
printf(" RECOVERED FROM PARITY ERROR \n");
else
panic("parity error: lost page");
} else {
panic("parity error: no page");
/* NOT REACHED */
}
if (parity_hard_error == 2)
panic("Many Hard parity errors, REPLACE SIMMS");
/* NOT REACHED */
} else if (kill_on_parity) {
printf("CANNOT RECOVER: User process %d killed\n",
u.u_procp->p_pid);
psignal(u.u_procp, SIGBUS);
} else {
panic("synchrnous parity error - user mode");
/* NOT REACHED */
}
MEMERR->me_vaddr = 0; /* clear the me_vaddr and cntl again */
return (1);
}
/*
 * Tables and message strings used by memlog_330() and simlog_330().
 * Both arrays are indexed by the configuration code memlog_330() extracts
 * from the configuration register.
 */
/* zero is not really a valid index into the next two arrays */
/*
 * xb2b: address (in bytes) compared against the error address to decide
 * between the first and second 3U card; also used as the base address of
 * the second card.
 */
static int xb2b[] = {
8 << 20, 96 << 20, 64 << 20, 48 << 20,
72 << 20, 40 << 20, 24 << 20, 16 << 20};
/*
 * xb1s: address shift used for the first 3U card.  Entry [2] is rewritten
 * at run time by memlog_330().
 */
static int xb1s[] = {
22, 24, 24, 22, 24, 24, 22, 22};
/* message for a single SIMM: U-number, then card name */
static char simerrfmt[] =
"Memory error in SIMM U%d on %s card\n";
/* message for a range of SIMMs: first and last U-number, then card name */
static char simerrsfmt[] =
"Memory error somewhere in SIMMs U%d through U%d on %s card\n";
/* card names substituted for %s above */
static char c_cpu[] = "CPU";
static char c_9u[] = "9U Memory";
static char c_3u1[] = "First 3U Memory";
static char c_3u2[] = "Second 3U Memory";
/* note the argument order: card name first, U-number(s) after */
#define SIMERR(s, u) printf(simerrfmt, u, s)
#define SIMERRS(s, u, v) printf(simerrsfmt, u, v, s)
static void
simlog_330(errreg, board, simno)
unsigned errreg;
char *board;
int simno;
{
/*
* If PA0 and PA1 are zeros,
* access goes to the third
* simm in the row, i.e. U703
* and so on.
*/
if (!(errreg & PER_ERR))
SIMERRS(board, simno, simno + 3);
if (errreg & PER_ERR00)
SIMERR(board, simno);
if (errreg & PER_ERR08)
SIMERR(board, simno + 1);
if (errreg & PER_ERR16)
SIMERR(board, simno + 2);
if (errreg & PER_ERR24)
SIMERR(board, simno + 3);
}
memlog_330(vaddr, errreg)
int vaddr;
unsigned errreg;
{
int paddr; /* 32-bit physical address of error */
int conf; /* memory configuration code */
int obmsize; /* bytes of onboard memory */
int obmshft; /* shift factor for for cpu card */
int x9ushft; /* shift factor for 9U mem card */
int shft; /* shift factor for 3U mem board */
int base; /* base address of "this" board */
char *c_3u; /* which 3U card is involved */
paddr = ((map_getpgmap((addr_t) vaddr) & 0x7FFFF) << 13) |
(vaddr & 0x1FFF);
printf("Parity Error: Physical Address 0x%x (Virtal Address 0x%x) Error Register %b\n",
paddr, vaddr, errreg,
"\020\001byte0\002byte1\003byte2\004byte3\005check\006test\007intena\010intr\011dvma");
conf = read_confreg();
obmshft = (conf & 1) ? 22 : 24; /* shift distance per bank */
obmsize = 2 << obmshft; /* always 2 sets of 4 simms */
conf = (conf & CONF) >> CONF_POS; /* leave only xb1 config */
if (paddr < obmsize) {
/*
* Error is somewhere
* on the CPU board.
*/
simlog_330(errreg, c_cpu,
1300 + 4 * (paddr >> obmshft));
} else {
/*
* If we have a 9U
* card, the error is
* somewhere on it.
*/
if ((*romp->v_memorysize - obmsize) > (48 << 20))
x9ushft = 24;
else
x9ushft = 22;
if ((*romp->v_memorysize - obmsize) == (32 << 20)) {
printf("If you have a 9U memory card populated with 1Meg SIMMs:\n");
simlog_330(errreg, c_9u,
1600 + 100 * ((paddr - obmsize) >> (x9ushft + 1)) +
4 * ((paddr >> x9ushft) & 1));
x9ushft = 24;
printf("If you have a 9U memory card populated with 4Meg SIMMs:\n");
} else
printf("If you have a 9U memory card:\n");
simlog_330(errreg, c_9u,
1600 + 100 * ((paddr - obmsize) >> (x9ushft + 1)) +
4 * ((paddr >> x9ushft) & 1));
printf("If you DO NOT have a 9U memory card:\n");
if (paddr < xb2b[conf]) {
/*
* Error is on the first 3U card;
* set xb1s[2] to the "other" simm size
*/
xb1s[2] = (22 + 24) - obmshft;
base = obmsize;
shft = xb1s[conf];
c_3u = c_3u1;
} else {
/*
* Error is on
* the second
* 3U card
*/
base = xb2b[conf];
if ((*romp->v_memorysize - xb2b[conf]) > (16 << 20))
shft = 24;
else
shft = 22;
c_3u = c_3u2;
}
/*
* First 8(32)meg is
* low half of 3U card,
* Second 8(32)meg is
* high half of 3U
* card. Selection of
* the 4(16)meg row is
* done by PA22(24).
*/
printf("If you DO NOT have a 9U memory card:\n");
simlog_330(errreg, c_3u,
700 + 100 * ((paddr - base) >> (shft + 1)) +
((paddr >> shft) & 1));
}
}
/*
 * retry()
 * Probe a location again to see whether the parity error persists.
 *
 * addr: address that took the error; it is rounded down to a long-word
 *       boundary first, since more than one byte may have been bad.
 * pte:  page table entry for addr (passed by value; only modified
 *       locally for the write test).
 * soft: non-zero - just read the location and check for an error;
 *       zero     - write a test pattern to the location, then read it.
 *
 * Returns the result of the read probe; -1 means the error is still
 * there.  A failed write-then-read test also counts as a hard error in
 * parity_hard_error.
 */
int
retry(addr, pte, soft)
	addr_t addr;
	struct pte pte;
	int soft;
{
	unsigned long pattern = 0xaaaaaaaa;
	long scratch = 0;	/* receives the value read by peekl() */
	long status = 0;
	long where;

	/* step back to the enclosing long word */
	for (where = (long) addr; where & 0x3; where--)
		addr--;

	if (vac)
		vac_flushone(addr);	/* always flush the line before use */

	if (soft == 0) {
		/*
		 * Make the page writable for the test.  The protection is
		 * not put back: the page has already been given up.
		 */
		pte.pg_prot = KW;
		mmu_setpte(addr, pte);
		*(long *) addr = pattern;	/* stingray has a write through cache */
	}

	/* read and check for error */
	status = peekl((long *) addr, (long *) &scratch);
	if (vac)
		vac_flushone(addr);

	if (!soft && status == -1) {
		printf(" HARD Parity error \n");
		parity_hard_error++;
	}
	return (status);
}
/*
* handle kernel parity error
* do a soft retry, i.e., just read again. If we get another parity
* error go and panic. Else just continue executing. If we get another
* parity error, clear the error before panic.
*/
kernel_parity_error(addr, errreg)
register addr_t addr;
register unsigned errreg;
{
struct pte pte;
memlog_330((int) addr, errreg); /* report error */
mmu_getpte(addr, &pte);
if (retry(addr, pte, 1) != -1)
return;
/*
* clear parity err else we
* might find ourselves with
* another parity error while
* dumping
*/
clear_parity_err(addr);
panic(" synchronous parity error - kernel"); /* GIVE UP */
/* NOT REACHED */
}
/*
* clear parity error
* Clear a kernel parity error if it is from kernel data space. Else during dump
* we get one more.
*/
clear_parity_err(addr)
register addr_t addr;
{
struct pte pte;
mmu_getpte(addr, &pte);
vac_flushone(addr);
if (pte.pg_prot == KW) /* if writable */
*addr = 0; /* clear the parity error */
vac_flushone(addr);
}
#endif /* SUN4_330 */