Files
seta75D 2e8a93c394 Init
2021-10-11 18:20:23 -03:00

2364 lines
56 KiB
C

/* @(#)vm_hat.c 1.1 92/07/30 SMI */
/*
* Copyright (c) 1988-90 by Sun Microsystems, Inc.
*/
/*
* VM - Hardware Address Translation management.
*
* This file implements the machine specific hardware translation
* needed by the VM system. The machine independent interface is
* described in <vm/hat.h> while the machine dependent interface
* and data structures are described in <machine/vm_hat.h>. For
* Sun computers, we actually share this same source for more than
* one architecture (Sun-2, Sun-3, and Sun-4), since the static
* segment and page map mmu's are very similar. For this reason,
* this file is located in the sun directory and we use ifdef's
* for the few cases where they differ enough to be noticed here.
* The actual loading of the hardware registers is done at the mmu
* layer which is different for each Sun architecture type. In
* reality we probably need a more general cross architecture
* sharing structure so that we can more easily share certain code
* (like this file) across SOME Sun architectures without giving
* the idea that the file MUST be shared across all Sun architectures.
*
* The hat layer manages the address translation hardware as a cache
* driven by calls from the higher levels in the VM system. Nearly
* all the details of how the hardware is managed should be invisible
* above this layer except for miscellaneous machine specific functions
* (e.g. mapin/mapout) that work in conjunction with this code. Other
* than a small number of machine specific places, the hat data
* structures seen by the higher levels in the VM system are opaque
* and are only operated on by the hat routines. Each address space
* contains a struct hat and a page contains an opaque pointer which
* is used by the hat code to hold a list of active translations to
* that page.
*/
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/debug.h>
#include <sys/user.h> /* for u_ru.ru_minflt */
#include <sys/trace.h>
#include <sys/systm.h>
#include <machine/pte.h>
#include <machine/cpu.h>
#include <machine/mmu.h>
#ifdef IOC
#include <machine/iocache.h>
#endif
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <vm/mp.h>
#include <vm/rm.h>
#include <vm/seg_u.h>
#include <vm/seg_vn.h>
#include <vm/vpage.h>
/*
* Private vm_hat data structures
*/
#ifdef KMON_DEBUG
/*
* Monitor locks. They are only declared when KMON_DEBUG is defined.
* NOTE(review): kmon_enter()/kmon_exit()/kcv_wait() are applied to these
* names unconditionally below, so those are presumably macros that discard
* their argument when KMON_DEBUG is off - confirm.
*/
static kmon_t ctx_lock; /* locks ctxs[] */
#ifdef MMU_3LEVEL
static kmon_t sme_lock; /* locks both smgrps[] and sments[] */
#endif MMU_3LEVEL
static kmon_t pme_lock; /* locks both pmgrps[] and pments[] */
#endif /*KMON_DEBUG*/
/*
* ctxhand is where hat_getctx begins its circular scan of ctxs[]; it is
* moved to whichever ctx that scan selects.
*/
static struct ctx *ctxhand;
#ifdef MMU_3LEVEL
/*
* smgrphand is set to smgrps by hat_init. smgrpmin and smgrpfree are
* presumably the smgrp counterparts of pmgrpmin and pmgrpfree; their
* users are not in this part of the file.
*/
static struct smgrp *smgrphand;
static struct smgrp *smgrpmin;
static struct smgrp *smgrpfree;
#endif MMU_3LEVEL
/*
* pmgrphand is set to pmgrps by hat_init and reset to pmgrpmin by
* hat_pmginit. pmgrpmin is the first unlocked, unkept pmgrp found by
* hat_pmginit. pmgrpfree heads the free list hat_pmginit builds through
* pmg_next.
*/
static struct pmgrp *pmgrphand;
static struct pmgrp *pmgrpmin;
static struct pmgrp *pmgrpfree;
/*
* Forward declarations of the private routines (old-style, so the
* argument lists are given only as comments).
*/
static struct pmgrp *hat_getpmg(/* addr, as */);
static void hat_pteunload(/* pmg, pme, addr, flags */);
static void hat_ptesync(/* pp, pmg, pme, addr, flags */);
static void hat_pmgfree(/* pmg */);
static void hat_pmglink(/* pmg, as, addr */);
static struct pmgrp *hat_pmgalloc(/* seg, addr */);
static void hat_wrongpmg(/* pmg, addr, as */);
#ifdef MMU_3LEVEL
static struct smgrp *hat_getsmg(/* addr */);
static void hat_smgfree(/* smg */);
static void hat_smglink(/* smg, as, addr */);
static void hat_smgalloc(/* as, addr, pmg */);
#endif MMU_3LEVEL
/*
* Semi-private vm_hat data structures. Other machine specific
* routines need to access these however.
*/
/* hat_getctx stamps ctx_time into c_time and then increments it */
u_short ctx_time; /* used by <machine/vax.s> */
#if !defined(sun4) && !defined(sun4c)
/*
* Neither sun4 nor sun4c: the tables are statically sized arrays.
*/
struct ctx ctxs[MNCTXS]; /* used by <machine/mmu.c> */
struct pmgrp pmgrps[MNPMGRPS]; /* used by <machine/mmu.c> */
struct pment pments[MNPMENTS]; /* used by <machine/machdep.c> */
#define hat_xfree(as) hat_xfree_other(as)
#define init_smgs()
#else
/*
* sun4/sun4c: the tables are reached through pointers.
* NOTE(review): where these pointers are set up is not visible here.
*/
struct ctx *ctxs, *ctxsNCTXS; /* used by <machine/mmu.c> */
struct pmgrp *pmgrps; /* used by <machine/mmu.c> */
struct pment *pments, *pmentsNPMENTS; /* used by <machine/machdep.c> */
#endif !sun4 && !sun4c
#ifdef MMU_3LEVEL
struct smgrp *smgrps; /* used by <machine/mmu.c> */
struct sment *sments, *smentsNSMENTS;
#define hat_xfree(as) hat_xfree_sun4(as)
#else
/*
* Without MMU_3LEVEL the smgrp operations expand to nothing, so callers
* can invoke them unconditionally. hat_xfree and init_smgs may already
* have been defined above with identical text.
*/
#define init_smgs()
#define hat_xfree(as) hat_xfree_other(as)
#define hat_smginit()
#define hat_smglink(smg, as, addr)
#define hat_smgalloc(as, addr, pmg)
#define hat_smgfree(smg)
#define hat_smgreserve(seg, addr)
#define vac_stolen_ctxflush() vac_ctxflush()
#endif /* MMU_3LEVEL */
/*
* hat layer statistics.
*
* XXX - this is just a start at collecting some interesting
* statistics. undoubtedly there are many more statistics
* that can and should be collected.
*/
/*
* Event counters for the hat layer; the single instance is also named
* hatcnt. In the part of the file shown here, hc_ctxalloc, hc_ctxstolen
* and hc_ctxpmgs are bumped by hat_getctx.
*/
struct hatcnt {
int hc_ctxalloc; /* number of context allocations */
int hc_ctxstolen; /* number of contexts stolen from other as's */
int hc_pmghave; /* number of pmg_allocs that already have pmg */
int hc_pmgalloc; /* number of pmg allocations */
int hc_pmgstolennoctx; /* pmgs stolen from as's with no ctx */
int hc_pmgstolenctx; /* pmgs stolen from other as's with a ctx */
int hc_smghave; /* number of smg_allocs that already have smg */
int hc_smgalloc; /* number of smg allocations */
int hc_smgstolennoctx; /* smgs stolen from procs with no ctx */
int hc_smgstolenctx; /* smgs stolen from other procs with a ctx */
int hc_ctxpmgs; /* number of pmgs found on as at ctxalloc */
} hatcnt;
/*
* Counters describing the cacheability of loaded translations. The
* cs_io*, cs_k*, cs_u* and cs_other counts are bumped at the end of
* hat_pteload; cs_unloadfix and cs_unloadnofix are bumped by hat_unlock.
*/
struct cache_stats {
int cs_ionc; /* non-cached IO translations */
int cs_ioc; /* cached IO translations */
int cs_knc; /* non-cached kernel translations */
int cs_kc; /* cached kernel translations */
int cs_unc; /* non-cached user translations */
int cs_uc; /* cached user translations */
int cs_other; /* other non-type 0 pages */
int cs_skip; /* # of skipped vac changes (IO & upages) */
int cs_kchange; /* # of kernel cached to non-cached changes */
int cs_uchange; /* # of user cached to non-cached changes */
int cs_unloadfix; /* # of unload's that made pages cachable */
int cs_unloadnofix; /* # of unload's that didn't make pages cachable */
} cache_stats;
extern char DVMA[]; /* addresses in kas above DVMA are for "IO" */
/*
* Machine specific public data structures.
*/
struct as kas; /* address space that hat_init binds to kctx */
struct ctx *kctx; /* &ctxs[KCONTEXT]; locked by hat_init */
struct pmgrp *pmgrp_invalid; /* &pmgrps[PMGRP_INVALID]; locked and kept by hat_init */
#ifdef MMU_3LEVEL
struct smgrp *smgrp_invalid; /* &smgrps[SMGRP_INVALID]; set by hat_init if mmu_3level */
#endif /* MMU_3LEVEL */
/*
* XXX - should be made more efficient by using pment
* indexing instead of virtual address checking.
*
* VAC_MASK(a) keeps the bits of address `a' selected by both
* MMU_PAGEMASK and (shm_alignment - 1). VAC_ALIGNED(a1, a2) is true
* when two addresses agree in those bits.
* NOTE(review): this assumes shm_alignment is a power of two - confirm.
*/
#define VAC_MASK(a) (((u_int)(a) & MMU_PAGEMASK) & (shm_alignment - 1))
#define VAC_ALIGNED(a1, a2) ((VAC_MASK(a1) ^ VAC_MASK(a2)) == 0)
/*
* These macros allow us to walk the translation list in a sane manner.
* The list walking must be protected, and if the translation we land
* on is interrupt replaceable, all work on that translation must also
* be protected. This complexity is hidden in the macros. NOTE that these
* macros assume that variables "pme", "pmg", "pp", and "s" are declared as
* locals in the calling function. The sequence for use is as follows:
*
* PP_LIST_OPEN
* {
* do operations;
* PP_LIST_NEXT(next item)
* }
* PP_LIST_CLOSE
*
* NOTE that you CANNOT put a semicolon after PP_LIST_OPEN or it will not
* function.
*/
/*
* PP_LIST_OPEN: raise to splvm (old level saved in `s'), clear `pmg',
* position on the first pme of pp->p_mapping through PP_LIST_NEXT, and
* open a `while (pme)' loop whose body the caller supplies.
*/
#define PP_LIST_OPEN s = splvm(); \
pmg = (struct pmgrp *)NULL; \
PP_LIST_NEXT((struct pment *)pp->p_mapping) \
while (pme)
/*
* PP_LIST_NEXT(x): step to pme `x'. If the priority was dropped for the
* previous entry (s < 0) it is raised again first. The keep on the
* previous pmg is released. If the new pme is non-NULL, its pmgrp is
* found from the pme's index in pments[] and kept. If that pme is not
* interrupt replaceable, the priority is restored and `s' is set to -1
* to record that. Expands to several statements, so it must not be used
* as the unbraced body of an if/else.
*/
#define PP_LIST_NEXT(x) if (s < 0) \
s = splvm(); \
if (pmg) \
pmg->pmg_keepcnt--; \
pme = x; \
if (pme) { \
pmg = &pmgrps[(pme-pments)>>NPMENTPERPMGRPSHIFT]; \
pmg->pmg_keepcnt++; \
if (!pme->pme_intrep) { \
(void) splx(s); \
s = -1; \
} \
}
/*
* PP_LIST_CLOSE: restore the priority saved in `s'. The loop only ends
* when PP_LIST_NEXT has set pme to NULL, in which case `s' holds a real
* priority level rather than -1.
*/
#define PP_LIST_CLOSE (void) splx(s);
/*
*
* The next set of routines implements the machine
* independent hat interface described in <vm/hat.h>
*
*/
/*
* Initialize the hardware address translation structures.
* Called by startup.
*/
void
hat_init()
{
register struct ctx *ctx;
register struct pmgrp *pmg;
register struct pment *pme;
register int i;
i = 0;
for (ctx = ctxs; ctx < &ctxs[NCTXS]; ctx++)
ctx->c_num = i++;
ctxhand = ctxs;
#ifdef MMU_3LEVEL
if (mmu_3level) {
register struct smgrp *smg;
register struct sment *sme;
i = 0;
sme = sments;
for (smg = smgrps; smg < &smgrps[NSMGRPS]; smg++) {
smg->smg_num = i++;
smg->smg_sme = sme;
sme += NSMENTPERSMGRP;
}
smgrps[SMGRP_INVALID].smg_lock = 1; /* lock invalid smgrp */
smgrps[SMGRP_INVALID].smg_keepcnt++;
smgrp_invalid = &smgrps[SMGRP_INVALID];
smgrphand = smgrps;
}
#endif MMU_3LEVEL
i = 0;
pme = pments;
for (pmg = pmgrps; pmg < &pmgrps[NPMGRPS]; pmg++) {
pmg->pmg_num = i++;
pmg->pmg_pme = pme;
pme += NPMENTPERPMGRP;
}
pmgrps[PMGRP_INVALID].pmg_lock = 1; /* lock invalid pmgrp */
pmgrps[PMGRP_INVALID].pmg_keepcnt++;
pmgrp_invalid = &pmgrps[PMGRP_INVALID];
pmgrphand = pmgrps;
/*
* For now, we just grab a context and keep it locked, as well
* as locking all of the kernel which was loaded into memory.
*/
kctx = &ctxs[KCONTEXT];
kctx->c_lock = 1;
kctx->c_as = &kas;
kas.a_hat.hat_ctx = kctx;
}
/*
 * Release every hat resource owned by address space `as'.
 * Called from as_free when an address space is being
 * destroyed and when it is to be "swapped out".
 *
 * If the as owns a context we wait until that context is unlocked,
 * switch to it, free the mapping resources, and detach the context.
 * If it owns none, the mapping resources are freed after the ctx
 * monitor has been left. Either way we finish in the kernel context.
 *
 * XXX - should we do anything about locked translations here?
 */
void
hat_free(as)
	register struct as *as;
{
	register struct ctx *ctx;

	kmon_enter(&ctx_lock);
	/* Re-read the ctx after every wait; it may have changed meanwhile. */
	while ((ctx = as->a_hat.hat_ctx) != NULL && ctx->c_lock) {
		kcv_wait(&ctx_lock, (char *)ctx);
	}
	if (ctx != NULL) {
		mmu_setctx(ctx);
		/*
		 * free the rest of the hardware mapping resources now
		 */
		hat_xfree(as);
		as->a_hat.hat_ctx = NULL;
		ctx->c_as = NULL;
		kcv_broadcast(&ctx_lock, (char *)ctx);
	}
	kmon_exit(&ctx_lock);

	/*
	 * With no context there was nothing to do under the monitor;
	 * free the hardware mapping resources here instead.
	 */
	if (ctx == NULL)
		hat_xfree(as);
	mmu_setctx(kctx);	/* lost ctx, run in kernel context */
}
/*
* Set up addr to map to page pp with protection prot.
*/
void
hat_memload(seg, addr, pp, prot, lock)
struct seg *seg;
addr_t addr;
struct page *pp;
u_int prot;
int lock;
{
struct pte pte;
hat_mempte(pp, prot, &pte);
hat_pteload(seg, addr, pp, pte, lock ? PTELD_LOCK : 0);
}
/*
 * Build a struct pte from the device's pf bits and virtual protection
 * `prot', load it for address `addr', and count the load as a minor
 * fault. A non-zero `lock' requests a locked translation.
 */
void
hat_devload(seg, addr, pf, prot, lock)
	struct seg *seg;
	addr_t addr;
	int pf;
	u_int prot;
	int lock;
{
	struct page *page_numtouserpp();
	struct page *pp = NULL;
	union {
		struct pte u_pte;
		int u_pf;
	} newpte;
	register int opri = -1;		/* -1: priority was never raised */

	/*
	 * If the request is to load a page which is really something
	 * for which we have a struct page, then we must be sure we
	 * maintain cache consistency. However, we don't want to
	 * maintain such consistency for processes just running through
	 * physical memory, so we only pass along the page struct if it's
	 * not in a transition state. This is why we use page_numtouserpp
	 * instead of page_numtopp here. This should fix some readers
	 * of /dev/mem, but also allow those using the window system lock
	 * structures to work right too.
	 */
	if ((pf & PGT_MASK) == PGT_OBMEM) {
		/* Stay at splvm from the page lookup until the load is done. */
		opri = splvm();
		pp = page_numtouserpp((u_int)(pf & PG_PFNUM));
	}

	newpte.u_pf = pf & PG_PFNUM;
	newpte.u_pte.pg_v = 1;
	newpte.u_pte.pg_prot = hat_vtop_prot(prot);
	hat_pteload(seg, addr, pp, newpte.u_pte, lock ? PTELD_LOCK : 0);
	u.u_ru.ru_minflt++;

	if (opri != -1)
		(void) splx(opri);
}
/*
 * Release one hardware address translation lock on the given address.
 * For the Sun MMU, this means decrementing the counter on the pmgrp.
 *
 * Panics if the address has no valid pmgrp or the pmgrp is not held
 * at least twice (once by our own hat_getpmg, once by the lock being
 * released).
 */
void
hat_unlock(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct pmgrp *pmg;
	register int s;

	hat_setup(seg->s_as);
	pmg = hat_getpmg(addr, seg->s_as);
	if (pmg == pmgrp_invalid || pmg->pmg_keepcnt < 2)
		panic("hat_unlock");
#ifdef VAC
	if (vac && pmg->pmg_keepcnt == 2) {
		register struct pment *pme;
		register struct pment *epme;
		struct page *pp;

		/*
		 * This is the last lock on the pmgrp, so see whether any
		 * non-cached page can become cacheable again. The approach
		 * is deliberately simple: unload every valid translation in
		 * the pmgrp whose page is currently marked non-cacheable.
		 * This situation is rare, so efficiency is not a concern.
		 */
		pme = pmg->pmg_pme;
		epme = pme + NPMENTPERPMGRP;
		for (; pme < epme; pme++) {
			if (!pme->pme_valid)
				continue;
			pp = pme->pme_page;
			if (pp == NULL || !pp->p_nc)
				continue;
			hat_pteunload(pmg, pme, (addr_t)NULL, HAT_RMSYNC);
			/*
			 * Count whether dropping the translation was
			 * enough to make the page cacheable again.
			 */
			if (pp->p_nc)
				cache_stats.cs_unloadnofix++;
			else
				cache_stats.cs_unloadfix++;
		}
	}
#endif /* VAC */
	s = splvm();
	pmg->pmg_keepcnt -= 2;	/* once for hat_getpmg, once for unlock */
	(void) splx(s);
}
/*
* Change the protections in the virtual address range
* given to the specified virtual protection. If
* vprot == ~PROT_WRITE, then all the write permission
* is taken away for the current translations, else if
* vprot == ~PROT_USER, then all the user permissions
* are taken away for the current translations, otherwise
* vprot gives the new virtual protections to load up.
*
* addr and len must be MMU_PAGESIZE aligned.
*/
void
hat_chgprot(seg, addr, len, vprot)
struct seg *seg;
addr_t addr;
u_int len;
u_int vprot; /* virtual page protections */
{
register addr_t a, ea;
register struct pmgrp *pmg = NULL;
register u_int pprot; /* physical page protections */
register int newprot;
struct pte pte;
/*
* For an explicit protection the physical value is the same for every
* page, so compute it once. For the two "remove" forms pprot is
* derived per page from the existing pte inside the loop.
*/
if (vprot != ~PROT_WRITE && vprot != ~PROT_USER)
pprot = hat_vtop_prot(vprot);
hat_setup(seg->s_as);
for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) {
/*
* On the first page, and whenever `a' enters a new pmgrp, release
* the keep on the previous pmgrp and look up the new one.
*/
if (pmg == NULL || ((u_int)a & (PMGRPSIZE - 1)) < MMU_PAGESIZE){
if (pmg) {
register int s;
s = splvm();
pmg->pmg_keepcnt--;
(void) splx(s);
}
pmg = hat_getpmg(a, seg->s_as);
if (pmg == pmgrp_invalid) {
/*
* Bump up `a' to avoid checking all
* the pte's in the invalid pmgrp.
* (`a' is left on the last page of this pmgrp; the loop
* increment then moves it to the start of the next one.)
*/
a = (addr_t)((u_int)a & ~(PMGRPSIZE - 1)) +
PMGRPSIZE - MMU_PAGESIZE;
continue;
}
}
mmu_getpte(a, &pte);
if (!pte_valid(&pte))
continue;
if (vprot == ~PROT_WRITE) {
/* Remove write permission: KW -> KR, UW -> UR, others unchanged. */
switch (pte.pg_prot) {
case KW:
pprot = KR;
newprot = 1;
break;
case UW:
pprot = UR;
newprot = 1;
break;
default:
newprot = 0;
break;
}
} else if (vprot == ~PROT_USER) {
#ifdef sun2
/* XXX - need a better way to do this */
if (pte.pg_prot & 07) {
pprot = pte.pg_prot & ~07;
newprot = 1;
} else {
newprot = 0;
}
#else sun2
/* Remove user permission: UW -> KW, UR -> KR, others unchanged. */
switch (pte.pg_prot) {
case UW:
pprot = KW;
newprot = 1;
break;
case UR:
pprot = KR;
newprot = 1;
break;
default:
newprot = 0;
break;
}
#endif sun2
} else if (pte.pg_prot != pprot) {
newprot = 1;
} else {
newprot = 0;
}
if (newprot) {
pte.pg_prot = pprot;
#ifdef VAC
/* Flush a cacheable, referenced page before its pte is rewritten. */
if (vac && !pte.pg_nc && pte.pg_r)
vac_pageflush(a);
#endif VAC
mmu_setpte(a, pte);
}
}
/* Release the keep on the last pmgrp looked up; none if len was 0. */
if (pmg != NULL) {
register int s;
s = splvm();
pmg->pmg_keepcnt--;
(void) splx(s);
}
}
/*
 * Re-associate the mappings in [addr..addr+len) from segment `seg'
 * with segment `nseg'. This hat implementation keeps no per-segment
 * cache of mappings, so there is nothing to do.
 */
/*ARGSUSED*/
void
hat_newseg(seg, addr, len, nseg)
	struct seg *seg;
	addr_t addr;
	u_int len;
	struct seg *nseg;
{
	/* Intentionally empty. */
}
/*
 * Unload all the mappings in the range [addr..addr+len).
 *
 * addr and len must be MMU_PAGESIZE aligned.
 *
 * Returns at once if the address space owns no pmgrps. An empty range
 * (len == 0) is a no-op beyond the hat_setup call.
 */
void
hat_unload(seg, addr, len)
	struct seg *seg;
	addr_t addr;
	u_int len;
{
	register addr_t a;
	register struct pment *pme;
	register struct pmgrp *pmg = NULL;
	register int s;
	addr_t ea;

	if (seg->s_as->a_hat.hat_pmgrps == NULL) {
		/*
		 * If there are no allocated pmgrps for this
		 * address space, we don't want to do anything.
		 */
		return;
	}
	hat_setup(seg->s_as);
	for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) {
		/*
		 * On the first page, and whenever `a' enters a new pmgrp,
		 * release the keep on the previous pmgrp and look up the
		 * new one; otherwise just step to the next pme.
		 */
		if (pmg == NULL ||
		    ((u_int)a & (PMGRPSIZE - 1)) < MMU_PAGESIZE) {
			if (pmg != NULL) {
				s = splvm();
				pmg->pmg_keepcnt--;
				(void) splx(s);
			}
			pmg = hat_getpmg(a, seg->s_as);
			if (pmg == pmgrp_invalid) {
				/*
				 * Bump up `a' to avoid checking all
				 * the pme's in the invalid pmgrp.
				 */
				a = (addr_t)((u_int)a & ~(PMGRPSIZE - 1)) +
				    PMGRPSIZE - MMU_PAGESIZE;
				continue;
			}
			pme = &pmg->pmg_pme[mmu_btop(a - pmg->pmg_base)];
		} else {
			pme++;
		}
		/*
		 * Throw out the mapping. For a nosync translation one
		 * keep is dropped first, and the count must be at least
		 * 2 because the hat_getpmg keep is still outstanding.
		 */
		if (pme->pme_nosync) {
			if (pmg->pmg_keepcnt < 2)
				panic("hat_unload - pmg not kept");
			s = splvm();
			pmg->pmg_keepcnt--;
			(void) splx(s);
			hat_pteunload(pmg, pme, a, HAT_NOSYNC | HAT_VADDR);
		} else {
			hat_pteunload(pmg, pme, a, HAT_RMSYNC | HAT_VADDR);
		}
	}
	/*
	 * Release the keep on the last pmgrp looked up. If the loop never
	 * ran (len == 0) pmg is still NULL and there is nothing to
	 * release; hat_chgprot guards its final release the same way.
	 */
	if (pmg != NULL) {
		s = splvm();
		pmg->pmg_keepcnt--;
		(void) splx(s);
	}
}
/*
* Unload all the hardware translations that map page `pp'.
*
* The locals pmg, pme and s are required by the PP_LIST_* macros.
*/
void
hat_pageunload(pp)
register struct page *pp;
{
register struct pmgrp *pmg;
register struct pment *pme;
int s;
PP_LIST_OPEN
{
hat_pteunload(pmg, pme, (addr_t)NULL, HAT_RMSYNC);
/*
* hat_pteunload has just unlinked this pme from the page's mapping
* list, so continue from the list head rather than from pme_next.
*/
PP_LIST_NEXT((struct pment *)pp->p_mapping)
}
PP_LIST_CLOSE
}
/*
* Get all the hardware dependent attributes for a page struct
*
* Walks every translation of `pp' and calls hat_ptesync on each with
* HAT_RMSYNC. The locals pmg, pme and s are required by the PP_LIST_*
* macros.
*/
void
hat_pagesync(pp)
struct page *pp;
{
register struct pment *pme;
register struct pmgrp *pmg;
int s;
PP_LIST_OPEN
{
/*
* Get page dependent info from hardware for
* each translation, but don't unload them.
*/
hat_ptesync(pp, pmg, pme, (addr_t)NULL, HAT_RMSYNC);
/* Nothing was unlinked, so follow the pme_next chain. */
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
}
PP_LIST_CLOSE
}
/*
* Returns the page frame number for a given kernel virtual address.
*/
u_int
hat_getkpfnum(addr)
addr_t addr;
{
struct pte pte;
mmu_getkpte(addr, &pte);
return (MAKE_PFNUM(&pte));
}
/*
* End of machine independent interface routines.
*
* The next few routines implement some machine dependent functions
* need for the Sun MMU. Note that each hat implementation can define
* whatever additional interfaces that make sense for that machine
* These routines are defined in <machine/vm_hat.h>.
*
* Start machine specific interface routines.
*/
/*
 * Reserve the pmgrp covering `addr' during kernel initialization
 * without unloading anything. If the pmgrp is valid and not yet owned,
 * it is linked into the address space (and the matching smgrp is
 * reserved). The keep taken by hat_getpmg is deliberately left in
 * place on return.
 *
 * Panics if the address space has no context or the pmgrp already
 * belongs to a different address space.
 */
void
hat_pmgreserve(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as *as = seg->s_as;
	register struct pmgrp *pmg;
	struct as *owner;

	pmg = hat_getpmg(addr, as);	/* keeps the pmg for us */
	if (as->a_hat.hat_ctx == NULL)
		panic("hat_pmgreserve");
	owner = pmg->pmg_as;
	if (owner != NULL && owner != as)
		panic("hat_pmgreserve");

	/* Already linked to this as, or nothing mapped here: done. */
	if (owner != NULL || pmg == pmgrp_invalid)
		return;

	hat_pmglink(pmg, as, addr);
	hat_smgreserve(seg, addr);
}
/*
* Initialize all the unlocked pmgs to have invalid pme's
* and add them to the free list.
* This routine is called during startup after all the
* kernel pmgs have been reserved. This routine will
* also set the pmgrpmin variable for use in hat_pmgalloc.
*
* Runs at splvm with pme_lock held. On exit pmgrphand equals pmgrpmin,
* SEGTEMP has been invalidated, and hat_smginit has been called.
*/
void
hat_pmginit()
{
register struct pmgrp *pmg;
register addr_t addr;
int s;
s = splvm();
kmon_enter(&pme_lock);
for (pmg = pmgrps; pmg < &pmgrps[NPMGRPS]; pmg++) {
/* Locked or kept pmgrps are in use and are left alone. */
if (pmg->pmg_lock || pmg->pmg_keepcnt != 0)
continue;
/* Remember the first pmgrp that was free to take. */
if (pmgrpmin == NULL)
pmgrpmin = pmg;
/*
* Install the pmgrp at SEGTEMP so its ptes can be written, then
* invalidate each of them.
* NOTE(review): this loop steps by PAGESIZE where the other pte
* loops in this file step by MMU_PAGESIZE - confirm they are equal.
*/
mmu_settpmg(SEGTEMP, pmg);
for (addr = SEGTEMP; addr < SEGTEMP + PMGRPSIZE;
addr += PAGESIZE)
mmu_setpte(addr, mmu_pteinvalid);
/* Push onto the head of the free list. */
pmg->pmg_next = pmgrpfree;
pmgrpfree = pmg;
}
pmgrphand = pmgrpmin;
mmu_pmginval(SEGTEMP);
kmon_exit(&pme_lock);
(void) splx(s);
hat_smginit();
}
/*
* Set addr in segment seg to use pte to (possibly) map to page pp.
* This is the common routine used for hat_memload and hat_devload
* in addition to the machine dependent mapin implementation.
*
* seg - segment (and through it the address space) being mapped
* addr - virtual address to load
* pp - page being mapped, or NULL if there is no page struct
* pte - pte to load; passed by value and adjusted locally
* flags - PTELD_LOCK, PTELD_INTREP, PTELD_NOSYNC, PTELD_IOCACHE
*
* Panics if pp is on the free list, if the pme already maps a
* different page, or if a reload changes the intrep/nosync flags.
*/
void
hat_pteload(seg, addr, pp, pte, flags)
struct seg *seg;
addr_t addr;
struct page *pp;
struct pte pte;
int flags;
{
register struct pment *ppme;
register struct pmgrp *ppmg;
int s;
if (pp != NULL && pp->p_free)
panic("hat_pteload free page");
/*
* Get the pmgrp for addr and the pme slot for this page within it.
* hat_pmgalloc leaves a keep on the pmgrp; it is dropped at the end
* of this routine unless PTELD_LOCK was requested.
*/
ppmg = hat_pmgalloc(seg, addr);
ppme = &ppmg->pmg_pme[mmu_btop(addr - ppmg->pmg_base)];
/*
* We must be sure that setting the pte and adding to the list
* of mappings is atomic.
*/
s = splvm();
#ifdef VAC
/*
* If there's no page structure associated with this mapping,
* or the vac is turned off, or the page is non-cacheable,
* then force the mapping to be non-cached.
*/
if (pp == NULL || !vac || pp->p_nc)
pte.pg_nc = 1;
#endif VAC
if (ppme->pme_valid) {
/*
* Reloading a translation - be sure to preserve the
* existing ref and mod bits for this translation.
*
* XXX - should cache all the attributes of a loaded
* translation in the pme structure so that we can
* avoid reloading all together unless something
* is actually going to change.
*/
struct pte opte;
mmu_getpte(addr, &opte);
pte.pg_r = opte.pg_r;
pte.pg_m = opte.pg_m;
} else {
ppme->pme_valid = 1;
}
#ifdef IOC
if (ioc) {
struct pte *p = &pte;
if (flags & PTELD_IOCACHE) {
ioc_pteset(p);
}
ioc_mbufset(p, addr);
}
#endif IOC
mmu_setpte(addr, pte); /* load the pte into hardware mmu */
/*
* Check to see if this pme needs to be added
* to the list of pme's mapping this page.
*/
if (pp != ppme->pme_page) {
if (ppme->pme_page != NULL)
panic("hat_pteload");
ASSERT(pp != NULL && ppme->pme_page == NULL);
/* Push this pme on the head of the page's mapping list. */
ppme->pme_page = pp;
ppme->pme_next = PMENXT_INDEX(pp->p_mapping);
pp->p_mapping = (caddr_t)ppme;
(void) splx(s);
seg->s_as->a_rss += 1;
#ifdef VAC
/*
* If (vac) active, then check for conflicts.
* A conflict exists if the new and existent mappings
* do not match in their "shm_alignment" fields
* XXX and one of them is writable XXX. If conflicts
* exist, the extant mappings are flushed UNLESS
* one of them is locked. If one of them is locked,
* then the mappings are flushed and converted to
* non-cacheable mappings [must be deconverted in
* hat_pteunload].
* XXX need to store protections in pme
* to employ writable optimization.
*/
if (vac && !pp->p_nc) {
struct pmgrp *pmg; /* temporary pmg */
struct pment *pme; /* temporary pme */
struct pmgrp *pmgpc; /* user's text pmg */
struct pmgrp *pmgsp; /* user's stack pmg */
int ccf; /* cache conflict found flag */
int first = 1;
/*
* mappings to the user's current stack and
* text locations must be locked in memory,
* or we run the risk of getting into an
* infinite paging loop if the program tries
* to read the physical pages containing either
* via a mapping that is not cache aliased.
*/
s = splvm();
if ((u.u_ar0 != (int *)0) && (u.u_ar0[SP] != 0)) {
pmgsp = mmu_getpmg ((addr_t)u.u_ar0[SP]);
if (pmgsp != (struct pmgrp *)0)
pmgsp->pmg_keepcnt ++;
} else
pmgsp = (struct pmgrp *)0;
if ((u.u_ar0 != (int *)0) && (u.u_ar0[PC] != 0)) {
pmgpc = mmu_getpmg ((addr_t)u.u_ar0[PC]);
if (pmgpc != (struct pmgrp *)0)
pmgpc->pmg_keepcnt ++;
} else
pmgpc = (struct pmgrp *)0;
(void)splx(s);
ccf = 0;
PP_LIST_OPEN
{
/* Skip the translation we just loaded. */
if (pme == ppme) {
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
continue;
}
/*
* If the first other mapping examined is cache-aligned with
* addr, end the walk immediately (next = NULL); the remaining
* mappings are not inspected.
*/
if (first && VAC_ALIGNED(addr, pmg->pmg_base +
mmu_ptob(pme - pmg->pmg_pme))) {
PP_LIST_NEXT((struct pment *)NULL)
continue;
}
first = 0;
/*
* Compare keep to 1 because
* list walker keeps it too.
*/
if (pmg->pmg_lock || pmg->pmg_keepcnt > 1) {
/* Held mapping: it cannot be unloaded, so note the conflict. */
ccf = 1;
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
continue;
}
hat_pteunload(pmg, pme, (addr_t)NULL,
HAT_RMSYNC);
/* That pme was unlinked, so restart from the list head. */
PP_LIST_NEXT((struct pment *)pp->p_mapping)
}
PP_LIST_CLOSE
/*
* Scan complete; we can release the locks
* on the text and stack pages now.
* NOTE(review): these decrements are not bracketed by splvm/splx,
* unlike the matching increments above.
*/
if (pmgsp != (struct pmgrp *)0)
pmgsp->pmg_keepcnt --;
if (pmgpc != (struct pmgrp *)0)
pmgpc->pmg_keepcnt --;
if (ccf) {
pte.pg_nc = 1;
pp->p_nc = 1;
/*
* This time we don't exclude our translation,
* so it will get remarked noncacheable.
*/
PP_LIST_OPEN
{
hat_ptesync(pp, pmg, pme, (addr_t)NULL,
HAT_NCSYNC);
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
}
PP_LIST_CLOSE
}
}
#endif VAC
} else {
(void) splx(s);
/* A reload must not change the intrep/nosync attributes. */
if (pp != NULL &&
((ppme->pme_intrep != ((flags & PTELD_INTREP) != 0)) ||
(ppme->pme_nosync != ((flags & PTELD_NOSYNC) != 0))))
panic("pteload - remap flags");
#ifdef VAC
/*
* Reloading a pte for an already mapped page. If
* the page is "real", then if we've got a VAC, then
* flush the cache to ensure no protection mismatches
* between cache and MMU.
* XXX - need to store protections in pme to avoid this.
*/
if ((pp != NULL) && vac && pte.pg_r)
vac_pageflush(addr);
#endif VAC
}
ppme->pme_intrep = (flags & PTELD_INTREP) != 0;
ppme->pme_nosync = (flags & PTELD_NOSYNC) != 0;
/* keep some statistics on the cache-ability of the translation */
if (pte.pg_type == OBMEM) {
if (seg->s_as == &kas) {
if (addr >= DVMA) {
#ifdef VAC
if (!pte.pg_nc)
cache_stats.cs_ioc++;
else
#endif VAC
cache_stats.cs_ionc++;
} else {
#ifdef VAC
if (!pte.pg_nc)
cache_stats.cs_kc++;
else
#endif VAC
cache_stats.cs_knc++;
}
} else {
#ifdef VAC
if (!pte.pg_nc)
cache_stats.cs_uc++;
else
#endif VAC
cache_stats.cs_unc++;
}
} else {
cache_stats.cs_other++;
}
/* A locked load retains the keep; hat_unlock releases it later. */
if ((flags & PTELD_LOCK) == 0) {
s = splvm();
ppmg->pmg_keepcnt--; /* decr lock count from hat_pmgalloc */
(void) splx(s);
}
}
/*
 * Fill in *ppte as a valid on-board-memory (OBMEM) pte for page `pp'
 * with the hardware protection corresponding to virtual protection
 * `vprot'. Starts from mmu_pteinvalid so every other field takes its
 * "invalid pte" value.
 */
void
hat_mempte(pp, vprot, ppte)
	struct page *pp;
	u_int vprot;
	register struct pte *ppte;
{
	*ppte = mmu_pteinvalid;
	ppte->pg_v = 1;
	ppte->pg_type = OBMEM;
	ppte->pg_prot = hat_vtop_prot(vprot);
	ppte->pg_pfnum = page_pptonum(pp);
}
/*
* Allocate a ctx for use by the specified address space.
* If there are any pmgrps associated with the hat, load
* them up after we get the ctx.
*
* Returns immediately if the as already has a ctx. Otherwise an unused
* ctx is taken if one exists, else one is stolen from another address
* space. Panics if every ctx is locked.
*/
void
hat_getctx(as)
struct as *as;
{
register struct ctx *ctx, *sctx = NULL;
register struct pmgrp *pmg;
#ifdef MMU_3LEVEL
register struct smgrp *smg;
#endif
register u_short tt = 0;
if (as->a_hat.hat_ctx)
return;
kmon_enter(&ctx_lock);
hatcnt.hc_ctxalloc++;
/* find a free ctx or an old one */
/*
* The outer for(;;) never iterates twice: each pass ends in a goto
* or in the panic below.
*/
for (;;) {
ctx = ctxhand;
/*
* One full circular pass over ctxs[], starting just after ctxhand.
* Locked ctxs are skipped; an unowned ctx is taken at once. Among
* owned ctxs, sctx tracks the victim: the first one seen, then any
* whose c_time is <= the best so far.
*/
do {
ctx++;
if (ctx == &ctxs[NCTXS])
ctx = ctxs;
if (ctx->c_lock != 0) /* can't touch */
continue;
if (ctx->c_as == NULL) { /* no as - use it */
ctxhand = ctx;
goto found;
}
if (sctx == NULL || ctx->c_time <= tt) {
sctx = ctx; /* new "best" ctx */
tt = ctx->c_time;
}
} while (ctx != ctxhand);
if (sctx != NULL) {
ctxhand = ctx = sctx;
goto found;
}
panic("hat_getctx - no ctx's");
}
found:
kmon_exit(&ctx_lock);
mmu_setctx(ctx);
if (ctx->c_as) {
/*
* Stealing: flush the cache for this ctx and detach the previous
* owner's groups from it.
*/
vac_ctxflush();
/* invalidate any pmgrps already loaded for this ctx */
for (pmg = ctx->c_as->a_hat.hat_pmgrps; pmg != NULL;
pmg = pmg->pmg_next)
mmu_pmginval(pmg->pmg_base);
ctx->c_as->a_hat.hat_ctx = NULL;
hatcnt.hc_ctxstolen++;
#ifdef MMU_3LEVEL
if (mmu_3level) {
/* invalidate any smgrps already loaded for this ctx */
for (smg = ctx->c_as->a_hat.hat_smgrps; smg != NULL;
smg = smg->smg_next)
mmu_smginval(smg->smg_base);
/* (repeats the assignment already made above) */
ctx->c_as->a_hat.hat_ctx = NULL;
}
#endif
}
/* Bind the ctx to the new owner and timestamp it for the victim scan. */
ctx->c_as = as;
ctx->c_time = ctx_time++;
as->a_hat.hat_ctx = ctx;
#ifdef MMU_3LEVEL
if (mmu_3level) {
/*
* Load up any smgrps already allocated to this hat
*/
for (smg = as->a_hat.hat_smgrps; smg != NULL;
smg = smg->smg_next)
mmu_setsmg(smg->smg_base, smg);
}
#endif
/* load up any pmgrps already allocated to this hat */
for (pmg = as->a_hat.hat_pmgrps; pmg != NULL; pmg = pmg->pmg_next) {
mmu_setpmg(pmg->pmg_base, pmg);
hatcnt.hc_ctxpmgs++;
}
}
/*
 * Lock down hat resources for the range [addr..addr+len). Here that
 * means calling hat_pmgalloc for every page of the range. Only the
 * kernel address space is supported; any other segment panics.
 *
 * addr and len must be MMU_PAGESIZE aligned.
 */
void
hat_reserve(seg, addr, len)
	struct seg *seg;
	addr_t addr;
	u_int len;
{
	register addr_t va;
	addr_t limit;

	if (seg->s_as != &kas)
		panic("hat_reserve");

	limit = addr + len;
	va = addr;
	while (va < limit) {
		(void) hat_pmgalloc(seg, va);
		va += MMU_PAGESIZE;
	}
}
/*
 * Convert a virtual protection (a combination of PROT_READ,
 * PROT_WRITE, PROT_EXEC and PROT_USER) into the hardware pte
 * protection: 0, KR, KW, UR or UW. Any other value panics.
 */
u_int
hat_vtop_prot(vprot)
	u_int vprot;
{
	u_int pprot = 0;

	switch (vprot) {
	case 0:
	case PROT_USER:
		/*
		 * Since 0 might be a valid protection,
		 * the caller should not set valid bit
		 * if vprot == 0 to be sure.
		 */
		pprot = 0;
		break;

	/* kernel, no write access */
	case PROT_READ:
	case PROT_EXEC:
	case PROT_READ | PROT_EXEC:
		pprot = KR;
		break;

	/* kernel, write access */
	case PROT_WRITE:
	case PROT_WRITE | PROT_EXEC:
	case PROT_READ | PROT_WRITE:
	case PROT_READ | PROT_WRITE | PROT_EXEC:
		pprot = KW;
		break;

	/* user, no write access */
	case PROT_EXEC | PROT_USER:
	case PROT_READ | PROT_USER:
	case PROT_READ | PROT_EXEC | PROT_USER:
		pprot = UR;
		break;

	/* user, write access */
	case PROT_WRITE | PROT_USER:
	case PROT_WRITE | PROT_EXEC | PROT_USER:
	case PROT_READ | PROT_WRITE | PROT_USER:
	case PROT_READ | PROT_WRITE | PROT_EXEC | PROT_USER:
		pprot = UW;
		break;

	default:
		panic("hat_vtop_prot");
		/* NOTREACHED */
	}
	return (pprot);
}
#if defined(sun4c) && defined(VAC)
/*
 * Flush all possible cache lines mapping the given physical page. This
 * is used for software cache consistency with I/O, to clean the cache
 * of all data subject to I/O.
 *
 * pfnum - physical page frame number to clean from the cache
 *
 * Nothing is done if the cache is off or the page is marked
 * non-cacheable. A frame with no page struct is flushed only when it
 * lies within the kernel .data/.bss frame range. Otherwise each
 * translation of the page is flushed in its own context, and the
 * caller's context is restored before returning.
 */
void
hat_vacsync(pfnum)
	u_int pfnum;
{
	register struct page *pp = page_numtopp(pfnum);
	register struct pmgrp *pmg;
	register struct pment *pme;
	struct pte tpte;
	int s;
	addr_t va;
	struct ctx *ctxsav, *nctx, *curctx;

	/*
	 * If the cache is off, the page isn't memory, or the page is
	 * non-cacheable, then none of the page could be in the cache
	 * in the first place, with the exception that a page frame
	 * for kernel .data or .bss objects could be in the cache,
	 * but will have no page structure.
	 */
	if (!vac) {
		return;
	} else if (pp == (struct page *) NULL) {
		extern u_int kpfn_dataseg, kpfn_endbss;

		/*
		 * Both bounds must hold. Joining them with `||' would
		 * accept every frame number and, for frames below
		 * kpfn_dataseg, make the unsigned subtraction below wrap
		 * around to a bogus flush address.
		 */
		if (pfnum >= kpfn_dataseg && pfnum <= kpfn_endbss) {
			extern char etext[];

			/*
			 * In sun4c, the page frame number for the start
			 * of the kernel data segment and the page frame
			 * number for end are latched up in kvm_init().
			 * If a page frame number ends up here, then some-
			 * body is doing i/o to an object in kernel .data
			 * or .bss.
			 *
			 * This is a temporary solution, and it does have
			 * some holes in it. It assumes that the page frame
			 * numbers between kernel .data and end are contiguous.
			 *
			 * As a side note, we could go to the effort of
			 * reading the kernel pte for the calculated
			 * address to check with the passed page frame
			 * number, but it isn't really worth the effort.
			 */
			va = (addr_t) (roundup((u_int) etext, DATA_ALIGN) +
			    ((pfnum - kpfn_dataseg) << MMU_PAGESHIFT));
			vac_pageflush (va);
		}
		return;
	} else if (pp->p_nc) {
		return;
	}

	curctx = ctxsav = mmu_getctx();
	/*
	 * Walk the list of translations for this page, flushing each
	 * one.
	 */
	PP_LIST_OPEN
	{
		/*
		 * If the translation has no context, it can't be
		 * in the cache.
		 */
		if ((nctx = pmg->pmg_as->a_hat.hat_ctx) != NULL) {
			/*
			 * Calculate the virtual address, switch to the
			 * correct context, and flush the page.
			 */
			va = pmg->pmg_base + mmu_ptob(pme - pmg->pmg_pme);
			if (nctx != curctx) {
				mmu_setctx(nctx);
				curctx = nctx;
			}
			/* Only a referenced page can have lines in the cache. */
			mmu_getpte(va, &tpte);
			if (tpte.pg_r)
				vac_pageflush(va);
		}
		PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
	}
	PP_LIST_CLOSE
	/*
	 * Restore the original context.
	 */
	if (curctx != ctxsav)
		mmu_setctx(ctxsav);
}
#endif defined(sun4c) && defined(VAC)
#ifdef sun4c
/*
* It would be nice if other parity recovery schemes used this mechanism.
*/
/*
* Kill any processes that use this page. (Used for parity recovery)
* If we encounter the kernel's address space, give up (return -1).
* Otherwise, we return 0.
*
* pp - page whose translations are examined
* addr - address stored in each signalled process's u_addr
*
* The function has no declared return type, so it returns int.
* NOTE(review): seeing the kernel address space only sets the result
* to -1; the walk continues and still signals any process whose p_as
* matches. Confirm whether an early exit was intended.
*/
hat_kill_procs(pp, addr)
struct page *pp;
addr_t addr;
{
register struct pmgrp *pmg;
register struct pment *pme;
int s;
struct as *as;
struct proc *p;
int result = 0;
PP_LIST_OPEN
{
/*
* Find the address space that contains this pment.
*/
as = pmg->pmg_as;
/*
* If the address space is the kernel space, then fail.
* The memory is corrupted, and the only thing to do with
* corrupted kernel memory is die.
*/
if (as == &kas) {
printf("parity recovery: kernel address space\n");
result = -1;
}
/*
* Find the proc that uses this address space and kill
* it. Note that more than one process can share the
* same address space, if vfork() was used to create it.
* This means that we have to look through the entire
* process table and not stop at the first match.
*/
for (p = allproc; p; p = p->p_nxt) {
if (p->p_as == as) {
printf("pid %d killed: parity error\n",
p->p_pid);
uprintf("pid %d killed: parity error\n",
p->p_pid);
psignal(p, SIGBUS);
/* Record the fault code and address in the process's u-area. */
p->p_uarea->u_code = FC_HWERR;
p->p_uarea->u_addr = addr;
}
}
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
}
PP_LIST_CLOSE
return (result);
}
#endif sun4c
/*
* End machine specific interface routines.
*
* The remainder of the routines are private to this module and are used
* by the routines above to implement a service to the outside caller.
*
* Start private routines.
*/
/*
* Unload a pme. We call hat_ptesync() to unload the translation
* then remove the pme from the list of pme's mapping the page.
* Should always be called with the pmgrp for the pme being held.
*
* ppmg - pmgrp containing ppme
* ppme - the pme to unload
* vaddr - passed through to hat_ptesync
* flags - sync flags; HAT_INVSYNC is added before calling hat_ptesync
*
* Panics if the pme maps a page but cannot be found on that page's
* mapping list.
*/
static void
hat_pteunload(ppmg, ppme, vaddr, flags)
struct pmgrp *ppmg;
register struct pment *ppme;
addr_t vaddr;
int flags;
{
int s;
register u_short *next; /* ptr for list removal */
struct page *pp = ppme->pme_page;
#ifdef VAC
struct pment *pme; /* temporary for listwalk */
struct pmgrp *pmg; /* temporary for listwalk */
struct pment *qpme; /* second temporary for comparison */
struct pmgrp *qpmg; /* second temporary for comparison */
addr_t pa, qa; /* matching address values */
int ccf; /* cache conflict found flag */
int s2;
#endif VAC
if (pp != NULL) {
/*
* Remove it from the list of mappings for the page.
*/
s = splvm();
if (ppme == (struct pment *)(pp->p_mapping)) {
/* ppme is the list head: advance the head past it. */
next = &ppme->pme_next;
pp->p_mapping = (caddr_t)PMENXT_PTR(*next);
*next = PMENXT_NULL;
} else {
if (pp->p_mapping == NULL)
panic("hat_pteunload - no mappings");
/*
* Walk the chain until `next' points at the link that refers to
* ppme, then splice ppme out.
*/
for (next = &(((struct pment *)
(pp->p_mapping))->pme_next);
*next != PMENXT_INDEX(ppme);
next = &(PMENXT_PTR(*next)->pme_next))
if (*next == PMENXT_NULL)
panic("hat_pteunload - no mapping");
*next = ppme->pme_next;
ppme->pme_next = PMENXT_NULL;
}
(void) splx(s);
ppme->pme_page = NULL;
ppmg->pmg_as->a_rss -= 1;
#ifdef VAC
if (vac && pp->p_nc) {
/*
* The page is marked non-cacheable. Compare each remaining
* mapping with its successor on the list; if every adjacent
* pair is cache-aligned, the page can be made cacheable again.
*/
ccf = 0;
PP_LIST_OPEN
{
s2 = splvm();
if ((qpme = PMENXT_PTR(pme->pme_next))
!= NULL) {
pa = pmg->pmg_base +
mmu_ptob(pme - pmg->pmg_pme);
qpmg = &pmgrps[(qpme - pments) /
NPMENTPERPMGRP];
qa = qpmg->pmg_base +
mmu_ptob(qpme - qpmg->pmg_pme);
if (!VAC_ALIGNED(pa, qa)) {
/* Conflict remains; end the walk early. */
ccf = 1;
(void) splx(s2);
PP_LIST_NEXT(NULL)
continue;
}
}
(void) splx(s2);
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
}
PP_LIST_CLOSE
if (!ccf) {
/*
* No more cache conflict.
* Use hat_ptesync to resync.
*/
pp->p_nc = 0;
PP_LIST_OPEN
{
hat_ptesync(pp, pmg, pme, (addr_t)NULL,
HAT_NCSYNC);
PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
}
PP_LIST_CLOSE
}
}
#endif VAC
}
/*
* Invalidate the translation.
*/
if (ppme->pme_valid) {
flags |= HAT_INVSYNC;
hat_ptesync(pp, ppmg, ppme, vaddr, flags);
ppme->pme_nosync = ppme->pme_intrep = ppme->pme_valid = 0;
}
}
/*
* Synchronize the hardware and software of a pte. Used for updating the
* hardware nocache bit, the software R & M bits, and invalidating ptes.
*/
static void
hat_ptesync(pp, pmg, pme, vaddr, flags)
struct page *pp;
register struct pmgrp *pmg;
register struct pment *pme;
addr_t vaddr;
int flags;
{
/*
* pp - page mapped by pme, or NULL; the R/M and nocache
* handling is skipped when it is NULL.
* pmg - pmgrp that contains pme.
* pme - the pme to synchronize; must be valid or we panic.
* vaddr - used as the mapping address only when HAT_VADDR is set,
* otherwise recomputed from pmg and pme.
* flags - HAT_VADDR: use vaddr directly and skip all context and
* temporary segment handling.
* HAT_RMSYNC: fold the hardware R and M bits into the
* page and clear them in the pte.
* HAT_NCSYNC (VAC only, tested only when HAT_RMSYNC is
* clear): bring the pte's nocache bit in line with
* pp->p_nc.
* HAT_INVSYNC: replace the pte with mmu_pteinvalid.
*
* NOTE(review): pte is only given a value on the HAT_RMSYNC and
* HAT_NCSYNC paths (which need pp != NULL) and on the HAT_INVSYNC
* path, yet it is written with mmu_setpte() whenever didsetpte is
* clear; callers presumably always pass one of those flags.
*/
register struct ctx *ctxsav, *nctx;
register addr_t mapaddr;
int s, pmg_off;
struct pte pte, tpte;
int usetemp = 0;
int didsetpte = 0;
if (pme->pme_valid == 0)
panic("hat_ptesync - invalid pme");
if (flags & HAT_VADDR) {
/*
* s, ctxsav and nctx stay unset on this path; the matching
* HAT_VADDR return near the end keeps them from being used.
*/
mapaddr = vaddr;
goto skip;
}
/* Rebuild the virtual address from the pme's slot in its pmgrp. */
pmg_off = mmu_ptob(pme - pmg->pmg_pme);
vaddr = pmg->pmg_base + pmg_off;
ctxsav = mmu_getctx();
/*
* We must protect the use of the mapping address,
* since it is a shared resource.
*/
s = splvm();
if (pmg->pmg_as->a_hat.hat_ctx == NULL) {
/*
* No ctx - set things up so that the pmgrp
* is mapped into a temporary segment. No
* need to do any cache flushing since this
* was done when we took the ctx away. Set
* up the mapaddr within the temporary segment.
*/
/* XXX - this is disgusting! */
#ifdef sun2
extern struct ctx *kctx;
nctx = kctx;
if (nctx != ctxsav)
mmu_setctx(nctx);
#else sun2
nctx = ctxsav; /* no need to switch context */
#endif sun2
mmu_settpmg(SEGTEMP, pmg);
mapaddr = SEGTEMP + pmg_off;
usetemp = 1;
} else {
/*
* We have a ctx, make sure we are running
* in the right context. Set up to use the
* virtual address as the mapping address.
*/
if ((nctx = pmg->pmg_as->a_hat.hat_ctx) !=
ctxsav)
mmu_setctx(nctx);
mapaddr = vaddr;
}
skip:
if (pp != NULL) {
if (flags & HAT_RMSYNC) {
mmu_getpte(mapaddr, &pte);
/* Call back to inform address space, if turned on */
if (pmg->pmg_as->a_hatcallback) {
as_hatsync(pmg->pmg_as, vaddr,
(u_int) pte.pg_r, (u_int) pte.pg_m,
(u_int)(flags & HAT_INVSYNC ?
AHAT_UNLOAD : 0));
}
/* Accumulate the hardware bits into the page's R and M state. */
pg_setref(pp, pp->p_ref | pte.pg_r);
pg_setmod(pp, pp->p_mod | pte.pg_m);
/*
* When you zero the modified bit in the MMU
* and leave it set in the cache you may not
* get it set in the mmu when the line is
* re-written. Writeback caches perform the
* setting of the modified bit for a page in
* the MMU on the first write miss that happens
* to that page. Subsequent writes don't bother
* to set the modified bit because the first
* write did it. Therefore if you are zeroing
* the modified bit you must flush the cache
* so that subsequent writes, see the modified
* bit unset in the cache and write it back to
* the MMU.
*/
#ifdef VAC
if (vac && pte.pg_r)
vac_pageflush(mapaddr);
#endif VAC
pte.pg_r = pte.pg_m = 0;
}
#ifdef VAC
else if (flags & HAT_NCSYNC) {
/*
* N.B. The following test assumes that there
* are no user addresses at the same virtual
* addresses as DVMA and segu in VAC machines.
*/
if (mapaddr >= DVMA || (segu != NULL &&
mapaddr >= segu->s_base &&
mapaddr < segu->s_base + segu->s_size)) {
/*
* To avoid lots of problems, we don't
* try to convert anything from cached
* to non-cached (or vice-versa) when
* it is being loaded for DVMA use.
* Also, we refuse to mess with user
* areas since it is impossible to
* reliably flush when converting
* from cached to non-cached and we
* don't want to take any performance
* hits from using a non-cached stack.
*/
/* Setting didsetpte suppresses the final mmu_setpte(). */
didsetpte = 1;
cache_stats.cs_skip++;
} else {
mmu_getpte(mapaddr, &pte);
if (vac && !pte.pg_nc && pp->p_nc &&
mapaddr == vaddr) {
int pri, iskas;
/*
* Need to convert from a cached
* translation to a non-cached
* translation. There are lots
* of potential races here in the
* kernel's address space. If
* some clean line ends up in the
* cache after it is flushed here
* and is then written to, the
* Sirius cache system will end
* up giving a memory timeout error.
*
* For now, we assume that between
* the time that we flush the virtual
* address and reset the MMU that
* nothing will be getting into
* the cache from things like
* ethernet (this is questionable).
* We also assume that we will never
* be converting anything from
* cached to non-cached in the
* kernel for the current stack,
* (i.e., the stack can be accessed
* safely w/o it being changed from
* cached to non-cached), the interrupt
* stack, or anything that might be
* touched at interrupts above splhigh
* (UARTS, level7 profiling).
*/
pte.pg_nc = 1;
didsetpte = 1;
iskas = pmg->pmg_as == &kas;
/* Flush and rewrite the pte with interrupts held at splhigh. */
pri = splhigh();
if (pte.pg_r)
vac_pageflush(mapaddr);
mmu_setpte(mapaddr, pte);
#ifdef SUN4_470
#define CFLUSH_TEST (iskas && (cpu != CPU_SUN4_470))
#else
#define CFLUSH_TEST iskas
#endif
if (CFLUSH_TEST) {
mmu_getpte(mapaddr, &tpte);
/*
* Flush the virtual address
* again just in case some IO
* got in behind our back
* above. Doing this for
* iskas only assumes there
* is no UDVMA to worry about.
*/
if (tpte.pg_r)
vac_pageflush(mapaddr);
cache_stats.cs_kchange++;
} else {
cache_stats.cs_uchange++;
}
(void) splx(pri);
} else {
/*
* No cached to non-cached conversion is needed (or the
* pmgrp is mapped through the temporary segment); just
* copy the page's nocache state into the pte that is
* written below.
*/
pte.pg_nc = pp->p_nc;
}
}
}
#endif VAC
}
if (flags & HAT_INVSYNC) {
#ifdef VAC
/*
* Flush before invalidating, but only when the mapping
* address is the real virtual address.
*/
if (vac && mapaddr == vaddr) {
mmu_getpte(mapaddr, &tpte);
if (tpte.pg_r)
vac_pageflush(mapaddr);
}
#endif VAC
pte = mmu_pteinvalid;
}
if (!didsetpte)
mmu_setpte(mapaddr, pte);
if (flags & HAT_VADDR)
return;
/* Undo the temporary segment mapping, spl and context switch. */
if (usetemp)
mmu_settpmg(SEGTEMP, pmgrp_invalid);
(void) splx(s);
if (nctx != ctxsav)
mmu_setctx(ctxsav);
}
/*
* When nonzero, hat_pmgalloc() and hat_getpmg() compare the base of the
* pmgrp read back from the MMU with the base expected for the address
* and call hat_wrongpmg() on a mismatch.
*/
int getpmg_check = 1;
/*
* Allocate a pmgrp to map the specified address.
* Returns w/ the keepcnt incremented for the particular pmgrp used.
* First look for something in the free list and then steal one
* that is currently being used. Use simple round robin algorithm
* to find a used one to steal, skipping over the first few that
* we know are permanently allocated to the kernel, and starting
* at pmgrpmin. XXX - should do something closer to LRU.
*/
static struct pmgrp *
hat_pmgalloc(seg, addr)
struct seg *seg;
addr_t addr;
{
/*
* seg - segment being mapped; only its address space (s_as) is used.
* addr - virtual address that the pmgrp has to cover.
*
* Returns the pmgrp for addr. Its keepcnt has been incremented when
* the pmgrp was already present, or set to 1 when it was newly
* allocated.
*/
register struct as *as = seg->s_as;
register struct pmgrp *pmg;
int s;
/*
* Read from the mmu to see if we already have a pmgrp allocated.
* If so, keep it and return. N.B. this is an inline hat_getpmg().
*/
hat_setup(as);
s = splvm();
if ((pmg = mmu_getpmg(addr)) != pmgrp_invalid) {
pmg->pmg_keepcnt++;
hatcnt.hc_pmghave++;
if (getpmg_check && pmg->pmg_base !=
(caddr_t)((u_int)addr & ~(PMGRPSIZE - 1)))
hat_wrongpmg(pmg, addr, as);
(void) splx(s);
return (pmg);
}
/*
* No pmgrp allocated to this address space contains the pme,
* allocate a new pmg for this address space. First, try
* the free list.
*/
kmon_enter(&pme_lock);
top:
if ((pmg = pmgrpfree) == NULL) {
int try;
/*
* No pmg's free, have to take one from someone.
* Take from address spaces with no ctx first.
* XXX - could do it with just one pass.
*/
pmg = pmgrphand;
try = 1;
for (;;) {
/* One full sweep of the pmgrp array, starting after pmgrphand. */
do {
pmg++;
if (pmg == &pmgrps[NPMGRPS]) {
if (pmgrpmin) {
/* skip some kernel pmgrps */
pmg = pmgrpmin;
} else {
pmg = pmgrps;
}
}
if (pmg->pmg_lock == 0 &&
pmg->pmg_keepcnt == 0) {
/*
* On the first try, only take a pmg
* from an address space with no ctx.
*/
if (try == 1 &&
pmg->pmg_as->a_hat.hat_ctx != NULL)
continue;
/*
* Found a candidate, free
* it up and try again.
*/
if (try == 1)
hatcnt.hc_pmgstolennoctx++;
else
hatcnt.hc_pmgstolenctx++;
/* hat_pmgfree() expects the pmg to be kept exactly once. */
pmg->pmg_keepcnt++;
hat_pmgfree(pmg);
pmgrphand = pmg;
goto top;
}
} while (pmg != pmgrphand);
/*
* Give up after 2 tries.
*/
if (try >= 2) {
/*
* NOTE(review): rm_outofhat() is defined elsewhere; if
* it returns, the sweep simply starts over.
*/
kmon_exit(&pme_lock);
rm_outofhat();
kmon_enter(&pme_lock);
}
try++;
}
}
hatcnt.hc_pmgalloc++;
pmgrpfree = pmg->pmg_next; /* take it off the free list */
/* pmg_lock stays set until the mmu_setpmg() call below is done. */
pmg->pmg_lock = 1;
pmg->pmg_keepcnt = 1;
hat_pmglink(pmg, as, addr);
/*
* NOTE(review): hat_smgalloc() is only defined in this file under
* MMU_3LEVEL, but this call is not conditional; confirm how builds
* without MMU_3LEVEL resolve it.
*/
hat_smgalloc(as, addr, pmg);
kmon_exit(&pme_lock);
(void) splx(s);
mmu_setpmg(pmg->pmg_base, pmg);
pmg->pmg_lock = 0;
return (pmg);
}
/*
* This routine will return the pmgrp structure for the given address
* in the current ctx. But unlike mmu_getpmg, this routine will protect
* against the pmgrp being lost by spl'ing and will return a kept pmgrp
* pointer. The keepcnt should be decremented by the caller when it is
* done looking at the pmgrp contents.
*/
static struct pmgrp *
hat_getpmg(addr, as)
addr_t addr;
struct as *as;
{
int s;
struct pmgrp *pmg;
s = splvm();
pmg = mmu_getpmg(addr);
pmg->pmg_keepcnt++;
if (getpmg_check && pmg != pmgrp_invalid && pmg->pmg_base != 0 &&
pmg->pmg_base != (caddr_t)((u_int)addr & ~(PMGRPSIZE - 1)))
hat_wrongpmg(pmg, addr, as);
(void) splx(s);
return (pmg);
}
/*
* Free the specified pmgrp. This is done by calling hat_pteunload
* on all the pme's to process all the referenced and modified bits
* and to invalidate the pme. If the hat containing this pmg currently
* has a ctx, then invalidate that mapping. Finally we unlink the
* pmgrp from the hat pmgrp list and put it on the free list.
* pmg should be kept (once) when this routine is called.
*/
static void
hat_pmgfree(pmg)
register struct pmgrp *pmg;
{
/*
* pmg - the pmgrp to release; the ASSERT below requires that it is
* kept exactly once on entry. On return it has been unkept and
* pushed on the head of the pmgrpfree list.
*/
register struct pment *pme = pmg->pmg_pme;
register struct as *as;
register int cnt;
struct ctx *ctx, *ctxsav;
int s;
ASSERT(pmg->pmg_keepcnt == 1);
/* A pmgrp with no address space has nothing to unload or unlink. */
if ((as = pmg->pmg_as) != NULL) {
/* Unload every valid pme, syncing its R and M bits first. */
for (cnt = 0; cnt < NPMENTPERPMGRP; cnt++, pme++) {
if (pme->pme_valid)
hat_pteunload(pmg, pme, (addr_t)NULL,
HAT_RMSYNC);
}
/*
* If the address space has a context, switch to it just long
* enough to invalidate the pmgrp's mapping there.
*/
if ((ctx = pmg->pmg_as->a_hat.hat_ctx) != NULL) {
ctxsav = mmu_getctx();
if (ctxsav != ctx)
mmu_setctx(ctx);
mmu_pmginval(pmg->pmg_base);
if (ctxsav != ctx)
mmu_setctx(ctxsav);
}
/* Unlink from the address space's doubly linked pmgrp list. */
if (as->a_hat.hat_pmgrps == pmg) {
as->a_hat.hat_pmgrps = pmg->pmg_next;
if (pmg->pmg_next)
pmg->pmg_next->pmg_prev = NULL;
} else {
pmg->pmg_prev->pmg_next = pmg->pmg_next;
if (pmg->pmg_next)
pmg->pmg_next->pmg_prev = pmg->pmg_prev;
}
pmg->pmg_as = NULL;
pmg->pmg_next = pmg->pmg_prev = NULL;
}
s = splvm();
pmg->pmg_keepcnt--;
#ifdef MMU_3LEVEL
if (mmu_3level) {
/*
* NOTE(review): pmg_sme is dereferenced without a NULL check;
* presumably every pmgrp freed on a 3-level MMU has been given
* an sme by hat_smgalloc() -- confirm.
*/
pmg->pmg_sme->sme_valid = 0;
pmg->pmg_sme = (struct sment *)NULL;
}
#endif
/* Push onto the head of the free list. */
pmg->pmg_next = pmgrpfree;
pmgrpfree = pmg;
(void) splx(s);
}
/*
* Add the specified pmgrp to the list of pmgrp's allocated to
* the specified address space. We hang pmgrps off the address
* space and not the ctx so that we can keep them around even if
* we don't have a hardware context.
*/
static void
hat_pmglink(pmg, as, addr)
	register struct pmgrp *pmg;
	struct as *as;
	addr_t addr;
{
	register struct pmgrp *oldhead;
	int pri;

	/*
	 * pmg  - pmgrp to attach; must already be kept.
	 * as   - address space that becomes its owner.
	 * addr - any address inside the range the pmgrp is to map; it is
	 *        rounded down to a PMGRPSIZE boundary to form pmg_base.
	 */
	ASSERT(pmg->pmg_keepcnt > 0);

	pri = splvm();
	pmg->pmg_as = as;

	/* Insert at the head of the address space's pmgrp list. */
	oldhead = as->a_hat.hat_pmgrps;
	pmg->pmg_next = oldhead;
	if (oldhead != NULL)
		oldhead->pmg_prev = pmg;
	pmg->pmg_prev = NULL;
	as->a_hat.hat_pmgrps = pmg;

	pmg->pmg_base = (addr_t)((u_int)addr & ~(PMGRPSIZE - 1));
	(void) splx(pri);
}
/*
* Called when the wrong pmeg is read out from the MMU.
* Most likely, this is a down rev Carrera CPU board that
* is missing some pullup resistors on the segment RAMs.
* The ECO for the needed Carrera CPU board fix is 2555.
*/
static void
hat_wrongpmg(pmg, addr, as)
	struct pmgrp *pmg;
	addr_t addr;
	struct as *as;
{
	/*
	 * Report a pmgrp whose recorded base does not match the address it
	 * was looked up for, then panic.  Does not return.
	 *
	 * pmg  - the pmgrp that was read back from the MMU.
	 * addr - the virtual address that was looked up.
	 * as   - the address space the lookup was made for; on non-sun2
	 *        machines its pmgrp list is searched for groups whose base
	 *        does match addr.
	 */
#ifndef sun2
	register struct pmgrp *pmgp;
	static char *wrp = "PMG 0x%8x claims VA 0x%8x is PMEG 0x%8x (%s)\n";
#ifdef sparc
	u_int map_getsgmap();
#endif sparc

	/* Everything below is reported relative to the pmgrp-aligned base. */
	addr = (addr_t)((u_int)addr & ~(PMGRPSIZE - 1));
	printf("WRONG PAGE MAP GROUP FOUND FOR VIRTUAL ADDRESS 0x%8x!\n", addr);
#ifdef sparc
	printf("Hardware refers this address to segment map 0x%8x\n",
	    map_getsgmap(addr));
#endif sparc
	printf("PMG 0x%8x claims VA 0x%8x is PMEG 0x%8x (hardware)\n",
	    pmg, pmg->pmg_base, pmg->pmg_num);

	/* scan for kernel mappings */
	for (pmgp = kas.a_hat.hat_pmgrps; pmgp != NULL; pmgp = pmgp->pmg_next)
		if (pmgp->pmg_base == addr)
			printf(wrp, pmgp, pmgp->pmg_base,
			    pmgp->pmg_num, "kernel");

	/* scan for user mappings */
	for (pmgp = as->a_hat.hat_pmgrps; pmgp != NULL; pmgp = pmgp->pmg_next)
		if (pmgp->pmg_base == addr)
			printf(wrp, pmgp, pmgp->pmg_base,
			    pmgp->pmg_num, "user");
#else sun2
	/*
	 * Print the base recorded in the pmgrp.  This used to name a bare
	 * "base", which is not declared anywhere in this routine.
	 */
	printf("WRONG PMG! addr=%x pmg=%x base=%x\n", addr, pmg,
	    pmg->pmg_base);
#endif sun2
	panic("wrong pmg");
	/* NOTREACHED */
}
#ifdef MMU_3LEVEL
/*
* Code to handle allocation of smegs is cloned from the pmeg versions
*/
/*
* When nonzero, hat_getsmg() and hat_smgalloc() compare the base of the
* smgrp read back from the MMU with the base expected for the address;
* on a mismatch they print a diagnostic and call call_debug().
*/
int getsmg_check = 1;
/*
* This routine will return the smgrp structure for the given address
* in the current ctx. But unlike mmu_getsmg, this routine will protect
* against the smgrp being lost by spl'ing and will return a kept smgrp
* pointer. The keepcnt should be decremented by the caller when it is
* done looking at the smgrp contents.
*/
static struct smgrp *
hat_getsmg(addr)
addr_t addr;
{
int s;
struct smgrp *smg;
s = splvm();
smg = mmu_getsmg(addr);
smg->smg_keepcnt++;
if (getsmg_check && smg != smgrp_invalid && smg->smg_base != 0 &&
smg->smg_base != (caddr_t)((u_int)addr & ~(SMGRPSIZE - 1))) {
printf("hat_getsmg: addr=%x, smg=%x, smg base=%x\n",
addr, smg, smg->smg_base);
call_debug("hat_getsmg");
}
(void) splx(s);
return (smg);
}
/*
* Free the specified smgrp. This is done by calling hat_pmgfree
* on all the sme's to invalidate the smgrp. If the hat containing
* this smg currently has a ctx, then invalidate that mapping.
* Finally we unlink the smgrp from the hat smgrp list and
* put it on the free list.
* smg should be kept (once) when this routine is called.
*
* XXX - at some level, this should be optimized to use
* segment and/or region flush
*/
static void
hat_smgfree(smg)
register struct smgrp *smg;
{
/*
* smg - the smgrp to release. On return its keepcnt has been
* decremented and it sits on the head of the smgrpfree list.
*/
register struct sment *sme;
register struct pmgrp *pmg;
register struct as *as;
register int cnt;
struct ctx *ctx, *ctxsav;
int s;
/*
* This may be prohibitively expensive as it flushes each segment by
* flushing its pages. Rarely are all the pages in a pmeg
* used and most processes rarely use greater than two or three pmgs
* If this is a real problem we can flush the context and free
* everything, XXX - instrument this
*/
sme = smg->smg_sme;
/* An smgrp with no address space has nothing to free or unlink. */
if ((as = smg->smg_as) != NULL) {
for (cnt = 0; cnt < NSMENTPERSMGRP; cnt++) {
if (sme->sme_valid) {
pmg = sme->sme_pmg;
s = splvm();
/* hat_pmgfree() expects the pmg to be kept exactly once. */
pmg->pmg_keepcnt++;
hat_pmgfree(pmg);
sme->sme_pmg = pmgrp_invalid;
(void) splx(s);
}
sme++;
}
/*
* If the address space has a context, switch to it just long
* enough to invalidate the smgrp's mapping there.
*/
if ((ctx = smg->smg_as->a_hat.hat_ctx) != NULL) {
ctxsav = mmu_getctx();
if (ctxsav != ctx)
mmu_setctx(ctx);
mmu_smginval(smg->smg_base);
if (ctxsav != ctx)
mmu_setctx(ctxsav);
}
/* Unlink from the address space's doubly linked smgrp list. */
if (as->a_hat.hat_smgrps == smg) {
as->a_hat.hat_smgrps = smg->smg_next;
if (smg->smg_next)
smg->smg_next->smg_prev = NULL;
} else {
smg->smg_prev->smg_next = smg->smg_next;
if (smg->smg_next)
smg->smg_next->smg_prev = smg->smg_prev;
}
smg->smg_as = NULL;
smg->smg_next = smg->smg_prev = NULL;
}
s = splvm();
smg->smg_keepcnt--;
/* Push onto the head of the free list. */
smg->smg_next = smgrpfree;
smgrpfree = smg;
(void) splx(s);
}
/*
* Add the specified smgrp to the list of smgrp's allocated to
* the specified address space. We hang smgrps off the address
* space and not the ctx so that we can keep them around even if
* we don't have a hardware context.
*/
static void
hat_smglink(smg, as, addr)
	register struct smgrp *smg;
	struct as *as;
	addr_t addr;
{
	register struct smgrp *oldhead;
	int pri;

	/*
	 * smg  - smgrp to attach.
	 * as   - address space that becomes its owner.
	 * addr - any address inside the range the smgrp is to map; it is
	 *        rounded down to an SMGRPSIZE boundary to form smg_base.
	 */
	pri = splvm();
	smg->smg_as = as;

	/* Insert at the head of the address space's smgrp list. */
	oldhead = as->a_hat.hat_smgrps;
	smg->smg_next = oldhead;
	if (oldhead != NULL)
		oldhead->smg_prev = smg;
	smg->smg_prev = NULL;
	as->a_hat.hat_smgrps = smg;

	smg->smg_base = (addr_t)((u_int)addr & ~(SMGRPSIZE - 1));
	(void) splx(pri);
}
void
hat_smgreserve(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as *as = seg->s_as;
	register struct smgrp *smg;
	register struct as *owner;

	/*
	 * Lock down the smgrp that currently maps addr for the address
	 * space of seg.  Does nothing unless the MMU has three levels.
	 */
	if (!mmu_3level)
		return;

	/* hat_getsmg() hands the smgrp back kept; it is left that way. */
	smg = hat_getsmg(addr);

	/* The address space needs a context ... */
	if (as->a_hat.hat_ctx == NULL)
		panic("hat_smgreserve");

	/* ... and the smgrp must not belong to a different address space. */
	owner = smg->smg_as;
	if (owner != NULL && owner != as)
		panic("hat_smgreserve");

	/* Adopt a real smgrp that has no owner yet. */
	if (smg != smgrp_invalid && owner == NULL)
		hat_smglink(smg, as, addr);

	/* Anything that is reserved is locked as well. */
	smg->smg_lock = 1;
}
/*
* Initialize all the unlocked smgs to have invalid sme's
* and add them to the free list.
* This routine is called during startup after all the
* kernel smgs have been reserved. This routine will
* also set the smgrpmin variable for use in hat_smgalloc.
*
* REGTEMP is only used here so we temporarily steal
* the region before KERNELBASE and mark it invalid
* when we are finished.
*/
void
hat_smginit()
{
register struct smgrp *smg;
register addr_t addr;
int s;
if (!mmu_3level)
return;
s = splvm();
kmon_enter(&sme_lock);
for (smg = smgrps; smg < &smgrps[NSMGRPS]; smg++) {
if (smg->smg_lock || smg->smg_keepcnt != 0)
continue;
if (smgrpmin == NULL)
smgrpmin = smg;
mmu_settsmg((addr_t)REGTEMP, smg);
for (addr = (addr_t)REGTEMP;
addr < (addr_t)(REGTEMP + SMGRPSIZE);
addr += PMGRPSIZE) {
mmu_pmginval(addr);
}
smg->smg_next = smgrpfree;
smgrpfree = smg;
}
smgrphand = smgrpmin;
mmu_smginval((addr_t)REGTEMP);
kmon_exit(&sme_lock);
(void) splx(s);
}
/*
* Allocate a smgrp to map the specified address.
* Returns w/ the keepcnt incremented for the particular smgrp used.
* First look for something in the free list and then steal one
* that is currently being used.
*/
static void
hat_smgalloc(as, addr, pmg)
struct as *as;
addr_t addr;
struct pmgrp *pmg;
{
/*
* as - address space the smgrp is wanted for.
* addr - virtual address that the smgrp has to cover.
* pmg - pmgrp to record in the sme for addr; pmg->pmg_sme is set to
* point back at that sme.
*
* Does nothing unless the MMU has three levels. The smgrp is not
* returned to the caller.
*/
register struct smgrp *smg;
register struct sment *sme;
int s;
if (!mmu_3level)
return;
s = splvm();
/* An smgrp already maps addr: just fill in the sme for pmg. */
if ((smg = mmu_getsmg(addr)) != smgrp_invalid) {
smg->smg_keepcnt++;
hatcnt.hc_smghave++;
if (getsmg_check && smg->smg_base !=
(caddr_t)((u_int)addr & ~(SMGRPSIZE - 1))) {
printf("hat_smgalloc: addr=%x, smg=%x, smg base=%x\n",
addr, smg, smg->smg_base);
call_debug("hat_smgalloc");
}
/* Index of the sme: page offset within the smgrp / pmes per pmgrp. */
sme = &(smg->smg_sme
[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
sme->sme_pmg = pmg;
sme->sme_valid = 1;
pmg->pmg_sme = sme;
(void) splx(s);
return;
}
/*
* No smgrp allocated to this address space contains the pme,
* allocate a new smg for this address space. First, try
* the free list.
*/
kmon_enter(&sme_lock);
top:
if ((smg = smgrpfree) == NULL) {
int try;
/*
* No smg's free, have to take one from someone.
* Take from address spaces with no ctx first.
* XXX - could do it with just one pass.
*/
smg = smgrphand;
try = 1;
for (;;) {
/* One full sweep of the smgrp array, starting after smgrphand. */
do {
smg++;
if (smg == &smgrps[NSMGRPS]) {
if (smgrpmin) {
/* skip some kernel smgrps */
smg = smgrpmin;
} else {
smg = smgrps;
}
}
/*
* A candidate is unlocked and, per hat_pmgcheck(), has no
* valid sme whose pmgrp is kept. The smgrp's own keepcnt
* is not part of the test.
*/
if (smg->smg_lock == 0 && hat_pmgcheck(smg)) {
/* && smg->smg_keepcnt == 0) { */
/*
* On the first try, only take a smg
* from an address space with no ctx.
*/
if (try == 1 &&
smg->smg_as->a_hat.hat_ctx != NULL)
continue;
/*
* Found a candidate, free
* it up and try again.
*/
if (try == 1)
hatcnt.hc_smgstolennoctx++;
else
hatcnt.hc_smgstolenctx++;
/* we are at splvm */
smg->smg_keepcnt++;
hat_smgfree(smg);
smgrphand = smg;
goto top;
}
} while (smg != smgrphand);
/*
* Give up after 2 tries.
*/
if (try >= 2) {
/*
* NOTE(review): rm_outofhat() is defined elsewhere; if
* it returns, the sweep simply starts over.
*/
kmon_exit(&sme_lock);
rm_outofhat();
kmon_enter(&sme_lock);
}
try++;
}
}
hatcnt.hc_smgalloc++;
smgrpfree = smg->smg_next; /* take it off the free list */
/* smg_lock stays set until the mmu_setsmg() call below is done. */
smg->smg_lock = 1;
smg->smg_keepcnt = 1;
hat_smglink(smg, as, addr);
sme = &(smg->smg_sme[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
sme->sme_pmg = pmg;
sme->sme_valid = 1;
pmg->pmg_sme = sme;
kmon_exit(&sme_lock);
(void) splx(s);
mmu_setsmg(smg->smg_base, smg);
smg->smg_lock = 0;
return;
}
hat_xfree_sun4(as)
register struct as *as;
{
register int s;
/* if three-level mmu, free smgs, else free pmgs */
if (mmu_3level) {
register struct smgrp *smg;
s = splvm();
while (smg = as->a_hat.hat_smgrps) {
smg->smg_keepcnt++;
(void) splx(s);
hat_smgfree(smg); /* should unkeep the smg */
s = splvm();
}
} else {
register struct pmgrp *pmg;
s = splvm();
while (pmg = as->a_hat.hat_pmgrps) {
pmg->pmg_keepcnt++; /* XXX pmg->pmp_keepcnt = 1? */
(void) splx(s);
hat_pmgfree(pmg); /* should unkeep the pmg */
s = splvm();
}
}
(void) splx(s);
}
hat_pmgcheck(smg)
	struct smgrp *smg;
{
	register struct sment *sme = smg->smg_sme;
	register int left;

	/*
	 * Returns 0 as soon as a valid sme of smg is found whose pmgrp is
	 * kept, and 1 when no such sme exists.
	 */
	for (left = NSMENTPERSMGRP; left > 0; left--, sme++) {
		if (!sme->sme_valid)
			continue;
		if (sme->sme_pmg->pmg_keepcnt != 0)
			return (0);
	}
	return (1);
}
#else MMU_3LEVEL
hat_xfree_other(as)
register struct as *as;
{
register int s;
register struct pmgrp *pmg;
s = splvm();
while (pmg = as->a_hat.hat_pmgrps) {
pmg->pmg_keepcnt++; /* XXX pmg->pmp_keepcnt = 1? */
(void) splx(s);
hat_pmgfree(pmg); /* should unkeep the pmg */
s = splvm();
}
(void) splx(s);
}
#endif /* MMU_3LEVEL */