#ifndef lint static char sccsid[] = "@(#)vm_hat.c 1.1 94/10/31 SMI"; #endif /* * Copyright (c) 1991 by Sun Microsystems, Inc. */ /* * VM - Hardware Address Translation management. * * This file implements the machine specific hardware translation * needed by the VM system. The machine independent interface is * described in <vm/hat.h> while the machine dependent interface * and data structures are described in <machine/vm_hat.h>. For * Sun computers, we actually share this same source for more than * one architecture (Sun-2, Sun-3, and Sun-4), since the static * segment and page map mmu's are very similar. For this reason, * this file is located in the sun directory and we use ifdef's * for the few cases where they differ enough to be noticed here. * The actual loading of the hardware registers is done at the mmu * layer which is different for each Sun architecture type. In * reality we probably need a more general cross architecture * sharing structure so that we can more easily share certain code * (like this file) across SOME Sun architectures without giving * the idea that the file MUST be shared across all Sun architectures. * * The hat layer manages the address translation hardware as a cache * driven by calls from the higher levels in the VM system. Nearly * all the details of how the hardware is managed should not be visible * above this layer except for miscellaneous machine specific functions * (e.g. mapin/mapout) that work in conjunction with this code. Other * than a small number of machine specific places, the hat data * structures seen by the higher levels in the VM system are opaque * and are only operated on by the hat routines. Each address space * contains a struct hat and a page contains an opaque pointer which * is used by the hat code to hold a list of active translations to * that page. * * XXX - At integration into 5.0, replace all #define'd symbols such as * NPMGRPSSW with the actual variable names (npmgrpssw in this case). 
*/ #include #include #include #include /* for u_ru.ru_minflt */ #include #include /* for flush_cnt */ #include #include #include #ifdef IOC #include #endif #include #include #include #include #include #include #include #include #include #include #if !(defined(sun4) || defined(sun4c)) ERROR - This HAT works only for sun4 and sun4c #endif !(defined(sun4) || defined(sun4c)) /* * XXX - Review all ASSERT's in this vm_hat.c. Change inexpensive ones to * test and panic code. Remove #include . */ #define ASSERTPMGMAPPED(pmg, msg) /* assertpmgmapped(pmg, msg) */ #define TEST_PANIC(cond, panicmsg) {if (cond) panic(panicmsg); } /* * Machine specific public data structures. */ struct as kas; struct ctx *kctx; struct pmgrp *pmgrp_invalid; #ifdef MMU_3LEVEL struct smgrp *smgrp_invalid; #endif MMU_3LEVEL /* * Semi-private vm_hat data structures. * Other machine specific routines need to access these. */ extern int npmgrpssw; /* now defined in param.c */ int npmghash; u_short ctx_time; struct ctx *ctxs, *ctxsNCTXS; /* used by */ struct pmgrp *pmgrps, *pmgrpsNPMGRPS; /* used by */ struct pment *pments, *pmentsNPMENTS; /* used by */ struct hwpmg *hwpmgs, *hwpmgsNHWPMGS; /* used by */ struct pte *ptes, *ptesNPTES; /* used by */ struct pmgrp **pmghash, **pmghashNPMGHASH; /* used by */ #ifdef MMU_3LEVEL struct smgrp *smgrps, *smgrpsNSMGRPS; struct sment *sments, *smentsNSMENTS; #endif MMU_3LEVEL #if defined(SUNDBE) && defined(sun4) #ifndef MMU_3LEVEL ERROR - ISM will work only with 3 level MMU #endif MMU_3LEVEL static int ispseudo_smgrp(/* smgrp */); #endif ISM extern void vac_flushallctx(); /* * hat layer statistics. */ /* * Context, smeg, and pmeg statistics. 
*/
struct vmhatstat {
	/*
	 * Context allocation statistics (all updated by hat_getctx()).
	 */
	u_int	vh_ctxfree;		/* ctx allocations without a ctx steal */
	u_int	vh_ctxstealclean;	/* ctx steals where the stolen ctx was marked clean */
	u_int	vh_ctxstealflush;	/* ctx steals where the stolen ctx was not clean */
	u_int	vh_ctxmappmgs;		/* pmgs mapped at ctx allocation */

	/* SW pmg statistics */
	u_int	vh_pmgallocfree;	/* pmg allocation without a pmg steal */
	u_int	vh_pmgallocsteal;	/* pmg allocations requiring a pmg steal */

	/* HW pmg map and load/unload statistics */
	u_int	vh_pmgmap;		/* mappings of loaded pmg's */
	u_int	vh_pmgldfree;		/* allocs. of free HW pmg */
	u_int	vh_pmgldnoctx;		/* allocs. of HW pmg with no ctx */
	u_int	vh_pmgldcleanctx;	/* allocs. of HW pmg with clean ctx */
	u_int	vh_pmgldflush;		/* allocs. of HW pmg needing VAC flush */
	u_int	vh_pmgldnomap;		/* allocs. of HW pmg taking unmapped pmg */

	/*
	 * hat_fault statistics (one counter per exit path of hat_fault()).
	 */
	u_int	vh_faultmap;		/* hat_fault mapped an already loaded HW pmg */
	u_int	vh_faultload;		/* hat_fault loaded SW pmg in HW */
	u_int	vh_faultinhw;		/* unresolved: pmg was already loaded and mapped */
	u_int	vh_faultnopmg;		/* unresolved: no SW pmg covers the address */

	/* HW smg allocation statistics */
	u_int	vh_smgfree;
	u_int	vh_smgnoctx;
	u_int	vh_smgcleanctx;
	u_int	vh_smgflush;

	/* added later to the end not to break pmegstat */
	u_int	vh_pmgallochas;		/* has already a pmeg */
} vmhatstat;

/*
 * Page cacheability statistics.
*/ struct cache_stats { int cs_ionc; /* non-cached IO translations */ int cs_ioc; /* cached IO translations */ int cs_knc; /* non-cached kernel translations */ int cs_kc; /* cached kernel translations */ int cs_unc; /* non-cached user translations */ int cs_uc; /* cached user translations */ int cs_other; /* other non-type 0 pages */ int cs_iowantchg; /* # of IO cached to NC changes skipped */ int cs_kchange; /* # of kernel cached to non-cached changes */ int cs_uchange; /* # of user cached to non-cached changes */ int cs_unloadfix; /* # of unload's that made pages cachable */ int cs_unloadnofix; /* # of " that didn't made pages cachable */ int cs_skip; /* XXX should be after cs_other */ } cache_stats; /* * hat_pmgfind() look aside buffer hit/miss statistics. */ struct pmgfindstat { u_int pf_hit; u_int pf_miss; u_int pf_notfound; } pmgfindstat; /* * Private vm_hat data structures */ enum ptesflag { PTESFLAG_SKIP, PTESFLAG_UNLOAD }; static struct ctx *ctxhand; static struct pmgrp *pmgrphand; static struct pmgrp *pmgrpfree; static struct pmgrp *pmgrpmin; static struct hwpmg *hwpmghand; static struct hwpmg *hwpmgfree; static struct hwpmg *hwpmgmin; #ifdef MMU_3LEVEL static struct smgrp *smgrphand; static struct smgrp *smgrpfree; static struct smgrp *smgrpmin; #endif MMU_3LEVEL static void hat_xfree(/* as */); static void hat_pteunload(/* pmg, pme, addr, flags */); static void hat_ptesync(/* pp, pmg, pme, addr, flags */); static void hat_pmgfree(/* pmg */); static void hat_pmglink(/* pmg, as, addr */); static void hat_pmgload(/* pmg */); static void hat_pmgunload(/* pmg, ptesflag */); static struct pmgrp * hat_pmgalloc(/* seg, addr */); static struct pmgrp * hat_pmgfind(/* addr, as */); static void hat_pmgloadptes(/* a, ppte */); static void hat_pmgunloadptes(/* a, ppte */); static void hat_pmgswapptes(/* a, ppte1, ppte2 */); static void hat_pmgmap(/* pmg */); static void hat_clrcleanbit(); static void hat_unmap_aspmgs(/* as */); #ifdef MMU_3LEVEL static struct smgrp 
*hat_getsmg(/* addr */); static void hat_smgfree(/* smg */); static void hat_smglink(/* smg, as, addr */); static void hat_smgalloc(/* as, addr, pmg */); #define hat_pmgtosmg(pmg) \ (&smgrps[((pmg)->pmg_sme - sments) >> NSMENTPERSMGRPSHIFT]) #endif MMU_3LEVEL static int hatunmaplimit = 30; /* % limit used by hat_unmap_aspmgs() */ /* local inline functions */ #define hat_pmgbase(a) ((addr_t)((u_int)a & PMGRPMASK)) #define hat_pmgisloaded(pmg) (pmg->pmg_num != PMGNUM_SW) #define hat_addrtopte(pmg, a) \ ((pmg)->pmg_pte + (((u_int)(a) & PMGRPOFFSET) >> PAGESHIFT)) #define hat_pmetopte(pmg, pme) ((pmg)->pmg_pte + ((pme) - (pmg)->pmg_pme)) /**************** misc. macros and declarations below ************************/ /* * XXX - Function defitions should be in a header file. */ extern u_int map_getsgmap(); extern char DVMA[]; /* addresses in kas above DVMA are for "IO" */ #ifdef VAC /* * XXX - should be made more efficent by using pment * indexing instead of virtual address checking. */ #define VAC_MASK(a) (((u_int)(a) & MMU_PAGEMASK) & (shm_alignment - 1)) #define VAC_ALIGNED(a1, a2) ((VAC_MASK(a1) ^ VAC_MASK(a2)) == 0) #endif /* * These macros allow us to walk the translation list in a sane manner. * The list walking must be protected, and if the translation we land * on is interrupt replaceable, all work on that translation must also * be protected. This complexity is hidden in the macros. NOTE that these * macros assume that variables "pme", "pmg", "pp", and "s" are declared as * locals in the calling function. The sequence for use is as follows: * * PP_LIST_OPEN * { * do operations; * PP_LIST_NEXT(next item) * } * PP_LIST_CLOSE * * NOTE that you CANNOT put a semicolon after PP_LIST_OPEN or it will not * function. 
*/ #define PP_LIST_OPEN \ s = splvm(); \ pmg = (struct pmgrp *)NULL; \ PP_LIST_NEXT((struct pment *)pp->p_mapping) \ while (pme) #define PP_LIST_NEXT(x) \ (void) splx(s); \ s = splvm(); \ if (pmg) \ pmg->pmg_keepcnt--; \ pme = x; \ if (pme) { \ pmg = &pmgrps[(pme - pments) >> NPMENTPERPMGRPSHIFT]; \ pmg->pmg_keepcnt++; \ } #define PP_LIST_CLOSE (void) splx(s); /* * * The next set of routines implements the machine * independent hat interface described in * */ /* * Initialize the hardware address translation structures. * Called by startup. * * Initialize the SW page tables in the range [0..NPMGRPS) as loaded * and mapped. This is required for hat_pmgreserve to work. After startup * reserves kernel pmg's, it calls hat_pmginit. hat_pmginit will create * a list of free SW page tables and a list of free HW pmg's by skipping * pmg's reserved pmg's. */ void hat_init() { register struct ctx *ctx; register struct pmgrp *pmg; register struct pment *pme; register struct pte *pte; register int i; i = 0; for (ctx = ctxs; ctx < ctxsNCTXS; ctx++) ctx->c_num = i++; ctxhand = ctxs; #ifdef MMU_3LEVEL if (mmu_3level) { register struct smgrp *smg; register struct sment *sme; i = 0; sme = sments; for (smg = smgrps; smg < smgrpsNSMGRPS; smg++) { smg->smg_num = i++; smg->smg_sme = sme; sme += NSMENTPERSMGRP; } smgrps[SMGRP_INVALID].smg_lock = 1; /* lock invalid smgrp */ smgrps[SMGRP_INVALID].smg_keepcnt++; smgrp_invalid = &smgrps[SMGRP_INVALID]; smgrphand = smgrps; } #endif MMU_3LEVEL i = 0; pme = pments; pte = ptes; /* * First NPMGRS pmgs are loaded in HW. hat_pmgreserve() assumes this. * * XXX - Note that &pmgrps[NPMGRPS] cannot be changed to pmgrpsNPMGRPS */ for (pmg = pmgrps; pmg < &pmgrps[NPMGRPS]; i++, pmg++) { pmg->pmg_num = i; pmg->pmg_mapped = 1; hwpmgs[i].hwp_pmgrp = pmg; pmg->pmg_pme = pme; pmg->pmg_pte = pte; pme += NPMENTPERPMGRP; pte += NPMENTPERPMGRP; } /* * The remaining SW pmgrp are not loaded in HW. 
*/ for (; pmg < pmgrpsNPMGRPS; pmg++) { pmg->pmg_num = PMGNUM_SW; pmg->pmg_mapped = 0; pmg->pmg_pme = pme; pmg->pmg_pte = pte; pme += NPMENTPERPMGRP; pte += NPMENTPERPMGRP; } pmgrps[PMGRP_INVALID].pmg_lock = 1; /* lock invalid pmgrp */ pmgrps[PMGRP_INVALID].pmg_keepcnt++; pmgrp_invalid = &pmgrps[PMGRP_INVALID]; /* * For now, we just grab a context and keep it locked, as well * as locking all of the kernel which was loaded into memory. */ kctx = &ctxs[KCONTEXT]; kctx->c_lock = 1; kctx->c_as = &kas; kas.a_hat.hat_ctx = kctx; } /* * Free all the hat resources held by an address space. * Called from as_free when an address space is being * destroyed and when it is to be "swapped out". * * XXX - should we do anything about locked translations here? */ void hat_free(as) register struct as *as; { register struct ctx *ctx; if ((ctx = as->a_hat.hat_ctx) != NULL) { TEST_PANIC(ctx->c_lock, "hat_free - ctx is locked"); /* * Clean context now. This will prevent expensive segment * and page flushing when freeing individual pmgs. */ mmu_setctx(ctx); if (!ctx->c_clean) { vac_ctxflush(); ctx->c_clean = 1; } hat_xfree(as); as->a_hat.hat_ctx = NULL; ctx->c_as = NULL; } else { hat_xfree(as); } /* * Switch to kernel context. */ mmu_setctx(kctx); } /* * Set up addr to map to page pp with protection prot. */ void hat_memload(seg, addr, pp, prot, lock) struct seg *seg; addr_t addr; struct page *pp; u_int prot; int lock; { struct pte pte; hat_mempte(pp, prot, &pte); hat_pteload(seg, addr, pp, pte, lock ? PTELD_LOCK : 0); } /* * Cons up a struct pte using the device's pf bits and protection * prot to load into the hardware for address addr; treat as minflt. 
*/
void
hat_devload(seg, addr, pf, prot, lock)
	struct seg *seg;
	addr_t addr;
	int pf;
	u_int prot;
	int lock;
{
	struct page *pp, *page_numtouserpp();
	union {
		struct pte u_pte;
		int u_pf;
	} apte;			/* lets the pf bits be stored as a raw int */
	int s = -1;		/* -1 means splvm() was not called below */

	/*
	 * If the request is to load a page which is really something
	 * for which we have a struct page, then we must be sure we
	 * maintain cache consistency.  However, we don't want to
	 * maintain such consistency for processes just running through
	 * physical memory, so we only pass along the page struct if it's
	 * not in a transition state.  This is why we use page_numtouserpp
	 * instead of page_numtopp here.  This should fix some readers
	 * of /dev/mem, but also allow those using the window system lock
	 * structures to work right too.
	 */
	if ((pf & PGT_MASK) == PGT_OBMEM) {
		s = splvm();
		pp = page_numtouserpp((u_int)(pf & PG_PFNUM));
	} else
		pp = NULL;

	/* Build the pte: frame number bits first, then valid and protection. */
	apte.u_pf = pf & PG_PFNUM;
	apte.u_pte.pg_v = 1;
	apte.u_pte.pg_prot = hat_vtop_prot(prot);

	hat_pteload(seg, addr, pp, apte.u_pte, lock ? PTELD_LOCK : 0);

	u.u_ru.ru_minflt++;

	/* Drop the priority again only if it was raised above. */
	if (s == -1)
		return;
	else
		(void) splx(s);
}

/*
 * Release one hardware address translation lock on the given address.
 * For the Sun MMU, this means decrementing the counter on the pmgrp.
 *
 * Panics ("hat_unlock") if no pmgrp covers addr or its keepcnt is below 2.
 */
void
hat_unlock(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct pmgrp *pmg;
	int s;

	pmg = hat_pmgfind(addr, seg->s_as);
	TEST_PANIC(pmg == NULL || pmg->pmg_keepcnt < 2, "hat_unlock");

#ifdef VAC
	if (vac && pmg->pmg_keepcnt == 2) {
		register struct pment *pme = pmg->pmg_pme;
		register int tcnt;
		struct page *pp;

		/*
		 * Now check to see if we now can cache any non-cached pages.
		 * For now, we use the simple minded algorithm and just
		 * unload any of the locked translations if the
		 * corresponding page is currently marked as non-cachable.
		 * This situation doesn't happen all that much, so the
		 * efficiency doesn't have to be all that great.
		 */
		for (tcnt = 0; tcnt < NPMENTPERPMGRP; tcnt++, pme++) {
			if (pme->pme_valid && (pp = pme->pme_page) != NULL &&
			    pp->p_nc) {
				hat_pteunload(pmg, pme, (addr_t)NULL,
				    HAT_RMSYNC);
				if (pp->p_nc) {
					/*
					 * We lost - unloading the mmu
					 * translation wasn't enough to
					 * make the page cacheable again.
					 */
					cache_stats.cs_unloadnofix++;
				} else {
					/*
					 * We won - unloading the mmu
					 * translation made the page
					 * cacheable again.
					 */
					cache_stats.cs_unloadfix++;
				}
			}
		}
	}
#endif VAC

	s = splvm();
	pmg->pmg_keepcnt -= 2;	/* once for hat_getpmg, once for unlock */
#ifdef MMU_3LEVEL
	/* Decr. once for unlock. */
	if (mmu_3level)
		hat_pmgtosmg(pmg)->smg_keepcnt--;
#endif MMU_3LEVEL
	(void) splx(s);
}

/*
 * Change the protections in the virtual address range
 * given to the specified virtual protection.  If
 * vprot == ~PROT_WRITE, then all the write permission
 * is taken away for the current translations, else if
 * vprot == ~PROT_USER, then all the user permissions
 * are taken away for the current translations, otherwise
 * vprot gives the new virtual protections to load up.
 */
void
hat_chgprot(seg, addr, len, vprot)
	struct seg *seg;
	addr_t addr;
	u_int len;
	u_int vprot;		/* virtual page protections */
{
	register addr_t a, ea;
	register struct pmgrp *pmg = NULL;
	register u_int pprot;	/* physical page protections */
	register int newprot;	/* set when this pte needs rewriting */
	struct pte pte;
	struct pte *ppte;

	/*
	 * For the two "take away" requests pprot is chosen per pte in
	 * the loop below; otherwise it is fixed here once.
	 */
	if (vprot != ~PROT_WRITE && vprot != ~PROT_USER)
		pprot = hat_vtop_prot(vprot);

	/*
	 * We must get a context for the AS because we will be
	 * synchronizing SW PTE by reads from MMU.
	 */
	hat_setup(seg->s_as);

	for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) {
		/*
		 * Look up the pmgrp on the first page and again whenever
		 * `a' is within the first page of a pmgrp-sized region.
		 */
		if (pmg == NULL ||
		    ((u_int)a & (PMGRPSIZE - 1)) < MMU_PAGESIZE) {
			if (pmg) {
				register int s;

				/* Drop the keep taken when pmg was found. */
				s = splvm();
				pmg->pmg_keepcnt--;
				(void) splx(s);
			}
			pmg = hat_pmgfind(a, seg->s_as);
			if (pmg == NULL) {
				/*
				 * Bump up `a' to avoid checking all
				 * the pte's in the invalid pmgrp.
				 */
				a = (addr_t)((u_int)a & ~(PMGRPSIZE - 1)) +
				    PMGRPSIZE - MMU_PAGESIZE;
				continue;
			}
			/*
			 * Make sure that loaded pmg is also mapped.
			 */
			if (hat_pmgisloaded(pmg))
				hat_pmgmap(pmg);
		}

		ppte = hat_addrtopte(pmg, a);
		if (!pte_valid(ppte))
			continue;

		/*
		 * Synchronize PTE from MMU.
		 */
		if (hat_pmgisloaded(pmg)) {
			mmu_getpte(a, ppte);
		}
		pte = *ppte;

		if (vprot == ~PROT_WRITE) {
			/* Writable protections drop to their read-only form. */
			switch (pte.pg_prot) {
			case KW:
				pprot = KR;
				newprot = 1;
				break;
			case UW:
				pprot = UR;
				newprot = 1;
				break;
			default:
				newprot = 0;
				break;
			}
		} else if (vprot == ~PROT_USER) {
#ifdef sun2
			/* XXX - need a better way to do this */
			if (pte.pg_prot & 07) {
				pprot = pte.pg_prot & ~07;
				newprot = 1;
			} else {
				newprot = 0;
			}
#else sun2
			/* User protections drop to their kernel form. */
			switch (pte.pg_prot) {
			case UW:
				pprot = KW;
				newprot = 1;
				break;
			case UR:
				pprot = KR;
				newprot = 1;
				break;
			default:
				newprot = 0;
				break;
			}
#endif sun2
		} else if (pte.pg_prot != pprot) {
			newprot = 1;
		} else {
			newprot = 0;
		}

		if (newprot) {
			pte.pg_prot = pprot;
			*ppte = pte;		/* Set SW PTE */
			/*
			 * Synchronize HW PTE if this pmg is loaded.
			 * We assume that hat_setup has been done.
			 */
			if (hat_pmgisloaded(pmg)) {
#ifdef VAC
				if (vac && !pte.pg_nc && pte.pg_r)
					vac_pageflush(a);
#endif VAC
				mmu_setpte(a, pte);
			}
		}
	}

	/* Drop the keep on the last pmgrp examined, if any. */
	if (pmg != NULL) {
		register int s;

		s = splvm();
		pmg->pmg_keepcnt--;
		(void) splx(s);
	}
}

/*
 * Associate all the mappings in the range [addr..addr+len) with
 * segment seg. Since we don't cache segments in this hat implementation,
 * this routine is a noop.
 */
/*ARGSUSED*/
void
hat_newseg(seg, addr, len, nseg)
	struct seg *seg;
	addr_t addr;
	u_int len;
	struct seg *nseg;
{
	return;
}

/*
 * Unload all the mappings in the range [addr..addr+len).
 */
void
hat_unload(seg, addr, len)
	struct seg *seg;
	addr_t addr;
	u_int len;
{
	register addr_t a;
	register struct pment *pme;
	register struct pmgrp *pmg = NULL;
	addr_t ea;
	int s;

	if (seg->s_as->a_hat.hat_pmgrps == NULL) {
		/*
		 * If there are no allocated pmgrps for this
		 * address space, we don't want to do anything.
		 */
		return;
	}

	/*
	 * hat_setup needed for HAT_VADDR optimization to work.
	 */
	hat_setup(seg->s_as);

	for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) {
		/*
		 * Same pmgrp caching scheme as hat_chgprot(): look up a
		 * pmgrp at the start and at each pmgrp boundary, otherwise
		 * just step to the next pme of the current one.
		 */
		if (pmg == NULL ||
		    ((u_int)a & (PMGRPSIZE - 1)) < MMU_PAGESIZE) {
			if (pmg != NULL) {
				s = splvm();
				pmg->pmg_keepcnt--;
				(void) splx(s);
			}
			pmg = hat_pmgfind(a, seg->s_as);
			if (pmg == NULL) {
				/*
				 * Bump up `a' to avoid checking all
				 * the pme's in the invalid pmgrp.
				 */
				a = (addr_t)((u_int)a & ~(PMGRPSIZE - 1)) +
				    PMGRPSIZE - MMU_PAGESIZE;
				continue;
			}
			pme = &pmg->pmg_pme[mmu_btop(a - pmg->pmg_base)];
		} else {
			pme++;
		}

		/*
		 * Throw out the mapping.
		 */
		if (pme->pme_nosync) {
			TEST_PANIC(pmg->pmg_keepcnt < 2,
			    "hat_unload - pmg not kept");
			/*
			 * Decrement keepcnt on pmg and smg to indicate
			 * 'unlock' on the address.
			 */
			s = splvm();
#ifdef MMU_3LEVEL
			if (mmu_3level) {
				hat_pmgtosmg(pmg)->smg_keepcnt--;
			}
#endif MMU_3LEVEL
			pmg->pmg_keepcnt--;
			(void) splx(s);
			hat_pteunload(pmg, pme, a, HAT_NOSYNC | HAT_VADDR);
		} else {
			hat_pteunload(pmg, pme, a, HAT_RMSYNC | HAT_VADDR);
		}
	}

	/* Drop the keep on the last pmgrp examined, if any. */
	if (pmg) {
		s = splvm();
		pmg->pmg_keepcnt--;
		(void) splx(s);
	}
}

/*
 * Unload all the hardware translations that map page `pp'.
 *
 * Each pass restarts from pp->p_mapping rather than following pme_next.
 */
void
hat_pageunload(pp)
	register struct page *pp;
{
	register struct pmgrp *pmg;
	register struct pment *pme;
	int s;

	PP_LIST_OPEN {
		hat_pteunload(pmg, pme, (addr_t)NULL, HAT_RMSYNC);
		PP_LIST_NEXT((struct pment *)pp->p_mapping)
	} PP_LIST_CLOSE
}

/*
 * Get all the hardware dependent attributes for a page struct
 */
void
hat_pagesync(pp)
	struct page *pp;
{
	register struct pment *pme;
	register struct pmgrp *pmg;
	int s;

	PP_LIST_OPEN {
		/*
		 * Get page dependent info from hardware for
		 * each translation, but don't unload them.
		 */
		hat_ptesync(pp, pmg, pme, (addr_t)NULL, HAT_RMSYNC);
		PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
	} PP_LIST_CLOSE
}

/*
 * Returns the page frame number for a given kernel virtual address.
*/ u_int hat_getkpfnum(addr) addr_t addr; { struct pte pte; mmu_getkpte(addr, &pte); return (MAKE_PFNUM(&pte)); } /* * Resolve a page fault by loading a cached translation. * * hat_fault() is called from the fault handler in locore.s and pagefault. * */ hat_fault(addr) caddr_t addr; { struct pmgrp *pmg; int s; struct as *as; /* * We assume that addresses above KERNELBASE belong to kas. */ if (addr >= (addr_t)KERNELBASE) { as = &kas; } else { /* * We punt if the address space of the running process * hasn't been set up. pagefault will allocate the context * and call hat_fault again if this was a real pagefault. */ if ((as = u.u_procp->p_as) != mmu_getctx()->c_as) return (FC_NOMAP); } if ((pmg = hat_pmgfind(addr, as)) != NULL) { if (pmg->pmg_mapped) { /* * Pmg is mapped and loaded. The hat layer cannot * resolve the fault. */ ASSERT(pmg->pmg_num != PMGNUM_SW); s = splvm(); pmg->pmg_keepcnt--; vmhatstat.vh_faultinhw++; (void) splx(s); return (FC_NOMAP); } else if (hat_pmgisloaded(pmg)) { /* * pmg is loaded but not mapped. */ s = splvm(); /* * The code below is inline hat_pmgmap. */ #ifdef MMU_3LEVEL if (mmu_3level) { hat_smgalloc(pmg->pmg_as, pmg->pmg_base, pmg); hat_pmgtosmg(pmg)->smg_keepcnt--; } #endif MMU_3LEVEL mmu_setpmg(pmg->pmg_base, pmg); pmg->pmg_mapped = 1; /* End of inline hat_pmgmap. */ pmg->pmg_keepcnt--; vmhatstat.vh_faultmap++; (void) splx(s); return ((faultcode_t)0); } else { /* * Pmg is not loaded. */ hat_pmgload(pmg); s = splvm(); pmg->pmg_keepcnt--; #ifdef MMU_3LEVEL if (mmu_3level) hat_pmgtosmg(pmg)->smg_keepcnt--; #endif MMU_3LEVEL (void) splx(s); vmhatstat.vh_faultload++; return ((faultcode_t)0); } } /* * HAT couldn't resolve the fault because there is no SW pmg. */ vmhatstat.vh_faultnopmg++; return (FC_NOMAP); } /* * End of machine independent interface routines. * * The next few routines implement some machine dependent functions * needed for the Sun MMU. 
Note that each hat implementation can define
 * whatever additional interfaces that make sense for that machine.
 * These routines are defined in the machine dependent hat header.
 *
 * Start machine specific interface routines.
 */

/*
 * This routine is called for kernel initialization
 * to cause a pmgrp to be reserved w/o any unloading,
 * links the pmgrp into the address space (if not already there),
 * and return with the pmgrp in question leaving its keepcnt incremented.
 */
void
hat_pmgreserve(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as *as = seg->s_as;
	register struct pmgrp *pmg;
	u_int pmgnum;
	int s;

	/* Find the HW pmg currently mapped at addr and its SW pmgrp. */
	pmgnum = map_getsgmap(addr);
	TEST_PANIC(pmgnum == PMGRP_INVALID, "hat_pmgreserve: invalid pmg");

	pmg = hwpmgs[pmgnum].hwp_pmgrp;
	ASSERT(pmg != NULL);
	ASSERT(pmg->pmg_num == pmgnum);

	s = splvm();
	pmg->pmg_keepcnt++;
	(void) splx(s);

	/* The as must have a context and the pmg must be free or ours. */
	TEST_PANIC(as->a_hat.hat_ctx == NULL ||
	    (pmg->pmg_as != NULL && pmg->pmg_as != as), "hat_pmgreserve");

	if (pmg != pmgrp_invalid && pmg->pmg_as == NULL) {
		hat_pmglink(pmg, as, addr);
	}

#ifdef MMU_3LEVEL
	if (mmu_3level)
		hat_smgreserve(seg, addr);
#endif MMU_3LEVEL
}

/*
 * Initialize all the unlocked pmgs to have invalid pme's
 * and add them to the free list.
 * This routine is called during startup after all the
 * kernel pmgs have been reserved.  This routine will
 * also set the pmgrpmin variable for use in hat_pmgalloc.
 */
void
hat_pmginit()
{
	register struct pmgrp *pmg;
	register addr_t addr;
	int s;
	int i;
	struct hwpmg *hwpmg;

	s = splvm();

	/*
	 * Make HW free list. Skip locked and kept pmgrps.
	 * Here we assume that [0..NPMGRPS) were loaded in MMU by hat_init().
	 */
	for (pmg = pmgrps, hwpmg = hwpmgs; hwpmg < hwpmgsNHWPMGS;
	    pmg++, hwpmg++) {
		if (pmg->pmg_lock || pmg->pmg_keepcnt > 0) {
			pmg->pmg_mapped = 1;
			continue;
		}

		/* The first free pmg seen becomes the minimum and the hand. */
		if (pmgrpmin == NULL) {
			pmgrpmin = pmgrphand = pmg;
			hwpmgmin = hwpmghand = hwpmg;
		}

		/* Map the HW pmg at SEGTEMP and invalidate every pte in it. */
		mmu_settpmg(SEGTEMP, pmg);
		for (addr = SEGTEMP; addr < SEGTEMP + PMGRPSIZE;
		    addr += PAGESIZE)
			mmu_setpte(addr, mmu_pteinvalid);

		/* Detach the SW pmgrp and push the HW pmg on the free list. */
		pmg->pmg_num = PMGNUM_SW;
		hwpmg->hwp_next = hwpmgfree;
		hwpmg->hwp_pmgrp = NULL;
		hwpmgfree = hwpmg;
	}
	mmu_pmginval(SEGTEMP);

	/*
	 * Make SW pmg free list.
	 */
	for (pmg = pmgrps; pmg < pmgrpsNPMGRPS; pmg++) {
		if (pmg->pmg_lock || pmg->pmg_keepcnt > 0)
			continue;

		for (i = 0; i < NPMENTPERPMGRP; i++) {
			pmg->pmg_pte[i] = mmu_pteinvalid;
		}

		pmg->pmg_num = PMGNUM_SW;
		pmg->pmg_mapped = 0;
		pmg->pmg_next = pmgrpfree;
		pmgrpfree = pmg;
	}
	(void) splx(s);

#ifdef MMU_3LEVEL
	/*
	 * NOTE(review): only hat_smginit() is governed by the if below;
	 * hat_smgcheck_keepcntall() runs whenever MMU_3LEVEL is defined.
	 * Confirm that this is intended.
	 */
	if (mmu_3level)
		hat_smginit();

	/*
	 * Check keepcnt on smegs and pmegs to detect double mapped pmegs.
	 */
	hat_smgcheck_keepcntall();
#endif MMU_3LEVEL
}

/*
 * Set addr in segment seg to use pte to (possibly) map to page pp.
 * This is the common routine used for hat_memload and hat_devload
 * in addition to the machine dependent mapin implementation.
 *
 * flags is a mask of PTELD_* bits: PTELD_LOCK leaves the keepcnt taken by
 * hat_pmgalloc() in place, PTELD_INTREP and PTELD_NOSYNC are recorded in
 * the pme, and PTELD_IOCACHE is acted on only when IOC is configured.
 */
void
hat_pteload(seg, addr, pp, pte, flags)
	struct seg *seg;
	addr_t addr;
	struct page *pp;
	struct pte pte;
	int flags;
{
	register struct pment *ppme;
	register struct pmgrp *ppmg;
	int s;
	struct pte opte;	/* old pte; set only when reloading a valid pme */

	TEST_PANIC(pp != NULL && pp->p_free, "hat_pteload free page");

	/*
	 * We need to setup context because we will (potentially) be reading
	 * HW PTE's.
	 */
	hat_setup(seg->s_as);

	ppmg = hat_pmgalloc(seg, addr);
	hat_pmgload(ppmg);

	ppme = &ppmg->pmg_pme[mmu_btop(addr - ppmg->pmg_base)];

	/*
	 * We must be sure that setting the pte and adding to the list
	 * of mappings is atomic.
	 */
	s = splvm();

#ifdef VAC
	/*
	 * If there's no page structure associated with this mapping,
	 * or the vac is turned off, or the page is non-cacheable,
	 * then force the mapping to be non-cached.
	 */
	if (pp == NULL || !vac || pp->p_nc)
		pte.pg_nc = 1;
#endif VAC

	if (ppme->pme_valid) {
		/*
		 * Reloading a translation - be sure to preserve the
		 * existing ref and mod bits for this translation.
		 *
		 * XXX - should cache all the attributes of a loaded
		 * translation in the pme structure so that we can
		 * avoid reloading all together unless something
		 * is actually going to change.
		 */

		/*
		 * Synch SW PTE by reading MMU.
		 */
		opte = *hat_addrtopte(ppmg, addr);
		mmu_getpte(addr, &opte);
		pte.pg_r = opte.pg_r;
		pte.pg_m = opte.pg_m;
	} else {
		ppme->pme_valid = 1;
	}

#ifdef IOC
	if (ioc) {
		struct pte *p = &pte;

		if (flags & PTELD_IOCACHE) {
			ioc_pteset(p);
		}
		ioc_mbufset(p, addr);
	}
#endif IOC

	*hat_addrtopte(ppmg, addr) = pte;	/* Set SW PTE */
	mmu_setpte(addr, pte);			/* load the SW pte in HW */

	/*
	 * Check to see if this pme needs to be added
	 * to the list of pme's mapping this page.
	 */
	if (pp != ppme->pme_page) {
		TEST_PANIC(ppme->pme_page != NULL, "hat_pteload");
		ASSERT(pp != NULL && ppme->pme_page == NULL);

		/* Push this pme on the front of the page's mapping list. */
		ppme->pme_page = pp;
		ppme->pme_next = PMENXT_INDEX(pp->p_mapping);
		pp->p_mapping = (caddr_t)ppme;
		(void) splx(s);

		seg->s_as->a_rss += 1;

#ifdef VAC
		/*
		 * If (vac) active, then check for conflicts.
		 * A conflict exists if the new and existent mappings
		 * do not match in their "shm_alignment" fields
		 * XXX and one of them is writable XXX.  If conflicts
		 * exist, the extant mappings are flushed UNLESS
		 * one of them is locked.  If one of them is locked,
		 * then the mappings are flushed and converted to
		 * non-cacheable mappings [must be deconverted in
		 * hat_pteunload].
		 * XXX need to store protections in pme
		 * to employ writable optimization.
		 */
		if (vac && !pp->p_nc) {
			struct pmgrp *pmg;	/* temporary pmg */
			struct pment *pme;	/* temporary pme */
			struct pmgrp *pmgpc = (struct pmgrp *)0,
						/* user's text pmg */
			    *pmgsp = (struct pmgrp *)0;
						/* user's stack pmg */
			int ccf;	/* cache conflict found flag */
			int first = 1;

			/*
			 * mappings to the user's current stack and
			 * text locations must be locked in memory,
			 * or we run the risk of getting into an
			 * infinite paging loop if the program tries
			 * to read the physical pages containing either
			 * via a mapping that is not cache aliased.
			 */
			s = splvm();
			if (!servicing_interrupt()) {
				if (u.u_ar0 != (int *)0) {
					if (u.u_ar0[SP] != 0) {
						pmgsp = mmu_getpmg
						    ((addr_t)u.u_ar0[SP]);
						if (pmgsp !=
						    (struct pmgrp *)0)
							pmgsp->pmg_keepcnt ++;
					}
					if (u.u_ar0[PC] != 0) {
						pmgpc = mmu_getpmg
						    ((addr_t)u.u_ar0[PC]);
						if (pmgpc !=
						    (struct pmgrp *)0)
							pmgpc->pmg_keepcnt ++;
					}
				}
			}
			(void)splx(s);

			ccf = 0;
			PP_LIST_OPEN {
				/* Skip the translation just added. */
				if (pme == ppme) {
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
					continue;
				}
				/*
				 * If the first other mapping examined is
				 * aligned with the new one, stop the walk.
				 */
				if (first && VAC_ALIGNED(addr, pmg->pmg_base +
				    mmu_ptob(pme - pmg->pmg_pme))) {
					PP_LIST_NEXT((struct pment *)NULL)
					continue;
				}
				first = 0;
				/*
				 * Compare keep to 1 because
				 * list walker keeps it too.
				 */
				if (pmg->pmg_lock || pmg->pmg_keepcnt > 1) {
					ccf = 1;
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
					continue;
				}
				/*
				 * Not locked or kept: unload it, then
				 * continue from the list head.
				 */
				hat_pteunload(pmg, pme, (addr_t)NULL,
				    HAT_RMSYNC);
				PP_LIST_NEXT((struct pment *)pp->p_mapping)
			} PP_LIST_CLOSE

			/*
			 * Release locked pmgrps.
			 */
			s = splvm();
			if (pmgsp != (struct pmgrp *)0)
				pmgsp->pmg_keepcnt --;
			if (pmgpc != (struct pmgrp *)0)
				pmgpc->pmg_keepcnt --;
			(void) splx(s);

			if (ccf) {
				pte.pg_nc = 1;
				pp->p_nc = 1;
				/*
				 * This time we don't exclude our translation,
				 * so it will get remarked noncacheable.
				 */
				PP_LIST_OPEN {
					hat_ptesync(pp, pmg, pme,
					    (addr_t)NULL, HAT_NCSYNC);
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
				} PP_LIST_CLOSE
			}
		}
#endif VAC
	} else {
		(void) splx(s);

		/* A remap must not change the INTREP or NOSYNC attributes. */
		TEST_PANIC(pp != NULL &&
		    ((ppme->pme_intrep != ((flags & PTELD_INTREP) != 0)) ||
		    (ppme->pme_nosync != ((flags & PTELD_NOSYNC) != 0))),
		    "pteload - remap flags");
#ifdef VAC
		/*
		 * Reloading a pte for an already mapped page.  If
		 * the page is "real", then if we've got a VAC, then
		 * flush the cache to ensure no protection mismatches
		 * between cache and MMU.
		 * XXX - need to store protections in pme to avoid this.
		 *
		 * XXX - this could cause writeback errors if protection
		 * was changed from writeable to read-only.
		 */
		if ((pp != NULL) && vac && pte.pg_r &&
		    *(int *)&pte != *(int *)&opte)
			vac_pageflush(addr);
#endif VAC
	}

	ppme->pme_intrep = (flags & PTELD_INTREP) != 0;
	ppme->pme_nosync = (flags & PTELD_NOSYNC) != 0;

	/* keep some statistics on the cache-ability of the translation */
#if defined(VAC)
	if (pte.pg_type == OBMEM) {
		if (seg->s_as == &kas) {
			if (addr >= DVMA) {
				if (!pte.pg_nc)
					cache_stats.cs_ioc++;
				else
					cache_stats.cs_ionc++;
			} else {
				if (!pte.pg_nc)
					cache_stats.cs_kc++;
				else
					cache_stats.cs_knc++;
			}
		} else {
			if (!pte.pg_nc)
				cache_stats.cs_uc++;
			else
				cache_stats.cs_unc++;
		}
	} else {
		cache_stats.cs_other++;
	}
#else defined(VAC)
	if (pte.pg_type == OBMEM) {
		if (seg->s_as == &kas) {
			if (addr >= DVMA) {
				cache_stats.cs_ionc++;
			} else {
				cache_stats.cs_knc++;
			}
		} else {
			cache_stats.cs_unc++;
		}
	} else {
		cache_stats.cs_other++;
	}
#endif defined(VAC)

	if ((flags & PTELD_LOCK) == 0) {
		/*
		 * Decrement lock count. It was incr. by hat_pmgalloc().
		 */
		s = splvm();
		ppmg->pmg_keepcnt--;
#ifdef MMU_3LEVEL
		if (mmu_3level)
			hat_pmgtosmg(ppmg)->smg_keepcnt--;
#endif MMU_3LEVEL
		(void) splx(s);
	}
}

/*
 * Fill in *ppte as a valid on-board memory (OBMEM) pte for page pp
 * with the physical protection that corresponds to vprot.
 */
void
hat_mempte(pp, vprot, ppte)
	struct page *pp;
	u_int vprot;
	register struct pte *ppte;
{
	*ppte = mmu_pteinvalid;
	ppte->pg_prot = hat_vtop_prot(vprot);
	ppte->pg_v = 1;
	ppte->pg_type = OBMEM;
	ppte->pg_pfnum = page_pptonum(pp);
}

/*
 * Give address space `as' a hardware context.  If it already has one,
 * the context is just marked not clean.  Otherwise a context is chosen
 * (a free one if the scan finds one, else an unlocked one is stolen),
 * any mappings of the previous owner are invalidated, and the HW
 * resources that `as' already holds are mapped into the new context.
 */
void
hat_getctx(as)
	struct as *as;
{
	register struct ctx *ctx, *ttctx;
	register struct pmgrp *pmg;
	register u_short tt = 0;	/* c_time of the best candidate so far */
	int s;
#ifdef MMU_3LEVEL
	register struct smgrp *smg;
#endif MMU_3LEVEL

	if (as->a_hat.hat_ctx) {
		as->a_hat.hat_ctx->c_clean = 0;
		return;
	}

	/*
	 * find a free ctx or an old one
	 *
	 * NOTE(review): the loop ends only when ctx comes back around to
	 * ctxhand or a free ctx is found.  When ctxhand == ctxs the wrap
	 * below sets ctx to ctxhand inside the body, and the following
	 * ctx++ steps past it; confirm a free ctx always exists in that
	 * state.
	 */
	ttctx = NULL;
	for (ctx = ctxhand + 1; ctx != ctxhand; ctx++) {
		if (ctx == ctxsNCTXS)	/* wrap around the ctx table */
			ctx = ctxs;
		if (ctx->c_lock != 0)	/* can't touch */
			continue;

		/*
		 * Take ctx with no address space.
		 */
		if (ctx->c_as == NULL) {
			ttctx = ctx;
			break;
		}

		if (ttctx == NULL || ctx->c_time <= tt) {
			ttctx = ctx;	/* new "best" ctx */
			tt = ctx->c_time;
		}
	}
	TEST_PANIC(ttctx == NULL, "hat_getctx - no ctx's");
	ctxhand = ctx = ttctx;

	/*
	 * Update vmhatstat statistics.
	 */
	if (ctx->c_as) {
		if (ctx->c_clean)
			vmhatstat.vh_ctxstealclean++;
		else
			vmhatstat.vh_ctxstealflush++;
	} else {
		vmhatstat.vh_ctxfree++;
	}

	mmu_setctx(ctx);
	if (ctx->c_as) {
		/* Stealing: tear down the previous owner's mappings. */
		vac_flushallctx();
		s = splvm();
#ifdef MMU_3LEVEL
		if (mmu_3level) {
			/* invalidate any smgrps already loaded for this ctx */
			for (smg = ctx->c_as->a_hat.hat_smgrps; smg != NULL;
			    smg = smg->smg_next)
				mmu_smginval(smg->smg_base);
		} else {
#endif MMU_3LEVEL
			/* invalidate any pmgrps already loaded for this ctx */
			for (pmg = ctx->c_as->a_hat.hat_pmgrps; pmg != NULL;
			    pmg = pmg->pmg_next) {
				if (pmg->pmg_mapped) {
					ASSERT(pmg->pmg_num != PMGNUM_SW);
					mmu_pmginval(pmg->pmg_base);
					pmg->pmg_mapped = 0;
				}
			}
#ifdef MMU_3LEVEL
		}
#endif MMU_3LEVEL
		ctx->c_as->a_hat.hat_ctx = NULL;
		(void)splx(s);
	}

	s = splvm();

	/* Bind the context to the new address space and timestamp it. */
	ctx->c_as = as;
	ctx->c_clean = 0;
	ctx->c_time = ctx_time++;
	as->a_hat.hat_ctx = ctx;

#ifdef MMU_3LEVEL
	if (mmu_3level) {
		/* load up any smgrps already allocated to this hat */
		for (smg = as->a_hat.hat_smgrps; smg != NULL;
		    smg = smg->smg_next)
			mmu_setsmg(smg->smg_base, smg);
	} else {
#endif MMU_3LEVEL
		/* load up any pmgrps already allocated to this hat */
		for (pmg = as->a_hat.hat_pmgrps; pmg != NULL;
		    pmg = pmg->pmg_next) {
			if (pmg->pmg_num != PMGNUM_SW) {
				ASSERT(!pmg->pmg_mapped);
				mmu_setpmg(pmg->pmg_base, pmg);
				pmg->pmg_mapped = 1;
				vmhatstat.vh_ctxmappmgs++;
			}
		}
#ifdef MMU_3LEVEL
	}
#endif MMU_3LEVEL

	(void)splx(s);
}

/*
 * Used to lock down hat resources for an address range. In this implementation,
 * this means locking down the necessary pmegs. This currently works only
 * for kernel addresses.
*/ void hat_reserve(seg, addr, len) struct seg *seg; addr_t addr; u_int len; { register addr_t a; addr_t ea; struct pmgrp *pmg; TEST_PANIC(seg->s_as != &kas, "hat_reserve"); for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) { pmg = hat_pmgalloc(seg, a); TEST_PANIC(pmg == NULL, "hat_reserve: pmg == NULL"); hat_pmgload(pmg); } } u_int hat_vtop_prot(vprot) u_int vprot; { switch (vprot) { case 0: case PROT_USER: /* * Since 0 might be a valid protection, * the caller should not set valid bit * if vprot == 0 to be sure. */ return (0); case PROT_READ: case PROT_EXEC: case PROT_READ | PROT_EXEC: return (KR); case PROT_WRITE: case PROT_WRITE | PROT_EXEC: case PROT_READ | PROT_WRITE: case PROT_READ | PROT_WRITE | PROT_EXEC: return (KW); case PROT_EXEC | PROT_USER: case PROT_READ | PROT_USER: case PROT_READ | PROT_EXEC | PROT_USER: return (UR); case PROT_WRITE | PROT_USER: case PROT_WRITE | PROT_EXEC | PROT_USER: case PROT_READ | PROT_WRITE | PROT_USER: case PROT_READ | PROT_WRITE | PROT_EXEC | PROT_USER: return (UW); default: panic("hat_vtop_prot"); /* NOTREACHED */ } } #if defined(sun4c) && defined(VAC) /* * Flush all possible cache lines mapping the given physical page. This * is used for software cache consistency with I/O, to clean the cache * of all data subject to I/O. */ void hat_vacsync(pfnum) u_int pfnum; { register struct page *pp = page_numtopp(pfnum); register struct pmgrp *pmg; register struct pment *pme; int s; addr_t va; struct ctx *ctxsav, *nctx, *curctx; struct pte tpte; /* * If the cache is off, the page isn't memory, or the page is * non-cacheable, then none of the page could be in the cache * in the first place, with the exception that a page frame * for kernel .data or .bss objects could be in the cache, * but will have no page structure. 
*/ if (!vac) { return; } else if (pp == (struct page *) NULL) { extern u_int kpfn_dataseg, kpfn_endbss; if (pfnum >= kpfn_dataseg || pfnum <= kpfn_endbss) { extern char etext[]; /* * In sun4c, the page frame number for the start * of the kernel data segment and the page frame * number for end are latched up in kvm_init(). * If a page frame number ends up here, then some- * body is doing i/o to an object in kernel .data * or .bss. * * This is a temporary solution, and it does have * some holes in it. It assumes that the page frame * numbers between kernel .data and end are contiguous. * * As a side note, we could go to the effort of * of reading the kernel pte for the calculated * address to check with the passed page frame * number, but it isn't really worth the effort. */ va = (addr_t) (roundup((u_int) etext, DATA_ALIGN) + ((pfnum - kpfn_dataseg) << MMU_PAGESHIFT)); vac_pageflush (va); } return; } else if (pp->p_nc) { return; } curctx = ctxsav = mmu_getctx(); /* * Walk the list of translations for this page, flushing each * one. */ PP_LIST_OPEN { /* * If the translation has no context, it can't be * in the cache. */ if ((nctx = pmg->pmg_as->a_hat.hat_ctx) != NULL && !nctx->c_clean && pmg->pmg_mapped) { /* * Calculate the virtual address, switch to the * correct context, and flush the page. */ va = pmg->pmg_base + mmu_ptob(pme - pmg->pmg_pme); if (nctx != curctx) { mmu_setctx(nctx); curctx = nctx; } mmu_getpte(va, &tpte); if (tpte.pg_r) vac_pageflush(va); } PP_LIST_NEXT(PMENXT_PTR(pme->pme_next)) } PP_LIST_CLOSE /* * Restore the original context. */ if (curctx != ctxsav) mmu_setctx(ctxsav); } #endif defined(sun4c) && defined(VAC) #ifdef sun4c /* * Kill any processes that use this page. (Used for parity recovery) * If we encounter the kernel's address space, give up (return -1). * Otherwise, we return 0. 
*/
hat_kill_procs(pp, addr)
	struct page *pp;
	addr_t addr;
{
	register struct pmgrp *pmg;
	register struct pment *pme;
	int s;
	struct as *as;
	struct proc *p;
	int result = 0;

	/*
	 * PP_LIST_OPEN/NEXT/CLOSE are defined elsewhere; presumably they
	 * walk pp's mapping list, binding `pmg' and `pme' for each
	 * translation (and using `s') -- TODO confirm against the macros.
	 */
	PP_LIST_OPEN {
		/*
		 * Find the address space that contains this pment.
		 */
		as = pmg->pmg_as;

		/*
		 * If the address space is the kernel space, then fail.
		 * The memory is corrupted, and the only thing to do with
		 * corrupted kernel memory is die.
		 *
		 * NOTE(review): only the result is latched to -1 here; the
		 * walk is not abandoned, so the remaining translations are
		 * still processed.
		 */
		if (as == &kas) {
			printf("parity recovery: kernel address space\n");
			result = -1;
		}

		/*
		 * Find the proc that uses this address space and kill
		 * it.  Note that more than one process can share the
		 * same address space, if vfork() was used to create it.
		 * This means that we have to look through the entire
		 * process table and not stop at the first match.
		 */
		for (p = allproc; p; p = p->p_nxt) {
			if (p->p_as == as) {
				printf("pid %d killed: parity error\n",
				    p->p_pid);
				uprintf("pid %d killed: parity error\n",
				    p->p_pid);
				/* post SIGBUS, then record fault code/address */
				psignal(p, SIGBUS);
				p->p_uarea->u_code = FC_HWERR;
				p->p_uarea->u_addr = addr;
			}
		}
		PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
	} PP_LIST_CLOSE
	return (result);
}
#endif /* sun4c */

/*
 * End machine specific interface routines.
 *
 * The remainder of the routines are private to this module and are used
 * by the routines above to implement a service to the outside caller.
 *
 * Start private routines.
 */

/*
 * Unload a pme.  We call hat_ptesync() to unload the translation
 * then remove the pme from the list of pme's mapping the page.
 * Should always be called with the pmgrp for the pme being held.
*/ static void hat_pteunload(ppmg, ppme, vaddr, flags) struct pmgrp *ppmg; register struct pment *ppme; addr_t vaddr; int flags; { int s; struct page *pp; struct pment *pmentp; u_int ppmeind; #ifdef VAC struct pment *pme; /* temporary for listwalk */ struct pmgrp *pmg; /* temporary for listwalk */ struct pment *qpme; /* second temporary for comparison */ struct pmgrp *qpmg; /* second temporary for comparison */ addr_t pa, qa; /* matching address values */ int ccf; /* cache conflict found flag */ int s2; #endif VAC s = splvm(); pp = ppme->pme_page; ppmeind = PMENXT_INDEX(ppme); if (pp != NULL) { /* * Remove it from the list of mappings for the page. */ if (ppme == (struct pment *)(pp->p_mapping)) { pp->p_mapping = (caddr_t)PMENXT_PTR(ppme->pme_next); ppme->pme_next = PMENXT_NULL; } else { TEST_PANIC(pp->p_mapping == NULL, "hat_pteunload - no mappings"); for (pmentp = ((struct pment *) (pp->p_mapping)); pmentp->pme_next != ppmeind; pmentp = PMENXT_PTR(pmentp->pme_next)) TEST_PANIC(pmentp->pme_next == PMENXT_NULL, "hat_pteunload - no mapping"); pmentp->pme_next = ppme->pme_next; ppme->pme_next = PMENXT_NULL; } (void) splx(s); ppme->pme_page = NULL; ppmg->pmg_as->a_rss -= 1; #ifdef VAC if (vac && pp->p_nc) { ccf = 0; PP_LIST_OPEN { s2 = splvm(); if ((qpme = PMENXT_PTR(pme->pme_next)) != NULL) { pa = pmg->pmg_base + mmu_ptob(pme - pmg->pmg_pme); qpmg = &pmgrps[(qpme - pments) / NPMENTPERPMGRP]; qa = qpmg->pmg_base + mmu_ptob(qpme - qpmg->pmg_pme); if (!VAC_ALIGNED(pa, qa)) { ccf = 1; (void) splx(s2); PP_LIST_NEXT(NULL) continue; } } (void) splx(s2); PP_LIST_NEXT(PMENXT_PTR(pme->pme_next)) } PP_LIST_CLOSE if (!ccf) { /* * No more cache conflict. * Use hat_ptesync to resync. */ pp->p_nc = 0; PP_LIST_OPEN { hat_ptesync(pp, pmg, pme, (addr_t)NULL, HAT_NCSYNC); PP_LIST_NEXT(PMENXT_PTR(pme->pme_next)) } PP_LIST_CLOSE } } #endif VAC s = splvm(); } /* * Invalidate the translation. 
*/ if (ppme->pme_valid) { flags |= HAT_INVSYNC; hat_ptesync(pp, ppmg, ppme, vaddr, flags); ppme->pme_nosync = ppme->pme_intrep = ppme->pme_valid = 0; } (void) splx(s); } /* * Synchronize the hardware and software of a pte. Used for updating the * hardware nocache bit, the software R & M bits, and invalidating ptes. */ static void hat_ptesync(pp, pmg, pme, vaddr, flags) struct page *pp; register struct pmgrp *pmg; register struct pment *pme; addr_t vaddr; int flags; { register struct ctx *ctxsav, *nctx; register addr_t mapaddr; int s, pmg_off; struct pte pte; struct pte *ppte; /* pointer to SW pte */ struct as *as; struct ctx *ctx; int usetemp; int dommu; int doflush; int didsetpte = 0; int didflush = 0; ppte = hat_pmetopte(pmg, pme); as = pmg->pmg_as; ctx = as->a_hat.hat_ctx; TEST_PANIC(pme->pme_valid == 0, "hat_ptesync - invalid pme"); /* * The HAT_VADDR flag means that the vaddr argument contains a valid * page address. * * It's used as optimization when hat_ptesync is called * from hat_unload(). We know that: * (1) the pmg context is setup * (2) we don't need to splvm() here since we will not * be using SEGTEMP * */ if (flags & HAT_VADDR) { if (hat_pmgisloaded(pmg)) { /* * pmg is loaded in HW - make sure that it is mapped. */ hat_pmgmap(pmg); mapaddr = vaddr; dommu = 1; usetemp = 0; /* * We must flush VAC if the context is not clean. */ doflush = !ctx->c_clean; } else { /* * pmg is not loaded in MMU */ dommu = doflush = usetemp = 0; mapaddr = (addr_t)0; } goto skip; } pmg_off = mmu_ptob(pme - pmg->pmg_pme); vaddr = pmg->pmg_base + pmg_off; ctxsav = mmu_getctx(); /* * We must protect the use of the mapping address, * since it is a shared resource. */ s = splvm(); if (!hat_pmgisloaded(pmg)) { /* * SW page table is not loaded. */ dommu = doflush = usetemp = 0; /* * XXX - Set mapaddr to 0 so that we get a kernel text fault * and panic if we try to use it in the code below. 
*/ mapaddr = (addr_t)0; } else if (ctx == NULL) { /* * pmg is loaded but its as does not have a context. * Set things up so that the pmgrp is mapped into a temporary * segment. No need to do any VAC flushing since this * was done when we took the ctx away. Set * up the mapaddr within the temporary segment. */ /* XXX - this is disgusting! */ #ifdef sun2 extern struct ctx *kctx; nctx = kctx; if (nctx != ctxsav) mmu_setctx(nctx); #else sun2 nctx = ctxsav; /* no need to switch context */ #endif sun2 mmu_settpmg(SEGTEMP, pmg); mapaddr = SEGTEMP + pmg_off; dommu = usetemp = 1; doflush = 0; } else { /* * pmg is loaded and pmg's as has a ctx. * Make sure we are in running in the as context. * Use the virtual address as the mapping address. */ if ((nctx = ctx) != ctxsav) mmu_setctx(nctx); /* * Make sure that pmg is mapped. */ hat_pmgmap(pmg); mapaddr = vaddr; dommu = 1; usetemp = 0; /* * We must flush VAC if the context is not clean. */ doflush = !ctx->c_clean; } skip: if (!vac) doflush = 0; /* no VAC on this system */ if (dommu) { /* Assert that the pmg is mapped */ ASSERTPMGMAPPED(pmg, "ptesync"); } /* * At this point, the flags are set: * * doflush - if the page must be flushed * dommu - if pte must be get/set from HW MMU * usetemp - if SEGTEDMP is used to access the HW MMU map * * Note that a loaded pmg is also mapped if we have a context. */ if (pp != NULL) { if (flags & HAT_RMSYNC) { if (dommu) { mmu_getpte(mapaddr, ppte); } pte = *ppte; /* * Call back to inform address space, if turned on. */ if (as->a_hatcallback) { as_hatsync(as, vaddr, (u_int) pte.pg_r, (u_int) pte.pg_m, (u_int)(flags & HAT_INVSYNC ? AHAT_UNLOAD : 0)); } pg_setref(pp, pp->p_ref | pte.pg_r); pg_setmod(pp, pp->p_mod | pte.pg_m); #ifdef VAC /* * When you zero the modified bit in the MMU * and leave it set in the cache you may not * get it set in the mmu when the line is * re-written. 
Writeback caches perform the * setting of the modified bit for a page in * the MMU on the first write miss that happens * to that page. Subsequent writes don't bother * to set the modified bit because the first * write did it. Therefore if you are zeroing * the modified bit you must flush the cache * so that subsequent writes, see the modified * bit unset in the cache and write it back to * the MMU. */ if (doflush && pte.pg_r) { vac_pageflush(mapaddr); didflush = 1; } #endif VAC pte.pg_r = pte.pg_m = 0; } #ifdef VAC else if (flags & HAT_NCSYNC) { if (dommu) { mmu_getpte(mapaddr, ppte); } pte = *ppte; /* * N.B. The following test assumes that there * are no user addresses at the same virtual * addresses as DVMA and segu in VAC machines. */ if (mapaddr >= DVMA || (segu != NULL && mapaddr >= segu->s_base && mapaddr < segu->s_base + segu->s_size)) { /* * To avoid lots of problems, we don't * try to convert anything from cached * to non-cached (or vice-versa) when * it is being loaded for DVMA use. * Also, we refuse to mess with user * areas since it is impossible to * reliably flush when converting * from cached to non-cached and we * don't want to take any performance * hits from using a non-cached stack. */ didsetpte = 1; cache_stats.cs_skip++; goto skip_ncsync; } /* * To avoid lots of problems, we don't try to convert * anything from cached to non-cached (or vice-versa) * when it is being loaded for DVMA use. */ if (mapaddr < DVMA) { if (doflush && !pte.pg_nc && pp->p_nc) { int pri, iskas; /* * Need to convert from a cached * translation to a non-cached * translation. There are lots * of potential races here in the * kernel's address space. If * some clean line ends up in the * cache after it is flushed here * and is then written to, the * Sirius cache system will end * up giving a memory timeout error. 
* * For now, we assume that between * time that we flush the virtual * address and reset the MMU that * nothing will be getting into * the cache from things like * ethernet (this is questionable). * We also assume that will never * be converting anything from * cached to non-cached in the * kernel for the current stack, * (i.e., the stack can be accessed * safely w/o it being changed from * cached to non-cached), the interrupt * stack, or anything that might be * touched at interrupts above splhigh * (UARTS, level7 profiling). * The stack being considered safe * will need to be watched if/when * we go away from using a fixed * virtual address for the user area * that is not managed by the hat layer. */ TEST_PANIC(!pmg->pmg_mapped, "hat_ptesync - pmg not mapped"); pte.pg_nc = 1; iskas = (as == &kas); pri = splhigh(); if (pte.pg_r) vac_pageflush(mapaddr); /* Change both SW and HW pte */ *ppte = pte; mmu_setpte(mapaddr, pte); didsetpte = 1; if (iskas) { /* * Flush the virtual address * again just in case some IO * got in behind our back * above. Doing this for * iskas only assumes there * is no UDVMA to worry about. */ struct pte tpte; mmu_getpte(mapaddr, &tpte); if (tpte.pg_r) vac_pageflush(mapaddr); cache_stats.cs_kchange++; } else { cache_stats.cs_uchange++; } (void) splx(pri); } else { pte.pg_nc = pp->p_nc; } } else { cache_stats.cs_iowantchg++; } skip_ncsync: ; } #endif VAC } if (flags & HAT_INVSYNC) { #ifdef VAC if (vac) { if (mapaddr >= DVMA && mapaddr < DVMA + dvmasize) { /* * Always flush before invalidating a DVMA * translation because the ref bit may lie. * See bugid 1039410. */ vac_pageflush(mapaddr); } else if (doflush && !didflush) { struct pte tpte; mmu_getpte(mapaddr, &tpte); if (tpte.pg_r) vac_pageflush(mapaddr); } } #endif VAC pte = mmu_pteinvalid; } if (!didsetpte) { *ppte = pte; if (dommu) { mmu_setpte(mapaddr, pte); } } /* * Optimized return when hat_ptesync was called from hat_unload. 
*/ if (flags & HAT_VADDR) return; if (usetemp) mmu_settpmg(SEGTEMP, pmgrp_invalid); (void) splx(s); if (nctx != ctxsav) mmu_setctx(ctxsav); } /* * Allocate a SW page table for a given address. */ static struct pmgrp * hat_pmgalloc(seg, addr) struct seg *seg; addr_t addr; { register struct as *as = seg->s_as; register struct pmgrp *pmg; int s; s = splvm(); if ((pmg = hat_pmgfind(addr, as)) != NULL) { vmhatstat.vh_pmgallochas++; (void) splx(s); return (pmg); } /* * No pmgrp allocated to this address space contains the address; * allocate a new pmg for this address space. First, try * the free list. */ /* * Update vmhatstat statistics. */ if (pmgrpfree == NULL) vmhatstat.vh_pmgallocsteal++; else vmhatstat.vh_pmgallocfree++; top: if ((pmg = pmgrpfree) == NULL) { int try; /* * No SW pmg's free, have to take one from someone. * Take from address spaces with no ctx first. */ pmg = pmgrphand; for (try = 1; /* empty */; try++) { do { pmg++; if (pmg == pmgrpsNPMGRPS) { /* * Wrap around and skip kernels pmgs. */ pmg = pmgrpmin; } if (pmg->pmg_lock == 0 && pmg->pmg_keepcnt == 0) { /* * On the first try, only take a pmg * from an address space with no ctx. */ if (try == 1 && pmg->pmg_as->a_hat.hat_ctx != NULL) continue; #ifdef SUNDBE /* * Don't steal pmeg with stack */ if (try == 1 && pmg->pmg_base == (addr_t)KERNELBASE - PMGRPSIZE) continue; #endif /* SUNDBE */ /* * Found a candidate, free * it up and try again. */ pmg->pmg_keepcnt++; hat_pmgfree(pmg); pmgrphand = pmg; goto top; } } while (pmg != pmgrphand); /* * Give up after 2 tries. 
*/ if (try >= 2) { panic("hat_pmgalloc out of hat"); } } } pmgrpfree = pmg->pmg_next; /* take it off the free list */ pmg->pmg_lock = 1; pmg->pmg_keepcnt = 1; hat_pmglink(pmg, as, addr); pmg->pmg_lock = 0; (void) splx(s); return (pmg); } /* * Load SW pmg in HW */ static void hat_pmgload(swpmg) struct pmgrp *swpmg; { register struct as *as; register struct pmgrp *pmg = (struct pmgrp *)NULL; register struct hwpmg *hwpmg; int s; int pass; if (swpmg->pmg_mapped) { ASSERT(swpmg->pmg_num != PMGNUM_SW); goto locksmg; } as = swpmg->pmg_as; /* * Loaded, but not mapped. */ if (swpmg->pmg_num != PMGNUM_SW) { ASSERT(as == &kas || mmu_getctx()->c_as == as); hat_pmgmap(swpmg); goto locksmg; } hat_setup(as); s = splvm(); if ((hwpmg = hwpmgfree) != NULL) { vmhatstat.vh_pmgldfree++; goto found_free_hwpmg; } /* * The strategy for stealing a HW pmeg: * We make 3 passes over the array of HW pmegs (starting at hwpmghand) * and will take the pmeg if: * * Pass 1. pmeg does not require VAC flush * (we clean all pmegs between Pass 1 and Pass 2). * Pass 2. Take any unlocked pmeg. In pass 2, we will take * even a dirty pmeg because it may be an unused kernel pmeg. */ hwpmg = hwpmghand; for (pass = 1; pass < 3; pass++) { struct ctx *ctx; do { hwpmg++; if (hwpmg == hwpmgsNHWPMGS) { /* * Wrap around and skip kernel pmgs. */ hwpmg = hwpmgmin; } pmg = hwpmg->hwp_pmgrp; ASSERT(pmg != NULL); ASSERT(pmg->pmg_num != PMGNUM_SW); /* * Skip locked and kept pmg's */ if (pmg->pmg_lock || pmg->pmg_keepcnt > 0) { continue; } if (pass == 1 && (ctx = pmg->pmg_as->a_hat.hat_ctx) != NULL && !ctx->c_clean && pmg->pmg_mapped) continue; if (pass == 1) { if (ctx == NULL) vmhatstat.vh_pmgldnoctx++; else if (ctx->c_clean) vmhatstat.vh_pmgldcleanctx++; else vmhatstat.vh_pmgldnomap++; } /* * Keep the pmg until hat_pmgswapptes(). 
*/ pmg->pmg_keepcnt++; hat_pmgunload(pmg, PTESFLAG_SKIP); ASSERT(hwpmg = hwpmgfree); hwpmghand = hwpmg; goto found_free_hwpmg; } while (hwpmg != hwpmghand); if (pass == 1) { /* * We haven't found a pmg that does not require * VAC flush. Clear all pmg's now by flushing * all contexts. */ vac_flushallctx(); hat_unmap_aspmgs(as); vmhatstat.vh_pmgldflush++; } } panic("hat_pmgload: failed after two passes"); found_free_hwpmg: hat_clrcleanbit(); swpmg->pmg_num = (hwpmg - hwpmgs); hwpmg->hwp_pmgrp = swpmg; hwpmgfree = hwpmg->hwp_next; /* take it off the free list */ ASSERT(swpmg->pmg_as == &kas || mmu_getctx()->c_as == swpmg->pmg_as); #ifdef MMU_3LEVEL if (mmu_3level) hat_smgalloc(as, swpmg->pmg_base, swpmg); #endif MMU_3LEVEL /* We are already at splvm() */ mmu_setpmg(swpmg->pmg_base, swpmg); swpmg->pmg_mapped = 1; /* * Load all valid SW PTE's in MMU. * */ if (pmg == (struct pmgrp *)NULL) hat_pmgloadptes(swpmg->pmg_base, swpmg->pmg_pte); else { hat_pmgswapptes(swpmg->pmg_base, swpmg->pmg_pte, pmg->pmg_pte); pmg->pmg_keepcnt--; } as->a_hat.hat_pmgldcnt++; /* incr. number of HW pmegs for this as */ (void) splx(s); return; locksmg: #ifdef MMU_3LEVEL if (mmu_3level) { /* * Callers to hat_pmgload assume that hat_pmgload keeps smg. */ s = splvm(); hat_pmgtosmg(swpmg)->smg_keepcnt++; (void)splx(s); } #endif MMU_3LEVEL return; } /* * Map a pmgrp in segment map. */ static void hat_pmgmap(pmg) struct pmgrp *pmg; { int s; if (pmg->pmg_num != PMGNUM_SW && !pmg->pmg_mapped) { s = splvm(); #ifdef MMU_3LEVEL if (mmu_3level) { hat_smgalloc(pmg->pmg_as, pmg->pmg_base, pmg); hat_pmgtosmg(pmg)->smg_keepcnt--; } #endif MMU_3LEVEL mmu_setpmg(pmg->pmg_base, pmg); pmg->pmg_mapped = 1; (void) splx(s); } } /* * Free the specified SW pmgrp. This is done by calling hat_pteunload * on all the pme's to process all the referenced and modified bits * and to invalidate the pme. If the hat containing this pmg currently * has a ctx, then invalidate that mapping. 
Finally we unlink the
 * pmgrp from the hat pmgrp list and put it on the free list.
 * pmg should be kept (once) when this routine is called.
 */
static void
hat_pmgfree(pmg)
	register struct pmgrp *pmg;
{
	register struct pment *pme = pmg->pmg_pme;
	register struct as *as;
	register int tcnt;
	int s;

	ASSERT(pmg->pmg_keepcnt == 1);
	ASSERT(pmg->pmg_as != NULL);

	/* Give back the HW pmeg first, copying its ptes out (UNLOAD) */
	if (hat_pmgisloaded(pmg))
		hat_pmgunload(pmg, PTESFLAG_UNLOAD);

	s = splvm();

	if ((as = pmg->pmg_as) != NULL) {
		/* Unload every valid pme, syncing ref/mod bits to the page */
		for (tcnt = 0; tcnt < NPMENTPERPMGRP; tcnt++, pme++) {
			if (pme->pme_valid)
				hat_pteunload(pmg, pme, (addr_t)NULL,
				    HAT_RMSYNC);
		}

		/* Unlink from the address space's doubly linked pmgrp list */
		if (as->a_hat.hat_pmgrps == pmg) {
			as->a_hat.hat_pmgrps = pmg->pmg_next;
			if (pmg->pmg_next)
				pmg->pmg_next->pmg_prev = NULL;
		} else {
			pmg->pmg_prev->pmg_next = pmg->pmg_next;
			if (pmg->pmg_next)
				pmg->pmg_next->pmg_prev = pmg->pmg_prev;
		}
		pmg->pmg_as = NULL;
		pmg->pmg_next = pmg->pmg_prev = NULL;
	}

	/* Drop the caller's keep and push onto the SW pmgrp free list */
	pmg->pmg_keepcnt--;
	pmg->pmg_next = pmgrpfree;
	pmgrpfree = pmg;

	(void) splx(s);
}

/*
 * Take the HW pmeg away from a SW pmgrp.
 *
 * The pmgrp is first unmapped from the segment map (flushing the VAC
 * segment if its ctx is not clean).  With PTESFLAG_UNLOAD the HW pmeg is
 * then mapped at SEGTEMP and handed to hat_pmgunloadptes() together with
 * the SW pte array; other flag values skip that step.  Finally the HW
 * pmeg goes on the HW free list and the pmgrp is marked PMGNUM_SW.
 */
static void
hat_pmgunload(pmg, ptesflag)
	struct pmgrp *pmg;
	enum ptesflag ptesflag;
{
	addr_t a;
	struct pte *ppte = pmg->pmg_pte;
	struct hwpmg *hwpmg;
	struct ctx *ctx, *ctxsav;
	int s;

	ASSERT(pmg->pmg_num != PMGNUM_SW);

	s = splvm();		/* using SEGTEMP */

#ifdef notdef /* 3 level mmu */
	ASSERT(pmg->pmg_as->a_hat.hat_ctx != NULL || !pmg->pmg_mapped);
#endif notdef

	/*
	 * Unmap pmg from segment map.
	 */
	if (pmg->pmg_mapped) {
#ifdef MMU_3LEVEL
		if (mmu_3level) {
			ASSERT(pmg->pmg_sme != NULL);
		}
#endif MMU_3LEVEL

		if ((ctx = pmg->pmg_as->a_hat.hat_ctx) != NULL) {
			/* Work in the owner's ctx, then switch back */
			ctxsav = mmu_getctx();
			if (ctxsav != ctx)
				mmu_setctx(ctx);
			if (!ctx->c_clean)
				vac_segflush(pmg->pmg_base);
			mmu_pmginval(pmg->pmg_base);
			if (ctxsav != ctx)
				mmu_setctx(ctxsav);
		} else {
#ifdef MMU_3LEVEL
			if (mmu_3level) {
				/*
				 * We have to use REGTEMP to map the smg.
				 * Note that REGTEMP may be used only in kctx.
				 */
				struct smgrp *smg;

				ctxsav = mmu_getctx();
				if (ctxsav != kctx)
					mmu_setctx(kctx);
				/* smgrp that owns this pmgrp's sment */
				smg = &smgrps[(pmg->pmg_sme - sments) >>
				    NSMENTPERSMGRPSHIFT];
				mmu_setsmg(REGTEMP, smg);
				mmu_pmginval(REGTEMP +
				    ((u_int)pmg->pmg_base & SMGRPOFFSET));
				mmu_smginval(REGTEMP);
				if (ctxsav != kctx)
					mmu_setctx(ctxsav);
			} else
				panic("hat_pmgunload");
#else MMU_3LEVEL
			/* mapped without a ctx is impossible on a 2 level mmu */
			panic("hat_pmgunload");
#endif MMU_3LEVEL
		}
		pmg->pmg_mapped = 0;

#ifdef MMU_3LEVEL
		if (mmu_3level) {
			/* break the sment <-> pmgrp association */
			pmg->pmg_sme->sme_valid = 0;
			pmg->pmg_sme->sme_pmg = (struct pmgrp *)NULL;
			pmg->pmg_sme = (struct sment *)NULL;
		}
#endif MMU_3LEVEL
	}

	ASSERT(!pmg->pmg_mapped);

	if (ptesflag == PTESFLAG_UNLOAD) {
		/*
		 * Unload all valid SW PTE's in MMU.
		 *
		 */
		map_setsgmap(SEGTEMP, pmg->pmg_num);
		a = SEGTEMP;
		hat_pmgunloadptes(a, ppte);
		map_setsgmap(SEGTEMP, PMGRP_INVALID);
	}

	pmg->pmg_as->a_hat.hat_pmgldcnt--;

	/*
	 * Put hwpmg in HW pmg free list
	 */
	hwpmg = &hwpmgs[pmg->pmg_num];
	hwpmg->hwp_next = hwpmgfree;
	hwpmgfree = hwpmg;
	hwpmg->hwp_pmgrp = NULL;
	pmg->pmg_num = PMGNUM_SW;

	(void) splx(s);
}

/*
 * Add the specified pmgrp to the list of pmgrp's allocated to
 * the specified address space.  We hang pmgrps off the address
 * space and not the ctx so that we can keep them around even if
 * we don't have a hardware context.
 *
 * The pmgrp is inserted at the head of the list and its base is set
 * to `addr' rounded down to a PMGRPSIZE boundary.
 */
static void
hat_pmglink(pmg, as, addr)
	register struct pmgrp *pmg;
	struct as *as;
	addr_t addr;
{
	int s;

	ASSERT(pmg->pmg_keepcnt > 0);

	s = splvm();
	pmg->pmg_as = as;
	pmg->pmg_next = as->a_hat.hat_pmgrps;
	if (pmg->pmg_next)
		pmg->pmg_next->pmg_prev = pmg;
	pmg->pmg_prev = NULL;
	as->a_hat.hat_pmgrps = pmg;
	pmg->pmg_base = (addr_t)((u_int)addr & ~(PMGRPSIZE - 1));
	(void) splx(s);
}

#ifdef notdef
/*
 * Called when the wrong pmeg is read out from the MMU.
 * Most likely, this is a down rev Carrera CPU board that
 * is missing some pullup registers on the segment RAMs.
 * The ECO for the needed Carrera CPU board fix is 2550.
*/ static void hat_wrongpmg(pmg, addr, as) struct pmgrp *pmg; addr_t addr; struct as *as; { #ifndef sun2 register struct pmgrp *pmgp; addr = (addr_t)((u_int)addr & ~(PMGRPSIZE - 1)); printf("hardware claims page map entry group number is 0x%x\n", pmg->pmg_num); if (addr >= (addr_t)KERNELBASE) { /* * Scan the kernel address space for the correct pmg. */ for (pmgp = kas.a_hat.hat_pmgrps; pmgp != NULL; pmgp = pmgp->pmg_next) { if (pmgp->pmg_base == addr) break; } } else { for (pmgp = as->a_hat.hat_pmgrps; pmgp != NULL; pmgp = pmgp->pmg_next) { if (pmgp->pmg_base == addr) break; } } if (pmgp != NULL) { printf(" software says page map entry group number is 0x%x\n", pmgp->pmg_num); } #endif printf("pmg = %x, pmg base = %x, addr = %x\n", pmg, pmg->pmg_base, addr); panic("wrong pmg"); /* NOTREACHED */ } #endif notdef static void hat_xfree(as) register struct as *as; { register int s; register struct pmgrp *pmg; /* * Free pmgrp's. */ s = splvm(); while (pmg = as->a_hat.hat_pmgrps) { pmg->pmg_keepcnt++; (void) splx(s); hat_pmgfree(pmg); s = splvm(); } (void) splx(s); #ifdef MMU_3LEVEL /* * If three-level mmu, free smgrp's. */ if (mmu_3level) { register struct smgrp *smg; s = splvm(); while (smg = as->a_hat.hat_smgrps) { smg->smg_keepcnt++; (void) splx(s); hat_smgfree(smg); s = splvm(); } (void) splx(s); } #endif MMU_3LEVEL ASSERT(as->a_hat.hat_pmgldcnt == 0); } /* * Find a SW page table. * * The returned page table is 'kept'. * * We optimize the search by using a look-aside buffer of mappings * from to a pointer to the pmg structure. For each hashed * value, we store a pointer to the most recently found pmgrp. * 1009 is a prime that was found to be optimal. 
*/ #define hash_asaddr(as, addr) \ ((((u_int)as >> 2) * 1009 + ((u_int)addr >> PMGRPSHIFT)) % (NPMGHASH-1)) static struct pmgrp * hat_pmgfind(addr, as) addr_t addr; struct as *as; { struct pmgrp *pmg; addr_t pmgaddr = hat_pmgbase(addr); int s; int hashind; hashind = hash_asaddr(as, pmgaddr); s = splvm(); /* * Check if pmg is in the lookaside buffer. */ if ((pmg = pmghash[hashind]) != NULL && pmg->pmg_as == as && pmg->pmg_base == pmgaddr) { pmg->pmg_keepcnt++; pmgfindstat.pf_hit++; goto out; } /* * Exhaustive search of pmg's within the address space. */ for (pmg = as->a_hat.hat_pmgrps; pmg != NULL && pmg->pmg_base != pmgaddr; pmg = pmg->pmg_next) ; if (pmg != NULL) { pmg->pmg_keepcnt++; pmgfindstat.pf_miss++; /* * Write the -> pmg entry to look aside buffer */ pmghash[hashind] = pmg; } else { pmgfindstat.pf_notfound++; } out: (void) splx(s); return (pmg); } /* * Clear running process's context clean bit. */ static void hat_clrcleanbit() { struct proc *p; struct as *a; struct ctx *c; if ((p = u.u_procp) != NULL && (a = p->p_as) != NULL && (c = a->a_hat.hat_ctx) != NULL) { c->c_clean = 0; } } #ifdef MMU_3LEVEL /* * Code to handle allocation of smegs is cloned from the pmeg versions */ int getsmg_check = 1; /* * This routine will return the smgrp structure for the given address * in the current ctx. But unlike mmu_getsmg, this routine will protect * against the smgrp being lost by spl'ing and will return a kept smgrp * pointer. The keepcnt should be decremented by the caller when it is * done looking at the smgrp contents. */ static struct smgrp * hat_getsmg(addr) addr_t addr; { int s; struct smgrp *smg; s = splvm(); smg = mmu_getsmg(addr); smg->smg_keepcnt++; if (getsmg_check && smg != smgrp_invalid && smg->smg_base != 0 && smg->smg_base != (caddr_t)((u_int)addr & ~(SMGRPSIZE - 1))) { printf("hat_getsmg: addr=%x, smg=%x, smg base=%x\n", addr, smg, smg->smg_base); call_debug("hat_getsmg"); } (void) splx(s); return (smg); } /* * Free the specified smgrp. 
This is done by invalidating the pmgrp mapping
 * of every valid sme in the smgrp.  If the hat containing
 * this smg currently has a ctx, then invalidate that mapping.
 * Finally we unlink the smgrp from the hat smgrp list and
 * put it on the free list.
 * smg should be kept (once) when this routine is called.
 */
static void
hat_smgfree(smg)
	register struct smgrp *smg;
{
	register struct sment *sme;
	register struct pmgrp *pmg;
	register struct as *as;
	register int tcnt;
	struct ctx *ctx, *ctxsav, *curctx;
	int s;

	ASSERT(smg->smg_keepcnt == 1);

	if ((as = smg->smg_as) != NULL) {
		/* XXX - we should simplify switching between ctx's */
		ctxsav = curctx = mmu_getctx();
		/* Flush the region from the VAC while in the owner's ctx */
		if ((ctx = smg->smg_as->a_hat.hat_ctx) != NULL &&
		    !ctx->c_clean) {
			mmu_setctx(curctx = ctx);
			vac_rgnflush(smg->smg_base);
		}

		/* REGTEMP is used below, so run in kctx */
		if (curctx != kctx)
			mmu_setctx(curctx = kctx);
#if defined(SUNDBE) && defined(sun4)
		if (ispseudo_smgrp(smg))
			goto skip_if_pseudo;
#endif ISM
		/*
		 * XXX - we may optimize by not using kctx if smg has ctx.
		 */
		s = splvm();
		ASSERT(mmu_getsmg(REGTEMP) == smgrp_invalid);
		mmu_setsmg(REGTEMP, smg);

		/* Invalidate each valid sme and detach it from its pmgrp */
		sme = smg->smg_sme;
		for (tcnt = 0; tcnt < NSMENTPERSMGRP; tcnt++) {
			if (sme->sme_valid) {
				pmg = sme->sme_pmg;
				ASSERT(((u_int)pmg->pmg_base & SMGRPOFFSET) <
				    SMGRPSIZE);
				mmu_pmginval(REGTEMP +
				    ((u_int)pmg->pmg_base & SMGRPOFFSET));
				pmg->pmg_mapped = 0;
				sme->sme_valid = 0;
				pmg->pmg_sme = (struct sment *)NULL;
				sme->sme_pmg = (struct pmgrp *)NULL;
			}
			sme++;
		}
		mmu_smginval(REGTEMP);
		(void) splx(s);

#if defined(SUNDBE) && defined(sun4)
skip_if_pseudo:
#endif ISM
		/* Remove the region from the owner's ctx, if it has one */
		if ((ctx = smg->smg_as->a_hat.hat_ctx) != NULL) {
			if (ctx != curctx)
				mmu_setctx(curctx = ctx);
			mmu_smginval(smg->smg_base);
		}
		if (ctxsav != curctx)
			mmu_setctx(ctxsav);

		/* Unlink from the address space's doubly linked smgrp list */
		if (as->a_hat.hat_smgrps == smg) {
			as->a_hat.hat_smgrps = smg->smg_next;
			if (smg->smg_next)
				smg->smg_next->smg_prev = NULL;
		} else {
			smg->smg_prev->smg_next = smg->smg_next;
			if (smg->smg_next)
				smg->smg_next->smg_prev = smg->smg_prev;
		}
		smg->smg_as = NULL;
		smg->smg_next = smg->smg_prev = NULL;
	}

#if defined(SUNDBE) && defined(sun4)
	/* Pseudo smgrps are released with kmem_free, not put on the free list */
	if (ispseudo_smgrp(smg)) {
		kmem_free((char *)smg, sizeof(*smg));
		return;
	}
#endif ISM
	/* Drop the caller's keep and push onto the smgrp free list */
	s = splvm();
	smg->smg_keepcnt--;
	smg->smg_next = smgrpfree;
	smgrpfree = smg;
	(void) splx(s);
}

/*
 * Add the specified smgrp to the list of smgrp's allocated to
 * the specified address space.  We hang smgrps off the address
 * space and not the ctx so that we can keep them around even if
 * we don't have a hardware context.
 *
 * The smgrp is inserted at the head of the list and its base is set
 * to `addr' rounded down to an SMGRPSIZE boundary.
 */
static void
hat_smglink(smg, as, addr)
	register struct smgrp *smg;
	struct as *as;
	addr_t addr;
{
	int s;

	s = splvm();
	smg->smg_as = as;
	smg->smg_next = as->a_hat.hat_smgrps;
	if (smg->smg_next)
		smg->smg_next->smg_prev = smg;
	smg->smg_prev = NULL;
	as->a_hat.hat_smgrps = smg;
	smg->smg_base = (addr_t)((u_int)addr & ~(SMGRPSIZE - 1));
	(void) splx(s);
}

/*
 * Reserve (keep and lock) the smgrp currently mapping `addr' for seg's
 * address space, and record the pmgrp mapped at `addr' in the matching
 * sment.  No-op unless the mmu is three-level.  Panics if the address
 * space has no ctx or the smgrp belongs to another address space.
 */
void
hat_smgreserve(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as *as = seg->s_as;
	register struct smgrp *smg;
	register struct pmgrp *pmg;
	struct sment *sme;

	if (!mmu_3level)
		return;

	smg = hat_getsmg(addr);		/* keeps the smg for us */
	/* DEBUGGING */
	if (smg == smgrp_invalid)
		printf("hat_smgreserve: addr 0x%x invalid smg\n", addr);

	if (as->a_hat.hat_ctx == NULL ||
	    (smg->smg_as != NULL && smg->smg_as != as))
		panic("hat_smgreserve");

	/* First reservation of this smgrp: attach it to the address space */
	if (smg != smgrp_invalid && smg->smg_as == NULL)
		hat_smglink(smg, as, addr);
	smg->smg_lock = 1;	/* if its being reserved, also lock it */

	/*
	 * Set up sme structure.
	 */
	pmg = mmu_getpmg(addr);
	sme = &(smg->smg_sme[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
	ASSERT(!sme->sme_valid || sme == pmg->pmg_sme);
	if (!sme->sme_valid) {
		pmg = mmu_getpmg(addr);
		ASSERT(pmg->pmg_sme == NULL);
		sme->sme_pmg = pmg;
		sme->sme_valid = 1;
		pmg->pmg_sme = sme;
	}
}

/*
 * Initialize all the unlocked smgs to have invalid sme's
 * and add them to the free list.
 * This routine is called during startup after all the
 * kernel smgs have been reserved.  This routine will
 * also set the smgrpmin variable for use in hat_smgalloc.
*
 * REGTEMP is only used here so we temporarily steal
 * the region before KERNELBASE and mark it invalid
 * when we are finished.
 */
void
hat_smginit()
{
	register struct smgrp *smg;
	register addr_t addr;
	int s;

	if (!mmu_3level)
		return;

	s = splvm();
	for (smg = smgrps; smg < smgrpsNSMGRPS; smg++) {
		/* locked or kept smgrps are in use (reserved); leave them */
		if (smg->smg_lock || smg->smg_keepcnt != 0)
			continue;

		/* remember the first free smgrp as the steal scan base */
		if (smgrpmin == NULL)
			smgrpmin = smg;

		/* map the smgrp at REGTEMP and invalidate every pmeg slot */
		mmu_settsmg((addr_t)REGTEMP, smg);
		for (addr = (addr_t)REGTEMP;
		    addr < (addr_t)(REGTEMP + SMGRPSIZE);
		    addr += PMGRPSIZE) {
			mmu_pmginval(addr);
		}

		smg->smg_next = smgrpfree;
		smgrpfree = smg;
	}
	smgrphand = smgrpmin;
	mmu_smginval((addr_t)REGTEMP);
	(void) splx(s);
}

/*
 * Allocate a smgrp to map the specified address.
 * Returns w/ the keepcnt incremented for the particular smgrp used.
 * First look for something in the free list and then steal one
 * that is currently being used.
 *
 * In every case the sment covering `addr' is pointed at `pmg' and
 * marked valid.  No-op unless the mmu is three-level.
 */
static void
hat_smgalloc(as, addr, pmg)
	struct as *as;
	addr_t addr;
	struct pmgrp *pmg;
{
	register struct smgrp *smg;
	register struct sment *sme;
	int s;
	struct ctx *ctx;

	if (!mmu_3level)
		return;

	s = splvm();
	/* A smgrp is already mapped at addr: keep it and fill in the sme */
	if ((smg = mmu_getsmg(addr)) != smgrp_invalid) {
		smg->smg_keepcnt++;
		if (getsmg_check && smg->smg_base !=
		    (caddr_t)((u_int)addr & ~(SMGRPSIZE - 1))) {
			printf("hat_smgalloc: addr=%x, smg=%x, smg base=%x\n",
			    addr, smg, smg->smg_base);
			call_debug("hat_smgalloc");
		}
		sme = &(smg->smg_sme
		    [(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
		sme->sme_pmg = pmg;
		sme->sme_valid = 1;
		pmg->pmg_sme = sme;
		(void) splx(s);
		return;
	}

	/*
	 * No smgrp allocated to this address space contains the pme,
	 * allocate a new smg for this address space.  First, try
	 * the free list.
	 */
	if (smgrpfree != NULL)
		vmhatstat.vh_smgfree++;

top:
	if ((smg = smgrpfree) == NULL) {
		int try;

		/*
		 * No smg's free, have to take one from someone.
		 * Take from address spaces with no ctx first.
		 * XXX - could do it with just one pass.
		 */
		smg = smgrphand;
		try = 1;
		for (;;) {
			do {
				smg++;
				if (smg == smgrpsNSMGRPS) {
					if (smgrpmin) {
						/* skip some kernel smgrps */
						smg = smgrpmin;
					} else {
						smg = smgrps;
					}
				}
				if (smg->smg_lock == 0 &&
				    smg->smg_keepcnt == 0) {
					/*
					 * On tries 1 and 2, skip a smg
					 * whose address space has a ctx
					 * that is not clean.
					 */
					if (try < 3 &&
					    (ctx = smg->smg_as->a_hat.hat_ctx)
					    != NULL && !ctx->c_clean)
						continue;

					/*
					 * Found a candidate, free
					 * it up and try again.
					 */
					if (try == 1) {
						if (ctx == NULL)
							vmhatstat.vh_smgnoctx++;
						else
							vmhatstat.vh_smgcleanctx++;
					}
					smg->smg_keepcnt++;
					hat_smgfree(smg);
					smgrphand = smg;
					goto top;
				}
			} while (smg != smgrphand);

			if (try == 1) {
				/*
				 * We were not able to find a segment that
				 * would not require flushing.
				 *
				 * Flush all user VAC lines and try again.
				 */
				vac_flushallctx();
				vmhatstat.vh_smgflush++;
			}

			/*
			 * Give up after the third try.
			 */
			if (try >= 3) {
				rm_outofhat();
			}
			try++;
		}
	}

	hat_clrcleanbit();

	smgrpfree = smg->smg_next;	/* take it off the free list */
	smg->smg_lock = 1;
	smg->smg_keepcnt = 1;
	hat_smglink(smg, as, addr);
	sme = &(smg->smg_sme[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
	sme->sme_pmg = pmg;
	sme->sme_valid = 1;
	pmg->pmg_sme = sme;
	(void) splx(s);
	mmu_setsmg(smg->smg_base, smg);
	smg->smg_lock = 0;
	return;
}

/*
 * Consistency check: the sum of pmg_keepcnt over the pmgrps of all valid
 * sments of `smg' must equal smg_keepcnt; prints the counts and panics
 * otherwise.
 */
hat_smgcheck_keepcnt(smg)
	struct smgrp *smg;
{
	int s;

	/*
	 * smgrp_invalid has smg_keepcnt == 1, but it has no pmgs.
	 */
	if (smg == smgrp_invalid)
		return;

	s = splvm();
	{
		register struct sment *sme;
		register struct pmgrp *pmg;
		int tcnt;
		int keepcnt = 0;

		sme = smg->smg_sme;
		for (tcnt = 0; tcnt < NSMENTPERSMGRP; tcnt++) {
			if (sme->sme_valid) {
				pmg = sme->sme_pmg;
				keepcnt += pmg->pmg_keepcnt;
			}
			sme++;
		}
		if (keepcnt != smg->smg_keepcnt) {
			printf(
"check_smgkeepcnt: keepcnt %d smg_keepcnt %d base 0x%x smg# %d\n",
			    keepcnt, smg->smg_keepcnt, smg->smg_base,
			    smg->smg_num);
			panic("possibly double mapped pmgrp");
		}
	}
	(void) splx(s);
}

/*
 * Run hat_smgcheck_keepcnt() over every smgrp in the smgrps table.
 */
hat_smgcheck_keepcntall()
{
	register struct smgrp *smg;

	for (smg = smgrps; smg < smgrpsNSMGRPS; smg++)
		hat_smgcheck_keepcnt(smg);
}
#endif MMU_3LEVEL

/*
 * Unmap all HW pmegs (except for locked pmegs) held by an address space.
 */
static void
hat_unmap_aspmgs(as)
	struct as *as;
{
	struct pmgrp *pmg;

	/*
	 * Don't do anything if this is kernel address space.
	 */
	if (as == &kas)
		return;

	/*
	 * We don't unmap pmegs if the address space holds only a
	 * "small number" of HW pmegs. We expect that future pmeg allocations
	 * will steal HW pmegs from other address spaces with a clean
	 * or no context.
	 *
	 * If the address space holds less than 'hatunmaplimit' percent
	 * of the total number of HW pmegs we don't unmap. hatunmaplimit
	 * is set to 30% and may be patched.
*/ if (as->a_hat.hat_pmgldcnt * 100 < NPMGRPS * hatunmaplimit) return; ASSERT(mmu_getctx()->c_as == as); for (pmg = as->a_hat.hat_pmgrps; pmg != NULL; pmg = pmg->pmg_next) { if (pmg->pmg_mapped && pmg->pmg_keepcnt == 0) { ASSERT(pmg->pmg_num != PMGNUM_SW); ASSERT(!pmg->pmg_lock); mmu_pmginval(pmg->pmg_base); pmg->pmg_mapped = 0; } } } #ifdef notdef /* XXX - temporary (and quite dirty) assertion function */ assertpmgmapped(pmg, msg) struct pmgrp *pmg; char *msg; { addr_t a = pmg->pmg_base; if (pmg == NULL) { printf("%s addr 0x%x\n", msg, pmg->pmg_base); ASSERT(pmg != NULL); } if (pmg->pmg_as->a_hat.hat_ctx == NULL) { #ifdef notdef /* 3 level mmu */ if (pmg->pmg_mapped) { printf("%s addr 0x%x\n", msg, pmg->pmg_base); ASSERT(!pmg->pmg_mapped); } #endif notdef a = SEGTEMP; } else { if (!pmg->pmg_mapped) { printf("%s addr 0x%x\n", msg, pmg->pmg_base); ASSERT(pmg->pmg_mapped); } } if (pmg->pmg_num == PMGNUM_SW) { printf("%s addr 0x%x\n", msg, pmg->pmg_base); ASSERT(pmg->pmg_num != PMGNUM_SW); } if (map_getsgmap(a) == PMGRP_INVALID) { printf("%s addr 0x%x\n", msg, pmg->pmg_base); /* Force traceback */ printf("pmg_num %d, pmg_keepcnt %d, ctx 0x%x\n", pmg->pmg_num, pmg->pmg_keepcnt, pmg->pmg_as->a_hat.hat_ctx); ASSERT(map_getsgmap(a) != PMGRP_INVALID); } } #endif notdef #if defined(SUNDBE) && defined(sun4) void hat_ism_lockpmgs(as) struct as *as; { } void hat_ism_unlockpmgs(as) struct as *as; { } void hat_intimate_map(seg, ismseg) struct seg *seg; struct seg *ismseg; { struct as *as = seg->s_as; struct as *ismas = ismseg->s_as; struct smgrp *smg; struct smgrp *smg2; int offset = seg->s_base - ismseg->s_base; struct as *saveas = mmu_getctx()->c_as; hat_setup(seg->s_as); ASSERT(((int)seg->s_base & SMGRPOFFSET) == 0); /* SMEG alignment */ for (smg = ismas->a_hat.hat_smgrps; smg != NULL; smg = smg->smg_next) { ASSERT(smg->smg_keepcnt > 0); /* XXX use kmem_fast_alloc() */ smg2 = (struct smgrp*) kmem_alloc(sizeof(*smg2)); *smg2 = *smg; /* this copies smg->smg_num */ smg2->smg_num 
= smg->smg_num; smg2->smg_sme = smg->smg_sme; smg2->smg_keepcnt = 1; /* hat_smglink() asserts this */ /* * Link pseudo smeg into process's hat_smgrps list */ hat_smglink(smg2, as, smg->smg_base + offset); smg2->smg_keepcnt = 0; /* hat_smgfree() asserts this */ mmu_setsmg(smg2->smg_base, smg2); } hat_setup(saveas); } void hat_intimate_unmap(seg) struct seg *seg; { struct as *as = seg->s_as; addr_t addr = seg->s_base; u_int len = seg->s_size; addr_t eaddr = addr + len; struct smgrp *smg; int s; s = splvm(); smg = as->a_hat.hat_smgrps; while (smg != NULL) { if (smg->smg_base >= addr && smg->smg_base < eaddr) { (void) splx(s); /* * Free the pseudo pmeg. */ ASSERT(ispseudo_smgrp(smg)); smg->smg_keepcnt++; /* hat_smgfree asserts this */ hat_smgfree(smg); s = splvm(); /* * Go over the entire list again because the list * could have been shuffled. */ smg = as->a_hat.hat_smgrps; } else smg = smg->smg_next; } (void) splx(s); } /* * Return 1 if this is a pseudo pmeg */ static int ispseudo_smgrp(smgrp) struct smgrp *smgrp; { return (!(smgrp >= smgrps && smgrp < smgrpsNSMGRPS)); } #endif ISM