Arquivotheca.SunOS-4.1.4/sys/sun4/vm_hat.c

#ifndef lint
static	char sccsid[] = "@(#)vm_hat.c 1.1 94/10/31 SMI";
#endif

/*
 * Copyright (c) 1991 by Sun Microsystems, Inc.
 */

/*
 * VM - Hardware Address Translation management.
 *
 * This file implements the machine specific hardware translation
 * needed by the VM system.  The machine independent interface is
 * described in <vm/hat.h> while the machine dependent interface
 * and data structures are described in <machine/vm_hat.h>.  For
 * Sun computers, we actually share this same source for more than
 * one architecture (Sun-2, Sun-3, and Sun-4), since the static
 * segment and page map mmu's are very similar.	 For this reason,
 * this file is located in the sun directory and we use ifdef's
 * for the few cases where they differ enough to be noticed here.
 * The actual loading of the hardware registers is done at the mmu
 * layer which is different for each Sun architecture type.  In
 * reality we probably need a more general cross architecture
 * sharing structure so that we can more easily share certain code
 * (like this file) across SOME Sun architectures without giving
 * the idea that the file MUST be shared across all Sun architectures.
 *
 * The hat layer manages the address translation hardware as a cache
 * driven by calls from the higher levels in the VM system.  Nearly
 * all the details of how the hardware is managed should not be visible
 * above this layer except for miscellaneous machine specific functions
 * (e.g. mapin/mapout) that work in conjunction with this code.	 Other
 * than a small number of machine specific places, the hat data
 * structures seen by the higher levels in the VM system are opaque
 * and are only operated on by the hat routines.  Each address space
 * contains a struct hat and a page contains an opaque pointer which
 * is used by the hat code to hold a list of active translations to
 * that page.
 *
 * XXX - At integration into 5.0, replace all #define'd symbols such as
 *	 NPMGRPSSW with the actual variable names (npmgrpssw in this case).
 */

#include <sys/param.h>
#include <sys/mman.h>
#include <sys/debug.h>
#include <sys/user.h>			/* for u_ru.ru_minflt */
#include <sys/trace.h>
#include <sys/vmmeter.h>		/* for flush_cnt */

#include <machine/pte.h>
#include <machine/cpu.h>
#include <machine/mmu.h>
#ifdef IOC
#include <machine/iocache.h>
#endif

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <vm/mp.h>
#include <vm/vpage.h>
#include <vm/rm.h>
#include <vm/seg_u.h>
#include <vm/seg_vn.h>
#include <vm/faultcode.h>

/*
 * Build guard: when neither sun4 nor sun4c is defined, the bare text
 * below stays in the translation unit, which is not valid C, so the
 * compile stops here.
 */
#if !(defined(sun4) || defined(sun4c))
ERROR - This HAT works only for sun4 and sun4c
#endif !(defined(sun4) || defined(sun4c))

/*
 * XXX - Review all ASSERT's in this vm_hat.c. Change inexpensive ones to
 *	 test and panic code. Remove #include <sys/debug>.
 */

/*
 * ASSERTPMGMAPPED(pmg, msg) is compiled out: it expands to nothing and the
 * assertpmgmapped() call it stands for survives only as a comment.
 */
#define	ASSERTPMGMAPPED(pmg, msg) /* assertpmgmapped(pmg, msg) */

/*
 * TEST_PANIC(cond, panicmsg) - call panic(panicmsg) when cond is true.
 *
 * The body is wrapped in do { } while (0) so that the expansion together
 * with the caller's trailing semicolon forms exactly one statement.  A bare
 * braced block would leave a stray empty statement behind and break
 * "if (x) TEST_PANIC(...); else ..." constructs.
 */
#define	TEST_PANIC(cond, panicmsg) \
	do { if (cond) panic(panicmsg); } while (0)

/*
 * Machine specific public data structures.
 *
 * All of the pointers below are filled in by hat_init().
 */
struct as kas;			/* the kernel's address space */
struct ctx *kctx;		/* &ctxs[KCONTEXT]; locked and bound to kas */
struct pmgrp *pmgrp_invalid;	/* &pmgrps[PMGRP_INVALID]; locked and kept */
#ifdef MMU_3LEVEL
struct smgrp *smgrp_invalid;	/* &smgrps[SMGRP_INVALID] when mmu_3level */
#endif MMU_3LEVEL

/*
 * Semi-private vm_hat data structures.
 * Other machine specific routines need to access these.
 *
 * Each table is exported as a base pointer plus a companion pointer named
 * <table>N<TABLE>.  Where the companions are used in this part of the file
 * (ctxsNCTXS, pmgrpsNPMGRPS, hwpmgsNHWPMGS, smgrpsNSMGRPS) they serve as an
 * exclusive upper bound for a table walk, i.e. one past the last element;
 * presumably the others follow the same convention - TODO confirm.
 * None of the tables is allocated in this part of the file.
 */
extern int	npmgrpssw;		/* now defined in param.c */
int		npmghash;
u_short		ctx_time;

struct	ctx	*ctxs, *ctxsNCTXS;	/* used by <machine/mmu.c> */
struct	pmgrp	*pmgrps, *pmgrpsNPMGRPS; /* used by <machine/mmu.c> */
struct	pment	*pments, *pmentsNPMENTS; /* used by <machine/machdep.c> */
struct	hwpmg	*hwpmgs, *hwpmgsNHWPMGS; /* used by <machine/mmu.c> */
struct	pte	*ptes, *ptesNPTES;	/* used by <machine/machdep.c> */
struct	pmgrp	**pmghash, **pmghashNPMGHASH; /* used by <machine/machdep.c> */
#ifdef MMU_3LEVEL
struct	smgrp *smgrps, *smgrpsNSMGRPS;
struct	sment *sments, *smentsNSMENTS;
#endif MMU_3LEVEL

/*
 * ISM support is compiled in only for SUNDBE on sun4, and it refuses to
 * build without MMU_3LEVEL (same deliberate-syntax-error trick as the
 * architecture guard).  NOTE(review): the label on the closing #endif says
 * ISM although the opening condition tests SUNDBE && sun4.
 */
#if	defined(SUNDBE) && defined(sun4)
#ifndef MMU_3LEVEL
ERROR - ISM will work only with 3 level MMU
#endif  MMU_3LEVEL
static int ispseudo_smgrp(/* smgrp */);
#endif  ISM

extern void vac_flushallctx();

/*
 * hat layer statistics.
 */

/*
 * Context, smeg, and pmeg statistics.
 *
 * Only the hat_fault counters are updated in the part of the file that
 * documents them below; the remaining descriptions are the original
 * author's and should be checked against the routines that bump them.
 */
struct vmhatstat {

	/* Context allocation statistics */
	u_int	vh_ctxfree;	  /* ctx allocations without a ctx steal */
	u_int	vh_ctxstealclean; /* ctx steals; presumably of a clean ctx - TODO confirm */
	u_int	vh_ctxstealflush; /* ctx steals; presumably needing a flush - TODO confirm */
	u_int	vh_ctxmappmgs;	  /* pmgs mapped at ctx allocation */

	/* SW pmg statistics */
	u_int	vh_pmgallocfree;  /* pmg allocation without a pmg steal */
	u_int	vh_pmgallocsteal; /* pmg allocations requiring a pmg steal */

	/* HW pmg map and load/unload statistics */
	u_int	vh_pmgmap;	  /* mappings of loaded pmg's */
	u_int	vh_pmgldfree;	/* allocs. of free HW pmg */
	u_int	vh_pmgldnoctx;	/* allocs. of HW pmg with no ctx */
	u_int	vh_pmgldcleanctx; /* allocs. of HW pmg with clean ctx */
	u_int	vh_pmgldflush;	/* allocs. of HW pmg needing VAC flush */
	u_int	vh_pmgldnomap;	/* allocs. of HW pmg taking unmapped pmg */

	/* hat_fault statistics */
	u_int	vh_faultmap;	  /* hat_fault mapped an already loaded HW pmg */
	u_int	vh_faultload;	  /* hat_fault loaded SW pmg in HW */
	u_int	vh_faultinhw;	  /* hat_fault gave up: pmg already mapped */
	u_int	vh_faultnopmg;	  /* hat_fault gave up: no SW pmg for addr */

	/* HW smg allocation statistics */
	u_int	vh_smgfree;
	u_int	vh_smgnoctx;
	u_int	vh_smgcleanctx;
	u_int	vh_smgflush;

	/* added later to the end not to break pmegstat */
	u_int	vh_pmgallochas;	/* has already a pmeg */

} vmhatstat;

/*
 * Page cacheability statistics.
 *
 * The six translation counters are bumped at the end of hat_pteload():
 * OBMEM translations are split into kernel addresses at or above DVMA
 * ("IO"), other kernel addresses, and user addresses, each by whether
 * the loaded pte is marked non-cached; everything else counts as
 * cs_other.  cs_unloadfix/cs_unloadnofix are bumped by hat_unlock().
 */
struct cache_stats {
	int cs_ionc;		/* non-cached IO translations */
	int cs_ioc;		/* cached IO translations */
	int cs_knc;		/* non-cached kernel translations */
	int cs_kc;		/* cached kernel translations */
	int cs_unc;		/* non-cached user translations */
	int cs_uc;		/* cached user translations */
	int cs_other;		/* other non-type 0 pages */
	int cs_iowantchg;	/* # of IO cached to NC changes skipped */
	int cs_kchange;		/* # of kernel cached to non-cached changes */
	int cs_uchange;		/* # of user cached to non-cached changes */
	int cs_unloadfix;	/* # of unload's that made pages cachable */
	int cs_unloadnofix;	/* # of	 " that didn't make pages cachable */
	int cs_skip;		/* XXX should be after cs_other */
} cache_stats;

/*
 * hat_pmgfind() look aside buffer hit/miss statistics.
 *
 * hat_pmgfind() itself is not in this part of the file; the field
 * meanings below are inferred from their names - TODO confirm.
 */
struct pmgfindstat {
	u_int	pf_hit;		/* presumably: found via the look aside buffer */
	u_int	pf_miss;	/* presumably: look aside miss, found by search */
	u_int	pf_notfound;	/* presumably: no pmg exists for the address */
} pmgfindstat;


/*
 * Private vm_hat data structures
 */

/* Second argument of hat_pmgunload(); see the prototype comment below. */
enum ptesflag { PTESFLAG_SKIP, PTESFLAG_UNLOAD };

/*
 * Context search hand: set to ctxs by hat_init(); hat_getctx() starts its
 * scan at ctxhand + 1.
 */
static	struct ctx *ctxhand;

/*
 * SW pmgrp bookkeeping, set up by hat_pmginit(): pmgrpmin and pmgrphand
 * start at the first pmgrp that is neither locked nor kept, and pmgrpfree
 * heads a free list linked through pmg_next.
 */
static	struct pmgrp *pmgrphand;
static	struct pmgrp *pmgrpfree;
static	struct pmgrp *pmgrpmin;

/*
 * HW pmg bookkeeping, set up by hat_pmginit() in parallel with the SW
 * variables above; the free list is linked through hwp_next.
 */
static	struct hwpmg *hwpmghand;
static	struct hwpmg *hwpmgfree;
static	struct hwpmg *hwpmgmin;

#ifdef MMU_3LEVEL
/* smgrp equivalents; smgrphand is set to smgrps by hat_init(). */
static	struct smgrp *smgrphand;
static	struct smgrp *smgrpfree;
static	struct smgrp *smgrpmin;
#endif MMU_3LEVEL

/*
 * Forward declarations (K&R style; the argument lists are shown only
 * in the comments).
 */
static void		hat_xfree(/* as */);
static void		hat_pteunload(/* pmg, pme, addr, flags */);
static void		hat_ptesync(/* pp, pmg, pme, addr, flags */);

static void		hat_pmgfree(/* pmg */);
static void		hat_pmglink(/* pmg, as, addr */);
static void		hat_pmgload(/* pmg */);
static void		hat_pmgunload(/* pmg, ptesflag */);
static struct pmgrp *	hat_pmgalloc(/* seg, addr */);
static struct pmgrp *	hat_pmgfind(/* addr, as */);
static void		hat_pmgloadptes(/* a, ppte */);
static void		hat_pmgunloadptes(/* a, ppte */);
static void		hat_pmgswapptes(/* a, ppte1, ppte2 */);
static void		hat_pmgmap(/* pmg */);
static void		hat_clrcleanbit();
static void		hat_unmap_aspmgs(/* as */);

#ifdef MMU_3LEVEL
static	struct smgrp	*hat_getsmg(/* addr */);
static	void		hat_smgfree(/* smg */);
static	void		hat_smglink(/* smg, as, addr */);
static	void		hat_smgalloc(/* as, addr, pmg */);
/*
 * Find the smgrp that owns a pmgrp: the pmgrp's pmg_sme pointer is turned
 * into an index into sments, and shifting that index right by
 * NSMENTPERSMGRPSHIFT gives the index of the containing smgrp.
 */
#define	hat_pmgtosmg(pmg) \
	(&smgrps[((pmg)->pmg_sme - sments) >> NSMENTPERSMGRPSHIFT])
#endif MMU_3LEVEL

static int hatunmaplimit = 30;		/* % limit used by hat_unmap_aspmgs() */

/*
 * local inline functions
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "base + off" or "pmg + 1") is evaluated as a unit before the cast,
 * mask or member access is applied to it.
 *
 * hat_pmgbase(a)	 - address a with the bits outside PMGRPMASK cleared.
 * hat_pmgisloaded(pmg)	 - true unless pmg->pmg_num is PMGNUM_SW.
 * hat_addrtopte(pmg, a) - SW pte in pmg for address a: the offset of a
 *			   within the pmgrp, in pages, indexes pmg_pte.
 * hat_pmetopte(pmg, pme) - SW pte at the same index in pmg_pte as pme
 *			   has in pmg_pme.
 */
#define	hat_pmgbase(a) ((addr_t)((u_int)(a) & PMGRPMASK))
#define	hat_pmgisloaded(pmg) ((pmg)->pmg_num != PMGNUM_SW)
#define	hat_addrtopte(pmg, a) \
	((pmg)->pmg_pte + (((u_int)(a) & PMGRPOFFSET) >> PAGESHIFT))
#define	hat_pmetopte(pmg, pme) ((pmg)->pmg_pte + ((pme) - (pmg)->pmg_pme))

/**************** misc. macros and declarations below ************************/

/*
 * XXX - Function definitions should be in a header file.
 */
extern	u_int map_getsgmap();

extern char DVMA[];		/* addresses in kas above DVMA are for "IO" */

#ifdef VAC
/*
 * XXX - should be made more efficient by using pment
 * indexing instead of virtual address checking.
 *
 * VAC_MASK(a) keeps the bits of address a that are selected by both
 * MMU_PAGEMASK and (shm_alignment - 1).  VAC_ALIGNED(a1, a2) is true
 * when the two addresses agree in all of those bits.
 */
#define	VAC_MASK(a) (((u_int)(a) & MMU_PAGEMASK) & (shm_alignment - 1))
#define	VAC_ALIGNED(a1, a2) ((VAC_MASK(a1) ^ VAC_MASK(a2)) == 0)
#endif

/*
 * These macros allow us to walk the translation list in a sane manner.
 * The list walking must be protected, and if the translation we land
 * on is interrupt replaceable, all work on that translation must also
 * be protected. This complexity is hidden in the macros. NOTE that these
 * macros assume that variables "pme", "pmg", "pp", and "s" are declared as
 * locals in the calling function. The sequence for use is as follows:
 *
 *	PP_LIST_OPEN
 *	{
 *		do operations;
 *		PP_LIST_NEXT(next item)
 *	}
 *	PP_LIST_CLOSE
 *
 * NOTE that you CANNOT put a semicolon after PP_LIST_OPEN or it will not
 * function.
 *
 * What the expansions do:
 *
 *   PP_LIST_OPEN  raises to splvm(), clears pmg, steps to the head of
 *		   pp->p_mapping via PP_LIST_NEXT and opens "while (pme)";
 *		   the caller's braced block becomes the loop body.
 *   PP_LIST_NEXT  briefly restores the saved priority with splx(s) and
 *		   raises it again, drops the keepcnt held on the previous
 *		   pmg (if any), sets pme to its argument and, when that is
 *		   non-NULL, derives the owning pmg from pme's index in
 *		   pments and bumps that pmg's keepcnt.
 *   PP_LIST_CLOSE restores the saved priority.
 *
 * PP_LIST_NEXT is a bare statement sequence, not a single statement, so it
 * must not be used as the unbraced body of an if/else/loop.  A "continue"
 * in the loop body must be preceded by PP_LIST_NEXT, otherwise pme never
 * advances.
 */
#define	PP_LIST_OPEN \
	s = splvm(); \
	pmg = (struct pmgrp *)NULL; \
	PP_LIST_NEXT((struct pment *)pp->p_mapping) \
	while (pme)

#define	PP_LIST_NEXT(x)	\
	(void) splx(s); \
	s = splvm(); \
	if (pmg) \
	    pmg->pmg_keepcnt--; \
	pme = x; \
	if (pme) { \
	    pmg = &pmgrps[(pme - pments) >> NPMENTPERPMGRPSHIFT]; \
	    pmg->pmg_keepcnt++; \
	}

#define	PP_LIST_CLOSE  (void) splx(s);

/*
 *
 * The next set of routines implements the machine
 * independent hat interface described in <vm/hat.h>
 *
 */

/*
 * Initialize the hardware address translation structures.
 * Called by startup.
 *
 * The SW page tables in the range [0..NPMGRPS) are set up as loaded
 * and mapped, each paired with the HW pmg of the same index; this is
 * required for hat_pmgreserve to work.  After startup has reserved the
 * kernel pmg's it calls hat_pmginit, which builds the free lists of
 * SW page tables and HW pmg's while skipping the reserved ones.
 */
void
hat_init()
{
	register struct ctx	*cp;
	register struct pmgrp	*gp;
	register struct pment	*nextpme;
	register struct pte	*nextpte;
	register int		n;

	/* Number the contexts in table order and park the search hand. */
	for (n = 0, cp = ctxs; cp < ctxsNCTXS; cp++, n++)
		cp->c_num = n;
	ctxhand = ctxs;

#ifdef MMU_3LEVEL
	if (mmu_3level) {
		register struct smgrp *sg;
		register struct sment *nextsme = sments;

		/* Number the smgrps and hand each its slice of sments. */
		for (n = 0, sg = smgrps; sg < smgrpsNSMGRPS; sg++, n++) {
			sg->smg_num = n;
			sg->smg_sme = nextsme;
			nextsme += NSMENTPERSMGRP;
		}

		/* The invalid smgrp is locked and kept for good. */
		smgrp_invalid = &smgrps[SMGRP_INVALID];
		smgrp_invalid->smg_lock = 1;
		smgrp_invalid->smg_keepcnt++;
		smgrphand = smgrps;
	}
#endif /* MMU_3LEVEL */

	nextpme = pments;
	nextpte = ptes;

	/*
	 * First NPMGRPS pmgs are loaded in HW. hat_pmgreserve() assumes this.
	 *
	 * XXX - Note that &pmgrps[NPMGRPS] cannot be changed to pmgrpsNPMGRPS
	 */
	n = 0;
	gp = pmgrps;
	while (gp < &pmgrps[NPMGRPS]) {
		gp->pmg_num = n;
		gp->pmg_mapped = 1;
		hwpmgs[n].hwp_pmgrp = gp;

		gp->pmg_pme = nextpme;
		gp->pmg_pte = nextpte;
		nextpme += NPMENTPERPMGRP;
		nextpte += NPMENTPERPMGRP;

		n++;
		gp++;
	}

	/*
	 * The remaining SW pmgrp are not loaded in HW.
	 */
	while (gp < pmgrpsNPMGRPS) {
		gp->pmg_num = PMGNUM_SW;
		gp->pmg_mapped = 0;

		gp->pmg_pme = nextpme;
		gp->pmg_pte = nextpte;
		nextpme += NPMENTPERPMGRP;
		nextpte += NPMENTPERPMGRP;

		gp++;
	}

	/* The invalid pmgrp is locked and kept for good. */
	pmgrp_invalid = &pmgrps[PMGRP_INVALID];
	pmgrp_invalid->pmg_lock = 1;
	pmgrp_invalid->pmg_keepcnt++;

	/*
	 * For now, we just grab a context and keep it locked, as well
	 * as locking all of the kernel which was loaded into memory.
	 */
	kctx = &ctxs[KCONTEXT];
	kctx->c_lock = 1;
	kctx->c_as = &kas;
	kas.a_hat.hat_ctx = kctx;
}

/*
 * Free all the hat resources held by an address space.
 * Called from as_free when an address space is being
 * destroyed and when it is to be "swapped out".
 *
 * Panics if the address space's context is locked.  Always returns
 * with the MMU switched to the kernel context.
 *
 * XXX - should we do anything about locked translations here?
 */
void
hat_free(as)
	register struct as *as;
{
	register struct ctx *cx = as->a_hat.hat_ctx;

	if (cx == NULL) {
		/* No context to clean; just release the hat resources. */
		hat_xfree(as);
	} else {
		TEST_PANIC(cx->c_lock, "hat_free - ctx is locked");

		/*
		 * Clean context now. This will prevent expensive segment
		 * and page flushing when freeing individual pmgs.
		 */
		mmu_setctx(cx);
		if (!cx->c_clean) {
			vac_ctxflush();
			cx->c_clean = 1;
		}
		hat_xfree(as);

		/* Break the link between the address space and its ctx. */
		as->a_hat.hat_ctx = NULL;
		cx->c_as = NULL;
	}

	/*
	 * Switch to kernel context.
	 */
	mmu_setctx(kctx);
}

/*
 * Set up addr to map to page pp with protection prot.
 */
void
hat_memload(seg, addr, pp, prot, lock)
	struct seg *seg;
	addr_t addr;
	struct page *pp;
	u_int prot;
	int lock;
{
	struct pte pte;

	hat_mempte(pp, prot, &pte);
	hat_pteload(seg, addr, pp, pte, lock ? PTELD_LOCK : 0);
}

/*
 * Cons up a struct pte using the device's pf bits and protection
 * prot to load into the hardware for address addr; treat as minflt.
 * A non-zero lock asks hat_pteload to load with PTELD_LOCK set.
 */
void
hat_devload(seg, addr, pf, prot, lock)
	struct seg *seg;
	addr_t addr;
	int pf;
	u_int prot;
	int lock;
{
	struct page	*page_numtouserpp();
	struct page	*pp = NULL;
	int		s = -1;		/* -1 means "priority was not raised" */
	union {
		struct pte u_pte;
		int u_pf;
	} devpte;

	/*
	 * If the request is to load a page which is really something
	 * for which we have a struct page, then we must be sure we
	 * maintain cache consistency.	However, we don't want to
	 * maintain such consistency for processes just running through
	 * physical memory, so we only pass along the page struct if it's
	 * not in a transition state.  This is why we use page_numtouserpp
	 * instead of page_numtopp here.  This should fix some readers
	 * of /dev/mem, but also allow those using the window system lock
	 * structures to work right too.
	 *
	 * The priority raised here stays raised across the pte load and
	 * is only restored at the very end.
	 */
	if ((pf & PGT_MASK) == PGT_OBMEM) {
		s = splvm();
		pp = page_numtouserpp((u_int)(pf & PG_PFNUM));
	}

	/* Overlay the pf bits with an int, then set valid and protection. */
	devpte.u_pf = pf & PG_PFNUM;
	devpte.u_pte.pg_v = 1;
	devpte.u_pte.pg_prot = hat_vtop_prot(prot);

	hat_pteload(seg, addr, pp, devpte.u_pte, lock ? PTELD_LOCK : 0);
	u.u_ru.ru_minflt++;

	if (s != -1)
		(void) splx(s);
}

/*
 * Release one hardware address translation lock on the given address.
 * For the Sun MMU, this means decrementing the counter on the pmgrp.
 *
 * Panics with "hat_unlock" if no pmgrp exists for the address or if
 * its keepcnt is below 2.
 */
void
hat_unlock(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct pmgrp	*grp;
	int			ospl;

	grp = hat_pmgfind(addr, seg->s_as);

	TEST_PANIC(grp == NULL || grp->pmg_keepcnt < 2, "hat_unlock");

#ifdef VAC
	if (vac && grp->pmg_keepcnt == 2) {
		register struct pment *pme;
		register struct pment *endpme;
		struct page *pp;

		/*
		 * Now check to see if we now can cache any non-cached pages.
		 * For now, we use the simple minded algorithm and just
		 * unload any of the translations in this pmgrp whose
		 * corresponding page is currently marked as non-cachable.
		 * This situation doesn't happen all that much, so the
		 * efficiency doesn't have to be all that great.
		 */
		endpme = grp->pmg_pme + NPMENTPERPMGRP;
		for (pme = grp->pmg_pme; pme < endpme; pme++) {
			if (!pme->pme_valid)
				continue;
			pp = pme->pme_page;
			if (pp == NULL || !pp->p_nc)
				continue;

			hat_pteunload(grp, pme, (addr_t)NULL, HAT_RMSYNC);

			if (!pp->p_nc) {
				/*
				 * We won - unloading the mmu translation
				 * made the page cacheable again.
				 */
				cache_stats.cs_unloadfix++;
			} else {
				/*
				 * We lost - unloading the mmu translation
				 * wasn't enough to make the page cacheable
				 * again.
				 */
				cache_stats.cs_unloadnofix++;
			}
		}
	}
#endif /* VAC */

	ospl = splvm();
	/* once for the hat_pmgfind above, once for the unlock itself */
	grp->pmg_keepcnt -= 2;
#ifdef MMU_3LEVEL
	/* Decr. once for unlock. */
	if (mmu_3level)
		hat_pmgtosmg(grp)->smg_keepcnt--;
#endif /* MMU_3LEVEL */
	(void) splx(ospl);
}

/*
 * Change the protections in the virtual address range
 * given to the specified virtual protection.  If
 * vprot == ~PROT_WRITE, then all the write permission
 * is taken away for the current translations, else if
 * vprot == ~PROT_USER, then all the user permissions
 * are taken away for the current translations, otherwise
 * vprot gives the new virtual protections to load up.
 *
 * The range [addr..addr+len) is walked one MMU page at a time.  Address
 * ranges with no pmgrp are skipped a whole pmgrp at a time, and invalid
 * ptes are skipped individually.  While a pmgrp is being worked on, the
 * keepcnt reference obtained through hat_pmgfind() is held and dropped
 * again (at splvm) when the walk moves on or ends.
 */
void
hat_chgprot(seg, addr, len, vprot)
	struct seg	*seg;
	addr_t		addr;
	u_int		len;
	u_int		vprot;		/* virtual page protections */
{
	register addr_t		a, ea;
	register struct pmgrp	*pmg = NULL;
	register u_int		pprot;	/* physical page protections */
	register int		newprot;
	struct pte		pte;
	struct pte		*ppte;

	/*
	 * For an explicit protection the physical value is the same for
	 * every page, so translate it once.  In the two "take away" modes
	 * pprot is computed per page from the current protection below.
	 */
	if (vprot != ~PROT_WRITE && vprot != ~PROT_USER)
		pprot = hat_vtop_prot(vprot);

	/*
	 * We must get a context for the AS because we will be
	 * synchronizing SW PTE by reads from MMU.
	 */
	hat_setup(seg->s_as);

	for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) {
		/*
		 * Look up the pmgrp on the first pass, whenever `a' is the
		 * first page of a pmgrp, and on every pass after a failed
		 * lookup (pmg is still NULL then).
		 */
		if (pmg == NULL ||
		    ((u_int)a & (PMGRPSIZE - 1)) < MMU_PAGESIZE) {
			if (pmg) {
				register int s;
				s = splvm();
				pmg->pmg_keepcnt--;
				(void) splx(s);
			}
			pmg = hat_pmgfind(a, seg->s_as);
			if (pmg == NULL) {
				/*
				 * Bump up `a' to avoid checking all
				 * the pte's in the invalid pmgrp.
				 * `a' becomes the last page of this pmgrp,
				 * so the loop increment lands on the next
				 * pmgrp boundary.
				 */
				a = (addr_t)((u_int)a & ~(PMGRPSIZE - 1)) +
				    PMGRPSIZE - MMU_PAGESIZE;
				continue;
			}
			/*
			 * Make sure that loaded pmg is also mapped.
			 */
			if (hat_pmgisloaded(pmg))
				hat_pmgmap(pmg);
		}

		ppte = hat_addrtopte(pmg, a);
		if (!pte_valid(ppte))
			continue;

		/*
		 * Synchronize PTE from MMU.
		 */
		if (hat_pmgisloaded(pmg)) {
			mmu_getpte(a, ppte);
		}

		/* Work on a private copy; it is stored back only on change. */
		pte = *ppte;

		if (vprot == ~PROT_WRITE) {
			/* Writable becomes read-only at the same privilege. */
			switch (pte.pg_prot) {
			case KW: pprot = KR; newprot = 1; break;
			case UW: pprot = UR; newprot = 1; break;
			default: newprot = 0; break;
			}
		} else if (vprot == ~PROT_USER) {
			/*
			 * NOTE(review): the sun2 arm cannot be compiled,
			 * since the guard at the top of this file rejects
			 * builds that define neither sun4 nor sun4c.
			 */
#ifdef sun2
			/* XXX - need a better way to do this */
			if (pte.pg_prot & 07) {
				pprot = pte.pg_prot & ~07;
				newprot = 1;
			} else {
				newprot = 0;
			}
#else sun2
			/* User access becomes kernel-only, same mode. */
			switch (pte.pg_prot) {
			case UW: pprot = KW; newprot = 1; break;
			case UR: pprot = KR; newprot = 1; break;
			default: newprot = 0; break;
			}
#endif sun2
		} else if (pte.pg_prot != pprot) {
			newprot = 1;
		} else {
			newprot = 0;
		}
		if (newprot) {
			pte.pg_prot = pprot;

			*ppte = pte;	/* Set SW PTE */

			/*
			 * Synchronize HW PTE if this pmg is loaded.
			 * We assume that hat_setup has been done.
			 */
			if (hat_pmgisloaded(pmg)) {
#ifdef VAC
				/*
				 * Flush the page from the cache first when
				 * it is cacheable and has been referenced.
				 */
				if (vac && !pte.pg_nc && pte.pg_r)
					vac_pageflush(a);
#endif VAC
				mmu_setpte(a, pte);
			}
		}
	}
	/* Drop the reference on the last pmgrp we were working on. */
	if (pmg != NULL) {
		register int s;
		s = splvm();
		pmg->pmg_keepcnt--;
		(void) splx(s);
	}
}

/*
 * Associate all the mappings in the range [addr..addr+len) with
 * segment nseg.  This hat implementation does not cache segments,
 * so there is nothing to do and every argument is ignored.
 */
/*ARGSUSED*/
void
hat_newseg(seg, addr, len, nseg)
	struct seg	*seg;
	addr_t		addr;
	u_int		len;
	struct seg	*nseg;
{
	/* intentionally empty */
}

/*
 * Unload all the mappings in the range [addr..addr+len).
 *
 * Returns immediately when the address space has no pmgrps at all.
 * Otherwise the range is walked one MMU page at a time; address ranges
 * with no pmgrp are skipped a whole pmgrp at a time.  hat_pteunload()
 * is called for every pme in the pmgrps that do exist, whether or not
 * the pme is valid.
 */
void
hat_unload(seg, addr, len)
	struct seg	*seg;
	addr_t		addr;
	u_int		len;
{
	register addr_t		a;
	register struct pment	*pme;
	register struct pmgrp	*pmg = NULL;
	addr_t			ea;
	int			s;

	if (seg->s_as->a_hat.hat_pmgrps == NULL) {
		/*
		 * If there are no allocated pmgrps for this
		 * address space, we don't want to do anything.
		 */
		return;
	}

	/*
	 * hat_setup needed for HAT_VADDR optimization to work.
	 */
	hat_setup(seg->s_as);

	for (a = addr, ea = addr + len; a < ea; a += MMU_PAGESIZE) {
		/*
		 * Look up the pmgrp on the first pass, whenever `a' is the
		 * first page of a pmgrp, and on every pass after a failed
		 * lookup (pmg is still NULL then).
		 */
		if (pmg == NULL ||
		    ((u_int)a & (PMGRPSIZE - 1)) < MMU_PAGESIZE) {
			if (pmg != NULL) {
				/* Done with the previous pmgrp. */
				s = splvm();
				pmg->pmg_keepcnt--;
				(void) splx(s);
			}
			pmg = hat_pmgfind(a, seg->s_as);
			if (pmg == NULL) {
				/*
				 * Bump up `a' to avoid checking all
				 * the pme's in the invalid pmgrp.
				 * `a' becomes the last page of this pmgrp,
				 * so the loop increment lands on the next
				 * pmgrp boundary and forces a new lookup.
				 */
				a = (addr_t)((u_int)a & ~(PMGRPSIZE - 1)) +
				    PMGRPSIZE - MMU_PAGESIZE;
				continue;
			}
			pme = &pmg->pmg_pme[mmu_btop(a - pmg->pmg_base)];
		} else {
			/*
			 * Same pmgrp as the previous page, so the pme for
			 * this page is simply the next one.
			 */
			pme++;
		}
		/*
		 * Throw out the mapping.
		 */
		if (pme->pme_nosync) {

			TEST_PANIC(pmg->pmg_keepcnt < 2,
			    "hat_unload - pmg not kept");

			/*
			 * Decrement keepcnt on pmg and smg to indicate
			 * 'unlock' on the address.
			 */
			s = splvm();
#ifdef MMU_3LEVEL
			if (mmu_3level) {
				hat_pmgtosmg(pmg)->smg_keepcnt--;
			}
#endif MMU_3LEVEL
			pmg->pmg_keepcnt--;
			(void) splx(s);

			hat_pteunload(pmg, pme, a, HAT_NOSYNC | HAT_VADDR);
		} else {
			hat_pteunload(pmg, pme, a, HAT_RMSYNC | HAT_VADDR);
		}
	}
	/* Drop the reference on the last pmgrp we were working on. */
	if (pmg) {
		s = splvm();
		pmg->pmg_keepcnt--;
		(void) splx(s);
	}
}

/*
 * Unload all the hardware translations that map page `pp'.
 *
 * The locals pmg, pme and s are required by the PP_LIST_* macros, which
 * reference them by name.
 */
void
hat_pageunload(pp)
	register struct page *pp;
{
	register struct pmgrp *pmg;
	register struct pment *pme;
	int s;

	PP_LIST_OPEN
	{
		hat_pteunload(pmg, pme, (addr_t)NULL, HAT_RMSYNC);
		/*
		 * Restart from the head of the list rather than following
		 * pme_next; presumably hat_pteunload has just unlinked pme
		 * from pp's mapping list - TODO confirm.
		 */
		PP_LIST_NEXT((struct pment *)pp->p_mapping)
	}
	PP_LIST_CLOSE
}

/*
 * Get all the hardware dependent attributes for a page struct
 *
 * Walks every translation on pp's mapping list and calls hat_ptesync()
 * on it with HAT_RMSYNC.  The locals pmg, pme and s are required by the
 * PP_LIST_* macros, which reference them by name.
 */
void
hat_pagesync(pp)
	struct page *pp;
{
	register struct pment *pme;
	register struct pmgrp *pmg;
	int s;

	PP_LIST_OPEN
	{
		/*
		 * Get page dependent info from hardware for
		 * each translation, but don't unload them.
		 */
		hat_ptesync(pp, pmg, pme, (addr_t)NULL, HAT_RMSYNC);
		/* Nothing was unlinked, so just follow the chain. */
		PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
	}
	PP_LIST_CLOSE
}

/*
 * Returns the page frame number for a given kernel virtual address.
 */
u_int
hat_getkpfnum(addr)
	addr_t	addr;
{
	struct	pte pte;

	mmu_getkpte(addr, &pte);
	return (MAKE_PFNUM(&pte));
}

/*
 * Resolve a page fault by loading a cached translation.
 *
 * hat_fault() is called from the fault handler in locore.s and pagefault.
 *
 */
hat_fault(addr)
	caddr_t		addr;
{
	struct pmgrp	*pmg;
	int		s;
	struct as	*as;

	/*
	 * We assume that addresses above KERNELBASE belong to kas.
	 */
	if (addr >= (addr_t)KERNELBASE) {
		as = &kas;
	} else {

		/*
		 * We punt if the address space of the running process
		 * hasn't been set up. pagefault will allocate the context
		 * and call hat_fault again if this was a real pagefault.
		 */
		if ((as = u.u_procp->p_as) != mmu_getctx()->c_as)
			return (FC_NOMAP);
	}

	if ((pmg = hat_pmgfind(addr, as)) != NULL) {

		if (pmg->pmg_mapped) {
			/*
			 * Pmg is mapped and loaded. The hat layer cannot
			 * resolve the fault.
			 */
			ASSERT(pmg->pmg_num != PMGNUM_SW);
			s = splvm();
			pmg->pmg_keepcnt--;
			vmhatstat.vh_faultinhw++;
			(void) splx(s);
			return (FC_NOMAP);
		} else if (hat_pmgisloaded(pmg)) {

			/*
			 * pmg is loaded but not mapped.
			 */
			s = splvm();

			/*
			 * The code below is inline hat_pmgmap.
			 */
#ifdef MMU_3LEVEL
			if (mmu_3level) {
				hat_smgalloc(pmg->pmg_as, pmg->pmg_base, pmg);
				hat_pmgtosmg(pmg)->smg_keepcnt--;
			}
#endif MMU_3LEVEL
			mmu_setpmg(pmg->pmg_base, pmg);
			pmg->pmg_mapped = 1;
			/* End of inline hat_pmgmap. */

			pmg->pmg_keepcnt--;
			vmhatstat.vh_faultmap++;
			(void) splx(s);
			return ((faultcode_t)0);
		} else {
			/*
			 * Pmg is not loaded.
			 */
			hat_pmgload(pmg);
			s = splvm();
			pmg->pmg_keepcnt--;
#ifdef MMU_3LEVEL
			if (mmu_3level)
				hat_pmgtosmg(pmg)->smg_keepcnt--;
#endif MMU_3LEVEL
			(void) splx(s);
			vmhatstat.vh_faultload++;
			return ((faultcode_t)0);
		}
	}

	/*
	 * HAT couldn't resolve the fault because there is no SW pmg.
	 */
	vmhatstat.vh_faultnopmg++;
	return (FC_NOMAP);
}


/*
 * End of machine independent interface routines.
 *
 * The next few routines implement some machine dependent functions
 * needed for the Sun MMU.  Note that each hat implementation can define
 * whatever additional interfaces that make sense for that machine.
 * These routines are defined in <machine/vm_hat.h>.
 *
 * Start machine specific interface routines.
 */


/*
 * Reserve, during kernel initialization, the pmgrp that the hardware
 * segment map currently holds for addr.  Nothing is unloaded.  The pmgrp
 * is linked into the segment's address space unless it already belongs
 * to one, and it is left with its keepcnt incremented on return.
 *
 * Panics if addr maps to the invalid pmg, if the address space has no
 * context, or if the pmgrp belongs to a different address space.
 */
void
hat_pmgreserve(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as	*owner = seg->s_as;
	register struct pmgrp	*pmg;
	u_int			pmgnum;
	int			ospl;

	/* Ask the segment map which HW pmg backs this address. */
	pmgnum = map_getsgmap(addr);
	TEST_PANIC(pmgnum == PMGRP_INVALID, "hat_pmgreserve: invalid pmg");

	/* hat_init paired every HW pmg with the SW pmgrp of the same index. */
	pmg = hwpmgs[pmgnum].hwp_pmgrp;
	ASSERT(pmg != NULL);
	ASSERT(pmg->pmg_num == pmgnum);

	ospl = splvm();
	pmg->pmg_keepcnt++;
	(void) splx(ospl);

	TEST_PANIC(owner->a_hat.hat_ctx == NULL ||
	    (pmg->pmg_as != NULL && pmg->pmg_as != owner),
	    "hat_pmgreserve");

	/* Link only a real pmgrp that has no owner yet. */
	if (pmg != pmgrp_invalid) {
		if (pmg->pmg_as == NULL)
			hat_pmglink(pmg, owner, addr);
	}

#ifdef MMU_3LEVEL
	if (mmu_3level)
		hat_smgreserve(seg, addr);
#endif /* MMU_3LEVEL */
}

/*
 * Initialize all the unlocked pmgs to have invalid pme's
 * and add them to the free list.
 * This routine is called during startup after all the
 * kernel pmgs have been reserved.  This routine will
 * also set the pmgrpmin variable for use in hat_pmgalloc.
 *
 * Two passes are made at splvm: the first walks the HW pmgs together
 * with the SW pmgrps of the same index and builds the HW free list, the
 * second walks all SW pmgrps and builds the SW free list.  A pmgrp that
 * is locked or has a non-zero keepcnt counts as reserved and is left
 * off both lists.
 */
void
hat_pmginit()
{
	register struct pmgrp	*pmg;
	register addr_t		addr;
	int			s;
	int			i;
	struct hwpmg		*hwpmg;

	s = splvm();

	/*
	 * Make HW free list. Skip locked and kept pmgrps.
	 * Here we assume that [0..NPMGRPS) were loaded in MMU by hat_init().
	 */
	for (pmg = pmgrps, hwpmg = hwpmgs;
	    hwpmg < hwpmgsNHWPMGS; pmg++, hwpmg++) {

		if (pmg->pmg_lock || pmg->pmg_keepcnt > 0) {
			pmg->pmg_mapped = 1;
			continue;
		}

		/*
		 * The first unreserved entry becomes the lower bound and
		 * the initial hand position for both the SW and HW tables.
		 */
		if (pmgrpmin == NULL) {
			pmgrpmin = pmgrphand = pmg;
			hwpmgmin = hwpmghand = hwpmg;
		}

		/*
		 * Map the pmg at SEGTEMP so that each of its ptes can be
		 * invalidated through that window.
		 */
		mmu_settpmg(SEGTEMP, pmg);

		for (addr = SEGTEMP; addr < SEGTEMP + PMGRPSIZE;
		    addr += PAGESIZE)
			mmu_setpte(addr, mmu_pteinvalid);

		/* The SW pmgrp no longer has a HW pmg behind it. */
		pmg->pmg_num = PMGNUM_SW;

		/* Push the HW pmg on the free list. */
		hwpmg->hwp_next = hwpmgfree;
		hwpmg->hwp_pmgrp = NULL;
		hwpmgfree	= hwpmg;

	}
	/* Done with the temporary window. */
	mmu_pmginval(SEGTEMP);

	/*
	 * Make SW pmg free list.
	 */
	for (pmg = pmgrps; pmg < pmgrpsNPMGRPS; pmg++) {

		if (pmg->pmg_lock || pmg->pmg_keepcnt > 0)
			continue;

		for (i = 0; i < NPMENTPERPMGRP; i++) {
			pmg->pmg_pte[i] = mmu_pteinvalid;
		}
		pmg->pmg_num = PMGNUM_SW;
		pmg->pmg_mapped = 0;

		/* Push the SW pmgrp on the free list. */
		pmg->pmg_next = pmgrpfree;
		pmgrpfree = pmg;
	}

	(void) splx(s);
#ifdef MMU_3LEVEL
	if (mmu_3level)
		hat_smginit();

	/*
	 * Check keepcnt on smegs and pmegs to detect double mapped pmegs.
	 * Note that this call is not conditional on mmu_3level.
	 */
	hat_smgcheck_keepcntall();
#endif MMU_3LEVEL
}

/*
 * Set addr in segment seg to use pte to (possibly) map to page pp.
 * This is the common routine used for hat_memload and hat_devload
 * in addition to the machine dependent mapin implementation.
 *
 * pp may be NULL when there is no page structure behind the mapping.
 * The flag bits examined here are:
 *	PTELD_LOCK	- leave the keepcnt taken during allocation in
 *			  place on return (the translation stays locked)
 *	PTELD_INTREP	- recorded in pme_intrep
 *	PTELD_NOSYNC	- recorded in pme_nosync
 *	PTELD_IOCACHE	- (IOC only) passed on to ioc_pteset()
 *
 * Outline:
 *   1. Set up the context, get the pmgrp for addr and load it in HW.
 *   2. At splvm: fix up the cacheability bit, preserve ref/mod bits when
 *	reloading a valid pme, store the pte in SW and HW.
 *   3. If the pme was not yet on pp's mapping list, link it in and (VAC)
 *	resolve cache alias conflicts with the other mappings of pp.
 *	Otherwise (VAC) flush the page if the pte actually changed.
 *   4. Record the intrep/nosync flags, update cache_stats and drop the
 *	allocation keepcnt unless PTELD_LOCK was given.
 */
void
hat_pteload(seg, addr, pp, pte, flags)
	struct seg	*seg;
	addr_t		addr;
	struct page	*pp;
	struct pte	pte;
	int		flags;
{
	register struct pment	*ppme;
	register struct pmgrp	*ppmg;
	int			s;
	struct pte		opte;

	TEST_PANIC(pp != NULL && pp->p_free, "hat_pteload free page");

	/*
	 * We need to setup context because we will (potentially) be reading
	 * HW PTE's.
	 */
	hat_setup(seg->s_as);
	ppmg = hat_pmgalloc(seg, addr);
	hat_pmgload(ppmg);

	ppme = &ppmg->pmg_pme[mmu_btop(addr - ppmg->pmg_base)];

	/*
	 * We must be sure that setting the pte and adding to the list
	 * of mappings is atomic.
	 */
	s = splvm();
#ifdef VAC
	/*
	 * If there's no page structure associated with this mapping,
	 * or the vac is turned off, or the page is non-cacheable,
	 * then force the mapping to be non-cached.
	 */
	if (pp == NULL || !vac || pp->p_nc)
		pte.pg_nc = 1;
#endif VAC
	if (ppme->pme_valid) {
		/*
		 * Reloading a translation - be sure to preserve the
		 * existing ref and mod bits for this translation.
		 *
		 * XXX - should cache all the attributes of a loaded
		 * translation in the pme structure so that we can
		 * avoid reloading all together unless something
		 * is actually going to change.
		 */

		/*
		 * Synch SW PTE by reading MMU.
		 * opte starts out as the SW copy and is then handed to
		 * mmu_getpte() to be refreshed from the hardware.
		 */
		opte = *hat_addrtopte(ppmg, addr);

		mmu_getpte(addr, &opte);
		pte.pg_r = opte.pg_r;
		pte.pg_m = opte.pg_m;

	} else {
		/* opte is left unset on this path. */
		ppme->pme_valid = 1;
	}

#ifdef IOC
	if (ioc) {
		struct pte *p = &pte;

		if (flags & PTELD_IOCACHE) {
			ioc_pteset(p);
		}
		ioc_mbufset(p, addr);
	}
#endif IOC

	*hat_addrtopte(ppmg, addr) = pte; /* Set SW PTE */

	mmu_setpte(addr, pte);	/* load the SW pte in HW */

	/*
	 * Check to see if this pme needs to be added
	 * to the list of pme's mapping this page.
	 */
	if (pp != ppme->pme_page) {

		/*
		 * The pme may only go from "no page" to pp; remapping a
		 * pme that already maps a different page is fatal.
		 */
		TEST_PANIC(ppme->pme_page != NULL, "hat_pteload");
		ASSERT(pp != NULL && ppme->pme_page == NULL);

		/* Push the pme on the head of pp's mapping list. */
		ppme->pme_page = pp;
		ppme->pme_next = PMENXT_INDEX(pp->p_mapping);
		pp->p_mapping = (caddr_t)ppme;
		(void) splx(s);

		seg->s_as->a_rss += 1;
#ifdef VAC
		/*
		 * If (vac) active, then check for conflicts.
		 * A conflict exists if the new and existent mappings
		 * do not match in their "shm_alignment" fields
		 * XXX and one of them is writable XXX.	 If conflicts
		 * exist, the extant mappings are flushed UNLESS
		 * one of them is locked.  If one of them is locked,
		 * then the mappings are flushed and converted to
		 * non-cacheable mappings [must be deconverted in
		 * hat_pteunload].
		 * XXX	need to store protections in pme
		 *	to employ writable optimization.
		 */
		if (vac && !pp->p_nc) {
			struct pmgrp *pmg;	/* temporary pmg */
			struct pment *pme;	/* temporary pme */
			struct pmgrp
			    *pmgpc = (struct pmgrp *)0,	/* user's text pmg */
			    *pmgsp = (struct pmgrp *)0;	/* user's stack pmg */
			int ccf;		/* cache conflict found flag */
			int first = 1;

			/*
			 * mappings to the user's current stack and
			 * text locations must be locked in memory,
			 * or we run the risk of getting into an
			 * infinite paging loop if the program tries
			 * to read the physical pages containing either
			 * via a mapping that is not cache aliased.
			 */
			s = splvm();
			if (!servicing_interrupt()) {
			    if (u.u_ar0 != (int *)0) {
				if (u.u_ar0[SP] != 0) {
				    pmgsp = mmu_getpmg ((addr_t)u.u_ar0[SP]);
				    if (pmgsp != (struct pmgrp *)0)
					pmgsp->pmg_keepcnt ++;
				}
				if (u.u_ar0[PC] != 0) {
				    pmgpc = mmu_getpmg ((addr_t)u.u_ar0[PC]);
				    if (pmgpc != (struct pmgrp *)0)
					pmgpc->pmg_keepcnt ++;
				}
			    }
			}
			(void)splx(s);

			ccf = 0;
			PP_LIST_OPEN
			{
				/* Skip the translation we just loaded. */
				if (pme == ppme) {
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
					continue;
				}
				/*
				 * If the first other mapping examined is
				 * aligned with the new one, stop the walk
				 * right there (pme is set to NULL).
				 */
				if (first && VAC_ALIGNED(addr, pmg->pmg_base +
				    mmu_ptob(pme - pmg->pmg_pme))) {
					PP_LIST_NEXT((struct pment *)NULL)
					continue;
				}
				first = 0;
				/*
				 * Compare keep to 1 because
				 * list walker keeps it too.
				 */
				if (pmg->pmg_lock || pmg->pmg_keepcnt > 1) {
					/* Cannot unload it: conflict. */
					ccf = 1;
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
					continue;
				}
				hat_pteunload(pmg, pme, (addr_t)NULL,
				    HAT_RMSYNC);
				/* Restart from the head after the unload. */
				PP_LIST_NEXT((struct pment *)pp->p_mapping)
			}
			PP_LIST_CLOSE

			/*
			 * Release locked pmgrps.
			 */
			s = splvm();
			if (pmgsp != (struct pmgrp *)0)
				pmgsp->pmg_keepcnt --;
			if (pmgpc != (struct pmgrp *)0)
				pmgpc->pmg_keepcnt --;
			(void) splx(s);

			if (ccf) {
				pte.pg_nc = 1;
				pp->p_nc = 1;
				/*
				 * This time we don't exclude our translation,
				 * so it will get remarked noncacheable.
				 */
				PP_LIST_OPEN
				{
					hat_ptesync(pp, pmg, pme, (addr_t)NULL,
					    HAT_NCSYNC);
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
				}
				PP_LIST_CLOSE
			}
		}
#endif VAC
	} else {
		(void) splx(s);

		/*
		 * Remapping the same page: the intrep/nosync attributes
		 * requested now must match what the pme already records.
		 */
		TEST_PANIC(pp != NULL &&
			((ppme->pme_intrep != ((flags & PTELD_INTREP) != 0)) ||
			(ppme->pme_nosync != ((flags & PTELD_NOSYNC) != 0))),
				"pteload - remap flags");
#ifdef VAC
		/*
		 * Reloading a pte for an already mapped page.	If
		 * the page is "real", then if we've got a VAC, then
		 * flush the cache to ensure no protection mismatches
		 * between cache and MMU.
		 * XXX - need to store protections in pme to avoid this.
		 *
		 * XXX - this could cause writeback errors if protection
		 *	 was changed from writeable to read-only.
		 *
		 * NOTE(review): opte is only set above when the pme was
		 * already valid; this comparison presumably relies on a
		 * pme with a non-NULL pme_page always being valid.
		 */
		if ((pp != NULL) && vac && pte.pg_r &&
		    *(int *)&pte != *(int *)&opte)
			vac_pageflush(addr);
#endif VAC
	}
	ppme->pme_intrep = (flags & PTELD_INTREP) != 0;
	ppme->pme_nosync = (flags & PTELD_NOSYNC) != 0;

	/* keep some statistics on the cache-ability of the translation */
#if defined(VAC)
	if (pte.pg_type == OBMEM) {
		if (seg->s_as == &kas) {
			if (addr >= DVMA) {
				if (!pte.pg_nc)
					cache_stats.cs_ioc++;
				else
					cache_stats.cs_ionc++;
			} else {
				if (!pte.pg_nc)
					cache_stats.cs_kc++;
				else
					cache_stats.cs_knc++;
			}
		} else {
			if (!pte.pg_nc)
				cache_stats.cs_uc++;
			else
				cache_stats.cs_unc++;
		}
	} else {
		cache_stats.cs_other++;
	}
#else defined(VAC)
	/* Without a VAC every translation is counted as non-cached. */
	if (pte.pg_type == OBMEM) {
		if (seg->s_as == &kas) {
			if (addr >= DVMA) {
				cache_stats.cs_ionc++;
			} else {
				cache_stats.cs_knc++;
			}
		} else {
			cache_stats.cs_unc++;
		}
	} else {
		cache_stats.cs_other++;
	}
#endif defined(VAC)

	if ((flags & PTELD_LOCK) == 0) {
		/*
		 * Decrement lock count. It was incr. by hat_pmgalloc().
		 */
		s = splvm();
		ppmg->pmg_keepcnt--;
#ifdef MMU_3LEVEL
		if (mmu_3level)
			hat_pmgtosmg(ppmg)->smg_keepcnt--;
#endif MMU_3LEVEL
		(void) splx(s);
	}
}

/*
 * Build a pte for the memory page pp with protections vprot and store
 * it through ppte.  The result starts out as a copy of mmu_pteinvalid,
 * then has its type set to OBMEM, its protection set from
 * hat_vtop_prot(vprot), its frame number set from page_pptonum(pp),
 * and its valid bit turned on.
 */
void
hat_mempte(pp, vprot, ppte)
	struct page *pp;
	u_int vprot;
	register struct pte *ppte;
{
	/* Begin with the invalid template so untouched fields are defined. */
	*ppte = mmu_pteinvalid;

	/* Fill in what the translation points at and how it may be used. */
	ppte->pg_type = OBMEM;
	ppte->pg_prot = hat_vtop_prot(vprot);
	ppte->pg_pfnum = page_pptonum(pp);

	/* Finally mark the entry valid. */
	ppte->pg_v = 1;
}

/*
 * Make sure the address space `as' owns a hardware context.
 *
 * If `as' already has a context, it is simply marked not clean and we
 * return.  Otherwise the ctx table is scanned, starting just after
 * ctxhand, for an unlocked context: one with no address space is taken
 * immediately, otherwise the unlocked context with the lowest c_time is
 * chosen.  A context taken from another address space has the cache
 * flushed and that address space's loaded smgrps/pmgrps invalidated
 * before it is handed over.  Finally the context is bound to `as' and
 * the smgrps (3-level MMU) or hardware-loaded pmgrps already belonging
 * to `as' are entered into the segment map.
 *
 * On return the MMU is running in the context given to `as'.
 */
void
hat_getctx(as)
	struct as *as;
{
	register struct ctx	*ctx, *ttctx;
	register struct pmgrp	*pmg;
	register u_short	tt = 0;
	int			s;
#ifdef MMU_3LEVEL
	register struct smgrp *smg;
#endif MMU_3LEVEL

	/* Already has a context: just note that it is about to be used. */
	if (as->a_hat.hat_ctx) {
		as->a_hat.hat_ctx->c_clean = 0;
		return;
	}


	/* find a free ctx or an old one */
	ttctx = NULL;
	for (ctx = ctxhand + 1; ctx != ctxhand; ctx++) {
		if (ctx == ctxsNCTXS) /* wrap around the ctx table */
			ctx = ctxs;
		if (ctx->c_lock != 0)	/* can't touch */
			continue;
		/*
		 * Take ctx with no address space.
		 */
		if (ctx->c_as == NULL) {
			ttctx = ctx;
			break;
		}
		/* Otherwise remember the candidate with the lowest c_time. */
		if (ttctx == NULL || ctx->c_time <= tt) {
			ttctx = ctx;		/* new "best" ctx */
			tt = ctx->c_time;
		}
	}

	TEST_PANIC(ttctx == NULL, "hat_getctx - no ctx's");

	/* The next search starts after the context chosen this time. */
	ctxhand = ctx = ttctx;

	/*
	 * Update vmhatstat statistics.
	 */
	if (ctx->c_as) {
		if (ctx->c_clean)
			vmhatstat.vh_ctxstealclean++;
		else
			vmhatstat.vh_ctxstealflush++;
	} else {
		vmhatstat.vh_ctxfree++;
	}

	/* Switch the MMU to the chosen context before touching its maps. */
	mmu_setctx(ctx);

	/*
	 * The context is being stolen from another address space:
	 * flush the cache and tear down the old owner's segment mappings.
	 */
	if (ctx->c_as) {
		vac_flushallctx();
		s = splvm();

#ifdef MMU_3LEVEL
		if (mmu_3level) {
			/* invalidate any smgrps already loaded for this ctx */
			for (smg = ctx->c_as->a_hat.hat_smgrps; smg != NULL;
			    smg = smg->smg_next)
				mmu_smginval(smg->smg_base);
		} else {
#endif MMU_3LEVEL
			/* invalidate any pmgrps already loaded for this ctx */
			for (pmg = ctx->c_as->a_hat.hat_pmgrps; pmg != NULL;
			    pmg = pmg->pmg_next) {

				if (pmg->pmg_mapped) {
					ASSERT(pmg->pmg_num != PMGNUM_SW);
					mmu_pmginval(pmg->pmg_base);
					pmg->pmg_mapped = 0;
				}
			}
#ifdef	MMU_3LEVEL
		}
#endif	MMU_3LEVEL
		/* The previous owner no longer has a context. */
		ctx->c_as->a_hat.hat_ctx = NULL;
		(void)splx(s);
	}

	/* Bind the context to the new address space and timestamp it. */
	s = splvm();
	ctx->c_as = as;
	ctx->c_clean = 0;
	ctx->c_time = ctx_time++;
	as->a_hat.hat_ctx = ctx;

#ifdef MMU_3LEVEL
	if (mmu_3level) {
		/* load up any smgrps already allocated to this hat */
		for (smg = as->a_hat.hat_smgrps; smg != NULL;
		    smg = smg->smg_next)
			mmu_setsmg(smg->smg_base, smg);
	} else {
#endif MMU_3LEVEL
		/* load up any pmgrps already allocated to this hat */
		for (pmg = as->a_hat.hat_pmgrps; pmg != NULL;
		    pmg = pmg->pmg_next) {

			/* Only pmgrps that occupy a hardware pmeg get mapped. */
			if (pmg->pmg_num != PMGNUM_SW) {
				ASSERT(!pmg->pmg_mapped);
				mmu_setpmg(pmg->pmg_base, pmg);
				pmg->pmg_mapped = 1;
				vmhatstat.vh_ctxmappmgs++;
			}
		}
#ifdef	MMU_3LEVEL
	}
#endif	MMU_3LEVEL

	(void)splx(s);
}


/*
 * Reserve hat resources for the range [addr, addr + len) of a kernel
 * segment.  For every MMU page in the range the covering pmgrp is
 * obtained with hat_pmgalloc() and handed to hat_pmgload().
 * Panics if seg does not belong to the kernel address space or if no
 * pmgrp can be obtained.
 */
void
hat_reserve(seg, addr, len)
	struct seg *seg;
	addr_t addr;
	u_int len;
{
	register addr_t	va;
	addr_t		limit = addr + len;	/* first address past the range */
	struct pmgrp	*grp;

	TEST_PANIC(seg->s_as != &kas, "hat_reserve");

	va = addr;
	while (va < limit) {
		grp = hat_pmgalloc(seg, va);
		TEST_PANIC(grp == NULL, "hat_reserve: pmg == NULL");
		hat_pmgload(grp);
		va += MMU_PAGESIZE;
	}
}

/*
 * Translate a machine independent protection value (some combination
 * of PROT_READ, PROT_WRITE, PROT_EXEC and PROT_USER) into the hardware
 * pte protection code:
 *
 *	no access bits (with or without PROT_USER)	0
 *	kernel, no write				KR
 *	kernel, write					KW
 *	user, no write					UR
 *	user, write					UW
 *
 * Since 0 might be a valid protection, the caller should not set the
 * valid bit if vprot == 0 to be sure.  Any value that is not exactly a
 * combination of the four flags causes a panic.
 */
u_int
hat_vtop_prot(vprot)
	u_int vprot;
{
	u_int	known = 0;	/* flags recognized as fully present in vprot */
	int	is_user;
	int	is_writable;

	if ((vprot & PROT_READ) == PROT_READ)
		known |= PROT_READ;
	if ((vprot & PROT_WRITE) == PROT_WRITE)
		known |= PROT_WRITE;
	if ((vprot & PROT_EXEC) == PROT_EXEC)
		known |= PROT_EXEC;
	if ((vprot & PROT_USER) == PROT_USER)
		known |= PROT_USER;

	/* Anything left over is not a legal protection value. */
	if (known != vprot) {
		panic("hat_vtop_prot");
		/* NOTREACHED */
	}

	/* Neither read, write nor execute was requested. */
	if (vprot == 0 || vprot == PROT_USER)
		return (0);

	is_user = ((vprot & PROT_USER) == PROT_USER);
	is_writable = ((vprot & PROT_WRITE) == PROT_WRITE);

	if (is_writable)
		return (is_user ? UW : KW);
	return (is_user ? UR : KR);
}

#if defined(sun4c) && defined(VAC)
/*
 * Flush all possible cache lines mapping the given physical page. This
 * is used for software cache consistency with I/O, to clean the cache
 * of all data subject to I/O.
 *
 * pfnum is the physical page frame number to clean.  Nothing is done
 * when the VAC is off or when the page is marked non-cacheable.  A
 * frame without a page structure is flushed only if it lies between
 * kpfn_dataseg and kpfn_endbss (inclusive); every other frame with a
 * page structure has each of its translations flushed in the context
 * that maps it.  The context current on entry is restored on return.
 */
void
hat_vacsync(pfnum)
	u_int pfnum;
{
	register struct page	*pp = page_numtopp(pfnum);
	register struct pmgrp	*pmg;
	register struct pment	*pme;
	int			s;
	addr_t			va;
	struct ctx		*ctxsav, *nctx, *curctx;
	struct pte		tpte;

	/*
	 * If the cache is off, the page isn't memory, or the page is
	 * non-cacheable, then none of the page could be in the cache
	 * in the first place, with the exception that a page frame
	 * for kernel .data or .bss objects could be in the cache,
	 * but will have no page structure.
	 */
	if (!vac) {
		return;
	} else if (pp == (struct page *) NULL) {
		extern u_int kpfn_dataseg, kpfn_endbss;
		/*
		 * Both bounds must hold: the frame has to lie inside the
		 * kernel data/bss frame range.  Testing with "or" would
		 * accept every frame and compute a bogus address below
		 * (pfnum - kpfn_dataseg would wrap for lower frames).
		 */
		if (pfnum >= kpfn_dataseg && pfnum <= kpfn_endbss) {
			extern char etext[];

			/*
			 * In sun4c, the page frame number for the start
			 * of the kernel data segment and the page frame
			 * number for end are latched up in kvm_init().
			 * If a page frame number ends up here, then some-
			 * body is doing i/o to an object in kernel .data
			 * or .bss.
			 *
			 * This is a temporary solution, and it does have
			 * some holes in it. It assumes that the page frame
			 * numbers between kernel .data and end are contiguous.
			 *
			 * As a side note, we could go to the effort of
			 * reading the kernel pte for the calculated
			 * address to check with the passed page frame
			 * number, but it isn't really worth the effort.
			 */

			va = (addr_t) (roundup((u_int) etext, DATA_ALIGN) +
			    ((pfnum - kpfn_dataseg) << MMU_PAGESHIFT));

			vac_pageflush (va);
		}
		return;
	} else if (pp->p_nc) {
		return;
	}


	curctx = ctxsav = mmu_getctx();
	/*
	 * Walk the list of translations for this page, flushing each
	 * one.
	 */
	PP_LIST_OPEN
	{
		/*
		 * If the translation has no context, it can't be
		 * in the cache.
		 */
		if ((nctx = pmg->pmg_as->a_hat.hat_ctx) != NULL &&
		    !nctx->c_clean && pmg->pmg_mapped) {
			/*
			 * Calculate the virtual address, switch to the
			 * correct context, and flush the page.
			 */
			va = pmg->pmg_base + mmu_ptob(pme - pmg->pmg_pme);
			if (nctx != curctx) {
				mmu_setctx(nctx);
				curctx = nctx;
			}
			/* Only a referenced page can have lines in the cache. */
			mmu_getpte(va, &tpte);
			if (tpte.pg_r)
				vac_pageflush(va);
		}
		PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
	}
	PP_LIST_CLOSE
	/*
	 * Restore the original context.
	 */
	if (curctx != ctxsav)
		mmu_setctx(ctxsav);
}
#endif defined(sun4c) && defined(VAC)

#ifdef sun4c
/*
 * Kill any processes that use this page. (Used for parity recovery)
 *
 * Every translation of pp is visited.  For each one, every process on
 * allproc whose p_as is the translation's address space is sent SIGBUS
 * and has u_code/u_addr in its u-area set to FC_HWERR/addr.
 *
 * If a translation belongs to the kernel's address space a message is
 * printed and the return value becomes -1; note that the walk over the
 * remaining translations is not cut short in that case.  Otherwise we
 * return 0.
 */
hat_kill_procs(pp, addr)
	struct	page	*pp;
	addr_t	addr;
{
	register struct pmgrp *pmg;
	register struct pment *pme;
	int	s;
	struct	as	*as;
	struct	proc	*p;
	int	result = 0;

	PP_LIST_OPEN
	{
		/*
		 * Find the address space that contains this pment.
		 */
		as = pmg->pmg_as;

		/*
		 * If the address space is the kernel space, then fail.
		 * The memory is corrupted, and the only thing to do with
		 * corrupted kernel memory is die.
		 */
		if (as == &kas) {
			printf("parity recovery: kernel address space\n");
			result = -1;
		}

		/*
		 * Find the proc that uses this address space and kill
		 * it.	Note that more than one process can share the
		 * same address space, if vfork() was used to create it.
		 * This means that we have to look through the entire
		 * process table and not stop at the first match.
		 */
		for (p = allproc; p; p = p->p_nxt) {
			if (p->p_as == as) {
				printf("pid %d killed: parity error\n",
				    p->p_pid);
				uprintf("pid %d killed: parity error\n",
				    p->p_pid);
				psignal(p, SIGBUS);
				/* Record the fault cause and address in the u-area. */
				p->p_uarea->u_code = FC_HWERR;
				p->p_uarea->u_addr = addr;

			}
		}
		PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
	}
	PP_LIST_CLOSE

	return (result);
}
#endif /* sun4c */

/*
 * End machine specific interface routines.
 *
 * The remainder of the routines are private to this module and are used
 * by the routines above to implement a service to the outside caller.
 *
 * Start private routines.
 */

/*
 * Unload a pme.  We call hat_ptesync() to unload the translation
 * then remove the pme from the list of pme's mapping the page.
 * Should always be called with the pmgrp for the pme being held.
 *
 *	ppmg	pmgrp that contains ppme
 *	ppme	the pme to unload
 *	vaddr	passed through to hat_ptesync()
 *	flags	hat_ptesync() flags; HAT_INVSYNC is added here
 *
 * When the pme maps a page, it is unlinked from the page's mapping
 * list and the address space's a_rss is decremented.  With a VAC, if
 * the page was marked non-cacheable and the remaining translations no
 * longer conflict, the page is made cacheable again and the remaining
 * translations are resynced with HAT_NCSYNC.
 */
static void
hat_pteunload(ppmg, ppme, vaddr, flags)
	struct pmgrp		*ppmg;
	register struct pment	*ppme;
	addr_t			vaddr;
	int			flags;
{
	int			s;
	struct page		*pp;
	struct pment		*pmentp;
	u_int			ppmeind;
#ifdef VAC
	struct pment		*pme;	/* temporary for listwalk */
	struct pmgrp		*pmg;	/* temporary for listwalk */
	struct pment		*qpme;	/* second temporary for comparison */
	struct pmgrp		*qpmg;	/* second temporary for comparison */
	addr_t			pa, qa; /* matching address values */
	int			ccf;	/* cache conflict found flag */
	int			s2;
#endif VAC

	s = splvm();
	pp = ppme->pme_page;
	ppmeind	 = PMENXT_INDEX(ppme);
	if (pp != NULL) {
		/*
		 * Remove it from the list of mappings for the page.
		 */
		if (ppme == (struct pment *)(pp->p_mapping)) {
			/* ppme is the list head: advance the head. */
			pp->p_mapping = (caddr_t)PMENXT_PTR(ppme->pme_next);
			ppme->pme_next = PMENXT_NULL;
		} else {
			TEST_PANIC(pp->p_mapping == NULL,
			    "hat_pteunload - no mappings");

			/* Find the predecessor of ppme on the list. */
			for (pmentp = ((struct pment *) (pp->p_mapping));
			    pmentp->pme_next != ppmeind;
			    pmentp = PMENXT_PTR(pmentp->pme_next))
				TEST_PANIC(pmentp->pme_next == PMENXT_NULL,
				    "hat_pteunload - no mapping");

			pmentp->pme_next = ppme->pme_next;
			ppme->pme_next = PMENXT_NULL;
		}
		(void) splx(s);
		ppme->pme_page = NULL;
		ppmg->pmg_as->a_rss -= 1;
#ifdef VAC
		if (vac && pp->p_nc) {
			/*
			 * The page is currently non-cacheable.  Compare the
			 * virtual address of each remaining translation with
			 * that of its successor; stop at the first pair that
			 * is not VAC aligned.
			 */
			ccf = 0;
			PP_LIST_OPEN
			{
				s2 = splvm();
				if ((qpme = PMENXT_PTR(pme->pme_next))
				    != NULL) {
					pa = pmg->pmg_base +
					    mmu_ptob(pme - pmg->pmg_pme);
					qpmg = &pmgrps[(qpme - pments) /
					    NPMENTPERPMGRP];
					qa = qpmg->pmg_base +
					    mmu_ptob(qpme - qpmg->pmg_pme);
					if (!VAC_ALIGNED(pa, qa)) {
						ccf = 1;
						(void) splx(s2);
						PP_LIST_NEXT(NULL)
						continue;
					}
				}
				(void) splx(s2);
				PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
			}
			PP_LIST_CLOSE
			if (!ccf) {
				/*
				 * No more cache conflict.
				 * Use hat_ptesync to resync.
				 */
				pp->p_nc = 0;
				PP_LIST_OPEN
				{
					hat_ptesync(pp, pmg, pme, (addr_t)NULL,
					    HAT_NCSYNC);
					PP_LIST_NEXT(PMENXT_PTR(pme->pme_next))
				}
				PP_LIST_CLOSE
			}
		}
#endif VAC
		/* Back to splvm for the invalidation below. */
		s = splvm();
	}
	/*
	 * Invalidate the translation.
	 */
	if (ppme->pme_valid) {
		flags |= HAT_INVSYNC;
		hat_ptesync(pp, ppmg, ppme, vaddr, flags);
		ppme->pme_nosync = ppme->pme_intrep = ppme->pme_valid = 0;
	}
	(void) splx(s);
}

/*
 * Synchronize the hardware and software of a pte.  Used for updating the
 * hardware nocache bit, the software R & M bits, and invalidating ptes.
 *
 *	pp	page mapped by the pme, or NULL
 *	pmg	pmgrp that contains pme
 *	pme	the (valid) pme to synchronize
 *	vaddr	virtual address of the translation; only used when
 *		HAT_VADDR is set in flags, otherwise it is recomputed
 *	flags	HAT_RMSYNC  - fold the R & M bits into the page and clear
 *			      them in the pte
 *		HAT_NCSYNC  - bring the pte's nocache bit in line with
 *			      pp->p_nc (VAC only)
 *		HAT_INVSYNC - invalidate the pte
 *		HAT_VADDR   - vaddr is valid (see below)
 *
 * Unless HAT_VADDR is given, the routine runs at splvm() and restores
 * both the interrupt priority and the MMU context before returning.
 */
static void
hat_ptesync(pp, pmg, pme, vaddr, flags)
	struct page		*pp;
	register struct pmgrp	*pmg;
	register struct pment	*pme;
	addr_t			vaddr;
	int			flags;
{
	register struct ctx	*ctxsav, *nctx;
	register addr_t		mapaddr;
	int			s, pmg_off;
	struct pte		pte;
	struct pte		*ppte;	/* pointer to SW pte */
	struct as		*as;
	struct ctx		*ctx;
	int			usetemp;
	int			dommu;
	int			doflush;
	int			didsetpte = 0;
	int			didflush = 0;

	ppte = hat_pmetopte(pmg, pme);
	as = pmg->pmg_as;
	ctx = as->a_hat.hat_ctx;

	TEST_PANIC(pme->pme_valid == 0, "hat_ptesync - invalid pme");

	/*
	 * The HAT_VADDR flag means that the vaddr argument contains a valid
	 * page address.
	 *
	 * It's used as optimization when hat_ptesync is called
	 * from hat_unload(). We know that:
	 *	(1) the pmg context is setup
	 *	(2) we don't need to splvm() here since we will not
	 *	    be using SEGTEMP
	 *
	 */
	if (flags & HAT_VADDR) {

		if (hat_pmgisloaded(pmg)) {
			/*
			 * pmg is loaded in HW - make sure that it is mapped.
			 */
			hat_pmgmap(pmg);

			mapaddr = vaddr;
			dommu = 1;
			usetemp = 0;
			/*
			 * We must flush VAC if the context is not clean.
			 */
			doflush = !ctx->c_clean;

		} else {
			/*
			 * pmg is not loaded in MMU
			 */
			dommu = doflush = usetemp = 0;
			mapaddr = (addr_t)0;
		}
		goto skip;
	}

	pmg_off = mmu_ptob(pme - pmg->pmg_pme);
	vaddr = pmg->pmg_base + pmg_off;
	ctxsav = mmu_getctx();
	/*
	 * We must protect the use of the mapping address,
	 * since it is a shared resource.
	 */
	s = splvm();

	if (!hat_pmgisloaded(pmg)) {
		/*
		 * SW page table is not loaded.
		 */
		dommu = doflush = usetemp = 0;

		/*
		 * No context switch is done on this path.  Record that
		 * so the "restore context" test at the end of the routine
		 * compares a defined value and does nothing.
		 */
		nctx = ctxsav;

		/*
		 * XXX - Set mapaddr to 0 so that we get a kernel text fault
		 * and panic if we try to use it in the code below.
		 */
		mapaddr = (addr_t)0;
	} else if (ctx == NULL) {
		/*
		 * pmg is loaded but its as does not have a context.
		 * Set things up so that the pmgrp is mapped into a temporary
		 * segment.  No need to do any VAC flushing since this
		 * was done when we took the ctx away.	Set
		 * up the mapaddr within the temporary segment.
		 */

		/* XXX - this is disgusting! */
#ifdef sun2
		extern struct ctx *kctx;

		nctx = kctx;
		if (nctx != ctxsav)
			mmu_setctx(nctx);
#else sun2
		nctx = ctxsav;		/* no need to switch context */
#endif sun2

		mmu_settpmg(SEGTEMP, pmg);
		mapaddr = SEGTEMP + pmg_off;
		dommu = usetemp = 1;
		doflush = 0;
	} else {
		/*
		 * pmg is loaded and pmg's as has a ctx.
		 * Make sure we are running in the as context.
		 * Use the virtual address as the mapping address.
		 */
		if ((nctx = ctx) != ctxsav)
			mmu_setctx(nctx);

		/*
		 * Make sure that pmg is mapped.
		 */
		hat_pmgmap(pmg);

		mapaddr = vaddr;
		dommu = 1;
		usetemp = 0;
		/*
		 * We must flush VAC if the context is not clean.
		 */
		doflush = !ctx->c_clean;
	}
skip:
	if (!vac)
		doflush = 0;		/* no VAC on this system */

	if (dommu) {
		/* Assert that the pmg is mapped */
		ASSERTPMGMAPPED(pmg, "ptesync");
	}

	/*
	 * At this point, the flags are set:
	 *
	 * doflush - if the page must be flushed
	 * dommu   - if pte must be get/set from HW MMU
	 * usetemp - if SEGTEMP is used to access the HW MMU map
	 *
	 * Note that a loaded pmg is also mapped if we have a context.
	 */
	if (pp != NULL) {
		if (flags & HAT_RMSYNC) {

			/* Refresh the SW pte from the MMU when it lives there. */
			if (dommu) {
				mmu_getpte(mapaddr, ppte);
			}
			pte = *ppte;

			/*
			 * Call back to inform address space, if turned on.
			 */
			if (as->a_hatcallback) {
			    as_hatsync(as, vaddr, (u_int) pte.pg_r,
				(u_int) pte.pg_m,
				(u_int)(flags & HAT_INVSYNC ? AHAT_UNLOAD : 0));
			}
			pg_setref(pp, pp->p_ref | pte.pg_r);
			pg_setmod(pp, pp->p_mod | pte.pg_m);
#ifdef VAC
			/*
			 * When you zero the modified bit in the MMU
			 * and leave it set in the cache you may not
			 * get it set in the mmu when the line is
			 * re-written.	Writeback caches perform the
			 * setting of the modified bit for a page in
			 * the MMU on the first write miss that happens
			 * to that page. Subsequent writes don't bother
			 * to set the modified bit because the first
			 * write did it.  Therefore if you are zeroing
			 * the modified bit you must flush the cache
			 * so that subsequent writes, see the modified
			 * bit unset in the cache and write it back to
			 * the MMU.
			 */
			if (doflush && pte.pg_r) {
				vac_pageflush(mapaddr);
				didflush = 1;
			}
#endif VAC
			pte.pg_r = pte.pg_m = 0;
		}
#ifdef VAC
		else if (flags & HAT_NCSYNC) {

			if (dommu) {
				mmu_getpte(mapaddr, ppte);
			}
			pte = *ppte;
			/*
			 * N.B.	 The following test assumes that there
			 * are no user addresses at the same virtual
			 * addresses as DVMA and segu in VAC machines.
			 */
			if (mapaddr >= DVMA || (segu != NULL &&
			    mapaddr >= segu->s_base &&
			    mapaddr < segu->s_base + segu->s_size)) {
				/*
				 * To avoid lots of problems, we don't
				 * try to convert anything from cached
				 * to non-cached (or vice-versa) when
				 * it is being loaded for DVMA use.
				 * Also, we refuse to mess with user
				 * areas since it is impossible to
				 * reliably flush when converting
				 * from cached to non-cached and we
				 * don't want to take any performance
				 * hits from using a non-cached stack.
				 */
				didsetpte = 1;
				cache_stats.cs_skip++;

				goto skip_ncsync;
			}

			/*
			 * To avoid lots of problems, we don't try to convert
			 * anything from cached to non-cached (or vice-versa)
			 * when it is being loaded for DVMA use.
			 */
			if (mapaddr < DVMA) {
				if (doflush && !pte.pg_nc && pp->p_nc) {
					int pri, iskas;

					/*
					 * Need to convert from a cached
					 * translation to a non-cached
					 * translation.	 There are lots
					 * of potential races here in the
					 * kernel's address space.  If
					 * some clean line ends up in the
					 * cache after it is flushed here
					 * and is then written to, the
					 * Sirius cache system will end
					 * up giving a memory timeout error.
					 *
					 * For now, we assume that between
					 * time that we flush the virtual
					 * address and reset the MMU that
					 * nothing will be getting into
					 * the cache from things like
					 * ethernet (this is questionable).
					 * We also assume that will never
					 * be converting anything from
					 * cached to non-cached in the
					 * kernel for the current stack,
					 * (i.e., the stack can be accessed
					 * safely w/o it being changed from
					 * cached to non-cached), the interrupt
					 * stack, or anything that might be
					 * touched at interrupts above splhigh
					 * (UARTS, level7 profiling).
					 * The stack being considered safe
					 * will need to be watched if/when
					 * we go away from using a fixed
					 * virtual address for the user area
					 * that is not managed by the hat layer.
					 */
					TEST_PANIC(!pmg->pmg_mapped,
					    "hat_ptesync - pmg not mapped");

					pte.pg_nc = 1;

					iskas = (as == &kas);

					pri = splhigh();

					if (pte.pg_r)
						vac_pageflush(mapaddr);

					/* Change both SW and HW pte */
					*ppte = pte;
					mmu_setpte(mapaddr, pte);
					didsetpte = 1;

					if (iskas) {
						/*
						 * Flush the virtual address
						 * again just in case some IO
						 * got in behind our back
						 * above.  Doing this for
						 * iskas only assumes there
						 * is no UDVMA to worry about.
						 */
						struct pte	tpte;

						mmu_getpte(mapaddr, &tpte);
						if (tpte.pg_r)
							vac_pageflush(mapaddr);

						cache_stats.cs_kchange++;
					} else {
						cache_stats.cs_uchange++;
					}
					(void) splx(pri);
				} else {
					pte.pg_nc = pp->p_nc;
				}
			} else {
				cache_stats.cs_iowantchg++;
			}
		skip_ncsync:
			;
		}
#endif VAC
	}

	if (flags & HAT_INVSYNC) {
#ifdef VAC
		if (vac) {
			if (mapaddr >= DVMA && mapaddr < DVMA + dvmasize) {
				/*
				 * Always flush before invalidating a DVMA
				 * translation because the ref bit may lie.
				 * See bugid 1039410.
				 */
				vac_pageflush(mapaddr);
			} else if (doflush && !didflush) {
				struct pte	tpte;

				mmu_getpte(mapaddr, &tpte);
				if (tpte.pg_r)
					vac_pageflush(mapaddr);
			}
		}
#endif VAC
		pte = mmu_pteinvalid;
	}

	/* Store the result unless the NCSYNC path already did (or skipped). */
	if (!didsetpte) {
		*ppte = pte;
		if (dommu) {
			mmu_setpte(mapaddr, pte);
		}
	}

	/*
	 * Optimized return when hat_ptesync was called from hat_unload.
	 */
	if (flags & HAT_VADDR)
		return;
	if (usetemp)
		mmu_settpmg(SEGTEMP, pmgrp_invalid);
	(void) splx(s);
	if (nctx != ctxsav)
		mmu_setctx(ctxsav);
}

/*
 * Allocate a SW page table for a given address.
 *
 * Returns the pmgrp of seg's address space that covers addr.  If the
 * address space already has one (hat_pmgfind()), that one is returned;
 * otherwise a pmgrp is taken from the free list, and when the free list
 * is empty a pmgrp is stolen from some address space, freed with
 * hat_pmgfree(), and the allocation is retried.
 *
 * In either case the returned pmgrp has had its keep count raised
 * (by hat_pmgfind(), or set to 1 here).  Panics if two full sweeps of
 * the pmgrp array find nothing that can be stolen.
 */
static struct pmgrp *
hat_pmgalloc(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as *as = seg->s_as;
	register struct pmgrp *pmg;
	int s;

	s = splvm();
	if ((pmg = hat_pmgfind(addr, as)) != NULL) {
		vmhatstat.vh_pmgallochas++;
		(void) splx(s);
		return (pmg);
	}

	/*
	 * No pmgrp allocated to this address space contains the address;
	 * allocate a new pmg for this address space.  First, try
	 * the free list.
	 */

	/*
	 * Update vmhatstat statistics.
	 */
	if (pmgrpfree == NULL)
		vmhatstat.vh_pmgallocsteal++;
	else
		vmhatstat.vh_pmgallocfree++;

top:
	if ((pmg = pmgrpfree) == NULL) {
		int try;

		/*
		 * No SW pmg's free, have to take one from someone.
		 * Take from address spaces with no ctx first.
		 */
		pmg = pmgrphand;
		for (try = 1; /* empty */; try++) {
			/* One full circuit of the pmgrp array per try. */
			do {
				pmg++;
				if (pmg == pmgrpsNPMGRPS) {
					/*
					 * Wrap around and skip kernels pmgs.
					 */
					pmg = pmgrpmin;
				}

				if (pmg->pmg_lock == 0 &&
				    pmg->pmg_keepcnt == 0) {

					/*
					 * On the first try, only take a pmg
					 * from an address space with no ctx.
					 */
					if (try == 1 &&
					    pmg->pmg_as->a_hat.hat_ctx != NULL)
						continue;
#ifdef SUNDBE
                                        /*
                                         * Don't steal pmeg with stack
                                         */
                                        if (try == 1 &&
                                            pmg->pmg_base ==
                                            (addr_t)KERNELBASE - PMGRPSIZE)
                                                continue;
#endif /* SUNDBE */
					/*
					 * Found a candidate, free
					 * it up and try again.
					 */

					/* hat_pmgfree() expects the pmg kept once. */
					pmg->pmg_keepcnt++;
					hat_pmgfree(pmg);
					pmgrphand = pmg;
					goto top;
				}
			} while (pmg != pmgrphand);
			/*
			 * Give up after 2 tries.
			 */
			if (try >= 2) {
				panic("hat_pmgalloc out of hat");
			}
		}
	}

	pmgrpfree = pmg->pmg_next;	/* take it off the free list */
	/* Hold the lock bit only while the pmg is being linked in. */
	pmg->pmg_lock = 1;
	pmg->pmg_keepcnt = 1;

	hat_pmglink(pmg, as, addr);
	pmg->pmg_lock = 0;

	(void) splx(s);

	return (pmg);
}

/*
 * Load SW pmg in HW
 *
 * Makes sure the software pmgrp swpmg occupies a hardware pmeg and is
 * mapped in the segment map:
 *
 *   - already mapped: nothing to load;
 *   - loaded in a hardware pmeg but not mapped: just map it;
 *   - otherwise: take a hardware pmeg from the free list, or steal one
 *     from another pmgrp, map swpmg there and load its ptes.
 *
 * In the first two cases, on a 3-level MMU, the keep count of the smgrp
 * containing swpmg is incremented before returning.
 * Panics if no hardware pmeg can be stolen.
 */
static void
hat_pmgload(swpmg)
	struct pmgrp		*swpmg;
{
	register struct as	*as;
	register struct pmgrp	*pmg = (struct pmgrp *)NULL;
	register struct hwpmg	*hwpmg;
	int			s;
	int			pass;

	if (swpmg->pmg_mapped) {
		ASSERT(swpmg->pmg_num != PMGNUM_SW);
		goto locksmg;
	}

	as = swpmg->pmg_as;

	/*
	 * Loaded, but not mapped.
	 */
	if (swpmg->pmg_num != PMGNUM_SW) {
		ASSERT(as == &kas || mmu_getctx()->c_as == as);
		hat_pmgmap(swpmg);
		goto locksmg;
	}

	hat_setup(as);
	s = splvm();

	if ((hwpmg = hwpmgfree) != NULL) {
		vmhatstat.vh_pmgldfree++;
		goto found_free_hwpmg;
	}

	/*
	 * The strategy for stealing a HW pmeg:
	 * We make 2 passes over the array of HW pmegs (starting at hwpmghand)
	 * and will take the pmeg if:
	 *
	 * Pass 1. pmeg does not require VAC flush
	 *    (we clean all pmegs between Pass 1 and Pass 2).
	 * Pass 2. Take any unlocked pmeg. In pass 2, we will take
	 *    even a dirty pmeg because it may be an unused kernel pmeg.
	 */
	hwpmg = hwpmghand;
	for (pass = 1; pass < 3; pass++) {
		struct ctx	*ctx;

		do {
			hwpmg++;
			if (hwpmg == hwpmgsNHWPMGS) {
				/*
				 * Wrap around and skip kernel pmgs.
				 */
				hwpmg = hwpmgmin;
			}

			pmg = hwpmg->hwp_pmgrp;
			ASSERT(pmg != NULL);
			ASSERT(pmg->pmg_num != PMGNUM_SW);

			/*
			 * Skip locked and kept pmg's
			 */
			if (pmg->pmg_lock || pmg->pmg_keepcnt > 0) {
				continue;
			}

			/*
			 * ctx is only assigned (and only used below)
			 * during pass 1.
			 */
			if (pass == 1 &&
			    (ctx = pmg->pmg_as->a_hat.hat_ctx) != NULL &&
			    !ctx->c_clean && pmg->pmg_mapped)
				continue;

			if (pass == 1) {
				if (ctx == NULL)
					vmhatstat.vh_pmgldnoctx++;
				else if (ctx->c_clean)
					vmhatstat.vh_pmgldcleanctx++;
				else
					vmhatstat.vh_pmgldnomap++;
			}

			/*
			 * Keep the pmg until hat_pmgswapptes().
			 */
			pmg->pmg_keepcnt++;
			hat_pmgunload(pmg, PTESFLAG_SKIP);
			/*
			 * hat_pmgunload() pushed the victim's hwpmg on the
			 * free list; check that it is the one we picked.
			 * (This must be a comparison, not an assignment.)
			 */
			ASSERT(hwpmg == hwpmgfree);
			hwpmghand = hwpmg;
			goto found_free_hwpmg;

		} while (hwpmg != hwpmghand);

		if (pass == 1) {

			/*
			 * We haven't found a pmg that does not require
			 * VAC flush. Clear all pmg's now by flushing
			 * all contexts.
			 */
			vac_flushallctx();
			hat_unmap_aspmgs(as);
			vmhatstat.vh_pmgldflush++;
		}
	}
	panic("hat_pmgload: failed after two  passes");

found_free_hwpmg:
	hat_clrcleanbit();
	swpmg->pmg_num = (hwpmg - hwpmgs);
	hwpmg->hwp_pmgrp = swpmg;
	hwpmgfree = hwpmg->hwp_next;	/* take it off the free list */

	ASSERT(swpmg->pmg_as == &kas || mmu_getctx()->c_as == swpmg->pmg_as);

#ifdef MMU_3LEVEL
	if (mmu_3level)
		hat_smgalloc(as, swpmg->pmg_base, swpmg);
#endif MMU_3LEVEL

	/* We are already at splvm() */
	mmu_setpmg(swpmg->pmg_base, swpmg);
	swpmg->pmg_mapped = 1;

	/*
	 * Load all valid SW PTE's in MMU.
	 *
	 * pmg is still NULL when the hardware pmeg came off the free
	 * list; otherwise it is the victim whose ptes are swapped out
	 * and whose keep (taken above) is dropped here.
	 */
	if (pmg == (struct pmgrp *)NULL)
		hat_pmgloadptes(swpmg->pmg_base, swpmg->pmg_pte);
	else {
		hat_pmgswapptes(swpmg->pmg_base, swpmg->pmg_pte, pmg->pmg_pte);
		pmg->pmg_keepcnt--;
	}

	as->a_hat.hat_pmgldcnt++;    /* incr. number of HW pmegs for this as */

	(void) splx(s);
	return;

locksmg:
#ifdef MMU_3LEVEL
	if (mmu_3level) {
		/*
		 * Callers to hat_pmgload assume that hat_pmgload keeps smg.
		 */
		s = splvm();
		hat_pmgtosmg(swpmg)->smg_keepcnt++;
		(void)splx(s);
	}
#endif MMU_3LEVEL
	return;
}

/*
 * Map a pmgrp in segment map.
 *
 * Only acts on a pmgrp that occupies a hardware pmeg (pmg_num is not
 * PMGNUM_SW) and is not yet mapped; anything else is left alone.
 * The segment map update is done at splvm().
 */
static void
hat_pmgmap(pmg)
	struct pmgrp	*pmg;
{
	int		pri;

	/* Software-only or already mapped: nothing to do. */
	if (pmg->pmg_num == PMGNUM_SW || pmg->pmg_mapped)
		return;

	pri = splvm();
#ifdef MMU_3LEVEL
	if (mmu_3level) {
		/* Get the smgrp set up, then drop one keep on it. */
		hat_smgalloc(pmg->pmg_as, pmg->pmg_base, pmg);
		hat_pmgtosmg(pmg)->smg_keepcnt--;
	}
#endif /* MMU_3LEVEL */
	mmu_setpmg(pmg->pmg_base, pmg);
	pmg->pmg_mapped = 1;
	(void) splx(pri);
}

/*
 * Free the specified SW pmgrp.	 This is done by calling hat_pteunload
 * on all the pme's to process all the referenced and modified bits
 * and to invalidate the pme.  If the pmgrp is currently loaded in the
 * hardware it is unloaded first.  Finally we unlink the pmgrp from
 * the hat pmgrp list and put it on the free list.
 * pmg should be kept (once) when this routine is called; that keep is
 * dropped here.
 */
static void
hat_pmgfree(pmg)
	register struct pmgrp *pmg;
{
	register struct pment	*pme = pmg->pmg_pme;
	register struct as	*as;
	register int		tcnt;
	int			s;

	ASSERT(pmg->pmg_keepcnt == 1);
	ASSERT(pmg->pmg_as != NULL);

	/* Pull the pmgrp out of the hardware, saving its ptes. */
	if (hat_pmgisloaded(pmg))
		hat_pmgunload(pmg, PTESFLAG_UNLOAD);

	s = splvm();

	if ((as = pmg->pmg_as) != NULL) {
		/* Unload every valid translation in the group. */
		for (tcnt = 0; tcnt < NPMENTPERPMGRP; tcnt++, pme++) {
			if (pme->pme_valid)
				hat_pteunload(pmg, pme, (addr_t)NULL,
				    HAT_RMSYNC);
		}
		/* Unlink from the address space's doubly linked pmgrp list. */
		if (as->a_hat.hat_pmgrps == pmg) {
			as->a_hat.hat_pmgrps = pmg->pmg_next;
			if (pmg->pmg_next)
				pmg->pmg_next->pmg_prev = NULL;
		} else {
			pmg->pmg_prev->pmg_next = pmg->pmg_next;
			if (pmg->pmg_next)
				pmg->pmg_next->pmg_prev = pmg->pmg_prev;
		}
		pmg->pmg_as = NULL;
		pmg->pmg_next = pmg->pmg_prev = NULL;
	}

	/* Drop the caller's keep and push the pmgrp on the free list. */
	pmg->pmg_keepcnt--;
	pmg->pmg_next = pmgrpfree;
	pmgrpfree = pmg;

	(void) splx(s);
}

/*
 * Remove a pmgrp from the hardware MMU.
 *
 * pmg must currently occupy a hardware pmeg.  If it is mapped in the
 * segment map it is unmapped first (flushing the segment from the VAC
 * when its context is not clean).  When ptesflag is PTESFLAG_UNLOAD the
 * ptes are unloaded through SEGTEMP with hat_pmgunloadptes().  The
 * hardware pmeg is then returned to the hwpmg free list, the address
 * space's count of loaded pmegs is decremented and the pmgrp is marked
 * software-only (PMGNUM_SW).
 *
 * Runs at splvm() because SEGTEMP is used.
 */
static void
hat_pmgunload(pmg, ptesflag)
	struct pmgrp	*pmg;
	enum ptesflag	ptesflag;
{
	addr_t		a;
	struct pte	*ppte = pmg->pmg_pte;
	struct hwpmg	*hwpmg;
	struct ctx	*ctx, *ctxsav;
	int		s;

	ASSERT(pmg->pmg_num != PMGNUM_SW);

	s = splvm();			/* using SEGTEMP */

#ifdef notdef				/* 3 level mmu */
	ASSERT(pmg->pmg_as->a_hat.hat_ctx != NULL ||  !pmg->pmg_mapped);
#endif notdef

	/*
	 * Unmap pmg from segment map.
	 */
	if (pmg->pmg_mapped) {

#ifdef MMU_3LEVEL
		if (mmu_3level) {
			ASSERT(pmg->pmg_sme != NULL);
		}
#endif MMU_3LEVEL

		if ((ctx = pmg->pmg_as->a_hat.hat_ctx) != NULL) {
			/* Work in the owning context, then switch back. */
			ctxsav = mmu_getctx();
			if (ctxsav != ctx)
				mmu_setctx(ctx);

			if (!ctx->c_clean)
				vac_segflush(pmg->pmg_base);

			mmu_pmginval(pmg->pmg_base);

			if (ctxsav != ctx)
				mmu_setctx(ctxsav);
		} else {
#ifdef MMU_3LEVEL
			if (mmu_3level) {
				/*
				 * We have to use REGTEMP to map the smg.
				 * Note that REGTEMP may be used only in kctx.
				 */

				struct smgrp	*smg;

				ctxsav = mmu_getctx();
				if (ctxsav != kctx)
					mmu_setctx(kctx);

				smg = &smgrps[(pmg->pmg_sme - sments)
				    >> NSMENTPERSMGRPSHIFT];
				mmu_setsmg(REGTEMP, smg);
				mmu_pmginval(REGTEMP +
				    ((u_int)pmg->pmg_base & SMGRPOFFSET));
				mmu_smginval(REGTEMP);

				if (ctxsav != kctx)
					mmu_setctx(ctxsav);
			} else
				panic("hat_pmgunload");
#else  MMU_3LEVEL
			/* Mapped without a context: not expected on a 2-level MMU. */
			panic("hat_pmgunload");
#endif MMU_3LEVEL
		}
		pmg->pmg_mapped = 0;

#ifdef MMU_3LEVEL
		if (mmu_3level) {
			/* Detach the pmgrp from its segment map entry. */
			pmg->pmg_sme->sme_valid = 0;
			pmg->pmg_sme->sme_pmg = (struct pmgrp *)NULL;
			pmg->pmg_sme = (struct sment *)NULL;
		}
#endif MMU_3LEVEL
	}

	ASSERT(!pmg->pmg_mapped);

	if (ptesflag == PTESFLAG_UNLOAD) {
		/*
		 * Unload all valid SW PTE's in MMU.
		 *
		 */
		map_setsgmap(SEGTEMP, pmg->pmg_num);
		a = SEGTEMP;
		hat_pmgunloadptes(a, ppte);
		map_setsgmap(SEGTEMP, PMGRP_INVALID);
	}

	pmg->pmg_as->a_hat.hat_pmgldcnt--;

	/*
	 * Put hwpmg in HW pmg free list
	 */
	hwpmg = &hwpmgs[pmg->pmg_num];
	hwpmg->hwp_next = hwpmgfree;
	hwpmgfree = hwpmg;
	hwpmg->hwp_pmgrp = NULL;

	/* The pmgrp now exists in software only. */
	pmg->pmg_num = PMGNUM_SW;

	(void) splx(s);
}

/*
 * Put pmg at the head of the list of pmgrps owned by address space as
 * and record the (PMGRPSIZE aligned) base address it covers.  pmgrps
 * hang off the address space rather than the ctx so that they survive
 * the address space losing its hardware context.
 * The caller must already hold a keep on pmg.
 */
static void
hat_pmglink(pmg, as, addr)
	register struct pmgrp *pmg;
	struct as *as;
	addr_t addr;
{
	register struct pmgrp *oldhead;
	int pri;

	ASSERT(pmg->pmg_keepcnt > 0);

	pri = splvm();

	oldhead = as->a_hat.hat_pmgrps;

	/* Splice in front of the current head. */
	pmg->pmg_as = as;
	pmg->pmg_next = oldhead;
	if (oldhead != NULL)
		oldhead->pmg_prev = pmg;
	pmg->pmg_prev = NULL;
	as->a_hat.hat_pmgrps = pmg;

	/* Round addr down to the start of its pmgrp. */
	pmg->pmg_base = (addr_t)((u_int)addr & ~(PMGRPSIZE - 1));

	(void) splx(pri);
}

#ifdef notdef
/*
 * Called when the wrong pmeg is read out from the MMU.
 * Most likely, this is a down rev Carrera CPU board that
 * is missing some pullup registers on the segment RAMs.
 * The ECO for the needed Carrera CPU board fix is 2550.
 *
 * Prints the pmeg number the hardware reported and, except on sun2,
 * the number held by the software pmgrp covering addr (looked up in
 * the kernel address space for addresses at or above KERNELBASE,
 * otherwise in as), then panics.
 */
static void
hat_wrongpmg(pmg, addr, as)
	struct pmgrp *pmg;
	addr_t addr;
	struct as *as;
{
#ifndef sun2
	register struct pmgrp *swpmg;
	struct as *lookas;

	addr = (addr_t)((u_int)addr & ~(PMGRPSIZE - 1));
	printf("hardware claims page map entry group number is 0x%x\n",
	    pmg->pmg_num);

	/*
	 * Kernel addresses are searched for in the kernel address
	 * space, everything else in the address space passed in.
	 */
	if (addr >= (addr_t)KERNELBASE)
		lookas = &kas;
	else
		lookas = as;

	swpmg = lookas->a_hat.hat_pmgrps;
	while (swpmg != NULL && swpmg->pmg_base != addr)
		swpmg = swpmg->pmg_next;

	if (swpmg != NULL) {
		printf("  software says page map entry group number is 0x%x\n",
		    swpmg->pmg_num);
	}
#endif
	printf("pmg = %x, pmg base = %x, addr = %x\n", pmg, pmg->pmg_base,
	    addr);
	panic("wrong pmg");
	/* NOTREACHED */
}
#endif notdef

static void
hat_xfree(as)
	register struct as *as;
{
	register int s;
	register struct pmgrp *pmg;

	/*
	 * Free pmgrp's.
	 */
	s = splvm();
	while (pmg = as->a_hat.hat_pmgrps) {
		pmg->pmg_keepcnt++;
		(void) splx(s);
		hat_pmgfree(pmg);
		s = splvm();
	}
	(void) splx(s);

#ifdef MMU_3LEVEL
	/*
	 * If three-level mmu, free smgrp's.
	 */
	if (mmu_3level) {
		register struct smgrp *smg;

		s = splvm();
		while (smg = as->a_hat.hat_smgrps) {
			smg->smg_keepcnt++;
			(void) splx(s);
			hat_smgfree(smg);
			s = splvm();
		}
		(void) splx(s);
	}
#endif MMU_3LEVEL

	ASSERT(as->a_hat.hat_pmgldcnt == 0);
}


/*
 * Find a SW page table.
 *
 * The returned page table is 'kept'.
 *
 * We optimize the search by using a look-aside buffer of mappings
 * from <as, addr> to a pointer to the pmg structure. For each hashed
 * <as, addr> value, we store a pointer to the most recently found pmgrp.
 * 1009 is a prime that was found to be optimal.
 */

/*
 * Hash an <as, addr> pair to an index for the pmg look-aside buffer.
 * Both arguments are parenthesized so that any expression may be passed
 * without operator-precedence surprises.
 * The modulus is NPMGHASH-1, so results lie in 0 .. NPMGHASH-2.
 */
#define	hash_asaddr(as, addr) \
	((((u_int)(as) >> 2) * 1009 + ((u_int)(addr) >> PMGRPSHIFT)) % \
	    (NPMGHASH - 1))

/*
 * Look up the pmgrp linked to address space 'as' whose base is the
 * pmgrp base of 'addr'.  Returns the pmgrp with its keep count
 * incremented, or NULL when the address space has no such pmgrp.
 * The whole lookup runs at splvm.
 */
static struct pmgrp *
hat_pmgfind(addr, as)
	addr_t		addr;
	struct as	*as;
{
	struct pmgrp	*pmg;
	addr_t		base = hat_pmgbase(addr);
	int		slot;
	int		s;

	slot = hash_asaddr(as, base);

	s = splvm();

	/*
	 * Fast path: the look-aside slot already names the right pmgrp.
	 */
	pmg = pmghash[slot];
	if (pmg != NULL && pmg->pmg_as == as && pmg->pmg_base == base) {
		pmg->pmg_keepcnt++;
		pmgfindstat.pf_hit++;
		(void) splx(s);
		return (pmg);
	}

	/*
	 * Slow path: walk every pmgrp linked to the address space.
	 */
	pmg = as->a_hat.hat_pmgrps;
	while (pmg != NULL && pmg->pmg_base != base)
		pmg = pmg->pmg_next;

	if (pmg == NULL) {
		pmgfindstat.pf_notfound++;
	} else {
		pmg->pmg_keepcnt++;
		pmgfindstat.pf_miss++;

		/*
		 * Remember the result in the look-aside buffer.
		 */
		pmghash[slot] = pmg;
	}

	(void) splx(s);
	return (pmg);
}


/*
 * Clear the clean bit in the context of the running process.
 * Nothing is done unless the current process, its address space and
 * that address space's context all exist.
 */
static void
hat_clrcleanbit()
{
	struct	proc	*procp;
	struct	as	*asp;
	struct	ctx	*ctxp;

	procp = u.u_procp;
	if (procp == NULL)
		return;

	asp = procp->p_as;
	if (asp == NULL)
		return;

	ctxp = asp->a_hat.hat_ctx;
	if (ctxp != NULL)
		ctxp->c_clean = 0;
}


#ifdef MMU_3LEVEL
/*
 * Code to handle allocation of smegs is cloned from the pmeg versions
 */

/*
 * When non-zero, hat_getsmg() and hat_smgalloc() compare the base
 * recorded in the smgrp read from the mmu against the region base of
 * the address being looked up, and on a mismatch print a message and
 * call call_debug().
 */
int getsmg_check = 1;

/*
 * This routine will return the smgrp structure for the given address
 * in the current ctx.	But unlike mmu_getsmg, this routine will protect
 * against the smgrp being lost by spl'ing and will return a kept smgrp
 * pointer.  The keepcnt should be decremented by the caller when it is
 * done looking at the smgrp contents.
 */
static struct smgrp *
hat_getsmg(addr)
	addr_t addr;
{
	int s;
	struct smgrp *smg;

	s = splvm();
	smg = mmu_getsmg(addr);
	smg->smg_keepcnt++;
	if (getsmg_check && smg != smgrp_invalid && smg->smg_base != 0 &&
	    smg->smg_base != (caddr_t)((u_int)addr & ~(SMGRPSIZE - 1))) {
		printf("hat_getsmg: addr=%x, smg=%x, smg base=%x\n",
		    addr, smg, smg->smg_base);
		call_debug("hat_getsmg");
	}
	(void) splx(s);
	return (smg);
}

/*
 * Free the specified smgrp.
 *
 * If the smgrp is linked to an address space, every valid sme in it is
 * torn down: the pmgrp's mapping is invalidated through the REGTEMP
 * window while running in kctx, the pmgrp is marked unmapped and the
 * sme <-> pmgrp cross pointers are cleared.  Note that hat_pmgfree is
 * not called here; the pmgrps themselves stay allocated.  If the hat
 * containing this smg currently has a ctx, vac_rgnflush is called on
 * the region first (unless the ctx is clean) and afterwards the smgrp
 * mapping at smg_base is invalidated in that ctx.  Finally we unlink
 * the smgrp from the hat smgrp list and put it on the free list.
 *
 * Under SUNDBE on sun4, a pseudo smgrp skips the sme teardown and is
 * handed to kmem_free instead of being put on the free list.
 *
 * smg should be kept (once) when this routine is called.
 */
static void
hat_smgfree(smg)
	register struct smgrp *smg;
{
	register struct sment *sme;
	register struct pmgrp *pmg;
	register struct as *as;
	register int tcnt;
	struct ctx *ctx, *ctxsav, *curctx;
	int s;

	ASSERT(smg->smg_keepcnt == 1);

	if ((as = smg->smg_as) != NULL) {

		/* XXX - we should simplify switching between ctx's */
		/*
		 * ctxsav remembers the ctx to restore on the way out;
		 * curctx tracks whichever ctx is loaded right now.
		 */
		ctxsav = curctx = mmu_getctx();
		if ((ctx = smg->smg_as->a_hat.hat_ctx) != NULL &&
		    !ctx->c_clean) {
			/* flush the region while in the owner's ctx */
			mmu_setctx(curctx = ctx);
			vac_rgnflush(smg->smg_base);
		}

		/* the REGTEMP work below is done in the kernel ctx */
		if (curctx != kctx)
			mmu_setctx(curctx = kctx);

		/*
		 * NOTE(review): the #endif tags in this function say ISM
		 * although the guards test SUNDBE && sun4.
		 */
#if	defined(SUNDBE) && defined(sun4)
                if (ispseudo_smgrp(smg))
                        goto skip_if_pseudo;
#endif  ISM

		/*
		 * XXX - we may optimize by not using kctx if smg has ctx.
		 */
		s = splvm();
		ASSERT(mmu_getsmg(REGTEMP) == smgrp_invalid);
		/* map the smgrp at REGTEMP so its pmgrp slots can be reached */
		mmu_setsmg(REGTEMP, smg);
		sme = smg->smg_sme;
		for (tcnt = 0; tcnt < NSMENTPERSMGRP; tcnt++) {
			if (sme->sme_valid) {
				pmg = sme->sme_pmg;
				ASSERT(((u_int)pmg->pmg_base & SMGRPOFFSET) <
				    SMGRPSIZE);
				/*
				 * Invalidate the pmgrp at its offset within
				 * the region, then sever the sme <-> pmgrp
				 * links in both directions.
				 */
				mmu_pmginval(REGTEMP +
				    ((u_int)pmg->pmg_base & SMGRPOFFSET));
				pmg->pmg_mapped = 0;
				sme->sme_valid = 0;
				pmg->pmg_sme = (struct sment *)NULL;
				sme->sme_pmg = (struct pmgrp *)NULL;
			}
			sme++;
		}
		/* leave REGTEMP invalid again, as asserted on entry */
		mmu_smginval(REGTEMP);
		(void) splx(s);

#if	defined(SUNDBE) && defined(sun4)
        skip_if_pseudo:
#endif  ISM

		/*
		 * If the owning address space has a ctx, drop the smgrp
		 * mapping at its real base in that ctx.
		 */
		if ((ctx = smg->smg_as->a_hat.hat_ctx) != NULL) {
			if (ctx != curctx)
				mmu_setctx(curctx = ctx);
			mmu_smginval(smg->smg_base);
		}
		/* go back to the ctx we were entered in */
		if (ctxsav != curctx)
			mmu_setctx(ctxsav);

		/*
		 * Unlink from the address space's doubly linked smgrp list.
		 */
		if (as->a_hat.hat_smgrps == smg) {
			as->a_hat.hat_smgrps = smg->smg_next;
			if (smg->smg_next)
				smg->smg_next->smg_prev = NULL;
		} else {
			smg->smg_prev->smg_next = smg->smg_next;
			if (smg->smg_next)
				smg->smg_next->smg_prev = smg->smg_prev;
		}
		smg->smg_as = NULL;
		smg->smg_next = smg->smg_prev = NULL;
	}

#if	defined(SUNDBE) && defined(sun4)
	/*
	 * A pseudo smgrp is released with kmem_free rather than being
	 * returned to the free list.
	 */
        if (ispseudo_smgrp(smg)) {
                kmem_free((char *)smg, sizeof(*smg));
                return;
        }
#endif  ISM

	/*
	 * Drop the caller's keep and push the smgrp on the free list.
	 */
	s = splvm();
	smg->smg_keepcnt--;
	smg->smg_next = smgrpfree;
	smgrpfree = smg;
	(void) splx(s);
}

/*
 * Put the specified smgrp at the head of the list of smgrp's owned by
 * the specified address space and record the region-aligned base of
 * addr in it.  Smgrps hang off the address space rather than the ctx so
 * that they can be kept around even when the address space has no
 * hardware context.  The list update is done at splvm.
 */
static void
hat_smglink(smg, as, addr)
	register struct smgrp *smg;
	struct as *as;
	addr_t addr;
{
	register struct smgrp *oldhead;
	int s;

	s = splvm();
	oldhead = as->a_hat.hat_smgrps;

	smg->smg_as = as;
	smg->smg_base = (addr_t)((u_int)addr & ~(SMGRPSIZE - 1));

	smg->smg_next = oldhead;
	if (oldhead != NULL)
		oldhead->smg_prev = smg;
	smg->smg_prev = NULL;
	as->a_hat.hat_smgrps = smg;
	(void) splx(s);
}

/*
 * Reserve the smgrp that the mmu currently has at addr for the address
 * space owning seg.  Does nothing unless a three-level mmu is in use.
 *
 * The smgrp is obtained kept from hat_getsmg (the keep is not dropped
 * here), linked to the address space if it has no owner yet, and
 * locked.  The sme covering addr is then pointed at the pmgrp the mmu
 * reports for addr unless the sme is already valid.
 *
 * Panics if the address space has no ctx or the smgrp already belongs
 * to a different address space.
 */
void
hat_smgreserve(seg, addr)
	struct seg *seg;
	addr_t addr;
{
	register struct as	*as = seg->s_as;
	register struct smgrp	*smg;
	register struct pmgrp	*pmg;
	struct sment		*sme;

	if (!mmu_3level)
		return;

	/* hat_getsmg hands the smg back already kept */
	smg = hat_getsmg(addr);

	/* DEBUGGING */
	if (smg == smgrp_invalid)
		printf("hat_smgreserve: addr 0x%x invalid smg\n", addr);

	if (as->a_hat.hat_ctx == NULL)
		panic("hat_smgreserve");
	if (smg->smg_as != NULL && smg->smg_as != as)
		panic("hat_smgreserve");

	if (smg != smgrp_invalid && smg->smg_as == NULL)
		hat_smglink(smg, as, addr);

	/* anything reserved is locked as well */
	smg->smg_lock = 1;

	/*
	 * Locate the sme for addr within the region.
	 */
	pmg = mmu_getpmg(addr);
	sme = &(smg->smg_sme[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);

	ASSERT(!sme->sme_valid || sme == pmg->pmg_sme);

	if (sme->sme_valid)
		return;

	/*
	 * First reservation of this slot: tie sme and pmgrp together.
	 */
	pmg = mmu_getpmg(addr);
	ASSERT(pmg->pmg_sme == NULL);
	sme->sme_pmg  = pmg;
	sme->sme_valid = 1;
	pmg->pmg_sme = sme;
}

/*
 * Initialize all the unlocked smgs to have invalid sme's
 * and add them to the free list.
 * This routine is called during startup after all the
 * kernel smgs have been reserved.  This routine will
 * also set the smgrpmin variable for use in hat_smgalloc.
 *
 * REGTEMP is only used here so we temporarily steal
 * the region before KERNELBASE and mark it invalid
 * when we are finished.
 */
void
hat_smginit()
{
	register struct smgrp *smg;
	register addr_t addr;
	int s;

	if (!mmu_3level)
		return;

	s = splvm();

	for (smg = smgrps; smg < smgrpsNSMGRPS; smg++) {
		if (smg->smg_lock || smg->smg_keepcnt != 0)
			continue;

		if (smgrpmin == NULL)
			smgrpmin = smg;

		mmu_settsmg((addr_t)REGTEMP, smg);

		for (addr = (addr_t)REGTEMP;
		    addr < (addr_t)(REGTEMP + SMGRPSIZE);
		    addr += PMGRPSIZE) {
			mmu_pmginval(addr);
		}

		smg->smg_next = smgrpfree;
		smgrpfree = smg;
	}
	smgrphand = smgrpmin;

	mmu_smginval((addr_t)REGTEMP);

	(void) splx(s);
}

/*
 * Allocate a smgrp to map the specified address and record 'pmg' in the
 * sme that covers addr.  Does nothing unless a three-level mmu is in use.
 *
 * If the mmu already has a valid smgrp at addr, that smgrp is used and
 * its keepcnt is incremented.  Otherwise a smgrp is taken from the free
 * list, stealing one that is currently in use if the free list is
 * empty; the new smgrp is linked to 'as', loaded into the mmu at its
 * base and left with keepcnt == 1.
 */
static void
hat_smgalloc(as, addr, pmg)
	struct as		*as;
	addr_t			addr;
	struct pmgrp		*pmg;
{
	register struct smgrp	*smg;
	register struct sment	*sme;
	int			s;
	struct ctx		*ctx;

	if (!mmu_3level)
		return;

	s = splvm();
	/*
	 * Fast path: the region is already backed by a smgrp; just keep
	 * it and hook the pmgrp into the matching sme.
	 */
	if ((smg = mmu_getsmg(addr)) != smgrp_invalid) {
		smg->smg_keepcnt++;
		if (getsmg_check && smg->smg_base !=
		    (caddr_t)((u_int)addr & ~(SMGRPSIZE - 1))) {
			printf("hat_smgalloc: addr=%x, smg=%x, smg base=%x\n",
			    addr, smg, smg->smg_base);
			call_debug("hat_smgalloc");
		}
		sme = &(smg->smg_sme
			[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
		sme->sme_pmg  = pmg;
		sme->sme_valid = 1;
		pmg->pmg_sme = sme;
		(void) splx(s);
		return;
	}

	/*
	 * No smgrp allocated to this address space contains the pme,
	 * allocate a new smg for this address space.  First, try
	 * the free list.
	 */
	if (smgrpfree != NULL)
		vmhatstat.vh_smgfree++;

top:
	if ((smg = smgrpfree) == NULL) {
		int try;

		/*
		 * No smg's free, have to take one from someone.
		 * Take from address spaces with no ctx (or a clean
		 * ctx) first.
		 * XXX - could do it with just one pass.
		 */
		smg = smgrphand;
		try = 1;
		for (;;) {
			/*
			 * One full sweep around the table, starting just
			 * after smgrphand and wrapping to smgrpmin (or to
			 * smgrps if smgrpmin is not set).
			 */
			do {
				smg++;
				if (smg == smgrpsNSMGRPS) {
					if (smgrpmin) {
						/* skip some kernel smgrps */
						smg = smgrpmin;
					} else {
						smg = smgrps;
					}
				}

				if (smg->smg_lock == 0 &&
				    smg->smg_keepcnt == 0) {
					/*
					 * On the first two passes (try < 3),
					 * skip a smg whose address space has
					 * a ctx that is not clean.  The third
					 * pass takes any unlocked, unkept smg.
					 * Note that ctx is always assigned
					 * here when try < 3, so it is valid
					 * for the try == 1 test below.
					 */
					if (try < 3 &&
					    (ctx = smg->smg_as->a_hat.hat_ctx)
						!= NULL &&
					    !ctx->c_clean)
						continue;

					/*
					 * Found a candidate, free
					 * it up and try again.
					 * Statistics are only kept for
					 * candidates found on the first pass.
					 */
					if (try == 1) {
						if (ctx == NULL)
						    vmhatstat.vh_smgnoctx++;
						else
						    vmhatstat.vh_smgcleanctx++;
					}

					/* hat_smgfree wants the smg kept once */
					smg->smg_keepcnt++;
					hat_smgfree(smg);
					smgrphand = smg;
					goto top;
				}
			} while (smg != smgrphand);

			if (try == 1) {
				/*
				 * We were not able to find a segment that
				 * would not require flushing.
				 *
				 * Flush all user VAC lines and try again.
				 */
				vac_flushallctx();
				vmhatstat.vh_smgflush++;
			}

			/*
			 * Give up once the third pass has failed as well.
			 * NOTE(review): rm_outofhat() presumably does not
			 * return; if it did, the loop would simply run
			 * another pass - confirm.
			 */
			if (try >= 3) {
				rm_outofhat();
			}
			try++;
		}
	}
	hat_clrcleanbit();
	smgrpfree = smg->smg_next;	/* take it off the free list */
	/*
	 * The smg stays locked from here until it has been loaded into
	 * the mmu below, which happens after the priority is restored.
	 */
	smg->smg_lock = 1;
	smg->smg_keepcnt = 1;

	hat_smglink(smg, as, addr);

	sme = &(smg->smg_sme[(mmu_btop(addr-smg->smg_base)/NPMENTPERPMGRP)]);
	sme->sme_pmg  = pmg;
	sme->sme_valid = 1;
	pmg->pmg_sme = sme;

	(void) splx(s);

	mmu_setsmg(smg->smg_base, smg);

	smg->smg_lock = 0;

	return;
}


/*
 * Consistency check: the keep counts of all pmgrps referenced by valid
 * sme's of the smgrp must add up to the smgrp's own keep count.
 * Prints the mismatch and panics otherwise.  The check runs at splvm.
 */
hat_smgcheck_keepcnt(smg)
	struct smgrp	*smg;
{
	register struct sment *sme;
	int	idx;
	int	total = 0;
	int	s;

	/*
	 * smgrp_invalid has smg_keepcnt == 1, but it has no pmgs.
	 */
	if (smg == smgrp_invalid)
		return;

	s = splvm();

	sme = smg->smg_sme;
	for (idx = 0; idx < NSMENTPERSMGRP; idx++, sme++) {
		if (sme->sme_valid)
			total += sme->sme_pmg->pmg_keepcnt;
	}

	if (total != smg->smg_keepcnt) {
		printf(
	"check_smgkeepcnt: keepcnt %d smg_keepcnt %d base 0x%x smg# %d\n",
		    total, smg->smg_keepcnt,
		    smg->smg_base, smg->smg_num);
		panic("possibly double mapped pmgrp");
	}

	(void) splx(s);
}

/*
 * Run hat_smgcheck_keepcnt over every entry of the smgrps table.
 */
hat_smgcheck_keepcntall()
{
	register struct smgrp *smg = smgrps;

	while (smg < smgrpsNSMGRPS) {
		hat_smgcheck_keepcnt(smg);
		smg++;
	}
}
#endif MMU_3LEVEL

/*
 * Unmap the HW pmegs held by an address space that are currently mapped
 * and not kept.  Such pmegs are asserted to be neither SW-only nor
 * locked.
 */
static void
hat_unmap_aspmgs(as)
	struct as	*as;
{
	struct pmgrp	*pmg;

	/*
	 * The kernel address space is never touched.
	 */
	if (as == &kas)
		return;

	/*
	 * Leave address spaces alone that hold only a "small number" of
	 * HW pmegs; future pmeg allocations are expected to steal HW
	 * pmegs from other address spaces with a clean or no context.
	 *
	 * "Small" means fewer than 'hatunmaplimit' percent of the total
	 * number of HW pmegs.  hatunmaplimit may be patched.
	 */
	if (as->a_hat.hat_pmgldcnt * 100 < NPMGRPS * hatunmaplimit)
		return;

	ASSERT(mmu_getctx()->c_as == as);

	for (pmg = as->a_hat.hat_pmgrps; pmg != NULL; pmg = pmg->pmg_next) {
		if (!pmg->pmg_mapped || pmg->pmg_keepcnt != 0)
			continue;

		ASSERT(pmg->pmg_num != PMGNUM_SW);
		ASSERT(!pmg->pmg_lock);
		mmu_pmginval(pmg->pmg_base);
		pmg->pmg_mapped = 0;
	}
}

#ifdef notdef
/*
 * XXX - temporary (and quite dirty) assertion function
 *
 * Checks that 'pmg' looks like a mapped HW pmeg and prints 'msg' plus
 * some state before asserting when it does not:
 *	- pmg must not be NULL,
 *	- if its address space has a ctx, pmg must be marked mapped,
 *	- pmg must not be a SW-only pmeg (PMGNUM_SW),
 *	- the segment map entry at the pmg base (or at SEGTEMP when the
 *	  address space has no ctx) must not be PMGRP_INVALID.
 *
 * A NULL pmg is reported and the function returns without touching it;
 * the original dereferenced pmg both before and inside the NULL check.
 */
assertpmgmapped(pmg, msg)
	struct pmgrp	*pmg;
	char		*msg;
{
	addr_t		a;

	if (pmg == NULL) {
		printf("%s: NULL pmg\n", msg);
		ASSERT(pmg != NULL);
		/* don't fall through if ASSERT is compiled out */
		return;
	}

	a = pmg->pmg_base;

	if (pmg->pmg_as->a_hat.hat_ctx == NULL) {
#ifdef notdef				/* 3 level mmu */
		if (pmg->pmg_mapped) {
			printf("%s addr 0x%x\n", msg, pmg->pmg_base);
			ASSERT(!pmg->pmg_mapped);
		}
#endif notdef
		/* no ctx: the segment map is probed at SEGTEMP instead */
		a = SEGTEMP;
	} else {
		if (!pmg->pmg_mapped) {
			printf("%s addr 0x%x\n", msg, pmg->pmg_base);
			ASSERT(pmg->pmg_mapped);
		}
	}

	if (pmg->pmg_num == PMGNUM_SW) {
		printf("%s addr 0x%x\n", msg, pmg->pmg_base);
		ASSERT(pmg->pmg_num != PMGNUM_SW);
	}

	if (map_getsgmap(a) == PMGRP_INVALID) {
		printf("%s addr 0x%x\n", msg, pmg->pmg_base);
		/* Force traceback */
		printf("pmg_num %d, pmg_keepcnt %d, ctx 0x%x\n",
		    pmg->pmg_num, pmg->pmg_keepcnt,
		    pmg->pmg_as->a_hat.hat_ctx);
		ASSERT(map_getsgmap(a) != PMGRP_INVALID);
	}
}
#endif notdef

#if	defined(SUNDBE) && defined(sun4)
/*
 * Empty stub: takes an address space and does nothing.
 * NOTE(review): presumably a placeholder for locking the pmegs of an
 * address space used for intimate shared memory - confirm intent.
 */
/* ARGSUSED */
void
hat_ism_lockpmgs(as)
        struct as       *as;
{
}

/*
 * Empty stub: takes an address space and does nothing.
 * NOTE(review): presumably the counterpart of hat_ism_lockpmgs -
 * confirm intent.
 */
/* ARGSUSED */
void
hat_ism_unlockpmgs(as)
        struct as       *as;
{
}

/*
 * For every smgrp linked to the address space of ismseg, allocate a
 * copy (a "pseudo" smgrp), link the copy into the address space of seg
 * at the original base shifted by (seg->s_base - ismseg->s_base), and
 * load it into the mmu there.
 *
 * hat_setup is called on seg's address space for the duration and on
 * the address space of the previously current ctx before returning.
 * seg->s_base is asserted to be smgrp aligned.
 */
void
hat_intimate_map(seg, ismseg)
        struct seg      *seg;
        struct seg      *ismseg;
{
        struct as       *as = seg->s_as;
        struct as       *ismas = ismseg->s_as;
        struct smgrp    *src;
        struct smgrp    *pseudo;
        int             delta = seg->s_base - ismseg->s_base;
        struct as       *saveas = mmu_getctx()->c_as;

        hat_setup(as);

        ASSERT(((int)seg->s_base & SMGRPOFFSET) == 0); /* SMEG alignment */

        src = ismas->a_hat.hat_smgrps;
        while (src != NULL) {

                ASSERT(src->smg_keepcnt > 0);

                /* XXX  use kmem_fast_alloc() */
                pseudo = (struct smgrp*) kmem_alloc(sizeof(*pseudo));

                /*
                 * Start from a copy of the real smgrp; the two
                 * assignments that follow repeat fields the structure
                 * copy has already transferred.
                 */
                *pseudo = *src;

                pseudo->smg_num = src->smg_num;
                pseudo->smg_sme = src->smg_sme;

                /*
                 * Link pseudo smeg into process's hat_smgrps list.
                 * It is held kept across the link and left with a
                 * keep count of 0 afterwards; hat_intimate_unmap
                 * bumps the count again before calling hat_smgfree.
                 */
                pseudo->smg_keepcnt = 1;
                hat_smglink(pseudo, as, src->smg_base + delta);
                pseudo->smg_keepcnt = 0;

                mmu_setsmg(pseudo->smg_base, pseudo);

                src = src->smg_next;
        }

        hat_setup(saveas);
}

/*
 * Free every smgrp linked to seg's address space whose base lies within
 * [seg->s_base, seg->s_base + seg->s_size).  Each such smgrp is asserted
 * to be a pseudo smgrp.
 *
 * The list is walked at splvm; the priority is dropped around each
 * hat_smgfree call and the walk restarts from the list head afterwards.
 */
void
hat_intimate_unmap(seg)
        struct seg      *seg;
{
        struct as       *as = seg->s_as;
        addr_t          lo = seg->s_base;
        u_int           len = seg->s_size;
        addr_t          hi = lo + len;
        struct smgrp    *smg;
        int             s;

        s = splvm();
        for (smg = as->a_hat.hat_smgrps; smg != NULL; ) {
                if (smg->smg_base < lo || smg->smg_base >= hi) {
                        /* outside the segment, leave it alone */
                        smg = smg->smg_next;
                        continue;
                }

                (void) splx(s);
                /*
                 * Free the pseudo smeg; hat_smgfree asserts that it
                 * is kept exactly once.
                 */
                ASSERT(ispseudo_smgrp(smg));
                smg->smg_keepcnt++;
                hat_smgfree(smg);
                s = splvm();

                /*
                 * Go over the entire list again because the list
                 * could have been shuffled.
                 */
                smg = as->a_hat.hat_smgrps;
        }
        (void) splx(s);
}

/*
 * Return 1 if this is a pseudo smeg, i.e. the pointer does not lie
 * within the smgrps table (smgrps up to, but not including,
 * smgrpsNSMGRPS); return 0 otherwise.
 */
static int
ispseudo_smgrp(smgrp)
        struct smgrp    *smgrp;
{
        if (smgrp < smgrps || smgrp >= smgrpsNSMGRPS)
                return (1);
        return (0);
}
#endif  ISM