commit ff309bfe1c (contained in: seta75D)
Date: 2021-10-11 18:37:13 -03:00
14130 changed files with 3180272 additions and 0 deletions

sys/vm/Makefile (new file, 13 lines)

@@ -0,0 +1,13 @@
#
# @(#)Makefile 1.1 94/10/31 SMI
#
HFILES = anon.h as.h faultcode.h hat.h mp.h page.h pvn.h rm.h \
seg.h seg_dev.h seg_map.h seg_vn.h seg_u.h swap.h vpage.h
HDIR=$(DESTDIR)/usr/include/vm
install_h: $(HFILES) FRC
install -d -m 755 $(HDIR)
install -m 444 $(HFILES) $(HDIR)
FRC:

sys/vm/anon.h (new file, 60 lines)

@@ -0,0 +1,60 @@
/* @(#)anon.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_anon_h
#define _vm_anon_h
/*
* VM - Anonymous pages.
*/
/*
* Each page which is anonymous, either in memory or in swap,
* has an anon structure. The structure's primary purpose is
* to hold a reference count so that we can detect when the last
* copy of a multiply-referenced copy-on-write page goes away.
* When on the free list, un.next gives the next anon structure
* in the list. Otherwise, un.page is a ``hint'' which probably
* points to the current page. This must be explicitly checked
* since the page can be moved underneath us. This is simply
* an optimization to avoid having to look up each page when
* doing things like fork.
*/
struct anon {
int an_refcnt;
union {
struct page *an_page; /* ``hint'' to the real page */
struct anon *an_next; /* free list pointer */
} un;
};
struct anoninfo {
u_int ani_max; /* maximum anon pages available */
u_int ani_free; /* number of anon pages currently free */
u_int ani_resv; /* number of anon pages reserved */
};
#ifdef KERNEL
/*
* Flags for anon_private.
*/
#define STEAL_PAGE 0x01 /* page can be stolen */
#define LOCK_PAGE 0x02 /* page must be ``logically'' locked */
extern struct anoninfo anoninfo;
struct anon *anon_alloc();
void anon_dup(/* old, new, size */);
void anon_free(/* app, size */);
int anon_getpage(/* app, protp, pl, sz, seg, addr, rw, cred */);
struct page *anon_private(/* app, seg, addr, opp, oppflags */);
struct page *anon_zero(/* seg, addr, app */);
void anon_unloadmap(/* ap, ref, mod */);
int anon_resv(/* size */);
void anon_unresv(/* size */);
#endif KERNEL
#endif /*!_vm_anon_h*/
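
For illustration, a hedged sketch of how the un.an_page ``hint'' described in the comment above might be validated before use. The routine name and the (vp, off) naming of the anon slot's backing object are assumptions made for the example, not part of this header.

#include <sys/param.h>
#include <sys/vnode.h>
#include <vm/anon.h>
#include <vm/page.h>

/*
 * Illustrative only: re-check the identity of the hinted page, since
 * the page can be moved underneath us; fall back to a real lookup.
 */
struct page *
anon_hintpage(ap, vp, off)
        struct anon *ap;
        struct vnode *vp;       /* backing object for the anon slot */
        u_int off;              /* offset within that object */
{
        register struct page *pp = ap->un.an_page;

        if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off)
                return (pp);                    /* hint still valid */
        return (page_lookup(vp, off));          /* hint stale or missing */
}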

sys/vm/as.h (new file, 79 lines)

@@ -0,0 +1,79 @@
/* @(#)as.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_as_h
#define _vm_as_h
#include <vm/faultcode.h>
/*
* VM - Address spaces.
*/
/*
* Each address space consists of a list of sorted segments
* and machine dependent address translation information.
*
* All the hard work is in the segment drivers and the
* hardware address translation code.
*/
struct as {
u_int a_lock: 1;
u_int a_want: 1;
u_int a_paglck: 1; /* lock mappings into address space */
u_int a_ski: 1; /* enables recording of page info for ski */
u_int a_hatcallback: 1; /* enables hat callback processing */
u_int : 11;
u_short a_keepcnt; /* number of `keeps' */
struct seg *a_segs; /* segments in this address space */
struct seg *a_seglast; /* last segment hit on the address space */
int a_rss; /* memory claim for this address space */
struct hat a_hat; /* hardware address translation */
};
#ifdef KERNEL
/*
* Types of failure for the various address space operations.
*/
enum as_res {
A_SUCCESS, /* operation successful */
A_BADADDR, /* illegal address encountered */
A_OPFAIL, /* segment operation failure */
A_RESOURCE, /* resource exhaustion */
};
/*
* Flags for as_hole.
*/
#define AH_DIR 0x1 /* direction flag mask */
#define AH_LO 0x0 /* find lowest hole */
#define AH_HI 0x1 /* find highest hole */
#define AH_CONTAIN 0x2 /* hole must contain `addr' */
/*
* Flags for as_hatsync
*/
#define AHAT_UNLOAD 0x01 /* Translation being unloaded */
struct seg *as_segat(/* as, addr */);
struct as *as_alloc();
void as_free(/* as */);
struct as *as_dup(/* as */);
enum as_res as_addseg(/* as, seg */);
faultcode_t as_fault(/* as, addr, size, type, rw */);
faultcode_t as_faulta(/* as, addr, size */);
enum as_res as_setprot(/* as, addr, size, prot */);
enum as_res as_checkprot(/* as, addr, size, prot */);
enum as_res as_unmap(/* as, addr, size */);
int as_map(/* as, addr, size, crfp, crargsp */);
enum as_res as_hole(/* as, minlen, basep, lenp, flags, addr */);
enum as_res as_memory(/* as, addrp, sizep */);
u_int as_swapout(/* as */);
enum as_res as_incore(/* as, addr, size, vecp, sizep */);
enum as_res as_ctl(/* as, addr, size, func, arg */);
void as_hatsync(/* as, addr, ref, mod, flags */);
#endif KERNEL
#endif /*!_vm_as_h*/
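
As a rough illustration of how these entry points fit together, a hedged sketch (not taken from this source) of the way a trap handler could hand a faulting address to the as layer and map the result back to an errno-style value. The wrapper routine is invented; PAGEMASK/PAGESIZE come from the machine parameters, and the fault enums come from <vm/seg.h>.

#include <sys/param.h>
#include <sys/errno.h>
#include <vm/as.h>
#include <vm/seg.h>             /* enum fault_type, enum seg_rw */

/*
 * Illustrative only: resolve one page fault through as_fault() and
 * convert the faultcode_t into 0 (retry the access) or an errno.
 */
int
sketch_resolve_fault(as, addr, rw)
        struct as *as;
        addr_t addr;
        enum seg_rw rw;
{
        faultcode_t fc;

        fc = as_fault(as, (addr_t)((u_int)addr & PAGEMASK), PAGESIZE,
            F_INVAL, rw);
        if (fc == 0)
                return (0);             /* translation loaded */
        return (FC_CODE(fc) == FC_OBJERR ? FC_ERRNO(fc) : EFAULT);
}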

sys/vm/dbx_vm.c (new file, 30 lines)

@@ -0,0 +1,30 @@
#ifndef lint
static char sccsid[] = "@(#)dbx_vm.c 1.1 94/10/31 SMI";
#endif
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
/*
* This file is optionally brought in by including a
* "psuedo-device dbx" line in the config file. It is
* compiled using the "-g" flag to generate structure
* information which is used by dbx with the -k flag.
*/
#include <sys/param.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/mp.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/swap.h>
#include <vm/vpage.h>

sys/vm/faultcode.h (new file, 33 lines)

@@ -0,0 +1,33 @@
/* @(#)faultcode.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_faultcode_h
#define _vm_faultcode_h
/*
* This file describes the "code" that is delivered during
* SIGBUS and SIGSEGV exceptions. It also describes the data
* type returned by vm routines which handle faults.
*
* If FC_CODE(fc) == FC_OBJERR, then FC_ERRNO(fc) contains the errno value
* returned by the underlying object mapped at the fault address.
*/
#define FC_HWERR 0x1 /* misc hardware error (e.g. bus timeout) */
#define FC_ALIGN 0x2 /* hardware alignment error */
#define FC_NOMAP 0x3 /* no mapping at the fault address */
#define FC_PROT 0x4 /* access exceeded current protections */
#define FC_OBJERR 0x5 /* underlying object returned errno value */
#define FC_MAKE_ERR(e) (((e) << 8) | FC_OBJERR)
#define FC_CODE(fc) ((fc) & 0xff)
#define FC_ERRNO(fc) ((unsigned)(fc) >> 8)
#ifndef LOCORE
typedef int faultcode_t; /* type returned by vm fault routines */
#endif LOCORE
#endif /*!_vm_faultcode_h*/
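
To make the encoding concrete, a small self-contained example (user level, with the macros repeated so it builds on its own) of packing an errno into a faultcode_t and unpacking it again. Everything here beyond the three macros is invented for the demonstration.

#include <errno.h>
#include <stdio.h>

#define FC_OBJERR       0x5
#define FC_MAKE_ERR(e)  (((e) << 8) | FC_OBJERR)
#define FC_CODE(fc)     ((fc) & 0xff)
#define FC_ERRNO(fc)    ((unsigned)(fc) >> 8)

typedef int faultcode_t;

int
main()
{
        faultcode_t fc = FC_MAKE_ERR(EIO);      /* object returned EIO */

        if (FC_CODE(fc) == FC_OBJERR)
                printf("object error, errno %u\n", FC_ERRNO(fc));
        return (0);
}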

sys/vm/hat.h (new file, 86 lines)

@@ -0,0 +1,86 @@
/* @(#)hat.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_hat_h
#define _vm_hat_h
/*
* VM - Hardware Address Translation management.
*
* This file describes the machine independent interfaces to
* the hardware address translation management routines. Other
* machine specific interfaces and structures are defined
* in <machine/vm_hat.h>. The hat layer manages the address
* translation hardware as a cache driven by calls from the
* higher levels of the VM system.
*/
#include <machine/vm_hat.h>
#ifdef KERNEL
/*
* One time hat initialization
*/
void hat_init();
/*
* Operations on hat resources for an address space:
* - initialize any needed hat structures for the address space
* - free all hat resources now owned by this address space
*
* N.B. - The hat structure is guaranteed to be zeroed when created.
* The hat layer can choose to define hat_alloc as a macro to avoid
* a subroutine call if this is sufficient initialization.
*/
#ifndef hat_alloc
void hat_alloc(/* as */);
#endif
void hat_free(/* as */);
/*
* Operations on a named address within a segment:
* - load/lock the given page struct
* - load/lock the given page frame number
* - unlock the given address
*
* (Perhaps we need an interface to load several pages at once?)
*/
void hat_memload(/* seg, addr, pp, prot, lock */);
void hat_devload(/* seg, addr, pf, prot, lock */);
void hat_unlock(/* seg, addr */);
/*
* Operations over an address range:
* - change protections
* - change mapping to refer to a new segment
* - unload mapping
*/
void hat_chgprot(/* seg, addr, len, prot */);
void hat_newseg(/* seg, addr, len, nseg */);
void hat_unload(/* seg, addr, len */);
/*
* Operations that work on all active translation for a given page:
* - unload all translations to page
* - get hw stats from hardware into page struct and reset hw stats
*/
void hat_pageunload(/* pp */);
void hat_pagesync(/* pp */);
/*
* Operations that return physical page numbers (ie - used by mapin):
* - return the pfn for kernel virtual address
* - return the pfn for arbitrary virtual address
*/
u_int hat_getkpfnum(/* addr */);
/*
* XXX - This one is not yet implemented - not yet needed
* u_int hat_getpfnum(as, addr);
*/
#endif KERNEL
#endif /*!_vm_hat_h*/

sys/vm/mp.h (new file, 39 lines)

@@ -0,0 +1,39 @@
/* @(#)mp.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_mp_h
#define _vm_mp_h
/*
* VM - multiprocessor/ing support.
*
* Currently the kmon_enter() / kmon_exit() pair implements a
* simple monitor for objects protected by the appropriate lock.
* The kcv_wait() / kcv_broadcast() pair implements a simple
* condition variable which can be used for `sleeping'
* and `waking' inside a monitor if some resource
* is needed which is not available.
*/
typedef struct kmon_t {
u_int dummy;
} kmon_t;
#define lock_init(lk) (lk)->dummy = 0
#ifndef KMON_DEBUG
#define kmon_enter(a)
#define kmon_exit(a)
#define kcv_wait(lk, cond) (void) sleep(cond, PSWP+1)
#define kcv_broadcast(lk, cond) wakeup(cond)
#else
void kmon_enter(/* lk */);
void kmon_exit(/* lk */);
void kcv_wait(/* lk, cond */);
void kcv_broadcast(/* lk, cond */);
#endif /*!KMON_DEBUG*/
#endif /*!_vm_mp_h*/
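
A hedged sketch of the monitor discipline this header intends: enter the monitor, wait while the guarded condition does not hold, then leave. The `pool' structure and both routines are invented for the example; with KMON_DEBUG undefined the enter/exit calls expand to nothing and kcv_wait/kcv_broadcast reduce to sleep/wakeup on the condition address.

#include <sys/param.h>
#include <vm/mp.h>

struct pool {                           /* invented example resource */
        kmon_t p_lock;
        int p_navail;
};

void
pool_take(p)
        register struct pool *p;
{
        kmon_enter(&p->p_lock);
        while (p->p_navail == 0)
                kcv_wait(&p->p_lock, (caddr_t)&p->p_navail);
        p->p_navail--;
        kmon_exit(&p->p_lock);
}

void
pool_put(p)
        register struct pool *p;
{
        kmon_enter(&p->p_lock);
        p->p_navail++;
        kcv_broadcast(&p->p_lock, (caddr_t)&p->p_navail);
        kmon_exit(&p->p_lock);
}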

sys/vm/page.h (new file, 166 lines)

@@ -0,0 +1,166 @@
/* @(#)page.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_page_h
#define _vm_page_h
/*
* VM - Ram pages.
*
* Each physical page has a page structure, which is used to maintain
* these pages as a cache. A page can be found via a hashed lookup
* based on the [vp, offset]. If a page has an [vp, offset] identity,
* then it is entered on a doubly linked circular list off the
* vnode using the vpnext/vpprev pointers. If the p_free bit
* is on, then the page is also on a doubly linked circular free
* list using next/prev pointers. If the p_intrans bit is on,
* then the page is currently being read in or written back.
* In this case, the next/prev pointers are used to link the
* pages together for a consecutive IO request. If the page
* is in transit and the page is coming in (pagein), then you
* must wait for the IO to complete before you can attach to the page.
*
*/
struct page {
u_int p_lock: 1, /* locked for name manipulation */
p_want: 1, /* page wanted */
p_free: 1, /* on free list */
p_intrans: 1, /* data for [vp, offset] intransit */
p_gone: 1, /* page has been released */
p_mod: 1, /* software copy of modified bit */
p_ref: 1, /* software copy of reference bit */
p_pagein: 1, /* being paged in, data not valid */
p_nc: 1, /* do not cache page */
p_age: 1; /* on age free list */
u_int p_nio : 6; /* # of outstanding io reqs needed */
u_short p_keepcnt; /* number of page `keeps' */
struct vnode *p_vnode; /* logical vnode this page is from */
u_int p_offset; /* offset into vnode for this page */
struct page *p_hash; /* hash by [vnode, offset] */
struct page *p_next; /* next page in free/intrans lists */
struct page *p_prev; /* prev page in free/intrans lists */
struct page *p_vpnext; /* next page in vnode list */
struct page *p_vpprev; /* prev page in vnode list */
caddr_t p_mapping; /* hat specific translation info */
u_short p_lckcnt; /* number of locks on page data */
u_short p_pad; /* steal bits from here */
};
/*
* Each segment of physical memory is described by a memseg struct. Within
* a segment, memory is considered contiguous. The segments form a linked
* list to describe all of physical memory. The list is ordered by increasing
* physical addresses.
*/
struct memseg {
struct page *pages, *epages; /* [from, to) in page array */
u_int pages_base, pages_end; /* [from, to) in page numbers */
struct memseg *next; /* next segment in list */
};
#ifdef KERNEL
#define PAGE_HOLD(pp) (pp)->p_keepcnt++
#define PAGE_RELE(pp) page_rele(pp)
#define PAGE_HASHSZ page_hashsz
extern int page_hashsz;
extern struct page **page_hash;
extern struct page *pages; /* array of all page structures */
extern struct page *epages; /* end of all pages */
extern struct memseg *memsegs; /* list of memory segments */
/*
* Variables controlling locking of physical memory.
*/
extern u_int pages_pp_locked; /* physical pages actually locked */
extern u_int pages_pp_claimed; /* physical pages reserved */
extern u_int pages_pp_maximum; /* tuning: lock + claim <= max */
/*
* Page frame operations.
*/
void page_init(/* pp, num, base */);
void page_reclaim(/* pp */);
struct page *page_find(/* vp, off */);
struct page *page_exists(/* vp, off */);
struct page *page_lookup(/* vp, off */);
int page_enter(/* pp, vp, off */);
void page_abort(/* pp */);
void page_free(/* pp */);
void page_unfree(/* pp */);
struct page *page_get();
void page_rele(/* pp */);
void page_lock(/* pp */);
void page_unlock(/* pp */);
int page_pp_lock(/* pp, claim, check_resv */);
void page_pp_unlock(/* pp, claim */);
int page_addclaim(/* claim */);
void page_subclaim(/* claim */);
void page_hashout(/* pp */);
void page_add(/* ppp, pp */);
void page_sub(/* ppp, pp */);
void page_sortadd(/* ppp, pp */);
void page_wait(/* pp */);
u_int page_pptonum(/* pp */);
struct page *page_numtopp(/* pfnum */);
struct page *page_numtookpp(/* pfnum */);
#endif KERNEL
/*
* Page hash table is a power-of-two in size, externally chained
* through the hash field. PAGE_HASHAVELEN is the average length
* desired for this chain, from which the size of the page_hash
* table is derived at boot time and stored in the kernel variable
* page_hashsz. In the hash function it is given by PAGE_HASHSZ.
* PAGE_HASHVPSHIFT is defined so that 1 << PAGE_HASHVPSHIFT is
* the approximate size of a vnode struct.
*/
#define PAGE_HASHAVELEN 4
#define PAGE_HASHVPSHIFT 6
#define PAGE_HASHFUNC(vp, off) \
((((off) >> PAGESHIFT) + ((int)(vp) >> PAGE_HASHVPSHIFT)) & \
(PAGE_HASHSZ - 1))
/*
* Macros for setting reference and modify bit values. These exist as macros
* so that tracing code has the opportunity to note the new values.
*/
#ifdef TRACE
#ifdef lint
#define pg_setref(pp, val) \
if (pp) { \
trace2(TR_PG_SETREF, (pp), (val)); \
(pp)->p_ref = (val); \
} else
#define pg_setmod(pp, val) \
if (pp) { \
trace2(TR_PG_SETMOD, (pp), (val)); \
(pp)->p_mod = (val); \
} else
#else lint
#define pg_setref(pp, val) \
if (1) { \
trace2(TR_PG_SETREF, (pp), (val)); \
(pp)->p_ref = (val); \
} else
#define pg_setmod(pp, val) \
if (1) { \
trace2(TR_PG_SETMOD, (pp), (val)); \
(pp)->p_mod = (val); \
} else
#endif lint
#else TRACE
#define pg_setref(pp, val) (pp)->p_ref = (val)
#define pg_setmod(pp, val) (pp)->p_mod = (val)
#endif TRACE
#endif /*!_vm_page_h*/
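
A hedged sketch of the lookup the hash layout above supports; this is essentially the loop that page_find() is declared to provide (the same walk appears inline in seg_map.c below), written out here only to show PAGE_HASHFUNC in use. page_hash and page_hashsz are the kernel globals declared above, so KERNEL context is assumed.

#include <sys/param.h>
#include <sys/vnode.h>
#include <vm/page.h>

/*
 * Illustrative only: find the page for [vp, off] by walking its
 * hash chain; returns NULL if no such page is cached.
 */
struct page *
sketch_page_find(vp, off)
        struct vnode *vp;
        u_int off;
{
        register struct page *pp;

        for (pp = page_hash[PAGE_HASHFUNC(vp, off)]; pp != NULL;
            pp = pp->p_hash)
                if (pp->p_vnode == vp && pp->p_offset == off)
                        break;
        return (pp);
}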

sys/vm/pvn.h (new file, 50 lines)

@@ -0,0 +1,50 @@
/* @(#)pvn.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_pvn_h
#define _vm_pvn_h
/*
* VM - paged vnode.
*
* The VM system manages memory as a cache of paged vnodes.
* This file describes the interfaces to common subroutines
* used to help implement the VM/file system routines.
*/
struct page *pvn_kluster(/* vp, off, seg, addr, offp, lenp, vp_off,
vp_len, isra */);
void pvn_fail(/* plist, flags */);
void pvn_done(/* bp */);
struct page *pvn_vplist_dirty(/* vp, off, flags */);
struct page *pvn_range_dirty(/* vp, off, eoff, offlo, offhi, flags */);
void pvn_vptrunc(/* vp, vplen, zbytes */);
void pvn_unloadmap(/* vp, offset, ref, mod */);
int pvn_getpages(/* getapage, vp, off, len, protp, pl, plsz, seg, addr,
rw, cred */);
/*
* When requesting pages from the getpage routines, pvn_getpages will
* allocate space to return PVN_GETPAGE_NUM pages which map PVN_GETPAGE_SZ
* worth of bytes. These numbers are chosen to be the minimum of the max's
* given in terms of bytes and pages.
*/
#define PVN_MAX_GETPAGE_SZ 0x10000 /* getpage size limit */
#define PVN_MAX_GETPAGE_NUM 0x8 /* getpage page limit */
#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
#define PVN_GETPAGE_SZ ptob(PVN_MAX_GETPAGE_NUM)
#define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
#else
#define PVN_GETPAGE_SZ PVN_MAX_GETPAGE_SZ
#define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
#endif
#endif /*!_vm_pvn_h*/
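
To see how the #if above picks "the minimum of the max's", a small standalone sketch that evaluates both branches for an assumed page size (8192 here; any power-of-two PAGESIZE works the same way). The btop/ptob macros are spelled out locally for the example and are not the kernel's own.

#include <stdio.h>

#define PAGESIZE            8192            /* assumption for the example */
#define ptob(n)             ((n) * PAGESIZE)
#define btop(b)             ((b) / PAGESIZE)
#define PVN_MAX_GETPAGE_SZ  0x10000
#define PVN_MAX_GETPAGE_NUM 0x8

int
main()
{
        unsigned sz, num;

        if (PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE) {
                sz = ptob(PVN_MAX_GETPAGE_NUM); /* page limit is tighter */
                num = PVN_MAX_GETPAGE_NUM;
        } else {
                sz = PVN_MAX_GETPAGE_SZ;        /* byte limit is tighter */
                num = btop(PVN_MAX_GETPAGE_SZ);
        }
        printf("getpage limit: %u bytes, %u pages\n", sz, num);
        return (0);                             /* 65536 bytes, 8 pages at 8K */
}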

sys/vm/rm.h (new file, 19 lines)

@@ -0,0 +1,19 @@
/* @(#)rm.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_rm_h
#define _vm_rm_h
/*
* VM - Resource Management.
*/
struct page *rm_allocpage(/* seg, addr */);
void rm_outofanon();
void rm_outofhat();
int rm_asrss(/* as */);
#endif /*!_vm_rm_h*/

sys/vm/seg.h (new file, 93 lines)

@@ -0,0 +1,93 @@
/* @(#)seg.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_h
#define _vm_seg_h
#include <vm/faultcode.h>
#include <vm/mp.h>
/*
* VM - Segments.
*/
/*
* An address space contains a set of segments, managed by drivers.
* Drivers support mapped devices, sharing, copy-on-write, etc.
*
* The seg structure contains a lock to prevent races, the base virtual
* address and size of the segment, a back pointer to the containing
* address space, pointers to maintain a circularly doubly linked list
* of segments in the same address space, and procedure and data hooks
* for the driver. The seg list on the address space is sorted by
* ascending base addresses and overlapping segments are not allowed.
*
* After a segment is created, faults may occur on pages of the segment.
* When a fault occurs, the fault handling code must get the desired
* object and set up the hardware translation to the object. For some
* objects, the fault handling code also implements copy-on-write.
*
* When the hat wants to unload a translation, it can call the unload
* routine which is responsible for processing reference and modify bits.
*/
struct seg {
kmon_t s_lock;
addr_t s_base; /* base virtual address */
u_int s_size; /* size in bytes */
struct as *s_as; /* containing address space */
struct seg *s_next; /* next seg in this address space */
struct seg *s_prev; /* prev seg in this address space */
struct seg_ops {
int (*dup)(/* seg, newsegp */);
int (*unmap)(/* seg, addr, len */);
int (*free)(/* seg */);
faultcode_t (*fault)(/* seg, addr, len, type, rw */);
faultcode_t (*faulta)(/* seg, addr */);
int (*hatsync)(/* seg, addr, ref, mod, flags */);
int (*setprot)(/* seg, addr, size, prot */);
int (*checkprot)(/* seg, addr, size, prot */);
int (*kluster)(/* seg, addr, delta */);
u_int (*swapout)(/* seg */);
int (*sync)(/* seg, addr, size, flags */);
int (*incore)(/* seg, addr, size, vec */);
int (*lockop)(/* seg, addr, size, op */);
int (*advise)(/* seg, addr, size, behav */);
} *s_ops;
caddr_t s_data; /* private data for instance */
};
/*
* Fault information passed to the seg fault handling routine.
* The F_SOFTLOCK and F_SOFTUNLOCK are used by software
* to lock and unlock pages for physical I/O.
*/
enum fault_type {
F_INVAL, /* invalid page */
F_PROT, /* protection fault */
F_SOFTLOCK, /* software requested locking */
F_SOFTUNLOCK, /* software requested unlocking */
};
/*
* seg_rw gives the access type for a fault operation
*/
enum seg_rw {
S_OTHER, /* unknown or not touched */
S_READ, /* read access attempted */
S_WRITE, /* write access attempted */
S_EXEC, /* execution access attempted */
};
#ifdef KERNEL
/*
* Generic segment operations
*/
struct seg *seg_alloc(/* as, base, size */);
int seg_attach(/* as, base, size, seg */);
void seg_free(/* seg */);
u_int seg_page(/* seg, addr */);
u_int seg_pages(/* seg */);
#endif KERNEL
#endif /*!_vm_seg_h*/
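
Every generic VM operation reaches a driver through the s_ops vector above. As a rough illustration (the wrapper itself is not part of this file), this is how the address-space layer can forward a fault to whichever driver owns the segment.

#include <sys/param.h>
#include <vm/seg.h>

/*
 * Illustrative only: dispatch a fault to the owning segment driver
 * (seg_dev, seg_map, seg_vn, ...) through the per-segment ops vector.
 */
faultcode_t
sketch_seg_fault(seg, addr, len, type, rw)
        register struct seg *seg;
        addr_t addr;
        u_int len;
        enum fault_type type;
        enum seg_rw rw;
{
        return ((*seg->s_ops->fault)(seg, addr, len, type, rw));
}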

sys/vm/seg_dev.c (new file, 476 lines)

@@ -0,0 +1,476 @@
/* @(#)seg_dev.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - segment of a mapped device.
*
* This segment driver is used when mapping character special devices.
*/
#include <machine/pte.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/pvn.h>
#include <vm/vpage.h>
#define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
/*
* Private seg op routines.
*/
static int segdev_dup(/* seg, newsegp */);
static int segdev_unmap(/* seg, addr, len */);
static int segdev_free(/* seg */);
static faultcode_t segdev_fault(/* seg, addr, len, type, rw */);
static faultcode_t segdev_faulta(/* seg, addr */);
static int segdev_hatsync(/* seg, addr, ref, mod, flags */);
static int segdev_setprot(/* seg, addr, size, len */);
static int segdev_checkprot(/* seg, addr, size, len */);
static int segdev_badop();
static int segdev_incore(/* seg, addr, size, vec */);
static int segdev_ctlops(/* seg, addr, size, [flags] */);
struct seg_ops segdev_ops = {
segdev_dup,
segdev_unmap,
segdev_free,
segdev_fault,
segdev_faulta,
segdev_hatsync,
segdev_setprot,
segdev_checkprot,
segdev_badop, /* kluster */
(u_int (*)()) NULL, /* swapout */
segdev_ctlops, /* sync */
segdev_incore,
segdev_ctlops, /* lockop */
segdev_ctlops, /* advise */
};
/*
* Create a device segment.
*/
int
segdev_create(seg, argsp)
struct seg *seg;
caddr_t argsp;
{
register struct segdev_data *sdp;
register struct segdev_crargs *a = (struct segdev_crargs *)argsp;
sdp = (struct segdev_data *)
new_kmem_alloc(sizeof (struct segdev_data), KMEM_SLEEP);
sdp->mapfunc = a->mapfunc;
sdp->dev = a->dev;
sdp->offset = a->offset;
sdp->prot = a->prot;
sdp->maxprot = a->maxprot;
sdp->pageprot = 0;
sdp->vpage = NULL;
seg->s_ops = &segdev_ops;
seg->s_data = (char *)sdp;
return (0);
}
/*
* Duplicate seg and return new segment in newsegp.
*/
static int
segdev_dup(seg, newseg)
struct seg *seg, *newseg;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct segdev_data *newsdp;
struct segdev_crargs a;
a.mapfunc = sdp->mapfunc;
a.dev = sdp->dev;
a.offset = sdp->offset;
a.prot = sdp->prot;
a.maxprot = sdp->maxprot;
(void) segdev_create(newseg, (caddr_t)&a);
newsdp = (struct segdev_data *)newseg->s_data;
newsdp->pageprot = sdp->pageprot;
if (sdp->vpage != NULL) {
register u_int nbytes = vpgtob(seg_pages(seg));
newsdp->vpage = (struct vpage *)
new_kmem_alloc(nbytes, KMEM_SLEEP);
bcopy((caddr_t)sdp->vpage, (caddr_t)newsdp->vpage, nbytes);
}
return (0);
}
/*
* Split a segment at addr for length len.
*/
/*ARGSUSED*/
static int
segdev_unmap(seg, addr, len)
register struct seg *seg;
register addr_t addr;
u_int len;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct segdev_data *nsdp;
register struct seg *nseg;
register u_int npages, spages, tpages;
addr_t nbase;
u_int nsize, hpages;
/*
* Check for bad sizes
*/
if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
(len & PAGEOFFSET) || ((u_int)addr & PAGEOFFSET))
panic("segdev_unmap");
/*
* Unload any hardware translations in the range to be taken out.
*/
hat_unload(seg, addr, len);
/*
* Check for entire segment
*/
if (addr == seg->s_base && len == seg->s_size) {
seg_free(seg);
return (0);
}
/*
* Check for beginning of segment
*/
spages = seg_pages(seg);
npages = btop(len);
if (addr == seg->s_base) {
if (sdp->vpage != NULL) {
sdp->vpage = (struct vpage *)new_kmem_resize(
(caddr_t)sdp->vpage, vpgtob(npages),
vpgtob(spages - npages), vpgtob(spages),
KMEM_SLEEP);
}
sdp->offset += len;
seg->s_base += len;
seg->s_size -= len;
return (0);
}
/*
* Check for end of segment
*/
if (addr + len == seg->s_base + seg->s_size) {
tpages = spages - npages;
if (sdp->vpage != NULL)
sdp->vpage = (struct vpage *)
new_kmem_resize((caddr_t)sdp->vpage, (u_int)0,
vpgtob(tpages), vpgtob(spages), KMEM_SLEEP);
seg->s_size -= len;
return (0);
}
/*
* The section to go is in the middle of the segment,
* have to make it into two segments. nseg is made for
* the high end while seg is cut down at the low end.
*/
nbase = addr + len; /* new seg base */
nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */
seg->s_size = addr - seg->s_base; /* shrink old seg */
nseg = seg_alloc(seg->s_as, nbase, nsize);
if (nseg == NULL)
panic("segdev_unmap seg_alloc");
nseg->s_ops = seg->s_ops;
nsdp = (struct segdev_data *)
new_kmem_alloc(sizeof (struct segdev_data), KMEM_SLEEP);
nseg->s_data = (char *)nsdp;
nsdp->pageprot = sdp->pageprot;
nsdp->prot = sdp->prot;
nsdp->maxprot = sdp->maxprot;
nsdp->mapfunc = sdp->mapfunc;
nsdp->offset = sdp->offset + nseg->s_base - seg->s_base;
if (sdp->vpage == NULL)
nsdp->vpage = NULL;
else {
tpages = btop(nseg->s_base - seg->s_base);
hpages = btop(addr - seg->s_base);
nsdp->vpage = (struct vpage *)
new_kmem_alloc(vpgtob(spages - tpages), KMEM_SLEEP);
bcopy((caddr_t)&sdp->vpage[tpages], (caddr_t)nsdp->vpage,
vpgtob(spages - tpages));
sdp->vpage = (struct vpage *)
new_kmem_resize((caddr_t)sdp->vpage, (u_int)0,
vpgtob(hpages), vpgtob(spages), KMEM_SLEEP);
}
/*
* Now we do something so that all the translations which used
* to be associated with seg are now associated with nseg.
*/
hat_newseg(seg, nseg->s_base, nseg->s_size, nseg);
return (0);
}
/*
* Free a segment.
*/
static
segdev_free(seg)
struct seg *seg;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register u_int nbytes = vpgtob(seg_pages(seg));
if (sdp->vpage != NULL)
kmem_free((caddr_t)sdp->vpage, nbytes);
kmem_free((caddr_t)sdp, sizeof (*sdp));
}
/*
* Handle a fault on a device segment.
*/
static faultcode_t
segdev_fault(seg, addr, len, type, rw)
register struct seg *seg;
addr_t addr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register addr_t adr;
register u_int prot, protchk;
int pf;
struct vpage *vpage;
if (type == F_PROT) {
/*
* Since the seg_dev driver does not implement copy-on-write,
* this means that a valid translation is already loaded,
* but we got a fault trying to access the device.
* Return an error here to prevent going in an endless
* loop reloading the same translation...
*/
return (FC_PROT);
}
if (type != F_SOFTUNLOCK) {
if (sdp->pageprot == 0) {
switch (rw) {
case S_READ:
protchk = PROT_READ;
break;
case S_WRITE:
protchk = PROT_WRITE;
break;
case S_EXEC:
protchk = PROT_EXEC;
break;
case S_OTHER:
default:
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
break;
}
prot = sdp->prot;
if ((prot & protchk) == 0)
return (FC_PROT);
vpage = NULL;
} else {
vpage = &sdp->vpage[seg_page(seg, addr)];
}
}
for (adr = addr; adr < addr + len; adr += PAGESIZE) {
if (type == F_SOFTUNLOCK) {
hat_unlock(seg, adr);
continue;
}
if (vpage != NULL) {
switch (rw) {
case S_READ:
protchk = PROT_READ;
break;
case S_WRITE:
protchk = PROT_WRITE;
break;
case S_EXEC:
protchk = PROT_EXEC;
break;
case S_OTHER:
default:
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
break;
}
prot = vpage->vp_prot;
vpage++;
if ((prot & protchk) == 0)
return (FC_PROT);
}
pf = (*sdp->mapfunc)(sdp->dev,
sdp->offset + (adr - seg->s_base), prot);
if (pf == -1)
return (FC_MAKE_ERR(EFAULT));
hat_devload(seg, adr, pf, prot, type == F_SOFTLOCK);
}
return (0);
}
/*
* Asynchronous page fault. We simply do nothing since this
* entry point is not supposed to load up the translation.
*/
/*ARGSUSED*/
static faultcode_t
segdev_faulta(seg, addr)
struct seg *seg;
addr_t addr;
{
return (0);
}
/*ARGSUSED*/
static
segdev_hatsync(seg, addr, ref, mod, flags)
struct seg *seg;
addr_t addr;
u_int ref, mod;
u_int flags;
{
/* cannot use ref and mod bits on devices, so ignore 'em */
}
static int
segdev_setprot(seg, addr, len, prot)
register struct seg *seg;
register addr_t addr;
register u_int len, prot;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct vpage *vp, *evp;
if ((sdp->maxprot & prot) != prot)
return (-1); /* violated maxprot */
if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
if (sdp->prot == prot)
return (0); /* all done */
sdp->prot = prot;
} else {
sdp->pageprot = 1;
if (sdp->vpage == NULL) {
/*
* First time through setting per page permissions,
* initialize all the vpage structures to prot
*/
sdp->vpage = (struct vpage *)new_kmem_zalloc(
vpgtob(seg_pages(seg)), KMEM_SLEEP);
evp = &sdp->vpage[seg_pages(seg)];
for (vp = sdp->vpage; vp < evp; vp++)
vp->vp_prot = sdp->prot;
}
/*
* Now go change the needed vpages protections.
*/
evp = &sdp->vpage[seg_page(seg, addr + len)];
for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
vp->vp_prot = prot;
}
if (prot == 0)
hat_unload(seg, addr, len);
else
hat_chgprot(seg, addr, len, prot);
return (0);
}
static int
segdev_checkprot(seg, addr, len, prot)
register struct seg *seg;
register addr_t addr;
register u_int len, prot;
{
struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct vpage *vp, *evp;
/*
* If segment protection can be used, simply check against them
*/
if (sdp->pageprot == 0)
return (((sdp->prot & prot) != prot) ? -1 : 0);
/*
* Have to check down to the vpage level
*/
evp = &sdp->vpage[seg_page(seg, addr + len)];
for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
if ((vp->vp_prot & prot) != prot)
return (-1);
return (0);
}
static
segdev_badop()
{
panic("segdev_badop");
/*NOTREACHED*/
}
/*
* segdev pages are not in the cache, and thus can't really be controlled.
* syncs, locks, and advice are simply always successful.
*/
/*ARGSUSED*/
static int
segdev_ctlops(seg, addr, len, flags)
struct seg *seg;
addr_t addr;
u_int len, flags;
{
return (0);
}
/*
* segdev pages are always "in core".
*/
/*ARGSUSED*/
static int
segdev_incore(seg, addr, len, vec)
struct seg *seg;
addr_t addr;
register u_int len;
register char *vec;
{
u_int v = 0;
for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
v += PAGESIZE)
*vec++ = 1;
return (v);
}

sys/vm/seg_dev.h (new file, 38 lines)

@@ -0,0 +1,38 @@
/* @(#)seg_dev.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_dev_h
#define _vm_seg_dev_h
/*
* Structure whose pointer is passed to the segdev_create routine
*/
struct segdev_crargs {
int (*mapfunc)(); /* map function to call */
u_int offset; /* starting offset */
dev_t dev; /* device number */
u_char prot; /* protection */
u_char maxprot; /* maximum protection */
};
/*
* (Semi) private data maintained by the seg_dev driver per segment mapping
*/
struct segdev_data {
int (*mapfunc)(); /* really returns struct pte, not int */
u_int offset; /* device offset for start of mapping */
dev_t dev; /* device number (for mapfunc) */
u_char pageprot; /* true if per page protections present */
u_char prot; /* current segment prot if pageprot == 0 */
u_char maxprot; /* maximum segment protections */
struct vpage *vpage; /* per-page information, if needed */
};
#ifdef KERNEL
int segdev_create(/* seg, argsp */);
#endif KERNEL
#endif /*!_vm_seg_dev_h*/
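
A hedged sketch of how a device mapping might be established with this interface: fill in a segdev_crargs from the driver's mmap entry point and hand segdev_create to as_map() as the segment-create function. The wrapper routine and its argument list are invented for the example.

#include <sys/param.h>
#include <sys/mman.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>

/*
 * Illustrative only: map [off, off+len) of a character device at addr
 * in the given address space using the seg_dev driver.
 */
int
sketch_map_device(as, addr, len, mapfunc, dev, off, prot, maxprot)
        struct as *as;
        addr_t addr;
        u_int len;
        int (*mapfunc)();       /* driver's mmap entry point */
        dev_t dev;
        u_int off;
        u_int prot, maxprot;
{
        struct segdev_crargs a;

        a.mapfunc = mapfunc;
        a.dev = dev;
        a.offset = off;
        a.prot = (u_char)prot;
        a.maxprot = (u_char)maxprot;
        return (as_map(as, addr, len, segdev_create, (caddr_t)&a));
}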

sys/vm/seg_map.c (new file, 776 lines)

@@ -0,0 +1,776 @@
/* @(#)seg_map.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - generic vnode mapping segment.
*
* The segmap driver is used only by the kernel to get faster (than seg_vn)
* mappings [lower routine overhead; more persistent cache] to random
* vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
*/
#include <sys/param.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/ucred.h>
#include <sys/trace.h>
#include <sys/debug.h>
#include <sys/user.h>
#include <sys/kernel.h>
#include <machine/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
/*
* Private seg op routines.
*/
static int segmap_free(/* seg */);
static faultcode_t segmap_fault(/* seg, addr, len, type, rw */);
static faultcode_t segmap_faulta(/* seg, addr */);
static int segmap_checkprot(/* seg, addr, len, prot */);
static int segmap_kluster(/* seg, addr, delta */);
static int segmap_badop();
struct seg_ops segmap_ops = {
segmap_badop, /* dup */
segmap_badop, /* unmap */
segmap_free,
segmap_fault,
segmap_faulta,
(int (*)()) NULL, /* unload */
segmap_badop, /* setprot */
segmap_checkprot,
segmap_kluster,
(u_int (*)()) NULL, /* swapout */
segmap_badop, /* sync */
segmap_badop, /* incore */
segmap_badop, /* lockop */
segmap_badop, /* advise */
};
/*
* Private segmap routines.
*/
static void segmap_smapadd(/* smd, smp */);
static void segmap_smapsub(/* smd, smp */);
static void segmap_hashin(/* smd, smp, vp, off, flags */);
static void segmap_hashout(/* smd, smp */);
/*
* Statistics for segmap operations.
*/
struct segmapcnt {
int smc_fault; /* number of segmap_faults */
int smc_faulta; /* number of segmap_faultas */
int smc_getmap; /* number of segmap_getmaps */
int smc_get_use; /* # of getmaps that reuse an existing map */
int smc_get_reclaim; /* # of getmaps that do a reclaim */
int smc_get_reuse; /* # of getmaps that reuse a slot */
int smc_rel_async; /* # of releases that are async */
int smc_rel_write; /* # of releases that write */
int smc_rel_free; /* # of releases that free */
int smc_rel_abort; /* # of releases that abort */
int smc_rel_dontneed; /* # of releases with dontneed set */
int smc_release; /* # of releases with no other action */
int smc_pagecreate; /* # of pagecreates */
} segmapcnt;
/*
* Return number of map pages in segment.
*/
#define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
/*
* Translate addr into smap number within segment.
*/
#define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
/*
* Translate addr in seg into struct smap pointer.
*/
#define GET_SMAP(seg, addr) \
&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
int
segmap_create(seg, argsp)
struct seg *seg;
caddr_t argsp;
{
register struct segmap_data *smd;
register struct smap *smp;
struct segmap_crargs *a = (struct segmap_crargs *)argsp;
register u_int i;
u_int hashsz;
addr_t segend;
/*
* Make sure that seg->s_base and seg->s_base + seg->s_size
* are on MAXBSIZE aligned pieces of virtual memory.
*
* Since we assume we are creating a large segment
* (it's just segkmap), trimming off the excess at the
* beginning and end of the segment is considered safe.
*/
segend = (addr_t)((u_int)(seg->s_base + seg->s_size) & MAXBMASK);
seg->s_base = (addr_t)roundup((u_int)(seg->s_base), MAXBSIZE);
seg->s_size = segend - seg->s_base;
i = MAP_PAGES(seg);
smd = (struct segmap_data *)new_kmem_zalloc(
sizeof (struct segmap_data), KMEM_SLEEP);
smd->smd_prot = a->prot;
smd->smd_sm = (struct smap *)new_kmem_zalloc(
(u_int)(sizeof (struct smap) * i), KMEM_SLEEP);
/*
* Link up all the slots.
*/
for (smp = &smd->smd_sm[i - 1]; smp >= smd->smd_sm; smp--)
segmap_smapadd(smd, smp);
/*
* Compute hash size rounding down to the next power of two.
*/
hashsz = MAP_PAGES(seg) / SMAP_HASHAVELEN;
for (i = 0x80 << ((sizeof (int) - 1) * NBBY); i != 0; i >>= 1) {
if ((hashsz & i) != 0) {
smd->smd_hashsz = hashsz = i;
break;
}
}
smd->smd_hash = (struct smap **)new_kmem_zalloc(
hashsz * sizeof (smd->smd_hash[0]), KMEM_SLEEP);
seg->s_data = (char *)smd;
seg->s_ops = &segmap_ops;
return (0);
}
static int
segmap_free(seg)
struct seg *seg;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
kmem_free((caddr_t)smd->smd_hash, sizeof (smd->smd_hash[0]) *
smd->smd_hashsz);
kmem_free((caddr_t)smd->smd_sm, sizeof (struct smap) * MAP_PAGES(seg));
kmem_free((caddr_t)smd, sizeof (*smd));
}
/*
* Do a F_SOFTUNLOCK call over the range requested.
* The range must have already been F_SOFTLOCK'ed.
*/
static void
segmap_unlock(seg, addr, len, rw, smp)
struct seg *seg;
addr_t addr;
u_int len;
enum seg_rw rw;
register struct smap *smp;
{
register struct page *pp;
register addr_t adr;
u_int off;
off = smp->sm_off + ((u_int)addr & MAXBOFFSET);
for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
/*
* For now, we just kludge here by finding the page
* ourselves since we would not find the page using
* page_find() if someone has page_abort()'ed it.
* XXX - need to redo things to avoid this mess.
*/
for (pp = page_hash[PAGE_HASHFUNC(smp->sm_vp, off)]; pp != NULL;
pp = pp->p_hash)
if (pp->p_vnode == smp->sm_vp && pp->p_offset == off)
break;
if (pp == NULL || pp->p_pagein || pp->p_free)
panic("segmap_unlock");
if (rw == S_WRITE)
pg_setmod(pp, 1);
if (rw != S_OTHER) {
trace4(TR_PG_SEGMAP_FLT, pp, pp->p_vnode, off, 1);
pg_setref(pp, 1);
}
hat_unlock(seg, adr);
PAGE_RELE(pp);
}
}
/*
* This routine is called via a machine specific fault handling
* routine. It is also called by software routines wishing to
* lock or unlock a range of addresses.
*/
static faultcode_t
segmap_fault(seg, addr, len, type, rw)
struct seg *seg;
addr_t addr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
register struct segmap_data *smd;
register struct smap *smp;
register struct page *pp, **ppp;
register struct vnode *vp;
register u_int off;
struct page *pl[btopr(MAXBSIZE) + 1];
u_int prot;
u_int addroff;
addr_t adr;
int err;
segmapcnt.smc_fault++;
smd = (struct segmap_data *)seg->s_data;
smp = GET_SMAP(seg, addr);
vp = smp->sm_vp;
if (vp == NULL)
return (FC_MAKE_ERR(EIO));
addroff = (u_int)addr & MAXBOFFSET;
if (addroff + len > MAXBSIZE)
panic("segmap_fault length");
off = smp->sm_off + addroff;
/*
* First handle the easy stuff
*/
if (type == F_SOFTUNLOCK) {
segmap_unlock(seg, addr, len, rw, smp);
return (0);
}
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_SEGKMAP);
err = VOP_GETPAGE(vp, off, len, &prot, pl, MAXBSIZE, seg, addr, rw,
(struct ucred *)NULL); /* XXX - need real cred val */
if (err)
return (FC_MAKE_ERR(err));
prot &= smd->smd_prot;
/*
* Handle all pages returned in the pl[] array.
* This loop is coded on the assumption that if
* there was no error from the VOP_GETPAGE routine,
* that the page list returned will contain all the
* needed pages for the vp from [off..off + len).
*/
for (ppp = pl; (pp = *ppp++) != NULL; ) {
/*
* Verify that the pages returned are within the range
* of this segmap region. Note that it is theoretically
* possible for pages outside this range to be returned,
* but it is not very likely. If we cannot use the
* page here, just release it and go on to the next one.
*/
if (pp->p_offset < smp->sm_off ||
pp->p_offset >= smp->sm_off + MAXBSIZE) {
PAGE_RELE(pp);
continue;
}
adr = addr + (pp->p_offset - off);
if (adr >= addr && adr < addr + len) {
pg_setref(pp, 1);
trace4(TR_PG_SEGMAP_FLT, pp, pp->p_vnode, pp->p_offset,
0);
trace5(TR_SPG_FLT, u.u_ar0[PC], adr, vp, pp->p_offset,
TRC_SPG_SMAP);
trace6(TR_SPG_FLT_PROC, time.tv_sec, time.tv_usec,
trs(u.u_comm,0), trs(u.u_comm,1),
trs(u.u_comm,2), trs(u.u_comm,3));
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't PAGE_RELE the page.
*/
hat_memload(seg, adr, pp, prot, 1);
continue;
}
}
/*
* Either it was a page outside the fault range or a
* page inside the fault range for a non F_SOFTLOCK -
* load up the hat translation and release the page.
*/
hat_memload(seg, adr, pp, prot, 0);
PAGE_RELE(pp);
}
return (0);
}
/*
* This routine is used to start I/O on pages asynchronously.
*/
static faultcode_t
segmap_faulta(seg, addr)
struct seg *seg;
addr_t addr;
{
register struct smap *smp;
int err;
segmapcnt.smc_faulta++;
smp = GET_SMAP(seg, addr);
if (smp->sm_vp == NULL) {
call_debug("segmap_faulta - no vp");
return (FC_MAKE_ERR(EIO));
}
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_SEGKMAP);
err = VOP_GETPAGE(smp->sm_vp, smp->sm_off + (u_int)addr & MAXBOFFSET,
PAGESIZE, (u_int *)NULL, (struct page **)NULL, 0,
seg, addr, S_READ,
(struct ucred *)NULL); /* XXX - need real cred val */
if (err)
return (FC_MAKE_ERR(err));
return (0);
}
/*ARGSUSED*/
static int
segmap_checkprot(seg, addr, len, prot)
struct seg *seg;
addr_t addr;
u_int len, prot;
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
return (((smd->smd_prot & prot) != prot) ? -1 : 0);
}
/*
* Check to see if it makes sense to do kluster/read ahead to
* addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For segmap we always "approve" of this action from our standpoint.
*/
/*ARGSUSED*/
static int
segmap_kluster(seg, addr, delta)
struct seg *seg;
addr_t addr;
int delta;
{
return (0);
}
static
segmap_badop()
{
panic("segmap_badop");
/*NOTREACHED*/
}
/*
* Special private segmap operations
*/
/*
* Add smp to the free list on smd. If the smp still has a vnode
* association with it, then it is added to the end of the free list,
* otherwise it is added to the front of the list.
*/
static void
segmap_smapadd(smd, smp)
register struct segmap_data *smd;
register struct smap *smp;
{
if (smp->sm_refcnt != 0)
panic("segmap_smapadd");
if (smd->smd_free == (struct smap *)NULL) {
smp->sm_next = smp->sm_prev = smp;
} else {
smp->sm_next = smd->smd_free;
smp->sm_prev = (smd->smd_free)->sm_prev;
(smd->smd_free)->sm_prev = smp;
smp->sm_prev->sm_next = smp;
}
if (smp->sm_vp == (struct vnode *)NULL)
smd->smd_free = smp;
else
smd->smd_free = smp->sm_next;
/*
* XXX - need a better way to do this.
*/
if (smd->smd_want) {
wakeup((caddr_t)&smd->smd_free);
smd->smd_want = 0;
}
}
/*
* Remove smp from the smd free list. If there is an old
* mapping in effect there, then delete it.
*/
static void
segmap_smapsub(smd, smp)
register struct segmap_data *smd;
register struct smap *smp;
{
if (smd->smd_free == smp)
smd->smd_free = smp->sm_next; /* go to next page */
if (smd->smd_free == smp)
smd->smd_free = NULL; /* smp list is gone */
else {
smp->sm_prev->sm_next = smp->sm_next;
smp->sm_next->sm_prev = smp->sm_prev;
}
smp->sm_prev = smp->sm_next = smp; /* make smp a list of one */
smp->sm_refcnt = 1;
}
static void
segmap_hashin(smd, smp, vp, off)
register struct segmap_data *smd;
register struct smap *smp;
struct vnode *vp;
u_int off;
{
register struct smap **hpp;
/*
* Funniness here - we don't increment the ref count on the vnode
* even though we have another pointer to it here. The reason
* for this is that we don't want the fact that a seg_map
* entry somewhere refers to a vnode to prevent the vnode
* itself from going away. This is because this reference
* to the vnode is a "soft one". In the case where a mapping
* is being used by a rdwr [or directory routine?] there already
* has to be a non-zero ref count on the vnode. In the case
* where the vp has been freed and the smap structure is
* on the free list, there are no pages in memory that can
* refer to the vnode. Thus even if we reuse the same
* vnode/smap structure for a vnode which has the same
* address but represents a different object, we are ok.
*/
smp->sm_vp = vp;
smp->sm_off = off;
hpp = &smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
smp->sm_hash = *hpp;
*hpp = smp;
}
static void
segmap_hashout(smd, smp)
register struct segmap_data *smd;
register struct smap *smp;
{
register struct smap **hpp, *hp;
struct vnode *vp;
vp = smp->sm_vp;
hpp = &smd->smd_hash[SMAP_HASHFUNC(smd, vp, smp->sm_off)];
for (;;) {
hp = *hpp;
if (hp == NULL)
panic("segmap_hashout");
if (hp == smp)
break;
hpp = &hp->sm_hash;
}
*hpp = smp->sm_hash;
smp->sm_hash = NULL;
smp->sm_vp = NULL;
smp->sm_off = 0;
}
/*
* Special public segmap operations
*/
/*
* Create pages (without using VOP_GETPAGE) and load up translations to them.
* If softlock is TRUE, then set things up so that it looks like a call
* to segmap_fault with F_SOFTLOCK.
*/
void
segmap_pagecreate(seg, addr, len, softlock)
struct seg *seg;
register addr_t addr;
u_int len;
int softlock;
{
register struct page *pp;
register u_int off;
struct smap *smp;
struct vnode *vp;
addr_t eaddr;
u_int prot;
segmapcnt.smc_pagecreate++;
eaddr = addr + len;
addr = (addr_t)((u_int)addr & PAGEMASK);
smp = GET_SMAP(seg, addr);
vp = smp->sm_vp;
off = smp->sm_off + ((u_int)addr & MAXBOFFSET);
prot = ((struct segmap_data *)seg->s_data)->smd_prot;
for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
pp = page_lookup(vp, off);
if (pp == NULL) {
pp = rm_allocpage(segkmap, addr, PAGESIZE, 1);
trace6(TR_SEG_ALLOCPAGE, segkmap, addr,
TRC_SEG_SEGKMAP, vp, off, pp);
if (page_enter(pp, vp, off))
panic("segmap_page_create page_enter");
page_unlock(pp);
if (softlock) {
hat_memload(segkmap, addr, pp, prot, 1);
} else {
hat_memload(segkmap, addr, pp, prot, 0);
PAGE_RELE(pp);
}
} else {
if (softlock) {
PAGE_HOLD(pp);
hat_memload(segkmap, addr, pp, prot, 1);
} else {
hat_memload(segkmap, addr, pp, prot, 0);
}
}
}
}
addr_t
segmap_getmap(seg, vp, off)
struct seg *seg;
struct vnode *vp;
u_int off;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
register struct smap *smp;
segmapcnt.smc_getmap++;
if ((off & MAXBOFFSET) != 0)
panic("segmap_getmap bad offset");
/*
* XXX - keep stats for hash function
*/
for (smp = smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
smp != NULL; smp = smp->sm_hash)
if (smp->sm_vp == vp && smp->sm_off == off)
break;
if (smp != NULL) {
if (vp->v_count == 0) /* XXX - debugging */
call_debug("segmap_getmap vp count of zero");
if (smp->sm_refcnt != 0) {
segmapcnt.smc_get_use++;
smp->sm_refcnt++; /* another user */
} else {
segmapcnt.smc_get_reclaim++;
segmap_smapsub(smd, smp); /* reclaim */
}
} else {
/*
* Allocate a new slot and set it up.
*/
while ((smp = smd->smd_free) == NULL) {
/*
* XXX - need a better way to do this.
*/
smd->smd_want = 1;
(void) sleep((caddr_t)&smd->smd_free, PSWP+2);
}
segmap_smapsub(smd, smp);
if (smp->sm_vp != (struct vnode *)NULL) {
/*
* Destroy old vnode association and unload any
* hardware translations to the old object.
*/
segmapcnt.smc_get_reuse++;
segmap_hashout(smd, smp);
hat_unload(seg, seg->s_base + ((smp - smd->smd_sm) *
MAXBSIZE), MAXBSIZE);
}
segmap_hashin(smd, smp, vp, off);
}
trace5(TR_SEG_GETMAP, seg, (u_int)(seg->s_base +
(smp - smd->smd_sm) * MAXBSIZE) & PAGEMASK,
TRC_SEG_SEGKMAP, vp, off);
return (seg->s_base + ((smp - smd->smd_sm) * MAXBSIZE));
}
/*
* Same as segmap_getmap(), with the following condition added
* if (a new mapping is created)
* prefault the translation
*/
addr_t
segmap_getmapflt(seg, vp, off)
struct seg *seg;
struct vnode *vp;
u_int off;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
register struct smap *smp;
segmapcnt.smc_getmap++;
if ((off & MAXBOFFSET) != 0)
panic("segmap_getmap bad offset");
/*
* XXX - keep stats for hash function
*/
for (smp = smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
smp != NULL; smp = smp->sm_hash)
if (smp->sm_vp == vp && smp->sm_off == off)
break;
if (smp != NULL) {
if (vp->v_count == 0) /* XXX - debugging */
call_debug("segmap_getmap vp count of zero");
if (smp->sm_refcnt != 0) {
segmapcnt.smc_get_use++;
smp->sm_refcnt++; /* another user */
} else {
segmapcnt.smc_get_reclaim++;
segmap_smapsub(smd, smp); /* reclaim */
}
} else {
/*
* Allocate a new slot and set it up.
*/
while ((smp = smd->smd_free) == NULL) {
/*
* XXX - need a better way to do this.
*/
smd->smd_want = 1;
(void) sleep((caddr_t)&smd->smd_free, PSWP+2);
}
segmap_smapsub(smd, smp);
if (smp->sm_vp != (struct vnode *)NULL) {
/*
* Destroy old vnode association and unload any
* hardware translations to the old object.
*/
segmapcnt.smc_get_reuse++;
segmap_hashout(smd, smp);
hat_unload(seg, seg->s_base + ((smp - smd->smd_sm) *
MAXBSIZE), MAXBSIZE);
}
segmap_hashin(smd, smp, vp, off);
/*
* Prefault the translation
*/
(void)as_fault(&kas,
seg->s_base + (smp - smd->smd_sm) * MAXBSIZE,
MAXBSIZE, F_INVAL, S_READ);
}
trace5(TR_SEG_GETMAP, seg, (u_int)(seg->s_base +
(smp - smd->smd_sm) * MAXBSIZE) & PAGEMASK,
TRC_SEG_SEGKMAP, vp, off);
return (seg->s_base + ((smp - smd->smd_sm) * MAXBSIZE));
}
int
segmap_release(seg, addr, flags)
struct seg *seg;
addr_t addr;
u_int flags;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
register struct smap *smp;
int error;
if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
((u_int)addr & MAXBOFFSET) != 0)
panic("segmap_release addr");
smp = &smd->smd_sm[MAP_PAGE(seg, addr)];
trace4(TR_SEG_RELMAP, seg, addr, TRC_SEG_SEGKMAP, smp->sm_refcnt);
/*
* Need to call VOP_PUTPAGE if any flags (except SM_DONTNEED)
* are set.
*/
if ((flags & ~SM_DONTNEED) != 0) {
int bflags = 0;
if (flags & SM_WRITE)
segmapcnt.smc_rel_write++;
if (flags & SM_ASYNC) {
bflags |= B_ASYNC;
segmapcnt.smc_rel_async++;
}
if (flags & SM_INVAL) {
bflags |= B_INVAL;
segmapcnt.smc_rel_abort++;
}
if (smp->sm_refcnt == 1) {
/*
* We only bother doing the FREE and DONTNEED flags
* if no one else is still referencing this mapping.
*/
if (flags & SM_FREE) {
bflags |= B_FREE;
segmapcnt.smc_rel_free++;
}
if (flags & SM_DONTNEED) {
bflags |= B_DONTNEED;
segmapcnt.smc_rel_dontneed++;
}
}
error = VOP_PUTPAGE(smp->sm_vp, smp->sm_off, MAXBSIZE, bflags,
(struct ucred *)NULL); /* XXX - need real cred val */
} else {
segmapcnt.smc_release++;
error = 0;
}
if (--smp->sm_refcnt == 0) {
if (flags & SM_INVAL) {
hat_unload(seg, addr, MAXBSIZE);
segmap_hashout(smd, smp); /* remove map info */
}
segmap_smapadd(smd, smp); /* add to free list */
}
return (error);
}

sys/vm/seg_map.h (new file, 88 lines)

@@ -0,0 +1,88 @@
/* @(#)seg_map.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_map_h
#define _vm_seg_map_h
struct segmap_crargs {
u_int prot;
};
/*
* Each smap struct represents a MAXBSIZE sized mapping to the
* <sm_vp, sm_off> given in the structure. The location of the
* structure in the array gives the virtual address of the
* mapping.
*/
struct smap {
struct vnode *sm_vp; /* vnode pointer (if mapped) */
u_int sm_off; /* file offset for mapping */
/*
* These next 4 entries can be coded as
* u_shorts if we are tight on memory.
*/
u_int sm_refcnt; /* reference count for uses */
struct smap *sm_hash; /* hash pointer */
struct smap *sm_next; /* next pointer */
struct smap *sm_prev; /* previous pointer */
};
/*
* (Semi) private data maintained by the segmap driver per SEGMENT mapping
*/
struct segmap_data {
struct smap *smd_sm; /* array of smap structures */
struct smap *smd_free; /* free list head pointer */
u_char smd_prot; /* protections for all smap's */
u_char smd_want; /* smap want flag */
u_int smd_hashsz; /* power-of-two hash table size */
struct smap **smd_hash; /* pointer to hash table */
};
/*
* These are flags used on release. Some of these might get handled
* by segment operations needed for msync (when we figure them out).
* SM_ASYNC modifies SM_WRITE. SM_DONTNEED modifies SM_FREE. SM_FREE
* and SM_INVAL are mutually exclusive.
*/
#define SM_WRITE 0x01 /* write back the pages upon release */
#define SM_ASYNC 0x02 /* do the write asynchronously */
#define SM_FREE 0x04 /* put pages back on free list */
#define SM_INVAL 0x08 /* invalidate page (no caching) */
#define SM_DONTNEED 0x10 /* less likely to be needed soon */
#define MAXBSHIFT 13 /* log2(MAXBSIZE) */
#define MAXBOFFSET (MAXBSIZE - 1)
#define MAXBMASK (~MAXBOFFSET)
/*
* SMAP_HASHAVELEN is the average length desired for this chain, from
* which the size of the smd_hash table is derived at segment create time.
* SMAP_HASHVPSHIFT is defined so that 1 << SMAP_HASHVPSHIFT is the
* approximate size of a vnode struct.
*/
#define SMAP_HASHAVELEN 4
#define SMAP_HASHVPSHIFT 6
#define SMAP_HASHFUNC(smd, vp, off) \
((((off) >> MAXBSHIFT) + ((int)(vp) >> SMAP_HASHVPSHIFT)) & \
((smd)->smd_hashsz - 1))
#ifdef KERNEL
int segmap_create(/* seg, argsp */);
/*
* Special seg_map segment operations
*/
void segmap_pagecreate(/* seg, addr, len, softlock */);
addr_t segmap_getmap(/* seg, vp, off */);
int segmap_release(/* seg, addr, flags */);
extern struct seg *segkmap; /* the kernel generic mapping segment */
extern struct seg_ops segmap_ops;
#endif KERNEL
#endif /*!_vm_seg_map_h*/
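
As a usage illustration, a hedged sketch of the pattern a file system read routine could follow with this interface: map a MAXBSIZE window of the vnode with segmap_getmap(), copy out of it, then release the window. The routine itself is not part of this header; locking, partial-block handling, and credentials are omitted, and uiomove()/MIN() are assumed to have their 4.3BSD forms.

#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/seg_map.h>

/*
 * Illustrative only: satisfy one read request of at most MAXBSIZE
 * bytes from vp through the kernel's segkmap window.
 */
int
sketch_segmap_read(vp, uio)
        struct vnode *vp;
        struct uio *uio;
{
        register addr_t base;
        register u_int off, n;
        int error;

        off = (u_int)uio->uio_offset & MAXBOFFSET;      /* offset in window */
        n = MIN(MAXBSIZE - off, uio->uio_resid);
        base = segmap_getmap(segkmap, vp, (u_int)uio->uio_offset & MAXBMASK);
        error = uiomove(base + off, (int)n, UIO_READ, uio);
        (void) segmap_release(segkmap, base, error ? 0 : SM_FREE);
        return (error);
}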

sys/vm/seg_u.c (new file, 871 lines)

@@ -0,0 +1,871 @@
/* @(#)seg_u.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1989 by Sun Microsystems, Inc.
*/
/*
* VM - u-area segment routines
*
* XXX: This segment type should probably be recast as seg_stack
* instead of seg_u. As the system evolves, we'll need to
* manage variable-sized stacks protected by red zones, some
* of which possibly are accompanied by u-areas. For the moment
* the implementation copes only with "standard" u-areas,
* each with an embedded stack. Doing so lets the implementation
* get away with much simpler space management code.
*
* Desired model:
* segu_data describes nproc u-areas and the segment ops
* manipulate individual slots in segu_data, so that (e.g.)
* copying a u-area upon process creation turns into
* transcribing parts of segu_data from one place to another.
*
* Red zone handling:
* The implementation maintains the invariant that the MMU mappings
* for unallocated slots are invalid. This means that red zones
* come for free simply by avoiding establishing mappings over all
* red zone pages and by making sure that all mappings are invalidated
* at segu_release time.
*
* Note also that we need neither pages nor swap space for red zones,
* so much of the code works over extents of SEGU_PAGES-1 instead
* of SEGU_PAGES.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/ucred.h>
#include <sys/vnode.h>
#include <sys/kmem_alloc.h>
#include <sys/proc.h> /* needed for debugging printouts only */
#include <sys/vmmeter.h>
#include <vm/anon.h>
#include <vm/rm.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_u.h>
#include <vm/swap.h>
#include <vm/hat.h>
/*
* Ugliness to compensate for some machine dependency.
*/
#ifdef i386bug
#define UPAGE_PROT (PROT_READ | PROT_USER)
#else i386bug
#define UPAGE_PROT (PROT_READ | PROT_WRITE)
#endif i386bug
int segu_debug = 0; /* patchable for debugging */
/*
* Private seg op routines.
*
* The swapout operation is null because the generic swapout code
* never attempts to swap out anything in the kernel's address
* space. Instead, clients swap the resources this driver manages
* by calling segu_fault with a type argument of F_SOFTLOCK to swap
* a slot in and with F_SOFTUNLOCK to swap one out.
*/
static int segu_checkprot(/* seg, vaddr, len, prot */);
static int segu_kluster(/* seg, vaddr, delta */);
static int segu_badop();
struct seg_ops segu_ops = {
segu_badop, /* dup */
segu_badop, /* unmap */
segu_badop, /* free */
segu_fault,
segu_badop, /* faulta */
(int (*)()) NULL, /* unload */
segu_badop, /* setprot */
segu_checkprot,
segu_kluster,
(u_int (*)()) NULL, /* swapout */
segu_badop, /* sync */
segu_badop, /* incore */
segu_badop, /* lockop */
segu_badop, /* advise */
};
/*
* Declarations of private routines for use by seg_u operations.
*/
static int segu_getslot(/* seg, vaddr, len */);
static int segu_softunlock(/* seg, vaddr, len, slot */);
static int segu_softload(/* seg, vaddr, len, slot, lock */);
struct seg *segu;
/*
* XXX: Global change needed -- set up MMU translations before
* keeping pages.
*/
static
segu_badop()
{
panic("seg_badop");
/* NOTREACHED */
}
/*
* Handle a fault on an address corresponding to one of the
* slots in the segu segment.
*/
faultcode_t
segu_fault(seg, vaddr, len, type, rw)
struct seg *seg;
addr_t vaddr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
struct segu_segdata *sdp = (struct segu_segdata *)seg->s_data;
struct segu_data *sup;
int slot;
addr_t vbase;
int err;
/*
* Sanity checks.
*/
if (seg != segu)
panic("segu_fault: wrong segment");
if (type == F_PROT)
panic("segu_fault: unexpected F_PROT fault");
/*
* Verify that the range specified by vaddr and len falls
* completely within the mapped part of a single allocated
* slot, calculating the slot index and slot pointer while
* we're at it.
*/
slot = segu_getslot(seg, vaddr, len);
if (slot == -1)
return (FC_MAKE_ERR(EFAULT));
sup = &sdp->usd_slots[slot];
vbase = seg->s_base + ptob(SEGU_PAGES) * slot;
/*
* The F_SOFTLOCK and F_SOFTUNLOCK cases have more stringent
* range requirements: the given range must exactly coincide
* with the slot's mapped portion.
*/
if (type == F_SOFTLOCK || type == F_SOFTUNLOCK) {
if (vaddr != segu_stom(vbase) || len != ptob(SEGU_PAGES - 1))
return (FC_MAKE_ERR(EFAULT));
}
if (type == F_SOFTLOCK) {
/*
* Somebody is trying to lock down this slot, e.g., as
* part of swapping in a u-area contained in the slot.
*/
/*
* It is erroneous to attempt to lock when already locked.
*
* XXX: Possibly this shouldn't be a panic. It depends
* on what assumptions we're willing to let clients
* make.
*/
if (sup->su_flags & SEGU_LOCKED)
panic("segu_fault: locking locked slot");
err = segu_softload(seg, segu_stom(vbase),
ptob(SEGU_PAGES - 1), slot, 1);
if (err)
return (FC_MAKE_ERR(err));
sup->su_flags |= SEGU_LOCKED;
return (0);
}
if (type == F_INVAL) {
/*
* Normal fault. The processing required
* is quite similar to that for the F_SOFTLOCK case in that
* we have to drag stuff in and make sure it's mapped. It
* differs in that we don't lock it down.
*/
if (segu_debug)
printf("segu_fault(%x, %x, %d)\n", vaddr, len, type);
/*
* If the slot is already locked, the only way we
* should fault is by referencing the red zone.
*
* XXX: Probably should tighten this check and verify
* that it's really a red zone reference.
* XXX: Is this the most appropriate error code?
*/
if (sup->su_flags & SEGU_LOCKED)
return (FC_MAKE_ERR(EINVAL));
err = segu_softload(seg, vaddr, len, slot, 0);
return (err ? FC_MAKE_ERR(err) : 0);
}
if (type == F_SOFTUNLOCK) {
/*
* Somebody is trying to swap out this slot, e.g., as
* part of swapping out a u-area contained in this slot.
*/
/*
* It is erroneous to attempt to unlock when not
* currently locked.
*/
if (!(sup->su_flags & SEGU_LOCKED))
panic("segu_fault: unlocking unlocked slot");
sup->su_flags &= ~SEGU_LOCKED;
err = segu_softunlock(seg, vaddr, len, slot, rw);
return (err ? FC_MAKE_ERR(err) : 0);
}
panic("segu_fault: bogus fault type");
/* NOTREACHED */
}
/*
* Check that the given protections suffice over the range specified by
* vaddr and len. For this segment type, the only issue is whether or
* not the range lies completely within the mapped part of an allocated slot.
*
* We let segu_getslot do all the dirty work.
*/
/* ARGSUSED */
static int
segu_checkprot(seg, vaddr, len, prot)
struct seg *seg;
addr_t vaddr;
u_int len;
u_int prot;
{
register int slot = segu_getslot(seg, vaddr, len);
return (slot == -1 ? -1 : 0);
}
/*
* Check to see if it makes sense to do kluster/read ahead to
* addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For seg_u we always "approve" of this action from our standpoint.
*/
/* ARGSUSED */
static int
segu_kluster(seg, addr, delta)
struct seg *seg;
addr_t addr;
int delta;
{
return (0);
}
/*
* Segment operations specific to the seg_u segment type.
*/
/*
* Finish creating the segu segment by setting up its private state
* information. Called once at boot time after segu has been allocated
* and hooked into the kernel address space.
*
* Note that we have no need for the argsp argument, since everything
* we need to set up our private information is contained in the common
* segment information. (This may change at such time as we generalize
* the implementation to deal with variable size allocation units.)
*/
/* ARGSUSED */
int
segu_create(seg, argsp)
register struct seg *seg;
caddr_t argsp;
{
register u_int numslots;
register int i;
register struct segu_segdata *sdp;
/*
* Trim the segment's size down to the largest multiple of
* SEGU_PAGES that's no larger than the original value.
*
* XXX: Does it matter that we're discarding virtual address
* space off the end with no record of how much there was?
*/
numslots = seg->s_size / ptob(SEGU_PAGES);
seg->s_size = numslots * ptob(SEGU_PAGES);
/*
* Allocate segment-specific information.
*/
seg->s_data = new_kmem_alloc(sizeof (struct segu_segdata), KMEM_SLEEP);
sdp = (struct segu_segdata *)seg->s_data;
/*
* Allocate the slot array.
*/
sdp->usd_slots = (struct segu_data *)new_kmem_alloc(
numslots * sizeof (struct segu_data), KMEM_SLEEP);
/*
* Set up the slot free list, marking each slot as unallocated.
* Note that the list must be sorted in ascending address order.
*/
sdp->usd_slots[0].su_flags = 0;
for (i = 1; i < numslots; i++) {
sdp->usd_slots[i - 1].su_next = &sdp->usd_slots[i];
sdp->usd_slots[i].su_flags = 0;
}
sdp->usd_slots[numslots - 1].su_next = NULL;
sdp->usd_free = sdp->usd_slots;
seg->s_ops = &segu_ops;
return (0);
}
/*
* Allocate resources for a single slot.
*
* When used for u-area, called at process creation time.
*/
addr_t
segu_get()
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
struct page *pp;
addr_t vbase;
addr_t va;
struct segu_data *sup;
int slot;
int i;
/*
* Allocate virtual space. This amounts to grabbing a free slot.
*/
if ((sup = sdp->usd_free) == NULL)
return (NULL);
sdp->usd_free = sup->su_next;
slot = sup - sdp->usd_slots;
vbase = segu->s_base + ptob(SEGU_PAGES) * slot;
/*
* If this slot has anon resources left over from its last use, free
* them. (Normally, segu_release will have cleaned up; however, i/o
* in progress at the time of the call prevents it from doing so.)
*/
if (sup->su_flags & SEGU_HASANON) {
anon_free(sup->su_swaddr, ptob(SEGU_PAGES));
anon_unresv(ptob(SEGU_PAGES - 1));
sup->su_flags &= ~SEGU_HASANON;
}
/*
* Reserve sufficient swap space for this slot. We'll
* actually allocate it in the loop below, but reserving it
* here allows us to back out more gracefully than if we
* had an allocation failure in the body of the loop.
*
* Note that we don't need swap space for the red zone page.
*/
if (anon_resv(ptob(SEGU_PAGES - 1)) == 0) {
if (segu_debug)
printf("segu_get: no swap space available\n");
sup->su_next = sdp->usd_free;
sdp->usd_free = sup;
return (NULL);
}
/*
* Allocate pages, avoiding allocating one for the red zone.
*/
pp = rm_allocpage(segu, segu_stom(vbase), ptob(SEGU_PAGES - 1), 1);
if (pp == NULL) {
if (segu_debug)
printf("segu_get: no pages available\n");
/*
* Give back the resources we've acquired.
*/
anon_unresv(ptob(SEGU_PAGES - 1));
sup->su_next = sdp->usd_free;
sdp->usd_free = sup;
return (NULL);
}
/*
* Allocate swap space.
*
* Because the interface for getting swap slots is designed
* to handle only one page at a time, we must deal with each
* page in the u-area individually instead of allocating a
* contiguous chunk of swap space for the whole thing as we
* would prefer.
*
* This being the case, we actually do more in this loop than
* simply allocate swap space. As we handle each page, we
* complete its setup.
*/
for (i = 0, va = vbase; i < SEGU_PAGES; i++, va += ptob(1)) {
register struct anon *ap;
struct vnode *vp;
u_int off;
struct page *opp;
/*
* If this page is the red zone page, we don't need swap
* space for it. Note that we skip over the code that
* establishes MMU mappings, so that the page remains
* invalid.
*/
if (i == SEGU_REDZONE) {
sup->su_swaddr[i] = NULL;
continue;
}
/*
* Sanity check.
*/
if (pp == NULL)
panic("segu_get: not enough pages");
/*
* Get a swap slot.
*/
if ((ap = anon_alloc()) == NULL)
panic("segu_get: swap allocation failure");
sup->su_swaddr[i] = ap;
/*
* Tie the next page to the swap slot.
*/
swap_xlate(ap, &vp, &off);
while (page_enter(pp, vp, off)) {
/*
* The page was already tied to something
* else that we have no record of. Since
* the page we wish to be named by <vp, off>
* already exists, we abort the old page.
*/
struct page *p1 = page_find(vp, off);
if (p1 != NULL) {
page_wait(p1);
if (p1->p_vnode == vp && p1->p_offset == off)
page_abort(p1);
}
}
/*
* Page_enter has set the page's lock bit. Since it's
* kept as well, this is just a nuisance.
*/
page_unlock(pp);
/*
* Mark the page for long term keep and release the
* short term claim that rm_allocpage established.
*
* XXX: When page_pp_lock returns a success/failure
* indication, we'll probably want to panic if
* it fails.
*/
(void) page_pp_lock(pp, 0, 1);
/*
* Load and lock an MMU translation for the page.
*/
hat_memload(segu, va, pp, UPAGE_PROT, 1);
/*
* Prepare to use the next page.
*/
opp = pp;
page_sub(&pp, pp);
PAGE_RELE(opp);
}
/*
* Finally, mark this slot as allocated, locked, and in possession
* of anon resources.
*/
sup->su_flags = SEGU_ALLOCATED | SEGU_LOCKED | SEGU_HASANON;
/*
* Return the address of the base of the mapped part of
* the slot.
*/
return (segu_stom(vbase));
}
/*
* Reclaim resources for a single slot.
*
* When used for u-area, called at process destruction time. Guaranteed not
* to sleep, so that it can be called while running on the interrupt stack.
*
* N.B.: Since this routine deallocates all of the slot's resources,
* callers can't count on the resources remaining accessible. In
* particular, any stack contained in the slot will vanish, so we'd
* better not be running on that stack.
*
* N.B.: Since the routine can't sleep, it must defer deallocation of anon
* resources associated with pages that have i/o in progress. (Anon_decref
* calls page_abort, which will sleep until the i/o is complete.)
*
* We can't simply undo everything that segu_get did directly,
* because someone else may have acquired a reference to one or
* more of the associated pages in the meantime.
*/
void
segu_release(vaddr)
addr_t vaddr;
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
addr_t vbase = segu_mtos(vaddr);
addr_t va;
struct segu_data *sup;
struct segu_data **supp;
int slot;
int i;
int doing_io = 0;
register int locked;
/*
* Get the slot corresponding to this virtual address.
*/
if ((slot = segu_getslot(segu, vaddr, 1)) == -1)
panic("segu_release: bad addr");
sup = &sdp->usd_slots[slot];
/*
* XXX: Do we need to lock this slot's pages while we're
* messing with them? What can happen once we decrement
* the keep count below?
*/
/*
* Examine the slot's pages looking for i/o in progress.
* While doing so, undo locks.
*/
locked = sup->su_flags & SEGU_LOCKED;
for (i = 0, va = vbase; i < SEGU_PAGES; i++, va += ptob(1)) {
register struct page *pp;
struct vnode *vp;
u_int off;
register int s;
if (i == SEGU_REDZONE)
continue;
if (locked)
hat_unlock(segu, va);
/*
* Find the page associated with this part of the
* slot, tracking it down through its associated swap
* space.
*/
swap_xlate(sup->su_swaddr[i], &vp, &off);
/*
* Prevent page status from changing.
*/
s = splvm();
if ((pp = page_exists(vp, off)) == NULL) {
/*
* The page no longer exists; this is fine
* unless we had it locked.
*/
if (locked)
panic("segu_release: missing locked page");
else
continue;
}
/*
* See whether the page is quiescent.
*/
if (pp->p_keepcnt != 0)
doing_io = 1;
/*
* Make this page available to vultures.
*/
if (locked)
page_pp_unlock(pp, 0);
(void) splx(s);
}
/*
* Unload the mmu translations for this slot.
*/
hat_unload(segu, vaddr, ptob(SEGU_PAGES - 1));
/*
* Provided that all of the pages controlled by this segment are
* quiescent, release our claim on the associated anon resources and
* swap space.
*/
if (!doing_io) {
anon_free(sup->su_swaddr, ptob(SEGU_PAGES));
anon_unresv(ptob(SEGU_PAGES - 1));
sup->su_flags &= ~SEGU_HASANON;
} else
sup->su_flags |= SEGU_HASANON;
/*
* Mark the slot as unallocated and unlocked and put it back on the
* free list. Keep the free list sorted by slot address, to minimize
* fragmentation of seg_u's virtual address range. (This makes a
* difference on some architectures; e.g., by making it possible to
* use fewer page table entries.) This code counts on the slot
* address being a monotonically increasing function of indices of
* entries in the usd_slots array.
*/
sup->su_flags &= ~(SEGU_ALLOCATED|SEGU_LOCKED);
for (supp = &sdp->usd_free; *supp != NULL && *supp < sup;
supp = &(*supp)->su_next)
continue;
sup->su_next = *supp;
*supp = sup;
}
/*
* Private routines for use by seg_u operations.
*/
/*
* Verify that the range designated by vaddr and len lies completely
* within the mapped part of a single allocated slot. If so, return
* the slot's index; otherwise return -1.
*/
static int
segu_getslot(seg, vaddr, len)
register struct seg *seg;
addr_t vaddr;
u_int len;
{
register int slot;
register struct segu_segdata *sdp;
register struct segu_data *sup;
addr_t vlast;
addr_t vmappedbase;
sdp = (struct segu_segdata *)seg->s_data;
/*
* Make sure the base is in range of the segment as a whole.
*/
if (vaddr < seg->s_base || vaddr >= seg->s_base + seg->s_size)
return (-1);
/*
* Figure out what slot the address lies in.
*/
slot = (vaddr - seg->s_base) / ptob(SEGU_PAGES);
sup = &sdp->usd_slots[slot];
/*
* Make sure the end of the range falls in the same slot.
*/
vlast = vaddr + len - 1;
if ((vlast - seg->s_base) / ptob(SEGU_PAGES) != slot)
return (-1);
/*
* Nobody has any business touching this slot if it's not currently
* allocated.
*/
if (!(sup->su_flags & SEGU_ALLOCATED))
return (-1);
/*
* Finally, verify that the range is completely in the mapped part
* of the slot.
*/
vmappedbase = segu_stom(seg->s_base + ptob(SEGU_PAGES) * slot);
if (vaddr < vmappedbase || vlast >= vmappedbase + ptob(SEGU_PAGES - 1))
return (-1);
return (slot);
}
/*
* Unlock intra-slot resources in the range given by vaddr and len.
* Assumes that the range is known to fall entirely within the mapped
* part of the slot given as argument and that the slot itself is
* allocated.
*/
static int
segu_softunlock(seg, vaddr, len, slot, rw)
struct seg *seg;
addr_t vaddr;
u_int len;
int slot;
enum seg_rw rw;
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
register struct segu_data
*sup = &sdp->usd_slots[slot];
register addr_t va;
addr_t vlim;
register u_int i;
/*
* Loop through the pages in the given range.
*/
va = (addr_t)((u_int)vaddr & PAGEMASK);
len = roundup(len, ptob(1));
vlim = va + len;
/* Calculate starting page index within slot. */
i = (va - (seg->s_base + slot * ptob(SEGU_PAGES))) / ptob(1);
for ( ; va < vlim; va += ptob(1), i++) {
register struct page *pp;
struct vnode *vp;
u_int off;
/*
* Unlock our MMU translation for this page.
*
* XXX: Is there any problem with attempting to unlock
* a translation that isn't locked?
*/
hat_unlock(seg, va);
/*
* Unload it.
*/
hat_unload(seg, va, ptob(1));
/*
* Find the page associated with this part of the
* slot, tracking it down through its associated swap
* space.
*/
swap_xlate(sup->su_swaddr[i], &vp, &off);
if ((pp = page_find(vp, off)) == NULL)
panic("segu_softunlock: missing page");
/*
* Release our long-term claim on the page.
*/
page_pp_unlock(pp, 0);
/*
* If we're "hard" swapping (i.e. we need pages) and
* nobody's using the page any more and it's dirty,
* unlocked, and not kept, push it asynchronously rather
* than waiting for the pageout daemon to find it.
*/
hat_pagesync(pp);
if (rw == S_WRITE && pp->p_mapping == NULL &&
pp->p_keepcnt == 0 && !pp->p_lock && pp->p_mod) {
/*
* XXX: Want most powerful credentials we can
* get. Punt for now.
*/
(void) VOP_PUTPAGE(vp, off, ptob(1), B_ASYNC | B_FREE,
(struct ucred *)NULL);
}
}
return (0);
}
/*
* Load and possibly lock intra-slot resources in the range given
* by vaddr and len. Assumes that the range is known to fall entirely
* within the mapped part of the slot given as argument and that the
* slot itself is allocated.
*/
static int
segu_softload(seg, vaddr, len, slot, lock)
struct seg *seg;
addr_t vaddr;
u_int len;
int slot;
int lock;
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
register struct segu_data
*sup = &sdp->usd_slots[slot];
register addr_t va;
addr_t vlim;
register u_int i;
/*
* Loop through the pages in the given range.
*/
va = (addr_t)((u_int)vaddr & PAGEMASK);
vaddr = va;
len = roundup(len, ptob(1));
vlim = va + len;
/* Calculate starting page index within slot. */
i = (va - (seg->s_base + slot * ptob(SEGU_PAGES))) / ptob(1);
for ( ; va < vlim; va += ptob(1), i++) {
struct page *pl[2];
struct vnode *vp;
u_int off;
register int err;
/*
* Summon the page. If it's not resident, arrange
* for synchronous i/o to pull it in.
*
* XXX: Need read credentials value; for now we punt.
*/
swap_xlate(sup->su_swaddr[i], &vp, &off);
err = VOP_GETPAGE(vp, off, ptob(1), (u_int *)NULL,
pl, ptob(1), seg, va, S_READ, (struct ucred *)NULL);
if (err) {
/*
* Back out of what we've done so far.
*/
(void) segu_softunlock(seg, vaddr, (u_int)(va - vaddr),
slot, S_OTHER);
return (err);
}
cnt.v_swpin++;
/*
* The returned page list will have exactly one entry,
* which is returned to us already kept.
*/
/*
* Load an MMU translation for the page.
*/
hat_memload(seg, va, pl[0], UPAGE_PROT, lock);
/*
* If we're locking down resources, we need to increment
* the page's long term keep count. In any event, we
* need to decrement the (short term) keep count.
*
* XXX: When page_pp_lock returns a success/failure
* indication, we'll probably want to panic if
* it fails.
*/
if (lock)
(void) page_pp_lock(pl[0], 0, 1);
PAGE_RELE(pl[0]);
}
return (0);
}

130
sys/vm/seg_u.h Normal file
View File

@@ -0,0 +1,130 @@
/* @(#)seg_u.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1989 by Sun Microsystems, Inc.
*/
/*
* VM - U-area segment management
*
* This file contains definitions related to the u-area segment type.
*
* In its most general form, this segment type provides an interface
* for managing stacks that are protected by red zones, with the size
* of each stack independently specifiable. The current implementation
* is restricted in the following way.
* 1) It assumes that all stacks are the same size. In particular,
* it assumes that the stacks it manages are actually traditional
* u-areas, each containing a stack at one end.
*
* The segment driver manages a contiguous chunk of virtual space,
* carving it up into individual stack instances as required, and
* associating physical storage, MMU mappings, and swap space with
* each individual stack instance.
*
* As a matter of nomenclature, the individual allocation units are
* referred to as "slots".
*/
#ifndef _vm_seg_u_h
#define _vm_seg_u_h
/*
* The number of pages covered by a single seg_u slot.
*
* This value is the number of (software) pages in the u-area
* (including the stack in the u-area) plus an additional page
* for a stack red zone. If the seg_u implementation is ever
* generalized to allow variable-size stack allocation, this
* define will have to change.
*/
#define SEGU_PAGES (UPAGES/CLSIZE + 1)
/*
* XXX: This define belongs elsewhere, probably in <machine/param.h>.
*/
#define STACK_GROWTH_DOWN
/*
* Index of the red zone page and macros for interconverting between
* the base address of a slot and the base address of its accessible
* portion. (Nomenclature: Slot TO Mapped and vice versa.)
*/
#ifdef STACK_GROWTH_DOWN
#define SEGU_REDZONE 0
#define segu_stom(v) ((v) + ptob(1))
#define segu_mtos(v) ((v) - ptob(1))
#else STACK_GROWTH_DOWN
#define SEGU_REDZONE (SEGU_PAGES - 1)
#define segu_stom(v) (v)
#define segu_mtos(v) (v)
#endif STACK_GROWTH_DOWN
/*
* Private information per overall segu segment (as opposed
* to per slot within segment)
*
* XXX: We may wish to modify the free list to handle it as a queue
* instead of a stack; this possibly could reduce the frequency
* of cache flushes. If so, we would need a list tail pointer
* as well as a list head pointer.
*/
struct segu_segdata {
/*
* info needed:
* - slot vacancy info
* - a way of getting to state info for each slot
*/
struct segu_data *usd_slots; /* array of segu_data structs, */
/* one per slot */
struct segu_data *usd_free; /* slot free list head */
};
/*
* Private per-slot information.
*/
struct segu_data {
struct segu_data *su_next; /* free list link */
struct anon *su_swaddr[SEGU_PAGES]; /* disk address of u area */
/* when swapped */
u_int su_flags; /* state info: see below */
};
/*
* Flag bits
*
* When the SEGU_LOCKED bit is set, all the resources associated with the
* corresponding slot are locked in place, so that referencing addresses
* in the slot's range will not cause a fault. Clients using this driver
* to manage a u-area lock down the slot when the corresponding process
* becomes runnable and unlock it when the process is swapped out.
*/
#define SEGU_ALLOCATED 0x01 /* slot is in use */
#define SEGU_LOCKED 0x02 /* slot's resources locked */
#define SEGU_HASANON 0x04 /* slot has anon resources */
#ifdef KERNEL
extern struct seg *segu;
/*
* Public routine declarations not part of the segment ops vector go here.
*/
int segu_create(/* seg, argsp */);
addr_t segu_get();
void segu_release(/* vaddr */);
/*
* We allow explicit calls to segu_fault, even though it's part
* of the segu ops vector.
*/
faultcode_t segu_fault(/* seg, vaddr, len, type, rw */);
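/*
 * A minimal sketch of the slot life cycle (the caller and the `uaddr'
 * variable are assumptions; only the routines named above are real):
 */
#ifdef notdef
	addr_t uaddr;

	/* At process creation: claim a slot; NULL means out of resources. */
	uaddr = segu_get();

	/* While the process lives, segu_fault() with F_SOFTLOCK and */
	/* F_SOFTUNLOCK wires and releases the slot as it is swapped. */

	/* At process destruction: give the slot and its resources back. */
	if (uaddr != NULL)
		segu_release(uaddr);
#endif /* notdef */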
#endif KERNEL
#endif /*!_vm_seg_u_h*/

2460
sys/vm/seg_vn.c Normal file

File diff suppressed because it is too large Load Diff

108
sys/vm/seg_vn.h Normal file
View File

@@ -0,0 +1,108 @@
/* @(#)seg_vn.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_vn_h
#define _vm_seg_vn_h
#include <vm/mp.h>
/*
* Structure whose pointer is passed to the segvn_create routine
*/
struct segvn_crargs {
struct vnode *vp; /* vnode mapped from */
u_int offset; /* starting offset of vnode for mapping */
struct ucred *cred; /* credentials */
u_char type; /* type of sharing done */
u_char prot; /* protections */
u_char maxprot; /* maximum protections */
struct anon_map *amp; /* anon mapping to map to */
};
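/*
 * A minimal sketch (hypothetical caller; the sharing and protection
 * values chosen here are assumptions) of filling in segvn_crargs and
 * passing it to as_map() together with segvn_create:
 */
#ifdef notdef
	struct segvn_crargs crargs;

	crargs.vp = vp;			/* vnode to map (assumed held) */
	crargs.offset = 0;
	crargs.cred = cred;
	crargs.type = MAP_PRIVATE;	/* private, copy-on-write mapping */
	crargs.prot = PROT_ALL;
	crargs.maxprot = PROT_ALL;
	crargs.amp = NULL;		/* no pre-existing anon_map */
	error = as_map(as, addr, len, segvn_create, (caddr_t)&crargs);
#endif /* notdef */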
/*
* The anon_map structure is used by the seg_vn driver to manage
* unnamed (anonymous) memory. When anonymous memory is shared,
* then the different segvn_data structures will point to the
* same anon_map structure. Also, if a segment is unmapped
* in the middle where an anon_map structure exists, the
* newly created segment will also share the anon_map structure,
* although the two segments will use different ranges of the
* anon array. When mappings are private (or shared with
* a reference count of 1), an unmap operation will free up
* a range of anon slots in the array given by the anon_map
* structure. Because of fragmentation due to this unmapping,
* we have to store the size of the anon array in the anon_map
* structure so that we can free everything when the reference
* count goes to zero.
*/
struct anon_map {
u_int refcnt; /* reference count on this structure */
u_int size; /* size in bytes mapped by the anon array */
struct anon **anon; /* pointer to an array of anon * pointers */
u_int swresv; /* swap space reserved for this anon_map */
u_int flags; /* anon_map flags (see below) */
};
/* anon_map flags */
#define AMAP_LOCKED 0x01 /* anon_map is locked */
#define AMAP_WANT 0x02 /* some process waiting on lock */
/*
* Lock and unlock anon_map if the segment has private pages. This
* is necessary to ensure that operations on the anon array (e.g., growing
* the array, or allocating an anon slot and assigning a page) are atomic.
*/
#define AMAP_LOCK(amp) { \
while ((amp)->flags & AMAP_LOCKED) { \
(amp)->flags |= AMAP_WANT; \
(void) sleep((caddr_t)(amp), PAMAP); \
} \
(amp)->flags |= AMAP_LOCKED; \
masterprocp->p_swlocks++; \
}
#define AMAP_UNLOCK(amp) { \
(amp)->flags &= ~AMAP_LOCKED; \
masterprocp->p_swlocks--; \
if ((amp)->flags & AMAP_WANT) { \
(amp)->flags &= ~AMAP_WANT; \
wakeup((caddr_t)(amp)); \
} \
}
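/*
 * A minimal usage sketch (the `amp' and `anon_index' locals are
 * assumptions): operations on the anon array are bracketed by the
 * lock so that allocating a slot and assigning it stay atomic.
 */
#ifdef notdef
	AMAP_LOCK(amp);
	if (amp->anon[anon_index] == NULL)
		amp->anon[anon_index] = anon_alloc();	/* may return NULL */
	AMAP_UNLOCK(amp);
#endif /* notdef */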
/*
* (Semi) private data maintained by the seg_vn driver per segment mapping
*/
struct segvn_data {
kmon_t lock;
u_char pageprot; /* true if per page protections present */
u_char prot; /* current segment prot if pageprot == 0 */
u_char maxprot; /* maximum segment protections */
u_char type; /* type of sharing done */
struct vnode *vp; /* vnode that segment mapping is to */
u_int offset; /* starting offset of vnode for mapping */
u_int anon_index; /* starting index into anon_map anon array */
struct anon_map *amp; /* pointer to anon share structure, if needed */
struct vpage *vpage; /* per-page information, if needed */
struct ucred *cred; /* mapping credentials */
u_int swresv; /* swap space reserved for this segment */
u_char advice; /* madvise flags for segment */
u_char pageadvice; /* true if per page advice set */
};
#ifdef KERNEL
int segvn_create(/* seg, argsp */);
extern struct seg_ops segvn_ops;
/*
* Provided as shorthand for creating user zfod segments
*/
extern caddr_t zfod_argsp;
extern caddr_t kzfod_argsp;
#endif KERNEL
#endif /*!_vm_seg_vn_h*/

35
sys/vm/swap.h Normal file
View File

@@ -0,0 +1,35 @@
/* @(#)swap.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_swap_h
#define _vm_swap_h
/*
* VM - virtual swap device.
*/
struct swapinfo {
struct vnode *si_vp; /* vnode for this swap device */
u_int si_size; /* size (bytes) of this swap device */
struct anon *si_anon; /* pointer to anon array */
struct anon *si_eanon; /* pointer to end of anon array */
struct anon *si_free; /* anon free list for this vp */
int si_allocs; /* # of conseq. allocs from this area */
struct swapinfo *si_next; /* next swap area */
short *si_pid; /* parallel pid array for memory tool */
};
#define IS_SWAPVP(vp) (((vp)->v_flag & VISSWAP) != 0)
#ifdef KERNEL
int swap_init(/* vp */);
struct anon *swap_alloc();
void swap_free(/* ap */);
void swap_xlate(/* ap, vpp, offsetp */);
struct anon *swap_anon(/* vp, offset */);
#endif
#endif /*!_vm_swap_h*/

509
sys/vm/vm_anon.c Normal file
View File

@@ -0,0 +1,509 @@
/* @(#)vm_anon.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
/*
* VM - anonymous pages.
*
* This layer sits immediately above the vm_swap layer. It manages
* physical pages that have no permanent identity in the file system
* name space, using the services of the vm_swap layer to allocate
* backing storage for these pages. Since these pages have no external
* identity, they are discarded when the last reference is removed.
*
* An important function of this layer is to manage low-level sharing
* of pages that are logically distinct but that happen to be
* physically identical (e.g., the corresponding pages of the processes
* resulting from a fork before one process or the other changes their
* contents). This pseudo-sharing is present only as an optimization
* and is not to be confused with true sharing in which multiple
* address spaces deliberately contain references to the same object;
* such sharing is managed at a higher level.
*
* The key data structure here is the anon struct, which contains a
* reference count for its associated physical page and a hint about
* the identity of that page. Anon structs typically live in arrays,
* with an instance's position in its array determining where the
* corresponding backing storage is allocated; however, the swap_xlate()
* routine abstracts away this representation information so that the
* rest of the anon layer need not know it. (See the swap layer for
* more details on anon struct layout.)
*
* In future versions of the system, the association between an
* anon struct and its position on backing store will change so that
* we don't require backing store for all anonymous pages in the system.
* This is an important consideration for large memory systems.
* We can also use this technique to delay binding physical locations
* to anonymous pages until pageout/swapout time where we can make
* smarter allocation decisions to improve anonymous klustering.
*
* Many of the routines defined here take a (struct anon **) argument,
* which allows the code at this level to manage anon pages directly,
* so that callers can regard anon structs as opaque objects and not be
* concerned with assigning or inspecting their contents.
*
* Clients of this layer refer to anon pages indirectly. That is, they
* maintain arrays of pointers to anon structs rather than maintaining
* anon structs themselves. The (struct anon **) arguments mentioned
* above are pointers to entries in these arrays. It is these arrays
* that capture the mapping between offsets within a given segment and
* the corresponding anonymous backing storage address.
*/
#include <sys/param.h>
#include <sys/user.h> /* XXX - for rusage */
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/ucred.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/trace.h>
#include <sys/debug.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/swap.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/mp.h>
struct anoninfo anoninfo;
#ifdef KMON_DEBUG
kmon_t anon_lock;
#endif /* KMON_DEBUG */
int anon_resv_debug = 0;
int anon_enforce_resv = 1;
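/*
 * A minimal sketch of the client usage pattern described in the block
 * comment at the top of this file.  The segment driver, its `seg'
 * argument, and the helper name are hypothetical; only the anon layer
 * calls are real.
 */
#ifdef notdef
static void
example_anon_client(seg)
	struct seg *seg;
{
	struct anon **anon_array;
	u_int nslots = btopr(seg->s_size);

	/* One anon pointer per page of the segment, initially NULL. */
	anon_array = (struct anon **)new_kmem_alloc(
	    nslots * sizeof (struct anon *), KMEM_SLEEP);
	bzero((caddr_t)anon_array, nslots * sizeof (struct anon *));

	/* Hand the address of one entry down to get a zero-filled page. */
	(void) anon_zero(seg, seg->s_base, &anon_array[0]);

	/* On teardown, drop every reference held through the array. */
	anon_free(anon_array, seg->s_size);
}
#endif /* notdef */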
/*
* Reserve anon space.
* Return non-zero on success.
*/
int
anon_resv(size)
u_int size;
{
anoninfo.ani_resv += btopr(size);
if (anoninfo.ani_resv > anoninfo.ani_max) {
if (anon_enforce_resv)
anoninfo.ani_resv -= btopr(size);
else if (anon_resv_debug)
printf("anon: swap space overcommitted by %d\n",
anoninfo.ani_resv - anoninfo.ani_max);
return (!anon_enforce_resv);
} else {
return (1);
}
}
/*
* Give back an anon reservation.
*/
void
anon_unresv(size)
u_int size;
{
anoninfo.ani_resv -= btopr(size);
if ((int)anoninfo.ani_resv < 0)
printf("anon: reservations below zero???\n");
}
/*
* Allocate an anon slot.
*/
struct anon *
anon_alloc()
{
register struct anon *ap;
kmon_enter(&anon_lock);
ap = swap_alloc();
if (ap != NULL) {
anoninfo.ani_free--;
ap->an_refcnt = 1;
ap->un.an_page = NULL;
}
kmon_exit(&anon_lock);
return (ap);
}
/*
* Decrement the reference count of an anon page.
* If reference count goes to zero, free it and
* its associated page (if any).
*/
static void
anon_decref(ap)
register struct anon *ap;
{
register struct page *pp;
struct vnode *vp;
u_int off;
if (--ap->an_refcnt == 0) {
/*
* If there is a page for this anon slot we will need to
* call page_abort to get rid of the vp association and
* put the page back on the free list as really free.
*/
swap_xlate(ap, &vp, &off);
pp = page_find(vp, off);
/*
* XXX - If we have a page, wait for its keepcnt to become
* zero, re-verify the identity before aborting it and
* freeing the swap slot. This ensures that any pending i/o
* always completes before the swap slot is freed.
*/
if (pp != NULL) {
if (pp->p_keepcnt != 0) {
page_wait(pp);
if (pp->p_vnode == vp && pp->p_offset == off)
page_abort(pp);
} else {
page_abort(pp);
}
}
kmon_enter(&anon_lock);
swap_free(ap);
anoninfo.ani_free++;
kmon_exit(&anon_lock);
}
}
/*
* Duplicate references to size bytes worth of anon pages.
* Used when duplicating a segment that contains private anon pages.
* This code assumes that the procedure calling this one has already used
* hat_chgprot() to disable write access to the range of addresses
* that *old actually refers to.
*/
void
anon_dup(old, new, size)
register struct anon **old, **new;
u_int size;
{
register int i;
i = btopr(size);
while (i-- > 0) {
if ((*new = *old) != NULL)
(*new)->an_refcnt++;
old++;
new++;
}
}
/*
* Free a group of "size" anon pages, size in bytes,
* and clear out the pointers to the anon entries.
*/
void
anon_free(app, size)
register struct anon **app;
u_int size;
{
register int i;
i = btopr(size);
while (i-- > 0) {
if (*app != NULL) {
anon_decref(*app);
*app = NULL;
}
app++;
}
}
/*
* Return the kept page(s) and protections back to the segment driver.
*/
int
anon_getpage(app, protp, pl, plsz, seg, addr, rw, cred)
struct anon **app;
u_int *protp;
struct page *pl[];
u_int plsz;
struct seg *seg;
addr_t addr;
enum seg_rw rw;
struct ucred *cred;
{
register struct page *pp, **ppp;
register struct anon *ap = *app;
struct vnode *vp;
u_int off;
int err;
extern int nopagereclaim;
register int s;
swap_xlate(ap, &vp, &off);
again:
pp = ap->un.an_page;
/*
* If the anon pointer has a page associated with it,
* see if it looks ok after raising priority to prevent
* it from being ripped away at interrupt level if on the
* free list. If the page is being paged in, wait for it
* to finish as we must return a list of pages since this
* routine acts like the VOP_GETPAGE routine does.
*/
s = splvm();
if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off &&
!pp->p_gone && pl != NULL) {
if (pp->p_intrans && (pp->p_pagein || nopagereclaim)) {
(void) splx(s);
page_wait(pp);
goto again; /* try again */
}
if (pp->p_free)
page_reclaim(pp);
(void) splx(s);
PAGE_HOLD(pp);
if (ap->an_refcnt == 1)
*protp = PROT_ALL;
else
*protp = PROT_ALL & ~PROT_WRITE;
pl[0] = pp;
pl[1] = NULL;
/* no one else accounted for it so we must */
u.u_ru.ru_minflt++;
return (0);
}
(void) splx(s);
/*
* Simply treat it as a vnode fault on the anon vp.
*/
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_ANON);
err = VOP_GETPAGE(vp, off, PAGESIZE, protp, pl, plsz,
seg, addr, rw, cred);
if (err == 0 && pl != NULL) {
for (ppp = pl; (pp = *ppp++) != NULL; ) {
if (pp->p_offset == off) {
ap->un.an_page = pp;
break;
}
}
if (ap->an_refcnt != 1)
*protp &= ~PROT_WRITE; /* make read-only */
}
return (err);
}
int npagesteal;
/*
* Turn a reference to an object or shared anon page
* into a private page with a copy of the data from the
* original page. The original page is always kept, locked
* and loaded in the MMU by the caller. This routine unlocks
* the translation and releases the original page, if it isn't
* being stolen, before returning to the caller.
*/
struct page *
anon_private(app, seg, addr, opp, oppflags)
struct anon **app;
struct seg *seg;
addr_t addr;
struct page *opp;
u_int oppflags;
{
register struct anon *old = *app;
register struct anon *new;
register struct page *pp;
struct vnode *vp;
u_int off;
ASSERT(opp->p_mapping);
ASSERT(opp->p_keepcnt);
new = anon_alloc();
if (new == (struct anon *)NULL) {
rm_outofanon();
hat_unlock(seg, addr);
PAGE_RELE(opp);
return ((struct page *)NULL); /* out of swap space */
}
*app = new;
swap_xlate(new, &vp, &off);
again:
pp = page_lookup(vp, off);
if (pp == NULL && (oppflags & STEAL_PAGE) &&
opp->p_keepcnt == 1 && opp->p_mod == 0) {
pp = opp;
hat_unlock(seg, addr); /* unlock translation */
hat_pageunload(pp); /* unload all translations */
page_hashout(pp); /* destroy old name for page */
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
if (page_enter(pp, vp, off)) /* rename as anon page */
panic("anon private steal");
new->un.an_page = pp;
pg_setmod(pp, 1);
page_unlock(pp);
/*
* If original page is ``locked'', relinquish
* claim for the extra page.
*/
if (oppflags & LOCK_PAGE)
page_subclaim(1);
npagesteal++;
return (pp);
}
if (pp == NULL) {
/*
* Normal case, need to allocate new page frame.
*/
pp = rm_allocpage(seg, addr, PAGESIZE, 1);
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
if (page_enter(pp, vp, off)) {
PAGE_RELE(pp);
goto again; /* try again */
}
} else {
/*
* Already found a page with the right identity -- just
* use it if the `keepcnt' is 0. If not, wait for the
* `keepcnt' to become 0, re-verify the identity before
* using the page.
*/
if (pp->p_keepcnt != 0) {
page_wait(pp);
if (pp->p_vnode != vp || pp->p_offset != off)
goto again;
}
page_lock(pp);
PAGE_HOLD(pp);
}
new->un.an_page = pp;
/*
* Now copy the contents from the original page which
* is loaded and locked in the MMU by the caller to
* prevent yet another page fault.
*/
pp->p_intrans = pp->p_pagein = 1;
pagecopy(addr, pp);
pp->p_intrans = pp->p_pagein = 0;
pg_setmod(pp, 1); /* mark as modified */
page_unlock(pp);
/*
* If original page is ``locked'', relinquish claim
* for an extra page reserved for the private copy
* in case of a copy-on-write. Lock the new page
* ignoring the current reservation check.
*/
if (oppflags & LOCK_PAGE) {
if (old == NULL)
page_pp_unlock(opp, 1);
else
page_pp_unlock(opp, 0);
(void) page_pp_lock(pp, 0, 0);
}
/*
* Unlock translation to the original page since
* it can be unloaded if the page is aborted.
*/
hat_unlock(seg, addr);
/*
* Ok, now release the original page, or else the
* process will sleep forever in anon_decref()
* waiting for the `keepcnt' to become 0.
*/
PAGE_RELE(opp);
/*
* If we copied away from an anonymous page, then
* we are one step closer to freeing up an anon slot.
*/
if (old != NULL)
anon_decref(old);
return (pp);
}
/*
* Allocate a zero-filled anon page.
*/
struct page *
anon_zero(seg, addr, app)
struct seg *seg;
addr_t addr;
struct anon **app;
{
register struct anon *ap;
register struct page *pp;
struct vnode *vp;
u_int off;
*app = ap = anon_alloc();
if (ap == NULL) {
rm_outofanon();
return ((struct page *)NULL);
}
swap_xlate(ap, &vp, &off);
again:
pp = page_lookup(vp, off);
if (pp == NULL) {
/*
* Normal case, need to allocate new page frame.
*/
pp = rm_allocpage(seg, addr, PAGESIZE, 1);
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
if (page_enter(pp, vp, off)) {
PAGE_RELE(pp);
goto again; /* try again */
}
} else {
/*
* Already found a page with the right identity -- just
* use it if the `keepcnt' is 0. If not, wait for the
* `keepcnt' to become 0, re-verify the identity before
* using the page.
*/
if (pp->p_keepcnt != 0) {
page_wait(pp);
if (pp->p_vnode != vp || pp->p_offset != off)
goto again;
}
page_lock(pp);
PAGE_HOLD(pp);
}
ap->un.an_page = pp;
pagezero(pp, 0, PAGESIZE);
cnt.v_zfod++;
pg_setmod(pp, 1); /* mark as modified so pageout writes back */
page_unlock(pp);
return (pp);
}
/*
* This gets called by the seg_vn driver's unload routine,
* which is called by the hat code when it decides to
* unload a particular mapping.
*/
void
anon_unloadmap(ap, ref, mod)
struct anon *ap;
u_int ref, mod;
{
struct vnode *vp;
u_int off;
swap_xlate(ap, &vp, &off);
pvn_unloadmap(vp, off, ref, mod);
}

898
sys/vm/vm_as.c Normal file
View File

@@ -0,0 +1,898 @@
/* @(#)vm_as.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - address spaces.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <machine/mmu.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
/*
* Variables for maintaining the free list of address space structures.
*/
static struct as *as_freelist;
static int as_freeincr = 8;
/*
* Find a segment containing addr. as->a_seglast is used as a
* cache to remember the last segment hit we had here. We
* first check to see if seglast is another hit, and if not we
* determine whether to start from the head of the segment list
* (as->a_segs) or from seglast and in which direction to search.
*/
struct seg *
as_segat(as, addr)
register struct as *as;
register addr_t addr;
{
register struct seg *seg, *sseg;
register forward;
if (as->a_segs == NULL) /* address space has no segments */
return (NULL);
if (as->a_seglast == NULL)
as->a_seglast = as->a_segs;
seg = as->a_seglast;
forward = 0;
if (seg->s_base <= addr) {
if (addr < (seg->s_base + seg->s_size))
return (seg); /* seglast contained addr */
sseg = as->a_segs->s_prev;
if ((addr - seg->s_base) >
((sseg->s_base + sseg->s_size) - addr)) {
seg = sseg;
sseg = as->a_seglast;
} else {
seg = as->a_seglast->s_next;
sseg = as->a_segs;
forward++;
}
} else {
if ((addr - as->a_segs->s_base) > (seg->s_base - addr)) {
seg = seg->s_prev;
sseg = as->a_segs->s_prev;
} else {
sseg = seg;
seg = as->a_segs;
forward++;
}
}
do {
if (seg->s_base <= addr &&
addr < (seg->s_base + seg->s_size)) {
as->a_seglast = seg;
return (seg);
}
if (forward) {
seg = seg->s_next;
if (seg->s_base > addr)
break;
} else {
seg = seg->s_prev;
if (addr > (seg->s_base + seg->s_size))
break;
}
} while (seg != sseg);
return (NULL);
}
/*
* Allocate and initialize an address space data structure.
* We call hat_alloc to allow any machine dependent
* information in the hat structure to be initialized.
*/
struct as *
as_alloc()
{
struct as *as;
as = (struct as *)new_kmem_fast_alloc((caddr_t *)&as_freelist,
sizeof (*as_freelist), as_freeincr, KMEM_SLEEP);
bzero((caddr_t)as, sizeof (*as));
hat_alloc(as);
return (as);
}
/*
* Free an address space data structure.
* Need to free the hat first and then
* all the segments on this as and finally
* the space for the as struct itself.
*/
void
as_free(as)
struct as *as;
{
hat_free(as);
while (as->a_segs != NULL)
seg_free(as->a_segs);
kmem_fast_free((caddr_t *)&as_freelist, (caddr_t)as);
}
struct as *
as_dup(as)
register struct as *as;
{
register struct as *newas;
register struct seg *seg, *sseg, *newseg;
newas = as_alloc();
sseg = seg = as->a_segs;
if (seg != NULL) {
do {
newseg = seg_alloc(newas, seg->s_base, seg->s_size);
if (newseg == NULL) {
as_free(newas);
return (NULL);
}
if ((*seg->s_ops->dup)(seg, newseg)) {
as_free(newas);
return (NULL);
}
seg = seg->s_next;
} while (seg != sseg);
}
return (newas);
}
/*
* Add a new segment to the address space, sorting
* it into the proper place in the linked list.
*/
enum as_res
as_addseg(as, new)
register struct as *as;
register struct seg *new;
{
register struct seg *seg;
register addr_t base;
seg = as->a_segs;
if (seg == NULL) {
new->s_next = new->s_prev = new;
as->a_segs = new;
} else {
/*
* Figure out where to add the segment to keep list sorted
*/
base = new->s_base;
do {
if (base < seg->s_base) {
if (base + new->s_size > seg->s_base)
return (A_BADADDR);
break;
}
if (base < seg->s_base + seg->s_size)
return (A_BADADDR);
seg = seg->s_next;
} while (seg != as->a_segs);
new->s_next = seg;
new->s_prev = seg->s_prev;
seg->s_prev = new;
new->s_prev->s_next = new;
if (base < as->a_segs->s_base)
as->a_segs = new; /* new is at front */
}
return (A_SUCCESS);
}
/*
* Handle a ``fault'' at addr for size bytes.
*/
faultcode_t
as_fault(as, addr, size, type, rw)
struct as *as;
addr_t addr;
u_int size;
enum fault_type type;
enum seg_rw rw;
{
register struct seg *seg;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
register u_int ssize;
register addr_t addrsav;
struct seg *segsav;
faultcode_t res = 0;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (FC_NOMAP);
addrsav = raddr;
segsav = seg;
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base) {
res = FC_NOMAP;
break;
}
}
if (raddr + rsize > seg->s_base + seg->s_size)
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
res = (*seg->s_ops->fault)(seg, raddr, ssize, type, rw);
if (res != 0)
break;
}
/*
* If we failed and we were locking, unlock the pages we faulted.
* (Maybe we should just panic if we are SOFTLOCKing
* or even SOFTUNLOCKing right here...)
*/
if (res != 0 && type == F_SOFTLOCK) {
for (seg = segsav; addrsav < raddr; addrsav += ssize) {
if (addrsav >= seg->s_base + seg->s_size)
seg = seg->s_next; /* goto next seg */
/*
* Now call the fault routine again to perform the
* unlock using S_OTHER instead of the rw variable
* since we never got a chance to touch the pages.
*/
if (raddr > seg->s_base + seg->s_size)
ssize = seg->s_base + seg->s_size - addrsav;
else
ssize = raddr - addrsav;
(void) (*seg->s_ops->fault)(seg, addrsav, ssize,
F_SOFTUNLOCK, S_OTHER);
}
}
return (res);
}
/*
* Asynchronous ``fault'' at addr for size bytes.
*/
faultcode_t
as_faulta(as, addr, size)
struct as *as;
addr_t addr;
u_int size;
{
register struct seg *seg;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
faultcode_t res;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (FC_NOMAP);
for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base)
return (FC_NOMAP);
}
res = (*seg->s_ops->faulta)(seg, raddr);
if (res != 0)
return (res);
}
return (0);
}
/*
* Set the virtual mapping for the interval from [addr : addr + size)
* in address space `as' to have the specified protection.
* It is ok for the range to cross over several segments,
* as long as they are contiguous.
*/
enum as_res
as_setprot(as, addr, size, prot)
struct as *as;
addr_t addr;
u_int size;
u_int prot;
{
register struct seg *seg;
register u_int ssize;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
enum as_res res = A_SUCCESS;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (A_BADADDR);
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base) {
res = A_BADADDR;
break;
}
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
if ((*seg->s_ops->setprot)(seg, raddr, ssize, prot) != 0)
res = A_OPFAIL; /* keep on going */
}
return (res);
}
/*
* Check to make sure that the interval from [addr : addr + size)
* in address space `as' has at least the specified protection.
* It is ok for the range to cross over several segments, as long
* as they are contiguous.
*/
enum as_res
as_checkprot(as, addr, size, prot)
struct as *as;
addr_t addr;
u_int size;
u_int prot;
{
register struct seg *seg;
register u_int ssize;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (A_BADADDR);
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base)
return (A_BADADDR);
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
if ((*seg->s_ops->checkprot)(seg, raddr, ssize, prot) != 0)
return (A_OPFAIL);
}
return (A_SUCCESS);
}
enum as_res
as_unmap(as, addr, size)
register struct as *as;
addr_t addr;
u_int size;
{
register struct seg *seg, *seg_next;
register addr_t raddr, eaddr;
register u_int ssize;
addr_t obase;
raddr = (addr_t)((u_int)addr & PAGEMASK);
eaddr = (addr_t)(((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK);
seg = as->a_segs;
if (seg != NULL) {
for (; raddr < eaddr; seg = seg_next) {
/*
* Save next segment pointer since seg can be
* destroyed during the segment unmap operation.
* We also have to save the old base below.
*/
seg_next = seg->s_next;
if (raddr >= seg->s_base + seg->s_size) {
if (seg->s_base >= seg_next->s_base)
break; /* looked at all segs */
continue; /* not there yet */
}
if (eaddr <= seg->s_base)
break; /* all done */
if (raddr < seg->s_base)
raddr = seg->s_base; /* skip to seg start */
if (eaddr > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = eaddr - raddr;
obase = seg->s_base;
if ((*seg->s_ops->unmap)(seg, raddr, ssize) != 0)
return (A_OPFAIL);
raddr += ssize;
/*
* Carefully check to see if we
* have looked at all the segments.
*/
if (as->a_segs == NULL || obase >= seg_next->s_base)
break;
}
}
return (A_SUCCESS);
}
int
as_map(as, addr, size, crfp, argsp)
struct as *as;
addr_t addr;
u_int size;
int (*crfp)();
caddr_t argsp;
{
register struct seg *seg;
enum as_res res;
int error;
seg = seg_alloc(as, addr, size);
if (seg == NULL)
return (ENOMEM);
/*
* Remember that this was the most recently touched segment.
* If the create routine merges this segment into an existing
* segment, seg_free will adjust the a_seglast hint.
*/
as->a_seglast = seg;
error = (*crfp)(seg, argsp);
/*
* If some error occurred during the create function, destroy
* this segment. Otherwise, if the address space is locked,
* establish memory locks for the new segment. Translate
* error returns as appropriate.
*/
if (error)
seg_free(seg);
else if (as->a_paglck) {
res = as_ctl(as, seg->s_base, seg->s_size, MC_LOCK, (caddr_t)0);
if (res == A_RESOURCE)
error = EAGAIN;
else if (res != A_SUCCESS)
error = EIO;
if (error)
(void) as_unmap(as, addr, size);
}
return (error);
}
/*
* Find a hole of at least size minlen within [base, base+len).
* If flags specifies AH_HI, the hole will have the highest possible address
* in the range. Otherwise, it will have the lowest possible address.
* If flags specifies AH_CONTAIN, the hole will contain the address addr.
* If an adequate hole is found, base and len are set to reflect the part of
* the hole that is within range, and A_SUCCESS is returned. Otherwise,
* A_OPFAIL is returned.
* XXX This routine is not correct when base+len overflows addr_t.
*/
/* VARARGS5 */
enum as_res
as_hole(as, minlen, basep, lenp, flags, addr)
struct as *as;
register u_int minlen;
addr_t *basep;
u_int *lenp;
int flags;
addr_t addr;
{
register addr_t lobound = *basep;
register addr_t hibound = lobound + *lenp;
register struct seg *sseg = as->a_segs;
register struct seg *lseg, *hseg;
register addr_t lo, hi;
register int forward;
if (sseg == NULL)
if (valid_va_range(basep, lenp, minlen, flags & AH_DIR))
return (A_SUCCESS);
else
return (A_OPFAIL);
/*
* Set up to iterate over all the inter-segment holes in the given
* direction. lseg is NULL for the lowest-addressed hole and hseg is
* NULL for the highest-addressed hole. If moving backwards, we reset
* sseg to denote the highest-addressed segment.
*/
forward = (flags & AH_DIR) == AH_LO;
if (forward) {
lseg = NULL;
hseg = sseg;
} else {
sseg = sseg->s_prev;
hseg = NULL;
lseg = sseg;
}
for (;;) {
/*
* Set lo and hi to the hole's boundaries. (We should really
* use MAXADDR in place of hibound in the expression below,
* but can't express it easily; using hibound in its place is
* harmless.)
*/
lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
hi = (hseg == NULL) ? hibound : hseg->s_base;
/*
* If the iteration has moved past the interval from lobound
* to hibound it's pointless to continue.
*/
if ((forward && lo > hibound) || (!forward && hi < lobound))
break;
else if (lo > hibound || hi < lobound)
goto cont;
/*
* Candidate hole lies at least partially within the allowable
* range. Restrict it to fall completely within that range,
* i.e., to [max(lo, lobound), min(hi, hibound)).
*/
if (lo < lobound)
lo = lobound;
if (hi > hibound)
hi = hibound;
/*
* Verify that the candidate hole is big enough and meets
* hardware constraints.
*/
*basep = lo;
*lenp = hi - lo;
if (valid_va_range(basep, lenp, minlen,
forward ? AH_LO : AH_HI) &&
((flags & AH_CONTAIN) == 0 ||
(*basep <= addr && *basep + *lenp > addr)))
return (A_SUCCESS);
cont:
/*
* Move to the next hole.
*/
if (forward) {
lseg = hseg;
if (lseg == NULL)
break;
hseg = hseg->s_next;
if (hseg == sseg)
hseg = NULL;
} else {
hseg = lseg;
if (hseg == NULL)
break;
lseg = lseg->s_prev;
if (lseg == sseg)
lseg = NULL;
}
}
return (A_OPFAIL);
}
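/*
 * A minimal usage sketch for as_hole() above (the search limit and
 * the placement policy are assumptions): find the highest hole of at
 * least `len' bytes below `limit' and place the new mapping at its top.
 */
#ifdef notdef
	addr_t base = 0;
	u_int hlen = (u_int)limit;

	if (as_hole(as, len, &base, &hlen, AH_HI, (addr_t)0) == A_SUCCESS)
		addr = base + hlen - len;
#endif /* notdef */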
/*
* Return the next range within [base, base+len) that is backed
* with "real memory". Skip holes and non-seg_vn segments.
* We're lazy and only return one segment at a time.
*/
enum as_res
as_memory(as, basep, lenp)
struct as *as;
addr_t *basep;
u_int *lenp;
{
register struct seg *seg, *sseg, *cseg = NULL;
register addr_t addr, eaddr, segend;
/* XXX - really want as_segatorabove? */
if (as->a_seglast == NULL)
as->a_seglast = as->a_segs;
addr = *basep;
eaddr = addr + *lenp;
sseg = seg = as->a_seglast;
if (seg != NULL) {
do {
if (seg->s_ops != &segvn_ops)
continue;
if (seg->s_base <= addr &&
addr < (segend = (seg->s_base + seg->s_size))) {
/* found a containing segment */
as->a_seglast = seg;
*basep = addr;
if (segend > eaddr)
*lenp = eaddr - addr;
else
*lenp = segend - addr;
return (A_SUCCESS);
} else if (seg->s_base > addr) {
if (cseg == NULL ||
cseg->s_base > seg->s_base)
/* save closest seg above */
cseg = seg;
}
} while ((seg = seg->s_next) != sseg);
}
if (cseg == NULL) /* ??? no segments in address space? */
return (A_OPFAIL);
/*
* Only found a close segment, see if there's
* a valid range we can return.
*/
if (cseg->s_base > eaddr)
return (A_BADADDR); /* closest segment is out of range */
as->a_seglast = cseg;
*basep = cseg->s_base;
if (cseg->s_base + cseg->s_size > eaddr)
*lenp = eaddr - cseg->s_base; /* segment contains eaddr */
else
*lenp = cseg->s_size; /* segment is between addr and eaddr */
return (A_SUCCESS);
}
/*
* Swap the pages associated with the address space as out to
* secondary storage, returning the number of bytes actually
* swapped.
*
* If we are not doing a "hard" swap (i.e. we're just getting rid
* of a deadwood process), unlock the segu, making it available to be
* paged out.
*
* The value returned is intended to correlate well with the process's
* memory requirements. Its usefulness for this purpose depends on
* how well the segment-level routines do at returning accurate
* information.
*/
u_int
as_swapout(as, hardswap)
struct as *as;
short hardswap;
{
register struct seg *seg, *sseg;
register u_int swpcnt = 0;
/*
* Kernel-only processes have given up their address
* spaces. Of course, we shouldn't be attempting to
* swap out such processes in the first place...
*/
if (as == NULL)
return (0);
/*
* Free all mapping resources associated with the address
* space. The segment-level swapout routines capitalize
* on this unmapping by scavenging pages that have become
* unmapped here.
*/
hat_free(as);
/*
* Call the swapout routines of all segments in the address
* space to do the actual work, accumulating the amount of
* space reclaimed.
*/
sseg = seg = as->a_segs;
if (hardswap && seg != NULL) {
do {
register struct seg_ops *ov = seg->s_ops;
/* for "soft" swaps, should we sync out segment instead? XXX */
if (ov->swapout != NULL)
swpcnt += (*ov->swapout)(seg);
} while ((seg = seg->s_next) != sseg);
}
return (swpcnt);
}
/*
* Determine whether data from the mappings in interval [addr : addr + size)
* are in the primary memory (core) cache.
*/
enum as_res
as_incore(as, addr, size, vec, sizep)
struct as *as;
addr_t addr;
u_int size;
char *vec;
u_int *sizep;
{
register struct seg *seg;
register u_int ssize;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
u_int isize; /* iteration size */
*sizep = 0;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = ((((u_int)addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (A_BADADDR);
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next;
if (raddr != seg->s_base)
return (A_BADADDR);
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
*sizep += isize =
(*seg->s_ops->incore)(seg, raddr, ssize, vec);
if (isize != ssize)
return (A_OPFAIL);
vec += btoc(ssize);
}
return (A_SUCCESS);
}
/*
* Cache control operations over the interval [addr : addr + size) in
* address space "as".
*/
enum as_res
as_ctl(as, addr, size, func, arg)
struct as *as;
addr_t addr;
u_int size;
int func;
caddr_t arg;
{
register struct seg *seg; /* working segment */
register struct seg *fseg; /* first segment of address space */
register u_int ssize; /* size of seg */
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
enum as_res res; /* recursive result */
int r; /* local result */
/*
* Normalize addresses and sizes.
*/
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
/*
* If these are address space lock/unlock operations, loop over
* all segments in the address space, as appropriate.
*/
if ((func == MC_LOCKAS) || (func == MC_UNLOCKAS)) {
if (func == MC_UNLOCKAS)
as->a_paglck = 0;
else {
if ((int)arg & MCL_FUTURE)
as->a_paglck = 1;
if (((int)arg & MCL_CURRENT) == 0)
return (A_SUCCESS);
}
for (fseg = NULL, seg = as->a_segs; seg != fseg;
seg = seg->s_next) {
if (fseg == NULL)
fseg = seg;
if ((res = as_ctl(as, seg->s_base, seg->s_size,
func == MC_LOCKAS ? MC_LOCK : MC_UNLOCK,
(caddr_t)0)) != A_SUCCESS)
return (res);
}
return (A_SUCCESS);
}
/*
* Get initial segment.
*/
if ((seg = as_segat(as, raddr)) == NULL)
return (A_BADADDR);
/*
* Loop over all segments. If a hole in the address range is
* discovered, then fail. For each segment, perform the appropriate
* control operation.
*/
while (rsize != 0) {
/*
* Make sure there's no hole, calculate the portion
* of the next segment to be operated over.
*/
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next;
if (raddr != seg->s_base)
return (A_BADADDR);
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
/*
* Dispatch on specific function.
*/
switch (func) {
/*
* Synchronize cached data from mappings with backing
* objects.
*/
case MC_SYNC:
if (r = (*seg->s_ops->sync)
(seg, raddr, ssize, (u_int)arg))
return (r == EPERM ? A_RESOURCE : A_OPFAIL);
break;
/*
* Lock pages in memory.
*/
case MC_LOCK:
if (r = (*seg->s_ops->lockop)(seg, raddr, ssize, func))
return (r == EAGAIN ? A_RESOURCE : A_OPFAIL);
break;
/*
* Unlock mapped pages.
*/
case MC_UNLOCK:
(void) (*seg->s_ops->lockop)(seg, raddr, ssize, func);
break;
/*
* Store VM advise for mapped pages in segment layer
*/
case MC_ADVISE:
(void) (*seg->s_ops->advise)(seg, raddr, ssize, arg);
break;
/*
* Can't happen.
*/
default:
panic("as_ctl");
}
rsize -= ssize;
raddr += ssize;
}
return (A_SUCCESS);
}
/*
* Inform the as of translation information associated with the given addr.
* This is currently only called if a_hatcallback == 1.
*/
void
as_hatsync(as, addr, ref, mod, flags)
struct as *as;
addr_t addr;
u_int ref;
u_int mod;
u_int flags;
{
struct seg *seg;
if (seg = as_segat(as, addr))
seg->s_ops->hatsync(seg, addr, ref, mod, flags);
}

122
sys/vm/vm_mp.c Normal file
View File

@@ -0,0 +1,122 @@
/* @(#)vm_mp.c 1.1 94/10/31 */
/*
* Copyright (c) 1986 by Sun Microsystems, Inc.
*/
/*
* VM - multiprocessor/ing support.
*
* Currently the kmon_enter() / kmon_exit() pair implements a
* simple monitor for objects protected by the appropriate lock.
* The kcv_wait() / kcv_broadcast() pair implements a simple
* condition variable which can be used for `sleeping'
* and `waking' inside a monitor if some resource
* is needed which is not available.
*
* XXX - this code is written knowing about the semantics
* of sleep/wakeup and UNIX scheduling on a uniprocessor machine.
*/
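/*
 * A minimal usage sketch (the lock, flag, and object names are
 * assumptions): a consumer waits inside the monitor until a resource
 * becomes available; the producer makes it available and broadcasts.
 */
#ifdef notdef
	/* Consumer: */
	kmon_enter(&obj_lock);
	while (!resource_ready)
		kcv_wait(&obj_lock, (char *)&resource_ready);
	/* ... operate on the protected object ... */
	kmon_exit(&obj_lock);

	/* Producer: */
	kmon_enter(&obj_lock);
	resource_ready = 1;
	kcv_broadcast(&obj_lock, (char *)&resource_ready);
	kmon_exit(&obj_lock);
#endif /* notdef */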
#ifdef KMON_DEBUG
#include <sys/param.h>
#include <vm/mp.h>
#define ISLOCKED 0x1
#define LOCKWANT 0x2
/*
* kmon_enter acts as a type of multiprocess semaphore,
* implementing a monitor where the lock represents
* the ability to operate on the associated object.
* For now, the lock/object association is done
* by convention only.
*/
void
kmon_enter(lk)
kmon_t *lk;
{
int s;
s = spl6();
while ((lk->dummy & ISLOCKED) != 0) {
#ifdef notnow
lk->dummy |= LOCKWANT;
(void) sleep((char *)lk, PSWP+1);
#else notnow
panic("kmon_enter");
#endif notnow
}
lk->dummy |= ISLOCKED;
(void) splx(s);
}
/*
* Release the lock associated with a monitor,
 * waking up anybody that has already decided
* to wait for this lock (monitor).
*/
void
kmon_exit(lk)
kmon_t *lk;
{
int s;
if ((lk->dummy & ISLOCKED) == 0) /* paranoid */
panic("kmon_exit not locked");
s = spl6();
lk->dummy &= ~ISLOCKED;
if ((lk->dummy & LOCKWANT) != 0) {
lk->dummy &= ~LOCKWANT;
wakeup((char *)lk);
}
(void) splx(s);
}
/*
* Wait for the named condition variable.
* Must already have the monitor lock when kcv_wait is called.
*/
void
kcv_wait(lk, cond)
kmon_t *lk;
char *cond;
{
int s;
if ((lk->dummy & ISLOCKED) == 0) /* paranoia */
panic("kcv_wait not locked");
s = spl6();
lk->dummy &= ~ISLOCKED; /* release lock */
(void) sleep(cond, PSWP+1);
if ((lk->dummy & ISLOCKED) != 0) /* more paranoia */
panic("kcv_wait locked");
lk->dummy |= ISLOCKED; /* reacquire lock */
(void) splx(s);
}
/*
* Wake up all processes waiting on the named condition variable.
*
* We just use current UNIX sleep/wakeup semantics to delay the actual
* context switching until later after we have released the lock.
*/
void
kcv_broadcast(lk, cond)
kmon_t *lk;
char *cond;
{
if ((lk->dummy & ISLOCKED) == 0)
panic("kcv_broadcast");
wakeup(cond);
}
#endif /* KMON_DEBUG */
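
/*
 * A small single-threaded model of the ISLOCKED/LOCKWANT bit protocol used
 * by kmon_enter()/kmon_exit() above.  The spl/sleep/wakeup machinery is
 * replaced by asserts and a counter, so only the bit handling is sketched,
 * not the blocking behavior; the sk_ names are local stand-ins.
 */
#include <assert.h>
#include <stdio.h>

#define SK_ISLOCKED	0x1
#define SK_LOCKWANT	0x2

struct sk_kmon { int dummy; };

static int sk_wakeups;			/* counts simulated wakeup() calls */

static void
sk_enter(struct sk_kmon *lk)
{
	/* single-threaded model: the lock must never already be held */
	assert((lk->dummy & SK_ISLOCKED) == 0);
	lk->dummy |= SK_ISLOCKED;
}

static void
sk_exit(struct sk_kmon *lk)
{
	assert((lk->dummy & SK_ISLOCKED) != 0);
	lk->dummy &= ~SK_ISLOCKED;
	if (lk->dummy & SK_LOCKWANT) {	/* someone recorded interest */
		lk->dummy &= ~SK_LOCKWANT;
		sk_wakeups++;		/* stands in for wakeup(lk) */
	}
}

int
main(void)
{
	struct sk_kmon m = { 0 };

	sk_enter(&m);
	m.dummy |= SK_LOCKWANT;		/* pretend another process queued up */
	sk_exit(&m);
	printf("lock word 0x%x, wakeups %d\n", m.dummy, sk_wakeups);
	return (0);
}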

1606
sys/vm/vm_page.c Normal file

File diff suppressed because it is too large

948
sys/vm/vm_pvn.c Normal file
View File

@@ -0,0 +1,948 @@
#ident "@(#)vm_pvn.c 1.1 94/10/31 SMI"
/*
* Copyright (c) 1988, 1989, 1990 by Sun Microsystems, Inc.
*/
/*
* VM - paged vnode.
*
* This file supplies vm support for the vnode operations that deal with pages.
*/
#include <sys/param.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <sys/debug.h>
#include <sys/trace.h>
#include <sys/ucred.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/rm.h>
#include <vm/pvn.h>
#include <vm/page.h>
#include <vm/seg_map.h>
int pvn_nofodklust = 0;
/*
 * Find the largest contiguous block which contains `addr' for file offset
 * `off', while staying within the file system block limits (`vp_off' and
 * `vp_len') and the address space limits, for which no pages currently
 * exist and which maps to consecutive file offsets.
*/
struct page *
pvn_kluster(vp, off, seg, addr, offp, lenp, vp_off, vp_len, isra)
struct vnode *vp;
register u_int off;
register struct seg *seg;
register addr_t addr;
u_int *offp, *lenp;
u_int vp_off, vp_len;
int isra;
{
register int delta, delta2;
register struct page *pp;
struct page *plist = NULL;
addr_t straddr;
int bytesavail;
u_int vp_end;
ASSERT(off >= vp_off && off < vp_off + vp_len);
/*
	 * We only want to do klustering/read ahead if there
	 * are more than minfree pages currently available.
*/
if (freemem - minfree > 0)
bytesavail = ptob(freemem - minfree);
else
bytesavail = 0;
if (bytesavail == 0) {
if (isra)
return ((struct page *)NULL); /* ra case - give up */
else
bytesavail = PAGESIZE; /* just pretending */
}
if (bytesavail < vp_len) {
/*
* Don't have enough free memory for the
* max request, try sizing down vp request.
*/
delta = off - vp_off;
vp_len -= delta;
vp_off += delta;
if (bytesavail < vp_len) {
/*
* Still not enough memory, just settle for
* bytesavail which is at least PAGESIZE.
*/
vp_len = bytesavail;
}
}
vp_end = vp_off + vp_len;
ASSERT(off >= vp_off && off < vp_end);
if (page_exists(vp, off))
return ((struct page *)NULL); /* already have page */
if (vp_len <= PAGESIZE || pvn_nofodklust) {
straddr = addr;
*offp = off;
*lenp = MIN(vp_len, PAGESIZE);
} else {
/* scan forward from front */
for (delta = 0; off + delta < vp_end; delta += PAGESIZE) {
/*
* Call back to the segment driver to verify that
* the klustering/read ahead operation makes sense.
*/
if ((*seg->s_ops->kluster)(seg, addr, delta))
break; /* page not file extension */
if (page_exists(vp, off + delta))
break; /* already have this page */
}
delta2 = delta;
/* scan back from front */
for (delta = 0; off + delta > vp_off; delta -= PAGESIZE) {
if (page_exists(vp, off + delta - PAGESIZE))
break; /* already have the page */
/*
* Call back to the segment driver to verify that
* the klustering/read ahead operation makes sense.
*/
if ((*seg->s_ops->kluster)(seg, addr, delta - PAGESIZE))
break; /* page not eligible */
}
straddr = addr + delta;
*offp = off = off + delta;
*lenp = MAX(delta2 - delta, PAGESIZE);
ASSERT(off >= vp_off);
if ((vp_off + vp_len) < (off + *lenp)) {
ASSERT(vp_end > off);
*lenp = vp_end - off;
}
}
/*
* Allocate pages for <vp, off> at <seg, addr> for delta bytes.
	 * Note that for the non-read ahead case we might not have the
	 * memory available right now, so the rm_allocpage operation could
	 * sleep and someone else might race to this same spot if the
* vnode object was not locked before this routine was called.
*/
delta2 = *lenp;
delta = roundup(delta2, PAGESIZE);
pp = rm_allocpage(seg, straddr, (u_int)delta, 1); /* `pp' list kept */
plist = pp;
do {
pp->p_intrans = 1;
pp->p_pagein = 1;
#ifdef TRACE
{
addr_t taddr = straddr + (off - *offp);
trace3(TR_SEG_KLUSTER, seg, taddr, isra);
trace6(TR_SEG_ALLOCPAGE, seg, taddr, TRC_SEG_UNK,
vp, off, pp);
}
#endif TRACE
if (page_enter(pp, vp, off)) { /* `pp' locked if ok */
/*
* Oops - somebody beat us to the punch
* and has entered the page before us.
* To recover, we use pvn_fail to free up
* all the pages we have already allocated
			 * and we return NULL so that the whole operation
			 * is attempted over again. This should never
			 * happen if the caller of pvn_kluster does
			 * vnode locking to prevent multiple processes
			 * from creating the same pages at the same time.
*/
pvn_fail(plist, B_READ);
return ((struct page *)NULL);
}
off += PAGESIZE;
} while ((pp = pp->p_next) != plist);
return (plist);
}
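
/*
 * A stand-alone model of the forward and backward scans in pvn_kluster()
 * above.  page_exists() and the segment driver's kluster check are replaced
 * by a toy `present' array, so only the window arithmetic is illustrated;
 * SK_PAGESIZE and the offsets are made up.
 */
#include <stdio.h>

#define SK_PAGESIZE	4096

/* which pages of a small file already have struct pages (toy data) */
static int present[8] = { 1, 0, 0, 0, 1, 0, 0, 0 };

int
main(void)
{
	int off = 2 * SK_PAGESIZE;		/* faulting offset */
	int vp_off = 0, vp_len = 8 * SK_PAGESIZE;
	int vp_end = vp_off + vp_len;
	int delta, delta2;

	/* scan forward from the faulting page while pages are missing */
	for (delta = 0; off + delta < vp_end; delta += SK_PAGESIZE)
		if (present[(off + delta) / SK_PAGESIZE])
			break;
	delta2 = delta;

	/* scan backward from the faulting page while pages are missing */
	for (delta = 0; off + delta > vp_off; delta -= SK_PAGESIZE)
		if (present[(off + delta - SK_PAGESIZE) / SK_PAGESIZE])
			break;

	printf("kluster window: offset 0x%x, length 0x%x\n",
	    off + delta, delta2 - delta);	/* 0x1000, 0x3000 */
	return (0);
}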
/*
 * Entry point to be used by page r/w subr's and other such routines which
 * want to report an error and abort a list of pages set up for pageio
 * which do not go through the normal pvn_done processing.
*/
void
pvn_fail(plist, flags)
struct page *plist;
int flags;
{
static struct buf abort_buf;
struct buf *bp;
struct page *pp;
int len;
int s;
len = 0;
pp = plist;
do {
len += PAGESIZE;
} while ((pp = pp->p_next) != plist);
bp = &abort_buf;
s = splimp();
while (bp->b_pages != NULL) {
(void) sleep((caddr_t)&bp->b_pages, PSWP+2);
}
(void) splx(s);
/* ~B_PAGEIO is a flag to pvn_done not to pageio_done the bp */
bp->b_flags = B_ERROR | B_ASYNC | (flags & ~B_PAGEIO);
bp->b_pages = plist;
bp->b_bcount = len;
pvn_done(bp); /* let pvn_done do all the work */
if (bp->b_pages != NULL) {
/* XXX - this should never happen, should it be a panic? */
bp->b_pages = NULL;
}
wakeup((caddr_t)&bp->b_pages);
}
/*
* Routine to be called when pageio's complete.
* Can only be called from process context, not
* from interrupt level.
*/
void
pvn_done(bp)
register struct buf *bp;
{
register struct page *pp;
register int bytes;
pp = bp->b_pages;
/*
* Release any I/O mappings to the pages described by the
* buffer that are finished before processing the completed I/O.
*/
if ((bp->b_flags & B_REMAPPED) && (pp->p_nio <= 1))
bp_mapout(bp);
/*
	 * Handle each page in the I/O operation.
*/
for (bytes = 0; bytes < bp->b_bcount; bytes += PAGESIZE) {
struct vnode *vp;
u_int off;
register int s;
if (pp->p_nio > 1) {
/*
* There were multiple IO requests outstanding
* for this particular page. This can happen
* when the file system block size is smaller
* than PAGESIZE. Since there are more IO
* requests still outstanding, we don't process
* the page given on the buffer now.
*/
if (bp->b_flags & B_ERROR) {
if (bp->b_flags & B_READ) {
trace3(TR_PG_PVN_DONE, pp, pp->p_vnode,
pp->p_offset);
page_abort(pp); /* assumes no waiting */
} else {
pg_setmod(pp, 1);
}
}
pp->p_nio--;
break;
/* real page locked for the other io operations */
}
pp = bp->b_pages;
page_sub(&bp->b_pages, pp);
vp = pp->p_vnode;
off = pp->p_offset;
pp->p_intrans = 0;
pp->p_pagein = 0;
PAGE_RELE(pp);
/*
* Verify the page identity before checking to see
* if the page was freed by PAGE_RELE(). This must
* be protected by splvm() to prevent the page from
* being ripped away at interrupt level.
*/
s = splvm();
if (pp->p_vnode != vp || pp->p_offset != off || pp->p_free) {
(void) splx(s);
continue;
}
(void) splx(s);
/*
* Check to see if the page has an error.
*/
if ((bp->b_flags & (B_ERROR|B_READ)) == (B_ERROR|B_READ)) {
page_abort(pp);
continue;
}
/*
* Check if we are to be doing invalidation.
* XXX - Failed writes with B_INVAL set are
* not handled appropriately.
*/
if ((bp->b_flags & B_INVAL) != 0) {
page_abort(pp);
continue;
}
if ((bp->b_flags & (B_ERROR | B_READ)) == B_ERROR) {
/*
* Write operation failed. We don't want
* to abort (or free) the page. We set
* the mod bit again so it will get
* written back again later when things
* are hopefully better again.
*/
pg_setmod(pp, 1);
}
if (bp->b_flags & B_FREE) {
cnt.v_pgpgout++;
if (pp->p_keepcnt == 0 && pp->p_lckcnt == 0) {
/*
* Check if someone has reclaimed the
* page. If no ref or mod, no one is
* using it so we can free it.
* The rest of the system is careful
* to use the ghost unload flag to unload
* translations set up for IO w/o
* affecting ref and mod bits.
*/
if (pp->p_mod == 0 && pp->p_mapping)
hat_pagesync(pp);
if (!pp->p_ref && !pp->p_mod) {
if (pp->p_mapping)
hat_pageunload(pp);
#ifdef MULTIPROCESSOR
}
/*
* The page may have been modified
* between the hat_pagesync and
* the hat_pageunload, and hat_pageunload
* will have picked up final ref and mod
* bits from the PTEs. So, check 'em again.
*/
if (!pp->p_ref && !pp->p_mod) {
#endif MULTIPROCESSOR
page_free(pp,
(int)(bp->b_flags & B_DONTNEED));
if ((bp->b_flags & B_DONTNEED) == 0)
cnt.v_dfree++;
} else {
page_unlock(pp);
cnt.v_pgrec++;
}
} else {
page_unlock(pp);
}
continue;
}
page_unlock(pp); /* a read or write */
}
/*
* Count pageout operations if applicable. Release the
* buf struct associated with the operation if async & pageio.
*/
if (bp->b_flags & B_FREE)
cnt.v_pgout++;
if ((bp->b_flags & (B_ASYNC | B_PAGEIO)) == (B_ASYNC | B_PAGEIO))
pageio_done(bp);
}
/*
* Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI}
* B_DELWRI indicates that this page is part of a kluster operation and
* is only to be considered if it doesn't involve any waiting here.
* Returns non-zero if page added to dirty list.
*
* NOTE: The caller must ensure that the page is not on the free list.
*/
static int
pvn_getdirty(pp, dirty, flags)
register struct page *pp, **dirty;
int flags;
{
register int s;
struct vnode *vp;
u_int offset;
ASSERT(pp->p_free == 0);
vp = pp->p_vnode;
offset = pp->p_offset;
/*
* If page is logically locked, forget it.
*
* XXX - Can a page locked by some other process be
* written out or invalidated?
*/
if (pp->p_lckcnt != 0)
return (0);
if ((flags & B_DELWRI) != 0 && (pp->p_keepcnt != 0 || pp->p_lock)) {
/*
* This is a klustering case that would
* cause us to block, just give up.
*/
return (0);
}
if (pp->p_intrans && (flags & (B_INVAL | B_ASYNC)) == B_ASYNC) {
/*
* Don't bother waiting for an intrans page if we are not
* doing invalidation and this is an async operation
* (the page will be correct when the current io completes).
*/
return (0);
}
/*
* If i/o is in progress on the page or we have to
* invalidate or free the page, wait for the page keep
* count to go to zero.
*/
if (pp->p_intrans || (flags & (B_INVAL | B_FREE)) != 0) {
if (pp->p_keepcnt != 0) {
page_wait(pp);
/*
* Re-verify page identity since it could have
* changed while we were sleeping.
*/
s = splvm();
if (pp->p_vnode != vp || pp->p_offset != offset) {
/*
* Lost the page - nothing to do?
*/
(void) splx(s);
return (0);
}
(void) splx(s);
/*
* The page has not lost its identity and hence
* should not be on the free list.
*/
ASSERT(pp->p_free == 0);
}
}
page_lock(pp);
/*
* If the page has mappings and it is not the case that the
* page is already marked dirty and we are going to unload
* the page below because we are going to free/invalidate
* it, then we sync current mod bits from the hat layer now.
*/
if (pp->p_mapping && !(pp->p_mod && (flags & (B_FREE | B_INVAL)) != 0))
hat_pagesync(pp);
if (pp->p_mod == 0) {
if ((flags & (B_INVAL | B_FREE)) != 0) {
if (pp->p_mapping)
hat_pageunload(pp);
if ((flags & B_INVAL) != 0) {
page_abort(pp);
return (0);
}
if (pp->p_free == 0) {
if ((flags & B_FREE) != 0) {
page_free(pp, (flags & B_DONTNEED));
return (0);
}
}
}
page_unlock(pp);
return (0);
}
/*
* Page is dirty, get it ready for the write back
* and add page to the dirty list. First unload
* the page if we are going to free/invalidate it.
*/
if (pp->p_mapping && (flags & (B_FREE | B_INVAL)) != 0)
hat_pageunload(pp);
pg_setmod(pp, 0);
pg_setref(pp, 0);
trace3(TR_PG_PVN_GETDIRTY, pp, pp->p_vnode, pp->p_offset);
pp->p_intrans = 1;
/*
* XXX - The `p_pagein' bit is set for asynchronous or
* synchronous invalidates to prevent other processes
* from accessing the page in the window after the i/o is
* complete but before the page is aborted. If this is not
* done, updates to the page before it is aborted will be lost.
*/
pp->p_pagein = (flags & B_INVAL) ? 1 : 0;
PAGE_HOLD(pp);
page_sortadd(dirty, pp);
return (1);
}
/*
* Run down the vplist and handle all pages whose offset is >= off.
* Returns a list of dirty kept pages all ready to be written back.
*
* Assumptions:
* The vp is already locked by the VOP_PUTPAGE routine calling this.
* That the VOP_GETPAGE also locks the vp, and thus no one can
* add a page to the vp list while the vnode is locked.
* Flags are {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
*/
struct page *
pvn_vplist_dirty(vp, off, flags)
register struct vnode *vp;
u_int off;
int flags;
{
register struct page *pp;
register struct page *ppnext;
register struct page *ppsav;
register struct page *ppnextnext;
register int ppsav_wasfree, pp_wasfree;
register int ppsav_age, pp_age;
struct page *dirty;
register int s;
int on_iolist;
s = splvm();
if (vp->v_type == VSOCK || vp->v_type == VCHR ||
(pp = vp->v_pages) == NULL) {
(void) splx(s);
return ((struct page *)NULL);
}
#define PAGE_RECLAIM(pp, wasfree, age) \
{ \
if ((pp)->p_free) { \
age = (pp)->p_age; \
page_reclaim(pp); \
wasfree = 1; \
} else { \
age = wasfree = 0; \
} \
}
#define PAGE_REFREE(pp, wasfree, age) \
{ \
if (wasfree && (pp)->p_keepcnt == 0 && (pp)->p_mapping == NULL) \
page_free(pp, age); \
}
/*
* Traverse the page list. We have to be careful since pages
* can be removed from the vplist while we are looking at it
* (a page being pulled off the free list for something else,
* or an async io operation completing and the page and/or
	 * bp is marked for invalidation) so we have to be careful determining
* that we have examined all the pages. We use ppsav to point
* to the first page that stayed on the vp list after calling
* pvn_getdirty and we PAGE_RECLAIM and PAGE_HOLD to prevent it
* from going away on us. When we PAGE_UNKEEP the page, it will
* go back to the free list if that's where we got it from. We
* also need to PAGE_RECLAIM and PAGE_HOLD the next pp in the
* vplist to prevent it from going away while we are traversing
* the list.
*/
ppnext = NULL;
ppsav = NULL;
ppsav_age = ppsav_wasfree = 0;
pp_age = pp_wasfree = 0;
dirty = NULL;
if (pp->p_vpnext != pp)
ppnext = pp->p_vpnext;
else
ppnext = NULL;
for (;;) {
/* Reclaim and hold the next page */
if (ppnext != NULL) {
if (ppnext->p_free)
page_reclaim(ppnext);
PAGE_HOLD(ppnext);
}
if (pp != NULL) {
PAGE_RECLAIM(pp, pp_wasfree, pp_age);
/* Process the current page */
if (pp->p_offset >= off) {
(void) splx(s);
on_iolist = pvn_getdirty(pp, &dirty, flags);
s = splvm();
} else
on_iolist = 0;
if (pp->p_vnode == vp) {
/*
* If the page identity hasn't changed and
* it isn't dirty, free it if reclaimed
* from the free list.
*/
if (!on_iolist && !pp->p_free)
PAGE_REFREE(pp, pp_wasfree, pp_age);
/*
* If we haven't found a marker before,
* use the current page as our marker.
*/
if (ppsav == NULL) {
ppsav = pp;
PAGE_RECLAIM(ppsav, ppsav_wasfree,
ppsav_age);
PAGE_HOLD(ppsav);
}
}
}
/* If no pages left on list, we're done */
if (ppnext == NULL)
break;
/* Compute the "next" next page */
if (ppnext->p_vpnext != ppnext && ppnext->p_vpnext != ppsav)
ppnextnext = ppnext->p_vpnext;
else
ppnextnext = NULL;
/* Release the next page */
PAGE_RELE(ppnext);
/* If releasing the next page freed it, ignore it */
if (ppnext->p_free) {
ASSERT(ppnext->p_vnode == NULL);
ppnext = NULL;
}
/* Move forward to look at next page */
pp = ppnext;
ppnext = ppnextnext;
}
if (ppsav != NULL) {
PAGE_RELE(ppsav);
if (!ppsav->p_free)
PAGE_REFREE(ppsav, ppsav_wasfree, ppsav_age);
}
(void) splx(s);
return (dirty);
}
#undef PAGE_RECLAIM
#undef PAGE_REFREE
/*
* Used when we need to find a page but don't care about free pages.
*/
static struct page *
pvn_pagefind(vp, off)
register struct vnode *vp;
register u_int off;
{
register struct page *pp;
register int s;
s = splvm();
pp = page_exists(vp, off);
if (pp != NULL && pp->p_free)
pp = NULL;
(void) splx(s);
return (pp);
}
int pvn_range_noklust = 0;
/*
* Use page_find's and handle all pages for this vnode whose offset
 * is >= off and < eoff. This routine will also do klustering out
 * to offlo and offhi, up until a page is not found. We assume
* that offlo <= off and offhi >= eoff.
*
* Returns a list of dirty kept pages all ready to be written back.
*/
struct page *
pvn_range_dirty(vp, off, eoff, offlo, offhi, flags)
register struct vnode *vp;
u_int off, eoff;
u_int offlo, offhi;
int flags;
{
struct page *dirty = NULL;
register struct page *pp;
register u_int o;
register struct page *(*pfind)();
ASSERT(offlo <= off && offhi >= eoff);
off &= PAGEMASK;
eoff = (eoff + PAGEOFFSET) & PAGEMASK;
/*
* If we are not invalidating pages, use the routine,
* pvn_pagefind(), to prevent reclaiming them from the
* free list.
*/
if ((flags & B_INVAL) == 0)
pfind = pvn_pagefind;
else
pfind = page_find;
/* first do all the pages from [off..eoff] */
for (o = off; o < eoff; o += PAGESIZE) {
pp = (*pfind)(vp, o);
if (pp != NULL) {
(void) pvn_getdirty(pp, &dirty, flags);
}
}
if (pvn_range_noklust)
return (dirty);
/* now scan backwards looking for pages to kluster */
for (o = off - PAGESIZE; (int)o >= 0 && o >= offlo; o -= PAGESIZE) {
pp = (*pfind)(vp, o);
if (pp == NULL)
break; /* page not found */
if (pvn_getdirty(pp, &dirty, flags | B_DELWRI) == 0)
break; /* page not added to dirty list */
}
/* now scan forwards looking for pages to kluster */
for (o = eoff; o < offhi; o += PAGESIZE) {
pp = (*pfind)(vp, o);
if (pp == NULL)
break; /* page not found */
if (pvn_getdirty(pp, &dirty, flags | B_DELWRI) == 0)
break; /* page not added to dirty list */
}
return (dirty);
}
/*
* Take care of invalidating all the pages for vnode vp going to size
* vplen. This includes zero'ing out zbytes worth of file beyond vplen.
* This routine should only be called with the vp locked by the file
 * system code so that more pages cannot be added while we sleep here.
*/
void
pvn_vptrunc(vp, vplen, zbytes)
register struct vnode *vp;
register u_int vplen;
u_int zbytes;
{
register struct page *pp;
register int s;
if (vp->v_pages == NULL || vp->v_type == VCHR || vp->v_type == VSOCK)
return;
/*
* Simple case - abort all the pages on the vnode
*/
if (vplen == 0) {
s = splvm();
while ((pp = vp->v_pages) != (struct page *)NULL) {
/*
* When aborting these pages, we make sure that
* we wait to make sure they are really gone.
*/
if (pp->p_keepcnt != 0) {
(void) splx(s);
page_wait(pp);
s = splvm();
if (pp->p_vnode != vp)
continue;
} else {
if (pp->p_free)
page_reclaim(pp);
}
page_lock(pp);
page_abort(pp);
}
(void) splx(s);
return;
}
/*
* Tougher case - have to find all the pages on the
* vnode which need to be aborted or partially zeroed.
*/
/*
* First we get the last page and handle the partially
* zeroing via kernel mappings. This will make the page
* dirty so that we know that when this page is written
* back, the zeroed information will go out with it. If
* the page is not currently in memory, then the kzero
	 * operation will cause it to be brought in. We use kzero
* instead of bzero so that if the page cannot be read in
* for any reason, the system will not panic. We need
* to zero out a minimum of the fs given zbytes, but we
* might also have to do more to get the entire last page.
*/
if (zbytes != 0) {
addr_t addr;
if ((zbytes + (vplen & MAXBOFFSET)) > MAXBSIZE)
panic("pvn_vptrunc zbytes");
addr = segmap_getmap(segkmap, vp, vplen & MAXBMASK);
(void) kzero(addr + (vplen & MAXBOFFSET),
MAX(zbytes, PAGESIZE - (vplen & PAGEOFFSET)));
(void) segmap_release(segkmap, addr, SM_WRITE | SM_ASYNC);
}
/*
* Synchronously abort all pages on the vp list which are
* beyond the new length. The algorithm here is to start
* scanning at the beginning of the vplist until there
* are no pages with an offset >= vplen. If we find such
* a page, we wait for it if it is kept for any reason and
* then we abort it after verifying that it is still a page
* that needs to go away. We assume here that the vplist
* is not messed with at interrupt level.
*/
s = splvm();
again:
for (pp = vp->v_pages; pp != NULL; pp = pp->p_vpnext) {
if (pp->p_offset >= vplen) {
/* need to abort this page */
if (pp->p_keepcnt != 0) {
(void) splx(s);
page_wait(pp);
s = splvm();
/* verify page identity again */
if (pp->p_vnode != vp || pp->p_offset < vplen)
goto again;
} else {
if (pp->p_free)
page_reclaim(pp);
}
page_lock(pp);
page_abort(pp);
goto again; /* start over again */
}
if (pp == pp->p_vpnext || vp->v_pages == pp->p_vpnext)
break;
}
(void) splx(s);
}
/*
* This routine is called when the low level address translation
* code decides to unload a translation. It calls back to the
* segment driver which in many cases ends up here.
*/
/*ARGSUSED*/
void
pvn_unloadmap(vp, offset, ref, mod)
struct vnode *vp;
u_int offset;
u_int ref, mod;
{
/*
* XXX - what is the pvn code going to do w/ this information?
	 * This guy gets called for each loaded page when an executable
* using the segvn driver terminates...
*/
}
/*
* Handles common work of the VOP_GETPAGE routines when more than
* one page must be returned by calling a file system specific operation
* to do most of the work. Must be called with the vp already locked
* by the VOP_GETPAGE routine.
*/
int
pvn_getpages(getapage, vp, off, len, protp, pl, plsz, seg, addr, rw, cred)
int (*getapage)();
struct vnode *vp;
u_int off, len;
u_int *protp;
struct page *pl[];
u_int plsz;
struct seg *seg;
register addr_t addr;
enum seg_rw rw;
struct ucred *cred;
{
register struct page **ppp;
register u_int o, eoff;
u_int sz;
int err;
	ASSERT(plsz >= len); /* ensure that we have enough space */
/*
* Loop one page at a time and let getapage function fill
* in the next page in array. We only allow one page to be
* returned at a time (except for the last page) so that we
* don't have any problems with duplicates and other such
* painful problems. This is a very simple minded algorithm,
* but it does the job correctly. We hope that the cost of a
* getapage call for a resident page that we might have been
* able to get from an earlier call doesn't cost too much.
*/
ppp = pl;
sz = PAGESIZE;
eoff = off + len;
for (o = off; o < eoff; o += PAGESIZE, addr += PAGESIZE) {
if (o + PAGESIZE >= eoff) {
/*
			 * Last time through - allow all of
* what's left of the pl[] array to be used.
*/
sz = plsz - (o - off);
}
err = (*getapage)(vp, o, protp, ppp, sz, seg, addr, rw, cred);
if (err) {
/*
* Release any pages we already got.
*/
if (o > off && pl != NULL) {
for (ppp = pl; *ppp != NULL; *ppp++ = NULL) {
PAGE_RELE(*ppp);
}
}
break;
}
if (pl != NULL)
ppp++;
}
return (err);
}
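
/*
 * A sketch of how the loop in pvn_getpages() above doles out the page-list
 * space: one page at a time, except that the final iteration is allowed to
 * use whatever is left of plsz.  The sizes here are made-up toy values.
 */
#include <stdio.h>

#define SK_PAGESIZE	4096u

int
main(void)
{
	unsigned int off = 0, len = 3 * SK_PAGESIZE;
	unsigned int plsz = 5 * SK_PAGESIZE;	/* caller gave extra room */
	unsigned int o, sz, eoff = off + len;

	sz = SK_PAGESIZE;
	for (o = off; o < eoff; o += SK_PAGESIZE) {
		if (o + SK_PAGESIZE >= eoff)
			sz = plsz - (o - off);	/* last page gets the rest */
		printf("offset 0x%x: pass sz=0x%x to getapage\n", o, sz);
	}
	return (0);
}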

87
sys/vm/vm_rm.c Normal file
View File

@@ -0,0 +1,87 @@
/* @(#)vm_rm.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
/*
* VM - resource manager
* As you can see, it needs lots of work
*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/page.h>
/*ARGSUSED*/
struct page *
rm_allocpage(seg, addr, len, canwait)
struct seg *seg;
addr_t addr;
u_int len;
int canwait;
{
return (page_get(len, canwait));
}
/*
* This routine is called when we couldn't allocate an anon slot.
 * For now, we simply print out a message and kill off the process
 * that happened to get burned.
*
* XXX - swap reservation needs lots of work so this only happens in
* `nice' places or we need to have a method to allow for recovery.
*/
void
rm_outofanon()
{
struct proc *p;
p = u.u_procp;
printf("Sorry, pid %d (%s) was killed due to lack of swap space\n",
p->p_pid, u.u_comm);
/*
* To be sure no looping (e.g. in vmsched trying to
* swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
* to swap it out.
*/
psignal(p, SIGKILL);
p->p_flag |= SULOCK;
/*NOTREACHED*/
}
void
rm_outofhat()
{
panic("out of mapping resources"); /* XXX */
/*NOTREACHED*/
}
/*
* Yield the memory claim requirement for an address space.
*
* This is currently implemented as the number of active hardware
* translations that have page structures. Therefore, it can
 * underestimate the traditional resident set size, e.g., if the
 * physical page is present and the hardware translation is missing;
 * and it can overestimate the rss, e.g., if there are active
* translations to a frame buffer with page structs.
* Also, it does not take sharing into account.
*/
int
rm_asrss(as)
struct as *as;
{
return (as == (struct as *)NULL ? 0 : as->a_rss);
}

132
sys/vm/vm_seg.c Normal file
View File

@@ -0,0 +1,132 @@
/* @(#)vm_seg.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
/*
* VM - segment management.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/mmu.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/mp.h>
/*
* Variables for maintaining the free list of segment structures.
*/
static struct seg *seg_freelist;
static int seg_freeincr = 24;
/*
* Allocate a segment to cover [base, base+size)
* and attach it to the specified address space.
*/
struct seg *
seg_alloc(as, base, size)
struct as *as;
register addr_t base;
register u_int size;
{
register struct seg *new;
addr_t segbase;
u_int segsize;
segbase = (addr_t)((u_int)base & PAGEMASK);
segsize =
(((u_int)(base + size) + PAGEOFFSET) & PAGEMASK) - (u_int)segbase;
if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
return ((struct seg *)NULL); /* bad virtual addr range */
new = (struct seg *)new_kmem_fast_alloc((caddr_t *)&seg_freelist,
sizeof (*seg_freelist), seg_freeincr, KMEM_SLEEP);
bzero((caddr_t)new, sizeof (*new));
if (seg_attach(as, segbase, segsize, new) < 0) {
kmem_fast_free((caddr_t *)&seg_freelist, (caddr_t)new);
return ((struct seg *)NULL);
}
/* caller must fill in ops, data */
return (new);
}
/*
* Attach a segment to the address space. Used by seg_alloc()
* and for kernel startup to attach to static segments.
*/
int
seg_attach(as, base, size, seg)
struct as *as;
addr_t base;
u_int size;
struct seg *seg;
{
seg->s_as = as;
seg->s_base = base;
seg->s_size = size;
if (as_addseg(as, seg) == A_SUCCESS)
return (0);
return (-1);
}
/*
 * Free the segment from its associated as.
*/
void
seg_free(seg)
register struct seg *seg;
{
register struct as *as = seg->s_as;
if (as->a_segs == seg)
as->a_segs = seg->s_next; /* go to next seg */
if (as->a_segs == seg)
as->a_segs = NULL; /* seg list is gone */
else {
seg->s_prev->s_next = seg->s_next;
seg->s_next->s_prev = seg->s_prev;
}
if (as->a_seglast == seg)
as->a_seglast = as->a_segs;
/*
* If the segment private data field is NULL,
* then segment driver is not attached yet.
*/
if (seg->s_data != NULL)
(*seg->s_ops->free)(seg);
kmem_fast_free((caddr_t *)&seg_freelist, (caddr_t)seg);
}
/*
* Translate addr into page number within segment.
*/
u_int
seg_page(seg, addr)
struct seg *seg;
addr_t addr;
{
return ((u_int)((addr - seg->s_base) >> PAGESHIFT));
}
/*
* Return number of pages in segment.
*/
u_int
seg_pages(seg)
struct seg *seg;
{
return ((u_int)((seg->s_size + PAGEOFFSET) >> PAGESHIFT));
}
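
/*
 * A tiny sketch of the seg_page()/seg_pages() arithmetic above, assuming
 * 4K pages.  SK_PAGESHIFT and SK_PAGEOFFSET are local stand-ins for the
 * machine-dependent kernel definitions, and the segment values are made up.
 */
#include <stdio.h>

#define SK_PAGESHIFT	12
#define SK_PAGESIZE	(1 << SK_PAGESHIFT)
#define SK_PAGEOFFSET	(SK_PAGESIZE - 1)

int
main(void)
{
	unsigned int s_base = 0x20000, s_size = 0x5800;	/* toy segment */
	unsigned int addr = 0x23450;			/* address within it */

	printf("page index %u, pages in segment %u\n",
	    (addr - s_base) >> SK_PAGESHIFT,		/* seg_page */
	    (s_size + SK_PAGEOFFSET) >> SK_PAGESHIFT);	/* seg_pages */
	return (0);
}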

648
sys/vm/vm_swap.c Normal file
View File

@@ -0,0 +1,648 @@
/* @(#)vm_swap.c 1.1 94/10/31 SMI */
#ident "$SunId: @(#)vm_swap.c 1.2 91/02/19 SMI [RMTC] $"
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* Virtual swap device
*
* The virtual swap device consists of the logical concatenation of one
* or more physical swap areas. It provides a logical array of anon
* slots, each of which corresponds to a page of swap space.
*
* Each physical swap area has an associated anon array representing
* its physical storage. These anon arrays are logically concatenated
* sequentially to form the overall swap device anon array. Thus, the
* offset of a given entry within this logical array is computed as the
* sum of the sizes of each area preceding the entry plus the offset
* within the area containing the entry.
*
* The anon array entries for unused swap slots within an area are
* linked together into a free list. Allocation proceeds by finding a
* suitable area (attempting to balance use among all the areas) and
* then returning the first free entry within the area. Thus, there's
* no linear relation between offset within the swap device and the
* address (within its segment(s)) of the page that the slot backs;
* instead, it's an arbitrary one-to-one mapping.
*
* Associated with each swap area is a swapinfo structure. These
* structures are linked into a linear list that determines the
* ordering of swap areas in the logical swap device. Each contains a
* pointer to the corresponding anon array, the area's size, and its
* associated vnode.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/bootconf.h>
#include <sys/trace.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/swap.h>
/* these includes are used for the "fake" swap support of /dev/drum */
#include <sun/mem.h>
#include <specfs/snode.h>
static struct swapinfo *silast;
struct swapinfo *swapinfo;
/*
* To balance the load among multiple swap areas, we don't allow
* more than swap_maxcontig allocations to be satisfied from a
* single swap area before moving on to the next swap area. This
* effectively "interleaves" allocations among the many swap areas.
*/
int swap_maxcontig = 1024 * 1024 / PAGESIZE; /* 1MB of pages */
extern int klustsize; /* from spec_vnodeops.c */
int swap_order = 1; /* see swap_alloc,free */
#define MINIROOTSIZE 14000 /* ~7 Meg */
/*
* Initialize a new swapinfo structure.
*/
static int
swapinfo_init(vp, npages, skip)
struct vnode *vp;
register u_int npages;
u_int skip;
{
register struct anon *ap, *ap2;
register struct swapinfo **sipp, *nsip;
for (sipp = &swapinfo; nsip = *sipp; sipp = &nsip->si_next)
if (nsip->si_vp == vp)
return (EBUSY); /* swap device already in use */
nsip = (struct swapinfo *)new_kmem_zalloc(
sizeof (struct swapinfo), KMEM_SLEEP);
nsip->si_vp = vp;
nsip->si_size = ptob(npages);
/*
* Don't indirect through NULL if called with npages < skip (too tacky)
*/
if (npages < skip)
npages = skip;
/*
* Don't sleep when allocating memory for the anon structures.
* This allocation can be large for very large swap spaces and we
	 * cannot count on such a contiguous chunk becoming available
* in the heap.
*/
nsip->si_anon = (struct anon *)new_kmem_zalloc(
npages * sizeof (struct anon), KMEM_NOSLEEP);
if (!nsip->si_anon) {
kmem_free(nsip, sizeof(struct swapinfo));
return (ENOMEM);
}
nsip->si_eanon = &nsip->si_anon[npages - 1];
#ifdef RECORD_USAGE
/*
* Monitoring of swap space usage is enabled, so malloc
* a parallel array to hold the PID responsible for
* causing the anon page to be created.
*/
nsip->si_pid = (short *)
new_kmem_zalloc(npages * sizeof (short), KMEM_NOSLEEP);
if (!nsip->si_pid) {
kmem_free(nsip->si_anon, npages * sizeof (struct anon));
kmem_free(nsip, sizeof(struct swapinfo));
return (ENOMEM);
}
#endif RECORD_USAGE
npages -= skip;
/*
* ap2 now points to the first usable slot in the swap area.
* Set up free list links so that the head of the list is at
* the front of the usable portion of the array.
*/
ap = nsip->si_eanon;
ap2 = nsip->si_anon + skip;
while (--ap >= ap2)
ap->un.an_next = ap + 1;
if (npages == 0) /* if size was <= skip */
nsip->si_free = NULL;
else
nsip->si_free = ap + 1;
anoninfo.ani_free += npages;
anoninfo.ani_max += npages;
*sipp = nsip;
if (silast == NULL) /* first swap device */
silast = nsip;
return (0);
}
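
/*
 * A stand-alone model of the free-list construction in swapinfo_init()
 * above: slots [skip .. npages-1] are chained through their next pointers
 * so that the head of the list is the first usable slot.  The structure
 * and sizes here are trimmed local stand-ins, not the kernel's.
 */
#include <stdio.h>

struct sk_anon {
	struct sk_anon *an_next;
};

#define SK_NPAGES	8
#define SK_SKIP		1		/* fence off the first slot */

int
main(void)
{
	static struct sk_anon anon[SK_NPAGES];	/* zero-filled, like kmem_zalloc */
	struct sk_anon *ap, *ap2, *eanon, *freelist;
	int n;

	eanon = &anon[SK_NPAGES - 1];
	ap = eanon;
	ap2 = anon + SK_SKIP;
	while (--ap >= ap2)			/* link slot i to slot i+1 */
		ap->an_next = ap + 1;
	freelist = ap + 1;			/* head is the first usable slot */

	for (n = 0, ap = freelist; ap != NULL; ap = ap->an_next)
		n++;
	printf("%d free slots, head is slot %d\n", n, (int)(freelist - anon));
	return (0);
}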
/*
* Initialize a swap vnode.
*/
int
swap_init(vp)
struct vnode *vp;
{
struct vattr vattr;
u_int skip;
int err;
err = VOP_GETATTR(vp, &vattr, u.u_cred); /* XXX - u.u_cred? */
if (err) {
printf("swap_init: getattr failed, errno %d\n", err);
return (err);
}
/*
* To prevent swap I/O requests from crossing the boundary
* between swap areas, we erect a "fence" between areas by
* not allowing the first page of each swap area to be used.
* (This also prevents us from scribbling on the disk label
* if the swap partition is the first partition on the disk.)
* This may not be strictly necessary, since swap_blksize also
* prevents requests from crossing the boundary.
*
* If swapping on the root filesystem, don't put swap blocks that
* correspond to the miniroot filesystem on the swap free list.
*/
if (rootvp == vp)
skip = btoc(roundup(dbtob(MINIROOTSIZE), klustsize));
else
skip = 1;
err = swapinfo_init(vp, (u_int)btop(vattr.va_size), skip);
if (!err)
vp->v_flag |= VISSWAP;
return (err);
}
/*
* This routine is used to fake npages worth of swap space.
* These pages will have no backing and cannot be paged out any where.
*/
swap_cons(npages)
u_int npages;
{
if (swapinfo_init((struct vnode *)NULL, npages, 0) != 0)
panic("swap_cons");
}
/*
 * Points to (or close to) the location of the last block handed to
 * swap_free. The theory is that if you free one in this area,
 * you'll probably free more, so use the hint as a starting point.
 * hint is reset on each free to the block that precedes the one
 * freed (or the block freed, if we can't find the block before it).
 * It is also reset if it points at a block that is allocated.
*
* XXX - swap_free and swap_alloc both manipulate hint; the free
* lists are now protected with splswap(). Don't call into these routines
* from higher level interrupts!
*/
static struct {
struct anon *ap; /* pointer to the last freed */
struct swapinfo *sip; /* swap list for which hint is valid */
} hint;
int swap_hit; /* hint helped */
int swap_miss; /* hint was no good */
/*
* Allocate a single page from the virtual swap device.
*/
struct anon *
swap_alloc()
{
struct swapinfo *sip = silast;
struct anon *ap;
do {
ap = sip->si_free;
if (ap) {
/*
* can't condition this on swap_order since some
* idiot might turn it on and off. It's not cool
* to have the hint point at an allocated block.
*/
if (hint.sip == sip && hint.ap == ap)
hint.sip = NULL;
sip->si_free = ap->un.an_next;
if (++sip->si_allocs >= swap_maxcontig) {
sip->si_allocs = 0;
if (sip == silast) {
silast = sip->si_next;
if (silast == NULL)
silast = swapinfo;
}
} else {
silast = sip;
}
# ifdef TRACE
{
struct vnode *vp;
u_int off;
swap_xlate(ap, &vp, &off);
trace3(TR_MP_SWAP, vp, off, ap);
}
# endif TRACE
#ifdef RECORD_USAGE
if (u.u_procp) {
/* swap monitoring is on - record the current PID */
sip->si_pid[ap - sip->si_anon] = u.u_procp->p_pid;
}
#endif RECORD_USAGE
return (ap);
}
/*
* No more free anon slots here.
*/
sip->si_allocs = 0;
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
return ((struct anon *)NULL);
}
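
/*
 * A toy model of the interleaving policy in swap_alloc() above: stay on one
 * swap area for at most swap_maxcontig allocations, then rotate silast to
 * the next area.  Free-list handling is omitted; each area is just a pair
 * of counters, and the constants are made up.
 */
#include <stdio.h>

#define SK_NAREAS	3
#define SK_MAXCONTIG	4		/* stand-in for swap_maxcontig */

static int sk_allocs[SK_NAREAS];	/* si_allocs for each area */
static int sk_taken[SK_NAREAS];		/* slots handed out by each area */
static int sk_last;			/* index of silast */

static void
sk_alloc(void)
{
	sk_taken[sk_last]++;
	if (++sk_allocs[sk_last] >= SK_MAXCONTIG) {
		sk_allocs[sk_last] = 0;
		sk_last = (sk_last + 1) % SK_NAREAS;	/* rotate to next area */
	}
}

int
main(void)
{
	int i;

	for (i = 0; i < 20; i++)
		sk_alloc();
	for (i = 0; i < SK_NAREAS; i++)
		printf("area %d: %d slots\n", i, sk_taken[i]);
	return (0);
}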
/*
* Free a swap page.
* List is maintained in sorted order. Worst case is a linear search on the
* list; we maintain a hint to mitigate this.
*
* Pointing the hint at the most recently free'd anon struct makes it
* really fast to free anon pages in ascending order.
*
* Pointing the hint at the anon struct that is just *before* this makes
* it really fast to free anon pages in descending order, at nearly zero
* cost.
*
 * This algorithm points the hint at the anon struct that points to
 * the one most recently free'd. When freeing a block of anon structs
 * presented in ascending order, the hint advances one block behind
 * the blocks as they are free'd. When freeing a block of anon structs
 * presented in descending order -- which happens if a large hunk of
* memory is allocated in reverse order then free'd in forward order,
* common enough to be a problem -- the hint remains pointing at the
* anon struct that ends up pointing at each of the free'd blocks
* in order. This is worth an example.
*
* Assume anons #2 and #9 are free, the hint points to anon #2, and
* #2's "next" pointer goes to #9. Now, we present a set of swap_free
* requests for blocks #8 through #3, in descending order. This results
* in a series of hits on the hint, which just keeps pointing at #2.
* The previous algorithm would have set the hint to each block as
* it came in, resulting in worst-case behavior as the list had to
* be scanned from the front.
*/
void
swap_free(ap)
struct anon *ap;
{
register struct swapinfo *sip = silast;
register struct anon *tap, **tapp;
register struct anon *tap_hint;
/*
* Find the swap area containing ap and then put
* ap at the head of that area's free list.
*/
do {
if (sip->si_anon <= ap && ap <= sip->si_eanon) {
/*
ap->un.an_next = sip->si_free;
sip->si_free = ap;
*/
/*
* old unordered way
*/
if (!swap_order) {
ap->un.an_next = sip->si_free;
sip->si_free = ap;
#ifdef RECORD_USAGE
/* Swap monitoring is on - undo the PID */
sip->si_pid[ap - sip->si_anon] = 0;
#endif RECORD_USAGE
return;
}
/*
* Do it in order; use hint if possible
*/
tap = hint.ap;
if (hint.sip == sip && tap < ap) {
/*
* The anon we are freeing
* follows the hint tap somewhere.
* save the hint and advance
* to the next free anon.
*/
tapp = &tap->un.an_next;
tap_hint = tap;
tap = tap->un.an_next;
swap_hit++;
} else {
/*
* Wrong swapinfo, or
* the anon being free'd
				 * precedes the hint. We
				 * must start scanning
* from the front of the
* list. The best hint we
* can seed with is the
* anon we are freeing.
*/
tapp = &sip->si_free;
tap = sip->si_free;
tap_hint = ap;
swap_miss++;
}
/*
* advance tap until it is greater
* than the incoming anon.
*/
while (tap && tap < ap) {
tapp = &tap->un.an_next;
tap_hint = tap;
tap = tap->un.an_next;
}
*tapp = ap;
ap->un.an_next = tap;
#ifdef RECORD_USAGE
/* Swap monitoring is on - undo the PID */
sip->si_pid[ap - sip->si_anon] = 0;
#endif RECORD_USAGE
hint.sip = sip;
hint.ap = tap_hint;
return;
}
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
panic("swap_free");
/* NOTREACHED */
}
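
/*
 * A user-level model of the ordered insert in swap_free() above: the free
 * list is kept sorted by address and the hint points at the node preceding
 * the one most recently free'd, so runs of frees in ascending or descending
 * order rarely rescan the list from the front.  The structures, slot count
 * and free pattern are toy stand-ins; the example reproduces the #2/#9
 * scenario from the comment above.
 */
#include <stdio.h>
#include <stddef.h>

struct sk_anon {
	struct sk_anon *an_next;
};

#define SK_NSLOTS	10

static struct sk_anon sk_slots[SK_NSLOTS];
static struct sk_anon *sk_freelist;
static struct sk_anon *sk_hint;		/* node preceding the last free */
static int sk_hits, sk_misses;

static void
sk_free(struct sk_anon *ap)
{
	struct sk_anon **tapp, *tap, *tap_hint;

	if (sk_hint != NULL && sk_hint < ap) {	/* freeing past the hint */
		tap_hint = sk_hint;
		tapp = &sk_hint->an_next;
		tap = sk_hint->an_next;
		sk_hits++;
	} else {				/* rescan from the front */
		tap_hint = ap;
		tapp = &sk_freelist;
		tap = sk_freelist;
		sk_misses++;
	}
	while (tap != NULL && tap < ap) {	/* find the insertion point */
		tap_hint = tap;
		tapp = &tap->an_next;
		tap = tap->an_next;
	}
	*tapp = ap;
	ap->an_next = tap;
	sk_hint = tap_hint;
}

int
main(void)
{
	struct sk_anon *ap;
	int i;

	sk_free(&sk_slots[2]);
	sk_free(&sk_slots[9]);
	for (i = 8; i >= 3; i--)		/* free #8 through #3 */
		sk_free(&sk_slots[i]);
	printf("hint hits %d, misses %d; free list:", sk_hits, sk_misses);
	for (ap = sk_freelist; ap != NULL; ap = ap->an_next)
		printf(" %d", (int)(ap - sk_slots));
	printf("\n");
	return (0);
}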
/*
* Return the <vnode, offset> pair
* corresponding to the given anon struct.
*/
void
swap_xlate(ap, vpp, offsetp)
struct anon *ap;
struct vnode **vpp;
u_int *offsetp;
{
register struct swapinfo *sip = silast;
do {
if (sip->si_anon <= ap && ap <= sip->si_eanon) {
*offsetp = ptob(ap - sip->si_anon);
*vpp = sip->si_vp;
return;
}
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
panic("swap_xlate");
/* NOTREACHED */
}
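
/*
 * A small sketch of the translation done by swap_xlate() above: the swap
 * offset is just the slot's index within its area's anon array shifted up
 * by the page size.  The structure and array here are toy stand-ins.
 */
#include <stdio.h>

#define SK_PAGESHIFT	12

struct sk_anon { int an_refcnt; };

int
main(void)
{
	static struct sk_anon sk_area[16];	/* one swap area's slot array */
	struct sk_anon *ap = &sk_area[5];	/* some allocated slot */
	unsigned int offset;

	offset = (unsigned int)(ap - sk_area) << SK_PAGESHIFT;	/* ptob(ap - si_anon) */
	printf("slot %d -> swap offset 0x%x\n", (int)(ap - sk_area), offset);
	return (0);
}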
/*
* Like swap_xlate, but return a status instead of panic'ing.
* Used by dump routines when we know we may be corrupted.
*/
swap_xlate_nopanic(ap, vpp, offsetp)
struct anon *ap;
struct vnode **vpp;
u_int *offsetp;
{
register struct swapinfo *sip = swapinfo;
do {
if (sip->si_anon <= ap && ap <= sip->si_eanon) {
*offsetp = (ap - sip->si_anon) << PAGESHIFT;
*vpp = sip->si_vp;
return (1);
}
} while (sip = sip->si_next);
/* Couldn't find it; return failure */
return (0);
}
/*
* Return the anon struct corresponding for the given
* <vnode, offset> if it is part of the virtual swap device.
*/
struct anon *
swap_anon(vp, offset)
struct vnode *vp;
u_int offset;
{
register struct swapinfo *sip = silast;
if (vp && sip) {
do {
if (vp == sip->si_vp && offset < sip->si_size)
return (sip->si_anon + (offset >> PAGESHIFT));
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
}
/*
* Note - we don't return the anon structure for
* fake'd anon slots which have no real vp.
*/
return ((struct anon *)NULL);
}
/*
* swread and swwrite implement the /dev/drum device, an indirect,
* user visible, device to allow reading of the (virtual) swap device.
*/
/*ARGSUSED*/
swread(dev, uio)
dev_t dev;
struct uio *uio;
{
return (sw_rdwr(uio, UIO_READ));
}
/*ARGSUSED*/
swwrite(dev, uio)
dev_t dev;
struct uio *uio;
{
return (sw_rdwr(uio, UIO_WRITE));
}
/*
* Handle all the work of reading "fake" swap pages that are in memory.
*/
static int
fake_sw_rdwr(uio, rw, cred)
register struct uio *uio;
enum uio_rw rw;
struct ucred *cred;
{
struct page *pp;
struct vnode *memvp;
int nbytes;
u_int off;
int err;
extern int mem_no;
nbytes = uio->uio_resid;
off = uio->uio_offset;
memvp = makespecvp(makedev(mem_no, M_MEM), VCHR);
do {
/*
* Find the page corresponding to the "fake" name
* and then read the corresponding page from /dev/mem.
*/
pp = page_find((struct vnode *)NULL, (u_int)(off & PAGEMASK));
if (pp == NULL) {
err = EIO;
break;
}
uio->uio_offset = ptob(page_pptonum(pp)) + (off & PAGEOFFSET);
if ((off & PAGEOFFSET) == 0)
uio->uio_resid = MIN(PAGESIZE, nbytes);
else
uio->uio_resid = min(ptob(btopr(off)) - off,
(u_int)nbytes);
nbytes -= uio->uio_resid;
off += uio->uio_resid;
err = VOP_RDWR(memvp, uio, rw, 0, cred);
} while (err == 0 && nbytes > 0 && uio->uio_resid == 0);
VN_RELE(memvp);
return (err);
}
/*
* Common routine used to break up reads and writes to the
* (virtual) swap device to the underlying vnode(s). This is
 * used to implement the user visible /dev/drum interface.
*/
static int
sw_rdwr(uio, rw)
register struct uio *uio;
enum uio_rw rw;
{
register struct swapinfo *sip = swapinfo;
int nbytes = uio->uio_resid;
u_int off = 0;
int err = 0;
do {
if (uio->uio_offset >= off &&
uio->uio_offset < off + sip->si_size)
break;
off += sip->si_size;
} while (sip = sip->si_next);
if (sip) {
uio->uio_offset -= off;
do {
uio->uio_resid = MIN(sip->si_size - uio->uio_offset,
nbytes);
nbytes -= uio->uio_resid;
if (sip->si_vp)
err = VOP_RDWR(sip->si_vp, uio, rw, 0,
u.u_cred);
else
err = fake_sw_rdwr(uio, rw, u.u_cred);
uio->uio_offset = 0;
} while (err == 0 && nbytes > 0 && uio->uio_resid == 0 &&
(sip = sip->si_next));
uio->uio_resid = nbytes + uio->uio_resid;
}
return (err);
}
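
/*
 * A stand-alone sketch of how sw_rdwr() above maps a logical /dev/drum
 * offset onto the concatenated swap areas: walk the list adding up si_size
 * until the offset falls inside an area, then carve the request into
 * per-area pieces.  The area sizes and request are made-up toy values.
 */
#include <stdio.h>

#define SK_NAREAS	3

/* si_size of each swap area, in bytes (toy values) */
static unsigned int sk_size[SK_NAREAS] = { 0x8000, 0x4000, 0x10000 };

int
main(void)
{
	unsigned int offset = 0x9000;	/* logical offset into /dev/drum */
	unsigned int resid = 0xc000;	/* bytes requested */
	unsigned int base = 0;
	int i;

	/* find the area containing the starting offset */
	for (i = 0; i < SK_NAREAS; i++) {
		if (offset >= base && offset < base + sk_size[i])
			break;
		base += sk_size[i];
	}
	/* carve the request into per-area pieces */
	for (offset -= base; i < SK_NAREAS && resid != 0; i++, offset = 0) {
		unsigned int chunk = sk_size[i] - offset;

		if (chunk > resid)
			chunk = resid;
		printf("area %d: offset 0x%x, 0x%x bytes\n", i, offset, chunk);
		resid -= chunk;
	}
	return (0);
}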
/*
 * System call swapon(name) enables swapping on device name.
* Return EBUSY if already swapping on this device.
*/
swapon()
{
register struct a {
char *name;
} *uap = (struct a *)u.u_ap;
struct vnode *vp;
if (!suser())
return;
uap = (struct a *)u.u_ap;
if (u.u_error = lookupname(uap->name, UIOSEG_USER, FOLLOW_LINK,
(struct vnode **)NULL, &vp))
return;
switch (vp->v_type) {
case VBLK: {
struct vnode *nvp;
nvp = bdevvp(vp->v_rdev);
VN_RELE(vp);
vp = nvp;
/*
* Call the partition's open routine, to give it a chance to
* check itself for consistency (e.g., for scrambled disk
* labels). (The open isn't otherwise required.)
*/
if (u.u_error = VOP_OPEN(&vp, FREAD|FWRITE, u.u_cred))
goto out;
break;
}
case VREG:
if (vp->v_vfsp->vfs_flag & VFS_RDONLY) {
u.u_error = EROFS;
goto out;
}
if (u.u_error = VOP_ACCESS(vp, VREAD|VWRITE, u.u_cred))
goto out;
if (u.u_error = VOP_OPEN(&vp, FREAD|FWRITE, u.u_cred))
goto out;
break;
case VDIR:
u.u_error = EISDIR;
goto out;
case VCHR:
case VSOCK:
default:
u.u_error = EOPNOTSUPP;
goto out;
}
u.u_error = swap_init(vp);
out:
if (u.u_error) {
VN_RELE(vp);
}
}

28
sys/vm/vpage.h Normal file
View File

@@ -0,0 +1,28 @@
/* @(#)vpage.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_vpage_h
#define _vm_vpage_h
/*
* VM - Information per virtual page.
*/
struct vpage {
u_int vp_prot: 4; /* see <sys/mman.h> prot flags */
u_int vp_advice: 3; /* see <sys/mman.h> madvise flags */
u_int vp_pplock: 1; /* physical page locked by me */
/*
* The following two are for use with a
* local page replacement algorithm (someday).
*/
u_int vp_ref: 1; /* reference bit */
u_int vp_mod: 1; /* (maybe) modify bit, from hat */
u_int vp_ski_ref: 1; /* ski reference bit */
u_int vp_ski_mod: 1; /* ski modified bit */
u_int : 4;
};
#endif /*!_vm_vpage_h*/