commit ff309bfe1c (contained in: seta75D)
Date: 2021-10-11 18:37:13 -03:00
14130 changed files with 3180272 additions and 0 deletions

sys/vm/Makefile (new file, 13 lines)

@@ -0,0 +1,13 @@
#
# @(#)Makefile 1.1 94/10/31 SMI
#
HFILES = anon.h as.h faultcode.h hat.h mp.h page.h pvn.h rm.h \
seg.h seg_dev.h seg_map.h seg_vn.h seg_u.h swap.h vpage.h
HDIR=$(DESTDIR)/usr/include/vm
install_h: $(HFILES) FRC
install -d -m 755 $(HDIR)
install -m 444 $(HFILES) $(HDIR)
FRC:

sys/vm/anon.h (new file, 60 lines)

@@ -0,0 +1,60 @@
/* @(#)anon.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_anon_h
#define _vm_anon_h
/*
* VM - Anonymous pages.
*/
/*
* Each page which is anonymous, either in memory or in swap,
* has an anon structure. The structure's primary purpose is
* to hold a reference count so that we can detect when the last
* copy of a multiply-referenced copy-on-write page goes away.
* When on the free list, un.next gives the next anon structure
* in the list. Otherwise, un.page is a ``hint'' which probably
* points to the current page. This must be explicitly checked
* since the page can be moved underneath us. This is simply
* an optimization to avoid having to look up each page when
* doing things like fork.
*/
struct anon {
int an_refcnt;
union {
struct page *an_page; /* ``hint'' to the real page */
struct anon *an_next; /* free list pointer */
} un;
};
struct anoninfo {
u_int ani_max; /* maximum anon pages available */
u_int ani_free; /* number of anon pages currently free */
u_int ani_resv; /* number of anon pages reserved */
};
#ifdef KERNEL
/*
* Flags for anon_private.
*/
#define STEAL_PAGE 0x01 /* page can be stolen */
#define LOCK_PAGE 0x02 /* page must be ``logically'' locked */
extern struct anoninfo anoninfo;
struct anon *anon_alloc();
void anon_dup(/* old, new, size */);
void anon_free(/* app, size */);
int anon_getpage(/* app, protp, pl, sz, seg, addr, rw, cred */);
struct page *anon_private(/* app, seg, addr, opp, oppflags */);
struct page *anon_zero(/* seg, addr, app */);
void anon_unloadmap(/* ap, ref, mod */);
int anon_resv(/* size */);
void anon_unresv(/* size */);
#endif KERNEL
#endif /*!_vm_anon_h*/
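
For illustration, a hedged sketch of how the un.an_page ``hint'' described in the comment above might be validated before use. The routine name and the (vp, off) naming of the anon slot's backing object are assumptions made for the example, not part of this header.

#include <sys/param.h>
#include <sys/vnode.h>
#include <vm/anon.h>
#include <vm/page.h>

/*
 * Illustrative only: re-check the identity of the hinted page, since
 * the page can be moved underneath us; fall back to a real lookup.
 */
struct page *
anon_hintpage(ap, vp, off)
        struct anon *ap;
        struct vnode *vp;       /* backing object for the anon slot */
        u_int off;              /* offset within that object */
{
        register struct page *pp = ap->un.an_page;

        if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off)
                return (pp);                    /* hint still valid */
        return (page_lookup(vp, off));          /* hint stale or missing */
}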

sys/vm/as.h (new file, 79 lines)

@@ -0,0 +1,79 @@
/* @(#)as.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_as_h
#define _vm_as_h
#include <vm/faultcode.h>
/*
* VM - Address spaces.
*/
/*
* Each address space consists of a list of sorted segments
* and machine dependent address translation information.
*
* All the hard work is in the segment drivers and the
* hardware address translation code.
*/
struct as {
u_int a_lock: 1;
u_int a_want: 1;
u_int a_paglck: 1; /* lock mappings into address space */
u_int a_ski: 1; /* enables recording of page info for ski */
u_int a_hatcallback: 1; /* enables hat callback processing */
u_int : 11;
u_short a_keepcnt; /* number of `keeps' */
struct seg *a_segs; /* segments in this address space */
struct seg *a_seglast; /* last segment hit on the address space */
int a_rss; /* memory claim for this address space */
struct hat a_hat; /* hardware address translation */
};
#ifdef KERNEL
/*
* Types of failure for the various address space operations.
*/
enum as_res {
A_SUCCESS, /* operation successful */
A_BADADDR, /* illegal address encountered */
A_OPFAIL, /* segment operation failure */
A_RESOURCE, /* resource exhaustion */
};
/*
* Flags for as_hole.
*/
#define AH_DIR 0x1 /* direction flag mask */
#define AH_LO 0x0 /* find lowest hole */
#define AH_HI 0x1 /* find highest hole */
#define AH_CONTAIN 0x2 /* hole must contain `addr' */
/*
* Flags for as_hatsync
*/
#define AHAT_UNLOAD 0x01 /* Translation being unloaded */
struct seg *as_segat(/* as, addr */);
struct as *as_alloc();
void as_free(/* as */);
struct as *as_dup(/* as */);
enum as_res as_addseg(/* as, seg */);
faultcode_t as_fault(/* as, addr, size, type, rw */);
faultcode_t as_faulta(/* as, addr, size */);
enum as_res as_setprot(/* as, addr, size, prot */);
enum as_res as_checkprot(/* as, addr, size, prot */);
enum as_res as_unmap(/* as, addr, size */);
int as_map(/* as, addr, size, crfp, crargsp */);
enum as_res as_hole(/* as, minlen, basep, lenp, flags, addr */);
enum as_res as_memory(/* as, addrp, sizep */);
u_int as_swapout(/* as */);
enum as_res as_incore(/* as, addr, size, vecp, sizep */);
enum as_res as_ctl(/* as, addr, size, func, arg */);
void as_hatsync(/* as, addr, ref, mod, flags */);
#endif KERNEL
#endif /*!_vm_as_h*/
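
As a rough illustration of how these entry points fit together, a hedged sketch (not taken from this source) of the way a trap handler could hand a faulting address to the as layer and map the result back to an errno-style value. The wrapper routine is invented; PAGEMASK/PAGESIZE come from the machine parameters, and the fault enums come from <vm/seg.h>.

#include <sys/param.h>
#include <sys/errno.h>
#include <vm/as.h>
#include <vm/seg.h>             /* enum fault_type, enum seg_rw */

/*
 * Illustrative only: resolve one page fault through as_fault() and
 * convert the faultcode_t into 0 (retry the access) or an errno.
 */
int
sketch_resolve_fault(as, addr, rw)
        struct as *as;
        addr_t addr;
        enum seg_rw rw;
{
        faultcode_t fc;

        fc = as_fault(as, (addr_t)((u_int)addr & PAGEMASK), PAGESIZE,
            F_INVAL, rw);
        if (fc == 0)
                return (0);             /* translation loaded */
        return (FC_CODE(fc) == FC_OBJERR ? FC_ERRNO(fc) : EFAULT);
}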

sys/vm/dbx_vm.c (new file, 30 lines)

@@ -0,0 +1,30 @@
#ifndef lint
static char sccsid[] = "@(#)dbx_vm.c 1.1 94/10/31 SMI";
#endif
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
/*
* This file is optionally brought in by including a
* "psuedo-device dbx" line in the config file. It is
* compiled using the "-g" flag to generate structure
* information which is used by dbx with the -k flag.
*/
#include <sys/param.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/mp.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/swap.h>
#include <vm/vpage.h>

sys/vm/faultcode.h (new file, 33 lines)

@@ -0,0 +1,33 @@
/* @(#)faultcode.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_faultcode_h
#define _vm_faultcode_h
/*
* This file describes the "code" that is delivered during
* SIGBUS and SIGSEGV exceptions. It also describes the data
* type returned by vm routines which handle faults.
*
* If FC_CODE(fc) == FC_OBJERR, then FC_ERRNO(fc) contains the errno value
* returned by the underlying object mapped at the fault address.
*/
#define FC_HWERR 0x1 /* misc hardware error (e.g. bus timeout) */
#define FC_ALIGN 0x2 /* hardware alignment error */
#define FC_NOMAP 0x3 /* no mapping at the fault address */
#define FC_PROT 0x4 /* access exceeded current protections */
#define FC_OBJERR 0x5 /* underlying object returned errno value */
#define FC_MAKE_ERR(e) (((e) << 8) | FC_OBJERR)
#define FC_CODE(fc) ((fc) & 0xff)
#define FC_ERRNO(fc) ((unsigned)(fc) >> 8)
#ifndef LOCORE
typedef int faultcode_t; /* type returned by vm fault routines */
#endif LOCORE
#endif /*!_vm_faultcode_h*/
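
To make the encoding concrete, a small self-contained example (user level, with the macros repeated so it builds on its own) of packing an errno into a faultcode_t and unpacking it again. Everything here beyond the three macros is invented for the demonstration.

#include <errno.h>
#include <stdio.h>

#define FC_OBJERR       0x5
#define FC_MAKE_ERR(e)  (((e) << 8) | FC_OBJERR)
#define FC_CODE(fc)     ((fc) & 0xff)
#define FC_ERRNO(fc)    ((unsigned)(fc) >> 8)

typedef int faultcode_t;

int
main()
{
        faultcode_t fc = FC_MAKE_ERR(EIO);      /* object returned EIO */

        if (FC_CODE(fc) == FC_OBJERR)
                printf("object error, errno %u\n", FC_ERRNO(fc));
        return (0);
}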

sys/vm/hat.h (new file, 86 lines)

@@ -0,0 +1,86 @@
/* @(#)hat.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_hat_h
#define _vm_hat_h
/*
* VM - Hardware Address Translation management.
*
* This file describes the machine independent interfaces to
* the hardware address translation management routines. Other
* machine specific interfaces and structures are defined
* in <machine/vm_hat.h>. The hat layer manages the address
* translation hardware as a cache driven by calls from the
* higher levels of the VM system.
*/
#include <machine/vm_hat.h>
#ifdef KERNEL
/*
* One time hat initialization
*/
void hat_init();
/*
* Operations on hat resources for an address space:
* - initialize any needed hat structures for the address space
* - free all hat resources now owned by this address space
*
* N.B. - The hat structure is guaranteed to be zeroed when created.
* The hat layer can choose to define hat_alloc as a macro to avoid
* a subroutine call if this is sufficient initialization.
*/
#ifndef hat_alloc
void hat_alloc(/* as */);
#endif
void hat_free(/* as */);
/*
* Operations on a named address within a segment:
* - load/lock the given page struct
* - load/lock the given page frame number
* - unlock the given address
*
* (Perhaps we need an interface to load several pages at once?)
*/
void hat_memload(/* seg, addr, pp, prot, lock */);
void hat_devload(/* seg, addr, pf, prot, lock */);
void hat_unlock(/* seg, addr */);
/*
* Operations over an address range:
* - change protections
* - change mapping to refer to a new segment
* - unload mapping
*/
void hat_chgprot(/* seg, addr, len, prot */);
void hat_newseg(/* seg, addr, len, nseg */);
void hat_unload(/* seg, addr, len */);
/*
* Operations that work on all active translation for a given page:
* - unload all translations to page
* - get hw stats from hardware into page struct and reset hw stats
*/
void hat_pageunload(/* pp */);
void hat_pagesync(/* pp */);
/*
* Operations that return physical page numbers (ie - used by mapin):
* - return the pfn for kernel virtual address
* - return the pfn for arbitrary virtual address
*/
u_int hat_getkpfnum(/* addr */);
/*
* XXX - This one is not yet implemented - not yet needed
* u_int hat_getpfnum(as, addr);
*/
#endif KERNEL
#endif /*!_vm_hat_h*/

sys/vm/mp.h (new file, 39 lines)

@@ -0,0 +1,39 @@
/* @(#)mp.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_mp_h
#define _vm_mp_h
/*
* VM - multiprocessor/ing support.
*
* Currently the kmon_enter() / kmon_exit() pair implements a
* simple monitor for objects protected by the appropriate lock.
* The kcv_wait() / kcv_broadcast() pair implements a simple
* condition variable which can be used for `sleeping'
* and `waking' inside a monitor if some resource
* is needed which is not available.
*/
typedef struct kmon_t {
u_int dummy;
} kmon_t;
#define lock_init(lk) (lk)->dummy = 0
#ifndef KMON_DEBUG
#define kmon_enter(a)
#define kmon_exit(a)
#define kcv_wait(lk, cond) (void) sleep(cond, PSWP+1)
#define kcv_broadcast(lk, cond) wakeup(cond)
#else
void kmon_enter(/* lk */);
void kmon_exit(/* lk */);
void kcv_wait(/* lk, cond */);
void kcv_broadcast(/* lk, cond */);
#endif /*!KMON_DEBUG*/
#endif /*!_vm_mp_h*/
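
A hedged sketch of the monitor discipline this header intends: enter the monitor, wait while the guarded condition does not hold, then leave. The `pool' structure and both routines are invented for the example; with KMON_DEBUG undefined the enter/exit calls expand to nothing and kcv_wait/kcv_broadcast reduce to sleep/wakeup on the condition address.

#include <sys/param.h>
#include <vm/mp.h>

struct pool {                           /* invented example resource */
        kmon_t p_lock;
        int p_navail;
};

void
pool_take(p)
        register struct pool *p;
{
        kmon_enter(&p->p_lock);
        while (p->p_navail == 0)
                kcv_wait(&p->p_lock, (caddr_t)&p->p_navail);
        p->p_navail--;
        kmon_exit(&p->p_lock);
}

void
pool_put(p)
        register struct pool *p;
{
        kmon_enter(&p->p_lock);
        p->p_navail++;
        kcv_broadcast(&p->p_lock, (caddr_t)&p->p_navail);
        kmon_exit(&p->p_lock);
}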

sys/vm/page.h (new file, 166 lines)

@@ -0,0 +1,166 @@
/* @(#)page.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_page_h
#define _vm_page_h
/*
* VM - Ram pages.
*
* Each physical page has a page structure, which is used to maintain
* these pages as a cache. A page can be found via a hashed lookup
* based on the [vp, offset]. If a page has an [vp, offset] identity,
* then it is entered on a doubly linked circular list off the
* vnode using the vpnext/vpprev pointers. If the p_free bit
* is on, then the page is also on a doubly linked circular free
* list using next/prev pointers. If the p_intrans bit is on,
* then the page is currently being read in or written back.
* In this case, the next/prev pointers are used to link the
* pages together for a consecutive IO request. If the page
* is in transit and the page is coming in (pagein), then you
* must wait for the IO to complete before you can attach to the page.
*
*/
struct page {
u_int p_lock: 1, /* locked for name manipulation */
p_want: 1, /* page wanted */
p_free: 1, /* on free list */
p_intrans: 1, /* data for [vp, offset] intransit */
p_gone: 1, /* page has been released */
p_mod: 1, /* software copy of modified bit */
p_ref: 1, /* software copy of reference bit */
p_pagein: 1, /* being paged in, data not valid */
p_nc: 1, /* do not cache page */
p_age: 1; /* on age free list */
u_int p_nio : 6; /* # of outstanding io reqs needed */
u_short p_keepcnt; /* number of page `keeps' */
struct vnode *p_vnode; /* logical vnode this page is from */
u_int p_offset; /* offset into vnode for this page */
struct page *p_hash; /* hash by [vnode, offset] */
struct page *p_next; /* next page in free/intrans lists */
struct page *p_prev; /* prev page in free/intrans lists */
struct page *p_vpnext; /* next page in vnode list */
struct page *p_vpprev; /* prev page in vnode list */
caddr_t p_mapping; /* hat specific translation info */
u_short p_lckcnt; /* number of locks on page data */
u_short p_pad; /* steal bits from here */
};
/*
* Each segment of physical memory is described by a memseg struct. Within
* a segment, memory is considered contiguous. The segments form a linked
* list to describe all of physical memory. The list is ordered by increasing
* physical addresses.
*/
struct memseg {
struct page *pages, *epages; /* [from, to) in page array */
u_int pages_base, pages_end; /* [from, to) in page numbers */
struct memseg *next; /* next segment in list */
};
#ifdef KERNEL
#define PAGE_HOLD(pp) (pp)->p_keepcnt++
#define PAGE_RELE(pp) page_rele(pp)
#define PAGE_HASHSZ page_hashsz
extern int page_hashsz;
extern struct page **page_hash;
extern struct page *pages; /* array of all page structures */
extern struct page *epages; /* end of all pages */
extern struct memseg *memsegs; /* list of memory segments */
/*
* Variables controlling locking of physical memory.
*/
extern u_int pages_pp_locked; /* physical pages actually locked */
extern u_int pages_pp_claimed; /* physical pages reserved */
extern u_int pages_pp_maximum; /* tuning: lock + claim <= max */
/*
* Page frame operations.
*/
void page_init(/* pp, num, base */);
void page_reclaim(/* pp */);
struct page *page_find(/* vp, off */);
struct page *page_exists(/* vp, off */);
struct page *page_lookup(/* vp, off */);
int page_enter(/* pp, vp, off */);
void page_abort(/* pp */);
void page_free(/* pp */);
void page_unfree(/* pp */);
struct page *page_get();
void page_rele(/* pp */);
void page_lock(/* pp */);
void page_unlock(/* pp */);
int page_pp_lock(/* pp, claim, check_resv */);
void page_pp_unlock(/* pp, claim */);
int page_addclaim(/* claim */);
void page_subclaim(/* claim */);
void page_hashout(/* pp */);
void page_add(/* ppp, pp */);
void page_sub(/* ppp, pp */);
void page_sortadd(/* ppp, pp */);
void page_wait(/* pp */);
u_int page_pptonum(/* pp */);
struct page *page_numtopp(/* pfnum */);
struct page *page_numtookpp(/* pfnum */);
#endif KERNEL
/*
* Page hash table is a power-of-two in size, externally chained
* through the hash field. PAGE_HASHAVELEN is the average length
* desired for this chain, from which the size of the page_hash
* table is derived at boot time and stored in the kernel variable
* page_hashsz. In the hash function it is given by PAGE_HASHSZ.
* PAGE_HASHVPSHIFT is defined so that 1 << PAGE_HASHVPSHIFT is
* the approximate size of a vnode struct.
*/
#define PAGE_HASHAVELEN 4
#define PAGE_HASHVPSHIFT 6
#define PAGE_HASHFUNC(vp, off) \
((((off) >> PAGESHIFT) + ((int)(vp) >> PAGE_HASHVPSHIFT)) & \
(PAGE_HASHSZ - 1))
/*
* Macros for setting reference and modify bit values. These exist as macros
* so that tracing code has the opportunity to note the new values.
*/
#ifdef TRACE
#ifdef lint
#define pg_setref(pp, val) \
if (pp) { \
trace2(TR_PG_SETREF, (pp), (val)); \
(pp)->p_ref = (val); \
} else
#define pg_setmod(pp, val) \
if (pp) { \
trace2(TR_PG_SETMOD, (pp), (val)); \
(pp)->p_mod = (val); \
} else
#else lint
#define pg_setref(pp, val) \
if (1) { \
trace2(TR_PG_SETREF, (pp), (val)); \
(pp)->p_ref = (val); \
} else
#define pg_setmod(pp, val) \
if (1) { \
trace2(TR_PG_SETMOD, (pp), (val)); \
(pp)->p_mod = (val); \
} else
#endif lint
#else TRACE
#define pg_setref(pp, val) (pp)->p_ref = (val)
#define pg_setmod(pp, val) (pp)->p_mod = (val)
#endif TRACE
#endif /*!_vm_page_h*/
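
A hedged sketch of the lookup the hash layout above supports; this is essentially the loop that page_find() is declared to provide (the same walk appears inline in seg_map.c below), written out here only to show PAGE_HASHFUNC in use. page_hash and page_hashsz are the kernel globals declared above, so KERNEL context is assumed.

#include <sys/param.h>
#include <sys/vnode.h>
#include <vm/page.h>

/*
 * Illustrative only: find the page for [vp, off] by walking its
 * hash chain; returns NULL if no such page is cached.
 */
struct page *
sketch_page_find(vp, off)
        struct vnode *vp;
        u_int off;
{
        register struct page *pp;

        for (pp = page_hash[PAGE_HASHFUNC(vp, off)]; pp != NULL;
            pp = pp->p_hash)
                if (pp->p_vnode == vp && pp->p_offset == off)
                        break;
        return (pp);
}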

sys/vm/pvn.h (new file, 50 lines)

@@ -0,0 +1,50 @@
/* @(#)pvn.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_pvn_h
#define _vm_pvn_h
/*
* VM - paged vnode.
*
* The VM system manages memory as a cache of paged vnodes.
* This file describes the interfaces to common subroutines
* used to help implement the VM/file system routines.
*/
struct page *pvn_kluster(/* vp, off, seg, addr, offp, lenp, vp_off,
vp_len, isra */);
void pvn_fail(/* plist, flags */);
void pvn_done(/* bp */);
struct page *pvn_vplist_dirty(/* vp, off, flags */);
struct page *pvn_range_dirty(/* vp, off, eoff, offlo, offhi, flags */);
void pvn_vptrunc(/* vp, vplen, zbytes */);
void pvn_unloadmap(/* vp, offset, ref, mod */);
int pvn_getpages(/* getapage, vp, off, len, protp, pl, plsz, seg, addr,
rw, cred */);
/*
* When requesting pages from the getpage routines, pvn_getpages will
* allocate space to return PVN_GETPAGE_NUM pages which map PVN_GETPAGE_SZ
* worth of bytes. These numbers are chosen to be the minimum of the max's
* given in terms of bytes and pages.
*/
#define PVN_MAX_GETPAGE_SZ 0x10000 /* getpage size limit */
#define PVN_MAX_GETPAGE_NUM 0x8 /* getpage page limit */
#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
#define PVN_GETPAGE_SZ ptob(PVN_MAX_GETPAGE_NUM)
#define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
#else
#define PVN_GETPAGE_SZ PVN_MAX_GETPAGE_SZ
#define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
#endif
#endif /*!_vm_pvn_h*/
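
To see how the #if above picks "the minimum of the max's", a small standalone sketch that evaluates both branches for an assumed page size (8192 here; any power-of-two PAGESIZE works the same way). The btop/ptob macros are spelled out locally for the example and are not the kernel's own.

#include <stdio.h>

#define PAGESIZE            8192            /* assumption for the example */
#define ptob(n)             ((n) * PAGESIZE)
#define btop(b)             ((b) / PAGESIZE)
#define PVN_MAX_GETPAGE_SZ  0x10000
#define PVN_MAX_GETPAGE_NUM 0x8

int
main()
{
        unsigned sz, num;

        if (PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE) {
                sz = ptob(PVN_MAX_GETPAGE_NUM); /* page limit is tighter */
                num = PVN_MAX_GETPAGE_NUM;
        } else {
                sz = PVN_MAX_GETPAGE_SZ;        /* byte limit is tighter */
                num = btop(PVN_MAX_GETPAGE_SZ);
        }
        printf("getpage limit: %u bytes, %u pages\n", sz, num);
        return (0);                             /* 65536 bytes, 8 pages at 8K */
}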

sys/vm/rm.h (new file, 19 lines)

@@ -0,0 +1,19 @@
/* @(#)rm.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_rm_h
#define _vm_rm_h
/*
* VM - Resource Management.
*/
struct page *rm_allocpage(/* seg, addr */);
void rm_outofanon();
void rm_outofhat();
int rm_asrss(/* as */);
#endif /*!_vm_rm_h*/

sys/vm/seg.h (new file, 93 lines)

@@ -0,0 +1,93 @@
/* @(#)seg.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_h
#define _vm_seg_h
#include <vm/faultcode.h>
#include <vm/mp.h>
/*
* VM - Segments.
*/
/*
* An address space contains a set of segments, managed by drivers.
* Drivers support mapped devices, sharing, copy-on-write, etc.
*
* The seg structure contains a lock to prevent races, the base virtual
* address and size of the segment, a back pointer to the containing
* address space, pointers to maintain a circularly doubly linked list
* of segments in the same address space, and procedure and data hooks
* for the driver. The seg list on the address space is sorted by
* ascending base addresses and overlapping segments are not allowed.
*
* After a segment is created, faults may occur on pages of the segment.
* When a fault occurs, the fault handling code must get the desired
* object and set up the hardware translation to the object. For some
* objects, the fault handling code also implements copy-on-write.
*
* When the hat wants to unload a translation, it can call the unload
* routine which is responsible for processing reference and modify bits.
*/
struct seg {
kmon_t s_lock;
addr_t s_base; /* base virtual address */
u_int s_size; /* size in bytes */
struct as *s_as; /* containing address space */
struct seg *s_next; /* next seg in this address space */
struct seg *s_prev; /* prev seg in this address space */
struct seg_ops {
int (*dup)(/* seg, newsegp */);
int (*unmap)(/* seg, addr, len */);
int (*free)(/* seg */);
faultcode_t (*fault)(/* seg, addr, len, type, rw */);
faultcode_t (*faulta)(/* seg, addr */);
int (*hatsync)(/* seg, addr, ref, mod, flags */);
int (*setprot)(/* seg, addr, size, prot */);
int (*checkprot)(/* seg, addr, size, prot */);
int (*kluster)(/* seg, addr, delta */);
u_int (*swapout)(/* seg */);
int (*sync)(/* seg, addr, size, flags */);
int (*incore)(/* seg, addr, size, vec */);
int (*lockop)(/* seg, addr, size, op */);
int (*advise)(/* seg, addr, size, behav */);
} *s_ops;
caddr_t s_data; /* private data for instance */
};
/*
* Fault information passed to the seg fault handling routine.
* The F_SOFTLOCK and F_SOFTUNLOCK are used by software
* to lock and unlock pages for physical I/O.
*/
enum fault_type {
F_INVAL, /* invalid page */
F_PROT, /* protection fault */
F_SOFTLOCK, /* software requested locking */
F_SOFTUNLOCK, /* software requested unlocking */
};
/*
* seg_rw gives the access type for a fault operation
*/
enum seg_rw {
S_OTHER, /* unknown or not touched */
S_READ, /* read access attempted */
S_WRITE, /* write access attempted */
S_EXEC, /* execution access attempted */
};
#ifdef KERNEL
/*
* Generic segment operations
*/
struct seg *seg_alloc(/* as, base, size */);
int seg_attach(/* as, base, size, seg */);
void seg_free(/* seg */);
u_int seg_page(/* seg, addr */);
u_int seg_pages(/* seg */);
#endif KERNEL
#endif /*!_vm_seg_h*/
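
Every generic VM operation reaches a driver through the s_ops vector above. As a rough illustration (the wrapper itself is not part of this file), this is how the address-space layer can forward a fault to whichever driver owns the segment.

#include <sys/param.h>
#include <vm/seg.h>

/*
 * Illustrative only: dispatch a fault to the owning segment driver
 * (seg_dev, seg_map, seg_vn, ...) through the per-segment ops vector.
 */
faultcode_t
sketch_seg_fault(seg, addr, len, type, rw)
        register struct seg *seg;
        addr_t addr;
        u_int len;
        enum fault_type type;
        enum seg_rw rw;
{
        return ((*seg->s_ops->fault)(seg, addr, len, type, rw));
}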

sys/vm/seg_dev.c (new file, 476 lines)

@@ -0,0 +1,476 @@
/* @(#)seg_dev.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - segment of a mapped device.
*
* This segment driver is used when mapping character special devices.
*/
#include <machine/pte.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/pvn.h>
#include <vm/vpage.h>
#define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
/*
* Private seg op routines.
*/
static int segdev_dup(/* seg, newsegp */);
static int segdev_unmap(/* seg, addr, len */);
static int segdev_free(/* seg */);
static faultcode_t segdev_fault(/* seg, addr, len, type, rw */);
static faultcode_t segdev_faulta(/* seg, addr */);
static int segdev_hatsync(/* seg, addr, ref, mod, flags */);
static int segdev_setprot(/* seg, addr, size, len */);
static int segdev_checkprot(/* seg, addr, size, len */);
static int segdev_badop();
static int segdev_incore(/* seg, addr, size, vec */);
static int segdev_ctlops(/* seg, addr, size, [flags] */);
struct seg_ops segdev_ops = {
segdev_dup,
segdev_unmap,
segdev_free,
segdev_fault,
segdev_faulta,
segdev_hatsync,
segdev_setprot,
segdev_checkprot,
segdev_badop, /* kluster */
(u_int (*)()) NULL, /* swapout */
segdev_ctlops, /* sync */
segdev_incore,
segdev_ctlops, /* lockop */
segdev_ctlops, /* advise */
};
/*
* Create a device segment.
*/
int
segdev_create(seg, argsp)
struct seg *seg;
caddr_t argsp;
{
register struct segdev_data *sdp;
register struct segdev_crargs *a = (struct segdev_crargs *)argsp;
sdp = (struct segdev_data *)
new_kmem_alloc(sizeof (struct segdev_data), KMEM_SLEEP);
sdp->mapfunc = a->mapfunc;
sdp->dev = a->dev;
sdp->offset = a->offset;
sdp->prot = a->prot;
sdp->maxprot = a->maxprot;
sdp->pageprot = 0;
sdp->vpage = NULL;
seg->s_ops = &segdev_ops;
seg->s_data = (char *)sdp;
return (0);
}
/*
* Duplicate seg and return new segment in newsegp.
*/
static int
segdev_dup(seg, newseg)
struct seg *seg, *newseg;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct segdev_data *newsdp;
struct segdev_crargs a;
a.mapfunc = sdp->mapfunc;
a.dev = sdp->dev;
a.offset = sdp->offset;
a.prot = sdp->prot;
a.maxprot = sdp->maxprot;
(void) segdev_create(newseg, (caddr_t)&a);
newsdp = (struct segdev_data *)newseg->s_data;
newsdp->pageprot = sdp->pageprot;
if (sdp->vpage != NULL) {
register u_int nbytes = vpgtob(seg_pages(seg));
newsdp->vpage = (struct vpage *)
new_kmem_alloc(nbytes, KMEM_SLEEP);
bcopy((caddr_t)sdp->vpage, (caddr_t)newsdp->vpage, nbytes);
}
return (0);
}
/*
* Split a segment at addr for length len.
*/
/*ARGSUSED*/
static int
segdev_unmap(seg, addr, len)
register struct seg *seg;
register addr_t addr;
u_int len;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct segdev_data *nsdp;
register struct seg *nseg;
register u_int npages, spages, tpages;
addr_t nbase;
u_int nsize, hpages;
/*
* Check for bad sizes
*/
if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
(len & PAGEOFFSET) || ((u_int)addr & PAGEOFFSET))
panic("segdev_unmap");
/*
* Unload any hardware translations in the range to be taken out.
*/
hat_unload(seg, addr, len);
/*
* Check for entire segment
*/
if (addr == seg->s_base && len == seg->s_size) {
seg_free(seg);
return (0);
}
/*
* Check for beginning of segment
*/
spages = seg_pages(seg);
npages = btop(len);
if (addr == seg->s_base) {
if (sdp->vpage != NULL) {
sdp->vpage = (struct vpage *)new_kmem_resize(
(caddr_t)sdp->vpage, vpgtob(npages),
vpgtob(spages - npages), vpgtob(spages),
KMEM_SLEEP);
}
sdp->offset += len;
seg->s_base += len;
seg->s_size -= len;
return (0);
}
/*
* Check for end of segment
*/
if (addr + len == seg->s_base + seg->s_size) {
tpages = spages - npages;
if (sdp->vpage != NULL)
sdp->vpage = (struct vpage *)
new_kmem_resize((caddr_t)sdp->vpage, (u_int)0,
vpgtob(tpages), vpgtob(spages), KMEM_SLEEP);
seg->s_size -= len;
return (0);
}
/*
* The section to go is in the middle of the segment,
* have to make it into two segments. nseg is made for
* the high end while seg is cut down at the low end.
*/
nbase = addr + len; /* new seg base */
nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */
seg->s_size = addr - seg->s_base; /* shrink old seg */
nseg = seg_alloc(seg->s_as, nbase, nsize);
if (nseg == NULL)
panic("segdev_unmap seg_alloc");
nseg->s_ops = seg->s_ops;
nsdp = (struct segdev_data *)
new_kmem_alloc(sizeof (struct segdev_data), KMEM_SLEEP);
nseg->s_data = (char *)nsdp;
nsdp->pageprot = sdp->pageprot;
nsdp->prot = sdp->prot;
nsdp->maxprot = sdp->maxprot;
nsdp->mapfunc = sdp->mapfunc;
nsdp->offset = sdp->offset + nseg->s_base - seg->s_base;
if (sdp->vpage == NULL)
nsdp->vpage = NULL;
else {
tpages = btop(nseg->s_base - seg->s_base);
hpages = btop(addr - seg->s_base);
nsdp->vpage = (struct vpage *)
new_kmem_alloc(vpgtob(spages - tpages), KMEM_SLEEP);
bcopy((caddr_t)&sdp->vpage[tpages], (caddr_t)nsdp->vpage,
vpgtob(spages - tpages));
sdp->vpage = (struct vpage *)
new_kmem_resize((caddr_t)sdp->vpage, (u_int)0,
vpgtob(hpages), vpgtob(spages), KMEM_SLEEP);
}
/*
* Now we do something so that all the translations which used
* to be associated with seg are now associated with nseg.
*/
hat_newseg(seg, nseg->s_base, nseg->s_size, nseg);
return (0);
}
/*
* Free a segment.
*/
static
segdev_free(seg)
struct seg *seg;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register u_int nbytes = vpgtob(seg_pages(seg));
if (sdp->vpage != NULL)
kmem_free((caddr_t)sdp->vpage, nbytes);
kmem_free((caddr_t)sdp, sizeof (*sdp));
}
/*
* Handle a fault on a device segment.
*/
static faultcode_t
segdev_fault(seg, addr, len, type, rw)
register struct seg *seg;
addr_t addr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register addr_t adr;
register u_int prot, protchk;
int pf;
struct vpage *vpage;
if (type == F_PROT) {
/*
* Since the seg_dev driver does not implement copy-on-write,
* this means that a valid translation is already loaded,
* but we got a fault trying to access the device.
* Return an error here to prevent going in an endless
* loop reloading the same translation...
*/
return (FC_PROT);
}
if (type != F_SOFTUNLOCK) {
if (sdp->pageprot == 0) {
switch (rw) {
case S_READ:
protchk = PROT_READ;
break;
case S_WRITE:
protchk = PROT_WRITE;
break;
case S_EXEC:
protchk = PROT_EXEC;
break;
case S_OTHER:
default:
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
break;
}
prot = sdp->prot;
if ((prot & protchk) == 0)
return (FC_PROT);
vpage = NULL;
} else {
vpage = &sdp->vpage[seg_page(seg, addr)];
}
}
for (adr = addr; adr < addr + len; adr += PAGESIZE) {
if (type == F_SOFTUNLOCK) {
hat_unlock(seg, adr);
continue;
}
if (vpage != NULL) {
switch (rw) {
case S_READ:
protchk = PROT_READ;
break;
case S_WRITE:
protchk = PROT_WRITE;
break;
case S_EXEC:
protchk = PROT_EXEC;
break;
case S_OTHER:
default:
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
break;
}
prot = vpage->vp_prot;
vpage++;
if ((prot & protchk) == 0)
return (FC_PROT);
}
pf = (*sdp->mapfunc)(sdp->dev,
sdp->offset + (adr - seg->s_base), prot);
if (pf == -1)
return (FC_MAKE_ERR(EFAULT));
hat_devload(seg, adr, pf, prot, type == F_SOFTLOCK);
}
return (0);
}
/*
* Asynchronous page fault. We simply do nothing since this
* entry point is not supposed to load up the translation.
*/
/*ARGSUSED*/
static faultcode_t
segdev_faulta(seg, addr)
struct seg *seg;
addr_t addr;
{
return (0);
}
/*ARGSUSED*/
static
segdev_hatsync(seg, addr, ref, mod, flags)
struct seg *seg;
addr_t addr;
u_int ref, mod;
u_int flags;
{
/* cannot use ref and mod bits on devices, so ignore 'em */
}
static int
segdev_setprot(seg, addr, len, prot)
register struct seg *seg;
register addr_t addr;
register u_int len, prot;
{
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct vpage *vp, *evp;
if ((sdp->maxprot & prot) != prot)
return (-1); /* violated maxprot */
if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
if (sdp->prot == prot)
return (0); /* all done */
sdp->prot = prot;
} else {
sdp->pageprot = 1;
if (sdp->vpage == NULL) {
/*
* First time through setting per page permissions,
* initialize all the vpage structures to prot
*/
sdp->vpage = (struct vpage *)new_kmem_zalloc(
vpgtob(seg_pages(seg)), KMEM_SLEEP);
evp = &sdp->vpage[seg_pages(seg)];
for (vp = sdp->vpage; vp < evp; vp++)
vp->vp_prot = sdp->prot;
}
/*
* Now go change the needed vpages protections.
*/
evp = &sdp->vpage[seg_page(seg, addr + len)];
for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
vp->vp_prot = prot;
}
if (prot == 0)
hat_unload(seg, addr, len);
else
hat_chgprot(seg, addr, len, prot);
return (0);
}
static int
segdev_checkprot(seg, addr, len, prot)
register struct seg *seg;
register addr_t addr;
register u_int len, prot;
{
struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
register struct vpage *vp, *evp;
/*
* If segment protection can be used, simply check against them
*/
if (sdp->pageprot == 0)
return (((sdp->prot & prot) != prot) ? -1 : 0);
/*
* Have to check down to the vpage level
*/
evp = &sdp->vpage[seg_page(seg, addr + len)];
for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
if ((vp->vp_prot & prot) != prot)
return (-1);
return (0);
}
static
segdev_badop()
{
panic("segdev_badop");
/*NOTREACHED*/
}
/*
* segdev pages are not in the cache, and thus can't really be controlled.
* syncs, locks, and advice are simply always successful.
*/
/*ARGSUSED*/
static int
segdev_ctlops(seg, addr, len, flags)
struct seg *seg;
addr_t addr;
u_int len, flags;
{
return (0);
}
/*
* segdev pages are always "in core".
*/
/*ARGSUSED*/
static int
segdev_incore(seg, addr, len, vec)
struct seg *seg;
addr_t addr;
register u_int len;
register char *vec;
{
u_int v = 0;
for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
v += PAGESIZE)
*vec++ = 1;
return (v);
}

sys/vm/seg_dev.h (new file, 38 lines)

@@ -0,0 +1,38 @@
/* @(#)seg_dev.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_dev_h
#define _vm_seg_dev_h
/*
* Structure whose pointer is passed to the segdev_create routine
*/
struct segdev_crargs {
int (*mapfunc)(); /* map function to call */
u_int offset; /* starting offset */
dev_t dev; /* device number */
u_char prot; /* protection */
u_char maxprot; /* maximum protection */
};
/*
* (Semi) private data maintained by the seg_dev driver per segment mapping
*/
struct segdev_data {
int (*mapfunc)(); /* really returns struct pte, not int */
u_int offset; /* device offset for start of mapping */
dev_t dev; /* device number (for mapfunc) */
u_char pageprot; /* true if per page protections present */
u_char prot; /* current segment prot if pageprot == 0 */
u_char maxprot; /* maximum segment protections */
struct vpage *vpage; /* per-page information, if needed */
};
#ifdef KERNEL
int segdev_create(/* seg, argsp */);
#endif KERNEL
#endif /*!_vm_seg_dev_h*/
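
A hedged sketch of how a device mapping might be established with this interface: fill in a segdev_crargs from the driver's mmap entry point and hand segdev_create to as_map() as the segment-create function. The wrapper routine and its argument list are invented for the example.

#include <sys/param.h>
#include <sys/mman.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>

/*
 * Illustrative only: map [off, off+len) of a character device at addr
 * in the given address space using the seg_dev driver.
 */
int
sketch_map_device(as, addr, len, mapfunc, dev, off, prot, maxprot)
        struct as *as;
        addr_t addr;
        u_int len;
        int (*mapfunc)();       /* driver's mmap entry point */
        dev_t dev;
        u_int off;
        u_int prot, maxprot;
{
        struct segdev_crargs a;

        a.mapfunc = mapfunc;
        a.dev = dev;
        a.offset = off;
        a.prot = (u_char)prot;
        a.maxprot = (u_char)maxprot;
        return (as_map(as, addr, len, segdev_create, (caddr_t)&a));
}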

sys/vm/seg_map.c (new file, 776 lines)

@@ -0,0 +1,776 @@
/* @(#)seg_map.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - generic vnode mapping segment.
*
* The segmap driver is used only by the kernel to get faster (than seg_vn)
* mappings [lower routine overhead; more persistent cache] to random
* vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
*/
#include <sys/param.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/ucred.h>
#include <sys/trace.h>
#include <sys/debug.h>
#include <sys/user.h>
#include <sys/kernel.h>
#include <machine/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
/*
* Private seg op routines.
*/
static int segmap_free(/* seg */);
static faultcode_t segmap_fault(/* seg, addr, len, type, rw */);
static faultcode_t segmap_faulta(/* seg, addr */);
static int segmap_checkprot(/* seg, addr, len, prot */);
static int segmap_kluster(/* seg, addr, delta */);
static int segmap_badop();
struct seg_ops segmap_ops = {
segmap_badop, /* dup */
segmap_badop, /* unmap */
segmap_free,
segmap_fault,
segmap_faulta,
(int (*)()) NULL, /* unload */
segmap_badop, /* setprot */
segmap_checkprot,
segmap_kluster,
(u_int (*)()) NULL, /* swapout */
segmap_badop, /* sync */
segmap_badop, /* incore */
segmap_badop, /* lockop */
segmap_badop, /* advise */
};
/*
* Private segmap routines.
*/
static void segmap_smapadd(/* smd, smp */);
static void segmap_smapsub(/* smd, smp */);
static void segmap_hashin(/* smd, smp, vp, off, flags */);
static void segmap_hashout(/* smd, smp */);
/*
* Statistics for segmap operations.
*/
struct segmapcnt {
int smc_fault; /* number of segmap_faults */
int smc_faulta; /* number of segmap_faultas */
int smc_getmap; /* number of segmap_getmaps */
int smc_get_use; /* # of getmaps that reuse an existing map */
int smc_get_reclaim; /* # of getmaps that do a reclaim */
int smc_get_reuse; /* # of getmaps that reuse a slot */
int smc_rel_async; /* # of releases that are async */
int smc_rel_write; /* # of releases that write */
int smc_rel_free; /* # of releases that free */
int smc_rel_abort; /* # of releases that abort */
int smc_rel_dontneed; /* # of releases with dontneed set */
int smc_release; /* # of releases with no other action */
int smc_pagecreate; /* # of pagecreates */
} segmapcnt;
/*
* Return number of map pages in segment.
*/
#define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
/*
* Translate addr into smap number within segment.
*/
#define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
/*
* Translate addr in seg into struct smap pointer.
*/
#define GET_SMAP(seg, addr) \
&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
int
segmap_create(seg, argsp)
struct seg *seg;
caddr_t argsp;
{
register struct segmap_data *smd;
register struct smap *smp;
struct segmap_crargs *a = (struct segmap_crargs *)argsp;
register u_int i;
u_int hashsz;
addr_t segend;
/*
* Make sure that seg->s_base and seg->s_base + seg->s_size
* are on MAXBSIZE aligned pieces of virtual memory.
*
* Since we assume we are creating a large segment
* (it's just segkmap), trimming off the excess at the
* beginning and end of the segment is considered safe.
*/
segend = (addr_t)((u_int)(seg->s_base + seg->s_size) & MAXBMASK);
seg->s_base = (addr_t)roundup((u_int)(seg->s_base), MAXBSIZE);
seg->s_size = segend - seg->s_base;
i = MAP_PAGES(seg);
smd = (struct segmap_data *)new_kmem_zalloc(
sizeof (struct segmap_data), KMEM_SLEEP);
smd->smd_prot = a->prot;
smd->smd_sm = (struct smap *)new_kmem_zalloc(
(u_int)(sizeof (struct smap) * i), KMEM_SLEEP);
/*
* Link up all the slots.
*/
for (smp = &smd->smd_sm[i - 1]; smp >= smd->smd_sm; smp--)
segmap_smapadd(smd, smp);
/*
* Compute hash size rounding down to the next power of two.
*/
hashsz = MAP_PAGES(seg) / SMAP_HASHAVELEN;
for (i = 0x80 << ((sizeof (int) - 1) * NBBY); i != 0; i >>= 1) {
if ((hashsz & i) != 0) {
smd->smd_hashsz = hashsz = i;
break;
}
}
smd->smd_hash = (struct smap **)new_kmem_zalloc(
hashsz * sizeof (smd->smd_hash[0]), KMEM_SLEEP);
seg->s_data = (char *)smd;
seg->s_ops = &segmap_ops;
return (0);
}
static int
segmap_free(seg)
struct seg *seg;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
kmem_free((caddr_t)smd->smd_hash, sizeof (smd->smd_hash[0]) *
smd->smd_hashsz);
kmem_free((caddr_t)smd->smd_sm, sizeof (struct smap) * MAP_PAGES(seg));
kmem_free((caddr_t)smd, sizeof (*smd));
}
/*
* Do a F_SOFTUNLOCK call over the range requested.
* The range must have already been F_SOFTLOCK'ed.
*/
static void
segmap_unlock(seg, addr, len, rw, smp)
struct seg *seg;
addr_t addr;
u_int len;
enum seg_rw rw;
register struct smap *smp;
{
register struct page *pp;
register addr_t adr;
u_int off;
off = smp->sm_off + ((u_int)addr & MAXBOFFSET);
for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
/*
* For now, we just kludge here by finding the page
* ourselves since we would not find the page using
* page_find() if someone has page_abort()'ed it.
* XXX - need to redo things to avoid this mess.
*/
for (pp = page_hash[PAGE_HASHFUNC(smp->sm_vp, off)]; pp != NULL;
pp = pp->p_hash)
if (pp->p_vnode == smp->sm_vp && pp->p_offset == off)
break;
if (pp == NULL || pp->p_pagein || pp->p_free)
panic("segmap_unlock");
if (rw == S_WRITE)
pg_setmod(pp, 1);
if (rw != S_OTHER) {
trace4(TR_PG_SEGMAP_FLT, pp, pp->p_vnode, off, 1);
pg_setref(pp, 1);
}
hat_unlock(seg, adr);
PAGE_RELE(pp);
}
}
/*
* This routine is called via a machine specific fault handling
* routine. It is also called by software routines wishing to
* lock or unlock a range of addresses.
*/
static faultcode_t
segmap_fault(seg, addr, len, type, rw)
struct seg *seg;
addr_t addr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
register struct segmap_data *smd;
register struct smap *smp;
register struct page *pp, **ppp;
register struct vnode *vp;
register u_int off;
struct page *pl[btopr(MAXBSIZE) + 1];
u_int prot;
u_int addroff;
addr_t adr;
int err;
segmapcnt.smc_fault++;
smd = (struct segmap_data *)seg->s_data;
smp = GET_SMAP(seg, addr);
vp = smp->sm_vp;
if (vp == NULL)
return (FC_MAKE_ERR(EIO));
addroff = (u_int)addr & MAXBOFFSET;
if (addroff + len > MAXBSIZE)
panic("segmap_fault length");
off = smp->sm_off + addroff;
/*
* First handle the easy stuff
*/
if (type == F_SOFTUNLOCK) {
segmap_unlock(seg, addr, len, rw, smp);
return (0);
}
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_SEGKMAP);
err = VOP_GETPAGE(vp, off, len, &prot, pl, MAXBSIZE, seg, addr, rw,
(struct ucred *)NULL); /* XXX - need real cred val */
if (err)
return (FC_MAKE_ERR(err));
prot &= smd->smd_prot;
/*
* Handle all pages returned in the pl[] array.
* This loop is coded on the assumption that if
* there was no error from the VOP_GETPAGE routine,
* that the page list returned will contain all the
* needed pages for the vp from [off..off + len).
*/
for (ppp = pl; (pp = *ppp++) != NULL; ) {
/*
* Verify that the pages returned are within the range
* of this segmap region. Note that it is theoretically
* possible for pages outside this range to be returned,
* but it is not very likely. If we cannot use the
* page here, just release it and go on to the next one.
*/
if (pp->p_offset < smp->sm_off ||
pp->p_offset >= smp->sm_off + MAXBSIZE) {
PAGE_RELE(pp);
continue;
}
adr = addr + (pp->p_offset - off);
if (adr >= addr && adr < addr + len) {
pg_setref(pp, 1);
trace4(TR_PG_SEGMAP_FLT, pp, pp->p_vnode, pp->p_offset,
0);
trace5(TR_SPG_FLT, u.u_ar0[PC], adr, vp, pp->p_offset,
TRC_SPG_SMAP);
trace6(TR_SPG_FLT_PROC, time.tv_sec, time.tv_usec,
trs(u.u_comm,0), trs(u.u_comm,1),
trs(u.u_comm,2), trs(u.u_comm,3));
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't PAGE_RELE the page.
*/
hat_memload(seg, adr, pp, prot, 1);
continue;
}
}
/*
* Either it was a page outside the fault range or a
* page inside the fault range for a non F_SOFTLOCK -
* load up the hat translation and release the page.
*/
hat_memload(seg, adr, pp, prot, 0);
PAGE_RELE(pp);
}
return (0);
}
/*
* This routine is used to start I/O on pages asynchronously.
*/
static faultcode_t
segmap_faulta(seg, addr)
struct seg *seg;
addr_t addr;
{
register struct smap *smp;
int err;
segmapcnt.smc_faulta++;
smp = GET_SMAP(seg, addr);
if (smp->sm_vp == NULL) {
call_debug("segmap_faulta - no vp");
return (FC_MAKE_ERR(EIO));
}
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_SEGKMAP);
err = VOP_GETPAGE(smp->sm_vp, smp->sm_off + (u_int)addr & MAXBOFFSET,
PAGESIZE, (u_int *)NULL, (struct page **)NULL, 0,
seg, addr, S_READ,
(struct ucred *)NULL); /* XXX - need real cred val */
if (err)
return (FC_MAKE_ERR(err));
return (0);
}
/*ARGSUSED*/
static int
segmap_checkprot(seg, addr, len, prot)
struct seg *seg;
addr_t addr;
u_int len, prot;
{
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
return (((smd->smd_prot & prot) != prot) ? -1 : 0);
}
/*
* Check to see if it makes sense to do kluster/read ahead to
* addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For segmap we always "approve" of this action from our standpoint.
*/
/*ARGSUSED*/
static int
segmap_kluster(seg, addr, delta)
struct seg *seg;
addr_t addr;
int delta;
{
return (0);
}
static
segmap_badop()
{
panic("segmap_badop");
/*NOTREACHED*/
}
/*
* Special private segmap operations
*/
/*
* Add smp to the free list on smd. If the smp still has a vnode
* association with it, then it is added to the end of the free list,
* otherwise it is added to the front of the list.
*/
static void
segmap_smapadd(smd, smp)
register struct segmap_data *smd;
register struct smap *smp;
{
if (smp->sm_refcnt != 0)
panic("segmap_smapadd");
if (smd->smd_free == (struct smap *)NULL) {
smp->sm_next = smp->sm_prev = smp;
} else {
smp->sm_next = smd->smd_free;
smp->sm_prev = (smd->smd_free)->sm_prev;
(smd->smd_free)->sm_prev = smp;
smp->sm_prev->sm_next = smp;
}
if (smp->sm_vp == (struct vnode *)NULL)
smd->smd_free = smp;
else
smd->smd_free = smp->sm_next;
/*
* XXX - need a better way to do this.
*/
if (smd->smd_want) {
wakeup((caddr_t)&smd->smd_free);
smd->smd_want = 0;
}
}
/*
* Remove smp from the smd free list. If there is an old
* mapping in effect there, then delete it.
*/
static void
segmap_smapsub(smd, smp)
register struct segmap_data *smd;
register struct smap *smp;
{
if (smd->smd_free == smp)
smd->smd_free = smp->sm_next; /* go to next page */
if (smd->smd_free == smp)
smd->smd_free = NULL; /* smp list is gone */
else {
smp->sm_prev->sm_next = smp->sm_next;
smp->sm_next->sm_prev = smp->sm_prev;
}
smp->sm_prev = smp->sm_next = smp; /* make smp a list of one */
smp->sm_refcnt = 1;
}
static void
segmap_hashin(smd, smp, vp, off)
register struct segmap_data *smd;
register struct smap *smp;
struct vnode *vp;
u_int off;
{
register struct smap **hpp;
/*
* Funniness here - we don't increment the ref count on the vnode
* even though we have another pointer to it here. The reason
* for this is that we don't want the fact that a seg_map
* entry somewhere refers to a vnode to prevent the vnode
* itself from going away. This is because this reference
* to the vnode is a "soft one". In the case where a mapping
* is being used by a rdwr [or directory routine?] there already
* has to be a non-zero ref count on the vnode. In the case
* where the vp has been freed and the smap structure is
* on the free list, there are no pages in memory that can
* refer to the vnode. Thus even if we reuse the same
* vnode/smap structure for a vnode which has the same
* address but represents a different object, we are ok.
*/
smp->sm_vp = vp;
smp->sm_off = off;
hpp = &smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
smp->sm_hash = *hpp;
*hpp = smp;
}
static void
segmap_hashout(smd, smp)
register struct segmap_data *smd;
register struct smap *smp;
{
register struct smap **hpp, *hp;
struct vnode *vp;
vp = smp->sm_vp;
hpp = &smd->smd_hash[SMAP_HASHFUNC(smd, vp, smp->sm_off)];
for (;;) {
hp = *hpp;
if (hp == NULL)
panic("segmap_hashout");
if (hp == smp)
break;
hpp = &hp->sm_hash;
}
*hpp = smp->sm_hash;
smp->sm_hash = NULL;
smp->sm_vp = NULL;
smp->sm_off = 0;
}
/*
* Special public segmap operations
*/
/*
* Create pages (without using VOP_GETPAGE) and load up translations to them.
* If softlock is TRUE, then set things up so that it looks like a call
* to segmap_fault with F_SOFTLOCK.
*/
void
segmap_pagecreate(seg, addr, len, softlock)
struct seg *seg;
register addr_t addr;
u_int len;
int softlock;
{
register struct page *pp;
register u_int off;
struct smap *smp;
struct vnode *vp;
addr_t eaddr;
u_int prot;
segmapcnt.smc_pagecreate++;
eaddr = addr + len;
addr = (addr_t)((u_int)addr & PAGEMASK);
smp = GET_SMAP(seg, addr);
vp = smp->sm_vp;
off = smp->sm_off + ((u_int)addr & MAXBOFFSET);
prot = ((struct segmap_data *)seg->s_data)->smd_prot;
for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
pp = page_lookup(vp, off);
if (pp == NULL) {
pp = rm_allocpage(segkmap, addr, PAGESIZE, 1);
trace6(TR_SEG_ALLOCPAGE, segkmap, addr,
TRC_SEG_SEGKMAP, vp, off, pp);
if (page_enter(pp, vp, off))
panic("segmap_page_create page_enter");
page_unlock(pp);
if (softlock) {
hat_memload(segkmap, addr, pp, prot, 1);
} else {
hat_memload(segkmap, addr, pp, prot, 0);
PAGE_RELE(pp);
}
} else {
if (softlock) {
PAGE_HOLD(pp);
hat_memload(segkmap, addr, pp, prot, 1);
} else {
hat_memload(segkmap, addr, pp, prot, 0);
}
}
}
}
addr_t
segmap_getmap(seg, vp, off)
struct seg *seg;
struct vnode *vp;
u_int off;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
register struct smap *smp;
segmapcnt.smc_getmap++;
if ((off & MAXBOFFSET) != 0)
panic("segmap_getmap bad offset");
/*
* XXX - keep stats for hash function
*/
for (smp = smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
smp != NULL; smp = smp->sm_hash)
if (smp->sm_vp == vp && smp->sm_off == off)
break;
if (smp != NULL) {
if (vp->v_count == 0) /* XXX - debugging */
call_debug("segmap_getmap vp count of zero");
if (smp->sm_refcnt != 0) {
segmapcnt.smc_get_use++;
smp->sm_refcnt++; /* another user */
} else {
segmapcnt.smc_get_reclaim++;
segmap_smapsub(smd, smp); /* reclaim */
}
} else {
/*
* Allocate a new slot and set it up.
*/
while ((smp = smd->smd_free) == NULL) {
/*
* XXX - need a better way to do this.
*/
smd->smd_want = 1;
(void) sleep((caddr_t)&smd->smd_free, PSWP+2);
}
segmap_smapsub(smd, smp);
if (smp->sm_vp != (struct vnode *)NULL) {
/*
* Destroy old vnode association and unload any
* hardware translations to the old object.
*/
segmapcnt.smc_get_reuse++;
segmap_hashout(smd, smp);
hat_unload(seg, seg->s_base + ((smp - smd->smd_sm) *
MAXBSIZE), MAXBSIZE);
}
segmap_hashin(smd, smp, vp, off);
}
trace5(TR_SEG_GETMAP, seg, (u_int)(seg->s_base +
(smp - smd->smd_sm) * MAXBSIZE) & PAGEMASK,
TRC_SEG_SEGKMAP, vp, off);
return (seg->s_base + ((smp - smd->smd_sm) * MAXBSIZE));
}
/*
* Same as segmap_getmap(), with the following condition added
* if (a new mapping is created)
* prefault the translation
*/
addr_t
segmap_getmapflt(seg, vp, off)
struct seg *seg;
struct vnode *vp;
u_int off;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
register struct smap *smp;
segmapcnt.smc_getmap++;
if ((off & MAXBOFFSET) != 0)
panic("segmap_getmap bad offset");
/*
* XXX - keep stats for hash function
*/
for (smp = smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
smp != NULL; smp = smp->sm_hash)
if (smp->sm_vp == vp && smp->sm_off == off)
break;
if (smp != NULL) {
if (vp->v_count == 0) /* XXX - debugging */
call_debug("segmap_getmap vp count of zero");
if (smp->sm_refcnt != 0) {
segmapcnt.smc_get_use++;
smp->sm_refcnt++; /* another user */
} else {
segmapcnt.smc_get_reclaim++;
segmap_smapsub(smd, smp); /* reclaim */
}
} else {
/*
* Allocate a new slot and set it up.
*/
while ((smp = smd->smd_free) == NULL) {
/*
* XXX - need a better way to do this.
*/
smd->smd_want = 1;
(void) sleep((caddr_t)&smd->smd_free, PSWP+2);
}
segmap_smapsub(smd, smp);
if (smp->sm_vp != (struct vnode *)NULL) {
/*
* Destroy old vnode association and unload any
* hardware translations to the old object.
*/
segmapcnt.smc_get_reuse++;
segmap_hashout(smd, smp);
hat_unload(seg, seg->s_base + ((smp - smd->smd_sm) *
MAXBSIZE), MAXBSIZE);
}
segmap_hashin(smd, smp, vp, off);
/*
* Prefault the translation
*/
(void)as_fault(&kas,
seg->s_base + (smp - smd->smd_sm) * MAXBSIZE,
MAXBSIZE, F_INVAL, S_READ);
}
trace5(TR_SEG_GETMAP, seg, (u_int)(seg->s_base +
(smp - smd->smd_sm) * MAXBSIZE) & PAGEMASK,
TRC_SEG_SEGKMAP, vp, off);
return (seg->s_base + ((smp - smd->smd_sm) * MAXBSIZE));
}
int
segmap_release(seg, addr, flags)
struct seg *seg;
addr_t addr;
u_int flags;
{
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
register struct smap *smp;
int error;
if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
((u_int)addr & MAXBOFFSET) != 0)
panic("segmap_release addr");
smp = &smd->smd_sm[MAP_PAGE(seg, addr)];
trace4(TR_SEG_RELMAP, seg, addr, TRC_SEG_SEGKMAP, smp->sm_refcnt);
/*
* Need to call VOP_PUTPAGE if any flags (except SM_DONTNEED)
* are set.
*/
if ((flags & ~SM_DONTNEED) != 0) {
int bflags = 0;
if (flags & SM_WRITE)
segmapcnt.smc_rel_write++;
if (flags & SM_ASYNC) {
bflags |= B_ASYNC;
segmapcnt.smc_rel_async++;
}
if (flags & SM_INVAL) {
bflags |= B_INVAL;
segmapcnt.smc_rel_abort++;
}
if (smp->sm_refcnt == 1) {
/*
* We only bother doing the FREE and DONTNEED flags
* if no one else is still referencing this mapping.
*/
if (flags & SM_FREE) {
bflags |= B_FREE;
segmapcnt.smc_rel_free++;
}
if (flags & SM_DONTNEED) {
bflags |= B_DONTNEED;
segmapcnt.smc_rel_dontneed++;
}
}
error = VOP_PUTPAGE(smp->sm_vp, smp->sm_off, MAXBSIZE, bflags,
(struct ucred *)NULL); /* XXX - need real cred val */
} else {
segmapcnt.smc_release++;
error = 0;
}
if (--smp->sm_refcnt == 0) {
if (flags & SM_INVAL) {
hat_unload(seg, addr, MAXBSIZE);
segmap_hashout(smd, smp); /* remove map info */
}
segmap_smapadd(smd, smp); /* add to free list */
}
return (error);
}

sys/vm/seg_map.h (new file, 88 lines)

@@ -0,0 +1,88 @@
/* @(#)seg_map.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_map_h
#define _vm_seg_map_h
struct segmap_crargs {
u_int prot;
};
/*
* Each smap struct represents a MAXBSIZE sized mapping to the
* <sm_vp, sm_off> given in the structure. The location of the
* structure in the array gives the virtual address of the
* mapping.
*/
struct smap {
struct vnode *sm_vp; /* vnode pointer (if mapped) */
u_int sm_off; /* file offset for mapping */
/*
* These next 4 entries can be coded as
* u_shorts if we are tight on memory.
*/
u_int sm_refcnt; /* reference count for uses */
struct smap *sm_hash; /* hash pointer */
struct smap *sm_next; /* next pointer */
struct smap *sm_prev; /* previous pointer */
};
/*
* (Semi) private data maintained by the segmap driver per SEGMENT mapping
*/
struct segmap_data {
struct smap *smd_sm; /* array of smap structures */
struct smap *smd_free; /* free list head pointer */
u_char smd_prot; /* protections for all smap's */
u_char smd_want; /* smap want flag */
u_int smd_hashsz; /* power-of-two hash table size */
struct smap **smd_hash; /* pointer to hash table */
};
/*
* These are flags used on release. Some of these might get handled
* by segment operations needed for msync (when we figure them out).
* SM_ASYNC modifies SM_WRITE. SM_DONTNEED modifies SM_FREE. SM_FREE
* and SM_INVAL are mutually exclusive.
*/
#define SM_WRITE 0x01 /* write back the pages upon release */
#define SM_ASYNC 0x02 /* do the write asynchronously */
#define SM_FREE 0x04 /* put pages back on free list */
#define SM_INVAL 0x08 /* invalidate page (no caching) */
#define SM_DONTNEED 0x10 /* less likely to be needed soon */
#define MAXBSHIFT 13 /* log2(MAXBSIZE) */
#define MAXBOFFSET (MAXBSIZE - 1)
#define MAXBMASK (~MAXBOFFSET)
/*
* SMAP_HASHAVELEN is the average length desired for this chain, from
* which the size of the smd_hash table is derived at segment create time.
* SMAP_HASHVPSHIFT is defined so that 1 << SMAP_HASHVPSHIFT is the
* approximate size of a vnode struct.
*/
#define SMAP_HASHAVELEN 4
#define SMAP_HASHVPSHIFT 6
#define SMAP_HASHFUNC(smd, vp, off) \
((((off) >> MAXBSHIFT) + ((int)(vp) >> SMAP_HASHVPSHIFT)) & \
((smd)->smd_hashsz - 1))
#ifdef KERNEL
int segmap_create(/* seg, argsp */);
/*
* Special seg_map segment operations
*/
void segmap_pagecreate(/* seg, addr, len, softlock */);
addr_t segmap_getmap(/* seg, vp, off */);
int segmap_release(/* seg, addr, flags */);
extern struct seg *segkmap; /* the kernel generic mapping segment */
extern struct seg_ops segmap_ops;
#endif KERNEL
#endif /*!_vm_seg_map_h*/
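
As a usage illustration, a hedged sketch of the pattern a file system read routine could follow with this interface: map a MAXBSIZE window of the vnode with segmap_getmap(), copy out of it, then release the window. The routine itself is not part of this header; locking, partial-block handling, and credentials are omitted, and uiomove()/MIN() are assumed to have their 4.3BSD forms.

#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/seg_map.h>

/*
 * Illustrative only: satisfy one read request of at most MAXBSIZE
 * bytes from vp through the kernel's segkmap window.
 */
int
sketch_segmap_read(vp, uio)
        struct vnode *vp;
        struct uio *uio;
{
        register addr_t base;
        register u_int off, n;
        int error;

        off = (u_int)uio->uio_offset & MAXBOFFSET;      /* offset in window */
        n = MIN(MAXBSIZE - off, uio->uio_resid);
        base = segmap_getmap(segkmap, vp, (u_int)uio->uio_offset & MAXBMASK);
        error = uiomove(base + off, (int)n, UIO_READ, uio);
        (void) segmap_release(segkmap, base, error ? 0 : SM_FREE);
        return (error);
}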

sys/vm/seg_u.c (new file, 871 lines)

@@ -0,0 +1,871 @@
/* @(#)seg_u.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1989 by Sun Microsystems, Inc.
*/
/*
* VM - u-area segment routines
*
* XXX: This segment type should probably be recast as seg_stack
* instead of seg_u. As the system evolves, we'll need to
* manage variable-sized stacks protected by red zones, some
* of which possibly are accompanied by u-areas. For the moment
* the implementation copes only with "standard" u-areas,
* each with an embedded stack. Doing so lets the implementation
* get away with much simpler space management code.
*
* Desired model:
* segu_data describes nproc u-areas and the segment ops
* manipulate individual slots in segu_data, so that (e.g.)
* copying a u-area upon process creation turns into
* transcribing parts of segu_data from one place to another.
*
* Red zone handling:
* The implementation maintains the invariant that the MMU mappings
* for unallocated slots are invalid. This means that red zones
* come for free simply by avoiding establishing mappings over all
* red zone pages and by making sure that all mappings are invalidated
* at segu_release time.
*
* Note also that we need neither pages nor swap space for red zones,
* so much of the code works over extents of SEGU_PAGES-1 instead
* of SEGU_PAGES.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/ucred.h>
#include <sys/vnode.h>
#include <sys/kmem_alloc.h>
#include <sys/proc.h> /* needed for debugging printouts only */
#include <sys/vmmeter.h>
#include <vm/anon.h>
#include <vm/rm.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_u.h>
#include <vm/swap.h>
#include <vm/hat.h>
/*
* Ugliness to compensate for some machine dependency.
*/
#ifdef i386bug
#define UPAGE_PROT (PROT_READ | PROT_USER)
#else i386bug
#define UPAGE_PROT (PROT_READ | PROT_WRITE)
#endif i386bug
int segu_debug = 0; /* patchable for debugging */
/*
* Private seg op routines.
*
* The swapout operation is null because the generic swapout code
* never attempts to swap out anything in the kernel's address
* space. Instead, clients swap the resources this driver manages
* by calling segu_fault with a type argument of F_SOFTLOCK to swap
* a slot in and with F_SOFTUNLOCK to swap one out.
*/
static int segu_checkprot(/* seg, vaddr, len, prot */);
static int segu_kluster(/* seg, vaddr, delta */);
static int segu_badop();
struct seg_ops segu_ops = {
segu_badop, /* dup */
segu_badop, /* unmap */
segu_badop, /* free */
segu_fault,
segu_badop, /* faulta */
(int (*)()) NULL, /* unload */
segu_badop, /* setprot */
segu_checkprot,
segu_kluster,
(u_int (*)()) NULL, /* swapout */
segu_badop, /* sync */
segu_badop, /* incore */
segu_badop, /* lockop */
segu_badop, /* advise */
};
/*
* Declarations of private routines for use by seg_u operations.
*/
static int segu_getslot(/* seg, vaddr, len */);
static int segu_softunlock(/* seg, vaddr, len, slot */);
static int segu_softload(/* seg, vaddr, len, slot, lock */);
struct seg *segu;
/*
* XXX: Global change needed -- set up MMU translations before
* keeping pages.
*/
static
segu_badop()
{
panic("seg_badop");
/* NOTREACHED */
}
/*
* Handle a fault on an address corresponding to one of the
* slots in the segu segment.
*/
faultcode_t
segu_fault(seg, vaddr, len, type, rw)
struct seg *seg;
addr_t vaddr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
struct segu_segdata *sdp = (struct segu_segdata *)seg->s_data;
struct segu_data *sup;
int slot;
addr_t vbase;
int err;
/*
* Sanity checks.
*/
if (seg != segu)
panic("segu_fault: wrong segment");
if (type == F_PROT)
panic("segu_fault: unexpected F_PROT fault");
/*
* Verify that the range specified by vaddr and len falls
* completely within the mapped part of a single allocated
* slot, calculating the slot index and slot pointer while
* we're at it.
*/
slot = segu_getslot(seg, vaddr, len);
if (slot == -1)
return (FC_MAKE_ERR(EFAULT));
sup = &sdp->usd_slots[slot];
vbase = seg->s_base + ptob(SEGU_PAGES) * slot;
/*
* The F_SOFTLOCK and F_SOFTUNLOCK cases have more stringent
* range requirements: the given range must exactly coincide
* with the slot's mapped portion.
*/
if (type == F_SOFTLOCK || type == F_SOFTUNLOCK) {
if (vaddr != segu_stom(vbase) || len != ptob(SEGU_PAGES - 1))
return (FC_MAKE_ERR(EFAULT));
}
if (type == F_SOFTLOCK) {
/*
* Somebody is trying to lock down this slot, e.g., as
* part of swapping in a u-area contained in the slot.
*/
/*
* It is erroneous to attempt to lock when already locked.
*
* XXX: Possibly this shouldn't be a panic. It depends
* on what assumptions we're willing to let clients
* make.
*/
if (sup->su_flags & SEGU_LOCKED)
panic("segu_fault: locking locked slot");
err = segu_softload(seg, segu_stom(vbase),
ptob(SEGU_PAGES - 1), slot, 1);
if (err)
return (FC_MAKE_ERR(err));
sup->su_flags |= SEGU_LOCKED;
return (0);
}
if (type == F_INVAL) {
/*
* Normal fault. The processing required
* is quite similar to that for the F_SOFTLOCK case in that
* we have to drag stuff in and make sure it's mapped. It
* differs in that we don't lock it down.
*/
if (segu_debug)
printf("segu_fault(%x, %x, %d)\n", vaddr, len, type);
/*
* If the slot is already locked, the only way we
* should fault is by referencing the red zone.
*
* XXX: Probably should tighten this check and verify
* that it's really a red zone reference.
* XXX: Is this the most appropriate error code?
*/
if (sup->su_flags & SEGU_LOCKED)
return (FC_MAKE_ERR(EINVAL));
err = segu_softload(seg, vaddr, len, slot, 0);
return (err ? FC_MAKE_ERR(err) : 0);
}
if (type == F_SOFTUNLOCK) {
/*
* Somebody is trying to swap out this slot, e.g., as
* part of swapping out a u-area contained in this slot.
*/
/*
* It is erroneous to attempt to unlock when not
* currently locked.
*/
if (!(sup->su_flags & SEGU_LOCKED))
panic("segu_fault: unlocking unlocked slot");
sup->su_flags &= ~SEGU_LOCKED;
err = segu_softunlock(seg, vaddr, len, slot, rw);
return (err ? FC_MAKE_ERR(err) : 0);
}
panic("segu_fault: bogus fault type");
/* NOTREACHED */
}
/*
* Check that the given protections suffice over the range specified by
* vaddr and len. For this segment type, the only issue is whether or
* not the range lies completely within the mapped part of an allocated slot.
*
* We let segu_getslot do all the dirty work.
*/
/* ARGSUSED */
static int
segu_checkprot(seg, vaddr, len, prot)
struct seg *seg;
addr_t vaddr;
u_int len;
u_int prot;
{
register int slot = segu_getslot(seg, vaddr, len);
return (slot == -1 ? -1 : 0);
}
/*
* Check to see if it makes sense to do kluster/read ahead to
* addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For seg_u we always "approve" of this action from our standpoint.
*/
/* ARGSUSED */
static int
segu_kluster(seg, addr, delta)
struct seg *seg;
addr_t addr;
int delta;
{
return (0);
}
/*
* Segment operations specific to the seg_u segment type.
*/
/*
* Finish creating the segu segment by setting up its private state
* information. Called once at boot time after segu has been allocated
* and hooked into the kernel address space.
*
* Note that we have no need for the argsp argument, since everything
* we need to set up our private information is contained in the common
* segment information. (This may change at such time as we generalize
* the implementation to deal with variable size allocation units.)
*/
/* ARGSUSED */
int
segu_create(seg, argsp)
register struct seg *seg;
caddr_t argsp;
{
register u_int numslots;
register int i;
register struct segu_segdata *sdp;
/*
* Trim the segment's size down to the largest multiple of
* SEGU_PAGES that's no larger than the original value.
*
* XXX: Does it matter that we're discarding virtual address
* space off the end with no record of how much there was?
*/
numslots = seg->s_size / ptob(SEGU_PAGES);
seg->s_size = numslots * ptob(SEGU_PAGES);
/*
* Allocate segment-specific information.
*/
seg->s_data = new_kmem_alloc(sizeof (struct segu_segdata), KMEM_SLEEP);
sdp = (struct segu_segdata *)seg->s_data;
/*
* Allocate the slot array.
*/
sdp->usd_slots = (struct segu_data *)new_kmem_alloc(
numslots * sizeof (struct segu_data), KMEM_SLEEP);
/*
* Set up the slot free list, marking each slot as unallocated.
* Note that the list must be sorted in ascending address order.
*/
sdp->usd_slots[0].su_flags = 0;
for (i = 1; i < numslots; i++) {
sdp->usd_slots[i - 1].su_next = &sdp->usd_slots[i];
sdp->usd_slots[i].su_flags = 0;
}
sdp->usd_slots[numslots - 1].su_next = NULL;
sdp->usd_free = sdp->usd_slots;
seg->s_ops = &segu_ops;
return (0);
}
/*
* Allocate resources for a single slot.
*
* When used for u-area, called at process creation time.
*/
addr_t
segu_get()
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
struct page *pp;
addr_t vbase;
addr_t va;
struct segu_data *sup;
int slot;
int i;
/*
* Allocate virtual space. This amounts to grabbing a free slot.
*/
if ((sup = sdp->usd_free) == NULL)
return (NULL);
sdp->usd_free = sup->su_next;
slot = sup - sdp->usd_slots;
vbase = segu->s_base + ptob(SEGU_PAGES) * slot;
/*
* If this slot has anon resources left over from its last use, free
* them. (Normally, segu_release will have cleaned up; however, i/o
* in progress at the time of the call prevents it from doing so.)
*/
if (sup->su_flags & SEGU_HASANON) {
anon_free(sup->su_swaddr, ptob(SEGU_PAGES));
anon_unresv(ptob(SEGU_PAGES - 1));
sup->su_flags &= ~SEGU_HASANON;
}
/*
* Reserve sufficient swap space for this slot. We'll
* actually allocate it in the loop below, but reserving it
* here allows us to back out more gracefully than if we
* had an allocation failure in the body of the loop.
*
* Note that we don't need swap space for the red zone page.
*/
if (anon_resv(ptob(SEGU_PAGES - 1)) == 0) {
if (segu_debug)
printf("segu_get: no swap space available\n");
sup->su_next = sdp->usd_free;
sdp->usd_free = sup;
return (NULL);
}
/*
* Allocate pages, avoiding allocating one for the red zone.
*/
pp = rm_allocpage(segu, segu_stom(vbase), ptob(SEGU_PAGES - 1), 1);
if (pp == NULL) {
if (segu_debug)
printf("segu_get: no pages available\n");
/*
* Give back the resources we've acquired.
*/
anon_unresv(ptob(SEGU_PAGES - 1));
sup->su_next = sdp->usd_free;
sdp->usd_free = sup;
return (NULL);
}
/*
* Allocate swap space.
*
* Because the interface for getting swap slots is designed
* to handle only one page at a time, we must deal with each
* page in the u-area individually instead of allocating a
* contiguous chunk of swap space for the whole thing as we
* would prefer.
*
* This being the case, we actually do more in this loop than
* simply allocate swap space. As we handle each page, we
* complete its setup.
*/
for (i = 0, va = vbase; i < SEGU_PAGES; i++, va += ptob(1)) {
register struct anon *ap;
struct vnode *vp;
u_int off;
struct page *opp;
/*
* If this page is the red zone page, we don't need swap
* space for it. Note that we skip over the code that
* establishes MMU mappings, so that the page remains
* invalid.
*/
if (i == SEGU_REDZONE) {
sup->su_swaddr[i] = NULL;
continue;
}
/*
* Sanity check.
*/
if (pp == NULL)
panic("segu_get: not enough pages");
/*
* Get a swap slot.
*/
if ((ap = anon_alloc()) == NULL)
panic("segu_get: swap allocation failure");
sup->su_swaddr[i] = ap;
/*
* Tie the next page to the swap slot.
*/
swap_xlate(ap, &vp, &off);
while (page_enter(pp, vp, off)) {
/*
* The page was already tied to something
* else that we have no record of. Since
* the page we wish to be named by <vp, off>
* already exists, we abort the old page.
*/
struct page *p1 = page_find(vp, off);
if (p1 != NULL) {
page_wait(p1);
if (p1->p_vnode == vp && p1->p_offset == off)
page_abort(p1);
}
}
/*
* Page_enter has set the page's lock bit. Since it's
* kept as well, this is just a nuisance.
*/
page_unlock(pp);
/*
* Mark the page for long term keep and release the
* short term claim that rm_allocpage established.
*
* XXX: When page_pp_lock returns a success/failure
* indication, we'll probably want to panic if
* it fails.
*/
(void) page_pp_lock(pp, 0, 1);
/*
* Load and lock an MMU translation for the page.
*/
hat_memload(segu, va, pp, UPAGE_PROT, 1);
/*
* Prepare to use the next page.
*/
opp = pp;
page_sub(&pp, pp);
PAGE_RELE(opp);
}
/*
* Finally, mark this slot as allocated, locked, and in possession
* of anon resources.
*/
sup->su_flags = SEGU_ALLOCATED | SEGU_LOCKED | SEGU_HASANON;
/*
* Return the address of the base of the mapped part of
* the slot.
*/
return (segu_stom(vbase));
}
/*
* Reclaim resources for a single slot.
*
* When used for u-area, called at process destruction time. Guaranteed not
* to sleep, so that it can be called while running on the interrupt stack.
*
* N.B.: Since this routine deallocates all of the slot's resources,
* callers can't count on the resources remaining accessible. In
* particular, any stack contained in the slot will vanish, so we'd
* better not be running on that stack.
*
* N.B.: Since the routine can't sleep, it must defer deallocation of anon
* resources associated with pages that have i/o in progress. (Anon_decref
* calls page_abort, which will sleep until the i/o is complete.)
*
* We can't simply undo everything that segu_get did directly,
* because someone else may have acquired a reference to one or
* more of the associated pages in the meantime.
*/
void
segu_release(vaddr)
addr_t vaddr;
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
addr_t vbase = segu_mtos(vaddr);
addr_t va;
struct segu_data *sup;
struct segu_data **supp;
int slot;
int i;
int doing_io = 0;
register int locked;
/*
* Get the slot corresponding to this virtual address.
*/
if ((slot = segu_getslot(segu, vaddr, 1)) == -1)
panic("segu_release: bad addr");
sup = &sdp->usd_slots[slot];
/*
* XXX: Do we need to lock this slot's pages while we're
* messing with them? What can happen once we decrement
* the keep count below?
*/
/*
* Examine the slot's pages looking for i/o in progress.
* While doing so, undo locks.
*/
locked = sup->su_flags & SEGU_LOCKED;
for (i = 0, va = vbase; i < SEGU_PAGES; i++, va += ptob(1)) {
register struct page *pp;
struct vnode *vp;
u_int off;
register int s;
if (i == SEGU_REDZONE)
continue;
if (locked)
hat_unlock(segu, va);
/*
* Find the page associated with this part of the
* slot, tracking it down through its associated swap
* space.
*/
swap_xlate(sup->su_swaddr[i], &vp, &off);
/*
* Prevent page status from changing.
*/
s = splvm();
if ((pp = page_exists(vp, off)) == NULL) {
/*
* The page no longer exists; this is fine
* unless we had it locked.
*/
if (locked)
panic("segu_release: missing locked page");
else
continue;
}
/*
* See whether the page is quiescent.
*/
if (pp->p_keepcnt != 0)
doing_io = 1;
/*
* Make this page available to vultures.
*/
if (locked)
page_pp_unlock(pp, 0);
(void) splx(s);
}
/*
* Unload the mmu translations for this slot.
*/
hat_unload(segu, vaddr, ptob(SEGU_PAGES - 1));
/*
* Provided that all of the pages controlled by this segment are
* quiescent, release our claim on the associated anon resources and
* swap space.
*/
if (!doing_io) {
anon_free(sup->su_swaddr, ptob(SEGU_PAGES));
anon_unresv(ptob(SEGU_PAGES - 1));
sup->su_flags &= ~SEGU_HASANON;
} else
sup->su_flags |= SEGU_HASANON;
/*
* Mark the slot as unallocated and unlocked and put it back on the
* free list. Keep the free list sorted by slot address, to minimize
* fragmentation of seg_u's virtual address range. (This makes a
* difference on some architectures; e.g., by making it possible to
* use fewer page table entries.) This code counts on the slot
* address being a monotonically increasing function of indices of
* entries in the usd_slots array.
*/
sup->su_flags &= ~(SEGU_ALLOCATED|SEGU_LOCKED);
for (supp = &sdp->usd_free; *supp != NULL && *supp < sup;
supp = &(*supp)->su_next)
continue;
sup->su_next = *supp;
*supp = sup;
}
/*
* Private routines for use by seg_u operations.
*/
/*
* Verify that the range designated by vaddr and len lies completely
* within the mapped part of a single allocated slot. If so, return
* the slot's index; otherwise return -1.
*/
static int
segu_getslot(seg, vaddr, len)
register struct seg *seg;
addr_t vaddr;
u_int len;
{
register int slot;
register struct segu_segdata *sdp;
register struct segu_data *sup;
addr_t vlast;
addr_t vmappedbase;
sdp = (struct segu_segdata *)seg->s_data;
/*
* Make sure the base is in range of the segment as a whole.
*/
if (vaddr < seg->s_base || vaddr >= seg->s_base + seg->s_size)
return (-1);
/*
* Figure out what slot the address lies in.
*/
slot = (vaddr - seg->s_base) / ptob(SEGU_PAGES);
sup = &sdp->usd_slots[slot];
/*
* Make sure the end of the range falls in the same slot.
*/
vlast = vaddr + len - 1;
if ((vlast - seg->s_base) / ptob(SEGU_PAGES) != slot)
return (-1);
/*
* Nobody has any business touching this slot if it's not currently
* allocated.
*/
if (!(sup->su_flags & SEGU_ALLOCATED))
return (-1);
/*
* Finally, verify that the range is completely in the mapped part
* of the slot.
*/
vmappedbase = segu_stom(seg->s_base + ptob(SEGU_PAGES) * slot);
if (vaddr < vmappedbase || vlast >= vmappedbase + ptob(SEGU_PAGES - 1))
return (-1);
return (slot);
}
/*
* Unlock intra-slot resources in the range given by vaddr and len.
* Assumes that the range is known to fall entirely within the mapped
* part of the slot given as argument and that the slot itself is
* allocated.
*/
static int
segu_softunlock(seg, vaddr, len, slot, rw)
struct seg *seg;
addr_t vaddr;
u_int len;
int slot;
enum seg_rw rw;
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
register struct segu_data
*sup = &sdp->usd_slots[slot];
register addr_t va;
addr_t vlim;
register u_int i;
/*
* Loop through the pages in the given range.
*/
va = (addr_t)((u_int)vaddr & PAGEMASK);
len = roundup(len, ptob(1));
vlim = va + len;
/* Calculate starting page index within slot. */
i = (va - (seg->s_base + slot * ptob(SEGU_PAGES))) / ptob(1);
for ( ; va < vlim; va += ptob(1), i++) {
register struct page *pp;
struct vnode *vp;
u_int off;
/*
* Unlock our MMU translation for this page.
*
* XXX: Is there any problem with attempting to unlock
* a translation that isn't locked?
*/
hat_unlock(seg, va);
/*
* Unload it.
*/
hat_unload(seg, va, ptob(1));
/*
* Find the page associated with this part of the
* slot, tracking it down through its associated swap
* space.
*/
swap_xlate(sup->su_swaddr[i], &vp, &off);
if ((pp = page_find(vp, off)) == NULL)
panic("segu_softunlock: missing page");
/*
* Release our long-term claim on the page.
*/
page_pp_unlock(pp, 0);
/*
* If we're "hard" swapping (i.e. we need pages) and
* nobody's using the page any more and it's dirty,
* unlocked, and not kept, push it asynchronously rather
* than waiting for the pageout daemon to find it.
*/
hat_pagesync(pp);
if (rw == S_WRITE && pp->p_mapping == NULL &&
pp->p_keepcnt == 0 && !pp->p_lock && pp->p_mod) {
/*
* XXX: Want most powerful credentials we can
* get. Punt for now.
*/
(void) VOP_PUTPAGE(vp, off, ptob(1), B_ASYNC | B_FREE,
(struct ucred *)NULL);
}
}
return (0);
}
/*
* Load and possibly lock intra-slot resources in the range given
* by vaddr and len. Assumes that the range is known to fall entirely
* within the mapped part of the slot given as argument and that the
* slot itself is allocated.
*/
static int
segu_softload(seg, vaddr, len, slot, lock)
struct seg *seg;
addr_t vaddr;
u_int len;
int slot;
int lock;
{
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
register struct segu_data
*sup = &sdp->usd_slots[slot];
register addr_t va;
addr_t vlim;
register u_int i;
/*
* Loop through the pages in the given range.
*/
va = (addr_t)((u_int)vaddr & PAGEMASK);
vaddr = va;
len = roundup(len, ptob(1));
vlim = va + len;
/* Calculate starting page index within slot. */
i = (va - (seg->s_base + slot * ptob(SEGU_PAGES))) / ptob(1);
for ( ; va < vlim; va += ptob(1), i++) {
struct page *pl[2];
struct vnode *vp;
u_int off;
register int err;
/*
* Summon the page. If it's not resident, arrange
* for synchronous i/o to pull it in.
*
* XXX: Need read credentials value; for now we punt.
*/
swap_xlate(sup->su_swaddr[i], &vp, &off);
err = VOP_GETPAGE(vp, off, ptob(1), (u_int *)NULL,
pl, ptob(1), seg, va, S_READ, (struct ucred *)NULL);
if (err) {
/*
* Back out of what we've done so far.
*/
(void) segu_softunlock(seg, vaddr, (u_int)(va - vaddr),
slot, S_OTHER);
return (err);
}
cnt.v_swpin++;
/*
* The returned page list will have exactly one entry,
* which is returned to us already kept.
*/
/*
* Load an MMU translation for the page.
*/
hat_memload(seg, va, pl[0], UPAGE_PROT, lock);
/*
* If we're locking down resources, we need to increment
* the page's long term keep count. In any event, we
* need to decrement the (short term) keep count.
*
* XXX: When page_pp_lock returns a success/failure
* indication, we'll probably want to panic if
* it fails.
*/
if (lock)
(void) page_pp_lock(pl[0], 0, 1);
PAGE_RELE(pl[0]);
}
return (0);
}

130
sys/vm/seg_u.h Normal file
View File

@@ -0,0 +1,130 @@
/* @(#)seg_u.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1989 by Sun Microsystems, Inc.
*/
/*
* VM - U-area segment management
*
* This file contains definitions related to the u-area segment type.
*
* In its most general form, this segment type provides an interface
* for managing stacks that are protected by red zones, with the size
* of each stack independently specifiable. The current implementation
* is restricted in the following way.
* 1) It assumes that all stacks are the same size. In particular,
* it assumes that the stacks it manages are actually traditional
* u-areas, each containing a stack at one end.
*
* The segment driver manages a contiguous chunk of virtual space,
* carving it up into individual stack instances as required, and
* associating physical storage, MMU mappings, and swap space with
* each individual stack instance.
*
* As a matter of nomenclature, the individual allocation units are
* referred to as "slots".
*/
#ifndef _vm_seg_u_h
#define _vm_seg_u_h
/*
* The number of pages covered by a single seg_u slot.
*
* This value is the number of (software) pages in the u-area
* (including the stack in the u-area) plus an additional page
* for a stack red zone. If the seg_u implementation is ever
* generalized to allow variable-size stack allocation, this
* define will have to change.
*/
#define SEGU_PAGES (UPAGES/CLSIZE + 1)
/*
* XXX: This define belongs elsewhere, probably in <machine/param.h>.
*/
#define STACK_GROWTH_DOWN
/*
* Index of the red zone page and macros for interconverting between
* the base address of a slot and the base address of its accessible
* portion. (Nomenclature: Slot TO Mapped and vice versa.)
*/
#ifdef STACK_GROWTH_DOWN
#define SEGU_REDZONE 0
#define segu_stom(v) ((v) + ptob(1))
#define segu_mtos(v) ((v) - ptob(1))
#else STACK_GROWTH_DOWN
#define SEGU_REDZONE (SEGU_PAGES - 1)
#define segu_stom(v) (v)
#define segu_mtos(v) (v)
#endif STACK_GROWTH_DOWN
/*
* Private information per overall segu segment (as opposed
* to per slot within segment)
*
* XXX: We may wish to modify the free list to handle it as a queue
* instead of a stack; this possibly could reduce the frequency
* of cache flushes. If so, we would need a list tail pointer
* as well as a list head pointer.
*/
struct segu_segdata {
/*
* info needed:
* - slot vacancy info
* - a way of getting to state info for each slot
*/
struct segu_data *usd_slots; /* array of segu_data structs, */
/* one per slot */
struct segu_data *usd_free; /* slot free list head */
};
/*
* Private per-slot information.
*/
struct segu_data {
struct segu_data *su_next; /* free list link */
struct anon *su_swaddr[SEGU_PAGES]; /* disk address of u area */
/* when swapped */
u_int su_flags; /* state info: see below */
};
/*
* Flag bits
*
* When the SEGU_LOCKED bit is set, all the resources associated with the
* corresponding slot are locked in place, so that referencing addresses
* in the slot's range will not cause a fault. Clients using this driver
* to manage a u-area lock down the slot when the corresponding process
* becomes runnable and unlock it when the process is swapped out.
*/
#define SEGU_ALLOCATED 0x01 /* slot is in use */
#define SEGU_LOCKED 0x02 /* slot's resources locked */
#define SEGU_HASANON 0x04 /* slot has anon resources */
#ifdef KERNEL
extern struct seg *segu;
/*
* Public routine declarations not part of the segment ops vector go here.
*/
int segu_create(/* seg, argsp */);
addr_t segu_get();
void segu_release(/* vaddr */);
/*
* We allow explicit calls to segu_fault, even though it's part
* of the segu ops vector.
*/
faultcode_t segu_fault(/* seg, vaddr, len, type, rw */);
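/*
 * A minimal sketch of the slot life cycle (the caller and the `uaddr'
 * variable are assumptions; only the routines named above are real):
 */
#ifdef notdef
	addr_t uaddr;

	/* At process creation: claim a slot; NULL means out of resources. */
	uaddr = segu_get();

	/* While the process lives, segu_fault() with F_SOFTLOCK and */
	/* F_SOFTUNLOCK wires and releases the slot as it is swapped. */

	/* At process destruction: give the slot and its resources back. */
	if (uaddr != NULL)
		segu_release(uaddr);
#endif /* notdef */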
#endif KERNEL
#endif /*!_vm_seg_u_h*/

2460
sys/vm/seg_vn.c Normal file

File diff suppressed because it is too large Load Diff

108
sys/vm/seg_vn.h Normal file
View File

@@ -0,0 +1,108 @@
/* @(#)seg_vn.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_seg_vn_h
#define _vm_seg_vn_h
#include <vm/mp.h>
/*
* Structure whose pointer is passed to the segvn_create routine
*/
struct segvn_crargs {
struct vnode *vp; /* vnode mapped from */
u_int offset; /* starting offset of vnode for mapping */
struct ucred *cred; /* credentials */
u_char type; /* type of sharing done */
u_char prot; /* protections */
u_char maxprot; /* maximum protections */
struct anon_map *amp; /* anon mapping to map to */
};
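/*
 * A minimal sketch (hypothetical caller; the sharing and protection
 * values chosen here are assumptions) of filling in segvn_crargs and
 * passing it to as_map() together with segvn_create:
 */
#ifdef notdef
	struct segvn_crargs crargs;

	crargs.vp = vp;			/* vnode to map (assumed held) */
	crargs.offset = 0;
	crargs.cred = cred;
	crargs.type = MAP_PRIVATE;	/* private, copy-on-write mapping */
	crargs.prot = PROT_ALL;
	crargs.maxprot = PROT_ALL;
	crargs.amp = NULL;		/* no pre-existing anon_map */
	error = as_map(as, addr, len, segvn_create, (caddr_t)&crargs);
#endif /* notdef */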
/*
* The anon_map structure is used by the seg_vn driver to manage
* unnamed (anonymous) memory. When anonymous memory is shared,
* then the different segvn_data structures will point to the
* same anon_map structure. Also, if a segment is unmapped
* in the middle where an anon_map structure exists, the
* newly created segment will also share the anon_map structure,
* although the two segments will use different ranges of the
* anon array. When mappings are private (or shared with
* a reference count of 1), an unmap operation will free up
* a range of anon slots in the array given by the anon_map
* structure. Because of fragmentation due to this unmapping,
* we have to store the size of the anon array in the anon_map
* structure so that we can free everything when the reference
* count goes to zero.
*/
struct anon_map {
u_int refcnt; /* reference count on this structure */
u_int size; /* size in bytes mapped by the anon array */
struct anon **anon; /* pointer to an array of anon * pointers */
u_int swresv; /* swap space reserved for this anon_map */
u_int flags; /* anon_map flags (see below) */
};
/* anon_map flags */
#define AMAP_LOCKED 0x01 /* anon_map is locked */
#define AMAP_WANT 0x02 /* some process waiting on lock */
/*
* Lock and unlock anon_map if the segment has private pages. This
* is necessary to ensure that operations on the anon array (e.g., growing
* the array, or allocating an anon slot and assigning a page) are atomic.
*/
#define AMAP_LOCK(amp) { \
while ((amp)->flags & AMAP_LOCKED) { \
(amp)->flags |= AMAP_WANT; \
(void) sleep((caddr_t)(amp), PAMAP); \
} \
(amp)->flags |= AMAP_LOCKED; \
masterprocp->p_swlocks++; \
}
#define AMAP_UNLOCK(amp) { \
(amp)->flags &= ~AMAP_LOCKED; \
masterprocp->p_swlocks--; \
if ((amp)->flags & AMAP_WANT) { \
(amp)->flags &= ~AMAP_WANT; \
wakeup((caddr_t)(amp)); \
} \
}
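/*
 * A minimal usage sketch (the `amp' and `anon_index' locals are
 * assumptions): operations on the anon array are bracketed by the
 * lock so that allocating a slot and assigning it stay atomic.
 */
#ifdef notdef
	AMAP_LOCK(amp);
	if (amp->anon[anon_index] == NULL)
		amp->anon[anon_index] = anon_alloc();	/* may return NULL */
	AMAP_UNLOCK(amp);
#endif /* notdef */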
/*
* (Semi) private data maintained by the seg_vn driver per segment mapping
*/
struct segvn_data {
kmon_t lock;
u_char pageprot; /* true if per page protections present */
u_char prot; /* current segment prot if pageprot == 0 */
u_char maxprot; /* maximum segment protections */
u_char type; /* type of sharing done */
struct vnode *vp; /* vnode that segment mapping is to */
u_int offset; /* starting offset of vnode for mapping */
u_int anon_index; /* starting index into anon_map anon array */
struct anon_map *amp; /* pointer to anon share structure, if needed */
struct vpage *vpage; /* per-page information, if needed */
struct ucred *cred; /* mapping credentials */
u_int swresv; /* swap space reserved for this segment */
u_char advice; /* madvise flags for segment */
u_char pageadvice; /* true if per page advice set */
};
#ifdef KERNEL
int segvn_create(/* seg, argsp */);
extern struct seg_ops segvn_ops;
/*
* Provided as shorthand for creating user zfod segments
*/
extern caddr_t zfod_argsp;
extern caddr_t kzfod_argsp;
#endif KERNEL
#endif /*!_vm_seg_vn_h*/

35
sys/vm/swap.h Normal file
View File

@@ -0,0 +1,35 @@
/* @(#)swap.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#ifndef _vm_swap_h
#define _vm_swap_h
/*
* VM - virtual swap device.
*/
struct swapinfo {
struct vnode *si_vp; /* vnode for this swap device */
u_int si_size; /* size (bytes) of this swap device */
struct anon *si_anon; /* pointer to anon array */
struct anon *si_eanon; /* pointer to end of anon array */
struct anon *si_free; /* anon free list for this vp */
int si_allocs; /* # of conseq. allocs from this area */
struct swapinfo *si_next; /* next swap area */
short *si_pid; /* parallel pid array for memory tool */
};
#define IS_SWAPVP(vp) (((vp)->v_flag & VISSWAP) != 0)
#ifdef KERNEL
int swap_init(/* vp */);
struct anon *swap_alloc();
void swap_free(/* ap */);
void swap_xlate(/* ap, vpp, offsetp */);
struct anon *swap_anon(/* vp, offset */);
#endif
#endif /*!_vm_swap_h*/

509
sys/vm/vm_anon.c Normal file
View File

@@ -0,0 +1,509 @@
/* @(#)vm_anon.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
/*
* VM - anonymous pages.
*
* This layer sits immediately above the vm_swap layer. It manages
* physical pages that have no permanent identity in the file system
* name space, using the services of the vm_swap layer to allocate
* backing storage for these pages. Since these pages have no external
* identity, they are discarded when the last reference is removed.
*
* An important function of this layer is to manage low-level sharing
* of pages that are logically distinct but that happen to be
* physically identical (e.g., the corresponding pages of the processes
* resulting from a fork before one process or the other changes their
* contents). This pseudo-sharing is present only as an optimization
* and is not to be confused with true sharing in which multiple
* address spaces deliberately contain references to the same object;
* such sharing is managed at a higher level.
*
* The key data structure here is the anon struct, which contains a
* reference count for its associated physical page and a hint about
* the identity of that page. Anon structs typically live in arrays,
* with an instance's position in its array determining where the
* corresponding backing storage is allocated; however, the swap_xlate()
* routine abstracts away this representation information so that the
* rest of the anon layer need not know it. (See the swap layer for
* more details on anon struct layout.)
*
* In future versions of the system, the association between an
* anon struct and its position on backing store will change so that
* we don't require backing store for all anonymous pages in the system.
* This is an important consideration for large memory systems.
* We can also use this technique to delay binding physical locations
* to anonymous pages until pageout/swapout time where we can make
* smarter allocation decisions to improve anonymous klustering.
*
* Many of the routines defined here take a (struct anon **) argument,
* which allows the code at this level to manage anon pages directly,
* so that callers can regard anon structs as opaque objects and not be
* concerned with assigning or inspecting their contents.
*
* Clients of this layer refer to anon pages indirectly. That is, they
* maintain arrays of pointers to anon structs rather than maintaining
* anon structs themselves. The (struct anon **) arguments mentioned
* above are pointers to entries in these arrays. It is these arrays
* that capture the mapping between offsets within a given segment and
* the corresponding anonymous backing storage address.
*/
#include <sys/param.h>
#include <sys/user.h> /* XXX - for rusage */
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/ucred.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/trace.h>
#include <sys/debug.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/swap.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/mp.h>
struct anoninfo anoninfo;
#ifdef KMON_DEBUG
kmon_t anon_lock;
#endif /* KMON_DEBUG */
int anon_resv_debug = 0;
int anon_enforce_resv = 1;
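/*
 * A minimal sketch of the client usage pattern described in the block
 * comment at the top of this file.  The segment driver, its `seg'
 * argument, and the helper name are hypothetical; only the anon layer
 * calls are real.
 */
#ifdef notdef
static void
example_anon_client(seg)
	struct seg *seg;
{
	struct anon **anon_array;
	u_int nslots = btopr(seg->s_size);

	/* One anon pointer per page of the segment, initially NULL. */
	anon_array = (struct anon **)new_kmem_alloc(
	    nslots * sizeof (struct anon *), KMEM_SLEEP);
	bzero((caddr_t)anon_array, nslots * sizeof (struct anon *));

	/* Hand the address of one entry down to get a zero-filled page. */
	(void) anon_zero(seg, seg->s_base, &anon_array[0]);

	/* On teardown, drop every reference held through the array. */
	anon_free(anon_array, seg->s_size);
}
#endif /* notdef */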
/*
* Reserve anon space.
* Return non-zero on success.
*/
int
anon_resv(size)
u_int size;
{
anoninfo.ani_resv += btopr(size);
if (anoninfo.ani_resv > anoninfo.ani_max) {
if (anon_enforce_resv)
anoninfo.ani_resv -= btopr(size);
else if (anon_resv_debug)
printf("anon: swap space overcommitted by %d\n",
anoninfo.ani_resv - anoninfo.ani_max);
return (!anon_enforce_resv);
} else {
return (1);
}
}
/*
* Give back an anon reservation.
*/
void
anon_unresv(size)
u_int size;
{
anoninfo.ani_resv -= btopr(size);
if ((int)anoninfo.ani_resv < 0)
printf("anon: reservations below zero???\n");
}
/*
* Allocate an anon slot.
*/
struct anon *
anon_alloc()
{
register struct anon *ap;
kmon_enter(&anon_lock);
ap = swap_alloc();
if (ap != NULL) {
anoninfo.ani_free--;
ap->an_refcnt = 1;
ap->un.an_page = NULL;
}
kmon_exit(&anon_lock);
return (ap);
}
/*
* Decrement the reference count of an anon page.
* If reference count goes to zero, free it and
* its associated page (if any).
*/
static void
anon_decref(ap)
register struct anon *ap;
{
register struct page *pp;
struct vnode *vp;
u_int off;
if (--ap->an_refcnt == 0) {
/*
* If there is a page for this anon slot we will need to
* call page_abort to get rid of the vp association and
* put the page back on the free list as really free.
*/
swap_xlate(ap, &vp, &off);
pp = page_find(vp, off);
/*
* XXX - If we have a page, wait for its keepcnt to become
* zero, re-verify the identity before aborting it and
* freeing the swap slot. This ensures that any pending i/o
* always completes before the swap slot is freed.
*/
if (pp != NULL) {
if (pp->p_keepcnt != 0) {
page_wait(pp);
if (pp->p_vnode == vp && pp->p_offset == off)
page_abort(pp);
} else {
page_abort(pp);
}
}
kmon_enter(&anon_lock);
swap_free(ap);
anoninfo.ani_free++;
kmon_exit(&anon_lock);
}
}
/*
* Duplicate references to size bytes worth of anon pages.
* Used when duplicating a segment that contains private anon pages.
* This code assumes that the procedure calling this one has already used
* hat_chgprot() to disable write access to the range of addresses
* that *old actually refers to.
*/
void
anon_dup(old, new, size)
register struct anon **old, **new;
u_int size;
{
register int i;
i = btopr(size);
while (i-- > 0) {
if ((*new = *old) != NULL)
(*new)->an_refcnt++;
old++;
new++;
}
}
/*
* Free a group of "size" anon pages, size in bytes,
* and clear out the pointers to the anon entries.
*/
void
anon_free(app, size)
register struct anon **app;
u_int size;
{
register int i;
i = btopr(size);
while (i-- > 0) {
if (*app != NULL) {
anon_decref(*app);
*app = NULL;
}
app++;
}
}
/*
* Return the kept page(s) and protections back to the segment driver.
*/
int
anon_getpage(app, protp, pl, plsz, seg, addr, rw, cred)
struct anon **app;
u_int *protp;
struct page *pl[];
u_int plsz;
struct seg *seg;
addr_t addr;
enum seg_rw rw;
struct ucred *cred;
{
register struct page *pp, **ppp;
register struct anon *ap = *app;
struct vnode *vp;
u_int off;
int err;
extern int nopagereclaim;
register int s;
swap_xlate(ap, &vp, &off);
again:
pp = ap->un.an_page;
/*
* If the anon pointer has a page associated with it,
* see if it looks ok after raising priority to prevent
* it from being ripped away at interrupt level if on the
* free list. If the page is being paged in, wait for it
* to finish as we must return a list of pages since this
* routine acts like the VOP_GETPAGE routine does.
*/
s = splvm();
if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off &&
!pp->p_gone && pl != NULL) {
if (pp->p_intrans && (pp->p_pagein || nopagereclaim)) {
(void) splx(s);
page_wait(pp);
goto again; /* try again */
}
if (pp->p_free)
page_reclaim(pp);
(void) splx(s);
PAGE_HOLD(pp);
if (ap->an_refcnt == 1)
*protp = PROT_ALL;
else
*protp = PROT_ALL & ~PROT_WRITE;
pl[0] = pp;
pl[1] = NULL;
/* no one else accounted for it so we must */
u.u_ru.ru_minflt++;
return (0);
}
(void) splx(s);
/*
* Simply treat it as a vnode fault on the anon vp.
*/
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_ANON);
err = VOP_GETPAGE(vp, off, PAGESIZE, protp, pl, plsz,
seg, addr, rw, cred);
if (err == 0 && pl != NULL) {
for (ppp = pl; (pp = *ppp++) != NULL; ) {
if (pp->p_offset == off) {
ap->un.an_page = pp;
break;
}
}
if (ap->an_refcnt != 1)
*protp &= ~PROT_WRITE; /* make read-only */
}
return (err);
}
int npagesteal;
/*
* Turn a reference to an object or shared anon page
* into a private page with a copy of the data from the
* original page. The original page is always kept, locked
* and loaded in the MMU by the caller. This routine unlocks
* the translation and releases the original page, if it isn't
* being stolen, before returning to the caller.
*/
struct page *
anon_private(app, seg, addr, opp, oppflags)
struct anon **app;
struct seg *seg;
addr_t addr;
struct page *opp;
u_int oppflags;
{
register struct anon *old = *app;
register struct anon *new;
register struct page *pp;
struct vnode *vp;
u_int off;
ASSERT(opp->p_mapping);
ASSERT(opp->p_keepcnt);
new = anon_alloc();
if (new == (struct anon *)NULL) {
rm_outofanon();
hat_unlock(seg, addr);
PAGE_RELE(opp);
return ((struct page *)NULL); /* out of swap space */
}
*app = new;
swap_xlate(new, &vp, &off);
again:
pp = page_lookup(vp, off);
if (pp == NULL && (oppflags & STEAL_PAGE) &&
opp->p_keepcnt == 1 && opp->p_mod == 0) {
pp = opp;
hat_unlock(seg, addr); /* unlock translation */
hat_pageunload(pp); /* unload all translations */
page_hashout(pp); /* destroy old name for page */
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
if (page_enter(pp, vp, off)) /* rename as anon page */
panic("anon private steal");
new->un.an_page = pp;
pg_setmod(pp, 1);
page_unlock(pp);
/*
* If original page is ``locked'', relinquish
* claim for the extra page.
*/
if (oppflags & LOCK_PAGE)
page_subclaim(1);
npagesteal++;
return (pp);
}
if (pp == NULL) {
/*
* Normal case, need to allocate new page frame.
*/
pp = rm_allocpage(seg, addr, PAGESIZE, 1);
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
if (page_enter(pp, vp, off)) {
PAGE_RELE(pp);
goto again; /* try again */
}
} else {
/*
* Already found a page with the right identity -- just
* use it if the `keepcnt' is 0. If not, wait for the
* `keepcnt' to become 0, re-verify the identity before
* using the page.
*/
if (pp->p_keepcnt != 0) {
page_wait(pp);
if (pp->p_vnode != vp || pp->p_offset != off)
goto again;
}
page_lock(pp);
PAGE_HOLD(pp);
}
new->un.an_page = pp;
/*
* Now copy the contents from the original page which
* is loaded and locked in the MMU by the caller to
* prevent yet another page fault.
*/
pp->p_intrans = pp->p_pagein = 1;
pagecopy(addr, pp);
pp->p_intrans = pp->p_pagein = 0;
pg_setmod(pp, 1); /* mark as modified */
page_unlock(pp);
/*
* If original page is ``locked'', relinquish claim
* for an extra page reserved for the private copy
* in case of a copy-on-write. Lock the new page
* ignoring the current reservation check.
*/
if (oppflags & LOCK_PAGE) {
if (old == NULL)
page_pp_unlock(opp, 1);
else
page_pp_unlock(opp, 0);
(void) page_pp_lock(pp, 0, 0);
}
/*
* Unlock translation to the original page since
* it can be unloaded if the page is aborted.
*/
hat_unlock(seg, addr);
/*
* Ok, now release the original page, or else the
* process will sleep forever in anon_decref()
* waiting for the `keepcnt' to become 0.
*/
PAGE_RELE(opp);
/*
* If we copied away from an anonymous page, then
* we are one step closer to freeing up an anon slot.
*/
if (old != NULL)
anon_decref(old);
return (pp);
}
/*
* Allocate a zero-filled anon page.
*/
struct page *
anon_zero(seg, addr, app)
struct seg *seg;
addr_t addr;
struct anon **app;
{
register struct anon *ap;
register struct page *pp;
struct vnode *vp;
u_int off;
*app = ap = anon_alloc();
if (ap == NULL) {
rm_outofanon();
return ((struct page *)NULL);
}
swap_xlate(ap, &vp, &off);
again:
pp = page_lookup(vp, off);
if (pp == NULL) {
/*
* Normal case, need to allocate new page frame.
*/
pp = rm_allocpage(seg, addr, PAGESIZE, 1);
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
if (page_enter(pp, vp, off)) {
PAGE_RELE(pp);
goto again; /* try again */
}
} else {
/*
* Already found a page with the right identity -- just
* use it if the `keepcnt' is 0. If not, wait for the
* `keepcnt' to become 0, re-verify the identity before
* using the page.
*/
if (pp->p_keepcnt != 0) {
page_wait(pp);
if (pp->p_vnode != vp || pp->p_offset != off)
goto again;
}
page_lock(pp);
PAGE_HOLD(pp);
}
ap->un.an_page = pp;
pagezero(pp, 0, PAGESIZE);
cnt.v_zfod++;
pg_setmod(pp, 1); /* mark as modified so pageout writes back */
page_unlock(pp);
return (pp);
}
/*
* This gets called by the seg_vn driver's unload routine,
* which is called by the hat code when it decides to
* unload a particular mapping.
*/
void
anon_unloadmap(ap, ref, mod)
struct anon *ap;
u_int ref, mod;
{
struct vnode *vp;
u_int off;
swap_xlate(ap, &vp, &off);
pvn_unloadmap(vp, off, ref, mod);
}

898
sys/vm/vm_as.c Normal file
View File

@@ -0,0 +1,898 @@
/* @(#)vm_as.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - address spaces.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <machine/mmu.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
/*
* Variables for maintaining the free list of address space structures.
*/
static struct as *as_freelist;
static int as_freeincr = 8;
/*
* Find a segment containing addr. as->a_seglast is used as a
* cache to remember the last segment hit we had here. We
* first check to see if seglast is another hit, and if not we
* determine whether to start from the head of the segment list
* (as->a_segs) or from seglast and in which direction to search.
*/
struct seg *
as_segat(as, addr)
register struct as *as;
register addr_t addr;
{
register struct seg *seg, *sseg;
register forward;
if (as->a_segs == NULL) /* address space has no segments */
return (NULL);
if (as->a_seglast == NULL)
as->a_seglast = as->a_segs;
seg = as->a_seglast;
forward = 0;
if (seg->s_base <= addr) {
if (addr < (seg->s_base + seg->s_size))
return (seg); /* seglast contained addr */
sseg = as->a_segs->s_prev;
if ((addr - seg->s_base) >
((sseg->s_base + sseg->s_size) - addr)) {
seg = sseg;
sseg = as->a_seglast;
} else {
seg = as->a_seglast->s_next;
sseg = as->a_segs;
forward++;
}
} else {
if ((addr - as->a_segs->s_base) > (seg->s_base - addr)) {
seg = seg->s_prev;
sseg = as->a_segs->s_prev;
} else {
sseg = seg;
seg = as->a_segs;
forward++;
}
}
do {
if (seg->s_base <= addr &&
addr < (seg->s_base + seg->s_size)) {
as->a_seglast = seg;
return (seg);
}
if (forward) {
seg = seg->s_next;
if (seg->s_base > addr)
break;
} else {
seg = seg->s_prev;
if (addr > (seg->s_base + seg->s_size))
break;
}
} while (seg != sseg);
return (NULL);
}
/*
* Allocate and initialize an address space data structure.
* We call hat_alloc to allow any machine dependent
* information in the hat structure to be initialized.
*/
struct as *
as_alloc()
{
struct as *as;
as = (struct as *)new_kmem_fast_alloc((caddr_t *)&as_freelist,
sizeof (*as_freelist), as_freeincr, KMEM_SLEEP);
bzero((caddr_t)as, sizeof (*as));
hat_alloc(as);
return (as);
}
/*
* Free an address space data structure.
* Need to free the hat first and then
* all the segments on this as and finally
* the space for the as struct itself.
*/
void
as_free(as)
struct as *as;
{
hat_free(as);
while (as->a_segs != NULL)
seg_free(as->a_segs);
kmem_fast_free((caddr_t *)&as_freelist, (caddr_t)as);
}
struct as *
as_dup(as)
register struct as *as;
{
register struct as *newas;
register struct seg *seg, *sseg, *newseg;
newas = as_alloc();
sseg = seg = as->a_segs;
if (seg != NULL) {
do {
newseg = seg_alloc(newas, seg->s_base, seg->s_size);
if (newseg == NULL) {
as_free(newas);
return (NULL);
}
if ((*seg->s_ops->dup)(seg, newseg)) {
as_free(newas);
return (NULL);
}
seg = seg->s_next;
} while (seg != sseg);
}
return (newas);
}
/*
* Add a new segment to the address space, sorting
* it into the proper place in the linked list.
*/
enum as_res
as_addseg(as, new)
register struct as *as;
register struct seg *new;
{
register struct seg *seg;
register addr_t base;
seg = as->a_segs;
if (seg == NULL) {
new->s_next = new->s_prev = new;
as->a_segs = new;
} else {
/*
* Figure out where to add the segment to keep list sorted
*/
base = new->s_base;
do {
if (base < seg->s_base) {
if (base + new->s_size > seg->s_base)
return (A_BADADDR);
break;
}
if (base < seg->s_base + seg->s_size)
return (A_BADADDR);
seg = seg->s_next;
} while (seg != as->a_segs);
new->s_next = seg;
new->s_prev = seg->s_prev;
seg->s_prev = new;
new->s_prev->s_next = new;
if (base < as->a_segs->s_base)
as->a_segs = new; /* new is at front */
}
return (A_SUCCESS);
}
/*
* Handle a ``fault'' at addr for size bytes.
*/
faultcode_t
as_fault(as, addr, size, type, rw)
struct as *as;
addr_t addr;
u_int size;
enum fault_type type;
enum seg_rw rw;
{
register struct seg *seg;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
register u_int ssize;
register addr_t addrsav;
struct seg *segsav;
faultcode_t res = 0;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (FC_NOMAP);
addrsav = raddr;
segsav = seg;
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base) {
res = FC_NOMAP;
break;
}
}
if (raddr + rsize > seg->s_base + seg->s_size)
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
res = (*seg->s_ops->fault)(seg, raddr, ssize, type, rw);
if (res != 0)
break;
}
/*
* If we failed and we were locking, unlock the pages we faulted.
* (Maybe we should just panic if we are SOFTLOCKing
* or even SOFTUNLOCKing right here...)
*/
if (res != 0 && type == F_SOFTLOCK) {
for (seg = segsav; addrsav < raddr; addrsav += ssize) {
if (addrsav >= seg->s_base + seg->s_size)
seg = seg->s_next; /* goto next seg */
/*
* Now call the fault routine again to perform the
* unlock using S_OTHER instead of the rw variable
* since we never got a chance to touch the pages.
*/
if (raddr > seg->s_base + seg->s_size)
ssize = seg->s_base + seg->s_size - addrsav;
else
ssize = raddr - addrsav;
(void) (*seg->s_ops->fault)(seg, addrsav, ssize,
F_SOFTUNLOCK, S_OTHER);
}
}
return (res);
}
/*
* Asynchronous ``fault'' at addr for size bytes.
*/
faultcode_t
as_faulta(as, addr, size)
struct as *as;
addr_t addr;
u_int size;
{
register struct seg *seg;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
faultcode_t res;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (FC_NOMAP);
for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base)
return (FC_NOMAP);
}
res = (*seg->s_ops->faulta)(seg, raddr);
if (res != 0)
return (res);
}
return (0);
}
/*
* Set the virtual mapping for the interval from [addr : addr + size)
* in address space `as' to have the specified protection.
* It is ok for the range to cross over several segments,
* as long as they are contiguous.
*/
enum as_res
as_setprot(as, addr, size, prot)
struct as *as;
addr_t addr;
u_int size;
u_int prot;
{
register struct seg *seg;
register u_int ssize;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
enum as_res res = A_SUCCESS;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (A_BADADDR);
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base) {
res = A_BADADDR;
break;
}
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
if ((*seg->s_ops->setprot)(seg, raddr, ssize, prot) != 0)
res = A_OPFAIL; /* keep on going */
}
return (res);
}
/*
* Check to make sure that the interval from [addr : addr + size)
* in address space `as' has at least the specified protection.
* It is ok for the range to cross over several segments, as long
* as they are contiguous.
*/
enum as_res
as_checkprot(as, addr, size, prot)
struct as *as;
addr_t addr;
u_int size;
u_int prot;
{
register struct seg *seg;
register u_int ssize;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (A_BADADDR);
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next; /* goto next seg */
if (raddr != seg->s_base)
return (A_BADADDR);
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
if ((*seg->s_ops->checkprot)(seg, raddr, ssize, prot) != 0)
return (A_OPFAIL);
}
return (A_SUCCESS);
}
enum as_res
as_unmap(as, addr, size)
register struct as *as;
addr_t addr;
u_int size;
{
register struct seg *seg, *seg_next;
register addr_t raddr, eaddr;
register u_int ssize;
addr_t obase;
raddr = (addr_t)((u_int)addr & PAGEMASK);
eaddr = (addr_t)(((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK);
seg = as->a_segs;
if (seg != NULL) {
for (; raddr < eaddr; seg = seg_next) {
/*
* Save next segment pointer since seg can be
* destroyed during the segment unmap operation.
* We also have to save the old base below.
*/
seg_next = seg->s_next;
if (raddr >= seg->s_base + seg->s_size) {
if (seg->s_base >= seg_next->s_base)
break; /* looked at all segs */
continue; /* not there yet */
}
if (eaddr <= seg->s_base)
break; /* all done */
if (raddr < seg->s_base)
raddr = seg->s_base; /* skip to seg start */
if (eaddr > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = eaddr - raddr;
obase = seg->s_base;
if ((*seg->s_ops->unmap)(seg, raddr, ssize) != 0)
return (A_OPFAIL);
raddr += ssize;
/*
* Carefully check to see if we
* have looked at all the segments.
*/
if (as->a_segs == NULL || obase >= seg_next->s_base)
break;
}
}
return (A_SUCCESS);
}
int
as_map(as, addr, size, crfp, argsp)
struct as *as;
addr_t addr;
u_int size;
int (*crfp)();
caddr_t argsp;
{
register struct seg *seg;
enum as_res res;
int error;
seg = seg_alloc(as, addr, size);
if (seg == NULL)
return (ENOMEM);
/*
* Remember that this was the most recently touched segment.
* If the create routine merges this segment into an existing
* segment, seg_free will adjust the a_seglast hint.
*/
as->a_seglast = seg;
error = (*crfp)(seg, argsp);
/*
* If some error occurred during the create function, destroy
* this segment. Otherwise, if the address space is locked,
* establish memory locks for the new segment. Translate
* error returns as appropriate.
*/
if (error)
seg_free(seg);
else if (as->a_paglck) {
res = as_ctl(as, seg->s_base, seg->s_size, MC_LOCK, (caddr_t)0);
if (res == A_RESOURCE)
error = EAGAIN;
else if (res != A_SUCCESS)
error = EIO;
if (error)
(void) as_unmap(as, addr, size);
}
return (error);
}
/*
* Find a hole of at least size minlen within [base, base+len).
* If flags specifies AH_HI, the hole will have the highest possible address
* in the range. Otherwise, it will have the lowest possible address.
* If flags specifies AH_CONTAIN, the hole will contain the address addr.
* If an adequate hole is found, base and len are set to reflect the part of
* the hole that is within range, and A_SUCCESS is returned. Otherwise,
* A_OPFAIL is returned.
* XXX This routine is not correct when base+len overflows addr_t.
*/
/* VARARGS5 */
enum as_res
as_hole(as, minlen, basep, lenp, flags, addr)
struct as *as;
register u_int minlen;
addr_t *basep;
u_int *lenp;
int flags;
addr_t addr;
{
register addr_t lobound = *basep;
register addr_t hibound = lobound + *lenp;
register struct seg *sseg = as->a_segs;
register struct seg *lseg, *hseg;
register addr_t lo, hi;
register int forward;
if (sseg == NULL)
if (valid_va_range(basep, lenp, minlen, flags & AH_DIR))
return (A_SUCCESS);
else
return (A_OPFAIL);
/*
* Set up to iterate over all the inter-segment holes in the given
* direction. lseg is NULL for the lowest-addressed hole and hseg is
* NULL for the highest-addressed hole. If moving backwards, we reset
* sseg to denote the highest-addressed segment.
*/
forward = (flags & AH_DIR) == AH_LO;
if (forward) {
lseg = NULL;
hseg = sseg;
} else {
sseg = sseg->s_prev;
hseg = NULL;
lseg = sseg;
}
for (;;) {
/*
* Set lo and hi to the hole's boundaries. (We should really
* use MAXADDR in place of hibound in the expression below,
* but can't express it easily; using hibound in its place is
* harmless.)
*/
lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
hi = (hseg == NULL) ? hibound : hseg->s_base;
/*
* If the iteration has moved past the interval from lobound
* to hibound it's pointless to continue.
*/
if ((forward && lo > hibound) || (!forward && hi < lobound))
break;
else if (lo > hibound || hi < lobound)
goto cont;
/*
* Candidate hole lies at least partially within the allowable
* range. Restrict it to fall completely within that range,
* i.e., to [max(lo, lobound), min(hi, hibound)).
*/
if (lo < lobound)
lo = lobound;
if (hi > hibound)
hi = hibound;
/*
* Verify that the candidate hole is big enough and meets
* hardware constraints.
*/
*basep = lo;
*lenp = hi - lo;
if (valid_va_range(basep, lenp, minlen,
forward ? AH_LO : AH_HI) &&
((flags & AH_CONTAIN) == 0 ||
(*basep <= addr && *basep + *lenp > addr)))
return (A_SUCCESS);
cont:
/*
* Move to the next hole.
*/
if (forward) {
lseg = hseg;
if (lseg == NULL)
break;
hseg = hseg->s_next;
if (hseg == sseg)
hseg = NULL;
} else {
hseg = lseg;
if (hseg == NULL)
break;
lseg = lseg->s_prev;
if (lseg == sseg)
lseg = NULL;
}
}
return (A_OPFAIL);
}
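/*
 * A minimal usage sketch for as_hole() above (the search limit and
 * the placement policy are assumptions): find the highest hole of at
 * least `len' bytes below `limit' and place the new mapping at its top.
 */
#ifdef notdef
	addr_t base = 0;
	u_int hlen = (u_int)limit;

	if (as_hole(as, len, &base, &hlen, AH_HI, (addr_t)0) == A_SUCCESS)
		addr = base + hlen - len;
#endif /* notdef */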
/*
* Return the next range within [base, base+len) that is backed
* with "real memory". Skip holes and non-seg_vn segments.
* We're lazy and only return one segment at a time.
*/
enum as_res
as_memory(as, basep, lenp)
struct as *as;
addr_t *basep;
u_int *lenp;
{
register struct seg *seg, *sseg, *cseg = NULL;
register addr_t addr, eaddr, segend;
/* XXX - really want as_segatorabove? */
if (as->a_seglast == NULL)
as->a_seglast = as->a_segs;
addr = *basep;
eaddr = addr + *lenp;
sseg = seg = as->a_seglast;
if (seg != NULL) {
do {
if (seg->s_ops != &segvn_ops)
continue;
if (seg->s_base <= addr &&
addr < (segend = (seg->s_base + seg->s_size))) {
/* found a containing segment */
as->a_seglast = seg;
*basep = addr;
if (segend > eaddr)
*lenp = eaddr - addr;
else
*lenp = segend - addr;
return (A_SUCCESS);
} else if (seg->s_base > addr) {
if (cseg == NULL ||
cseg->s_base > seg->s_base)
/* save closest seg above */
cseg = seg;
}
} while ((seg = seg->s_next) != sseg);
}
if (cseg == NULL) /* ??? no segments in address space? */
return (A_OPFAIL);
/*
* Only found a close segment, see if there's
* a valid range we can return.
*/
if (cseg->s_base > eaddr)
return (A_BADADDR); /* closest segment is out of range */
as->a_seglast = cseg;
*basep = cseg->s_base;
if (cseg->s_base + cseg->s_size > eaddr)
*lenp = eaddr - cseg->s_base; /* segment contains eaddr */
else
*lenp = cseg->s_size; /* segment is between addr and eaddr */
return (A_SUCCESS);
}
/*
* Swap the pages associated with the address space as out to
* secondary storage, returning the number of bytes actually
* swapped.
*
* If we are not doing a "hard" swap (i.e. we're just getting rid
* of a deadwood process), unlock the segu, making it available to be
* paged out.
*
* The value returned is intended to correlate well with the process's
* memory requirements. Its usefulness for this purpose depends on
* how well the segment-level routines do at returning accurate
* information.
*/
u_int
as_swapout(as, hardswap)
struct as *as;
short hardswap;
{
register struct seg *seg, *sseg;
register u_int swpcnt = 0;
/*
* Kernel-only processes have given up their address
* spaces. Of course, we shouldn't be attempting to
* swap out such processes in the first place...
*/
if (as == NULL)
return (0);
/*
* Free all mapping resources associated with the address
* space. The segment-level swapout routines capitalize
* on this unmapping by scavenging pages that have become
* unmapped here.
*/
hat_free(as);
/*
* Call the swapout routines of all segments in the address
* space to do the actual work, accumulating the amount of
* space reclaimed.
*/
sseg = seg = as->a_segs;
if (hardswap && seg != NULL) {
do {
register struct seg_ops *ov = seg->s_ops;
/* for "soft" swaps, should we sync out segment instead? XXX */
if (ov->swapout != NULL)
swpcnt += (*ov->swapout)(seg);
} while ((seg = seg->s_next) != sseg);
}
return (swpcnt);
}
/*
* Determine whether data from the mappings in interval [addr : addr + size)
* are in the primary memory (core) cache.
*/
enum as_res
as_incore(as, addr, size, vec, sizep)
struct as *as;
addr_t addr;
u_int size;
char *vec;
u_int *sizep;
{
register struct seg *seg;
register u_int ssize;
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
u_int isize; /* iteration size */
*sizep = 0;
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = ((((u_int)addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
seg = as_segat(as, raddr);
if (seg == NULL)
return (A_BADADDR);
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next;
if (raddr != seg->s_base)
return (A_BADADDR);
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
*sizep += isize =
(*seg->s_ops->incore)(seg, raddr, ssize, vec);
if (isize != ssize)
return (A_OPFAIL);
vec += btoc(ssize);
}
return (A_SUCCESS);
}
/*
* Cache control operations over the interval [addr : addr + size) in
* address space "as".
*/
enum as_res
as_ctl(as, addr, size, func, arg)
struct as *as;
addr_t addr;
u_int size;
int func;
caddr_t arg;
{
register struct seg *seg; /* working segment */
register struct seg *fseg; /* first segment of address space */
register u_int ssize; /* size of seg */
register addr_t raddr; /* rounded addr counter */
register u_int rsize; /* rounded size counter */
enum as_res res; /* recursive result */
int r; /* local result */
/*
* Normalize addresses and sizes.
*/
raddr = (addr_t)((u_int)addr & PAGEMASK);
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
/*
* If these are address space lock/unlock operations, loop over
* all segments in the address space, as appropriate.
*/
if ((func == MC_LOCKAS) || (func == MC_UNLOCKAS)) {
if (func == MC_UNLOCKAS)
as->a_paglck = 0;
else {
if ((int)arg & MCL_FUTURE)
as->a_paglck = 1;
if (((int)arg & MCL_CURRENT) == 0)
return (A_SUCCESS);
}
for (fseg = NULL, seg = as->a_segs; seg != fseg;
seg = seg->s_next) {
if (fseg == NULL)
fseg = seg;
if ((res = as_ctl(as, seg->s_base, seg->s_size,
func == MC_LOCKAS ? MC_LOCK : MC_UNLOCK,
(caddr_t)0)) != A_SUCCESS)
return (res);
}
return (A_SUCCESS);
}
/*
* Get initial segment.
*/
if ((seg = as_segat(as, raddr)) == NULL)
return (A_BADADDR);
/*
* Loop over all segments. If a hole in the address range is
* discovered, then fail. For each segment, perform the appropriate
* control operation.
*/
while (rsize != 0) {
/*
* Make sure there's no hole, calculate the portion
* of the next segment to be operated over.
*/
if (raddr >= seg->s_base + seg->s_size) {
seg = seg->s_next;
if (raddr != seg->s_base)
return (A_BADADDR);
}
if ((raddr + rsize) > (seg->s_base + seg->s_size))
ssize = seg->s_base + seg->s_size - raddr;
else
ssize = rsize;
/*
* Dispatch on specific function.
*/
switch (func) {
/*
* Synchronize cached data from mappings with backing
* objects.
*/
case MC_SYNC:
if (r = (*seg->s_ops->sync)
(seg, raddr, ssize, (u_int)arg))
return (r == EPERM ? A_RESOURCE : A_OPFAIL);
break;
/*
* Lock pages in memory.
*/
case MC_LOCK:
if (r = (*seg->s_ops->lockop)(seg, raddr, ssize, func))
return (r == EAGAIN ? A_RESOURCE : A_OPFAIL);
break;
/*
* Unlock mapped pages.
*/
case MC_UNLOCK:
(void) (*seg->s_ops->lockop)(seg, raddr, ssize, func);
break;
/*
* Store VM advise for mapped pages in segment layer
*/
case MC_ADVISE:
(void) (*seg->s_ops->advise)(seg, raddr, ssize, arg);
break;
/*
* Can't happen.
*/
default:
panic("as_ctl");
}
rsize -= ssize;
raddr += ssize;
}
return (A_SUCCESS);
}
/*
* Inform the as of translation information associated with the given addr.
* This is currently only called if a_hatcallback == 1.
*/
void
as_hatsync(as, addr, ref, mod, flags)
struct as *as;
addr_t addr;
u_int ref;
u_int mod;
u_int flags;
{
struct seg *seg;
if (seg = as_segat(as, addr))
seg->s_ops->hatsync(seg, addr, ref, mod, flags);
}

122
sys/vm/vm_mp.c Normal file
View File

@@ -0,0 +1,122 @@
/* @(#)vm_mp.c 1.1 94/10/31 */
/*
* Copyright (c) 1986 by Sun Microsystems, Inc.
*/
/*
* VM - multiprocessor/ing support.
*
* Currently the kmon_enter() / kmon_exit() pair implements a
* simple monitor for objects protected by the appropriate lock.
* The kcv_wait() / kcv_broadcast() pair implements a simple
* condition variable which can be used for `sleeping'
* and `waking' inside a monitor if some resource
* is needed which is not available.
*
* XXX - this code is written knowing about the semantics
* of sleep/wakeup and UNIX scheduling on a uniprocessor machine.
*/
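/*
 * A minimal usage sketch (the lock, flag, and object names are
 * assumptions): a consumer waits inside the monitor until a resource
 * becomes available; the producer makes it available and broadcasts.
 */
#ifdef notdef
	/* Consumer: */
	kmon_enter(&obj_lock);
	while (!resource_ready)
		kcv_wait(&obj_lock, (char *)&resource_ready);
	/* ... operate on the protected object ... */
	kmon_exit(&obj_lock);

	/* Producer: */
	kmon_enter(&obj_lock);
	resource_ready = 1;
	kcv_broadcast(&obj_lock, (char *)&resource_ready);
	kmon_exit(&obj_lock);
#endif /* notdef */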
#ifdef KMON_DEBUG
#include <sys/param.h>
#include <vm/mp.h>
#define ISLOCKED 0x1
#define LOCKWANT 0x2
/*
* kmon_enter acts as a type of multiprocess semaphore,
* implementing a monitor where the lock represents
* the ability to operate on the associated object.
* For now, the lock/object association is done
* by convention only.
*/
void
kmon_enter(lk)
kmon_t *lk;
{
int s;
s = spl6();
while ((lk->dummy & ISLOCKED) != 0) {
#ifdef notnow
lk->dummy |= LOCKWANT;
(void) sleep((char *)lk, PSWP+1);
#else notnow
panic("kmon_enter");
#endif notnow
}
lk->dummy |= ISLOCKED;
(void) splx(s);
}
/*
* Release the lock associated with a monitor,
 * waking up anybody that has already decided
* to wait for this lock (monitor).
*/
void
kmon_exit(lk)
kmon_t *lk;
{
int s;
if ((lk->dummy & ISLOCKED) == 0) /* paranoid */
panic("kmon_exit not locked");
s = spl6();
lk->dummy &= ~ISLOCKED;
if ((lk->dummy & LOCKWANT) != 0) {
lk->dummy &= ~LOCKWANT;
wakeup((char *)lk);
}
(void) splx(s);
}
/*
* Wait for the named condition variable.
* Must already have the monitor lock when kcv_wait is called.
*/
void
kcv_wait(lk, cond)
kmon_t *lk;
char *cond;
{
int s;
if ((lk->dummy & ISLOCKED) == 0) /* paranoia */
panic("kcv_wait not locked");
s = spl6();
lk->dummy &= ~ISLOCKED; /* release lock */
(void) sleep(cond, PSWP+1);
if ((lk->dummy & ISLOCKED) != 0) /* more paranoia */
panic("kcv_wait locked");
lk->dummy |= ISLOCKED; /* reacquire lock */
(void) splx(s);
}
/*
* Wake up all processes waiting on the named condition variable.
*
* We just use current UNIX sleep/wakeup semantics to delay the actual
* context switching until later after we have released the lock.
*/
void
kcv_broadcast(lk, cond)
kmon_t *lk;
char *cond;
{
if ((lk->dummy & ISLOCKED) == 0)
panic("kcv_broadcast");
wakeup(cond);
}
#endif /* KMON_DEBUG */
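
/*
 * A small single-threaded model of the ISLOCKED/LOCKWANT bit protocol used
 * by kmon_enter()/kmon_exit() above.  The spl/sleep/wakeup machinery is
 * replaced by asserts and a counter, so only the bit handling is sketched,
 * not the blocking behavior; the sk_ names are local stand-ins.
 */
#include <assert.h>
#include <stdio.h>

#define SK_ISLOCKED	0x1
#define SK_LOCKWANT	0x2

struct sk_kmon { int dummy; };

static int sk_wakeups;			/* counts simulated wakeup() calls */

static void
sk_enter(struct sk_kmon *lk)
{
	/* single-threaded model: the lock must never already be held */
	assert((lk->dummy & SK_ISLOCKED) == 0);
	lk->dummy |= SK_ISLOCKED;
}

static void
sk_exit(struct sk_kmon *lk)
{
	assert((lk->dummy & SK_ISLOCKED) != 0);
	lk->dummy &= ~SK_ISLOCKED;
	if (lk->dummy & SK_LOCKWANT) {	/* someone recorded interest */
		lk->dummy &= ~SK_LOCKWANT;
		sk_wakeups++;		/* stands in for wakeup(lk) */
	}
}

int
main(void)
{
	struct sk_kmon m = { 0 };

	sk_enter(&m);
	m.dummy |= SK_LOCKWANT;		/* pretend another process queued up */
	sk_exit(&m);
	printf("lock word 0x%x, wakeups %d\n", m.dummy, sk_wakeups);
	return (0);
}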

1606
sys/vm/vm_page.c Normal file

File diff suppressed because it is too large

948
sys/vm/vm_pvn.c Normal file
View File

@@ -0,0 +1,948 @@
#ident "@(#)vm_pvn.c 1.1 94/10/31 SMI"
/*
* Copyright (c) 1988, 1989, 1990 by Sun Microsystems, Inc.
*/
/*
* VM - paged vnode.
*
* This file supplies vm support for the vnode operations that deal with pages.
*/
#include <sys/param.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <sys/debug.h>
#include <sys/trace.h>
#include <sys/ucred.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/rm.h>
#include <vm/pvn.h>
#include <vm/page.h>
#include <vm/seg_map.h>
int pvn_nofodklust = 0;
/*
 * Find the largest contiguous block which contains `addr' for file offset
 * `off', while staying within the file system block limits (`vp_off' and
 * `vp_len') and the address space limits, for which no pages currently
 * exist and which maps to consecutive file offsets.
*/
struct page *
pvn_kluster(vp, off, seg, addr, offp, lenp, vp_off, vp_len, isra)
struct vnode *vp;
register u_int off;
register struct seg *seg;
register addr_t addr;
u_int *offp, *lenp;
u_int vp_off, vp_len;
int isra;
{
register int delta, delta2;
register struct page *pp;
struct page *plist = NULL;
addr_t straddr;
int bytesavail;
u_int vp_end;
ASSERT(off >= vp_off && off < vp_off + vp_len);
/*
	 * We only want to do klustering/read ahead if there
	 * are more than minfree pages currently available.
*/
if (freemem - minfree > 0)
bytesavail = ptob(freemem - minfree);
else
bytesavail = 0;
if (bytesavail == 0) {
if (isra)
return ((struct page *)NULL); /* ra case - give up */
else
bytesavail = PAGESIZE; /* just pretending */
}
if (bytesavail < vp_len) {
/*
* Don't have enough free memory for the
* max request, try sizing down vp request.
*/
delta = off - vp_off;
vp_len -= delta;
vp_off += delta;
if (bytesavail < vp_len) {
/*
* Still not enough memory, just settle for
* bytesavail which is at least PAGESIZE.
*/
vp_len = bytesavail;
}
}
vp_end = vp_off + vp_len;
ASSERT(off >= vp_off && off < vp_end);
if (page_exists(vp, off))
return ((struct page *)NULL); /* already have page */
if (vp_len <= PAGESIZE || pvn_nofodklust) {
straddr = addr;
*offp = off;
*lenp = MIN(vp_len, PAGESIZE);
} else {
/* scan forward from front */
for (delta = 0; off + delta < vp_end; delta += PAGESIZE) {
/*
* Call back to the segment driver to verify that
* the klustering/read ahead operation makes sense.
*/
if ((*seg->s_ops->kluster)(seg, addr, delta))
break; /* page not file extension */
if (page_exists(vp, off + delta))
break; /* already have this page */
}
delta2 = delta;
/* scan back from front */
for (delta = 0; off + delta > vp_off; delta -= PAGESIZE) {
if (page_exists(vp, off + delta - PAGESIZE))
break; /* already have the page */
/*
* Call back to the segment driver to verify that
* the klustering/read ahead operation makes sense.
*/
if ((*seg->s_ops->kluster)(seg, addr, delta - PAGESIZE))
break; /* page not eligible */
}
straddr = addr + delta;
*offp = off = off + delta;
*lenp = MAX(delta2 - delta, PAGESIZE);
ASSERT(off >= vp_off);
if ((vp_off + vp_len) < (off + *lenp)) {
ASSERT(vp_end > off);
*lenp = vp_end - off;
}
}
/*
* Allocate pages for <vp, off> at <seg, addr> for delta bytes.
	 * Note that for the non-read ahead case we might not have the
	 * memory available right now, so the rm_allocpage operation could
	 * sleep and someone else might race to this same spot if the
* vnode object was not locked before this routine was called.
*/
delta2 = *lenp;
delta = roundup(delta2, PAGESIZE);
pp = rm_allocpage(seg, straddr, (u_int)delta, 1); /* `pp' list kept */
plist = pp;
do {
pp->p_intrans = 1;
pp->p_pagein = 1;
#ifdef TRACE
{
addr_t taddr = straddr + (off - *offp);
trace3(TR_SEG_KLUSTER, seg, taddr, isra);
trace6(TR_SEG_ALLOCPAGE, seg, taddr, TRC_SEG_UNK,
vp, off, pp);
}
#endif TRACE
if (page_enter(pp, vp, off)) { /* `pp' locked if ok */
/*
* Oops - somebody beat us to the punch
* and has entered the page before us.
* To recover, we use pvn_fail to free up
* all the pages we have already allocated
			 * and we return NULL so that the whole operation
			 * is attempted over again. This should never
			 * happen if the caller of pvn_kluster does
			 * vnode locking to prevent multiple processes
			 * from creating the same pages at the same time.
*/
pvn_fail(plist, B_READ);
return ((struct page *)NULL);
}
off += PAGESIZE;
} while ((pp = pp->p_next) != plist);
return (plist);
}
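
/*
 * A stand-alone model of the forward and backward scans in pvn_kluster()
 * above.  page_exists() and the segment driver's kluster check are replaced
 * by a toy `present' array, so only the window arithmetic is illustrated;
 * SK_PAGESIZE and the offsets are made up.
 */
#include <stdio.h>

#define SK_PAGESIZE	4096

/* which pages of a small file already have struct pages (toy data) */
static int present[8] = { 1, 0, 0, 0, 1, 0, 0, 0 };

int
main(void)
{
	int off = 2 * SK_PAGESIZE;		/* faulting offset */
	int vp_off = 0, vp_len = 8 * SK_PAGESIZE;
	int vp_end = vp_off + vp_len;
	int delta, delta2;

	/* scan forward from the faulting page while pages are missing */
	for (delta = 0; off + delta < vp_end; delta += SK_PAGESIZE)
		if (present[(off + delta) / SK_PAGESIZE])
			break;
	delta2 = delta;

	/* scan backward from the faulting page while pages are missing */
	for (delta = 0; off + delta > vp_off; delta -= SK_PAGESIZE)
		if (present[(off + delta - SK_PAGESIZE) / SK_PAGESIZE])
			break;

	printf("kluster window: offset 0x%x, length 0x%x\n",
	    off + delta, delta2 - delta);	/* 0x1000, 0x3000 */
	return (0);
}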
/*
 * Entry point to be used by page r/w subr's and other such routines which
 * want to report an error and abort a list of pages set up for pageio
 * which do not go through the normal pvn_done processing.
*/
void
pvn_fail(plist, flags)
struct page *plist;
int flags;
{
static struct buf abort_buf;
struct buf *bp;
struct page *pp;
int len;
int s;
len = 0;
pp = plist;
do {
len += PAGESIZE;
} while ((pp = pp->p_next) != plist);
bp = &abort_buf;
s = splimp();
while (bp->b_pages != NULL) {
(void) sleep((caddr_t)&bp->b_pages, PSWP+2);
}
(void) splx(s);
/* ~B_PAGEIO is a flag to pvn_done not to pageio_done the bp */
bp->b_flags = B_ERROR | B_ASYNC | (flags & ~B_PAGEIO);
bp->b_pages = plist;
bp->b_bcount = len;
pvn_done(bp); /* let pvn_done do all the work */
if (bp->b_pages != NULL) {
/* XXX - this should never happen, should it be a panic? */
bp->b_pages = NULL;
}
wakeup((caddr_t)&bp->b_pages);
}
/*
* Routine to be called when pageio's complete.
* Can only be called from process context, not
* from interrupt level.
*/
void
pvn_done(bp)
register struct buf *bp;
{
register struct page *pp;
register int bytes;
pp = bp->b_pages;
/*
* Release any I/O mappings to the pages described by the
* buffer that are finished before processing the completed I/O.
*/
if ((bp->b_flags & B_REMAPPED) && (pp->p_nio <= 1))
bp_mapout(bp);
/*
	 * Handle each page in the I/O operation.
*/
for (bytes = 0; bytes < bp->b_bcount; bytes += PAGESIZE) {
struct vnode *vp;
u_int off;
register int s;
if (pp->p_nio > 1) {
/*
* There were multiple IO requests outstanding
* for this particular page. This can happen
* when the file system block size is smaller
* than PAGESIZE. Since there are more IO
* requests still outstanding, we don't process
* the page given on the buffer now.
*/
if (bp->b_flags & B_ERROR) {
if (bp->b_flags & B_READ) {
trace3(TR_PG_PVN_DONE, pp, pp->p_vnode,
pp->p_offset);
page_abort(pp); /* assumes no waiting */
} else {
pg_setmod(pp, 1);
}
}
pp->p_nio--;
break;
/* real page locked for the other io operations */
}
pp = bp->b_pages;
page_sub(&bp->b_pages, pp);
vp = pp->p_vnode;
off = pp->p_offset;
pp->p_intrans = 0;
pp->p_pagein = 0;
PAGE_RELE(pp);
/*
* Verify the page identity before checking to see
* if the page was freed by PAGE_RELE(). This must
* be protected by splvm() to prevent the page from
* being ripped away at interrupt level.
*/
s = splvm();
if (pp->p_vnode != vp || pp->p_offset != off || pp->p_free) {
(void) splx(s);
continue;
}
(void) splx(s);
/*
* Check to see if the page has an error.
*/
if ((bp->b_flags & (B_ERROR|B_READ)) == (B_ERROR|B_READ)) {
page_abort(pp);
continue;
}
/*
* Check if we are to be doing invalidation.
* XXX - Failed writes with B_INVAL set are
* not handled appropriately.
*/
if ((bp->b_flags & B_INVAL) != 0) {
page_abort(pp);
continue;
}
if ((bp->b_flags & (B_ERROR | B_READ)) == B_ERROR) {
/*
* Write operation failed. We don't want
* to abort (or free) the page. We set
* the mod bit again so it will get
* written back again later when things
* are hopefully better again.
*/
pg_setmod(pp, 1);
}
if (bp->b_flags & B_FREE) {
cnt.v_pgpgout++;
if (pp->p_keepcnt == 0 && pp->p_lckcnt == 0) {
/*
* Check if someone has reclaimed the
* page. If no ref or mod, no one is
* using it so we can free it.
* The rest of the system is careful
* to use the ghost unload flag to unload
* translations set up for IO w/o
* affecting ref and mod bits.
*/
if (pp->p_mod == 0 && pp->p_mapping)
hat_pagesync(pp);
if (!pp->p_ref && !pp->p_mod) {
if (pp->p_mapping)
hat_pageunload(pp);
#ifdef MULTIPROCESSOR
}
/*
* The page may have been modified
* between the hat_pagesync and
* the hat_pageunload, and hat_pageunload
* will have picked up final ref and mod
* bits from the PTEs. So, check 'em again.
*/
if (!pp->p_ref && !pp->p_mod) {
#endif MULTIPROCESSOR
page_free(pp,
(int)(bp->b_flags & B_DONTNEED));
if ((bp->b_flags & B_DONTNEED) == 0)
cnt.v_dfree++;
} else {
page_unlock(pp);
cnt.v_pgrec++;
}
} else {
page_unlock(pp);
}
continue;
}
page_unlock(pp); /* a read or write */
}
/*
* Count pageout operations if applicable. Release the
* buf struct associated with the operation if async & pageio.
*/
if (bp->b_flags & B_FREE)
cnt.v_pgout++;
if ((bp->b_flags & (B_ASYNC | B_PAGEIO)) == (B_ASYNC | B_PAGEIO))
pageio_done(bp);
}
/*
* Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI}
* B_DELWRI indicates that this page is part of a kluster operation and
* is only to be considered if it doesn't involve any waiting here.
* Returns non-zero if page added to dirty list.
*
* NOTE: The caller must ensure that the page is not on the free list.
*/
static int
pvn_getdirty(pp, dirty, flags)
register struct page *pp, **dirty;
int flags;
{
register int s;
struct vnode *vp;
u_int offset;
ASSERT(pp->p_free == 0);
vp = pp->p_vnode;
offset = pp->p_offset;
/*
* If page is logically locked, forget it.
*
* XXX - Can a page locked by some other process be
* written out or invalidated?
*/
if (pp->p_lckcnt != 0)
return (0);
if ((flags & B_DELWRI) != 0 && (pp->p_keepcnt != 0 || pp->p_lock)) {
/*
* This is a klustering case that would
* cause us to block, just give up.
*/
return (0);
}
if (pp->p_intrans && (flags & (B_INVAL | B_ASYNC)) == B_ASYNC) {
/*
* Don't bother waiting for an intrans page if we are not
* doing invalidation and this is an async operation
* (the page will be correct when the current io completes).
*/
return (0);
}
/*
* If i/o is in progress on the page or we have to
* invalidate or free the page, wait for the page keep
* count to go to zero.
*/
if (pp->p_intrans || (flags & (B_INVAL | B_FREE)) != 0) {
if (pp->p_keepcnt != 0) {
page_wait(pp);
/*
* Re-verify page identity since it could have
* changed while we were sleeping.
*/
s = splvm();
if (pp->p_vnode != vp || pp->p_offset != offset) {
/*
* Lost the page - nothing to do?
*/
(void) splx(s);
return (0);
}
(void) splx(s);
/*
* The page has not lost its identity and hence
* should not be on the free list.
*/
ASSERT(pp->p_free == 0);
}
}
page_lock(pp);
/*
* If the page has mappings and it is not the case that the
* page is already marked dirty and we are going to unload
* the page below because we are going to free/invalidate
* it, then we sync current mod bits from the hat layer now.
*/
if (pp->p_mapping && !(pp->p_mod && (flags & (B_FREE | B_INVAL)) != 0))
hat_pagesync(pp);
if (pp->p_mod == 0) {
if ((flags & (B_INVAL | B_FREE)) != 0) {
if (pp->p_mapping)
hat_pageunload(pp);
if ((flags & B_INVAL) != 0) {
page_abort(pp);
return (0);
}
if (pp->p_free == 0) {
if ((flags & B_FREE) != 0) {
page_free(pp, (flags & B_DONTNEED));
return (0);
}
}
}
page_unlock(pp);
return (0);
}
/*
* Page is dirty, get it ready for the write back
* and add page to the dirty list. First unload
* the page if we are going to free/invalidate it.
*/
if (pp->p_mapping && (flags & (B_FREE | B_INVAL)) != 0)
hat_pageunload(pp);
pg_setmod(pp, 0);
pg_setref(pp, 0);
trace3(TR_PG_PVN_GETDIRTY, pp, pp->p_vnode, pp->p_offset);
pp->p_intrans = 1;
/*
* XXX - The `p_pagein' bit is set for asynchronous or
* synchronous invalidates to prevent other processes
* from accessing the page in the window after the i/o is
* complete but before the page is aborted. If this is not
* done, updates to the page before it is aborted will be lost.
*/
pp->p_pagein = (flags & B_INVAL) ? 1 : 0;
PAGE_HOLD(pp);
page_sortadd(dirty, pp);
return (1);
}
/*
* Run down the vplist and handle all pages whose offset is >= off.
* Returns a list of dirty kept pages all ready to be written back.
*
* Assumptions:
* The vp is already locked by the VOP_PUTPAGE routine calling this.
* That the VOP_GETPAGE also locks the vp, and thus no one can
* add a page to the vp list while the vnode is locked.
* Flags are {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
*/
struct page *
pvn_vplist_dirty(vp, off, flags)
register struct vnode *vp;
u_int off;
int flags;
{
register struct page *pp;
register struct page *ppnext;
register struct page *ppsav;
register struct page *ppnextnext;
register int ppsav_wasfree, pp_wasfree;
register int ppsav_age, pp_age;
struct page *dirty;
register int s;
int on_iolist;
s = splvm();
if (vp->v_type == VSOCK || vp->v_type == VCHR ||
(pp = vp->v_pages) == NULL) {
(void) splx(s);
return ((struct page *)NULL);
}
#define PAGE_RECLAIM(pp, wasfree, age) \
{ \
if ((pp)->p_free) { \
age = (pp)->p_age; \
page_reclaim(pp); \
wasfree = 1; \
} else { \
age = wasfree = 0; \
} \
}
#define PAGE_REFREE(pp, wasfree, age) \
{ \
if (wasfree && (pp)->p_keepcnt == 0 && (pp)->p_mapping == NULL) \
page_free(pp, age); \
}
/*
* Traverse the page list. We have to be careful since pages
* can be removed from the vplist while we are looking at it
* (a page being pulled off the free list for something else,
* or an async io operation completing and the page and/or
	 * bp is marked for invalidation) so we have to be careful determining
* that we have examined all the pages. We use ppsav to point
* to the first page that stayed on the vp list after calling
* pvn_getdirty and we PAGE_RECLAIM and PAGE_HOLD to prevent it
* from going away on us. When we PAGE_UNKEEP the page, it will
* go back to the free list if that's where we got it from. We
* also need to PAGE_RECLAIM and PAGE_HOLD the next pp in the
* vplist to prevent it from going away while we are traversing
* the list.
*/
ppnext = NULL;
ppsav = NULL;
ppsav_age = ppsav_wasfree = 0;
pp_age = pp_wasfree = 0;
dirty = NULL;
if (pp->p_vpnext != pp)
ppnext = pp->p_vpnext;
else
ppnext = NULL;
for (;;) {
/* Reclaim and hold the next page */
if (ppnext != NULL) {
if (ppnext->p_free)
page_reclaim(ppnext);
PAGE_HOLD(ppnext);
}
if (pp != NULL) {
PAGE_RECLAIM(pp, pp_wasfree, pp_age);
/* Process the current page */
if (pp->p_offset >= off) {
(void) splx(s);
on_iolist = pvn_getdirty(pp, &dirty, flags);
s = splvm();
} else
on_iolist = 0;
if (pp->p_vnode == vp) {
/*
* If the page identity hasn't changed and
* it isn't dirty, free it if reclaimed
* from the free list.
*/
if (!on_iolist && !pp->p_free)
PAGE_REFREE(pp, pp_wasfree, pp_age);
/*
* If we haven't found a marker before,
* use the current page as our marker.
*/
if (ppsav == NULL) {
ppsav = pp;
PAGE_RECLAIM(ppsav, ppsav_wasfree,
ppsav_age);
PAGE_HOLD(ppsav);
}
}
}
/* If no pages left on list, we're done */
if (ppnext == NULL)
break;
/* Compute the "next" next page */
if (ppnext->p_vpnext != ppnext && ppnext->p_vpnext != ppsav)
ppnextnext = ppnext->p_vpnext;
else
ppnextnext = NULL;
/* Release the next page */
PAGE_RELE(ppnext);
/* If releasing the next page freed it, ignore it */
if (ppnext->p_free) {
ASSERT(ppnext->p_vnode == NULL);
ppnext = NULL;
}
/* Move forward to look at next page */
pp = ppnext;
ppnext = ppnextnext;
}
if (ppsav != NULL) {
PAGE_RELE(ppsav);
if (!ppsav->p_free)
PAGE_REFREE(ppsav, ppsav_wasfree, ppsav_age);
}
(void) splx(s);
return (dirty);
}
#undef PAGE_RECLAIM
#undef PAGE_REFREE
/*
* Used when we need to find a page but don't care about free pages.
*/
static struct page *
pvn_pagefind(vp, off)
register struct vnode *vp;
register u_int off;
{
register struct page *pp;
register int s;
s = splvm();
pp = page_exists(vp, off);
if (pp != NULL && pp->p_free)
pp = NULL;
(void) splx(s);
return (pp);
}
int pvn_range_noklust = 0;
/*
* Use page_find's and handle all pages for this vnode whose offset
 * is >= off and < eoff. This routine will also do klustering out
 * to offlo and offhi, up until a page is not found. We assume
* that offlo <= off and offhi >= eoff.
*
* Returns a list of dirty kept pages all ready to be written back.
*/
struct page *
pvn_range_dirty(vp, off, eoff, offlo, offhi, flags)
register struct vnode *vp;
u_int off, eoff;
u_int offlo, offhi;
int flags;
{
struct page *dirty = NULL;
register struct page *pp;
register u_int o;
register struct page *(*pfind)();
ASSERT(offlo <= off && offhi >= eoff);
off &= PAGEMASK;
eoff = (eoff + PAGEOFFSET) & PAGEMASK;
/*
* If we are not invalidating pages, use the routine,
* pvn_pagefind(), to prevent reclaiming them from the
* free list.
*/
if ((flags & B_INVAL) == 0)
pfind = pvn_pagefind;
else
pfind = page_find;
/* first do all the pages from [off..eoff] */
for (o = off; o < eoff; o += PAGESIZE) {
pp = (*pfind)(vp, o);
if (pp != NULL) {
(void) pvn_getdirty(pp, &dirty, flags);
}
}
if (pvn_range_noklust)
return (dirty);
/* now scan backwards looking for pages to kluster */
for (o = off - PAGESIZE; (int)o >= 0 && o >= offlo; o -= PAGESIZE) {
pp = (*pfind)(vp, o);
if (pp == NULL)
break; /* page not found */
if (pvn_getdirty(pp, &dirty, flags | B_DELWRI) == 0)
break; /* page not added to dirty list */
}
/* now scan forwards looking for pages to kluster */
for (o = eoff; o < offhi; o += PAGESIZE) {
pp = (*pfind)(vp, o);
if (pp == NULL)
break; /* page not found */
if (pvn_getdirty(pp, &dirty, flags | B_DELWRI) == 0)
break; /* page not added to dirty list */
}
return (dirty);
}
/*
* Take care of invalidating all the pages for vnode vp going to size
* vplen. This includes zero'ing out zbytes worth of file beyond vplen.
* This routine should only be called with the vp locked by the file
 * system code so that more pages cannot be added while we sleep here.
*/
void
pvn_vptrunc(vp, vplen, zbytes)
register struct vnode *vp;
register u_int vplen;
u_int zbytes;
{
register struct page *pp;
register int s;
if (vp->v_pages == NULL || vp->v_type == VCHR || vp->v_type == VSOCK)
return;
/*
* Simple case - abort all the pages on the vnode
*/
if (vplen == 0) {
s = splvm();
while ((pp = vp->v_pages) != (struct page *)NULL) {
/*
* When aborting these pages, we make sure that
* we wait to make sure they are really gone.
*/
if (pp->p_keepcnt != 0) {
(void) splx(s);
page_wait(pp);
s = splvm();
if (pp->p_vnode != vp)
continue;
} else {
if (pp->p_free)
page_reclaim(pp);
}
page_lock(pp);
page_abort(pp);
}
(void) splx(s);
return;
}
/*
* Tougher case - have to find all the pages on the
* vnode which need to be aborted or partially zeroed.
*/
/*
* First we get the last page and handle the partially
* zeroing via kernel mappings. This will make the page
* dirty so that we know that when this page is written
* back, the zeroed information will go out with it. If
* the page is not currently in memory, then the kzero
	 * operation will cause it to be brought in. We use kzero
* instead of bzero so that if the page cannot be read in
* for any reason, the system will not panic. We need
* to zero out a minimum of the fs given zbytes, but we
* might also have to do more to get the entire last page.
*/
if (zbytes != 0) {
addr_t addr;
if ((zbytes + (vplen & MAXBOFFSET)) > MAXBSIZE)
panic("pvn_vptrunc zbytes");
addr = segmap_getmap(segkmap, vp, vplen & MAXBMASK);
(void) kzero(addr + (vplen & MAXBOFFSET),
MAX(zbytes, PAGESIZE - (vplen & PAGEOFFSET)));
(void) segmap_release(segkmap, addr, SM_WRITE | SM_ASYNC);
}
/*
* Synchronously abort all pages on the vp list which are
* beyond the new length. The algorithm here is to start
* scanning at the beginning of the vplist until there
* are no pages with an offset >= vplen. If we find such
* a page, we wait for it if it is kept for any reason and
* then we abort it after verifying that it is still a page
* that needs to go away. We assume here that the vplist
* is not messed with at interrupt level.
*/
s = splvm();
again:
for (pp = vp->v_pages; pp != NULL; pp = pp->p_vpnext) {
if (pp->p_offset >= vplen) {
/* need to abort this page */
if (pp->p_keepcnt != 0) {
(void) splx(s);
page_wait(pp);
s = splvm();
/* verify page identity again */
if (pp->p_vnode != vp || pp->p_offset < vplen)
goto again;
} else {
if (pp->p_free)
page_reclaim(pp);
}
page_lock(pp);
page_abort(pp);
goto again; /* start over again */
}
if (pp == pp->p_vpnext || vp->v_pages == pp->p_vpnext)
break;
}
(void) splx(s);
}
/*
* This routine is called when the low level address translation
* code decides to unload a translation. It calls back to the
* segment driver which in many cases ends up here.
*/
/*ARGSUSED*/
void
pvn_unloadmap(vp, offset, ref, mod)
struct vnode *vp;
u_int offset;
u_int ref, mod;
{
/*
* XXX - what is the pvn code going to do w/ this information?
	 * This guy gets called for each loaded page when an executable
* using the segvn driver terminates...
*/
}
/*
* Handles common work of the VOP_GETPAGE routines when more than
* one page must be returned by calling a file system specific operation
* to do most of the work. Must be called with the vp already locked
* by the VOP_GETPAGE routine.
*/
int
pvn_getpages(getapage, vp, off, len, protp, pl, plsz, seg, addr, rw, cred)
int (*getapage)();
struct vnode *vp;
u_int off, len;
u_int *protp;
struct page *pl[];
u_int plsz;
struct seg *seg;
register addr_t addr;
enum seg_rw rw;
struct ucred *cred;
{
register struct page **ppp;
register u_int o, eoff;
u_int sz;
int err;
	ASSERT(plsz >= len); /* ensure that we have enough space */
/*
* Loop one page at a time and let getapage function fill
* in the next page in array. We only allow one page to be
* returned at a time (except for the last page) so that we
* don't have any problems with duplicates and other such
* painful problems. This is a very simple minded algorithm,
* but it does the job correctly. We hope that the cost of a
* getapage call for a resident page that we might have been
* able to get from an earlier call doesn't cost too much.
*/
ppp = pl;
sz = PAGESIZE;
eoff = off + len;
for (o = off; o < eoff; o += PAGESIZE, addr += PAGESIZE) {
if (o + PAGESIZE >= eoff) {
/*
			 * Last time through - allow all of
* what's left of the pl[] array to be used.
*/
sz = plsz - (o - off);
}
err = (*getapage)(vp, o, protp, ppp, sz, seg, addr, rw, cred);
if (err) {
/*
* Release any pages we already got.
*/
if (o > off && pl != NULL) {
for (ppp = pl; *ppp != NULL; *ppp++ = NULL) {
PAGE_RELE(*ppp);
}
}
break;
}
if (pl != NULL)
ppp++;
}
return (err);
}
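
/*
 * A sketch of how the loop in pvn_getpages() above doles out the page-list
 * space: one page at a time, except that the final iteration is allowed to
 * use whatever is left of plsz.  The sizes here are made-up toy values.
 */
#include <stdio.h>

#define SK_PAGESIZE	4096u

int
main(void)
{
	unsigned int off = 0, len = 3 * SK_PAGESIZE;
	unsigned int plsz = 5 * SK_PAGESIZE;	/* caller gave extra room */
	unsigned int o, sz, eoff = off + len;

	sz = SK_PAGESIZE;
	for (o = off; o < eoff; o += SK_PAGESIZE) {
		if (o + SK_PAGESIZE >= eoff)
			sz = plsz - (o - off);	/* last page gets the rest */
		printf("offset 0x%x: pass sz=0x%x to getapage\n", o, sz);
	}
	return (0);
}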

87
sys/vm/vm_rm.c Normal file
View File

@@ -0,0 +1,87 @@
/* @(#)vm_rm.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
/*
* VM - resource manager
* As you can see, it needs lots of work
*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/page.h>
/*ARGSUSED*/
struct page *
rm_allocpage(seg, addr, len, canwait)
struct seg *seg;
addr_t addr;
u_int len;
int canwait;
{
return (page_get(len, canwait));
}
/*
* This routine is called when we couldn't allocate an anon slot.
 * For now, we simply print out a message and kill off the process
 * that happened to get burned.
*
* XXX - swap reservation needs lots of work so this only happens in
* `nice' places or we need to have a method to allow for recovery.
*/
void
rm_outofanon()
{
struct proc *p;
p = u.u_procp;
printf("Sorry, pid %d (%s) was killed due to lack of swap space\n",
p->p_pid, u.u_comm);
/*
* To be sure no looping (e.g. in vmsched trying to
* swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
* to swap it out.
*/
psignal(p, SIGKILL);
p->p_flag |= SULOCK;
/*NOTREACHED*/
}
void
rm_outofhat()
{
panic("out of mapping resources"); /* XXX */
/*NOTREACHED*/
}
/*
* Yield the memory claim requirement for an address space.
*
* This is currently implemented as the number of active hardware
* translations that have page structures. Therefore, it can
 * underestimate the traditional resident set size, e.g., if the
 * physical page is present and the hardware translation is missing;
 * and it can overestimate the rss, e.g., if there are active
* translations to a frame buffer with page structs.
* Also, it does not take sharing into account.
*/
int
rm_asrss(as)
struct as *as;
{
return (as == (struct as *)NULL ? 0 : as->a_rss);
}

132
sys/vm/vm_seg.c Normal file
View File

@@ -0,0 +1,132 @@
/* @(#)vm_seg.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
/*
* VM - segment management.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/mmu.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/mp.h>
/*
* Variables for maintaining the free list of segment structures.
*/
static struct seg *seg_freelist;
static int seg_freeincr = 24;
/*
* Allocate a segment to cover [base, base+size)
* and attach it to the specified address space.
*/
struct seg *
seg_alloc(as, base, size)
struct as *as;
register addr_t base;
register u_int size;
{
register struct seg *new;
addr_t segbase;
u_int segsize;
segbase = (addr_t)((u_int)base & PAGEMASK);
segsize =
(((u_int)(base + size) + PAGEOFFSET) & PAGEMASK) - (u_int)segbase;
if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
return ((struct seg *)NULL); /* bad virtual addr range */
new = (struct seg *)new_kmem_fast_alloc((caddr_t *)&seg_freelist,
sizeof (*seg_freelist), seg_freeincr, KMEM_SLEEP);
bzero((caddr_t)new, sizeof (*new));
if (seg_attach(as, segbase, segsize, new) < 0) {
kmem_fast_free((caddr_t *)&seg_freelist, (caddr_t)new);
return ((struct seg *)NULL);
}
/* caller must fill in ops, data */
return (new);
}
/*
* Attach a segment to the address space. Used by seg_alloc()
* and for kernel startup to attach to static segments.
*/
int
seg_attach(as, base, size, seg)
struct as *as;
addr_t base;
u_int size;
struct seg *seg;
{
seg->s_as = as;
seg->s_base = base;
seg->s_size = size;
if (as_addseg(as, seg) == A_SUCCESS)
return (0);
return (-1);
}
/*
 * Free the segment from its associated as.
*/
void
seg_free(seg)
register struct seg *seg;
{
register struct as *as = seg->s_as;
if (as->a_segs == seg)
as->a_segs = seg->s_next; /* go to next seg */
if (as->a_segs == seg)
as->a_segs = NULL; /* seg list is gone */
else {
seg->s_prev->s_next = seg->s_next;
seg->s_next->s_prev = seg->s_prev;
}
if (as->a_seglast == seg)
as->a_seglast = as->a_segs;
/*
* If the segment private data field is NULL,
* then segment driver is not attached yet.
*/
if (seg->s_data != NULL)
(*seg->s_ops->free)(seg);
kmem_fast_free((caddr_t *)&seg_freelist, (caddr_t)seg);
}
/*
* Translate addr into page number within segment.
*/
u_int
seg_page(seg, addr)
struct seg *seg;
addr_t addr;
{
return ((u_int)((addr - seg->s_base) >> PAGESHIFT));
}
/*
* Return number of pages in segment.
*/
u_int
seg_pages(seg)
struct seg *seg;
{
return ((u_int)((seg->s_size + PAGEOFFSET) >> PAGESHIFT));
}
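
/*
 * A tiny sketch of the seg_page()/seg_pages() arithmetic above, assuming
 * 4K pages.  SK_PAGESHIFT and SK_PAGEOFFSET are local stand-ins for the
 * machine-dependent kernel definitions, and the segment values are made up.
 */
#include <stdio.h>

#define SK_PAGESHIFT	12
#define SK_PAGESIZE	(1 << SK_PAGESHIFT)
#define SK_PAGEOFFSET	(SK_PAGESIZE - 1)

int
main(void)
{
	unsigned int s_base = 0x20000, s_size = 0x5800;	/* toy segment */
	unsigned int addr = 0x23450;			/* address within it */

	printf("page index %u, pages in segment %u\n",
	    (addr - s_base) >> SK_PAGESHIFT,		/* seg_page */
	    (s_size + SK_PAGEOFFSET) >> SK_PAGESHIFT);	/* seg_pages */
	return (0);
}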

648
sys/vm/vm_swap.c Normal file
View File

@@ -0,0 +1,648 @@
/* @(#)vm_swap.c 1.1 94/10/31 SMI */
#ident "$SunId: @(#)vm_swap.c 1.2 91/02/19 SMI [RMTC] $"
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* Virtual swap device
*
* The virtual swap device consists of the logical concatenation of one
* or more physical swap areas. It provides a logical array of anon
* slots, each of which corresponds to a page of swap space.
*
* Each physical swap area has an associated anon array representing
* its physical storage. These anon arrays are logically concatenated
* sequentially to form the overall swap device anon array. Thus, the
* offset of a given entry within this logical array is computed as the
* sum of the sizes of each area preceding the entry plus the offset
* within the area containing the entry.
*
* The anon array entries for unused swap slots within an area are
* linked together into a free list. Allocation proceeds by finding a
* suitable area (attempting to balance use among all the areas) and
* then returning the first free entry within the area. Thus, there's
* no linear relation between offset within the swap device and the
* address (within its segment(s)) of the page that the slot backs;
* instead, it's an arbitrary one-to-one mapping.
*
* Associated with each swap area is a swapinfo structure. These
* structures are linked into a linear list that determines the
* ordering of swap areas in the logical swap device. Each contains a
* pointer to the corresponding anon array, the area's size, and its
* associated vnode.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/bootconf.h>
#include <sys/trace.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/swap.h>
/* these includes are used for the "fake" swap support of /dev/drum */
#include <sun/mem.h>
#include <specfs/snode.h>
static struct swapinfo *silast;
struct swapinfo *swapinfo;
/*
* To balance the load among multiple swap areas, we don't allow
* more than swap_maxcontig allocations to be satisfied from a
* single swap area before moving on to the next swap area. This
* effectively "interleaves" allocations among the many swap areas.
*/
int swap_maxcontig = 1024 * 1024 / PAGESIZE; /* 1MB of pages */
extern int klustsize; /* from spec_vnodeops.c */
int swap_order = 1; /* see swap_alloc,free */
#define MINIROOTSIZE 14000 /* ~7 Meg */
/*
* Initialize a new swapinfo structure.
*/
static int
swapinfo_init(vp, npages, skip)
struct vnode *vp;
register u_int npages;
u_int skip;
{
register struct anon *ap, *ap2;
register struct swapinfo **sipp, *nsip;
for (sipp = &swapinfo; nsip = *sipp; sipp = &nsip->si_next)
if (nsip->si_vp == vp)
return (EBUSY); /* swap device already in use */
nsip = (struct swapinfo *)new_kmem_zalloc(
sizeof (struct swapinfo), KMEM_SLEEP);
nsip->si_vp = vp;
nsip->si_size = ptob(npages);
/*
* Don't indirect through NULL if called with npages < skip (too tacky)
*/
if (npages < skip)
npages = skip;
/*
* Don't sleep when allocating memory for the anon structures.
* This allocation can be large for very large swap spaces and we
	 * cannot count on such a contiguous chunk becoming available
* in the heap.
*/
nsip->si_anon = (struct anon *)new_kmem_zalloc(
npages * sizeof (struct anon), KMEM_NOSLEEP);
if (!nsip->si_anon) {
kmem_free(nsip, sizeof(struct swapinfo));
return (ENOMEM);
}
nsip->si_eanon = &nsip->si_anon[npages - 1];
#ifdef RECORD_USAGE
/*
* Monitoring of swap space usage is enabled, so malloc
* a parallel array to hold the PID responsible for
* causing the anon page to be created.
*/
nsip->si_pid = (short *)
new_kmem_zalloc(npages * sizeof (short), KMEM_NOSLEEP);
if (!nsip->si_pid) {
kmem_free(nsip->si_anon, npages * sizeof (struct anon));
kmem_free(nsip, sizeof(struct swapinfo));
return (ENOMEM);
}
#endif RECORD_USAGE
npages -= skip;
/*
* ap2 now points to the first usable slot in the swap area.
* Set up free list links so that the head of the list is at
* the front of the usable portion of the array.
*/
ap = nsip->si_eanon;
ap2 = nsip->si_anon + skip;
while (--ap >= ap2)
ap->un.an_next = ap + 1;
if (npages == 0) /* if size was <= skip */
nsip->si_free = NULL;
else
nsip->si_free = ap + 1;
anoninfo.ani_free += npages;
anoninfo.ani_max += npages;
*sipp = nsip;
if (silast == NULL) /* first swap device */
silast = nsip;
return (0);
}
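
/*
 * A stand-alone model of the free-list construction in swapinfo_init()
 * above: slots [skip .. npages-1] are chained through their next pointers
 * so that the head of the list is the first usable slot.  The structure
 * and sizes here are trimmed local stand-ins, not the kernel's.
 */
#include <stdio.h>

struct sk_anon {
	struct sk_anon *an_next;
};

#define SK_NPAGES	8
#define SK_SKIP		1		/* fence off the first slot */

int
main(void)
{
	static struct sk_anon anon[SK_NPAGES];	/* zero-filled, like kmem_zalloc */
	struct sk_anon *ap, *ap2, *eanon, *freelist;
	int n;

	eanon = &anon[SK_NPAGES - 1];
	ap = eanon;
	ap2 = anon + SK_SKIP;
	while (--ap >= ap2)			/* link slot i to slot i+1 */
		ap->an_next = ap + 1;
	freelist = ap + 1;			/* head is the first usable slot */

	for (n = 0, ap = freelist; ap != NULL; ap = ap->an_next)
		n++;
	printf("%d free slots, head is slot %d\n", n, (int)(freelist - anon));
	return (0);
}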
/*
* Initialize a swap vnode.
*/
int
swap_init(vp)
struct vnode *vp;
{
struct vattr vattr;
u_int skip;
int err;
err = VOP_GETATTR(vp, &vattr, u.u_cred); /* XXX - u.u_cred? */
if (err) {
printf("swap_init: getattr failed, errno %d\n", err);
return (err);
}
/*
* To prevent swap I/O requests from crossing the boundary
* between swap areas, we erect a "fence" between areas by
* not allowing the first page of each swap area to be used.
* (This also prevents us from scribbling on the disk label
* if the swap partition is the first partition on the disk.)
* This may not be strictly necessary, since swap_blksize also
* prevents requests from crossing the boundary.
*
* If swapping on the root filesystem, don't put swap blocks that
* correspond to the miniroot filesystem on the swap free list.
*/
if (rootvp == vp)
skip = btoc(roundup(dbtob(MINIROOTSIZE), klustsize));
else
skip = 1;
err = swapinfo_init(vp, (u_int)btop(vattr.va_size), skip);
if (!err)
vp->v_flag |= VISSWAP;
return (err);
}
/*
* This routine is used to fake npages worth of swap space.
* These pages will have no backing and cannot be paged out any where.
*/
swap_cons(npages)
u_int npages;
{
if (swapinfo_init((struct vnode *)NULL, npages, 0) != 0)
panic("swap_cons");
}
/*
 * Points to (or close to) the location of the last block handed to
 * swap_free. The theory is that if you free one in this area,
 * you'll probably free more, so use the hint as a starting point.
 * hint is reset on each free to the block that precedes the one
 * freed (or the block freed, if we can't find the block before it).
 * It is also reset if it points at a block that is allocated.
*
* XXX - swap_free and swap_alloc both manipulate hint; the free
* lists are now protected with splswap(). Don't call into these routines
* from higher level interrupts!
*/
static struct {
struct anon *ap; /* pointer to the last freed */
struct swapinfo *sip; /* swap list for which hint is valid */
} hint;
int swap_hit; /* hint helped */
int swap_miss; /* hint was no good */
/*
* Allocate a single page from the virtual swap device.
*/
struct anon *
swap_alloc()
{
struct swapinfo *sip = silast;
struct anon *ap;
do {
ap = sip->si_free;
if (ap) {
/*
* can't condition this on swap_order since some
* idiot might turn it on and off. It's not cool
* to have the hint point at an allocated block.
*/
if (hint.sip == sip && hint.ap == ap)
hint.sip = NULL;
sip->si_free = ap->un.an_next;
if (++sip->si_allocs >= swap_maxcontig) {
sip->si_allocs = 0;
if (sip == silast) {
silast = sip->si_next;
if (silast == NULL)
silast = swapinfo;
}
} else {
silast = sip;
}
# ifdef TRACE
{
struct vnode *vp;
u_int off;
swap_xlate(ap, &vp, &off);
trace3(TR_MP_SWAP, vp, off, ap);
}
# endif TRACE
#ifdef RECORD_USAGE
if (u.u_procp) {
/* swap monitoring is on - record the current PID */
sip->si_pid[ap - sip->si_anon] = u.u_procp->p_pid;
}
#endif RECORD_USAGE
return (ap);
}
/*
* No more free anon slots here.
*/
sip->si_allocs = 0;
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
return ((struct anon *)NULL);
}
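
/*
 * A toy model of the interleaving policy in swap_alloc() above: stay on one
 * swap area for at most swap_maxcontig allocations, then rotate silast to
 * the next area.  Free-list handling is omitted; each area is just a pair
 * of counters, and the constants are made up.
 */
#include <stdio.h>

#define SK_NAREAS	3
#define SK_MAXCONTIG	4		/* stand-in for swap_maxcontig */

static int sk_allocs[SK_NAREAS];	/* si_allocs for each area */
static int sk_taken[SK_NAREAS];		/* slots handed out by each area */
static int sk_last;			/* index of silast */

static void
sk_alloc(void)
{
	sk_taken[sk_last]++;
	if (++sk_allocs[sk_last] >= SK_MAXCONTIG) {
		sk_allocs[sk_last] = 0;
		sk_last = (sk_last + 1) % SK_NAREAS;	/* rotate to next area */
	}
}

int
main(void)
{
	int i;

	for (i = 0; i < 20; i++)
		sk_alloc();
	for (i = 0; i < SK_NAREAS; i++)
		printf("area %d: %d slots\n", i, sk_taken[i]);
	return (0);
}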
/*
* Free a swap page.
* List is maintained in sorted order. Worst case is a linear search on the
* list; we maintain a hint to mitigate this.
*
* Pointing the hint at the most recently free'd anon struct makes it
* really fast to free anon pages in ascending order.
*
* Pointing the hint at the anon struct that is just *before* this makes
* it really fast to free anon pages in descending order, at nearly zero
* cost.
*
 * This algorithm points the hint at the anon struct that points to
 * the one most recently free'd. When freeing a block of anon structs
 * presented in ascending order, the hint advances one block behind
 * the blocks as they are free'd. When freeing a block of anon structs
 * presented in descending order -- which happens if a large hunk of
* memory is allocated in reverse order then free'd in forward order,
* common enough to be a problem -- the hint remains pointing at the
* anon struct that ends up pointing at each of the free'd blocks
* in order. This is worth an example.
*
* Assume anons #2 and #9 are free, the hint points to anon #2, and
* #2's "next" pointer goes to #9. Now, we present a set of swap_free
* requests for blocks #8 through #3, in descending order. This results
* in a series of hits on the hint, which just keeps pointing at #2.
* The previous algorithm would have set the hint to each block as
* it came in, resulting in worst-case behavior as the list had to
* be scanned from the front.
*/
void
swap_free(ap)
struct anon *ap;
{
register struct swapinfo *sip = silast;
register struct anon *tap, **tapp;
register struct anon *tap_hint;
/*
* Find the swap area containing ap and then put
* ap at the head of that area's free list.
*/
do {
if (sip->si_anon <= ap && ap <= sip->si_eanon) {
/*
ap->un.an_next = sip->si_free;
sip->si_free = ap;
*/
/*
* old unordered way
*/
if (!swap_order) {
ap->un.an_next = sip->si_free;
sip->si_free = ap;
#ifdef RECORD_USAGE
/* Swap monitoring is on - undo the PID */
sip->si_pid[ap - sip->si_anon] = 0;
#endif RECORD_USAGE
return;
}
/*
* Do it in order; use hint if possible
*/
tap = hint.ap;
if (hint.sip == sip && tap < ap) {
/*
* The anon we are freeing
* follows the hint tap somewhere.
* save the hint and advance
* to the next free anon.
*/
tapp = &tap->un.an_next;
tap_hint = tap;
tap = tap->un.an_next;
swap_hit++;
} else {
/*
* Wrong swapinfo, or
* the anon being free'd
				 * precedes the hint. We
				 * must start scanning
* from the front of the
* list. The best hint we
* can seed with is the
* anon we are freeing.
*/
tapp = &sip->si_free;
tap = sip->si_free;
tap_hint = ap;
swap_miss++;
}
/*
* advance tap until it is greater
* than the incoming anon.
*/
while (tap && tap < ap) {
tapp = &tap->un.an_next;
tap_hint = tap;
tap = tap->un.an_next;
}
*tapp = ap;
ap->un.an_next = tap;
#ifdef RECORD_USAGE
/* Swap monitoring is on - undo the PID */
sip->si_pid[ap - sip->si_anon] = 0;
#endif RECORD_USAGE
hint.sip = sip;
hint.ap = tap_hint;
return;
}
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
panic("swap_free");
/* NOTREACHED */
}
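
/*
 * A user-level model of the ordered insert in swap_free() above: the free
 * list is kept sorted by address and the hint points at the node preceding
 * the one most recently free'd, so runs of frees in ascending or descending
 * order rarely rescan the list from the front.  The structures, slot count
 * and free pattern are toy stand-ins; the example reproduces the #2/#9
 * scenario from the comment above.
 */
#include <stdio.h>
#include <stddef.h>

struct sk_anon {
	struct sk_anon *an_next;
};

#define SK_NSLOTS	10

static struct sk_anon sk_slots[SK_NSLOTS];
static struct sk_anon *sk_freelist;
static struct sk_anon *sk_hint;		/* node preceding the last free */
static int sk_hits, sk_misses;

static void
sk_free(struct sk_anon *ap)
{
	struct sk_anon **tapp, *tap, *tap_hint;

	if (sk_hint != NULL && sk_hint < ap) {	/* freeing past the hint */
		tap_hint = sk_hint;
		tapp = &sk_hint->an_next;
		tap = sk_hint->an_next;
		sk_hits++;
	} else {				/* rescan from the front */
		tap_hint = ap;
		tapp = &sk_freelist;
		tap = sk_freelist;
		sk_misses++;
	}
	while (tap != NULL && tap < ap) {	/* find the insertion point */
		tap_hint = tap;
		tapp = &tap->an_next;
		tap = tap->an_next;
	}
	*tapp = ap;
	ap->an_next = tap;
	sk_hint = tap_hint;
}

int
main(void)
{
	struct sk_anon *ap;
	int i;

	sk_free(&sk_slots[2]);
	sk_free(&sk_slots[9]);
	for (i = 8; i >= 3; i--)		/* free #8 through #3 */
		sk_free(&sk_slots[i]);
	printf("hint hits %d, misses %d; free list:", sk_hits, sk_misses);
	for (ap = sk_freelist; ap != NULL; ap = ap->an_next)
		printf(" %d", (int)(ap - sk_slots));
	printf("\n");
	return (0);
}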
/*
* Return the <vnode, offset> pair
* corresponding to the given anon struct.
*/
void
swap_xlate(ap, vpp, offsetp)
struct anon *ap;
struct vnode **vpp;
u_int *offsetp;
{
register struct swapinfo *sip = silast;
do {
if (sip->si_anon <= ap && ap <= sip->si_eanon) {
*offsetp = ptob(ap - sip->si_anon);
*vpp = sip->si_vp;
return;
}
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
panic("swap_xlate");
/* NOTREACHED */
}
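
/*
 * A small sketch of the translation done by swap_xlate() above: the swap
 * offset is just the slot's index within its area's anon array shifted up
 * by the page size.  The structure and array here are toy stand-ins.
 */
#include <stdio.h>

#define SK_PAGESHIFT	12

struct sk_anon { int an_refcnt; };

int
main(void)
{
	static struct sk_anon sk_area[16];	/* one swap area's slot array */
	struct sk_anon *ap = &sk_area[5];	/* some allocated slot */
	unsigned int offset;

	offset = (unsigned int)(ap - sk_area) << SK_PAGESHIFT;	/* ptob(ap - si_anon) */
	printf("slot %d -> swap offset 0x%x\n", (int)(ap - sk_area), offset);
	return (0);
}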
/*
* Like swap_xlate, but return a status instead of panic'ing.
* Used by dump routines when we know we may be corrupted.
*/
swap_xlate_nopanic(ap, vpp, offsetp)
struct anon *ap;
struct vnode **vpp;
u_int *offsetp;
{
register struct swapinfo *sip = swapinfo;
do {
if (sip->si_anon <= ap && ap <= sip->si_eanon) {
*offsetp = (ap - sip->si_anon) << PAGESHIFT;
*vpp = sip->si_vp;
return (1);
}
} while (sip = sip->si_next);
/* Couldn't find it; return failure */
return (0);
}
/*
* Return the anon struct corresponding for the given
* <vnode, offset> if it is part of the virtual swap device.
*/
struct anon *
swap_anon(vp, offset)
struct vnode *vp;
u_int offset;
{
register struct swapinfo *sip = silast;
if (vp && sip) {
do {
if (vp == sip->si_vp && offset < sip->si_size)
return (sip->si_anon + (offset >> PAGESHIFT));
sip = sip->si_next;
if (sip == NULL)
sip = swapinfo;
} while (sip != silast);
}
/*
* Note - we don't return the anon structure for
* fake'd anon slots which have no real vp.
*/
return ((struct anon *)NULL);
}
/*
* swread and swwrite implement the /dev/drum device, an indirect,
* user visible, device to allow reading of the (virtual) swap device.
*/
/*ARGSUSED*/
swread(dev, uio)
dev_t dev;
struct uio *uio;
{
return (sw_rdwr(uio, UIO_READ));
}
/*ARGSUSED*/
swwrite(dev, uio)
dev_t dev;
struct uio *uio;
{
return (sw_rdwr(uio, UIO_WRITE));
}
/*
* Handle all the work of reading "fake" swap pages that are in memory.
*/
static int
fake_sw_rdwr(uio, rw, cred)
register struct uio *uio;
enum uio_rw rw;
struct ucred *cred;
{
struct page *pp;
struct vnode *memvp;
int nbytes;
u_int off;
int err;
extern int mem_no;
nbytes = uio->uio_resid;
off = uio->uio_offset;
memvp = makespecvp(makedev(mem_no, M_MEM), VCHR);
do {
/*
* Find the page corresponding to the "fake" name
* and then read the corresponding page from /dev/mem.
*/
pp = page_find((struct vnode *)NULL, (u_int)(off & PAGEMASK));
if (pp == NULL) {
err = EIO;
break;
}
uio->uio_offset = ptob(page_pptonum(pp)) + (off & PAGEOFFSET);
if ((off & PAGEOFFSET) == 0)
uio->uio_resid = MIN(PAGESIZE, nbytes);
else
uio->uio_resid = min(ptob(btopr(off)) - off,
(u_int)nbytes);
nbytes -= uio->uio_resid;
off += uio->uio_resid;
err = VOP_RDWR(memvp, uio, rw, 0, cred);
} while (err == 0 && nbytes > 0 && uio->uio_resid == 0);
VN_RELE(memvp);
return (err);
}
/*
* Common routine used to break up reads and writes to the
* (virtual) swap device to the underlying vnode(s). This is
 * used to implement the user visible /dev/drum interface.
*/
static int
sw_rdwr(uio, rw)
register struct uio *uio;
enum uio_rw rw;
{
register struct swapinfo *sip = swapinfo;
int nbytes = uio->uio_resid;
u_int off = 0;
int err = 0;
do {
if (uio->uio_offset >= off &&
uio->uio_offset < off + sip->si_size)
break;
off += sip->si_size;
} while (sip = sip->si_next);
if (sip) {
uio->uio_offset -= off;
do {
uio->uio_resid = MIN(sip->si_size - uio->uio_offset,
nbytes);
nbytes -= uio->uio_resid;
if (sip->si_vp)
err = VOP_RDWR(sip->si_vp, uio, rw, 0,
u.u_cred);
else
err = fake_sw_rdwr(uio, rw, u.u_cred);
uio->uio_offset = 0;
} while (err == 0 && nbytes > 0 && uio->uio_resid == 0 &&
(sip = sip->si_next));
uio->uio_resid = nbytes + uio->uio_resid;
}
return (err);
}
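
/*
 * A stand-alone sketch of how sw_rdwr() above maps a logical /dev/drum
 * offset onto the concatenated swap areas: walk the list adding up si_size
 * until the offset falls inside an area, then carve the request into
 * per-area pieces.  The area sizes and request are made-up toy values.
 */
#include <stdio.h>

#define SK_NAREAS	3

/* si_size of each swap area, in bytes (toy values) */
static unsigned int sk_size[SK_NAREAS] = { 0x8000, 0x4000, 0x10000 };

int
main(void)
{
	unsigned int offset = 0x9000;	/* logical offset into /dev/drum */
	unsigned int resid = 0xc000;	/* bytes requested */
	unsigned int base = 0;
	int i;

	/* find the area containing the starting offset */
	for (i = 0; i < SK_NAREAS; i++) {
		if (offset >= base && offset < base + sk_size[i])
			break;
		base += sk_size[i];
	}
	/* carve the request into per-area pieces */
	for (offset -= base; i < SK_NAREAS && resid != 0; i++, offset = 0) {
		unsigned int chunk = sk_size[i] - offset;

		if (chunk > resid)
			chunk = resid;
		printf("area %d: offset 0x%x, 0x%x bytes\n", i, offset, chunk);
		resid -= chunk;
	}
	return (0);
}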
/*
 * System call swapon(name) enables swapping on device name.
* Return EBUSY if already swapping on this device.
*/
swapon()
{
register struct a {
char *name;
} *uap = (struct a *)u.u_ap;
struct vnode *vp;
if (!suser())
return;
uap = (struct a *)u.u_ap;
if (u.u_error = lookupname(uap->name, UIOSEG_USER, FOLLOW_LINK,
(struct vnode **)NULL, &vp))
return;
switch (vp->v_type) {
case VBLK: {
struct vnode *nvp;
nvp = bdevvp(vp->v_rdev);
VN_RELE(vp);
vp = nvp;
/*
* Call the partition's open routine, to give it a chance to
* check itself for consistency (e.g., for scrambled disk
* labels). (The open isn't otherwise required.)
*/
if (u.u_error = VOP_OPEN(&vp, FREAD|FWRITE, u.u_cred))
goto out;
break;
}
case VREG:
if (vp->v_vfsp->vfs_flag & VFS_RDONLY) {
u.u_error = EROFS;
goto out;
}
if (u.u_error = VOP_ACCESS(vp, VREAD|VWRITE, u.u_cred))
goto out;
if (u.u_error = VOP_OPEN(&vp, FREAD|FWRITE, u.u_cred))
goto out;
break;
case VDIR:
u.u_error = EISDIR;
goto out;
case VCHR:
case VSOCK:
default:
u.u_error = EOPNOTSUPP;
goto out;
}
u.u_error = swap_init(vp);
out:
if (u.u_error) {
VN_RELE(vp);
}
}

28
sys/vm/vpage.h Normal file
View File

@@ -0,0 +1,28 @@
/* @(#)vpage.h 1.1 94/10/31 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
#ifndef _vm_vpage_h
#define _vm_vpage_h
/*
* VM - Information per virtual page.
*/
struct vpage {
u_int vp_prot: 4; /* see <sys/mman.h> prot flags */
u_int vp_advice: 3; /* see <sys/mman.h> madvise flags */
u_int vp_pplock: 1; /* physical page locked by me */
/*
* The following two are for use with a
* local page replacement algorithm (someday).
*/
u_int vp_ref: 1; /* reference bit */
u_int vp_mod: 1; /* (maybe) modify bit, from hat */
u_int vp_ski_ref: 1; /* ski reference bit */
u_int vp_ski_mod: 1; /* ski modified bit */
u_int : 4;
};
#endif /*!_vm_vpage_h*/