Init
13
sys/vm/Makefile
Normal file
@@ -0,0 +1,13 @@
#
# @(#)Makefile 1.1 94/10/31 SMI
#

HFILES = anon.h as.h faultcode.h hat.h mp.h page.h pvn.h rm.h \
	seg.h seg_dev.h seg_map.h seg_vn.h seg_u.h swap.h vpage.h

HDIR=$(DESTDIR)/usr/include/vm

install_h: $(HFILES) FRC
	install -d -m 755 $(HDIR)
	install -m 444 $(HFILES) $(HDIR)
FRC:
60
sys/vm/anon.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/* @(#)anon.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_anon_h
|
||||
#define _vm_anon_h
|
||||
|
||||
/*
|
||||
* VM - Anonymous pages.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Each page which is anonymous, either in memory or in swap,
|
||||
* has an anon structure. The structure's primary purpose is
|
||||
* to hold a reference count so that we can detect when the last
|
||||
* copy of a multiply-referenced copy-on-write page goes away.
|
||||
* When on the free list, un.next gives the next anon structure
|
||||
* in the list. Otherwise, un.page is a ``hint'' which probably
|
||||
* points to the current page. This must be explicitly checked
|
||||
* since the page can be moved underneath us. This is simply
|
||||
* an optimization to avoid having to look up each page when
|
||||
* doing things like fork.
|
||||
*/
|
||||
struct anon {
|
||||
int an_refcnt;
|
||||
union {
|
||||
struct page *an_page; /* ``hint'' to the real page */
|
||||
struct anon *an_next; /* free list pointer */
|
||||
} un;
|
||||
};
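/*
 * Illustrative sketch (not part of the original header): checking the
 * un.an_page ``hint'' before falling back to a real lookup.  The
 * [vp, off] identity and the use of page_find() from <vm/page.h> are
 * assumptions of this sketch; in practice they would come from the
 * swap layer that backs the anon page.
 */
static struct page *
anon_hint_sketch(ap, vp, off)
	struct anon *ap;
	struct vnode *vp;
	u_int off;
{
	register struct page *pp = ap->un.an_page;

	if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off)
		return (pp);		/* hint is still valid */
	return (page_find(vp, off));	/* hint went stale, do the lookup */
}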
|
||||
|
||||
struct anoninfo {
|
||||
u_int ani_max; /* maximum anon pages available */
|
||||
u_int ani_free; /* number of anon pages currently free */
|
||||
u_int ani_resv; /* number of anon pages reserved */
|
||||
};
|
||||
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* Flags for anon_private.
|
||||
*/
|
||||
#define STEAL_PAGE 0x01 /* page can be stolen */
|
||||
#define LOCK_PAGE 0x02 /* page must be ``logically'' locked */
|
||||
|
||||
extern struct anoninfo anoninfo;
|
||||
|
||||
struct anon *anon_alloc();
|
||||
void anon_dup(/* old, new, size */);
|
||||
void anon_free(/* app, size */);
|
||||
int anon_getpage(/* app, protp, pl, sz, seg, addr, rw, cred */);
|
||||
struct page *anon_private(/* app, seg, addr, opp, oppflags */);
|
||||
struct page *anon_zero(/* seg, addr, app */);
|
||||
void anon_unloadmap(/* ap, ref, mod */);
|
||||
int anon_resv(/* size */);
|
||||
void anon_unresv(/* size */);
|
||||
#endif KERNEL
|
||||
|
||||
#endif /*!_vm_anon_h*/
|
||||
79
sys/vm/as.h
Normal file
@@ -0,0 +1,79 @@
|
||||
/* @(#)as.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_as_h
|
||||
#define _vm_as_h
|
||||
|
||||
#include <vm/faultcode.h>
|
||||
|
||||
/*
|
||||
* VM - Address spaces.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Each address space consists of a list of sorted segments
|
||||
* and machine dependent address translation information.
|
||||
*
|
||||
* All the hard work is in the segment drivers and the
|
||||
* hardware address translation code.
|
||||
*/
|
||||
struct as {
|
||||
u_int a_lock: 1;
|
||||
u_int a_want: 1;
|
||||
u_int a_paglck: 1; /* lock mappings into address space */
|
||||
u_int a_ski: 1; /* enables recording of page info for ski */
|
||||
u_int a_hatcallback: 1; /* enables hat callbacks for this address space */
|
||||
u_int : 11;
|
||||
u_short a_keepcnt; /* number of `keeps' */
|
||||
struct seg *a_segs; /* segments in this address space */
|
||||
struct seg *a_seglast; /* last segment hit on the address space */
|
||||
int a_rss; /* memory claim for this address space */
|
||||
struct hat a_hat; /* hardware address translation */
|
||||
};
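/*
 * Illustrative sketch (not part of the original header): walking the
 * sorted, circularly linked segment list of an address space in the
 * style of as_segat().  Locking via a_lock/a_want is omitted, and the
 * s_base/s_size/s_next fields come from <vm/seg.h>.
 */
static struct seg *
as_segat_sketch(as, addr)
	struct as *as;
	addr_t addr;
{
	register struct seg *seg = as->a_segs;

	if (seg == NULL)
		return (NULL);
	do {
		if (addr >= seg->s_base && addr < seg->s_base + seg->s_size)
			return (seg);
		seg = seg->s_next;
	} while (seg != as->a_segs);
	return (NULL);
}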
|
||||
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* Types of failure for the various address space operations.
|
||||
*/
|
||||
enum as_res {
|
||||
A_SUCCESS, /* operation successful */
|
||||
A_BADADDR, /* illegal address encountered */
|
||||
A_OPFAIL, /* segment operation failure */
|
||||
A_RESOURCE, /* resource exhaustion */
|
||||
};
|
||||
|
||||
/*
|
||||
* Flags for as_hole.
|
||||
*/
|
||||
#define AH_DIR 0x1 /* direction flag mask */
|
||||
#define AH_LO 0x0 /* find lowest hole */
|
||||
#define AH_HI 0x1 /* find highest hole */
|
||||
#define AH_CONTAIN 0x2 /* hole must contain `addr' */
|
||||
|
||||
/*
|
||||
* Flags for as_hatsync
|
||||
*/
|
||||
#define AHAT_UNLOAD 0x01 /* Translation being unloaded */
|
||||
|
||||
struct seg *as_segat(/* as, addr */);
|
||||
struct as *as_alloc();
|
||||
void as_free(/* as */);
|
||||
struct as *as_dup(/* as */);
|
||||
enum as_res as_addseg(/* as, seg */);
|
||||
faultcode_t as_fault(/* as, addr, size, type, rw */);
|
||||
faultcode_t as_faulta(/* as, addr, size */);
|
||||
enum as_res as_setprot(/* as, addr, size, prot */);
|
||||
enum as_res as_checkprot(/* as, addr, size, prot */);
|
||||
enum as_res as_unmap(/* as, addr, size */);
|
||||
int as_map(/* as, addr, size, crfp, crargsp */);
|
||||
enum as_res as_hole(/* as, minlen, basep, lenp, flags, addr */);
|
||||
enum as_res as_memory(/* as, addrp, sizep */);
|
||||
u_int as_swapout(/* as */);
|
||||
enum as_res as_incore(/* as, addr, size, vecp, sizep */);
|
||||
enum as_res as_ctl(/* as, addr, size, func, arg */);
|
||||
void as_hatsync(/* as, addr, ref, mod, flags */);
|
||||
#endif KERNEL
|
||||
#endif /*!_vm_as_h*/
|
||||
30
sys/vm/dbx_vm.c
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef lint
|
||||
static char sccsid[] = "@(#)dbx_vm.c 1.1 94/10/31 SMI";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is optionally brought in by including a
|
||||
* "pseudo-device dbx" line in the config file. It is
|
||||
* compiled using the "-g" flag to generate structure
|
||||
* information which is used by dbx with the -k flag.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <vm/hat.h>
|
||||
#include <vm/anon.h>
|
||||
#include <vm/as.h>
|
||||
#include <vm/mp.h>
|
||||
#include <vm/page.h>
|
||||
#include <vm/pvn.h>
|
||||
#include <vm/rm.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/seg_dev.h>
|
||||
#include <vm/seg_map.h>
|
||||
#include <vm/seg_vn.h>
|
||||
#include <vm/swap.h>
|
||||
#include <vm/vpage.h>
|
||||
33
sys/vm/faultcode.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/* @(#)faultcode.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_faultcode_h
|
||||
#define _vm_faultcode_h
|
||||
|
||||
/*
|
||||
* This file describes the "code" that is delivered during
|
||||
* SIGBUS and SIGSEGV exceptions. It also describes the data
|
||||
* type returned by vm routines which handle faults.
|
||||
*
|
||||
* If FC_CODE(fc) == FC_OBJERR, then FC_ERRNO(fc) contains the errno value
|
||||
* returned by the underlying object mapped at the fault address.
|
||||
*/
|
||||
#define FC_HWERR 0x1 /* misc hardware error (e.g. bus timeout) */
|
||||
#define FC_ALIGN 0x2 /* hardware alignment error */
|
||||
#define FC_NOMAP 0x3 /* no mapping at the fault address */
|
||||
#define FC_PROT 0x4 /* access exceeded current protections */
|
||||
#define FC_OBJERR 0x5 /* underlying object returned errno value */
|
||||
|
||||
#define FC_MAKE_ERR(e) (((e) << 8) | FC_OBJERR)
|
||||
|
||||
#define FC_CODE(fc) ((fc) & 0xff)
|
||||
#define FC_ERRNO(fc) ((unsigned)(fc) >> 8)
|
||||
|
||||
#ifndef LOCORE
|
||||
typedef int faultcode_t; /* type returned by vm fault routines */
|
||||
#endif LOCORE
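/*
 * Illustrative example (not part of the original header): a segment
 * driver packs an errno with FC_MAKE_ERR (e.g. FC_MAKE_ERR(EIO)) and a
 * caller recovers it as shown below; for any other code the fault was
 * a hardware, mapping or protection failure.
 */
static int
fc_errno_sketch(fc)
	faultcode_t fc;
{
	if (FC_CODE(fc) == FC_OBJERR)
		return (FC_ERRNO(fc));	/* errno from the mapped object */
	return (0);			/* not an object error */
}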
|
||||
|
||||
#endif /*!_vm_faultcode_h*/
|
||||
86
sys/vm/hat.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/* @(#)hat.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_hat_h
|
||||
#define _vm_hat_h
|
||||
|
||||
/*
|
||||
* VM - Hardware Address Translation management.
|
||||
*
|
||||
* This file describes the machine independent interfaces to
|
||||
* the hardware address translation management routines. Other
|
||||
* machine specific interfaces and structures are defined
|
||||
* in <machine/vm_hat.h>. The hat layer manages the address
|
||||
* translation hardware as a cache driven by calls from the
|
||||
* higher levels of the VM system.
|
||||
*/
|
||||
|
||||
#include <machine/vm_hat.h>
|
||||
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* One time hat initialization
|
||||
*/
|
||||
void hat_init();
|
||||
|
||||
/*
|
||||
* Operations on hat resources for an address space:
|
||||
* - initialize any needed hat structures for the address space
|
||||
* - free all hat resources now owned by this address space
|
||||
*
|
||||
* N.B. - The hat structure is guaranteed to be zeroed when created.
|
||||
* The hat layer can choose to define hat_alloc as a macro to avoid
|
||||
* a subroutine call if this is sufficient initialization.
|
||||
*/
|
||||
#ifndef hat_alloc
|
||||
void hat_alloc(/* as */);
|
||||
#endif
|
||||
void hat_free(/* as */);
|
||||
|
||||
/*
|
||||
* Operations on a named address within a segment:
|
||||
* - load/lock the given page struct
|
||||
* - load/lock the given page frame number
|
||||
* - unlock the given address
|
||||
*
|
||||
* (Perhaps we need an interface to load several pages at once?)
|
||||
*/
|
||||
void hat_memload(/* seg, addr, pp, prot, lock */);
|
||||
void hat_devload(/* seg, addr, pf, prot, lock */);
|
||||
void hat_unlock(/* seg, addr */);
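/*
 * Illustrative sketch (not part of the original header): the load/lock
 * and unlock pairing a segment driver uses around physical I/O,
 * mirroring the F_SOFTLOCK / F_SOFTUNLOCK handling in the segment
 * drivers.  The function and its arguments are assumptions.
 */
static void
hat_softlock_sketch(seg, addr, pp, prot)
	struct seg *seg;
	addr_t addr;
	struct page *pp;
	u_int prot;
{
	hat_memload(seg, addr, pp, prot, 1);	/* F_SOFTLOCK: load and lock */
	/* ... caller performs the physical I/O on the locked page ... */
	hat_unlock(seg, addr);			/* F_SOFTUNLOCK: release */
}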
|
||||
|
||||
/*
|
||||
* Operations over an address range:
|
||||
* - change protections
|
||||
* - change mapping to refer to a new segment
|
||||
* - unload mapping
|
||||
*/
|
||||
void hat_chgprot(/* seg, addr, len, prot */);
|
||||
void hat_newseg(/* seg, addr, len, nseg */);
|
||||
void hat_unload(/* seg, addr, len */);
|
||||
|
||||
/*
|
||||
* Operations that work on all active translation for a given page:
|
||||
* - unload all translations to page
|
||||
* - get hw stats from hardware into page struct and reset hw stats
|
||||
*/
|
||||
void hat_pageunload(/* pp */);
|
||||
void hat_pagesync(/* pp */);
|
||||
|
||||
/*
|
||||
* Operations that return physical page numbers (ie - used by mapin):
|
||||
* - return the pfn for kernel virtual address
|
||||
* - return the pfn for arbitrary virtual address
|
||||
*/
|
||||
u_int hat_getkpfnum(/* addr */);
|
||||
/*
|
||||
* XXX - This one is not yet implemented - not yet needed
|
||||
* u_int hat_getpfnum(as, addr);
|
||||
*/
|
||||
|
||||
#endif KERNEL
|
||||
|
||||
#endif /*!_vm_hat_h*/
|
||||
39
sys/vm/mp.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/* @(#)mp.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_mp_h
|
||||
#define _vm_mp_h
|
||||
|
||||
/*
|
||||
* VM - multiprocessor/ing support.
|
||||
*
|
||||
* Currently the kmon_enter() / kmon_exit() pair implements a
|
||||
* simple monitor for objects protected by the appropriate lock.
|
||||
* The kcv_wait() / kcv_broadcast() pair implements a simple
|
||||
* condition variable which can be used for `sleeping'
|
||||
* and `waking' inside a monitor if some resource
|
||||
* is needed which is not available.
|
||||
*/
|
||||
|
||||
typedef struct kmon_t {
|
||||
u_int dummy;
|
||||
} kmon_t;
|
||||
|
||||
|
||||
#define lock_init(lk) (lk)->dummy = 0
|
||||
|
||||
#ifndef KMON_DEBUG
|
||||
#define kmon_enter(a)
|
||||
#define kmon_exit(a)
|
||||
#define kcv_wait(lk, cond) (void) sleep(cond, PSWP+1)
|
||||
#define kcv_broadcast(lk, cond) wakeup(cond)
|
||||
#else
|
||||
void kmon_enter(/* lk */);
|
||||
void kmon_exit(/* lk */);
|
||||
void kcv_wait(/* lk, cond */);
|
||||
void kcv_broadcast(/* lk, cond */);
|
||||
#endif /*!KMON_DEBUG*/
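/*
 * Illustrative usage sketch (not part of the original header): the
 * canonical monitor pattern, guarding a hypothetical resource with a
 * kmon_t and waiting on a condition until it becomes available
 * (compare the smd_free/smd_want handling in seg_map.c).
 */
static kmon_t res_lock;
static char *res_avail;

static char *
monitor_sketch()
{
	register char *r;

	kmon_enter(&res_lock);
	while (res_avail == NULL)
		kcv_wait(&res_lock, (caddr_t)&res_avail);
	r = res_avail;
	kmon_exit(&res_lock);
	return (r);
}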
|
||||
#endif /*!_vm_mp_h*/
|
||||
166
sys/vm/page.h
Normal file
@@ -0,0 +1,166 @@
|
||||
/* @(#)page.h 1.1 94/10/31 SMI */
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_page_h
|
||||
#define _vm_page_h
|
||||
/*
|
||||
* VM - Ram pages.
|
||||
*
|
||||
* Each physical page has a page structure, which is used to maintain
|
||||
* these pages as a cache. A page can be found via a hashed lookup
|
||||
* based on the [vp, offset]. If a page has a [vp, offset] identity,
|
||||
* then it is entered on a doubly linked circular list off the
|
||||
* vnode using the vpnext/vpprev pointers. If the p_free bit
|
||||
* is on, then the page is also on a doubly linked circular free
|
||||
* list using next/prev pointers. If the p_intrans bit is on,
|
||||
* then the page is currently being read in or written back.
|
||||
* In this case, the next/prev pointers are used to link the
|
||||
* pages together for a consecutive IO request. If the page
|
||||
* is in transit and the page is coming in (pagein), then you
|
||||
* must wait for the IO to complete before you can attach to the page.
|
||||
*
|
||||
*/
|
||||
struct page {
|
||||
u_int p_lock: 1, /* locked for name manipulation */
|
||||
p_want: 1, /* page wanted */
|
||||
p_free: 1, /* on free list */
|
||||
p_intrans: 1, /* data for [vp, offset] intransit */
|
||||
p_gone: 1, /* page has been released */
|
||||
p_mod: 1, /* software copy of modified bit */
|
||||
p_ref: 1, /* software copy of reference bit */
|
||||
p_pagein: 1, /* being paged in, data not valid */
|
||||
p_nc: 1, /* do not cache page */
|
||||
p_age: 1; /* on age free list */
|
||||
u_int p_nio : 6; /* # of outstanding io reqs needed */
|
||||
u_short p_keepcnt; /* number of page `keeps' */
|
||||
struct vnode *p_vnode; /* logical vnode this page is from */
|
||||
u_int p_offset; /* offset into vnode for this page */
|
||||
struct page *p_hash; /* hash by [vnode, offset] */
|
||||
struct page *p_next; /* next page in free/intrans lists */
|
||||
struct page *p_prev; /* prev page in free/intrans lists */
|
||||
struct page *p_vpnext; /* next page in vnode list */
|
||||
struct page *p_vpprev; /* prev page in vnode list */
|
||||
caddr_t p_mapping; /* hat specific translation info */
|
||||
u_short p_lckcnt; /* number of locks on page data */
|
||||
u_short p_pad; /* steal bits from here */
|
||||
};
|
||||
|
||||
/*
|
||||
* Each segment of physical memory is described by a memseg struct. Within
|
||||
* a segment, memory is considered contiguous. The segments form a linked
|
||||
* list to describe all of physical memory. The list is ordered by increasing
|
||||
* physical addresses.
|
||||
*/
|
||||
struct memseg {
|
||||
struct page *pages, *epages; /* [from, to) in page array */
|
||||
u_int pages_base, pages_end; /* [from, to) in page numbers */
|
||||
struct memseg *next; /* next segment in list */
|
||||
};
|
||||
|
||||
#ifdef KERNEL
|
||||
#define PAGE_HOLD(pp) (pp)->p_keepcnt++
|
||||
#define PAGE_RELE(pp) page_rele(pp)
|
||||
|
||||
#define PAGE_HASHSZ page_hashsz
|
||||
|
||||
extern int page_hashsz;
|
||||
extern struct page **page_hash;
|
||||
|
||||
extern struct page *pages; /* array of all page structures */
|
||||
extern struct page *epages; /* end of all pages */
|
||||
extern struct memseg *memsegs; /* list of memory segments */
|
||||
|
||||
/*
|
||||
* Variables controlling locking of physical memory.
|
||||
*/
|
||||
extern u_int pages_pp_locked; /* physical pages actually locked */
|
||||
extern u_int pages_pp_claimed; /* physical pages reserved */
|
||||
extern u_int pages_pp_maximum; /* tuning: lock + claim <= max */
|
||||
|
||||
/*
|
||||
* Page frame operations.
|
||||
*/
|
||||
void page_init(/* pp, num, base */);
|
||||
void page_reclaim(/* pp */);
|
||||
struct page *page_find(/* vp, off */);
|
||||
struct page *page_exists(/* vp, off */);
|
||||
struct page *page_lookup(/* vp, off */);
|
||||
int page_enter(/* pp, vp, off */);
|
||||
void page_abort(/* pp */);
|
||||
void page_free(/* pp */);
|
||||
void page_unfree(/* pp */);
|
||||
struct page *page_get();
|
||||
void page_rele(/* pp */);
|
||||
void page_lock(/* pp */);
|
||||
void page_unlock(/* pp */);
|
||||
int page_pp_lock(/* pp, claim, check_resv */);
|
||||
void page_pp_unlock(/* pp, claim */);
|
||||
int page_addclaim(/* claim */);
|
||||
void page_subclaim(/* claim */);
|
||||
void page_hashout(/* pp */);
|
||||
void page_add(/* ppp, pp */);
|
||||
void page_sub(/* ppp, pp */);
|
||||
void page_sortadd(/* ppp, pp */);
|
||||
void page_wait(/* pp */);
|
||||
u_int page_pptonum(/* pp */);
|
||||
struct page *page_numtopp(/* pfnum */);
|
||||
struct page *page_numtookpp(/* pfnum */);
|
||||
#endif KERNEL
|
||||
|
||||
/*
|
||||
* Page hash table is a power-of-two in size, externally chained
|
||||
* through the hash field. PAGE_HASHAVELEN is the average length
|
||||
* desired for this chain, from which the size of the page_hash
|
||||
* table is derived at boot time and stored in the kernel variable
|
||||
* page_hashsz. In the hash function it is given by PAGE_HASHSZ.
|
||||
* PAGE_HASHVPSHIFT is defined so that 1 << PAGE_HASHVPSHIFT is
|
||||
* the approximate size of a vnode struct.
|
||||
*/
|
||||
#define PAGE_HASHAVELEN 4
|
||||
#define PAGE_HASHVPSHIFT 6
|
||||
#define PAGE_HASHFUNC(vp, off) \
|
||||
((((off) >> PAGESHIFT) + ((int)(vp) >> PAGE_HASHVPSHIFT)) & \
|
||||
(PAGE_HASHSZ - 1))
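/*
 * Illustrative sketch (not part of the original header): walking a hash
 * chain with PAGE_HASHFUNC, the same loop the seg_map driver uses when
 * it bypasses page_find().
 */
static struct page *
page_hash_sketch(vp, off)
	struct vnode *vp;
	u_int off;
{
	register struct page *pp;

	for (pp = page_hash[PAGE_HASHFUNC(vp, off)]; pp != NULL;
	    pp = pp->p_hash)
		if (pp->p_vnode == vp && pp->p_offset == off)
			break;
	return (pp);
}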
|
||||
|
||||
/*
|
||||
* Macros for setting reference and modify bit values. These exist as macros
|
||||
* so that tracing code has the opportunity to note the new values.
|
||||
*/
|
||||
#ifdef TRACE
|
||||
|
||||
#ifdef lint
|
||||
#define pg_setref(pp, val) \
|
||||
if (pp) { \
|
||||
trace2(TR_PG_SETREF, (pp), (val)); \
|
||||
(pp)->p_ref = (val); \
|
||||
} else
|
||||
#define pg_setmod(pp, val) \
|
||||
if (pp) { \
|
||||
trace2(TR_PG_SETMOD, (pp), (val)); \
|
||||
(pp)->p_mod = (val); \
|
||||
} else
|
||||
#else lint
|
||||
#define pg_setref(pp, val) \
|
||||
if (1) { \
|
||||
trace2(TR_PG_SETREF, (pp), (val)); \
|
||||
(pp)->p_ref = (val); \
|
||||
} else
|
||||
#define pg_setmod(pp, val) \
|
||||
if (1) { \
|
||||
trace2(TR_PG_SETMOD, (pp), (val)); \
|
||||
(pp)->p_mod = (val); \
|
||||
} else
|
||||
#endif lint
|
||||
|
||||
#else TRACE
|
||||
|
||||
#define pg_setref(pp, val) (pp)->p_ref = (val)
|
||||
#define pg_setmod(pp, val) (pp)->p_mod = (val)
|
||||
|
||||
#endif TRACE
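/*
 * Illustrative note (not part of the original header): the macro bodies
 * above end in a dangling ``else'' so that an invocation followed by a
 * semicolon nests safely under the caller's own if/else, e.g.:
 */
static void
pg_mark_sketch(pp, writing)
	struct page *pp;
	int writing;
{
	if (writing)
		pg_setmod(pp, 1);
	else
		pg_setref(pp, 1);
}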
|
||||
|
||||
#endif /*!_vm_page_h*/
|
||||
50
sys/vm/pvn.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/* @(#)pvn.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_pvn_h
|
||||
#define _vm_pvn_h
|
||||
|
||||
/*
|
||||
* VM - paged vnode.
|
||||
*
|
||||
* The VM system manages memory as a cache of paged vnodes.
|
||||
* This file describes the interfaces to common subroutines
|
||||
* used to help implement the VM/file system routines.
|
||||
*/
|
||||
|
||||
struct page *pvn_kluster(/* vp, off, seg, addr, offp, lenp, vp_off,
|
||||
vp_len, isra */);
|
||||
void pvn_fail(/* plist, flags */);
|
||||
void pvn_done(/* bp */);
|
||||
struct page *pvn_vplist_dirty(/* vp, off, flags */);
|
||||
struct page *pvn_range_dirty(/* vp, off, eoff, offlo, offhi, flags */);
|
||||
void pvn_vptrunc(/* vp, vplen, zbytes */);
|
||||
void pvn_unloadmap(/* vp, offset, ref, mod */);
|
||||
int pvn_getpages(/* getapage, vp, off, len, protp, pl, plsz, seg, addr,
|
||||
rw, cred */);
|
||||
|
||||
/*
|
||||
* When requesting pages from the getpage routines, pvn_getpages will
|
||||
* allocate space to return PVN_GETPAGE_NUM pages which map PVN_GETPAGE_SZ
|
||||
* worth of bytes. These numbers are chosen to be the minimum of the max's
|
||||
* given in terms of bytes and pages.
|
||||
*/
|
||||
#define PVN_MAX_GETPAGE_SZ 0x10000 /* getpage size limit */
|
||||
#define PVN_MAX_GETPAGE_NUM 0x8 /* getpage page limit */
|
||||
|
||||
#if PVN_MAX_GETPAGE_SZ > PVN_MAX_GETPAGE_NUM * PAGESIZE
|
||||
|
||||
#define PVN_GETPAGE_SZ ptob(PVN_MAX_GETPAGE_NUM)
|
||||
#define PVN_GETPAGE_NUM PVN_MAX_GETPAGE_NUM
|
||||
|
||||
#else
|
||||
|
||||
#define PVN_GETPAGE_SZ PVN_MAX_GETPAGE_SZ
|
||||
#define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
|
||||
|
||||
#endif
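/*
 * Worked example (not part of the original header; the page sizes are
 * illustrative): with 4096-byte pages, 8 pages map only 32K, less than
 * the 0x10000 (64K) byte limit, so the page limit governs and
 * PVN_GETPAGE_NUM = 8, PVN_GETPAGE_SZ = ptob(8) = 32K.  With 8192-byte
 * pages the two limits coincide at 8 pages / 64K.
 */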
|
||||
|
||||
#endif /*!_vm_pvn_h*/
|
||||
19
sys/vm/rm.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/* @(#)rm.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_rm_h
|
||||
#define _vm_rm_h
|
||||
|
||||
/*
|
||||
* VM - Resource Management.
|
||||
*/
|
||||
|
||||
struct page *rm_allocpage(/* seg, addr */);
|
||||
void rm_outofanon();
|
||||
void rm_outofhat();
|
||||
int rm_asrss(/* as */);
|
||||
|
||||
#endif /*!_vm_rm_h*/
|
||||
93
sys/vm/seg.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/* @(#)seg.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_seg_h
|
||||
#define _vm_seg_h
|
||||
#include <vm/faultcode.h>
|
||||
#include <vm/mp.h>
|
||||
|
||||
/*
|
||||
* VM - Segments.
|
||||
*/
|
||||
|
||||
/*
|
||||
* An address space contains a set of segments, managed by drivers.
|
||||
* Drivers support mapped devices, sharing, copy-on-write, etc.
|
||||
*
|
||||
* The seg structure contains a lock to prevent races, the base virtual
|
||||
* address and size of the segment, a back pointer to the containing
|
||||
* address space, pointers to maintain a circularly doubly linked list
|
||||
* of segments in the same address space, and procedure and data hooks
|
||||
* for the driver. The seg list on the address space is sorted by
|
||||
* ascending base addresses and overlapping segments are not allowed.
|
||||
*
|
||||
* After a segment is created, faults may occur on pages of the segment.
|
||||
* When a fault occurs, the fault handling code must get the desired
|
||||
* object and set up the hardware translation to the object. For some
|
||||
* objects, the fault handling code also implements copy-on-write.
|
||||
*
|
||||
* When the hat wants to unload a translation, it can call the unload
|
||||
* routine which is responsible for processing reference and modify bits.
|
||||
*/
|
||||
struct seg {
|
||||
kmon_t s_lock;
|
||||
addr_t s_base; /* base virtual address */
|
||||
u_int s_size; /* size in bytes */
|
||||
struct as *s_as; /* containing address space */
|
||||
struct seg *s_next; /* next seg in this address space */
|
||||
struct seg *s_prev; /* prev seg in this address space */
|
||||
struct seg_ops {
|
||||
int (*dup)(/* seg, newsegp */);
|
||||
int (*unmap)(/* seg, addr, len */);
|
||||
int (*free)(/* seg */);
|
||||
faultcode_t (*fault)(/* seg, addr, len, type, rw */);
|
||||
faultcode_t (*faulta)(/* seg, addr */);
|
||||
int (*hatsync)(/* seg, addr, ref, mod, flags */);
|
||||
int (*setprot)(/* seg, addr, size, prot */);
|
||||
int (*checkprot)(/* seg, addr, size, prot */);
|
||||
int (*kluster)(/* seg, addr, delta */);
|
||||
u_int (*swapout)(/* seg */);
|
||||
int (*sync)(/* seg, addr, size, flags */);
|
||||
int (*incore)(/* seg, addr, size, vec */);
|
||||
int (*lockop)(/* seg, addr, size, op */);
|
||||
int (*advise)(/* seg, addr, size, behav */);
|
||||
} *s_ops;
|
||||
caddr_t s_data; /* private data for instance */
|
||||
};
|
||||
|
||||
/*
|
||||
* Fault information passed to the seg fault handling routine.
|
||||
* The F_SOFTLOCK and F_SOFTUNLOCK are used by software
|
||||
* to lock and unlock pages for physical I/O.
|
||||
*/
|
||||
enum fault_type {
|
||||
F_INVAL, /* invalid page */
|
||||
F_PROT, /* protection fault */
|
||||
F_SOFTLOCK, /* software requested locking */
|
||||
F_SOFTUNLOCK, /* software requested unlocking */
|
||||
};
|
||||
|
||||
/*
|
||||
* seg_rw gives the access type for a fault operation
|
||||
*/
|
||||
enum seg_rw {
|
||||
S_OTHER, /* unknown or not touched */
|
||||
S_READ, /* read access attempted */
|
||||
S_WRITE, /* write access attempted */
|
||||
S_EXEC, /* execution access attempted */
|
||||
};
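/*
 * Illustrative sketch (not part of the original header): how a generic
 * VM routine dispatches through s_ops to the driver owning an address.
 * The use of as_segat() from <vm/as.h> and the absence of locking are
 * assumptions of this sketch.
 */
static faultcode_t
seg_fault_sketch(as, addr, len, type, rw)
	struct as *as;
	addr_t addr;
	u_int len;
	enum fault_type type;
	enum seg_rw rw;
{
	register struct seg *seg = as_segat(as, addr);

	if (seg == NULL)
		return (FC_NOMAP);
	return ((*seg->s_ops->fault)(seg, addr, len, type, rw));
}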
|
||||
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* Generic segment operations
|
||||
*/
|
||||
struct seg *seg_alloc(/* as, base, size */);
|
||||
int seg_attach(/* as, base, size, seg */);
|
||||
void seg_free(/* seg */);
|
||||
u_int seg_page(/* seg, addr */);
|
||||
u_int seg_pages(/* seg */);
|
||||
#endif KERNEL
|
||||
#endif /*!_vm_seg_h*/
|
||||
476
sys/vm/seg_dev.c
Normal file
@@ -0,0 +1,476 @@
|
||||
/* @(#)seg_dev.c 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - segment of a mapped device.
|
||||
*
|
||||
* This segment driver is used when mapping character special devices.
|
||||
*/
|
||||
|
||||
#include <machine/pte.h>
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/errno.h>
|
||||
|
||||
#include <vm/hat.h>
|
||||
#include <vm/as.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/seg_dev.h>
|
||||
#include <vm/pvn.h>
|
||||
#include <vm/vpage.h>
|
||||
|
||||
#define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
|
||||
|
||||
/*
|
||||
* Private seg op routines.
|
||||
*/
|
||||
static int segdev_dup(/* seg, newsegp */);
|
||||
static int segdev_unmap(/* seg, addr, len */);
|
||||
static int segdev_free(/* seg */);
|
||||
static faultcode_t segdev_fault(/* seg, addr, len, type, rw */);
|
||||
static faultcode_t segdev_faulta(/* seg, addr */);
|
||||
static int segdev_hatsync(/* seg, addr, ref, mod, flags */);
|
||||
static int segdev_setprot(/* seg, addr, size, len */);
|
||||
static int segdev_checkprot(/* seg, addr, size, len */);
|
||||
static int segdev_badop();
|
||||
static int segdev_incore(/* seg, addr, size, vec */);
|
||||
static int segdev_ctlops(/* seg, addr, size, [flags] */);
|
||||
|
||||
struct seg_ops segdev_ops = {
|
||||
segdev_dup,
|
||||
segdev_unmap,
|
||||
segdev_free,
|
||||
segdev_fault,
|
||||
segdev_faulta,
|
||||
segdev_hatsync,
|
||||
segdev_setprot,
|
||||
segdev_checkprot,
|
||||
segdev_badop, /* kluster */
|
||||
(u_int (*)()) NULL, /* swapout */
|
||||
segdev_ctlops, /* sync */
|
||||
segdev_incore,
|
||||
segdev_ctlops, /* lockop */
|
||||
segdev_ctlops, /* advise */
|
||||
};
|
||||
|
||||
/*
|
||||
* Create a device segment.
|
||||
*/
|
||||
int
|
||||
segdev_create(seg, argsp)
|
||||
struct seg *seg;
|
||||
caddr_t argsp;
|
||||
{
|
||||
register struct segdev_data *sdp;
|
||||
register struct segdev_crargs *a = (struct segdev_crargs *)argsp;
|
||||
|
||||
sdp = (struct segdev_data *)
|
||||
new_kmem_alloc(sizeof (struct segdev_data), KMEM_SLEEP);
|
||||
sdp->mapfunc = a->mapfunc;
|
||||
sdp->dev = a->dev;
|
||||
sdp->offset = a->offset;
|
||||
sdp->prot = a->prot;
|
||||
sdp->maxprot = a->maxprot;
|
||||
sdp->pageprot = 0;
|
||||
sdp->vpage = NULL;
|
||||
|
||||
seg->s_ops = &segdev_ops;
|
||||
seg->s_data = (char *)sdp;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Duplicate seg and return new segment in newsegp.
|
||||
*/
|
||||
static int
|
||||
segdev_dup(seg, newseg)
|
||||
struct seg *seg, *newseg;
|
||||
{
|
||||
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
|
||||
register struct segdev_data *newsdp;
|
||||
struct segdev_crargs a;
|
||||
|
||||
a.mapfunc = sdp->mapfunc;
|
||||
a.dev = sdp->dev;
|
||||
a.offset = sdp->offset;
|
||||
a.prot = sdp->prot;
|
||||
a.maxprot = sdp->maxprot;
|
||||
|
||||
(void) segdev_create(newseg, (caddr_t)&a);
|
||||
newsdp = (struct segdev_data *)newseg->s_data;
|
||||
newsdp->pageprot = sdp->pageprot;
|
||||
if (sdp->vpage != NULL) {
|
||||
register u_int nbytes = vpgtob(seg_pages(seg));
|
||||
newsdp->vpage = (struct vpage *)
|
||||
new_kmem_alloc(nbytes, KMEM_SLEEP);
|
||||
bcopy((caddr_t)sdp->vpage, (caddr_t)newsdp->vpage, nbytes);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Split a segment at addr for length len.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
segdev_unmap(seg, addr, len)
|
||||
register struct seg *seg;
|
||||
register addr_t addr;
|
||||
u_int len;
|
||||
{
|
||||
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
|
||||
register struct segdev_data *nsdp;
|
||||
register struct seg *nseg;
|
||||
register u_int npages, spages, tpages;
|
||||
addr_t nbase;
|
||||
u_int nsize, hpages;
|
||||
|
||||
/*
|
||||
* Check for bad sizes
|
||||
*/
|
||||
if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
|
||||
(len & PAGEOFFSET) || ((u_int)addr & PAGEOFFSET))
|
||||
panic("segdev_unmap");
|
||||
|
||||
/*
|
||||
* Unload any hardware translations in the range to be taken out.
|
||||
*/
|
||||
hat_unload(seg, addr, len);
|
||||
|
||||
/*
|
||||
* Check for entire segment
|
||||
*/
|
||||
if (addr == seg->s_base && len == seg->s_size) {
|
||||
seg_free(seg);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for beginning of segment
|
||||
*/
|
||||
spages = seg_pages(seg);
|
||||
npages = btop(len);
|
||||
if (addr == seg->s_base) {
|
||||
if (sdp->vpage != NULL) {
|
||||
sdp->vpage = (struct vpage *)new_kmem_resize(
|
||||
(caddr_t)sdp->vpage, vpgtob(npages),
|
||||
vpgtob(spages - npages), vpgtob(spages),
|
||||
KMEM_SLEEP);
|
||||
}
|
||||
sdp->offset += len;
|
||||
|
||||
seg->s_base += len;
|
||||
seg->s_size -= len;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for end of segment
|
||||
*/
|
||||
if (addr + len == seg->s_base + seg->s_size) {
|
||||
tpages = spages - npages;
|
||||
if (sdp->vpage != NULL)
|
||||
sdp->vpage = (struct vpage *)
|
||||
new_kmem_resize((caddr_t)sdp->vpage, (u_int)0,
|
||||
vpgtob(tpages), vpgtob(spages), KMEM_SLEEP);
|
||||
seg->s_size -= len;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The section to go is in the middle of the segment,
|
||||
* so we have to make it into two segments. nseg is made for
|
||||
* the high end while seg is cut down at the low end.
|
||||
*/
|
||||
nbase = addr + len; /* new seg base */
|
||||
nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */
|
||||
seg->s_size = addr - seg->s_base; /* shrink old seg */
|
||||
nseg = seg_alloc(seg->s_as, nbase, nsize);
|
||||
if (nseg == NULL)
|
||||
panic("segdev_unmap seg_alloc");
|
||||
|
||||
nseg->s_ops = seg->s_ops;
|
||||
nsdp = (struct segdev_data *)
|
||||
new_kmem_alloc(sizeof (struct segdev_data), KMEM_SLEEP);
|
||||
nseg->s_data = (char *)nsdp;
|
||||
nsdp->pageprot = sdp->pageprot;
|
||||
nsdp->prot = sdp->prot;
|
||||
nsdp->maxprot = sdp->maxprot;
|
||||
nsdp->mapfunc = sdp->mapfunc;
|
||||
nsdp->offset = sdp->offset + nseg->s_base - seg->s_base;
|
||||
|
||||
if (sdp->vpage == NULL)
|
||||
nsdp->vpage = NULL;
|
||||
else {
|
||||
tpages = btop(nseg->s_base - seg->s_base);
|
||||
hpages = btop(addr - seg->s_base);
|
||||
|
||||
nsdp->vpage = (struct vpage *)
|
||||
new_kmem_alloc(vpgtob(spages - tpages), KMEM_SLEEP);
|
||||
bcopy((caddr_t)&sdp->vpage[tpages], (caddr_t)nsdp->vpage,
|
||||
vpgtob(spages - tpages));
|
||||
sdp->vpage = (struct vpage *)
|
||||
new_kmem_resize((caddr_t)sdp->vpage, (u_int)0,
|
||||
vpgtob(hpages), vpgtob(spages), KMEM_SLEEP);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now tell the hat layer that all the translations which used
|
||||
* to be associated with seg are now associated with nseg.
|
||||
*/
|
||||
hat_newseg(seg, nseg->s_base, nseg->s_size, nseg);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a segment.
|
||||
*/
|
||||
static
|
||||
segdev_free(seg)
|
||||
struct seg *seg;
|
||||
{
|
||||
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
|
||||
register u_int nbytes = vpgtob(seg_pages(seg));
|
||||
|
||||
if (sdp->vpage != NULL)
|
||||
kmem_free((caddr_t)sdp->vpage, nbytes);
|
||||
kmem_free((caddr_t)sdp, sizeof (*sdp));
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a fault on a device segment.
|
||||
*/
|
||||
static faultcode_t
|
||||
segdev_fault(seg, addr, len, type, rw)
|
||||
register struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int len;
|
||||
enum fault_type type;
|
||||
enum seg_rw rw;
|
||||
{
|
||||
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
|
||||
register addr_t adr;
|
||||
register u_int prot, protchk;
|
||||
int pf;
|
||||
struct vpage *vpage;
|
||||
|
||||
if (type == F_PROT) {
|
||||
/*
|
||||
* Since the seg_dev driver does not implement copy-on-write,
|
||||
* this means that a valid translation is already loaded,
|
||||
* but we got a fault trying to access the device.
|
||||
* Return an error here to prevent going in an endless
|
||||
* loop reloading the same translation...
|
||||
*/
|
||||
return (FC_PROT);
|
||||
}
|
||||
|
||||
if (type != F_SOFTUNLOCK) {
|
||||
if (sdp->pageprot == 0) {
|
||||
switch (rw) {
|
||||
case S_READ:
|
||||
protchk = PROT_READ;
|
||||
break;
|
||||
case S_WRITE:
|
||||
protchk = PROT_WRITE;
|
||||
break;
|
||||
case S_EXEC:
|
||||
protchk = PROT_EXEC;
|
||||
break;
|
||||
case S_OTHER:
|
||||
default:
|
||||
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
break;
|
||||
}
|
||||
prot = sdp->prot;
|
||||
if ((prot & protchk) == 0)
|
||||
return (FC_PROT);
|
||||
vpage = NULL;
|
||||
} else {
|
||||
vpage = &sdp->vpage[seg_page(seg, addr)];
|
||||
}
|
||||
}
|
||||
|
||||
for (adr = addr; adr < addr + len; adr += PAGESIZE) {
|
||||
if (type == F_SOFTUNLOCK) {
|
||||
hat_unlock(seg, adr);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (vpage != NULL) {
|
||||
switch (rw) {
|
||||
case S_READ:
|
||||
protchk = PROT_READ;
|
||||
break;
|
||||
case S_WRITE:
|
||||
protchk = PROT_WRITE;
|
||||
break;
|
||||
case S_EXEC:
|
||||
protchk = PROT_EXEC;
|
||||
break;
|
||||
case S_OTHER:
|
||||
default:
|
||||
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
break;
|
||||
}
|
||||
prot = vpage->vp_prot;
|
||||
vpage++;
|
||||
if ((prot & protchk) == 0)
|
||||
return (FC_PROT);
|
||||
}
|
||||
|
||||
pf = (*sdp->mapfunc)(sdp->dev,
|
||||
sdp->offset + (adr - seg->s_base), prot);
|
||||
if (pf == -1)
|
||||
return (FC_MAKE_ERR(EFAULT));
|
||||
|
||||
hat_devload(seg, adr, pf, prot, type == F_SOFTLOCK);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Asynchronous page fault. We simply do nothing since this
|
||||
* entry point is not supposed to load up the translation.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
static faultcode_t
|
||||
segdev_faulta(seg, addr)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
static
|
||||
segdev_hatsync(seg, addr, ref, mod, flags)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int ref, mod;
|
||||
u_int flags;
|
||||
{
|
||||
|
||||
/* cannot use ref and mod bits on devices, so ignore 'em */
|
||||
}
|
||||
|
||||
static int
|
||||
segdev_setprot(seg, addr, len, prot)
|
||||
register struct seg *seg;
|
||||
register addr_t addr;
|
||||
register u_int len, prot;
|
||||
{
|
||||
register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
|
||||
register struct vpage *vp, *evp;
|
||||
|
||||
if ((sdp->maxprot & prot) != prot)
|
||||
return (-1); /* violated maxprot */
|
||||
|
||||
if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
|
||||
if (sdp->prot == prot)
|
||||
return (0); /* all done */
|
||||
sdp->prot = prot;
|
||||
} else {
|
||||
sdp->pageprot = 1;
|
||||
if (sdp->vpage == NULL) {
|
||||
/*
|
||||
* First time through setting per page permissions,
|
||||
* initialize all the vpage structures to prot
|
||||
*/
|
||||
sdp->vpage = (struct vpage *)new_kmem_zalloc(
|
||||
vpgtob(seg_pages(seg)), KMEM_SLEEP);
|
||||
evp = &sdp->vpage[seg_pages(seg)];
|
||||
for (vp = sdp->vpage; vp < evp; vp++)
|
||||
vp->vp_prot = sdp->prot;
|
||||
}
|
||||
/*
|
||||
* Now go change the needed vpages protections.
|
||||
*/
|
||||
evp = &sdp->vpage[seg_page(seg, addr + len)];
|
||||
for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
|
||||
vp->vp_prot = prot;
|
||||
}
|
||||
|
||||
if (prot == 0)
|
||||
hat_unload(seg, addr, len);
|
||||
else
|
||||
hat_chgprot(seg, addr, len, prot);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
segdev_checkprot(seg, addr, len, prot)
|
||||
register struct seg *seg;
|
||||
register addr_t addr;
|
||||
register u_int len, prot;
|
||||
{
|
||||
struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
|
||||
register struct vpage *vp, *evp;
|
||||
|
||||
/*
|
||||
* If segment protection can be used, simply check against them
|
||||
*/
|
||||
if (sdp->pageprot == 0)
|
||||
return (((sdp->prot & prot) != prot) ? -1 : 0);
|
||||
|
||||
/*
|
||||
* Have to check down to the vpage level
|
||||
*/
|
||||
evp = &sdp->vpage[seg_page(seg, addr + len)];
|
||||
for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
|
||||
if ((vp->vp_prot & prot) != prot)
|
||||
return (-1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static
|
||||
segdev_badop()
|
||||
{
|
||||
|
||||
panic("segdev_badop");
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
/*
|
||||
* segdev pages are not in the cache, and thus can't really be controlled.
|
||||
* syncs, locks, and advice are simply always successful.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
segdev_ctlops(seg, addr, len, flags)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int len, flags;
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* segdev pages are always "in core".
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
segdev_incore(seg, addr, len, vec)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
register u_int len;
|
||||
register char *vec;
|
||||
{
|
||||
u_int v = 0;
|
||||
|
||||
for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
|
||||
v += PAGESIZE)
|
||||
*vec++ = 1;
|
||||
return (v);
|
||||
}
|
||||
38
sys/vm/seg_dev.h
Normal file
@@ -0,0 +1,38 @@
|
||||
/* @(#)seg_dev.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_seg_dev_h
|
||||
#define _vm_seg_dev_h
|
||||
|
||||
/*
|
||||
* Structure whose pointer is passed to the segdev_create routine
|
||||
*/
|
||||
struct segdev_crargs {
|
||||
int (*mapfunc)(); /* map function to call */
|
||||
u_int offset; /* starting offset */
|
||||
dev_t dev; /* device number */
|
||||
u_char prot; /* protection */
|
||||
u_char maxprot; /* maximum protection */
|
||||
};
|
||||
|
||||
/*
|
||||
* (Semi) private data maintained by the seg_dev driver per segment mapping
|
||||
*/
|
||||
struct segdev_data {
|
||||
int (*mapfunc)(); /* really returns struct pte, not int */
|
||||
u_int offset; /* device offset for start of mapping */
|
||||
dev_t dev; /* device number (for mapfunc) */
|
||||
u_char pageprot; /* true if per page protections present */
|
||||
u_char prot; /* current segment prot if pageprot == 0 */
|
||||
u_char maxprot; /* maximum segment protections */
|
||||
struct vpage *vpage; /* per-page information, if needed */
|
||||
};
|
||||
|
||||
#ifdef KERNEL
|
||||
int segdev_create(/* seg, argsp */);
|
||||
#endif KERNEL
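/*
 * Illustrative sketch (not part of the original header): filling in the
 * create args for a device mapping and handing them to as_map() with
 * segdev_create.  The cdevsw d_mmap entry point and the local names are
 * assumptions of this sketch.
 */
static int
segdev_map_sketch(as, addr, len, dev, off, prot, maxprot)
	struct as *as;
	addr_t addr;
	u_int len, off, prot, maxprot;
	dev_t dev;
{
	struct segdev_crargs dev_a;

	dev_a.mapfunc = cdevsw[major(dev)].d_mmap;
	dev_a.dev = dev;
	dev_a.offset = off;
	dev_a.prot = (u_char)prot;
	dev_a.maxprot = (u_char)maxprot;
	return (as_map(as, addr, len, segdev_create, (caddr_t)&dev_a));
}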
|
||||
|
||||
#endif /*!_vm_seg_dev_h*/
|
||||
776
sys/vm/seg_map.c
Normal file
@@ -0,0 +1,776 @@
|
||||
/* @(#)seg_map.c 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - generic vnode mapping segment.
|
||||
*
|
||||
* The segmap driver is used only by the kernel to get faster (than seg_vn)
|
||||
* mappings [lower routine overhead; more persistent cache] to random
|
||||
* vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
|
||||
*/
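/*
 * Illustrative sketch (not part of this file): the typical way a file
 * system read path uses segkmap.  segmap_release() as the counterpart
 * of segmap_getmap(), MIN(), and the uio handling are assumptions of
 * this sketch.
 */
static int
segmap_read_sketch(vp, uiop)
	struct vnode *vp;
	register struct uio *uiop;
{
	register u_int n, mapon;
	addr_t base;
	int error;

	mapon = (u_int)uiop->uio_offset & MAXBOFFSET;
	n = MIN(MAXBSIZE - mapon, uiop->uio_resid);
	base = segmap_getmap(segkmap, vp, (u_int)uiop->uio_offset & MAXBMASK);
	error = uiomove(base + mapon, (int)n, UIO_READ, uiop);
	(void) segmap_release(segkmap, base, 0);
	return (error);
}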
|
||||
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/ucred.h>
|
||||
#include <sys/trace.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#include <machine/seg_kmem.h>
|
||||
|
||||
#include <vm/hat.h>
|
||||
#include <vm/as.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/seg_map.h>
|
||||
#include <vm/page.h>
|
||||
#include <vm/pvn.h>
|
||||
#include <vm/rm.h>
|
||||
|
||||
/*
|
||||
* Private seg op routines.
|
||||
*/
|
||||
static int segmap_free(/* seg */);
|
||||
static faultcode_t segmap_fault(/* seg, addr, len, type, rw */);
|
||||
static faultcode_t segmap_faulta(/* seg, addr */);
|
||||
static int segmap_checkprot(/* seg, addr, len, prot */);
|
||||
static int segmap_kluster(/* seg, addr, delta */);
|
||||
static int segmap_badop();
|
||||
|
||||
struct seg_ops segmap_ops = {
|
||||
segmap_badop, /* dup */
|
||||
segmap_badop, /* unmap */
|
||||
segmap_free,
|
||||
segmap_fault,
|
||||
segmap_faulta,
|
||||
(int (*)()) NULL, /* unload */
|
||||
segmap_badop, /* setprot */
|
||||
segmap_checkprot,
|
||||
segmap_kluster,
|
||||
(u_int (*)()) NULL, /* swapout */
|
||||
segmap_badop, /* sync */
|
||||
segmap_badop, /* incore */
|
||||
segmap_badop, /* lockop */
|
||||
segmap_badop, /* advise */
|
||||
};
|
||||
|
||||
/*
|
||||
* Private segmap routines.
|
||||
*/
|
||||
static void segmap_smapadd(/* smd, smp */);
|
||||
static void segmap_smapsub(/* smd, smp */);
|
||||
static void segmap_hashin(/* smd, smp, vp, off, flags */);
|
||||
static void segmap_hashout(/* smd, smp */);
|
||||
|
||||
/*
|
||||
* Statistics for segmap operations.
|
||||
*/
|
||||
struct segmapcnt {
|
||||
int smc_fault; /* number of segmap_faults */
|
||||
int smc_faulta; /* number of segmap_faultas */
|
||||
int smc_getmap; /* number of segmap_getmaps */
|
||||
int smc_get_use; /* # of getmaps that reuse an existing map */
|
||||
int smc_get_reclaim; /* # of getmaps that do a reclaim */
|
||||
int smc_get_reuse; /* # of getmaps that reuse a slot */
|
||||
int smc_rel_async; /* # of releases that are async */
|
||||
int smc_rel_write; /* # of releases that write */
|
||||
int smc_rel_free; /* # of releases that free */
|
||||
int smc_rel_abort; /* # of releases that abort */
|
||||
int smc_rel_dontneed; /* # of releases with dontneed set */
|
||||
int smc_release; /* # of releases with no other action */
|
||||
int smc_pagecreate; /* # of pagecreates */
|
||||
} segmapcnt;
|
||||
|
||||
/*
|
||||
* Return number of map pages in segment.
|
||||
*/
|
||||
#define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
|
||||
|
||||
/*
|
||||
* Translate addr into smap number within segment.
|
||||
*/
|
||||
#define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
|
||||
|
||||
/*
|
||||
* Translate addr in seg into struct smap pointer.
|
||||
*/
|
||||
#define GET_SMAP(seg, addr) \
|
||||
&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
|
||||
|
||||
int
|
||||
segmap_create(seg, argsp)
|
||||
struct seg *seg;
|
||||
caddr_t argsp;
|
||||
{
|
||||
register struct segmap_data *smd;
|
||||
register struct smap *smp;
|
||||
struct segmap_crargs *a = (struct segmap_crargs *)argsp;
|
||||
register u_int i;
|
||||
u_int hashsz;
|
||||
addr_t segend;
|
||||
|
||||
/*
|
||||
* Make sure that seg->s_base and seg->s_base + seg->s_size
|
||||
* are on MAXBSIZE aligned pieces of virtual memory.
|
||||
*
|
||||
* Since we assume we are creating a large segment
|
||||
* (it's just segkmap), trimming off the excess at the
|
||||
* beginning and end of the segment is considered safe.
|
||||
*/
|
||||
segend = (addr_t)((u_int)(seg->s_base + seg->s_size) & MAXBMASK);
|
||||
seg->s_base = (addr_t)roundup((u_int)(seg->s_base), MAXBSIZE);
|
||||
seg->s_size = segend - seg->s_base;
|
||||
|
||||
i = MAP_PAGES(seg);
|
||||
|
||||
smd = (struct segmap_data *)new_kmem_zalloc(
|
||||
sizeof (struct segmap_data), KMEM_SLEEP);
|
||||
smd->smd_prot = a->prot;
|
||||
smd->smd_sm = (struct smap *)new_kmem_zalloc(
|
||||
(u_int)(sizeof (struct smap) * i), KMEM_SLEEP);
|
||||
|
||||
/*
|
||||
* Link up all the slots.
|
||||
*/
|
||||
for (smp = &smd->smd_sm[i - 1]; smp >= smd->smd_sm; smp--)
|
||||
segmap_smapadd(smd, smp);
|
||||
|
||||
/*
|
||||
* Compute hash size rounding down to the next power of two.
|
||||
*/
|
||||
hashsz = MAP_PAGES(seg) / SMAP_HASHAVELEN;
|
||||
for (i = 0x80 << ((sizeof (int) - 1) * NBBY); i != 0; i >>= 1) {
|
||||
if ((hashsz & i) != 0) {
|
||||
smd->smd_hashsz = hashsz = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
smd->smd_hash = (struct smap **)new_kmem_zalloc(
|
||||
hashsz * sizeof (smd->smd_hash[0]), KMEM_SLEEP);
|
||||
|
||||
seg->s_data = (char *)smd;
|
||||
seg->s_ops = &segmap_ops;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
segmap_free(seg)
|
||||
struct seg *seg;
|
||||
{
|
||||
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
|
||||
|
||||
kmem_free((caddr_t)smd->smd_hash, sizeof (smd->smd_hash[0]) *
|
||||
smd->smd_hashsz);
|
||||
kmem_free((caddr_t)smd->smd_sm, sizeof (struct smap) * MAP_PAGES(seg));
|
||||
kmem_free((caddr_t)smd, sizeof (*smd));
|
||||
}
|
||||
|
||||
/*
|
||||
* Do a F_SOFTUNLOCK call over the range requested.
|
||||
* The range must have already been F_SOFTLOCK'ed.
|
||||
*/
|
||||
static void
|
||||
segmap_unlock(seg, addr, len, rw, smp)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int len;
|
||||
enum seg_rw rw;
|
||||
register struct smap *smp;
|
||||
{
|
||||
register struct page *pp;
|
||||
register addr_t adr;
|
||||
u_int off;
|
||||
|
||||
off = smp->sm_off + ((u_int)addr & MAXBOFFSET);
|
||||
for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
|
||||
/*
|
||||
* For now, we just kludge here by finding the page
|
||||
* ourselves since we would not find the page using
|
||||
* page_find() if someone has page_abort()'ed it.
|
||||
* XXX - need to redo things to avoid this mess.
|
||||
*/
|
||||
for (pp = page_hash[PAGE_HASHFUNC(smp->sm_vp, off)]; pp != NULL;
|
||||
pp = pp->p_hash)
|
||||
if (pp->p_vnode == smp->sm_vp && pp->p_offset == off)
|
||||
break;
|
||||
if (pp == NULL || pp->p_pagein || pp->p_free)
|
||||
panic("segmap_unlock");
|
||||
if (rw == S_WRITE)
|
||||
pg_setmod(pp, 1);
|
||||
if (rw != S_OTHER) {
|
||||
trace4(TR_PG_SEGMAP_FLT, pp, pp->p_vnode, off, 1);
|
||||
pg_setref(pp, 1);
|
||||
}
|
||||
hat_unlock(seg, adr);
|
||||
PAGE_RELE(pp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is called via a machine specific fault handling
|
||||
* routine. It is also called by software routines wishing to
|
||||
* lock or unlock a range of addresses.
|
||||
*/
|
||||
static faultcode_t
|
||||
segmap_fault(seg, addr, len, type, rw)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int len;
|
||||
enum fault_type type;
|
||||
enum seg_rw rw;
|
||||
{
|
||||
register struct segmap_data *smd;
|
||||
register struct smap *smp;
|
||||
register struct page *pp, **ppp;
|
||||
register struct vnode *vp;
|
||||
register u_int off;
|
||||
struct page *pl[btopr(MAXBSIZE) + 1];
|
||||
u_int prot;
|
||||
u_int addroff;
|
||||
addr_t adr;
|
||||
int err;
|
||||
|
||||
segmapcnt.smc_fault++;
|
||||
|
||||
smd = (struct segmap_data *)seg->s_data;
|
||||
smp = GET_SMAP(seg, addr);
|
||||
vp = smp->sm_vp;
|
||||
|
||||
if (vp == NULL)
|
||||
return (FC_MAKE_ERR(EIO));
|
||||
|
||||
addroff = (u_int)addr & MAXBOFFSET;
|
||||
if (addroff + len > MAXBSIZE)
|
||||
panic("segmap_fault length");
|
||||
off = smp->sm_off + addroff;
|
||||
|
||||
/*
|
||||
* First handle the easy stuff
|
||||
*/
|
||||
if (type == F_SOFTUNLOCK) {
|
||||
segmap_unlock(seg, addr, len, rw, smp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_SEGKMAP);
|
||||
err = VOP_GETPAGE(vp, off, len, &prot, pl, MAXBSIZE, seg, addr, rw,
|
||||
(struct ucred *)NULL); /* XXX - need real cred val */
|
||||
|
||||
if (err)
|
||||
return (FC_MAKE_ERR(err));
|
||||
|
||||
prot &= smd->smd_prot;
|
||||
|
||||
/*
|
||||
* Handle all pages returned in the pl[] array.
|
||||
* This loop is coded on the assumption that if
|
||||
* there was no error from the VOP_GETPAGE routine,
|
||||
* that the page list returned will contain all the
|
||||
* needed pages for the vp from [off..off + len).
|
||||
*/
|
||||
for (ppp = pl; (pp = *ppp++) != NULL; ) {
|
||||
/*
|
||||
* Verify that the pages returned are within the range
|
||||
* of this segmap region. Note that it is theoretically
|
||||
* possible for pages outside this range to be returned,
|
||||
* but it is not very likely. If we cannot use the
|
||||
* page here, just release it and go on to the next one.
|
||||
*/
|
||||
if (pp->p_offset < smp->sm_off ||
|
||||
pp->p_offset >= smp->sm_off + MAXBSIZE) {
|
||||
PAGE_RELE(pp);
|
||||
continue;
|
||||
}
|
||||
|
||||
adr = addr + (pp->p_offset - off);
|
||||
if (adr >= addr && adr < addr + len) {
|
||||
pg_setref(pp, 1);
|
||||
trace4(TR_PG_SEGMAP_FLT, pp, pp->p_vnode, pp->p_offset,
|
||||
0);
|
||||
trace5(TR_SPG_FLT, u.u_ar0[PC], adr, vp, pp->p_offset,
|
||||
TRC_SPG_SMAP);
|
||||
trace6(TR_SPG_FLT_PROC, time.tv_sec, time.tv_usec,
|
||||
trs(u.u_comm,0), trs(u.u_comm,1),
|
||||
trs(u.u_comm,2), trs(u.u_comm,3));
|
||||
if (type == F_SOFTLOCK) {
|
||||
/*
|
||||
* Load up the translation keeping it
|
||||
* locked and don't PAGE_RELE the page.
|
||||
*/
|
||||
hat_memload(seg, adr, pp, prot, 1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Either it was a page outside the fault range or a
|
||||
* page inside the fault range for a non F_SOFTLOCK -
|
||||
* load up the hat translation and release the page.
|
||||
*/
|
||||
hat_memload(seg, adr, pp, prot, 0);
|
||||
PAGE_RELE(pp);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is used to start I/O on pages asynchronously.
|
||||
*/
|
||||
static faultcode_t
|
||||
segmap_faulta(seg, addr)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
{
|
||||
register struct smap *smp;
|
||||
int err;
|
||||
|
||||
segmapcnt.smc_faulta++;
|
||||
smp = GET_SMAP(seg, addr);
|
||||
if (smp->sm_vp == NULL) {
|
||||
call_debug("segmap_faulta - no vp");
|
||||
return (FC_MAKE_ERR(EIO));
|
||||
}
|
||||
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_SEGKMAP);
|
||||
err = VOP_GETPAGE(smp->sm_vp, smp->sm_off + (u_int)addr & MAXBOFFSET,
|
||||
PAGESIZE, (u_int *)NULL, (struct page **)NULL, 0,
|
||||
seg, addr, S_READ,
|
||||
(struct ucred *)NULL); /* XXX - need real cred val */
|
||||
if (err)
|
||||
return (FC_MAKE_ERR(err));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
segmap_checkprot(seg, addr, len, prot)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int len, prot;
|
||||
{
|
||||
struct segmap_data *smd = (struct segmap_data *)seg->s_data;
|
||||
|
||||
return (((smd->smd_prot & prot) != prot) ? -1 : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if it makes sense to do kluster/read ahead to
|
||||
* addr + delta relative to the mapping at addr. We assume here
|
||||
* that delta is a signed PAGESIZE'd multiple (which can be negative).
|
||||
*
|
||||
* For segmap we always "approve" of this action from our standpoint.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
segmap_kluster(seg, addr, delta)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
int delta;
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static
|
||||
segmap_badop()
|
||||
{
|
||||
|
||||
panic("segmap_badop");
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Special private segmap operations
|
||||
*/
|
||||
|
||||
/*
|
||||
* Add smp to the free list on smd. If the smp still has a vnode
|
||||
* association with it, then it is added to the end of the free list,
|
||||
* otherwise it is added to the front of the list.
|
||||
*/
|
||||
static void
|
||||
segmap_smapadd(smd, smp)
|
||||
register struct segmap_data *smd;
|
||||
register struct smap *smp;
|
||||
{
|
||||
|
||||
if (smp->sm_refcnt != 0)
|
||||
panic("segmap_smapadd");
|
||||
|
||||
if (smd->smd_free == (struct smap *)NULL) {
|
||||
smp->sm_next = smp->sm_prev = smp;
|
||||
} else {
|
||||
smp->sm_next = smd->smd_free;
|
||||
smp->sm_prev = (smd->smd_free)->sm_prev;
|
||||
(smd->smd_free)->sm_prev = smp;
|
||||
smp->sm_prev->sm_next = smp;
|
||||
}
|
||||
|
||||
if (smp->sm_vp == (struct vnode *)NULL)
|
||||
smd->smd_free = smp;
|
||||
else
|
||||
smd->smd_free = smp->sm_next;
|
||||
|
||||
/*
|
||||
* XXX - need a better way to do this.
|
||||
*/
|
||||
if (smd->smd_want) {
|
||||
wakeup((caddr_t)&smd->smd_free);
|
||||
smd->smd_want = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove smp from the smd free list. If there is an old
|
||||
* mapping in effect there, then delete it.
|
||||
*/
|
||||
static void
|
||||
segmap_smapsub(smd, smp)
|
||||
register struct segmap_data *smd;
|
||||
register struct smap *smp;
|
||||
{
|
||||
|
||||
if (smd->smd_free == smp)
|
||||
smd->smd_free = smp->sm_next; /* go to next page */
|
||||
|
||||
if (smd->smd_free == smp)
|
||||
smd->smd_free = NULL; /* smp list is gone */
|
||||
else {
|
||||
smp->sm_prev->sm_next = smp->sm_next;
|
||||
smp->sm_next->sm_prev = smp->sm_prev;
|
||||
}
|
||||
smp->sm_prev = smp->sm_next = smp; /* make smp a list of one */
|
||||
smp->sm_refcnt = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
segmap_hashin(smd, smp, vp, off)
|
||||
register struct segmap_data *smd;
|
||||
register struct smap *smp;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
{
|
||||
register struct smap **hpp;
|
||||
|
||||
/*
|
||||
* Funniness here - we don't increment the ref count on the vnode
|
||||
* even though we have another pointer to it here. The reason
|
||||
* for this is that we don't want the fact that a seg_map
|
||||
* entry somewhere refers to a vnode to prevent the vnode
|
||||
* itself from going away. This is because this reference
|
||||
* to the vnode is a "soft one". In the case where a mapping
|
||||
* is being used by a rdwr [or directory routine?] there already
|
||||
* has to be a non-zero ref count on the vnode. In the case
|
||||
* where the vp has been freed and the smap structure is
|
||||
* on the free list, there are no pages in memory that can
|
||||
* refer to the vnode. Thus even if we reuse the same
|
||||
* vnode/smap structure for a vnode which has the same
|
||||
* address but represents a different object, we are ok.
|
||||
*/
|
||||
smp->sm_vp = vp;
|
||||
smp->sm_off = off;
|
||||
|
||||
hpp = &smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
|
||||
smp->sm_hash = *hpp;
|
||||
*hpp = smp;
|
||||
}
|
||||
|
||||
static void
|
||||
segmap_hashout(smd, smp)
|
||||
register struct segmap_data *smd;
|
||||
register struct smap *smp;
|
||||
{
|
||||
register struct smap **hpp, *hp;
|
||||
struct vnode *vp;
|
||||
|
||||
vp = smp->sm_vp;
|
||||
hpp = &smd->smd_hash[SMAP_HASHFUNC(smd, vp, smp->sm_off)];
|
||||
for (;;) {
|
||||
hp = *hpp;
|
||||
if (hp == NULL)
|
||||
panic("segmap_hashout");
|
||||
if (hp == smp)
|
||||
break;
|
||||
hpp = &hp->sm_hash;
|
||||
}
|
||||
|
||||
*hpp = smp->sm_hash;
|
||||
smp->sm_hash = NULL;
|
||||
smp->sm_vp = NULL;
|
||||
smp->sm_off = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Special public segmap operations
|
||||
*/
|
||||
|
||||
/*
|
||||
* Create pages (without using VOP_GETPAGE) and load up translations to them.
|
||||
* If softlock is TRUE, then set things up so that it looks like a call
|
||||
* to segmap_fault with F_SOFTLOCK.
|
||||
*/
|
||||
void
|
||||
segmap_pagecreate(seg, addr, len, softlock)
|
||||
struct seg *seg;
|
||||
register addr_t addr;
|
||||
u_int len;
|
||||
int softlock;
|
||||
{
|
||||
register struct page *pp;
|
||||
register u_int off;
|
||||
struct smap *smp;
|
||||
struct vnode *vp;
|
||||
addr_t eaddr;
|
||||
u_int prot;
|
||||
|
||||
segmapcnt.smc_pagecreate++;
|
||||
|
||||
eaddr = addr + len;
|
||||
addr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
smp = GET_SMAP(seg, addr);
|
||||
vp = smp->sm_vp;
|
||||
off = smp->sm_off + ((u_int)addr & MAXBOFFSET);
|
||||
prot = ((struct segmap_data *)seg->s_data)->smd_prot;
|
||||
|
||||
for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
|
||||
pp = page_lookup(vp, off);
|
||||
if (pp == NULL) {
|
||||
pp = rm_allocpage(segkmap, addr, PAGESIZE, 1);
|
||||
trace6(TR_SEG_ALLOCPAGE, segkmap, addr,
|
||||
TRC_SEG_SEGKMAP, vp, off, pp);
|
||||
if (page_enter(pp, vp, off))
|
||||
panic("segmap_page_create page_enter");
|
||||
page_unlock(pp);
|
||||
if (softlock) {
|
||||
hat_memload(segkmap, addr, pp, prot, 1);
|
||||
} else {
|
||||
hat_memload(segkmap, addr, pp, prot, 0);
|
||||
PAGE_RELE(pp);
|
||||
}
|
||||
} else {
|
||||
if (softlock) {
|
||||
PAGE_HOLD(pp);
|
||||
hat_memload(segkmap, addr, pp, prot, 1);
|
||||
} else {
|
||||
hat_memload(segkmap, addr, pp, prot, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
addr_t
|
||||
segmap_getmap(seg, vp, off)
|
||||
struct seg *seg;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
{
|
||||
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
|
||||
register struct smap *smp;
|
||||
|
||||
segmapcnt.smc_getmap++;
|
||||
|
||||
if ((off & MAXBOFFSET) != 0)
|
||||
panic("segmap_getmap bad offset");
|
||||
|
||||
/*
|
||||
* XXX - keep stats for hash function
|
||||
*/
|
||||
for (smp = smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
|
||||
smp != NULL; smp = smp->sm_hash)
|
||||
if (smp->sm_vp == vp && smp->sm_off == off)
|
||||
break;
|
||||
|
||||
if (smp != NULL) {
|
||||
if (vp->v_count == 0) /* XXX - debugging */
|
||||
call_debug("segmap_getmap vp count of zero");
|
||||
if (smp->sm_refcnt != 0) {
|
||||
segmapcnt.smc_get_use++;
|
||||
smp->sm_refcnt++; /* another user */
|
||||
} else {
|
||||
segmapcnt.smc_get_reclaim++;
|
||||
segmap_smapsub(smd, smp); /* reclaim */
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Allocate a new slot and set it up.
|
||||
*/
|
||||
while ((smp = smd->smd_free) == NULL) {
|
||||
/*
|
||||
* XXX - need a better way to do this.
|
||||
*/
|
||||
smd->smd_want = 1;
|
||||
(void) sleep((caddr_t)&smd->smd_free, PSWP+2);
|
||||
}
|
||||
segmap_smapsub(smd, smp);
|
||||
if (smp->sm_vp != (struct vnode *)NULL) {
|
||||
/*
|
||||
* Destroy old vnode association and unload any
|
||||
* hardware translations to the old object.
|
||||
*/
|
||||
segmapcnt.smc_get_reuse++;
|
||||
segmap_hashout(smd, smp);
|
||||
hat_unload(seg, seg->s_base + ((smp - smd->smd_sm) *
|
||||
MAXBSIZE), MAXBSIZE);
|
||||
}
|
||||
segmap_hashin(smd, smp, vp, off);
|
||||
}
|
||||
|
||||
trace5(TR_SEG_GETMAP, seg, (u_int)(seg->s_base +
|
||||
(smp - smd->smd_sm) * MAXBSIZE) & PAGEMASK,
|
||||
TRC_SEG_SEGKMAP, vp, off);
|
||||
return (seg->s_base + ((smp - smd->smd_sm) * MAXBSIZE));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Same as segmap_getmap(), with the following condition added
|
||||
* if (a new mapping is created)
|
||||
* prefault the translation
|
||||
*/
|
||||
addr_t
|
||||
segmap_getmapflt(seg, vp, off)
|
||||
struct seg *seg;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
{
|
||||
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
|
||||
register struct smap *smp;
|
||||
|
||||
segmapcnt.smc_getmap++;
|
||||
|
||||
if ((off & MAXBOFFSET) != 0)
|
||||
panic("segmap_getmap bad offset");
|
||||
|
||||
/*
|
||||
* XXX - keep stats for hash function
|
||||
*/
|
||||
for (smp = smd->smd_hash[SMAP_HASHFUNC(smd, vp, off)];
|
||||
smp != NULL; smp = smp->sm_hash)
|
||||
if (smp->sm_vp == vp && smp->sm_off == off)
|
||||
break;
|
||||
|
||||
if (smp != NULL) {
|
||||
if (vp->v_count == 0) /* XXX - debugging */
|
||||
call_debug("segmap_getmap vp count of zero");
|
||||
if (smp->sm_refcnt != 0) {
|
||||
segmapcnt.smc_get_use++;
|
||||
smp->sm_refcnt++; /* another user */
|
||||
} else {
|
||||
segmapcnt.smc_get_reclaim++;
|
||||
segmap_smapsub(smd, smp); /* reclaim */
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Allocate a new slot and set it up.
|
||||
*/
|
||||
while ((smp = smd->smd_free) == NULL) {
|
||||
/*
|
||||
* XXX - need a better way to do this.
|
||||
*/
|
||||
smd->smd_want = 1;
|
||||
(void) sleep((caddr_t)&smd->smd_free, PSWP+2);
|
||||
}
|
||||
segmap_smapsub(smd, smp);
|
||||
if (smp->sm_vp != (struct vnode *)NULL) {
|
||||
/*
|
||||
* Destroy old vnode association and unload any
|
||||
* hardware translations to the old object.
|
||||
*/
|
||||
segmapcnt.smc_get_reuse++;
|
||||
segmap_hashout(smd, smp);
|
||||
hat_unload(seg, seg->s_base + ((smp - smd->smd_sm) *
|
||||
MAXBSIZE), MAXBSIZE);
|
||||
}
|
||||
segmap_hashin(smd, smp, vp, off);
|
||||
|
||||
/*
|
||||
* Prefault the translation
|
||||
*/
|
||||
(void)as_fault(&kas,
|
||||
seg->s_base + (smp - smd->smd_sm) * MAXBSIZE,
|
||||
MAXBSIZE, F_INVAL, S_READ);
|
||||
}
|
||||
|
||||
trace5(TR_SEG_GETMAP, seg, (u_int)(seg->s_base +
|
||||
(smp - smd->smd_sm) * MAXBSIZE) & PAGEMASK,
|
||||
TRC_SEG_SEGKMAP, vp, off);
|
||||
return (seg->s_base + ((smp - smd->smd_sm) * MAXBSIZE));
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
segmap_release(seg, addr, flags)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
u_int flags;
|
||||
{
|
||||
register struct segmap_data *smd = (struct segmap_data *)seg->s_data;
|
||||
register struct smap *smp;
|
||||
int error;
|
||||
|
||||
if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
|
||||
((u_int)addr & MAXBOFFSET) != 0)
|
||||
panic("segmap_release addr");
|
||||
|
||||
smp = &smd->smd_sm[MAP_PAGE(seg, addr)];
|
||||
trace4(TR_SEG_RELMAP, seg, addr, TRC_SEG_SEGKMAP, smp->sm_refcnt);
|
||||
|
||||
/*
|
||||
* Need to call VOP_PUTPAGE if any flags (except SM_DONTNEED)
|
||||
* are set.
|
||||
*/
|
||||
if ((flags & ~SM_DONTNEED) != 0) {
|
||||
int bflags = 0;
|
||||
|
||||
if (flags & SM_WRITE)
|
||||
segmapcnt.smc_rel_write++;
|
||||
if (flags & SM_ASYNC) {
|
||||
bflags |= B_ASYNC;
|
||||
segmapcnt.smc_rel_async++;
|
||||
}
|
||||
if (flags & SM_INVAL) {
|
||||
bflags |= B_INVAL;
|
||||
segmapcnt.smc_rel_abort++;
|
||||
}
|
||||
if (smp->sm_refcnt == 1) {
|
||||
/*
|
||||
* We only bother doing the FREE and DONTNEED flags
|
||||
* if no one else is still referencing this mapping.
|
||||
*/
|
||||
if (flags & SM_FREE) {
|
||||
bflags |= B_FREE;
|
||||
segmapcnt.smc_rel_free++;
|
||||
}
|
||||
if (flags & SM_DONTNEED) {
|
||||
bflags |= B_DONTNEED;
|
||||
segmapcnt.smc_rel_dontneed++;
|
||||
}
|
||||
}
|
||||
error = VOP_PUTPAGE(smp->sm_vp, smp->sm_off, MAXBSIZE, bflags,
|
||||
(struct ucred *)NULL); /* XXX - need real cred val */
|
||||
} else {
|
||||
segmapcnt.smc_release++;
|
||||
error = 0;
|
||||
}
|
||||
|
||||
if (--smp->sm_refcnt == 0) {
|
||||
if (flags & SM_INVAL) {
|
||||
hat_unload(seg, addr, MAXBSIZE);
|
||||
segmap_hashout(smd, smp); /* remove map info */
|
||||
}
|
||||
segmap_smapadd(smd, smp); /* add to free list */
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
88
sys/vm/seg_map.h
Normal file
88
sys/vm/seg_map.h
Normal file
@@ -0,0 +1,88 @@
|
||||
/* @(#)seg_map.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_seg_map_h
|
||||
#define _vm_seg_map_h
|
||||
|
||||
struct segmap_crargs {
|
||||
u_int prot;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each smap struct represents a MAXBSIZE sized mapping to the
|
||||
* <sm_vp, sm_off> given in the structure. The location of the
|
||||
* structure in the array gives the virtual address of the
|
||||
* mapping.
|
||||
*/
|
||||
struct smap {
|
||||
struct vnode *sm_vp; /* vnode pointer (if mapped) */
|
||||
u_int sm_off; /* file offset for mapping */
|
||||
/*
|
||||
* These next 4 entries can be coded as
|
||||
* u_shorts if we are tight on memory.
|
||||
*/
|
||||
u_int sm_refcnt; /* reference count for uses */
|
||||
struct smap *sm_hash; /* hash pointer */
|
||||
struct smap *sm_next; /* next pointer */
|
||||
struct smap *sm_prev; /* previous pointer */
|
||||
};
|
||||
|
||||
/*
|
||||
* (Semi) private data maintained by the segmap driver per SEGMENT mapping
|
||||
*/
|
||||
struct segmap_data {
|
||||
struct smap *smd_sm; /* array of smap structures */
|
||||
struct smap *smd_free; /* free list head pointer */
|
||||
u_char smd_prot; /* protections for all smap's */
|
||||
u_char smd_want; /* smap want flag */
|
||||
u_int smd_hashsz; /* power-of-two hash table size */
|
||||
struct smap **smd_hash; /* pointer to hash table */
|
||||
};
|
||||
|
||||
/*
|
||||
* These are flags used on release. Some of these might get handled
|
||||
* by segment operations needed for msync (when we figure them out).
|
||||
* SM_ASYNC modifies SM_WRITE. SM_DONTNEED modifies SM_FREE. SM_FREE
|
||||
* and SM_INVAL are mutually exclusive.
|
||||
*/
|
||||
#define SM_WRITE 0x01 /* write back the pages upon release */
|
||||
#define SM_ASYNC 0x02 /* do the write asynchronously */
|
||||
#define SM_FREE 0x04 /* put pages back on free list */
|
||||
#define SM_INVAL 0x08 /* invalidate page (no caching) */
|
||||
#define SM_DONTNEED 0x10 /* less likely to be needed soon */
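/*
 * Illustrative sketch (not part of the original source): a typical
 * read/write path pairs segmap_getmap/segmap_release and picks the
 * release flags from the definitions above, e.g. for a write that
 * should be pushed out asynchronously ("uiop" and "n" are placeholders
 * for the caller's uio and byte count):
 *
 *	base = segmap_getmap(segkmap, vp, off & MAXBMASK);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_WRITE, uiop);
 *	(void) segmap_release(segkmap, base, error ? 0 : SM_WRITE | SM_ASYNC);
 *
 * A read path would normally release with flags of 0 (or SM_FREE when
 * the data is not expected to be needed again).
 */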
|
||||
|
||||
#define MAXBSHIFT 13 /* log2(MAXBSIZE) */
|
||||
#define MAXBOFFSET (MAXBSIZE - 1)
|
||||
#define MAXBMASK (~MAXBOFFSET)
|
||||
|
||||
/*
|
||||
* SMAP_HASHAVELEN is the average length desired for this chain, from
|
||||
* which the size of the smd_hash table is derived at segment create time.
|
||||
* SMAP_HASHVPSHIFT is defined so that 1 << SMAP_HASHVPSHIFT is the
|
||||
* approximate size of a vnode struct.
|
||||
*/
|
||||
#define SMAP_HASHAVELEN 4
|
||||
#define SMAP_HASHVPSHIFT 6
|
||||
|
||||
#define SMAP_HASHFUNC(smd, vp, off) \
|
||||
((((off) >> MAXBSHIFT) + ((int)(vp) >> SMAP_HASHVPSHIFT)) & \
|
||||
((smd)->smd_hashsz - 1))
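/*
 * Illustrative sketch (not part of the original source): the hash
 * function above only works if smd_hashsz is a power of two.  A
 * plausible way for segmap_create to size the table from the
 * constants above (the exact rounding here is an assumption):
 *
 *	nslots = seg->s_size >> MAXBSHIFT;
 *	for (hashsz = 1; hashsz < nslots / SMAP_HASHAVELEN; hashsz <<= 1)
 *		continue;
 *	smd->smd_hashsz = hashsz;
 */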
|
||||
|
||||
#ifdef KERNEL
|
||||
int segmap_create(/* seg, argsp */);
|
||||
|
||||
/*
|
||||
* Special seg_map segment operations
|
||||
*/
|
||||
void segmap_pagecreate(/* seg, addr, len, softlock */);
|
||||
addr_t segmap_getmap(/* seg, vp, off */);
|
||||
int segmap_release(/* seg, addr, flags */);
|
||||
|
||||
extern struct seg *segkmap; /* the kernel generic mapping segment */
|
||||
extern struct seg_ops segmap_ops;
|
||||
#endif KERNEL
|
||||
|
||||
#endif /*!_vm_seg_map_h*/
|
||||
871
sys/vm/seg_u.c
Normal file
871
sys/vm/seg_u.c
Normal file
@@ -0,0 +1,871 @@
|
||||
/* @(#)seg_u.c 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1989 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - u-area segment routines
|
||||
*
|
||||
* XXX: This segment type should probably be recast as seg_stack
|
||||
* instead of seg_u. As the system evolves, we'll need to
|
||||
* manage variable-sized stacks protected by red zones, some
|
||||
* of which possibly are accompanied by u-areas. For the moment
|
||||
* the implementation copes only with "standard" u-areas,
|
||||
* each with an embedded stack. Doing so lets the implementation
|
||||
* get away with much simpler space management code.
|
||||
*
|
||||
* Desired model:
|
||||
* segu_data describes nproc u-areas and the segment ops
|
||||
* manipulate individual slots in segu_data, so that (e.g.)
|
||||
* copying a u-area upon process creation turns into
|
||||
* transcribing parts of segu_data from one place to another.
|
||||
*
|
||||
* Red zone handling:
|
||||
* The implementation maintains the invariant that the MMU mappings
|
||||
* for unallocated slots are invalid. This means that red zones
|
||||
* come for free simply by avoiding establishing mappings over all
|
||||
* red zone pages and by making sure that all mappings are invalidated
|
||||
* at segu_release time.
|
||||
*
|
||||
* Note also that we need neither pages nor swap space for red zones,
|
||||
* so much of the code works over extents of SEGU_PAGES-1 instead
|
||||
* of SEGU_PAGES.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ucred.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/kmem_alloc.h>
|
||||
#include <sys/proc.h> /* needed for debugging printouts only */
|
||||
#include <sys/vmmeter.h>
|
||||
|
||||
#include <vm/anon.h>
|
||||
#include <vm/rm.h>
|
||||
#include <vm/page.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/seg_u.h>
|
||||
#include <vm/swap.h>
|
||||
#include <vm/hat.h>
|
||||
|
||||
/*
|
||||
* Ugliness to compensate for some machine dependency.
|
||||
*/
|
||||
#ifdef i386bug
|
||||
#define UPAGE_PROT (PROT_READ | PROT_USER)
|
||||
#else i386bug
|
||||
#define UPAGE_PROT (PROT_READ | PROT_WRITE)
|
||||
#endif i386bug
|
||||
|
||||
int segu_debug = 0; /* patchable for debugging */
|
||||
|
||||
/*
|
||||
* Private seg op routines.
|
||||
*
|
||||
* The swapout operation is null because the generic swapout code
|
||||
* never attempts to swap out anything in the kernel's address
|
||||
* space. Instead, clients swap the resources this driver manages
|
||||
* by calling segu_fault with a type argument of F_SOFTLOCK to swap
|
||||
* a slot in and with F_SOFTUNLOCK to swap one out.
|
||||
*/
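/*
 * Illustrative sketch (not part of the original source): under the
 * scheme described above, the swap code would bring a u-area slot in
 * and push it out with calls of roughly this form, where uaddr is the
 * address returned by segu_get (the base of the mapped part of the
 * slot):
 *
 *	(void) segu_fault(segu, uaddr, ptob(SEGU_PAGES - 1),
 *	    F_SOFTLOCK, S_OTHER);		swap in and lock
 *	...
 *	(void) segu_fault(segu, uaddr, ptob(SEGU_PAGES - 1),
 *	    F_SOFTUNLOCK, S_WRITE);		unlock and push out
 *
 * The length must cover exactly the mapped part of the slot, as the
 * checks in segu_fault below require.
 */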
|
||||
static int segu_checkprot(/* seg, vaddr, len, prot */);
|
||||
static int segu_kluster(/* seg, vaddr, delta */);
|
||||
static int segu_badop();
|
||||
|
||||
struct seg_ops segu_ops = {
|
||||
segu_badop, /* dup */
|
||||
segu_badop, /* unmap */
|
||||
segu_badop, /* free */
|
||||
segu_fault,
|
||||
segu_badop, /* faulta */
|
||||
(int (*)()) NULL, /* unload */
|
||||
segu_badop, /* setprot */
|
||||
segu_checkprot,
|
||||
segu_kluster,
|
||||
(u_int (*)()) NULL, /* swapout */
|
||||
segu_badop, /* sync */
|
||||
segu_badop, /* incore */
|
||||
segu_badop, /* lockop */
|
||||
segu_badop, /* advise */
|
||||
};
|
||||
|
||||
/*
|
||||
* Declarations of private routines for use by seg_u operations.
|
||||
*/
|
||||
static int segu_getslot(/* seg, vaddr, len */);
|
||||
static int segu_softunlock(/* seg, vaddr, len, slot, rw */);
|
||||
static int segu_softload(/* seg, vaddr, len, slot, lock */);
|
||||
|
||||
struct seg *segu;
|
||||
|
||||
/*
|
||||
* XXX: Global change needed -- set up MMU translations before
|
||||
* keeping pages.
|
||||
*/
|
||||
|
||||
static
|
||||
segu_badop()
|
||||
{
|
||||
|
||||
panic("seg_badop");
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a fault on an address corresponding to one of the
|
||||
* slots in the segu segment.
|
||||
*/
|
||||
faultcode_t
|
||||
segu_fault(seg, vaddr, len, type, rw)
|
||||
struct seg *seg;
|
||||
addr_t vaddr;
|
||||
u_int len;
|
||||
enum fault_type type;
|
||||
enum seg_rw rw;
|
||||
{
|
||||
struct segu_segdata *sdp = (struct segu_segdata *)seg->s_data;
|
||||
struct segu_data *sup;
|
||||
int slot;
|
||||
addr_t vbase;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Sanity checks.
|
||||
*/
|
||||
if (seg != segu)
|
||||
panic("segu_fault: wrong segment");
|
||||
if (type == F_PROT)
|
||||
panic("segu_fault: unexpected F_PROT fault");
|
||||
|
||||
/*
|
||||
* Verify that the range specified by vaddr and len falls
|
||||
* completely within the mapped part of a single allocated
|
||||
* slot, calculating the slot index and slot pointer while
|
||||
* we're at it.
|
||||
*/
|
||||
slot = segu_getslot(seg, vaddr, len);
|
||||
if (slot == -1)
|
||||
return (FC_MAKE_ERR(EFAULT));
|
||||
sup = &sdp->usd_slots[slot];
|
||||
|
||||
vbase = seg->s_base + ptob(SEGU_PAGES) * slot;
|
||||
|
||||
/*
|
||||
* The F_SOFTLOCK and F_SOFTUNLOCK cases have more stringent
|
||||
* range requirements: the given range must exactly coincide
|
||||
* with the slot's mapped portion.
|
||||
*/
|
||||
if (type == F_SOFTLOCK || type == F_SOFTUNLOCK) {
|
||||
if (vaddr != segu_stom(vbase) || len != ptob(SEGU_PAGES - 1))
|
||||
return (FC_MAKE_ERR(EFAULT));
|
||||
}
|
||||
|
||||
if (type == F_SOFTLOCK) {
|
||||
/*
|
||||
* Somebody is trying to lock down this slot, e.g., as
|
||||
* part of swapping in a u-area contained in the slot.
|
||||
*/
|
||||
|
||||
/*
|
||||
* It is erroneous to attempt to lock when already locked.
|
||||
*
|
||||
* XXX: Possibly this shouldn't be a panic. It depends
|
||||
* on what assumptions we're willing to let clients
|
||||
* make.
|
||||
*/
|
||||
if (sup->su_flags & SEGU_LOCKED)
|
||||
panic("segu_fault: locking locked slot");
|
||||
|
||||
err = segu_softload(seg, segu_stom(vbase),
|
||||
ptob(SEGU_PAGES - 1), slot, 1);
|
||||
if (err)
|
||||
return (FC_MAKE_ERR(err));
|
||||
|
||||
sup->su_flags |= SEGU_LOCKED;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (type == F_INVAL) {
|
||||
/*
|
||||
* Normal fault. The processing required
|
||||
* is quite similar to that for the F_SOFTLOCK case in that
|
||||
* we have to drag stuff in and make sure it's mapped. It
|
||||
* differs in that we don't lock it down.
|
||||
*/
|
||||
|
||||
if (segu_debug)
|
||||
printf("segu_fault(%x, %x, %d)\n", vaddr, len, type);
|
||||
|
||||
/*
|
||||
* If the slot is already locked, the only way we
|
||||
* should fault is by referencing the red zone.
|
||||
*
|
||||
* XXX: Probably should tighten this check and verify
|
||||
* that it's really a red zone reference.
|
||||
* XXX: Is this the most appropriate error code?
|
||||
*/
|
||||
if (sup->su_flags & SEGU_LOCKED)
|
||||
return (FC_MAKE_ERR(EINVAL));
|
||||
|
||||
err = segu_softload(seg, vaddr, len, slot, 0);
|
||||
return (err ? FC_MAKE_ERR(err) : 0);
|
||||
}
|
||||
|
||||
if (type == F_SOFTUNLOCK) {
|
||||
/*
|
||||
* Somebody is trying to swap out this slot, e.g., as
|
||||
* part of swapping out a u-area contained in this slot.
|
||||
*/
|
||||
|
||||
/*
|
||||
* It is erroneous to attempt to unlock when not
|
||||
* currently locked.
|
||||
*/
|
||||
if (!(sup->su_flags & SEGU_LOCKED))
|
||||
panic("segu_fault: unlocking unlocked slot");
|
||||
sup->su_flags &= ~SEGU_LOCKED;
|
||||
|
||||
err = segu_softunlock(seg, vaddr, len, slot, rw);
|
||||
return (err ? FC_MAKE_ERR(err) : 0);
|
||||
}
|
||||
|
||||
panic("segu_fault: bogus fault type");
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the given protections suffice over the range specified by
|
||||
* vaddr and len. For this segment type, the only issue is whether or
|
||||
* not the range lies completely within the mapped part of an allocated slot.
|
||||
*
|
||||
* We let segu_getslot do all the dirty work.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
segu_checkprot(seg, vaddr, len, prot)
|
||||
struct seg *seg;
|
||||
addr_t vaddr;
|
||||
u_int len;
|
||||
u_int prot;
|
||||
{
|
||||
register int slot = segu_getslot(seg, vaddr, len);
|
||||
|
||||
return (slot == -1 ? -1 : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if it makes sense to do kluster/read ahead to
|
||||
* addr + delta relative to the mapping at addr. We assume here
|
||||
* that delta is a signed PAGESIZE'd multiple (which can be negative).
|
||||
*
|
||||
* For seg_u we always "approve" of this action from our standpoint.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
segu_kluster(seg, addr, delta)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
int delta;
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Segment operations specific to the seg_u segment type.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Finish creating the segu segment by setting up its private state
|
||||
* information. Called once at boot time after segu has been allocated
|
||||
* and hooked into the kernel address space.
|
||||
*
|
||||
* Note that we have no need for the argsp argument, since everything
|
||||
* we need to set up our private information is contained in the common
|
||||
* segment information. (This may change at such time as we generalize
|
||||
* the implementation to deal with variable size allocation units.)
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
int
|
||||
segu_create(seg, argsp)
|
||||
register struct seg *seg;
|
||||
caddr_t argsp;
|
||||
{
|
||||
register u_int numslots;
|
||||
register int i;
|
||||
register struct segu_segdata *sdp;
|
||||
|
||||
/*
|
||||
* Trim the segment's size down to the largest multiple of
|
||||
* SEGU_PAGES that's no larger than the original value.
|
||||
*
|
||||
* XXX: Does it matter that we're discarding virtual address
|
||||
* space off the end with no record of how much there was?
|
||||
*/
|
||||
numslots = seg->s_size / ptob(SEGU_PAGES);
|
||||
seg->s_size = numslots * ptob(SEGU_PAGES);
|
||||
|
||||
/*
|
||||
* Allocate segment-specific information.
|
||||
*/
|
||||
seg->s_data = new_kmem_alloc(sizeof (struct segu_segdata), KMEM_SLEEP);
|
||||
sdp = (struct segu_segdata *)seg->s_data;
|
||||
|
||||
/*
|
||||
* Allocate the slot array.
|
||||
*/
|
||||
sdp->usd_slots = (struct segu_data *)new_kmem_alloc(
|
||||
numslots * sizeof (struct segu_data), KMEM_SLEEP);
|
||||
|
||||
/*
|
||||
* Set up the slot free list, marking each slot as unallocated.
|
||||
* Note that the list must be sorted in ascending address order.
|
||||
*/
|
||||
sdp->usd_slots[0].su_flags = 0;
|
||||
for (i = 1; i < numslots; i++) {
|
||||
sdp->usd_slots[i - 1].su_next = &sdp->usd_slots[i];
|
||||
sdp->usd_slots[i].su_flags = 0;
|
||||
}
|
||||
sdp->usd_slots[numslots - 1].su_next = NULL;
|
||||
sdp->usd_free = sdp->usd_slots;
|
||||
|
||||
seg->s_ops = &segu_ops;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate resources for a single slot.
|
||||
*
|
||||
* When used for u-area, called at process creation time.
|
||||
*/
|
||||
addr_t
|
||||
segu_get()
|
||||
{
|
||||
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
|
||||
struct page *pp;
|
||||
addr_t vbase;
|
||||
addr_t va;
|
||||
struct segu_data *sup;
|
||||
int slot;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Allocate virtual space. This amounts to grabbing a free slot.
|
||||
*/
|
||||
if ((sup = sdp->usd_free) == NULL)
|
||||
return (NULL);
|
||||
sdp->usd_free = sup->su_next;
|
||||
slot = sup - sdp->usd_slots;
|
||||
|
||||
vbase = segu->s_base + ptob(SEGU_PAGES) * slot;
|
||||
|
||||
/*
|
||||
* If this slot has anon resources left over from its last use, free
|
||||
* them. (Normally, segu_release will have cleaned up; however, i/o
|
||||
* in progress at the time of the call prevents it from doing so.)
|
||||
*/
|
||||
if (sup->su_flags & SEGU_HASANON) {
|
||||
anon_free(sup->su_swaddr, ptob(SEGU_PAGES));
|
||||
anon_unresv(ptob(SEGU_PAGES - 1));
|
||||
sup->su_flags &= ~SEGU_HASANON;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve sufficient swap space for this slot. We'll
|
||||
* actually allocate it in the loop below, but reserving it
|
||||
* here allows us to back out more gracefully than if we
|
||||
* had an allocation failure in the body of the loop.
|
||||
*
|
||||
* Note that we don't need swap space for the red zone page.
|
||||
*/
|
||||
if (anon_resv(ptob(SEGU_PAGES - 1)) == 0) {
|
||||
if (segu_debug)
|
||||
printf("segu_get: no swap space available\n");
|
||||
sup->su_next = sdp->usd_free;
|
||||
sdp->usd_free = sup;
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate pages, avoiding allocating one for the red zone.
|
||||
*/
|
||||
pp = rm_allocpage(segu, segu_stom(vbase), ptob(SEGU_PAGES - 1), 1);
|
||||
if (pp == NULL) {
|
||||
if (segu_debug)
|
||||
printf("segu_get: no pages available\n");
|
||||
/*
|
||||
* Give back the resources we've acquired.
|
||||
*/
|
||||
anon_unresv(ptob(SEGU_PAGES - 1));
|
||||
sup->su_next = sdp->usd_free;
|
||||
sdp->usd_free = sup;
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate swap space.
|
||||
*
|
||||
* Because the interface for getting swap slots is designed
|
||||
* to handle only one page at a time, we must deal with each
|
||||
* page in the u-area individually instead of allocating a
|
||||
* contiguous chunk of swap space for the whole thing as we
|
||||
* would prefer.
|
||||
*
|
||||
* This being the case, we actually do more in this loop than
|
||||
* simply allocate swap space. As we handle each page, we
|
||||
* complete its setup.
|
||||
*/
|
||||
for (i = 0, va = vbase; i < SEGU_PAGES; i++, va += ptob(1)) {
|
||||
register struct anon *ap;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
struct page *opp;
|
||||
|
||||
/*
|
||||
* If this page is the red zone page, we don't need swap
|
||||
* space for it. Note that we skip over the code that
|
||||
* establishes MMU mappings, so that the page remains
|
||||
* invalid.
|
||||
*/
|
||||
if (i == SEGU_REDZONE) {
|
||||
sup->su_swaddr[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanity check.
|
||||
*/
|
||||
if (pp == NULL)
|
||||
panic("segu_get: not enough pages");
|
||||
|
||||
/*
|
||||
* Get a swap slot.
|
||||
*/
|
||||
if ((ap = anon_alloc()) == NULL)
|
||||
panic("segu_get: swap allocation failure");
|
||||
sup->su_swaddr[i] = ap;
|
||||
|
||||
/*
|
||||
* Tie the next page to the swap slot.
|
||||
*/
|
||||
swap_xlate(ap, &vp, &off);
|
||||
while (page_enter(pp, vp, off)) {
|
||||
/*
|
||||
* The page was already tied to something
|
||||
* else that we have no record of. Since
|
||||
* a page named by <vp, off>
|
||||
* already exists, we abort the old page.
|
||||
*/
|
||||
struct page *p1 = page_find(vp, off);
|
||||
|
||||
if (p1 != NULL) {
|
||||
page_wait(p1);
|
||||
if (p1->p_vnode == vp && p1->p_offset == off)
|
||||
page_abort(p1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Page_enter has set the page's lock bit. Since it's
|
||||
* kept as well, this is just a nuisance.
|
||||
*/
|
||||
page_unlock(pp);
|
||||
|
||||
/*
|
||||
* Mark the page for long term keep and release the
|
||||
* short term claim that rm_allocpage established.
|
||||
*
|
||||
* XXX: When page_pp_lock returns a success/failure
|
||||
* indication, we'll probably want to panic if
|
||||
* it fails.
|
||||
*/
|
||||
(void) page_pp_lock(pp, 0, 1);
|
||||
|
||||
/*
|
||||
* Load and lock an MMU translation for the page.
|
||||
*/
|
||||
hat_memload(segu, va, pp, UPAGE_PROT, 1);
|
||||
|
||||
/*
|
||||
* Prepare to use the next page.
|
||||
*/
|
||||
opp = pp;
|
||||
page_sub(&pp, pp);
|
||||
PAGE_RELE(opp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Finally, mark this slot as allocated, locked, and in possession
|
||||
* of anon resources.
|
||||
*/
|
||||
sup->su_flags = SEGU_ALLOCATED | SEGU_LOCKED | SEGU_HASANON;
|
||||
|
||||
/*
|
||||
* Return the address of the base of the mapped part of
|
||||
* the slot.
|
||||
*/
|
||||
return (segu_stom(vbase));
|
||||
}
|
||||
|
||||
/*
|
||||
* Reclaim resources for a single slot.
|
||||
*
|
||||
* When used for u-area, called at process destruction time. Guaranteed not
|
||||
* to sleep, so that it can be called while running on the interrupt stack.
|
||||
*
|
||||
* N.B.: Since this routine deallocates all of the slot's resources,
|
||||
* callers can't count on the resources remaining accessible. In
|
||||
* particular, any stack contained in the slot will vanish, so we'd
|
||||
* better not be running on that stack.
|
||||
*
|
||||
* N.B.: Since the routine can't sleep, it must defer deallocation of anon
|
||||
* resources associated with pages that have i/o in progress. (Anon_decref
|
||||
* calls page_abort, which will sleep until the i/o is complete.)
|
||||
*
|
||||
* We can't simply undo everything that segu_get did directly,
|
||||
* because someone else may have acquired a reference to one or
|
||||
* more of the associated pages in the meantime.
|
||||
*/
|
||||
void
|
||||
segu_release(vaddr)
|
||||
addr_t vaddr;
|
||||
{
|
||||
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
|
||||
addr_t vbase = segu_mtos(vaddr);
|
||||
addr_t va;
|
||||
struct segu_data *sup;
|
||||
struct segu_data **supp;
|
||||
int slot;
|
||||
int i;
|
||||
int doing_io = 0;
|
||||
register int locked;
|
||||
|
||||
/*
|
||||
* Get the slot corresponding to this virtual address.
|
||||
*/
|
||||
if ((slot = segu_getslot(segu, vaddr, 1)) == -1)
|
||||
panic("segu_release: bad addr");
|
||||
sup = &sdp->usd_slots[slot];
|
||||
|
||||
/*
|
||||
* XXX: Do we need to lock this slot's pages while we're
|
||||
* messing with them? What can happen once we decrement
|
||||
* the keep count below?
|
||||
*/
|
||||
|
||||
/*
|
||||
* Examine the slot's pages looking for i/o in progress.
|
||||
* While doing so, undo locks.
|
||||
*/
|
||||
locked = sup->su_flags & SEGU_LOCKED;
|
||||
for (i = 0, va = vbase; i < SEGU_PAGES; i++, va += ptob(1)) {
|
||||
register struct page *pp;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
register int s;
|
||||
|
||||
if (i == SEGU_REDZONE)
|
||||
continue;
|
||||
|
||||
if (locked)
|
||||
hat_unlock(segu, va);
|
||||
|
||||
/*
|
||||
* Find the page associated with this part of the
|
||||
* slot, tracking it down through its associated swap
|
||||
* space.
|
||||
*/
|
||||
swap_xlate(sup->su_swaddr[i], &vp, &off);
|
||||
|
||||
/*
|
||||
* Prevent page status from changing.
|
||||
*/
|
||||
s = splvm();
|
||||
|
||||
if ((pp = page_exists(vp, off)) == NULL) {
|
||||
/*
|
||||
* The page no longer exists; this is fine
|
||||
* unless we had it locked.
|
||||
*/
|
||||
if (locked)
|
||||
panic("segu_release: missing locked page");
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* See whether the page is quiescent.
|
||||
*/
|
||||
if (pp->p_keepcnt != 0)
|
||||
doing_io = 1;
|
||||
|
||||
/*
|
||||
* Make this page available to vultures.
|
||||
*/
|
||||
if (locked)
|
||||
page_pp_unlock(pp, 0);
|
||||
|
||||
(void) splx(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unload the mmu translations for this slot.
|
||||
*/
|
||||
hat_unload(segu, vaddr, ptob(SEGU_PAGES - 1));
|
||||
|
||||
/*
|
||||
* Provided that all of the pages controlled by this segment are
|
||||
* quiescent, release our claim on the associated anon resources and
|
||||
* swap space.
|
||||
*/
|
||||
if (!doing_io) {
|
||||
anon_free(sup->su_swaddr, ptob(SEGU_PAGES));
|
||||
anon_unresv(ptob(SEGU_PAGES - 1));
|
||||
sup->su_flags &= ~SEGU_HASANON;
|
||||
} else
|
||||
sup->su_flags |= SEGU_HASANON;
|
||||
|
||||
/*
|
||||
* Mark the slot as unallocated and unlocked and put it back on the
|
||||
* free list. Keep the free list sorted by slot address, to minimize
|
||||
* fragmentation of seg_u's virtual address range. (This makes a
|
||||
* difference on some architectures; e.g., by making it possible to
|
||||
* use fewer page table entries.) This code counts on the slot
|
||||
* address being a monotonically increasing function of indices of
|
||||
* entries in the usd_slots array.
|
||||
*/
|
||||
sup->su_flags &= ~(SEGU_ALLOCATED|SEGU_LOCKED);
|
||||
for (supp = &sdp->usd_free; *supp != NULL && *supp < sup;
|
||||
supp = &(*supp)->su_next)
|
||||
continue;
|
||||
sup->su_next = *supp;
|
||||
*supp = sup;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Private routines for use by seg_u operations.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Verify that the range designated by vaddr and len lies completely
|
||||
* within the mapped part of a single allocated slot. If so, return
|
||||
* the slot's index; otherwise return -1.
|
||||
*/
|
||||
static int
|
||||
segu_getslot(seg, vaddr, len)
|
||||
register struct seg *seg;
|
||||
addr_t vaddr;
|
||||
u_int len;
|
||||
{
|
||||
register int slot;
|
||||
register struct segu_segdata *sdp;
|
||||
register struct segu_data *sup;
|
||||
addr_t vlast;
|
||||
addr_t vmappedbase;
|
||||
|
||||
sdp = (struct segu_segdata *)seg->s_data;
|
||||
|
||||
/*
|
||||
* Make sure the base is in range of the segment as a whole.
|
||||
*/
|
||||
if (vaddr < seg->s_base || vaddr >= seg->s_base + seg->s_size)
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* Figure out what slot the address lies in.
|
||||
*/
|
||||
slot = (vaddr - seg->s_base) / ptob(SEGU_PAGES);
|
||||
sup = &sdp->usd_slots[slot];
|
||||
|
||||
/*
|
||||
* Make sure the end of the range falls in the same slot.
|
||||
*/
|
||||
vlast = vaddr + len - 1;
|
||||
if ((vlast - seg->s_base) / ptob(SEGU_PAGES) != slot)
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* Nobody has any business touching this slot if it's not currently
|
||||
* allocated.
|
||||
*/
|
||||
if (!(sup->su_flags & SEGU_ALLOCATED))
|
||||
return (-1);
|
||||
|
||||
/*
|
||||
* Finally, verify that the range is completely in the mapped part
|
||||
* of the slot.
|
||||
*/
|
||||
vmappedbase = segu_stom(seg->s_base + ptob(SEGU_PAGES) * slot);
|
||||
if (vaddr < vmappedbase || vlast >= vmappedbase + ptob(SEGU_PAGES - 1))
|
||||
return (-1);
|
||||
|
||||
return (slot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlock intra-slot resources in the range given by vaddr and len.
|
||||
* Assumes that the range is known to fall entirely within the mapped
|
||||
* part of the slot given as argument and that the slot itself is
|
||||
* allocated.
|
||||
*/
|
||||
static int
|
||||
segu_softunlock(seg, vaddr, len, slot, rw)
|
||||
struct seg *seg;
|
||||
addr_t vaddr;
|
||||
u_int len;
|
||||
int slot;
|
||||
enum seg_rw rw;
|
||||
{
|
||||
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
|
||||
register struct segu_data
|
||||
*sup = &sdp->usd_slots[slot];
|
||||
register addr_t va;
|
||||
addr_t vlim;
|
||||
register u_int i;
|
||||
|
||||
/*
|
||||
* Loop through the pages in the given range.
|
||||
*/
|
||||
va = (addr_t)((u_int)vaddr & PAGEMASK);
|
||||
len = roundup(len, ptob(1));
|
||||
vlim = va + len;
|
||||
/* Calculate starting page index within slot. */
|
||||
i = (va - (seg->s_base + slot * ptob(SEGU_PAGES))) / ptob(1);
|
||||
for ( ; va < vlim; va += ptob(1), i++) {
|
||||
register struct page *pp;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
|
||||
/*
|
||||
* Unlock our MMU translation for this page.
|
||||
*
|
||||
* XXX: Is there any problem with attempting to unlock
|
||||
* a translation that isn't locked?
|
||||
*/
|
||||
hat_unlock(seg, va);
|
||||
|
||||
/*
|
||||
* Unload it.
|
||||
*/
|
||||
hat_unload(seg, va, ptob(1));
|
||||
|
||||
/*
|
||||
* Find the page associated with this part of the
|
||||
* slot, tracking it down through its associated swap
|
||||
* space.
|
||||
*/
|
||||
swap_xlate(sup->su_swaddr[i], &vp, &off);
|
||||
if ((pp = page_find(vp, off)) == NULL)
|
||||
panic("segu_softunlock: missing page");
|
||||
|
||||
/*
|
||||
* Release our long-term claim on the page.
|
||||
*/
|
||||
page_pp_unlock(pp, 0);
|
||||
|
||||
/*
|
||||
* If we're "hard" swapping (i.e. we need pages) and
|
||||
* nobody's using the page any more and it's dirty,
|
||||
* unlocked, and not kept, push it asynchronously rather
|
||||
* than waiting for the pageout daemon to find it.
|
||||
*/
|
||||
hat_pagesync(pp);
|
||||
if (rw == S_WRITE && pp->p_mapping == NULL &&
|
||||
pp->p_keepcnt == 0 && !pp->p_lock && pp->p_mod) {
|
||||
/*
|
||||
* XXX: Want most powerful credentials we can
|
||||
* get. Punt for now.
|
||||
*/
|
||||
(void) VOP_PUTPAGE(vp, off, ptob(1), B_ASYNC | B_FREE,
|
||||
(struct ucred *)NULL);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Load and possibly lock intra-slot resources in the range given
|
||||
* by vaddr and len. Assumes that the range is known to fall entirely
|
||||
* within the mapped part of the slot given as argument and that the
|
||||
* slot itself is allocated.
|
||||
*/
|
||||
static int
|
||||
segu_softload(seg, vaddr, len, slot, lock)
|
||||
struct seg *seg;
|
||||
addr_t vaddr;
|
||||
u_int len;
|
||||
int slot;
|
||||
int lock;
|
||||
{
|
||||
struct segu_segdata *sdp = (struct segu_segdata *)segu->s_data;
|
||||
register struct segu_data
|
||||
*sup = &sdp->usd_slots[slot];
|
||||
register addr_t va;
|
||||
addr_t vlim;
|
||||
register u_int i;
|
||||
|
||||
/*
|
||||
* Loop through the pages in the given range.
|
||||
*/
|
||||
va = (addr_t)((u_int)vaddr & PAGEMASK);
|
||||
vaddr = va;
|
||||
len = roundup(len, ptob(1));
|
||||
vlim = va + len;
|
||||
/* Calculate starting page index within slot. */
|
||||
i = (va - (seg->s_base + slot * ptob(SEGU_PAGES))) / ptob(1);
|
||||
for ( ; va < vlim; va += ptob(1), i++) {
|
||||
struct page *pl[2];
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
register int err;
|
||||
|
||||
/*
|
||||
* Summon the page. If it's not resident, arrange
|
||||
* for synchronous i/o to pull it in.
|
||||
*
|
||||
* XXX: Need read credentials value; for now we punt.
|
||||
*/
|
||||
swap_xlate(sup->su_swaddr[i], &vp, &off);
|
||||
err = VOP_GETPAGE(vp, off, ptob(1), (u_int *)NULL,
|
||||
pl, ptob(1), seg, va, S_READ, (struct ucred *)NULL);
|
||||
if (err) {
|
||||
/*
|
||||
* Back out of what we've done so far.
|
||||
*/
|
||||
(void) segu_softunlock(seg, vaddr, (u_int)(va - vaddr),
|
||||
slot, S_OTHER);
|
||||
return (err);
|
||||
}
|
||||
cnt.v_swpin++;
|
||||
/*
|
||||
* The returned page list will have exactly one entry,
|
||||
* which is returned to us already kept.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Load an MMU translation for the page.
|
||||
*/
|
||||
hat_memload(seg, va, pl[0], UPAGE_PROT, lock);
|
||||
|
||||
/*
|
||||
* If we're locking down resources, we need to increment
|
||||
* the page's long term keep count. In any event, we
|
||||
* need to decrement the (short term) keep count.
|
||||
*
|
||||
* XXX: When page_pp_lock returns a success/failure
|
||||
* indication, we'll probably want to panic if
|
||||
* it fails.
|
||||
*/
|
||||
if (lock)
|
||||
(void) page_pp_lock(pl[0], 0, 1);
|
||||
PAGE_RELE(pl[0]);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
130
sys/vm/seg_u.h
Normal file
130
sys/vm/seg_u.h
Normal file
@@ -0,0 +1,130 @@
|
||||
/* @(#)seg_u.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1989 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - U-area segment management
|
||||
*
|
||||
* This file contains definitions related to the u-area segment type.
|
||||
*
|
||||
* In its most general form, this segment type provides an interface
|
||||
* for managing stacks that are protected by red zones, with the size
|
||||
* of each stack independently specifiable. The current implementation
|
||||
* is restricted in the following way.
|
||||
* 1) It assumes that all stacks are the same size. In particular,
|
||||
* it assumes that the stacks it manages are actually traditional
|
||||
* u-areas, each containing a stack at one end.
|
||||
*
|
||||
* The segment driver manages a contiguous chunk of virtual space,
|
||||
* carving it up into individual stack instances as required, and
|
||||
* associating physical storage, MMU mappings, and swap space with
|
||||
* each individual stack instance.
|
||||
*
|
||||
* As a matter of nomenclature, the individual allocation units are
|
||||
* referred to as "slots".
|
||||
*/
|
||||
|
||||
#ifndef _vm_seg_u_h
|
||||
#define _vm_seg_u_h
|
||||
|
||||
/*
|
||||
* The number of pages covered by a single seg_u slot.
|
||||
*
|
||||
* This value is the number of (software) pages in the u-area
|
||||
* (including the stack in the u-area) plus an additional page
|
||||
* for a stack red zone. If the seg_u implementation is ever
|
||||
* generalized to allow variable-size stack allocation, this
|
||||
* define will have to change.
|
||||
*/
|
||||
#define SEGU_PAGES (UPAGES/CLSIZE + 1)
|
||||
|
||||
/*
|
||||
* XXX: This define belongs elsewhere, probably in <machine/param.h>.
|
||||
*/
|
||||
#define STACK_GROWTH_DOWN
|
||||
|
||||
|
||||
/*
|
||||
* Index of the red zone page and macros for interconverting between
|
||||
* the base address of a slot and the base address of its accessible
|
||||
* portion. (Nomenclature: Slot TO Mapped and vice versa.)
|
||||
*/
|
||||
#ifdef STACK_GROWTH_DOWN
|
||||
|
||||
#define SEGU_REDZONE 0
|
||||
#define segu_stom(v) ((v) + ptob(1))
|
||||
#define segu_mtos(v) ((v) - ptob(1))
|
||||
|
||||
#else STACK_GROWTH_DOWN
|
||||
|
||||
#define SEGU_REDZONE (SEGU_PAGES - 1)
|
||||
#define segu_stom(v) (v)
|
||||
#define segu_mtos(v) (v)
|
||||
|
||||
#endif STACK_GROWTH_DOWN
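/*
 * Illustrative sketch (not part of the original source): with
 * STACK_GROWTH_DOWN, slot i of the segu segment starts at
 *
 *	vbase = segu->s_base + ptob(SEGU_PAGES) * i;
 *
 * page SEGU_REDZONE (here page 0) is left unmapped as the red zone,
 * and the usable u-area begins one page higher:
 *
 *	uaddr = segu_stom(vbase);	i.e. vbase + ptob(1)
 *	vbase == segu_mtos(uaddr);
 */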
|
||||
|
||||
|
||||
/*
|
||||
* Private information per overall segu segment (as opposed
|
||||
* to per slot within segment)
|
||||
*
|
||||
* XXX: We may wish to modify the free list to handle it as a queue
|
||||
* instead of a stack; this possibly could reduce the frequency
|
||||
* of cache flushes. If so, we would need a list tail pointer
|
||||
* as well as a list head pointer.
|
||||
*/
|
||||
struct segu_segdata {
|
||||
/*
|
||||
* info needed:
|
||||
* - slot vacancy info
|
||||
* - a way of getting to state info for each slot
|
||||
*/
|
||||
struct segu_data *usd_slots; /* array of segu_data structs, */
|
||||
/* one per slot */
|
||||
struct segu_data *usd_free; /* slot free list head */
|
||||
};
|
||||
|
||||
/*
|
||||
* Private per-slot information.
|
||||
*/
|
||||
struct segu_data {
|
||||
struct segu_data *su_next; /* free list link */
|
||||
struct anon *su_swaddr[SEGU_PAGES]; /* disk address of u area */
|
||||
/* when swapped */
|
||||
u_int su_flags; /* state info: see below */
|
||||
};
|
||||
|
||||
/*
|
||||
* Flag bits
|
||||
*
|
||||
* When the SEGU_LOCKED bit is set, all the resources associated with the
|
||||
* corresponding slot are locked in place, so that referencing addresses
|
||||
* in the slot's range will not cause a fault. Clients using this driver
|
||||
* to manage a u-area lock down the slot when the corresponding process
|
||||
* becomes runnable and unlock it when the process is swapped out.
|
||||
*/
|
||||
#define SEGU_ALLOCATED 0x01 /* slot is in use */
|
||||
#define SEGU_LOCKED 0x02 /* slot's resources locked */
|
||||
#define SEGU_HASANON 0x04 /* slot has anon resources */
|
||||
|
||||
|
||||
#ifdef KERNEL
|
||||
extern struct seg *segu;
|
||||
|
||||
/*
|
||||
* Public routine declarations not part of the segment ops vector go here.
|
||||
*/
|
||||
int segu_create(/* seg, argsp */);
|
||||
addr_t segu_get();
|
||||
void segu_release(/* vaddr */);
|
||||
|
||||
/*
|
||||
* We allow explicit calls to segu_fault, even though it's part
|
||||
* of the segu ops vector.
|
||||
*/
|
||||
faultcode_t segu_fault(/* seg, vaddr, len, type, rw */);
|
||||
#endif KERNEL
|
||||
|
||||
#endif /*!_vm_seg_u_h*/
|
||||
2460
sys/vm/seg_vn.c
Normal file
2460
sys/vm/seg_vn.c
Normal file
File diff suppressed because it is too large
108
sys/vm/seg_vn.h
Normal file
108
sys/vm/seg_vn.h
Normal file
@@ -0,0 +1,108 @@
|
||||
/* @(#)seg_vn.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_seg_vn_h
|
||||
#define _vm_seg_vn_h
|
||||
|
||||
#include <vm/mp.h>
|
||||
|
||||
/*
|
||||
* Structure whose pointer is passed to the segvn_create routine
|
||||
*/
|
||||
struct segvn_crargs {
|
||||
struct vnode *vp; /* vnode mapped from */
|
||||
u_int offset; /* starting offset of vnode for mapping */
|
||||
struct ucred *cred; /* credentials */
|
||||
u_char type; /* type of sharing done */
|
||||
u_char prot; /* protections */
|
||||
u_char maxprot; /* maximum protections */
|
||||
struct anon_map *amp; /* anon mapping to map to */
|
||||
};
|
||||
|
||||
/*
|
||||
* The anon_map structure is used by the seg_vn driver to manage
|
||||
* unnamed (anonymous) memory. When anonymous memory is shared,
|
||||
* then the different segvn_data structures will point to the
|
||||
* same anon_map structure. Also, if a segment is unmapped
|
||||
* in the middle where an anon_map structure exists, the
|
||||
* newly created segment will also share the anon_map structure,
|
||||
* although the two segments will use different ranges of the
|
||||
* anon array. When mappings are private (or shared with
|
||||
* a reference count of 1), an unmap operation will free up
|
||||
* a range of anon slots in the array given by the anon_map
|
||||
* structure. Because of fragmentation due to this unmapping,
|
||||
* we have to store the size of the anon array in the anon_map
|
||||
* structure so that we can free everything when the reference
|
||||
* count goes to zero.
|
||||
*/
|
||||
struct anon_map {
|
||||
u_int refcnt; /* reference count on this structure */
|
||||
u_int size; /* size in bytes mapped by the anon array */
|
||||
struct anon **anon; /* pointer to an array of anon * pointers */
|
||||
u_int swresv; /* swap space reserved for this anon_map */
|
||||
u_int flags; /* anon_map flags (see below) */
|
||||
};
|
||||
|
||||
/* anon_map flags */
|
||||
#define AMAP_LOCKED 0x01 /* anon_map is locked */
|
||||
#define AMAP_WANT 0x02 /* some process waiting on lock */
|
||||
|
||||
/*
|
||||
* Lock and unlock anon_map if the segment has private pages. This
|
||||
* is necessary to ensure that operations on the anon array (e.g., growing
|
||||
* the array, or allocating an anon slot and assigning a page) are atomic.
|
||||
*/
|
||||
#define AMAP_LOCK(amp) { \
|
||||
while ((amp)->flags & AMAP_LOCKED) { \
|
||||
(amp)->flags |= AMAP_WANT; \
|
||||
(void) sleep((caddr_t)(amp), PAMAP); \
|
||||
} \
|
||||
(amp)->flags |= AMAP_LOCKED; \
|
||||
masterprocp->p_swlocks++; \
|
||||
}
|
||||
|
||||
#define AMAP_UNLOCK(amp) { \
|
||||
(amp)->flags &= ~AMAP_LOCKED; \
|
||||
masterprocp->p_swlocks--; \
|
||||
if ((amp)->flags & AMAP_WANT) { \
|
||||
(amp)->flags &= ~AMAP_WANT; \
|
||||
wakeup((caddr_t)(amp)); \
|
||||
} \
|
||||
}
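/*
 * Illustrative sketch (not part of the original source): a segment
 * driver is expected to bracket manipulation of a shared anon array
 * with these macros, e.g. when filling in a missing anon slot ("svd"
 * and "index" are placeholders for the caller's state):
 *
 *	AMAP_LOCK(svd->amp);
 *	if (svd->amp->anon[index] == NULL)
 *		svd->amp->anon[index] = anon_alloc();
 *	AMAP_UNLOCK(svd->amp);
 */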
|
||||
|
||||
/*
|
||||
* (Semi) private data maintained by the seg_vn driver per segment mapping
|
||||
*/
|
||||
struct segvn_data {
|
||||
kmon_t lock;
|
||||
u_char pageprot; /* true if per page protections present */
|
||||
u_char prot; /* current segment prot if pageprot == 0 */
|
||||
u_char maxprot; /* maximum segment protections */
|
||||
u_char type; /* type of sharing done */
|
||||
struct vnode *vp; /* vnode that segment mapping is to */
|
||||
u_int offset; /* starting offset of vnode for mapping */
|
||||
u_int anon_index; /* starting index into anon_map anon array */
|
||||
struct anon_map *amp; /* pointer to anon share structure, if needed */
|
||||
struct vpage *vpage; /* per-page information, if needed */
|
||||
struct ucred *cred; /* mapping credentials */
|
||||
u_int swresv; /* swap space reserved for this segment */
|
||||
u_char advice; /* madvise flags for segment */
|
||||
u_char pageadvice; /* true if per page advice set */
|
||||
};
|
||||
|
||||
#ifdef KERNEL
|
||||
int segvn_create(/* seg, argsp */);
|
||||
|
||||
extern struct seg_ops segvn_ops;
|
||||
|
||||
/*
|
||||
* Provided as short hand for creating user zfod segments
|
||||
*/
|
||||
extern caddr_t zfod_argsp;
|
||||
extern caddr_t kzfod_argsp;
|
||||
#endif KERNEL
|
||||
|
||||
#endif /*!_vm_seg_vn_h*/
|
||||
35
sys/vm/swap.h
Normal file
35
sys/vm/swap.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/* @(#)swap.h 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1987 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _vm_swap_h
|
||||
#define _vm_swap_h
|
||||
|
||||
/*
|
||||
* VM - virtual swap device.
|
||||
*/
|
||||
|
||||
struct swapinfo {
|
||||
struct vnode *si_vp; /* vnode for this swap device */
|
||||
u_int si_size; /* size (bytes) of this swap device */
|
||||
struct anon *si_anon; /* pointer to anon array */
|
||||
struct anon *si_eanon; /* pointer to end of anon array */
|
||||
struct anon *si_free; /* anon free list for this vp */
|
||||
int si_allocs; /* # of conseq. allocs from this area */
|
||||
struct swapinfo *si_next; /* next swap area */
|
||||
short *si_pid; /* parallel pid array for memory tool */
|
||||
};
|
||||
|
||||
#define IS_SWAPVP(vp) (((vp)->v_flag & VISSWAP) != 0)
|
||||
|
||||
#ifdef KERNEL
|
||||
int swap_init(/* vp */);
|
||||
struct anon *swap_alloc();
|
||||
void swap_free(/* ap */);
|
||||
void swap_xlate(/* ap, vpp, offsetp */);
|
||||
struct anon *swap_anon(/* vp, offset */);
|
||||
#endif
|
||||
|
||||
#endif /*!_vm_swap_h*/
|
||||
509
sys/vm/vm_anon.c
Normal file
509
sys/vm/vm_anon.c
Normal file
@@ -0,0 +1,509 @@
|
||||
/* @(#)vm_anon.c 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - anonymous pages.
|
||||
*
|
||||
* This layer sits immediately above the vm_swap layer. It manages
|
||||
* physical pages that have no permanent identity in the file system
|
||||
* name space, using the services of the vm_swap layer to allocate
|
||||
* backing storage for these pages. Since these pages have no external
|
||||
* identity, they are discarded when the last reference is removed.
|
||||
*
|
||||
* An important function of this layer is to manage low-level sharing
|
||||
* of pages that are logically distinct but that happen to be
|
||||
* physically identical (e.g., the corresponding pages of the processes
|
||||
* resulting from a fork before one process or the other changes their
|
||||
* contents). This pseudo-sharing is present only as an optimization
|
||||
* and is not to be confused with true sharing in which multiple
|
||||
* address spaces deliberately contain references to the same object;
|
||||
* such sharing is managed at a higher level.
|
||||
*
|
||||
* The key data structure here is the anon struct, which contains a
|
||||
* reference count for its associated physical page and a hint about
|
||||
* the identity of that page. Anon structs typically live in arrays,
|
||||
* with an instance's position in its array determining where the
|
||||
* corresponding backing storage is allocated; however, the swap_xlate()
|
||||
* routine abstracts away this representation information so that the
|
||||
* rest of the anon layer need not know it. (See the swap layer for
|
||||
* more details on anon struct layout.)
|
||||
*
|
||||
* In future versions of the system, the association between an
|
||||
* anon struct and its position on backing store will change so that
|
||||
* we don't require backing store for all anonymous pages in the system.
|
||||
* This is an important consideration for large memory systems.
|
||||
* We can also use this technique to delay binding physical locations
|
||||
* to anonymous pages until pageout/swapout time where we can make
|
||||
* smarter allocation decisions to improve anonymous klustering.
|
||||
*
|
||||
* Many of the routines defined here take a (struct anon **) argument,
|
||||
* which allows the code at this level to manage anon pages directly,
|
||||
* so that callers can regard anon structs as opaque objects and not be
|
||||
* concerned with assigning or inspecting their contents.
|
||||
*
|
||||
* Clients of this layer refer to anon pages indirectly. That is, they
|
||||
* maintain arrays of pointers to anon structs rather than maintaining
|
||||
* anon structs themselves. The (struct anon **) arguments mentioned
|
||||
* above are pointers to entries in these arrays. It is these arrays
|
||||
* that capture the mapping between offsets within a given segment and
|
||||
* the corresponding anonymous backing storage address.
|
||||
*/
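/*
 * Illustrative sketch (not part of the original source): a segment
 * driver that keeps a per-segment anon array passes the address of the
 * slot, not its contents, down to this layer; the names below are
 * placeholders for the caller's own state:
 *
 *	struct anon **app;
 *
 *	app = &amp->anon[anon_index + btop(addr - seg->s_base)];
 *	err = anon_getpage(app, &prot, pl, PAGESIZE, seg, addr, rw, cred);
 *	...
 *	pp = anon_private(app, seg, addr, opp, 0);
 *
 * The anon struct itself stays opaque to the caller throughout.
 */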
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/user.h> /* XXX - for rusage */
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/ucred.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/vmmeter.h>
|
||||
#include <sys/trace.h>
|
||||
#include <sys/debug.h>
|
||||
|
||||
#include <vm/hat.h>
|
||||
#include <vm/anon.h>
|
||||
#include <vm/swap.h>
|
||||
#include <vm/as.h>
|
||||
#include <vm/page.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/pvn.h>
|
||||
#include <vm/rm.h>
|
||||
#include <vm/mp.h>
|
||||
|
||||
struct anoninfo anoninfo;
|
||||
#ifdef KMON_DEBUG
|
||||
kmon_t anon_lock;
|
||||
#endif /* KMON_DEBUG */
|
||||
|
||||
int anon_resv_debug = 0;
|
||||
int anon_enforce_resv = 1;
|
||||
|
||||
/*
|
||||
* Reserve anon space.
|
||||
* Return non-zero on success.
|
||||
*/
|
||||
int
|
||||
anon_resv(size)
|
||||
u_int size;
|
||||
{
|
||||
|
||||
anoninfo.ani_resv += btopr(size);
|
||||
if (anoninfo.ani_resv > anoninfo.ani_max) {
|
||||
if (anon_enforce_resv)
|
||||
anoninfo.ani_resv -= btopr(size);
|
||||
else if (anon_resv_debug)
|
||||
printf("anon: swap space overcommitted by %d\n",
|
||||
anoninfo.ani_resv - anoninfo.ani_max);
|
||||
return (!anon_enforce_resv);
|
||||
} else {
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Give back an anon reservation.
|
||||
*/
|
||||
void
|
||||
anon_unresv(size)
|
||||
u_int size;
|
||||
{
|
||||
|
||||
anoninfo.ani_resv -= btopr(size);
|
||||
if ((int)anoninfo.ani_resv < 0)
|
||||
printf("anon: reservations below zero???\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an anon slot.
|
||||
*/
|
||||
struct anon *
|
||||
anon_alloc()
|
||||
{
|
||||
register struct anon *ap;
|
||||
|
||||
kmon_enter(&anon_lock);
|
||||
ap = swap_alloc();
|
||||
if (ap != NULL) {
|
||||
anoninfo.ani_free--;
|
||||
ap->an_refcnt = 1;
|
||||
ap->un.an_page = NULL;
|
||||
}
|
||||
kmon_exit(&anon_lock);
|
||||
return (ap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrement the reference count of an anon page.
|
||||
* If reference count goes to zero, free it and
|
||||
* its associated page (if any).
|
||||
*/
|
||||
static void
|
||||
anon_decref(ap)
|
||||
register struct anon *ap;
|
||||
{
|
||||
register struct page *pp;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
|
||||
if (--ap->an_refcnt == 0) {
|
||||
/*
|
||||
* If there is a page for this anon slot we will need to
|
||||
* call page_abort to get rid of the vp association and
|
||||
* put the page back on the free list as really free.
|
||||
*/
|
||||
swap_xlate(ap, &vp, &off);
|
||||
pp = page_find(vp, off);
|
||||
/*
|
||||
* XXX - If we have a page, wait for its keepcnt to become
|
||||
* zero, re-verify the identity before aborting it and
|
||||
* freeing the swap slot. This ensures that any pending i/o
|
||||
* always completes before the swap slot is freed.
|
||||
*/
|
||||
if (pp != NULL) {
|
||||
if (pp->p_keepcnt != 0) {
|
||||
page_wait(pp);
|
||||
if (pp->p_vnode == vp && pp->p_offset == off)
|
||||
page_abort(pp);
|
||||
} else {
|
||||
page_abort(pp);
|
||||
}
|
||||
}
|
||||
kmon_enter(&anon_lock);
|
||||
swap_free(ap);
|
||||
anoninfo.ani_free++;
|
||||
kmon_exit(&anon_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Duplicate references to size bytes worth of anon pages.
|
||||
* Used when duplicating a segment that contains private anon pages.
|
||||
* This code assumes that the procedure calling this one has already used
|
||||
* hat_chgprot() to disable write access to the range of addresses
|
||||
* that *old actually refers to.
|
||||
*/
|
||||
void
|
||||
anon_dup(old, new, size)
|
||||
register struct anon **old, **new;
|
||||
u_int size;
|
||||
{
|
||||
register int i;
|
||||
|
||||
i = btopr(size);
|
||||
while (i-- > 0) {
|
||||
if ((*new = *old) != NULL)
|
||||
(*new)->an_refcnt++;
|
||||
old++;
|
||||
new++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a group of "size" anon pages, size in bytes,
|
||||
* and clear out the pointers to the anon entries.
|
||||
*/
|
||||
void
|
||||
anon_free(app, size)
|
||||
register struct anon **app;
|
||||
u_int size;
|
||||
{
|
||||
register int i;
|
||||
|
||||
i = btopr(size);
|
||||
while (i-- > 0) {
|
||||
if (*app != NULL) {
|
||||
anon_decref(*app);
|
||||
*app = NULL;
|
||||
}
|
||||
app++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the kept page(s) and protections back to the segment driver.
|
||||
*/
|
||||
int
|
||||
anon_getpage(app, protp, pl, plsz, seg, addr, rw, cred)
|
||||
struct anon **app;
|
||||
u_int *protp;
|
||||
struct page *pl[];
|
||||
u_int plsz;
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
enum seg_rw rw;
|
||||
struct ucred *cred;
|
||||
{
|
||||
register struct page *pp, **ppp;
|
||||
register struct anon *ap = *app;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
int err;
|
||||
extern int nopagereclaim;
|
||||
register int s;
|
||||
|
||||
swap_xlate(ap, &vp, &off);
|
||||
again:
|
||||
pp = ap->un.an_page;
|
||||
/*
|
||||
* If the anon pointer has a page associated with it,
|
||||
* see if it looks ok after raising priority to prevent
|
||||
* it from being ripped away at interrupt level if on the
|
||||
* free list. If the page is being paged in, wait for it
|
||||
* to finish as we must return a list of pages since this
|
||||
* routine acts like the VOP_GETPAGE routine does.
|
||||
*/
|
||||
s = splvm();
|
||||
if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off &&
|
||||
!pp->p_gone && pl != NULL) {
|
||||
if (pp->p_intrans && (pp->p_pagein || nopagereclaim)) {
|
||||
(void) splx(s);
|
||||
page_wait(pp);
|
||||
goto again; /* try again */
|
||||
}
|
||||
if (pp->p_free)
|
||||
page_reclaim(pp);
|
||||
(void) splx(s);
|
||||
PAGE_HOLD(pp);
|
||||
if (ap->an_refcnt == 1)
|
||||
*protp = PROT_ALL;
|
||||
else
|
||||
*protp = PROT_ALL & ~PROT_WRITE;
|
||||
pl[0] = pp;
|
||||
pl[1] = NULL;
|
||||
/* no one else accounted for it so we must */
|
||||
u.u_ru.ru_minflt++;
|
||||
return (0);
|
||||
}
|
||||
(void) splx(s);
|
||||
|
||||
/*
|
||||
* Simply treat it as a vnode fault on the anon vp.
|
||||
*/
|
||||
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_ANON);
|
||||
err = VOP_GETPAGE(vp, off, PAGESIZE, protp, pl, plsz,
|
||||
seg, addr, rw, cred);
|
||||
if (err == 0 && pl != NULL) {
|
||||
for (ppp = pl; (pp = *ppp++) != NULL; ) {
|
||||
if (pp->p_offset == off) {
|
||||
ap->un.an_page = pp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ap->an_refcnt != 1)
|
||||
*protp &= ~PROT_WRITE; /* make read-only */
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
int npagesteal;
|
||||
|
||||
/*
|
||||
* Turn a reference to an object or shared anon page
|
||||
* into a private page with a copy of the data from the
|
||||
* original page. The original page is always kept, locked
|
||||
* and loaded in the MMU by the caller. This routine unlocks
|
||||
* the translation and releases the original page, if it isn't
|
||||
* being stolen, before returning to the caller.
|
||||
*/
|
||||
struct page *
|
||||
anon_private(app, seg, addr, opp, oppflags)
|
||||
struct anon **app;
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
struct page *opp;
|
||||
u_int oppflags;
|
||||
{
|
||||
register struct anon *old = *app;
|
||||
register struct anon *new;
|
||||
register struct page *pp;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
|
||||
ASSERT(opp->p_mapping);
|
||||
ASSERT(opp->p_keepcnt);
|
||||
|
||||
new = anon_alloc();
|
||||
if (new == (struct anon *)NULL) {
|
||||
rm_outofanon();
|
||||
hat_unlock(seg, addr);
|
||||
PAGE_RELE(opp);
|
||||
return ((struct page *)NULL); /* out of swap space */
|
||||
}
|
||||
*app = new;
|
||||
|
||||
swap_xlate(new, &vp, &off);
|
||||
again:
|
||||
pp = page_lookup(vp, off);
|
||||
|
||||
if (pp == NULL && (oppflags & STEAL_PAGE) &&
|
||||
opp->p_keepcnt == 1 && opp->p_mod == 0) {
|
||||
pp = opp;
|
||||
hat_unlock(seg, addr); /* unlock translation */
|
||||
hat_pageunload(pp); /* unload all translations */
|
||||
page_hashout(pp); /* destroy old name for page */
|
||||
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
|
||||
if (page_enter(pp, vp, off)) /* rename as anon page */
|
||||
panic("anon private steal");
|
||||
new->un.an_page = pp;
|
||||
pg_setmod(pp, 1);
|
||||
page_unlock(pp);
|
||||
/*
|
||||
* If original page is ``locked'', relinquish
|
||||
* claim for the extra page.
|
||||
*/
|
||||
if (oppflags & LOCK_PAGE)
|
||||
page_subclaim(1);
|
||||
npagesteal++;
|
||||
return (pp);
|
||||
}
|
||||
|
||||
if (pp == NULL) {
|
||||
/*
|
||||
* Normal case, need to allocate new page frame.
|
||||
*/
|
||||
pp = rm_allocpage(seg, addr, PAGESIZE, 1);
|
||||
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
|
||||
if (page_enter(pp, vp, off)) {
|
||||
PAGE_RELE(pp);
|
||||
goto again; /* try again */
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Already found a page with the right identity -- just
|
||||
* use it if the `keepcnt' is 0. If not, wait for the
|
||||
* `keepcnt' to become 0, and re-verify the identity before
|
||||
* using the page.
|
||||
*/
|
||||
if (pp->p_keepcnt != 0) {
|
||||
page_wait(pp);
|
||||
if (pp->p_vnode != vp || pp->p_offset != off)
|
||||
goto again;
|
||||
}
|
||||
page_lock(pp);
|
||||
PAGE_HOLD(pp);
|
||||
}
|
||||
new->un.an_page = pp;
|
||||
|
||||
/*
|
||||
* Now copy the contents from the original page which
|
||||
* is loaded and locked in the MMU by the caller to
|
||||
* prevent yet another page fault.
|
||||
*/
|
||||
pp->p_intrans = pp->p_pagein = 1;
|
||||
pagecopy(addr, pp);
|
||||
pp->p_intrans = pp->p_pagein = 0;
|
||||
pg_setmod(pp, 1); /* mark as modified */
|
||||
page_unlock(pp);
|
||||
|
||||
/*
|
||||
* If original page is ``locked'', relinquish claim
|
||||
* for an extra page reserved for the private copy
|
||||
* in case of a copy-on-write. Lock the new page
|
||||
* ignoring the current reservation check.
|
||||
*/
|
||||
if (oppflags & LOCK_PAGE) {
|
||||
if (old == NULL)
|
||||
page_pp_unlock(opp, 1);
|
||||
else
|
||||
page_pp_unlock(opp, 0);
|
||||
(void) page_pp_lock(pp, 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlock translation to the original page since
|
||||
* it can be unloaded if the page is aborted.
|
||||
*/
|
||||
hat_unlock(seg, addr);
|
||||
|
||||
/*
|
||||
* Ok, now release the original page, or else the
|
||||
* process will sleep forever in anon_decref()
|
||||
* waiting for the `keepcnt' to become 0.
|
||||
*/
|
||||
PAGE_RELE(opp);
|
||||
|
||||
/*
|
||||
* If we copied away from an anonymous page, then
|
||||
* we are one step closer to freeing up an anon slot.
|
||||
*/
|
||||
if (old != NULL)
|
||||
anon_decref(old);
|
||||
return (pp);
|
||||
}
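/*
 * Editorial sketch (not part of the original source): the copy-on-write
 * decision that anon_private() exists to serve, modelled in
 * self-contained C.  With a reference count of 1 the faulting process
 * already owns the only copy and may write in place; otherwise a
 * private copy is made and the shared slot's count is dropped, which is
 * what the kernel achieves with anon_alloc()/pagecopy()/anon_decref().
 * PAGE_BYTES and the other names here are hypothetical.
 */
#include <stdlib.h>
#include <string.h>

#define PAGE_BYTES 4096

struct cow_slot {
	int	refcnt;
	char	*data;		/* stands in for the page contents */
};

/* Return the buffer this writer may modify, copying first if shared. */
static char *
cow_write_fault(struct cow_slot **slotp)
{
	struct cow_slot *old = *slotp;
	struct cow_slot *new;

	if (old->refcnt == 1)
		return (old->data);	/* sole owner: write in place */

	new = malloc(sizeof (*new));
	if (new == NULL)
		return (NULL);		/* analogous to running out of swap */
	new->data = malloc(PAGE_BYTES);
	if (new->data == NULL) {
		free(new);
		return (NULL);
	}
	memcpy(new->data, old->data, PAGE_BYTES);	/* like pagecopy() */
	new->refcnt = 1;
	old->refcnt--;			/* analogous to anon_decref(old) */
	*slotp = new;
	return (new->data);
}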
|
||||
|
||||
/*
|
||||
* Allocate a zero-filled anon page.
|
||||
*/
|
||||
struct page *
|
||||
anon_zero(seg, addr, app)
|
||||
struct seg *seg;
|
||||
addr_t addr;
|
||||
struct anon **app;
|
||||
{
|
||||
register struct anon *ap;
|
||||
register struct page *pp;
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
|
||||
*app = ap = anon_alloc();
|
||||
if (ap == NULL) {
|
||||
rm_outofanon();
|
||||
return ((struct page *)NULL);
|
||||
}
|
||||
|
||||
swap_xlate(ap, &vp, &off);
|
||||
again:
|
||||
pp = page_lookup(vp, off);
|
||||
|
||||
if (pp == NULL) {
|
||||
/*
|
||||
* Normal case, need to allocate new page frame.
|
||||
*/
|
||||
pp = rm_allocpage(seg, addr, PAGESIZE, 1);
|
||||
trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
|
||||
if (page_enter(pp, vp, off)) {
|
||||
PAGE_RELE(pp);
|
||||
goto again; /* try again */
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Already found a page with the right identity -- just
|
||||
* use it if the `keepcnt' is 0. If not, wait for the
|
||||
* `keepcnt' to become 0, and re-verify the identity before
|
||||
* using the page.
|
||||
*/
|
||||
if (pp->p_keepcnt != 0) {
|
||||
page_wait(pp);
|
||||
if (pp->p_vnode != vp || pp->p_offset != off)
|
||||
goto again;
|
||||
}
|
||||
page_lock(pp);
|
||||
PAGE_HOLD(pp);
|
||||
}
|
||||
ap->un.an_page = pp;
|
||||
|
||||
pagezero(pp, 0, PAGESIZE);
|
||||
cnt.v_zfod++;
|
||||
pg_setmod(pp, 1); /* mark as modified so pageout writes back */
|
||||
page_unlock(pp);
|
||||
return (pp);
|
||||
}
|
||||
|
||||
/*
|
||||
* This gets called by the seg_vn driver unload routine
|
||||
* which is called by the hat code when it decides to
|
||||
* unload a particular mapping.
|
||||
*/
|
||||
void
|
||||
anon_unloadmap(ap, ref, mod)
|
||||
struct anon *ap;
|
||||
u_int ref, mod;
|
||||
{
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
|
||||
swap_xlate(ap, &vp, &off);
|
||||
pvn_unloadmap(vp, off, ref, mod);
|
||||
}
|
||||
898
sys/vm/vm_as.c
Normal file
898
sys/vm/vm_as.c
Normal file
@@ -0,0 +1,898 @@
|
||||
/* @(#)vm_as.c 1.1 94/10/31 SMI */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - address spaces.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <machine/mmu.h>
|
||||
|
||||
#include <vm/hat.h>
|
||||
#include <vm/as.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/seg_vn.h>
|
||||
|
||||
/*
|
||||
* Variables for maintaining the free list of address space structures.
|
||||
*/
|
||||
static struct as *as_freelist;
|
||||
static int as_freeincr = 8;
|
||||
|
||||
/*
|
||||
* Find a segment containing addr. as->a_seglast is used as a
|
||||
* cache to remember the last segment hit we had here. We
|
||||
* first check to see if seglast is another hit, and if not we
|
||||
* determine whether to start from the head of the segment list
|
||||
* (as->a_segs) or from seglast and in which direction to search.
|
||||
*/
|
||||
struct seg *
|
||||
as_segat(as, addr)
|
||||
register struct as *as;
|
||||
register addr_t addr;
|
||||
{
|
||||
register struct seg *seg, *sseg;
|
||||
register forward;
|
||||
|
||||
if (as->a_segs == NULL) /* address space has no segments */
|
||||
return (NULL);
|
||||
if (as->a_seglast == NULL)
|
||||
as->a_seglast = as->a_segs;
|
||||
seg = as->a_seglast;
|
||||
forward = 0;
|
||||
if (seg->s_base <= addr) {
|
||||
if (addr < (seg->s_base + seg->s_size))
|
||||
return (seg); /* seglast contained addr */
|
||||
sseg = as->a_segs->s_prev;
|
||||
if ((addr - seg->s_base) >
|
||||
((sseg->s_base + sseg->s_size) - addr)) {
|
||||
seg = sseg;
|
||||
sseg = as->a_seglast;
|
||||
} else {
|
||||
seg = as->a_seglast->s_next;
|
||||
sseg = as->a_segs;
|
||||
forward++;
|
||||
}
|
||||
} else {
|
||||
if ((addr - as->a_segs->s_base) > (seg->s_base - addr)) {
|
||||
seg = seg->s_prev;
|
||||
sseg = as->a_segs->s_prev;
|
||||
} else {
|
||||
sseg = seg;
|
||||
seg = as->a_segs;
|
||||
forward++;
|
||||
}
|
||||
}
|
||||
do {
|
||||
if (seg->s_base <= addr &&
|
||||
addr < (seg->s_base + seg->s_size)) {
|
||||
as->a_seglast = seg;
|
||||
return (seg);
|
||||
}
|
||||
if (forward) {
|
||||
seg = seg->s_next;
|
||||
if (seg->s_base > addr)
|
||||
break;
|
||||
} else {
|
||||
seg = seg->s_prev;
|
||||
if (addr > (seg->s_base + seg->s_size))
|
||||
break;
|
||||
}
|
||||
} while (seg != sseg);
|
||||
return (NULL);
|
||||
}
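/*
 * Editorial sketch (not part of the original source): the lookup idea
 * behind as_segat(), shown on a sorted array rather than the kernel's
 * circular doubly-linked list.  The index of the last hit is tried
 * first; only on a miss does the scan fall back to walking the sorted
 * ranges (the kernel additionally chooses the cheaper direction to
 * walk).  All names are hypothetical.
 */
static int
range_at(const unsigned long *bases, const unsigned long *sizes, int n,
    int *lastp, unsigned long addr)
{
	int i = *lastp;

	/* fast path: the cached slot still contains addr */
	if (i >= 0 && i < n &&
	    bases[i] <= addr && addr < bases[i] + sizes[i])
		return (i);

	/* slow path: scan the sorted ranges */
	for (i = 0; i < n; i++) {
		if (bases[i] <= addr && addr < bases[i] + sizes[i]) {
			*lastp = i;	/* remember the hit for next time */
			return (i);
		}
		if (bases[i] > addr)
			break;		/* sorted: no later range can match */
	}
	return (-1);
}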
|
||||
|
||||
/*
|
||||
* Allocate and initialize an address space data structure.
|
||||
* We call hat_alloc to allow any machine dependent
|
||||
* information in the hat structure to be initialized.
|
||||
*/
|
||||
struct as *
|
||||
as_alloc()
|
||||
{
|
||||
struct as *as;
|
||||
|
||||
as = (struct as *)new_kmem_fast_alloc((caddr_t *)&as_freelist,
|
||||
sizeof (*as_freelist), as_freeincr, KMEM_SLEEP);
|
||||
bzero((caddr_t)as, sizeof (*as));
|
||||
hat_alloc(as);
|
||||
return (as);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an address space data structure.
|
||||
* Need to free the hat first and then
|
||||
* all the segments on this as and finally
|
||||
* the space for the as struct itself.
|
||||
*/
|
||||
void
|
||||
as_free(as)
|
||||
struct as *as;
|
||||
{
|
||||
hat_free(as);
|
||||
while (as->a_segs != NULL)
|
||||
seg_free(as->a_segs);
|
||||
kmem_fast_free((caddr_t *)&as_freelist, (caddr_t)as);
|
||||
}
|
||||
|
||||
struct as *
|
||||
as_dup(as)
|
||||
register struct as *as;
|
||||
{
|
||||
register struct as *newas;
|
||||
register struct seg *seg, *sseg, *newseg;
|
||||
|
||||
newas = as_alloc();
|
||||
sseg = seg = as->a_segs;
|
||||
if (seg != NULL) {
|
||||
do {
|
||||
newseg = seg_alloc(newas, seg->s_base, seg->s_size);
|
||||
if (newseg == NULL) {
|
||||
as_free(newas);
|
||||
return (NULL);
|
||||
}
|
||||
if ((*seg->s_ops->dup)(seg, newseg)) {
|
||||
as_free(newas);
|
||||
return (NULL);
|
||||
}
|
||||
seg = seg->s_next;
|
||||
} while (seg != sseg);
|
||||
}
|
||||
return (newas);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a new segment to the address space, sorting
|
||||
* it into the proper place in the linked list.
|
||||
*/
|
||||
enum as_res
|
||||
as_addseg(as, new)
|
||||
register struct as *as;
|
||||
register struct seg *new;
|
||||
{
|
||||
register struct seg *seg;
|
||||
register addr_t base;
|
||||
|
||||
seg = as->a_segs;
|
||||
if (seg == NULL) {
|
||||
new->s_next = new->s_prev = new;
|
||||
as->a_segs = new;
|
||||
} else {
|
||||
/*
|
||||
* Figure out where to add the segment to keep the list sorted
|
||||
*/
|
||||
base = new->s_base;
|
||||
do {
|
||||
if (base < seg->s_base) {
|
||||
if (base + new->s_size > seg->s_base)
|
||||
return (A_BADADDR);
|
||||
break;
|
||||
}
|
||||
if (base < seg->s_base + seg->s_size)
|
||||
return (A_BADADDR);
|
||||
seg = seg->s_next;
|
||||
} while (seg != as->a_segs);
|
||||
|
||||
new->s_next = seg;
|
||||
new->s_prev = seg->s_prev;
|
||||
seg->s_prev = new;
|
||||
new->s_prev->s_next = new;
|
||||
|
||||
if (base < as->a_segs->s_base)
|
||||
as->a_segs = new; /* new is at front */
|
||||
}
|
||||
return (A_SUCCESS);
|
||||
}
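/*
 * Editorial sketch (not part of the original source): the overlap test
 * that as_addseg() performs while keeping the list sorted, shown on a
 * sorted array.  A new range may only be inserted in front of the first
 * existing range that starts at or beyond it, and only if it ends
 * before that range begins.  Names are hypothetical.
 */
static int
range_insert_pos(const unsigned long *bases, const unsigned long *sizes,
    int n, unsigned long base, unsigned long size)
{
	int i;

	for (i = 0; i < n; i++) {
		if (base < bases[i]) {
			if (base + size > bases[i])
				return (-1);	/* would overlap slot i */
			break;			/* insert before slot i */
		}
		if (base < bases[i] + sizes[i])
			return (-1);		/* starts inside slot i */
	}
	return (i);				/* insertion index, 0..n */
}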
|
||||
|
||||
/*
|
||||
* Handle a ``fault'' at addr for size bytes.
|
||||
*/
|
||||
faultcode_t
|
||||
as_fault(as, addr, size, type, rw)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
enum fault_type type;
|
||||
enum seg_rw rw;
|
||||
{
|
||||
register struct seg *seg;
|
||||
register addr_t raddr; /* rounded addr counter */
|
||||
register u_int rsize; /* rounded size counter */
|
||||
register u_int ssize;
|
||||
register addr_t addrsav;
|
||||
struct seg *segsav;
|
||||
faultcode_t res = 0;
|
||||
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
|
||||
|
||||
seg = as_segat(as, raddr);
|
||||
if (seg == NULL)
|
||||
return (FC_NOMAP);
|
||||
|
||||
addrsav = raddr;
|
||||
segsav = seg;
|
||||
|
||||
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
seg = seg->s_next; /* goto next seg */
|
||||
if (raddr != seg->s_base) {
|
||||
res = FC_NOMAP;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (raddr + rsize > seg->s_base + seg->s_size)
|
||||
ssize = seg->s_base + seg->s_size - raddr;
|
||||
else
|
||||
ssize = rsize;
|
||||
res = (*seg->s_ops->fault)(seg, raddr, ssize, type, rw);
|
||||
if (res != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we failed and we were locking, unlock the pages we faulted.
|
||||
* (Maybe we should just panic if we are SOFTLOCKing
|
||||
* or even SOFTUNLOCKing right here...)
|
||||
*/
|
||||
if (res != 0 && type == F_SOFTLOCK) {
|
||||
for (seg = segsav; addrsav < raddr; addrsav += ssize) {
|
||||
if (addrsav >= seg->s_base + seg->s_size)
|
||||
seg = seg->s_next; /* goto next seg */
|
||||
/*
|
||||
* Now call the fault routine again to perform the
|
||||
* unlock using S_OTHER instead of the rw variable
|
||||
* since we never got a chance to touch the pages.
|
||||
*/
|
||||
if (raddr > seg->s_base + seg->s_size)
|
||||
ssize = seg->s_base + seg->s_size - addrsav;
|
||||
else
|
||||
ssize = raddr - addrsav;
|
||||
(void) (*seg->s_ops->fault)(seg, addrsav, ssize,
|
||||
F_SOFTUNLOCK, S_OTHER);
|
||||
}
|
||||
}
|
||||
|
||||
return (res);
|
||||
}
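/*
 * Editorial sketch (not part of the original source): the page rounding
 * used at the top of as_fault() and the other as_*() routines.  With a
 * hypothetical 4096-byte page, a request for 10 bytes at address
 * 0x12345 becomes raddr = 0x12000 and rsize = 0x13000 - 0x12000 =
 * 0x1000, i.e. exactly the one page that contains the request.
 */
#define EX_PAGESIZE	4096UL
#define EX_PAGEOFFSET	(EX_PAGESIZE - 1)
#define EX_PAGEMASK	(~EX_PAGEOFFSET)

static void
round_to_pages(unsigned long addr, unsigned long size,
    unsigned long *raddrp, unsigned long *rsizep)
{
	unsigned long raddr = addr & EX_PAGEMASK;

	*raddrp = raddr;
	*rsizep = ((addr + size + EX_PAGEOFFSET) & EX_PAGEMASK) - raddr;
}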
|
||||
|
||||
/*
|
||||
* Asynchronous ``fault'' at addr for size bytes.
|
||||
*/
|
||||
faultcode_t
|
||||
as_faulta(as, addr, size)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
{
|
||||
register struct seg *seg;
|
||||
register addr_t raddr; /* rounded addr counter */
|
||||
register u_int rsize; /* rounded size counter */
|
||||
faultcode_t res;
|
||||
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
|
||||
|
||||
seg = as_segat(as, raddr);
|
||||
if (seg == NULL)
|
||||
return (FC_NOMAP);
|
||||
for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
seg = seg->s_next; /* goto next seg */
|
||||
if (raddr != seg->s_base)
|
||||
return (FC_NOMAP);
|
||||
}
|
||||
res = (*seg->s_ops->faulta)(seg, raddr);
|
||||
if (res != 0)
|
||||
return (res);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the virtual mapping for the interval from [addr : addr + size)
|
||||
* in address space `as' to have the specified protection.
|
||||
* It is ok for the range to cross over several segments,
|
||||
* as long as they are contiguous.
|
||||
*/
|
||||
enum as_res
|
||||
as_setprot(as, addr, size, prot)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
u_int prot;
|
||||
{
|
||||
register struct seg *seg;
|
||||
register u_int ssize;
|
||||
register addr_t raddr; /* rounded addr counter */
|
||||
register u_int rsize; /* rounded size counter */
|
||||
enum as_res res = A_SUCCESS;
|
||||
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
|
||||
|
||||
seg = as_segat(as, raddr);
|
||||
if (seg == NULL)
|
||||
return (A_BADADDR);
|
||||
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
seg = seg->s_next; /* goto next seg */
|
||||
if (raddr != seg->s_base) {
|
||||
res = A_BADADDR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((raddr + rsize) > (seg->s_base + seg->s_size))
|
||||
ssize = seg->s_base + seg->s_size - raddr;
|
||||
else
|
||||
ssize = rsize;
|
||||
if ((*seg->s_ops->setprot)(seg, raddr, ssize, prot) != 0)
|
||||
res = A_OPFAIL; /* keep on going */
|
||||
}
|
||||
return (res);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to make sure that the interval from [addr : addr + size)
|
||||
* in address space `as' has at least the specified protection.
|
||||
* It is ok for the range to cross over several segments, as long
|
||||
* as they are contiguous.
|
||||
*/
|
||||
enum as_res
|
||||
as_checkprot(as, addr, size, prot)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
u_int prot;
|
||||
{
|
||||
register struct seg *seg;
|
||||
register u_int ssize;
|
||||
register addr_t raddr; /* rounded addr counter */
|
||||
register u_int rsize; /* rounded size counter */
|
||||
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
|
||||
|
||||
seg = as_segat(as, raddr);
|
||||
if (seg == NULL)
|
||||
return (A_BADADDR);
|
||||
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
seg = seg->s_next; /* goto next seg */
|
||||
if (raddr != seg->s_base)
|
||||
return (A_BADADDR);
|
||||
}
|
||||
if ((raddr + rsize) > (seg->s_base + seg->s_size))
|
||||
ssize = seg->s_base + seg->s_size - raddr;
|
||||
else
|
||||
ssize = rsize;
|
||||
if ((*seg->s_ops->checkprot)(seg, raddr, ssize, prot) != 0)
|
||||
return (A_OPFAIL);
|
||||
}
|
||||
return (A_SUCCESS);
|
||||
}
|
||||
|
||||
enum as_res
|
||||
as_unmap(as, addr, size)
|
||||
register struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
{
|
||||
register struct seg *seg, *seg_next;
|
||||
register addr_t raddr, eaddr;
|
||||
register u_int ssize;
|
||||
addr_t obase;
|
||||
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
eaddr = (addr_t)(((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK);
|
||||
|
||||
seg = as->a_segs;
|
||||
if (seg != NULL) {
|
||||
for (; raddr < eaddr; seg = seg_next) {
|
||||
/*
|
||||
* Save next segment pointer since seg can be
|
||||
* destroyed during the segment unmap operation.
|
||||
* We also have to save the old base below.
|
||||
*/
|
||||
seg_next = seg->s_next;
|
||||
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
if (seg->s_base >= seg_next->s_base)
|
||||
break; /* looked at all segs */
|
||||
continue; /* not there yet */
|
||||
}
|
||||
|
||||
if (eaddr <= seg->s_base)
|
||||
break; /* all done */
|
||||
|
||||
if (raddr < seg->s_base)
|
||||
raddr = seg->s_base; /* skip to seg start */
|
||||
|
||||
if (eaddr > (seg->s_base + seg->s_size))
|
||||
ssize = seg->s_base + seg->s_size - raddr;
|
||||
else
|
||||
ssize = eaddr - raddr;
|
||||
|
||||
obase = seg->s_base;
|
||||
if ((*seg->s_ops->unmap)(seg, raddr, ssize) != 0)
|
||||
return (A_OPFAIL);
|
||||
raddr += ssize;
|
||||
|
||||
/*
|
||||
* Carefully check to see if we
|
||||
* have looked at all the segments.
|
||||
*/
|
||||
if (as->a_segs == NULL || obase >= seg_next->s_base)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return (A_SUCCESS);
|
||||
}
|
||||
|
||||
int
|
||||
as_map(as, addr, size, crfp, argsp)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
int (*crfp)();
|
||||
caddr_t argsp;
|
||||
{
|
||||
register struct seg *seg;
|
||||
enum as_res res;
|
||||
int error;
|
||||
|
||||
seg = seg_alloc(as, addr, size);
|
||||
if (seg == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
/*
|
||||
* Remember that this was the most recently touched segment.
|
||||
* If the create routine merges this segment into an existing
|
||||
* segment, seg_free will adjust the a_seglast hint.
|
||||
*/
|
||||
as->a_seglast = seg;
|
||||
error = (*crfp)(seg, argsp);
|
||||
/*
|
||||
* If some error occurred during the create function, destroy
|
||||
* this segment. Otherwise, if the address space is locked,
|
||||
* establish memory locks for the new segment. Translate
|
||||
* error returns as appropriate.
|
||||
*/
|
||||
if (error)
|
||||
seg_free(seg);
|
||||
else if (as->a_paglck) {
|
||||
res = as_ctl(as, seg->s_base, seg->s_size, MC_LOCK, (caddr_t)0);
|
||||
if (res == A_RESOURCE)
|
||||
error = EAGAIN;
|
||||
else if (res != A_SUCCESS)
|
||||
error = EIO;
|
||||
if (error)
|
||||
(void) as_unmap(as, addr, size);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a hole of at least size minlen within [base, base+len).
|
||||
* If flags specifies AH_HI, the hole will have the highest possible address
|
||||
* in the range. Otherwise, it will have the lowest possible address.
|
||||
* If flags specifies AH_CONTAIN, the hole will contain the address addr.
|
||||
* If an adequate hole is found, base and len are set to reflect the part of
|
||||
* the hole that is within range, and A_SUCCESS is returned. Otherwise,
|
||||
* A_OPFAIL is returned.
|
||||
* XXX This routine is not correct when base+len overflows addr_t.
|
||||
*/
|
||||
/* VARARGS5 */
|
||||
enum as_res
|
||||
as_hole(as, minlen, basep, lenp, flags, addr)
|
||||
struct as *as;
|
||||
register u_int minlen;
|
||||
addr_t *basep;
|
||||
u_int *lenp;
|
||||
int flags;
|
||||
addr_t addr;
|
||||
{
|
||||
register addr_t lobound = *basep;
|
||||
register addr_t hibound = lobound + *lenp;
|
||||
register struct seg *sseg = as->a_segs;
|
||||
register struct seg *lseg, *hseg;
|
||||
register addr_t lo, hi;
|
||||
register int forward;
|
||||
|
||||
if (sseg == NULL)
|
||||
if (valid_va_range(basep, lenp, minlen, flags & AH_DIR))
|
||||
return (A_SUCCESS);
|
||||
else
|
||||
return (A_OPFAIL);
|
||||
|
||||
/*
|
||||
* Set up to iterate over all the inter-segment holes in the given
|
||||
* direction. lseg is NULL for the lowest-addressed hole and hseg is
|
||||
* NULL for the highest-addressed hole. If moving backwards, we reset
|
||||
* sseg to denote the highest-addressed segment.
|
||||
*/
|
||||
forward = (flags & AH_DIR) == AH_LO;
|
||||
if (forward) {
|
||||
lseg = NULL;
|
||||
hseg = sseg;
|
||||
} else {
|
||||
sseg = sseg->s_prev;
|
||||
hseg = NULL;
|
||||
lseg = sseg;
|
||||
}
|
||||
for (;;) {
|
||||
/*
|
||||
* Set lo and hi to the hole's boundaries. (We should really
|
||||
* use MAXADDR in place of hibound in the expression below,
|
||||
* but can't express it easily; using hibound in its place is
|
||||
* harmless.)
|
||||
*/
|
||||
lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
|
||||
hi = (hseg == NULL) ? hibound : hseg->s_base;
|
||||
/*
|
||||
* If the iteration has moved past the interval from lobound
|
||||
* to hibound it's pointless to continue.
|
||||
*/
|
||||
if ((forward && lo > hibound) || (!forward && hi < lobound))
|
||||
break;
|
||||
else if (lo > hibound || hi < lobound)
|
||||
goto cont;
|
||||
/*
|
||||
* Candidate hole lies at least partially within the allowable
|
||||
* range. Restrict it to fall completely within that range,
|
||||
* i.e., to [max(lo, lobound), min(hi, hibound)).
|
||||
*/
|
||||
if (lo < lobound)
|
||||
lo = lobound;
|
||||
if (hi > hibound)
|
||||
hi = hibound;
|
||||
/*
|
||||
* Verify that the candidate hole is big enough and meets
|
||||
* hardware constraints.
|
||||
*/
|
||||
*basep = lo;
|
||||
*lenp = hi - lo;
|
||||
if (valid_va_range(basep, lenp, minlen,
|
||||
forward ? AH_LO : AH_HI) &&
|
||||
((flags & AH_CONTAIN) == 0 ||
|
||||
(*basep <= addr && *basep + *lenp > addr)))
|
||||
return (A_SUCCESS);
|
||||
|
||||
cont:
|
||||
/*
|
||||
* Move to the next hole.
|
||||
*/
|
||||
if (forward) {
|
||||
lseg = hseg;
|
||||
if (lseg == NULL)
|
||||
break;
|
||||
hseg = hseg->s_next;
|
||||
if (hseg == sseg)
|
||||
hseg = NULL;
|
||||
} else {
|
||||
hseg = lseg;
|
||||
if (hseg == NULL)
|
||||
break;
|
||||
lseg = lseg->s_prev;
|
||||
if (lseg == sseg)
|
||||
lseg = NULL;
|
||||
}
|
||||
}
|
||||
return (A_OPFAIL);
|
||||
}
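/*
 * Editorial sketch (not part of the original source): the hole search
 * that as_hole() performs, reduced to the "lowest-addressed hole" case
 * over a sorted array of ranges.  The gap before the first range and
 * the gap after the last one are candidates too, mirroring the NULL
 * lseg/hseg cases above.  Names are hypothetical and, as the XXX above
 * notes for the kernel routine, address overflow is ignored.
 */
static int
find_low_hole(const unsigned long *bases, const unsigned long *sizes, int n,
    unsigned long minlen, unsigned long lobound, unsigned long hibound,
    unsigned long *holep)
{
	unsigned long lo, hi;
	int i;

	for (i = 0; i <= n; i++) {
		lo = (i == 0) ? 0 : bases[i - 1] + sizes[i - 1];
		hi = (i == n) ? hibound : bases[i];
		if (lo < lobound)
			lo = lobound;		/* clip to the allowed range */
		if (hi > hibound)
			hi = hibound;
		if (hi > lo && hi - lo >= minlen) {
			*holep = lo;		/* hole is [lo, lo + minlen) */
			return (1);
		}
	}
	return (0);				/* no adequate hole */
}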
|
||||
|
||||
/*
|
||||
* Return the next range within [base, base+len) that is backed
|
||||
* with "real memory". Skip holes and non-seg_vn segments.
|
||||
* We're lazy and only return one segment at a time.
|
||||
*/
|
||||
enum as_res
|
||||
as_memory(as, basep, lenp)
|
||||
struct as *as;
|
||||
addr_t *basep;
|
||||
u_int *lenp;
|
||||
{
|
||||
register struct seg *seg, *sseg, *cseg = NULL;
|
||||
register addr_t addr, eaddr, segend;
|
||||
|
||||
/* XXX - really want as_segatorabove? */
|
||||
if (as->a_seglast == NULL)
|
||||
as->a_seglast = as->a_segs;
|
||||
|
||||
addr = *basep;
|
||||
eaddr = addr + *lenp;
|
||||
sseg = seg = as->a_seglast;
|
||||
if (seg != NULL) {
|
||||
do {
|
||||
if (seg->s_ops != &segvn_ops)
|
||||
continue;
|
||||
if (seg->s_base <= addr &&
|
||||
addr < (segend = (seg->s_base + seg->s_size))) {
|
||||
/* found a containing segment */
|
||||
as->a_seglast = seg;
|
||||
*basep = addr;
|
||||
if (segend > eaddr)
|
||||
*lenp = eaddr - addr;
|
||||
else
|
||||
*lenp = segend - addr;
|
||||
return (A_SUCCESS);
|
||||
} else if (seg->s_base > addr) {
|
||||
if (cseg == NULL ||
|
||||
cseg->s_base > seg->s_base)
|
||||
/* save closest seg above */
|
||||
cseg = seg;
|
||||
}
|
||||
} while ((seg = seg->s_next) != sseg);
|
||||
}
|
||||
if (cseg == NULL) /* ??? no segments in address space? */
|
||||
return (A_OPFAIL);
|
||||
|
||||
/*
|
||||
* Only found a close segment, see if there's
|
||||
* a valid range we can return.
|
||||
*/
|
||||
if (cseg->s_base > eaddr)
|
||||
return (A_BADADDR); /* closest segment is out of range */
|
||||
as->a_seglast = cseg;
|
||||
*basep = cseg->s_base;
|
||||
if (cseg->s_base + cseg->s_size > eaddr)
|
||||
*lenp = eaddr - cseg->s_base; /* segment contains eaddr */
|
||||
else
|
||||
*lenp = cseg->s_size; /* segment is between addr and eaddr */
|
||||
return (A_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Swap the pages associated with the address space as out to
|
||||
* secondary storage, returning the number of bytes actually
|
||||
* swapped.
|
||||
*
|
||||
* If we are not doing a "hard" swap (i.e. we're just getting rid
|
||||
* of a deadwood process) unlock the segu, making it available to be
|
||||
* paged out.
|
||||
*
|
||||
* The value returned is intended to correlate well with the process's
|
||||
* memory requirements. Its usefulness for this purpose depends on
|
||||
* how well the segment-level routines do at returning accurate
|
||||
* information.
|
||||
*/
|
||||
u_int
|
||||
as_swapout(as, hardswap)
|
||||
struct as *as;
|
||||
short hardswap;
|
||||
{
|
||||
register struct seg *seg, *sseg;
|
||||
register u_int swpcnt = 0;
|
||||
|
||||
/*
|
||||
* Kernel-only processes have given up their address
|
||||
* spaces. Of course, we shouldn't be attempting to
|
||||
* swap out such processes in the first place...
|
||||
*/
|
||||
if (as == NULL)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Free all mapping resources associated with the address
|
||||
* space. The segment-level swapout routines capitalize
|
||||
* on this unmapping by scavenging pages that have become
|
||||
* unmapped here.
|
||||
*/
|
||||
hat_free(as);
|
||||
|
||||
/*
|
||||
* Call the swapout routines of all segments in the address
|
||||
* space to do the actual work, accumulating the amount of
|
||||
* space reclaimed.
|
||||
*/
|
||||
sseg = seg = as->a_segs;
|
||||
if (hardswap && seg != NULL) {
|
||||
do {
|
||||
register struct seg_ops *ov = seg->s_ops;
|
||||
|
||||
/* for "soft" swaps, should we sync out segment instead? XXX */
|
||||
if (ov->swapout != NULL)
|
||||
swpcnt += (*ov->swapout)(seg);
|
||||
} while ((seg = seg->s_next) != sseg);
|
||||
}
|
||||
|
||||
return (swpcnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine whether data from the mappings in interval [addr : addr + size)
|
||||
* are in the primary memory (core) cache.
|
||||
*/
|
||||
enum as_res
|
||||
as_incore(as, addr, size, vec, sizep)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
char *vec;
|
||||
u_int *sizep;
|
||||
{
|
||||
register struct seg *seg;
|
||||
register u_int ssize;
|
||||
register addr_t raddr; /* rounded addr counter */
|
||||
register u_int rsize; /* rounded size counter */
|
||||
u_int isize; /* iteration size */
|
||||
|
||||
*sizep = 0;
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
rsize = ((((u_int)addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
|
||||
seg = as_segat(as, raddr);
|
||||
if (seg == NULL)
|
||||
return (A_BADADDR);
|
||||
for (; rsize != 0; rsize -= ssize, raddr += ssize) {
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
seg = seg->s_next;
|
||||
if (raddr != seg->s_base)
|
||||
return (A_BADADDR);
|
||||
}
|
||||
if ((raddr + rsize) > (seg->s_base + seg->s_size))
|
||||
ssize = seg->s_base + seg->s_size - raddr;
|
||||
else
|
||||
ssize = rsize;
|
||||
*sizep += isize =
|
||||
(*seg->s_ops->incore)(seg, raddr, ssize, vec);
|
||||
if (isize != ssize)
|
||||
return (A_OPFAIL);
|
||||
vec += btoc(ssize);
|
||||
}
|
||||
return (A_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Cache control operations over the interval [addr : addr + size) in
|
||||
* address space "as".
|
||||
*/
|
||||
enum as_res
|
||||
as_ctl(as, addr, size, func, arg)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int size;
|
||||
int func;
|
||||
caddr_t arg;
|
||||
{
|
||||
register struct seg *seg; /* working segment */
|
||||
register struct seg *fseg; /* first segment of address space */
|
||||
register u_int ssize; /* size of seg */
|
||||
register addr_t raddr; /* rounded addr counter */
|
||||
register u_int rsize; /* rounded size counter */
|
||||
enum as_res res; /* recursive result */
|
||||
int r; /* local result */
|
||||
|
||||
/*
|
||||
* Normalize addresses and sizes.
|
||||
*/
|
||||
raddr = (addr_t)((u_int)addr & PAGEMASK);
|
||||
rsize = (((u_int)(addr + size) + PAGEOFFSET) & PAGEMASK) - (u_int)raddr;
|
||||
|
||||
/*
|
||||
* If these are address space lock/unlock operations, loop over
|
||||
* all segments in the address space, as appropriate.
|
||||
*/
|
||||
if ((func == MC_LOCKAS) || (func == MC_UNLOCKAS)) {
|
||||
if (func == MC_UNLOCKAS)
|
||||
as->a_paglck = 0;
|
||||
else {
|
||||
if ((int)arg & MCL_FUTURE)
|
||||
as->a_paglck = 1;
|
||||
if (((int)arg & MCL_CURRENT) == 0)
|
||||
return (A_SUCCESS);
|
||||
}
|
||||
for (fseg = NULL, seg = as->a_segs; seg != fseg;
|
||||
seg = seg->s_next) {
|
||||
if (fseg == NULL)
|
||||
fseg = seg;
|
||||
if ((res = as_ctl(as, seg->s_base, seg->s_size,
|
||||
func == MC_LOCKAS ? MC_LOCK : MC_UNLOCK,
|
||||
(caddr_t)0)) != A_SUCCESS)
|
||||
return (res);
|
||||
}
|
||||
return (A_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get initial segment.
|
||||
*/
|
||||
if ((seg = as_segat(as, raddr)) == NULL)
|
||||
return (A_BADADDR);
|
||||
|
||||
/*
|
||||
* Loop over all segments. If a hole in the address range is
|
||||
* discovered, then fail. For each segment, perform the appropriate
|
||||
* control operation.
|
||||
*/
|
||||
|
||||
while (rsize != 0) {
|
||||
|
||||
/*
|
||||
* Make sure there's no hole, calculate the portion
|
||||
* of the next segment to be operated over.
|
||||
*/
|
||||
if (raddr >= seg->s_base + seg->s_size) {
|
||||
seg = seg->s_next;
|
||||
if (raddr != seg->s_base)
|
||||
return (A_BADADDR);
|
||||
}
|
||||
if ((raddr + rsize) > (seg->s_base + seg->s_size))
|
||||
ssize = seg->s_base + seg->s_size - raddr;
|
||||
else
|
||||
ssize = rsize;
|
||||
|
||||
/*
|
||||
* Dispatch on specific function.
|
||||
*/
|
||||
switch (func) {
|
||||
|
||||
/*
|
||||
* Synchronize cached data from mappings with backing
|
||||
* objects.
|
||||
*/
|
||||
case MC_SYNC:
|
||||
if (r = (*seg->s_ops->sync)
|
||||
(seg, raddr, ssize, (u_int)arg))
|
||||
return (r == EPERM ? A_RESOURCE : A_OPFAIL);
|
||||
break;
|
||||
|
||||
/*
|
||||
* Lock pages in memory.
|
||||
*/
|
||||
case MC_LOCK:
|
||||
if (r = (*seg->s_ops->lockop)(seg, raddr, ssize, func))
|
||||
return (r == EAGAIN ? A_RESOURCE : A_OPFAIL);
|
||||
break;
|
||||
|
||||
/*
|
||||
* Unlock mapped pages.
|
||||
*/
|
||||
case MC_UNLOCK:
|
||||
(void) (*seg->s_ops->lockop)(seg, raddr, ssize, func);
|
||||
break;
|
||||
|
||||
/*
|
||||
* Store VM advise for mapped pages in segment layer
|
||||
*/
|
||||
case MC_ADVISE:
|
||||
(void) (*seg->s_ops->advise)(seg, raddr, ssize, arg);
|
||||
break;
|
||||
|
||||
/*
|
||||
* Can't happen.
|
||||
*/
|
||||
default:
|
||||
panic("as_ctl");
|
||||
}
|
||||
rsize -= ssize;
|
||||
raddr += ssize;
|
||||
}
|
||||
return (A_SUCCESS);
|
||||
}
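/*
 * Editorial usage sketch (not part of the original source): how a
 * hypothetical mlockall()-style caller might drive the MC_LOCKAS path
 * above.  MCL_CURRENT causes every existing segment to be locked via
 * the per-segment MC_LOCK recursion, while MCL_FUTURE only sets
 * a_paglck so that as_map() locks segments as they are created.  The
 * addr and size arguments are not used on this path.
 *
 *	res = as_ctl(as, (addr_t)0, 0, MC_LOCKAS,
 *	    (caddr_t)(MCL_CURRENT | MCL_FUTURE));
 */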
|
||||
|
||||
|
||||
/*
|
||||
* Inform the as of translation information associated with the given addr.
|
||||
* This is currently only called if a_hatcallback == 1.
|
||||
*/
|
||||
void
|
||||
as_hatsync(as, addr, ref, mod, flags)
|
||||
struct as *as;
|
||||
addr_t addr;
|
||||
u_int ref;
|
||||
u_int mod;
|
||||
u_int flags;
|
||||
{
|
||||
struct seg *seg;
|
||||
|
||||
if (seg = as_segat(as, addr))
|
||||
seg->s_ops->hatsync(seg, addr, ref, mod, flags);
|
||||
}
|
||||
122
sys/vm/vm_mp.c
Normal file
122
sys/vm/vm_mp.c
Normal file
@@ -0,0 +1,122 @@
|
||||
/* @(#)vm_mp.c 1.1 94/10/31 */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1986 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - multiprocessor/ing support.
|
||||
*
|
||||
* Currently the kmon_enter() / kmon_exit() pair implements a
|
||||
* simple monitor for objects protected by the appropriate lock.
|
||||
* The kcv_wait() / kcv_broadcast() pair implements a simple
|
||||
* condition variable which can be used for `sleeping'
|
||||
* and `waking' inside a monitor if some resource is
|
||||
* needed which is not available.
|
||||
*
|
||||
* XXX - this code is written knowing about the semantics
|
||||
* of sleep/wakeup and UNIX scheduling on a uniprocessor machine.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef KMON_DEBUG
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <vm/mp.h>
|
||||
|
||||
#define ISLOCKED 0x1
|
||||
#define LOCKWANT 0x2
|
||||
|
||||
/*
|
||||
* kmon_enter is used as a type of multiprocess semaphore
|
||||
* used to implement a monitor where the lock represents
|
||||
* the ability to operate on the associated object.
|
||||
* For now, the lock/object association is done
|
||||
* by convention only.
|
||||
*/
|
||||
void
|
||||
kmon_enter(lk)
|
||||
kmon_t *lk;
|
||||
{
|
||||
int s;
|
||||
|
||||
s = spl6();
|
||||
while ((lk->dummy & ISLOCKED) != 0) {
|
||||
#ifdef notnow
|
||||
lk->dummy |= LOCKWANT;
|
||||
(void) sleep((char *)lk, PSWP+1);
|
||||
#else notnow
|
||||
panic("kmon_enter");
|
||||
#endif notnow
|
||||
}
|
||||
lk->dummy |= ISLOCKED;
|
||||
(void) splx(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Release the lock associated with a monitor,
|
||||
* waking up anybody that has already decided
|
||||
* to wait for this lock (monitor).
|
||||
*/
|
||||
void
|
||||
kmon_exit(lk)
|
||||
kmon_t *lk;
|
||||
{
|
||||
int s;
|
||||
|
||||
if ((lk->dummy & ISLOCKED) == 0) /* paranoid */
|
||||
panic("kmon_exit not locked");
|
||||
|
||||
s = spl6();
|
||||
lk->dummy &= ~ISLOCKED;
|
||||
if ((lk->dummy & LOCKWANT) != 0) {
|
||||
lk->dummy &= ~LOCKWANT;
|
||||
wakeup((char *)lk);
|
||||
}
|
||||
(void) splx(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the named condition variable.
|
||||
* Must already have the monitor lock when kcv_wait is called.
|
||||
*/
|
||||
void
|
||||
kcv_wait(lk, cond)
|
||||
kmon_t *lk;
|
||||
char *cond;
|
||||
{
|
||||
int s;
|
||||
|
||||
if ((lk->dummy & ISLOCKED) == 0) /* paranoia */
|
||||
panic("kcv_wait not locked");
|
||||
|
||||
s = spl6();
|
||||
lk->dummy &= ~ISLOCKED; /* release lock */
|
||||
|
||||
(void) sleep(cond, PSWP+1);
|
||||
|
||||
if ((lk->dummy & ISLOCKED) != 0) /* more paranoia */
|
||||
panic("kcv_wait locked");
|
||||
|
||||
lk->dummy |= ISLOCKED; /* reacquire lock */
|
||||
(void) splx(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wake up all processes waiting on the named condition variable.
|
||||
*
|
||||
* We just use current UNIX sleep/wakeup semantics to delay the actual
|
||||
* context switching until later after we have released the lock.
|
||||
*/
|
||||
void
|
||||
kcv_broadcast(lk, cond)
|
||||
kmon_t *lk;
|
||||
char *cond;
|
||||
{
|
||||
|
||||
if ((lk->dummy & ISLOCKED) == 0)
|
||||
panic("kcv_broadcast");
|
||||
wakeup(cond);
|
||||
}
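/*
 * Editorial usage sketch (not part of the original source): the monitor
 * protocol these primitives are meant to support.  `res_lock' and
 * `res_busy' are hypothetical.  Because kcv_wait() releases the monitor
 * lock while sleeping and reacquires it before returning, the waiter
 * must re-test its condition in a loop.
 *
 *	kmon_enter(&res_lock);
 *	while (res_busy)
 *		kcv_wait(&res_lock, (char *)&res_busy);
 *	res_busy = 1;
 *	kmon_exit(&res_lock);
 *
 *	... use the resource ...
 *
 *	kmon_enter(&res_lock);
 *	res_busy = 0;
 *	kcv_broadcast(&res_lock, (char *)&res_busy);
 *	kmon_exit(&res_lock);
 */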
|
||||
#endif /* KMON_DEBUG */
|
||||
1606
sys/vm/vm_page.c
Normal file
1606
sys/vm/vm_page.c
Normal file
File diff suppressed because it is too large
948
sys/vm/vm_pvn.c
Normal file
948
sys/vm/vm_pvn.c
Normal file
@@ -0,0 +1,948 @@
|
||||
#ident "@(#)vm_pvn.c 1.1 94/10/31 SMI"
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988, 1989, 1990 by Sun Microsystems, Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* VM - paged vnode.
|
||||
*
|
||||
* This file supplies vm support for the vnode operations that deal with pages.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/vmmeter.h>
|
||||
#include <sys/vmsystm.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/trace.h>
|
||||
#include <sys/ucred.h>
|
||||
|
||||
#include <vm/hat.h>
|
||||
#include <vm/as.h>
|
||||
#include <vm/seg.h>
|
||||
#include <vm/rm.h>
|
||||
#include <vm/pvn.h>
|
||||
#include <vm/page.h>
|
||||
#include <vm/seg_map.h>
|
||||
|
||||
int pvn_nofodklust = 0;
|
||||
|
||||
/*
|
||||
* Find the largest contiguous block which contains `addr' for file offset
|
||||
* `offset' in it while living within the file system block sizes (`vp_off'
|
||||
* and `vp_len') and the address space limits for which no pages currently
|
||||
* exist and which map to consecutive file offsets.
|
||||
*/
|
||||
struct page *
|
||||
pvn_kluster(vp, off, seg, addr, offp, lenp, vp_off, vp_len, isra)
|
||||
struct vnode *vp;
|
||||
register u_int off;
|
||||
register struct seg *seg;
|
||||
register addr_t addr;
|
||||
u_int *offp, *lenp;
|
||||
u_int vp_off, vp_len;
|
||||
int isra;
|
||||
{
|
||||
register int delta, delta2;
|
||||
register struct page *pp;
|
||||
struct page *plist = NULL;
|
||||
addr_t straddr;
|
||||
int bytesavail;
|
||||
u_int vp_end;
|
||||
|
||||
ASSERT(off >= vp_off && off < vp_off + vp_len);
|
||||
|
||||
/*
|
||||
* We only want to do klustering/read ahead if there
|
||||
* are more than minfree pages currently available.
|
||||
*/
|
||||
if (freemem - minfree > 0)
|
||||
bytesavail = ptob(freemem - minfree);
|
||||
else
|
||||
bytesavail = 0;
|
||||
|
||||
if (bytesavail == 0) {
|
||||
if (isra)
|
||||
return ((struct page *)NULL); /* ra case - give up */
|
||||
else
|
||||
bytesavail = PAGESIZE; /* just pretending */
|
||||
}
|
||||
|
||||
if (bytesavail < vp_len) {
|
||||
/*
|
||||
* Don't have enough free memory for the
|
||||
* max request, try sizing down vp request.
|
||||
*/
|
||||
delta = off - vp_off;
|
||||
vp_len -= delta;
|
||||
vp_off += delta;
|
||||
if (bytesavail < vp_len) {
|
||||
/*
|
||||
* Still not enough memory, just settle for
|
||||
* bytesavail which is at least PAGESIZE.
|
||||
*/
|
||||
vp_len = bytesavail;
|
||||
}
|
||||
}
|
||||
|
||||
vp_end = vp_off + vp_len;
|
||||
ASSERT(off >= vp_off && off < vp_end);
|
||||
|
||||
if (page_exists(vp, off))
|
||||
return ((struct page *)NULL); /* already have page */
|
||||
|
||||
if (vp_len <= PAGESIZE || pvn_nofodklust) {
|
||||
straddr = addr;
|
||||
*offp = off;
|
||||
*lenp = MIN(vp_len, PAGESIZE);
|
||||
} else {
|
||||
/* scan forward from front */
|
||||
for (delta = 0; off + delta < vp_end; delta += PAGESIZE) {
|
||||
/*
|
||||
* Call back to the segment driver to verify that
|
||||
* the klustering/read ahead operation makes sense.
|
||||
*/
|
||||
if ((*seg->s_ops->kluster)(seg, addr, delta))
|
||||
break; /* page not file extension */
|
||||
if (page_exists(vp, off + delta))
|
||||
break; /* already have this page */
|
||||
}
|
||||
delta2 = delta;
|
||||
|
||||
/* scan back from front */
|
||||
for (delta = 0; off + delta > vp_off; delta -= PAGESIZE) {
|
||||
if (page_exists(vp, off + delta - PAGESIZE))
|
||||
break; /* already have the page */
|
||||
/*
|
||||
* Call back to the segment driver to verify that
|
||||
* the klustering/read ahead operation makes sense.
|
||||
*/
|
||||
if ((*seg->s_ops->kluster)(seg, addr, delta - PAGESIZE))
|
||||
break; /* page not eligible */
|
||||
}
|
||||
|
||||
straddr = addr + delta;
|
||||
*offp = off = off + delta;
|
||||
*lenp = MAX(delta2 - delta, PAGESIZE);
|
||||
ASSERT(off >= vp_off);
|
||||
|
||||
if ((vp_off + vp_len) < (off + *lenp)) {
|
||||
ASSERT(vp_end > off);
|
||||
*lenp = vp_end - off;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate pages for <vp, off> at <seg, addr> for delta bytes.
|
||||
* Note that for the non-read ahead case we might not have the
|
||||
* memory available right now, so the rm_allocpage operation could
|
||||
* sleep and someone else might race to this same spot if the
|
||||
* vnode object was not locked before this routine was called.
|
||||
*/
|
||||
delta2 = *lenp;
|
||||
delta = roundup(delta2, PAGESIZE);
|
||||
pp = rm_allocpage(seg, straddr, (u_int)delta, 1); /* `pp' list kept */
|
||||
|
||||
plist = pp;
|
||||
do {
|
||||
pp->p_intrans = 1;
|
||||
pp->p_pagein = 1;
|
||||
|
||||
#ifdef TRACE
|
||||
{
|
||||
addr_t taddr = straddr + (off - *offp);
|
||||
|
||||
trace3(TR_SEG_KLUSTER, seg, taddr, isra);
|
||||
trace6(TR_SEG_ALLOCPAGE, seg, taddr, TRC_SEG_UNK,
|
||||
vp, off, pp);
|
||||
}
|
||||
#endif TRACE
|
||||
if (page_enter(pp, vp, off)) { /* `pp' locked if ok */
|
||||
/*
|
||||
* Oops - somebody beat us to the punch
|
||||
* and has entered the page before us.
|
||||
* To recover, we use pvn_fail to free up
|
||||
* all the pages we have already allocated
|
||||
* and we return NULL so that whole operation
|
||||
* is attempted over again. This should never
|
||||
* happen if the caller of pvn_kluster does
|
||||
* vnode locking to prevent multiple processes
|
||||
* from creating the same pages as the same time.
|
||||
*/
|
||||
pvn_fail(plist, B_READ);
|
||||
return ((struct page *)NULL);
|
||||
}
|
||||
off += PAGESIZE;
|
||||
} while ((pp = pp->p_next) != plist);
|
||||
|
||||
return (plist);
|
||||
}
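/*
 * Editorial worked example (not part of the original source), assuming
 * a hypothetical 4096-byte (0x1000) page: with off = 0x6000,
 * vp_off = 0x4000 and vp_len = 0x8000 (so vp_end = 0xc000), the forward
 * scan above grows delta in page steps until the segment driver
 * declines or a page already exists, say at delta = 0x3000; the
 * backward scan then lowers delta toward vp_off, say to -0x1000.  The
 * kluster returned is *offp = 0x5000 and *lenp = 0x4000, i.e. four
 * pages bracketing the faulting offset, never extending outside
 * [vp_off, vp_end).
 */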
|
||||
|
||||
/*
|
||||
* Entry point to be used by page r/w subr's and other such routines which
|
||||
* want to report an error and abort a list of pages setup for pageio
|
||||
* which do not go through the normal pvn_done processing.
|
||||
*/
|
||||
void
|
||||
pvn_fail(plist, flags)
|
||||
struct page *plist;
|
||||
int flags;
|
||||
{
|
||||
static struct buf abort_buf;
|
||||
struct buf *bp;
|
||||
struct page *pp;
|
||||
int len;
|
||||
int s;
|
||||
|
||||
len = 0;
|
||||
pp = plist;
|
||||
do {
|
||||
len += PAGESIZE;
|
||||
} while ((pp = pp->p_next) != plist);
|
||||
|
||||
bp = &abort_buf;
|
||||
s = splimp();
|
||||
while (bp->b_pages != NULL) {
|
||||
(void) sleep((caddr_t)&bp->b_pages, PSWP+2);
|
||||
}
|
||||
(void) splx(s);
|
||||
/* ~B_PAGEIO is a flag to pvn_done not to pageio_done the bp */
|
||||
bp->b_flags = B_ERROR | B_ASYNC | (flags & ~B_PAGEIO);
|
||||
bp->b_pages = plist;
|
||||
bp->b_bcount = len;
|
||||
pvn_done(bp); /* let pvn_done do all the work */
|
||||
if (bp->b_pages != NULL) {
|
||||
/* XXX - this should never happen, should it be a panic? */
|
||||
bp->b_pages = NULL;
|
||||
}
|
||||
wakeup((caddr_t)&bp->b_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* Routine to be called when pageio's complete.
|
||||
* Can only be called from process context, not
|
||||
* from interrupt level.
|
||||
*/
|
||||
void
|
||||
pvn_done(bp)
|
||||
register struct buf *bp;
|
||||
{
|
||||
register struct page *pp;
|
||||
register int bytes;
|
||||
|
||||
pp = bp->b_pages;
|
||||
|
||||
/*
|
||||
* Release any I/O mappings to the pages described by the
|
||||
* buffer that are finished before processing the completed I/O.
|
||||
*/
|
||||
if ((bp->b_flags & B_REMAPPED) && (pp->p_nio <= 1))
|
||||
bp_mapout(bp);
|
||||
|
||||
/*
|
||||
* Handle each page in the I/O operation.
|
||||
*/
|
||||
for (bytes = 0; bytes < bp->b_bcount; bytes += PAGESIZE) {
|
||||
struct vnode *vp;
|
||||
u_int off;
|
||||
register int s;
|
||||
|
||||
if (pp->p_nio > 1) {
|
||||
/*
|
||||
* There were multiple IO requests outstanding
|
||||
* for this particular page. This can happen
|
||||
* when the file system block size is smaller
|
||||
* than PAGESIZE. Since there are more IO
|
||||
* requests still outstanding, we don't process
|
||||
* the page given on the buffer now.
|
||||
*/
|
||||
if (bp->b_flags & B_ERROR) {
|
||||
if (bp->b_flags & B_READ) {
|
||||
trace3(TR_PG_PVN_DONE, pp, pp->p_vnode,
|
||||
pp->p_offset);
|
||||
page_abort(pp); /* assumes no waiting */
|
||||
} else {
|
||||
pg_setmod(pp, 1);
|
||||
}
|
||||
}
|
||||
pp->p_nio--;
|
||||
break;
|
||||
/* real page locked for the other io operations */
|
||||
}
|
||||
|
||||
pp = bp->b_pages;
|
||||
page_sub(&bp->b_pages, pp);
|
||||
|
||||
vp = pp->p_vnode;
|
||||
off = pp->p_offset;
|
||||
pp->p_intrans = 0;
|
||||
pp->p_pagein = 0;
|
||||
|
||||
PAGE_RELE(pp);
|
||||
/*
|
||||
* Verify the page identity before checking to see
|
||||
* if the page was freed by PAGE_RELE(). This must
|
||||
* be protected by splvm() to prevent the page from
|
||||
* being ripped away at interrupt level.
|
||||
*/
|
||||
s = splvm();
|
||||
if (pp->p_vnode != vp || pp->p_offset != off || pp->p_free) {
|
||||
(void) splx(s);
|
||||
continue;
|
||||
}
|
||||
(void) splx(s);
|
||||
|
||||
/*
|
||||
* Check to see if the page has an error.
|
||||
*/
|
||||
if ((bp->b_flags & (B_ERROR|B_READ)) == (B_ERROR|B_READ)) {
|
||||
page_abort(pp);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we are to be doing invalidation.
|
||||
* XXX - Failed writes with B_INVAL set are
|
||||
* not handled appropriately.
|
||||
*/
|
||||
if ((bp->b_flags & B_INVAL) != 0) {
|
||||
page_abort(pp);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((bp->b_flags & (B_ERROR | B_READ)) == B_ERROR) {
|
||||
/*
|
||||
* Write operation failed. We don't want
|
||||
* to abort (or free) the page. We set
|
||||
* the mod bit again so it will get
|
||||
* written back again later when things
|
||||
* are hopefully better again.
|
||||
*/
|
||||
pg_setmod(pp, 1);
|
||||
}
|
||||
|
||||
if (bp->b_flags & B_FREE) {
|
||||
cnt.v_pgpgout++;
|
||||
if (pp->p_keepcnt == 0 && pp->p_lckcnt == 0) {
|
||||
/*
|
||||
* Check if someone has reclaimed the
|
||||
* page. If no ref or mod, no one is
|
||||
* using it so we can free it.
|
||||
* The rest of the system is careful
|
||||
* to use the ghost unload flag to unload
|
||||
* translations set up for IO w/o
|
||||
* affecting ref and mod bits.
|
||||
*/
|
||||
if (pp->p_mod == 0 && pp->p_mapping)
|
||||
hat_pagesync(pp);
|
||||
if (!pp->p_ref && !pp->p_mod) {
|
||||
if (pp->p_mapping)
|
||||
hat_pageunload(pp);
|
||||
#ifdef MULTIPROCESSOR
|
||||
}
|
||||
/*
|
||||
* The page may have been modified
|
||||
* between the hat_pagesync and
|
||||
* the hat_pageunload, and hat_pageunload
|
||||
* will have picked up final ref and mod
|
||||
* bits from the PTEs. So, check 'em again.
|
||||
*/
|
||||
if (!pp->p_ref && !pp->p_mod) {
|
||||
#endif MULTIPROCESSOR
|
||||
page_free(pp,
|
||||
(int)(bp->b_flags & B_DONTNEED));
|
||||
if ((bp->b_flags & B_DONTNEED) == 0)
|
||||
cnt.v_dfree++;
|
||||
} else {
|
||||
page_unlock(pp);
|
||||
cnt.v_pgrec++;
|
||||
}
|
||||
} else {
|
||||
page_unlock(pp);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
page_unlock(pp); /* a read or write */
|
||||
}
|
||||
|
||||
/*
|
||||
* Count pageout operations if applicable. Release the
|
||||
* buf struct associated with the operation if async & pageio.
|
||||
*/
|
||||
if (bp->b_flags & B_FREE)
|
||||
cnt.v_pgout++;
|
||||
if ((bp->b_flags & (B_ASYNC | B_PAGEIO)) == (B_ASYNC | B_PAGEIO))
|
||||
pageio_done(bp);
|
||||
}
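/*
 * Editorial note (not part of the original source): the p_nio > 1 case
 * above covers sub-page i/o.  For example, with a hypothetical
 * 8192-byte page and a 4096-byte file system block size, one page is
 * filled by two buffers; the first completion only decrements p_nio and
 * returns, and the page itself is processed when the second buffer
 * completes.
 */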
|
||||
|
||||
/*
|
||||
* Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI}
|
||||
* B_DELWRI indicates that this page is part of a kluster operation and
|
||||
* is only to be considered if it doesn't involve any waiting here.
|
||||
* Returns non-zero if page added to dirty list.
|
||||
*
|
||||
* NOTE: The caller must ensure that the page is not on the free list.
|
||||
*/
|
||||
static int
|
||||
pvn_getdirty(pp, dirty, flags)
|
||||
register struct page *pp, **dirty;
|
||||
int flags;
|
||||
{
|
||||
register int s;
|
||||
struct vnode *vp;
|
||||
u_int offset;
|
||||
|
||||
ASSERT(pp->p_free == 0);
|
||||
vp = pp->p_vnode;
|
||||
offset = pp->p_offset;
|
||||
|
||||
/*
|
||||
* If page is logically locked, forget it.
|
||||
*
|
||||
* XXX - Can a page locked by some other process be
|
||||
* written out or invalidated?
|
||||
*/
|
||||
if (pp->p_lckcnt != 0)
|
||||
return (0);
|
||||
|
||||
if ((flags & B_DELWRI) != 0 && (pp->p_keepcnt != 0 || pp->p_lock)) {
|
||||
/*
|
||||
* This is a klustering case that would
|
||||
* cause us to block, just give up.
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (pp->p_intrans && (flags & (B_INVAL | B_ASYNC)) == B_ASYNC) {
|
||||
/*
|
||||
* Don't bother waiting for an intrans page if we are not
|
||||
* doing invalidation and this is an async operation
|
||||
* (the page will be correct when the current io completes).
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If i/o is in progress on the page or we have to
|
||||
* invalidate or free the page, wait for the page keep
|
||||
* count to go to zero.
|
||||
*/
|
||||
if (pp->p_intrans || (flags & (B_INVAL | B_FREE)) != 0) {
|
||||
if (pp->p_keepcnt != 0) {
|
||||
page_wait(pp);
|
||||
/*
|
||||
* Re-verify page identity since it could have
|
||||
* changed while we were sleeping.
|
||||
*/
|
||||
s = splvm();
|
||||
if (pp->p_vnode != vp || pp->p_offset != offset) {
|
||||
/*
|
||||
* Lost the page - nothing to do?
|
||||
*/
|
||||
(void) splx(s);
|
||||
return (0);
|
||||
}
|
||||
(void) splx(s);
|
||||
/*
|
||||
* The page has not lost its identity and hence
|
||||
* should not be on the free list.
|
||||
*/
|
||||
ASSERT(pp->p_free == 0);
|
||||
}
|
||||
}
|
||||
|
||||
page_lock(pp);
|
||||
|
||||
/*
|
||||
* If the page has mappings and it is not the case that the
|
||||
* page is already marked dirty and we are going to unload
|
||||
* the page below because we are going to free/invalidate
|
||||
* it, then we sync current mod bits from the hat layer now.
|
||||
*/
|
||||
if (pp->p_mapping && !(pp->p_mod && (flags & (B_FREE | B_INVAL)) != 0))
|
||||
hat_pagesync(pp);
|
||||
|
||||
if (pp->p_mod == 0) {
|
||||
if ((flags & (B_INVAL | B_FREE)) != 0) {
|
||||
if (pp->p_mapping)
|
||||
hat_pageunload(pp);
|
||||
if ((flags & B_INVAL) != 0) {
|
||||
page_abort(pp);
|
||||
return (0);
|
||||
}
|
||||
if (pp->p_free == 0) {
|
||||
if ((flags & B_FREE) != 0) {
|
||||
page_free(pp, (flags & B_DONTNEED));
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
}
|
||||
page_unlock(pp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Page is dirty, get it ready for the write back
|
||||
* and add page to the dirty list. First unload
|
||||
* the page if we are going to free/invalidate it.
|
||||
*/
|
||||
if (pp->p_mapping && (flags & (B_FREE | B_INVAL)) != 0)
|
||||
hat_pageunload(pp);
|
||||
pg_setmod(pp, 0);
|
||||
pg_setref(pp, 0);
|
||||
trace3(TR_PG_PVN_GETDIRTY, pp, pp->p_vnode, pp->p_offset);
|
||||
pp->p_intrans = 1;
|
||||
/*
|
||||
* XXX - The `p_pagein' bit is set for asynchronous or
|
||||
* synchronous invalidates to prevent other processes
|
||||
* from accessing the page in the window after the i/o is
|
||||
* complete but before the page is aborted. If this is not
|
||||
* done, updates to the page before it is aborted will be lost.
|
||||
*/
|
||||
pp->p_pagein = (flags & B_INVAL) ? 1 : 0;
|
||||
PAGE_HOLD(pp);
|
||||
page_sortadd(dirty, pp);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Run down the vplist and handle all pages whose offset is >= off.
|
||||
* Returns a list of dirty kept pages all ready to be written back.
|
||||
*
|
||||
* Assumptions:
|
||||
* The vp is already locked by the VOP_PUTPAGE routine calling this.
|
||||
* That VOP_GETPAGE also locks the vp, and thus no one can
|
||||
* add a page to the vp list while the vnode is locked.
|
||||
* Flags are {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
|
||||
*/
|
||||
struct page *
|
||||
pvn_vplist_dirty(vp, off, flags)
|
||||
register struct vnode *vp;
|
||||
u_int off;
|
||||
int flags;
|
||||
{
|
||||
register struct page *pp;
|
||||
register struct page *ppnext;
|
||||
register struct page *ppsav;
|
||||
register struct page *ppnextnext;
|
||||
register int ppsav_wasfree, pp_wasfree;
|
||||
register int ppsav_age, pp_age;
|
||||
struct page *dirty;
|
||||
register int s;
|
||||
int on_iolist;
|
||||
|
||||
s = splvm();
|
||||
if (vp->v_type == VSOCK || vp->v_type == VCHR ||
|
||||
(pp = vp->v_pages) == NULL) {
|
||||
(void) splx(s);
|
||||
return ((struct page *)NULL);
|
||||
}
|
||||
|
||||
#define PAGE_RECLAIM(pp, wasfree, age) \
|
||||
{ \
|
||||
if ((pp)->p_free) { \
|
||||
age = (pp)->p_age; \
|
||||
page_reclaim(pp); \
|
||||
wasfree = 1; \
|
||||
} else { \
|
||||
age = wasfree = 0; \
|
||||
} \
|
||||
}
|
||||
#define PAGE_REFREE(pp, wasfree, age) \
|
||||
{ \
|
||||
if (wasfree && (pp)->p_keepcnt == 0 && (pp)->p_mapping == NULL) \
|
||||
page_free(pp, age); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Traverse the page list. We have to be careful since pages
|
||||
* can be removed from the vplist while we are looking at it
|
||||
* (a page being pulled off the free list for something else,
|
||||
* or an async io operation completing and the page and/or
|
||||
* bp is marked for invalidation) so we have to be careful determining
|
||||
* that we have examined all the pages. We use ppsav to point
|
||||
* to the first page that stayed on the vp list after calling
|
||||
* pvn_getdirty and we PAGE_RECLAIM and PAGE_HOLD to prevent it
|
||||
* from going away on us. When we PAGE_UNKEEP the page, it will
|
||||
* go back to the free list if that's where we got it from. We
|
||||
* also need to PAGE_RECLAIM and PAGE_HOLD the next pp in the
|
||||
* vplist to prevent it from going away while we are traversing
|
||||
* the list.
|
||||
*/
|
||||
|
||||
ppnext = NULL;
|
||||
ppsav = NULL;
|
||||
ppsav_age = ppsav_wasfree = 0;
|
||||
pp_age = pp_wasfree = 0;
|
||||
|
||||
dirty = NULL;
|
||||
if (pp->p_vpnext != pp)
|
||||
ppnext = pp->p_vpnext;
|
||||
else
|
||||
ppnext = NULL;
|
||||
|
||||
for (;;) {
|
||||
		/* Reclaim and hold the next page */
		if (ppnext != NULL) {
			if (ppnext->p_free)
				page_reclaim(ppnext);
			PAGE_HOLD(ppnext);
		}

		if (pp != NULL) {
			PAGE_RECLAIM(pp, pp_wasfree, pp_age);

			/* Process the current page */
			if (pp->p_offset >= off) {
				(void) splx(s);
				on_iolist = pvn_getdirty(pp, &dirty, flags);
				s = splvm();
			} else
				on_iolist = 0;

			if (pp->p_vnode == vp) {
				/*
				 * If the page identity hasn't changed and
				 * it isn't dirty, free it if reclaimed
				 * from the free list.
				 */
				if (!on_iolist && !pp->p_free)
					PAGE_REFREE(pp, pp_wasfree, pp_age);

				/*
				 * If we haven't found a marker before,
				 * use the current page as our marker.
				 */
				if (ppsav == NULL) {
					ppsav = pp;
					PAGE_RECLAIM(ppsav, ppsav_wasfree,
					    ppsav_age);
					PAGE_HOLD(ppsav);
				}
			}
		}

		/* If no pages left on list, we're done */
		if (ppnext == NULL)
			break;

		/* Compute the "next" next page */
		if (ppnext->p_vpnext != ppnext && ppnext->p_vpnext != ppsav)
			ppnextnext = ppnext->p_vpnext;
		else
			ppnextnext = NULL;

		/* Release the next page */
		PAGE_RELE(ppnext);

		/* If releasing the next page freed it, ignore it */
		if (ppnext->p_free) {
			ASSERT(ppnext->p_vnode == NULL);
			ppnext = NULL;
		}
		/* Move forward to look at next page */
		pp = ppnext;
		ppnext = ppnextnext;
	}

	if (ppsav != NULL) {
		PAGE_RELE(ppsav);
		if (!ppsav->p_free)
			PAGE_REFREE(ppsav, ppsav_wasfree, ppsav_age);
	}
	(void) splx(s);
	return (dirty);
}
#undef PAGE_RECLAIM
#undef PAGE_REFREE

/*
 * Used when we need to find a page but don't care about free pages.
 */
static struct page *
pvn_pagefind(vp, off)
	register struct vnode *vp;
	register u_int off;
{
	register struct page *pp;
	register int s;

	s = splvm();
	pp = page_exists(vp, off);
	if (pp != NULL && pp->p_free)
		pp = NULL;
	(void) splx(s);
	return (pp);
}

int pvn_range_noklust = 0;

/*
 * Use page_find() to locate and handle all pages for this vnode whose
 * offset is >= off and < eoff. This routine will also do klustering
 * out to offlo and offhi until a page is not found. We assume
 * that offlo <= off and offhi >= eoff.
 *
 * Returns a list of dirty kept pages all ready to be written back.
 */
struct page *
pvn_range_dirty(vp, off, eoff, offlo, offhi, flags)
	register struct vnode *vp;
	u_int off, eoff;
	u_int offlo, offhi;
	int flags;
{
	struct page *dirty = NULL;
	register struct page *pp;
	register u_int o;
	register struct page *(*pfind)();

	ASSERT(offlo <= off && offhi >= eoff);

	off &= PAGEMASK;
	eoff = (eoff + PAGEOFFSET) & PAGEMASK;

	/*
	 * If we are not invalidating pages, use the routine
	 * pvn_pagefind() to prevent reclaiming them from the
	 * free list.
	 */
	if ((flags & B_INVAL) == 0)
		pfind = pvn_pagefind;
	else
		pfind = page_find;

	/* first do all the pages from [off..eoff) */
	for (o = off; o < eoff; o += PAGESIZE) {
		pp = (*pfind)(vp, o);
		if (pp != NULL) {
			(void) pvn_getdirty(pp, &dirty, flags);
		}
	}

	if (pvn_range_noklust)
		return (dirty);

	/* now scan backwards looking for pages to kluster */
	for (o = off - PAGESIZE; (int)o >= 0 && o >= offlo; o -= PAGESIZE) {
		pp = (*pfind)(vp, o);
		if (pp == NULL)
			break;		/* page not found */
		if (pvn_getdirty(pp, &dirty, flags | B_DELWRI) == 0)
			break;		/* page not added to dirty list */
	}

	/* now scan forwards looking for pages to kluster */
	for (o = eoff; o < offhi; o += PAGESIZE) {
		pp = (*pfind)(vp, o);
		if (pp == NULL)
			break;		/* page not found */
		if (pvn_getdirty(pp, &dirty, flags | B_DELWRI) == 0)
			break;		/* page not added to dirty list */
	}

	return (dirty);
}
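
/*
 * Illustrative sketch (not part of the original source): how a file
 * system putpage routine might call pvn_range_dirty() to collect a
 * klustered list of dirty, kept pages and push them out.  The names
 * foofs_putpage_range, foofs_writelist and FOO_KLUSTSZ (assumed to be
 * a power-of-two byte count) are invented for this example; only
 * pvn_range_dirty() itself is from this file.
 */
#ifdef notdef
static int
foofs_putpage_range(vp, off, len, flags, cred)
	struct vnode *vp;
	u_int off, len;
	int flags;
	struct ucred *cred;
{
	struct page *dirty;
	u_int eoff = off + len;
	u_int offlo, offhi;

	/* kluster out to a FOO_KLUSTSZ-aligned window around [off..eoff) */
	offlo = off & ~(FOO_KLUSTSZ - 1);
	offhi = (eoff + FOO_KLUSTSZ - 1) & ~(FOO_KLUSTSZ - 1);

	dirty = pvn_range_dirty(vp, off, eoff, offlo, offhi, flags);
	if (dirty == NULL)
		return (0);
	/* hand the kept, dirty list to the fs specific write routine */
	return (foofs_writelist(vp, dirty, flags, cred));
}
#endif notdef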

/*
 * Take care of invalidating all the pages for vnode vp going to size
 * vplen. This includes zero'ing out zbytes worth of file beyond vplen.
 * This routine should only be called with the vp locked by the file
 * system code so that more pages cannot be added while we sleep here.
 */
void
pvn_vptrunc(vp, vplen, zbytes)
	register struct vnode *vp;
	register u_int vplen;
	u_int zbytes;
{
	register struct page *pp;
	register int s;

	if (vp->v_pages == NULL || vp->v_type == VCHR || vp->v_type == VSOCK)
		return;

	/*
	 * Simple case - abort all the pages on the vnode
	 */
	if (vplen == 0) {
		s = splvm();
		while ((pp = vp->v_pages) != (struct page *)NULL) {
			/*
			 * When aborting these pages, we wait to
			 * make sure they are really gone.
			 */
			if (pp->p_keepcnt != 0) {
				(void) splx(s);
				page_wait(pp);
				s = splvm();
				if (pp->p_vnode != vp)
					continue;
			} else {
				if (pp->p_free)
					page_reclaim(pp);
			}
			page_lock(pp);
			page_abort(pp);
		}
		(void) splx(s);
		return;
	}

	/*
	 * Tougher case - have to find all the pages on the
	 * vnode which need to be aborted or partially zeroed.
	 */

	/*
	 * First we get the last page and handle the partial
	 * zeroing via kernel mappings. This will make the page
	 * dirty so that we know that when this page is written
	 * back, the zeroed information will go out with it. If
	 * the page is not currently in memory, then the kzero
	 * operation will cause it to be brought in. We use kzero
	 * instead of bzero so that if the page cannot be read in
	 * for any reason, the system will not panic. We need
	 * to zero out a minimum of the fs-given zbytes, but we
	 * might also have to do more to get the entire last page.
	 */
	if (zbytes != 0) {
		addr_t addr;

		if ((zbytes + (vplen & MAXBOFFSET)) > MAXBSIZE)
			panic("pvn_vptrunc zbytes");
		addr = segmap_getmap(segkmap, vp, vplen & MAXBMASK);
		(void) kzero(addr + (vplen & MAXBOFFSET),
		    MAX(zbytes, PAGESIZE - (vplen & PAGEOFFSET)));
		(void) segmap_release(segkmap, addr, SM_WRITE | SM_ASYNC);
	}

	/*
	 * Synchronously abort all pages on the vp list which are
	 * beyond the new length. The algorithm here is to start
	 * scanning at the beginning of the vplist until there
	 * are no pages with an offset >= vplen. If we find such
	 * a page, we wait for it if it is kept for any reason and
	 * then we abort it after verifying that it is still a page
	 * that needs to go away. We assume here that the vplist
	 * is not messed with at interrupt level.
	 */

	s = splvm();
again:
	for (pp = vp->v_pages; pp != NULL; pp = pp->p_vpnext) {
		if (pp->p_offset >= vplen) {
			/* need to abort this page */
			if (pp->p_keepcnt != 0) {
				(void) splx(s);
				page_wait(pp);
				s = splvm();
				/* verify page identity again */
				if (pp->p_vnode != vp || pp->p_offset < vplen)
					goto again;
			} else {
				if (pp->p_free)
					page_reclaim(pp);
			}
			page_lock(pp);
			page_abort(pp);
			goto again;	/* start over again */
		}
		if (pp == pp->p_vpnext || vp->v_pages == pp->p_vpnext)
			break;
	}
	(void) splx(s);
}

/*
 * This routine is called when the low level address translation
 * code decides to unload a translation. It calls back to the
 * segment driver which in many cases ends up here.
 */
/*ARGSUSED*/
void
pvn_unloadmap(vp, offset, ref, mod)
	struct vnode *vp;
	u_int offset;
	u_int ref, mod;
{

	/*
	 * XXX - what is the pvn code going to do w/ this information?
	 * This guy gets called for each loaded page when an executable
	 * using the segvn driver terminates...
	 */
}

/*
 * Handles common work of the VOP_GETPAGE routines when more than
 * one page must be returned by calling a file system specific operation
 * to do most of the work. Must be called with the vp already locked
 * by the VOP_GETPAGE routine.
 */
int
pvn_getpages(getapage, vp, off, len, protp, pl, plsz, seg, addr, rw, cred)
	int (*getapage)();
	struct vnode *vp;
	u_int off, len;
	u_int *protp;
	struct page *pl[];
	u_int plsz;
	struct seg *seg;
	register addr_t addr;
	enum seg_rw rw;
	struct ucred *cred;
{
	register struct page **ppp;
	register u_int o, eoff;
	u_int sz;
	int err;

	ASSERT(plsz >= len);	/* ensure that we have enough space */

	/*
	 * Loop one page at a time and let the getapage function fill
	 * in the next page in the array. We only allow one page to be
	 * returned at a time (except for the last page) so that we
	 * don't have any problems with duplicates and other such
	 * painful problems. This is a very simple-minded algorithm,
	 * but it does the job correctly. We hope that a getapage call
	 * for a resident page that we might have been able to get
	 * from an earlier call doesn't cost too much.
	 */
	ppp = pl;
	sz = PAGESIZE;
	eoff = off + len;
	for (o = off; o < eoff; o += PAGESIZE, addr += PAGESIZE) {
		if (o + PAGESIZE >= eoff) {
			/*
			 * Last time through - allow all of
			 * what's left of the pl[] array to be used.
			 */
			sz = plsz - (o - off);
		}
		err = (*getapage)(vp, o, protp, ppp, sz, seg, addr, rw, cred);
		if (err) {
			/*
			 * Release any pages we already got.
			 */
			if (o > off && pl != NULL) {
				for (ppp = pl; *ppp != NULL; *ppp++ = NULL) {
					PAGE_RELE(*ppp);
				}
			}
			break;
		}
		if (pl != NULL)
			ppp++;
	}

	return (err);
}
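
/*
 * Illustrative sketch (not part of the original source): a file system
 * VOP_GETPAGE routine would typically pass its single-page "getapage"
 * workhorse to pvn_getpages() when the request covers more than one
 * page.  foofs_getpage and foofs_getapage are invented names; the
 * calling convention shown is the one pvn_getpages() uses above.
 */
#ifdef notdef
static int foofs_getapage();

static int
foofs_getpage(vp, off, len, protp, pl, plsz, seg, addr, rw, cred)
	struct vnode *vp;
	u_int off, len;
	u_int *protp;
	struct page *pl[];
	u_int plsz;
	struct seg *seg;
	addr_t addr;
	enum seg_rw rw;
	struct ucred *cred;
{

	if (len <= PAGESIZE)
		return (foofs_getapage(vp, off, protp, pl, plsz,
		    seg, addr, rw, cred));
	return (pvn_getpages(foofs_getapage, vp, off, len, protp,
	    pl, plsz, seg, addr, rw, cred));
}
#endif notdef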
87
sys/vm/vm_rm.c
Normal file
@@ -0,0 +1,87 @@
/* @(#)vm_rm.c 1.1 94/10/31 SMI */

/*
 * Copyright (c) 1987 by Sun Microsystems, Inc.
 */

/*
 * VM - resource manager
 * As you can see, it needs lots of work
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/proc.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/page.h>

/*ARGSUSED*/
struct page *
rm_allocpage(seg, addr, len, canwait)
	struct seg *seg;
	addr_t addr;
	u_int len;
	int canwait;
{

	return (page_get(len, canwait));
}

/*
 * This routine is called when we couldn't allocate an anon slot.
 * For now, we simply print out a message and kill off the process
 * that happened to have gotten burned.
 *
 * XXX - swap reservation needs lots of work so this only happens in
 * `nice' places or we need to have a method to allow for recovery.
 */
void
rm_outofanon()
{
	struct proc *p;

	p = u.u_procp;
	printf("Sorry, pid %d (%s) was killed due to lack of swap space\n",
	    p->p_pid, u.u_comm);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
	/*NOTREACHED*/
}

void
rm_outofhat()
{

	panic("out of mapping resources");	/* XXX */
	/*NOTREACHED*/
}

/*
 * Yield the memory claim requirement for an address space.
 *
 * This is currently implemented as the number of active hardware
 * translations that have page structures. Therefore, it can
 * underestimate the traditional resident set size, e.g., if the
 * physical page is present and the hardware translation is missing;
 * and it can overestimate the rss, e.g., if there are active
 * translations to a frame buffer with page structs.
 * Also, it does not take sharing into account.
 */
int
rm_asrss(as)
	struct as *as;
{

	return (as == (struct as *)NULL ? 0 : as->a_rss);
}
132
sys/vm/vm_seg.c
Normal file
@@ -0,0 +1,132 @@
/* @(#)vm_seg.c 1.1 94/10/31 SMI */

/*
 * Copyright (c) 1988 by Sun Microsystems, Inc.
 */

/*
 * VM - segment management.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <machine/mmu.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/mp.h>

/*
 * Variables for maintaining the free list of segment structures.
 */
static struct seg *seg_freelist;
static int seg_freeincr = 24;

/*
 * Allocate a segment to cover [base, base+size)
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(as, base, size)
	struct as *as;
	register addr_t base;
	register u_int size;
{
	register struct seg *new;
	addr_t segbase;
	u_int segsize;

	segbase = (addr_t)((u_int)base & PAGEMASK);
	segsize =
	    (((u_int)(base + size) + PAGEOFFSET) & PAGEMASK) - (u_int)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);	/* bad virtual addr range */

	new = (struct seg *)new_kmem_fast_alloc((caddr_t *)&seg_freelist,
	    sizeof (*seg_freelist), seg_freeincr, KMEM_SLEEP);
	bzero((caddr_t)new, sizeof (*new));
	if (seg_attach(as, segbase, segsize, new) < 0) {
		kmem_fast_free((caddr_t *)&seg_freelist, (caddr_t)new);
		return ((struct seg *)NULL);
	}
	/* caller must fill in ops, data */
	return (new);
}
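
/*
 * Illustrative sketch (not part of the original source): as the comment
 * above notes, the caller of seg_alloc() is expected to fill in s_ops
 * and s_data itself.  segfoo_create, segfoo_ops and segfoo_data are
 * invented names showing the usual segment-driver "create" pattern.
 */
#ifdef notdef
struct seg *
segfoo_create(as, base, size)
	struct as *as;
	addr_t base;
	u_int size;
{
	register struct seg *seg;

	seg = seg_alloc(as, base, size);
	if (seg == NULL)
		return ((struct seg *)NULL);
	seg->s_ops = &segfoo_ops;		/* driver supplied ops vector */
	seg->s_data = (caddr_t)&segfoo_data;	/* driver private data */
	return (seg);
}
#endif notdef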

/*
 * Attach a segment to the address space. Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(as, base, size, seg)
	struct as *as;
	addr_t base;
	u_int size;
	struct seg *seg;
{

	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;
	if (as_addseg(as, seg) == A_SUCCESS)
		return (0);
	return (-1);
}

/*
 * Free the segment from its associated as.
 */
void
seg_free(seg)
	register struct seg *seg;
{
	register struct as *as = seg->s_as;

	if (as->a_segs == seg)
		as->a_segs = seg->s_next;	/* go to next seg */

	if (as->a_segs == seg)
		as->a_segs = NULL;		/* seg list is gone */
	else {
		seg->s_prev->s_next = seg->s_next;
		seg->s_next->s_prev = seg->s_prev;
	}

	if (as->a_seglast == seg)
		as->a_seglast = as->a_segs;

	/*
	 * If the segment private data field is NULL,
	 * then the segment driver is not attached yet.
	 */
	if (seg->s_data != NULL)
		(*seg->s_ops->free)(seg);

	kmem_fast_free((caddr_t *)&seg_freelist, (caddr_t)seg);
}

/*
 * Translate addr into page number within segment.
 */
u_int
seg_page(seg, addr)
	struct seg *seg;
	addr_t addr;
{

	return ((u_int)((addr - seg->s_base) >> PAGESHIFT));
}

/*
 * Return number of pages in segment.
 */
u_int
seg_pages(seg)
	struct seg *seg;
{

	return ((u_int)((seg->s_size + PAGEOFFSET) >> PAGESHIFT));
}
648
sys/vm/vm_swap.c
Normal file
@@ -0,0 +1,648 @@
/* @(#)vm_swap.c 1.1 94/10/31 SMI */

#ident "$SunId: @(#)vm_swap.c 1.2 91/02/19 SMI [RMTC] $"

/*
 * Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
 */

/*
 * Virtual swap device
 *
 * The virtual swap device consists of the logical concatenation of one
 * or more physical swap areas. It provides a logical array of anon
 * slots, each of which corresponds to a page of swap space.
 *
 * Each physical swap area has an associated anon array representing
 * its physical storage. These anon arrays are logically concatenated
 * sequentially to form the overall swap device anon array. Thus, the
 * offset of a given entry within this logical array is computed as the
 * sum of the sizes of each area preceding the entry plus the offset
 * within the area containing the entry.
 *
 * The anon array entries for unused swap slots within an area are
 * linked together into a free list. Allocation proceeds by finding a
 * suitable area (attempting to balance use among all the areas) and
 * then returning the first free entry within the area. Thus, there's
 * no linear relation between offset within the swap device and the
 * address (within its segment(s)) of the page that the slot backs;
 * instead, it's an arbitrary one-to-one mapping.
 *
 * Associated with each swap area is a swapinfo structure. These
 * structures are linked into a linear list that determines the
 * ordering of swap areas in the logical swap device. Each contains a
 * pointer to the corresponding anon array, the area's size, and its
 * associated vnode.
 */
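
/*
 * Illustrative sketch (not part of the original source): the logical
 * offset of an anon slot within the virtual swap device, as described
 * above, is the sum of the sizes of the preceding areas plus the byte
 * offset within the owning area.  swap_logical_off is an invented
 * name; only the swapinfo fields it uses (si_anon, si_eanon, si_size,
 * si_next) are from this file.
 */
#ifdef notdef
static u_int
swap_logical_off(ap)
	struct anon *ap;
{
	register struct swapinfo *sip;
	u_int base = 0;

	for (sip = swapinfo; sip != NULL; sip = sip->si_next) {
		if (sip->si_anon <= ap && ap <= sip->si_eanon)
			return (base + ptob(ap - sip->si_anon));
		base += sip->si_size;	/* skip over the preceding area */
	}
	panic("swap_logical_off");
	/* NOTREACHED */
}
#endif notdef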

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/bootconf.h>
#include <sys/trace.h>

#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/swap.h>

/* these includes are used for the "fake" swap support of /dev/drum */
#include <sun/mem.h>
#include <specfs/snode.h>

static struct swapinfo *silast;
struct swapinfo *swapinfo;

/*
 * To balance the load among multiple swap areas, we don't allow
 * more than swap_maxcontig allocations to be satisfied from a
 * single swap area before moving on to the next swap area. This
 * effectively "interleaves" allocations among the many swap areas.
 */
int swap_maxcontig = 1024 * 1024 / PAGESIZE;	/* 1MB of pages */

extern int klustsize;	/* from spec_vnodeops.c */
int swap_order = 1;	/* see swap_alloc,free */

#define	MINIROOTSIZE	14000	/* ~7 Meg */

/*
 * Initialize a new swapinfo structure.
 */
static int
swapinfo_init(vp, npages, skip)
	struct vnode *vp;
	register u_int npages;
	u_int skip;
{
	register struct anon *ap, *ap2;
	register struct swapinfo **sipp, *nsip;

	for (sipp = &swapinfo; nsip = *sipp; sipp = &nsip->si_next)
		if (nsip->si_vp == vp)
			return (EBUSY);	/* swap device already in use */

	nsip = (struct swapinfo *)new_kmem_zalloc(
	    sizeof (struct swapinfo), KMEM_SLEEP);
	nsip->si_vp = vp;
	nsip->si_size = ptob(npages);
	/*
	 * Don't indirect through NULL if called with npages < skip (too tacky)
	 */
	if (npages < skip)
		npages = skip;
	/*
	 * Don't sleep when allocating memory for the anon structures.
	 * This allocation can be large for very large swap spaces and we
	 * cannot count on such a contiguous chunk becoming available
	 * in the heap.
	 */
	nsip->si_anon = (struct anon *)new_kmem_zalloc(
	    npages * sizeof (struct anon), KMEM_NOSLEEP);
	if (!nsip->si_anon) {
		kmem_free(nsip, sizeof(struct swapinfo));
		return (ENOMEM);
	}
	nsip->si_eanon = &nsip->si_anon[npages - 1];
#ifdef RECORD_USAGE
	/*
	 * Monitoring of swap space usage is enabled, so malloc
	 * a parallel array to hold the PID responsible for
	 * causing the anon page to be created.
	 */
	nsip->si_pid = (short *)
	    new_kmem_zalloc(npages * sizeof (short), KMEM_NOSLEEP);
	if (!nsip->si_pid) {
		kmem_free(nsip->si_anon, npages * sizeof (struct anon));
		kmem_free(nsip, sizeof(struct swapinfo));
		return (ENOMEM);
	}
#endif RECORD_USAGE
	npages -= skip;

	/*
	 * ap2 now points to the first usable slot in the swap area.
	 * Set up free list links so that the head of the list is at
	 * the front of the usable portion of the array.
	 */
	ap = nsip->si_eanon;
	ap2 = nsip->si_anon + skip;
	while (--ap >= ap2)
		ap->un.an_next = ap + 1;
	if (npages == 0)	/* if size was <= skip */
		nsip->si_free = NULL;
	else
		nsip->si_free = ap + 1;
	anoninfo.ani_free += npages;
	anoninfo.ani_max += npages;

	*sipp = nsip;
	if (silast == NULL)	/* first swap device */
		silast = nsip;
	return (0);
}

/*
 * Initialize a swap vnode.
 */
int
swap_init(vp)
	struct vnode *vp;
{
	struct vattr vattr;
	u_int skip;
	int err;

	err = VOP_GETATTR(vp, &vattr, u.u_cred);	/* XXX - u.u_cred? */
	if (err) {
		printf("swap_init: getattr failed, errno %d\n", err);
		return (err);
	}

	/*
	 * To prevent swap I/O requests from crossing the boundary
	 * between swap areas, we erect a "fence" between areas by
	 * not allowing the first page of each swap area to be used.
	 * (This also prevents us from scribbling on the disk label
	 * if the swap partition is the first partition on the disk.)
	 * This may not be strictly necessary, since swap_blksize also
	 * prevents requests from crossing the boundary.
	 *
	 * If swapping on the root filesystem, don't put swap blocks that
	 * correspond to the miniroot filesystem on the swap free list.
	 */
	if (rootvp == vp)
		skip = btoc(roundup(dbtob(MINIROOTSIZE), klustsize));
	else
		skip = 1;

	err = swapinfo_init(vp, (u_int)btop(vattr.va_size), skip);

	if (!err)
		vp->v_flag |= VISSWAP;
	return (err);
}

/*
 * This routine is used to fake npages worth of swap space.
 * These pages will have no backing and cannot be paged out anywhere.
 */
swap_cons(npages)
	u_int npages;
{

	if (swapinfo_init((struct vnode *)NULL, npages, 0) != 0)
		panic("swap_cons");
}

/*
 * Points to (or close to) the last block handed to
 * swap_free. The theory is that if you free one in this area,
 * you'll probably free more, so use the hint as a starting point.
 * hint is reset on each free to the block that precedes the one
 * freed (or the block freed, if we can't find the block before it).
 * It is also reset if it points at a block that is allocated.
 *
 * XXX - swap_free and swap_alloc both manipulate hint; the free
 * lists are now protected with splswap(). Don't call into these routines
 * from higher level interrupts!
 */
static struct {
	struct anon *ap;	/* pointer to the last freed */
	struct swapinfo *sip;	/* swap list for which hint is valid */
} hint;

int swap_hit;	/* hint helped */
int swap_miss;	/* hint was no good */


/*
 * Allocate a single page from the virtual swap device.
 */
struct anon *
swap_alloc()
{
	struct swapinfo *sip = silast;
	struct anon *ap;

	do {
		ap = sip->si_free;
		if (ap) {
			/*
			 * can't condition this on swap_order since some
			 * idiot might turn it on and off. It's not cool
			 * to have the hint point at an allocated block.
			 */
			if (hint.sip == sip && hint.ap == ap)
				hint.sip = NULL;
			sip->si_free = ap->un.an_next;
			if (++sip->si_allocs >= swap_maxcontig) {
				sip->si_allocs = 0;
				if (sip == silast) {
					silast = sip->si_next;
					if (silast == NULL)
						silast = swapinfo;
				}
			} else {
				silast = sip;
			}
# ifdef TRACE
			{
				struct vnode *vp;
				u_int off;

				swap_xlate(ap, &vp, &off);
				trace3(TR_MP_SWAP, vp, off, ap);
			}
# endif TRACE
#ifdef RECORD_USAGE
			if (u.u_procp) {
				/* swap monitoring is on - record the current PID */
				sip->si_pid[ap - sip->si_anon] = u.u_procp->p_pid;
			}
#endif RECORD_USAGE
			return (ap);
		}
		/*
		 * No more free anon slots here.
		 */
		sip->si_allocs = 0;
		sip = sip->si_next;
		if (sip == NULL)
			sip = swapinfo;
	} while (sip != silast);
	return ((struct anon *)NULL);
}
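
/*
 * Illustrative sketch (not part of the original source): a caller that
 * needs backing store typically pairs swap_alloc() with swap_xlate()
 * to turn the returned anon slot into the <vnode, offset> actually
 * used for pageout I/O.  alloc_backing_store is an invented name;
 * rm_outofanon() is the out-of-swap handler from vm_rm.c above.
 */
#ifdef notdef
static struct anon *
alloc_backing_store(vpp, offp)
	struct vnode **vpp;
	u_int *offp;
{
	register struct anon *ap;

	ap = swap_alloc();
	if (ap == NULL) {
		rm_outofanon();		/* no swap slots left */
		return ((struct anon *)NULL);
	}
	ap->an_refcnt = 1;		/* single reference to the new slot */
	swap_xlate(ap, vpp, offp);	/* name the slot for the I/O */
	return (ap);
}
#endif notdef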

/*
 * Free a swap page.
 * List is maintained in sorted order. Worst case is a linear search on the
 * list; we maintain a hint to mitigate this.
 *
 * Pointing the hint at the most recently free'd anon struct makes it
 * really fast to free anon pages in ascending order.
 *
 * Pointing the hint at the anon struct that is just *before* this makes
 * it really fast to free anon pages in descending order, at nearly zero
 * cost.
 *
 * This algorithm points the hint at the anon struct that points to
 * the one most recently free'd. When freeing a block of anon structs
 * presented in ascending order, the hint advances one block behind
 * the blocks as they are free'd. When freeing a block of anon structs
 * presented in descending order -- which happens if a large hunk of
 * memory is allocated in reverse order then free'd in forward order,
 * common enough to be a problem -- the hint remains pointing at the
 * anon struct that ends up pointing at each of the free'd blocks
 * in order. This is worth an example.
 *
 * Assume anons #2 and #9 are free, the hint points to anon #2, and
 * #2's "next" pointer goes to #9. Now, we present a set of swap_free
 * requests for blocks #8 through #3, in descending order. This results
 * in a series of hits on the hint, which just keeps pointing at #2.
 * The previous algorithm would have set the hint to each block as
 * it came in, resulting in worst-case behavior as the list had to
 * be scanned from the front.
 */
void
swap_free(ap)
	struct anon *ap;
{
	register struct swapinfo *sip = silast;
	register struct anon *tap, **tapp;
	register struct anon *tap_hint;

	/*
	 * Find the swap area containing ap and then put
	 * ap back on that area's free list.
	 */
	do {
		if (sip->si_anon <= ap && ap <= sip->si_eanon) {
			/*
			ap->un.an_next = sip->si_free;
			sip->si_free = ap;
			*/
			/*
			 * old unordered way
			 */
			if (!swap_order) {
				ap->un.an_next = sip->si_free;
				sip->si_free = ap;
#ifdef RECORD_USAGE
				/* Swap monitoring is on - undo the PID */
				sip->si_pid[ap - sip->si_anon] = 0;
#endif RECORD_USAGE
				return;
			}
			/*
			 * Do it in order; use hint if possible
			 */
			tap = hint.ap;
			if (hint.sip == sip && tap < ap) {
				/*
				 * The anon we are freeing
				 * follows the hint tap somewhere.
				 * Save the hint and advance
				 * to the next free anon.
				 */
				tapp = &tap->un.an_next;
				tap_hint = tap;
				tap = tap->un.an_next;
				swap_hit++;
			} else {
				/*
				 * Wrong swapinfo, or
				 * the anon being free'd
				 * precedes the hint.
				 * We must start scanning
				 * from the front of the
				 * list. The best hint we
				 * can seed with is the
				 * anon we are freeing.
				 */
				tapp = &sip->si_free;
				tap = sip->si_free;
				tap_hint = ap;
				swap_miss++;
			}
			/*
			 * advance tap until it is greater
			 * than the incoming anon.
			 */
			while (tap && tap < ap) {
				tapp = &tap->un.an_next;
				tap_hint = tap;
				tap = tap->un.an_next;
			}
			*tapp = ap;
			ap->un.an_next = tap;
#ifdef RECORD_USAGE
			/* Swap monitoring is on - undo the PID */
			sip->si_pid[ap - sip->si_anon] = 0;
#endif RECORD_USAGE
			hint.sip = sip;
			hint.ap = tap_hint;
			return;
		}
		sip = sip->si_next;
		if (sip == NULL)
			sip = swapinfo;
	} while (sip != silast);
	panic("swap_free");
	/* NOTREACHED */
}
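
/*
 * Illustrative sketch (not part of the original source): a small,
 * self-contained model of the ordered free-list insert with a
 * predecessor "hint", as described in the block comment above
 * swap_free().  The struct and function names are invented; pointer
 * comparison stands in for the anon-array ordering used by the real
 * code.
 */
#ifdef notdef
struct slot {
	struct slot *next;
};

static struct slot *freehead;	/* sorted, ascending by address */
static struct slot *freehint;	/* last known predecessor */

static void
model_free(sp)
	struct slot *sp;
{
	register struct slot *tp, **tpp, *prev;

	if (freehint != NULL && freehint < sp) {
		prev = freehint;	/* resume the scan after the hint */
		tpp = &freehint->next;
		tp = freehint->next;
	} else {
		prev = sp;		/* seed the hint with the freed slot */
		tpp = &freehead;
		tp = freehead;
	}
	while (tp != NULL && tp < sp) {	/* advance to the insertion point */
		prev = tp;
		tpp = &tp->next;
		tp = tp->next;
	}
	*tpp = sp;			/* splice sp in, keeping the order */
	sp->next = tp;
	freehint = prev;		/* predecessor of sp, or sp itself */
}
#endif notdef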

/*
 * Return the <vnode, offset> pair
 * corresponding to the given anon struct.
 */
void
swap_xlate(ap, vpp, offsetp)
	struct anon *ap;
	struct vnode **vpp;
	u_int *offsetp;
{
	register struct swapinfo *sip = silast;

	do {
		if (sip->si_anon <= ap && ap <= sip->si_eanon) {
			*offsetp = ptob(ap - sip->si_anon);
			*vpp = sip->si_vp;
			return;
		}
		sip = sip->si_next;
		if (sip == NULL)
			sip = swapinfo;
	} while (sip != silast);
	panic("swap_xlate");
	/* NOTREACHED */
}

/*
 * Like swap_xlate, but return a status instead of panic'ing.
 * Used by dump routines when we know we may be corrupted.
 */
swap_xlate_nopanic(ap, vpp, offsetp)
	struct anon *ap;
	struct vnode **vpp;
	u_int *offsetp;
{
	register struct swapinfo *sip = swapinfo;

	do {
		if (sip->si_anon <= ap && ap <= sip->si_eanon) {
			*offsetp = (ap - sip->si_anon) << PAGESHIFT;
			*vpp = sip->si_vp;
			return (1);
		}
	} while (sip = sip->si_next);

	/* Couldn't find it; return failure */
	return (0);
}

/*
 * Return the anon struct corresponding to the given
 * <vnode, offset> if it is part of the virtual swap device.
 */
struct anon *
swap_anon(vp, offset)
	struct vnode *vp;
	u_int offset;
{
	register struct swapinfo *sip = silast;

	if (vp && sip) {
		do {
			if (vp == sip->si_vp && offset < sip->si_size)
				return (sip->si_anon + (offset >> PAGESHIFT));
			sip = sip->si_next;
			if (sip == NULL)
				sip = swapinfo;
		} while (sip != silast);
	}
	/*
	 * Note - we don't return the anon structure for
	 * fake'd anon slots which have no real vp.
	 */
	return ((struct anon *)NULL);
}

/*
 * swread and swwrite implement the /dev/drum device, an indirect,
 * user-visible device to allow reading of the (virtual) swap device.
 */

/*ARGSUSED*/
swread(dev, uio)
	dev_t dev;
	struct uio *uio;
{

	return (sw_rdwr(uio, UIO_READ));
}

/*ARGSUSED*/
swwrite(dev, uio)
	dev_t dev;
	struct uio *uio;
{

	return (sw_rdwr(uio, UIO_WRITE));
}

/*
 * Handle all the work of reading "fake" swap pages that are in memory.
 */
static int
fake_sw_rdwr(uio, rw, cred)
	register struct uio *uio;
	enum uio_rw rw;
	struct ucred *cred;
{
	struct page *pp;
	struct vnode *memvp;
	int nbytes;
	u_int off;
	int err;
	extern int mem_no;

	nbytes = uio->uio_resid;
	off = uio->uio_offset;
	memvp = makespecvp(makedev(mem_no, M_MEM), VCHR);

	do {
		/*
		 * Find the page corresponding to the "fake" name
		 * and then read the corresponding page from /dev/mem.
		 */
		pp = page_find((struct vnode *)NULL, (u_int)(off & PAGEMASK));
		if (pp == NULL) {
			err = EIO;
			break;
		}
		uio->uio_offset = ptob(page_pptonum(pp)) + (off & PAGEOFFSET);

		if ((off & PAGEOFFSET) == 0)
			uio->uio_resid = MIN(PAGESIZE, nbytes);
		else
			uio->uio_resid = min(ptob(btopr(off)) - off,
			    (u_int)nbytes);
		nbytes -= uio->uio_resid;
		off += uio->uio_resid;
		err = VOP_RDWR(memvp, uio, rw, 0, cred);
	} while (err == 0 && nbytes > 0 && uio->uio_resid == 0);

	VN_RELE(memvp);
	return (err);
}

/*
 * Common routine used to break up reads and writes to the
 * (virtual) swap device to the underlying vnode(s). This is
 * used to implement the user-visible /dev/drum interface.
 */
static int
sw_rdwr(uio, rw)
	register struct uio *uio;
	enum uio_rw rw;
{
	register struct swapinfo *sip = swapinfo;
	int nbytes = uio->uio_resid;
	u_int off = 0;
	int err = 0;

	do {
		if (uio->uio_offset >= off &&
		    uio->uio_offset < off + sip->si_size)
			break;
		off += sip->si_size;
	} while (sip = sip->si_next);

	if (sip) {
		uio->uio_offset -= off;
		do {
			uio->uio_resid = MIN(sip->si_size - uio->uio_offset,
			    nbytes);
			nbytes -= uio->uio_resid;
			if (sip->si_vp)
				err = VOP_RDWR(sip->si_vp, uio, rw, 0,
				    u.u_cred);
			else
				err = fake_sw_rdwr(uio, rw, u.u_cred);
			uio->uio_offset = 0;
		} while (err == 0 && nbytes > 0 && uio->uio_resid == 0 &&
		    (sip = sip->si_next));
		uio->uio_resid = nbytes + uio->uio_resid;
	}

	return (err);
}

/*
 * System call swapon(name) enables swapping on device name.
 * Return EBUSY if already swapping on this device.
 */
swapon()
{
	register struct a {
		char *name;
	} *uap = (struct a *)u.u_ap;
	struct vnode *vp;

	if (!suser())
		return;
	uap = (struct a *)u.u_ap;
	if (u.u_error = lookupname(uap->name, UIOSEG_USER, FOLLOW_LINK,
	    (struct vnode **)NULL, &vp))
		return;

	switch (vp->v_type) {
	case VBLK: {
		struct vnode *nvp;

		nvp = bdevvp(vp->v_rdev);
		VN_RELE(vp);
		vp = nvp;
		/*
		 * Call the partition's open routine, to give it a chance to
		 * check itself for consistency (e.g., for scrambled disk
		 * labels). (The open isn't otherwise required.)
		 */
		if (u.u_error = VOP_OPEN(&vp, FREAD|FWRITE, u.u_cred))
			goto out;
		break;
	}

	case VREG:
		if (vp->v_vfsp->vfs_flag & VFS_RDONLY) {
			u.u_error = EROFS;
			goto out;
		}
		if (u.u_error = VOP_ACCESS(vp, VREAD|VWRITE, u.u_cred))
			goto out;
		if (u.u_error = VOP_OPEN(&vp, FREAD|FWRITE, u.u_cred))
			goto out;
		break;

	case VDIR:
		u.u_error = EISDIR;
		goto out;

	case VCHR:
	case VSOCK:
	default:
		u.u_error = EOPNOTSUPP;
		goto out;
	}
	u.u_error = swap_init(vp);
out:
	if (u.u_error) {
		VN_RELE(vp);
	}
}
28
sys/vm/vpage.h
Normal file
@@ -0,0 +1,28 @@
/* @(#)vpage.h 1.1 94/10/31 SMI */

/*
 * Copyright (c) 1988 by Sun Microsystems, Inc.
 */

#ifndef _vm_vpage_h
#define _vm_vpage_h

/*
 * VM - Information per virtual page.
 */
struct vpage {
	u_int vp_prot: 4;	/* see <sys/mman.h> prot flags */
	u_int vp_advice: 3;	/* see <sys/mman.h> madvise flags */
	u_int vp_pplock: 1;	/* physical page locked by me */
	/*
	 * The following two are for use with a
	 * local page replacement algorithm (someday).
	 */
	u_int vp_ref: 1;	/* reference bit */
	u_int vp_mod: 1;	/* (maybe) modify bit, from hat */
	u_int vp_ski_ref: 1;	/* ski reference bit */
	u_int vp_ski_mod: 1;	/* ski modified bit */
	u_int : 4;
};

#endif /*!_vm_vpage_h*/