/* @(#)vm_anon.c 1.1 92/07/30 SMI */
/*
* Copyright (c) 1988 by Sun Microsystems, Inc.
*/
/*
* VM - anonymous pages.
*
* This layer sits immediately above the vm_swap layer. It manages
* physical pages that have no permanent identity in the file system
* name space, using the services of the vm_swap layer to allocate
* backing storage for these pages. Since these pages have no external
* identity, they are discarded when the last reference is removed.
*
* An important function of this layer is to manage low-level sharing
* of pages that are logically distinct but that happen to be
* physically identical (e.g., the corresponding pages of the processes
* resulting from a fork before one process or the other changes their
* contents). This pseudo-sharing is present only as an optimization
* and is not to be confused with true sharing in which multiple
* address spaces deliberately contain references to the same object;
* such sharing is managed at a higher level.
*
* The key data structure here is the anon struct, which contains a
* reference count for its associated physical page and a hint about
* the identity of that page. Anon structs typically live in arrays,
* with an instance's position in its array determining where the
* corresponding backing storage is allocated; however, the swap_xlate()
* routine abstracts away this representation information so that the
* rest of the anon layer need not know it. (See the swap layer for
* more details on anon struct layout.)
*
 * In future versions of the system, the association between an
 * anon struct and its position on backing store will change so that
 * we don't require backing store for all anonymous pages in the
 * system. This is an important consideration for large memory
 * systems. We can also use this technique to delay binding physical
 * locations to anonymous pages until pageout/swapout time, when we
 * can make smarter allocation decisions to improve anonymous
 * klustering.
*
* Many of the routines defined here take a (struct anon **) argument,
* which allows the code at this level to manage anon pages directly,
* so that callers can regard anon structs as opaque objects and not be
* concerned with assigning or inspecting their contents.
*
* Clients of this layer refer to anon pages indirectly. That is, they
* maintain arrays of pointers to anon structs rather than maintaining
* anon structs themselves. The (struct anon **) arguments mentioned
* above are pointers to entries in these arrays. It is these arrays
* that capture the mapping between offsets within a given segment and
* the corresponding anonymous backing storage address.
*/
#include <sys/param.h>
#include <sys/user.h> /* XXX - for rusage */
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/ucred.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/trace.h>
#include <sys/debug.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/swap.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/mp.h>
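
/*
 * Example (illustrative sketch, guarded out of normal builds): the
 * header comment above describes clients keeping arrays of anon
 * pointers that map segment offsets to anonymous backing store.  A
 * segment driver might locate the slot for a fault address as below.
 * The `anon_array' argument and the one-pointer-per-page layout are
 * assumptions for exposition, not taken from any particular driver.
 */
#ifdef ANON_EXAMPLE
static struct anon **
anon_slot_example(seg, anon_array, addr)
	struct seg *seg;
	struct anon **anon_array;	/* hypothetical per-segment array */
	addr_t addr;
{
	/* one anon pointer per page: index by page offset in segment */
	return (&anon_array[btop(addr - seg->s_base)]);
}
#endif /* ANON_EXAMPLE */
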
struct anoninfo anoninfo;
#ifdef KMON_DEBUG
kmon_t anon_lock;
#endif /* KMON_DEBUG */
int anon_resv_debug = 0;
int anon_enforce_resv = 1;
/*
* Reserve anon space.
* Return non-zero on success.
*/
int
anon_resv(size)
	u_int size;
{
	anoninfo.ani_resv += btopr(size);
	if (anoninfo.ani_resv > anoninfo.ani_max) {
		if (anon_enforce_resv)
			anoninfo.ani_resv -= btopr(size);
		else if (anon_resv_debug)
			printf("anon: swap space overcommitted by %d\n",
			    anoninfo.ani_resv - anoninfo.ani_max);
		return (!anon_enforce_resv);
	} else {
		return (1);
	}
}
/*
* Give back an anon reservation.
*/
void
anon_unresv(size)
	u_int size;
{
	anoninfo.ani_resv -= btopr(size);
	if ((int)anoninfo.ani_resv < 0)
		printf("anon: reservations below zero???\n");
}
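
/*
 * Example (illustrative sketch): anon_resv() and anon_unresv() are
 * meant to be paired.  A hypothetical driver creating an anonymous
 * segment would reserve swap up front and give the reservation back
 * if the rest of its setup fails; `setup_failed' stands in for that
 * driver-specific work.
 */
#ifdef ANON_EXAMPLE
static int
anon_resv_example(size, setup_failed)
	u_int size;
	int setup_failed;	/* stand-in for the driver's own setup */
{
	if (anon_resv(size) == 0)
		return (0);		/* swap space overcommitted */
	if (setup_failed) {
		anon_unresv(size);	/* pair with the reservation */
		return (0);
	}
	return (1);
}
#endif /* ANON_EXAMPLE */
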
/*
* Allocate an anon slot.
*/
struct anon *
anon_alloc()
{
	register struct anon *ap;

	kmon_enter(&anon_lock);
	ap = swap_alloc();
	if (ap != NULL) {
		anoninfo.ani_free--;
		ap->an_refcnt = 1;
		ap->un.an_page = NULL;
	}
	kmon_exit(&anon_lock);
	return (ap);
}
/*
* Decrement the reference count of an anon page.
* If reference count goes to zero, free it and
* its associated page (if any).
*/
static void
anon_decref(ap)
	register struct anon *ap;
{
	register struct page *pp;
	struct vnode *vp;
	u_int off;

	if (--ap->an_refcnt == 0) {
		/*
		 * If there is a page for this anon slot we will need to
		 * call page_abort to get rid of the vp association and
		 * put the page back on the free list as really free.
		 */
		swap_xlate(ap, &vp, &off);
		pp = page_find(vp, off);

		/*
		 * XXX - If we have a page, wait for its keepcnt to become
		 * zero and re-verify the identity before aborting it and
		 * freeing the swap slot. This ensures that any pending i/o
		 * always completes before the swap slot is freed.
		 */
		if (pp != NULL) {
			if (pp->p_keepcnt != 0) {
				page_wait(pp);
				if (pp->p_vnode == vp && pp->p_offset == off)
					page_abort(pp);
			} else {
				page_abort(pp);
			}
		}
		kmon_enter(&anon_lock);
		swap_free(ap);
		anoninfo.ani_free++;
		kmon_exit(&anon_lock);
	}
}
/*
 * Duplicate references to size bytes worth of anon pages.
 * Used when duplicating a segment that contains private anon pages.
 * This code assumes that the procedure calling this one has already
 * used hat_chgprot() to disable write access to the range of
 * addresses that *old actually refers to.
 */
void
anon_dup(old, new, size)
	register struct anon **old, **new;
	u_int size;
{
	register int i;

	i = btopr(size);
	while (i-- > 0) {
		if ((*new = *old) != NULL)
			(*new)->an_refcnt++;
		old++;
		new++;
	}
}
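
/*
 * Example (illustrative sketch): the comment above requires callers
 * to write-protect the old range first so that the next store by
 * either process faults and gets a private copy.  A fork-time path
 * might look like the sketch below; the hat_chgprot() argument order
 * shown is an assumption, and `oldarray'/`newarray' are hypothetical
 * per-segment anon pointer arrays.
 */
#ifdef ANON_EXAMPLE
static void
anon_dup_example(seg, oldarray, newarray, size)
	struct seg *seg;
	struct anon **oldarray, **newarray;
	u_int size;
{
	/* revoke write access so later stores fault (copy-on-write) */
	hat_chgprot(seg, seg->s_base, size, PROT_ALL & ~PROT_WRITE);
	anon_dup(oldarray, newarray, size);
}
#endif /* ANON_EXAMPLE */
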
/*
 * Free "size" bytes worth of anon pages
 * and clear out the pointers to the anon entries.
 */
void
anon_free(app, size)
	register struct anon **app;
	u_int size;
{
	register int i;

	i = btopr(size);
	while (i-- > 0) {
		if (*app != NULL) {
			anon_decref(*app);
			*app = NULL;
		}
		app++;
	}
}
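
/*
 * Example (illustrative sketch): segment teardown.  The driver drops
 * its anon references and then returns the swap reservation taken at
 * setup time; `anon_array' is a hypothetical per-segment array.
 */
#ifdef ANON_EXAMPLE
static void
anon_teardown_example(anon_array, size)
	struct anon **anon_array;
	u_int size;
{
	anon_free(anon_array, size);	/* decref and clear each slot */
	anon_unresv(size);		/* give back the reservation */
}
#endif /* ANON_EXAMPLE */
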
/*
 * Return the kept page(s) and protections to the segment driver.
 */
int
anon_getpage(app, protp, pl, plsz, seg, addr, rw, cred)
	struct anon **app;
	u_int *protp;
	struct page *pl[];
	u_int plsz;
	struct seg *seg;
	addr_t addr;
	enum seg_rw rw;
	struct ucred *cred;
{
	register struct page *pp, **ppp;
	register struct anon *ap = *app;
	struct vnode *vp;
	u_int off;
	int err;
	extern int nopagereclaim;
	register int s;

	swap_xlate(ap, &vp, &off);
again:
	pp = ap->un.an_page;

	/*
	 * If the anon pointer has a page associated with it,
	 * see if it looks ok after raising priority to prevent
	 * it from being ripped away at interrupt level if on the
	 * free list. If the page is being paged in, wait for it
	 * to finish as we must return a list of pages since this
	 * routine acts like the VOP_GETPAGE routine does.
	 */
	s = splvm();
	if (pp != NULL && pp->p_vnode == vp && pp->p_offset == off &&
	    !pp->p_gone && pl != NULL) {
		if (pp->p_intrans && (pp->p_pagein || nopagereclaim)) {
			(void) splx(s);
			page_wait(pp);
			goto again;		/* try again */
		}
		if (pp->p_free)
			page_reclaim(pp);
		(void) splx(s);
		PAGE_HOLD(pp);
		if (ap->an_refcnt == 1)
			*protp = PROT_ALL;
		else
			*protp = PROT_ALL & ~PROT_WRITE;
		pl[0] = pp;
		pl[1] = NULL;
		/* no one else accounted for it so we must */
		u.u_ru.ru_minflt++;
		return (0);
	}
	(void) splx(s);

	/*
	 * Simply treat it as a vnode fault on the anon vp.
	 */
	trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_ANON);
	err = VOP_GETPAGE(vp, off, PAGESIZE, protp, pl, plsz,
	    seg, addr, rw, cred);
	if (err == 0 && pl != NULL) {
		for (ppp = pl; (pp = *ppp++) != NULL; ) {
			if (pp->p_offset == off) {
				ap->un.an_page = pp;
				break;
			}
		}
		if (ap->an_refcnt != 1)
			*protp &= ~PROT_WRITE;	/* make read-only */
	}
	return (err);
}
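
/*
 * Example (illustrative sketch): a fault handler asking for a single
 * anon page.  The two-entry page list and PAGESIZE for `plsz' mirror
 * the single-page VOP_GETPAGE convention; that this is the intended
 * calling convention here is an assumption.
 */
#ifdef ANON_EXAMPLE
static struct page *
anon_getpage_example(app, seg, addr, rw, cred)
	struct anon **app;
	struct seg *seg;
	addr_t addr;
	enum seg_rw rw;
	struct ucred *cred;
{
	struct page *pl[2];	/* one page plus NULL terminator */
	u_int prot;

	if (anon_getpage(app, &prot, pl, PAGESIZE, seg, addr, rw, cred))
		return ((struct page *)NULL);
	return (pl[0]);		/* held page; caller releases when done */
}
#endif /* ANON_EXAMPLE */
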
int npagesteal;
/*
* Turn a reference to an object or shared anon page
* into a private page with a copy of the data from the
* original page. The original page is always kept, locked
* and loaded in the MMU by the caller. This routine unlocks
* the translation and releases the original page, if it isn't
* being stolen, before returning to the caller.
*/
struct page *
anon_private(app, seg, addr, opp, oppflags)
	struct anon **app;
	struct seg *seg;
	addr_t addr;
	struct page *opp;
	u_int oppflags;
{
	register struct anon *old = *app;
	register struct anon *new;
	register struct page *pp;
	struct vnode *vp;
	u_int off;

	ASSERT(opp->p_mapping);
	ASSERT(opp->p_keepcnt);

	new = anon_alloc();
	if (new == (struct anon *)NULL) {
		rm_outofanon();
		hat_unlock(seg, addr);
		PAGE_RELE(opp);
		return ((struct page *)NULL);	/* out of swap space */
	}
	*app = new;
	swap_xlate(new, &vp, &off);
again:
	pp = page_lookup(vp, off);
	if (pp == NULL && (oppflags & STEAL_PAGE) &&
	    opp->p_keepcnt == 1 && opp->p_mod == 0) {
		pp = opp;
		hat_unlock(seg, addr);		/* unlock translation */
		hat_pageunload(pp);		/* unload all translations */
		page_hashout(pp);		/* destroy old name for page */
		trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
		if (page_enter(pp, vp, off))	/* rename as anon page */
			panic("anon private steal");
		new->un.an_page = pp;
		pg_setmod(pp, 1);
		page_unlock(pp);

		/*
		 * If original page is ``locked'', relinquish
		 * claim for the extra page.
		 */
		if (oppflags & LOCK_PAGE)
			page_subclaim(1);
		npagesteal++;
		return (pp);
	}
	if (pp == NULL) {
		/*
		 * Normal case, need to allocate new page frame.
		 */
		pp = rm_allocpage(seg, addr, PAGESIZE, 1);
		trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
		if (page_enter(pp, vp, off)) {
			PAGE_RELE(pp);
			goto again;		/* try again */
		}
	} else {
		/*
		 * Already found a page with the right identity -- just
		 * use it if the `keepcnt' is 0. If not, wait for the
		 * `keepcnt' to become 0 and re-verify the identity
		 * before using the page.
		 */
		if (pp->p_keepcnt != 0) {
			page_wait(pp);
			if (pp->p_vnode != vp || pp->p_offset != off)
				goto again;
		}
		page_lock(pp);
		PAGE_HOLD(pp);
	}
	new->un.an_page = pp;

	/*
	 * Now copy the contents from the original page which
	 * is loaded and locked in the MMU by the caller to
	 * prevent yet another page fault.
	 */
	pp->p_intrans = pp->p_pagein = 1;
	pagecopy(addr, pp);
	pp->p_intrans = pp->p_pagein = 0;
	pg_setmod(pp, 1);		/* mark as modified */
	page_unlock(pp);

	/*
	 * If original page is ``locked'', relinquish claim
	 * for an extra page reserved for the private copy
	 * in case of a copy-on-write. Lock the new page
	 * ignoring the current reservation check.
	 */
	if (oppflags & LOCK_PAGE) {
		if (old == NULL)
			page_pp_unlock(opp, 1);
		else
			page_pp_unlock(opp, 0);
		(void) page_pp_lock(pp, 0, 0);
	}

	/*
	 * Unlock translation to the original page since
	 * it can be unloaded if the page is aborted.
	 */
	hat_unlock(seg, addr);

	/*
	 * Ok, now release the original page, or else the
	 * process will sleep forever in anon_decref()
	 * waiting for the `keepcnt' to become 0.
	 */
	PAGE_RELE(opp);

	/*
	 * If we copied away from an anonymous page, then
	 * we are one step closer to freeing up an anon slot.
	 */
	if (old != NULL)
		anon_decref(old);
	return (pp);
}
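
/*
 * Example (illustrative sketch): a copy-on-write fault.  Per the
 * comment above, the caller must already hold `opp' and have its
 * translation locked; anon_private() releases both on every path,
 * so `opp' must not be touched afterwards.  Passing 0 for oppflags
 * means the original page is neither stolen nor page-locked.
 */
#ifdef ANON_EXAMPLE
static struct page *
anon_cow_example(app, seg, addr, opp)
	struct anon **app;
	struct seg *seg;
	addr_t addr;
	struct page *opp;	/* held, translation locked by caller */
{
	register struct page *pp;

	pp = anon_private(app, seg, addr, opp, 0);
	if (pp == (struct page *)NULL)
		return ((struct page *)NULL);	/* out of swap space */
	return (pp);		/* private, modified copy of opp */
}
#endif /* ANON_EXAMPLE */
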
/*
* Allocate a zero-filled anon page.
*/
struct page *
anon_zero(seg, addr, app)
	struct seg *seg;
	addr_t addr;
	struct anon **app;
{
	register struct anon *ap;
	register struct page *pp;
	struct vnode *vp;
	u_int off;

	*app = ap = anon_alloc();
	if (ap == NULL) {
		rm_outofanon();
		return ((struct page *)NULL);
	}
	swap_xlate(ap, &vp, &off);
again:
	pp = page_lookup(vp, off);
	if (pp == NULL) {
		/*
		 * Normal case, need to allocate new page frame.
		 */
		pp = rm_allocpage(seg, addr, PAGESIZE, 1);
		trace6(TR_SEG_ALLOCPAGE, seg, addr, TRC_SEG_ANON, vp, off, pp);
		if (page_enter(pp, vp, off)) {
			PAGE_RELE(pp);
			goto again;		/* try again */
		}
	} else {
		/*
		 * Already found a page with the right identity -- just
		 * use it if the `keepcnt' is 0. If not, wait for the
		 * `keepcnt' to become 0 and re-verify the identity
		 * before using the page.
		 */
		if (pp->p_keepcnt != 0) {
			page_wait(pp);
			if (pp->p_vnode != vp || pp->p_offset != off)
				goto again;
		}
		page_lock(pp);
		PAGE_HOLD(pp);
	}
	ap->un.an_page = pp;
	pagezero(pp, 0, PAGESIZE);
	cnt.v_zfod++;
	pg_setmod(pp, 1);	/* mark as modified so pageout writes back */
	page_unlock(pp);
	return (pp);
}
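
/*
 * Example (illustrative sketch): zero-fill-on-demand in a fault
 * handler.  The hat call to load the translation is segment-driver
 * specific and elided; assuming the page comes back held (as the
 * PAGE_HOLD/rm_allocpage paths above suggest), the hold is dropped
 * once the page is mapped in.
 */
#ifdef ANON_EXAMPLE
static int
anon_zfod_example(seg, addr, app)
	struct seg *seg;
	addr_t addr;
	struct anon **app;
{
	register struct page *pp;

	pp = anon_zero(seg, addr, app);
	if (pp == (struct page *)NULL)
		return (-1);		/* out of swap space */
	/* ... load a translation for pp at addr here ... */
	PAGE_RELE(pp);			/* drop the hold once mapped */
	return (0);
}
#endif /* ANON_EXAMPLE */
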
/*
 * This gets called by the seg_vn driver's unload routine,
 * which is called by the hat code when it decides to
 * unload a particular mapping.
 */
void
anon_unloadmap(ap, ref, mod)
	struct anon *ap;
	u_int ref, mod;
{
	struct vnode *vp;
	u_int off;

	swap_xlate(ap, &vp, &off);
	pvn_unloadmap(vp, off, ref, mod);
}