/* @(#)seg_vn.c 1.1 92/07/30 SMI */
#ident "$SunId: @(#)seg_vn.c 1.4 91/04/10 SMI [RMTC] $"
/*
* Copyright (c) 1988, 1989 by Sun Microsystems, Inc.
*/
/*
* VM - shared or copy-on-write from a vnode/anonymous memory.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/debug.h>
#include <sys/ucred.h>
#include <sys/trace.h>
#include <sys/vmsystm.h>
#include <sys/user.h>
#include <sys/kernel.h>
#include <vm/hat.h>
#include <vm/mp.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/pvn.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/swap.h>
#include <vm/vpage.h>
#include <machine/seg_kmem.h>
/*
* Private seg op routines.
*/
static int segvn_dup(/* seg, newsegp */);
static int segvn_unmap(/* seg, addr, len */);
static int segvn_free(/* seg */);
static faultcode_t segvn_fault(/* seg, addr, len, type, rw */);
static faultcode_t segvn_faulta(/* seg, addr */);
static int segvn_hatsync(/* seg, addr, ref, mod, flags */);
static int segvn_setprot(/* seg, addr, len, prot */);
static int segvn_checkprot(/* seg, addr, len, prot */);
static int segvn_kluster(/* seg, addr, delta */);
static u_int segvn_swapout(/* seg */);
static int segvn_sync(/* seg, addr, len, flags */);
static int segvn_incore(/* seg, addr, len, vec */);
static int segvn_lockop(/* seg, addr, len, op */);
static int segvn_advise(/* seg, addr, len, behav */);
struct seg_ops segvn_ops = {
segvn_dup,
segvn_unmap,
segvn_free,
segvn_fault,
segvn_faulta,
segvn_hatsync,
segvn_setprot,
segvn_checkprot,
segvn_kluster,
segvn_swapout,
segvn_sync,
segvn_incore,
segvn_lockop,
segvn_advise,
};
/*
* Common zfod structures, provided as a shorthand for others to use.
*/
static
struct segvn_crargs zfod_segvn_crargs = {
(struct vnode *)NULL,
0,
(struct ucred *)NULL,
MAP_PRIVATE,
PROT_ALL,
PROT_ALL,
(struct anon_map *)NULL,
};
static
struct segvn_crargs kzfod_segvn_crargs = {
(struct vnode *)NULL,
0,
(struct ucred *)NULL,
MAP_PRIVATE,
PROT_ALL & ~PROT_USER,
PROT_ALL & ~PROT_USER,
(struct anon_map *)NULL,
};
caddr_t zfod_argsp = (caddr_t)&zfod_segvn_crargs; /* user zfod argsp */
caddr_t kzfod_argsp = (caddr_t)&kzfod_segvn_crargs; /* kernel zfod argsp */
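/*
 * These are normally handed to the segment create routine through
 * as_map(); e.g., a zero-fill-on-demand mapping is established with
 * something like:
 *	(void) as_map(as, addr, len, segvn_create, zfod_argsp);
 */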
/*
* Variables for maintaining the free lists
* of segvn_data and anon_map structures.
*/
static struct segvn_data *segvn_freelist;
static int segvn_freeincr = 8;
static struct anon_map *anonmap_freelist;
static int anonmap_freeincr = 4;
#define vpgtob(n) ((n) * sizeof (struct vpage)) /* For brevity */
static u_int anon_slop = 64*1024; /* allow segs to expand in place */
static int concat(/* seg1, seg2 */);
static int extend_prev(/* seg1, seg2, a, swresv */);
static int extend_next(/* seg1, seg2, a, swresv */);
static void anonmap_alloc(/* seg, swresv */);
static void segvn_vpage(/* seg */);
/*
* Routines needed externally
*/
struct anon_map *anonmap_fast_alloc();
void anonmap_fast_free(/* amp */);
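/*
 * Create a shared or copy-on-write segment from a vnode and/or
 * anonymous memory, as described by the segvn_crargs structure
 * passed through argsp. Swap space is reserved up front when the
 * segment may need anonymous pages, and adjacent compatible segvn
 * segments are concatenated when possible.
 */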
int
segvn_create(seg, argsp)
struct seg *seg;
caddr_t argsp;
{
register struct segvn_crargs *a = (struct segvn_crargs *)argsp;
register struct segvn_data *svd;
register u_int swresv = 0;
#ifdef LWP
extern int runthreads;
#endif /* LWP */
/*
* Check arguments for invalid combinations.
*/
if ((a->type != MAP_PRIVATE && a->type != MAP_SHARED) ||
(a->amp != NULL && a->vp != NULL))
panic("segvn_create args");
/*
* If segment may need anonymous pages, reserve them now.
*/
if ((a->vp == NULL && a->amp == NULL) ||
(a->type == MAP_PRIVATE && (a->prot & PROT_WRITE))) {
if (anon_resv(seg->s_size) == 0)
return (ENOMEM);
swresv = seg->s_size;
}
/*
* If more than one segment in the address space, and
* they're adjacent virtually, try to concatenate them.
* Don't concatenate if an explicit anon_map structure
* was supplied (e.g., SystemV shared memory).
*
* We also don't try concatenation if this is a segment
* for the kernel's address space. This is a kludge
* because the kernel has several threads of control
* active at the same time and we can get in trouble
* if we reallocate the anon_map while another process
* is trying to fill the old anon_map in.
* XXX - need as/seg locking to fix the general problem of
* multiple threads in an address space instead of this kludge.
*/
#ifdef LWP
if ((seg->s_prev != seg) && (a->amp == NULL) &&
(seg->s_as != &kas) && !runthreads) {
#else
if ((seg->s_prev != seg) && (a->amp == NULL) && (seg->s_as != &kas)) {
#endif /* LWP */
register struct seg *pseg, *nseg;
/* first, try to concatenate the previous and new segments */
pseg = seg->s_prev;
if (pseg->s_base + pseg->s_size == seg->s_base &&
pseg->s_ops == &segvn_ops &&
extend_prev(pseg, seg, a, swresv) == 0) {
/* success! now try to concatenate with following seg */
nseg = pseg->s_next;
if (nseg != pseg && nseg->s_ops == &segvn_ops &&
pseg->s_base + pseg->s_size == nseg->s_base)
(void) concat(pseg, nseg);
return (0);
}
/* failed, so try to concatenate with following seg */
nseg = seg->s_next;
if (seg->s_base + seg->s_size == nseg->s_base &&
nseg->s_ops == &segvn_ops &&
extend_next(seg, nseg, a, swresv) == 0)
return (0);
}
svd = (struct segvn_data *)new_kmem_fast_alloc(
(caddr_t *)&segvn_freelist, sizeof (*segvn_freelist),
segvn_freeincr, KMEM_SLEEP);
lock_init(&svd->lock);
seg->s_data = (char *)svd;
seg->s_ops = &segvn_ops;
if (a->vp != NULL) {
VN_HOLD(a->vp);
}
svd->vp = a->vp;
svd->offset = a->offset & PAGEMASK;
svd->prot = a->prot;
svd->maxprot = a->maxprot;
svd->pageprot = 0;
svd->type = a->type;
svd->vpage = NULL;
svd->advice = -1;
svd->pageadvice = 0;
if (a->cred != NULL) {
svd->cred = a->cred;
crhold(svd->cred);
} else {
svd->cred = crgetcred();
}
if (svd->type == MAP_SHARED && a->vp == NULL && a->amp == NULL) {
/*
* We have a shared mapping to an anon_map object
* which hasn't been allocated yet. Allocate the
* struct now so that it will be properly shared
* by remembering the swap reservation there.
*/
svd->swresv = 0;
anonmap_alloc(seg, swresv);
} else if ((svd->amp = a->amp) != NULL) {
u_int anon_num;
/*
* Mapping to an existing anon_map structure.
*/
svd->swresv = swresv;
/*
* For now we will ensure that the segment size isn't larger
* than the size - offset gives us. Later on we may wish to
* have the anon array dynamically allocated itself so that
* we don't always have to allocate all the anon pointer
* slots. This of course involves adding extra code to check
* that we aren't trying to use an anon pointer slot beyond
* the end of the currently allocated anon array.
*/
if ((a->amp->size - a->offset) < seg->s_size)
panic("segvn_create anon_map size");
anon_num = btopr(a->offset);
if (a->type == MAP_SHARED) {
/*
* SHARED mapping to a given anon_map.
*/
a->amp->refcnt++;
svd->anon_index = anon_num;
} else {
/*
* PRIVATE mapping to a given anon_map.
* Then make sure that all the needed anon
* structures are created (so that we will
* share the underlying pages if nothing
* is written by this mapping) and then
* duplicate the anon array as is done
* when a privately mapped segment is dup'ed.
*/
register struct anon **app;
register addr_t addr;
addr_t eaddr;
anonmap_alloc(seg, 0);
AMAP_LOCK(svd->amp);
app = &a->amp->anon[anon_num];
eaddr = seg->s_base + seg->s_size;
for (addr = seg->s_base; addr < eaddr;
addr += PAGESIZE, app++) {
struct page *pp;
if (*app != NULL)
continue;
/*
* Allocate the anon struct now.
* Might as well load up translation
* to the page while we're at it...
*/
pp = anon_zero(seg, addr, app);
if (*app == NULL)
panic("segvn_create anon_zero");
hat_memload(seg, addr, pp,
svd->prot & ~PROT_WRITE, 0);
PAGE_RELE(pp);
}
anon_dup(&a->amp->anon[anon_num], svd->amp->anon,
seg->s_size);
AMAP_UNLOCK(svd->amp);
}
} else {
svd->swresv = swresv;
svd->anon_index = 0;
}
return (0);
}
/*
* Concatenate two existing vnode segments, if possible.
* Return 0 on success.
*/
static int
concat(seg1, seg2)
struct seg *seg1, *seg2;
{
register struct segvn_data *svd1, *svd2;
register u_int size, swresv;
register struct anon_map *amp1, *amp2;
register struct vpage *vpage1, *vpage2;
svd1 = (struct segvn_data *)seg1->s_data;
svd2 = (struct segvn_data *)seg2->s_data;
/* both segments exist, try to merge them */
#define incompat(x) (svd1->x != svd2->x)
if (incompat(vp) || incompat(maxprot) ||
(!svd1->pageprot && !svd2->pageprot && incompat(prot)) ||
(!svd1->pageadvice && !svd2->pageadvice && incompat(advice)) ||
incompat(type) || incompat(cred))
return (-1);
#undef incompat
/* XXX - need to check credentials more carefully */
/* vp == NULL implies zfod, offset doesn't matter */
if (svd1->vp != NULL &&
svd1->offset + seg1->s_size != svd2->offset)
return (-1);
amp1 = svd1->amp;
amp2 = svd2->amp;
/* XXX - for now, reject if any private pages. could merge. */
if (amp1 != NULL || amp2 != NULL)
return (-1);
/* if either seg has vpages, create new merged vpages */
vpage1 = svd1->vpage;
vpage2 = svd2->vpage;
if (vpage1 != NULL || vpage2 != NULL) {
register int npages1, npages2;
register struct vpage *vp, *new_vpage;
npages1 = seg_pages(seg1);
npages2 = seg_pages(seg2);
new_vpage = (struct vpage *)new_kmem_zalloc(
(u_int)(vpgtob(npages1 + npages2)), KMEM_SLEEP);
if (vpage1 != NULL)
bcopy((caddr_t)vpage1, (caddr_t)new_vpage,
(u_int)vpgtob(npages1));
if (vpage2 != NULL)
bcopy((caddr_t)vpage2, (caddr_t)(new_vpage + npages1),
(u_int)vpgtob(npages2));
for (vp = new_vpage; vp < new_vpage + npages1; vp++) {
if (svd2->pageprot && !svd1->pageprot)
vp->vp_prot = svd1->prot;
if (svd2->pageadvice && !svd1->pageadvice)
vp->vp_advice = svd1->advice;
}
for (vp = new_vpage + npages1;
vp < new_vpage + npages1 + npages2; vp++) {
if (svd1->pageprot && !svd2->pageprot)
vp->vp_prot = svd2->prot;
if (svd1->pageadvice && !svd2->pageadvice)
vp->vp_advice = svd2->advice;
}
if (vpage1 != NULL)
kmem_free((caddr_t)vpage1, (u_int)vpgtob(npages1));
if (svd2->pageprot)
svd1->pageprot = 1;
if (svd2->pageadvice)
svd1->pageadvice = 1;
svd1->vpage = new_vpage;
}
/* all looks ok, merge second into first */
size = seg2->s_size;
swresv = svd2->swresv;
svd2->swresv = 0; /* so seg_free doesn't release swap space */
seg_free(seg2);
seg1->s_size += size;
svd1->swresv += swresv;
return (0);
}
/*
* Extend the previous segment (seg1) to include the
* new segment (seg2 + a), if possible.
* Return 0 on success.
*/
static int
extend_prev(seg1, seg2, a, swresv)
struct seg *seg1, *seg2;
register struct segvn_crargs *a;
u_int swresv;
{
register struct segvn_data *svd1;
register u_int size;
register struct anon_map *amp1;
struct vpage *new_vpage;
/* second segment is new, try to extend first */
svd1 = (struct segvn_data *)seg1->s_data;
if (svd1->vp != a->vp || svd1->maxprot != a->maxprot ||
(!svd1->pageprot && (svd1->prot != a->prot)) ||
svd1->type != a->type)
return (-1);
/* vp == NULL implies zfod, offset doesn't matter */
if (svd1->vp != NULL &&
svd1->offset + seg1->s_size != (a->offset & PAGEMASK))
return (-1);
amp1 = svd1->amp;
if (amp1) {
/*
* segment has private pages, can
* data structures be expanded?
*/
if (amp1->refcnt > 1 && amp1->size != seg1->s_size)
return (-1);
AMAP_LOCK(amp1);
if (amp1->size - ctob(svd1->anon_index) <
seg1->s_size + seg2->s_size) {
struct anon **aa;
u_int asize;
/*
* We need a bigger anon array. Allocate a new
* one with anon_slop worth of slop at the
* end so it will be easier to expand in
* place the next time we need to do this.
*/
asize = seg1->s_size + seg2->s_size + anon_slop;
aa = (struct anon **) new_kmem_zalloc(
(u_int)btop(asize) * sizeof (struct anon *),
KMEM_SLEEP);
bcopy((caddr_t)(amp1->anon + svd1->anon_index),
(caddr_t)aa,
(u_int)(btop(seg1->s_size) * sizeof (struct anon *)));
kmem_free((caddr_t)amp1->anon,
btop(amp1->size) * sizeof (struct anon *));
amp1->anon = aa;
amp1->size = asize;
svd1->anon_index = 0;
} else {
/*
* Can just expand anon array in place.
* Clear out anon slots after the end
* of the currently used slots.
*/
bzero((caddr_t)(amp1->anon + svd1->anon_index +
seg_pages(seg1)),
seg_pages(seg2) * sizeof (struct anon *));
}
AMAP_UNLOCK(amp1);
}
if (svd1->vpage != NULL) {
new_vpage = (struct vpage *)new_kmem_zalloc(
(u_int)(vpgtob(seg_pages(seg1) + seg_pages(seg2))),
KMEM_SLEEP);
bcopy((caddr_t)svd1->vpage, (caddr_t)new_vpage,
(u_int)vpgtob(seg_pages(seg1)));
kmem_free((caddr_t)svd1->vpage, (u_int)vpgtob(seg_pages(seg1)));
svd1->vpage = new_vpage;
if (svd1->pageprot) {
register struct vpage *vp, *evp;
vp = new_vpage + seg_pages(seg1);
evp = vp + seg_pages(seg2);
for (; vp < evp; vp++)
vp->vp_prot = a->prot;
}
}
size = seg2->s_size;
seg_free(seg2);
seg1->s_size += size;
svd1->swresv += swresv;
return (0);
}
/*
* Extend the next segment (seg2) to include the
* new segment (seg1 + a), if possible.
* Return 0 on success.
*/
static int
extend_next(seg1, seg2, a, swresv)
struct seg *seg1, *seg2;
register struct segvn_crargs *a;
u_int swresv;
{
register struct segvn_data *svd2 = (struct segvn_data *)seg2->s_data;
register u_int size;
register struct anon_map *amp2;
struct vpage *new_vpage;
/* first segment is new, try to extend second */
if (svd2->vp != a->vp || svd2->maxprot != a->maxprot ||
(!svd2->pageprot && (svd2->prot != a->prot)) ||
svd2->type != a->type)
return (-1);
/* vp == NULL implies zfod, offset doesn't matter */
if (svd2->vp != NULL &&
(a->offset & PAGEMASK) + seg1->s_size != svd2->offset)
return (-1);
amp2 = svd2->amp;
if (amp2) {
/*
* Segment has private pages, can
* data structures be expanded?
*/
if (amp2->refcnt > 1)
return (-1);
AMAP_LOCK(amp2);
if (ctob(svd2->anon_index) < seg1->s_size) {
struct anon **aa;
u_int asize;
/*
* We need a bigger anon array. Allocate a new
* one with anon_slop worth of slop at the
* beginning so it will be easier to expand in
* place the next time we need to do this.
*/
asize = seg1->s_size + seg2->s_size + anon_slop;
aa = (struct anon **)new_kmem_zalloc(
(u_int)btop(asize) * sizeof (struct anon *),
KMEM_SLEEP);
bcopy((caddr_t)(amp2->anon + svd2->anon_index),
(caddr_t)(aa + btop(anon_slop) + seg_pages(seg1)),
(u_int)(btop(seg2->s_size) * sizeof (struct anon *)));
kmem_free((caddr_t)amp2->anon,
btop(amp2->size) * sizeof (struct anon *));
amp2->anon = aa;
amp2->size = asize;
svd2->anon_index = btop(anon_slop);
} else {
/*
* Can just expand anon array in place.
* Clear out anon slots going backwards
* towards the beginning of the array.
*/
bzero((caddr_t)(amp2->anon + svd2->anon_index -
seg_pages(seg1)),
seg_pages(seg1) * sizeof (struct anon *));
svd2->anon_index -= seg_pages(seg1);
}
AMAP_UNLOCK(amp2);
}
if (svd2->vpage != NULL) {
new_vpage = (struct vpage *)new_kmem_zalloc(
(u_int)vpgtob(seg_pages(seg1) + seg_pages(seg2)),
KMEM_SLEEP);
bcopy((caddr_t)svd2->vpage,
(caddr_t)(new_vpage + seg_pages(seg1)),
(u_int)vpgtob(seg_pages(seg2)));
kmem_free((caddr_t)svd2->vpage, (u_int)vpgtob(seg_pages(seg2)));
svd2->vpage = new_vpage;
if (svd2->pageprot) {
register struct vpage *vp, *evp;
vp = new_vpage;
evp = vp + seg_pages(seg1);
for (; vp < evp; vp++)
vp->vp_prot = a->prot;
}
}
size = seg1->s_size;
seg_free(seg1);
seg2->s_size += size;
seg2->s_base -= size;
svd2->offset -= size;
svd2->swresv += swresv;
return (0);
}
/*
* Allocate and initialize an anon_map structure for seg
* associating the given swap reservation with the new anon_map.
*/
static void
anonmap_alloc(seg, swresv)
register struct seg *seg;
u_int swresv;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
svd->amp = anonmap_fast_alloc();
svd->amp->refcnt = 1;
svd->amp->size = seg->s_size;
svd->amp->anon = (struct anon **)new_kmem_zalloc(
(u_int)(seg_pages(seg) * sizeof (struct anon *)), KMEM_SLEEP);
svd->amp->swresv = swresv;
svd->amp->flags = 0;
svd->anon_index = 0;
}
/*
* Allocate an anon_map structure;
* also used by ipc_shm.c
*/
struct anon_map *
anonmap_fast_alloc()
{
register struct anon_map *amp;
amp = (struct anon_map *)new_kmem_fast_alloc(
(caddr_t *)&anonmap_freelist, sizeof (*anonmap_freelist),
anonmap_freeincr, KMEM_SLEEP);
amp->flags = 0; /* XXX for ipc_shm.c */
return (amp);
}
/*
* Free an anon_map structure;
* also used by ipc_shm.c
*/
void
anonmap_fast_free(amp)
register struct anon_map *amp;
{
kmem_fast_free((caddr_t *)&anonmap_freelist,
(caddr_t)amp);
}
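/*
 * Duplicate seg in newseg, as when an address space is duplicated
 * by fork. For a private mapping with anonymous pages, write access
 * is removed from the existing translations and the anon array is
 * copied so the underlying pages are shared copy-on-write.
 */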
static int
segvn_dup(seg, newseg)
struct seg *seg, *newseg;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct segvn_data *newsvd;
register u_int npages = seg_pages(seg);
/*
* If a segment has anon reserved,
* reserve more for the new segment.
*/
if (svd->swresv && anon_resv(svd->swresv) == 0)
return (-1);
newsvd = (struct segvn_data *)new_kmem_fast_alloc(
(caddr_t *)&segvn_freelist, sizeof (*segvn_freelist),
segvn_freeincr, KMEM_SLEEP);
lock_init(&newsvd->lock);
newseg->s_ops = &segvn_ops;
newseg->s_data = (char *)newsvd;
if ((newsvd->vp = svd->vp) != NULL) {
VN_HOLD(svd->vp);
}
newsvd->offset = svd->offset;
newsvd->prot = svd->prot;
newsvd->maxprot = svd->maxprot;
newsvd->pageprot = svd->pageprot;
newsvd->type = svd->type;
newsvd->cred = svd->cred;
crhold(newsvd->cred);
newsvd->advice = svd->advice;
newsvd->pageadvice = svd->pageadvice;
newsvd->swresv = svd->swresv;
if ((newsvd->amp = svd->amp) == NULL) {
/*
* No associated anon object.
*/
newsvd->anon_index = 0;
} else {
if (svd->type == MAP_SHARED) {
svd->amp->refcnt++;
newsvd->anon_index = svd->anon_index;
} else {
/*
* Allocate and initialize new anon_map structure.
*/
anonmap_alloc(newseg, 0);
AMAP_LOCK(svd->amp);
newsvd->anon_index = 0;
hat_chgprot(seg, seg->s_base, seg->s_size, ~PROT_WRITE);
anon_dup(&svd->amp->anon[svd->anon_index],
newsvd->amp->anon, seg->s_size);
AMAP_UNLOCK(svd->amp);
}
}
/*
* If necessary, create a vpage structure for the new segment.
* Do not copy any page lock indications.
*/
if (svd->vpage != NULL) {
register u_int i;
register struct vpage *ovp = svd->vpage;
register struct vpage *nvp;
nvp = newsvd->vpage = (struct vpage *)
new_kmem_alloc(vpgtob(npages), KMEM_SLEEP);
for (i = 0; i < npages; i++) {
*nvp = *ovp++;
(nvp++)->vp_pplock = 0;
}
} else
newsvd->vpage = NULL;
return (0);
}
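/*
 * Unmap the range [addr, addr + len) from seg. Depending on where
 * the range falls, the segment is freed entirely, trimmed at one
 * end, or split into two segments.
 */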
static int
segvn_unmap(seg, addr, len)
register struct seg *seg;
register addr_t addr;
u_int len;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct segvn_data *nsvd;
register struct seg *nseg;
register u_int npages, spages, tpages;
struct anon **app;
addr_t nbase;
u_int nsize, hpages;
/*
* Check for bad sizes
*/
if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
(len & PAGEOFFSET) || ((u_int)addr & PAGEOFFSET))
panic("segvn_unmap");
/*
* Remove any page locks set through this mapping.
*/
(void) segvn_lockop(seg, addr, len, MC_UNLOCK);
/*
* Unload any hardware translations in the range to be taken out.
*/
hat_unload(seg, addr, len);
/*
* Check for entire segment
*/
if (addr == seg->s_base && len == seg->s_size) {
seg_free(seg);
return (0);
}
/*
* Check for beginning of segment
*/
npages = btop(len);
spages = seg_pages(seg);
if (addr == seg->s_base) {
if (svd->vpage != NULL) {
svd->vpage = (struct vpage *)new_kmem_resize(
(caddr_t)svd->vpage, vpgtob(npages),
vpgtob(spages - npages), vpgtob(spages),
KMEM_SLEEP);
}
if (svd->amp != NULL && (svd->amp->refcnt == 1 ||
svd->type == MAP_PRIVATE)) {
/*
* Free up now unused parts of anon_map array.
*/
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index];
anon_free(app, len);
svd->anon_index += npages;
AMAP_UNLOCK(svd->amp);
}
if (svd->vp != NULL)
svd->offset += len;
if (svd->swresv) {
anon_unresv(len);
svd->swresv -= len;
}
seg->s_base += len;
seg->s_size -= len;
return (0);
}
/*
* Check for end of segment
*/
if (addr + len == seg->s_base + seg->s_size) {
tpages = spages - npages;
if (svd->vpage != NULL) {
svd->vpage = (struct vpage *)
new_kmem_resize((caddr_t)svd->vpage, (u_int)0,
vpgtob(tpages), vpgtob(spages), KMEM_SLEEP);
}
if (svd->amp != NULL && (svd->amp->refcnt == 1 ||
svd->type == MAP_PRIVATE)) {
/*
* Free up now unused parts of anon_map array
*/
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index + tpages];
anon_free(app, len);
AMAP_UNLOCK(svd->amp);
}
if (svd->swresv) {
anon_unresv(len);
svd->swresv -= len;
}
seg->s_size -= len;
return (0);
}
/*
* The section to go is in the middle of the segment,
* have to make it into two segments. nseg is made for
* the high end while seg is cut down at the low end.
*/
nbase = addr + len; /* new seg base */
nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */
seg->s_size = addr - seg->s_base; /* shrink old seg */
nseg = seg_alloc(seg->s_as, nbase, nsize);
if (nseg == NULL)
panic("segvn_unmap seg_alloc");
nseg->s_ops = seg->s_ops;
nsvd = (struct segvn_data *)new_kmem_fast_alloc(
(caddr_t *)&segvn_freelist, sizeof (*segvn_freelist),
segvn_freeincr, KMEM_SLEEP);
nseg->s_data = (char *)nsvd;
lock_init(&nsvd->lock);
nsvd->pageprot = svd->pageprot;
nsvd->prot = svd->prot;
nsvd->maxprot = svd->maxprot;
nsvd->type = svd->type;
nsvd->vp = svd->vp;
nsvd->cred = svd->cred;
nsvd->offset = svd->offset + nseg->s_base - seg->s_base;
nsvd->swresv = 0;
nsvd->advice = svd->advice;
nsvd->pageadvice = svd->pageadvice;
if (svd->vp != NULL)
VN_HOLD(nsvd->vp);
crhold(svd->cred);
tpages = btop(nseg->s_base - seg->s_base);
hpages = btop(addr - seg->s_base);
if (svd->vpage == NULL)
nsvd->vpage = NULL;
else {
nsvd->vpage = (struct vpage *)
new_kmem_alloc(vpgtob(spages - tpages), KMEM_SLEEP);
bcopy((caddr_t)&svd->vpage[tpages], (caddr_t)nsvd->vpage,
(u_int)vpgtob(spages - tpages));
svd->vpage = (struct vpage *)
new_kmem_resize((caddr_t)svd->vpage, (u_int)0,
vpgtob(hpages), vpgtob(spages), KMEM_SLEEP);
}
if (svd->amp == NULL) {
nsvd->amp = NULL;
nsvd->anon_index = 0;
} else {
/*
* Share the same anon_map structure.
*/
if (svd->amp->refcnt == 1 ||
svd->type == MAP_PRIVATE) {
/*
* Free up now unused parts of anon_map array
*/
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index + hpages];
anon_free(app, len);
AMAP_UNLOCK(svd->amp);
}
nsvd->amp = svd->amp;
nsvd->anon_index = svd->anon_index + tpages;
nsvd->amp->refcnt++;
}
if (svd->swresv) {
if (seg->s_size + nseg->s_size + len != svd->swresv)
panic("segvn_unmap: can split swap reservation");
anon_unresv(len);
svd->swresv = seg->s_size;
nsvd->swresv = nseg->s_size;
}
/*
 * Now arrange things so that all the translations over the range
 * of nseg which used to be associated with seg are associated
 * with nseg instead.
 */
hat_newseg(seg, nseg->s_base, nseg->s_size, nseg);
return (0); /* I'm glad that's all over with! */
}
static int
segvn_free(seg)
struct seg *seg;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct anon **app;
u_int npages = seg_pages(seg);
/*
* Be sure to unlock pages. XXX Why do things get free'ed instead
* of unmapped? XXX
*/
(void) segvn_lockop(seg, seg->s_base, seg->s_size, MC_UNLOCK);
/*
* Deallocate the vpage and anon pointers if necessary and possible.
*/
if (svd->vpage != NULL)
kmem_free((caddr_t)svd->vpage, npages * sizeof (struct vpage));
if (svd->amp != NULL) {
/*
* If there are no more references to this anon_map
* structure, then deallocate the structure after freeing
* up all the anon slot pointers that we can.
*/
if (--svd->amp->refcnt == 0) {
AMAP_LOCK(svd->amp);
if (svd->type == MAP_PRIVATE) {
/*
* Private - we only need to anon_free
* the part that this segment refers to.
*/
anon_free(&svd->amp->anon[svd->anon_index],
seg->s_size);
} else {
/*
* Shared - anon_free the entire
* anon_map's worth of stuff and
* release any swap reservations.
*/
anon_free(svd->amp->anon, svd->amp->size);
if (svd->amp->swresv)
anon_unresv(svd->amp->swresv);
}
kmem_free((caddr_t)svd->amp->anon,
btop(svd->amp->size) * sizeof (struct anon *));
AMAP_UNLOCK(svd->amp);
anonmap_fast_free(svd->amp);
} else if (svd->type == MAP_PRIVATE) {
/*
* We had a private mapping which still has
* a held anon_map so just free up all the
* anon slot pointers that we were using.
*/
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index];
anon_free(app, seg->s_size);
AMAP_UNLOCK(svd->amp);
}
}
/*
* Release swap reservation.
*/
if (svd->swresv)
anon_unresv(svd->swresv);
/*
* Release claim on vnode, credentials, and finally free the
* private data.
*/
if (svd->vp != NULL)
VN_RELE(svd->vp);
crfree(svd->cred);
kmem_fast_free((caddr_t *)&segvn_freelist, (caddr_t)svd);
}
/*
* Do a F_SOFTUNLOCK call over the range requested.
* The range must have already been F_SOFTLOCK'ed.
*/
static void
segvn_softunlock(seg, addr, len, rw)
struct seg *seg;
addr_t addr;
u_int len;
enum seg_rw rw;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct anon **app;
register struct page *pp;
register struct vpage *vpage;
register addr_t adr;
struct vnode *vp;
u_int offset;
if (svd->amp != NULL) {
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index + seg_page(seg, addr)];
} else
app = NULL;
if (svd->vpage != NULL)
vpage = &svd->vpage[seg_page(seg, addr)];
else
vpage = NULL;
for (adr = addr; adr < addr + len; adr += PAGESIZE) {
if (app != NULL && *app != NULL)
swap_xlate(*app, &vp, &offset);
else {
vp = svd->vp;
offset = svd->offset + (adr - seg->s_base);
}
/*
* For now, we just kludge here by finding the page
* ourselves since we would not find the page using
* page_find() if someone has page_abort()'ed it.
* XXX - need to redo things to avoid this mess.
*/
for (pp = page_hash[PAGE_HASHFUNC(vp, offset)]; pp != NULL;
pp = pp->p_hash)
if (pp->p_vnode == vp && pp->p_offset == offset)
break;
if (pp == NULL || pp->p_pagein || pp->p_free)
panic("segvn_softunlock");
if (rw == S_WRITE) {
pg_setmod(pp, 1);
if (vpage != NULL)
vpage->vp_mod = 1;
}
if (rw != S_OTHER) {
trace4(TR_PG_SEGVN_FLT, pp, vp, offset, 1);
pg_setref(pp, 1);
if (vpage != NULL)
vpage->vp_ref = 1;
}
hat_unlock(seg, adr);
PAGE_RELE(pp);
if (vpage != NULL)
vpage++;
if (app != NULL)
app++;
}
if (svd->amp != NULL)
AMAP_UNLOCK(svd->amp);
}
/*
* Returns true if the app array has some non-anonymous memory.
* The offp and lenp parameters are in/out parameters. On entry
* these values represent the starting offset and length of the
* mapping. When true is returned, these values may be modified
* to be the largest range which includes non-anonymous memory.
*/
static int
non_anon(app, offp, lenp)
register struct anon **app;
u_int *offp, *lenp;
{
register int i, el;
int low, high;
low = -1;
for (i = 0, el = *lenp; i < el; i += PAGESIZE) {
if (*app++ == NULL) {
if (low == -1)
low = i;
high = i;
}
}
if (low != -1) {
/*
* Found at least one non-anon page.
* Set up the off and len return values.
*/
if (low != 0)
*offp += low;
*lenp = high - low + PAGESIZE;
return (1);
}
return (0);
}
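/*
 * For example, given a four page range whose anon slots are
 * { anon, NULL, NULL, anon }, non_anon() advances *offp by one page,
 * sets *lenp to two pages, and returns 1.
 */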
#define PAGE_HANDLED ((struct page *)-1)
/*
* Release all the pages in the NULL terminated ppp list
* which haven't already been converted to PAGE_HANDLED.
*/
static void
segvn_pagelist_rele(ppp)
register struct page **ppp;
{
for (; *ppp != NULL; ppp++) {
if (*ppp != PAGE_HANDLED)
PAGE_RELE(*ppp);
}
}
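/*
 * Tunable: when memory is tight, segvn_faultpage() may steal the
 * original object page to satisfy a copy-on-write rather than
 * copying it.
 */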
int stealcow = 1;
/*
* Handles all the dirty work of getting the right
* anonymous pages and loading up the translations.
* This routine is called only from segvn_fault()
* when looping over the range of addresses requested.
*
* The basic algorithm here is:
* If this is an anon_zero case
* Call anon_zero to allocate page
* Load up translation
* Return
* endif
* If this is an anon page
* Use anon_getpage to get the page
* else
* Find page in pl[] list passed in
* endif
* If not a cow
* Load up the translation to the page
* return
* endif
* Load and lock translation to the original page
* Call anon_private to handle cow
* Unload translation to the original page
* Load up (writable) translation to new page
*/
static int
segvn_faultpage(seg, addr, off, app, vpage, pl, vpprot, type, rw)
struct seg *seg; /* seg_vn of interest */
addr_t addr; /* address in as */
u_int off; /* offset in vp */
struct anon **app; /* pointer to anon for vp, off */
struct vpage *vpage; /* pointer to vpage for vp, off */
struct page *pl[]; /* object source page pointer */
u_int vpprot; /* access allowed to object pages */
enum fault_type type; /* type of fault */
enum seg_rw rw; /* type of access at fault */
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct page *pp, **ppp;
u_int pageflags = 0;
struct page *anon_pl[1 + 1];
struct page *opp; /* original object page */
u_int prot;
int err;
int cow;
/*
* Initialize protection value for this page.
* If we have per page protection values check it now.
*/
if (svd->pageprot) {
u_int protchk;
switch (rw) {
case S_READ:
protchk = PROT_READ;
break;
case S_WRITE:
protchk = PROT_WRITE;
break;
case S_EXEC:
protchk = PROT_EXEC;
break;
case S_OTHER:
default:
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
break;
}
prot = vpage->vp_prot;
if ((prot & protchk) == 0)
return (FC_PROT); /* illegal access type */
} else {
prot = svd->prot;
}
#ifdef WRITABLE_IMPLIES_MODIFIED
if (rw != S_WRITE)
prot &= ~PROT_WRITE;
#endif
if (svd->vp == NULL && *app == NULL) {
/*
* Allocate a (normally) writable
* anonymous page of zeroes
*/
if ((pp = anon_zero(seg, addr, app)) == NULL)
return (FC_MAKE_ERR(ENOMEM)); /* out of swap space */
pg_setref(pp, 1);
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't PAGE_RELE the page.
*/
hat_memload(seg, addr, pp, prot, 1);
} else {
hat_memload(seg, addr, pp, prot, 0);
PAGE_RELE(pp);
}
trace5(TR_SPG_FLT, u.u_ar0[PC], addr, svd->vp, off,
TRC_SPG_ZERO);
trace6(TR_SPG_FLT_PROC, time.tv_sec, time.tv_usec,
trs(u.u_comm,0), trs(u.u_comm,1),
trs(u.u_comm,2), trs(u.u_comm,3));
return (0);
}
/*
* Obtain the page structure via anon_getpage() if it is
* a private copy of an object (the result of a previous
* copy-on-write), or from the pl[] list passed in if it
* is from the original object (i.e., not a private copy).
*/
if (app != NULL && *app != NULL) {
err = anon_getpage(app, &vpprot, anon_pl, PAGESIZE,
seg, addr, rw, svd->cred);
if (err)
return (FC_MAKE_ERR(err));
if (svd->type == MAP_SHARED) {
/*
* If this is a shared mapping to an
* anon_map, then ignore the write
* permissions returned by anon_getpage().
* They apply to the private mappings
* of this anon_map.
*/
vpprot |= PROT_WRITE;
}
opp = anon_pl[0];
} else {
/*
* Find original page. We must be bringing it in
* from the list in pl[].
*/
for (ppp = pl; (opp = *ppp) != NULL; ppp++) {
if (opp == PAGE_HANDLED)
continue;
ASSERT(opp->p_vnode == svd->vp); /* XXX */
if (opp->p_offset == off)
break;
}
if (opp == NULL)
panic("segvn_faultpage not found");
*ppp = PAGE_HANDLED;
}
ASSERT(opp != NULL);
trace4(TR_PG_SEGVN_FLT, opp, opp->p_vnode, opp->p_offset, 0);
pg_setref(opp, 1);
/*
* The fault is treated as a copy-on-write fault if a
* write occurs on a private segment and the object
* page is write protected. We assume that fatal
* protection checks have already been made.
*/
cow = (rw == S_WRITE && svd->type == MAP_PRIVATE &&
(vpprot & PROT_WRITE) == 0);
/*
* If not a copy-on-write case load the translation
* and return.
*/
if (cow == 0) {
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't PAGE_RELE the page.
*/
hat_memload(seg, addr, opp, prot & vpprot, 1);
} else {
hat_memload(seg, addr, opp, prot & vpprot, 0);
PAGE_RELE(opp);
}
trace5(TR_SPG_FLT, u.u_ar0[PC], addr, svd->vp, off,
svd->vp == opp->p_vnode ? TRC_SPG_FILE : TRC_SPG_ANON);
trace6(TR_SPG_FLT_PROC, time.tv_sec, time.tv_usec,
trs(u.u_comm,0), trs(u.u_comm,1),
trs(u.u_comm,2), trs(u.u_comm,3));
return (0);
}
ASSERT(app != NULL);
/*
* Steal the original page if the following conditions are true:
*
* We are low on memory, the page is not private, keepcnt is 1,
* not modified, not `locked' or if we have it `locked' and
* if it doesn't have any translations.
*/
if (stealcow && freemem < minfree && *app == NULL &&
opp->p_keepcnt == 1 && opp->p_mod == 0 &&
(opp->p_lckcnt == 0 || opp->p_lckcnt == 1 &&
vpage != NULL && vpage->vp_pplock)) {
/*
* Check if this page has other translations
* after unloading our translation.
*/
if (opp->p_mapping != NULL)
hat_unload(seg, addr, PAGESIZE);
if (opp->p_mapping == NULL)
pageflags |= STEAL_PAGE;
}
/*
* Copy-on-write case: anon_private() will copy the contents
* of the original page into a new page. The page fault which
* could occur during the copy is prevented by ensuring that
* a translation to the original page is loaded and locked.
*/
hat_memload(seg, addr, opp, prot & vpprot, 1);
/*
* If the vpage pointer is valid, see if it indicates that
* we have ``locked'' the page we map. If so, ensure that
* anon_private() will transfer the locking resource to
* the new page.
*/
if (vpage != NULL && vpage->vp_pplock)
pageflags |= LOCK_PAGE;
/*
* Allocate a page and perform the copy.
*/
pp = anon_private(app, seg, addr, opp, pageflags);
if (pp == NULL)
return (FC_MAKE_ERR(ENOMEM)); /* out of swap space */
/*
* Ok, now just unload the old translation since it has
* been unlocked in anon_private().
*/
hat_unload(seg, addr, PAGESIZE);
if (type == F_SOFTLOCK) {
/*
* Load up the translation keeping it
* locked and don't PAGE_RELE the page.
*/
hat_memload(seg, addr, pp, prot, 1);
} else {
hat_memload(seg, addr, pp, prot, 0);
PAGE_RELE(pp);
}
trace5(TR_SPG_FLT, u.u_ar0[PC], addr, svd->vp, off, TRC_SPG_COW);
trace6(TR_SPG_FLT_PROC, time.tv_sec, time.tv_usec,
trs(u.u_comm,0), trs(u.u_comm,1), trs(u.u_comm,2), trs(u.u_comm,3));
return (0);
}
int fltadvice = 1; /* set to free behind pages for sequential access */
/*
* This routine is called via a machine specific fault handling routine.
* It is also called by software routines wishing to lock or unlock
* a range of addresses.
*
* Here is the basic algorithm:
* If unlocking
* Call segvn_softunlock
* Return
* endif
* Checking and set up work
* If we will need some non-anonymous pages
* Call VOP_GETPAGE over the range of non-anonymous pages
* endif
* Loop over all addresses requested
* Call segvn_faultpage passing in page list
* to load up translations and handle anonymous pages
* endloop
* Load up translation to any additional pages in page list not
* already handled that fit into this segment
*/
static faultcode_t
segvn_fault(seg, addr, len, type, rw)
struct seg *seg;
addr_t addr;
u_int len;
enum fault_type type;
enum seg_rw rw;
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct page **plp, **ppp, *pp;
struct anon **app;
u_int off;
addr_t a;
struct vpage *vpage;
u_int vpprot, prot;
int err;
struct page *pl[PVN_GETPAGE_NUM + 1];
u_int plsz, pl_alloc_sz;
int page;
/*
* First handle the easy stuff
*/
if (type == F_SOFTUNLOCK) {
segvn_softunlock(seg, addr, len, rw);
return (0);
}
/*
* If we have the same protections for the entire segment,
* ensure that the access being attempted is legitimate.
*/
if (svd->pageprot == 0) {
u_int protchk;
switch (rw) {
case S_READ:
protchk = PROT_READ;
break;
case S_WRITE:
protchk = PROT_WRITE;
break;
case S_EXEC:
protchk = PROT_EXEC;
break;
case S_OTHER:
default:
protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
break;
}
if ((svd->prot & protchk) == 0)
return (FC_PROT); /* illegal access type */
}
/*
* Check to see if we need to allocate an anon_map structure.
*/
if (svd->amp == NULL && (svd->vp == NULL ||
(rw == S_WRITE && svd->type == MAP_PRIVATE))) {
anonmap_alloc(seg, 0);
}
page = seg_page(seg, addr);
/*
* Lock the anon_map if the segment has private pages. This is
* necessary to ensure that updates of the anon array associated
* with the anon_map are atomic.
*/
if (svd->amp == NULL)
app = NULL;
else {
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index + page];
}
if (svd->vpage == NULL)
vpage = NULL;
else
vpage = &svd->vpage[page];
plp = pl;
*plp = (struct page *)NULL;
pl_alloc_sz = 0;
off = svd->offset + (addr - seg->s_base);
/*
* See if we need to call VOP_GETPAGE for
* *any* of the range being faulted on.
* We can skip all of this work if there
* was no original vnode.
*/
if (svd->vp != NULL) {
u_int vp_off, vp_len;
int dogetpage;
if (len > ptob((sizeof (pl) / sizeof (pl[0])) - 1)) {
/*
* Page list won't fit in local array,
* allocate one of the needed size.
*/
pl_alloc_sz = (btop(len) + 1) * sizeof (struct page *);
plp = (struct page **)new_kmem_zalloc(
pl_alloc_sz, KMEM_SLEEP);
plsz = len;
} else
plsz = PVN_GETPAGE_SZ;
vp_off = off;
vp_len = len;
if (app == NULL)
dogetpage = 1;
else if (len <= PAGESIZE)
dogetpage = (*app == NULL); /* inline non_anon() */
else
dogetpage = non_anon(app, &vp_off, &vp_len);
if (dogetpage) {
enum seg_rw arw;
/*
* Need to get some non-anonymous pages.
* We need to make only one call to GETPAGE to do
* this to prevent certain deadlocking conditions
* when we are doing locking. In this case
* non_anon() should have picked up the smallest
* range which includes all the non-anonymous
* pages in the requested range. We have to
* be careful regarding which rw flag to pass in
* because on a private mapping, the underlying
* object is never allowed to be written.
*/
if (rw == S_WRITE && svd->type == MAP_PRIVATE) {
arw = S_READ;
} else {
arw = rw;
}
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_FILE);
err = VOP_GETPAGE(svd->vp, vp_off, vp_len, &vpprot,
plp, plsz, seg, addr + (vp_off - off), arw,
svd->cred);
if (err) {
if (svd->amp != NULL)
AMAP_UNLOCK(svd->amp);
segvn_pagelist_rele(plp);
if (pl_alloc_sz)
kmem_free((caddr_t)plp, pl_alloc_sz);
return (FC_MAKE_ERR(err));
}
if (svd->type == MAP_PRIVATE)
vpprot &= ~PROT_WRITE;
}
}
/*
* If MADV_SEQUENTIAL has been set for the particular page we
* are faulting on, free behind all pages in the segment and put
* them on the free list.
*/
if ((page > 0) && fltadvice) { /* not if first page in segment */
struct vpage *vpp = NULL;
register u_int pgoff;
int fpage;
u_int fpgoff;
struct vnode *fvp;
struct anon **fap = NULL;
register int s;
if (svd->advice == MADV_SEQUENTIAL ||
(svd->pageadvice &&
vpage->vp_advice == MADV_SEQUENTIAL)) {
pgoff = off - PAGESIZE;
fpage = page - 1;
if (vpage)
vpp = &svd->vpage[fpage];
if (svd->amp)
fap = &svd->amp->anon[svd->anon_index + fpage];
while (pgoff > svd->offset) {
if (svd->advice != MADV_SEQUENTIAL &&
(!svd->pageadvice || (vpage &&
vpp->vp_advice != MADV_SEQUENTIAL)))
break;
/*
* if this is an anon page, we must find the
* correct <vp, offset> for it
*/
if (svd->amp && *fap)
swap_xlate(*fap, &fvp, &fpgoff);
else {
fpgoff = pgoff;
fvp = svd->vp;
}
s = splvm();
pp = page_exists(fvp, fpgoff);
if (pp != NULL &&
(!(pp->p_free || pp->p_intrans))) {
/*
* we should build a page list
* to kluster putpages XXX
*/
(void) splx(s);
(void)VOP_PUTPAGE(fvp, fpgoff, PAGESIZE,
(B_DONTNEED|B_FREE|B_ASYNC|B_DELWRI),
svd->cred);
if (vpp != NULL)
vpp--;
if (fap != NULL)
fap--;
pgoff -= PAGESIZE;
} else {
(void) splx(s);
break;
}
}
}
}
/*
* N.B. at this time the plp array has all the needed non-anon
* pages in addition to (possibly) having some adjacent pages.
*/
/*
* Ok, now loop over the address range and handle faults
*/
for (a = addr; a < addr + len; a += PAGESIZE, off += PAGESIZE) {
err = segvn_faultpage(seg, a, off, app, vpage, plp, vpprot,
type, rw);
if (err) {
if (svd->amp != NULL)
AMAP_UNLOCK(svd->amp);
if (type == F_SOFTLOCK && a > addr)
segvn_softunlock(seg, addr, (u_int)(a - addr),
S_OTHER);
segvn_pagelist_rele(plp);
if (pl_alloc_sz)
kmem_free((caddr_t)plp, pl_alloc_sz);
return (err);
}
if (app)
app++;
if (vpage)
vpage++;
}
/*
* Now handle any other pages in the list returned.
* If the page can be used, load up the translations now.
* Note that the for loop will only be entered if "plp"
* is pointing to a non-NULL page pointer which means that
* VOP_GETPAGE() was called and vpprot has been initialized.
*/
if (svd->pageprot == 0)
prot = svd->prot & vpprot;
for (ppp = plp; (pp = *ppp) != NULL; ppp++) {
int diff;
if (pp == PAGE_HANDLED)
continue;
diff = pp->p_offset - svd->offset;
if (diff >= 0 && diff < seg->s_size) {
ASSERT(svd->vp == pp->p_vnode);
page = btop(diff);
if (svd->pageprot)
prot = svd->vpage[page].vp_prot & vpprot;
if (svd->amp == NULL ||
svd->amp->anon[svd->anon_index + page] == NULL) {
hat_memload(seg, seg->s_base + diff, pp,
prot, 0);
}
}
PAGE_RELE(pp);
}
if (svd->amp != NULL)
AMAP_UNLOCK(svd->amp);
if (pl_alloc_sz)
kmem_free((caddr_t)plp, pl_alloc_sz);
return (0);
}
/*
* This routine is used to start I/O on pages asynchronously.
*/
static faultcode_t
segvn_faulta(seg, addr)
struct seg *seg;
addr_t addr;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct anon **app;
int err;
if (svd->amp != NULL) {
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index + seg_page(seg, addr)];
if (*app != NULL) {
err = anon_getpage(app, (u_int *)NULL,
(struct page **)NULL, 0, seg, addr, S_READ,
svd->cred);
AMAP_UNLOCK(svd->amp);
if (err)
return (FC_MAKE_ERR(err));
return (0);
}
AMAP_UNLOCK(svd->amp);
}
if (svd->vp == NULL)
return (0); /* zfod page - do nothing now */
trace3(TR_SEG_GETPAGE, seg, addr, TRC_SEG_FILE);
err = VOP_GETPAGE(svd->vp, svd->offset + (addr - seg->s_base),
PAGESIZE, (u_int *)NULL, (struct page **)NULL, 0, seg, addr,
S_OTHER, svd->cred);
if (err)
return (FC_MAKE_ERR(err));
return (0);
}
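/*
 * Record per-page reference and modify bits in the segment's vpage
 * array, presumably as the hat layer syncs or unloads translations
 * (AHAT_UNLOAD); ski keeps its own copy of the bits.
 */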
static int
segvn_hatsync(seg, addr, ref, mod, flags)
struct seg *seg;
addr_t addr;
u_int ref, mod;
u_int flags;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register int page;
register struct vpage *vpage;
register struct anon *ap;
/* For now, only ski allocates vpage structures, on demand */
if (svd->vpage == NULL && seg->s_as->a_ski) {
svd->vpage = (struct vpage *)
new_kmem_zalloc(vpgtob(seg_pages(seg)), KMEM_SLEEP);
}
if (svd->vpage == NULL)
return (0);
page = seg_page(seg, addr);
vpage = &svd->vpage[page];
if (flags & AHAT_UNLOAD) {
if (svd->amp && (ap = svd->amp->anon[svd->anon_index + page]))
anon_unloadmap(ap, ref, mod);
else
pvn_unloadmap(svd->vp,
svd->offset + (addr - seg->s_base), ref, mod);
}
vpage->vp_mod |= mod;
vpage->vp_ref |= ref;
/* Ski keeps separate vpage ref and mod info */
if (seg->s_as->a_ski) {
vpage->vp_ski_mod |= mod;
vpage->vp_ski_ref |= ref;
}
return (0);
}
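/*
 * Change the protections of the range [addr, addr + len) to prot,
 * creating per-page protections when the change does not cover the
 * entire segment.
 */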
static int
segvn_setprot(seg, addr, len, prot)
register struct seg *seg;
register addr_t addr;
register u_int len, prot;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct vpage *vp, *evp;
if ((svd->maxprot & prot) != prot)
return (-1); /* violated maxprot */
/*
* If it's a private mapping and we're making it writable
* and no swap space has been reserved, have to reserve
* it all now. If it's private and we're removing write
* permission on the entire segment and we haven't modified
* any pages, we can release the swap space.
*/
if (svd->type == MAP_PRIVATE) {
if (prot & PROT_WRITE) {
if (svd->swresv == 0) {
if (anon_resv(seg->s_size) == 0)
return (-1);
svd->swresv = seg->s_size;
}
} else {
if (svd->swresv != 0 && svd->amp == NULL &&
addr == seg->s_base && len == seg->s_size &&
svd->pageprot == 0) {
anon_unresv(svd->swresv);
svd->swresv = 0;
}
}
}
if (addr == seg->s_base && len == seg->s_size && svd->pageprot == 0) {
if (svd->prot == prot)
return (0); /* all done */
svd->prot = prot;
} else {
/*
* A vpage structure exists or else the change does not
* involve the entire segment. Establish a vpage structure
* if none is there. Then, for each page in the range,
* adjust its individual permissions. Note that write-
* enabling a MAP_PRIVATE page can affect the claims for
* locked down memory. Overcommitting memory terminates
* the operation.
*/
segvn_vpage(seg);
evp = &svd->vpage[seg_page(seg, addr + len)];
for (vp = &svd->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
if (vp->vp_pplock && (svd->type == MAP_PRIVATE)) {
if ((vp->vp_prot ^ prot) & PROT_WRITE)
if (prot & PROT_WRITE) {
if (!page_addclaim(1))
break;
} else
page_subclaim(1);
}
vp->vp_prot = prot;
}
/*
* Did we terminate prematurely? If so, simply unload
* the translations to the things we've updated so far.
*/
if (vp != evp) {
len = (vp - &svd->vpage[seg_page(seg, addr)]) *
PAGESIZE;
if (len != 0)
hat_unload(seg, addr, len);
return (-1);
}
}
if (((prot & PROT_WRITE) != 0) || ((prot & ~PROT_USER) == PROT_NONE)) {
/*
* Either private data with write access (in which case
* we need to throw out all former translations so that
* we get the right translations set up on fault and we
* don't allow write access to any copy-on-write pages
* that might be around) or we don't have permission
* to access the memory at all (in which case we have to
* unload any current translations that might exist).
*/
hat_unload(seg, addr, len);
} else {
/*
* A shared mapping or a private mapping in which write
* protection is going to be denied - just change all the
* protections over the range of addresses in question.
*/
hat_chgprot(seg, addr, len, prot);
}
return (0);
}
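/*
 * Check that the mapping over the range [addr, addr + len) allows
 * at least the access described by prot.
 */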
static int
segvn_checkprot(seg, addr, len, prot)
register struct seg *seg;
register addr_t addr;
register u_int len, prot;
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct vpage *vp, *evp;
/*
* If segment protection can be used, simply check against them.
*/
if (svd->pageprot == 0)
return (((svd->prot & prot) != prot) ? -1 : 0);
/*
* Have to check down to the vpage level.
*/
evp = &svd->vpage[seg_page(seg, addr + len)];
for (vp = &svd->vpage[seg_page(seg, addr)]; vp < evp; vp++)
if ((vp->vp_prot & prot) != prot)
return (-1);
return (0);
}
/*
* Check to see if it makes sense to do kluster/read ahead to
* addr + delta relative to the mapping at addr. We assume here
* that delta is a signed PAGESIZE'd multiple (which can be negative).
*
* For segvn, we currently "approve" of the action if we are
* still in the segment and it maps from the same vp/off,
* or if the advice stored in segvn_data or vpages allows it.
* Currently, klustering is disallowed only if MADV_RANDOM is set.
*/
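/*
 * For example, a caller checking read ahead to the next page would
 * use segvn_kluster(seg, addr, PAGESIZE), and to the previous page
 * segvn_kluster(seg, addr, -PAGESIZE).
 */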
static int
segvn_kluster(seg, addr, delta)
register struct seg *seg;
register addr_t addr;
int delta;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct anon *oap, *ap;
register struct vpage *bvpp, *evpp, *tvpp;
register int pd;
register u_int page;
struct vnode *vp1, *vp2;
u_int off1, off2;
if (addr + delta < seg->s_base ||
addr + delta >= (seg->s_base + seg->s_size))
return (-1); /* exceeded segment bounds */
pd = delta >> PAGESHIFT; /* assumes that sign bit is preserved */
page = seg_page(seg, addr);
/*
* Check to see if any of the pages in the range have advice
* set that prevents klustering
*/
switch (svd->advice) { /* if advice set for entire segment */
case MADV_RANDOM:
return (-1);
case MADV_SEQUENTIAL:
case MADV_NORMAL:
case MADV_WILLNEED:
case MADV_DONTNEED:
default:
break;
}
if (svd->pageadvice && svd->vpage) {
bvpp = &svd->vpage[page];
evpp = &svd->vpage[page + pd];
if (evpp < bvpp) { /* searching backwards */
tvpp = bvpp;
bvpp = evpp;
evpp = tvpp;
}
for (; bvpp < evpp; bvpp++) {
switch (bvpp->vp_advice) {
case MADV_RANDOM:
return (-1);
case MADV_SEQUENTIAL:
case MADV_NORMAL:
case MADV_WILLNEED:
case MADV_DONTNEED:
default:
break;
}
}
}
if (svd->type == MAP_SHARED)
return (0); /* shared mapping - all ok */
if (svd->amp == NULL)
return (0); /* off original vnode */
page += svd->anon_index;
oap = svd->amp->anon[page];
ap = svd->amp->anon[page + pd];
if ((oap == NULL && ap != NULL) || (oap != NULL && ap == NULL))
return (-1); /* one with and one without an anon */
if (oap == NULL) /* implies that ap == NULL */
return (0); /* off original vnode */
/*
* Now we know we have two anon pointers - check to
* see if they happen to be properly allocated.
*/
swap_xlate(ap, &vp1, &off1);
swap_xlate(oap, &vp2, &off2);
if (!VOP_CMP(vp1, vp2) || off1 - off2 != delta)
return (-1);
return (0);
}
/*
* Swap the pages of seg out to secondary storage, returning the
* number of bytes of storage freed.
*
* The basic idea is first to unload all translations and then to call
* VOP_PUTPAGE for all newly-unmapped pages, to push them out to the
* swap device. Pages to which other segments have mappings will remain
* mapped and won't be swapped. Our caller (as_swapout) has already
* performed the unloading step.
*
* The value returned is intended to correlate well with the process's
* memory requirements. However, there are some caveats:
* 1) When given a shared segment as argument, this routine will
* only succeed in swapping out pages for the last sharer of the
* segment. (Previous callers will only have decremented mapping
* reference counts.)
* 2) We assume that the hat layer maintains a large enough translation
* cache to capture process reference patterns.
*/
static u_int
segvn_swapout(seg)
struct seg *seg;
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
struct anon_map *amp = svd->amp;
register u_int pgcnt = 0;
u_int npages;
register u_int page;
/*
* Find pages unmapped by our caller and force them
* out to the virtual swap device.
*/
npages = seg->s_size >> PAGESHIFT;
if (amp != NULL)
AMAP_LOCK(amp);
for (page = 0; page < npages; page++) {
register struct page *pp;
register struct anon **app;
struct vnode *vp;
u_int off;
register int s;
/*
* Obtain <vp, off> pair for the page, then look it up.
*
* Note that this code is willing to consider regular
* pages as well as anon pages. Is this appropriate here?
*/
if (amp != NULL &&
*(app = &amp->anon[svd->anon_index + page]) != NULL) {
swap_xlate(*app, &vp, &off);
} else {
off = svd->offset + ptob(page);
vp = svd->vp;
}
s = splvm();
if ((pp = page_exists(vp, off)) == NULL || pp->p_free) {
(void) splx(s);
continue;
}
(void) splx(s);
/*
* Skip if page is logically unavailable for removal.
*/
if (pp->p_lckcnt != 0)
continue;
/*
* Examine the page to see whether it can be tossed out,
* keeping track of how many we've found.
*/
if (pp->p_keepcnt != 0) {
/*
* If the page is marked as in transit going out
* and has no mappings, it's very likely that
* the page is in transit because of klustering.
* Assume this is so and take credit for it here.
*/
if (pp->p_intrans && !pp->p_pagein && !pp->p_mapping)
pgcnt++;
continue;
}
if (pp->p_mapping != NULL)
continue;
/*
* Since the keepcnt was 0 the page should not be
* in a gone state nor directly or indirectly
* involved in any I/O at this time.
*/
/*
* No longer mapped -- we can toss it out. How
* we do so depends on whether or not it's dirty.
*
* XXX: Need we worry about locking between the
* time of the hat_pagesync call and the actions
* that depend on its result?
*/
hat_pagesync(pp);
if (pp->p_mod && pp->p_vnode) {
/*
* We must clean the page before it can be
* freed. Setting B_FREE will cause pvn_done
* to free the page when the i/o completes.
* XXX: This also causes it to be accounted
* as a pageout instead of a swap: need
* B_SWAPOUT bit to use instead of B_FREE.
*/
(void) VOP_PUTPAGE(vp, off, PAGESIZE, B_ASYNC | B_FREE,
svd->cred);
} else {
/*
* The page was clean. Lock it and free it.
*
* XXX: Can we ever encounter modified pages
* with no associated vnode here?
*/
page_lock(pp);
page_free(pp, 0);
}
/*
* Credit now even if i/o is in progress.
*/
pgcnt++;
}
if (amp != NULL)
AMAP_UNLOCK(amp);
return (ptob(pgcnt));
}
/*
* Synchronize primary storage cache with real object in virtual memory.
*/
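/*
 * MS_ASYNC requests asynchronous writes; MS_INVALIDATE requests that
 * cached pages be invalidated, which fails with EPERM if any page in
 * the range is locked.
 */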
static int
segvn_sync(seg, addr, len, flags)
struct seg *seg;
register addr_t addr;
u_int len;
u_int flags;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct anon **app;
register u_int offset;
struct vpage *vpp = svd->vpage;
addr_t eaddr;
int bflags;
int err;
int new_len;
int lock_error = 0;
if (svd->vp == NULL)
return (0); /* all anonymous memory - nothing to do */
offset = svd->offset + (addr - seg->s_base);
bflags = B_FORCE | ((flags & MS_ASYNC) ? B_ASYNC : 0) |
((flags & MS_INVALIDATE) ? B_INVAL : 0);
/*
* See if any of these pages are locked -- if so, then we
* will have to truncate an invalidate request at the first
* locked one.
*/
if ((flags & MS_INVALIDATE) && vpp)
for (new_len = 0; new_len < len; new_len += PAGESIZE)
if ((vpp++)->vp_pplock) {
/*
* A page is locked. Reset the length
* of this operation. If the result is
* zero, simply return now.
*/
lock_error = EPERM;
if ((len = new_len) == 0)
return (lock_error);
break;
}
if (svd->amp == NULL) {
/*
* No anonymous pages, just use one big request.
*/
err = VOP_PUTPAGE(svd->vp, offset, len, bflags, svd->cred);
} else {
err = 0;
AMAP_LOCK(svd->amp);
app = &svd->amp->anon[svd->anon_index + seg_page(seg, addr)];
for (eaddr = addr + len; addr < eaddr;
addr += PAGESIZE, offset += PAGESIZE) {
if (*app++ != NULL)
continue; /* don't sync anonymous pages */
/*
* XXX - Should ultimately try to kluster
* calls to VOP_PUTPAGE for performance.
*/
err = VOP_PUTPAGE(svd->vp, offset, PAGESIZE, bflags,
svd->cred);
if (err)
break;
}
AMAP_UNLOCK(svd->amp);
}
return (err ? err : lock_error);
}
/*
* Determine if we have data corresponding to pages in the
* primary storage virtual memory cache (i.e., "in core").
* N.B. Assumes things are "in core" if page structs exist.
*/
static int
segvn_incore(seg, addr, len, vec)
struct seg *seg;
addr_t addr;
u_int len;
char *vec;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct anon **app;
struct vnode *vp;
u_int offset;
u_int p = seg_page(seg, addr);
u_int ep = seg_page(seg, addr + len);
u_int v;
if (svd->amp != NULL)
AMAP_LOCK(svd->amp);
for (v = 0; p < ep; p++, addr += PAGESIZE, v += PAGESIZE) {
if ((svd->amp != NULL) &&
(*(app = &svd->amp->anon[svd->anon_index + p]) != NULL)) {
swap_xlate(*app, &vp, &offset);
*vec++ = (page_exists(vp, offset) != NULL);
} else if (vp = svd->vp) {
offset = svd->offset + (addr - seg->s_base);
*vec++ = (page_exists(vp, offset) != NULL);
} else {
*vec++ = 0;
}
}
if (svd->amp != NULL)
AMAP_UNLOCK(svd->amp);
return (v);
}
/*
* Lock down (or unlock) pages mapped by this segment.
*/
static int
segvn_lockop(seg, addr, len, op)
struct seg *seg;
addr_t addr;
u_int len;
int op;
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct vpage *vpp = svd->vpage;
register struct vpage *evp;
struct page *pp;
struct anon **app;
u_int offset;
u_int off;
int claim;
int err;
struct vnode *vp;
/*
* If we're locking, then we must create a vpage structure if
* none exists. If we're unlocking, then check to see if there
* is a vpage -- if not, then we could not have locked anything.
*/
if (vpp == NULL)
if (op == MC_LOCK)
segvn_vpage(seg);
else
return (0);
/*
* Set up bounds for looping over the range of pages. Guard
* against lazy creation of the anonymous data vector (i.e.,
* previously unreferenced mapping to swap space) by lazily
* testing for its existence.
*/
app = NULL;
offset = svd->offset + (addr - seg->s_base);
evp = &svd->vpage[seg_page(seg, addr + len)];
/*
* Loop over all pages in the range. Process if we're locking and
* page has not already been locked in this mapping; or if we're
* unlocking and the page has been locked.
*/
for (vpp = &svd->vpage[seg_page(seg, addr)]; vpp < evp; vpp++) {
if (((op == MC_LOCK) && (!vpp->vp_pplock)) ||
((op == MC_UNLOCK) && (vpp->vp_pplock))) {
/*
* If we're locking, softfault the page in memory.
*/
if (op == MC_LOCK)
if (segvn_fault(seg, addr, PAGESIZE,
F_SOFTLOCK, S_OTHER) != 0)
return (EIO);
/*
* Check for lazy creation of anonymous
* data vector.
*/
if (app == NULL)
if (svd->amp != NULL)
app = &svd->amp->anon[svd->anon_index +
seg_page(seg, addr)];
/*
* Get name for page, accounting for
* existence of private copy.
*/
if (app != NULL && *app != NULL) {
AMAP_LOCK(svd->amp);
swap_xlate(*app, &vp, &off);
AMAP_UNLOCK(svd->amp);
claim = 0;
} else {
vp = svd->vp;
off = offset;
claim = ((vpp->vp_prot & PROT_WRITE) != 0) &&
(svd->type == MAP_PRIVATE);
}
/*
* Get page frame. It's ok if the page is
* not available when we're unlocking, as this
* may simply mean that a page we locked got
* truncated out of existence after we locked it.
*/
if ((pp = page_lookup(vp, off)) == NULL)
if (op == MC_LOCK)
panic("segvn_lockop: no page");
/*
* Perform page-level operation appropriate to
* operation. If locking, undo the SOFTLOCK
* performed to bring the page into memory
* after setting the lock. If unlocking,
* and no page was found, account for the claim
* separately.
*/
if (op == MC_LOCK) {
err = page_pp_lock(pp, claim, 1);
(void) segvn_fault(seg, addr, PAGESIZE,
F_SOFTUNLOCK, S_OTHER);
if (!err)
return (EAGAIN);
vpp->vp_pplock = 1;
} else {
if (pp)
page_pp_unlock(pp, claim);
else
page_subclaim(claim);
vpp->vp_pplock = 0;
}
}
addr += PAGESIZE;
offset += PAGESIZE;
if (app)
app++;
}
return (0);
}
/*
* Set advice from user for specified pages
* There are 5 types of advice:
* MADV_NORMAL - Normal (default) behavior (whatever that is)
* MADV_RANDOM - Random page references
* do not allow readahead or 'klustering'
* MADV_SEQUENTIAL - Sequential page references
* Pages previous to the one currently being
* accessed (determined by fault) are 'not needed'
* and are freed immediately
* MADV_WILLNEED - Pages are likely to be used (fault ahead in mctl)
* MADV_DONTNEED - Pages are not needed (synced out in mctl)
*/
static int
segvn_advise(seg, addr, len, behav)
struct seg *seg;
addr_t addr;
u_int len;
int behav;
{
struct segvn_data *svd = (struct segvn_data *)seg->s_data;
int page;
/*
* If advice is to be applied to entire segment,
* use advice field in seg_data structure
* otherwise use appropriate vpage entry.
*/
if ((addr == seg->s_base) && (len == seg->s_size)) {
switch (behav) {
case MADV_SEQUENTIAL:
/* unloading mapping guarantees detection in segvn_fault */
hat_unload(seg, addr, len);
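/* FALLTHROUGH */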
case MADV_NORMAL:
case MADV_RANDOM:
svd->advice = behav;
svd->pageadvice = 0;
break;
case MADV_WILLNEED:
case MADV_DONTNEED:
break; /* handled in mctl */
default:
return (EINVAL);
}
return (0);
} else
svd->advice = -1; /* convenience to check if advice set */
page = seg_page(seg, addr);
if ((svd->vpage) == NULL)
segvn_vpage(seg);
switch (behav) {
register struct vpage *bvpp, *evpp;
case MADV_SEQUENTIAL:
hat_unload(seg, addr, len);
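/* FALLTHROUGH */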
case MADV_NORMAL:
case MADV_RANDOM:
bvpp = &svd->vpage[page];
evpp = &svd->vpage[page + (len >> PAGESHIFT)];
for (; bvpp < evpp; bvpp++)
bvpp->vp_advice = behav;
svd->pageadvice = 1;
break;
case MADV_WILLNEED:
case MADV_DONTNEED:
break;
default:
return (EINVAL);
}
return (0);
}
/*
* Create a vpage structure for this seg.
*/
static void
segvn_vpage(seg)
struct seg *seg;
{
register struct segvn_data *svd = (struct segvn_data *)seg->s_data;
register struct vpage *vp, *evp;
/*
* If no vpage structure exists, allocate one. Copy the protections
* from the segment itself to the individual pages.
*/
if (svd->vpage == NULL) {
svd->pageprot = 1;
svd->vpage = (struct vpage *)
new_kmem_zalloc((u_int)vpgtob(seg_pages(seg)), KMEM_SLEEP);
evp = &svd->vpage[seg_page(seg, seg->s_base + seg->s_size)];
for (vp = svd->vpage; vp < evp; vp++)
vp->vp_prot = svd->prot;
}
}