/*
 * Copyright (c) 1988, 1989, 1990, 1993, by Sun Microsystems, Inc.
 */

#pragma ident	"@(#)page_lock.c	1.27	95/09/13 SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <vm/page.h>
#include <vm/seg_enum.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 */
#if NCPU < 4
#define	PH_TABLE_SIZE	16
#define	VP_SHIFT	7
#else
#define	PH_TABLE_SIZE	128
#define	VP_SHIFT	9
#endif

#define	PSE_SHIFT	6	/* next power of 2 bigger than page_t */
#define	PSE_TABLE_SIZE	32	/* number of mutexes to have */

kmutex_t	ph_mutex[PH_TABLE_SIZE];
kmutex_t	pse_mutex[PSE_TABLE_SIZE];
u_int		ph_mutex_shift;

#define	PAGE_SE_MUTEX(pp) \
	&pse_mutex[(((u_int)pp) >> PSE_SHIFT) & (PSE_TABLE_SIZE - 1)]

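/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): because PSE_SHIFT is the next power of two above
 * sizeof (page_t), page_t structures 64 bytes apart select adjacent
 * mutexes, e.g. for two hypothetical page_t addresses:
 *
 *	pp = 0x1000:  (0x1000 >> 6) & 31 == 64 & 31 == 0  -> pse_mutex[0]
 *	pp = 0x1040:  (0x1040 >> 6) & 31 == 65 & 31 == 1  -> pse_mutex[1]
 *
 * so neighboring pages rarely contend for the same p_selock mutex.
 */
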
/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)

#define	VP_HASH_FUNC(vp) \
	((((u_int)(vp) >> 6) + \
	((u_int)(vp) >> 8) + \
	((u_int)(vp) >> 10) + \
	((u_int)(vp) >> 12)) \
	& (VPH_TABLE_SIZE - 1))

extern struct vnode kvp;

kmutex_t vph_mutex[VPH_TABLE_SIZE + 2];

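/*
 * Worked example (added for clarity, not in the original source):
 * with NCPU >= 4, VP_SHIFT is 9, so VPH_TABLE_SIZE = 2 << 9 = 1024.
 * For a hypothetical vnode address vp = 0x1000:
 *
 *	(0x1000 >> 6) + (0x1000 >> 8) + (0x1000 >> 10) + (0x1000 >> 12)
 *	    = 64 + 16 + 4 + 1 = 85
 *
 * and 85 & 1023 == 85, so that vnode's page list is protected by
 * vph_mutex[85].  Summing several shifted copies of the address mixes
 * more address bits into the index than a single shift would.
 */
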
/*
 * Initialize the locks used by the Virtual Memory Management system.
 *
 * These include the page_freelock, page_llock, and the three arrays
 * protecting the hash lists, the vp->v_page lists and the p_selock
 * fields of the page_t structures.
 *
 * page_hashsz gets set up at startup time.
 */
void
page_lock_init()
{
	u_int i;
	char buf[100];
	extern void page_create_init();

	/*
	 * Initialize the global "page_freelock" lock.
	 */
	mutex_init(&page_freelock, "page_freelock", MUTEX_DEFAULT, DEFAULT_WT);

	/*
	 * Initialize the global "page_llock".
	 */
	mutex_init(&page_llock, "page struct lock", MUTEX_DEFAULT, DEFAULT_WT);

	/*
	 * Initialize each mutex in the ph_mutex[] array.
	 */
	ph_mutex_shift = highbit(page_hashsz / PH_TABLE_SIZE);

	for (i = 0; i < PH_TABLE_SIZE; i++) {
		(void) sprintf(buf, "ph_mutex %d", i);
		mutex_init(&ph_mutex[i], buf, MUTEX_DEFAULT, DEFAULT_WT);
	}

	/*
	 * Initialize each mutex in the vph_mutex[] array.
	 *
	 * The `plus two' is to make room for private mutexes for
	 * the kvp and swapfsvp vnodes.
	 */
	for (i = 0; i < VPH_TABLE_SIZE + 2; i++) {
		(void) sprintf(buf, "vph_mutex %d", i);
		mutex_init(&vph_mutex[i], buf, MUTEX_DEFAULT, DEFAULT_WT);
	}

	/*
	 * Initialize each mutex in the pse_mutex[] array.
	 */
	for (i = 0; i < PSE_TABLE_SIZE; i++) {
		(void) sprintf(buf, "pse_mutex %d", i);
		mutex_init(&pse_mutex[i], buf, MUTEX_DEFAULT, DEFAULT_WT);
	}

	/*
	 * And finally, go set up the locks for page_create().
	 */
	page_create_init();
}

#ifdef VM_STATS
u_int	vph_kvp_count;
u_int	vph_swapfsvp_count;
u_int	vph_other;
#endif /* VM_STATS */

/*
 * Initialize a shared/exclusive lock to the unlocked state.
 * The name, type, and argument parameters are unused in this
 * implementation.
 */
/*ARGSUSED*/
void
se_init(lock, str, type, arg)
	selock_t *lock;
	caddr_t str;
	se_type_t type;
	void *arg;
{
	*lock = 0;
}

#ifdef VM_STATS
u_int	page_lock_count;
u_int	page_lock_miss;
u_int	page_lock_miss_lock;
u_int	page_lock_reclaim;
u_int	page_lock_bad_reclaim;
u_int	page_lock_same_page;
u_int	page_lock_upgrade;
u_int	page_lock_upgrade_failed;

u_int	page_trylock_locked;
u_int	page_trylock_missed;

u_int	page_try_reclaim_upgrade;
#endif /* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 * 0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(pp, se, lock, reclaim)
	register page_t *pp;
	register se_t se;
	kmutex_t *lock;
	reclaim_t reclaim;
{
	register int retval;
	kmutex_t *pse;
	int upgraded;
	int reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	pse = PAGE_SE_MUTEX(pp);
	mutex_enter(pse);

	if ((reclaim == P_RECLAIM) && (pp->p_free)) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock == -1.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail se_trylock()
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * the following se_trylock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, so force the upgrade now.
			 * Again, we will fail the following se_trylock and
			 * block if the page is not free.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (!se_trylock(&pp->p_selock, se)) {

		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}

		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured: since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation), we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
		retval = 0;
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (i.e., it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((pp->p_free) && se_excl_assert(&pp->p_selock)) : 1);
		mutex_exit(pse);

		retval = 1;

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 * if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}

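/*
 * Illustrative caller sketch (added for clarity; the PAGE_HASH_MUTEX,
 * PAGE_HASH_FUNC, and PAGE_HASH_SEARCH names below are assumptions
 * about the surrounding VM code, not defined in this file).  A lookup
 * routine typically holds the hash chain mutex while finding the page
 * and passes it as `lock', so page_lock() can drop it before blocking
 * and reacquire it afterwards; on failure the caller simply searches
 * the hash chain again:
 *
 *	phm = PAGE_HASH_MUTEX(PAGE_HASH_FUNC(vp, off));
 *	mutex_enter(phm);
 *	PAGE_HASH_SEARCH(PAGE_HASH_FUNC(vp, off), pp, vp, off);
 *	while (pp != NULL && !page_lock(pp, se, phm, P_RECLAIM)) {
 *		PAGE_HASH_SEARCH(PAGE_HASH_FUNC(vp, off), pp, vp, off);
 *	}
 *	mutex_exit(phm);
 */
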
/*
 * Read the comments inside of page_lock() carefully.
 */
int
page_try_reclaim_lock(pp, se)
	page_t *pp;
	se_t se;
{
	kmutex_t *pse;
	int rc;

	pse = PAGE_SE_MUTEX(pp);
	mutex_enter(pse);

	if ((se == SE_SHARED) && (pp->p_free)) {
		VM_STAT_ADD(page_try_reclaim_upgrade);
		se = SE_EXCL;
	}
	rc = se_trylock(&pp->p_selock, se);

	mutex_exit(pse);
	return (rc);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(pp, se)
	register page_t *pp;
	register se_t se;
{
	register int retval;
	kmutex_t *pse;

	pse = PAGE_SE_MUTEX(pp);
	mutex_enter(pse);
	retval = se_trylock(&pp->p_selock, se);

	ASSERT(retval ? ((se == SE_EXCL) ? pp->p_selock == -1 :
	    pp->p_selock > 0) : 1);

	mutex_exit(pse);
	return (retval);
}

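/*
 * Note on the p_selock encoding (summarized from the code in this
 * file; this comment is not in the original source): p_selock == 0
 * means the page is unlocked, p_selock > 0 counts the shared holders,
 * and p_selock == -1 marks an exclusive holder, as the ASSERT in
 * page_trylock() above shows.  page_unlock() below decodes these
 * three cases.
 */
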
/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(pp)
	register page_t *pp;
{
	kmutex_t *pse;
	selock_t se;

	pse = PAGE_SE_MUTEX(pp);
	mutex_enter(pse);
	se = pp->p_selock;
	if (se == 0) {
		cmn_err(CE_PANIC,
		    "page_unlock: page %x is not locked", (int)pp);
	} else if (se < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock = 0;
		cv_broadcast(&pp->p_cv);
	} else if ((pp->p_selock = --se) == 0) {
		cv_broadcast(&pp->p_cv);
	}
	mutex_exit(pse);
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire
 * this page's lock and change the page.  Thus, it is safe to drop
 * the "shared" lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(pp)
	register page_t *pp;
{
	register int retval;
	kmutex_t *pse;

	ASSERT(se_shared_assert(&pp->p_selock));

	pse = PAGE_SE_MUTEX(pp);
	mutex_enter(pse);
	if (pp->p_selock == 1) {
		pp->p_selock = -1;	/* convert to exclusive lock */
		retval = 1;
	} else {
		retval = 0;
	}
	mutex_exit(pse);
	return (retval);
}

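/*
 * Illustrative fallback pattern (a sketch, not from this file; the
 * P_NO_RECLAIM value is assumed to be reclaim_t's "don't reclaim"
 * alternative to P_RECLAIM).  The upgrade only succeeds when the
 * caller is the sole shared holder (p_selock == 1), so a caller that
 * must have exclusive access typically drops the lock, reacquires it
 * exclusively, and re-validates the page identity:
 *
 *	if (!page_tryupgrade(pp)) {
 *		page_unlock(pp);
 *		if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM))
 *			. . . start over, the page may have changed . . .
 *	}
 */
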
/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(pp)
	register page_t *pp;
{
	kmutex_t *pse;

	ASSERT(se_excl_assert(&pp->p_selock));

	pse = PAGE_SE_MUTEX(pp);
	mutex_enter(pse);
	pp->p_selock = 1;
	cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(pp)
	register page_t *pp;
{
	sema_p(&pp->p_iolock);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(pp)
	register page_t *pp;
{
	sema_v(&pp->p_iolock);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(pp)
	register page_t *pp;
{
	return (sema_tryp(&pp->p_iolock));
}

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(pp)
	register page_t *pp;
{
	return (SEMA_HELD(&pp->p_iolock));
}

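/*
 * Illustrative usage (a sketch, not from this file): because the i/o
 * lock is a semaphore rather than a mutex, it can be released by a
 * different thread than the one that acquired it, which suits
 * asynchronous i/o completion paths.  A synchronous caller would
 * simply bracket the physical i/o:
 *
 *	page_io_lock(pp);
 *	. . . issue and wait for the physical i/o on pp . . .
 *	page_io_unlock(pp);
 */
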
/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 */
kmutex_t *
page_vnode_mutex(vp)
	struct vnode *vp;
{
	kmutex_t *mp;

	if (vp == &kvp) {
		mp = &vph_mutex[VPH_TABLE_SIZE + 0];
	} else {
		mp = &vph_mutex[VP_HASH_FUNC(vp)];
	}
	return (mp);

/*
	return (PAGE_VNODE_MUTEX(vp));

#define	PAGE_VNODE_MUTEX(vp) \
	((vp) == &kvp ? &vph_mutex[VPH_TABLE_SIZE + 0] : \
	&vph_mutex[VP_HASH_FUNC(vp)])
*/
}

kmutex_t *
page_se_mutex(pp)
	page_t *pp;
{
	return (&pse_mutex[(((u_int)pp) >> PSE_SHIFT) & (PSE_TABLE_SIZE - 1)]);
}