/*
 * Archive provenance (web-listing residue, not part of the original source):
 *   Arquivotheca.SunOS-4.1.4/sys/os/vm_pageout.c
 *   seta75D ff309bfe1c Init, 2021-10-11 18:37:13 -03:00
 *   514 lines, 13 KiB, C
 */
/* @(#)vm_pageout.c 1.1 94/10/31 SMI */
/*
* Copyright (c) 1987 by Sun Microsystems, Inc.
*/
#include <sys/param.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/vm.h>
#include <sys/trace.h>
#include <machine/pte.h>
#include <machine/vmparam.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
/*
* The following parameters control operation of the page replacement
* algorithm. They are initialized to 0, and then computed at boot time
* based on the size of the system. If they are patched non-zero in
* a loaded vmunix they are left alone and may thus be changed per system
* using adb on the loaded system.
*/
int maxpgio = 0;
int minfree = 0;
int desfree = 0;
int lotsfree = 0;
int slowscan = 0;
int fastscan = 0;
int handspread = 0;
int loopfraction = 2;
/*
* The size of the clock loop.
*/
#define LOOPPAGES (epages - pages)
/* insure non-zero */
#define nz(x) ((x) != 0 ? (x) : 1)
/*
* Set up the paging constants for the clock algorithm.
* Called after the system is initialized and the amount of memory
* and number of paging devices is known.
*
* Threshold constants are defined in ../machine/vmparam.h.
*/
setupclock()
{
/*
* Set up thresholds for paging:
*/
/*
* Lotsfree is threshold where paging daemon turns on.
*/
if (lotsfree == 0)
lotsfree = LOTSFREE / NBPG;
if (lotsfree > LOOPPAGES / LOTSFREEFRACT)
lotsfree = LOOPPAGES / LOTSFREEFRACT;
/*
* Desfree is amount of memory desired free.
* If less than this for extended period, do swapping.
*/
if (desfree == 0)
desfree = DESFREE / NBPG;
if (desfree > LOOPPAGES / DESFREEFRACT)
desfree = LOOPPAGES / DESFREEFRACT;
/*
* Minfree is minimal amount of free memory which is tolerable.
*/
if (minfree == 0)
minfree = MINFREE / NBPG;
if (minfree > desfree / MINFREEFRACT)
minfree = desfree / MINFREEFRACT;
/*
* Maxpgio thresholds how much paging is acceptable.
* This figures that 2/3 busy on an arm is all that is
* tolerable for paging. We assume one operation per disk rev.
* ADDITIONALLY, we assume all this is going to the same
* arm; multiple-spindle systems can perhaps be patched to
* allow higher paging rates.
*/
if (maxpgio == 0)
maxpgio = (DISKRPM * 2) / 3;
/*
* Handspread is distance (in bytes) between front and back
* pageout daemon hands. It must be < the amount of pageable
* memory.
*/
if (handspread == 0)
handspread = HANDSPREAD;
if (handspread >= LOOPPAGES * CLBYTES)
handspread = (LOOPPAGES - 1) * CLBYTES;
/*
* The *scan variables are in units of pages scanned per second.
*/
if (fastscan == 0)
fastscan = LOOPPAGES / loopfraction;
if (fastscan > LOOPPAGES / loopfraction)
fastscan = LOOPPAGES / loopfraction;
if (slowscan == 0)
slowscan = fastscan / 10;
if (slowscan > fastscan / 2)
slowscan = fastscan / 2;
/*
* Make sure that back hand follows front hand by at least
* 1/RATETOSCHEDPAGING seconds. Without this test, it is possible
* for the back hand to look at a page during the same wakeup of
* the pageout daemon in which the front hand cleared its ref bit.
*/
if (handspread < fastscan * CLBYTES)
handspread = fastscan * CLBYTES;
}
/*
* Pageout scheduling.
*
* Schedpaging controls the rate at which the page out daemon runs by
* setting the global variables nscan and desscan RATETOSCHEDPAGING
* times a second. Nscan records the number of pages pageout has examined
* in its current pass; schedpaging resets this value to zero each time
* it runs. Desscan records the number of pages pageout should examine
* in its next pass; schedpaging sets this value based on the amount of
* currently available memory.
*/
#define RATETOSCHEDPAGING 4 /* hz that is */
/*
* Schedule rate for paging.
* Rate is linear interpolation between
* slowscan with lotsfree and fastscan when out of memory.
*/
schedpaging()
{
register int vavail;
nscan = 0;
vavail = freemem - deficit;
if (vavail < 0)
vavail = 0;
if (vavail > lotsfree)
vavail = lotsfree;
desscan = (slowscan * vavail + fastscan * (lotsfree - vavail)) /
nz(lotsfree) / RATETOSCHEDPAGING;
if (freemem < lotsfree) {
trace1(TR_PAGEOUT_CALL, 3);
wakeup((caddr_t)&proc[2]);
}
timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
}
struct buf *bclnlist;
int pageout_asleep = 0;
int pushes;
#define FRONT 1
#define BACK 2
/*
* The page out daemon, which runs as process 2.
*
* As long as there are at least lotsfree pages,
* this process is not run. When the number of free
* pages stays in the range desfree to lotsfree,
* this daemon runs through the pages in the loop
* at a rate determined in schedpaging(). Pageout manages
* two hands on the clock. The front hand moves through
* memory, clearing the reference bit,
* and stealing pages from procs that are over maxrss.
* The back hand travels a distance behind the front hand,
* freeing the pages that have not been referenced in the time
* since the front hand passed. If modified, they are pushed to
* swap before being freed.
*
* For now, this thing is in very rough form.
*/
void
pageout()
{
register struct page *fronthand, *backhand;
register int count;
/*
* Wake up icode() which is waiting on this. This insures that
* pageout is created before init starts running. I use &proc[1]
* as a convient address because nobody uses it so there won't be
* a conflict.
*/
wakeup((caddr_t)&proc[1]);
proc[2].p_flag |= SLOAD | SSYS;
/*
* Set the two clock hands to be separated by a reasonable amount,
* but no more than 360 degrees apart.
*
* N.B.: assumes pages are all contiguous.
*/
backhand = pages;
fronthand = pages + handspread / CLBYTES;
if (fronthand >= epages)
fronthand = epages - 1;
loop:
/*
* XXX: Add trace points to the loop below.
*/
/*
* Before sleeping, look to see if there are any swap I/O headers
* in the ``cleaned'' list that correspond to dirty
* pages that have been pushed asynchronously. If so,
* empty the list by calling cleanup().
*
* N.B.: We guarantee never to block while the cleaned list is nonempty.
*/
(void) spl6();
if (bclnlist != NULL) {
(void) spl0();
cleanup();
goto loop;
}
pageout_asleep = 1;
(void) sleep((caddr_t)&proc[2], PSWP+1);
pageout_asleep = 0;
(void) spl0();
count = 0;
pushes = 0;
trace5(TR_PAGEOUT, 0, nscan, desscan, freemem, lotsfree);
while (nscan < desscan && freemem < lotsfree) {
register int rvfront, rvback;
/*
* If checkpage manages to add a page to the free list,
* we give ourselves another couple of trips around the loop.
*/
if ((rvfront = checkpage(fronthand, FRONT)) == 1)
count = 0;
if ((rvback = checkpage(backhand, BACK)) == 1)
count = 0;
cnt.v_scan++;
/*
* Don't include ineligible pages in the number scanned.
*/
if (rvfront != -1 || rvback != -1)
nscan++;
if (++backhand >= epages) {
trace2(TR_PAGEOUT_WRAP, freemem, BACK);
backhand = pages;
}
if (++fronthand >= epages) {
trace2(TR_PAGEOUT_WRAP, freemem, FRONT);
fronthand = pages;
cnt.v_rev++;
if (count > 2) {
/*
* Extremely unlikely, but we went around
* the loop twice and didn't get anywhere.
* Don't cycle, stop till the next clock tick.
*/
goto loop;
}
count++;
}
}
trace5(TR_PAGEOUT, 1, nscan, desscan, freemem, lotsfree);
goto loop;
}
/*
* An iteration of the clock pointer (hand) around the loop.
* Look at the page at hand. If it is locked (e.g., for physical i/o),
* system (u., page table) or free, then leave it alone. Otherwise,
* if we are running the front hand, turn off the page's reference bit.
* If the proc is over maxrss, we take it. If running the back hand,
* check whether the page has been reclaimed. If not, free the page,
* pushing it to disk first if necessary.
*
* Return values:
* -1 if the page was locked and therefore not a candidate at all,
* 0 if not freed, or
* 1 if we freed it.
*/
int
checkpage (pp, whichhand)
register struct page *pp;
int whichhand;
{
/*
* Reject pages that it doesn't make sense to free.
*/
if (pp->p_lock || pp->p_free || pp->p_intrans ||
pp->p_lckcnt != 0 || pp->p_keepcnt != 0)
return (-1);
/*
* XXX - Where do we simulate reference bits for
* stupid machines (like the vax) that don't have them?
*
* hat_pagesync will turn off ref and mod bits loaded
* into the hardware.
*/
hat_pagesync(pp);
#ifdef MULTIPROCESSOR
/*
* This page may be getting biffed from another processor, so bear in
* mind that it might be referenced or modified anytime from here until
* it gets unmapped. So, we need to handle the case -- gracefully.
*/
#endif MULTIPROCESSOR
if (pp->p_ref) {
trace4(TR_PG_POREF, pp, pp->p_vnode, pp->p_offset, whichhand);
if (whichhand == BACK) {
/*
* Somebody referenced the page since the front
* hand went by, so it's not a candidate for
* freeing up.
*/
return (0);
}
/*
* Somebody referenced the page since the last
* time around. Clear the referenced bit, so when
* the back hand comes around we might have a shot
* at stealing the page.
*/
pg_setref(pp, 0);
/*
* Checking of rss or madvise flags needed here...
*
* If not "well-behaved", fall through into the code
* for not referenced.
*/
return (0);
}
/*
* This page has not been recently referenced, so we
* ought to try to add it to the free list.
*/
if (pp->p_mod && pp->p_vnode) {
/*
* The page is currently dirty, so we have to arrange
* to have it cleaned before it can be freed.
*
* An unkept modified page with no vnode shouldn't really
* happen; but if it does, the page is fair game to be taken.
*/
/*
* Limit pushes to avoid saturating pageout devices.
*/
if (pushes > maxpgio / RATETOSCHEDPAGING) {
trace2(TR_PAGEOUT_MAXPGIO, freemem, nscan);
return (0);
}
/*
* Test for process being swapped out or about to exit?
* [Can't get back to process(es) using the page.]
*/
/*
* The call to VOP_PUTPAGE can fail if we are
* currently out of free pageio buf headers or we have
* some other condition that requires us to allocate
* more memory. Since we don't want to deadlock in
* these case, we cannot risk calling kmem_alloc and
* sleeping waiting for memory. Rather we just hope
* that this call will start the IO, even though in
* some cases it might not. However, we only count
* successful pushes. If we fail, try calling cleanup
* to free up some buffer headers to prevent us from
* looping here.
*/
if (VOP_PUTPAGE(pp->p_vnode, pp->p_offset,
PAGESIZE, B_ASYNC | B_FREE | B_NODELAY,
(struct ucred *)0) == 0) {
trace5(TR_PG_POCLEAN, pp, pp->p_vnode, pp->p_offset,
whichhand, freemem);
pushes++;
/*
* Credit now, even though the page isn't
* yet freed.
*/
return (1);
} else {
cleanup();
return (0);
}
}
/*
* Now we lock the page, unload all the
* translations, and put the page back
* on the free list.
*/
trace5(TR_PG_POFREE, pp, pp->p_vnode, pp->p_offset,
whichhand, freemem);
page_lock(pp);
hat_pageunload(pp);
#ifdef MULTIPROCESSOR
/*
* If someone accessed the page after we did the hat_pagesync
* above, avoid the page_free; This leaves the page properly
* associated with the vnode, so that subsequent accesses by
* processes get the data from memory, and the next time
* checkpage hits this page it will go through the VOP_PUTPAGE
* routine again. This logic is similar to logic in pvn_done
* that reclaims the page on the way out the door.
*/
if (pp->p_mod || pp->p_ref) {
page_unlock(pp);
cnt.v_pgrec++;
return (0);
}
#endif MULTIPROCESSOR
page_free(pp, 0);
cnt.v_dfree++;
return (1); /* freed a page! */
}
/*
* Add a buffer to the ``cleaned'' list. Called from biodone().
*/
swdone(bp)
register struct buf *bp;
{
int s;
s = spl6();
bp->av_forw = bclnlist;
bclnlist = bp;
(void) splx(s);
/*
* Rather than wake up the pageout daemon, look for
* a sleeper on one of the pages associated with the
* buffer and wake it up, with the expectation that
* it will clean the list.
*/
{
register struct page *pp = bp->b_pages;
if (pp) {
do {
if (pp->p_want) {
wakeup((caddr_t)pp);
pp->p_want = 0;
break;
}
pp = pp->p_next;
} while (pp != bp->b_pages);
}
}
}
/*
* Process the list of ``cleaned'' pages
* or asynchronous I/O requests that need
* synchronous cleanup.
*/
cleanup()
{
register struct buf *bp;
register int s;
for (;;) {
s = spl6();
if ((bp = bclnlist) == NULL) {
(void) splx(s);
break;
}
bclnlist = bp->av_forw;
(void) splx(s);
if ((bp->b_flags & B_ASYNC) == 0)
panic("cleanup: not async");
if (bp->b_flags & B_PAGEIO)
pvn_done(bp);
else {
/* bp was B_ASYNC and B_REMAPPED (and not B_PAGEIO) */
if ((bp->b_flags & B_REMAPPED) == 0)
panic("cleanup: not remapped");
bp_mapout(bp);
brelse(bp);
}
}
}