#ident "@(#)spec_vnodeops.c 1.1 94/10/31 SMI"

/*
 * Copyright (c) 1988 by Sun Microsystems, Inc.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/mman.h>
#include <sys/debug.h>
#include <sys/unistd.h>
#include <sys/termios.h>
#include <sys/vmmeter.h>
#include <specfs/snode.h>

#include <krpc/lockmgr.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/swap.h>

static int spec_open();
static int spec_close();
static int spec_rdwr();
static int spec_ioctl();
static int spec_select();
static int spec_getattr();
static int spec_inactive();
static int spec_noop();
static int spec_getpage();
static int spec_putpage();
static int spec_map();
static int spec_dump();
static int spec_cmp();
/*
 * Used directly in fifo_vnodeops
 */
int spec_setattr();
int spec_access();
int spec_link();
int spec_lockctl();
int spec_fsync();
int spec_fid();
int spec_realvp();
int spec_cntl();

struct vnodeops spec_vnodeops = {
	spec_open,
	spec_close,
	spec_rdwr,
	spec_ioctl,
	spec_select,
	spec_getattr,
	spec_setattr,
	spec_access,
	spec_noop,	/* lookup */
	spec_noop,	/* create */
	spec_noop,	/* remove */
	spec_link,
	spec_noop,	/* rename */
	spec_noop,	/* mkdir */
	spec_noop,	/* rmdir */
	spec_noop,	/* readdir */
	spec_noop,	/* symlink */
	spec_noop,	/* readlink */
	spec_fsync,
	spec_inactive,
	spec_lockctl,
	spec_fid,
	spec_getpage,
	spec_putpage,
	spec_map,
	spec_dump,
	spec_cmp,
	spec_realvp,
	spec_cntl,
};

/*
 * open a special file (device)
 * Some weird stuff here having to do with clone and indirect devices:
 * When a file lookup operation happens (e.g. ufs_lookup) and the vnode has
 * type VDEV specvp() is used to return a spec vnode instead. Then when
 * the VOP_OPEN routine is called, we get control here. When we do the
 * device open routine there are several possible strange results:
 * 1) An indirect device will return the error EAGAIN on open and return
 *    a new dev number. We have to make that into a spec vnode and call
 *    open on it again.
 * 2) The clone device driver will return the error EEXIST and return a
 *    new dev number. As above, we build a new vnode and call open again,
 *    explicitly asking the open routine to do a clone open.
 * 3) A clone device will return a new dev number on open but no error.
 *    In this case we just make a new spec vnode out of the new dev number
 *    and return that.
 * The last two cases differ in that the decision to clone arises outside
 * of the target device in 2) and from within in 3).
 *
 * TODO: extend case 2) to apply to all character devices, not just streams
 * devices.
 */
#define	MAX_S_SIZE \
	((1 << sizeof (off_t) * NBBY - DEV_BSHIFT - 1) - 1)
/*ARGSUSED*/
static int
spec_open(vpp, flag, cred)
	struct vnode **vpp;
	int flag;
	struct ucred *cred;
{
	register struct snode *sp;
	dev_t dev;
	dev_t newdev;
	int sflag = 0;
	register int error;

	sp = VTOS(*vpp);

	/*
	 * Do open protocol for special type.
	 */
	dev = sp->s_dev;

	switch ((*vpp)->v_type) {

	case VCHR:
		newdev = dev;
		error = 0;
		for (;;) {
			register struct vnode *nvp;

			dev = newdev;
			if ((u_int)major(dev) >= nchrdev)
				return (ENXIO);

			while (isclosing(dev, (*vpp)->v_type))
				if (sleep((caddr_t)sp, PSLEP|PCATCH))
					return (EINTR);

			if (cdevsw[major(dev)].d_str) {
				/*
				 * Open the stream. Stropen handles
				 * the mechanics of cloning itself.
				 * In particular, it builds a fresh
				 * vnode for the cloned instance and
				 * does streams-specific cross-linking.
				 */
				error = stropen(vpp, flag, sflag);
				sp = VTOS(*vpp);
				break;
			} else
				error = (*cdevsw[major(dev)].d_open)(dev,
				    flag, &newdev);

			/*
			 * If this is an indirect device or a forced clone,
			 * we need to do the open again. In both cases,
			 * we insist that newdev differ from dev, to help
			 * avoid infinite regress.
			 */
			if (newdev == dev ||
			    (error != 0 && error != EAGAIN && error != EEXIST))
				break;

			/*
			 * Allocate new snode with new device. Release old
			 * snode. Set vpp to point to new one. This snode will
			 * go away when the last reference to it goes away.
			 * Warning: if you stat this, and try to match it with
			 * a name in the filesystem you will fail, unless you
			 * had previously put names in that match.
			 */
			nvp = makespecvp(newdev, VCHR);
			sp = VTOS(nvp);
			VN_RELE(*vpp);
			*vpp = nvp;

			/* If we've completed a clone open, we're done. */
			if (error == 0)
				break;
			else
				sflag = error == EEXIST ? CLONEOPEN : 0;
		}
		break;

	case VFIFO:
		printf("spec_open: got a VFIFO???\n");
		/* fall through to... */

	case VSOCK:
		error = EOPNOTSUPP;
		break;

	case VBLK:
		/*
		 * The block device sizing was already done in specvp().
		 * However, we still need to verify that we can open the
		 * block device here (since specvp was called as part of a
		 * "lookup", not an "open", and e.g. "stat"ing a block special
		 * file with an illegal major device number should be legal).
		 *
		 * With loadable drivers, removable media devices, or
		 * metadevices, the block device sizing might need to be
		 * done again, as the open will likely find a changed size
		 * to the device.
		 *
		 * If the special file for a device is opened before the
		 * driver is loaded, or a lookup on /dev is done, there
		 * might be an snode around with s_size == 0. In this case,
		 * we need to resize the device.
		 *
		 * Another way of putting it is that the XXsize function
		 * reports the current size, if any, of a device, and doesn't
		 * imply any other action that the driver will take, while
		 * the open might imply sizing.
		 */

		if ((u_int)major(dev) >= nblkdev)
			error = ENXIO;
		else
			error = (*bdevsw[major(dev)].d_open)(dev, flag);
		if (error == 0) {
			struct snode *sptmp;
			int (*size)() = bdevsw[major(dev)].d_psize;

			sptmp = VTOS(bdevvp(dev));
			if (size != NULL) {
				int rsize = (*size)(dev);
				if (rsize == -1)
					sptmp->s_size = 0;
				else
					sptmp->s_size =
					    dbtob(MIN(rsize, MAX_S_SIZE));
			}
			VN_RELE(STOV(sptmp));
		}
		break;
	default:
		panic("spec_open: type not VCHR or VBLK");
		break;
	}
	if (error == 0)
		sp->s_count++;		/* one more open reference */
	return (error);
}
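
/*
 * Close a special file. spec_open counted each successful open in
 * s_count; drop that reference here and call the driver's close routine
 * only when stillopen() says this was the last open of the device
 * through any [s, v]node. For block devices the cached buffers are
 * flushed and invalidated before the driver close.
 */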

/*ARGSUSED*/
static int
spec_close(vp, flag, count, cred)
	struct vnode *vp;
	int flag;
	int count;
	struct ucred *cred;
{
	register struct snode *sp;
	dev_t dev;

	if (count > 1)
		return (0);

	/*
	 * setjmp in case close is interrupted
	 */
	if (setjmp(&u.u_qsave)) {
		sp = VTOS(vp);	/* recompute - I don't trust setjmp/longjmp */
		sp->s_flag &= ~SCLOSING;
		wakeup((caddr_t)sp);
		return (EINTR);
	}

	sp = VTOS(vp);
	sp->s_count--;			/* one fewer open reference */

	/*
	 * Only call the close routine when the last open
	 * reference through any [s, v]node goes away.
	 */
	if (stillopen(sp->s_dev, vp->v_type))
		return (0);

	dev = sp->s_dev;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Mark this device as closing, so that opens will wait until
		 * the close finishes. Since the close may block, this
		 * prevents an open from getting in while the close is blocked,
		 * and then getting surprised when the close finishes and
		 * potentially clears out the driver's state.
		 *
		 * XXX - really should be done on all devices, but for now we
		 * only do it on streams (as that's the one case where the
		 * close blocks before the close routine is called, and thus
		 * the one case where the close routine really can't protect
		 * itself).
		 */
		/*
		 * If it's a stream, call stream close routine.
		 */
		if (cdevsw[major(dev)].d_str) {
			sp->s_flag |= SCLOSING;
			strclose(vp, flag);
			sp->s_flag &= ~SCLOSING;
			wakeup((caddr_t)sp);
		} else
			(void) (*cdevsw[major(dev)].d_close)(dev, flag);
		break;

	case VBLK:
		/*
		 * On last close of a block device, we flush back
		 * and invalidate any in core buffers to help make
		 * the spec vnode inactive ASAP if it is not currently
		 * held by someone else for something (e.g., swapping).
		 */
		bflush(sp->s_bdevvp);
		binval(sp->s_bdevvp);
		(void) (*bdevsw[major(dev)].d_close)(dev, flag);
		break;

	case VFIFO:
		printf("spec_close: got a VFIFO???\n");
		break;
	}

	return (0);
}

/*
 * read or write a spec vnode
 */
/*ARGSUSED*/
static int
spec_rdwr(vp, uiop, rw, ioflag, cred)
	struct vnode *vp;
	register struct uio *uiop;
	enum uio_rw rw;
	int ioflag;
	struct ucred *cred;
{
	register struct snode *sp;
	register addr_t base;
	register u_int off;
	struct vnode *blkvp;
	dev_t dev;
	register int n, on;
	u_int flags;
	u_int bdevsize;
	int pagecreate;
	int error;
	extern int mem_no;

	sp = VTOS(vp);
	dev = (dev_t)sp->s_dev;
	if (rw != UIO_READ && rw != UIO_WRITE)
		panic("spec_rdwr");
	if (rw == UIO_READ && uiop->uio_resid == 0)
		return (0);
	n = uiop->uio_resid;
	/*
	 * If this I/O will carry us over the 2GB threshold,
	 * switch automatically to block mode if possible.
	 *
	 * XXX We switch if the I/O leaves us exactly at 2GB,
	 * which is arguably wrong, but the old code didn't
	 * allow such I/O's anyway, so there is no compatibility
	 * problem.
	 */
	if (vp->v_type == VCHR && (uiop->uio_fmode & FSETBLK) == 0 &&
	    mem_no != major(dev) && vp->v_stream == NULL &&
	    uiop->uio_offset >= 0 && uiop->uio_offset + n < 0 &&
	    uiop->uio_offset % DEV_BSIZE == 0) {
		uiop->uio_fmode |= FSETBLK;
		uiop->uio_offset = btodb(uiop->uio_offset);
	}
	if (uiop->uio_fmode & FSETBLK) {
		if (n % DEV_BSIZE != 0)
			return (EINVAL);
		n = btodb(n);
	}
	if ((uiop->uio_offset < 0 ||
	    (n != 0 && uiop->uio_offset + n - 1 < 0)) &&
	    !(vp->v_type == VCHR &&
	    (mem_no == major(dev) || vp->v_stream != NULL))) {
		return (EINVAL);
	}

	if (rw == UIO_READ)
		smark(sp, SACC);

	if (vp->v_type == VCHR) {
		if (rw == UIO_READ) {
			if (cdevsw[major(dev)].d_str) {
				int saverr = u.u_error;

				u.u_error = 0;
				strread(vp, uiop);
				error = u.u_error;
				u.u_error = saverr;
			} else
				error = (*cdevsw[major(dev)].d_read)(dev, uiop);
		} else {
			smark(sp, SUPD|SCHG);
			if (cdevsw[major(dev)].d_str) {
				int saverr = u.u_error;

				u.u_error = 0;
				strwrite(vp, uiop);
				error = u.u_error;
				u.u_error = saverr;
			} else
				error = (*cdevsw[major(dev)].d_write)(dev,
				    uiop);
		}
		return (error);
	}

	if (vp->v_type != VBLK)
		return (EOPNOTSUPP);

	if (uiop->uio_resid == 0)
		return (0);

	error = 0;
	blkvp = sp->s_bdevvp;
	bdevsize = sp->s_size;
	do {
		int diff;

		off = uiop->uio_offset & MAXBMASK;
		on = uiop->uio_offset & MAXBOFFSET;
		n = MIN(MAXBSIZE - on, uiop->uio_resid);
		pagecreate = 0;
		diff = bdevsize - uiop->uio_offset;

		if (diff <= 0)
			break;
		if (diff < n)
			n = diff;

		base = segmap_getmap(segkmap, blkvp, off);

		/*
		 * Check to see if we can skip reading in the page
		 * and just allocate the memory. We can do this
		 * if we are going to rewrite the entire mapping
		 * or if we are going to write to end of the device
		 * from the beginning of the mapping.
		 */
		if (rw == UIO_WRITE && (n == MAXBSIZE ||
		    (on == 0 && (off + n) == bdevsize))) {
			SNLOCK(sp);
			segmap_pagecreate(segkmap, base + on, (u_int)n, 0);
			SNUNLOCK(sp);
			pagecreate = 1;
		}

		error = uiomove(base + on, n, rw, uiop);

		if (pagecreate && uiop->uio_offset <
		    roundup(off + on + n, PAGESIZE)) {
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This can happen if we write to the end of the device
			 * or if we had some sort of error during the uiomove.
			 */
			int nzero, nmoved;

			nmoved = uiop->uio_offset - (off + on);
			ASSERT(nmoved >= 0 && nmoved <= n);
			nzero = roundup(on + n, PAGESIZE) - nmoved;
			ASSERT(nzero > 0 && on + nmoved + nzero <= MAXBSIZE);
			(void) kzero(base + on + nmoved, (u_int)nzero);
		}

		if (error == 0) {
			flags = 0;
			if (rw == UIO_WRITE) {
				/*
				 * Force write back for synchronous write cases.
				 */
				if (ioflag & IO_SYNC) {
					flags = SM_WRITE;
				} else if (n + on == MAXBSIZE ||
				    IS_SWAPVP(vp)) {
					/*
					 * Have written a whole block.
					 * Start an asynchronous write and
					 * mark the buffer to indicate that
					 * it won't be needed again soon.
					 * Push swap files here, since it
					 * won't happen anywhere else.
					 */
					flags = SM_WRITE | SM_ASYNC |
					    SM_DONTNEED;
				}
				smark(sp, SUPD|SCHG);
			} else if (rw == UIO_READ) {
				/*
				 * If read a whole block, won't need this
				 * buffer again soon. Don't mark it with
				 * SM_FREE, as that can lead to a deadlock
				 * if the block corresponds to a u-page.
				 * (The keep count never drops to zero, so
				 * waiting for "i/o to complete" never
				 * terminates; this points out a flaw in
				 * our locking strategy.)
				 */
				if (n + on == MAXBSIZE)
					flags = SM_DONTNEED;
			}
			error = segmap_release(segkmap, base, flags);
		} else {
			(void) segmap_release(segkmap, base, 0);
		}

	} while (error == 0 && uiop->uio_resid > 0 && n != 0);

	return (error);
}
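
/*
 * Ioctl on a character special file. Streams devices go through
 * strioctl(), which reports its error in u.u_error (saved and restored
 * around the call); everything else goes to the driver's d_ioctl entry.
 */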

/*ARGSUSED*/
static int
spec_ioctl(vp, com, data, flag, cred)
	struct vnode *vp;
	int com;
	caddr_t data;
	int flag;
	struct ucred *cred;
{
	register struct snode *sp;

	sp = VTOS(vp);
	if (vp->v_type != VCHR)
		panic("spec_ioctl");
	if (cdevsw[major(sp->s_dev)].d_str) {
		int saverr = u.u_error;
		int error;

		u.u_error = 0;
		strioctl(vp, com, data, flag);
		error = u.u_error;
		u.u_error = saverr;
		return (error);
	}
	return ((*cdevsw[major(sp->s_dev)].d_ioctl)
	    (sp->s_dev, com, data, flag));
}
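
/*
 * Select on a character special file: strselect() for streams,
 * otherwise the driver's d_select entry.
 */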

/*ARGSUSED*/
static int
spec_select(vp, which, cred)
	struct vnode *vp;
	int which;
	struct ucred *cred;
{
	register struct snode *sp;

	sp = VTOS(vp);
	if (vp->v_type != VCHR)
		panic("spec_select");
	if (cdevsw[major(sp->s_dev)].d_str)
		return (strselect(vp, which));
	else
		return ((*cdevsw[major(sp->s_dev)].d_select)(sp->s_dev, which));
}
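
/*
 * Called when the last reference to the spec vnode goes away: sunsave()
 * the snode (before anything here can sleep), push the cached times back
 * to the real vnode, destroy any pages still cached for a block device,
 * release the real and block vnodes, and free the snode itself.
 */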

static int
spec_inactive(vp, cred)
	struct vnode *vp;
	struct ucred *cred;
{
	struct snode *sp;
	int error;

	sp = VTOS(vp);

	/* XXX before removing the snode reset stream */
	if (vp->v_type == VCHR && vp->v_stream)
		vp->v_stream->sd_vnode = other_specvp(vp);

	/* must sunsave() first to prevent a race when spec_fsync() sleeps */
	sunsave(sp);

	if (sp->s_realvp && (sp->s_bdevvp == NULL || !IS_SWAPVP(sp->s_bdevvp)))
		(void) spec_fsync(vp, cred);
	if (vp->v_type == VBLK && vp->v_pages != NULL) {
		/*
		 * Device is no longer referenced by anyone.
		 * Destroy all the old pages (which BTW don't
		 * count against the vnode reference count) so
		 * we can, for instance, change floppy disks.
		 */
		error = spec_putpage(sp->s_bdevvp, 0, 0, B_INVAL,
		    (struct ucred *)0);
	} else {
		error = 0;
	}

	/* now free the realvp (no longer done by sunsave()) */
	if (sp->s_realvp) {
		VN_RELE(sp->s_realvp);
		sp->s_realvp = NULL;
		if (sp->s_bdevvp)
			VN_RELE(sp->s_bdevvp);
	}

	kmem_free((caddr_t)sp, sizeof (*sp));
	return (error);
}
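
/*
 * Get attributes. If there is a real vnode behind this one its
 * attributes are used as the base; otherwise they are faked up from the
 * snode. The times always come from the snode and the blocksize is
 * forced to a device-appropriate value.
 */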

static int
spec_getattr(vp, vap, cred)
	struct vnode *vp;
	register struct vattr *vap;
	struct ucred *cred;
{
	int error;
	register struct snode *sp;
	register struct vnode *realvp;

	sp = VTOS(vp);
	if ((realvp = sp->s_realvp) == NULL) {
		/*
		 * No real vnode behind this one.
		 * Set the device size from snode.
		 * Set times to the present.
		 * Set blocksize based on type in the unreal vnode.
		 */
		bzero((caddr_t)vap, sizeof (*vap));
		vap->va_size = sp->s_size;
		vap->va_rdev = sp->s_dev;
		vap->va_type = vp->v_type;
		vap->va_nodeid = ++fake_vno;
	} else {
		extern int dump_no;
		error = VOP_GETATTR(realvp, vap, cred);
		if (error != 0)
			return (error);
		/* if this is the dump file, copy the size, too */
		/* XXX there should be a more general way of doing this */
		if (vp->v_type == VCHR && dump_no == major(sp->s_dev))
			vap->va_size = sp->s_size;
	}
	/* set current times from snode, even if older than vnode */
	vap->va_atime = sp->s_atime;
	vap->va_mtime = sp->s_mtime;
	vap->va_ctime = sp->s_ctime;

	/* set device-dependent blocksizes */
	switch (vap->va_type) {
	case VBLK:
		vap->va_blocksize = MAXBSIZE;	/* was BLKDEV_IOSIZE */
		break;

	case VCHR:
		vap->va_blocksize = MAXBSIZE;
		break;
	}
	return (0);
}
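
/*
 * Set attributes on the real vnode (if any) and keep the snode's cached
 * access/modify/change times in step with what was set.
 */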

int
spec_setattr(vp, vap, cred)
	struct vnode *vp;
	register struct vattr *vap;
	struct ucred *cred;
{
	register struct snode *sp;
	register struct vnode *realvp;
	int error;
	register int chtime = 0;

	sp = VTOS(vp);
	if ((realvp = sp->s_realvp) == NULL)
		error = 0;		/* no real vnode to update */
	else
		error = VOP_SETATTR(realvp, vap, cred);
	if (error == 0) {
		/* if times were changed, update snode */
		if (vap->va_mtime.tv_sec != -1) {

			/*
			 * If SysV-compatible option to set access and
			 * modified times if root, owner, or write access,
			 * need to read back the new times in order to
			 * keep the snode times in sync. If VOP_GETATTR()
			 * fails, use current client time as an approximation.
			 *
			 * XXX - va_mtime.tv_usec == -1 flags this.
			 */
			if (vap->va_mtime.tv_usec == -1) {
				struct vattr vtmp;

				if ((realvp == NULL) ||
				    VOP_GETATTR(realvp, &vtmp, cred) != 0) {
					/* if error, simulate server time */
					sp->s_mtime = time;
					sp->s_atime = time;
					sp->s_ctime = time;
				} else {
					sp->s_mtime = vtmp.va_mtime;
					sp->s_atime = vtmp.va_atime;
					sp->s_ctime = vtmp.va_ctime;
				}
				goto no_chtime;
			}

			sp->s_mtime = vap->va_mtime;
			chtime++;
		}
		if (vap->va_atime.tv_sec != -1) {
			sp->s_atime = vap->va_atime;
			chtime++;
		}
		if (chtime)
			sp->s_ctime = time;
	}
no_chtime:
	return (error);
}

int
spec_access(vp, mode, cred)
	struct vnode *vp;
	int mode;
	struct ucred *cred;
{
	register struct vnode *realvp;

	if ((realvp = VTOS(vp)->s_realvp) != NULL)
		return (VOP_ACCESS(realvp, mode, cred));
	else
		return (0);	/* allow all access */
}

int
spec_link(vp, tdvp, tnm, cred)
	struct vnode *vp;
	struct vnode *tdvp;
	char *tnm;
	struct ucred *cred;
{
	register struct vnode *realvp;

	if ((realvp = VTOS(vp)->s_realvp) != NULL)
		return (VOP_LINK(realvp, tdvp, tnm, cred));
	else
		return (ENOENT);	/* can't link to something non-existent */
}

/*
 * In order to sync out the snode times without multi-client problems,
 * make sure the times written out are never earlier than the times
 * already set in the vnode.
 */
int
spec_fsync(vp, cred)
	struct vnode *vp;
	struct ucred *cred;
{
	register int error = 0;
	register struct snode *sp;
	register struct vnode *realvp;
	struct vattr *vap;
	struct vattr *vatmp;
	int err;

	sp = VTOS(vp);
	/*
	 * If times didn't change on a non-block
	 * special file, don't flush anything.
	 */
	if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK)
		return (0);
	sp->s_flag &= ~(SACC|SUPD|SCHG);

	/*
	 * If the vnode represents a block device and it is a "shadow"
	 * vnode, then flush all pages associated with the "common" vnode.
	 */
	if (vp->v_type == VBLK && sp->s_bdevvp != vp &&
	    sp->s_bdevvp->v_pages != NULL)
		error = spec_putpage(sp->s_bdevvp, 0, 0, 0,
		    (struct ucred *)0);

	/*
	 * If no real vnode to update, don't flush anything
	 */
	if ((realvp = sp->s_realvp) == NULL)
		return (error);

	vatmp = (struct vattr *)new_kmem_alloc(sizeof (*vatmp), KMEM_SLEEP);
	err = VOP_GETATTR(realvp, vatmp, cred);
	if (err == 0) {
		vap = (struct vattr *)new_kmem_alloc(sizeof (*vap), KMEM_SLEEP);
		vattr_null(vap);
		vap->va_atime = timercmp(&vatmp->va_atime, &sp->s_atime, >) ?
		    vatmp->va_atime : sp->s_atime;
		vap->va_mtime = timercmp(&vatmp->va_mtime, &sp->s_mtime, >) ?
		    vatmp->va_mtime : sp->s_mtime;
		VOP_SETATTR(realvp, vap, cred);
		kmem_free((caddr_t)vap, sizeof (*vap));
	}
	kmem_free((caddr_t)vatmp, sizeof (*vatmp));
	(void) VOP_FSYNC(realvp, cred);
	return (error);
}
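
/*
 * Pass a kernel dump request straight to the block driver's d_dump entry.
 */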

static int
spec_dump(vp, addr, bn, count)
	struct vnode *vp;
	caddr_t addr;
	int bn;
	int count;
{

	return ((*bdevsw[major(vp->v_rdev)].d_dump)
	    (vp->v_rdev, addr, bn, count));
}
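
/*
 * Fail routine used for the directory-style operations (lookup, create,
 * remove, rename, mkdir, readdir, ...) that make no sense on a device.
 */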

static int
spec_noop()
{

	return (EINVAL);
}

/*
 * Record-locking requests are passed back to the real vnode handler.
 */
int
spec_lockctl(vp, ld, cmd, cred, clid)
	struct vnode *vp;
	struct flock *ld;
	int cmd;
	struct ucred *cred;
	int clid;
{
	register struct vnode *realvp;

	if ((realvp = VTOS(vp)->s_realvp) != NULL)
		return (VOP_LOCKCTL(realvp, ld, cmd, cred, clid));
	else
		return (EINVAL);	/* can't lock this, it doesn't exist */
}

int
spec_fid(vp, fidpp)
	struct vnode *vp;
	struct fid **fidpp;
{
	register struct vnode *realvp;

	if ((realvp = VTOS(vp)->s_realvp) != NULL)
		return (VOP_FID(realvp, fidpp));
	else
		return (EINVAL);	/* you lose */
}

/*
 * klustsize should be a multiple of PAGESIZE and <= MAXPHYS.
 */
#define	KLUSTSIZE	(56 * 1024)
int klustsize = KLUSTSIZE;
int spec_ra = 1;
int spec_lostpage; /* number of times we lost original page */
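
/*
 * Illustration of the klustering arithmetic used below: with the default
 * 56K klustsize, a fault at offset 64K on a sufficiently large device
 * reads within the [56K, 112K) window, and read-ahead (spec_ra) primes
 * the following [112K, 168K) window.
 */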

/*
 * Called from pvn_getpages or spec_getpage to get a particular page.
 * When we are called the snode is already locked.
 */
/*ARGSUSED*/
static int
spec_getapage(vp, off, protp, pl, plsz, seg, addr, rw, cred)
	register struct vnode *vp;
	u_int off, *protp;
	struct page *pl[];
	u_int plsz;
	struct seg *seg;
	addr_t addr;
	enum seg_rw rw;
	struct ucred *cred;
{
	register struct snode *sp;
	struct buf *bp, *bp2;
	struct page *pp, *pp2, **ppp, *pagefound;
	u_int io_off, io_len;
	u_int blksz, blkoff;
	int dora, err;
	u_int xlen;

	sp = VTOS(vp);

reread:
	err = 0;
	bp = NULL;
	bp2 = NULL;

	if (spec_ra && sp->s_nextr == off)
		dora = 1;
	else
		dora = 0;

	/*
	 * We SNLOCK here to try and allow more concurrent access
	 * to the snode. We release the lock as soon as we know
	 * we won't be allocating more pages for the vnode.
	 * NB: It's possible that the snode was already locked by
	 * this process (e.g. we were called through pvn_getpages),
	 * thus we are assuming that SNLOCK is recursive.
	 */
	SNLOCK(sp);
again:
	if ((pagefound = page_find(vp, off)) == NULL) {
		/*
		 * Need to really do disk IO to get the page.
		 */
		blkoff = (off / klustsize) * klustsize;
		if (blkoff + klustsize <= sp->s_size)
			blksz = klustsize;
		else
			blksz = sp->s_size - blkoff;

		pp = pvn_kluster(vp, off, seg, addr, &io_off, &io_len,
		    blkoff, blksz, 0);
		/*
		 * Somebody has entered the page before us, so
		 * just use it.
		 */
		if (pp == NULL)
			goto again;

		if (!dora)
			SNUNLOCK(sp);

		if (pl != NULL) {
			register int sz;

			if (plsz >= io_len) {
				/*
				 * Everything fits, set up to load
				 * up and hold all the pages.
				 */
				pp2 = pp;
				sz = io_len;
			} else {
				/*
				 * Set up to load plsz worth
				 * starting at the needed page.
				 */
				for (pp2 = pp; pp2->p_offset != off;
				    pp2 = pp2->p_next) {
					ASSERT(pp2->p_next->p_offset !=
					    pp->p_offset);
				}
				sz = plsz;
			}

			ppp = pl;
			do {
				PAGE_HOLD(pp2);
				*ppp++ = pp2;
				pp2 = pp2->p_next;
				sz -= PAGESIZE;
			} while (sz > 0);
			*ppp = NULL;		/* terminate list */
		}

		bp = pageio_setup(pp, io_len, vp, pl == NULL ?
		    (B_ASYNC | B_READ) : B_READ);

		bp->b_dev = vp->v_rdev;
		bp->b_blkno = btodb(io_off);

		/*
		 * Zero part of page which we are not
		 * going to be reading from disk now.
		 */
		xlen = io_len & PAGEOFFSET;
		if (xlen != 0)
			pagezero(pp->p_prev, xlen, PAGESIZE - xlen);

		(*bdevsw[major(vp->v_rdev)].d_strategy)(bp);

		sp->s_nextr = io_off + io_len;
		u.u_ru.ru_majflt++;
		if (seg == segkmap)
			u.u_ru.ru_inblock++;	/* count as `read' operation */
		cnt.v_pgin++;
		cnt.v_pgpgin += btopr(io_len);
	} else if (!dora)
		SNUNLOCK(sp);

	if (dora) {
		u_int off2;
		addr_t addr2;

		off2 = ((off / klustsize) + 1) * klustsize;
		addr2 = addr + (off2 - off);

		/*
		 * If addr is now in a different seg or we are past
		 * EOF then don't bother trying with read-ahead.
		 */
		if (addr2 >= seg->s_base + seg->s_size || off2 >= sp->s_size) {
			pp2 = NULL;
		} else {
			if (off2 + klustsize <= sp->s_size)
				blksz = klustsize;
			else
				blksz = sp->s_size - off2;

			pp2 = pvn_kluster(vp, off2, seg, addr2, &io_off,
			    &io_len, off2, blksz, 1);
		}

		SNUNLOCK(sp);

		if (pp2 != NULL) {
			bp2 = pageio_setup(pp2, io_len, vp, B_READ | B_ASYNC);

			bp2->b_dev = vp->v_rdev;
			bp2->b_blkno = btodb(io_off);

			/*
			 * Zero part of page which we are not
			 * going to be reading from disk now.
			 */
			xlen = io_len & PAGEOFFSET;
			if (xlen != 0)
				pagezero(pp2->p_prev, xlen, PAGESIZE - xlen);

			(*bdevsw[major(vp->v_rdev)].d_strategy)(bp2);

			/*
			 * Should we bill read ahead to extra faults?
			 */
			u.u_ru.ru_majflt++;
			if (seg == segkmap)
				u.u_ru.ru_inblock++;	/* count as `read' */
			cnt.v_pgin++;
			cnt.v_pgpgin += btopr(io_len);
		}
	}

	if (bp != NULL && pl != NULL) {
		err = biowait(bp);
		pageio_done(bp);
	} else if (pagefound != NULL) {
		register int s;

		/*
		 * We need to be careful here because if the page was
		 * previously on the free list, we might have already
		 * lost it at interrupt level.
		 */
		s = splvm();
		if (pagefound->p_vnode == vp && pagefound->p_offset == off) {
			/*
			 * If the page is still intransit or if
			 * it is on the free list call page_lookup
			 * to try and wait for / reclaim the page.
			 */
			if (pagefound->p_intrans || pagefound->p_free)
				pagefound = page_lookup(vp, off);
		}
		if (pagefound == NULL || pagefound->p_offset != off ||
		    pagefound->p_vnode != vp || pagefound->p_gone) {
			(void) splx(s);
			spec_lostpage++;
			goto reread;
		}
		if (pl != NULL) {
			PAGE_HOLD(pagefound);
			pl[0] = pagefound;
			pl[1] = NULL;
			u.u_ru.ru_minflt++;
			sp->s_nextr = off + PAGESIZE;
		}
		(void) splx(s);
	}

	if (err && pl != NULL) {
		for (ppp = pl; *ppp != NULL; *ppp++ = NULL)
			PAGE_RELE(*ppp);
	}

	return (err);
}

/*
 * Return all the pages from [off..off+len) in block device
 */
static int
spec_getpage(vp, off, len, protp, pl, plsz, seg, addr, rw, cred)
	struct vnode *vp;
	u_int off, len;
	u_int *protp;
	struct page *pl[];
	u_int plsz;
	struct seg *seg;
	addr_t addr;
	enum seg_rw rw;
	struct ucred *cred;
{
	struct snode *sp = VTOS(vp);
	int err;

	if (vp->v_type != VBLK || sp->s_bdevvp != vp)
		panic("spec_getpage");

	if (off + len - PAGEOFFSET > sp->s_size)
		return (EFAULT);		/* beyond EOF */

	if (protp != NULL)
		*protp = PROT_ALL;

	if (len <= PAGESIZE)
		err = spec_getapage(vp, off, protp, pl, plsz, seg, addr,
		    rw, cred);
	else {
		SNLOCK(sp);
		err = pvn_getpages(spec_getapage, vp, off, len, protp, pl,
		    plsz, seg, addr, rw, cred);
		SNUNLOCK(sp);
	}

	return (err);
}
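
/*
 * spec_wrtblk: write back one klustered list of pages with a single
 * pageio buf. Asynchronous requests are cleaned up by the pvn layer
 * when the I/O completes; synchronous ones wait in biowait() and
 * return the error.
 */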

/*
 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
 */
static int
spec_wrtblk(vp, pp, off, len, flags)
	struct vnode *vp;
	struct page *pp;
	u_int off, len;
	int flags;
{
	struct buf *bp;
	int err;

	bp = pageio_setup(pp, len, vp, B_WRITE | flags);
	if (bp == NULL) {
		pvn_fail(pp, B_WRITE | flags);
		return (ENOMEM);
	}

	bp->b_dev = vp->v_rdev;
	bp->b_blkno = btodb(off);

	(*bdevsw[major(vp->v_rdev)].d_strategy)(bp);
	u.u_ru.ru_oublock++;

	/*
	 * If async, assume that pvn_done will
	 * handle the pages when IO is done
	 */
	if ((flags & B_ASYNC) != 0)
		return (0);

	err = biowait(bp);
	pageio_done(bp);

	return (err);
}

/*
 * Flags are composed of {B_ASYNC, B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 */
/*ARGSUSED*/
static int
spec_putpage(vp, off, len, flags, cred)
	register struct vnode *vp;
	u_int off;
	u_int len;
	int flags;
	struct ucred *cred;
{
	register struct snode *sp;
	register struct page *pp;
	struct page *dirty, *io_list;
	register u_int io_off, io_len;
	int vpcount;
	int err = 0;

	sp = VTOS(vp);
	if (vp->v_pages == NULL || off >= sp->s_size)
		return (0);

	if (vp->v_type != VBLK || sp->s_bdevvp != vp)
		panic("spec_putpage");

	vpcount = vp->v_count;
	VN_HOLD(vp);

again:
	if (len == 0) {
		/*
		 * We refuse to act on behalf of the pageout daemon to push
		 * out a page to a snode which is currently locked.
		 */
		if ((sp->s_flag & SLOCKED) && u.u_procp == &proc[2]) {
			err = EWOULDBLOCK;	/* XXX */
			goto out;
		}

		/*
		 * Search the entire vp list for pages >= off.
		 * We lock the snode here to prevent us from having
		 * multiple instances of pvn_vplist_dirty working
		 * on the same vnode active at the same time.
		 */
		SNLOCK(sp);
		dirty = pvn_vplist_dirty(vp, off, flags);
		SNUNLOCK(sp);
	} else {
		/*
		 * Do a range from [off...off + len) via page_find.
		 * We set limits so that we kluster to klustsize boundaries.
		 */
		if (off >= sp->s_size) {
			dirty = NULL;
		} else {
			u_int fsize, eoff, offlo, offhi;

			fsize = (sp->s_size + PAGEOFFSET) & PAGEMASK;
			eoff = MIN(off + len, fsize);
			offlo = (off / klustsize) * klustsize;
			offhi = roundup(eoff, klustsize);
			dirty = pvn_range_dirty(vp, off, eoff, offlo, offhi,
			    flags);
		}
	}

	/*
	 * Now pp will have the list of kept dirty pages marked for
	 * write back. It will also handle invalidation and freeing
	 * of pages that are not dirty. All the pages on the list
	 * returned need to still be dealt with here.
	 */

	/*
	 * Destroy read ahead value (since we are really going to write)
	 */
	if (dirty != NULL)
		sp->s_nextr = 0;

	/*
	 * Handle all the dirty pages not yet dealt with.
	 */
	while ((pp = dirty) != NULL) {
		/*
		 * Pull off a contiguous chunk
		 */
		page_sub(&dirty, pp);
		io_list = pp;
		io_off = pp->p_offset;
		io_len = PAGESIZE;
		while (dirty != NULL && dirty->p_offset == io_off + io_len) {
			pp = dirty;
			page_sub(&dirty, pp);
			page_sortadd(&io_list, pp);
			io_len += PAGESIZE;
			if (io_len >= klustsize - PAGEOFFSET)
				break;
		}
		/*
		 * Check for page length rounding problems
		 */
		if (io_off + io_len > sp->s_size) {
			ASSERT((io_off + io_len) - sp->s_size < PAGESIZE);
			io_len = sp->s_size - io_off;
		}
		err = spec_wrtblk(vp, io_list, io_off, io_len, flags);
		if (err)
			break;
	}

	if (err != 0) {
		if (dirty != NULL)
			pvn_fail(dirty, B_WRITE | flags);
	} else if (off == 0 && (len == 0 || len >= sp->s_size)) {
		/*
		 * If doing "synchronous invalidation", make
		 * sure that all the pages are actually gone.
		 */
		if ((flags & (B_INVAL | B_ASYNC)) == B_INVAL &&
		    (vp->v_pages != NULL))
			goto again;
	}

out:
	/*
	 * Instead of using VN_RELE here we are careful to only call
	 * the inactive routine if the vnode reference count is now zero,
	 * but it wasn't zero coming into putpage. This is to prevent
	 * recursively calling the inactive routine on a vnode that
	 * is already considered in the `inactive' state.
	 * XXX - inactive is a relative term here (sigh).
	 */
	if (--vp->v_count == 0 && vpcount > 0)
		(void) spec_inactive(vp, cred);
	return (err);
}

/*
 * This routine is called through the cdevsw[] table to handle
 * traditional mmap'able devices that support a d_mmap function.
 */
/*ARGSUSED*/
int
spec_segmap(dev, off, as, addrp, len, prot, maxprot, flags, cred)
	dev_t dev;
	u_int off;
	struct as *as;
	addr_t *addrp;
	u_int len;
	u_int prot, maxprot;
	u_int flags;
	struct ucred *cred;
{
	struct segdev_crargs dev_a;
	int (*mapfunc)();
	register int i;

	if ((mapfunc = cdevsw[major(dev)].d_mmap) == NULL)
		return (ENODEV);

	/*
	 * Character devices that support the d_mmap
	 * interface can only be mmap'ed shared.
	 */
	if ((flags & MAP_TYPE) != MAP_SHARED)
		return (EINVAL);

	/*
	 * Check to ensure that the entire range is
	 * legal and we are not trying to map in
	 * more than the device will let us.
	 */
	for (i = 0; i < len; i += PAGESIZE) {
		if ((*mapfunc)(dev, off + i, maxprot) == -1)
			return (ENXIO);
	}

	if ((flags & MAP_FIXED) == 0) {
		/*
		 * Pick an address w/o worrying about
		 * any vac alignment constraints.
		 */
		map_addr(addrp, len, (off_t)off, 0);
		if (*addrp == NULL)
			return (ENOMEM);
	} else {
		/*
		 * User specified address -
		 * Blow away any previous mappings.
		 */
		(void) as_unmap(as, *addrp, len);
	}

	dev_a.mapfunc = mapfunc;
	dev_a.dev = dev;
	dev_a.offset = off;
	dev_a.prot = prot;
	dev_a.maxprot = maxprot;

	return (as_map(as, *addrp, len, segdev_create, (caddr_t)&dev_a));
}
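
/*
 * VOP_MAP entry point. Character devices are mapped by whatever segment
 * driver the device's d_segmap routine chooses (falling back to
 * spec_segmap when only d_mmap is provided); block devices are mapped
 * with segvn using the common bdevvp as the backing vnode.
 */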

static int
spec_map(vp, off, as, addrp, len, prot, maxprot, flags, cred)
	struct vnode *vp;
	u_int off;
	struct as *as;
	addr_t *addrp;
	u_int len;
	u_int prot, maxprot;
	u_int flags;
	struct ucred *cred;
{

	if (vp->v_type == VCHR) {
		int (*segmap)();
		dev_t dev = vp->v_rdev;

		/*
		 * Character device, let the device driver
		 * pick the appropriate segment driver.
		 */
		segmap = cdevsw[major(dev)].d_segmap;
		if (segmap == NULL) {
			if (cdevsw[major(dev)].d_mmap == NULL)
				return (ENODEV);

			/*
			 * For cdevsw[] entries that specify a d_mmap
			 * function but don't have a d_segmap function,
			 * we default to spec_segmap for compatibility.
			 */
			segmap = spec_segmap;
		}

		return ((*segmap)(dev, off, as, addrp, len, prot, maxprot,
		    flags, cred));
	} else if (vp->v_type == VBLK) {
		struct segvn_crargs vn_a;

		/*
		 * Block device, use the underlying bdevvp name for pages.
		 */
		if ((int)off < 0 || (int)(off + len) < 0)
			return (EINVAL);

		if ((flags & MAP_FIXED) == 0) {
			map_addr(addrp, len, (off_t)off, 1);
			if (*addrp == NULL)
				return (ENOMEM);
		} else {
			/*
			 * User specified address -
			 * Blow away any previous mappings.
			 */
			(void) as_unmap(as, *addrp, len);
		}

		ASSERT(VTOS(vp)->s_bdevvp != NULL);

		vn_a.vp = VTOS(vp)->s_bdevvp;
		vn_a.offset = off;
		vn_a.type = flags & MAP_TYPE;
		vn_a.prot = prot;
		vn_a.maxprot = maxprot;
		vn_a.cred = cred;
		vn_a.amp = NULL;

		return (as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a));
	} else {
		return (ENODEV);
	}
}

static int
spec_cmp(vp1, vp2)
	struct vnode *vp1, *vp2;
{

	return (vp1 == vp2);
}
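
/*
 * Report the vnode this spec/fifo vnode shadows, chasing any further
 * VOP_REALVP indirection on the result.
 */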

int
spec_realvp(vp, vpp)
	struct vnode *vp;
	struct vnode **vpp;
{
	extern struct vnodeops spec_vnodeops;
	extern struct vnodeops fifo_vnodeops;
	struct vnode *rvp;

	if (vp &&
	    (vp->v_op == &spec_vnodeops || vp->v_op == &fifo_vnodeops)) {
		vp = VTOS(vp)->s_realvp;
	}
	if (vp && VOP_REALVP(vp, &rvp) == 0) {
		vp = rvp;
	}
	*vpp = vp;
	return (0);
}
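
/*
 * Pathconf-style control operations. _PC_MAX_INPUT is answered locally
 * (via the TIOCISIZE ioctl for stream devices, or a POSIX-minimum 1
 * otherwise); everything else is passed to the filesystem that holds
 * the real vnode.
 */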

int
spec_cntl(vp, cmd, idata, odata, iflg, oflg)
	struct vnode *vp;
	int cmd, iflg, oflg;
	caddr_t idata, odata;
{
	struct vnode *realvp;
	int error;

	switch (cmd) {
	/*
	 * ask the dev for this one
	 */
	case _PC_MAX_INPUT:
		if (vp->v_type == VCHR && vp->v_stream) {
			ASSERT(odata && oflg == CNTL_INT32);
			return (VOP_IOCTL(vp, TIOCISIZE, odata, 0, 0));
		} else if ((realvp = other_specvp(vp)) &&
		    realvp->v_type == VCHR && realvp->v_stream) {
			ASSERT(odata && oflg == CNTL_INT32);
			vp->v_stream = realvp->v_stream;
			return (VOP_IOCTL(vp, TIOCISIZE, odata, 0, 0));
		} else {
			/*
			 * This is for posix conformance. Max input will
			 * always be at least 1 char. Used to return EINVAL.
			 */
			*odata = 1;
			return (0);
		}

	/*
	 * ask the supporting fs for everything else
	 */
	default:
		if (error = VOP_REALVP(vp, &realvp))
			return (error);
		return (VOP_CNTL(realvp, cmd, idata, odata, iflg, oflg));
	}
	/*NOTREACHED*/
}