#ident "@(#)sys_generic.c 1.1 94/10/31 SMI" /* from UCB 5.42 83/06/24 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#if defined(ASYNCHIO) && defined(LWP)
#include <lwp/lwperror.h>
#include <machlwp/stackdep.h>
#include <sys/asynch.h>
#include <sys/vnode.h>
static int nasynchio = 0;
#ifdef SUNDBE
#include <sys/dbe_asynch.h>
static int generic_asynchio_used = 0; /* to help in debugging problems */
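/*
* INLINE_RDWR compiles read() and write() with rwuio() expanded
* inline, presumably to save the call and argument setup on the
* common i/o path of SunDBE configurations.
*/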
#define INLINE_RDWR
#endif /* SUNDBE */
#endif /* ASYNCHIO && LWP */
/*
* Read system call.
*/
#ifdef INLINE_RDWR
read()
{
register struct a {
int fdes;
char *cbuf;
unsigned count;
} *uap = (struct a *)u.u_ap;
struct uio uio;
struct iovec iov;
register struct file *fp;
register int count;
iov.iov_base = (caddr_t)uap->cbuf;
iov.iov_len = uap->count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
/*
* The code below is `rwuio(&uio, UIO_READ);' expanded inline.
*/
GETF(fp, uap->fdes);
if ((fp->f_flag & FREAD) == 0) {
u.u_error = EBADF;
return;
}
uio.uio_resid = 0;
uio.uio_segflg = UIO_USERSPACE;
if (iov.iov_len < 0) {
u.u_error = EINVAL;
return;
}
uio.uio_resid += iov.iov_len;
count = uio.uio_resid;
uio.uio_offset = fp->f_offset;
uio.uio_fmode = fp->f_flag;
if (setjmp(&u.u_qsave)) {
if (uio.uio_resid == count) {
if ((u.u_sigintr & sigmask(u.u_procp->p_cursig)) != 0)
u.u_error = EINTR;
else
u.u_eosys = RESTARTSYS;
}
} else
u.u_error = (*fp->f_ops->fo_rw)(fp, UIO_READ, &uio);
u.u_r.r_val1 = count - uio.uio_resid;
/*
* Check if the underlying code has switched us to block mode
*/
if ((uio.uio_fmode & FSETBLK) && !(fp->f_flag & FSETBLK)) {
fp->f_flag |= FSETBLK;
fp->f_offset = btodb(fp->f_offset);
}
/* update file offset only if there is no error */
if (u.u_error == 0) {
fp->f_offset += (fp->f_flag & FSETBLK) ?
howmany(u.u_r.r_val1, DEV_BSIZE) : u.u_r.r_val1;
}
u.u_ioch += (unsigned)u.u_r.r_val1;
}
#else /* INLINE_RDWR */
read()
{
register struct a {
int fdes;
char *cbuf;
unsigned count;
} *uap = (struct a *)u.u_ap;
struct uio auio;
struct iovec aiov;
aiov.iov_base = (caddr_t)uap->cbuf;
aiov.iov_len = uap->count;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
rwuio(&auio, UIO_READ);
}
#endif /* INLINE_RDWR */
readv()
{
register struct a {
int fdes;
struct iovec *iovp;
int iovcnt;
} *uap = (struct a *)u.u_ap;
struct uio auio;
struct iovec aiov[16]; /* XXX -- fixed limit of 16 iovecs */
if (uap->iovcnt <= 0 || uap->iovcnt > sizeof (aiov)/sizeof (aiov[0])) {
u.u_error = EINVAL;
return;
}
auio.uio_iov = aiov;
auio.uio_iovcnt = uap->iovcnt;
u.u_error = copyin((caddr_t)uap->iovp, (caddr_t)aiov,
(unsigned)(uap->iovcnt * sizeof (struct iovec)));
if (u.u_error)
return;
rwuio(&auio, UIO_READ);
}
/*
* Write system call
*/
#ifdef INLINE_RDWR
write()
{
register struct a {
int fdes;
char *cbuf;
unsigned count;
} *uap = (struct a *)u.u_ap;
struct uio uio;
struct iovec iov;
register struct file *fp;
register int count;
iov.iov_base = (caddr_t)uap->cbuf;
iov.iov_len = uap->count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
/*
* The code below is `rwuio(&uio, UIO_WRITE);' expanded inline.
*/
GETF(fp, uap->fdes);
if ((fp->f_flag & FWRITE) == 0) {
u.u_error = EBADF;
return;
}
uio.uio_resid = 0;
uio.uio_segflg = UIO_USERSPACE;
if (iov.iov_len < 0) {
u.u_error = EINVAL;
return;
}
uio.uio_resid += iov.iov_len;
count = uio.uio_resid;
uio.uio_offset = fp->f_offset;
uio.uio_fmode = fp->f_flag;
if (setjmp(&u.u_qsave)) {
if (uio.uio_resid == count) {
if ((u.u_sigintr & sigmask(u.u_procp->p_cursig)) != 0)
u.u_error = EINTR;
else
u.u_eosys = RESTARTSYS;
}
} else
u.u_error = (*fp->f_ops->fo_rw)(fp, UIO_WRITE, &uio);
u.u_r.r_val1 = count - uio.uio_resid;
/*
* Check if the underlying code has switched us to block mode
*/
if ((uio.uio_fmode & FSETBLK) && !(fp->f_flag & FSETBLK)) {
fp->f_flag |= FSETBLK;
fp->f_offset = btodb(fp->f_offset);
}
/* update file offset only if there is no error */
if (u.u_error == 0) {
fp->f_offset += (fp->f_flag & FSETBLK) ?
howmany(u.u_r.r_val1, DEV_BSIZE) : u.u_r.r_val1;
}
u.u_ioch += (unsigned)u.u_r.r_val1;
}
#else /* INLINE_RDWR */
write()
{
register struct a {
int fdes;
char *cbuf;
int count;
} *uap = (struct a *)u.u_ap;
struct uio auio;
struct iovec aiov;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
aiov.iov_base = uap->cbuf;
aiov.iov_len = uap->count;
rwuio(&auio, UIO_WRITE);
}
#endif /* INLINE_RDWR */
writev()
{
register struct a {
int fdes;
struct iovec *iovp;
int iovcnt;
} *uap = (struct a *)u.u_ap;
struct uio auio;
struct iovec aiov[16]; /* XXX -- fixed limit of 16 iovecs */
if (uap->iovcnt <= 0 || uap->iovcnt > sizeof (aiov)/sizeof (aiov[0])) {
u.u_error = EINVAL;
return;
}
auio.uio_iov = aiov;
auio.uio_iovcnt = uap->iovcnt;
u.u_error = copyin((caddr_t)uap->iovp, (caddr_t)aiov,
(unsigned)(uap->iovcnt * sizeof (struct iovec)));
if (u.u_error)
return;
rwuio(&auio, UIO_WRITE);
}
#if defined(ASYNCHIO) && defined(LWP)
#define AIO_DONE 1
void adoit();
void arw();
void arwuio();
void cancelaio();
void del_aiodone();
void mark_unwanted();
void return_stk();
extern label_t *sleepqsave;
/* used to copy arguments onto thread stack */
struct auiotemp {
struct uio auio;
struct iovec aiov;
struct file af;
};
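/*
* Sketch of the expected user-level calling sequence (assuming the
* aio_result_t layout of aio_return followed by aio_errno, which is
* what adoit() stores back below):
*
*	aio_result_t res;
*
*	if (aioread(fd, buf, bufsz, off, L_SET, &res) == -1)
*		... submission failed; see errno ...
*	donep = (aio_result_t *)aiowait(&tv);
*	if (donep == &res)
*		... res.aio_return and res.aio_errno are valid ...
*/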
aioread()
{
arw(UIO_READ);
}
aiowrite()
{
arw(UIO_WRITE);
}
int __LwpRunCnt;
int maxasynchio = 0;
int maxunreaped = 0;
int perproc_maxunreaped = 0;
/*
* allocate a thread stack and initialize it to contain
* an environment for adoit.
*/
void
arw(rw)
enum uio_rw rw;
{
register struct a {
int fildes;
char *bufp;
u_int bufsz;
off_t offset;
int whence;
aio_result_t *resultp;
} *uap = (struct a *)u.u_ap;
struct auiotemp *at;
register struct file *fp;
struct proc *p = u.u_procp;
register struct file *newfp;
caddr_t sp;
int s;
extern stkalign_t *lwp_datastk();
extern int perproc_maxasynchio;
GETF(fp, uap->fildes);
if ((fp->f_flag & (rw == UIO_READ ? FREAD : FWRITE)) == 0) {
u.u_error = EBADF;
return;
}
#ifdef SUNDBE
/*
* dbe does not handle medusa's 1TB changes (LB_SET/INCR/XTND)
*/
if ((uap->whence < L_SET) || (uap->whence > L_XTND))
goto dont_do_dbe_asynchio;
/*
* See if this a raw disk i/o; if so, let dbe_arw() do an optimized i/o.
* Otherwise, fall through and do a generic asynch i/o.
*/
if (dbe_asynchio_enabled) {
switch (dbe_arw(rw)) {
case DBE_AIO_UNAVAIL:
break; /* Do generic aio */
case DBE_AIO_SUCCESS:
case DBE_AIO_ERROR:
return; /* u.u_error set by dbe_arw() */
break;
default:
panic("arw: bad status from dbe_arw()");
break;
}
}
dont_do_dbe_asynchio:
/*
* Print an informative console message if this is the first time
* we use generic asynch i/o. This information will help to isolate
* problems should they occur in the field.
*/
if (!generic_asynchio_used) {
generic_asynchio_used++;
printf("SunDBE: asynch i/o in use - this is not an error\n");
}
#endif /* SUNDBE */
s = splclock();
if ((__LwpRunCnt >= maxasynchio) ||
(p->p_threadcnt >= perproc_maxasynchio) ||
(nasynchio >= maxunreaped) ||
(p->p_aio_count >= perproc_maxunreaped)) {
splx(s);
u.u_error = EAGAIN;
return;
}
sp = (caddr_t)lwp_datastk((caddr_t)NULL,
sizeof (struct auiotemp), (caddr_t *)&at);
splx(s);
newfp = &(at->af);
bcopy((caddr_t)fp, (caddr_t)newfp, sizeof (struct file));
u.u_error = lseek1(newfp, uap->offset, uap->whence);
if (u.u_error) {
return_stk(sp);
return;
}
/*
* The fubyte/fuword calls below verify that the user's buffer and
* result pointers are addressable. This needs to be done before
* the lwp is forked.
*/
if (uap->bufp == (caddr_t)NULL ||
(fubyte(&uap->bufp[uap->bufsz - 1]) == -1)) {
return_stk(sp);
u.u_error = EFAULT;
return;
}
at->aiov.iov_base = (caddr_t)uap->bufp;
at->aiov.iov_len = uap->bufsz;
at->auio.uio_iov = &(at->aiov);
at->auio.uio_iovcnt = 1;
if (fuword((caddr_t)uap->resultp) == -1) {
return_stk(sp);
u.u_error = EFAULT;
return;
}
arwuio(at, rw, sp, uap->resultp, uap->fildes);
if (u.u_error) {
return_stk(sp);
return;
}
}
/*
* do the uio for asynch IO.
* This is equivalent to rwuio, but we also create a thread
* to do the work asynchronously.
*/
void
arwuio(at, rw, sp, res, fd)
struct auiotemp *at;
enum uio_rw rw;
caddr_t sp;
aio_result_t *res;
int fd;
{
register struct uio *uio;
register struct iovec *iov;
register struct file *fp;
register aiodone_t *ares;
struct proc *p = u.u_procp;
int i, count;
int s;
thread_t tid;
aiodone_t *aprev = (aiodone_t *)NULL;
extern void adoit();
aiodone_t *aiodone_alloc();
uio = &at->auio;
uio->uio_resid = 0;
uio->uio_segflg = UIO_USERSPACE;
iov = uio->uio_iov;
fp = &at->af;
for (i = 0; i < uio->uio_iovcnt; i++) {
if (iov->iov_len < 0) {
u.u_error = EINVAL;
return;
}
uio->uio_resid += iov->iov_len;
if (uio->uio_resid < 0) {
u.u_error = EINVAL;
return;
}
iov++;
}
count = uio->uio_resid;
uio->uio_offset = fp->f_offset;
uio->uio_fmode = fp->f_flag;
s = splclock();
ares = aiodone_alloc();
ares->aiod_fd = fd;
ares->aiod_result = res;
ares->aiod_next = (aiodone_t *)0;
ares->aiod_state = AIO_INPROGRESS;
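/*
* Link the request onto the per-process list of outstanding
* asynchronous i/os: p_aio_forw is the head, p_aio_back the tail.
*/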
if (p->p_aio_forw) {
aprev = p->p_aio_back;
p->p_aio_back->aiod_next = ares;
} else
p->p_aio_forw = ares;
p->p_aio_back = ares;
if (lwp_create(&tid, adoit, MINPRIO, LWPSUSPEND, sp,
5, fp, rw, count, uio, ares) < 0) {
u.u_error = EAGAIN;
del_aiodone(aprev, ares);
(void) splx(s);
return;
}
p->p_threadcnt++; /* number of threads in this process */
ares->aiod_thread = tid;
/*
* u.u_procp could change before the new thread first runs, so
* bind the thread to this process's context before resuming it.
*/
unixset(tid);
(void) splx(s);
(void) lwp_resume(tid);
}
/*
* This is the thread that actually does the work of asynchronous IO.
* We have copied the arguments to the thread on its stack so there
* is no reliance on the invoker's stack.
* The u-area this thread operates in is a dummy, but u.u_procp
* is set on context switch, and the thread may safely read u values like
* u.u_error. We currently do no accounting.
*/
void
adoit(fp, rw, count, uio, ares)
struct file *fp;
enum uio_rw rw;
int count;
register struct uio *uio;
aiodone_t *ares;
{
int err;
int size;
caddr_t addr;
register struct proc *p = u.u_procp;
int s;
s = splclock();
ares->aiod_state = AIO_INPROGRESS;
nasynchio++; /* total asynch io's in the system */
(void) splx(s);
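/*
* sleepqsave is the longjmp target taken if this thread's sleep is
* interrupted (presumably via mark_unwanted() from cancelaio());
* report EINTR for the request in that case.
*/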
if (setjmp(sleepqsave))
err = EINTR;
else
err = (*fp->f_ops->fo_rw)(fp, rw, uio);
s = splclock();
addr = (caddr_t)(ares->aiod_result);
size = count - uio->uio_resid;
(void)suword(addr, size); /* aio_return */
(void)suword(addr + sizeof (int), err); /* aio_errno */
ares->aiod_state = AIO_DONE;
p->p_aio_count++;
wakeup((caddr_t)&p->p_aio_count);
p->p_threadcnt--;
psignal(p, SIGIO);
(void) splx(s);
/*
* When the thread returns, it returns to lwpkill for cleanup
*/
}
/*
* Cancel a pending asynchronous IO.
* We don't sleep if we can't do it.
*/
aiocancel()
{
register struct a {
aio_result_t *resultp;
} *uap = (struct a *)u.u_ap;
register aiodone_t *ares;
register aio_result_t *res;
struct proc *p = u.u_procp;
int s;
if (fuword((caddr_t)uap->resultp) == -1) {
u.u_error = EFAULT;
return;
}
res = uap->resultp;
s = splclock();
ares = p->p_aio_forw;
while (ares && (ares->aiod_result != res)) {
ares = ares->aiod_next;
}
if (ares == (aiodone_t *)0) {
u.u_error = EINVAL;
(void) splx(s);
return;
}
/*
* If the i/o is still in progress, its state cannot change under
* us because we are at splclock; mark the thread so the request
* is cancelled. If it has already completed, report EACCES.
*/
if (ares->aiod_state == AIO_INPROGRESS)
mark_unwanted(ares->aiod_thread);
else
u.u_error = EACCES;
(void)splx(s);
}
/*
* Wait for any asynchronous IO to complete.
*/
aiowait()
{
register struct a {
struct timeval *tv;
} *uap = (struct a *)u.u_ap;
register struct proc *p = u.u_procp;
register aiodone_t *ares = NULL;
register aiodone_t *prev = NULL;
struct timeval atv;
int unawait();
label_t lqsave;
int s;
#ifdef SUNDBE
dbe_handle_t dai;
int dbe_status;
#ifndef MULTIPROCESSOR
/*
* On multi-processors this counter is incremented outside of
* the kernel lock in dbe_probewait().
*/
extern long aiowaitdokv;
aiowaitdokv++;
#endif /* !MULTIPROCESSOR */
#endif /* SUNDBE */
if (uap->tv) {
if ((u.u_error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
sizeof (atv))))
return;
if (itimerfix(&atv)) {
u.u_error = EINVAL;
return;
}
/* atv is relative amount of time to wait from current time */
s = splclock();
timevaladd(&atv, &time);
(void) splx(s);
}
/*
* Exit the loop when the timer expires or an asynch i/o completes;
* p_threadcnt is set to 1 at fork.
*/
s = splclock();
#ifdef SUNDBE
while ((dbe_status = dbe_aio_comp_check(p, &dai)) != DBE_AIO_COMPLETE) {
if (p->p_aio_count != 0)
break; /* generic asynch i/o completed */
if (dbe_status == DBE_AIO_NONE_OUTSTANDING && nasynchio == 0)
break; /* no outstanding asynch i/os */
#else
while (p->p_aio_count == 0 && nasynchio != 0) {
#endif /* SUNDBE */
if (uap->tv && (time.tv_sec > atv.tv_sec ||
(time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec))) {
/* time expired */
goto out;
}
if (uap->tv) {
lqsave = u.u_qsave;
if (setjmp(&u.u_qsave)) {
untimeout(unawait, (caddr_t)u.u_procp);
u.u_error = EINTR;
goto out;
}
timeout(unawait, (caddr_t)u.u_procp, hzto(&atv));
}
(void) sleep((caddr_t)&p->p_aio_count, PZERO);
if (uap->tv) {
u.u_qsave = lqsave;
untimeout(unawait, (caddr_t)u.u_procp);
}
}
#ifdef SUNDBE
if (dbe_status == DBE_AIO_COMPLETE) {
(void) splx(s);
u.u_r.r_val1 = (int)dbe_aio_comp_reap(dai);
return;
}
#endif /* SUNDBE */
ares = p->p_aio_forw;
while ((ares != (aiodone_t *)NULL) && ares->aiod_state != AIO_DONE) {
prev = ares;
ares = ares->aiod_next;
}
if (ares == (aiodone_t *)NULL) {
goto out;
}
u.u_r.r_val1 = (int)ares->aiod_result;
p->p_aio_count--;
nasynchio--;
del_aiodone(prev, ares);
(void) splx(s);
return;
out:
if (p->p_aio_count == 0) {
#ifdef SUNDBE
if (p->p_aio_forw == NULL &&
dbe_status == DBE_AIO_NONE_OUTSTANDING) {
#else
if (p->p_aio_forw == NULL) {
#endif /* SUNDBE */
u.u_error = EINVAL; /* No outstanding asynch. I/O */
u.u_r.r_val1 = -1; /* Ugly, but specified */
} else
u.u_r.r_val1 = 0;
(void) splx(s);
return;
}
(void) splx(s);
}
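/*
* Timeout routine for aiowait(): put the sleeping process back on
* the run queue so the sleep on p_aio_count returns.
*/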
unawait(p)
register struct proc *p;
{
register int s = splhigh();
switch (p->p_stat) {
case SSLEEP:
setrun(p);
break;
case SSTOP:
unsleep(p);
break;
}
(void) splx(s);
}
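/*
* Tear down outstanding asynchronous i/os for a given descriptor,
* or for all of them when fd is ALL_AIO; presumably invoked on
* close or process exit. In-progress requests are cancelled before
* their aiodone records are reclaimed.
*/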
void
astop(fd, res)
int fd;
aio_result_t *res;
{
aiodone_t *ares = (aiodone_t *)NULL;
aiodone_t *prev = (aiodone_t *)NULL;
struct proc *p = u.u_procp;
int pri;
extern int __Nrunnable;
for (;;) {
pri = splclock(); /* hold splclock since we won't be blocking */
if ((ares = p->p_aio_forw) == (aiodone_t *)0) {
if (p->p_aio_count != 0)
panic("astop: bad p_aio_count");
else {
(void) splx(pri);
return;
}
}
if ((res == (aio_result_t *)ALL_AIO) &&
(fd != ares->aiod_fd) && (fd != ALL_AIO)) {
while (fd != ares->aiod_fd) {
prev = ares;
if ((ares = ares->aiod_next) ==
(aiodone_t *)NULL) {
(void) splx(pri);
return;
}
}
}
if (ares->aiod_state == AIO_INPROGRESS)
cancelaio(ares);
/*
* The decrementing of aio_count and nasynchio is also
* done in await
*/
p->p_aio_count--;
--nasynchio;
del_aiodone(prev, ares);
(void) splx(pri);
}
}
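/*
* Private free list of aiodone records, capped at maxasynchio
* entries, so steady-state asynch i/o avoids a kmem_alloc/kmem_free
* pair per request.
*/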
static aiodone_t *aiodone_freelst;
static int aiodone_freelst_cnt;
aiodone_t *
aiodone_alloc()
{
aiodone_t *aio;
if ((aio = aiodone_freelst) == NULL) {
aio = (aiodone_t *)new_kmem_alloc(sizeof (aiodone_t),
KMEM_SLEEP);
} else {
aiodone_freelst = aiodone_freelst->aiod_next;
aiodone_freelst_cnt--;
aio->aiod_next = 0;
}
return (aio);
}
void
aiodone_free(aio)
aiodone_t *aio;
{
if (aiodone_freelst_cnt < maxasynchio) {
aio->aiod_next = aiodone_freelst;
aiodone_freelst = aio;
aiodone_freelst_cnt++;
} else
kmem_free((caddr_t)aio, (u_int)sizeof (aiodone_t));
}
/*
* It is assumed that the next 2 routines are called
* at splclock
*/
void
cancelaio(ares)
aiodone_t *ares;
{
struct proc *p = u.u_procp;
/*
* The thread is marked the first time through. If it is sleeping
* at an interruptible priority the i/o is cancelled; otherwise it
* is sleeping at an uninterruptible priority and will run to
* completion. Either way, wait until it reaches AIO_DONE.
*/
(void) mark_unwanted(ares->aiod_thread);
while (ares->aiod_state == AIO_INPROGRESS)
(void)sleep((caddr_t)&p->p_aio_count, PZERO - 1);
}
void
del_aiodone(prev, ares)
register aiodone_t *prev;
register aiodone_t *ares;
{
struct proc *p = u.u_procp;
void aiodone_free();
if (prev == (aiodone_t *)NULL) /* first element */
p->p_aio_forw = ares->aiod_next;
else
prev->aiod_next = ares->aiod_next;
if (ares->aiod_next == (aiodone_t *)NULL) /* last element */
p->p_aio_back = prev;
aiodone_free(ares);
}
#endif /* ASYNCHIO && LWP */
rwuio(uio, rw)
register struct uio *uio;
enum uio_rw rw;
{
struct a {
int fdes;
};
register struct file *fp;
register struct iovec *iov;
int i, count;
GETF(fp, ((struct a *)u.u_ap)->fdes);
if ((fp->f_flag & (rw == UIO_READ ? FREAD : FWRITE)) == 0) {
u.u_error = EBADF;
return;
}
uio->uio_resid = 0;
uio->uio_segflg = UIO_USERSPACE;
iov = uio->uio_iov;
for (i = 0; i < uio->uio_iovcnt; i++) {
if (iov->iov_len < 0) {
u.u_error = EINVAL;
return;
}
uio->uio_resid += iov->iov_len;
if (uio->uio_resid < 0) {
u.u_error = EINVAL;
return;
}
iov++;
}
count = uio->uio_resid;
uio->uio_offset = fp->f_offset;
uio->uio_fmode = fp->f_flag;
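/*
* u.u_qsave is the longjmp target taken if a signal interrupts the
* transfer while sleeping. If nothing has been transferred yet,
* fail with EINTR or arrange for the system call to be restarted;
* otherwise fall through and return the partial count.
*/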
if (setjmp(&u.u_qsave)) {
if (uio->uio_resid == count) {
if ((u.u_sigintr & sigmask(u.u_procp->p_cursig)) != 0)
u.u_error = EINTR;
else
u.u_eosys = RESTARTSYS;
}
} else
u.u_error = (*fp->f_ops->fo_rw)(fp, rw, uio);
u.u_r.r_val1 = count - uio->uio_resid;
/*
* Check if the underlying code has switched us to block mode
*/
if ((uio->uio_fmode & FSETBLK) && !(fp->f_flag & FSETBLK)) {
fp->f_flag |= FSETBLK;
fp->f_offset = btodb(fp->f_offset);
}
/* update file offset only if there is no error */
if (u.u_error == 0) {
fp->f_offset += (fp->f_flag & FSETBLK) ?
howmany(u.u_r.r_val1, DEV_BSIZE) : u.u_r.r_val1;
}
u.u_ioch += (unsigned)u.u_r.r_val1;
}
/*
* Ioctl system call
*/
ioctl()
{
register struct file *fp;
struct a {
int fdes;
int cmd;
caddr_t cmarg;
} *uap;
register int com;
register u_int size;
int data[howmany(_IOCPARM_MASK/2, sizeof (int))];
register caddr_t iocparm;
uap = (struct a *)u.u_ap;
GETF(fp, uap->fdes);
if ((fp->f_flag & (FREAD|FWRITE)) == 0) {
u.u_error = EBADF;
return;
}
com = uap->cmd;
#if defined(vax) && defined(COMPAT)
/*
* Map old style ioctl's into new for the
* sake of backwards compatibility (sigh).
*/
if ((com&~0xffff) == 0) {
com = mapioctl(com);
if (com == 0) {
u.u_error = EINVAL;
return;
}
}
#endif /* vax && COMPAT */
if (com == FIOCLEX) {
u.u_pofile[uap->fdes] |= UF_EXCLOSE;
return;
}
if (com == FIONCLEX) {
u.u_pofile[uap->fdes] &= ~UF_EXCLOSE;
return;
}
/*
* Interpret high order word to find
* amount of data to be copied to/from the
* user's address space.
*/
size = (com &~ (_IOC_INOUT|_IOC_VOID)) >> 16;
if (size <= sizeof (data))
/*
* If size is no more than _IOCPARM_MASK/2, use data[]
* to avoid the overhead of kmem_alloc and kmem_free.
*/
iocparm = (caddr_t)data;
else if (size <= _IOCPARM_MASK)
/*
* Get space from kmem_alloc if the parameter size is more
* than _IOCPARM_MASK/2, to avoid kernel stack overflow.
*/
iocparm = new_kmem_alloc(size, KMEM_SLEEP);
else {
/*
* Size > IOCPARM_MASK, not possible to detect with
* the current ioctl macros.
*/
u.u_error = EINVAL;
return;
}
if (com&_IOC_IN) {
if (size == sizeof (int) && uap->cmarg == NULL)
*(int *)iocparm = 0;
else if (size != 0) {
u.u_error = copyin(uap->cmarg, iocparm, size);
if (u.u_error)
goto done; /* free iocparm if it was kmem_alloc'ed */
} else
*(caddr_t *)iocparm = uap->cmarg;
} else if ((com&_IOC_OUT) && size)
/*
* Zero the buffer (whether data[] or allocated) so the
* user always gets back something deterministic.
*/
bzero(iocparm, size);
else if (com&_IOC_VOID)
*(caddr_t *)iocparm = uap->cmarg;
if (setjmp(&u.u_qsave)) {
u.u_error = EINTR;
goto done;
} else
u.u_error = (*fp->f_ops->fo_ioctl)(fp, com, iocparm);
/*
* Copy any data to user, size was
* already set and checked above.
*/
if (u.u_error == 0) {
/*
* XXX Actually, there should not be any connection
* between the "ioctl"-settable per-object FIONBIO
* and FASYNC flags and any per-file-descriptor flags,
* so we shouldn't have to do this.
* Unfortunately, 4.2BSD has such a connection, so we
* must support that. Thus, we must set or clear the
* FNDELAY or FASYNC flag on the file descriptor.
* In order to do this right, it should really hit
* every file descriptor that refers to the same
* object, but that's too expensive.
*/
switch (com) {
case FIONBIO:
if (*(int *)iocparm)
fp->f_flag |= FNDELAY;
else
fp->f_flag &= ~FNDELAY;
break;
case FIOASYNC:
if (*(int *)iocparm)
fp->f_flag |= FASYNC;
else
fp->f_flag &= ~FASYNC;
break;
}
if ((com&_IOC_OUT) && size)
u.u_error = copyout(iocparm, uap->cmarg, size);
}
done:
if (iocparm != (caddr_t)data)
kmem_free(iocparm, size);
}
int unselect();
int nselcoll;
/*
* Select system call.
*/
select()
{
register struct uap {
int nd;
fd_set *in, *ou, *ex;
struct timeval *tv;
} *uap = (struct uap *)u.u_ap;
fd_set ibits[3], obits[3];
struct timeval atv;
int s, ncoll, ni;
label_t lqsave;
int rwe_flag = 0;
bzero((caddr_t)ibits, sizeof (ibits));
bzero((caddr_t)obits, sizeof (obits));
if (uap->nd < 0 || uap->nd > NOFILE)
uap->nd = NOFILE; /* forgiving, if slightly wrong */
ni = howmany(uap->nd, NFDBITS);
#define getbits(name, x) \
if (uap->name) { \
rwe_flag |= (1<<x); \
u.u_error = copyin((caddr_t)uap->name, (caddr_t)&ibits[x], \
(unsigned)(ni * sizeof (fd_mask))); \
if (u.u_error) \
goto done; \
}
getbits(in, 0);
getbits(ou, 1);
getbits(ex, 2);
#undef getbits
if (uap->tv) {
u.u_error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
sizeof (atv));
if (u.u_error)
goto done;
if (itimerfix(&atv)) {
u.u_error = EINVAL;
goto done;
}
s = splclock(); timevaladd(&atv, &time); (void) splx(s);
}
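/*
* Collision protocol: snapshot nselcoll and set SSEL before
* scanning. If a selwakeup() races with the scan it clears SSEL or
* bumps nselcoll, and we rescan instead of risking a sleep that
* would miss the event.
*/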
retry:
ncoll = nselcoll;
u.u_procp->p_flag |= SSEL;
u.u_r.r_val1 = selscan(ibits, obits, uap->nd, rwe_flag);
if (u.u_error || u.u_r.r_val1)
goto done;
s = splhigh();
/* this should be timercmp(&time, &atv, >=) */
if (uap->tv && (time.tv_sec > atv.tv_sec ||
(time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec))) {
(void) splx(s);
goto done;
}
if ((u.u_procp->p_flag & SSEL) == 0 || nselcoll != ncoll) {
u.u_procp->p_flag &= ~SSEL;
(void) splx(s);
goto retry;
}
u.u_procp->p_flag &= ~SSEL;
if (uap->tv) {
lqsave = u.u_qsave;
if (setjmp(&u.u_qsave)) {
untimeout(unselect, (caddr_t)u.u_procp);
u.u_error = EINTR;
(void) splx(s);
goto done;
}
timeout(unselect, (caddr_t)u.u_procp, hzto(&atv));
}
(void) sleep((caddr_t)&selwait, PZERO+1);
if (uap->tv) {
u.u_qsave = lqsave;
untimeout(unselect, (caddr_t)u.u_procp);
}
(void) splx(s);
goto retry;
done:
#define putbits(name, x) \
if (uap->name) { \
int error = copyout((caddr_t)&obits[x], (caddr_t)uap->name, \
(unsigned)(ni * sizeof (fd_mask))); \
if (error) \
u.u_error = error; \
}
if (u.u_error == 0) {
putbits(in, 0);
putbits(ou, 1);
putbits(ex, 2);
#undef putbits
}
}
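/*
* Timeout routine for select(): make the selecting process runnable
* again so its sleep on selwait returns and the timeout is noticed.
*/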
unselect(p)
register struct proc *p;
{
register int s = splhigh();
switch (p->p_stat) {
case SSLEEP:
setrun(p);
break;
case SSTOP:
unsleep(p);
break;
}
(void) splx(s);
}
selscan(ibits, obits, nfd, rwe_flag)
register fd_set *ibits;
fd_set *obits;
int nfd;
int rwe_flag;
{
register fd_mask bits;
register int which, j, i;
struct file *fp;
int n = 0;
static int which_flag[3] = { FREAD, FWRITE, 0 };
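/*
* For each of the read, write and exception sets requested, walk
* the descriptor bitmap one word at a time, using ffs() to find set
* bits, and poll each file through its fo_select op.
*/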
for (which = 0; which < 3; which++) {
if (!(rwe_flag & (1 << which)))
continue;
for (i = 0; i < nfd; i += NFDBITS) {
bits = ibits[which].fds_bits[i/NFDBITS];
while (bits && (j = ffs(bits)) && i + --j < nfd) {
register int fd;
bits &= ~(1 << j);
fd = i + j;
fp = (struct file *)getf(fd);
if (fp == NULL) {
/* u.u_error has been set. */
break;
}
if ((*fp->f_ops->fo_select)(fp,
which_flag[which])) {
FD_SET(fd, &obits[which]);
n++;
}
}
}
}
return (n);
}
/*ARGSUSED*/
seltrue(dev, flag)
dev_t dev;
int flag;
{
return (1);
}
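/*
* Wake a process blocked in select(). A collision (coll) means more
* than one process was selecting on the same object, so we cannot
* tell which to wake; bump nselcoll and wake everyone sleeping on
* selwait.
*/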
selwakeup(p, coll)
register struct proc *p;
int coll;
{
if (coll) {
nselcoll++;
wakeup((caddr_t)&selwait);
}
if (p) {
int s = splhigh();
if (p->p_wchan == (caddr_t)&selwait) {
if (p->p_stat == SSLEEP)
setrun(p);
else
unsleep(p);
} else if (p->p_flag & SSEL)
p->p_flag &= ~SSEL;
(void) splx(s);
}
}