Files
Arquivotheca.Solaris-2.5/uts/common/os/fork.c
seta75D 7c4988eac0 Init
2021-10-11 19:38:01 -03:00

751 lines
18 KiB
C
Executable File

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* Copyright (c) 1994 Sun Microsystems, Inc. */
/* All Rights Reserved */
/* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T */
/* The copyright notice above does not evidence any */
/* actual or intended publication of such source code. */
#ident "@(#)fork.c 1.86 95/09/05 SMI" /* from SVr4.0 1.63 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/map.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/acct.h>
#include <sys/tuneable.h>
#include <sys/class.h>
#include <sys/kmem.h>
#include <sys/session.h>
#include <sys/ucontext.h>
#include <sys/procfs.h>
#include <sys/prsystm.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/shm.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <c2/audit.h>
#include <sys/var.h>
#include <sys/strlog.h>
static longlong_t cfork(int, int);
static int getproc(proc_t **, int);
static void fork_fail(proc_t *);
static void forklwp_fail(proc_t *);
int fork_fail_pending;
/*
* fork system call.
*/
longlong_t
fork(void)
{
return (cfork(0, 0));
}
/*
* The parent is stopped until the child invokes relvm().
*/
longlong_t
vfork(void)
{
curthread->t_post_sys = 1; /* so vfwait() will be called */
return (cfork(1, 1));
}
/*
* fork1 system call
*/
longlong_t
fork1(void)
{
return (cfork(0, 1));
}
/* ARGSUSED */
static longlong_t
cfork(int isvfork, int isfork1)
{
proc_t *p = ttoproc(curthread);
proc_t *cp, **orphpp;
klwp_id_t clwp;
kthread_id_t t;
rval_t r;
int error;
int i;
/*
* If the calling LWP is doing a fork1() then the
* other LWPs in this process are not duplicated and
* don't need to be held where their kernel stacks
* can be cloned. In general, the process is held with
* HOLDLWPS, so that the LWPs are at a point where their
* stacks can be copied which is on entry or exit from
* the kernel.
*/
if (!holdlwps(p, isfork1? HOLDLWP2 : HOLDLWPS)) {
error = EINTR;
goto forkerr;
}
/*
* Create a child proc struct. Place a VN_HOLD on appropriate vnodes.
*/
if (getproc(&cp, isfork1) < 0) {
mutex_enter(&p->p_lock);
continuelwps(p);
mutex_exit(&p->p_lock);
error = EAGAIN;
goto forkerr;
}
#ifdef TRACE
trace_process_fork((u_long) (cp->p_pid), (u_long) (p->p_pid));
#endif /* TRACE */
TRACE_2(TR_FAC_PROC, TR_PROC_FORK,
"proc_fork:cpid %d ppid %d", cp->p_pid, p->p_pid);
/*
* Assign an address space to child
*/
if (isvfork) {
cp->p_as = p->p_as;
cp->p_flag |= SVFORK;
} else {
error = as_dup(p->p_as, &cp->p_as);
if (error != 0) {
fork_fail(cp);
mutex_enter(&pidlock);
orphpp = &p->p_orphan;
while (*orphpp != cp)
orphpp = &(*orphpp)->p_nextorph;
*orphpp = cp->p_nextorph;
ASSERT(p->p_child == cp);
p->p_child = cp->p_sibling;
if (p->p_child) {
p->p_child->p_psibling = NULL;
}
pid_exit(cp);
mutex_exit(&pidlock);
mutex_enter(&p->p_lock);
continuelwps(p);
mutex_exit(&p->p_lock);
/*
* Preserve ENOMEM error condition but
* map all others to EAGAIN.
*/
error = (error == ENOMEM) ? ENOMEM : EAGAIN;
goto forkerr;
}
/* Duplicate parent's shared memory */
if (p->p_segacct)
shmfork(p, cp);
}
/*
* duplicate parent's lwps.
* mutual exclusion is not needed because the process is in the
* hold state and only the current lwp is running. however, if
* the p_lock mutex is not held, assertion checking for lwp
* routines will fail.
*/
i = ((isfork1) ? 1 : p->p_lwpcnt);
for (t = curthread; i-- > 0; t = t->t_forw) {
clwp = forklwp(ttolwp(t), cp);
if (clwp == NULL) {
if (!isvfork) {
struct as *as;
shmexit(cp);
as = cp->p_as;
cp->p_as = &kas;
as_free(as);
}
forklwp_fail(cp);
fork_fail(cp);
mutex_enter(&pidlock);
orphpp = &p->p_orphan;
while (*orphpp != cp)
orphpp = &(*orphpp)->p_nextorph;
*orphpp = cp->p_nextorph;
ASSERT(p->p_child == cp);
p->p_child = cp->p_sibling;
if (p->p_child) {
p->p_child->p_psibling = NULL;
}
pid_exit(cp);
mutex_exit(&pidlock);
mutex_enter(&p->p_lock);
continuelwps(p);
mutex_exit(&p->p_lock);
error = EAGAIN;
goto forkerr;
}
/* only duplicate LWP IDs if doing a fork() */
if (!isfork1)
lwptot(clwp)->t_tid = t->t_tid;
}
/* make sure next lwp the child creates is unique */
if (!isfork1)
cp->p_lwptotal = p->p_lwptotal;
#ifdef i386
if (p->p_ldt) /* parent has a private LDT */
(void) ldt_dup(p, cp);
#endif
/*
* If the active child lwp has been marked to stop
* on exit from this fork, arrange for all other
* lwps to stop in sympathy with the active lwp.
*/
if (PTOU(cp)->u_systrap &&
prismember(&PTOU(cp)->u_exitmask, curthread->t_sysnum)) {
for (t = cp->p_tlist->t_forw; t != cp->p_tlist; t = t->t_forw) {
t->t_proc_flag |= TP_PRSTOP;
aston(t); /* so TP_PRSTOP will be seen */
}
}
/* set return values for child */
lwp_setrval(ttolwp(cp->p_tlist), p->p_pid, 1);
/* set return values for parent */
r.r_val1 = (int)cp->p_pid;
r.r_val2 = 0;
mutex_enter(&pidlock);
/*
* Now that there are lwps and threads attached, add the new
* process to the process group.
*/
pgjoin(cp, p->p_pgidp);
cp->p_stat = SRUN;
/*
* Mutual exclusion is only needed here when assertion checking
* is on.
*/
if (isvfork) {
CPU_STAT_ADDQ(CPU, cpu_sysinfo.sysvfork, 1);
/*
* Grab p_lock before dropping pidlock to ensure the
* process will not disappear before we set it running.
*/
mutex_enter(&cp->p_lock);
mutex_exit(&pidlock);
continuelwps(cp);
mutex_exit(&cp->p_lock);
} else {
CPU_STAT_ADDQ(CPU, cpu_sysinfo.sysfork, 1);
/*
* It is CL_FORKRET's job to drop pidlock.
* If we do it here, the process could be set running
* and disappear before CL_FORKRET() is called.
*/
CL_FORKRET(curthread, cp->p_tlist);
MUTEX_NOT_HELD(&pidlock);
}
return (r.r_vals);
forkerr:
return ((longlong_t)set_errno(error));
}
/*
* Free allocated resources from getproc() if a fork failed.
*/
static void
fork_fail(proc_t *cp)
{
closeall(0);
sigdelq(cp, NULL, 0);
/*
* single threaded, so no locking needed here
*/
crfree(cp->p_cred);
kmem_free(PTOU(cp)->u_flist,
PTOU(cp)->u_nofiles * sizeof (struct uf_entry));
VN_RELE(u.u_cdir);
if (u.u_rdir)
VN_RELE(u.u_rdir);
if (cp->p_exec)
VN_RELE(cp->p_exec);
}
/*
* Clean up the lwps already created for this child process.
* The fork failed while duplicating all the lwps of the parent
* and those lwps already created must be freed.
* This process is invisible to the rest of the system,
* so we don't need to hold p->p_lock to protect the list.
*/
static void
forklwp_fail(proc_t *p)
{
kthread_id_t t;
while ((t = p->p_tlist) != NULL) {
/*
* First remove the lwp from the process's p_tlist.
*/
if (t != t->t_forw)
p->p_tlist = t->t_forw;
else
p->p_tlist = NULL;
p->p_lwpcnt--;
t->t_forw->t_back = t->t_back;
t->t_back->t_forw = t->t_forw;
/*
* Remove the thread from the all threads list.
* We need to hold pidlock for this.
*/
mutex_enter(&pidlock);
t->t_next->t_prev = t->t_prev;
t->t_prev->t_next = t->t_next;
mutex_exit(&pidlock);
thread_free(t);
}
}
extern struct as kas;
extern id_t syscid;
/*
* fork a kernel process.
*/
int
newproc(void (*pc)(), id_t cid, int pri)
{
proc_t *p;
struct user *up;
if (getproc(&p, 0) < 0)
return (EAGAIN);
if (cid == syscid) {
p->p_flag |= (SSYS | SLOCK | SNOWAIT);
p->p_exec = NULL;
/*
* kernel processes do not inherit /proc tracing flags.
*/
sigemptyset(&p->p_sigmask);
premptyset(&p->p_fltmask);
up = PTOU(p);
up->u_systrap = 0;
premptyset(&(up->u_entrymask));
premptyset(&(up->u_exitmask));
}
p->p_as = &kas;
if (lwp_create(pc, NULL, 0, p, TS_RUN, pri,
curthread->t_hold, cid) == NULL) {
fork_fail(p);
mutex_enter(&pidlock);
pid_exit(p);
mutex_exit(&pidlock);
return (EAGAIN);
} else {
mutex_enter(&pidlock);
pgjoin(p, curproc->p_pgidp);
p->p_stat = SRUN;
mutex_exit(&pidlock);
}
return (0);
}
/*
* Setup context of child process.
* up is a pointer to the child's u-area.
* isfork1 is a flag indicating whether this
* is a fork1 system call.
*/
void
setuctxt(user_t *up, int isfork1)
{
int sz;
/*
* XXX This code only works if the u_flist can not shrink.
*/
/* Copy u-block. */
*up = *PTOU(curproc);
cv_init(&up->u_cv, "u cv", CV_DEFAULT, NULL);
mutex_init(&up->u_flock, "u flist lock", MUTEX_DEFAULT, DEFAULT_WT);
/*
* I don't need to hold u_flock because all other lwp's in the
* parent have been held.
*/
sz = u.u_nofiles * sizeof (struct uf_entry);
/*
* if fork1 (or vfork), the child process will have only one lwp.
* In this case, we only want to duplicate the open file dscriptors;
* we will leave the refcnt null and clear all flags in the pofile
* entry (but leave the FCLOSEXEC set)
*/
if (isfork1) {
int i;
struct uf_entry *pufe_s, *pufe_d;
up->u_flist = kmem_zalloc(sz, KM_SLEEP);
pufe_d = &up->u_flist[0];
pufe_s = &u.u_flist[0];
for (i = 0; i < u.u_nofiles; i++) {
pufe_d[i].uf_ofile = pufe_s[i].uf_ofile;
pufe_d[i].uf_pofile = pufe_s[i].uf_pofile & FCLOSEXEC;
}
} else {
up->u_flist = kmem_alloc(sz, KM_SLEEP);
bcopy((caddr_t)u.u_flist, (caddr_t)up->u_flist, (size_t)sz);
}
}
/*
* create a child proc struct.
*/
static int
getproc(proc_t **cpp, int isfork1)
{
proc_t *pp, *cp;
pid_t newpid;
struct user *uarea;
extern u_int nproc;
struct cred *cr;
cp = kmem_zalloc(sizeof (proc_t), KM_SLEEP);
/*
* Make proc entry for child process
*/
mutex_init(&cp->p_lock, "proc", MUTEX_DEFAULT, DEFAULT_WT);
mutex_init(&cp->p_crlock, "proc crlock", MUTEX_DEFAULT, DEFAULT_WT);
mutex_init(&cp->p_pflock, "profiling", MUTEX_DEFAULT, DEFAULT_WT);
#ifdef i386
mutex_init(&cp->p_ldtlock, "LDT lock", MUTEX_DEFAULT, DEFAULT_WT);
#endif
cp->p_stat = SIDL;
cp->p_mstart = gethrtime();
if ((newpid = pid_assign(cp)) == -1) {
if (nproc == v.v_proc) {
CPU_STAT_ADD(CPU, cpu_sysinfo.procovf, 1);
(void) strlog(0, 1, 0, SL_CONSOLE | SL_WARN,
"out of processes", 0);
}
goto bad;
}
/*
* If not super-user make sure that this user hasn't exceeded
* v.v_maxup processes, and that users collectively haven't
* exceeded v.v_maxupttl processes.
*/
mutex_enter(&pidlock);
ASSERT(nproc < v.v_proc); /* otherwise how'd we get our pid? */
cr = CRED();
if (nproc >= v.v_maxup && /* short-circuit; usually false */
cr->cr_uid &&
cr->cr_ruid &&
(nproc >= v.v_maxupttl || upcount_get(cr->cr_ruid) >= v.v_maxup)) {
char buf[80];
mutex_exit(&pidlock);
(void) sprintf(buf, "out of per-user processes for uid %d",
cr->cr_ruid);
(void) strlog(0, 1, 0, SL_CONSOLE | SL_NOTE, buf, 0);
goto bad;
}
/*
* Everything is cool, put the new proc on the active process list.
* It is already on the pid list and in /proc.
* Increment the per uid process count (upcount).
*/
nproc++;
upcount_inc(cr->cr_ruid);
cp->p_next = practive;
practive->p_prev = cp;
practive = cp;
pp = ttoproc(curthread);
cp->p_ignore = pp->p_ignore;
cp->p_siginfo = pp->p_siginfo;
/*
* If fork1(), do not inherit SWAITSIG or ASLWP settings of p_flag.
* SWAITSIG is not inherited since the child of fork1() has only 1
* user thread and the user-level should make an explicit call to
* turn it on, if it needs to. Correspondingly, the _sigwaitingset
* variable in libthread is turned off in the child of fork1(), in
* the wrapper to fork1() in libhread.
* The ASLWP flag is not inherited since the child of fork1() has only
* one lwp - the aslwp has not yet been cloned. So retaining the flag
* without the real aslwp is dangerous. The aslwp is explicitly created
* by the child, at user-level, just after fork1() returns, if it needs
* to - if it does, the ASLWP flag will be turned on in p_flag at that
* time.
*/
if (isfork1)
cp->p_flag = SLOAD | (pp->p_flag & (SJCTL|SNOWAIT));
else
cp->p_flag = SLOAD
| (pp->p_flag & (SJCTL|SNOWAIT|SWAITSIG|ASLWP));
if (cp->p_flag & SWAITSIG)
cp->p_lwpblocked = 0;
else
cp->p_lwpblocked = -1;
cp->p_sessp = pp->p_sessp;
SESS_HOLD(pp->p_sessp);
cp->p_exec = pp->p_exec;
cp->p_brkbase = pp->p_brkbase;
cp->p_brksize = pp->p_brksize;
cp->p_stksize = pp->p_stksize;
cp->p_ppid = pp->p_pid;
/*
* Link up to parent-child-sibling chain. No need to lock
* in general since only a call to freeproc() (done by the
* same parent as newproc()) diddles with the child chain.
*/
cp->p_sibling = pp->p_child;
if (pp->p_child)
pp->p_child->p_psibling = cp;
cp->p_parent = pp;
pp->p_child = cp;
cp->p_child_ns = NULL;
cp->p_sibling_ns = NULL;
cp->p_nextorph = pp->p_orphan;
cp->p_nextofkin = pp;
pp->p_orphan = cp;
mutex_exit(&pidlock);
/*
* Duplicate any audit information kept in the process table
*/
#ifdef C2_AUDIT
if (audit_active) /* copy audit data to cp */
audit_newproc(cp);
#endif
crhold(cp->p_cred = pp->p_cred);
/*
* Bump up the counts on the file structures pointed at by the
* parents ofile table since the child will point at them too.
*/
bump_fcnts(pp);
VN_HOLD(u.u_cdir);
if (u.u_rdir)
VN_HOLD(u.u_rdir);
/*
* copy the parent's uarea.
*/
uarea = PTOU(cp);
setuctxt(uarea, isfork1);
cp->p_flag |= SULOAD;
uarea->u_start = hrestime.tv_sec;
uarea->u_ticks = lbolt;
uarea->u_mem = rm_asrss(pp->p_as);
uarea->u_nshmseg = 0;
uarea->u_acflag = AFORK;
/*
* If inherit-on-fork, copy /proc tracing flags to child.
*/
if ((pp->p_flag & SPRFORK) != 0) {
cp->p_flag |= pp->p_flag & (SPROCTR|SPRFORK|SMSACCT);
cp->p_sigmask = pp->p_sigmask;
cp->p_fltmask = pp->p_fltmask;
} else {
sigemptyset(&cp->p_sigmask);
premptyset(&cp->p_fltmask);
uarea->u_systrap = 0;
premptyset(&uarea->u_entrymask);
premptyset(&uarea->u_exitmask);
}
if (cp->p_exec)
VN_HOLD(cp->p_exec);
*cpp = cp;
return (0);
bad:
ASSERT(MUTEX_NOT_HELD(&pidlock));
mutex_destroy(&cp->p_lock);
mutex_destroy(&cp->p_crlock);
mutex_destroy(&cp->p_pflock);
#ifdef i386
mutex_destroy(&cp->p_ldtlock);
#endif
if (newpid != -1) {
proc_entry_free(cp->p_pidp);
pid_rele(cp->p_pidp);
}
kmem_free(cp, sizeof (proc_t));
/*
* We most likely got into this situation because some process is
* forking out of control. As punishment, put it to sleep for a
* bit so it can't eat the machine alive. Sleep interval is chosen
* to allow no more than one fork failure per cpu per clock tick
* on average (yes, I just made this up). This has two desirable
* properties: (1) it sets a constant limit on the fork failure
* rate, and (2) the busier the system is, the harsher the penalty
* for abusing it becomes.
*/
INCR_COUNT(&fork_fail_pending, &pidlock);
delay(fork_fail_pending / ncpus + 1);
DECR_COUNT(&fork_fail_pending, &pidlock);
return (-1); /* out of memory or proc slots */
}
/*
* Release virtual memory.
* In the case of vfork(), the child was given exclusive access to its
* parent's address space. The parent is waiting in vfwait() for the
* child to release its exclusive claim via relvm().
*/
void
relvm(proc_t *p)
{
ASSERT((unsigned)p->p_lwpcnt <= 1);
if (p->p_flag & SVFORK) {
proc_t *pp = p->p_parent;
/*
* The child process is either exec'ing or exit'ing.
* The child is now separated from the parent's address
* space. The parent process is made dispatchable.
*
* This is a delicate locking maneuver, involving
* both the parent's p_lock and the child's p_lock.
* As soon as the SVFORK flag is turned off, the
* parent is free to run, but it must not run until
* we wake it up using its p_cv because it might
* exit and we would be referencing invalid memory.
* Therefore, we hold the parent with its p_lock
* while protecting our p_flags with our own p_lock.
*/
mutex_enter(&p->p_lock); /* grab child's lock first */
prbarrier(p); /* make sure /proc is blocked out */
mutex_enter(&pp->p_lock);
p->p_flag &= ~SVFORK;
p->p_as = &kas;
/*
* child sizes are copied back to parent because
* child may have grown.
*/
pp->p_brkbase = p->p_brkbase;
pp->p_brksize = p->p_brksize;
pp->p_stksize = p->p_stksize;
cv_signal(&pp->p_cv);
mutex_exit(&pp->p_lock);
mutex_exit(&p->p_lock);
} else {
if (p->p_as != &kas) {
struct as *as;
if (PTOU(p)->u_nshmseg)
shmexit(p);
/*
* We grab p_lock for the benefit of /proc
*/
mutex_enter(&p->p_lock);
prbarrier(p); /* make sure /proc is blocked out */
as = p->p_as;
p->p_as = &kas;
mutex_exit(&p->p_lock);
as_free(as);
}
}
}
/*
* Wait for child to exec or exit.
* Called by parent of vfork'ed process.
* See important comments in relvm(), above.
*/
void
vfwait(pid_t pid)
{
proc_t *pp = ttoproc(curthread);
proc_t *cp;
/*
* Wait for child to exec or exit.
*/
for (;;) {
mutex_enter(&pidlock);
cp = prfind(pid);
if (cp == NULL || cp->p_parent != pp) {
/*
* Child has exit()ed.
*/
mutex_exit(&pidlock);
break;
}
/*
* Grab the child's p_lock before releasing pidlock.
* Otherwise, the child could exit and we would be
* referencing invalid memory.
*/
mutex_enter(&cp->p_lock);
mutex_exit(&pidlock);
if (!(cp->p_flag & SVFORK)) {
/*
* Child has exec()ed or is exit()ing.
*/
mutex_exit(&cp->p_lock);
break;
}
mutex_enter(&pp->p_lock);
mutex_exit(&cp->p_lock);
/*
* We might be waked up spuriously from the cv_wait().
* We have to do the whole operation over again to be
* sure the child's SVFORK flag really is turned off.
* We cannot make reference to the child because it can
* exit before we return and we would be referencing
* invalid memory.
*/
cv_wait(&pp->p_cv, &pp->p_lock);
mutex_exit(&pp->p_lock);
}
mutex_enter(&pp->p_lock);
continuelwps(pp);
mutex_exit(&pp->p_lock);
}