1148 lines
28 KiB
C
1148 lines
28 KiB
C
#ident "@(#)ufs_inode.c 1.1 94/10/31 SMI" /* from UCB 7.1 6/5/86 */
|
|
|
|
/*LINTLIBRARY*/
|
|
|
|
/*
|
|
* Copyright (c) 1982, 1986 Regents of the University of California.
|
|
* All rights reserved. The Berkeley software License Agreement
|
|
* specifies the terms and conditions for redistribution.
|
|
*/
|
|
|
|
/*
|
|
 * Copyright (c) 1988, 1990 by Sun Microsystems, Inc.
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/user.h>
|
|
#include <sys/vfs.h>
|
|
#include <sys/vnode.h>
|
|
#include <sys/buf.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/trace.h>
|
|
#include <sys/dnlc.h>
|
|
#include <sys/vaccess.h> /* ULOCKFS */
|
|
#include <sys/lockfs.h> /* ULOCKFS */
|
|
|
|
#include <ufs/mount.h>
|
|
#include <ufs/inode.h>
|
|
#include <ufs/fs.h>
|
|
#ifdef QUOTA
|
|
#include <ufs/quota.h>
|
|
#endif
|
|
#include <ufs/lockfs.h> /* ULOCKFS */
|
|
#include <vm/hat.h>
|
|
#include <vm/as.h>
|
|
#include <vm/pvn.h>
|
|
#include <vm/seg.h>
|
|
#include <vm/swap.h>
|
|
#include <vm/page.h>
|
|
|
|
extern int freemem, lotsfree, pages_before_pager;
|
|
|
|
struct instats ins;
|
|
|
|
int ino_new; /* Current # of inodes kmem_allocated */
|
|
int ino_free_at_front; /* # inodes freed at front of free list */
|
|
|
|
struct inode *ifreeh, **ifreet;
|
|
|
|
union ihead ihead[INOHSZ];
|
|
|
|
/*
|
|
* Variables for maintaining the free list of inode structures.
|
|
*/
|
|
static struct inode *in_free;
|
|
|
|
#if !(PAGESIZE - 4096)
|
|
static u_int nin_incr = (2*PAGESIZE / (sizeof(struct inode)));
|
|
#else
|
|
static u_int nin_incr = (PAGESIZE / (sizeof(struct inode)));
|
|
#endif
|
|
|
|
/*
|
|
* Convert inode formats to vnode types
|
|
*/
|
|
enum vtype iftovt_tab[] = {
|
|
VFIFO, VCHR, VDIR, VBLK, VREG, VLNK, VSOCK, VBAD
|
|
};
|
|
|
|
int vttoif_tab[] = {
|
|
0, IFREG, IFDIR, IFBLK, IFCHR, IFLNK, IFSOCK, IFMT, IFIFO
|
|
};
|
|
|
|
/*
|
|
* Initialize hash links for inodes
|
|
* and build inode free list.
|
|
*/
|
|
ihinit()
|
|
{
|
|
register int i;
|
|
register union ihead *ih = ihead;
|
|
|
|
for (i = INOHSZ; --i >= 0; ih++) {
|
|
ih->ih_head[0] = ih;
|
|
ih->ih_head[1] = ih;
|
|
}
|
|
ifreeh = NULL;
|
|
ifreet = NULL;
|
|
}
|
|
|
|
/*
 * Deliberately empty function.  It is called from printinode() below
 * when an inode with a zero mode is dumped; presumably kept as a
 * convenient debugger breakpoint hook — confirm before removing.
 */
zeroperms()
{
}
|
|
|
|
#if defined(DEBUG_INODES)
|
|
/*
 * Debug-only (DEBUG_INODES) dump of an inode to the console.
 * Prints a one-line summary tagged with caller-supplied string 's';
 * if the inode's mode is zero (unexpected for a cached inode) the
 * complete in-core contents are dumped and zeroperms() is called as
 * a breakpoint hook.
 */
printinode(s, i)
	char *s;
	struct inode *i;
{
	printf("%s inode %d/%d:%d m=%o u=%d g=%d\n",
	    s, major(i->i_dev), minor(i->i_dev), i->i_number,
	    i->i_ic.ic_mode, i->i_ic.ic_uid, i->i_ic.ic_gid);
	if (i->i_ic.ic_mode == 0) {
		printf("strange, perm of zero .. complete contents:\n");

		printf("\tchain: %x %x\n", i->i_chain[0], i->i_chain[1]);
		printf("\tvnode: %x devvp: %x\n", i->i_vnode, i->i_devvp);
		printf("\tflag: %x dev: %d/%d number: %x\n", i->i_flag, major(i->i_dev), minor(i->i_dev), i->i_number);
		printf("\tdiroff: %x fs: %x\n", i->i_diroff, i->i_fs);
		printf("\tdquot: %x owner: %x count: %x\n", i->i_dquot, i->i_owner, i->i_count);
		printf("\tfreef: %x freeb: %x\n", i->i_fr.if_freef, i->i_fr.if_freeb);

		printf("\tmode: %x\tnlink: %x\n", i->i_ic.ic_mode, i->i_ic.ic_nlink);
		printf("\tuid: %x\tgid: %x\n", i->i_ic.ic_uid, i->i_ic.ic_gid);
		printf("\tsize: %x\n", i->i_ic.ic_size);
		printf("\tatime: %x\tmtime: %x\tctime: %x\n", i->i_ic.ic_atime.tv_sec, i->i_ic.ic_mtime.tv_sec, i->i_ic.ic_ctime.tv_sec);
		printf("\tflags: %x\tblocks: %x\tgen: %x\n", i->i_ic.ic_flags, i->i_ic.ic_blocks, i->i_ic.ic_gen);
		zeroperms();
	}
}
|
|
#endif
|
|
|
|
/*
|
|
* Look up an inode by device, inumber. If it is in core (in the
|
|
* inode structure), honor the locking protocol. If it is not in
|
|
* core, read it in from the specified device after freeing any pages.
|
|
* In all cases, a pointer to a locked inode structure is returned.
|
|
*/
|
|
iget(dev, fs, ino, ipp)
	dev_t dev;		/* device the filesystem lives on */
	register struct fs *fs;	/* expected superblock (sanity-checked) */
	ino_t ino;		/* inode number to look up */
	struct inode **ipp;	/* out: locked, referenced inode */
{
	register struct inode *ip;
	register union ihead *ih;
	register struct buf *bp;
	register struct dinode *dp;
	register struct inode *iq;
	struct mount *mp;
	int inode_dnlc_purge();

	/*
	 * Lookup inode in cache.  Restarted (goto loop) whenever we may
	 * have slept, since the cache can change underneath us.
	 */
loop:
	mp = getmp(dev);
	if (mp == NULL)
		return (ENOENT);
	/* guard against the fs being unmounted/remounted while we slept */
	if (mp->m_bufp->b_un.b_fs != fs)
		return (ENOENT);
	ih = &ihead[INOHASH(dev, ino)];
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino == ip->i_number && dev == ip->i_dev) {
			/*
			 * Found it - check for locks.  If locked by someone
			 * else, sleep and rescan from the top.
			 */
			if (((ip->i_flag & ILOCKED) != 0) &&
			    ip->i_owner != uniqpid()) {
				ip->i_flag |= IWANT;
				(void) sleep((caddr_t)ip, PINOD);
				goto loop;
			}
			/*
			 * If inode is on free list, remove it.
			 * i_freeb points at the previous element's forward
			 * link (or at ifreeh), so storing through it unlinks
			 * this inode.
			 */
			if ((ip->i_flag & IREF) == 0) {
				if (iq = ip->i_freef)
					iq->i_freeb = ip->i_freeb;
				else
					ifreet = ip->i_freeb;
				*ip->i_freeb = iq;
				ip->i_freef = NULL;
				ip->i_freeb = NULL;
			}
			/*
			 * Lock the inode and mark it referenced and return it.
			 */
			ip->i_flag |= IREF;
			ilock(ip);
			VN_HOLD(ITOV(ip));
			*ipp = ip;
			ins.in_hits++;
			trace6(TR_UFS_INSTATS, ip, ip->i_dev, ip->i_number,
			    TRC_INSTATS_HIT, ins.in_misses, ins.in_hits);
#if defined(DEBUG_INODES)
			printinode("iget: cached", ip);
#endif
			return (0);
		}
	}

	/*
	 * Inode was not in cache.
	 */

	/*
	 * If over high-water mark, and no inodes available on freelist
	 * without attached pages, try to free one up from dnlc.
	 */
	if (ino_new >= ninode) {
		while (ifreeh == NULL || ITOV(ifreeh)->v_pages) {
			if (dnlc_iter(inode_dnlc_purge, 0) == NULL)
				break;
		}
	}

	/*
	 * If there's a free one available and it has no pages attached
	 * take it.  If we're over the high water mark, take it even if
	 * it has attached pages.  Otherwise, make a new one.
	 */
	if (ifreeh && (ITOV(ifreeh)->v_pages == NULL || ino_new >= ninode)) {
		/* pop the head of the free list */
		ip = ifreeh;
		if (iq = ip->i_freef)
			iq->i_freeb = &ifreeh;
		ifreeh = iq;
		ip->i_freef = NULL;
		ip->i_freeb = NULL;
		/*
		 * When the inode was put on the free list in iinactive,
		 * we did an async syncip() there.  Here we call syncip()
		 * to synchronously wait for any pages that are still
		 * in transit and to invalidate all the pages on the vp
		 * and finally to write back the inode to disk.
		 */
		if (ip->i_flag & IFASTSYMLNK) {
			/* clean up the symbolic link cache content first */
			int i;
			for (i = 1; i < NDADDR && ip->i_db[i]; i++)
				ip->i_db[i] = 0;
			for (i = 0; i < NIADDR && ip->i_ib[i]; i++)
				ip->i_ib[i] = 0;
		}
		ip->i_flag = (ip->i_flag & (IMODTIME|ILOCKED|IWANT)) | IREF;
		VN_HOLD(ITOV(ip));	/* bug 1102884 */
		ILOCK(ip);		/* bug 1102884 */
#ifdef TRACE
		trace_vn_reuse(ITOV(ip));
#endif TRACE
		/* reused a page-free inode: one fewer on the front */
		if (ITOV(ip)->v_pages == NULL) {
			if (ino_free_at_front > 0)
				ino_free_at_front--;
		}
		/*
		 * syncip may sleep; if anything went wrong or someone else
		 * grabbed this inode meanwhile, give it back and rescan.
		 */
		if ((syncip(ip, B_INVAL, 1) != 0) ||
		    ((ip->i_flag & IWANT) != 0) ||
		    (ITOV(ip)->v_count != 1)) {
			idrop(ip);
			goto loop;
		}
		ip->i_flag &= ~IMODTIME;
	} else {
		/* allocate and initialize a brand-new in-core inode */
		ip = (struct inode *)new_kmem_fast_zalloc((caddr_t *)
		    &in_free, sizeof(*in_free), (int)nin_incr, KMEM_SLEEP);
		ip->i_forw = ip;
		ip->i_back = ip;
		ip->i_vnode.v_data = (caddr_t)ip;
		ip->i_vnode.v_op = &ufs_vnodeops;
		ip->i_flag = IREF;
		VN_HOLD(ITOV(ip));
		ILOCK(ip);
		ins.in_malloc++;
		ino_new++;
		ins.in_maxsize = MAX(ins.in_maxsize, ino_new);
	}

	/*
	 * We have to check the inode table again to make sure no
	 * inode with the same (dev, ino) has been created since last
	 * time we checked the inode table.  This case can happen
	 * because there are a few places where we can go to sleep.
	 * If another process comes by also trying to iget the same
	 * (dev, ino) while we are sleeping, failing to find one, it
	 * will create an in-core inode with the same (dev, ino).  Then
	 * we'll end up with two in-core inodes representing the same
	 * disk inode.  This bug has caused a lot of panics due to file
	 * system corruption of various kinds.
	 */
	for (iq = ih->ih_chain[0]; iq != (struct inode *)ih; iq = iq->i_forw) {
		if (ino == iq->i_number && dev == iq->i_dev) {
			idrop(ip);
			goto loop;
		}
	}


	/*
	 * Move the inode on the chain for its new (ino, dev) pair
	 */
	remque(ip);
	insque(ip, ih);
	ip->i_dev = dev;
	ip->i_devvp = mp->m_devvp;
	ip->i_number = ino;
	trace3(TR_MP_INODE, ITOV(ip), dev, ino);
	ip->i_diroff = 0;
	ip->i_fs = fs;
	ip->i_nextr = 0;
#ifdef QUOTA
	dqrele(ip->i_dquot);
	ip->i_dquot = NULL;
#endif
	/* read the filesystem block containing the on-disk inode */
	bp = bread(ip->i_devvp, (daddr_t)fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize);
	/*
	 * Check I/O errors
	 */
	if ((bp->b_flags & B_ERROR) != 0) {
		brelse(bp);
		/*
		 * The inode doesn't contain anything useful, so it
		 * would be misleading to leave it on its hash chain.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * We also lose its inumber, just in case (as iput
		 * doesn't do that any more) - but as it isn't on its
		 * hash chain, I doubt if this is really necessary .. kre
		 * (probably the two methods are interchangable)
		 */
		ip->i_number = 0;
		ITOV(ip)->v_count = 0;
		iunlock(ip);
		ip->i_flag = 0;
		/*
		 * Put the inode on the end of the free list.
		 * Maybe we should put it on the beginning of
		 * the free list.
		 */
		if (ifreeh) {
			*ifreet = ip;
			ip->i_freeb = ifreet;
		} else {
			ifreeh = ip;
			ip->i_freeb = &ifreeh;
		}
		ip->i_freef = NULL;
		ifreet = &ip->i_freef;	/* bug 1102884 */

#if defined(DEBUG_INODES)
		printf("iget: I/O error reading inode %d/%d:%d\n",
		    major(dev), minor(dev), ino);
#endif
		return (EIO);
	}
	/* copy the on-disk inode into the in-core inode */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_ic = dp->di_ic;	/* structure assignment */
	/*
	 * These are unneeded when we go to the next major release and
	 * get these fields out of icommon.
	 */
	ip->i_ic.ic_delayoff = 0;	/* XXX */
	ip->i_ic.ic_delaylen = 0;	/* XXX */
	ip->i_ic.ic_nextrio = 0;	/* XXX */
	ip->i_ic.ic_writes = 0;		/* XXX */
	VN_INIT(ITOV(ip), mp->m_vfsp, IFTOVT(ip->i_mode), ip->i_rdev);
	if (ino == (ino_t)ROOTINO) {
		ITOV(ip)->v_flag |= VROOT;
	}
	brelse(bp);
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = getinoquota(ip);
#endif
	*ipp = ip;
	ins.in_misses++;
	trace6(TR_UFS_INSTATS, ip, ip->i_dev, ip->i_number, TRC_INSTATS_MISS,
	    ins.in_misses, ins.in_hits);
#if defined(DEBUG_INODES)
	printinode("iget: read", ip);
#endif
	return (0);
}
|
|
|
|
/*
|
|
* Unlock inode and vrele associated vnode
|
|
*/
|
|
/*
 * Release an inode obtained locked (e.g. from iget): unlock it,
 * update its times if flagged, and drop the vnode reference.
 * Panics if the caller does not actually hold the inode locked.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	iunlock(ip);
	ITIMES(ip);
	VN_RELE(ITOV(ip));
}
|
|
|
|
/*
|
|
* Check that inode is not locked and release associated vnode.
|
|
*/
|
|
/*
 * Release an inode the caller holds UNlocked: update its times if
 * flagged and drop the vnode reference.  Panics if the inode is
 * locked, which would indicate the caller should use iput() instead.
 */
irele(ip)
	register struct inode *ip;
{

	if (ip->i_flag & ILOCKED)
		panic("irele");
	ITIMES(ip);
	VN_RELE(ITOV(ip));
}
|
|
|
|
/*
|
|
* Drop inode without going through the normal
|
|
* chain of unlocking and releasing.
|
|
*/
|
|
/*
 * Drop inode without going through the normal chain of unlocking and
 * releasing (VN_RELE would call iinactive).  Decrements the vnode
 * reference count directly; when the count reaches zero the inode
 * goes back on the end of the free list with only the fast-symlink
 * and mtime-okay flags retained.  Used by iget() when a candidate
 * inode turns out to be unusable.  Panics if not locked on entry.
 */
idrop(ip)
	register struct inode *ip;
{
	register struct vnode *vp = &ip->i_vnode;

	if ((ip->i_flag & ILOCKED) == 0)
		panic("idrop");
	iunlock(ip);
	if (--vp->v_count == 0) {
		/* an unreferenced inode must not already be on the list */
		if (ip->i_freeb || ip->i_freef)
			panic("idrop: inactivating inactive inode");
		/* retain the fast symlnk flag and mtime-okay flag */
		ip->i_flag &= (IFASTSYMLNK|IMODTIME);
		/*
		 * Put the inode back on the end of the free list.
		 */
		if (ifreeh) {
			*ifreet = ip;
			ip->i_freeb = ifreet;
		} else {
			ifreeh = ip;
			ip->i_freeb = &ifreeh;
		}
		ip->i_freef = NULL;
		ifreet = &ip->i_freef;
	}
}
|
|
|
|
static int i_genrand = 1234;
|
|
/*
|
|
* Vnode is no longer referenced, write the inode out
|
|
* and if necessary, truncate and deallocate the file.
|
|
*/
|
|
iinactive(ip)
|
|
register struct inode *ip;
|
|
{
|
|
int mode;
|
|
|
|
if ((ip->i_flag & (IREF|ILOCKED)) != IREF || ip->i_freeb || ip->i_freef)
|
|
panic("iinactive");
|
|
if (ip->i_fs && ip->i_mode && ip->i_fs->fs_ronly == 0) {
|
|
ilock(ip);
|
|
if (ip->i_nlink <= 0) {
|
|
if (ULOCKFS_IS_NOIDEL(ITOU(ip))) {
|
|
iunlock(ip);
|
|
return;
|
|
}
|
|
i_genrand *= 16843009; /* turns into shift and adds */
|
|
i_genrand++;
|
|
ip->i_gen += ((i_genrand + lbolt) & 0xffff) + 1;
|
|
(void) itrunc(ip, (u_long)0);
|
|
mode = ip->i_mode;
|
|
ip->i_mode = 0;
|
|
ip->i_rdev = 0;
|
|
ip->i_flag |= IUPD|ICHG;
|
|
ifree(ip, ip->i_number, mode);
|
|
#ifdef QUOTA
|
|
(void) chkiq(VFSTOM(ip->i_vnode.v_vfsp),
|
|
ip, (int)ip->i_uid, 0);
|
|
dqrele(ip->i_dquot);
|
|
ip->i_dquot = NULL;
|
|
#endif
|
|
IUPDAT(ip, 0)
|
|
} else if (!IS_SWAPVP(ITOV(ip))) {
|
|
/*
|
|
* Do an async write (B_ASYNC) of the pages
|
|
* on the vnode and put the pages on the free
|
|
* list when we are done (B_FREE). This action
|
|
* will cause all the pages to be written back
|
|
* for the file now and will allow update() to
|
|
* skip over inodes that are on the free list.
|
|
*
|
|
* NOTE: The pages associated with this vnode are
|
|
* freed only if the system is low on memory.
|
|
*/
|
|
if (freemem < lotsfree + pages_before_pager)
|
|
(void) syncip(ip, B_FREE | B_ASYNC, 0);
|
|
else {
|
|
IUPDAT(ip, 0);
|
|
}
|
|
|
|
} else {
|
|
IUPDAT(ip, 0);
|
|
}
|
|
iunlock(ip);
|
|
}
|
|
if (ip->i_freeb || ip->i_freef)
|
|
panic("iinactive: inactivating inactive inode");
|
|
|
|
/* retain the fast symlnk flag */
|
|
ip->i_flag &= (IFASTSYMLNK|IMODTIME);
|
|
ITOV(ip)->v_op = &ufs_vnodeops;
|
|
/*
|
|
* If the inode has associated pages put it on the back of the
|
|
* free list. If it has none, put it on the front.
|
|
* Also, fast symbolic inode always put at end of free list
|
|
*/
|
|
if (ITOV(ip)->v_pages || ip->i_flag & IFASTSYMLNK) {
|
|
if (ifreeh) {
|
|
*ifreet = ip;
|
|
ip->i_freeb = ifreet;
|
|
} else {
|
|
ifreeh = ip;
|
|
ip->i_freeb = &ifreeh;
|
|
}
|
|
ip->i_freef = NULL;
|
|
ifreet = &ip->i_freef;
|
|
ins.in_frback++;
|
|
} else {
|
|
if (ifreeh) {
|
|
ip->i_freef = ifreeh;
|
|
ifreeh->i_freeb = &ip->i_freef;
|
|
} else {
|
|
ip->i_freef = NULL;
|
|
ifreet = &ip->i_freef;
|
|
}
|
|
ifreeh = ip;
|
|
ip->i_freeb = &ifreeh;
|
|
ino_free_at_front++;
|
|
ins.in_frfront++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check accessed and update flags on an inode structure.
|
|
* If any is on, update the inode with the (unique) current time.
|
|
* If waitfor is given insure i/o order so wait for write to complete.
|
|
*/
|
|
iupdat(ip, waitfor)
	register struct inode *ip;
	int waitfor;	/* nonzero: write synchronously (unless delayed-io) */
{
	register struct buf *bp;
	register struct fs *fp;
	struct dinode *dp;
	int fastsymflag;	/* fast symbolic link is active */
	long fastsymlnk[FSL_SIZE / sizeof (long)];	/* for sun4 alignment */

	fp = ip->i_fs;
	/* nothing to do unless some time/modify flag is pending */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC)) != 0) {
		if (fp == NULL || fp->fs_ronly)
			return;
		ufs_notclean(ITOM(ip), fp);
		fastsymflag = (ip->i_flag & IFASTSYMLNK);
		if (fastsymflag) {
			/*
			 * Save fast sym link: the link text is cached in
			 * the block-pointer slots, which must be zeroed
			 * before the inode image goes to disk, then
			 * restored below.
			 */
			int i;

			(void) bcopy((caddr_t) &ip->i_db[1],
			    (caddr_t) fastsymlnk, (u_int)ip->i_size);
			ip->i_flag &= ~IFASTSYMLNK;
			for (i = 1; i < NDADDR && ip->i_db[i]; i++)
				ip->i_db[i] = 0;
			for (i = 0; i < NIADDR && ip->i_ib[i]; i++)
				ip->i_ib[i] = 0;
		}
		/* read the fs block holding this inode's disk image */
		bp = bread(ip->i_devvp,
		    (daddr_t)fsbtodb(fp, itod(fp, ip->i_number)),
		    (int)fp->fs_bsize);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			return;
		}
		if (ip->i_flag & (IUPD|IACC|ICHG))
			IMARK(ip);
		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC);
		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
		dp->di_ic = ip->i_ic;	/* structure assignment */
		/*
		 * These are unneeded when we go to the next major release and
		 * get these fields out of icommon.
		 */
		dp->di_un.di_icom.ic_delayoff = 0;	/* XXX */
		dp->di_un.di_icom.ic_delaylen = 0;	/* XXX */
		dp->di_un.di_icom.ic_nextrio = 0;	/* XXX */
		dp->di_un.di_icom.ic_writes = 0;	/* XXX */
		if (waitfor && ((ITOM(ip)->m_dio & MDIO_ON) == 0))
			bwrite(bp);
		else
			bdwrite(bp);

		/* restore inode */
		if (fastsymflag) {
			bcopy((caddr_t) fastsymlnk, (caddr_t) &ip->i_db[1],
			    (u_int)ip->i_size);
			ip->i_flag |= IFASTSYMLNK;
		}
	}
}
|
|
|
|
#define SINGLE 0 /* index of single indirect block */
|
|
#define DOUBLE 1 /* index of double indirect block */
|
|
#define TRIPLE 2 /* index of triple indirect block */
|
|
|
|
/*
|
|
* Release blocks associated with the inode ip and
|
|
* stored in the indirect block bn. Blocks are free'd
|
|
* in LIFO order up to (but not including) lastbn. If
|
|
* level is greater than SINGLE, the block is an indirect
|
|
* block and recursive calls to indirtrunc must be used to
|
|
* cleanse other indirect blocks.
|
|
*
|
|
* N.B.: triple indirect blocks are untested.
|
|
*/
|
|
static long
indirtrunc(ip, bn, lastbn, level)
	register struct inode *ip;
	daddr_t bn, lastbn;	/* indirect block; last fs block to keep */
	int level;		/* SINGLE, DOUBLE or TRIPLE */
{
	register int i;
	struct buf *bp, *copy;
	register daddr_t *bap;
	register struct fs *fs = ip->i_fs;
	daddr_t nb, last;
	long factor;
	int blocksreleased = 0, nblocks;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.  The private copy is then
	 * used to walk the pointers being released, so the on-disk
	 * block never references freed blocks (crash safety).
	 */
	copy = geteblk((int)fs->fs_bsize);
	bp = bread(ip->i_devvp, (daddr_t)fsbtodb(fs, bn), (int)fs->fs_bsize);
	if (bp->b_flags & B_ERROR) {
		brelse(copy);
		brelse(bp);
		return (0);
	}
	bap = bp->b_un.b_daddr;
	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	    (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	bwrite(bp);
	bp = copy, bap = bp->b_un.b_daddr;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE)
			blocksreleased +=
			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
		free(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * (i == last here, from the loop above.)
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0)
			blocksreleased += indirtrunc(ip, nb, last, level - 1);
	}
	brelse(bp);
	return (blocksreleased);
}
|
|
|
|
/*
|
|
* Truncate the inode ip to at most length size.
|
|
* Free affected disk blocks -- the blocks of the
|
|
* file are removed in reverse order.
|
|
*
|
|
* N.B.: triple indirect blocks are untested.
|
|
*/
|
|
itrunc(oip, length)
	register struct inode *oip;
	u_long length;		/* new file size in bytes */
{
	register struct fs *fs = oip->i_fs;
	register struct inode *ip;
	register daddr_t lastblock;
	register off_t bsize;
	register int offset;
	daddr_t bn, lastiblock[NIADDR];
	int level;
	long nblocks, blocksreleased = 0;
	register int i;
	struct inode tip;	/* scratch copy used while freeing blocks */
	daddr_t llbn;

	/*
	 * We only allow truncation of regular files and directories
	 * to arbitrary lengths here.  In addition, we allow symbolic
	 * links to be truncated only to zero length.  Other inode
	 * types cannot have their length set here since disk blocks
	 * are being dealt with - especially device inodes where
	 * ip->i_rdev is actually being stored in ip->i_db[0]!
	 */
	i = oip->i_mode & IFMT;
	if (i != IFREG && i != IFDIR && i != IFLNK)
		return (0);
	else if (i == IFLNK && length != 0)
		return (EINVAL);

	/*
	 * POSIX requires modification time be updated on truncation
	 */
	/*
	 * Certain vendors NFS clients either allow file sizes over 2G
	 * or they are buggy and just don't check for bogus offsets so
	 * we need to check for that here.
	 */
	if ((long)length < 0)
		return (EFBIG);

	/* same size: just mark for time update */
	if (length == oip->i_size) {
		oip->i_flag |= ICHG|IUPD;
		return (0);
	}

	/* fast symlink: clear the cached link text from the block slots */
	if (oip->i_flag & IFASTSYMLNK) {
		int j;
		oip->i_flag &= ~IFASTSYMLNK;
		for (j = 1; j < NDADDR && oip->i_db[j]; j++)
			oip->i_db[j] = 0;
		for (j = 0; j < NIADDR && oip->i_ib[j]; j++)
			oip->i_ib[j] = 0;
	}

	offset = blkoff(fs, length);
	llbn = lblkno(fs, length - 1);
	if (length > oip->i_size) {
		int err;

		/*
		 * Trunc up case.  We need to call bmap_write() because of
		 * frags since the number of frags is calculated from the size.
		 */
		if (offset == 0)
			err = BMAPALLOC(oip, llbn, (int)fs->fs_bsize);
		else
			err = BMAPALLOC(oip, llbn, offset);
		if (err == 0) {
			oip->i_size = length;
			oip->i_flag |= ICHG;
			ITIMES(oip);
		}
		return (err);
	}

	/*
	 * Forget about delayed pages that will be past the end of the file.
	 */
	if (oip->i_delaylen && oip->i_delayoff + oip->i_delaylen > length) {
		oip->i_delaylen -= oip->i_delayoff + oip->i_delaylen - length;
		if (oip->i_delaylen <= 0)
			oip->i_delaylen = oip->i_delayoff = 0;
	}

	/*
	 * Update the pages of the file.  If the file is not being
	 * truncated to a block boundary, the contents of the
	 * pages following the end of the file must be zero'ed
	 * in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	trace3(TR_MP_TRUNC, ITOV(oip), length, oip->i_size);
	if (offset == 0) {
		pvn_vptrunc(ITOV(oip), (u_int)length, (u_int)0);
	} else {
		int err;

		/*
		 * Trunc down case.
		 * Make sure that the last block is properly allocated.
		 * We only really have to do this if the last block is
		 * actually allocated since bmap will now handle the case
		 * of a fragment which has no block allocated.  Just to
		 * be sure, we do it now independent of current allocation.
		 */
		if (err = BMAPALLOC(oip, llbn, offset))
			return (err);

		/*
		 * Calculate how much to zero past EOF.
		 */
		bsize = llbn >= NDADDR? fs->fs_bsize : fragroundup(fs, offset);
		pvn_vptrunc(ITOV(oip), (u_int)length, (u_int)(bsize - offset));
	}

	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);

	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;	/* structure copy */
	ip = &tip;

	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;

	oip->i_size = length;
	oip->i_flag |= ICHG|IUPD;
	iupdat(oip, 1);	/* do sync inode update */

	/*
	 * Indirect blocks first.
	 */
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			blocksreleased +=
			    indirtrunc(ip, bn, lastiblock[level], level);
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				free(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* a partially-kept indirect means no direct blocks to free */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		free(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			free(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
	/* BEGIN PARANOIA */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
	/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (length == 0 && oip->i_blocks != 0) {	/* sanity */
		printf("itrunc: %s/%d new size = %d, blocks = %d\n",
		    fs->fs_fsmnt, oip->i_number, oip->i_size, oip->i_blocks);
		oip->i_blocks = 0;
	}
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (0);
}
|
|
|
|
|
|
/*
|
|
* Check mode permission on inode. Mode is READ, WRITE or EXEC.
|
|
* In the case of WRITE, the read-only status of the file system
|
|
* is checked. The mode is shifted to select the owner/group/other
|
|
* fields. The super user is granted all permissions except
|
|
* writing to read-only file systems.
|
|
*/
|
|
iaccess(ip, m)
	register struct inode *ip;
	register int m;		/* IREAD, IWRITE and/or IEXEC, in owner bits */
{
	register int *gp;

	if (m & IWRITE) {
		/*
		 * Disallow write attempts on read-only
		 * file systems; unless the file is a block
		 * or character device resident on the
		 * file system, or a fifo.
		 */
		if (ip->i_fs->fs_ronly != 0) {
			if ((ip->i_mode & IFMT) != IFCHR &&
			    (ip->i_mode & IFMT) != IFBLK &&
			    (ip->i_mode & IFMT) != IFIFO) {
				return (EROFS);
			}
		}
	}
	/*
	 * If you're the super-user,
	 * you always get access.
	 */
	if (u.u_uid == 0)
		return (0);
	/*
	 * Access check is based on only
	 * one of owner, group, public.
	 * If not owner, then check group.
	 * If not a member of the group, then
	 * check public access.
	 * (Each >>= 3 shifts the requested bits from the owner field
	 * toward the group, then the other, field of the mode.)
	 */
	if (u.u_uid != ip->i_uid) {
		m >>= 3;
		if (u.u_gid == ip->i_gid)
			goto found;
		gp = u.u_groups;
		for (; gp < &u.u_groups[NGROUPS] && *gp != NOGROUP; gp++)
			if (ip->i_gid == *gp)
				goto found;
		m >>= 3;
	}
found:
	if ((ip->i_mode & m) == m)
		return (0);
	return (EACCES);
}
|
|
|
|
/*
 * Function wrapper around the ILOCK macro so the lock operation can
 * be called through a plain function (e.g. taken by address or called
 * from other modules).
 */
ilock(ip)
	register struct inode *ip;
{

	ILOCK(ip);
}
|
|
|
|
/*
 * Function wrapper around the IUNLOCK macro; counterpart of ilock().
 */
iunlock(ip)
	register struct inode *ip;
{

	IUNLOCK(ip);
}
|
|
|
|
/*
|
|
* Check a dnlc entry to see if it's suitable for purging from the
|
|
* dnlc and if so purge it. Suitable is: must be type ufs,
|
|
* and if pages_ok == 0, have no associated pages. Returns 1 if
|
|
* purged an entry, else 0. This function is intended to be called by
|
|
* the dnlc_iter() function to search the cache and purge a qualified
|
|
* entry.
|
|
*/
|
|
static int
|
|
inode_dnlc_purge(ncp, pages_ok)
|
|
register struct ncache *ncp;
|
|
register int pages_ok;
|
|
{
|
|
register struct vnode *vp = dnlc_vp(ncp);
|
|
|
|
ins.in_dnlclook++;
|
|
|
|
if ((vp->v_op == &ufs_vnodeops) &&
|
|
(pages_ok || (!pages_ok && !vp->v_pages))) {
|
|
ins.in_dnlcpurge++;
|
|
dnlc_rm(ncp);
|
|
return (1);
|
|
} else
|
|
return (0);
|
|
}
|
|
/*
|
|
* Remove any inodes in the inode cache belonging to dev
|
|
*
|
|
* If not forced unmount:
|
|
* There should not be any active ones, return error if any are
|
|
* found but still invalidate others (N.B.: this is a user error,
|
|
* not a system error).
|
|
* If forced unmount
|
|
* unhash relevant inodes, and NULL ip->i_fs and vp->v_vfsp because
|
|
* fs and vfs will be freed even while the inode remains.
|
|
* vp->v_ops is set to ufs_forcedops, which returns EIO to every
|
|
* access except close and inactive.
|
|
*/
|
|
|
|
extern struct vnodeops ufs_forcedops;
|
|
extern struct vfsops ufs_vfsops;
|
|
|
|
struct vfs forcedvfs;
|
|
|
|
iflush(dev, forced, iq)
	dev_t dev;		/* device whose inodes are to be flushed */
	int forced;		/* nonzero: forced unmount semantics */
	struct inode *iq;	/* quota inode to skip (flushed later) */
{
	struct inode *ip;
	union ihead *ih;

	/*
	 * Restart the full hash-table scan after every flush, since
	 * iget/syncip/iput may sleep and the chains can change.
	 */
loop:
	for (ih = ihead; ih < &ihead[INOHSZ]; ih++) {
		for (ip = ih->ih_chain[0];
		    ip != (struct inode *)ih; ip = ip->i_forw) {

			if (ip->i_dev != dev)
				continue;
			/*
			 * quota inode will be handled in a later iflush call
			 */
			if (ip == iq)
				continue;
			/*
			 * stop the flush if an inode is referenced
			 */
			if ((!forced) && (ip->i_flag & IREF))
				return (1);
			/*
			 * flush the inode
			 */
			if (iget(ip->i_dev, ip->i_fs, ip->i_number, &ip))
				return (1);
			(void) syncip(ip, B_INVAL, 1);

			/*
			 * unhash
			 */
			remque(ip);
			ip->i_forw = ip;
			ip->i_back = ip;
#ifdef QUOTA
			dqrele(ip->i_dquot);
			ip->i_dquot = NULL;
#endif
			/*
			 * if forced:
			 *	new vnodeops vector (returns EIO)
			 *	new vfs struct (prevent data fault panics)
			 *	NULL i_fs
			 *	call iinactive on hidden, deleted files
			 */
			if (forced) {
				forcedvfs.vfs_flag = VFS_RDONLY;
				forcedvfs.vfs_op = &ufs_vfsops;
				ITOV(ip)->v_op = &ufs_forcedops;
				ITOV(ip)->v_vfsp = &forcedvfs;
				ITOV(ip)->v_flag &= ~VROOT;
				ip->i_fs = NULL;
			}
			iput(ip);
			goto loop;
		}
	}
	return (0);
}
|
|
|
|
#ifdef TRACE
|
|
/*
|
|
* Dump the contents of the inode cache.
|
|
*/
|
|
/*
 * Dump the contents of the inode cache to the trace buffer,
 * one trace6 record per hashed inode, terminated by an all-zero
 * record with the final argument set to 1.
 */
trace_inode()
{
	register struct inode *ip;
	register union ihead *ih;

	/*
	 * Purge the cache so we can find out who's REALLY in use
	 */
	dnlc_purge();

	for (ih = ihead; ih < &ihead[INOHSZ]; ih++) {
		for (ip = ih->ih_chain[0];
		    ip != (struct inode *)ih; ip = ip->i_forw) {
			trace6(TR_UFS_INODE, ip,
			    ip->i_flag, ip->i_dev, ip->i_number,
			    (ip->i_mode << 16) | (ip->i_rdev & 0xffff), 0);
		}
	}
	trace6(TR_UFS_INODE, 0, 0, 0, 0, 0, 1);	/* Signals last entry */
}
|
|
|
|
/*
|
|
* Dump the inode statistics counters.
|
|
*/
|
|
/*
 * Emit the inode cache hit/miss counters as a single trace record.
 */
trace_instats()
{
	trace6(TR_UFS_INSTATS, 0, 0, 0, 0, ins.in_misses, ins.in_hits);
}
|
|
|
|
/*
|
|
* Reset the inode statistics counters.
|
|
*/
|
|
/*
 * Reset all inode statistics counters to zero.
 */
trace_instats_reset()
{
	bzero((caddr_t) &ins, sizeof (ins));
}
|
|
#endif TRACE
|