File:  [DragonFly] / src / sys / vfs / nullfs / Attic / null_subr.c
Revision 1.10: download - view: text, annotated - select for diffs
Wed May 19 22:53:05 2004 UTC (10 years, 6 months ago) by dillon
Branches: MAIN
CVS tags: HEAD, DragonFly_1_0_REL, DragonFly_1_0_RC1, DragonFly_1_0A_REL
Device layer rollup commit.

* cdevsw_add() is now required.  cdevsw_add() and cdevsw_remove() may specify
  a mask/match indicating the range of supported minor numbers.  Multiple
  cdevsw_add()'s using the same major number, but distinctly different
  ranges, may be issued.  All devices that failed to call cdevsw_add() before
  now do.

* cdevsw_remove() now automatically marks all devices within its supported
  range as being destroyed.

* vnode->v_rdev is no longer resolved when the vnode is created.  Instead,
  only v_udev (a newly added field) is resolved.  v_rdev is resolved when
  the vnode is opened and cleared on the last close.

* A great deal of code was making rather dubious assumptions with regards
  to the validity of devices associated with vnodes, primarily due to
  the persistence of a device structure due to being indexed by (major, minor)
  instead of by (cdevsw, major, minor).  In particular, if you run a program
  which connects to a USB device and then you pull the USB device and plug
  it back in, the vnode subsystem will continue to believe that the device
  is open when, in fact, it isn't (because it was destroyed and recreated).

  In particular, note that all the VFS mount procedures now check devices
  via v_udev instead of v_rdev prior to calling VOP_OPEN(), since v_rdev
  is NULL prior to the first open.

* The disk layer's device interaction has been rewritten.  The disk layer
  (i.e. the slice and disklabel management layer) no longer overloads
  its data onto the device structure representing the underlying physical
  disk.  Instead, the disk layer uses the new cdevsw_add() functionality
  to register its own cdevsw using the underlying device's major number,
  and simply does NOT register the underlying device's cdevsw.  No
  confusion is created because the device hash is now based on
  (cdevsw,major,minor) rather than (major,minor).

  NOTE: This also means that underlying raw disk devices may use the entire
  device minor number instead of having to reserve the bits used by the disk
  layer, and also means that we can (theoretically) stack a fully
  disklabel-supported 'disk' on top of any block device.

* The new reference counting scheme prevents this by associating a device
  with a cdevsw and disconnecting the device from its cdevsw when the cdevsw
  is removed.  Additionally, all udev2dev() lookups run through the cdevsw
  mask/match and only successfully find devices still associated with an
  active cdevsw.

* Major work on MFS:  MFS no longer shortcuts vnode and device creation.  It
  now creates a real vnode and a real device and implements real open and
  close VOPs.  Additionally, due to the disk layer changes, MFS is no longer
  limited to 255 mounts.  The new limit is 16 million.  Since MFS creates a
  real device node, mount_mfs will now create a real /dev/mfs<PID> device
  that can be read from userland (e.g. so you can dump an MFS filesystem).

* BUF AND DEVICE STRATEGY changes.  The struct buf contains a b_dev field.
  In order to properly handle stacked devices we now require that the b_dev
  field be initialized before the device strategy routine is called.  This
  required some additional work in various VFS implementations.  To enforce
  this requirement, biodone() now sets b_dev to NODEV.  The new disk layer
  will adjust b_dev before forwarding a request to the actual physical
  device.

* A bug in the ISO CD boot sequence which resulted in a panic has been fixed.

Testing by: lots of people, but David Rhodus found the most egregious bugs.

    1: /*
    2:  * Copyright (c) 1992, 1993
    3:  *	The Regents of the University of California.  All rights reserved.
    4:  *
    5:  * This code is derived from software donated to Berkeley by
    6:  * Jan-Simon Pendry.
    7:  *
    8:  * Redistribution and use in source and binary forms, with or without
    9:  * modification, are permitted provided that the following conditions
   10:  * are met:
   11:  * 1. Redistributions of source code must retain the above copyright
   12:  *    notice, this list of conditions and the following disclaimer.
   13:  * 2. Redistributions in binary form must reproduce the above copyright
   14:  *    notice, this list of conditions and the following disclaimer in the
   15:  *    documentation and/or other materials provided with the distribution.
   16:  * 3. All advertising materials mentioning features or use of this software
   17:  *    must display the following acknowledgement:
   18:  *	This product includes software developed by the University of
   19:  *	California, Berkeley and its contributors.
   20:  * 4. Neither the name of the University nor the names of its contributors
   21:  *    may be used to endorse or promote products derived from this software
   22:  *    without specific prior written permission.
   23:  *
   24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34:  * SUCH DAMAGE.
   35:  *
   36:  *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
   37:  *
   38:  * $FreeBSD: src/sys/miscfs/nullfs/null_subr.c,v 1.21.2.4 2001/06/26 04:20:09 bp Exp $
   39:  * $DragonFly: src/sys/vfs/nullfs/null_subr.c,v 1.10 2004/05/19 22:53:05 dillon Exp $
   40:  */
   41: 
   42: #include <sys/param.h>
   43: #include <sys/systm.h>
   44: #include <sys/kernel.h>
   45: #include <sys/proc.h>
   46: #include <sys/vnode.h>
   47: #include <sys/mount.h>
   48: #include <sys/malloc.h>
   49: #include "null.h"
   50: 
   51: #define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
   52: #define	NNULLNODECACHE 16
   53: 
   54: /*
   55:  * Null layer cache:
   56:  * Each cache entry holds a reference to the lower vnode
   57:  * along with a pointer to the alias vnode.  When an
   58:  * entry is added the lower vnode is vref'd.  When the
   59:  * alias is removed the lower vnode is vrele'd.
   60:  */
   61: 
   62: #define	NULL_NHASH(vp) \
   63: 	(&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])
   64: 
   65: static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
   66: static u_long null_node_hash;
   67: struct lock null_hashlock;
   68: 
   69: static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
   70: MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
   71: 
   72: static int	null_node_alloc(struct mount *mp, struct vnode *lowervp,
   73: 				     struct vnode **vpp);
   74: static struct vnode *
   75: 		null_node_find(struct mount *mp, struct vnode *lowervp);
   76: 
   77: /*
   78:  * Initialise cache headers
   79:  */
   80: int
   81: nullfs_init(struct vfsconf *vfsp)
   82: {
   83: 	NULLFSDEBUG("nullfs_init\n");		/* printed during system boot */
   84: 	null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH, &null_node_hash);
   85: 	lockinit(&null_hashlock, 0, "nullhs", 0, 0);
   86: 	return (0);
   87: }
   88: 
   89: int
   90: nullfs_uninit(struct vfsconf *vfsp)
   91: {
   92:         if (null_node_hashtbl) {
   93: 		free(null_node_hashtbl, M_NULLFSHASH);
   94: 	}
   95: 	return (0);
   96: }
   97: 
   98: /*
   99:  * Return a vref'ed alias for lower vnode if already exists, else 0.
  100:  * Lower vnode should be locked on entry and will be left locked on exit.
  101:  */
  102: static struct vnode *
  103: null_node_find(struct mount *mp, struct vnode *lowervp)
  104: {
  105: 	struct thread *td = curthread;	/* XXX */
  106: 	struct null_node_hashhead *hd;
  107: 	struct null_node *a;
  108: 	struct vnode *vp;
  109: 
  110: 	/*
  111: 	 * Find hash base, and then search the (two-way) linked
  112: 	 * list looking for a null_node structure which is referencing
  113: 	 * the lower vnode.  If found, the increment the null_node
  114: 	 * reference count (but NOT the lower vnode's vref counter).
  115: 	 */
  116: 	hd = NULL_NHASH(lowervp);
  117: loop:
  118: 	lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, td);
  119: 	LIST_FOREACH(a, hd, null_hash) {
  120: 		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
  121: 			vp = NULLTOV(a);
  122: 			lockmgr(&null_hashlock, LK_RELEASE, NULL, td);
  123: 			/*
  124: 			 * We need vget for the VXLOCK
  125: 			 * stuff, but we don't want to lock
  126: 			 * the lower node.
  127: 			 */
  128: 			if (vget(vp, NULL, LK_EXCLUSIVE | LK_CANRECURSE, td)) {
  129: 				printf ("null_node_find: vget failed.\n");
  130: 				goto loop;
  131: 			}
  132: 			VOP_UNLOCK(lowervp, NULL, 0, td);
  133: 			return (vp);
  134: 		}
  135: 	}
  136: 	lockmgr(&null_hashlock, LK_RELEASE, NULL, td);
  137: 
  138: 	return NULLVP;
  139: }
  140: 
  141: 
  142: /*
  143:  * Make a new null_node node.
  144:  * Vp is the alias vnode, lofsvp is the lower vnode.
  145:  * Maintain a reference to (lowervp).
  146:  */
  147: static int
  148: null_node_alloc(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
  149: {
  150: 	struct thread *td = curthread;	/* XXX */
  151: 	struct null_node_hashhead *hd;
  152: 	struct null_node *xp;
  153: 	struct vnode *othervp, *vp;
  154: 	int error;
  155: 
  156: 	/*
  157: 	 * Do the MALLOC before the getnewvnode since doing so afterward
  158: 	 * might cause a bogus v_data pointer to get dereferenced
  159: 	 * elsewhere if MALLOC should block.
  160: 	 */
  161: 	MALLOC(xp, struct null_node *, sizeof(struct null_node),
  162: 	    M_NULLFSNODE, M_WAITOK);
  163: 
  164: 	error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp);
  165: 	if (error) {
  166: 		FREE(xp, M_NULLFSNODE);
  167: 		return (error);
  168: 	}
  169: 	vp = *vpp;
  170: 
  171: 	vp->v_type = lowervp->v_type;
  172: 
  173: 	/*
  174: 	 * XXX:
  175: 	 * When nullfs encounters sockets or device nodes, it
  176: 	 * has a hard time working with the normal vp union.
  177: 	 * This still needs to be investigated.
  178: 	 */
  179: 	if (vp->v_type == VCHR || vp->v_type == VBLK)
  180: 		addaliasu(vp, lowervp->v_udev);
  181: 	else
  182: 		vp->v_un = lowervp->v_un;
  183: 	lockinit(&xp->null_lock, 0, "nullnode", 0, LK_CANRECURSE);
  184: 	xp->null_vnode = vp;
  185: 	vp->v_data = xp;
  186: 	xp->null_lowervp = lowervp;
  187: 	/*
  188: 	 * Before we insert our new node onto the hash chains,
  189: 	 * check to see if someone else has beaten us to it.
  190: 	 * (We could have slept in MALLOC.)
  191: 	 */
  192: 	othervp = null_node_find(mp, lowervp);
  193: 	if (othervp) {
  194: 		vp->v_data = NULL;
  195: 		FREE(xp, M_NULLFSNODE);
  196: 		vp->v_type = VBAD;	/* node is discarded */
  197: 		vrele(vp);
  198: 		*vpp = othervp;
  199: 		return 0;
  200: 	}
  201: 
  202: 	/*
  203: 	 * From NetBSD:
  204: 	 * Now lock the new node. We rely on the fact that we were passed
  205: 	 * a locked vnode. If the lower node is exporting a struct lock
  206: 	 * (v_vnlock != NULL) then we just set the upper v_vnlock to the
  207: 	 * lower one, and both are now locked. If the lower node is exporting
  208: 	 * NULL, then we copy that up and manually lock the new vnode.
  209: 	 */
  210: 
  211: 	lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, td);
  212: 	vp->v_vnlock = lowervp->v_vnlock;
  213: 	error = VOP_LOCK(vp, NULL, LK_EXCLUSIVE | LK_THISLAYER, td);
  214: 	if (error)
  215: 		panic("null_node_alloc: can't lock new vnode\n");
  216: 
  217: 	vref(lowervp);
  218: 	hd = NULL_NHASH(lowervp);
  219: 	LIST_INSERT_HEAD(hd, xp, null_hash);
  220: 	lockmgr(&null_hashlock, LK_RELEASE, NULL, td);
  221: 	return 0;
  222: }
  223: 
  224: 
  225: /*
  226:  * Try to find an existing null_node vnode refering to the given underlying
  227:  * vnode (which should be locked). If no vnode found, create a new null_node
  228:  * vnode which contains a reference to the lower vnode.
  229:  */
  230: int
  231: null_node_create(struct mount *mp, struct vnode *lowervp, struct vnode **newvpp)
  232: {
  233: 	struct vnode *aliasvp;
  234: 
  235: 	aliasvp = null_node_find(mp, lowervp);
  236: 	if (aliasvp) {
  237: 		/*
  238: 		 * null_node_find has taken another reference
  239: 		 * to the alias vnode.
  240: 		 */
  241: 		vrele(lowervp);
  242: #ifdef NULLFS_DEBUG
  243: 		vprint("null_node_create: exists", aliasvp);
  244: #endif
  245: 	} else {
  246: 		int error;
  247: 
  248: 		/*
  249: 		 * Get new vnode.
  250: 		 */
  251: 		NULLFSDEBUG("null_node_create: create new alias vnode\n");
  252: 
  253: 		/*
  254: 		 * Make new vnode reference the null_node.
  255: 		 */
  256: 		error = null_node_alloc(mp, lowervp, &aliasvp);
  257: 		if (error)
  258: 			return error;
  259: 
  260: 		/*
  261: 		 * aliasvp is already vref'd by getnewvnode()
  262: 		 */
  263: 	}
  264: 
  265: #ifdef DIAGNOSTIC
  266: 	if (lowervp->v_usecount < 1) {
  267: 		/* Should never happen... */
  268: 		vprint ("null_node_create: alias ", aliasvp);
  269: 		vprint ("null_node_create: lower ", lowervp);
  270: 		panic ("null_node_create: lower has 0 usecount.");
  271: 	};
  272: #endif
  273: 
  274: #ifdef NULLFS_DEBUG
  275: 	vprint("null_node_create: alias", aliasvp);
  276: 	vprint("null_node_create: lower", lowervp);
  277: #endif
  278: 
  279: 	*newvpp = aliasvp;
  280: 	return (0);
  281: }
  282: 
  283: #ifdef DIAGNOSTIC
  284: #include "opt_ddb.h"
  285: 
  286: #ifdef DDB
  287: #define	null_checkvp_barrier	1
  288: #else
  289: #define	null_checkvp_barrier	0
  290: #endif
  291: 
  292: struct vnode *
  293: null_checkvp(struct vnode *vp, char *fil, int lno)
  294: {
  295: 	struct null_node *a = VTONULL(vp);
  296: #ifdef notyet
  297: 	/*
  298: 	 * Can't do this check because vop_reclaim runs
  299: 	 * with a funny vop vector.
  300: 	 */
  301: 	if (vp->v_op != null_vnodeop_p) {
  302: 		printf ("null_checkvp: on non-null-node\n");
  303: 		while (null_checkvp_barrier) /*WAIT*/ ;
  304: 		panic("null_checkvp");
  305: 	};
  306: #endif
  307: 	if (a->null_lowervp == NULLVP) {
  308: 		/* Should never happen */
  309: 		int i; u_long *p;
  310: 		printf("vp = %p, ZERO ptr\n", (void *)vp);
  311: 		for (p = (u_long *) a, i = 0; i < 8; i++)
  312: 			printf(" %lx", p[i]);
  313: 		printf("\n");
  314: 		/* wait for debugger */
  315: 		while (null_checkvp_barrier) /*WAIT*/ ;
  316: 		panic("null_checkvp");
  317: 	}
  318: 	if (a->null_lowervp->v_usecount < 1) {
  319: 		int i; u_long *p;
  320: 		printf("vp = %p, unref'ed lowervp\n", (void *)vp);
  321: 		for (p = (u_long *) a, i = 0; i < 8; i++)
  322: 			printf(" %lx", p[i]);
  323: 		printf("\n");
  324: 		/* wait for debugger */
  325: 		while (null_checkvp_barrier) /*WAIT*/ ;
  326: 		panic ("null with unref'ed lowervp");
  327: 	};
  328: #ifdef notyet
  329: 	printf("null %x/%d -> %x/%d [%s, %d]\n",
  330: 	        NULLTOV(a), NULLTOV(a)->v_usecount,
  331: 		a->null_lowervp, a->null_lowervp->v_usecount,
  332: 		fil, lno);
  333: #endif
  334: 	return a->null_lowervp;
  335: }
  336: #endif