File:  [DragonFly] / src / sys / dev / disk / vn / vn.c
Revision 1.10: download - view: text, annotated - select for diffs
Wed May 19 22:52:42 2004 UTC (10 years, 2 months ago) by dillon
Branches: MAIN
CVS tags: HEAD
Device layer rollup commit.

* cdevsw_add() is now required.  cdevsw_add() and cdevsw_remove() may specify
  a mask/match indicating the range of supported minor numbers.  Multiple
  cdevsw_add()'s using the same major number, but distinctly different
  ranges, may be issued.  All devices that failed to call cdevsw_add() before
  now do.

* cdevsw_remove() now automatically marks all devices within its supported
  range as being destroyed.

* vnode->v_rdev is no longer resolved when the vnode is created.  Instead,
  only v_udev (a newly added field) is resolved.  v_rdev is resolved when
  the vnode is opened and cleared on the last close.

* A great deal of code was making rather dubious assumptions with regards
  to the validity of devices associated with vnodes, primarily due to
  the persistence of a device structure due to being indexed by (major, minor)
  instead of by (cdevsw, major, minor).  In particular, if you run a program
  which connects to a USB device and then you pull the USB device and plug
  it back in, the vnode subsystem will continue to believe that the device
  is open when, in fact, it isn't (because it was destroyed and recreated).

  In particular, note that all the VFS mount procedures now check devices
  via v_udev instead of v_rdev prior to calling VOP_OPEN(), since v_rdev
  is NULL prior to the first open.

* The disk layer's device interaction has been rewritten.  The disk layer
  (i.e. the slice and disklabel management layer) no longer overloads
  its data onto the device structure representing the underlying physical
  disk.  Instead, the disk layer uses the new cdevsw_add() functionality
  to register its own cdevsw using the underlying device's major number,
  and simply does NOT register the underlying device's cdevsw.  No
  confusion is created because the device hash is now based on
  (cdevsw,major,minor) rather than (major,minor).

  NOTE: This also means that underlying raw disk devices may use the entire
  device minor number instead of having to reserve the bits used by the disk
  layer, and also means that we can (theoretically) stack a fully
  disklabel-supported 'disk' on top of any block device.

* The new reference counting scheme prevents this by associating a device
  with a cdevsw and disconnecting the device from its cdevsw when the cdevsw
  is removed.  Additionally, all udev2dev() lookups run through the cdevsw
  mask/match and only successfully find devices still associated with an
  active cdevsw.

* Major work on MFS:  MFS no longer shortcuts vnode and device creation.  It
  now creates a real vnode and a real device and implements real open and
  close VOPs.  Additionally, due to the disk layer changes, MFS is no longer
  limited to 255 mounts.  The new limit is 16 million.  Since MFS creates a
  real device node, mount_mfs will now create a real /dev/mfs<PID> device
  that can be read from userland (e.g. so you can dump an MFS filesystem).

* BUF AND DEVICE STRATEGY changes.  The struct buf contains a b_dev field.
  In order to properly handle stacked devices we now require that the b_dev
  field be initialized before the device strategy routine is called.  This
  required some additional work in various VFS implementations.  To enforce
  this requirement, biodone() now sets b_dev to NODEV.  The new disk layer
  will adjust b_dev before forwarding a request to the actual physical
  device.

* A bug in the ISO CD boot sequence which resulted in a panic has been fixed.

Testing by: lots of people, but David Rhodus found the most egregious bugs.

    1: /*
    2:  * Copyright (c) 1988 University of Utah.
    3:  * Copyright (c) 1990, 1993
    4:  *	The Regents of the University of California.  All rights reserved.
    5:  *
    6:  * This code is derived from software contributed to Berkeley by
    7:  * the Systems Programming Group of the University of Utah Computer
    8:  * Science Department.
    9:  *
   10:  * Redistribution and use in source and binary forms, with or without
   11:  * modification, are permitted provided that the following conditions
   12:  * are met:
   13:  * 1. Redistributions of source code must retain the above copyright
   14:  *    notice, this list of conditions and the following disclaimer.
   15:  * 2. Redistributions in binary form must reproduce the above copyright
   16:  *    notice, this list of conditions and the following disclaimer in the
   17:  *    documentation and/or other materials provided with the distribution.
   18:  * 3. All advertising materials mentioning features or use of this software
   19:  *    must display the following acknowledgement:
   20:  *	This product includes software developed by the University of
   21:  *	California, Berkeley and its contributors.
   22:  * 4. Neither the name of the University nor the names of its contributors
   23:  *    may be used to endorse or promote products derived from this software
   24:  *    without specific prior written permission.
   25:  *
   26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36:  * SUCH DAMAGE.
   37:  *
   38:  * from: Utah Hdr: vn.c 1.13 94/04/02
   39:  *
   40:  *	from: @(#)vn.c	8.6 (Berkeley) 4/1/94
   41:  * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
   42:  * $DragonFly: src/sys/dev/disk/vn/vn.c,v 1.10 2004/05/19 22:52:42 dillon Exp $
   43:  */
   44: 
   45: /*
   46:  * Vnode disk driver.
   47:  *
   48:  * Block/character interface to a vnode.  Allows one to treat a file
   49:  * as a disk (e.g. build a filesystem in it, mount it, etc.).
   50:  *
   51:  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
   52:  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
   53:  * local buffer cache.
   54:  *
   55:  * NOTE 2: There is a security issue involved with this driver.
   56:  * Once mounted all access to the contents of the "mapped" file via
   57:  * the special file is controlled by the permissions on the special
   58:  * file, the protection of the mapped file is ignored (effectively,
   59:  * by using root credentials in all transactions).
   60:  *
   61:  * NOTE 3: Doesn't interact with leases, should it?
   62:  */
   63: 
   64: #include <sys/param.h>
   65: #include <sys/systm.h>
   66: #include <sys/kernel.h>
   67: #include <sys/proc.h>
   68: #include <sys/namei.h>
   69: #include <sys/buf.h>
   70: #include <sys/malloc.h>
   71: #include <sys/mount.h>
   72: #include <sys/vnode.h>
   73: #include <sys/fcntl.h>
   74: #include <sys/conf.h>
   75: #include <sys/disklabel.h>
   76: #include <sys/diskslice.h>
   77: #include <sys/stat.h>
   78: #include <sys/conf.h>
   79: #include <sys/module.h>
   80: #include <sys/vnioctl.h>
   81: 
   82: #include <vm/vm.h>
   83: #include <vm/vm_object.h>
   84: #include <vm/vm_page.h>
   85: #include <vm/vm_pager.h>
   86: #include <vm/vm_pageout.h>
   87: #include <vm/swap_pager.h>
   88: #include <vm/vm_extern.h>
   89: #include <vm/vm_zone.h>
   90: 
   91: static	d_ioctl_t	vnioctl;
   92: static	d_open_t	vnopen;
   93: static	d_close_t	vnclose;
   94: static	d_psize_t	vnsize;
   95: static	d_strategy_t	vnstrategy;
   96: 
   97: #define CDEV_MAJOR 43
   98: 
   99: #define VN_BSIZE_BEST	8192
  100: 
  101: /*
  102:  * cdevsw
  103:  *	D_DISK		we want to look like a disk
  104:  *	D_CANFREE	We support B_FREEBUF
  105:  */
  106: 
  107: static struct cdevsw vn_cdevsw = {
  108: 	/* name */	"vn",
  109: 	/* maj */	CDEV_MAJOR,
  110: 	/* flags */	D_DISK|D_CANFREE,
  111: 	/* port */	NULL,
  112: 	/* clone */	NULL,
  113: 
  114: 	/* open */	vnopen,
  115: 	/* close */	vnclose,
  116: 	/* read */	physread,
  117: 	/* write */	physwrite,
  118: 	/* ioctl */	vnioctl,
  119: 	/* poll */	nopoll,
  120: 	/* mmap */	nommap,
  121: 	/* strategy */	vnstrategy,
  122: 	/* dump */	nodump,
  123: 	/* psize */	vnsize
  124: };
  125: 
  126: #define	getvnbuf()	\
  127: 	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
  128: 
  129: #define putvnbuf(bp)	\
  130: 	free((caddr_t)(bp), M_DEVBUF)
  131: 
  132: struct vn_softc {
  133: 	int		sc_unit;
  134: 	int		sc_flags;	/* flags 			*/
  135: 	int		sc_size;	/* size of vn, sc_secsize scale	*/
  136: 	int		sc_secsize;	/* sector size			*/
  137: 	struct diskslices *sc_slices;
  138: 	struct vnode	*sc_vp;		/* vnode if not NULL		*/
  139: 	vm_object_t	sc_object;	/* backing object if not NULL	*/
  140: 	struct ucred	*sc_cred;	/* credentials 			*/
  141: 	int		 sc_maxactive;	/* max # of active requests 	*/
  142: 	struct buf	 sc_tab;	/* transfer queue 		*/
  143: 	u_long		 sc_options;	/* options 			*/
  144: 	dev_t		 sc_devlist;	/* devices that refer to this unit */
  145: 	SLIST_ENTRY(vn_softc) sc_list;
  146: };
  147: 
  148: static SLIST_HEAD(, vn_softc) vn_list;
  149: 
  150: /* sc_flags */
  151: #define VNF_INITED	0x01
  152: #define	VNF_READONLY	0x02
  153: 
  154: static u_long	vn_options;
  155: 
  156: #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
  157: #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
  158: 
  159: static int	vnsetcred (struct vn_softc *vn, struct ucred *cred);
  160: static void	vnclear (struct vn_softc *vn);
  161: static int	vn_modevent (module_t, int, void *);
  162: static int 	vniocattach_file (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct thread *p);
  163: static int 	vniocattach_swap (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct thread *p);
  164: 
  165: static	int
  166: vnclose(dev_t dev, int flags, int mode, struct thread *td)
  167: {
  168: 	struct vn_softc *vn = dev->si_drv1;
  169: 
  170: 	IFOPT(vn, VN_LABELS)
  171: 		if (vn->sc_slices != NULL)
  172: 			dsclose(dev, mode, vn->sc_slices);
  173: 	return (0);
  174: }
  175: 
  176: static struct vn_softc *
  177: vnfindvn(dev_t dev)
  178: {
  179: 	int unit;
  180: 	struct vn_softc *vn;
  181: 
  182: 	unit = dkunit(dev);
  183: 	SLIST_FOREACH(vn, &vn_list, sc_list) {
  184: 		if (vn->sc_unit == unit) {
  185: 			dev->si_drv2 = vn->sc_devlist;
  186: 			vn->sc_devlist = dev;
  187: 			reference_dev(dev);
  188: 			dev->si_drv1 = vn;
  189: 			break;
  190: 		}
  191: 	}
  192: 	if (vn == NULL) {
  193: 		vn = malloc(sizeof *vn, M_DEVBUF, M_WAITOK | M_ZERO);
  194: 		vn->sc_unit = unit;
  195: 		dev->si_drv1 = vn;
  196: 		vn->sc_devlist = make_dev(&vn_cdevsw, 0, UID_ROOT,
  197: 					GID_OPERATOR, 0640, "vn%d", unit);
  198: 		reference_dev(vn->sc_devlist);
  199: 		vn->sc_devlist->si_drv1 = vn;
  200: 		vn->sc_devlist->si_drv2 = NULL;
  201: 		if (vn->sc_devlist != dev) {
  202: 			dev->si_drv1 = vn;
  203: 			dev->si_drv2 = vn->sc_devlist;
  204: 			vn->sc_devlist = dev;
  205: 			reference_dev(dev);
  206: 		}
  207: 		SLIST_INSERT_HEAD(&vn_list, vn, sc_list);
  208: 	}
  209: 	return (vn);
  210: }
  211: 
  212: static	int
  213: vnopen(dev_t dev, int flags, int mode, struct thread *td)
  214: {
  215: 	struct vn_softc *vn;
  216: 
  217: 	/*
  218: 	 * Locate preexisting device
  219: 	 */
  220: 
  221: 	if ((vn = dev->si_drv1) == NULL)
  222: 		vn = vnfindvn(dev);
  223: 
  224: 	/*
  225: 	 * Update si_bsize fields for device.  This data will be overriden by
  226: 	 * the slice/parition code for vn accesses through partitions, and
  227: 	 * used directly if you open the 'whole disk' device.
  228: 	 *
  229: 	 * si_bsize_best must be reinitialized in case VN has been 
  230: 	 * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency.
  231: 	 */
  232: 	dev->si_bsize_phys = vn->sc_secsize;
  233: 	dev->si_bsize_best = vn->sc_secsize;
  234: 	if (dev->si_bsize_best < VN_BSIZE_BEST)
  235: 		dev->si_bsize_best = VN_BSIZE_BEST;
  236: 
  237: 	if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
  238: 		return (EACCES);
  239: 
  240: 	IFOPT(vn, VN_FOLLOW)
  241: 		printf("vnopen(%s, 0x%x, 0x%x, %p)\n",
  242: 		    devtoname(dev), flags, mode, (void *)td);
  243: 
  244: 	/*
  245: 	 * Initialize label
  246: 	 */
  247: 
  248: 	IFOPT(vn, VN_LABELS) {
  249: 		if (vn->sc_flags & VNF_INITED) {
  250: 			struct disklabel label;
  251: 
  252: 			/* Build label for whole disk. */
  253: 			bzero(&label, sizeof label);
  254: 			label.d_secsize = vn->sc_secsize;
  255: 			label.d_nsectors = 32;
  256: 			label.d_ntracks = 64 / (vn->sc_secsize / DEV_BSIZE);
  257: 			label.d_secpercyl = label.d_nsectors * label.d_ntracks;
  258: 			label.d_ncylinders = vn->sc_size / label.d_secpercyl;
  259: 			label.d_secperunit = vn->sc_size;
  260: 			label.d_partitions[RAW_PART].p_size = vn->sc_size;
  261: 
  262: 			return (dsopen(dev, mode, 0, &vn->sc_slices, &label));
  263: 		}
  264: 		if (dkslice(dev) != WHOLE_DISK_SLICE ||
  265: 		    dkpart(dev) != RAW_PART ||
  266: 		    mode != S_IFCHR) {
  267: 			return (ENXIO);
  268: 		}
  269: 	}
  270: 	return(0);
  271: }
  272: 
  273: /*
  274:  *	vnstrategy:
  275:  *
  276:  *	Run strategy routine for VN device.  We use VOP_READ/VOP_WRITE calls
  277:  *	for vnode-backed vn's, and the new vm_pager_strategy() call for
  278:  *	vm_object-backed vn's.
  279:  *
  280:  *	Currently B_ASYNC is only partially handled - for OBJT_SWAP I/O only.
  281:  *
  282:  *	NOTE: bp->b_blkno is DEV_BSIZE'd.  We must generate bp->b_pblkno for
  283:  *	our uio or vn_pager_strategy() call that is vn->sc_secsize'd
  284:  */
  285: 
  286: static	void
  287: vnstrategy(struct buf *bp)
  288: {
  289: 	int unit;
  290: 	struct vn_softc *vn;
  291: 	int error;
  292: 
  293: 	unit = dkunit(bp->b_dev);
  294: 	if ((vn = bp->b_dev->si_drv1) == NULL)
  295: 		vn = vnfindvn(bp->b_dev);
  296: 
  297: 	IFOPT(vn, VN_DEBUG)
  298: 		printf("vnstrategy(%p): unit %d\n", bp, unit);
  299: 
  300: 	if ((vn->sc_flags & VNF_INITED) == 0) {
  301: 		bp->b_error = ENXIO;
  302: 		bp->b_flags |= B_ERROR;
  303: 		biodone(bp);
  304: 		return;
  305: 	}
  306: 
  307: 	bp->b_resid = bp->b_bcount;
  308: 
  309: 	IFOPT(vn, VN_LABELS) {
  310: 		if (vn->sc_slices != NULL && dscheck(bp, vn->sc_slices) <= 0) {
  311: 			bp->b_flags |= B_INVAL;
  312: 			biodone(bp);
  313: 			return;
  314: 		}
  315: 	} else {
  316: 		int pbn;	/* in sc_secsize chunks */
  317: 		long sz;	/* in sc_secsize chunks */
  318: 
  319: 		/*
  320: 		 * Check for required alignment.  Transfers must be a valid
  321: 		 * multiple of the sector size.
  322: 		 */
  323: 		if (bp->b_bcount % vn->sc_secsize != 0 ||
  324: 		    bp->b_blkno % (vn->sc_secsize / DEV_BSIZE) != 0) {
  325: 			bp->b_error = EINVAL;
  326: 			bp->b_flags |= B_ERROR | B_INVAL;
  327: 			biodone(bp);
  328: 			return;
  329: 		}
  330: 
  331: 		pbn = bp->b_blkno / (vn->sc_secsize / DEV_BSIZE);
  332: 		sz = howmany(bp->b_bcount, vn->sc_secsize);
  333: 
  334: 		/*
  335: 		 * If out of bounds return an error.  If at the EOF point,
  336: 		 * simply read or write less.
  337: 		 */
  338: 		if (pbn < 0 || pbn >= vn->sc_size) {
  339: 			if (pbn != vn->sc_size) {
  340: 				bp->b_error = EINVAL;
  341: 				bp->b_flags |= B_ERROR | B_INVAL;
  342: 			}
  343: 			biodone(bp);
  344: 			return;
  345: 		}
  346: 
  347: 		/*
  348: 		 * If the request crosses EOF, truncate the request.
  349: 		 */
  350: 		if (pbn + sz > vn->sc_size) {
  351: 			bp->b_bcount = (vn->sc_size - pbn) * vn->sc_secsize;
  352: 			bp->b_resid = bp->b_bcount;
  353: 		}
  354: 		bp->b_pblkno = pbn;
  355: 	}
  356: 
  357: 	if (vn->sc_vp && (bp->b_flags & B_FREEBUF)) {
  358: 		/*
  359: 		 * Not handled for vnode-backed element yet.
  360: 		 */
  361: 		biodone(bp);
  362: 	} else if (vn->sc_vp) {
  363: 		/*
  364: 		 * VNODE I/O
  365: 		 *
  366: 		 * If an error occurs, we set B_ERROR but we do not set 
  367: 		 * B_INVAL because (for a write anyway), the buffer is 
  368: 		 * still valid.
  369: 		 */
  370: 		struct uio auio;
  371: 		struct iovec aiov;
  372: 
  373: 		bzero(&auio, sizeof(auio));
  374: 
  375: 		aiov.iov_base = bp->b_data;
  376: 		aiov.iov_len = bp->b_bcount;
  377: 		auio.uio_iov = &aiov;
  378: 		auio.uio_iovcnt = 1;
  379: 		auio.uio_offset = (vm_ooffset_t)bp->b_pblkno * vn->sc_secsize;
  380: 		auio.uio_segflg = UIO_SYSSPACE;
  381: 		if( bp->b_flags & B_READ)
  382: 			auio.uio_rw = UIO_READ;
  383: 		else
  384: 			auio.uio_rw = UIO_WRITE;
  385: 		auio.uio_resid = bp->b_bcount;
  386: 		auio.uio_td = curthread;
  387: 		vn_lock(vn->sc_vp, NULL, LK_EXCLUSIVE | LK_RETRY, curthread);
  388: 		if (bp->b_flags & B_READ)
  389: 			error = VOP_READ(vn->sc_vp, &auio, IO_DIRECT, vn->sc_cred);
  390: 		else
  391: 			error = VOP_WRITE(vn->sc_vp, &auio, IO_NOWDRAIN, vn->sc_cred);
  392: 		VOP_UNLOCK(vn->sc_vp, NULL, 0, curthread);
  393: 		bp->b_resid = auio.uio_resid;
  394: 
  395: 		if (error) {
  396: 			bp->b_error = error;
  397: 			bp->b_flags |= B_ERROR;
  398: 		}
  399: 		biodone(bp);
  400: 	} else if (vn->sc_object) {
  401: 		/*
  402: 		 * OBJT_SWAP I/O
  403: 		 *
  404: 		 * ( handles read, write, freebuf )
  405: 		 *
  406: 		 * Note: if we pre-reserved swap, B_FREEBUF is disabled
  407: 		 */
  408: 		KASSERT((bp->b_bufsize & (vn->sc_secsize - 1)) == 0,
  409: 		    ("vnstrategy: buffer %p too small for physio", bp));
  410: 
  411: 		if ((bp->b_flags & B_FREEBUF) && TESTOPT(vn, VN_RESERVE)) {
  412: 			biodone(bp);
  413: 		} else {
  414: 			vm_pager_strategy(vn->sc_object, bp);
  415: 		}
  416: 	} else {
  417: 		bp->b_flags |= B_ERROR;
  418: 		bp->b_error = EINVAL;
  419: 		biodone(bp);
  420: 	}
  421: }
  422: 
  423: /* ARGSUSED */
  424: static	int
  425: vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
  426: {
  427: 	struct vn_softc *vn;
  428: 	struct vn_ioctl *vio;
  429: 	int error;
  430: 	u_long *f;
  431: 
  432: 	vn = dev->si_drv1;
  433: 	IFOPT(vn,VN_FOLLOW)
  434: 		printf("vnioctl(%s, 0x%lx, %p, 0x%x, %p): unit %d\n",
  435: 		    devtoname(dev), cmd, (void *)data, flag, (void *)td,
  436: 		    dkunit(dev));
  437: 
  438: 	switch (cmd) {
  439: 	case VNIOCATTACH:
  440: 	case VNIOCDETACH:
  441: 	case VNIOCGSET:
  442: 	case VNIOCGCLEAR:
  443: 	case VNIOCUSET:
  444: 	case VNIOCUCLEAR:
  445: 		goto vn_specific;
  446: 	}
  447: 
  448: 	IFOPT(vn,VN_LABELS) {
  449: 		if (vn->sc_slices != NULL) {
  450: 			error = dsioctl(dev, cmd, data, flag, &vn->sc_slices);
  451: 			if (error != ENOIOCTL)
  452: 				return (error);
  453: 		}
  454: 		if (dkslice(dev) != WHOLE_DISK_SLICE ||
  455: 		    dkpart(dev) != RAW_PART)
  456: 			return (ENOTTY);
  457: 	}
  458: 
  459:     vn_specific:
  460: 
  461: 	error = suser(td);
  462: 	if (error)
  463: 		return (error);
  464: 
  465: 	vio = (struct vn_ioctl *)data;
  466: 	f = (u_long*)data;
  467: 	switch (cmd) {
  468: 
  469: 	case VNIOCATTACH:
  470: 		if (vn->sc_flags & VNF_INITED)
  471: 			return(EBUSY);
  472: 
  473: 		if (vio->vn_file == NULL)
  474: 			error = vniocattach_swap(vn, vio, dev, flag, td);
  475: 		else
  476: 			error = vniocattach_file(vn, vio, dev, flag, td);
  477: 		break;
  478: 
  479: 	case VNIOCDETACH:
  480: 		if ((vn->sc_flags & VNF_INITED) == 0)
  481: 			return(ENXIO);
  482: 		/*
  483: 		 * XXX handle i/o in progress.  Return EBUSY, or wait, or
  484: 		 * flush the i/o.
  485: 		 * XXX handle multiple opens of the device.  Return EBUSY,
  486: 		 * or revoke the fd's.
  487: 		 * How are these problems handled for removable and failing
  488: 		 * hardware devices? (Hint: They are not)
  489: 		 */
  490: 		vnclear(vn);
  491: 		IFOPT(vn, VN_FOLLOW)
  492: 			printf("vnioctl: CLRed\n");
  493: 		break;
  494: 
  495: 	case VNIOCGSET:
  496: 		vn_options |= *f;
  497: 		*f = vn_options;
  498: 		break;
  499: 
  500: 	case VNIOCGCLEAR:
  501: 		vn_options &= ~(*f);
  502: 		*f = vn_options;
  503: 		break;
  504: 
  505: 	case VNIOCUSET:
  506: 		vn->sc_options |= *f;
  507: 		*f = vn->sc_options;
  508: 		break;
  509: 
  510: 	case VNIOCUCLEAR:
  511: 		vn->sc_options &= ~(*f);
  512: 		*f = vn->sc_options;
  513: 		break;
  514: 
  515: 	default:
  516: 		error = ENOTTY;
  517: 		break;
  518: 	}
  519: 	return(error);
  520: }
  521: 
  522: /*
  523:  *	vniocattach_file:
  524:  *
  525:  *	Attach a file to a VN partition.  Return the size in the vn_size
  526:  *	field.
  527:  */
  528: 
  529: static int
  530: vniocattach_file(vn, vio, dev, flag, td)
  531: 	struct vn_softc *vn;
  532: 	struct vn_ioctl *vio;
  533: 	dev_t dev;
  534: 	int flag;
  535: 	struct thread *td;
  536: {
  537: 	struct vattr vattr;
  538: 	struct nameidata nd;
  539: 	int error, flags;
  540: 	struct proc *p = td->td_proc;
  541: 
  542: 	KKASSERT(p != NULL);
  543: 
  544: 	flags = FREAD|FWRITE;
  545: 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, vio->vn_file, td);
  546: 	error = vn_open(&nd, flags, 0);
  547: 	if (error) {
  548: 		if (error != EACCES && error != EPERM && error != EROFS)
  549: 			return (error);
  550: 		flags &= ~FWRITE;
  551: 		NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW,
  552: 			UIO_USERSPACE, vio->vn_file, td);
  553: 		error = vn_open(&nd, flags, 0);
  554: 		if (error)
  555: 			return (error);
  556: 	}
  557: 	NDFREE(&nd, NDF_ONLY_PNBUF);
  558: 	if (nd.ni_vp->v_type != VREG ||
  559: 	    (error = VOP_GETATTR(nd.ni_vp, &vattr, td))) {
  560: 		VOP_UNLOCK(nd.ni_vp, NULL, 0, td);
  561: 		(void) vn_close(nd.ni_vp, flags, td);
  562: 		return (error ? error : EINVAL);
  563: 	}
  564: 	VOP_UNLOCK(nd.ni_vp, NULL, 0, td);
  565: 	vn->sc_secsize = DEV_BSIZE;
  566: 	vn->sc_vp = nd.ni_vp;
  567: 
  568: 	/*
  569: 	 * If the size is specified, override the file attributes.  Note that
  570: 	 * the vn_size argument is in PAGE_SIZE sized blocks.
  571: 	 */
  572: 	if (vio->vn_size)
  573: 		vn->sc_size = (quad_t)vio->vn_size * PAGE_SIZE / vn->sc_secsize;
  574: 	else
  575: 		vn->sc_size = vattr.va_size / vn->sc_secsize;
  576: 	error = vnsetcred(vn, p->p_ucred);
  577: 	if (error) {
  578: 		(void) vn_close(nd.ni_vp, flags, td);
  579: 		return(error);
  580: 	}
  581: 	vn->sc_flags |= VNF_INITED;
  582: 	if (flags == FREAD)
  583: 		vn->sc_flags |= VNF_READONLY;
  584: 	IFOPT(vn, VN_LABELS) {
  585: 		/*
  586: 		 * Reopen so that `ds' knows which devices are open.
  587: 		 * If this is the first VNIOCSET, then we've
  588: 		 * guaranteed that the device is the cdev and that
  589: 		 * no other slices or labels are open.  Otherwise,
  590: 		 * we rely on VNIOCCLR not being abused.
  591: 		 */
  592: 		error = vnopen(dev, flag, S_IFCHR, td);
  593: 		if (error)
  594: 			vnclear(vn);
  595: 	}
  596: 	IFOPT(vn, VN_FOLLOW)
  597: 		printf("vnioctl: SET vp %p size %x blks\n",
  598: 		       vn->sc_vp, vn->sc_size);
  599: 	return(0);
  600: }
  601: 
  602: /*
  603:  *	vniocattach_swap:
  604:  *
  605:  *	Attach swap backing store to a VN partition of the size specified
  606:  *	in vn_size.
  607:  */
  608: 
  609: static int
  610: vniocattach_swap(vn, vio, dev, flag, td)
  611: 	struct vn_softc *vn;
  612: 	struct vn_ioctl *vio;
  613: 	dev_t dev;
  614: 	int flag;
  615: 	struct thread *td;
  616: {
  617: 	int error;
  618: 	struct proc *p = td->td_proc;
  619: 
  620: 	KKASSERT(p != NULL);
  621: 	/*
  622: 	 * Range check.  Disallow negative sizes or any size less then the
  623: 	 * size of a page.  Then round to a page.
  624: 	 */
  625: 
  626: 	if (vio->vn_size <= 0)
  627: 		return(EDOM);
  628: 
  629: 	/*
  630: 	 * Allocate an OBJT_SWAP object.
  631: 	 *
  632: 	 * sc_secsize is PAGE_SIZE'd
  633: 	 *
  634: 	 * vio->vn_size is in PAGE_SIZE'd chunks.
  635: 	 * sc_size must be in PAGE_SIZE'd chunks.  
  636: 	 * Note the truncation.
  637: 	 */
  638: 
  639: 	vn->sc_secsize = PAGE_SIZE;
  640: 	vn->sc_size = vio->vn_size;
  641: 	vn->sc_object = 
  642: 	 vm_pager_allocate(OBJT_SWAP, NULL, vn->sc_secsize * (vm_ooffset_t)vio->vn_size, VM_PROT_DEFAULT, 0);
  643: 	IFOPT(vn, VN_RESERVE) {
  644: 		if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) {
  645: 			vm_pager_deallocate(vn->sc_object);
  646: 			vn->sc_object = NULL;
  647: 			return(EDOM);
  648: 		}
  649: 	}
  650: 	vn->sc_flags |= VNF_INITED;
  651: 
  652: 	error = vnsetcred(vn, p->p_ucred);
  653: 	if (error == 0) {
  654: 		IFOPT(vn, VN_LABELS) {
  655: 			/*
  656: 			 * Reopen so that `ds' knows which devices are open.
  657: 			 * If this is the first VNIOCSET, then we've
  658: 			 * guaranteed that the device is the cdev and that
  659: 			 * no other slices or labels are open.  Otherwise,
  660: 			 * we rely on VNIOCCLR not being abused.
  661: 			 */
  662: 			error = vnopen(dev, flag, S_IFCHR, td);
  663: 		}
  664: 	}
  665: 	if (error == 0) {
  666: 		IFOPT(vn, VN_FOLLOW) {
  667: 			printf("vnioctl: SET vp %p size %x\n",
  668: 			       vn->sc_vp, vn->sc_size);
  669: 		}
  670: 	}
  671: 	if (error)
  672: 		vnclear(vn);
  673: 	return(error);
  674: }
  675: 
  676: /*
  677:  * Duplicate the current processes' credentials.  Since we are called only
  678:  * as the result of a SET ioctl and only root can do that, any future access
  679:  * to this "disk" is essentially as root.  Note that credentials may change
  680:  * if some other uid can write directly to the mapped file (NFS).
  681:  */
  682: int
  683: vnsetcred(struct vn_softc *vn, struct ucred *cred)
  684: {
  685: 	char *tmpbuf;
  686: 	int error = 0;
  687: 
  688: 	/*
  689: 	 * Set credits in our softc
  690: 	 */
  691: 
  692: 	if (vn->sc_cred)
  693: 		crfree(vn->sc_cred);
  694: 	vn->sc_cred = crdup(cred);
  695: 
  696: 	/*
  697: 	 * Horrible kludge to establish credentials for NFS  XXX.
  698: 	 */
  699: 
  700: 	if (vn->sc_vp) {
  701: 		struct uio auio;
  702: 		struct iovec aiov;
  703: 
  704: 		tmpbuf = malloc(vn->sc_secsize, M_TEMP, M_WAITOK);
  705: 		bzero(&auio, sizeof(auio));
  706: 
  707: 		aiov.iov_base = tmpbuf;
  708: 		aiov.iov_len = vn->sc_secsize;
  709: 		auio.uio_iov = &aiov;
  710: 		auio.uio_iovcnt = 1;
  711: 		auio.uio_offset = 0;
  712: 		auio.uio_rw = UIO_READ;
  713: 		auio.uio_segflg = UIO_SYSSPACE;
  714: 		auio.uio_resid = aiov.iov_len;
  715: 		vn_lock(vn->sc_vp, NULL, LK_EXCLUSIVE | LK_RETRY, curthread);
  716: 		error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
  717: 		VOP_UNLOCK(vn->sc_vp, NULL, 0, curthread);
  718: 		free(tmpbuf, M_TEMP);
  719: 	}
  720: 	return (error);
  721: }
  722: 
  723: void
  724: vnclear(struct vn_softc *vn)
  725: {
  726: 	struct thread *td = curthread;		/* XXX */
  727: 
  728: 	IFOPT(vn, VN_FOLLOW)
  729: 		printf("vnclear(%p): vp=%p\n", vn, vn->sc_vp);
  730: 	if (vn->sc_slices != NULL)
  731: 		dsgone(&vn->sc_slices);
  732: 	vn->sc_flags &= ~VNF_INITED;
  733: 	if (vn->sc_vp != NULL) {
  734: 		(void)vn_close(vn->sc_vp, vn->sc_flags & VNF_READONLY ?
  735: 		    FREAD : (FREAD|FWRITE), td);
  736: 		vn->sc_vp = NULL;
  737: 	}
  738: 	vn->sc_flags &= ~VNF_READONLY;
  739: 	if (vn->sc_cred) {
  740: 		crfree(vn->sc_cred);
  741: 		vn->sc_cred = NULL;
  742: 	}
  743: 	if (vn->sc_object != NULL) {
  744: 		vm_pager_deallocate(vn->sc_object);
  745: 		vn->sc_object = NULL;
  746: 	}
  747: 	vn->sc_size = 0;
  748: }
  749: 
  750: static	int
  751: vnsize(dev_t dev)
  752: {
  753: 	struct vn_softc *vn;
  754: 
  755: 	vn = dev->si_drv1;
  756: 	if (!vn)
  757: 		return(-1);
  758: 	if ((vn->sc_flags & VNF_INITED) == 0)
  759: 		return(-1);
  760: 
  761: 	return(vn->sc_size);
  762: }
  763: 
  764: static int 
  765: vn_modevent(module_t mod, int type, void *data)
  766: {
  767: 	struct vn_softc *vn;
  768: 	dev_t dev;
  769: 
  770: 	switch (type) {
  771: 	case MOD_LOAD:
  772: 		cdevsw_add(&vn_cdevsw, 0, 0);
  773: 		break;
  774: 	case MOD_UNLOAD:
  775: 		/* fall through */
  776: 	case MOD_SHUTDOWN:
  777: 		for (;;) {
  778: 			vn = SLIST_FIRST(&vn_list);
  779: 			if (!vn)
  780: 				break;
  781: 			SLIST_REMOVE_HEAD(&vn_list, sc_list);
  782: 			if (vn->sc_flags & VNF_INITED)
  783: 				vnclear(vn);
  784: 			/* Cleanup all dev_t's that refer to this unit */
  785: 			while ((dev = vn->sc_devlist) != NULL) {
  786: 				vn->sc_devlist = dev->si_drv2;
  787: 				dev->si_drv1 = dev->si_drv2 = NULL;
  788: 				destroy_dev(dev);
  789: 			}
  790: 			free(vn, M_DEVBUF);
  791: 		}
  792: 		cdevsw_remove(&vn_cdevsw, -1, 0);
  793: 		break;
  794: 	default:
  795: 		break;
  796: 	}
  797: 	return 0;
  798: }
  799: 
  800: DEV_MODULE(vn, vn_modevent, 0);