File:  [DragonFly] / src / sys / dev / disk / vn / vn.c
Revision 1.9: download - view: text, annotated - select for diffs
Thu May 13 23:49:15 2004 UTC (10 years, 7 months ago) by dillon
Branches: MAIN
CVS tags: HEAD
device switch 1/many: Remove d_autoq, add d_clone (where d_autoq was).

d_autoq was used to allow the device port dispatch to mix old-style synchronous
calls with new style messaging calls within a particular device.  It was never
used for that purpose.

d_clone will be more fully implemented as work continues.  We are going to
install d_port in the dev_t (struct specinfo) structure itself and d_clone
will be needed to allow devices to 'revector' the port on a minor-number
by minor-number basis, in particular allowing minor numbers to be directly
dispatched to distinct threads.  This is something we will be needing later
on.

    1: /*
    2:  * Copyright (c) 1988 University of Utah.
    3:  * Copyright (c) 1990, 1993
    4:  *	The Regents of the University of California.  All rights reserved.
    5:  *
    6:  * This code is derived from software contributed to Berkeley by
    7:  * the Systems Programming Group of the University of Utah Computer
    8:  * Science Department.
    9:  *
   10:  * Redistribution and use in source and binary forms, with or without
   11:  * modification, are permitted provided that the following conditions
   12:  * are met:
   13:  * 1. Redistributions of source code must retain the above copyright
   14:  *    notice, this list of conditions and the following disclaimer.
   15:  * 2. Redistributions in binary form must reproduce the above copyright
   16:  *    notice, this list of conditions and the following disclaimer in the
   17:  *    documentation and/or other materials provided with the distribution.
   18:  * 3. All advertising materials mentioning features or use of this software
   19:  *    must display the following acknowledgement:
   20:  *	This product includes software developed by the University of
   21:  *	California, Berkeley and its contributors.
   22:  * 4. Neither the name of the University nor the names of its contributors
   23:  *    may be used to endorse or promote products derived from this software
   24:  *    without specific prior written permission.
   25:  *
   26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36:  * SUCH DAMAGE.
   37:  *
   38:  * from: Utah Hdr: vn.c 1.13 94/04/02
   39:  *
   40:  *	from: @(#)vn.c	8.6 (Berkeley) 4/1/94
   41:  * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
   42:  * $DragonFly: src/sys/dev/disk/vn/vn.c,v 1.9 2004/05/13 23:49:15 dillon Exp $
   43:  */
   44: 
   45: /*
   46:  * Vnode disk driver.
   47:  *
   48:  * Block/character interface to a vnode.  Allows one to treat a file
   49:  * as a disk (e.g. build a filesystem in it, mount it, etc.).
   50:  *
   51:  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
   52:  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
   53:  * local buffer cache.
   54:  *
   55:  * NOTE 2: There is a security issue involved with this driver.
   56:  * Once mounted all access to the contents of the "mapped" file via
   57:  * the special file is controlled by the permissions on the special
   58:  * file, the protection of the mapped file is ignored (effectively,
   59:  * by using root credentials in all transactions).
   60:  *
   61:  * NOTE 3: Doesn't interact with leases, should it?
   62:  */
   63: 
   64: #include <sys/param.h>
   65: #include <sys/systm.h>
   66: #include <sys/kernel.h>
   67: #include <sys/proc.h>
   68: #include <sys/namei.h>
   69: #include <sys/buf.h>
   70: #include <sys/malloc.h>
   71: #include <sys/mount.h>
   72: #include <sys/vnode.h>
   73: #include <sys/fcntl.h>
   74: #include <sys/conf.h>
   75: #include <sys/disklabel.h>
   76: #include <sys/diskslice.h>
   77: #include <sys/stat.h>
   78: #include <sys/conf.h>
   79: #include <sys/module.h>
   80: #include <sys/vnioctl.h>
   81: 
   82: #include <vm/vm.h>
   83: #include <vm/vm_object.h>
   84: #include <vm/vm_page.h>
   85: #include <vm/vm_pager.h>
   86: #include <vm/vm_pageout.h>
   87: #include <vm/swap_pager.h>
   88: #include <vm/vm_extern.h>
   89: #include <vm/vm_zone.h>
   90: 
/*
 * Device switch entry points implemented by this driver; they are
 * installed in vn_cdevsw below.
 */
static	d_ioctl_t	vnioctl;
static	d_open_t	vnopen;
static	d_close_t	vnclose;
static	d_psize_t	vnsize;
static	d_strategy_t	vnstrategy;

/* Major device number placed in the "maj" slot of vn_cdevsw. */
#define CDEV_MAJOR 43

/* Lower bound that vnopen() applies to a device's si_bsize_best. */
#define VN_BSIZE_BEST	8192
  100: 
/*
 * cdevsw
 *	D_DISK		we want to look like a disk
 *	D_CANFREE	We support B_FREEBUF
 *
 * The initializer is positional; the leading comment on each line names
 * the slot being filled.  read/write go through physread/physwrite,
 * while poll, mmap and dump use the no-op stubs.
 */

static struct cdevsw vn_cdevsw = {
	/* name */	"vn",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_DISK|D_CANFREE,
	/* port */	NULL,
	/* clone */	NULL,

	/* open */	vnopen,
	/* close */	vnclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	vnioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	vnstrategy,
	/* dump */	nodump,
	/* psize */	vnsize
};
  125: 
/*
 * Allocate a struct buf from the M_DEVBUF malloc type (M_WAITOK).
 * NOTE(review): not referenced by any code visible in this file.
 */
#define	getvnbuf()	\
	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))

/* Release a buffer back to the M_DEVBUF malloc type. */
#define putvnbuf(bp)	\
	free((caddr_t)(bp), M_DEVBUF)
  131: 
/*
 * Per-unit driver state.  One instance is allocated by vnfindvn() for each
 * unit number and linked onto vn_list.  At most one of sc_vp / sc_object
 * is expected to be set while the unit is attached (VNF_INITED).
 */
struct vn_softc {
	int		sc_unit;	/* unit number, compared with dkunit(dev) */
	int		sc_flags;	/* flags 			*/
	int		sc_size;	/* size of vn, sc_secsize scale	*/
	int		sc_secsize;	/* sector size			*/
	struct diskslices *sc_slices;	/* slice state used by the ds*() calls */
	struct vnode	*sc_vp;		/* vnode if not NULL		*/
	vm_object_t	sc_object;	/* backing object if not NULL	*/
	struct ucred	*sc_cred;	/* credentials 			*/
	int		 sc_maxactive;	/* max # of active requests 	*/
	struct buf	 sc_tab;	/* transfer queue 		*/
	u_long		 sc_options;	/* options 			*/
	dev_t		 sc_devlist;	/* devices that refer to this unit */
					/* (chained through si_drv2)	*/
	SLIST_ENTRY(vn_softc) sc_list;	/* linkage on vn_list		*/
};

/* All allocated units; searched by unit number in vnfindvn(). */
static SLIST_HEAD(, vn_softc) vn_list;
  149: 
/* sc_flags */
#define VNF_INITED	0x01	/* a backing store is attached */
#define	VNF_READONLY	0x02	/* backing file was opened without FWRITE */

/* Options applied to every unit; changed via VNIOCGSET / VNIOCGCLEAR. */
static u_long	vn_options;

/*
 * Option tests.  An option counts as enabled when it is set either in the
 * unit's own sc_options or in the global vn_options.
 *
 * IFOPT expands to a bare "if (...)", so it must be followed by a statement
 * or block and may be paired with an "else".  TESTOPT is the same test as
 * an expression.
 */
#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
#define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
  158: 
/* Forward declarations of the file-local helpers defined further down. */
static int	vnsetcred (struct vn_softc *vn, struct ucred *cred);
static void	vnclear (struct vn_softc *vn);
static int	vn_modevent (module_t, int, void *);
static int 	vniocattach_file (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct thread *p);
static int 	vniocattach_swap (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct thread *p);
  164: 
  165: static	int
  166: vnclose(dev_t dev, int flags, int mode, struct thread *td)
  167: {
  168: 	struct vn_softc *vn = dev->si_drv1;
  169: 
  170: 	IFOPT(vn, VN_LABELS)
  171: 		if (vn->sc_slices != NULL)
  172: 			dsclose(dev, mode, vn->sc_slices);
  173: 	return (0);
  174: }
  175: 
  176: static struct vn_softc *
  177: vnfindvn(dev_t dev)
  178: {
  179: 	int unit;
  180: 	struct vn_softc *vn;
  181: 
  182: 	unit = dkunit(dev);
  183: 	vn = dev->si_drv1;
  184: 	if (!vn) {
  185: 		SLIST_FOREACH(vn, &vn_list, sc_list) {
  186: 			if (vn->sc_unit == unit) {
  187: 				dev->si_drv2 = vn->sc_devlist;
  188: 				vn->sc_devlist = dev;
  189: 				dev->si_drv1 = vn;
  190: 				break;
  191: 			}
  192: 		}
  193: 	}
  194: 	if (!vn) {
  195: 		vn = malloc(sizeof *vn, M_DEVBUF, M_WAITOK);
  196: 		if (!vn)
  197: 			return (NULL);
  198: 		bzero(vn, sizeof *vn);
  199: 		vn->sc_unit = unit;
  200: 		dev->si_drv1 = vn;
  201: 		vn->sc_devlist = make_dev(&vn_cdevsw, 0,
  202: 		    UID_ROOT, GID_OPERATOR, 0640, "vn%d", unit);
  203: 		vn->sc_devlist->si_drv1 = vn;
  204: 		vn->sc_devlist->si_drv2 = NULL;
  205: 		if (vn->sc_devlist != dev) {
  206: 			dev->si_drv2 = vn->sc_devlist;
  207: 			vn->sc_devlist = dev;
  208: 		}
  209: 		SLIST_INSERT_HEAD(&vn_list, vn, sc_list);
  210: 	}
  211: 	return (vn);
  212: }
  213: 
  214: static	int
  215: vnopen(dev_t dev, int flags, int mode, struct thread *td)
  216: {
  217: 	struct vn_softc *vn;
  218: 
  219: 	/*
  220: 	 * Locate preexisting device
  221: 	 */
  222: 
  223: 	if ((vn = dev->si_drv1) == NULL)
  224: 		vn = vnfindvn(dev);
  225: 
  226: 	/*
  227: 	 * Update si_bsize fields for device.  This data will be overriden by
  228: 	 * the slice/parition code for vn accesses through partitions, and
  229: 	 * used directly if you open the 'whole disk' device.
  230: 	 *
  231: 	 * si_bsize_best must be reinitialized in case VN has been 
  232: 	 * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency.
  233: 	 */
  234: 	dev->si_bsize_phys = vn->sc_secsize;
  235: 	dev->si_bsize_best = vn->sc_secsize;
  236: 	if (dev->si_bsize_best < VN_BSIZE_BEST)
  237: 		dev->si_bsize_best = VN_BSIZE_BEST;
  238: 
  239: 	if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
  240: 		return (EACCES);
  241: 
  242: 	IFOPT(vn, VN_FOLLOW)
  243: 		printf("vnopen(%s, 0x%x, 0x%x, %p)\n",
  244: 		    devtoname(dev), flags, mode, (void *)td);
  245: 
  246: 	/*
  247: 	 * Initialize label
  248: 	 */
  249: 
  250: 	IFOPT(vn, VN_LABELS) {
  251: 		if (vn->sc_flags & VNF_INITED) {
  252: 			struct disklabel label;
  253: 
  254: 			/* Build label for whole disk. */
  255: 			bzero(&label, sizeof label);
  256: 			label.d_secsize = vn->sc_secsize;
  257: 			label.d_nsectors = 32;
  258: 			label.d_ntracks = 64 / (vn->sc_secsize / DEV_BSIZE);
  259: 			label.d_secpercyl = label.d_nsectors * label.d_ntracks;
  260: 			label.d_ncylinders = vn->sc_size / label.d_secpercyl;
  261: 			label.d_secperunit = vn->sc_size;
  262: 			label.d_partitions[RAW_PART].p_size = vn->sc_size;
  263: 
  264: 			return (dsopen(dev, mode, 0, &vn->sc_slices, &label));
  265: 		}
  266: 		if (dkslice(dev) != WHOLE_DISK_SLICE ||
  267: 		    dkpart(dev) != RAW_PART ||
  268: 		    mode != S_IFCHR) {
  269: 			return (ENXIO);
  270: 		}
  271: 	}
  272: 	return(0);
  273: }
  274: 
  275: /*
  276:  *	vnstrategy:
  277:  *
  278:  *	Run strategy routine for VN device.  We use VOP_READ/VOP_WRITE calls
  279:  *	for vnode-backed vn's, and the new vm_pager_strategy() call for
  280:  *	vm_object-backed vn's.
  281:  *
  282:  *	Currently B_ASYNC is only partially handled - for OBJT_SWAP I/O only.
  283:  *
  284:  *	NOTE: bp->b_blkno is DEV_BSIZE'd.  We must generate bp->b_pblkno for
  285:  *	our uio or vn_pager_strategy() call that is vn->sc_secsize'd
  286:  */
  287: 
  288: static	void
  289: vnstrategy(struct buf *bp)
  290: {
  291: 	int unit;
  292: 	struct vn_softc *vn;
  293: 	int error;
  294: 
  295: 	unit = dkunit(bp->b_dev);
  296: 	vn = bp->b_dev->si_drv1;
  297: 	if (!vn)
  298: 		vn = vnfindvn(bp->b_dev);
  299: 
  300: 	IFOPT(vn, VN_DEBUG)
  301: 		printf("vnstrategy(%p): unit %d\n", bp, unit);
  302: 
  303: 	if ((vn->sc_flags & VNF_INITED) == 0) {
  304: 		bp->b_error = ENXIO;
  305: 		bp->b_flags |= B_ERROR;
  306: 		biodone(bp);
  307: 		return;
  308: 	}
  309: 
  310: 	bp->b_resid = bp->b_bcount;
  311: 
  312: 	IFOPT(vn, VN_LABELS) {
  313: 		if (vn->sc_slices != NULL && dscheck(bp, vn->sc_slices) <= 0) {
  314: 			bp->b_flags |= B_INVAL;
  315: 			biodone(bp);
  316: 			return;
  317: 		}
  318: 	} else {
  319: 		int pbn;	/* in sc_secsize chunks */
  320: 		long sz;	/* in sc_secsize chunks */
  321: 
  322: 		/*
  323: 		 * Check for required alignment.  Transfers must be a valid
  324: 		 * multiple of the sector size.
  325: 		 */
  326: 		if (bp->b_bcount % vn->sc_secsize != 0 ||
  327: 		    bp->b_blkno % (vn->sc_secsize / DEV_BSIZE) != 0) {
  328: 			bp->b_error = EINVAL;
  329: 			bp->b_flags |= B_ERROR | B_INVAL;
  330: 			biodone(bp);
  331: 			return;
  332: 		}
  333: 
  334: 		pbn = bp->b_blkno / (vn->sc_secsize / DEV_BSIZE);
  335: 		sz = howmany(bp->b_bcount, vn->sc_secsize);
  336: 
  337: 		/*
  338: 		 * If out of bounds return an error.  If at the EOF point,
  339: 		 * simply read or write less.
  340: 		 */
  341: 		if (pbn < 0 || pbn >= vn->sc_size) {
  342: 			if (pbn != vn->sc_size) {
  343: 				bp->b_error = EINVAL;
  344: 				bp->b_flags |= B_ERROR | B_INVAL;
  345: 			}
  346: 			biodone(bp);
  347: 			return;
  348: 		}
  349: 
  350: 		/*
  351: 		 * If the request crosses EOF, truncate the request.
  352: 		 */
  353: 		if (pbn + sz > vn->sc_size) {
  354: 			bp->b_bcount = (vn->sc_size - pbn) * vn->sc_secsize;
  355: 			bp->b_resid = bp->b_bcount;
  356: 		}
  357: 		bp->b_pblkno = pbn;
  358: 	}
  359: 
  360: 	if (vn->sc_vp && (bp->b_flags & B_FREEBUF)) {
  361: 		/*
  362: 		 * Not handled for vnode-backed element yet.
  363: 		 */
  364: 		biodone(bp);
  365: 	} else if (vn->sc_vp) {
  366: 		/*
  367: 		 * VNODE I/O
  368: 		 *
  369: 		 * If an error occurs, we set B_ERROR but we do not set 
  370: 		 * B_INVAL because (for a write anyway), the buffer is 
  371: 		 * still valid.
  372: 		 */
  373: 		struct uio auio;
  374: 		struct iovec aiov;
  375: 
  376: 		bzero(&auio, sizeof(auio));
  377: 
  378: 		aiov.iov_base = bp->b_data;
  379: 		aiov.iov_len = bp->b_bcount;
  380: 		auio.uio_iov = &aiov;
  381: 		auio.uio_iovcnt = 1;
  382: 		auio.uio_offset = (vm_ooffset_t)bp->b_pblkno * vn->sc_secsize;
  383: 		auio.uio_segflg = UIO_SYSSPACE;
  384: 		if( bp->b_flags & B_READ)
  385: 			auio.uio_rw = UIO_READ;
  386: 		else
  387: 			auio.uio_rw = UIO_WRITE;
  388: 		auio.uio_resid = bp->b_bcount;
  389: 		auio.uio_td = curthread;
  390: 		vn_lock(vn->sc_vp, NULL, LK_EXCLUSIVE | LK_RETRY, curthread);
  391: 		if (bp->b_flags & B_READ)
  392: 			error = VOP_READ(vn->sc_vp, &auio, IO_DIRECT, vn->sc_cred);
  393: 		else
  394: 			error = VOP_WRITE(vn->sc_vp, &auio, IO_NOWDRAIN, vn->sc_cred);
  395: 		VOP_UNLOCK(vn->sc_vp, NULL, 0, curthread);
  396: 		bp->b_resid = auio.uio_resid;
  397: 
  398: 		if (error) {
  399: 			bp->b_error = error;
  400: 			bp->b_flags |= B_ERROR;
  401: 		}
  402: 		biodone(bp);
  403: 	} else if (vn->sc_object) {
  404: 		/*
  405: 		 * OBJT_SWAP I/O
  406: 		 *
  407: 		 * ( handles read, write, freebuf )
  408: 		 *
  409: 		 * Note: if we pre-reserved swap, B_FREEBUF is disabled
  410: 		 */
  411: 		KASSERT((bp->b_bufsize & (vn->sc_secsize - 1)) == 0,
  412: 		    ("vnstrategy: buffer %p too small for physio", bp));
  413: 
  414: 		if ((bp->b_flags & B_FREEBUF) && TESTOPT(vn, VN_RESERVE)) {
  415: 			biodone(bp);
  416: 		} else {
  417: 			vm_pager_strategy(vn->sc_object, bp);
  418: 		}
  419: 	} else {
  420: 		bp->b_flags |= B_ERROR;
  421: 		bp->b_error = EINVAL;
  422: 		biodone(bp);
  423: 	}
  424: }
  425: 
  426: /* ARGSUSED */
  427: static	int
  428: vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
  429: {
  430: 	struct vn_softc *vn;
  431: 	struct vn_ioctl *vio;
  432: 	int error;
  433: 	u_long *f;
  434: 
  435: 	vn = dev->si_drv1;
  436: 	IFOPT(vn,VN_FOLLOW)
  437: 		printf("vnioctl(%s, 0x%lx, %p, 0x%x, %p): unit %d\n",
  438: 		    devtoname(dev), cmd, (void *)data, flag, (void *)td,
  439: 		    dkunit(dev));
  440: 
  441: 	switch (cmd) {
  442: 	case VNIOCATTACH:
  443: 	case VNIOCDETACH:
  444: 	case VNIOCGSET:
  445: 	case VNIOCGCLEAR:
  446: 	case VNIOCUSET:
  447: 	case VNIOCUCLEAR:
  448: 		goto vn_specific;
  449: 	}
  450: 
  451: 	IFOPT(vn,VN_LABELS) {
  452: 		if (vn->sc_slices != NULL) {
  453: 			error = dsioctl(dev, cmd, data, flag, &vn->sc_slices);
  454: 			if (error != ENOIOCTL)
  455: 				return (error);
  456: 		}
  457: 		if (dkslice(dev) != WHOLE_DISK_SLICE ||
  458: 		    dkpart(dev) != RAW_PART)
  459: 			return (ENOTTY);
  460: 	}
  461: 
  462:     vn_specific:
  463: 
  464: 	error = suser(td);
  465: 	if (error)
  466: 		return (error);
  467: 
  468: 	vio = (struct vn_ioctl *)data;
  469: 	f = (u_long*)data;
  470: 	switch (cmd) {
  471: 
  472: 	case VNIOCATTACH:
  473: 		if (vn->sc_flags & VNF_INITED)
  474: 			return(EBUSY);
  475: 
  476: 		if (vio->vn_file == NULL)
  477: 			error = vniocattach_swap(vn, vio, dev, flag, td);
  478: 		else
  479: 			error = vniocattach_file(vn, vio, dev, flag, td);
  480: 		break;
  481: 
  482: 	case VNIOCDETACH:
  483: 		if ((vn->sc_flags & VNF_INITED) == 0)
  484: 			return(ENXIO);
  485: 		/*
  486: 		 * XXX handle i/o in progress.  Return EBUSY, or wait, or
  487: 		 * flush the i/o.
  488: 		 * XXX handle multiple opens of the device.  Return EBUSY,
  489: 		 * or revoke the fd's.
  490: 		 * How are these problems handled for removable and failing
  491: 		 * hardware devices? (Hint: They are not)
  492: 		 */
  493: 		vnclear(vn);
  494: 		IFOPT(vn, VN_FOLLOW)
  495: 			printf("vnioctl: CLRed\n");
  496: 		break;
  497: 
  498: 	case VNIOCGSET:
  499: 		vn_options |= *f;
  500: 		*f = vn_options;
  501: 		break;
  502: 
  503: 	case VNIOCGCLEAR:
  504: 		vn_options &= ~(*f);
  505: 		*f = vn_options;
  506: 		break;
  507: 
  508: 	case VNIOCUSET:
  509: 		vn->sc_options |= *f;
  510: 		*f = vn->sc_options;
  511: 		break;
  512: 
  513: 	case VNIOCUCLEAR:
  514: 		vn->sc_options &= ~(*f);
  515: 		*f = vn->sc_options;
  516: 		break;
  517: 
  518: 	default:
  519: 		error = ENOTTY;
  520: 		break;
  521: 	}
  522: 	return(error);
  523: }
  524: 
  525: /*
  526:  *	vniocattach_file:
  527:  *
  528:  *	Attach a file to a VN partition.  Return the size in the vn_size
  529:  *	field.
  530:  */
  531: 
  532: static int
  533: vniocattach_file(vn, vio, dev, flag, td)
  534: 	struct vn_softc *vn;
  535: 	struct vn_ioctl *vio;
  536: 	dev_t dev;
  537: 	int flag;
  538: 	struct thread *td;
  539: {
  540: 	struct vattr vattr;
  541: 	struct nameidata nd;
  542: 	int error, flags;
  543: 	struct proc *p = td->td_proc;
  544: 
  545: 	KKASSERT(p != NULL);
  546: 
  547: 	flags = FREAD|FWRITE;
  548: 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, vio->vn_file, td);
  549: 	error = vn_open(&nd, flags, 0);
  550: 	if (error) {
  551: 		if (error != EACCES && error != EPERM && error != EROFS)
  552: 			return (error);
  553: 		flags &= ~FWRITE;
  554: 		NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW,
  555: 			UIO_USERSPACE, vio->vn_file, td);
  556: 		error = vn_open(&nd, flags, 0);
  557: 		if (error)
  558: 			return (error);
  559: 	}
  560: 	NDFREE(&nd, NDF_ONLY_PNBUF);
  561: 	if (nd.ni_vp->v_type != VREG ||
  562: 	    (error = VOP_GETATTR(nd.ni_vp, &vattr, td))) {
  563: 		VOP_UNLOCK(nd.ni_vp, NULL, 0, td);
  564: 		(void) vn_close(nd.ni_vp, flags, td);
  565: 		return (error ? error : EINVAL);
  566: 	}
  567: 	VOP_UNLOCK(nd.ni_vp, NULL, 0, td);
  568: 	vn->sc_secsize = DEV_BSIZE;
  569: 	vn->sc_vp = nd.ni_vp;
  570: 
  571: 	/*
  572: 	 * If the size is specified, override the file attributes.  Note that
  573: 	 * the vn_size argument is in PAGE_SIZE sized blocks.
  574: 	 */
  575: 	if (vio->vn_size)
  576: 		vn->sc_size = (quad_t)vio->vn_size * PAGE_SIZE / vn->sc_secsize;
  577: 	else
  578: 		vn->sc_size = vattr.va_size / vn->sc_secsize;
  579: 	error = vnsetcred(vn, p->p_ucred);
  580: 	if (error) {
  581: 		(void) vn_close(nd.ni_vp, flags, td);
  582: 		return(error);
  583: 	}
  584: 	vn->sc_flags |= VNF_INITED;
  585: 	if (flags == FREAD)
  586: 		vn->sc_flags |= VNF_READONLY;
  587: 	IFOPT(vn, VN_LABELS) {
  588: 		/*
  589: 		 * Reopen so that `ds' knows which devices are open.
  590: 		 * If this is the first VNIOCSET, then we've
  591: 		 * guaranteed that the device is the cdev and that
  592: 		 * no other slices or labels are open.  Otherwise,
  593: 		 * we rely on VNIOCCLR not being abused.
  594: 		 */
  595: 		error = vnopen(dev, flag, S_IFCHR, td);
  596: 		if (error)
  597: 			vnclear(vn);
  598: 	}
  599: 	IFOPT(vn, VN_FOLLOW)
  600: 		printf("vnioctl: SET vp %p size %x blks\n",
  601: 		       vn->sc_vp, vn->sc_size);
  602: 	return(0);
  603: }
  604: 
  605: /*
  606:  *	vniocattach_swap:
  607:  *
  608:  *	Attach swap backing store to a VN partition of the size specified
  609:  *	in vn_size.
  610:  */
  611: 
  612: static int
  613: vniocattach_swap(vn, vio, dev, flag, td)
  614: 	struct vn_softc *vn;
  615: 	struct vn_ioctl *vio;
  616: 	dev_t dev;
  617: 	int flag;
  618: 	struct thread *td;
  619: {
  620: 	int error;
  621: 	struct proc *p = td->td_proc;
  622: 
  623: 	KKASSERT(p != NULL);
  624: 	/*
  625: 	 * Range check.  Disallow negative sizes or any size less then the
  626: 	 * size of a page.  Then round to a page.
  627: 	 */
  628: 
  629: 	if (vio->vn_size <= 0)
  630: 		return(EDOM);
  631: 
  632: 	/*
  633: 	 * Allocate an OBJT_SWAP object.
  634: 	 *
  635: 	 * sc_secsize is PAGE_SIZE'd
  636: 	 *
  637: 	 * vio->vn_size is in PAGE_SIZE'd chunks.
  638: 	 * sc_size must be in PAGE_SIZE'd chunks.  
  639: 	 * Note the truncation.
  640: 	 */
  641: 
  642: 	vn->sc_secsize = PAGE_SIZE;
  643: 	vn->sc_size = vio->vn_size;
  644: 	vn->sc_object = 
  645: 	 vm_pager_allocate(OBJT_SWAP, NULL, vn->sc_secsize * (vm_ooffset_t)vio->vn_size, VM_PROT_DEFAULT, 0);
  646: 	IFOPT(vn, VN_RESERVE) {
  647: 		if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) {
  648: 			vm_pager_deallocate(vn->sc_object);
  649: 			vn->sc_object = NULL;
  650: 			return(EDOM);
  651: 		}
  652: 	}
  653: 	vn->sc_flags |= VNF_INITED;
  654: 
  655: 	error = vnsetcred(vn, p->p_ucred);
  656: 	if (error == 0) {
  657: 		IFOPT(vn, VN_LABELS) {
  658: 			/*
  659: 			 * Reopen so that `ds' knows which devices are open.
  660: 			 * If this is the first VNIOCSET, then we've
  661: 			 * guaranteed that the device is the cdev and that
  662: 			 * no other slices or labels are open.  Otherwise,
  663: 			 * we rely on VNIOCCLR not being abused.
  664: 			 */
  665: 			error = vnopen(dev, flag, S_IFCHR, td);
  666: 		}
  667: 	}
  668: 	if (error == 0) {
  669: 		IFOPT(vn, VN_FOLLOW) {
  670: 			printf("vnioctl: SET vp %p size %x\n",
  671: 			       vn->sc_vp, vn->sc_size);
  672: 		}
  673: 	}
  674: 	if (error)
  675: 		vnclear(vn);
  676: 	return(error);
  677: }
  678: 
  679: /*
  680:  * Duplicate the current processes' credentials.  Since we are called only
  681:  * as the result of a SET ioctl and only root can do that, any future access
  682:  * to this "disk" is essentially as root.  Note that credentials may change
  683:  * if some other uid can write directly to the mapped file (NFS).
  684:  */
  685: int
  686: vnsetcred(struct vn_softc *vn, struct ucred *cred)
  687: {
  688: 	char *tmpbuf;
  689: 	int error = 0;
  690: 
  691: 	/*
  692: 	 * Set credits in our softc
  693: 	 */
  694: 
  695: 	if (vn->sc_cred)
  696: 		crfree(vn->sc_cred);
  697: 	vn->sc_cred = crdup(cred);
  698: 
  699: 	/*
  700: 	 * Horrible kludge to establish credentials for NFS  XXX.
  701: 	 */
  702: 
  703: 	if (vn->sc_vp) {
  704: 		struct uio auio;
  705: 		struct iovec aiov;
  706: 
  707: 		tmpbuf = malloc(vn->sc_secsize, M_TEMP, M_WAITOK);
  708: 		bzero(&auio, sizeof(auio));
  709: 
  710: 		aiov.iov_base = tmpbuf;
  711: 		aiov.iov_len = vn->sc_secsize;
  712: 		auio.uio_iov = &aiov;
  713: 		auio.uio_iovcnt = 1;
  714: 		auio.uio_offset = 0;
  715: 		auio.uio_rw = UIO_READ;
  716: 		auio.uio_segflg = UIO_SYSSPACE;
  717: 		auio.uio_resid = aiov.iov_len;
  718: 		vn_lock(vn->sc_vp, NULL, LK_EXCLUSIVE | LK_RETRY, curthread);
  719: 		error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
  720: 		VOP_UNLOCK(vn->sc_vp, NULL, 0, curthread);
  721: 		free(tmpbuf, M_TEMP);
  722: 	}
  723: 	return (error);
  724: }
  725: 
  726: void
  727: vnclear(struct vn_softc *vn)
  728: {
  729: 	struct thread *td = curthread;		/* XXX */
  730: 
  731: 	IFOPT(vn, VN_FOLLOW)
  732: 		printf("vnclear(%p): vp=%p\n", vn, vn->sc_vp);
  733: 	if (vn->sc_slices != NULL)
  734: 		dsgone(&vn->sc_slices);
  735: 	vn->sc_flags &= ~VNF_INITED;
  736: 	if (vn->sc_vp != NULL) {
  737: 		(void)vn_close(vn->sc_vp, vn->sc_flags & VNF_READONLY ?
  738: 		    FREAD : (FREAD|FWRITE), td);
  739: 		vn->sc_vp = NULL;
  740: 	}
  741: 	vn->sc_flags &= ~VNF_READONLY;
  742: 	if (vn->sc_cred) {
  743: 		crfree(vn->sc_cred);
  744: 		vn->sc_cred = NULL;
  745: 	}
  746: 	if (vn->sc_object != NULL) {
  747: 		vm_pager_deallocate(vn->sc_object);
  748: 		vn->sc_object = NULL;
  749: 	}
  750: 	vn->sc_size = 0;
  751: }
  752: 
  753: static	int
  754: vnsize(dev_t dev)
  755: {
  756: 	struct vn_softc *vn;
  757: 
  758: 	vn = dev->si_drv1;
  759: 	if (!vn)
  760: 		return(-1);
  761: 	if ((vn->sc_flags & VNF_INITED) == 0)
  762: 		return(-1);
  763: 
  764: 	return(vn->sc_size);
  765: }
  766: 
  767: static int 
  768: vn_modevent(module_t mod, int type, void *data)
  769: {
  770: 	struct vn_softc *vn;
  771: 	dev_t dev;
  772: 
  773: 	switch (type) {
  774: 	case MOD_LOAD:
  775: 		cdevsw_add(&vn_cdevsw);
  776: 		break;
  777: 
  778: 	case MOD_UNLOAD:
  779: 		/* fall through */
  780: 	case MOD_SHUTDOWN:
  781: 		for (;;) {
  782: 			vn = SLIST_FIRST(&vn_list);
  783: 			if (!vn)
  784: 				break;
  785: 			SLIST_REMOVE_HEAD(&vn_list, sc_list);
  786: 			if (vn->sc_flags & VNF_INITED)
  787: 				vnclear(vn);
  788: 			/* Cleanup all dev_t's that refer to this unit */
  789: 			while ((dev = vn->sc_devlist) != NULL) {
  790: 				vn->sc_devlist = dev->si_drv2;
  791: 				dev->si_drv1 = dev->si_drv2 = NULL;
  792: 				/* If the last one, destroy it. */
  793: 				if (vn->sc_devlist == NULL)
  794: 					destroy_dev(dev);
  795: 			}
  796: 			free(vn, M_DEVBUF);
  797: 		}
  798: 		cdevsw_remove(&vn_cdevsw);
  799: 		break;
  800: 	default:
  801: 		break;
  802: 	}
  803: 	return 0;
  804: }
  805: 
  806: DEV_MODULE(vn, vn_modevent, 0);