File: [DragonFly] src/sys/vfs/ufs/ufs_readwrite.c
Revision 1.9
Sat Jul 26 22:04:27 2003 UTC by rob
Branches: MAIN
CVS tags: HEAD
Register keyword removal

Approved by: Matt Dillon

    1: /*-
    2:  * Copyright (c) 1993
    3:  *	The Regents of the University of California.  All rights reserved.
    4:  *
    5:  * Redistribution and use in source and binary forms, with or without
    6:  * modification, are permitted provided that the following conditions
    7:  * are met:
    8:  * 1. Redistributions of source code must retain the above copyright
    9:  *    notice, this list of conditions and the following disclaimer.
   10:  * 2. Redistributions in binary form must reproduce the above copyright
   11:  *    notice, this list of conditions and the following disclaimer in the
   12:  *    documentation and/or other materials provided with the distribution.
   13:  * 3. All advertising materials mentioning features or use of this software
   14:  *    must display the following acknowledgement:
   15:  *	This product includes software developed by the University of
   16:  *	California, Berkeley and its contributors.
   17:  * 4. Neither the name of the University nor the names of its contributors
   18:  *    may be used to endorse or promote products derived from this software
   19:  *    without specific prior written permission.
   20:  *
   21:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31:  * SUCH DAMAGE.
   32:  *
   33:  *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
   34:  * $FreeBSD: src/sys/ufs/ufs/ufs_readwrite.c,v 1.65.2.14 2003/04/04 22:21:29 tegge Exp $
   35:  * $DragonFly: src/sys/vfs/ufs/ufs_readwrite.c,v 1.9 2003/07/26 22:04:27 rob Exp $
   36:  */
   37: 
   38: #define	BLKSIZE(a, b, c)	blksize(a, b, c)
   39: #define	FS			struct fs
   40: #define	I_FS			i_fs
   41: 
   42: #include <vm/vm.h>
   43: #include <vm/vm_object.h>
   44: #include <vm/vm_pager.h>
   45: #include <vm/vm_map.h>
   46: #include <vm/vnode_pager.h>
   47: #include <sys/event.h>
   48: #include <sys/vmmeter.h>
   49: #include <vm/vm_page2.h>
   50: 
   51: #include "opt_directio.h"
   52: 
   53: #define VN_KNOTE(vp, b) \
   54: 	KNOTE((struct klist *)&vp->v_pollinfo.vpi_selinfo.si_note, (b))
   55: 
   56: #ifdef DIRECTIO
   57: extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
   58: #endif
   59: 
   60: /*
   61:  * Vnode op for reading.
   62:  */
   63: /* ARGSUSED */
   64: int
   65: ffs_read(ap)
   66: 	struct vop_read_args /* {
   67: 		struct vnode *a_vp;
   68: 		struct uio *a_uio;
   69: 		int a_ioflag;
   70: 		struct ucred *a_cred;
   71: 	} */ *ap;
   72: {
   73: 	struct vnode *vp;
   74: 	struct inode *ip;
   75: 	struct uio *uio;
   76: 	FS *fs;
   77: 	struct buf *bp;
   78: 	ufs_daddr_t lbn, nextlbn;
   79: 	off_t bytesinfile;
   80: 	long size, xfersize, blkoffset;
   81: 	int error, orig_resid;
   82: 	u_short mode;
   83: 	int seqcount;
   84: 	int ioflag;
   85: 	vm_object_t object;
   86: 
   87: 	vp = ap->a_vp;
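        	/*
        	 * The sequential access heuristic supplied by the caller is
        	 * packed into the upper 16 bits of a_ioflag.
        	 */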
   88: 	seqcount = ap->a_ioflag >> 16;
   89: 	ip = VTOI(vp);
   90: 	mode = ip->i_mode;
   91: 	uio = ap->a_uio;
   92: 	ioflag = ap->a_ioflag;
   93: #ifdef DIRECTIO
   94: 	if ((ioflag & IO_DIRECT) != 0) {
   95: 		int workdone;
   96: 
   97: 		error = ffs_rawread(vp, uio, &workdone);
   98: 		if (error || workdone)
   99: 			return error;
  100: 	}
  101: #endif
  102: 
  103: #ifdef DIAGNOSTIC
  104: 	if (uio->uio_rw != UIO_READ)
  105: 		panic("ffs_read: mode");
  106: 
  107: 	if (vp->v_type == VLNK) {
  108: 		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
  109: 			panic("ffs_read: short symlink");
  110: 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
  111: 		panic("ffs_read: type %d", vp->v_type);
  112: #endif
  113: 	fs = ip->I_FS;
  114: 	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
  115: 		return (EFBIG);
  116: 
  117: 	orig_resid = uio->uio_resid;
  118: 	if (orig_resid <= 0)
  119: 		return (0);
  120: 
  121: 	object = vp->v_object;
  122: 
  123: 	bytesinfile = ip->i_size - uio->uio_offset;
  124: 	if (bytesinfile <= 0) {
  125: 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
  126: 			ip->i_flag |= IN_ACCESS;
  127: 		return 0;
  128: 	}
  129: 
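        	/*
        	 * Grab a reference on the backing VM object for the duration
        	 * of the read; every return path below releases it with
        	 * vm_object_vndeallocate().
        	 */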
  130: 	if (object)
  131: 		vm_object_reference(object);
  132: 
  133: #ifdef ENABLE_VFS_IOOPT
  134: 	/*
   135: 	 * If IO optimisation is turned on, we are NOT a VM based
   136: 	 * IO request (i.e. not headed for the buffer cache), and
   137: 	 * there IS a vm object associated with the vnode, then try
   138: 	 * to satisfy the read directly from the VM object.
  139: 	 */
  140: 	if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
  141: 		int nread, toread;
  142: 
  143: 		toread = uio->uio_resid;
  144: 		if (toread > bytesinfile)
  145: 			toread = bytesinfile;
  146: 		if (toread >= PAGE_SIZE) {
  147: 			/*
   148: 			 * Then if it's at least a page in size, try to
   149: 			 * get the data from the object using vm tricks
  150: 			 */
  151: 			error = uioread(toread, uio, object, &nread);
  152: 			if ((uio->uio_resid == 0) || (error != 0)) {
  153: 				/*
  154: 				 * If we finished or there was an error
  155: 				 * then finish up (the reference previously
  156: 				 * obtained on object must be released).
  157: 				 */
  158: 				if ((error == 0 ||
  159: 				    uio->uio_resid != orig_resid) &&
  160: 				    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
  161: 					ip->i_flag |= IN_ACCESS;
  162: 
  163: 				if (object)
  164: 					vm_object_vndeallocate(object);
  165: 				return error;
  166: 			}
  167: 		}
  168: 	}
  169: #endif
  170: 
  171: 	/*
   172: 	 * OK, so we couldn't do it all in one vm trick;
   173: 	 * cycle around trying smaller bites.
  174: 	 */
  175: 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
  176: 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
  177: 			break;
  178: #ifdef ENABLE_VFS_IOOPT
  179: 		if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
  180: 			/*
  181: 			 * Obviously we didn't finish above, but we
   182: 			 * didn't get an error either.  Try the same trick
   183: 			 * again, but this time we are looping.
  184: 			 */
  185: 			int nread, toread;
  186: 			toread = uio->uio_resid;
  187: 			if (toread > bytesinfile)
  188: 				toread = bytesinfile;
  189: 
  190: 			/*
  191: 			 * Once again, if there isn't enough for a
  192: 			 * whole page, don't try optimising.
  193: 			 */
  194: 			if (toread >= PAGE_SIZE) {
  195: 				error = uioread(toread, uio, object, &nread);
  196: 				if ((uio->uio_resid == 0) || (error != 0)) {
  197: 					/*
  198: 					 * If we finished or there was an 
  199: 					 * error then finish up (the reference
  200: 					 * previously obtained on object must 
  201: 					 * be released).
  202: 					 */
  203: 					if ((error == 0 ||
  204: 					    uio->uio_resid != orig_resid) &&
  205: 					    (vp->v_mount->mnt_flag &
  206: 					    MNT_NOATIME) == 0)
  207: 						ip->i_flag |= IN_ACCESS;
  208: 					if (object)
  209: 						vm_object_vndeallocate(object);
  210: 					return error;
  211: 				}
  212: 				/*
   213: 				 * To get here we didn't finish or err.
  214: 				 * If we did get some data,
  215: 				 * loop to try another bite.
  216: 				 */
  217: 				if (nread > 0) {
  218: 					continue;
  219: 				}
  220: 			}
  221: 		}
  222: #endif
  223: 
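        		/*
        		 * Translate the current offset into a logical block
        		 * number; nextlbn is used below to decide whether to
        		 * issue read-ahead.
        		 */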
  224: 		lbn = lblkno(fs, uio->uio_offset);
  225: 		nextlbn = lbn + 1;
  226: 
  227: 		/*
   228: 		 * Size of the buffer.  The buffer representing the
   229: 		 * end of the file is rounded up to the size of
   230: 		 * the block type (fragment or full block,
   231: 		 * as appropriate).
  232: 		 */
  233: 		size = BLKSIZE(fs, ip, lbn);
  234: 		blkoffset = blkoff(fs, uio->uio_offset);
  235: 		
  236: 		/*
   237: 		 * The amount we want to transfer in this iteration is
   238: 		 * one FS block minus the amount of data that precedes
   239: 		 * our start point within the block.
  240: 		 */
  241: 		xfersize = fs->fs_bsize - blkoffset;
  242: 
  243: 		/*
  244: 		 * But if we actually want less than the block,
  245: 		 * or the file doesn't have a whole block more of data,
  246: 		 * then use the lesser number.
  247: 		 */
  248: 		if (uio->uio_resid < xfersize)
  249: 			xfersize = uio->uio_resid;
  250: 		if (bytesinfile < xfersize)
  251: 			xfersize = bytesinfile;
  252: 
  253: 		if (lblktosize(fs, nextlbn) >= ip->i_size) {
  254: 			/*
  255: 			 * Don't do readahead if this is the end of the file.
  256: 			 */
  257: 			error = bread(vp, lbn, size, &bp);
  258: 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
  259: 			/* 
  260: 			 * Otherwise if we are allowed to cluster,
  261: 			 * grab as much as we can.
  262: 			 *
  263: 			 * XXX  This may not be a win if we are not
  264: 			 * doing sequential access.
  265: 			 */
  266: 			error = cluster_read(vp, ip->i_size, lbn,
  267: 				size, uio->uio_resid, seqcount, &bp);
  268: 		} else if (seqcount > 1) {
  269: 			/*
  270: 			 * If we are NOT allowed to cluster, then
  271: 			 * if we appear to be acting sequentially,
  272: 			 * fire off a request for a readahead
  273: 			 * as well as a read. Note that the 4th and 5th
  274: 			 * arguments point to arrays of the size specified in
  275: 			 * the 6th argument.
  276: 			 */
  277: 			int nextsize = BLKSIZE(fs, ip, nextlbn);
  278: 			error = breadn(vp, lbn,
  279: 			    size, &nextlbn, &nextsize, 1, &bp);
  280: 		} else {
  281: 			/*
  282: 			 * Failing all of the above, just read what the 
  283: 			 * user asked for. Interestingly, the same as
  284: 			 * the first option above.
  285: 			 */
  286: 			error = bread(vp, lbn, size, &bp);
  287: 		}
  288: 		if (error) {
  289: 			brelse(bp);
  290: 			bp = NULL;
  291: 			break;
  292: 		}
  293: 
  294: 		/*
  295: 		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
  296: 		 * will cause us to attempt to release the buffer later on
  297: 		 * and will cause the buffer cache to attempt to free the
  298: 		 * underlying pages.
  299: 		 */
  300: 		if (ioflag & IO_DIRECT)
  301: 			bp->b_flags |= B_DIRECT;
  302: 
  303: 		/*
  304: 		 * We should only get non-zero b_resid when an I/O error
  305: 		 * has occurred, which should cause us to break above.
  306: 		 * However, if the short read did not cause an error,
  307: 		 * then we want to ensure that we do not uiomove bad
  308: 		 * or uninitialized data.
  309: 		 *
   310: 		 * XXX b_resid is only valid when an actual I/O has occurred
  311: 		 * and may be incorrect if the buffer is B_CACHE or if the
  312: 		 * last op on the buffer was a failed write.  This KASSERT
  313: 		 * is a precursor to removing it from the UFS code.
  314: 		 */
  315: 		KASSERT(bp->b_resid == 0, ("bp->b_resid != 0"));
  316: 		size -= bp->b_resid;
  317: 		if (size < xfersize) {
  318: 			if (size == 0)
  319: 				break;
  320: 			xfersize = size;
  321: 		}
  322: 
  323: #ifdef ENABLE_VFS_IOOPT
  324: 		if (vfs_ioopt && object &&
  325: 		    (bp->b_flags & B_VMIO) &&
  326: 		    ((blkoffset & PAGE_MASK) == 0) &&
  327: 		    ((xfersize & PAGE_MASK) == 0)) {
  328: 			/*
   329: 			 * If VFS IO optimisation is turned on,
   330: 			 * the transfer is an exact page multiple,
   331: 			 * and this is a normal VM based op,
   332: 			 * then use uiomoveco().
  333: 			 */
  334: 			error =
  335: 				uiomoveco((char *)bp->b_data + blkoffset,
  336: 					(int)xfersize, uio, object);
  337: 		} else 
  338: #endif
  339: 		{
  340: 			/*
  341: 			 * otherwise use the general form
  342: 			 */
  343: 			error =
  344: 				uiomove((char *)bp->b_data + blkoffset,
  345: 					(int)xfersize, uio);
  346: 		}
  347: 
  348: 		if (error)
  349: 			break;
  350: 
  351: 		if ((ioflag & (IO_VMIO|IO_DIRECT)) && 
  352: 		    (LIST_FIRST(&bp->b_dep) == NULL)) {
  353: 			/*
   354: 			 * If there are no dependencies and this is a VMIO
   355: 			 * or IO_DIRECT request, we don't need the buf; mark
   356: 			 * it available for freeing.  The VM has the data.
  357: 			 */
  358: 			bp->b_flags |= B_RELBUF;
  359: 			brelse(bp);
  360: 		} else {
  361: 			/*
  362: 			 * Otherwise let whoever
  363: 			 * made the request take care of
  364: 			 * freeing it. We just queue
  365: 			 * it onto another list.
  366: 			 */
  367: 			bqrelse(bp);
  368: 		}
  369: 	}
  370: 
  371: 	/* 
  372: 	 * This can only happen in the case of an error
  373: 	 * because the loop above resets bp to NULL on each iteration
   374: 	 * and on normal completion has not set a new value into it,
   375: 	 * so it must have come from a 'break' statement.
  376: 	 */
  377: 	if (bp != NULL) {
  378: 		if ((ioflag & (IO_VMIO|IO_DIRECT)) && 
  379: 		    (LIST_FIRST(&bp->b_dep) == NULL)) {
  380: 			bp->b_flags |= B_RELBUF;
  381: 			brelse(bp);
  382: 		} else {
  383: 			bqrelse(bp);
  384: 		}
  385: 	}
  386: 
  387: 	if (object)
  388: 		vm_object_vndeallocate(object);
  389: 	if ((error == 0 || uio->uio_resid != orig_resid) &&
  390: 	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
  391: 		ip->i_flag |= IN_ACCESS;
  392: 	return (error);
  393: }
  394: 
  395: /*
  396:  * Vnode op for writing.
  397:  */
  398: int
  399: ffs_write(ap)
  400: 	struct vop_write_args /* {
  401: 		struct vnode *a_vp;
  402: 		struct uio *a_uio;
  403: 		int a_ioflag;
  404: 		struct ucred *a_cred;
  405: 	} */ *ap;
  406: {
  407: 	struct vnode *vp;
  408: 	struct uio *uio;
  409: 	struct inode *ip;
  410: 	FS *fs;
  411: 	struct buf *bp;
  412: 	ufs_daddr_t lbn;
  413: 	off_t osize;
  414: 	int seqcount;
  415: 	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
  416: 	vm_object_t object;
  417: 	struct thread *td;
  418: 
  419: 	extended = 0;
  420: 	seqcount = ap->a_ioflag >> 16;
  421: 	ioflag = ap->a_ioflag;
  422: 	uio = ap->a_uio;
  423: 	vp = ap->a_vp;
  424: 	ip = VTOI(vp);
  425: 
  426: 	object = vp->v_object;
  427: 	if (object)
  428: 		vm_object_reference(object);
  429: 
  430: #ifdef DIAGNOSTIC
  431: 	if (uio->uio_rw != UIO_WRITE)
  432: 		panic("ffs_write: mode");
  433: #endif
  434: 
  435: 	switch (vp->v_type) {
  436: 	case VREG:
  437: 		if (ioflag & IO_APPEND)
  438: 			uio->uio_offset = ip->i_size;
  439: 		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
  440: 			if (object)
  441: 				vm_object_vndeallocate(object);
  442: 			return (EPERM);
  443: 		}
  444: 		/* FALLTHROUGH */
  445: 	case VLNK:
  446: 		break;
  447: 	case VDIR:
  448: 		panic("ffs_write: dir write");
  449: 		break;
  450: 	default:
  451: 		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
  452: 			(int)uio->uio_offset,
  453: 			(int)uio->uio_resid
  454: 		);
  455: 	}
  456: 
  457: 	fs = ip->I_FS;
  458: 	if (uio->uio_offset < 0 ||
  459: 	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
  460: 		if (object)
  461: 			vm_object_vndeallocate(object);
  462: 		return (EFBIG);
  463: 	}
  464: 	/*
  465: 	 * Maybe this should be above the vnode op call, but so long as
  466: 	 * file servers have no limits, I don't think it matters.
  467: 	 */
  468: 	td = uio->uio_td;
  469: 	if (vp->v_type == VREG && td && td->td_proc &&
  470: 	    uio->uio_offset + uio->uio_resid >
  471: 	    td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
  472: 		psignal(td->td_proc, SIGXFSZ);
  473: 		if (object)
  474: 			vm_object_vndeallocate(object);
  475: 		return (EFBIG);
  476: 	}
  477: 
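        	/*
        	 * Remember the original residual count and file size so that
        	 * a failed write can be backed out (IO_UNIT) and so we can
        	 * tell whether any data was actually written.
        	 */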
  478: 	resid = uio->uio_resid;
  479: 	osize = ip->i_size;
  480: 
  481: 	/*
  482: 	 * NOTE! These B_ flags are actually balloc-only flags, not buffer
  483: 	 * flags.  They are similar to the BA_ flags in -current.
  484: 	 */
  485: 	if (seqcount > B_SEQMAX)
  486: 		flags = B_SEQMAX << B_SEQSHIFT;
  487: 	else
  488: 		flags = seqcount << B_SEQSHIFT;
  489: 	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
  490: 		flags |= B_SYNC;
  491: 
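        	/*
        	 * If copy-on-write optimizations are pending on the object,
        	 * resolve them over the range we are about to overwrite.
        	 */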
  492: 	if (object && (object->flags & OBJ_OPT)) {
  493: 		vm_freeze_copyopts(object,
  494: 			OFF_TO_IDX(uio->uio_offset),
  495: 			OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
  496: 	}
  497: 
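        	/*
        	 * Main write loop: each iteration balloc's the buffer backing
        	 * one filesystem block (or fragment), copies the user data
        	 * into it, and schedules the buffer for write.
        	 */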
  498: 	for (error = 0; uio->uio_resid > 0;) {
  499: 		lbn = lblkno(fs, uio->uio_offset);
  500: 		blkoffset = blkoff(fs, uio->uio_offset);
  501: 		xfersize = fs->fs_bsize - blkoffset;
  502: 		if (uio->uio_resid < xfersize)
  503: 			xfersize = uio->uio_resid;
  504: 
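        		/*
        		 * If this transfer will extend the file, tell the VM
        		 * system about the new size before allocating the block.
        		 */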
  505: 		if (uio->uio_offset + xfersize > ip->i_size)
  506: 			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
  507: 
  508: 		/*      
  509: 		 * We must perform a read-before-write if the transfer
  510: 		 * size does not cover the entire buffer.
  511: 		 */
  512: 		if (fs->fs_bsize > xfersize)
  513: 			flags |= B_CLRBUF;
  514: 		else
  515: 			flags &= ~B_CLRBUF;
  516: /* XXX is uio->uio_offset the right thing here? */
  517: 		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
  518: 		    ap->a_cred, flags, &bp);
  519: 		if (error != 0)
  520: 			break;
  521: 		/*
  522: 		 * If the buffer is not valid and we did not clear garbage
  523: 		 * out above, we have to do so here even though the write
  524: 		 * covers the entire buffer in order to avoid a mmap()/write
  525: 		 * race where another process may see the garbage prior to
  526: 		 * the uiomove() for a write replacing it.
  527: 		 */
  528: 		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
  529: 			vfs_bio_clrbuf(bp);
  530: 		if (ioflag & IO_DIRECT)
  531: 			bp->b_flags |= B_DIRECT;
  532: 		if (ioflag & IO_NOWDRAIN)
  533: 			bp->b_flags |= B_NOWDRAIN;
  534: 
  535: 		if (uio->uio_offset + xfersize > ip->i_size) {
  536: 			ip->i_size = uio->uio_offset + xfersize;
  537: 			extended = 1;
  538: 		}
  539: 
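        		/*
        		 * Trim the transfer to the portion of the block that is
        		 * actually present in the buffer.
        		 */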
  540: 		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
  541: 		if (size < xfersize)
  542: 			xfersize = size;
  543: 
  544: 		error =
  545: 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
  546: 		if ((ioflag & (IO_VMIO|IO_DIRECT)) && 
  547: 		    (LIST_FIRST(&bp->b_dep) == NULL)) {
  548: 			bp->b_flags |= B_RELBUF;
  549: 		}
  550: 
  551: 		/*
  552: 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
  553: 		 * if we have a severe page deficiency write the buffer 
  554: 		 * asynchronously.  Otherwise try to cluster, and if that
  555: 		 * doesn't do it then either do an async write (if O_DIRECT),
  556: 		 * or a delayed write (if not).
  557: 		 */
  558: 
  559: 		if (ioflag & IO_SYNC) {
  560: 			(void)bwrite(bp);
  561: 		} else if (vm_page_count_severe() || 
  562: 			    buf_dirty_count_severe() ||
  563: 			    (ioflag & IO_ASYNC)) {
  564: 			bp->b_flags |= B_CLUSTEROK;
  565: 			bawrite(bp);
  566: 		} else if (xfersize + blkoffset == fs->fs_bsize) {
  567: 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
  568: 				bp->b_flags |= B_CLUSTEROK;
  569: 				cluster_write(bp, ip->i_size, seqcount);
  570: 			} else {
  571: 				bawrite(bp);
  572: 			}
  573: 		} else if (ioflag & IO_DIRECT) {
  574: 			bp->b_flags |= B_CLUSTEROK;
  575: 			bawrite(bp);
  576: 		} else {
  577: 			bp->b_flags |= B_CLUSTEROK;
  578: 			bdwrite(bp);
  579: 		}
  580: 		if (error || xfersize == 0)
  581: 			break;
  582: 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
  583: 	}
  584: 	/*
   585: 	 * If we successfully wrote any data and we are not the superuser,
   586: 	 * we clear the setuid and setgid bits as a precaution against
  587: 	 * tampering.
  588: 	 */
  589: 	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
  590: 		ip->i_mode &= ~(ISUID | ISGID);
  591: 	if (resid > uio->uio_resid)
  592: 		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
  593: 	if (error) {
  594: 		if (ioflag & IO_UNIT) {
  595: 			(void)UFS_TRUNCATE(vp, osize,
  596: 			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
  597: 			uio->uio_offset -= resid - uio->uio_resid;
  598: 			uio->uio_resid = resid;
  599: 		}
  600: 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
  601: 		error = UFS_UPDATE(vp, 1);
  602: 
  603: 	if (object)
  604: 		vm_object_vndeallocate(object);
  605: 
  606: 	return (error);
  607: }
  608: 
  609: 
  610: /*
  611:  * get page routine
  612:  */
  613: int
  614: ffs_getpages(ap)
  615: 	struct vop_getpages_args *ap;
  616: {
  617: 	off_t foff, physoffset;
  618: 	int i, size, bsize;
  619: 	struct vnode *dp, *vp;
  620: 	vm_object_t obj;
  621: 	vm_pindex_t pindex, firstindex;
  622: 	vm_page_t mreq;
  623: 	int bbackwards, bforwards;
  624: 	int pbackwards, pforwards;
  625: 	int firstpage;
  626: 	int reqlblkno;
  627: 	daddr_t reqblkno;
  628: 	int poff;
  629: 	int pcount;
  630: 	int rtval;
  631: 	int pagesperblock;
  632: 
  633: 
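        	/*
        	 * pcount is the number of pages spanned by the request and
        	 * mreq is the page the fault actually requires.
        	 */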
  634: 	pcount = round_page(ap->a_count) / PAGE_SIZE;
  635: 	mreq = ap->a_m[ap->a_reqpage];
  636: 	firstindex = ap->a_m[0]->pindex;
  637: 
  638: 	/*
   639: 	 * If ANY DEV_BSIZE blocks are valid on a large filesystem block,
   640: 	 * then the entire page is valid.  Since the page may be mapped,
   641: 	 * user programs might reference data beyond the actual end of file
   642: 	 * occurring within the page.  We have to zero that data.
  643: 	 */
  644: 	if (mreq->valid) {
  645: 		if (mreq->valid != VM_PAGE_BITS_ALL)
  646: 			vm_page_zero_invalid(mreq, TRUE);
  647: 		for (i = 0; i < pcount; i++) {
  648: 			if (i != ap->a_reqpage) {
  649: 				vm_page_free(ap->a_m[i]);
  650: 			}
  651: 		}
  652: 		return VM_PAGER_OK;
  653: 	}
  654: 
  655: 	vp = ap->a_vp;
  656: 	obj = vp->v_object;
  657: 	bsize = vp->v_mount->mnt_stat.f_iosize;
  658: 	pindex = mreq->pindex;
  659: 	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
  660: 
  661: 	if (bsize < PAGE_SIZE)
  662: 		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
  663: 						    ap->a_count,
  664: 						    ap->a_reqpage);
  665: 
  666: 	/*
  667: 	 * foff is the file offset of the required page
  668: 	 * reqlblkno is the logical block that contains the page
  669: 	 * poff is the index of the page into the logical block
  670: 	 */
  671: 	reqlblkno = foff / bsize;
  672: 	poff = (foff % bsize) / PAGE_SIZE;
  673: 
  674: 	if ( VOP_BMAP( vp, reqlblkno, &dp, &reqblkno,
  675: 		&bforwards, &bbackwards) || (reqblkno == -1)) {
  676: 		for(i = 0; i < pcount; i++) {
  677: 			if (i != ap->a_reqpage)
  678: 				vm_page_free(ap->a_m[i]);
  679: 		}
  680: 		if (reqblkno == -1) {
  681: 			if ((mreq->flags & PG_ZERO) == 0)
  682: 				vm_page_zero_fill(mreq);
  683: 			vm_page_undirty(mreq);
  684: 			mreq->valid = VM_PAGE_BITS_ALL;
  685: 			return VM_PAGER_OK;
  686: 		} else {
  687: 			return VM_PAGER_ERROR;
  688: 		}
  689: 	}
  690: 
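        	/*
        	 * Convert the physical block number (in DEV_BSIZE units) plus
        	 * the page's offset within the block into a byte offset on the
        	 * underlying device.
        	 */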
  691: 	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
  692: 	pagesperblock = bsize / PAGE_SIZE;
  693: 	/*
  694: 	 * find the first page that is contiguous...
  695: 	 * note that pbackwards is the number of pages that are contiguous
  696: 	 * backwards.
  697: 	 */
  698: 	firstpage = 0;
  699: 	if (ap->a_count) {
  700: 		pbackwards = poff + bbackwards * pagesperblock;
  701: 		if (ap->a_reqpage > pbackwards) {
  702: 			firstpage = ap->a_reqpage - pbackwards;
  703: 			for(i=0;i<firstpage;i++)
  704: 				vm_page_free(ap->a_m[i]);
  705: 		}
  706: 
  707: 	/*
  708: 	 * pforwards is the number of pages that are contiguous
  709: 	 * after the current page.
  710: 	 */
  711: 		pforwards = (pagesperblock - (poff + 1)) +
  712: 			bforwards * pagesperblock;
  713: 		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
  714: 			for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
  715: 				vm_page_free(ap->a_m[i]);
  716: 			pcount = ap->a_reqpage + pforwards + 1;
  717: 		}
  718: 
  719: 	/*
  720: 	 * number of pages for I/O corrected for the non-contig pages at
  721: 	 * the beginning of the array.
  722: 	 */
  723: 		pcount -= firstpage;
  724: 	}
  725: 
  726: 	/*
  727: 	 * calculate the size of the transfer
  728: 	 */
  729: 
  730: 	size = pcount * PAGE_SIZE;
  731: 
  732: 	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
  733: 		obj->un_pager.vnp.vnp_size)
  734: 		size = obj->un_pager.vnp.vnp_size -
  735: 			IDX_TO_OFF(ap->a_m[firstpage]->pindex);
  736: 
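        	/*
        	 * Hand the remaining pages to the underlying device vnode; the
        	 * last argument is the byte delta between device offsets and
        	 * the file offsets of these pages.
        	 */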
  737: 	physoffset -= foff;
  738: 	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
  739: 		(ap->a_reqpage - firstpage), physoffset);
  740: 
  741: 	return (rtval);
  742: }
  743: 
  744: /*
  745:  * put page routine
  746:  *
  747:  * XXX By default, wimp out... note that a_offset is ignored (and always
  748:  * XXX has been).
  749:  */
  750: int
  751: ffs_putpages(ap)
  752: 	struct vop_putpages_args *ap;
  753: {
  754: 	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
  755: 		ap->a_sync, ap->a_rtvals);
  756: }