File:  [DragonFly] / src / sys / sys / buf.h
Revision 1.7: download - view: text, annotated - select for diffs
Wed Aug 20 07:31:21 2003 UTC (11 years, 3 months ago) by rob
Branches: MAIN
CVS tags: HEAD
__P() != wanted; begin removal. In order to preserve white space this needs
to be done by hand, as I accidentally killed a source tree that I had gotten
this far on. I'm committing this now; LINT and GENERIC both build with
these changes, and there are many more to come.

    1: /*
    2:  * Copyright (c) 1982, 1986, 1989, 1993
    3:  *	The Regents of the University of California.  All rights reserved.
    4:  * (c) UNIX System Laboratories, Inc.
    5:  * All or some portions of this file are derived from material licensed
    6:  * to the University of California by American Telephone and Telegraph
    7:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8:  * the permission of UNIX System Laboratories, Inc.
    9:  *
   10:  * Redistribution and use in source and binary forms, with or without
   11:  * modification, are permitted provided that the following conditions
   12:  * are met:
   13:  * 1. Redistributions of source code must retain the above copyright
   14:  *    notice, this list of conditions and the following disclaimer.
   15:  * 2. Redistributions in binary form must reproduce the above copyright
   16:  *    notice, this list of conditions and the following disclaimer in the
   17:  *    documentation and/or other materials provided with the distribution.
   18:  * 3. All advertising materials mentioning features or use of this software
   19:  *    must display the following acknowledgement:
   20:  *	This product includes software developed by the University of
   21:  *	California, Berkeley and its contributors.
   22:  * 4. Neither the name of the University nor the names of its contributors
   23:  *    may be used to endorse or promote products derived from this software
   24:  *    without specific prior written permission.
   25:  *
   26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36:  * SUCH DAMAGE.
   37:  *
   38:  *	@(#)buf.h	8.9 (Berkeley) 3/30/95
   39:  * $FreeBSD: src/sys/sys/buf.h,v 1.88.2.10 2003/01/25 19:02:23 dillon Exp $
   40:  * $DragonFly: src/sys/sys/buf.h,v 1.7 2003/08/20 07:31:21 rob Exp $
   41:  */
   42: 
   43: #ifndef _SYS_BUF_H_
   44: #define	_SYS_BUF_H_
   45: 
   46: #ifndef _SYS_QUEUE_H_
   47: #include <sys/queue.h>
   48: #endif
   49: #ifndef _SYS_LOCK_H_
   50: #include <sys/lock.h>
   51: #endif
   52: #ifndef _SYS_DEVICE_H_
   53: #include <sys/device.h>
   54: #endif
   55: 
   56: struct buf;		/* defined in full further down */
   57: struct mount;		/* used in this header only through pointers */
   58: struct vnode;		/* used in this header only through pointers */
   59: 
   60: /*
   61:  * Declares struct workhead (type of b_dep) without <ufs/ffs/softdep.h>.
   62:  */
   63: LIST_HEAD(workhead, worklist);
   64: /*
   65:  * Hook table for buffer I/O events, with its one global instance, bioops.
   66:  * These are currently used only by the soft dependency code, hence are
   67:  * stored once in a global variable. If other subsystems wanted to use these
   68:  * hooks, a pointer to a set of bio_ops could be added to each buffer.
   69:  */
   70: extern struct bio_ops {	/* NOTE(review): hook semantics below are inferred from names */
   71: 	void	(*io_start) (struct buf *);	/* presumably: I/O on buf starting */
   72: 	void	(*io_complete) (struct buf *);	/* presumably: I/O on buf finished */
   73: 	void	(*io_deallocate) (struct buf *); /* presumably: buf being released */
   74: 	int	(*io_fsync) (struct vnode *);	/* per-vnode hook; returns int */
   75: 	int	(*io_sync) (struct mount *);	/* per-mount hook; returns int */
   76: 	void	(*io_movedeps) (struct buf *, struct buf *); /* takes two bufs */
   77: 	int	(*io_countdeps) (struct buf *, int);	/* returns int; confirm meaning */
   78: } bioops;
   79: 
   80: struct iodone_chain {	/* link for nested b_iodone's; see b_iodone_chain */
   81: 	long	ic_prev_flags;	/* presumably the saved b_flags -- confirm */
   82: 	void	(*ic_prev_iodone) (struct buf *); /* presumably the saved b_iodone */
   83: 	void	*ic_prev_iodone_chain;	/* presumably the saved b_iodone_chain */
   84: 	struct {		/* one (long, pointer) argument pair */
   85: 		long	ia_long;
   86: 		void	*ia_ptr;
   87: 	}	ic_args[5];	/* five argument slots */
   88: };
   89: 
   90: /*
   91:  * The buffer header describes an I/O operation in the kernel.
   92:  *
   93:  * NOTES:
   94:  *	b_bufsize, b_bcount.  b_bufsize is the allocation size of the
   95:  *	buffer, either DEV_BSIZE or PAGE_SIZE aligned.  b_bcount is the
   96:  *	originally requested buffer size and can serve as a bounds check
   97:  *	against EOF.  For most, but not all uses, b_bcount == b_bufsize.
   98:  *
   99:  *	b_dirtyoff, b_dirtyend.  Buffers support piecemeal, unaligned
  100:  *	ranges of dirty data that need to be written to backing store.
  101:  *	The range is typically clipped at b_bcount ( not b_bufsize ).
  102:  *
  103:  *	b_resid.  Number of bytes remaining in I/O.  After an I/O operation
  104:  *	completes, b_resid is usually 0 indicating 100% success.
  105:  */
  106: struct buf {
  107: 	LIST_ENTRY(buf) b_hash;		/* Hash chain. */
  108: 	TAILQ_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
  109: 	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
  110: 	TAILQ_ENTRY(buf) b_act;		/* Device driver queue when active. *new* */
  111: 	long	b_flags;		/* B_* flags. */
  112: 	unsigned short b_qindex;	/* buffer queue index */
  113: 	unsigned char b_xflags;		/* extra flags (BX_*) */
  114: 	struct lock b_lock;		/* Buffer lock */
  115: 	int	b_error;		/* Errno value. */
  116: 	long	b_bufsize;		/* Allocated buffer size. */
  117: 	long	b_runningbufspace;	/* when I/O is running, pipelining */
  118: 	long	b_bcount;		/* Valid bytes in buffer. */
  119: 	long	b_resid;		/* Remaining I/O. */
  120: 	dev_t	b_dev;			/* Device associated with buffer. */
  121: 	caddr_t	b_data;			/* Memory, superblocks, indirect etc. */
  122: 	caddr_t	b_kvabase;		/* base kva for buffer */
  123: 	int	b_kvasize;		/* size of kva for buffer */
  124: 	daddr_t	b_lblkno;		/* Logical block number. */
  125: 	daddr_t	b_blkno;		/* Underlying physical block number. */
  126: 	off_t	b_offset;		/* Offset into file */
  127: 					/* Function to call upon completion. */
  128: 	void	(*b_iodone) (struct buf *);
  129: 					/* For nested b_iodone's. */
  130: 	struct	iodone_chain *b_iodone_chain;
  131: 	struct	vnode *b_vp;		/* Device vnode. */
  132: 	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
  133: 	int	b_dirtyend;		/* Offset of end of dirty region. */
  134: 	daddr_t	b_pblkno;               /* physical block number */
  135: 	void	*b_saveaddr;		/* Original b_addr (presumably b_data) for physio. */
  136: 	void	*b_driver1;		/* for private use by the driver */
  137: 	void	*b_driver2;		/* for private use by the driver */
  138: 	void	*b_caller1;		/* for private use by the caller */
  139: 	void	*b_caller2;		/* for private use by the caller */
  140: 	union	pager_info {		/* one of the two members below */
  141: 		void	*pg_spc;	/* aliased by the b_spc macro */
  142: 		int	pg_reqpage;
  143: 	} b_pager;
  144: 	union	cluster_info {		/* either a list head or a list entry */
  145: 		TAILQ_HEAD(cluster_list_head, buf) cluster_head;
  146: 		TAILQ_ENTRY(buf) cluster_entry;
  147: 	} b_cluster;
  148: 	struct	vm_page *b_pages[btoc(MAXPHYS)]; /* btoc(MAXPHYS) page pointers */
  149: 	int		b_npages;	/* presumably # of b_pages in use -- confirm */
  150: 	struct	workhead b_dep;		/* List of filesystem dependencies. */
  151: 	struct chain_info {		/* buffer chaining */
  152: 		struct buf *parent;
  153: 		int count;
  154: 	} b_chain;
  155: };
  156: 
  157: #define b_spc	b_pager.pg_spc	/* shorthand for the pg_spc member of b_pager */
  158: 
  159: /*
  160:  * These flags are kept in b_flags.
  161:  *
  162:  * Notes:
  163:  *
  164:  *	B_ASYNC		VOP calls on bp's are usually async whether or not
  165:  *			B_ASYNC is set, but some subsystems, such as NFS, like 
  166:  *			to know what is best for the caller so they can
  167:  *			optimize the I/O.
  168:  *
  169:  *	B_PAGING	Indicates that bp is being used by the paging system or
  170:  *			some paging system and that the bp is not linked into
  171:  *			the b_vp's clean/dirty linked lists or ref counts.
  172:  *			Buffer vp reassignments are illegal in this case.
  173:  *
  174:  *	B_CACHE		This may only be set if the buffer is entirely valid.
  175:  *			The situation where B_DELWRI is set and B_CACHE is
  176:  *			clear MUST be committed to disk by getblk() so 
  177:  *			B_DELWRI can also be cleared.  See the comments for
  178:  *			getblk() in kern/vfs_bio.c.  If B_CACHE is clear,
  179:  *			the caller is expected to clear B_ERROR|B_INVAL,
  180:  *			set B_READ, and initiate an I/O.
  181:  *
  182:  *			The 'entire buffer' is defined to be the range from
  183:  *			0 through b_bcount.
  184:  *
  185:  *	B_MALLOC	Request that the buffer be allocated from the malloc
  186:  *			pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned.
  187:  *
  188:  *	B_CLUSTEROK	This flag is typically set for B_DELWRI buffers
  189:  *			by filesystems that allow clustering when the buffer
  190:  *			is fully dirty and indicates that it may be clustered
  191:  *			with other adjacent dirty buffers.  Note the clustering
  192:  *			may not be used with the stage 1 data write under NFS
  193:  *			but may be used for the commit rpc portion.
  194:  *
  195:  *	B_VMIO		Indicates that the buffer is tied into a VM object.
  196:  *			The buffer's data is always PAGE_SIZE aligned even
  197:  *			if b_bufsize and b_bcount are not.  ( b_bufsize is 
  198:  *			always at least DEV_BSIZE aligned, though ).
  199:  *	
  200:  *	B_DIRECT	Hint that we should attempt to completely free
  201:  *			the pages underlying the buffer.   B_DIRECT is 
  202:  *			sticky until the buffer is released and typically
  203:  *			only has an effect when B_RELBUF is also set.
  204:  *
  205:  *	B_NOWDRAIN	This flag should be set when a device (like VN)
  206:  *			does a turn-around VOP_WRITE from its strategy
  207:  *			routine.  This flag prevents bwrite() from blocking
  208:  *			in wdrain, avoiding a deadlock situation.
  209:  */
  210: 
  211: #define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
  212: #define	B_NEEDCOMMIT	0x00000002	/* Append-write in progress. */
  213: #define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
  214: #define	B_DIRECT	0x00000008	/* direct I/O flag (pls free vmio) */
  215: #define	B_DEFERRED	0x00000010	/* Skipped over for cleaning */
  216: #define	B_CACHE		0x00000020	/* Bread found us in the cache. */
  217: #define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
  218: #define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
  219: #define	B_FREEBUF	0x00000100	/* Instruct driver: free blocks */
  220: #define	B_DONE		0x00000200	/* I/O completed. */
  221: #define	B_EINTR		0x00000400	/* I/O was interrupted */
  222: #define	B_ERROR		0x00000800	/* I/O error occurred. */
  223: #define	B_SCANNED	0x00001000	/* VOP_FSYNC funcs mark written bufs */
  224: #define	B_INVAL		0x00002000	/* Does not contain valid info. */
  225: #define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
  226: #define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
  227: #define	B_MALLOC	0x00010000	/* malloced b_data */
  228: #define	B_CLUSTEROK	0x00020000	/* May cluster with adjacent dirty bufs. */
  229: #define	B_PHYS		0x00040000	/* I/O to user memory. */
  230: #define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
  231: #define	B_READ		0x00100000	/* Read buffer. */
  232: #define	B_DIRTY		0x00200000	/* Needs writing later. */
  233: #define	B_RELBUF	0x00400000	/* Release VMIO buffer. */
  234: #define	B_WANT		0x00800000	/* Used by vm_pager.c */
  235: #define	B_WRITE		0x00000000	/* Write buffer (pseudo flag: no bit set). */
  236: #define	B_WRITEINPROG	0x01000000	/* Write in progress. */
  237: #define	B_XXX		0x02000000	/* Debugging flag. */
  238: #define	B_PAGING	0x04000000	/* volatile paging I/O -- bypass VMIO */
  239: #define	B_ORDERED	0x08000000	/* Must guarantee I/O ordering */
  240: #define B_RAM		0x10000000	/* Read ahead mark (flag) */
  241: #define B_VMIO		0x20000000	/* VMIO flag */
  242: #define B_CLUSTER	0x40000000	/* pagein op, so swap() can count it */
  243: #define B_NOWDRAIN	0x80000000	/* Avoid wdrain deadlock */
  244: 
  245: #define PRINT_BUF_FLAGS "\20\40nowdrain\37cluster\36vmio\35ram\34ordered" \
  246: 	"\33paging\32xxx\31writeinprog\30want\27relbuf\26dirty" \
  247: 	"\25read\24raw\23phys\22clusterok\21malloc\20nocache" \
  248: 	"\17locked\16inval\15scanned\14error\13eintr\12done\11freebuf" \
  249: 	"\10delwri\7call\6cache\4direct\3async\2needcommit\1age"
  250: 
  251: /*
  252:  * These flags are kept in b_xflags (an unsigned char: 8 bits available).
  253:  */
  254: #define	BX_VNDIRTY	0x00000001	/* On vnode dirty list */
  255: #define	BX_VNCLEAN	0x00000002	/* On vnode clean list */
  256: #define	BX_BKGRDWRITE	0x00000004	/* Do writes in background */
  257: #define	BX_BKGRDINPROG	0x00000008	/* Background write in progress */
  258: #define	BX_BKGRDWAIT	0x00000010	/* Background write waiting */
  259: #define BX_AUTOCHAINDONE 0x00000020	/* pager I/O chain auto mode */
  260: 
  261: #define	NOOFFSET	(-1LL)		/* No buffer offset calculated yet (presumably a b_offset value) */
  262: 
  263: #ifdef _KERNEL
  264: /*
  265:  * Buffer locking.  See sys/buf2.h for inline functions.
  266:  */
  267: struct lwkt_token buftimetoken;		/* Interlock on setting prio and timo.  NOTE(review): no 'extern', so every includer gets a tentative definition -- confirm intended */
  268: extern char *buf_wmesg;			/* Default buffer lock message */
  269: #define BUF_WMESG "bufwait"		/* presumably the initial buf_wmesg -- confirm */
  270: 
  271: #endif /* _KERNEL */
  272: 
  273: struct buf_queue_head {		/* a tail queue of bufs plus bookkeeping */
  274: 	TAILQ_HEAD(buf_queue, buf) queue;	/* the queued struct bufs */
  275: 	daddr_t	last_pblkno;	/* presumably b_pblkno of the last buf -- confirm */
  276: 	struct	buf *insert_point;	/* NOTE(review): meaning not shown here */
  277: 	struct	buf *switch_point;	/* NOTE(review): meaning not shown here */
  278: };
  279: 
  280: /*
  281:  * This structure describes a clustered I/O.  It is stored in the b_saveaddr
  282:  * field of the buffer on which I/O is done.  At I/O completion, cluster
  283:  * callback uses the structure to parcel I/O's to individual buffers, and
  284:  * then frees this structure.
  285:  */
  286: struct cluster_save {
  287: 	long	bs_bcount;		/* Saved b_bcount. */
  288: 	long	bs_bufsize;		/* Saved b_bufsize. */
  289: 	void	*bs_saveaddr;		/* Saved b_addr (presumably b_saveaddr -- confirm). */
  290: 	int	bs_nchildren;		/* Number of associated buffers. */
  291: 	struct buf **bs_children;	/* List of associated buffers. */
  292: };
  293: 
  294: /*
  295:  * Definitions for the buffer free lists (BUFFER_QUEUES counts QUEUE_* 0-5).
  296:  */
  297: #define BUFFER_QUEUES	6	/* number of free buffer queues */
  298: 
  299: #define QUEUE_NONE	0	/* on no queue */
  300: #define QUEUE_LOCKED	1	/* locked buffers */
  301: #define QUEUE_CLEAN	2	/* non-B_DELWRI buffers */
  302: #define QUEUE_DIRTY	3	/* B_DELWRI buffers */
  303: #define QUEUE_EMPTYKVA	4	/* empty buffer headers w/KVA assignment */
  304: #define QUEUE_EMPTY	5	/* empty buffer headers */
  305: 
  306: /*
  307:  * Zero out the buffer's data area.
  308:  */
  309: #define	clrbuf(bp) {							\
  310: 	bzero((bp)->b_data, (u_int)(bp)->b_bcount);			\
  311: 	(bp)->b_resid = 0;						\
  312: }
  313: 
  314: /*
  315:  * Flags to low-level bitmap allocation routines (balloc).
  316:  * Apparently not b_flags bits (B_CLRBUF == B_AGE, B_SYNC == B_NEEDCOMMIT).
  317:  * Note: sequential_heuristic() in kern/vfs_vnops.c limits the count
  318:  * to 127.
  319:  */
  320: #define B_SEQMASK	0x7F000000	/* Sequential heuristic mask. */
  321: #define B_SEQSHIFT	24		/* Sequential heuristic shift. */
  322: #define B_SEQMAX	0x7F		/* == B_SEQMASK >> B_SEQSHIFT (127) */
  323: #define B_CLRBUF	0x01		/* Cleared invalid areas of buffer. */
  324: #define B_SYNC		0x02		/* Do all allocations synchronously. */
  325: 
  326: #ifdef _KERNEL
  327: extern int	nbuf;			/* The number of buffer headers */
  328: extern int	maxswzone;		/* Max KVA for swap structures */
  329: extern int	maxbcache;		/* Max KVA for buffer cache */
  330: extern int	runningbufspace;	/* presumably the sum of b_runningbufspace -- confirm */
  331: extern int      buf_maxio;              /* nominal maximum I/O for buffer */
  332: extern struct	buf *buf;		/* The buffer headers. */
  333: extern char	*buffers;		/* The buffer contents. */
  334: extern int	bufpages;		/* Number of memory pages in the buffer pool. */
  335: extern struct	buf *swbuf;		/* Swap I/O buffer headers. */
  336: extern int	nswbuf;			/* Number of swap I/O buffer headers. */
  337: extern TAILQ_HEAD(swqueue, buf) bswlist;	/* tail queue of struct buf */
  338: extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; /* presumably indexed by QUEUE_* */
  339: 
  340: struct uio;		/* forward declaration; physio() takes a struct uio * */
  341: 
  342: caddr_t bufhashinit (caddr_t);
  343: void	bufinit (void);
  344: void	bwillwrite (void);
  345: int	buf_dirty_count_severe (void);
  346: void	bremfree (struct buf *);
  347: int	bread (struct vnode *, daddr_t, int, struct buf **);
  348: int	breadn (struct vnode *, daddr_t, int, daddr_t *, int *, int,
  349: 	    struct buf **);
  350: int	bwrite (struct buf *);
  351: void	bdwrite (struct buf *);
  352: void	bawrite (struct buf *);
  353: void	bdirty (struct buf *);
  354: void	bundirty (struct buf *);
  355: int	bowrite (struct buf *);
  356: void	brelse (struct buf *);
  357: void	bqrelse (struct buf *);
  358: int	vfs_bio_awrite (struct buf *);
  359: struct buf *     getpbuf (int *);
  360: struct buf *incore (struct vnode *, daddr_t);
  361: struct buf *gbincore (struct vnode *, daddr_t);
  362: int	inmem (struct vnode *, daddr_t);
  363: struct buf *getblk (struct vnode *, daddr_t, int, int, int);
  364: struct buf *geteblk (int);
  365: int	biowait (struct buf *);
  366: void	biodone (struct buf *);
  367: 
  368: void	cluster_callback (struct buf *);
  369: int	cluster_read (struct vnode *, u_quad_t, daddr_t, long,
  370: 	    long, int, struct buf **);
  371: int	cluster_wbuild (struct vnode *, long, daddr_t, int);
  372: void	cluster_write (struct buf *, u_quad_t, int);
  373: int	physio (dev_t dev, struct uio *uio, int ioflag);
  374: #define physread physio		/* read and write both map to physio() */
  375: #define physwrite physio	/* read and write both map to physio() */
  376: void	vfs_bio_set_validclean (struct buf *, int base, int size);
  377: void	vfs_bio_clrbuf (struct buf *);
  378: void	vfs_busy_pages (struct buf *, int clear_modify);
  379: void	vfs_unbusy_pages (struct buf *);
  380: void	vwakeup (struct buf *);
  381: int	vmapbuf (struct buf *);
  382: void	vunmapbuf (struct buf *);
  383: void	relpbuf (struct buf *, int *);
  384: void	brelvp (struct buf *);
  385: void	bgetvp (struct vnode *, struct buf *);
  386: void	pbgetvp (struct vnode *, struct buf *);
  387: void	pbrelvp (struct buf *);
  388: int	allocbuf (struct buf *bp, int size);
  389: void	reassignbuf (struct buf *, struct vnode *);
  390: void	pbreassignbuf (struct buf *, struct vnode *);
  391: struct	buf *trypbuf (int *);
  392: 
  393: #endif /* _KERNEL */
  394: 
  395: #endif /* !_SYS_BUF_H_ */