File:  [DragonFly] / src / sys / dev / raid / mlx / mlxvar.h
Revision 1.4: download - view: text, annotated - select for diffs
Wed May 19 22:52:47 2004 UTC (10 years, 3 months ago) by dillon
Branches: MAIN
CVS tags: HEAD, DragonFly_Snap13Sep2004, DragonFly_1_0_REL, DragonFly_1_0_RC1, DragonFly_1_0A_REL
Device layer rollup commit.

* cdevsw_add() is now required.  cdevsw_add() and cdevsw_remove() may specify
  a mask/match indicating the range of supported minor numbers.  Multiple
  cdevsw_add()'s using the same major number, but distinctly different
  ranges, may be issued.  All devices that failed to call cdevsw_add() before
  now do.

* cdevsw_remove() now automatically marks all devices within its supported
  range as being destroyed.

* vnode->v_rdev is no longer resolved when the vnode is created.  Instead,
  only v_udev (a newly added field) is resolved.  v_rdev is resolved when
  the vnode is opened and cleared on the last close.

* A great deal of code was making rather dubious assumptions with regards
  to the validity of devices associated with vnodes, primarily due to
  the persistence of a device structure due to being indexed by (major, minor)
  instead of by (cdevsw, major, minor).  In particular, if you run a program
  which connects to a USB device and then you pull the USB device and plug
  it back in, the vnode subsystem will continue to believe that the device
  is open when, in fact, it isn't (because it was destroyed and recreated).

  In particular, note that all the VFS mount procedures now check devices
  via v_udev instead of v_rdev prior to calling VOP_OPEN(), since v_rdev
  is NULL prior to the first open.

* The disk layer's device interaction has been rewritten.  The disk layer
  (i.e. the slice and disklabel management layer) no longer overloads
  its data onto the device structure representing the underlying physical
  disk.  Instead, the disk layer uses the new cdevsw_add() functionality
  to register its own cdevsw using the underlying device's major number,
  and simply does NOT register the underlying device's cdevsw.  No
  confusion is created because the device hash is now based on
  (cdevsw,major,minor) rather than (major,minor).

  NOTE: This also means that underlying raw disk devices may use the entire
  device minor number instead of having to reserve the bits used by the disk
  layer, and also means that we can (theoretically) stack a fully
  disklabel-supported 'disk' on top of any block device.

* The new reference counting scheme prevents the stale-device problem
  described above (a device being destroyed and recreated while a vnode still
  believes it is open) by associating a device with a cdevsw and disconnecting
  the device from its cdevsw when the cdevsw is removed.  Additionally, all
  udev2dev() lookups run through the cdevsw mask/match and only successfully
  find devices still associated with an active cdevsw.

* Major work on MFS:  MFS no longer shortcuts vnode and device creation.  It
  now creates a real vnode and a real device and implements real open and
  close VOPs.  Additionally, due to the disk layer changes, MFS is no longer
  limited to 255 mounts.  The new limit is 16 million.  Since MFS creates a
  real device node, mount_mfs will now create a real /dev/mfs<PID> device
  that can be read from userland (e.g. so you can dump an MFS filesystem).

* BUF AND DEVICE STRATEGY changes.  The struct buf contains a b_dev field.
  In order to properly handle stacked devices we now require that the b_dev
  field be initialized before the device strategy routine is called.  This
  required some additional work in various VFS implementations.  To enforce
  this requirement, biodone() now sets b_dev to NODEV.  The new disk layer
  will adjust b_dev before forwarding a request to the actual physical
  device.

* A bug in the ISO CD boot sequence which resulted in a panic has been fixed.

Testing by: lots of people, but David Rhodus found the most egregious bugs.

/*-
 * Copyright (c) 1999 Michael Smith
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$FreeBSD: src/sys/dev/mlx/mlxvar.h,v 1.5.2.3 2001/06/25 04:37:51 msmith Exp $
 *	$DragonFly: src/sys/dev/raid/mlx/mlxvar.h,v 1.4 2004/05/19 22:52:47 dillon Exp $
 */

/*
 * Debugging levels:
 *  0 - quiet, only emit warnings
 *  1 - noisy, emit major function points and things done
 *  2 - extremely noisy, emit trace items in loops, etc.
 */
#ifdef MLX_DEBUG
/*
 * debug(level, fmt, ...)  - print "<function>: <formatted message>\n" when
 *                           'level' does not exceed the compile-time
 *                           MLX_DEBUG threshold.
 * debug_called(level)     - print "<function>: called\n" under the same
 *                           condition.
 *
 * The function name is passed as a "%s" argument rather than pasted against
 * the adjacent string literals: __func__ is a predefined identifier, not a
 * string literal, so literal concatenation with it does not compile.
 * 'level' is parenthesized so that an expression argument compares as a unit.
 */
#define debug(level, fmt, args...)	do { if ((level) <= MLX_DEBUG) printf("%s: " fmt "\n", __func__ , ##args); } while(0)
#define debug_called(level)		do { if ((level) <= MLX_DEBUG) printf("%s: called\n", __func__); } while(0)
#else
/* Debugging disabled: both macros expand to nothing (arguments are not evaluated). */
#define debug(level, fmt, args...)
#define debug_called(level)
#endif

/*
 * Regardless of the actual capacity of the controller, we will allocate space
 * for 64 s/g entries.  Typically controllers support 17 or 33 entries (64k or
 * 128k maximum transfer assuming 4k page size and non-optimal alignment), but
 * making that fit cleanly without crossing page boundaries requires rounding up
 * to the next power of two.
 */
#define MLX_NSEG	64		/* number of s/g entries we allocate space for */

#define MLX_NSLOTS	256		/* max number of command slots; sizes mlx_softc.mlx_busycmd[] */

#define MLX_MAXDRIVES	32		/* max number of system drives; sizes mlx_softc.mlx_sysdrive[] */

/*
 * Structure describing a System Drive as attached to the controller.
 */
struct mlx_sysdrive 
{
    /* from MLX_CMD_ENQSYSDRIVE */
    u_int32_t		ms_size;	/* drive size as reported by the controller
					 * NOTE(review): units not visible here - presumably sectors; confirm */
    int			ms_state;	/* drive state as reported by the controller */
    int			ms_raidlevel;	/* RAID level as reported by the controller */

    /* synthetic geometry */
    int			ms_cylinders;	/* cylinder count of the synthetic geometry */
    int			ms_heads;	/* head count of the synthetic geometry */
    int			ms_sectors;	/* sector count of the synthetic geometry
					 * NOTE(review): presumably sectors per track; confirm */

    /* handle for attached driver */
    device_t		ms_disk;	/* NOTE(review): presumably the mlxd child device; confirm */
};

/*
 * Per-command control structure.
 */
struct mlx_command 
{
    TAILQ_ENTRY(mlx_command)	mc_link;	/* list linkage */

    struct mlx_softc		*mc_sc;		/* controller that owns us */
    u_int8_t			mc_slot;	/* command slot we occupy */
    u_int16_t			mc_status;	/* command completion status */
    time_t			mc_timeout;	/* when this command expires */
    u_int8_t			mc_mailbox[16];	/* command mailbox */
    u_int32_t			mc_sgphys;	/* physical address of s/g array in controller space */
    int				mc_nsgent;	/* number of entries in s/g map */
    int				mc_flags;	/* NOTE(review): presumably a bitmask of the MLX_CMD_* values below; confirm */
#define MLX_CMD_DATAIN		(1<<0)		/* NOTE(review): presumably data flows from the controller; confirm */
#define MLX_CMD_DATAOUT		(1<<1)		/* NOTE(review): presumably data flows to the controller; confirm */
#define MLX_CMD_PRIORITY	(1<<2)		/* high-priority command */

    void			*mc_data;	/* data buffer */
    size_t			mc_length;	/* NOTE(review): presumably the length of mc_data in bytes; confirm */
    bus_dmamap_t		mc_dmamap;	/* DMA map for data */
    u_int32_t			mc_dataphys;	/* data buffer base address controller space */

    void			(* mc_complete)(struct mlx_command *mc);	/* completion handler */
    void			*mc_private;	/* submitter-private data or wait channel */
};

/*
 * Per-controller structure.
 */
struct mlx_softc 
{
    /* bus connections */
    device_t		mlx_dev;	/* NOTE(review): presumably the controller's own device handle; confirm */
    struct resource	*mlx_mem;	/* mailbox interface window */
    int			mlx_mem_rid;	/* NOTE(review): presumably the resource ID used for mlx_mem; confirm */
    int			mlx_mem_type;	/* NOTE(review): presumably the resource type used for mlx_mem; confirm */
    bus_space_handle_t	mlx_bhandle;	/* bus space handle */
    bus_space_tag_t	mlx_btag;	/* bus space tag */
    bus_dma_tag_t	mlx_parent_dmat;/* parent DMA tag */
    bus_dma_tag_t	mlx_buffer_dmat;/* data buffer DMA tag */
    struct resource	*mlx_irq;	/* interrupt */
    void		*mlx_intr;	/* interrupt handle */

    /* scatter/gather lists and their controller-visible mappings */
    struct mlx_sgentry	*mlx_sgtable;	/* s/g lists */
    u_int32_t		mlx_sgbusaddr;	/* s/g table base address in bus space */
    bus_dma_tag_t	mlx_sg_dmat;	/* s/g buffer DMA tag */
    bus_dmamap_t	mlx_sg_dmamap;	/* map for s/g buffers */
    
    /* controller limits and features */
    struct mlx_enquiry2	*mlx_enq2;	/* NOTE(review): presumably the controller's ENQUIRY2 response; confirm */
    int			mlx_feature;	/* controller features/quirks */
#define MLX_FEAT_PAUSEWORKS	(1<<0)	/* channel pause works as expected */

    /* controller queues and arrays */
    TAILQ_HEAD(, mlx_command)	mlx_freecmds;		/* command structures available for reuse */
    TAILQ_HEAD(, mlx_command)	mlx_work;		/* active commands */
    struct mlx_command	*mlx_busycmd[MLX_NSLOTS];	/* busy commands */
    int			mlx_busycmds;			/* count of busy commands */
    struct mlx_sysdrive	mlx_sysdrive[MLX_MAXDRIVES];	/* system drives */
    mlx_bioq		mlx_bioq;			/* outstanding I/O operations */
    int			mlx_waitbufs;			/* number of bufs awaiting commands */

    /* controller status */
    int			mlx_geom;	/* one of the MLX_GEOM_* values below */
#define MLX_GEOM_128_32		0	/* geometry translation modes */
#define MLX_GEOM_256_63		1
    int			mlx_state;	/* NOTE(review): presumably a bitmask of the MLX_STATE_* values below; confirm */
#define MLX_STATE_INTEN		(1<<0)	/* interrupts have been enabled */
#define MLX_STATE_SHUTDOWN	(1<<1)	/* controller is shut down */
#define MLX_STATE_OPEN		(1<<2)	/* control device is open */
#define MLX_STATE_SUSPEND	(1<<3)	/* controller is suspended */
    struct callout_handle mlx_timeout;	/* periodic status monitor */
    time_t		mlx_lastpoll;	/* last time_second we polled for status */
    u_int16_t		mlx_lastevent;	/* sequence number of the last event we recorded */
    int			mlx_currevent;	/* sequence number last time we looked */
    int			mlx_background;	/* if != 0 rebuild or check is in progress */
#define MLX_BACKGROUND_CHECK		1	/* we started a check */
#define MLX_BACKGROUND_REBUILD		2	/* we started a rebuild */
#define MLX_BACKGROUND_SPONTANEOUS	3	/* it just happened somehow */
    struct mlx_rebuild_status mlx_rebuildstat;	/* last rebuild status */
    struct mlx_pause	mlx_pause;	/* pending pause operation details */

    int			mlx_locks;	/* reentrancy avoidance; bits are set/cleared by mlx_lock_tas()/mlx_lock_clr() */
    int			mlx_flags;	/* NOTE(review): presumably a bitmask of the values below; confirm */
#define MLX_SPINUP_REPORTED	(1<<0)	/* "spinning up drives" message displayed */
#define MLX_EVENTLOG_BUSY	(1<<1)	/* currently reading event log */

    /* interface-specific accessor functions */
    int			mlx_iftype;	/* interface protocol */
#define MLX_IFTYPE_2	2
#define MLX_IFTYPE_3	3
#define MLX_IFTYPE_4	4
#define MLX_IFTYPE_5	5
    /* NOTE(review): presumably tries to hand 'mc' to the controller; return convention not visible here */
    int			(* mlx_tryqueue)(struct mlx_softc *sc, struct mlx_command *mc);
    /* NOTE(review): presumably reports one completed command through *slot and *status; confirm */
    int			(* mlx_findcomplete)(struct mlx_softc *sc, u_int8_t *slot, u_int16_t *status);
    /* NOTE(review): 'action' is presumably one of the MLX_INTACTION_* values below; confirm */
    void		(* mlx_intaction)(struct mlx_softc *sc, int action);
    /* NOTE(review): firmware handshake; meaning of *error, *param1, *param2 not visible here */
    int			(* mlx_fw_handshake)(struct mlx_softc *sc, int *error, int *param1, int *param2);
#define MLX_INTACTION_DISABLE		0
#define MLX_INTACTION_ENABLE		1
};

/*
 * Simple (stupid) locks.
 *
 * Note that these are designed to avoid reentrancy, not concurrency, and will
 * need to be replaced with something better.
 */
/* Lock bits kept in mlx_softc.mlx_locks; pass one as 'lock' to mlx_lock_tas()/mlx_lock_clr(). */
#define MLX_LOCK_COMPLETING	(1<<0)	/* NOTE(review): presumably guards command completion processing; confirm */
#define MLX_LOCK_STARTING	(1<<1)	/* NOTE(review): presumably guards command start processing; confirm */

/*
 * Test-and-set for the simple reentrancy locks.
 *
 * Returns 1 if any bit of 'lock' is already set in sc->mlx_locks (the caller
 * did not get the lock); otherwise sets the bits and returns 0.
 *
 * The test and the set are two separate steps; only the set itself goes
 * through atomic_set_int().
 */
static __inline int
mlx_lock_tas(struct mlx_softc *sc, int lock)
{
    int already_held = (sc->mlx_locks & lock) != 0;

    if (!already_held)
	atomic_set_int(&sc->mlx_locks, lock);
    return(already_held);
}

/*
 * Release a simple reentrancy lock: clear the bits of 'lock' in
 * sc->mlx_locks via atomic_clear_int().
 */
static __inline void
mlx_lock_clr(struct mlx_softc *sc, int lock)
{
    int *lockword = &sc->mlx_locks;

    atomic_clear_int(lockword, lock);
}

/*
 * Interface between bus connections and driver core.
 */
/* Entry points that take the per-controller softc directly. */
extern void		mlx_free(struct mlx_softc *sc);
extern int		mlx_attach(struct mlx_softc *sc);
extern void		mlx_startup(struct mlx_softc *sc);
/* Interrupt handler; NOTE(review): 'data' is presumably the struct mlx_softc pointer - confirm. */
extern void		mlx_intr(void *data);
/* Entry points that take the device_t handle. */
extern int		mlx_detach(device_t dev);
extern int		mlx_shutdown(device_t dev);
extern int		mlx_suspend(device_t dev); 
extern int		mlx_resume(device_t dev);
/* Open/close/ioctl entry points, declared through the d_open_t/d_close_t/d_ioctl_t function types. */
extern d_open_t		mlx_open;
extern d_close_t	mlx_close;
extern d_ioctl_t	mlx_ioctl;

/* Device classes; NOTE(review): presumably for the controller (mlx) and system disk (mlxd) drivers - confirm. */
extern devclass_t	mlx_devclass;
extern devclass_t	mlxd_devclass;

/*
 * Mylex System Disk driver
 */
struct mlxd_softc 
{
    device_t		mlxd_dev;	/* NOTE(review): presumably this disk's own device handle; confirm */
    dev_t		mlxd_dev_t;	/* NOTE(review): presumably the device node created for this disk; confirm */
    struct mlx_softc	*mlxd_controller;	/* controller softc this disk refers to */
    struct mlx_sysdrive	*mlxd_drive;	/* system drive record this disk refers to */
    struct disk		mlxd_disk;	/* NOTE(review): presumably disk-layer state for this drive; confirm */
    struct devstat	mlxd_stats;	/* NOTE(review): presumably devstat accounting for this drive; confirm */
    struct disklabel	mlxd_label;	/* NOTE(review): presumably the disklabel for this drive; confirm */
    int			mlxd_unit;	/* NOTE(review): presumably the unit number of this disk; confirm */
    int			mlxd_flags;	/* NOTE(review): presumably a bitmask of the MLXD_* values below; confirm */
#define MLXD_OPEN	(1<<0)		/* drive is open (can't shut down) */
};

/*
 * Interface between driver core and disk driver (should be using a bus?)
 */
/* Hand an mlx_bio to controller 'sc'. NOTE(review): return convention not visible here. */
extern int	mlx_submit_buf(struct mlx_softc *sc, mlx_bio *bp);
/* Pass an ioctl for system drive 'drive' to controller 'sc'. NOTE(review): return convention not visible here. */
extern int	mlx_submit_ioctl(struct mlx_softc *sc, struct mlx_sysdrive *drive, u_long cmd, 
				 caddr_t addr, int32_t flag, d_thread_t *td);
/* NOTE(review): 'data' is presumably the completed mlx_bio; confirm against the mlxd implementation. */
extern void	mlxd_intr(void *data);