File:  [DragonFly] / src / sys / kern / lwkt_ipiq.c
Revision 1.1
Sun Feb 15 02:14:41 2004 UTC by dillon
Branches: MAIN
CVS tags: HEAD
Split the IPIQ messaging out of lwkt_thread.c and move it to its own file,
lwkt_ipiq.c.

Add an MI synchronous cpu rendezvous API, lwkt_cpusync_*().  This API allows the
kernel to synchronize an operation across any number of cpus.  Multiple cpus
can initiate synchronization operations simultaneously without creating a
deadlock.  The API utilizes the IPI messaging core and guarantees that
other synchronization and IPI messaging operations will continue to work
during any given synchronization op.  The API is a spin-blocking API, meaning
that it will not switch threads and can be used by mainline code, interrupts,
and other sensitive code.
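
For example (illustrative only; my_sync_func and my_data are placeholders,
not part of this commit), a caller wanting to run a function on a set of
cpus, possibly including its own, could use the simple wrapper:

	static void
	my_sync_func(void *data)	/* matches cpusync_func2_t */
	{
		/* per-cpu side of the operation */
	}

	lwkt_cpusync_simple(mask, my_sync_func, &my_data);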

This API is intended to replace smp_rendezvous(), Xcpustop, and other
hardwired IPI ops.  It will also be used to fix our TLB shootdown code.

As of this commit the API has not yet been connected to anything and has
been tested only a little.

/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.1 2004/02/15 02:14:41 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK	(UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/cpufunc.h>
#include <machine/lock.h>

#endif

#ifdef SMP
static __int64_t ipiq_count = 0;
static __int64_t ipiq_fifofull = 0;
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
#endif

#endif

#ifdef SMP

static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu; only the owning cpu enqueues onto a given FIFO and
 * only the target cpu drains it.
 *
 * YYY If the FIFO fills up we have to enable interrupts and process the
 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
 * Create a CPU_*() function to do this!
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * Must be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (target == gd) {
	func(arg);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * We always drain before the FIFO becomes full so it should never
     * become full.  We need to leave enough entries to deal with
     * reentrancy.
     */
    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    /* YYY memory barrier */
    ++ip->ip_windex;
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }
    --gd->gd_intr_nesting_level;
    cpu_send_ipiq(target->gd_cpuid);	/* issues mem barrier if appropriate */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Deprecated; used only by fast interrupt forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}
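
/*
 * Illustrative use of the mask variant (my_ipi_func and my_arg are
 * placeholders, not part of this file): ask every other cpu to run a
 * function, noting how many messages were queued:
 *
 *	count = lwkt_send_ipiq_mask(mycpu->gd_other_cpus, my_ipi_func, my_arg);
 */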

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		lwkt_process_ipiq();
		if (--maxc == 0)
		    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			mycpu->gd_cpuid, target->gd_cpuid,
			ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
	    }
	    write_eflags(eflags);
	}
    }
}
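
/*
 * Illustrative pairing (my_func and arg are placeholders, not part of
 * this file): lwkt_send_ipiq() returns a sequence number which
 * lwkt_wait_ipiq() spins on until the target cpu has executed the
 * function:
 *
 *	seq = lwkt_send_ipiq(target, my_func, arg);
 *	lwkt_wait_ipiq(target, seq);
 */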

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions: one where no interrupt frame is available
 * (when called from the send code and from splz), and one where an
 * interrupt frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    ip = globaldata_find(n)->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL))
	    goto again;
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    ip = globaldata_find(n)->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame))
	    goto again;
    }
}
#endif

static int
lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi = ip->ip_windex;

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
	ip->ip_rindex = ri + 1;
	ri &= MAXCPUFIFO_MASK;
	ip->ip_func[ri](ip->ip_arg[ri], frame);
	/* YYY memory barrier */
	ip->ip_xindex = ip->ip_rindex;
    }
    return(wi != ip->ip_windex);
}

/*
 * CPU Synchronization Support
 */

void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;
    int count;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    count = lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd, count);
}

/*
 * Start synchronization with a set of target cpus, return once they are
 * known to be in a synchronization loop.  The target cpus will execute
 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
 */
int
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    int count;

    poll->cs_count = 0;
    count = lwkt_send_ipiq_mask(mask, (ipifunc_t)lwkt_cpusync_remote1, poll);
    while (poll->cs_count != count) {
	crit_enter();
	lwkt_process_ipiq();
	crit_exit();
    }
    return(count);
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll, int count)
{
    count = -(count + 1);
    poll->cs_count = -1;
    while (poll->cs_count != count) {
	crit_enter();
	lwkt_process_ipiq();
	crit_exit();
    }
}
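
/*
 * Illustrative start/finish pairing (my_run_func is a placeholder, not
 * part of this file).  This mirrors lwkt_cpusync_simple() above, but
 * runs a custom function on the targets in tandem with the start
 * returning, then holds them until the critical operation is complete:
 *
 *	struct lwkt_cpusync cmd;
 *	int count;
 *
 *	cmd.cs_run_func = my_run_func;
 *	cmd.cs_fin1_func = NULL;
 *	cmd.cs_fin2_func = NULL;
 *	cmd.cs_data = NULL;
 *	count = lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
 *	(perform the operation that required the cpus to be synchronized)
 *	lwkt_cpusync_finish(&cmd, count);
 */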

/*
 * Helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * Helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
	ip->ip_arg[wi] = poll;
	++ip->ip_windex;
    }
}

#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif