File: [DragonFly] / src / sys / kern / lwkt_ipiq.c
Revision 1.2: Sun Feb 15 05:15:25 2004 UTC, by dillon
Branches: MAIN
CVS tags: HEAD
Clean up and augment the cpu synchronization API a bit.  Embed the maxcount
in the structure rather than returning it and requiring it to be passed
again, and document the procedures a bit more.

/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.2 2004/02/15 05:15:25 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK	(UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/cpufunc.h>
#include <machine/lock.h>

#endif

#ifdef SMP
static __int64_t ipiq_count = 0;
static __int64_t ipiq_fifofull = 0;
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
#endif

#endif

#ifdef SMP

static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written without locking because
 * only the owning cpu ever advances its write index.
 *
 * YYY If the FIFO fills up we have to enable interrupts and process the
 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
 * Create a CPU_*() function to do this!
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * Must be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (target == gd) {
	func(arg);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * We always drain before the FIFO becomes full so it should never
     * become full.  We need to leave enough entries to deal with
     * reentrancy.
     */
    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    /* YYY memory barrier */
    ++ip->ip_windex;
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }
    --gd->gd_intr_nesting_level;
    cpu_send_ipiq(target->gd_cpuid);	/* issues mem barrier if appropriate */
    crit_exit();
    return(ip->ip_windex);
}
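
/*
 * Usage sketch (illustrative only, not part of this module): queue a
 * hypothetical callback for execution on another cpu.  Any pointer passed
 * as the argument must remain valid until the target cpu has run the
 * function.  Per the comment above, the caller holds a critical section.
 */
#if 0
static int example_counter;		/* hypothetical shared state */

static void
example_remote_func(void *arg)
{
    /* runs on the target cpu from its IPI processing path */
    ++*(int *)arg;
}

static void
example_send(globaldata_t target)
{
    crit_enter();
    lwkt_send_ipiq(target, example_remote_func, &example_counter);
    crit_exit();
}
#endif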

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}
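
/*
 * Usage sketch (illustrative only): broadcast the same request to all
 * other cpus.  gd_other_cpus already excludes the current cpu, and
 * lwkt_send_ipiq_mask() itself filters out stopped cpus.
 */
#if 0
static void
example_broadcast(ipifunc_t func, void *arg)
{
    lwkt_send_ipiq_mask(mycpu->gd_other_cpus, func, arg);
}
#endif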

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		lwkt_process_ipiq();
		if (--maxc == 0)
		    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			mycpu->gd_cpuid, target->gd_cpuid,
			ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
	    }
	    write_eflags(eflags);
	}
    }
}
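
/*
 * Usage sketch (illustrative only): lwkt_send_ipiq() returns the
 * post-increment write index, which can be handed to lwkt_wait_ipiq()
 * to spin until the target cpu has finished executing the function
 * (ip_xindex is only advanced after the function returns).
 */
#if 0
static void
example_send_and_wait(globaldata_t target, ipifunc_t func, void *arg)
{
    int seq;

    crit_enter();
    seq = lwkt_send_ipiq(target, func, arg);
    lwkt_wait_ipiq(target, seq);
    crit_exit();
}
#endif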

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions: one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    ip = globaldata_find(n)->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL))
	    goto again;
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    ip = globaldata_find(n)->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame))
	    goto again;
    }
}
#endif

static int
lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi = ip->ip_windex;

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
	ip->ip_rindex = ri + 1;
	ri &= MAXCPUFIFO_MASK;
	ip->ip_func[ri](ip->ip_arg[ri], frame);
	/* YYY memory barrier */
	ip->ip_xindex = ip->ip_rindex;
    }
    return(wi != ip->ip_windex);
}
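
/*
 * Illustrative note (not part of the original module): ip_rindex and
 * ip_windex are free-running counters that are only masked when used as
 * array indices, so their difference gives the number of queued entries
 * even across counter wraparound, as long as the FIFO never holds a full
 * MAXCPUFIFO entries.
 */
#if 0
static int
example_ipiq_depth(lwkt_ipiq_t ip)
{
    return(ip->ip_windex - ip->ip_rindex);	/* entries pending */
}
#endif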

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *	The function is executed synchronously before return on remote cpus.
 *	A lwkt_cpusync_t pointer is passed as an argument.  The data can
 *	be accessed via arg->cs_data.
 *
 *	XXX should I just pass the data as an argument to be consistent?
 */

void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}
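
/*
 * Usage sketch (illustrative only; the callback and flag are hypothetical):
 * run a function synchronously on every cpu, including the current one.
 * The caller's data is reached through the lwkt_cpusync_t argument.
 */
#if 0
static void
example_sync_func(lwkt_cpusync_t info)
{
    int *flagp = info->cs_data;		/* hypothetical per-caller data */

    *flagp = 1;
}

static void
example_sync_all(int *flagp)
{
    lwkt_cpusync_simple((cpumask_t)-1, example_sync_func, flagp);
}
#endif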

/*
 * lwkt_cpusync_fastdata()
 *
 *	The function is executed in tandem with return on remote cpus.
 *	The data is directly passed as an argument.  Do not pass pointers to
 *	temporary storage as the storage might have gone poof by the time
 *	the target cpu executes the function.
 *
 *	At the moment lwkt_cpusync is declared on the stack and we must wait
 *	for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 *	optimization we should be able to put a counter in the globaldata
 *	structure (if it is not otherwise being used) and just poke it and
 *	return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;		/* remote cpus call func(cs_data) */
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}
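
/*
 * Usage sketch (illustrative only; the counter and callback are
 * hypothetical): bump a generation count on all cpus in tandem with the
 * return.  Per the note above, the data pointer must reference storage
 * that outlives the call, so a static object is used here.
 */
#if 0
static int example_generation;		/* hypothetical shared state */

static void
example_bump(void *data)
{
    atomic_add_int((int *)data, 1);
}

static void
example_bump_all(void)
{
    lwkt_cpusync_fastdata((cpumask_t)-1, example_bump, &example_generation);
}
#endif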

/*
 * lwkt_cpusync_start()
 *
 *	Start synchronization with a set of target cpus, return once they are
 *	known to be in a synchronization loop.  The target cpus will execute
 *	poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *	XXX future: add lwkt_cpusync_start_quick() and require a call to
 *	lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *	potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    poll->cs_count = 0;
    poll->cs_mask = mask;
    poll->cs_maxcount = lwkt_send_ipiq_mask(mask & mycpu->gd_other_cpus,
				(ipifunc_t)lwkt_cpusync_remote1, poll);
    if (mask & (1 << mycpu->gd_cpuid)) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
    while (poll->cs_count != poll->cs_maxcount) {
	crit_enter();
	lwkt_process_ipiq();
	crit_exit();
    }
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
    poll->cs_maxcount += lwkt_send_ipiq_mask(mask & mycpu->gd_other_cpus,
				(ipifunc_t)lwkt_cpusync_remote1, poll);
    if (mask & (1 << mycpu->gd_cpuid)) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
    while (poll->cs_count != poll->cs_maxcount) {
	crit_enter();
	lwkt_process_ipiq();
	crit_exit();
    }
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    int count;

    count = -(poll->cs_maxcount + 1);
    poll->cs_count = -1;
    if (poll->cs_mask & (1 << mycpu->gd_cpuid)) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
    while (poll->cs_count != count) {
	crit_enter();
	lwkt_process_ipiq();
	crit_exit();
    }
}
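
/*
 * Usage sketch (illustrative only; the protected state is hypothetical):
 * hold all other cpus in their synchronization loops while the caller
 * mutates shared state, then release them with the finish call.
 */
#if 0
static void
example_update_shared_state(void)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = NULL;
    lwkt_cpusync_start(mycpu->gd_other_cpus, &cmd);
    /* all other cpus are now looping in lwkt_cpusync_remote2() */
    /* ... modify shared data structures here ... */
    lwkt_cpusync_finish(&cmd);
}
#endif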

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
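	    /*
	     * Copy cs_fin2_func and cs_data before decrementing cs_count:
	     * once the originator sees the final count it may return from
	     * lwkt_cpusync_finish() and the stack-based poll structure can
	     * go away, so poll must not be dereferenced after the
	     * atomic_add_int() below.
	     */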
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
	ip->ip_arg[wi] = poll;
	++ip->ip_windex;
    }
}

#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif