--- src/sys/kern/kern_timeout.c 2004/09/13 23:18:20 1.11 +++ src/sys/kern/kern_timeout.c 2004/09/17 00:18:09 1.12 @@ -1,4 +1,37 @@ -/*- +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. @@ -39,6 +72,32 @@ * $FreeBSD: src/sys/kern/kern_timeout.c,v 1.59.2.1 2001/11/13 18:24:52 archie Exp $ * $DragonFly$ */ +/* + * DRAGONFLY BGL STATUS + * + * All the API functions should be MP safe. + * + * The callback functions will be flagged as being MP safe if the + * timeout structure is initialized with callout_init_mp() instead of + * callout_init(). + * + * The helper threads cannot be made preempt-capable until after we + * clean up all the uses of splsoftclock() and related interlocks (which + * require the related functions to be MP safe as well). + */ +/* + * The callout mechanism is based on the work of Adam M. Costello and + * George Varghese, published in a technical report entitled "Redesigning + * the BSD Callout and Timer Facilities" and modified slightly for inclusion + * in FreeBSD by Justin T. Gibbs. The original work on the data structures + * used in this implementation was published by G. Varghese and T. Lauck in + * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for + * the Efficient Implementation of a Timer Facility" in the Proceedings of + * the 11th ACM Annual Symposium on Operating Systems Principles, + * Austin, Texas Nov 1987. + * + * The per-cpu augmentation was done by Matthew Dillon. + */ #include #include @@ -48,99 +107,217 @@ #include #include #include +#include "opt_ddb.h" + +#ifndef MAX_SOFTCLOCK_STEPS +#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ +#endif + + +struct softclock_pcpu { + struct callout_list callfree; + struct callout_tailq *callwheel; + struct callout * volatile next; + int softticks; /* softticks index */ + int curticks; /* per-cpu ticks counter */ + int isrunning; + struct thread thread; + +}; + +typedef struct softclock_pcpu *softclock_pcpu_t; /* * TODO: * allocate more timeout table slots when table overflows. */ +static MALLOC_DEFINE(M_CALLOUT, "callout", "callout structures"); +static int callwheelsize; +static int callwheelbits; +static int callwheelmask; +static struct softclock_pcpu softclock_pcpu_ary[MAXCPU]; + +static void softclock_handler(void *arg); + +static void +swi_softclock_setup(void *arg) +{ + int cpu; + int i; -/* Exported to machdep.c and/or kern_clock.c. */ -struct callout *callout; -struct callout_list callfree; -int callwheelsize, callwheelbits, callwheelmask; -struct callout_tailq *callwheel; -int softticks; /* Like ticks, but for softclock(). */ + /* + * Figure out how large a callwheel we need. It must be a power of 2. + */ + callwheelsize = 1; + callwheelbits = 0; + while (callwheelsize < ncallout) { + callwheelsize <<= 1; + ++callwheelbits; + } + callwheelmask = callwheelsize - 1; -static struct callout * volatile nextsoftcheck; /* Next callout to checked. */ + /* + * Initialize per-cpu data structures. + */ + for (cpu = 0; cpu < ncpus; ++cpu) { + softclock_pcpu_t sc; + struct callout *callout; + + sc = &softclock_pcpu_ary[cpu]; + + sc->callwheel = malloc(sizeof(*sc->callwheel) * callwheelsize, + M_CALLOUT, M_WAITOK|M_ZERO); + for (i = 0; i < callwheelsize; ++i) + TAILQ_INIT(&sc->callwheel[i]); + + SLIST_INIT(&sc->callfree); + callout = malloc(sizeof(struct callout) * ncallout, + M_CALLOUT, M_WAITOK|M_ZERO); + for (i = 0; i < ncallout; ++i) { + callout_init(&callout[i]); + callout[i].c_flags |= CALLOUT_LOCAL_ALLOC; + SLIST_INSERT_HEAD(&sc->callfree, &callout[i], + c_links.sle); + } + + /* + * Create a preemption-capable thread for each cpu to handle + * softclock timeouts on that cpu. The preemption can only + * be blocked by a critical section. The thread can itself + * be preempted by normal interrupts. + */ + lwkt_create(softclock_handler, sc, NULL, + &sc->thread, TDF_STOPREQ|TDF_INTTHREAD, -1, + "softclock %d", cpu); + lwkt_setpri(&sc->thread, TDPRI_SOFT_NORM); +#if 0 + /* + * Do not make the thread preemptable until we clean up all + * the splsoftclock() calls in the system. Since the threads + * are no longer operated as a software interrupt, the + * splsoftclock() calls will not have any effect on them. + */ + sc->thread.td_preemptable = lwkt_preempt; +#endif + } +} + +SYSINIT(softclock_setup, SI_SUB_CPU, SI_ORDER_ANY, swi_softclock_setup, NULL); /* - * The callout mechanism is based on the work of Adam M. Costello and - * George Varghese, published in a technical report entitled "Redesigning - * the BSD Callout and Timer Facilities" and modified slightly for inclusion - * in FreeBSD by Justin T. Gibbs. The original work on the data structures - * used in this implementation was published by G. Varghese and T. Lauck in - * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for - * the Efficient Implementation of a Timer Facility" in the Proceedings of - * the 11th ACM Annual Symposium on Operating Systems Principles, - * Austin, Texas Nov 1987. + * This routine is called from the hardclock() (basically a FASTint/IPI) on + * each cpu in the system. sc->curticks is this cpu's notion of the timebase. + * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'! sc->softticks is where + * the callwheel is currently indexed. + * + * WARNING! The MP lock is not necessarily held on call, nor can it be + * safely obtained. + * + * sc->softticks is adjusted by either this routine or our helper thread + * depending on whether the helper thread is running or not. */ +void +hardclock_softtick(globaldata_t gd) +{ + softclock_pcpu_t sc; + + sc = &softclock_pcpu_ary[gd->gd_cpuid]; + ++sc->curticks; + if (sc->isrunning) + return; + if (sc->softticks == sc->curticks) { + /* + * in sync, only wakeup the thread if there is something to + * do. + */ + if (TAILQ_FIRST(&sc->callwheel[sc->softticks & callwheelmask])) + { + sc->isrunning = 1; + lwkt_schedule(&sc->thread); + } else { + ++sc->softticks; + } + } else { + /* + * out of sync, wakeup the thread unconditionally so it can + * catch up. + */ + sc->isrunning = 1; + lwkt_schedule(&sc->thread); + } +} /* - * Software (low priority) clock interrupt. - * Run periodic events from timeout queue. + * This procedure is the main loop of our per-cpu helper thread. The + * sc->isrunning flag prevents us from racing hardclock_softtick() and + * a critical section is sufficient to interlock sc->curticks. + * + * The thread starts with the MP lock held and not in a critical section. + * The loop itself is MP safe while individual callbacks may or may not + * be, so we obtain or release the MP lock as appropriate. */ static void -swi_softclock(void *dummy) +softclock_handler(void *arg) { + softclock_pcpu_t sc; struct callout *c; struct callout_tailq *bucket; - int curticks; - int steps; /* #steps since we last allowed interrupts */ - -#ifndef MAX_SOFTCLOCK_STEPS -#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ -#endif /* MAX_SOFTCLOCK_STEPS */ + void (*c_func)(void *); + void *c_arg; + int c_flags; +#ifdef SMP + int mpsafe = 0; +#endif - steps = 0; + sc = arg; crit_enter(); - while (softticks != ticks) { - softticks++; - /* - * softticks may be modified by hard clock, so cache - * it while we work on a given bucket. - */ - curticks = softticks; - bucket = &callwheel[curticks & callwheelmask]; - c = TAILQ_FIRST(bucket); - while (c) { - if (c->c_time != curticks) { - c = TAILQ_NEXT(c, c_links.tqe); - ++steps; - if (steps >= MAX_SOFTCLOCK_STEPS) { - nextsoftcheck = c; - /* Give interrupts a chance. */ - crit_exit(); - crit_enter(); - c = nextsoftcheck; - steps = 0; +loop: + while (sc->softticks != (int)(sc->curticks + 1)) { + bucket = &sc->callwheel[sc->softticks & callwheelmask]; + + for (c = TAILQ_FIRST(bucket); c; c = sc->next) { + sc->next = TAILQ_NEXT(c, c_links.tqe); + if (c->c_time != sc->softticks) + continue; + TAILQ_REMOVE(bucket, c, c_links.tqe); + c_func = c->c_func; + c_arg = c->c_arg; + c_flags = c->c_flags; + c->c_func = NULL; + KKASSERT(c->c_flags & CALLOUT_DID_INIT); + if (c->c_flags & CALLOUT_LOCAL_ALLOC) { + c->c_flags = CALLOUT_LOCAL_ALLOC | + CALLOUT_DID_INIT; + SLIST_INSERT_HEAD(&sc->callfree, + c, c_links.sle); + } else { + c->c_flags &= ~CALLOUT_PENDING; + } + crit_exit(); +#ifdef SMP + if (c_flags & CALLOUT_MPSAFE) { + if (mpsafe == 0) { + mpsafe = 1; + rel_mplock(); } } else { - void (*c_func)(void *); - void *c_arg; - - nextsoftcheck = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE(bucket, c, c_links.tqe); - c_func = c->c_func; - c_arg = c->c_arg; - c->c_func = NULL; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) { - c->c_flags = CALLOUT_LOCAL_ALLOC; - SLIST_INSERT_HEAD(&callfree, c, - c_links.sle); - } else { - c->c_flags = - (c->c_flags & ~CALLOUT_PENDING); + if (mpsafe) { + mpsafe = 0; + get_mplock(); } - crit_exit(); - c_func(c_arg); - crit_enter(); - steps = 0; - c = nextsoftcheck; } +#endif + c_func(c_arg); + crit_enter(); + /* NOTE: list may have changed */ } + ++sc->softticks; } - nextsoftcheck = NULL; - crit_exit(); + sc->isrunning = 0; + lwkt_deschedule_self(&sc->thread); /* == curthread */ + lwkt_switch(); + goto loop; + /* NOT REACHED */ } /* @@ -162,31 +339,31 @@ swi_softclock(void *dummy) struct callout_handle timeout(timeout_t *ftn, void *arg, int to_ticks) { - int s; + softclock_pcpu_t sc; struct callout *new; struct callout_handle handle; - s = splhigh(); + sc = &softclock_pcpu_ary[mycpu->gd_cpuid]; + crit_enter(); /* Fill in the next free callout structure. */ - new = SLIST_FIRST(&callfree); - if (new == NULL) + new = SLIST_FIRST(&sc->callfree); + if (new == NULL) { /* XXX Attempt to malloc first */ panic("timeout table full"); - SLIST_REMOVE_HEAD(&callfree, c_links.sle); + } + SLIST_REMOVE_HEAD(&sc->callfree, c_links.sle); callout_reset(new, to_ticks, ftn, arg); handle.callout = new; - splx(s); + crit_exit(); return (handle); } void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { - int s; - /* * Check for a handle that was initialized * by callout_handle_init, but never used @@ -195,10 +372,10 @@ untimeout(timeout_t *ftn, void *arg, str if (handle.callout == NULL) return; - s = splhigh(); + crit_enter(); if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); - splx(s); + crit_exit(); } void @@ -213,7 +390,8 @@ callout_handle_init(struct callout_handl * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can - * safely be passed to callout_reset() and callout_stop() + * safely be passed to callout_reset() and callout_stop() + * callout_init_mp() - same but any installed functions must be MP safe. * * defines three convenience macros: * @@ -221,78 +399,154 @@ callout_handle_init(struct callout_handl * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ + +/* + * Start or restart a timeout. Install the callout structure in the + * callwheel. Callers may legally pass any value, even if 0 or negative, + * but since the sc->curticks index may have already been processed a + * minimum timeout of 1 tick will be enforced. + * + * The callout is installed on and will be processed on the current cpu's + * callout wheel. + */ void -callout_reset(struct callout *c, int to_ticks, - void (*ftn)(void *), void *arg) +callout_reset(struct callout *c, int to_ticks, void (*ftn)(void *), + void *arg) { - int s; + softclock_pcpu_t sc; + globaldata_t gd; + +#ifdef INVARIANTS + if ((c->c_flags & CALLOUT_DID_INIT) == 0) { + callout_init(c); + printf( + "callout_reset(%p) from %p: callout was not initialized\n", + c, ((int **)&c)[-1]); +#ifdef DDB + db_print_backtrace(); +#endif + } +#endif + gd = mycpu; + sc = &softclock_pcpu_ary[gd->gd_cpuid]; + crit_enter_gd(gd); - s = splhigh(); if (c->c_flags & CALLOUT_PENDING) callout_stop(c); - /* - * We could spl down here and back up at the TAILQ_INSERT_TAIL, - * but there's no point since doing this setup doesn't take much - * time. - */ if (to_ticks <= 0) to_ticks = 1; c->c_arg = arg; c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); c->c_func = ftn; - c->c_time = ticks + to_ticks; - TAILQ_INSERT_TAIL(&callwheel[c->c_time & callwheelmask], + c->c_time = sc->curticks + to_ticks; +#ifdef SMP + c->c_gd = gd; +#endif + + TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & callwheelmask], c, c_links.tqe); - splx(s); - + crit_exit_gd(gd); } +/* + * Stop a running timer. WARNING! If called on a cpu other then the one + * the callout was started on this function will liveloop on its IPI to + * the target cpu to process the request. It is possible for the callout + * to execute in that case. + * + * WARNING! This routine may be called from an IPI + */ int callout_stop(struct callout *c) { - int s; + globaldata_t gd = mycpu; +#ifdef SMP + globaldata_t tgd; +#endif + softclock_pcpu_t sc; + +#ifdef INVARIANTS + if ((c->c_flags & CALLOUT_DID_INIT) == 0) { + callout_init(c); + printf( + "callout_reset(%p) from %p: callout was not initialized\n", + c, ((int **)&c)[-1]); +#ifdef DDB + db_print_backtrace(); +#endif + } +#endif + crit_enter_gd(gd); - s = splhigh(); /* * Don't attempt to delete a callout that's not on the queue. */ - if (!(c->c_flags & CALLOUT_PENDING)) { + if ((c->c_flags & CALLOUT_PENDING) == 0) { c->c_flags &= ~CALLOUT_ACTIVE; - splx(s); + crit_exit_gd(gd); return (0); } - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); +#ifdef SMP + if ((tgd = c->c_gd) != gd) { + /* + * If the callout is owned by a different CPU we have to + * execute the function synchronously on the target cpu. + */ + int seq; - if (nextsoftcheck == c) { - nextsoftcheck = TAILQ_NEXT(c, c_links.tqe); - } - TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c, c_links.tqe); - c->c_func = NULL; + cpu_mb1(); /* don't let tgd alias c_gd */ + seq = lwkt_send_ipiq(tgd, (void *)callout_stop, c); + lwkt_wait_ipiq(tgd, seq); + } else +#endif + { + /* + * If the callout is owned by the same CPU we can + * process it directly, but if we are racing our helper + * thread (sc->next), we have to adjust sc->next. The + * race is interlocked by a critical section. + */ + sc = &softclock_pcpu_ary[gd->gd_cpuid]; + + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); + if (sc->next == c) + sc->next = TAILQ_NEXT(c, c_links.tqe); + + TAILQ_REMOVE(&sc->callwheel[c->c_time & callwheelmask], + c, c_links.tqe); + c->c_func = NULL; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) { - SLIST_INSERT_HEAD(&callfree, c, c_links.sle); + if (c->c_flags & CALLOUT_LOCAL_ALLOC) { + SLIST_INSERT_HEAD(&sc->callfree, c, c_links.sle); + } } - splx(s); + crit_exit_gd(gd); return (1); } +/* + * Prepare a callout structure for use by callout_reset() and/or + * callout_stop(). The MP version of this routine requires that the callback + * function installed by callout_reset() by MP safe. + */ void callout_init(struct callout *c) { bzero(c, sizeof *c); + c->c_flags = CALLOUT_DID_INIT; } -static void -swi_softclock_setup(void *arg) +void +callout_init_mp(struct callout *c) { - register_swi(SWI_CLOCK, swi_softclock, NULL, "swi_sftclk"); - swi_setpriority(SWI_CLOCK, TDPRI_SOFT_TIMER); + callout_init(c); + c->c_flags |= CALLOUT_MPSAFE; } -SYSINIT(vm_setup, SI_SUB_CPU, SI_ORDER_ANY, swi_softclock_setup, NULL); - +/* What, are you joking? This is nuts! -Matt */ +#if 0 #ifdef APM_FIXUP_CALLTODO /* * Adjust the kernel calltodo timeout list. This routine is used after @@ -358,3 +612,5 @@ adjust_timeout_calltodo(struct timeval * return; } #endif /* APM_FIXUP_CALLTODO */ +#endif +