File:
[DragonFly] /
src /
sys /
kern /
subr_prof.c
Revision
1.9:
download - view:
text,
annotated -
select for diffs
Tue Jun 1 22:19:30 2004 UTC (8 years, 11 months ago) by
dillon
Branches:
MAIN
CVS tags:
HEAD,
DragonFly_Stable,
DragonFly_Snap29Sep2004,
DragonFly_Snap13Sep2004,
DragonFly_RELEASE_1_2_Slip,
DragonFly_RELEASE_1_2,
DragonFly_1_0_REL,
DragonFly_1_0_RC1,
DragonFly_1_0A_REL
ANSIfication. No operational changes.
Submitted-by: Tim Wickberg <me@k9mach3.org>
1: /*-
2: * Copyright (c) 1982, 1986, 1993
3: * The Regents of the University of California. All rights reserved.
4: *
5: * Redistribution and use in source and binary forms, with or without
6: * modification, are permitted provided that the following conditions
7: * are met:
8: * 1. Redistributions of source code must retain the above copyright
9: * notice, this list of conditions and the following disclaimer.
10: * 2. Redistributions in binary form must reproduce the above copyright
11: * notice, this list of conditions and the following disclaimer in the
12: * documentation and/or other materials provided with the distribution.
13: * 3. All advertising materials mentioning features or use of this software
14: * must display the following acknowledgement:
15: * This product includes software developed by the University of
16: * California, Berkeley and its contributors.
17: * 4. Neither the name of the University nor the names of its contributors
18: * may be used to endorse or promote products derived from this software
19: * without specific prior written permission.
20: *
21: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31: * SUCH DAMAGE.
32: *
33: * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
34: * $FreeBSD: src/sys/kern/subr_prof.c,v 1.32.2.2 2000/08/03 00:09:32 ps Exp $
35: * $DragonFly: src/sys/kern/subr_prof.c,v 1.9 2004/06/01 22:19:30 dillon Exp $
36: */
37:
38: #include <sys/param.h>
39: #include <sys/systm.h>
40: #include <sys/sysproto.h>
41: #include <sys/kernel.h>
42: #include <sys/proc.h>
43: #include <sys/resourcevar.h>
44: #include <sys/sysctl.h>
45:
46: #include <machine/ipl.h>
47: #include <machine/cpu.h>
48:
49: #ifdef GPROF
50: #include <sys/malloc.h>
51: #include <sys/gmon.h>
52: #undef MCOUNT
53:
/* Malloc type tag passed to malloc() for the profiling buffer in kmstartup(). */
54: static MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer");
55:
/* Register kmstartup() with SYSINIT at SI_SUB_KPROF / SI_ORDER_FIRST. */
56: static void kmstartup (void *);
57: SYSINIT(kmem, SI_SUB_KPROF, SI_ORDER_FIRST, kmstartup, NULL)
58:
/*
 * Global kernel profiling parameters shared by kmstartup() and the
 * kern.prof sysctl handler.  Only the first member is given an explicit
 * initializer (GMON_PROF_OFF); presumably that member is the state field --
 * confirm against <sys/gmon.h>.
 */
59: struct gmonparam _gmonparam = { GMON_PROF_OFF };
60:
61: #ifdef GUPROF
62: #include <machine/asmacros.h>
63:
64: void
65: nullfunc_loop_profiled()
66: {
67: int i;
68:
69: for (i = 0; i < CALIB_SCALE; i++)
70: nullfunc_profiled();
71: }
72:
/*
 * Address used by kmstartup() as the end of nullfunc_loop_profiled()'s
 * text when summing that function's histogram counters.
 * XXX presumably relies on nullfunc_profiled() being laid out directly
 * after nullfunc_loop_profiled() in the kernel text -- confirm.
 */
#define	nullfunc_loop_profiled_end	nullfunc_profiled	/* XXX */

/*
 * Deliberately empty function; it exists only to be called from
 * nullfunc_loop_profiled() during overhead calibration.
 *
 * Takes no arguments and returns nothing.
 */
void
nullfunc_profiled(void)
{
}
79: #endif /* GUPROF */
80:
/*
 * kmstartup: initialize the kernel profiling state held in _gmonparam.
 *
 * Computes the profiled text range from btext/etext, derives the sizes of
 * the histogram (kcount), call-graph hash (froms) and arc (tos) tables,
 * and obtains all three from one M_NOWAIT allocation.  When built with
 * GUPROF it additionally measures the overheads of cputime(), mcount and
 * mexitcount with interrupts disabled and stores them in the gmonparam.
 *
 * dummy is not referenced in the body.
 *
 * NOTE(review): on allocation failure the function returns after the size
 * fields have already been set, leaving the table pointers untouched.
 */
81: static void
82: kmstartup(void *dummy)
83: {
84: char *cp;
85: struct gmonparam *p = &_gmonparam;
86: #ifdef GUPROF
87: int cputime_overhead;
88: int empty_loop_time;
89: int i;
90: int mcount_overhead;
91: int mexitcount_overhead;
92: int nullfunc_loop_overhead;
93: int nullfunc_loop_profiled_time;
94: uintfptr_t tmp_addr;
95: #endif
96:
97: /*
98: * Round lowpc and highpc to multiples of the density we're using
99: * so the rest of the scaling (here and in gprof) stays in ints.
100: */
101: p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER));
102: p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
103: p->textsize = p->highpc - p->lowpc;
104: printf("Profiling kernel, textsize=%lu [%x..%x]\n",
105: p->textsize, p->lowpc, p->highpc);
106: p->kcountsize = p->textsize / HISTFRACTION;
107: p->hashfraction = HASHFRACTION;
108: p->fromssize = p->textsize / HASHFRACTION;
109: p->tolimit = p->textsize * ARCDENSITY / 100;
/* Clamp the arc limit to the range [MINARCS, MAXARCS]. */
110: if (p->tolimit < MINARCS)
111: p->tolimit = MINARCS;
112: else if (p->tolimit > MAXARCS)
113: p->tolimit = MAXARCS;
114: p->tossize = p->tolimit * sizeof(struct tostruct);
/*
 * A single zeroed allocation backs all three tables; it is carved up
 * below in the order tos, kcount, froms.
 */
115: cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize,
116: M_GPROF, M_NOWAIT);
117: if (cp == 0) {
118: printf("No memory for profiling.\n");
119: return;
120: }
121: bzero(cp, p->kcountsize + p->tossize + p->fromssize);
122: p->tos = (struct tostruct *)cp;
123: cp += p->tossize;
124: p->kcount = (HISTCOUNTER *)cp;
125: cp += p->kcountsize;
126: p->froms = (u_short *)cp;
127:
128: #ifdef GUPROF
129: /* Initialize pointers to overhead counters. */
130: p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime));
131: p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount));
132: p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount));
133:
134: /*
135: * Disable interrupts to avoid interference while we calibrate
136: * things.
137: */
138: cpu_disable_intr();
139:
140: /*
141: * Determine overheads.
142: * XXX this needs to be repeated for each useful timer/counter.
143: */
144: cputime_overhead = 0;
145: startguprof(p);
146: for (i = 0; i < CALIB_SCALE; i++)
147: cputime_overhead += cputime();
148:
149: empty_loop();
150: startguprof(p);
151: empty_loop();
152: empty_loop_time = cputime();
153:
154: nullfunc_loop_profiled();
155:
156: /*
157: * Start profiling. There won't be any normal function calls since
158: * interrupts are disabled, but we will call the profiling routines
159: * directly to determine their overheads.
160: */
161: p->state = GMON_PROF_HIRES;
162:
163: startguprof(p);
164: nullfunc_loop_profiled();
165:
/*
 * The single __asm statement below is the whole body of the for loop:
 * it calls __mcount CALIB_SCALE times with profil pushed as the argument.
 * The accumulated count is then read from profil's histogram slot.
 */
166: startguprof(p);
167: for (i = 0; i < CALIB_SCALE; i++)
168: #if defined(__i386__) && __GNUC__ >= 2
169: __asm("pushl %0; call __mcount; popl %%ecx"
170: :
171: : "i" (profil)
172: : "ax", "bx", "cx", "dx", "memory");
173: #else
174: #error
175: #endif
176: mcount_overhead = KCOUNT(p, PC_TO_I(p, profil));
177:
/*
 * Only the first __asm (the call to mexitcount followed by local label 1)
 * is the loop body.  The second __asm runs once after the loop and stores
 * the address of label 1 in tmp_addr, which selects the histogram slot
 * read just below.
 */
178: startguprof(p);
179: for (i = 0; i < CALIB_SCALE; i++)
180: #if defined(__i386__) && __GNUC__ >= 2
181: __asm("call " __XSTRING(HIDENAME(mexitcount)) "; 1:"
182: : : : "ax", "bx", "cx", "dx", "memory");
183: __asm("movl $1b,%0" : "=rm" (tmp_addr));
184: #else
185: #error
186: #endif
187: mexitcount_overhead = KCOUNT(p, PC_TO_I(p, tmp_addr));
188:
189: p->state = GMON_PROF_OFF;
190: stopguprof(p);
191:
192: cpu_enable_intr();
193:
/*
 * Sum the histogram counters covering the text from
 * nullfunc_loop_profiled up to nullfunc_loop_profiled_end.
 */
194: nullfunc_loop_profiled_time = 0;
195: for (tmp_addr = (uintfptr_t)nullfunc_loop_profiled;
196: tmp_addr < (uintfptr_t)nullfunc_loop_profiled_end;
197: tmp_addr += HISTFRACTION * sizeof(HISTCOUNTER))
198: nullfunc_loop_profiled_time += KCOUNT(p, PC_TO_I(p, tmp_addr));
/*
 * CALIB_DOSCALE divides a total accumulated over CALIB_SCALE iterations
 * back down to a per-iteration value (adding CALIB_SCALE / 3 first).
 * c2n converts a count at rate freq into nanoseconds.
 * NOTE(review): both macros are defined mid-function and are not
 * #undef'd in the visible code; c2n does not parenthesize freq.
 */
199: #define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE)
200: #define c2n(count, freq) ((int)((count) * 1000000000LL / freq))
201: printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n",
202: CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)),
203: CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)),
204: CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)),
205: CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)),
206: CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate)));
/* Remove the cost of the bare loop from each raw measurement. */
207: cputime_overhead -= empty_loop_time;
208: mcount_overhead -= empty_loop_time;
209: mexitcount_overhead -= empty_loop_time;
210:
211: /*-
212: * Profiling overheads are determined by the times between the
213: * following events:
214: * MC1: mcount() is called
215: * MC2: cputime() (called from mcount()) latches the timer
216: * MC3: mcount() completes
217: * ME1: mexitcount() is called
218: * ME2: cputime() (called from mexitcount()) latches the timer
219: * ME3: mexitcount() completes.
220: * The times between the events vary slightly depending on instruction
221: * combination and cache misses, etc. Attempt to determine the
222: * minimum times. These can be subtracted from the profiling times
223: * without much risk of reducing the profiling times below what they
224: * would be when profiling is not configured. Abbreviate:
225: * ab = minimum time between MC1 and MC3
226: * a = minimum time between MC1 and MC2
227: * b = minimum time between MC2 and MC3
228: * cd = minimum time between ME1 and ME3
229: * c = minimum time between ME1 and ME2
230: * d = minimum time between ME2 and ME3.
231: * These satisfy the relations:
232: * ab <= mcount_overhead (just measured)
233: * a + b <= ab
234: * cd <= mexitcount_overhead (just measured)
235: * c + d <= cd
236: * a + d <= nullfunc_loop_profiled_time (just measured)
237: * a >= 0, b >= 0, c >= 0, d >= 0.
238: * Assume that ab and cd are equal to the minimums.
239: */
240: p->cputime_overhead = CALIB_DOSCALE(cputime_overhead);
241: p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead);
242: p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead
243: - cputime_overhead);
244: nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time;
245: p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead
246: - nullfunc_loop_overhead)
247: / 4);
248: p->mexitcount_pre_overhead = p->mexitcount_overhead
249: + p->cputime_overhead
250: - p->mexitcount_post_overhead;
251: p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead)
252: - p->mexitcount_post_overhead;
253: p->mcount_post_overhead = p->mcount_overhead
254: + p->cputime_overhead
255: - p->mcount_pre_overhead;
/* Report the derived overheads twice: in nanoseconds, then in raw counts. */
256: printf(
257: "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n",
258: c2n(p->cputime_overhead, p->profrate),
259: c2n(p->mcount_overhead, p->profrate),
260: c2n(p->mcount_pre_overhead, p->profrate),
261: c2n(p->mcount_post_overhead, p->profrate),
262: c2n(p->cputime_overhead, p->profrate),
263: c2n(p->mexitcount_overhead, p->profrate),
264: c2n(p->mexitcount_pre_overhead, p->profrate),
265: c2n(p->mexitcount_post_overhead, p->profrate));
266: printf(
267: "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n",
268: p->cputime_overhead, p->mcount_overhead,
269: p->mcount_pre_overhead, p->mcount_post_overhead,
270: p->cputime_overhead, p->mexitcount_overhead,
271: p->mexitcount_pre_overhead, p->mexitcount_post_overhead);
272: #endif /* GUPROF */
273: }
274:
275: /*
276: * Return kernel profiling information.
277: */
278: static int
279: sysctl_kern_prof(SYSCTL_HANDLER_ARGS)
280: {
281: int *name = (int *) arg1;
282: u_int namelen = arg2;
283: struct gmonparam *gp = &_gmonparam;
284: int error;
285: int state;
286:
287: /* all sysctl names at this level are terminal */
288: if (namelen != 1)
289: return (ENOTDIR); /* overloaded */
290:
291: switch (name[0]) {
292: case GPROF_STATE:
293: state = gp->state;
294: error = sysctl_handle_int(oidp, &state, 0, req);
295: if (error)
296: return (error);
297: if (!req->newptr)
298: return (0);
299: if (state == GMON_PROF_OFF) {
300: gp->state = state;
301: stopprofclock(&proc0);
302: stopguprof(gp);
303: } else if (state == GMON_PROF_ON) {
304: gp->state = GMON_PROF_OFF;
305: stopguprof(gp);
306: gp->profrate = profhz;
307: startprofclock(&proc0);
308: gp->state = state;
309: #ifdef GUPROF
310: } else if (state == GMON_PROF_HIRES) {
311: gp->state = GMON_PROF_OFF;
312: stopprofclock(&proc0);
313: startguprof(gp);
314: gp->state = state;
315: #endif
316: } else if (state != gp->state)
317: return (EINVAL);
318: return (0);
319: case GPROF_COUNT:
320: return (sysctl_handle_opaque(oidp,
321: gp->kcount, gp->kcountsize, req));
322: case GPROF_FROMS:
323: return (sysctl_handle_opaque(oidp,
324: gp->froms, gp->fromssize, req));
325: case GPROF_TOS:
326: return (sysctl_handle_opaque(oidp,
327: gp->tos, gp->tossize, req));
328: case GPROF_GMONPARAM:
329: return (sysctl_handle_opaque(oidp, gp, sizeof *gp, req));
330: default:
331: return (EOPNOTSUPP);
332: }
333: /* NOTREACHED */
334: }
335:
336: SYSCTL_NODE(_kern, KERN_PROF, prof, CTLFLAG_RW, sysctl_kern_prof, "");
337: #endif /* GPROF */
338:
339: /*
340: * Profiling system call.
341: *
342: * The scale factor is a fixed point number with 16 bits of fraction, so that
343: * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling.
344: */
345: /* ARGSUSED */
346: int
347: profil(struct profil_args *uap)
348: {
349: struct proc *p = curproc;
350: struct uprof *upp;
351: int s;
352:
353: if (uap->scale > (1 << 16))
354: return (EINVAL);
355: if (uap->scale == 0) {
356: stopprofclock(p);
357: return (0);
358: }
359: upp = &p->p_stats->p_prof;
360:
361: /* Block profile interrupts while changing state. */
362: s = splstatclock();
363: upp->pr_off = uap->offset;
364: upp->pr_scale = uap->scale;
365: upp->pr_base = uap->samples;
366: upp->pr_size = uap->size;
367: startprofclock(p);
368: splx(s);
369:
370: return (0);
371: }
372:
/*
 * Map a program counter to a byte offset into the profiling buffer.
 *
 * pr_scale is a fixed-point value with 16 fractional bits and is <= 1.0.
 * The distance of pc from pr_off is widened to 64 bits before being
 * multiplied by the scale; with a pc of at most 32 bits the product needs
 * at most 48 bits.  The low bit of the result is cleared, so the index is
 * always even.
 */
#define	PC_TO_INDEX(pc, prof)						\
	((int)(((u_quad_t)(prof)->pr_scale *				\
	    (u_quad_t)((pc) - (prof)->pr_off)) >> 16) & ~1)
381:
382: /*
383: * Collect user-level profiling statistics; called on a profiling tick,
384: * when a process is running in user-mode. This routine may be called
385: * from an interrupt context. We try to update the user profiling buffers
386: * cheaply with fuswintr() and suswintr(). If that fails, we revert to
387: * an AST that will vector us to trap() with a context in which copyin
388: * and copyout will work. Trap will then call addupc_task().
389: *
390: * Note that we may (rarely) not get around to the AST soon enough, and
391: * lose profile ticks when the next tick overwrites this one, but in this
392: * case the system is overloaded and the profile is probably already
393: * inaccurate.
394: */
395: void
396: addupc_intr(struct proc *p, u_long pc, u_int ticks)
397: {
398: struct uprof *prof;
399: caddr_t addr;
400: u_int i;
401: int v;
402:
403: if (ticks == 0)
404: return;
405: prof = &p->p_stats->p_prof;
406: if (pc < prof->pr_off ||
407: (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
408: return; /* out of range; ignore */
409:
410: addr = prof->pr_base + i;
411: if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) {
412: prof->pr_addr = pc;
413: prof->pr_ticks = ticks;
414: need_proftick();
415: }
416: }
417:
418: /*
419: * Much like before, but we can afford to take faults here. If the
420: * update fails, we simply turn off profiling.
421: */
422: void
423: addupc_task(struct proc *p, u_long pc, u_int ticks)
424: {
425: struct uprof *prof;
426: caddr_t addr;
427: u_int i;
428: u_short v;
429:
430: /* Testing P_PROFIL may be unnecessary, but is certainly safe. */
431: if ((p->p_flag & P_PROFIL) == 0 || ticks == 0)
432: return;
433:
434: prof = &p->p_stats->p_prof;
435: if (pc < prof->pr_off ||
436: (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
437: return;
438:
439: addr = prof->pr_base + i;
440: if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
441: v += ticks;
442: if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
443: return;
444: }
445: stopprofclock(p);
446: }