File:  [DragonFly] / src / sys / kern / kern_exec.c
Revision 1.21: download - view: text, annotated - select for diffs
Fri Mar 12 23:09:36 2004 UTC (10 years, 5 months ago) by dillon
Branches: MAIN
CVS tags: HEAD
In an rfork'd or vfork'd situation where multiple processes are sharing
the same vmspace, and one process goes zombie, the vmspace's vm_exitingcnt
will be non-zero.  If another process then forks or execs the exitingcnt will
be improperly inherited by the new vmspace.  The solution is to not copy
exitingcnt when copying to a new vmspace.

Additionally, for DragonFly, I also had to fix a few cases where the upcall
list was also being improperly inherited.

Heads-up-by: Xin LI <delphij@frontfree.net>
Obtained-From: Peter Wemm <peter@wemm.org> (FreeBSD-5)

    1: /*
    2:  * Copyright (c) 1993, David Greenman
    3:  * All rights reserved.
    4:  *
    5:  * Redistribution and use in source and binary forms, with or without
    6:  * modification, are permitted provided that the following conditions
    7:  * are met:
    8:  * 1. Redistributions of source code must retain the above copyright
    9:  *    notice, this list of conditions and the following disclaimer.
   10:  * 2. Redistributions in binary form must reproduce the above copyright
   11:  *    notice, this list of conditions and the following disclaimer in the
   12:  *    documentation and/or other materials provided with the distribution.
   13:  *
   14:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24:  * SUCH DAMAGE.
   25:  *
   26:  * $FreeBSD: src/sys/kern/kern_exec.c,v 1.107.2.15 2002/07/30 15:40:46 nectar Exp $
   27:  * $DragonFly: src/sys/kern/kern_exec.c,v 1.21 2004/03/12 23:09:36 dillon Exp $
   28:  */
   29: 
   30: #include <sys/param.h>
   31: #include <sys/systm.h>
   32: #include <sys/sysproto.h>
   33: #include <sys/kernel.h>
   34: #include <sys/mount.h>
   35: #include <sys/filedesc.h>
   36: #include <sys/fcntl.h>
   37: #include <sys/acct.h>
   38: #include <sys/exec.h>
   39: #include <sys/imgact.h>
   40: #include <sys/imgact_elf.h>
   41: #include <sys/kern_syscall.h>
   42: #include <sys/wait.h>
   43: #include <sys/malloc.h>
   44: #include <sys/proc.h>
   45: #include <sys/signalvar.h>
   46: #include <sys/pioctl.h>
   47: #include <sys/namei.h>
   48: #include <sys/sysent.h>
   49: #include <sys/shm.h>
   50: #include <sys/sysctl.h>
   51: #include <sys/vnode.h>
   52: #include <sys/aio.h>
   53: 
   54: #include <vm/vm.h>
   55: #include <vm/vm_param.h>
   56: #include <sys/lock.h>
   57: #include <vm/pmap.h>
   58: #include <vm/vm_page.h>
   59: #include <vm/vm_map.h>
   60: #include <vm/vm_kern.h>
   61: #include <vm/vm_extern.h>
   62: #include <vm/vm_object.h>
   63: #include <vm/vm_pager.h>
   64: 
   65: #include <sys/user.h>
   66: #include <machine/reg.h>
   67: 
   68: MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
   69: 
   70: static register_t *exec_copyout_strings (struct image_params *);
   71: 
   72: /* XXX This should be vm_size_t. */
   73: static u_long ps_strings = PS_STRINGS;
   74: SYSCTL_ULONG(_kern, KERN_PS_STRINGS, ps_strings, CTLFLAG_RD, &ps_strings, 0, "");
   75: 
   76: /* XXX This should be vm_size_t. */
   77: static u_long usrstack = USRSTACK;
   78: SYSCTL_ULONG(_kern, KERN_USRSTACK, usrstack, CTLFLAG_RD, &usrstack, 0, "");
   79: 
   80: u_long ps_arg_cache_limit = PAGE_SIZE / 16;
   81: SYSCTL_LONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 
   82:     &ps_arg_cache_limit, 0, "");
   83: 
   84: int ps_argsopen = 1;
   85: SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
   86: 
   87: void print_execve_args(struct image_args *args);
   88: int debug_execve_args = 0;
   89: SYSCTL_INT(_kern, OID_AUTO, debug_execve_args, CTLFLAG_RW, &debug_execve_args,
   90:     0, "");
   91: 
   92: void
   93: print_execve_args(struct image_args *args)
   94: {
   95: 	char *cp;
   96: 	int ndx;
   97: 
   98: 	cp = args->begin_argv;
   99: 	for (ndx = 0; ndx < args->argc; ndx++) {
  100: 		printf("\targv[%d]: %s\n", ndx, cp);
  101: 		while (*cp++ != '\0');
  102: 	}
  103: 	for (ndx = 0; ndx < args->envc; ndx++) {
  104: 		printf("\tenvv[%d]: %s\n", ndx, cp);
  105: 		while (*cp++ != '\0');
  106: 	}
  107: }
  108: 
/*
 * Table of registered image activators, terminated by a NULL entry.
 * Each of the items is a pointer to a `const struct execsw', hence the
 * double pointer here.  The table is rebuilt by exec_register() and
 * exec_unregister() and stays NULL until the first registration.
 */
static const struct execsw **execsw;
  114: 
  115: int
  116: kern_execve(struct nameidata *ndp, struct image_args *args)
  117: {
  118: 	struct thread *td = curthread;
  119: 	struct proc *p = td->td_proc;
  120: 	register_t *stack_base;
  121: 	int error, len, i;
  122: 	struct image_params image_params, *imgp;
  123: 	struct vattr attr;
  124: 	int (*img_first) (struct image_params *);
  125: 
  126: 	if (debug_execve_args) {
  127: 		printf("%s()\n", __func__);
  128: 		print_execve_args(args);
  129: 	}
  130: 
  131: 	KKASSERT(p);
  132: 	imgp = &image_params;
  133: 
  134: 	/*
  135: 	 * Lock the process and set the P_INEXEC flag to indicate that
  136: 	 * it should be left alone until we're done here.  This is
  137: 	 * necessary to avoid race conditions - e.g. in ptrace() -
  138: 	 * that might allow a local user to illicitly obtain elevated
  139: 	 * privileges.
  140: 	 */
  141: 	p->p_flag |= P_INEXEC;
  142: 
  143: 	/*
  144: 	 * Initialize part of the common data
  145: 	 */
  146: 	imgp->proc = p;
  147: 	imgp->args = args;
  148: 	imgp->attr = &attr;
  149: 	imgp->entry_addr = 0;
  150: 	imgp->resident = 0;
  151: 	imgp->vmspace_destroyed = 0;
  152: 	imgp->interpreted = 0;
  153: 	imgp->interpreter_name[0] = 0;
  154: 	imgp->auxargs = NULL;
  155: 	imgp->vp = NULL;
  156: 	imgp->firstpage = NULL;
  157: 	imgp->ps_strings = 0;
  158: 
  159: 	/*
  160: 	 * Allocate temporary demand zeroed space for argument and
  161: 	 *	environment strings
  162: 	 */
  163: 	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
  164: 	if (imgp->image_header == NULL) {
  165: 		error = ENOMEM;
  166: 		goto exec_fail;
  167: 	}
  168: 
  169: interpret:
  170: 
  171: 	/*
  172: 	 * Translate the file name. namei() returns a vnode pointer
  173: 	 *	in ni_vp amoung other things.
  174: 	 */
  175: 	error = namei(ndp);
  176: 	if (error) {
  177: 		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
  178: 		    PAGE_SIZE);
  179: 		goto exec_fail;
  180: 	}
  181: 
  182: 	imgp->vp = ndp->ni_vp;
  183: 
  184: 	/*
  185: 	 * Check file permissions (also 'opens' file)
  186: 	 */
  187: 	error = exec_check_permissions(imgp);
  188: 	if (error) {
  189: 		VOP_UNLOCK(imgp->vp, NULL, 0, td);
  190: 		goto exec_fail_dealloc;
  191: 	}
  192: 
  193: 	error = exec_map_first_page(imgp);
  194: 	VOP_UNLOCK(imgp->vp, NULL, 0, td);
  195: 	if (error)
  196: 		goto exec_fail_dealloc;
  197: 
  198: 	if (debug_execve_args && imgp->interpreted) {
  199: 		printf("    target is interpreted -- recursive pass\n");
  200: 		printf("    interpreter: %s\n", imgp->interpreter_name);
  201: 		print_execve_args(args);
  202: 	}
  203: 
  204: 	/*
  205: 	 *	If the current process has a special image activator it
  206: 	 *	wants to try first, call it.   For example, emulating shell 
  207: 	 *	scripts differently.
  208: 	 */
  209: 	error = -1;
  210: 	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
  211: 		error = img_first(imgp);
  212: 
  213: 	/*
  214: 	 *	If the vnode has a registered vmspace, exec the vmspace
  215: 	 */
  216: 	if (error == -1 && imgp->vp->v_resident) {
  217: 		error = exec_resident_imgact(imgp);
  218: 	}
  219: 
  220: 	/*
  221: 	 *	Loop through the list of image activators, calling each one.
  222: 	 *	An activator returns -1 if there is no match, 0 on success,
  223: 	 *	and an error otherwise.
  224: 	 */
  225: 	for (i = 0; error == -1 && execsw[i]; ++i) {
  226: 		if (execsw[i]->ex_imgact == NULL ||
  227: 		    execsw[i]->ex_imgact == img_first) {
  228: 			continue;
  229: 		}
  230: 		error = (*execsw[i]->ex_imgact)(imgp);
  231: 	}
  232: 
  233: 	if (error) {
  234: 		if (error == -1)
  235: 			error = ENOEXEC;
  236: 		goto exec_fail_dealloc;
  237: 	}
  238: 
  239: 	/*
  240: 	 * Special interpreter operation, cleanup and loop up to try to
  241: 	 * activate the interpreter.
  242: 	 */
  243: 	if (imgp->interpreted) {
  244: 		exec_unmap_first_page(imgp);
  245: 		/* free name buffer and old vnode */
  246: 		NDFREE(ndp, NDF_ONLY_PNBUF);
  247: 		vrele(ndp->ni_vp);
  248: 		/* set new name to that of the interpreter */
  249: 		NDINIT(ndp, NAMEI_LOOKUP, 
  250: 		    CNP_LOCKLEAF | CNP_FOLLOW | CNP_SAVENAME,
  251: 		    UIO_SYSSPACE, imgp->interpreter_name, td);
  252: 		goto interpret;
  253: 	}
  254: 
  255: 	/*
  256: 	 * Copy out strings (args and env) and initialize stack base
  257: 	 */
  258: 	stack_base = exec_copyout_strings(imgp);
  259: 	p->p_vmspace->vm_minsaddr = (char *)stack_base;
  260: 
  261: 	/*
  262: 	 * If custom stack fixup routine present for this process
  263: 	 * let it do the stack setup.  If we are running a resident
  264: 	 * image there is no auxinfo or other image activator context
  265: 	 * so don't try to add fixups to the stack.
  266: 	 *
  267: 	 * Else stuff argument count as first item on stack
  268: 	 */
  269: 	if (p->p_sysent->sv_fixup && imgp->resident == 0)
  270: 		(*p->p_sysent->sv_fixup)(&stack_base, imgp);
  271: 	else
  272: 		suword(--stack_base, imgp->args->argc);
  273: 
  274: 	/*
  275: 	 * For security and other reasons, the file descriptor table cannot
  276: 	 * be shared after an exec.
  277: 	 */
  278: 	if (p->p_fd->fd_refcnt > 1) {
  279: 		struct filedesc *tmp;
  280: 
  281: 		tmp = fdcopy(p);
  282: 		fdfree(p);
  283: 		p->p_fd = tmp;
  284: 	}
  285: 
  286: 	/*
  287: 	 * For security and other reasons, signal handlers cannot
  288: 	 * be shared after an exec. The new proces gets a copy of the old
  289: 	 * handlers. In execsigs(), the new process will have its signals
  290: 	 * reset.
  291: 	 */
  292: 	if (p->p_procsig->ps_refcnt > 1) {
  293: 		struct procsig *newprocsig;
  294: 
  295: 		MALLOC(newprocsig, struct procsig *, sizeof(struct procsig),
  296: 		       M_SUBPROC, M_WAITOK);
  297: 		bcopy(p->p_procsig, newprocsig, sizeof(*newprocsig));
  298: 		p->p_procsig->ps_refcnt--;
  299: 		p->p_procsig = newprocsig;
  300: 		p->p_procsig->ps_refcnt = 1;
  301: 		if (p->p_sigacts == &p->p_addr->u_sigacts)
  302: 			panic("shared procsig but private sigacts?");
  303: 
  304: 		p->p_addr->u_sigacts = *p->p_sigacts;
  305: 		p->p_sigacts = &p->p_addr->u_sigacts;
  306: 	}
  307: 
  308: 	/* Stop profiling */
  309: 	stopprofclock(p);
  310: 
  311: 	/* close files on exec */
  312: 	fdcloseexec(p);
  313: 
  314: 	/* reset caught signals */
  315: 	execsigs(p);
  316: 
  317: 	/* name this process - nameiexec(p, ndp) */
  318: 	len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
  319: 	bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
  320: 	p->p_comm[len] = 0;
  321: 
  322: 	/*
  323: 	 * mark as execed, wakeup the process that vforked (if any) and tell
  324: 	 * it that it now has its own resources back
  325: 	 */
  326: 	p->p_flag |= P_EXEC;
  327: 	if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
  328: 		p->p_flag &= ~P_PPWAIT;
  329: 		wakeup((caddr_t)p->p_pptr);
  330: 	}
  331: 
  332: 	/*
  333: 	 * Implement image setuid/setgid.
  334: 	 *
  335: 	 * Don't honor setuid/setgid if the filesystem prohibits it or if
  336: 	 * the process is being traced.
  337: 	 */
  338: 	if ((((attr.va_mode & VSUID) && p->p_ucred->cr_uid != attr.va_uid) ||
  339: 	     ((attr.va_mode & VSGID) && p->p_ucred->cr_gid != attr.va_gid)) &&
  340: 	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
  341: 	    (p->p_flag & P_TRACED) == 0) {
  342: 		/*
  343: 		 * Turn off syscall tracing for set-id programs, except for
  344: 		 * root.  Record any set-id flags first to make sure that
  345: 		 * we do not regain any tracing during a possible block.
  346: 		 */
  347: 		setsugid();
  348: 		if (p->p_tracep && suser(td)) {
  349: 			struct vnode *vtmp;
  350: 
  351: 			if ((vtmp = p->p_tracep) != NULL) {
  352: 				p->p_tracep = NULL;
  353: 				p->p_traceflag = 0;
  354: 				vrele(vtmp);
  355: 			}
  356: 		}
  357: 		/* Close any file descriptors 0..2 that reference procfs */
  358: 		setugidsafety(p);
  359: 		/* Make sure file descriptors 0..2 are in use. */
  360: 		error = fdcheckstd(p);
  361: 		if (error != 0)
  362: 			goto exec_fail_dealloc;
  363: 		/*
  364: 		 * Set the new credentials.
  365: 		 */
  366: 		cratom(&p->p_ucred);
  367: 		if (attr.va_mode & VSUID)
  368: 			change_euid(attr.va_uid);
  369: 		if (attr.va_mode & VSGID)
  370: 			p->p_ucred->cr_gid = attr.va_gid;
  371: 
  372: 		/*
  373: 		 * Clear local varsym variables
  374: 		 */
  375: 		varsymset_clean(&p->p_varsymset);
  376: 	} else {
  377: 		if (p->p_ucred->cr_uid == p->p_ucred->cr_ruid &&
  378: 		    p->p_ucred->cr_gid == p->p_ucred->cr_rgid)
  379: 			p->p_flag &= ~P_SUGID;
  380: 	}
  381: 
  382: 	/*
  383: 	 * Implement correct POSIX saved-id behavior.
  384: 	 */
  385: 	if (p->p_ucred->cr_svuid != p->p_ucred->cr_uid ||
  386: 	    p->p_ucred->cr_svgid != p->p_ucred->cr_gid) {
  387: 		cratom(&p->p_ucred);
  388: 		p->p_ucred->cr_svuid = p->p_ucred->cr_uid;
  389: 		p->p_ucred->cr_svgid = p->p_ucred->cr_gid;
  390: 	}
  391: 
  392: 	/*
  393: 	 * Store the vp for use in procfs
  394: 	 */
  395: 	if (p->p_textvp)		/* release old reference */
  396: 		vrele(p->p_textvp);
  397: 	VREF(ndp->ni_vp);
  398: 	p->p_textvp = ndp->ni_vp;
  399: 
  400:         /*
  401:          * Notify others that we exec'd, and clear the P_INEXEC flag
  402:          * as we're now a bona fide freshly-execed process.
  403:          */
  404: 	KNOTE(&p->p_klist, NOTE_EXEC);
  405: 	p->p_flag &= ~P_INEXEC;
  406: 
  407: 	/*
  408: 	 * If tracing the process, trap to debugger so breakpoints
  409: 	 * 	can be set before the program executes.
  410: 	 */
  411: 	STOPEVENT(p, S_EXEC, 0);
  412: 
  413: 	if (p->p_flag & P_TRACED)
  414: 		psignal(p, SIGTRAP);
  415: 
  416: 	/* clear "fork but no exec" flag, as we _are_ execing */
  417: 	p->p_acflag &= ~AFORK;
  418: 
  419: 	/* Set values passed into the program in registers. */
  420: 	setregs(p, imgp->entry_addr, (u_long)(uintptr_t)stack_base,
  421: 	    imgp->ps_strings);
  422: 
  423: 	/* Free any previous argument cache */
  424: 	if (p->p_args && --p->p_args->ar_ref == 0)
  425: 		FREE(p->p_args, M_PARGS);
  426: 	p->p_args = NULL;
  427: 
  428: 	/* Cache arguments if they fit inside our allowance */
  429: 	i = imgp->args->begin_envv - imgp->args->begin_argv;
  430: 	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
  431: 		MALLOC(p->p_args, struct pargs *, sizeof(struct pargs) + i, 
  432: 		    M_PARGS, M_WAITOK);
  433: 		p->p_args->ar_ref = 1;
  434: 		p->p_args->ar_length = i;
  435: 		bcopy(imgp->args->begin_argv, p->p_args->ar_args, i);
  436: 	}
  437: 
  438: exec_fail_dealloc:
  439: 
  440: 	/*
  441: 	 * free various allocated resources
  442: 	 */
  443: 	if (imgp->firstpage)
  444: 		exec_unmap_first_page(imgp);
  445: 
  446: 	if (imgp->image_header != NULL)
  447: 		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
  448: 		    PAGE_SIZE);
  449: 
  450: 	if (imgp->vp) {
  451: 		NDFREE(ndp, NDF_ONLY_PNBUF);
  452: 		vrele(imgp->vp);
  453: 	}
  454: 
  455: 	if (error == 0)
  456: 		return (0);
  457: 
  458: exec_fail:
  459: 	/* we're done here, clear P_INEXEC */
  460: 	p->p_flag &= ~P_INEXEC;
  461: 	if (imgp->vmspace_destroyed) {
  462: 		/* sorry, no more process anymore. exit gracefully */
  463: 		exit1(W_EXITCODE(0, SIGABRT));
  464: 		/* NOT REACHED */
  465: 		return(0);
  466: 	} else {
  467: 		return(error);
  468: 	}
  469: }
  470: 
  471: /*
  472:  * execve() system call.
  473:  */
  474: int
  475: execve(struct execve_args *uap)
  476: {
  477: 	struct thread *td = curthread;
  478: 	struct nameidata nd;
  479: 	struct image_args args;
  480: 	int error;
  481: 
  482: 	NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_FOLLOW | CNP_SAVENAME,
  483: 	    UIO_USERSPACE, uap->fname, td);
  484: 
  485: 	error = exec_copyin_args(&args, uap->fname, PATH_USERSPACE,
  486: 				uap->argv, uap->envv);
  487: 	if (error == 0)
  488: 		error = kern_execve(&nd, &args);
  489: 
  490: 	exec_free_args(&args);
  491: 
  492: 	/*
  493: 	 * The syscall result is returned in registers to the new program.
  494: 	 * Linux will register %edx as an atexit function and we must be
  495: 	 * sure to set it to 0.  XXX
  496: 	 */
  497: 	if (error == 0)
  498: 		uap->sysmsg_result64 = 0;
  499: 
  500: 	return (error);
  501: }
  502: 
  503: int
  504: exec_map_first_page(struct image_params *imgp)
  505: {
  506: 	int s, rv, i;
  507: 	int initial_pagein;
  508: 	vm_page_t ma[VM_INITIAL_PAGEIN];
  509: 	vm_object_t object;
  510: 
  511: 
  512: 	if (imgp->firstpage) {
  513: 		exec_unmap_first_page(imgp);
  514: 	}
  515: 
  516: 	VOP_GETVOBJECT(imgp->vp, &object);
  517: 	s = splvm();
  518: 
  519: 	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
  520: 
  521: 	if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
  522: 		initial_pagein = VM_INITIAL_PAGEIN;
  523: 		if (initial_pagein > object->size)
  524: 			initial_pagein = object->size;
  525: 		for (i = 1; i < initial_pagein; i++) {
  526: 			if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
  527: 				if ((ma[i]->flags & PG_BUSY) || ma[i]->busy)
  528: 					break;
  529: 				if (ma[i]->valid)
  530: 					break;
  531: 				vm_page_busy(ma[i]);
  532: 			} else {
  533: 				ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL);
  534: 				if (ma[i] == NULL)
  535: 					break;
  536: 			}
  537: 		}
  538: 		initial_pagein = i;
  539: 
  540: 		rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
  541: 		ma[0] = vm_page_lookup(object, 0);
  542: 
  543: 		if ((rv != VM_PAGER_OK) || (ma[0] == NULL) || (ma[0]->valid == 0)) {
  544: 			if (ma[0]) {
  545: 				vm_page_protect(ma[0], VM_PROT_NONE);
  546: 				vm_page_free(ma[0]);
  547: 			}
  548: 			splx(s);
  549: 			return EIO;
  550: 		}
  551: 	}
  552: 
  553: 	vm_page_wire(ma[0]);
  554: 	vm_page_wakeup(ma[0]);
  555: 	splx(s);
  556: 
  557: 	pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0]));
  558: 	imgp->firstpage = ma[0];
  559: 
  560: 	return 0;
  561: }
  562: 
  563: void
  564: exec_unmap_first_page(imgp)
  565: 	struct image_params *imgp;
  566: {
  567: 	if (imgp->firstpage) {
  568: 		pmap_kremove((vm_offset_t) imgp->image_header);
  569: 		vm_page_unwire(imgp->firstpage, 1);
  570: 		imgp->firstpage = NULL;
  571: 	}
  572: }
  573: 
  574: /*
  575:  * Destroy old address space, and allocate a new stack
  576:  *	The new stack is only SGROWSIZ large because it is grown
  577:  *	automatically in trap.c.
  578:  */
  579: int
  580: exec_new_vmspace(struct image_params *imgp, struct vmspace *vmcopy)
  581: {
  582: 	int error;
  583: 	struct vmspace *vmspace = imgp->proc->p_vmspace;
  584: 	vm_offset_t stack_addr = USRSTACK - maxssiz;
  585: 	vm_map_t map;
  586: 
  587: 	imgp->vmspace_destroyed = 1;
  588: 
  589: 	/*
  590: 	 * Prevent a pending AIO from modifying the new address space.
  591: 	 */
  592: 	aio_proc_rundown(imgp->proc);
  593: 
  594: 	/*
  595: 	 * Blow away entire process VM, if address space not shared,
  596: 	 * otherwise, create a new VM space so that other threads are
  597: 	 * not disrupted.  If we are execing a resident vmspace we
  598: 	 * create a duplicate of it and remap the stack.
  599: 	 *
  600: 	 * The exitingcnt test is not strictly necessary but has been
  601: 	 * included for code sanity (to make the code more deterministic).
  602: 	 */
  603: 	map = &vmspace->vm_map;
  604: 	if (vmcopy) {
  605: 		vmspace_exec(imgp->proc, vmcopy);
  606: 		vmspace = imgp->proc->p_vmspace;
  607: 		pmap_remove_pages(vmspace_pmap(vmspace), stack_addr, USRSTACK);
  608: 		map = &vmspace->vm_map;
  609: 	} else if (vmspace->vm_refcnt == 1 && vmspace->vm_exitingcnt == 0) {
  610: 		shmexit(vmspace);
  611: 		if (vmspace->vm_upcalls)
  612: 			upc_release(vmspace, imgp->proc);
  613: 		pmap_remove_pages(vmspace_pmap(vmspace), 0, VM_MAXUSER_ADDRESS);
  614: 		vm_map_remove(map, 0, VM_MAXUSER_ADDRESS);
  615: 	} else {
  616: 		vmspace_exec(imgp->proc, NULL);
  617: 		vmspace = imgp->proc->p_vmspace;
  618: 		map = &vmspace->vm_map;
  619: 	}
  620: 
  621: 	/* Allocate a new stack */
  622: 	error = vm_map_stack(&vmspace->vm_map, stack_addr, (vm_size_t)maxssiz,
  623: 	    VM_PROT_ALL, VM_PROT_ALL, 0);
  624: 	if (error)
  625: 		return (error);
  626: 
  627: 	/* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
  628: 	 * VM_STACK case, but they are still used to monitor the size of the
  629: 	 * process stack so we can check the stack rlimit.
  630: 	 */
  631: 	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
  632: 	vmspace->vm_maxsaddr = (char *)USRSTACK - maxssiz;
  633: 
  634: 	return(0);
  635: }
  636: 
  637: /*
  638:  * Copy out argument and environment strings from the old process
  639:  *	address space into the temporary string buffer.
  640:  */
  641: int
  642: exec_copyin_args(struct image_args *args, char *fname,
  643: 		enum exec_path_segflg segflg, char **argv, char **envv)
  644: {
  645: 	char	*argp, *envp;
  646: 	int	error = 0;
  647: 	size_t	length;
  648: 
  649: 	bzero(args, sizeof(*args));
  650: 	args->buf = (char *) kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
  651: 	if (args->buf == NULL)
  652: 		return (ENOMEM);
  653: 	args->begin_argv = args->buf;
  654: 	args->endp = args->begin_argv;
  655: 	args->space = ARG_MAX;
  656: 
  657: 	args->fname = args->buf + ARG_MAX;
  658: 
  659: 	/*
  660: 	 * Copy the file name.
  661: 	 */
  662: 	if (segflg == PATH_SYSSPACE) {
  663: 		error = copystr(fname, args->fname, PATH_MAX, &length);
  664: 	} else if (segflg == PATH_USERSPACE) {
  665: 		error = copyinstr(fname, args->fname, PATH_MAX, &length);
  666: 	}
  667: 
  668: 	/*
  669: 	 * extract argument strings
  670: 	 */
  671: 
  672: 	if (argv && error == 0) {
  673: 		/*
  674: 		 * The argv0 argument for execv() is allowed to be NULL,
  675: 		 * in which case we use our filename as argv[0].
  676: 		 * This guarantees that
  677: 		 * the interpreter knows what file to open in the case
  678: 		 * that we exec an interpreted file.
  679: 		 */
  680: 		argp = (caddr_t) (intptr_t) fuword(argv);
  681: 		if (argp == NULL) {
  682: 			length = strlen(args->fname) + 1;
  683: 			KKASSERT(length <= args->space);
  684: 			bcopy(args->fname, args->endp, length);
  685: 			args->space -= length;
  686: 			args->endp += length;
  687: 			args->argc++;
  688: 			argv++;
  689: 		}
  690: 		while ((argp = (caddr_t) (intptr_t) fuword(argv++))) {
  691: 			if (argp == (caddr_t) -1) {
  692: 				error = EFAULT;
  693: 				goto cleanup;
  694: 			}
  695: 			error = copyinstr(argp, args->endp,
  696: 					    args->space, &length);
  697: 			if (error == ENAMETOOLONG)
  698: 				error = E2BIG;
  699: 			if (error)
  700: 				goto cleanup;
  701: 			args->space -= length;
  702: 			args->endp += length;
  703: 			args->argc++;
  704: 		}
  705: 	}	
  706: 
  707: 	args->begin_envv = args->endp;
  708: 
  709: 	/*
  710: 	 * extract environment strings
  711: 	 */
  712: 	if (envv && error == 0) {
  713: 		while ((envp = (caddr_t) (intptr_t) fuword(envv++))) {
  714: 			if (envp == (caddr_t) -1) {
  715: 				error = EFAULT;
  716: 				goto cleanup;
  717: 			}
  718: 			error = copyinstr(envp, args->endp, args->space,
  719: 			    &length);
  720: 			if (error == ENAMETOOLONG)
  721: 				error = E2BIG;
  722: 			if (error)
  723: 				goto cleanup;
  724: 			args->space -= length;
  725: 			args->endp += length;
  726: 			args->envc++;
  727: 		}
  728: 	}
  729: 
  730: cleanup:
  731: 	return (error);
  732: }
  733: 
  734: void
  735: exec_free_args(struct image_args *args)
  736: {
  737: 	if (args->buf) {
  738: 		kmem_free_wakeup(exec_map,
  739: 				(vm_offset_t)args->buf, PATH_MAX + ARG_MAX);
  740: 		args->buf = NULL;
  741: 	}
  742: }
  743: 
  744: /*
  745:  * Copy strings out to the new process address space, constructing
  746:  *	new arg and env vector tables. Return a pointer to the base
  747:  *	so that it can be used as the initial stack pointer.
  748:  */
  749: register_t *
  750: exec_copyout_strings(struct image_params *imgp)
  751: {
  752: 	int argc, envc;
  753: 	char **vectp;
  754: 	char *stringp, *destp;
  755: 	register_t *stack_base;
  756: 	struct ps_strings *arginfo;
  757: 	int szsigcode;
  758: 
  759: 	/*
  760: 	 * Calculate string base and vector table pointers.
  761: 	 * Also deal with signal trampoline code for this exec type.
  762: 	 */
  763: 	arginfo = (struct ps_strings *)PS_STRINGS;
  764: 	szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
  765: 	destp =	(caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
  766: 	    roundup((ARG_MAX - imgp->args->space), sizeof(char *));
  767: 
  768: 	/*
  769: 	 * install sigcode
  770: 	 */
  771: 	if (szsigcode)
  772: 		copyout(imgp->proc->p_sysent->sv_sigcode,
  773: 		    ((caddr_t)arginfo - szsigcode), szsigcode);
  774: 
  775: 	/*
  776: 	 * If we have a valid auxargs ptr, prepare some room
  777: 	 * on the stack.
  778: 	 *
  779: 	 * The '+ 2' is for the null pointers at the end of each of the
  780: 	 * arg and env vector sets, and 'AT_COUNT*2' is room for the
  781: 	 * ELF Auxargs data.
  782: 	 */
  783: 	if (imgp->auxargs) {
  784: 		vectp = (char **)(destp - (imgp->args->argc +
  785: 			imgp->args->envc + 2 + AT_COUNT * 2) * sizeof(char*));
  786: 	} else {
  787: 		vectp = (char **)(destp - (imgp->args->argc +
  788: 			imgp->args->envc + 2) * sizeof(char*));
  789: 	}
  790: 
  791: 	/*
  792: 	 * NOTE: don't bother aligning the stack here for GCC 2.x, it will
  793: 	 * be done in crt1.o.  Note that GCC 3.x aligns the stack in main.
  794: 	 */
  795: 
  796: 	/*
  797: 	 * vectp also becomes our initial stack base
  798: 	 */
  799: 	stack_base = (register_t *)vectp;
  800: 
  801: 	stringp = imgp->args->begin_argv;
  802: 	argc = imgp->args->argc;
  803: 	envc = imgp->args->envc;
  804: 
  805: 	/*
  806: 	 * Copy out strings - arguments and environment.
  807: 	 */
  808: 	copyout(stringp, destp, ARG_MAX - imgp->args->space);
  809: 
  810: 	/*
  811: 	 * Fill in "ps_strings" struct for ps, w, etc.
  812: 	 */
  813: 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
  814: 	suword(&arginfo->ps_nargvstr, argc);
  815: 
  816: 	/*
  817: 	 * Fill in argument portion of vector table.
  818: 	 */
  819: 	for (; argc > 0; --argc) {
  820: 		suword(vectp++, (long)(intptr_t)destp);
  821: 		while (*stringp++ != 0)
  822: 			destp++;
  823: 		destp++;
  824: 	}
  825: 
  826: 	/* a null vector table pointer separates the argp's from the envp's */
  827: 	suword(vectp++, 0);
  828: 
  829: 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
  830: 	suword(&arginfo->ps_nenvstr, envc);
  831: 
  832: 	/*
  833: 	 * Fill in environment portion of vector table.
  834: 	 */
  835: 	for (; envc > 0; --envc) {
  836: 		suword(vectp++, (long)(intptr_t)destp);
  837: 		while (*stringp++ != 0)
  838: 			destp++;
  839: 		destp++;
  840: 	}
  841: 
  842: 	/* end of vector table is a null pointer */
  843: 	suword(vectp, 0);
  844: 
  845: 	return (stack_base);
  846: }
  847: 
  848: /*
  849:  * Check permissions of file to execute.
  850:  *	Return 0 for success or error code on failure.
  851:  */
  852: int
  853: exec_check_permissions(imgp)
  854: 	struct image_params *imgp;
  855: {
  856: 	struct proc *p = imgp->proc;
  857: 	struct vnode *vp = imgp->vp;
  858: 	struct vattr *attr = imgp->attr;
  859: 	struct thread *td = p->p_thread;
  860: 	int error;
  861: 
  862: 	/* Get file attributes */
  863: 	error = VOP_GETATTR(vp, attr, td);
  864: 	if (error)
  865: 		return (error);
  866: 
  867: 	/*
  868: 	 * 1) Check if file execution is disabled for the filesystem that this
  869: 	 *	file resides on.
  870: 	 * 2) Insure that at least one execute bit is on - otherwise root
  871: 	 *	will always succeed, and we don't want to happen unless the
  872: 	 *	file really is executable.
  873: 	 * 3) Insure that the file is a regular file.
  874: 	 */
  875: 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
  876: 	    ((attr->va_mode & 0111) == 0) ||
  877: 	    (attr->va_type != VREG)) {
  878: 		return (EACCES);
  879: 	}
  880: 
  881: 	/*
  882: 	 * Zero length files can't be exec'd
  883: 	 */
  884: 	if (attr->va_size == 0)
  885: 		return (ENOEXEC);
  886: 
  887: 	/*
  888: 	 *  Check for execute permission to file based on current credentials.
  889: 	 */
  890: 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, td);
  891: 	if (error)
  892: 		return (error);
  893: 
  894: 	/*
  895: 	 * Check number of open-for-writes on the file and deny execution
  896: 	 * if there are any.
  897: 	 */
  898: 	if (vp->v_writecount)
  899: 		return (ETXTBSY);
  900: 
  901: 	/*
  902: 	 * Call filesystem specific open routine (which does nothing in the
  903: 	 * general case).
  904: 	 */
  905: 	error = VOP_OPEN(vp, FREAD, p->p_ucred, td);
  906: 	if (error)
  907: 		return (error);
  908: 
  909: 	return (0);
  910: }
  911: 
  912: /*
  913:  * Exec handler registration
  914:  */
  915: int
  916: exec_register(execsw_arg)
  917: 	const struct execsw *execsw_arg;
  918: {
  919: 	const struct execsw **es, **xs, **newexecsw;
  920: 	int count = 2;	/* New slot and trailing NULL */
  921: 
  922: 	if (execsw)
  923: 		for (es = execsw; *es; es++)
  924: 			count++;
  925: 	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
  926: 	if (newexecsw == NULL)
  927: 		return ENOMEM;
  928: 	xs = newexecsw;
  929: 	if (execsw)
  930: 		for (es = execsw; *es; es++)
  931: 			*xs++ = *es;
  932: 	*xs++ = execsw_arg;
  933: 	*xs = NULL;
  934: 	if (execsw)
  935: 		free(execsw, M_TEMP);
  936: 	execsw = newexecsw;
  937: 	return 0;
  938: }
  939: 
  940: int
  941: exec_unregister(execsw_arg)
  942: 	const struct execsw *execsw_arg;
  943: {
  944: 	const struct execsw **es, **xs, **newexecsw;
  945: 	int count = 1;
  946: 
  947: 	if (execsw == NULL)
  948: 		panic("unregister with no handlers left?\n");
  949: 
  950: 	for (es = execsw; *es; es++) {
  951: 		if (*es == execsw_arg)
  952: 			break;
  953: 	}
  954: 	if (*es == NULL)
  955: 		return ENOENT;
  956: 	for (es = execsw; *es; es++)
  957: 		if (*es != execsw_arg)
  958: 			count++;
  959: 	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
  960: 	if (newexecsw == NULL)
  961: 		return ENOMEM;
  962: 	xs = newexecsw;
  963: 	for (es = execsw; *es; es++)
  964: 		if (*es != execsw_arg)
  965: 			*xs++ = *es;
  966: 	*xs = NULL;
  967: 	if (execsw)
  968: 		free(execsw, M_TEMP);
  969: 	execsw = newexecsw;
  970: 	return 0;
  971: }