File: [DragonFly] src/sys/vm/vm_map.c
Revision 1.23
Fri Mar 12 23:09:37 2004 UTC by dillon
Branches: MAIN
CVS tags: HEAD
In an rfork'd or vfork'd situation where multiple processes are sharing
the same vmspace, and one process goes zombie, the vmspace's vm_exitingcnt
will be non-zero.  If another process then forks or execs, the exitingcnt
will be improperly inherited by the new vmspace.  The solution is to not copy
exitingcnt when copying to a new vmspace.

Additionally, for DragonFly, I had to fix a few cases where the upcall
list was also being improperly inherited.

Heads-up-by: Xin LI <delphij@frontfree.net>
Obtained-From: Peter Wemm <peter@wemm.org> (FreeBSD-5)
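
A minimal sketch of the idea (this is not the actual diff; the copy step and
variable names are illustrative, while vm_exitingcnt, vm_refcnt and vm_upcalls
appear in the code below):

	/*
	 * Sketch: when cloning oldvm into newvm for a fork or exec, the
	 * exit accounting must not come along -- either exclude the field
	 * from the copy or clear it afterwards.
	 */
	*newvm = *oldvm;		/* illustrative copy of inherited state */
	newvm->vm_exitingcnt = 0;	/* never inherited */
	newvm->vm_refcnt = 1;		/* new vmspace gets its own reference */
	newvm->vm_upcalls = NULL;	/* DragonFly: upcall list not inherited */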

    1: /*
    2:  * Copyright (c) 1991, 1993
    3:  *	The Regents of the University of California.  All rights reserved.
    4:  *
    5:  * This code is derived from software contributed to Berkeley by
    6:  * The Mach Operating System project at Carnegie-Mellon University.
    7:  *
    8:  * Redistribution and use in source and binary forms, with or without
    9:  * modification, are permitted provided that the following conditions
   10:  * are met:
   11:  * 1. Redistributions of source code must retain the above copyright
   12:  *    notice, this list of conditions and the following disclaimer.
   13:  * 2. Redistributions in binary form must reproduce the above copyright
   14:  *    notice, this list of conditions and the following disclaimer in the
   15:  *    documentation and/or other materials provided with the distribution.
   16:  * 3. All advertising materials mentioning features or use of this software
   17:  *    must display the following acknowledgement:
   18:  *	This product includes software developed by the University of
   19:  *	California, Berkeley and its contributors.
   20:  * 4. Neither the name of the University nor the names of its contributors
   21:  *    may be used to endorse or promote products derived from this software
   22:  *    without specific prior written permission.
   23:  *
   24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34:  * SUCH DAMAGE.
   35:  *
   36:  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
   37:  *
   38:  *
   39:  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   40:  * All rights reserved.
   41:  *
   42:  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   43:  *
   44:  * Permission to use, copy, modify and distribute this software and
   45:  * its documentation is hereby granted, provided that both the copyright
   46:  * notice and this permission notice appear in all copies of the
   47:  * software, derivative works or modified versions, and any portions
   48:  * thereof, and that both notices appear in supporting documentation.
   49:  *
   50:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53:  *
   54:  * Carnegie Mellon requests users of this software to return to
   55:  *
   56:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57:  *  School of Computer Science
   58:  *  Carnegie Mellon University
   59:  *  Pittsburgh PA 15213-3890
   60:  *
   61:  * any improvements or extensions that they make and grant Carnegie the
   62:  * rights to redistribute these changes.
   63:  *
   64:  * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $
   65:  * $DragonFly: src/sys/vm/vm_map.c,v 1.23 2004/03/12 23:09:37 dillon Exp $
   66:  */
   67: 
   68: /*
   69:  *	Virtual memory mapping module.
   70:  */
   71: 
   72: #include <sys/param.h>
   73: #include <sys/systm.h>
   74: #include <sys/proc.h>
   75: #include <sys/lock.h>
   76: #include <sys/vmmeter.h>
   77: #include <sys/mman.h>
   78: #include <sys/vnode.h>
   79: #include <sys/resourcevar.h>
   80: #include <sys/shm.h>
   81: 
   82: #include <vm/vm.h>
   83: #include <vm/vm_param.h>
   84: #include <vm/pmap.h>
   85: #include <vm/vm_map.h>
   86: #include <vm/vm_page.h>
   87: #include <vm/vm_object.h>
   88: #include <vm/vm_pager.h>
   89: #include <vm/vm_kern.h>
   90: #include <vm/vm_extern.h>
   91: #include <vm/swap_pager.h>
   92: #include <vm/vm_zone.h>
   93: 
   94: #include <sys/thread2.h>
   95: 
   96: /*
   97:  *	Virtual memory maps provide for the mapping, protection,
   98:  *	and sharing of virtual memory objects.  In addition,
   99:  *	this module provides for an efficient virtual copy of
  100:  *	memory from one map to another.
  101:  *
  102:  *	Synchronization is required prior to most operations.
  103:  *
  104:  *	Maps consist of an ordered doubly-linked list of simple
  105:  *	entries; a single hint is used to speed up lookups.
  106:  *
  107:  *	Since portions of maps are specified by start/end addresses,
  108:  *	which may not align with existing map entries, all
  109:  *	routines merely "clip" entries to these start/end values.
  110:  *	[That is, an entry is split into two, bordering at a
  111:  *	start or end value.]  Note that these clippings may not
  112:  *	always be necessary (as the two resulting entries are then
  113:  *	not changed); however, the clipping is done for convenience.
  114:  *
  115:  *	As mentioned above, virtual copy operations are performed
  116:  *	by copying VM object references from one map to
  117:  *	another, and then marking both regions as copy-on-write.
  118:  */
  119: 
  120: /*
  121:  *	vm_map_startup:
  122:  *
  123:  *	Initialize the vm_map module.  Must be called before
  124:  *	any other vm_map routines.
  125:  *
  126:  *	Map and entry structures are allocated from the general
  127:  *	purpose memory pool with some exceptions:
  128:  *
  129:  *	- The kernel map and kmem submap are allocated statically.
  130:  *	- Kernel map entries are allocated out of a static pool.
  131:  *
  132:  *	These restrictions are necessary since malloc() uses the
  133:  *	maps and requires map entries.
  134:  */
  135: 
  136: static struct vm_zone mapentzone_store, mapzone_store;
  137: static vm_zone_t mapentzone, mapzone, vmspace_zone;
  138: static struct vm_object mapentobj, mapobj;
  139: 
  140: static struct vm_map_entry map_entry_init[MAX_MAPENT];
  141: static struct vm_map map_init[MAX_KMAP];
  142: 
  143: static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *);
  144: static void vm_map_entry_dispose (vm_map_t map, vm_map_entry_t entry, int *);
  145: static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
  146: static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
  147: static void vm_map_entry_delete (vm_map_t, vm_map_entry_t, int *);
  148: static void vm_map_entry_unwire (vm_map_t, vm_map_entry_t);
  149: static void vm_map_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t,
  150: 		vm_map_entry_t);
  151: static void vm_map_split (vm_map_entry_t);
  152: static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int *count, int flags);
  153: 
  154: void
  155: vm_map_startup()
  156: {
  157: 	mapzone = &mapzone_store;
  158: 	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
  159: 		map_init, MAX_KMAP);
  160: 	mapentzone = &mapentzone_store;
  161: 	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
  162: 		map_entry_init, MAX_MAPENT);
  163: }
  164: 
  165: /*
  166:  * Allocate a vmspace structure, including a vm_map and pmap,
  167:  * and initialize those structures.  The refcnt is set to 1.
  168:  * The remaining fields must be initialized by the caller.
  169:  */
  170: struct vmspace *
  171: vmspace_alloc(min, max)
  172: 	vm_offset_t min, max;
  173: {
  174: 	struct vmspace *vm;
  175: 
  176: 	vm = zalloc(vmspace_zone);
  177: 	vm_map_init(&vm->vm_map, min, max);
  178: 	pmap_pinit(vmspace_pmap(vm));
  179: 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
  180: 	vm->vm_refcnt = 1;
  181: 	vm->vm_shm = NULL;
  182: 	vm->vm_exitingcnt = 0;
  183: 	return (vm);
  184: }
  185: 
  186: void
  187: vm_init2(void) 
  188: {
  189: 	zinitna(mapentzone, &mapentobj, NULL, 0, 0, ZONE_USE_RESERVE, 1);
  190: 	zinitna(mapzone, &mapobj, NULL, 0, 0, 0, 1);
  191: 	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
  192: 	pmap_init2();
  193: 	vm_object_init2();
  194: }
  195: 
  196: static __inline void
  197: vmspace_dofree(struct vmspace *vm)
  198: {
  199: 	int count;
  200: 
  201: 	/*
   202: 	 * Make sure any SysV shm is freed; it might not have been
   203: 	 * freed in exit1().
  204: 	 */
  205: 	shmexit(vm);
  206: 
  207: 	KKASSERT(vm->vm_upcalls == NULL);
  208: 
  209: 	/*
  210: 	 * Lock the map, to wait out all other references to it.
  211: 	 * Delete all of the mappings and pages they hold, then call
  212: 	 * the pmap module to reclaim anything left.
  213: 	 */
  214: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  215: 	vm_map_lock(&vm->vm_map);
  216: 	vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
  217: 		vm->vm_map.max_offset, &count);
  218: 	vm_map_unlock(&vm->vm_map);
  219: 	vm_map_entry_release(count);
  220: 
  221: 	pmap_release(vmspace_pmap(vm));
  222: 	zfree(vmspace_zone, vm);
  223: }
  224: 
  225: void
  226: vmspace_free(struct vmspace *vm)
  227: {
  228: 	if (vm->vm_refcnt == 0)
  229: 		panic("vmspace_free: attempt to free already freed vmspace");
  230: 
  231: 	if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
  232: 		vmspace_dofree(vm);
  233: }
  234: 
  235: void
  236: vmspace_exitfree(struct proc *p)
  237: {
  238: 	struct vmspace *vm;
  239: 
  240: 	vm = p->p_vmspace;
  241: 	p->p_vmspace = NULL;
  242: 
  243: 	/*
  244: 	 * cleanup by parent process wait()ing on exiting child.  vm_refcnt
  245: 	 * may not be 0 (e.g. fork() and child exits without exec()ing).
  246: 	 * exitingcnt may increment above 0 and drop back down to zero
  247: 	 * several times while vm_refcnt is held non-zero.  vm_refcnt
  248: 	 * may also increment above 0 and drop back down to zero several
  249: 	 * times while vm_exitingcnt is held non-zero.
  250: 	 *
  251: 	 * The last wait on the exiting child's vmspace will clean up
  252: 	 * the remainder of the vmspace.
  253: 	 */
  254: 	if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
  255: 		vmspace_dofree(vm);
  256: }
  257: 
  258: /*
  259:  * vmspace_swap_count() - count the approximate swap usage in pages for a
  260:  *			  vmspace.
  261:  *
  262:  *	Swap usage is determined by taking the proportional swap used by
  263:  *	VM objects backing the VM map.  To make up for fractional losses,
  264:  *	if the VM object has any swap use at all the associated map entries
  265:  *	count for at least 1 swap page.
  266:  */
  267: int
  268: vmspace_swap_count(struct vmspace *vmspace)
  269: {
  270: 	vm_map_t map = &vmspace->vm_map;
  271: 	vm_map_entry_t cur;
  272: 	int count = 0;
  273: 
  274: 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
  275: 		vm_object_t object;
  276: 
  277: 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
  278: 		    (object = cur->object.vm_object) != NULL &&
  279: 		    object->type == OBJT_SWAP
  280: 		) {
  281: 			int n = (cur->end - cur->start) / PAGE_SIZE;
  282: 
  283: 			if (object->un_pager.swp.swp_bcount) {
  284: 				count += object->un_pager.swp.swp_bcount *
  285: 				    SWAP_META_PAGES * n / object->size + 1;
  286: 			}
  287: 		}
  288: 	}
  289: 	return(count);
  290: }
  291: 
  292: 
  293: /*
  294:  *	vm_map_create:
  295:  *
  296:  *	Creates and returns a new empty VM map with
  297:  *	the given physical map structure, and having
  298:  *	the given lower and upper address bounds.
  299:  */
  300: vm_map_t
  301: vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
  302: {
  303: 	vm_map_t result;
  304: 
  305: 	result = zalloc(mapzone);
  306: 	vm_map_init(result, min, max);
  307: 	result->pmap = pmap;
  308: 	return (result);
  309: }
  310: 
  311: /*
  312:  * Initialize an existing vm_map structure
  313:  * such as that in the vmspace structure.
  314:  * The pmap is set elsewhere.
  315:  */
  316: void
  317: vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max)
  318: {
  319: 	map->header.next = map->header.prev = &map->header;
  320: 	map->nentries = 0;
  321: 	map->size = 0;
  322: 	map->system_map = 0;
  323: 	map->infork = 0;
  324: 	map->min_offset = min;
  325: 	map->max_offset = max;
  326: 	map->first_free = &map->header;
  327: 	map->hint = &map->header;
  328: 	map->timestamp = 0;
  329: 	lockinit(&map->lock, 0, "thrd_sleep", 0, LK_NOPAUSE);
  330: }
  331: 
  332: /*
  333:  *      vm_map_entry_reserve_cpu_init:
  334:  *
  335:  *	Set an initial negative count so the first attempt to reserve
  336:  *	space preloads a bunch of vm_map_entry's for this cpu.  This
  337:  *	routine is called in early boot so we cannot just call
  338:  *	vm_map_entry_reserve().
  339:  *
  340:  *	May be called for a gd other than mycpu.
  341:  */
  342: void
  343: vm_map_entry_reserve_cpu_init(globaldata_t gd)
  344: {
  345: 	gd->gd_vme_avail -= MAP_RESERVE_COUNT * 2;
  346: }
  347: 
  348: /*
  349:  *	vm_map_entry_reserve:
  350:  *
  351:  *	Reserves vm_map_entry structures so code later on can manipulate
  352:  *	map_entry structures within a locked map without blocking trying
  353:  *	to allocate a new vm_map_entry.
  354:  */
  355: int
  356: vm_map_entry_reserve(int count)
  357: {
  358: 	struct globaldata *gd = mycpu;
  359: 	vm_map_entry_t entry;
  360: 
  361: 	crit_enter();
  362: 	gd->gd_vme_avail -= count;
  363: 
  364: 	/*
  365: 	 * Make sure we have enough structures in gd_vme_base to handle
  366: 	 * the reservation request.
  367: 	 */
  368: 	while (gd->gd_vme_avail < 0) {
  369: 		entry = zalloc(mapentzone);
  370: 		entry->next = gd->gd_vme_base;
  371: 		gd->gd_vme_base = entry;
  372: 		++gd->gd_vme_avail;
  373: 	}
  374: 	crit_exit();
  375: 	return(count);
  376: }
  377: 
  378: /*
  379:  *	vm_map_entry_release:
  380:  *
  381:  *	Releases previously reserved vm_map_entry structures that were not
  382:  *	used.  If we have too much junk in our per-cpu cache, clean some of
  383:  *	it out.
  384:  */
  385: void
  386: vm_map_entry_release(int count)
  387: {
  388: 	struct globaldata *gd = mycpu;
  389: 	vm_map_entry_t entry;
  390: 
  391: 	crit_enter();
  392: 	gd->gd_vme_avail += count;
  393: 	while (gd->gd_vme_avail > MAP_RESERVE_SLOP) {
  394: 		entry = gd->gd_vme_base;
  395: 		KKASSERT(entry != NULL);
  396: 		gd->gd_vme_base = entry->next;
  397: 		--gd->gd_vme_avail;
  398: 		crit_exit();
  399: 		zfree(mapentzone, entry);
  400: 		crit_enter();
  401: 	}
  402: 	crit_exit();
  403: }
  404: 
  405: /*
  406:  *	vm_map_entry_kreserve:
  407:  *
  408:  *	Reserve map entry structures for use in kernel_map or (if it exists)
  409:  *	kmem_map.  These entries have *ALREADY* been reserved on a per-cpu
  410:  *	basis when the map was inited.  This function is used by zalloc()
  411:  *	to avoid a recursion when zalloc() itself needs to allocate additional
  412:  *	kernel memory.
  413:  *
  414:  *	This function should only be used when the caller intends to later
  415:  *	call vm_map_entry_reserve() to 'normalize' the reserve cache.
  416:  */
  417: int
  418: vm_map_entry_kreserve(int count)
  419: {
  420: 	struct globaldata *gd = mycpu;
  421: 
  422: 	crit_enter();
  423: 	gd->gd_vme_kdeficit += count;
  424: 	crit_exit();
  425: 	KKASSERT(gd->gd_vme_base != NULL);
  426: 	return(count);
  427: }
  428: 
  429: /*
  430:  *	vm_map_entry_krelease:
  431:  *
  432:  *	Release previously reserved map entries for kernel_map or kmem_map
  433:  *	use.  This routine determines how many entries were actually used and
  434:  *	replenishes the kernel reserve supply from vme_avail.
  435:  *
  436:  *	If there is insufficient supply vme_avail will go negative, which is
  437:  *	ok.  We cannot safely call zalloc in this function without getting
  438:  *	into a recursion deadlock.  zalloc() will call vm_map_entry_reserve()
  439:  *	to regenerate the lost entries.
  440:  */
  441: void
  442: vm_map_entry_krelease(int count)
  443: {
  444: 	struct globaldata *gd = mycpu;
  445: 
  446: 	crit_enter();
  447: 	gd->gd_vme_kdeficit -= count;
  448: 	gd->gd_vme_avail -= gd->gd_vme_kdeficit;	/* can go negative */
  449: 	gd->gd_vme_kdeficit = 0;
  450: 	crit_exit();
  451: }
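/*
 * For reference, the reservation interfaces above are used in a bracketing
 * pattern by the routines later in this file (e.g. vm_map_find() and
 * vm_map_submap()):
 *
 *	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 *	vm_map_lock(map);
 *	... clip, insert or delete entries, passing &count ...
 *	vm_map_unlock(map);
 *	vm_map_entry_release(count);
 */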
  452: 
  453: /*
  454:  *	vm_map_entry_create:	[ internal use only ]
  455:  *
  456:  *	Allocates a VM map entry for insertion.  No entry fields are filled 
  457:  *	in.
  458:  *
  459:  *	This routine may be called from an interrupt thread but not a FAST
  460:  *	interrupt.  This routine may recurse the map lock.
  461:  */
  462: static vm_map_entry_t
  463: vm_map_entry_create(vm_map_t map, int *countp)
  464: {
  465: 	struct globaldata *gd = mycpu;
  466: 	vm_map_entry_t entry;
  467: 
  468: 	KKASSERT(*countp > 0);
  469: 	--*countp;
  470: 	crit_enter();
  471: 	entry = gd->gd_vme_base;
  472: 	KASSERT(entry != NULL, ("gd_vme_base NULL! count %d", *countp));
  473: 	gd->gd_vme_base = entry->next;
  474: 	crit_exit();
  475: 	return(entry);
  476: }
  477: 
  478: /*
  479:  *	vm_map_entry_dispose:	[ internal use only ]
  480:  *
  481:  *	Dispose of a vm_map_entry that is no longer being referenced.  This
  482:  *	function may be called from an interrupt.
  483:  */
  484: static void
  485: vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry, int *countp)
  486: {
  487: 	struct globaldata *gd = mycpu;
  488: 
  489: 	++*countp;
  490: 	crit_enter();
  491: 	entry->next = gd->gd_vme_base;
  492: 	gd->gd_vme_base = entry;
  493: 	crit_exit();
  494: }
  495: 
  496: 
  497: /*
  498:  *	vm_map_entry_{un,}link:
  499:  *
  500:  *	Insert/remove entries from maps.
  501:  */
  502: static __inline void
  503: vm_map_entry_link(vm_map_t map,
  504: 		  vm_map_entry_t after_where,
  505: 		  vm_map_entry_t entry)
  506: {
  507: 	map->nentries++;
  508: 	entry->prev = after_where;
  509: 	entry->next = after_where->next;
  510: 	entry->next->prev = entry;
  511: 	after_where->next = entry;
  512: }
  513: 
  514: static __inline void
  515: vm_map_entry_unlink(vm_map_t map,
  516: 		    vm_map_entry_t entry)
  517: {
  518: 	vm_map_entry_t prev;
  519: 	vm_map_entry_t next;
  520: 
  521: 	if (entry->eflags & MAP_ENTRY_IN_TRANSITION)
  522: 		panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry);
  523: 	prev = entry->prev;
  524: 	next = entry->next;
  525: 	next->prev = prev;
  526: 	prev->next = next;
  527: 	map->nentries--;
  528: }
  529: 
  530: /*
  531:  *	SAVE_HINT:
  532:  *
  533:  *	Saves the specified entry as the hint for
  534:  *	future lookups.
  535:  */
  536: #define	SAVE_HINT(map,value) \
  537: 		(map)->hint = (value);
  538: 
  539: /*
  540:  *	vm_map_lookup_entry:	[ internal use only ]
  541:  *
  542:  *	Finds the map entry containing (or
  543:  *	immediately preceding) the specified address
  544:  *	in the given map; the entry is returned
  545:  *	in the "entry" parameter.  The boolean
  546:  *	result indicates whether the address is
  547:  *	actually contained in the map.
  548:  */
  549: boolean_t
  550: vm_map_lookup_entry(map, address, entry)
  551: 	vm_map_t map;
  552: 	vm_offset_t address;
  553: 	vm_map_entry_t *entry;	/* OUT */
  554: {
  555: 	vm_map_entry_t cur;
  556: 	vm_map_entry_t last;
  557: 
  558: 	/*
  559: 	 * Start looking either from the head of the list, or from the hint.
  560: 	 */
  561: 
  562: 	cur = map->hint;
  563: 
  564: 	if (cur == &map->header)
  565: 		cur = cur->next;
  566: 
  567: 	if (address >= cur->start) {
  568: 		/*
  569: 		 * Go from hint to end of list.
  570: 		 *
  571: 		 * But first, make a quick check to see if we are already looking
  572: 		 * at the entry we want (which is usually the case). Note also
  573: 		 * that we don't need to save the hint here... it is the same
  574: 		 * hint (unless we are at the header, in which case the hint
  575: 		 * didn't buy us anything anyway).
  576: 		 */
  577: 		last = &map->header;
  578: 		if ((cur != last) && (cur->end > address)) {
  579: 			*entry = cur;
  580: 			return (TRUE);
  581: 		}
  582: 	} else {
  583: 		/*
  584: 		 * Go from start to hint, *inclusively*
  585: 		 */
  586: 		last = cur->next;
  587: 		cur = map->header.next;
  588: 	}
  589: 
  590: 	/*
  591: 	 * Search linearly
  592: 	 */
  593: 
  594: 	while (cur != last) {
  595: 		if (cur->end > address) {
  596: 			if (address >= cur->start) {
  597: 				/*
  598: 				 * Save this lookup for future hints, and
  599: 				 * return
  600: 				 */
  601: 
  602: 				*entry = cur;
  603: 				SAVE_HINT(map, cur);
  604: 				return (TRUE);
  605: 			}
  606: 			break;
  607: 		}
  608: 		cur = cur->next;
  609: 	}
  610: 	*entry = cur->prev;
  611: 	SAVE_HINT(map, *entry);
  612: 	return (FALSE);
  613: }
  614: 
  615: /*
  616:  *	vm_map_insert:
  617:  *
  618:  *	Inserts the given whole VM object into the target
  619:  *	map at the specified address range.  The object's
  620:  *	size should match that of the address range.
  621:  *
  622:  *	Requires that the map be locked, and leaves it so.  Requires that
  623:  *	sufficient vm_map_entry structures have been reserved and tracks
  624:  *	the use via countp.
  625:  *
  626:  *	If object is non-NULL, ref count must be bumped by caller
  627:  *	prior to making call to account for the new entry.
  628:  */
  629: int
  630: vm_map_insert(vm_map_t map, int *countp,
  631: 	      vm_object_t object, vm_ooffset_t offset,
  632: 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
  633: 	      int cow)
  634: {
  635: 	vm_map_entry_t new_entry;
  636: 	vm_map_entry_t prev_entry;
  637: 	vm_map_entry_t temp_entry;
  638: 	vm_eflags_t protoeflags;
  639: 
  640: 	/*
  641: 	 * Check that the start and end points are not bogus.
  642: 	 */
  643: 
  644: 	if ((start < map->min_offset) || (end > map->max_offset) ||
  645: 	    (start >= end))
  646: 		return (KERN_INVALID_ADDRESS);
  647: 
  648: 	/*
  649: 	 * Find the entry prior to the proposed starting address; if it's part
  650: 	 * of an existing entry, this range is bogus.
  651: 	 */
  652: 
  653: 	if (vm_map_lookup_entry(map, start, &temp_entry))
  654: 		return (KERN_NO_SPACE);
  655: 
  656: 	prev_entry = temp_entry;
  657: 
  658: 	/*
  659: 	 * Assert that the next entry doesn't overlap the end point.
  660: 	 */
  661: 
  662: 	if ((prev_entry->next != &map->header) &&
  663: 	    (prev_entry->next->start < end))
  664: 		return (KERN_NO_SPACE);
  665: 
  666: 	protoeflags = 0;
  667: 
  668: 	if (cow & MAP_COPY_ON_WRITE)
  669: 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
  670: 
  671: 	if (cow & MAP_NOFAULT) {
  672: 		protoeflags |= MAP_ENTRY_NOFAULT;
  673: 
  674: 		KASSERT(object == NULL,
  675: 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
  676: 	}
  677: 	if (cow & MAP_DISABLE_SYNCER)
  678: 		protoeflags |= MAP_ENTRY_NOSYNC;
  679: 	if (cow & MAP_DISABLE_COREDUMP)
  680: 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
  681: 
  682: 	if (object) {
  683: 		/*
  684: 		 * When object is non-NULL, it could be shared with another
  685: 		 * process.  We have to set or clear OBJ_ONEMAPPING 
  686: 		 * appropriately.
  687: 		 */
  688: 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
  689: 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
  690: 		}
  691: 	}
  692: 	else if ((prev_entry != &map->header) &&
  693: 		 (prev_entry->eflags == protoeflags) &&
  694: 		 (prev_entry->end == start) &&
  695: 		 (prev_entry->wired_count == 0) &&
  696: 		 ((prev_entry->object.vm_object == NULL) ||
  697: 		  vm_object_coalesce(prev_entry->object.vm_object,
  698: 				     OFF_TO_IDX(prev_entry->offset),
  699: 				     (vm_size_t)(prev_entry->end - prev_entry->start),
  700: 				     (vm_size_t)(end - prev_entry->end)))) {
  701: 		/*
  702: 		 * We were able to extend the object.  Determine if we
  703: 		 * can extend the previous map entry to include the 
  704: 		 * new range as well.
  705: 		 */
  706: 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
  707: 		    (prev_entry->protection == prot) &&
  708: 		    (prev_entry->max_protection == max)) {
  709: 			map->size += (end - prev_entry->end);
  710: 			prev_entry->end = end;
  711: 			vm_map_simplify_entry(map, prev_entry, countp);
  712: 			return (KERN_SUCCESS);
  713: 		}
  714: 
  715: 		/*
  716: 		 * If we can extend the object but cannot extend the
  717: 		 * map entry, we have to create a new map entry.  We
  718: 		 * must bump the ref count on the extended object to
  719: 		 * account for it.  object may be NULL.
  720: 		 */
  721: 		object = prev_entry->object.vm_object;
  722: 		offset = prev_entry->offset +
  723: 			(prev_entry->end - prev_entry->start);
  724: 		vm_object_reference(object);
  725: 	}
  726: 
  727: 	/*
  728: 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
  729: 	 * in things like the buffer map where we manage kva but do not manage
  730: 	 * backing objects.
  731: 	 */
  732: 
  733: 	/*
  734: 	 * Create a new entry
  735: 	 */
  736: 
  737: 	new_entry = vm_map_entry_create(map, countp);
  738: 	new_entry->start = start;
  739: 	new_entry->end = end;
  740: 
  741: 	new_entry->eflags = protoeflags;
  742: 	new_entry->object.vm_object = object;
  743: 	new_entry->offset = offset;
  744: 	new_entry->avail_ssize = 0;
  745: 
  746: 	new_entry->inheritance = VM_INHERIT_DEFAULT;
  747: 	new_entry->protection = prot;
  748: 	new_entry->max_protection = max;
  749: 	new_entry->wired_count = 0;
  750: 
  751: 	/*
  752: 	 * Insert the new entry into the list
  753: 	 */
  754: 
  755: 	vm_map_entry_link(map, prev_entry, new_entry);
  756: 	map->size += new_entry->end - new_entry->start;
  757: 
  758: 	/*
  759: 	 * Update the free space hint
  760: 	 */
  761: 	if ((map->first_free == prev_entry) &&
  762: 	    (prev_entry->end >= new_entry->start)) {
  763: 		map->first_free = new_entry;
  764: 	}
  765: 
  766: #if 0
  767: 	/*
  768: 	 * Temporarily removed to avoid MAP_STACK panic, due to
  769: 	 * MAP_STACK being a huge hack.  Will be added back in
  770: 	 * when MAP_STACK (and the user stack mapping) is fixed.
  771: 	 */
  772: 	/*
  773: 	 * It may be possible to simplify the entry
  774: 	 */
  775: 	vm_map_simplify_entry(map, new_entry, countp);
  776: #endif
  777: 
  778: 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
  779: 		pmap_object_init_pt(map->pmap, start,
  780: 				    object, OFF_TO_IDX(offset), end - start,
  781: 				    cow & MAP_PREFAULT_PARTIAL);
  782: 	}
  783: 
  784: 	return (KERN_SUCCESS);
  785: }
  786: 
  787: /*
  788:  * Find sufficient space for `length' bytes in the given map, starting at
  789:  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
  790:  *
  791:  * This function will return an arbitrarily aligned pointer.  If no
  792:  * particular alignment is required you should pass align as 1.  Note that
  793:  * the map may return PAGE_SIZE aligned pointers if all the lengths used in
  794:  * the map are a multiple of PAGE_SIZE, even if you pass a smaller align
  795:  * argument.
  796:  *
  797:  * 'align' should be a power of 2 but is not required to be.
  798:  */
  799: int
  800: vm_map_findspace(
  801: 	vm_map_t map,
  802: 	vm_offset_t start,
  803: 	vm_size_t length,
  804: 	vm_offset_t align,
  805: 	vm_offset_t *addr)
  806: {
  807: 	vm_map_entry_t entry, next;
  808: 	vm_offset_t end;
  809: 	vm_offset_t align_mask;
  810: 
  811: 	if (start < map->min_offset)
  812: 		start = map->min_offset;
  813: 	if (start > map->max_offset)
  814: 		return (1);
  815: 
  816: 	/*
  817: 	 * If the alignment is not a power of 2 we will have to use
  818: 	 * a mod/division, set align_mask to a special value.
  819: 	 */
  820: 	if ((align | (align - 1)) + 1 != (align << 1))
  821: 		align_mask = (vm_offset_t)-1;
  822: 	else
  823: 		align_mask = align - 1;
  824: 
  825: retry:
  826: 	/*
  827: 	 * Look for the first possible address; if there's already something
  828: 	 * at this address, we have to start after it.
  829: 	 */
  830: 	if (start == map->min_offset) {
  831: 		if ((entry = map->first_free) != &map->header)
  832: 			start = entry->end;
  833: 	} else {
  834: 		vm_map_entry_t tmp;
  835: 
  836: 		if (vm_map_lookup_entry(map, start, &tmp))
  837: 			start = tmp->end;
  838: 		entry = tmp;
  839: 	}
  840: 
  841: 	/*
  842: 	 * Look through the rest of the map, trying to fit a new region in the
  843: 	 * gap between existing regions, or after the very last region.
  844: 	 */
  845: 	for (;; start = (entry = next)->end) {
  846: 		/*
  847: 		 * Adjust the proposed start by the requested alignment, and
  848: 		 * be sure that we didn't wrap the address.
  849: 		 */
  850: 		if (align_mask == (vm_offset_t)-1)
  851: 			end = ((start + align - 1) / align) * align;
  852: 		else
  853: 			end = (start + align_mask) & ~align_mask;
  854: 		if (end < start)
  855: 			return (1);
  856: 		start = end;
  857: 		/*
  858: 		 * Find the end of the proposed new region.  Be sure we didn't
  859: 		 * go beyond the end of the map, or wrap around the address.
  860: 		 * Then check to see if this is the last entry or if the 
  861: 		 * proposed end fits in the gap between this and the next
  862: 		 * entry.
  863: 		 */
  864: 		end = start + length;
  865: 		if (end > map->max_offset || end < start)
  866: 			return (1);
  867: 		next = entry->next;
  868: 		if (next == &map->header || next->start >= end)
  869: 			break;
  870: 	}
  871: 	SAVE_HINT(map, entry);
  872: 	if (map == kernel_map) {
  873: 		vm_offset_t ksize;
  874: 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
  875: 			pmap_growkernel(ksize);
  876: 			goto retry;
  877: 		}
  878: 	}
  879: 	*addr = start;
  880: 	return (0);
  881: }
  882: 
  883: /*
  884:  *	vm_map_find finds an unallocated region in the target address
  885:  *	map with the given length.  The search is defined to be
  886:  *	first-fit from the specified address; the region found is
  887:  *	returned in the same parameter.
  888:  *
  889:  *	If object is non-NULL, ref count must be bumped by caller
  890:  *	prior to making call to account for the new entry.
  891:  */
  892: int
  893: vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
  894: 	    vm_offset_t *addr,	/* IN/OUT */
  895: 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
  896: 	    vm_prot_t max, int cow)
  897: {
  898: 	vm_offset_t start;
  899: 	int result;
  900: 	int count;
  901: 
  902: 	start = *addr;
  903: 
  904: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  905: 	vm_map_lock(map);
  906: 	if (find_space) {
  907: 		if (vm_map_findspace(map, start, length, 1, addr)) {
  908: 			vm_map_unlock(map);
  909: 			vm_map_entry_release(count);
  910: 			return (KERN_NO_SPACE);
  911: 		}
  912: 		start = *addr;
  913: 	}
  914: 	result = vm_map_insert(map, &count, object, offset,
  915: 		start, start + length, prot, max, cow);
  916: 	vm_map_unlock(map);
  917: 	vm_map_entry_release(count);
  918: 
  919: 	return (result);
  920: }
  921: 
  922: /*
  923:  *	vm_map_simplify_entry:
  924:  *
  925:  *	Simplify the given map entry by merging with either neighbor.  This
  926:  *	routine also has the ability to merge with both neighbors.
  927:  *
  928:  *	The map must be locked.
  929:  *
  930:  *	This routine guarantees that the passed entry remains valid (though
  931:  *	possibly extended).  When merging, this routine may delete one or
  932:  *	both neighbors.  No action is taken on entries which have their
  933:  *	in-transition flag set.
  934:  */
  935: void
  936: vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp)
  937: {
  938: 	vm_map_entry_t next, prev;
  939: 	vm_size_t prevsize, esize;
  940: 
  941: 	if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) {
  942: 		++mycpu->gd_cnt.v_intrans_coll;
  943: 		return;
  944: 	}
  945: 
  946: 	prev = entry->prev;
  947: 	if (prev != &map->header) {
  948: 		prevsize = prev->end - prev->start;
  949: 		if ( (prev->end == entry->start) &&
  950: 		     (prev->object.vm_object == entry->object.vm_object) &&
  951: 		     (!prev->object.vm_object ||
  952: 			(prev->offset + prevsize == entry->offset)) &&
  953: 		     (prev->eflags == entry->eflags) &&
  954: 		     (prev->protection == entry->protection) &&
  955: 		     (prev->max_protection == entry->max_protection) &&
  956: 		     (prev->inheritance == entry->inheritance) &&
  957: 		     (prev->wired_count == entry->wired_count)) {
  958: 			if (map->first_free == prev)
  959: 				map->first_free = entry;
  960: 			if (map->hint == prev)
  961: 				map->hint = entry;
  962: 			vm_map_entry_unlink(map, prev);
  963: 			entry->start = prev->start;
  964: 			entry->offset = prev->offset;
  965: 			if (prev->object.vm_object)
  966: 				vm_object_deallocate(prev->object.vm_object);
  967: 			vm_map_entry_dispose(map, prev, countp);
  968: 		}
  969: 	}
  970: 
  971: 	next = entry->next;
  972: 	if (next != &map->header) {
  973: 		esize = entry->end - entry->start;
  974: 		if ((entry->end == next->start) &&
  975: 		    (next->object.vm_object == entry->object.vm_object) &&
  976: 		     (!entry->object.vm_object ||
  977: 			(entry->offset + esize == next->offset)) &&
  978: 		    (next->eflags == entry->eflags) &&
  979: 		    (next->protection == entry->protection) &&
  980: 		    (next->max_protection == entry->max_protection) &&
  981: 		    (next->inheritance == entry->inheritance) &&
  982: 		    (next->wired_count == entry->wired_count)) {
  983: 			if (map->first_free == next)
  984: 				map->first_free = entry;
  985: 			if (map->hint == next)
  986: 				map->hint = entry;
  987: 			vm_map_entry_unlink(map, next);
  988: 			entry->end = next->end;
  989: 			if (next->object.vm_object)
  990: 				vm_object_deallocate(next->object.vm_object);
  991: 			vm_map_entry_dispose(map, next, countp);
  992: 	        }
  993: 	}
  994: }
  995: /*
  996:  *	vm_map_clip_start:	[ internal use only ]
  997:  *
  998:  *	Asserts that the given entry begins at or after
  999:  *	the specified address; if necessary,
 1000:  *	it splits the entry into two.
 1001:  */
 1002: #define vm_map_clip_start(map, entry, startaddr, countp) \
 1003: { \
 1004: 	if (startaddr > entry->start) \
 1005: 		_vm_map_clip_start(map, entry, startaddr, countp); \
 1006: }
 1007: 
 1008: /*
 1009:  *	This routine is called only when it is known that
 1010:  *	the entry must be split.
 1011:  */
 1012: static void
 1013: _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start, int *countp)
 1014: {
 1015: 	vm_map_entry_t new_entry;
 1016: 
 1017: 	/*
 1018: 	 * Split off the front portion -- note that we must insert the new
 1019: 	 * entry BEFORE this one, so that this entry has the specified
 1020: 	 * starting address.
 1021: 	 */
 1022: 
 1023: 	vm_map_simplify_entry(map, entry, countp);
 1024: 
 1025: 	/*
 1026: 	 * If there is no object backing this entry, we might as well create
 1027: 	 * one now.  If we defer it, an object can get created after the map
 1028: 	 * is clipped, and individual objects will be created for the split-up
 1029: 	 * map.  This is a bit of a hack, but is also about the best place to
 1030: 	 * put this improvement.
 1031: 	 */
 1032: 
 1033: 	if (entry->object.vm_object == NULL && !map->system_map) {
 1034: 		vm_object_t object;
 1035: 		object = vm_object_allocate(OBJT_DEFAULT,
 1036: 				atop(entry->end - entry->start));
 1037: 		entry->object.vm_object = object;
 1038: 		entry->offset = 0;
 1039: 	}
 1040: 
 1041: 	new_entry = vm_map_entry_create(map, countp);
 1042: 	*new_entry = *entry;
 1043: 
 1044: 	new_entry->end = start;
 1045: 	entry->offset += (start - entry->start);
 1046: 	entry->start = start;
 1047: 
 1048: 	vm_map_entry_link(map, entry->prev, new_entry);
 1049: 
 1050: 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1051: 		vm_object_reference(new_entry->object.vm_object);
 1052: 	}
 1053: }
 1054: 
 1055: /*
 1056:  *	vm_map_clip_end:	[ internal use only ]
 1057:  *
 1058:  *	Asserts that the given entry ends at or before
 1059:  *	the specified address; if necessary,
 1060:  *	it splits the entry into two.
 1061:  */
 1062: 
 1063: #define vm_map_clip_end(map, entry, endaddr, countp) \
 1064: { \
 1065: 	if (endaddr < entry->end) \
 1066: 		_vm_map_clip_end(map, entry, endaddr, countp); \
 1067: }
 1068: 
 1069: /*
 1070:  *	This routine is called only when it is known that
 1071:  *	the entry must be split.
 1072:  */
 1073: static void
 1074: _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end, int *countp)
 1075: {
 1076: 	vm_map_entry_t new_entry;
 1077: 
 1078: 	/*
 1079: 	 * If there is no object backing this entry, we might as well create
 1080: 	 * one now.  If we defer it, an object can get created after the map
 1081: 	 * is clipped, and individual objects will be created for the split-up
 1082: 	 * map.  This is a bit of a hack, but is also about the best place to
 1083: 	 * put this improvement.
 1084: 	 */
 1085: 
 1086: 	if (entry->object.vm_object == NULL && !map->system_map) {
 1087: 		vm_object_t object;
 1088: 		object = vm_object_allocate(OBJT_DEFAULT,
 1089: 				atop(entry->end - entry->start));
 1090: 		entry->object.vm_object = object;
 1091: 		entry->offset = 0;
 1092: 	}
 1093: 
 1094: 	/*
 1095: 	 * Create a new entry and insert it AFTER the specified entry
 1096: 	 */
 1097: 
 1098: 	new_entry = vm_map_entry_create(map, countp);
 1099: 	*new_entry = *entry;
 1100: 
 1101: 	new_entry->start = entry->end = end;
 1102: 	new_entry->offset += (end - entry->start);
 1103: 
 1104: 	vm_map_entry_link(map, entry, new_entry);
 1105: 
 1106: 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1107: 		vm_object_reference(new_entry->object.vm_object);
 1108: 	}
 1109: }
 1110: 
 1111: /*
 1112:  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 1113:  *
 1114:  *	Asserts that the starting and ending region
 1115:  *	addresses fall within the valid range of the map.
 1116:  */
 1117: #define	VM_MAP_RANGE_CHECK(map, start, end)		\
 1118: 		{					\
 1119: 		if (start < vm_map_min(map))		\
 1120: 			start = vm_map_min(map);	\
 1121: 		if (end > vm_map_max(map))		\
 1122: 			end = vm_map_max(map);		\
 1123: 		if (start > end)			\
 1124: 			start = end;			\
 1125: 		}
 1126: 
 1127: /*
 1128:  *	vm_map_transition_wait:	[ kernel use only ]
 1129:  *
 1130:  *	Used to block when an in-transition collision occurs.  The map
 1131:  *	is unlocked for the sleep and relocked before the return.
 1132:  */
 1133: static
 1134: void
 1135: vm_map_transition_wait(vm_map_t map)
 1136: {
 1137: 	vm_map_unlock(map);
 1138: 	tsleep(map, 0, "vment", 0);
 1139: 	vm_map_lock(map);
 1140: }
 1141: 
 1142: /*
 1143:  * CLIP_CHECK_BACK
 1144:  * CLIP_CHECK_FWD
 1145:  *
 1146:  *	When we do blocking operations with the map lock held it is
 1147:  *	possible that a clip might have occurred on our in-transition entry,
 1148:  *	requiring an adjustment to the entry in our loop.  These macros
 1149:  *	help the pageable and clip_range code deal with the case.  The
 1150:  *	conditional costs virtually nothing if no clipping has occurred.
 1151:  */
 1152: 
 1153: #define CLIP_CHECK_BACK(entry, save_start)		\
 1154:     do {						\
 1155: 	    while (entry->start != save_start) {	\
 1156: 		    entry = entry->prev;		\
 1157: 		    KASSERT(entry != &map->header, ("bad entry clip")); \
 1158: 	    }						\
 1159:     } while(0)
 1160: 
 1161: #define CLIP_CHECK_FWD(entry, save_end)			\
 1162:     do {						\
 1163: 	    while (entry->end != save_end) {		\
 1164: 		    entry = entry->next;		\
 1165: 		    KASSERT(entry != &map->header, ("bad entry clip")); \
 1166: 	    }						\
 1167:     } while(0)
 1168: 
 1169: 
 1170: /*
 1171:  *	vm_map_clip_range:	[ kernel use only ]
 1172:  *
 1173:  *	Clip the specified range and return the base entry.  The
 1174:  *	range may cover several entries starting at the returned base
 1175:  *	and the first and last entry in the covering sequence will be
 1176:  *	properly clipped to the requested start and end address.
 1177:  *
 1178:  *	If no holes are allowed you should pass the MAP_CLIP_NO_HOLES
 1179:  *	flag.  
 1180:  *
 1181:  *	The MAP_ENTRY_IN_TRANSITION flag will be set for the entries
 1182:  *	covered by the requested range.
 1183:  *
 1184:  *	The map must be exclusively locked on entry and will remain locked
 1185:  *	on return. If no range exists or the range contains holes and you
 1186:  *	specified that no holes were allowed, NULL will be returned.  This
 1187:  *	routine may temporarily unlock the map in order to avoid a deadlock when
 1188:  *	sleeping.
 1189:  */
 1190: static
 1191: vm_map_entry_t
 1192: vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, 
 1193: 	int *countp, int flags)
 1194: {
 1195: 	vm_map_entry_t start_entry;
 1196: 	vm_map_entry_t entry;
 1197: 
 1198: 	/*
 1199: 	 * Locate the entry and effect initial clipping.  The in-transition
 1200: 	 * case does not occur very often so do not try to optimize it.
 1201: 	 */
 1202: again:
 1203: 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE)
 1204: 		return (NULL);
 1205: 	entry = start_entry;
 1206: 	if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 1207: 		entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 1208: 		++mycpu->gd_cnt.v_intrans_coll;
 1209: 		++mycpu->gd_cnt.v_intrans_wait;
 1210: 		vm_map_transition_wait(map);
 1211: 		/*
 1212: 		 * entry and/or start_entry may have been clipped while
 1213: 		 * we slept, or may have gone away entirely.  We have
 1214: 		 * to restart from the lookup.
 1215: 		 */
 1216: 		goto again;
 1217: 	}
 1218: 	/*
 1219: 	 * Since we hold an exclusive map lock we do not have to restart
 1220: 	 * after clipping, even though clipping may block in zalloc.
 1221: 	 */
 1222: 	vm_map_clip_start(map, entry, start, countp);
 1223: 	vm_map_clip_end(map, entry, end, countp);
 1224: 	entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 1225: 
 1226: 	/*
 1227: 	 * Scan entries covered by the range.  When working on the next
 1228: 	 * entry a restart need only re-loop on the current entry which
 1229: 	 * we have already locked, since 'next' may have changed.  Also,
 1230: 	 * even though entry is safe, it may have been clipped so we
 1231: 	 * have to iterate forwards through the clip after sleeping.
 1232: 	 */
 1233: 	while (entry->next != &map->header && entry->next->start < end) {
 1234: 		vm_map_entry_t next = entry->next;
 1235: 
 1236: 		if (flags & MAP_CLIP_NO_HOLES) {
 1237: 			if (next->start > entry->end) {
 1238: 				vm_map_unclip_range(map, start_entry,
 1239: 					start, entry->end, countp, flags);
 1240: 				return(NULL);
 1241: 			}
 1242: 		}
 1243: 
 1244: 		if (next->eflags & MAP_ENTRY_IN_TRANSITION) {
 1245: 			vm_offset_t save_end = entry->end;
 1246: 			next->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 1247: 			++mycpu->gd_cnt.v_intrans_coll;
 1248: 			++mycpu->gd_cnt.v_intrans_wait;
 1249: 			vm_map_transition_wait(map);
 1250: 
 1251: 			/*
 1252: 			 * clips might have occurred while we blocked.
 1253: 			 */
 1254: 			CLIP_CHECK_FWD(entry, save_end);
 1255: 			CLIP_CHECK_BACK(start_entry, start);
 1256: 			continue;
 1257: 		}
 1258: 		/*
 1259: 		 * No restart necessary even though clip_end may block, we
 1260: 		 * are holding the map lock.
 1261: 		 */
 1262: 		vm_map_clip_end(map, next, end, countp);
 1263: 		next->eflags |= MAP_ENTRY_IN_TRANSITION;
 1264: 		entry = next;
 1265: 	}
 1266: 	if (flags & MAP_CLIP_NO_HOLES) {
 1267: 		if (entry->end != end) {
 1268: 			vm_map_unclip_range(map, start_entry,
 1269: 				start, entry->end, countp, flags);
 1270: 			return(NULL);
 1271: 		}
 1272: 	}
 1273: 	return(start_entry);
 1274: }
 1275: 
 1276: /*
 1277:  *	vm_map_unclip_range:	[ kernel use only ]
 1278:  *
 1279:  *	Undo the effect of vm_map_clip_range().  You should pass the same
 1280:  *	flags and the same range that you passed to vm_map_clip_range().
 1281:  *	This code will clear the in-transition flag on the entries and
 1282:  *	wake up anyone waiting.  This code will also simplify the sequence 
 1283:  *	and attempt to merge it with entries before and after the sequence.
 1284:  *
 1285:  *	The map must be locked on entry and will remain locked on return.
 1286:  *
 1287:  *	Note that you should also pass the start_entry returned by 
 1288:  *	vm_map_clip_range().  However, if you block between the two calls
 1289:  *	with the map unlocked please be aware that the start_entry may
 1290:  *	have been clipped and you may need to scan it backwards to find
 1291:  *	the entry corresponding with the original start address.  You are
 1292:  *	responsible for this, vm_map_unclip_range() expects the correct
 1293:  *	start_entry to be passed to it and will KASSERT otherwise.
 1294:  */
 1295: static
 1296: void
 1297: vm_map_unclip_range(
 1298: 	vm_map_t map,
 1299: 	vm_map_entry_t start_entry,
 1300: 	vm_offset_t start,
 1301: 	vm_offset_t end,
 1302: 	int *countp,
 1303: 	int flags)
 1304: {
 1305: 	vm_map_entry_t entry;
 1306: 
 1307: 	entry = start_entry;
 1308: 
 1309: 	KASSERT(entry->start == start, ("unclip_range: illegal base entry"));
 1310: 	while (entry != &map->header && entry->start < end) {
 1311: 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("in-transition flag not set during unclip on: %p", entry));
 1312: 		KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped"));
 1313: 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
 1314: 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 1315: 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 1316: 			wakeup(map);
 1317: 		}
 1318: 		entry = entry->next;
 1319: 	}
 1320: 
 1321: 	/*
 1322: 	 * Simplification does not block so there is no restart case.
 1323: 	 */
 1324: 	entry = start_entry;
 1325: 	while (entry != &map->header && entry->start < end) {
 1326: 		vm_map_simplify_entry(map, entry, countp);
 1327: 		entry = entry->next;
 1328: 	}
 1329: }
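/*
 * Per the contract above, callers bracket a multi-entry operation with the
 * two routines, passing the same range and flags to both.  A sketch of the
 * usage (vm_map_unwire() below follows this pattern):
 *
 *	start_entry = vm_map_clip_range(map, start, end, &count,
 *					MAP_CLIP_NO_HOLES);
 *	if (start_entry == NULL)
 *		return (KERN_INVALID_ADDRESS);
 *	... operate on the in-transition entries ...
 *	vm_map_unclip_range(map, start_entry, start, end, &count,
 *			    MAP_CLIP_NO_HOLES);
 */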
 1330: 
 1331: /*
 1332:  *	vm_map_submap:		[ kernel use only ]
 1333:  *
 1334:  *	Mark the given range as handled by a subordinate map.
 1335:  *
 1336:  *	This range must have been created with vm_map_find,
 1337:  *	and no other operations may have been performed on this
 1338:  *	range prior to calling vm_map_submap.
 1339:  *
 1340:  *	Only a limited number of operations can be performed
 1341:  *	within this range after calling vm_map_submap:
 1342:  *		vm_fault
 1343:  *	[Don't try vm_map_copy!]
 1344:  *
 1345:  *	To remove a submapping, one must first remove the
 1346:  *	range from the superior map, and then destroy the
 1347:  *	submap (if desired).  [Better yet, don't try it.]
 1348:  */
 1349: int
 1350: vm_map_submap(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap)
 1351: {
 1352: 	vm_map_entry_t entry;
 1353: 	int result = KERN_INVALID_ARGUMENT;
 1354: 	int count;
 1355: 
 1356: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 1357: 	vm_map_lock(map);
 1358: 
 1359: 	VM_MAP_RANGE_CHECK(map, start, end);
 1360: 
 1361: 	if (vm_map_lookup_entry(map, start, &entry)) {
 1362: 		vm_map_clip_start(map, entry, start, &count);
 1363: 	} else {
 1364: 		entry = entry->next;
 1365: 	}
 1366: 
 1367: 	vm_map_clip_end(map, entry, end, &count);
 1368: 
 1369: 	if ((entry->start == start) && (entry->end == end) &&
 1370: 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
 1371: 	    (entry->object.vm_object == NULL)) {
 1372: 		entry->object.sub_map = submap;
 1373: 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 1374: 		result = KERN_SUCCESS;
 1375: 	}
 1376: 	vm_map_unlock(map);
 1377: 	vm_map_entry_release(count);
 1378: 
 1379: 	return (result);
 1380: }
 1381: 
 1382: /*
 1383:  *	vm_map_protect:
 1384:  *
 1385:  *	Sets the protection of the specified address
 1386:  *	region in the target map.  If "set_max" is
 1387:  *	specified, the maximum protection is to be set;
 1388:  *	otherwise, only the current protection is affected.
 1389:  */
 1390: int
 1391: vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1392: 	       vm_prot_t new_prot, boolean_t set_max)
 1393: {
 1394: 	vm_map_entry_t current;
 1395: 	vm_map_entry_t entry;
 1396: 	int count;
 1397: 
 1398: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 1399: 	vm_map_lock(map);
 1400: 
 1401: 	VM_MAP_RANGE_CHECK(map, start, end);
 1402: 
 1403: 	if (vm_map_lookup_entry(map, start, &entry)) {
 1404: 		vm_map_clip_start(map, entry, start, &count);
 1405: 	} else {
 1406: 		entry = entry->next;
 1407: 	}
 1408: 
 1409: 	/*
 1410: 	 * Make a first pass to check for protection violations.
 1411: 	 */
 1412: 
 1413: 	current = entry;
 1414: 	while ((current != &map->header) && (current->start < end)) {
 1415: 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1416: 			vm_map_unlock(map);
 1417: 			vm_map_entry_release(count);
 1418: 			return (KERN_INVALID_ARGUMENT);
 1419: 		}
 1420: 		if ((new_prot & current->max_protection) != new_prot) {
 1421: 			vm_map_unlock(map);
 1422: 			vm_map_entry_release(count);
 1423: 			return (KERN_PROTECTION_FAILURE);
 1424: 		}
 1425: 		current = current->next;
 1426: 	}
 1427: 
 1428: 	/*
 1429: 	 * Go back and fix up protections. [Note that clipping is not
 1430: 	 * necessary the second time.]
 1431: 	 */
 1432: 	current = entry;
 1433: 
 1434: 	while ((current != &map->header) && (current->start < end)) {
 1435: 		vm_prot_t old_prot;
 1436: 
 1437: 		vm_map_clip_end(map, current, end, &count);
 1438: 
 1439: 		old_prot = current->protection;
 1440: 		if (set_max)
 1441: 			current->protection =
 1442: 			    (current->max_protection = new_prot) &
 1443: 			    old_prot;
 1444: 		else
 1445: 			current->protection = new_prot;
 1446: 
 1447: 		/*
 1448: 		 * Update physical map if necessary. Worry about copy-on-write
 1449: 		 * here -- CHECK THIS XXX
 1450: 		 */
 1451: 
 1452: 		if (current->protection != old_prot) {
 1453: #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 1454: 							VM_PROT_ALL)
 1455: 
 1456: 			pmap_protect(map->pmap, current->start,
 1457: 			    current->end,
 1458: 			    current->protection & MASK(current));
 1459: #undef	MASK
 1460: 		}
 1461: 
 1462: 		vm_map_simplify_entry(map, current, &count);
 1463: 
 1464: 		current = current->next;
 1465: 	}
 1466: 
 1467: 	vm_map_unlock(map);
 1468: 	vm_map_entry_release(count);
 1469: 	return (KERN_SUCCESS);
 1470: }
 1471: 
 1472: /*
 1473:  *	vm_map_madvise:
 1474:  *
 1475:  * 	This routine traverses a process's map, handling the madvise
 1476:  *	system call.  Advisories are classified as either those affecting
 1477:  *	the vm_map_entry structure, or those affecting the underlying 
 1478:  *	objects.
 1479:  */
 1480: 
 1481: int
 1482: vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, int behav)
 1483: {
 1484: 	vm_map_entry_t current, entry;
 1485: 	int modify_map = 0;
 1486: 	int count;
 1487: 
 1488: 	/*
 1489: 	 * Some madvise calls directly modify the vm_map_entry, in which case
 1490: 	 * we need to use an exclusive lock on the map and we need to perform 
 1491: 	 * various clipping operations.  Otherwise we only need a read-lock
 1492: 	 * on the map.
 1493: 	 */
 1494: 
 1495: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 1496: 
 1497: 	switch(behav) {
 1498: 	case MADV_NORMAL:
 1499: 	case MADV_SEQUENTIAL:
 1500: 	case MADV_RANDOM:
 1501: 	case MADV_NOSYNC:
 1502: 	case MADV_AUTOSYNC:
 1503: 	case MADV_NOCORE:
 1504: 	case MADV_CORE:
 1505: 		modify_map = 1;
 1506: 		vm_map_lock(map);
 1507: 		break;
 1508: 	case MADV_WILLNEED:
 1509: 	case MADV_DONTNEED:
 1510: 	case MADV_FREE:
 1511: 		vm_map_lock_read(map);
 1512: 		break;
 1513: 	default:
 1514: 		vm_map_entry_release(count);
 1515: 		return (KERN_INVALID_ARGUMENT);
 1516: 	}
 1517: 
 1518: 	/*
 1519: 	 * Locate starting entry and clip if necessary.
 1520: 	 */
 1521: 
 1522: 	VM_MAP_RANGE_CHECK(map, start, end);
 1523: 
 1524: 	if (vm_map_lookup_entry(map, start, &entry)) {
 1525: 		if (modify_map)
 1526: 			vm_map_clip_start(map, entry, start, &count);
 1527: 	} else {
 1528: 		entry = entry->next;
 1529: 	}
 1530: 
 1531: 	if (modify_map) {
 1532: 		/*
 1533: 		 * madvise behaviors that are implemented in the vm_map_entry.
 1534: 		 *
 1535: 		 * We clip the vm_map_entry so that behavioral changes are
 1536: 		 * limited to the specified address range.
 1537: 		 */
 1538: 		for (current = entry;
 1539: 		     (current != &map->header) && (current->start < end);
 1540: 		     current = current->next
 1541: 		) {
 1542: 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 1543: 				continue;
 1544: 
 1545: 			vm_map_clip_end(map, current, end, &count);
 1546: 
 1547: 			switch (behav) {
 1548: 			case MADV_NORMAL:
 1549: 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
 1550: 				break;
 1551: 			case MADV_SEQUENTIAL:
 1552: 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
 1553: 				break;
 1554: 			case MADV_RANDOM:
 1555: 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
 1556: 				break;
 1557: 			case MADV_NOSYNC:
 1558: 				current->eflags |= MAP_ENTRY_NOSYNC;
 1559: 				break;
 1560: 			case MADV_AUTOSYNC:
 1561: 				current->eflags &= ~MAP_ENTRY_NOSYNC;
 1562: 				break;
 1563: 			case MADV_NOCORE:
 1564: 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
 1565: 				break;
 1566: 			case MADV_CORE:
 1567: 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
 1568: 				break;
 1569: 			default:
 1570: 				break;
 1571: 			}
 1572: 			vm_map_simplify_entry(map, current, &count);
 1573: 		}
 1574: 		vm_map_unlock(map);
 1575: 	} else {
 1576: 		vm_pindex_t pindex;
 1577: 		int count;
 1578: 
 1579: 		/*
 1580: 		 * madvise behaviors that are implemented in the underlying
 1581: 		 * vm_object.
 1582: 		 *
 1583: 		 * Since we don't clip the vm_map_entry, we have to clip
 1584: 		 * the vm_object pindex and count.
 1585: 		 */
 1586: 		for (current = entry;
 1587: 		     (current != &map->header) && (current->start < end);
 1588: 		     current = current->next
 1589: 		) {
 1590: 			vm_offset_t useStart;
 1591: 
 1592: 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 1593: 				continue;
 1594: 
 1595: 			pindex = OFF_TO_IDX(current->offset);
 1596: 			count = atop(current->end - current->start);
 1597: 			useStart = current->start;
 1598: 
 1599: 			if (current->start < start) {
 1600: 				pindex += atop(start - current->start);
 1601: 				count -= atop(start - current->start);
 1602: 				useStart = start;
 1603: 			}
 1604: 			if (current->end > end)
 1605: 				count -= atop(current->end - end);
 1606: 
 1607: 			if (count <= 0)
 1608: 				continue;
 1609: 
 1610: 			vm_object_madvise(current->object.vm_object,
 1611: 					  pindex, count, behav);
 1612: 			if (behav == MADV_WILLNEED) {
 1613: 				pmap_object_init_pt(
 1614: 				    map->pmap, 
 1615: 				    useStart,
 1616: 				    current->object.vm_object,
 1617: 				    pindex, 
 1618: 				    (count << PAGE_SHIFT),
 1619: 				    MAP_PREFAULT_MADVISE
 1620: 				);
 1621: 			}
 1622: 		}
 1623: 		vm_map_unlock_read(map);
 1624: 	}
 1625: 	vm_map_entry_release(count);
 1626: 	return(0);
 1627: }	
 1628: 
 1629: 
 1630: /*
 1631:  *	vm_map_inherit:
 1632:  *
 1633:  *	Sets the inheritance of the specified address
 1634:  *	range in the target map.  Inheritance
 1635:  *	affects how the map will be shared with
 1636:  *	child maps at the time of vm_map_fork.
 1637:  */
 1638: int
 1639: vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1640: 	       vm_inherit_t new_inheritance)
 1641: {
 1642: 	vm_map_entry_t entry;
 1643: 	vm_map_entry_t temp_entry;
 1644: 	int count;
 1645: 
 1646: 	switch (new_inheritance) {
 1647: 	case VM_INHERIT_NONE:
 1648: 	case VM_INHERIT_COPY:
 1649: 	case VM_INHERIT_SHARE:
 1650: 		break;
 1651: 	default:
 1652: 		return (KERN_INVALID_ARGUMENT);
 1653: 	}
 1654: 
 1655: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 1656: 	vm_map_lock(map);
 1657: 
 1658: 	VM_MAP_RANGE_CHECK(map, start, end);
 1659: 
 1660: 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
 1661: 		entry = temp_entry;
 1662: 		vm_map_clip_start(map, entry, start, &count);
 1663: 	} else
 1664: 		entry = temp_entry->next;
 1665: 
 1666: 	while ((entry != &map->header) && (entry->start < end)) {
 1667: 		vm_map_clip_end(map, entry, end, &count);
 1668: 
 1669: 		entry->inheritance = new_inheritance;
 1670: 
 1671: 		vm_map_simplify_entry(map, entry, &count);
 1672: 
 1673: 		entry = entry->next;
 1674: 	}
 1675: 	vm_map_unlock(map);
 1676: 	vm_map_entry_release(count);
 1677: 	return (KERN_SUCCESS);
 1678: }
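
/*
 * Usage sketch (illustrative, not compiled): inheritance is normally set
 * from userland with minherit(2), which lands in vm_map_inherit() above.
 * An entry marked VM_INHERIT_SHARE stays shared with the child at fork
 * time (see the VM_INHERIT_SHARE case in vmspace_fork() below); the
 * helper name is a placeholder.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>

static int
minherit_example(void *buf, size_t len)
{
	/* keep this mapping shared with future children instead of COW */
	return (minherit(buf, len, INHERIT_SHARE));
}
#endif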
 1679: 
 1680: /*
 1681:  * Implement the semantics of mlock / munlock (wire or unwire user pages).
 1682:  */
 1683: int
 1684: vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
 1685: 	      boolean_t new_pageable)
 1686: {
 1690: 	vm_map_entry_t entry;
 1691: 	vm_map_entry_t start_entry;
 1692: 	vm_offset_t end;
 1693: 	int rv = KERN_SUCCESS;
 1694: 	int count;
 1695: 
 1696: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 1697: 	vm_map_lock(map);
 1698: 	VM_MAP_RANGE_CHECK(map, start, real_end);
 1699: 	end = real_end;
 1700: 
 1701: 	start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES);
 1702: 	if (start_entry == NULL) {
 1703: 		vm_map_unlock(map);
 1704: 		vm_map_entry_release(count);
 1705: 		return (KERN_INVALID_ADDRESS);
 1706: 	}
 1707: 
 1708: 	if (new_pageable == 0) {
 1709: 		entry = start_entry;
 1710: 		while ((entry != &map->header) && (entry->start < end)) {
 1711: 			vm_offset_t save_start;
 1712: 			vm_offset_t save_end;
 1713: 
 1714: 			/*
 1715: 			 * Already user wired or hard wired (trivial cases)
 1716: 			 */
 1717: 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
 1718: 				entry = entry->next;
 1719: 				continue;
 1720: 			}
 1721: 			if (entry->wired_count != 0) {
 1722: 				entry->wired_count++;
 1723: 				entry->eflags |= MAP_ENTRY_USER_WIRED;
 1724: 				entry = entry->next;
 1725: 				continue;
 1726: 			}
 1727: 
 1728: 			/*
 1729: 			 * A new wiring requires instantiation of appropriate
 1730: 			 * management structures and the faulting in of the
 1731: 			 * page.
 1732: 			 */
 1733: 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1734: 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1735: 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
 1736: 
 1737: 					vm_object_shadow(&entry->object.vm_object,
 1738: 					    &entry->offset,
 1739: 					    atop(entry->end - entry->start));
 1740: 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1741: 
 1742: 				} else if (entry->object.vm_object == NULL &&
 1743: 					   !map->system_map) {
 1744: 
 1745: 					entry->object.vm_object =
 1746: 					    vm_object_allocate(OBJT_DEFAULT,
 1747: 						atop(entry->end - entry->start));
 1748: 					entry->offset = (vm_offset_t) 0;
 1749: 
 1750: 				}
 1751: 			}
 1752: 			entry->wired_count++;
 1753: 			entry->eflags |= MAP_ENTRY_USER_WIRED;
 1754: 
 1755: 			/*
 1756: 			 * Now fault in the area.  The map lock needs to be
 1757: 			 * manipulated to avoid deadlocks.  The in-transition
 1758: 			 * flag protects the entries. 
 1759: 			 */
 1760: 			save_start = entry->start;
 1761: 			save_end = entry->end;
 1762: 			vm_map_unlock(map);
 1763: 			map->timestamp++;
 1764: 			rv = vm_fault_user_wire(map, save_start, save_end);
 1765: 			vm_map_lock(map);
 1766: 			if (rv) {
 1767: 				CLIP_CHECK_BACK(entry, save_start);
 1768: 				for (;;) {
 1769: 					KASSERT(entry->wired_count == 1, ("bad wired_count on entry"));
 1770: 					entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1771: 					entry->wired_count = 0;
 1772: 					if (entry->end == save_end)
 1773: 						break;
 1774: 					entry = entry->next;
 1775: 					KASSERT(entry != &map->header, ("bad entry clip during backout"));
 1776: 				}
 1777: 				end = save_start;	/* unwire the rest */
 1778: 				break;
 1779: 			}
 1780: 			/*
 1781: 			 * note that even though the entry might have been
 1782: 			 * clipped, the USER_WIRED flag we set prevents
 1783: 			 * duplication so we do not have to do a 
 1784: 			 * clip check.
 1785: 			 */
 1786: 			entry = entry->next;
 1787: 		}
 1788: 
 1789: 		/*
 1790: 		 * If we failed, fall through to the unwiring section to
 1791: 		 * unwire what we had wired so far.  'end' has already
 1792: 		 * been adjusted.
 1793: 		 */
 1794: 		if (rv)
 1795: 			new_pageable = 1;
 1796: 
 1797: 		/*
 1798: 		 * start_entry might have been clipped if we unlocked the
 1799: 		 * map and blocked.  No matter how clipped it has gotten
 1800: 		 * there should be a fragment that is on our start boundary.
 1801: 		 */
 1802: 		CLIP_CHECK_BACK(start_entry, start);
 1803: 	}
 1804: 
 1805: 	/*
 1806: 	 * Deal with the unwiring case.
 1807: 	 */
 1808: 	if (new_pageable) {
 1809: 		/*
 1810: 		 * This is the unwiring case.  We must first ensure that the
 1811: 		 * range to be unwired is really wired down.  We know there
 1812: 		 * are no holes.
 1813: 		 */
 1814: 		entry = start_entry;
 1815: 		while ((entry != &map->header) && (entry->start < end)) {
 1816: 			if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 1817: 				rv = KERN_INVALID_ARGUMENT;
 1818: 				goto done;
 1819: 			}
 1820: 			KASSERT(entry->wired_count != 0, ("wired count was 0 with USER_WIRED set! %p", entry));
 1821: 			entry = entry->next;
 1822: 		}
 1823: 
 1824: 		/*
 1825: 		 * Now decrement the wiring count for each region. If a region
 1826: 		 * becomes completely unwired, unwire its physical pages and
 1827: 		 * mappings.
 1828: 		 */
 1829: 		/*
 1830: 		 * Process the entries a second time.  Note that the loop
 1831: 		 * variable must be reset to start_entry here: an earlier
 1832: 		 * version of this code reused "entry" left over from the
 1833: 		 * validation loop above, so this unwiring loop was never
 1834: 		 * entered and the pages backing the entries were never
 1835: 		 * unwired, leaking wired pages.
 1836: 		 */
 1839: 		entry = start_entry;
 1840: 		while ((entry != &map->header) && (entry->start < end)) {
 1841: 			KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED, ("expected USER_WIRED on entry %p", entry));
 1842: 			entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1843: 			entry->wired_count--;
 1844: 			if (entry->wired_count == 0)
 1845: 				vm_fault_unwire(map, entry->start, entry->end);
 1846: 			entry = entry->next;
 1847: 		}
 1848: 	}
 1849: done:
 1850: 	vm_map_unclip_range(map, start_entry, start, real_end, &count,
 1851: 		MAP_CLIP_NO_HOLES);
 1852: 	map->timestamp++;
 1853: 	vm_map_unlock(map);
 1854: 	vm_map_entry_release(count);
 1855: 	return (rv);
 1856: }
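
/*
 * Usage sketch (illustrative, not compiled): the wiring path above
 * (new_pageable == 0) backs mlock(2) and the unwiring path backs
 * munlock(2); a failed wire falls through and unwires whatever portion
 * of the range had already been wired.  The helper name is a placeholder.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>

static int
mlock_example(void *buf, size_t len)
{
	if (mlock(buf, len) != 0)	/* wire: fault in and hold the pages */
		return (-1);
	/* ... work on the wired buffer ... */
	return (munlock(buf, len));	/* unwire */
}
#endif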
 1857: 
 1858: /*
 1859:  *	vm_map_wire:
 1860:  *
 1861:  *	Sets the pageability of the specified address
 1862:  *	range in the target map.  Regions specified
 1863:  *	as not pageable require locked-down physical
 1864:  *	memory and physical page maps.
 1865:  *
 1866:  *	The map must not be locked, but a reference
 1867:  *	must remain to the map throughout the call.
 1868:  *
 1869:  *	This function may be called via the zalloc path and must properly
 1870:  *	reserve map entries for kernel_map.
 1871:  */
 1872: int
 1873: vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
 1874: {
 1875: 	vm_map_entry_t entry;
 1876: 	vm_map_entry_t start_entry;
 1877: 	vm_offset_t end;
 1878: 	int rv = KERN_SUCCESS;
 1879: 	int count;
 1880: 	int s;
 1881: 
 1882: 	if (kmflags & KM_KRESERVE)
 1883: 		count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
 1884: 	else
 1885: 		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 1886: 	vm_map_lock(map);
 1887: 	VM_MAP_RANGE_CHECK(map, start, real_end);
 1888: 	end = real_end;
 1889: 
 1890: 	start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES);
 1891: 	if (start_entry == NULL) {
 1892: 		vm_map_unlock(map);
 1893: 		rv = KERN_INVALID_ADDRESS;
 1894: 		goto failure;
 1895: 	}
 1896: 	if ((kmflags & KM_PAGEABLE) == 0) {
 1897: 		/*
 1898: 		 * Wiring.  
 1899: 		 *
 1900: 		 * 1.  Holding the write lock, we create any shadow or zero-fill
 1901: 		 * objects that need to be created. Then we clip each map
 1902: 		 * entry to the region to be wired and increment its wiring
 1903: 		 * count.  We create objects before clipping the map entries
 1904: 		 * to avoid object proliferation.
 1905: 		 *
 1906: 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
 1907: 		 * fault in the pages for any newly wired area (wired_count is
 1908: 		 * 1).
 1909: 		 *
 1910: 		 * Downgrading to a read lock for vm_fault_wire avoids a 
 1911: 		 * possible deadlock with another process that may have faulted
 1912: 		 * on one of the pages to be wired (it would mark the page busy,
 1913: 		 * blocking us, then in turn block on the map lock that we
 1914: 		 * hold).  Because of problems in the recursive lock package,
 1915: 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
 1916: 		 * any actions that require the write lock must be done
 1917: 		 * beforehand.  Because we keep the read lock on the map, the
 1918: 		 * copy-on-write status of the entries we modify here cannot
 1919: 		 * change.
 1920: 		 */
 1921: 
 1922: 		entry = start_entry;
 1923: 		while ((entry != &map->header) && (entry->start < end)) {
 1924: 			/*
 1925: 			 * Trivial case if the entry is already wired
 1926: 			 */
 1927: 			if (entry->wired_count) {
 1928: 				entry->wired_count++;
 1929: 				entry = entry->next;
 1930: 				continue;
 1931: 			}
 1932: 
 1933: 			/*
 1934: 			 * The entry is being newly wired, we have to setup
 1935: 			 * appropriate management structures.  A shadow 
 1936: 			 * object is required for a copy-on-write region,
 1937: 			 * or a normal object for a zero-fill region.  We
 1938: 			 * do not have to do this for entries that point to sub
 1939: 			 * maps because we won't hold the lock on the sub map.
 1940: 			 */
 1941: 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1942: 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1943: 				if (copyflag &&
 1944: 				    ((entry->protection & VM_PROT_WRITE) != 0)) {
 1945: 
 1946: 					vm_object_shadow(&entry->object.vm_object,
 1947: 					    &entry->offset,
 1948: 					    atop(entry->end - entry->start));
 1949: 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1950: 				} else if (entry->object.vm_object == NULL &&
 1951: 					   !map->system_map) {
 1952: 					entry->object.vm_object =
 1953: 					    vm_object_allocate(OBJT_DEFAULT,
 1954: 						atop(entry->end - entry->start));
 1955: 					entry->offset = (vm_offset_t) 0;
 1956: 				}
 1957: 			}
 1958: 
 1959: 			entry->wired_count++;
 1960: 			entry = entry->next;
 1961: 		}
 1962: 
 1963: 		/*
 1964: 		 * Pass 2.
 1965: 		 */
 1966: 
 1967: 		/*
 1968: 		 * HACK HACK HACK HACK
 1969: 		 *
 1970: 		 * Unlock the map to avoid deadlocks.  The in-transition flag
 1971: 		 * protects us from most changes but note that
 1972: 		 * clipping may still occur.  To prevent clipping from
 1973: 		 * occurring after the unlock, except for when we are
 1974: 		 * blocking in vm_fault_wire, we must run at splvm().
 1975: 		 * Otherwise our accesses to entry->start and entry->end
 1976: 		 * could be corrupted.  We have to set splvm() prior to
 1977: 		 * unlocking so start_entry does not change out from
 1978: 		 * under us at the very beginning of the loop.
 1979: 		 *
 1980: 		 * HACK HACK HACK HACK
 1981: 		 */
 1982: 
 1983: 		s = splvm();
 1984: 		vm_map_unlock(map);
 1985: 
 1986: 		entry = start_entry;
 1987: 		while (entry != &map->header && entry->start < end) {
 1988: 			/*
 1989: 			 * If vm_fault_wire fails for any page we need to undo
 1990: 			 * what has been done.  We decrement the wiring count
 1991: 			 * for those pages which have not yet been wired (now)
 1992: 			 * and unwire those that have (later).
 1993: 			 */
 1994: 			vm_offset_t save_start = entry->start;
 1995: 			vm_offset_t save_end = entry->end;
 1996: 
 1997: 			if (entry->wired_count == 1)
 1998: 				rv = vm_fault_wire(map, entry->start, entry->end);
 1999: 			if (rv) {
 2000: 				CLIP_CHECK_BACK(entry, save_start);
 2001: 				for (;;) {
 2002: 					KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly"));
 2003: 					entry->wired_count = 0;
 2004: 					if (entry->end == save_end)
 2005: 						break;
 2006: 					entry = entry->next;
 2007: 					KASSERT(entry != &map->header, ("bad entry clip during backout"));
 2008: 				}
 2009: 				end = save_start;
 2010: 				break;
 2011: 			}
 2012: 			CLIP_CHECK_FWD(entry, save_end);
 2013: 			entry = entry->next;
 2014: 		}
 2015: 		splx(s);
 2016: 
 2017: 		/*
 2018: 		 * relock.  start_entry is still IN_TRANSITION and must
 2019: 		 * still exist, but may have been clipped (handled just
 2020: 		 * below).
 2021: 		 */
 2022: 		vm_map_lock(map);
 2023: 
 2024: 		/*
 2025: 		 * If a failure occurred, undo everything by falling through
 2026: 		 * to the unwiring code.  'end' has already been adjusted
 2027: 		 * appropriately.
 2028: 		 */
 2029: 		if (rv)
 2030: 			kmflags |= KM_PAGEABLE;
 2031: 
 2032: 		/*
 2033: 		 * start_entry might have been clipped if we unlocked the
 2034: 		 * map and blocked.  No matter how clipped it has gotten
 2035: 		 * there should be a fragment that is on our start boundary.
 2036: 		 */
 2037: 		CLIP_CHECK_BACK(start_entry, start);
 2038: 	}
 2039: 
 2040: 	if (kmflags & KM_PAGEABLE) {
 2041: 		/*
 2042: 		 * This is the unwiring case.  We must first ensure that the
 2043: 		 * range to be unwired is really wired down.  We know there
 2044: 		 * are no holes.
 2045: 		 */
 2046: 		entry = start_entry;
 2047: 		while ((entry != &map->header) && (entry->start < end)) {
 2048: 			if (entry->wired_count == 0) {
 2049: 				rv = KERN_INVALID_ARGUMENT;
 2050: 				goto done;
 2051: 			}
 2052: 			entry = entry->next;
 2053: 		}
 2054: 
 2055: 		/*
 2056: 		 * Now decrement the wiring count for each region. If a region
 2057: 		 * becomes completely unwired, unwire its physical pages and
 2058: 		 * mappings.
 2059: 		 */
 2060: 		entry = start_entry;
 2061: 		while ((entry != &map->header) && (entry->start < end)) {
 2062: 			entry->wired_count--;
 2063: 			if (entry->wired_count == 0)
 2064: 				vm_fault_unwire(map, entry->start, entry->end);
 2065: 			entry = entry->next;
 2066: 		}
 2067: 	}
 2068: done:
 2069: 	vm_map_unclip_range(map, start_entry, start, real_end, &count,
 2070: 		MAP_CLIP_NO_HOLES);
 2071: 	map->timestamp++;
 2072: 	vm_map_unlock(map);
 2073: failure:
 2074: 	if (kmflags & KM_KRESERVE)
 2075: 		vm_map_entry_krelease(count);
 2076: 	else
 2077: 		vm_map_entry_release(count);
 2078: 	return (rv);
 2079: }
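
/*
 * Sketch (illustrative, not compiled): a kernel caller wiring a range it
 * has just entered into a map might look roughly like this.  KM_KRESERVE
 * selects the pre-reserved entry pool so the zalloc path cannot recurse,
 * and passing KM_PAGEABLE later unwires the same range.  The helper name
 * is a placeholder.
 */
#if 0
static int
wire_range_example(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	int rv;

	rv = vm_map_wire(map, addr, addr + size, KM_KRESERVE);
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... use the wired range, then make it pageable again ... */
	return (vm_map_wire(map, addr, addr + size,
			    KM_PAGEABLE | KM_KRESERVE));
}
#endif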
 2080: 
 2081: /*
 2082:  * vm_map_set_wired_quick()
 2083:  *
 2084:  *	Mark a newly allocated address range as wired but do not fault in
 2085:  *	the pages.  The caller is expected to load the pages into the object.
 2086:  *
 2087:  *	The map must be locked on entry and will remain locked on return.
 2088:  */
 2089: void
 2090: vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *countp)
 2091: {
 2092: 	vm_map_entry_t scan;
 2093: 	vm_map_entry_t entry;
 2094: 
 2095: 	entry = vm_map_clip_range(map, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
 2096: 	for (scan = entry; scan != &map->header && scan->start < addr + size; scan = scan->next) {
 2097: 	    KKASSERT(scan->wired_count == 0);
 2098: 	    scan->wired_count = 1;
 2099: 	}
 2100: 	vm_map_unclip_range(map, entry, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
 2101: }
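
/*
 * Sketch (illustrative, not compiled): the expected calling pattern per
 * the comment above -- reserve entries, mark the freshly allocated range
 * wired while holding the map lock, then load the backing pages yourself.
 * The helper name is a placeholder.
 */
#if 0
static void
set_wired_quick_example(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	int count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

	vm_map_lock(map);
	vm_map_set_wired_quick(map, addr, size, &count);
	vm_map_unlock(map);
	vm_map_entry_release(count);
	/* ... caller now populates the object backing [addr, addr + size) ... */
}
#endif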
 2102: 
 2103: /*
 2104:  * vm_map_clean
 2105:  *
 2106:  * Push any dirty cached pages in the address range to their pager.
 2107:  * If syncio is TRUE, dirty pages are written synchronously.
 2108:  * If invalidate is TRUE, any cached pages are freed as well.
 2109:  *
 2110:  * Returns an error if any part of the specified range is not mapped.
 2111:  */
 2112: int
 2113: vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2114: 	     boolean_t syncio, boolean_t invalidate)
 2115: {
 2120: 	vm_map_entry_t current;
 2121: 	vm_map_entry_t entry;
 2122: 	vm_size_t size;
 2123: 	vm_object_t object;
 2124: 	vm_ooffset_t offset;
 2125: 
 2126: 	vm_map_lock_read(map);
 2127: 	VM_MAP_RANGE_CHECK(map, start, end);
 2128: 	if (!vm_map_lookup_entry(map, start, &entry)) {
 2129: 		vm_map_unlock_read(map);
 2130: 		return (KERN_INVALID_ADDRESS);
 2131: 	}
 2132: 	/*
 2133: 	 * Make a first pass to check for holes.
 2134: 	 */
 2135: 	for (current = entry; current->start < end; current = current->next) {
 2136: 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2137: 			vm_map_unlock_read(map);
 2138: 			return (KERN_INVALID_ARGUMENT);
 2139: 		}
 2140: 		if (end > current->end &&
 2141: 		    (current->next == &map->header ||
 2142: 			current->end != current->next->start)) {
 2143: 			vm_map_unlock_read(map);
 2144: 			return (KERN_INVALID_ADDRESS);
 2145: 		}
 2146: 	}
 2147: 
 2148: 	if (invalidate)
 2149: 		pmap_remove(vm_map_pmap(map), start, end);
 2150: 	/*
 2151: 	 * Make a second pass, cleaning/uncaching pages from the indicated
 2152: 	 * objects as we go.
 2153: 	 */
 2154: 	for (current = entry; current->start < end; current = current->next) {
 2155: 		offset = current->offset + (start - current->start);
 2156: 		size = (end <= current->end ? end : current->end) - start;
 2157: 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2158: 			vm_map_t smap;
 2159: 			vm_map_entry_t tentry;
 2160: 			vm_size_t tsize;
 2161: 
 2162: 			smap = current->object.sub_map;
 2163: 			vm_map_lock_read(smap);
 2164: 			(void) vm_map_lookup_entry(smap, offset, &tentry);
 2165: 			tsize = tentry->end - offset;
 2166: 			if (tsize < size)
 2167: 				size = tsize;
 2168: 			object = tentry->object.vm_object;
 2169: 			offset = tentry->offset + (offset - tentry->start);
 2170: 			vm_map_unlock_read(smap);
 2171: 		} else {
 2172: 			object = current->object.vm_object;
 2173: 		}
 2174: 		/*
 2175: 		 * Note that there is absolutely no sense in writing out
 2176: 		 * anonymous objects, so we track down the vnode object
 2177: 		 * to write out.
 2178: 		 * We invalidate (remove) all pages from the address space
 2179: 		 * anyway, for semantic correctness.
 2180: 		 *
 2181: 		 * note: certain anonymous maps, such as MAP_NOSYNC maps,
 2182: 		 * may start out with a NULL object.
 2183: 		 */
 2184: 		while (object && object->backing_object) {
 2185: 			offset += object->backing_object_offset;
 2186: 			object = object->backing_object;
 2187: 			if (object->size < OFF_TO_IDX(offset + size))
 2188: 				size = IDX_TO_OFF(object->size) - offset;
 2189: 		}
 2190: 		if (object && (object->type == OBJT_VNODE) && 
 2191: 		    (current->protection & VM_PROT_WRITE)) {
 2192: 			/*
 2193: 			 * Flush pages if writing is allowed, invalidate them
 2194: 			 * if invalidation requested.  Pages undergoing I/O
 2195: 			 * will be ignored by vm_object_page_remove().
 2196: 			 *
 2197: 			 * We cannot lock the vnode and then wait for paging
 2198: 			 * to complete without deadlocking against vm_fault.
 2199: 			 * Instead we simply call vm_object_page_remove() and
 2200: 			 * allow it to block internally on a page-by-page 
 2201: 			 * basis when it encounters pages undergoing async 
 2202: 			 * I/O.
 2203: 			 */
 2204: 			int flags;
 2205: 
 2206: 			vm_object_reference(object);
 2207: 			vn_lock(object->handle, NULL,
 2208: 				LK_EXCLUSIVE | LK_RETRY, curthread);
 2209: 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 2210: 			flags |= invalidate ? OBJPC_INVAL : 0;
 2211: 			vm_object_page_clean(object,
 2212: 			    OFF_TO_IDX(offset),
 2213: 			    OFF_TO_IDX(offset + size + PAGE_MASK),
 2214: 			    flags);
 2215: 			VOP_UNLOCK(object->handle, NULL, 0, curthread);
 2216: 			vm_object_deallocate(object);
 2217: 		}
 2218: 		if (object && invalidate &&
 2219: 		   ((object->type == OBJT_VNODE) ||
 2220: 		    (object->type == OBJT_DEVICE))) {
 2221: 			vm_object_reference(object);
 2222: 			vm_object_page_remove(object,
 2223: 			    OFF_TO_IDX(offset),
 2224: 			    OFF_TO_IDX(offset + size + PAGE_MASK),
 2225: 			    FALSE);
 2226: 			vm_object_deallocate(object);
 2227: 		}
 2228: 		start += size;
 2229: 	}
 2230: 
 2231: 	vm_map_unlock_read(map);
 2232: 	return (KERN_SUCCESS);
 2233: }
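
/*
 * Usage sketch (illustrative, not compiled): vm_map_clean() is the
 * working end of msync(2); MS_SYNC roughly corresponds to syncio = TRUE
 * and MS_INVALIDATE to invalidate = TRUE.  The helper name is a
 * placeholder.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>

static int
msync_example(void *addr, size_t len)
{
	/* write dirty pages synchronously and drop the cached copies */
	return (msync(addr, len, MS_SYNC | MS_INVALIDATE));
}
#endif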
 2234: 
 2235: /*
 2236:  *	vm_map_entry_unwire:	[ internal use only ]
 2237:  *
 2238:  *	Make the region specified by this entry pageable.
 2239:  *
 2240:  *	The map in question should be locked.
 2241:  *	[This is the reason for this routine's existence.]
 2242:  */
 2243: static void 
 2244: vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
 2245: {
 2246: 	vm_fault_unwire(map, entry->start, entry->end);
 2247: 	entry->wired_count = 0;
 2248: }
 2249: 
 2250: /*
 2251:  *	vm_map_entry_delete:	[ internal use only ]
 2252:  *
 2253:  *	Deallocate the given entry from the target map.
 2254:  */
 2255: static void
 2256: vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp)
 2257: {
 2258: 	vm_map_entry_unlink(map, entry);
 2259: 	map->size -= entry->end - entry->start;
 2260: 
 2261: 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 2262: 		vm_object_deallocate(entry->object.vm_object);
 2263: 	}
 2264: 
 2265: 	vm_map_entry_dispose(map, entry, countp);
 2266: }
 2267: 
 2268: /*
 2269:  *	vm_map_delete:	[ internal use only ]
 2270:  *
 2271:  *	Deallocates the given address range from the target
 2272:  *	map.
 2273:  */
 2274: int
 2275: vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, int *countp)
 2276: {
 2277: 	vm_object_t object;
 2278: 	vm_map_entry_t entry;
 2279: 	vm_map_entry_t first_entry;
 2280: 
 2281: 	/*
 2282: 	 * Find the start of the region, and clip it
 2283: 	 */
 2284: 
 2285: again:
 2286: 	if (!vm_map_lookup_entry(map, start, &first_entry))
 2287: 		entry = first_entry->next;
 2288: 	else {
 2289: 		entry = first_entry;
 2290: 		vm_map_clip_start(map, entry, start, countp);
 2291: 		/*
 2292: 		 * Fix the lookup hint now, rather than each time through the
 2293: 		 * loop.
 2294: 		 */
 2295: 		SAVE_HINT(map, entry->prev);
 2296: 	}
 2297: 
 2298: 	/*
 2299: 	 * Save the free space hint
 2300: 	 */
 2301: 
 2302: 	if (entry == &map->header) {
 2303: 		map->first_free = &map->header;
 2304: 	} else if (map->first_free->start >= start) {
 2305: 		map->first_free = entry->prev;
 2306: 	}
 2307: 
 2308: 	/*
 2309: 	 * Step through all entries in this region
 2310: 	 */
 2311: 
 2312: 	while ((entry != &map->header) && (entry->start < end)) {
 2313: 		vm_map_entry_t next;
 2314: 		vm_offset_t s, e;
 2315: 		vm_pindex_t offidxstart, offidxend, count;
 2316: 
 2317: 		/*
 2318: 		 * If we hit an in-transition entry we have to sleep and
 2319: 		 * retry.  It's easier (and not really slower) to just retry
 2320: 		 * since this case occurs so rarely and the hint is already
 2321: 		 * pointing at the right place.  We have to reset the
 2322: 		 * start offset so as not to accidentally delete an entry
 2323: 		 * another process just created in vacated space.
 2324: 		 */
 2325: 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 2326: 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 2327: 			start = entry->start;
 2328: 			++mycpu->gd_cnt.v_intrans_coll;
 2329: 			++mycpu->gd_cnt.v_intrans_wait;
 2330: 			vm_map_transition_wait(map);
 2331: 			goto again;
 2332: 		}
 2333: 		vm_map_clip_end(map, entry, end, countp);
 2334: 
 2335: 		s = entry->start;
 2336: 		e = entry->end;
 2337: 		next = entry->next;
 2338: 
 2339: 		offidxstart = OFF_TO_IDX(entry->offset);
 2340: 		count = OFF_TO_IDX(e - s);
 2341: 		object = entry->object.vm_object;
 2342: 
 2343: 		/*
 2344: 		 * Unwire before removing addresses from the pmap; otherwise,
 2345: 		 * unwiring will put the entries back in the pmap.
 2346: 		 */
 2347: 		if (entry->wired_count != 0) {
 2348: 			vm_map_entry_unwire(map, entry);
 2349: 		}
 2350: 
 2351: 		offidxend = offidxstart + count;
 2352: 
 2353: 		if ((object == kernel_object) || (object == kmem_object)) {
 2354: 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 2355: 		} else {
 2356: 			pmap_remove(map->pmap, s, e);
 2357: 			if (object != NULL &&
 2358: 			    object->ref_count != 1 &&
 2359: 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
 2360: 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
 2361: 				vm_object_collapse(object);
 2362: 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 2363: 				if (object->type == OBJT_SWAP) {
 2364: 					swap_pager_freespace(object, offidxstart, count);
 2365: 				}
 2366: 				if (offidxend >= object->size &&
 2367: 				    offidxstart < object->size) {
 2368: 					object->size = offidxstart;
 2369: 				}
 2370: 			}
 2371: 		}
 2372: 
 2373: 		/*
 2374: 		 * Delete the entry (which may delete the object) only after
 2375: 		 * removing all pmap entries pointing to its pages.
 2376: 		 * (Otherwise, its page frames may be reallocated, and any
 2377: 		 * modify bits will be set in the wrong object!)
 2378: 		 */
 2379: 		vm_map_entry_delete(map, entry, countp);
 2380: 		entry = next;
 2381: 	}
 2382: 	return (KERN_SUCCESS);
 2383: }
 2384: 
 2385: /*
 2386:  *	vm_map_remove:
 2387:  *
 2388:  *	Remove the given address range from the target map.
 2389:  *	This is the exported form of vm_map_delete.
 2390:  */
 2391: int
 2392: vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
 2393: {
 2394: 	int result;
 2395: 	int count;
 2396: 
 2397: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 2398: 	vm_map_lock(map);
 2399: 	VM_MAP_RANGE_CHECK(map, start, end);
 2400: 	result = vm_map_delete(map, start, end, &count);
 2401: 	vm_map_unlock(map);
 2402: 	vm_map_entry_release(count);
 2403: 
 2404: 	return (result);
 2405: }
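
/*
 * Sketch (illustrative, not compiled): typical kernel use of
 * vm_map_remove() to tear down a previously entered range.  Unlike
 * vm_map_delete(), the locking and entry reservation are handled
 * internally.  The helper name is a placeholder.
 */
#if 0
static int
remove_range_example(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	return (vm_map_remove(map, trunc_page(addr),
			      round_page(addr + size)));
}
#endif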
 2406: 
 2407: /*
 2408:  *	vm_map_check_protection:
 2409:  *
 2410:  *	Assert that the target map allows the specified
 2411:  *	privilege on the entire address region given.
 2412:  *	The entire region must be allocated.
 2413:  */
 2414: boolean_t
 2415: vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2416: 			vm_prot_t protection)
 2417: {
 2418: 	vm_map_entry_t entry;
 2419: 	vm_map_entry_t tmp_entry;
 2420: 
 2421: 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
 2422: 		return (FALSE);
 2423: 	}
 2424: 	entry = tmp_entry;
 2425: 
 2426: 	while (start < end) {
 2427: 		if (entry == &map->header) {
 2428: 			return (FALSE);
 2429: 		}
 2430: 		/*
 2431: 		 * No holes allowed!
 2432: 		 */
 2433: 
 2434: 		if (start < entry->start) {
 2435: 			return (FALSE);
 2436: 		}
 2437: 		/*
 2438: 		 * Check protection associated with entry.
 2439: 		 */
 2440: 
 2441: 		if ((entry->protection & protection) != protection) {
 2442: 			return (FALSE);
 2443: 		}
 2444: 		/* go to next entry */
 2445: 
 2446: 		start = entry->end;
 2447: 		entry = entry->next;
 2448: 	}
 2449: 	return (TRUE);
 2450: }
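
/*
 * Sketch (illustrative, not compiled): vm_map_check_protection() can be
 * used to pre-validate an entire user range before starting a longer
 * operation; the helper name is a placeholder.
 */
#if 0
static boolean_t
can_write_range_example(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	return (vm_map_check_protection(map, trunc_page(start),
					round_page(end), VM_PROT_WRITE));
}
#endif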
 2451: 
 2452: /*
 2453:  * Split the pages in a map entry into a new object.  This affords
 2454:  * easier removal of unused pages, and keeps object inheritance from
 2455:  * being a negative impact on memory usage.
 2456:  */
 2457: static void
 2458: vm_map_split(vm_map_entry_t entry)
 2459: {
 2460: 	vm_page_t m;
 2461: 	vm_object_t orig_object, new_object, source;
 2462: 	vm_offset_t s, e;
 2463: 	vm_pindex_t offidxstart, offidxend, idx;
 2464: 	vm_size_t size;
 2465: 	vm_ooffset_t offset;
 2466: 
 2467: 	orig_object = entry->object.vm_object;
 2468: 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 2469: 		return;
 2470: 	if (orig_object->ref_count <= 1)
 2471: 		return;
 2472: 
 2473: 	offset = entry->offset;
 2474: 	s = entry->start;
 2475: 	e = entry->end;
 2476: 
 2477: 	offidxstart = OFF_TO_IDX(offset);
 2478: 	offidxend = offidxstart + OFF_TO_IDX(e - s);
 2479: 	size = offidxend - offidxstart;
 2480: 
 2481: 	new_object = vm_pager_allocate(orig_object->type,
 2482: 		NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
 2483: 	if (new_object == NULL)
 2484: 		return;
 2485: 
 2486: 	source = orig_object->backing_object;
 2487: 	if (source != NULL) {
 2488: 		vm_object_reference(source);	/* Referenced by new_object */
 2489: 		LIST_INSERT_HEAD(&source->shadow_head,
 2490: 				  new_object, shadow_list);
 2491: 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
 2492: 		new_object->backing_object_offset = 
 2493: 			orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
 2494: 		new_object->backing_object = source;
 2495: 		source->shadow_count++;
 2496: 		source->generation++;
 2497: 	}
 2498: 
 2499: 	for (idx = 0; idx < size; idx++) {
 2500: 		vm_page_t m;
 2501: 
 2502: 	retry:
 2503: 		m = vm_page_lookup(orig_object, offidxstart + idx);
 2504: 		if (m == NULL)
 2505: 			continue;
 2506: 
 2507: 		/*
 2508: 		 * We must wait for pending I/O to complete before we can
 2509: 		 * rename the page.
 2510: 		 *
 2511: 		 * We do not have to VM_PROT_NONE the page as mappings should
 2512: 		 * not be changed by this operation.
 2513: 		 */
 2514: 		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
 2515: 			goto retry;
 2516: 			
 2517: 		vm_page_busy(m);
 2518: 		vm_page_rename(m, new_object, idx);
 2519: 		/* page automatically made dirty by rename and cache handled */
 2520: 		vm_page_busy(m);
 2521: 	}
 2522: 
 2523: 	if (orig_object->type == OBJT_SWAP) {
 2524: 		vm_object_pip_add(orig_object, 1);
 2525: 		/*
 2526: 		 * copy orig_object pages into new_object
 2527: 		 * and destroy unneeded pages in
 2528: 		 * shadow object.
 2529: 		 */
 2530: 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
 2531: 		vm_object_pip_wakeup(orig_object);
 2532: 	}
 2533: 
 2534: 	for (idx = 0; idx < size; idx++) {
 2535: 		m = vm_page_lookup(new_object, idx);
 2536: 		if (m) {
 2537: 			vm_page_wakeup(m);
 2538: 		}
 2539: 	}
 2540: 
 2541: 	entry->object.vm_object = new_object;
 2542: 	entry->offset = 0LL;
 2543: 	vm_object_deallocate(orig_object);
 2544: }
 2545: 
 2546: /*
 2547:  *	vm_map_copy_entry:
 2548:  *
 2549:  *	Copies the contents of the source entry to the destination
 2550:  *	entry.  The entries *must* be aligned properly.
 2551:  */
 2552: static void
 2553: vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
 2554: 	vm_map_entry_t src_entry, vm_map_entry_t dst_entry)
 2555: {
 2556: 	vm_object_t src_object;
 2557: 
 2558: 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
 2559: 		return;
 2560: 
 2561: 	if (src_entry->wired_count == 0) {
 2562: 
 2563: 		/*
 2564: 		 * If the source entry is marked needs_copy, it is already
 2565: 		 * write-protected.
 2566: 		 */
 2567: 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
 2568: 			pmap_protect(src_map->pmap,
 2569: 			    src_entry->start,
 2570: 			    src_entry->end,
 2571: 			    src_entry->protection & ~VM_PROT_WRITE);
 2572: 		}
 2573: 
 2574: 		/*
 2575: 		 * Make a copy of the object.
 2576: 		 */
 2577: 		if ((src_object = src_entry->object.vm_object) != NULL) {
 2578: 
 2579: 			if ((src_object->handle == NULL) &&
 2580: 				(src_object->type == OBJT_DEFAULT ||
 2581: 				 src_object->type == OBJT_SWAP)) {
 2582: 				vm_object_collapse(src_object);
 2583: 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
 2584: 					vm_map_split(src_entry);
 2585: 					src_object = src_entry->object.vm_object;
 2586: 				}
 2587: 			}
 2588: 
 2589: 			vm_object_reference(src_object);
 2590: 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
 2591: 			dst_entry->object.vm_object = src_object;
 2592: 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2593: 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2594: 			dst_entry->offset = src_entry->offset;
 2595: 		} else {
 2596: 			dst_entry->object.vm_object = NULL;
 2597: 			dst_entry->offset = 0;
 2598: 		}
 2599: 
 2600: 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
 2601: 		    dst_entry->end - dst_entry->start, src_entry->start);
 2602: 	} else {
 2603: 		/*
 2604: 		 * Of course, wired down pages can't be set copy-on-write.
 2605: 		 * Cause wired pages to be copied into the new map by
 2606: 		 * simulating faults (the new pages are pageable)
 2607: 		 */
 2608: 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
 2609: 	}
 2610: }
 2611: 
 2612: /*
 2613:  * vmspace_fork:
 2614:  * Create a new process vmspace structure and vm_map
 2615:  * based on those of an existing process.  The new map
 2616:  * is based on the old map, according to the inheritance
 2617:  * values on the regions in that map.
 2618:  *
 2619:  * The source map must not be locked.
 2620:  */
 2621: struct vmspace *
 2622: vmspace_fork(struct vmspace *vm1)
 2623: {
 2624: 	struct vmspace *vm2;
 2625: 	vm_map_t old_map = &vm1->vm_map;
 2626: 	vm_map_t new_map;
 2627: 	vm_map_entry_t old_entry;
 2628: 	vm_map_entry_t new_entry;
 2629: 	vm_object_t object;
 2630: 	int count;
 2631: 
 2632: 	vm_map_lock(old_map);
 2633: 	old_map->infork = 1;
 2634: 
 2635: 	/*
 2636: 	 * XXX Note: upcalls are not copied.
 2637: 	 */
 2638: 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
 2639: 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
 2640: 	    (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy);
 2641: 	new_map = &vm2->vm_map;	/* XXX */
 2642: 	new_map->timestamp = 1;
 2643: 
 2644: 	count = 0;
 2645: 	old_entry = old_map->header.next;
 2646: 	while (old_entry != &old_map->header) {
 2647: 		++count;
 2648: 		old_entry = old_entry->next;
 2649: 	}
 2650: 
 2651: 	count = vm_map_entry_reserve(count + MAP_RESERVE_COUNT);
 2652: 
 2653: 	old_entry = old_map->header.next;
 2654: 	while (old_entry != &old_map->header) {
 2655: 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 2656: 			panic("vm_map_fork: encountered a submap");
 2657: 
 2658: 		switch (old_entry->inheritance) {
 2659: 		case VM_INHERIT_NONE:
 2660: 			break;
 2661: 
 2662: 		case VM_INHERIT_SHARE:
 2663: 			/*
 2664: 			 * Clone the entry, creating the shared object if necessary.
 2665: 			 */
 2666: 			object = old_entry->object.vm_object;
 2667: 			if (object == NULL) {
 2668: 				object = vm_object_allocate(OBJT_DEFAULT,
 2669: 					atop(old_entry->end - old_entry->start));
 2670: 				old_entry->object.vm_object = object;
 2671: 				old_entry->offset = (vm_offset_t) 0;
 2672: 			}
 2673: 
 2674: 			/*
 2675: 			 * Add the reference before calling vm_object_shadow
 2676: 			 * to ensure that a shadow object is created.
 2677: 			 */
 2678: 			vm_object_reference(object);
 2679: 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2680: 				vm_object_shadow(&old_entry->object.vm_object,
 2681: 					&old_entry->offset,
 2682: 					atop(old_entry->end - old_entry->start));
 2683: 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2684: 				/* Transfer the second reference too. */
 2685: 				vm_object_reference(
 2686: 				    old_entry->object.vm_object);
 2687: 				vm_object_deallocate(object);
 2688: 				object = old_entry->object.vm_object;
 2689: 			}
 2690: 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
 2691: 
 2692: 			/*
 2693: 			 * Clone the entry, referencing the shared object.
 2694: 			 */
 2695: 			new_entry = vm_map_entry_create(new_map, &count);
 2696: 			*new_entry = *old_entry;
 2697: 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2698: 			new_entry->wired_count = 0;
 2699: 
 2700: 			/*
 2701: 			 * Insert the entry into the new map -- we know we're
 2702: 			 * inserting at the end of the new map.
 2703: 			 */
 2704: 
 2705: 			vm_map_entry_link(new_map, new_map->header.prev,
 2706: 			    new_entry);
 2707: 
 2708: 			/*
 2709: 			 * Update the physical map
 2710: 			 */
 2711: 
 2712: 			pmap_copy(new_map->pmap, old_map->pmap,
 2713: 			    new_entry->start,
 2714: 			    (old_entry->end - old_entry->start),
 2715: 			    old_entry->start);
 2716: 			break;
 2717: 
 2718: 		case VM_INHERIT_COPY:
 2719: 			/*
 2720: 			 * Clone the entry and link into the map.
 2721: 			 */
 2722: 			new_entry = vm_map_entry_create(new_map, &count);
 2723: 			*new_entry = *old_entry;
 2724: 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2725: 			new_entry->wired_count = 0;
 2726: 			new_entry->object.vm_object = NULL;
 2727: 			vm_map_entry_link(new_map, new_map->header.prev,
 2728: 			    new_entry);
 2729: 			vm_map_copy_entry(old_map, new_map, old_entry,
 2730: 			    new_entry);
 2731: 			break;
 2732: 		}
 2733: 		old_entry = old_entry->next;
 2734: 	}
 2735: 
 2736: 	new_map->size = old_map->size;
 2737: 	old_map->infork = 0;
 2738: 	vm_map_unlock(old_map);
 2739: 	vm_map_entry_release(count);
 2740: 
 2741: 	return (vm2);
 2742: }
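
/*
 * Sketch (illustrative, not compiled): roughly how a fork-style caller
 * uses vmspace_fork().  An rfork(RFMEM) caller would instead share the
 * existing vmspace by bumping vm_refcnt (the same field tested in
 * vmspace_unshare() below).  The helper name is a placeholder.
 */
#if 0
static struct vmspace *
fork_vmspace_example(struct vmspace *vm1, int share)
{
	if (share) {
		/* share the address space outright (rfork(RFMEM) style) */
		vm1->vm_refcnt++;
		return (vm1);
	}
	/* private copy: COW for VM_INHERIT_COPY, shared objects for SHARE */
	return (vmspace_fork(vm1));
}
#endif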
 2743: 
 2744: int
 2745: vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
 2746: 	      vm_prot_t prot, vm_prot_t max, int cow)
 2747: {
 2748: 	vm_map_entry_t prev_entry;
 2749: 	vm_map_entry_t new_stack_entry;
 2750: 	vm_size_t      init_ssize;
 2751: 	int            rv;
 2752: 	int		count;
 2753: 
 2754: 	if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
 2755: 		return (KERN_NO_SPACE);
 2756: 
 2757: 	if (max_ssize < sgrowsiz)
 2758: 		init_ssize = max_ssize;
 2759: 	else
 2760: 		init_ssize = sgrowsiz;
 2761: 
 2762: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 2763: 	vm_map_lock(map);
 2764: 
 2765: 	/* If addr is already mapped, no go */
 2766: 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
 2767: 		vm_map_unlock(map);
 2768: 		vm_map_entry_release(count);
 2769: 		return (KERN_NO_SPACE);
 2770: 	}
 2771: 
 2772: 	/* If we would blow our VMEM resource limit, no go */
 2773: 	if (map->size + init_ssize >
 2774: 	    curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 2775: 		vm_map_unlock(map);
 2776: 		vm_map_entry_release(count);
 2777: 		return (KERN_NO_SPACE);
 2778: 	}
 2779: 
 2780: 	/* If we can't accommodate max_ssize in the current mapping,
 2781: 	 * no go.  However, we need to be aware that subsequent user
 2782: 	 * mappings might map into the space we have reserved for
 2783: 	 * stack, and currently this space is not protected.  
 2784: 	 * 
 2785: 	 * Hopefully we will at least detect this condition 
 2786: 	 * when we try to grow the stack.
 2787: 	 */
 2788: 	if ((prev_entry->next != &map->header) &&
 2789: 	    (prev_entry->next->start < addrbos + max_ssize)) {
 2790: 		vm_map_unlock(map);
 2791: 		vm_map_entry_release(count);
 2792: 		return (KERN_NO_SPACE);
 2793: 	}
 2794: 
 2795: 	/* We initially map a stack of only init_ssize.  We will
 2796: 	 * grow as needed later.  Since this is to be a grow 
 2797: 	 * down stack, we map at the top of the range.
 2798: 	 *
 2799: 	 * Note: we would normally expect prot and max to be
 2800: 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
 2801: 	 * eliminate these as input parameters, and just
 2802: 	 * pass these values here in the insert call.
 2803: 	 */
 2804: 	rv = vm_map_insert(map, &count,
 2805: 			   NULL, 0, addrbos + max_ssize - init_ssize,
 2806: 	                   addrbos + max_ssize, prot, max, cow);
 2807: 
 2808: 	/* Now set the avail_ssize amount */
 2809: 	if (rv == KERN_SUCCESS){
 2810: 		if (prev_entry != &map->header)
 2811: 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize, &count);
 2812: 		new_stack_entry = prev_entry->next;
 2813: 		if (new_stack_entry->end   != addrbos + max_ssize ||
 2814: 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
 2815: 			panic ("Bad entry start/end for new stack entry");
 2816: 		else 
 2817: 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
 2818: 	}
 2819: 
 2820: 	vm_map_unlock(map);
 2821: 	vm_map_entry_release(count);
 2822: 	return (rv);
 2823: }
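
/*
 * Sketch (illustrative, not compiled): roughly how an exec-time caller
 * would reserve a grow-down user stack.  Only init_ssize (at most
 * sgrowsiz) is mapped up front; the rest of max_ssize is grown on demand
 * by vm_map_growstack() below.  The helper name is a placeholder.
 */
#if 0
static int
stack_create_example(vm_map_t map, vm_offset_t stack_top, vm_size_t max_ssize)
{
	/* reserve [stack_top - max_ssize, stack_top), mapping only the top */
	return (vm_map_stack(map, stack_top - max_ssize, max_ssize,
			     VM_PROT_ALL, VM_PROT_ALL, 0));
}
#endif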
 2824: 
 2825: /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
 2826:  * desired address is already mapped, or if we successfully grow
 2827:  * the stack.  Also returns KERN_SUCCESS if addr is outside the
 2828:  * stack range (this is strange, but preserves compatibility with
 2829:  * the grow function in vm_machdep.c).
 2830:  */
 2831: int
 2832: vm_map_growstack (struct proc *p, vm_offset_t addr)
 2833: {
 2834: 	vm_map_entry_t prev_entry;
 2835: 	vm_map_entry_t stack_entry;
 2836: 	vm_map_entry_t new_stack_entry;
 2837: 	struct vmspace *vm = p->p_vmspace;
 2838: 	vm_map_t map = &vm->vm_map;
 2839: 	vm_offset_t    end;
 2840: 	int grow_amount;
 2841: 	int rv = KERN_SUCCESS;
 2842: 	int is_procstack;
 2843: 	int use_read_lock = 1;
 2844: 	int count;
 2845: 
 2846: 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 2847: Retry:
 2848: 	if (use_read_lock)
 2849: 		vm_map_lock_read(map);
 2850: 	else
 2851: 		vm_map_lock(map);
 2852: 
 2853: 	/* If addr is already in the entry range, no need to grow.*/
 2854: 	if (vm_map_lookup_entry(map, addr, &prev_entry))
 2855: 		goto done;
 2856: 
 2857: 	if ((stack_entry = prev_entry->next) == &map->header)
 2858: 		goto done;
 2859: 	if (prev_entry == &map->header) 
 2860: 		end = stack_entry->start - stack_entry->avail_ssize;
 2861: 	else
 2862: 		end = prev_entry->end;
 2863: 
 2864: 	/* This next test mimics the old grow function in vm_machdep.c.
 2865: 	 * It really doesn't quite make sense, but we do it anyway
 2866: 	 * for compatibility.
 2867: 	 *
 2868: 	 * If the stack is not growable, return success.  This signals the
 2869: 	 * caller to proceed as it normally would with normal vm handling.
 2870: 	 */
 2871: 	if (stack_entry->avail_ssize < 1 ||
 2872: 	    addr >= stack_entry->start ||
 2873: 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
 2874: 		goto done;
 2875: 	} 
 2876: 	
 2877: 	/* Find the minimum grow amount */
 2878: 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
 2879: 	if (grow_amount > stack_entry->avail_ssize) {
 2880: 		rv = KERN_NO_SPACE;
 2881: 		goto done;
 2882: 	}
 2883: 
 2884: 	/* If there is no longer enough space between the entries, the
 2885: 	 * grow fails and we adjust the available space.  Note: this
 2886: 	 * should only happen if the user has mapped into the
 2887: 	 * stack area after the stack was created, and is
 2888: 	 * probably an error.
 2889: 	 *
 2890: 	 * This also effectively destroys any guard page the user
 2891: 	 * might have intended by limiting the stack size.
 2892: 	 */
 2893: 	if (grow_amount > stack_entry->start - end) {
 2894: 		if (use_read_lock && vm_map_lock_upgrade(map)) {
 2895: 			use_read_lock = 0;
 2896: 			goto Retry;
 2897: 		}
 2898: 		use_read_lock = 0;
 2899: 		stack_entry->avail_ssize = stack_entry->start - end;
 2900: 		rv = KERN_NO_SPACE;
 2901: 		goto done;
 2902: 	}
 2903: 
 2904: 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
 2905: 
 2906: 	/* If this is the main process stack, see if we're over the 
 2907: 	 * stack limit.
 2908: 	 */
 2909: 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2910: 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2911: 		rv = KERN_NO_SPACE;
 2912: 		goto done;
 2913: 	}
 2914: 
 2915: 	/* Round up the grow amount modulo SGROWSIZ */
 2916: 	grow_amount = roundup (grow_amount, sgrowsiz);
 2917: 	if (grow_amount > stack_entry->avail_ssize) {
 2918: 		grow_amount = stack_entry->avail_ssize;
 2919: 	}
 2920: 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2921: 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2922: 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
 2923: 		              ctob(vm->vm_ssize);
 2924: 	}
 2925: 
 2926: 	/* If we would blow our VMEM resource limit, no go */
 2927: 	if (map->size + grow_amount > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 2928: 		rv = KERN_NO_SPACE;
 2929: 		goto done;
 2930: 	}
 2931: 
 2932: 	if (use_read_lock && vm_map_lock_upgrade(map)) {
 2933: 		use_read_lock = 0;
 2934: 		goto Retry;
 2935: 	}
 2936: 	use_read_lock = 0;
 2937: 
 2938: 	/* Get the preliminary new entry start value */
 2939: 	addr = stack_entry->start - grow_amount;
 2940: 
 2941: 	/* If this puts us into the previous entry, cut back our growth
 2942: 	 * to the available space.  Also, see the note above.
 2943: 	 */
 2944: 	if (addr < end) {
 2945: 		stack_entry->avail_ssize = stack_entry->start - end;
 2946: 		addr = end;
 2947: 	}
 2948: 
 2949: 	rv = vm_map_insert(map, &count,
 2950: 			   NULL, 0, addr, stack_entry->start,
 2951: 			   VM_PROT_ALL,
 2952: 			   VM_PROT_ALL,
 2953: 			   0);
 2954: 
 2955: 	/* Adjust the available stack space by the amount we grew. */
 2956: 	if (rv == KERN_SUCCESS) {
 2957: 		if (prev_entry != &map->header)
 2958: 			vm_map_clip_end(map, prev_entry, addr, &count);
 2959: 		new_stack_entry = prev_entry->next;
 2960: 		if (new_stack_entry->end   != stack_entry->start  ||
 2961: 		    new_stack_entry->start != addr)
 2962: 			panic ("Bad stack grow start/end in new stack entry");
 2963: 		else {
 2964: 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
 2965: 							(new_stack_entry->end -
 2966: 							 new_stack_entry->start);
 2967: 			if (is_procstack)
 2968: 				vm->vm_ssize += btoc(new_stack_entry->end -
 2969: 						     new_stack_entry->start);
 2970: 		}
 2971: 	}
 2972: 
 2973: done:
 2974: 	if (use_read_lock)
 2975: 		vm_map_unlock_read(map);
 2976: 	else
 2977: 		vm_map_unlock(map);
 2978: 	vm_map_entry_release(count);
 2979: 	return (rv);
 2980: }
 2981: 
 2982: /*
 2983:  * Unshare the specified VM space for exec.  If other processes are
 2984:  * mapped to it, then create a new one.  The new vmspace is null.
 2985:  */
 2986: 
 2987: void
 2988: vmspace_exec(struct proc *p, struct vmspace *vmcopy) 
 2989: {
 2990: 	struct vmspace *oldvmspace = p->p_vmspace;
 2991: 	struct vmspace *newvmspace;
 2992: 	vm_map_t map = &p->p_vmspace->vm_map;
 2993: 
 2994: 	/*
 2995: 	 * If we are execing a resident vmspace we fork it, otherwise
 2996: 	 * we create a new vmspace.  Note that exitingcnt and upcalls
 2997: 	 * are not copied to the new vmspace.
 2998: 	 */
 2999: 	if (vmcopy)  {
 3000: 	    newvmspace = vmspace_fork(vmcopy);
 3001: 	} else {
 3002: 	    newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
 3003: 	    bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
 3004: 		(caddr_t)&oldvmspace->vm_endcopy - 
 3005: 		    (caddr_t)&oldvmspace->vm_startcopy);
 3006: 	}
 3007: 
 3008: 	/*
 3009: 	 * This code is written like this for prototype purposes.  The
 3010: 	 * goal is to avoid running down the vmspace here, but let the
 3011: 	 * other processes that are still using the vmspace finally
 3012: 	 * run it down.  Even though there is little or no chance of blocking
 3013: 	 * here, it is a good idea to keep this form for future mods.
 3014: 	 */
 3015: 	p->p_vmspace = newvmspace;
 3016: 	pmap_pinit2(vmspace_pmap(newvmspace));
 3017: 	if (p == curproc)
 3018: 		pmap_activate(p);
 3019: 	vmspace_free(oldvmspace);
 3020: }
 3021: 
 3022: /*
 3023:  * Unshare the specified VM space for forcing COW.  This
 3024:  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 3025:  *
 3026:  * The exitingcnt test is not strictly necessary but has been
 3027:  * included for code sanity (to make the code a bit more deterministic).
 3028:  */
 3029: 
 3030: void
 3031: vmspace_unshare(struct proc *p) 
 3032: {
 3033: 	struct vmspace *oldvmspace = p->p_vmspace;
 3034: 	struct vmspace *newvmspace;
 3035: 
 3036: 	if (oldvmspace->vm_refcnt == 1 && oldvmspace->vm_exitingcnt == 0)
 3037: 		return;
 3038: 	newvmspace = vmspace_fork(oldvmspace);
 3039: 	p->p_vmspace = newvmspace;
 3040: 	pmap_pinit2(vmspace_pmap(newvmspace));
 3041: 	if (p == curproc)
 3042: 		pmap_activate(p);
 3043: 	vmspace_free(oldvmspace);
 3044: }
 3045: 
 3046: /*
 3047:  *	vm_map_lookup:
 3048:  *
 3049:  *	Finds the VM object, offset, and
 3050:  *	protection for a given virtual address in the
 3051:  *	specified map, assuming a page fault of the
 3052:  *	type specified.
 3053:  *
 3054:  *	Leaves the map in question locked for read; return
 3055:  *	values are guaranteed until a vm_map_lookup_done
 3056:  *	call is performed.  Note that the map argument
 3057:  *	is in/out; the returned map must be used in
 3058:  *	the call to vm_map_lookup_done.
 3059:  *
 3060:  *	A handle (out_entry) is returned for use in
 3061:  *	vm_map_lookup_done, to make that fast.
 3062:  *
 3063:  *	If a lookup is requested with "write protection"
 3064:  *	specified, the map may be changed to perform virtual
 3065:  *	copying operations, although the data referenced will
 3066:  *	remain the same.
 3067:  */
 3068: int
 3069: vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
 3070: 	      vm_offset_t vaddr,
 3071: 	      vm_prot_t fault_typea,
 3072: 	      vm_map_entry_t *out_entry,	/* OUT */
 3073: 	      vm_object_t *object,		/* OUT */
 3074: 	      vm_pindex_t *pindex,		/* OUT */
 3075: 	      vm_prot_t *out_prot,		/* OUT */
 3076: 	      boolean_t *wired)			/* OUT */
 3077: {
 3078: 	vm_map_entry_t entry;
 3079: 	vm_map_t map = *var_map;
 3080: 	vm_prot_t prot;
 3081: 	vm_prot_t fault_type = fault_typea;
 3082: 	int use_read_lock = 1;
 3083: 	int rv = KERN_SUCCESS;
 3084: 
 3085: RetryLookup:
 3086: 	if (use_read_lock)
 3087: 		vm_map_lock_read(map);
 3088: 	else
 3089: 		vm_map_lock(map);
 3090: 
 3091: 	/*
 3092: 	 * If the map has an interesting hint, try it before calling the
 3093: 	 * full blown lookup routine.
 3094: 	 */
 3095: 	entry = map->hint;
 3096: 	*out_entry = entry;
 3097: 
 3098: 	if ((entry == &map->header) ||
 3099: 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
 3100: 		vm_map_entry_t tmp_entry;
 3101: 
 3102: 		/*
 3103: 		 * Entry was either not a valid hint, or the vaddr was not
 3104: 		 * contained in the entry, so do a full lookup.
 3105: 		 */
 3106: 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
 3107: 			rv = KERN_INVALID_ADDRESS;
 3108: 			goto done;
 3109: 		}
 3110: 
 3111: 		entry = tmp_entry;
 3112: 		*out_entry = entry;
 3113: 	}
 3114: 	
 3115: 	/*
 3116: 	 * Handle submaps.
 3117: 	 */
 3118: 
 3119: 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 3120: 		vm_map_t old_map = map;
 3121: 
 3122: 		*var_map = map = entry->object.sub_map;
 3123: 		if (use_read_lock)
 3124: 			vm_map_unlock_read(old_map);
 3125: 		else
 3126: 			vm_map_unlock(old_map);
 3127: 		use_read_lock = 1;
 3128: 		goto RetryLookup;
 3129: 	}
 3130: 
 3131: 	/*
 3132: 	 * Check whether this task is allowed to have this page.
 3133: 	 * Note the special case for MAP_ENTRY_COW
 3134: 	 * pages with an override.  This is to implement a forced
 3135: 	 * COW for debuggers.
 3136: 	 */
 3137: 
 3138: 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
 3139: 		prot = entry->max_protection;
 3140: 	else
 3141: 		prot = entry->protection;
 3142: 
 3143: 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
 3144: 	if ((fault_type & prot) != fault_type) {
 3145: 		rv = KERN_PROTECTION_FAILURE;
 3146: 		goto done;
 3147: 	}
 3148: 
 3149: 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
 3150: 	    (entry->eflags & MAP_ENTRY_COW) &&
 3151: 	    (fault_type & VM_PROT_WRITE) &&
 3152: 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
 3153: 		rv = KERN_PROTECTION_FAILURE;
 3154: 		goto done;
 3155: 	}
 3156: 
 3157: 	/*
 3158: 	 * If this page is not pageable, we have to get it for all possible
 3159: 	 * accesses.
 3160: 	 */
 3161: 
 3162: 	*wired = (entry->wired_count != 0);
 3163: 	if (*wired)
 3164: 		prot = fault_type = entry->protection;
 3165: 
 3166: 	/*
 3167: 	 * If the entry was copy-on-write, copy it now or demote the protection.
 3168: 	 */
 3169: 
 3170: 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 3171: 		/*
 3172: 		 * If we want to write the page, we may as well handle that
 3173: 		 * now since we've got the map locked.
 3174: 		 *
 3175: 		 * If we don't need to write the page, we just demote the
 3176: 		 * permissions allowed.
 3177: 		 */
 3178: 
 3179: 		if (fault_type & VM_PROT_WRITE) {
 3180: 			/*
 3181: 			 * Make a new object, and place it in the object
 3182: 			 * chain.  Note that no new references have appeared
 3183: 			 * -- one just moved from the map to the new
 3184: 			 * object.
 3185: 			 */
 3186: 
 3187: 			if (use_read_lock && vm_map_lock_upgrade(map)) {
 3188: 				use_read_lock = 0;
 3189: 				goto RetryLookup;
 3190: 			}
 3191: 			use_read_lock = 0;
 3192: 
 3193: 			vm_object_shadow(
 3194: 			    &entry->object.vm_object,
 3195: 			    &entry->offset,
 3196: 			    atop(entry->end - entry->start));
 3197: 
 3198: 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 3199: 		} else {
 3200: 			/*
 3201: 			 * We're attempting to read a copy-on-write page --
 3202: 			 * don't allow writes.
 3203: 			 */
 3204: 
 3205: 			prot &= ~VM_PROT_WRITE;
 3206: 		}
 3207: 	}
 3208: 
 3209: 	/*
 3210: 	 * Create an object if necessary.
 3211: 	 */
 3212: 	if (entry->object.vm_object == NULL &&
 3213: 	    !map->system_map) {
 3214: 		if (use_read_lock && vm_map_lock_upgrade(map))  {
 3215: 			use_read_lock = 0;
 3216: 			goto RetryLookup;
 3217: 		}
 3218: 		use_read_lock = 0;
 3219: 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
 3220: 		    atop(entry->end - entry->start));
 3221: 		entry->offset = 0;
 3222: 	}
 3223: 
 3224: 	/*
 3225: 	 * Return the object/offset from this entry.  If the entry was
 3226: 	 * copy-on-write or empty, it has been fixed up.
 3227: 	 */
 3228: 
 3229: 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
 3230: 	*object = entry->object.vm_object;
 3231: 
 3232: 	/*
 3233: 	 * Return whether this is the only map sharing this data.  On
 3234: 	 * success we return with a read lock held on the map.  On failure
 3235: 	 * we return with the map unlocked.
 3236: 	 */
 3237: 	*out_prot = prot;
 3238: done:
 3239: 	if (rv == KERN_SUCCESS) {
 3240: 		if (use_read_lock == 0)
 3241: 			vm_map_lock_downgrade(map);
 3242: 	} else if (use_read_lock) {
 3243: 		vm_map_unlock_read(map);
 3244: 	} else {
 3245: 		vm_map_unlock(map);
 3246: 	}
 3247: 	return (rv);
 3248: }
 3249: 
 3250: /*
 3251:  *	vm_map_lookup_done:
 3252:  *
 3253:  *	Releases locks acquired by a vm_map_lookup
 3254:  *	(according to the handle returned by that lookup).
 3255:  */
 3256: 
 3257: void
 3258: vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count)
 3259: {
 3260: 	/*
 3261: 	 * Unlock the main-level map
 3262: 	 */
 3263: 	vm_map_unlock_read(map);
 3264: 	if (count)
 3265: 		vm_map_entry_release(count);
 3266: }
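
/*
 * Sketch (illustrative, not compiled): the lookup/use/done protocol
 * described above, as a fault-style caller would use it.  On success the
 * (possibly different, if a submap was traversed) map comes back
 * read-locked and must be released with vm_map_lookup_done().  The
 * helper name is a placeholder.
 */
#if 0
static int
lookup_example(vm_map_t map, vm_offset_t va)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	rv = vm_map_lookup(&map, va, VM_PROT_READ,
			   &entry, &object, &pindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... inspect object/pindex while the returned map stays locked ... */
	vm_map_lookup_done(map, entry, 0);
	return (KERN_SUCCESS);
}
#endif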
 3267: 
 3268: #ifdef ENABLE_VFS_IOOPT
 3269: 
 3270: /*
 3271:  * Implement uiomove with VM operations.  This handles (and collateral changes)
 3272:  * support every combination of source object modification, and COW type
 3273:  * operations.
 3274:  */
 3275: int
 3276: vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta,
 3277: 	   vm_offset_t uaddra, int *npages)
 3278: {
 3284: 	vm_map_t map;
 3285: 	vm_object_t first_object, oldobject, object;
 3286: 	vm_map_entry_t entry;
 3287: 	vm_prot_t prot;
 3288: 	boolean_t wired;
 3289: 	int tcnt, rv;
 3290: 	vm_offset_t uaddr, start, end, tend;
 3291: 	vm_pindex_t first_pindex, osize, oindex;
 3292: 	off_t ooffset;
 3293: 	int cnt;
 3294: 	int count;
 3295: 
 3296: 	if (npages)
 3297: 		*npages = 0;
 3298: 
 3299: 	cnt = cnta;
 3300: 	uaddr = uaddra;
 3301: 
 3302: 	while (cnt > 0) {
 3303: 		map = mapa;
 3304: 
 3305: 		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 3306: 
 3307: 		if ((vm_map_lookup(&map, uaddr,
 3308: 			VM_PROT_READ, &entry, &first_object,
 3309: 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
 3310: 			return EFAULT;
 3311: 		}
 3312: 
 3313: 		vm_map_clip_start(map, entry, uaddr, &count);
 3314: 
 3315: 		tcnt = cnt;
 3316: 		tend = uaddr + tcnt;
 3317: 		if (tend > entry->end) {
 3318: 			tcnt = entry->end - uaddr;
 3319: 			tend = entry->end;
 3320: 		}
 3321: 
 3322: 		vm_map_clip_end(map, entry, tend, &count);
 3323: 
 3324: 		start = entry->start;
 3325: 		end = entry->end;
 3326: 
 3327: 		osize = atop(tcnt);
 3328: 
 3329: 		oindex = OFF_TO_IDX(cp);
 3330: 		if (npages) {
 3331: 			vm_pindex_t idx;
 3332: 			for (idx = 0; idx < osize; idx++) {
 3333: 				vm_page_t m;
 3334: 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
 3335: 					vm_map_lookup_done(map, entry, count);
 3336: 					return 0;
 3337: 				}
 3338: 				/*
 3339: 				 * disallow busy or invalid pages, but allow
 3340: 				 * m->busy pages if they are entirely valid.
 3341: 				 */
 3342: 				if ((m->flags & PG_BUSY) ||
 3343: 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
 3344: 					vm_map_lookup_done(map, entry, count);
 3345: 					return 0;
 3346: 				}
 3347: 			}
 3348: 		}
 3349: 
 3350: /*
 3351:  * If we are changing an existing map entry, just redirect
 3352:  * the object, and change mappings.
 3353:  */
 3354: 		if ((first_object->type == OBJT_VNODE) &&
 3355: 			((oldobject = entry->object.vm_object) == first_object)) {
 3356: 
 3357: 			if ((entry->offset != cp) || (oldobject != srcobject)) {
 3358: 				/*
 3359: 				 * Remove old window into the file
 3360: 				 */
 3361: 				pmap_remove (map->pmap, uaddr, tend);
 3362: 
 3363: 				/*
 3364: 				 * Force copy-on-write for mmapped regions
 3365: 				 */
 3366: 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 3367: 
 3368: 				/*
 3369: 				 * Point the object appropriately
 3370: 				 */
 3371: 				if (oldobject != srcobject) {
 3372: 
 3373: 					/*
 3374: 					 * Set the object optimization hint flag
 3375: 					 */
 3376: 					vm_object_set_flag(srcobject, OBJ_OPT);
 3377: 					vm_object_reference(srcobject);
 3378: 					entry->object.vm_object = srcobject;
 3379: 
 3380: 					if (oldobject) {
 3381: 						vm_object_deallocate(oldobject);
 3382: 					}
 3383: 				}
 3384: 
 3385: 				entry->offset = cp;
 3386: 				map->timestamp++;
 3387: 			} else {
 3388: 				pmap_remove (map->pmap, uaddr, tend);
 3389: 			}
 3390: 
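/*
 * If the entry is instead backed by a private anonymous (default or swap)
 * object that nothing else references and that exactly covers the window,
 * keep that object and simply re-point it so that it shadows the source
 * object at the new offset.
 */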
 3391: 		} else if ((first_object->ref_count == 1) &&
 3392: 			(first_object->size == osize) &&
 3393: 			((first_object->type == OBJT_DEFAULT) ||
 3394: 				(first_object->type == OBJT_SWAP)) ) {
 3395: 
 3396: 			oldobject = first_object->backing_object;
 3397: 
 3398: 			if ((first_object->backing_object_offset != cp) ||
 3399: 				(oldobject != srcobject)) {
 3400: 				/*
 3401: 				 * Remove old window into the file
 3402: 				 */
 3403: 				pmap_remove (map->pmap, uaddr, tend);
 3404: 
 3405: 				/*
 3406: 				 * Remove unneeded old pages
 3407: 				 */
 3408: 				vm_object_page_remove(first_object, 0, 0, 0);
 3409: 
 3410: 				/*
 3411: 				 * Invalidate swap space
 3412: 				 */
 3413: 				if (first_object->type == OBJT_SWAP) {
 3414: 					swap_pager_freespace(first_object,
 3415: 						0,
 3416: 						first_object->size);
 3417: 				}
 3418: 
 3419: 				/*
 3420: 				 * Force copy-on-write for mmapped regions
 3421: 				 */
 3422: 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 3423: 
 3424: 				/*
 3425: 				 * Point the object appropriately
 3426: 				 */
 3427: 				if (oldobject != srcobject) {
 3428: 
 3429: 					/*
 3430: 					 * Set the object optimization hint flag
 3431: 					 */
 3432: 					vm_object_set_flag(srcobject, OBJ_OPT);
 3433: 					vm_object_reference(srcobject);
 3434: 
 3435: 					if (oldobject) {
 3436: 						LIST_REMOVE(
 3437: 							first_object, shadow_list);
 3438: 						oldobject->shadow_count--;
 3439: 						/* XXX bump generation? */
 3440: 						vm_object_deallocate(oldobject);
 3441: 					}
 3442: 
 3443: 					LIST_INSERT_HEAD(&srcobject->shadow_head,
 3444: 						first_object, shadow_list);
 3445: 					srcobject->shadow_count++;
 3446: 					/* XXX bump generation? */
 3447: 
 3448: 					first_object->backing_object = srcobject;
 3449: 				}
 3450: 				first_object->backing_object_offset = cp;
 3451: 				map->timestamp++;
 3452: 			} else {
 3453: 				pmap_remove (map->pmap, uaddr, tend);
 3454: 			}
 3455: /*
 3456:  * Otherwise, we have to do a logical mmap.
 3457:  */
 3458: 		} else {
 3459: 
 3460: 			vm_object_set_flag(srcobject, OBJ_OPT);
 3461: 			vm_object_reference(srcobject);
 3462: 
 3463: 			pmap_remove (map->pmap, uaddr, tend);
 3464: 
 3465: 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 3466: 			vm_map_lock_upgrade(map);
 3467: 
 3468: 			if (entry == &map->header) {
 3469: 				map->first_free = &map->header;
 3470: 			} else if (map->first_free->start >= start) {
 3471: 				map->first_free = entry->prev;
 3472: 			}
 3473: 
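			/*
			 * Replace the old entry outright: delete it and
			 * insert a fresh copy-on-write mapping of the source
			 * object over the same address range.
			 */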
 3474: 			SAVE_HINT(map, entry->prev);
 3475: 			vm_map_entry_delete(map, entry, &count);
 3476: 
 3477: 			object = srcobject;
 3478: 			ooffset = cp;
 3479: 
 3480: 			rv = vm_map_insert(map, &count,
 3481: 				object, ooffset, start, tend,
 3482: 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
 3483: 
 3484: 			if (rv != KERN_SUCCESS)
 3485: 				panic("vm_uiomove: could not insert new entry: %d", rv);
 3486: 		}
 3487: 
 3488: /*
 3489:  * Map the window directly, if it is already in memory
 3490:  */
 3491: 		pmap_object_init_pt(map->pmap, uaddr,
 3492: 			srcobject, oindex, tcnt, 0);
 3493: 
 3494: 		map->timestamp++;
 3495: 		vm_map_unlock(map);
 3496: 		vm_map_entry_release(count);
 3497: 
 3498: 		cnt -= tcnt;
 3499: 		uaddr += tcnt;
 3500: 		cp += tcnt;
 3501: 		if (npages)
 3502: 			*npages += osize;
 3503: 	}
 3504: 	return 0;
 3505: }
 3506: 
 3507: #endif
 3508: 
 3509: /*
 3510:  * Performs the copy-on-write operations necessary to allow the virtual copies
 3511:  * into user space to work.  This has to be called for write(2) system calls
 3512:  * from other processes, file unlinking, and file size shrinkage.
 3513:  */
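/*
 * A minimal sketch of such a caller (illustrative only; "vp", "offset" and
 * "count" are hypothetical, and the OBJ_OPT test merely short-circuits the
 * check performed again inside the function):
 *
 *	vm_object_t obj = vp->v_object;
 *
 *	if (obj != NULL && (obj->flags & OBJ_OPT))
 *		vm_freeze_copyopts(obj, OFF_TO_IDX(offset),
 *		    OFF_TO_IDX(offset + count));
 */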
 3514: void
 3515: vm_freeze_copyopts(object, froma, toa)
 3516: 	vm_object_t object;
 3517: 	vm_pindex_t froma, toa;
 3518: {
 3519: 	int rv;
 3520: 	vm_object_t robject;
 3521: 	vm_pindex_t idx;
 3522: 
 3523: 	if ((object == NULL) ||
 3524: 		((object->flags & OBJ_OPT) == 0))
 3525: 		return;
 3526: 
 3527: 	if (object->shadow_count > object->ref_count)
 3528: 		panic("vm_freeze_copyopts: sc > rc");
 3529: 
 3530: 	while((robject = LIST_FIRST(&object->shadow_head)) != NULL) {
 3531: 		vm_pindex_t bo_pindex;
 3532: 		vm_page_t m_in, m_out;
 3533: 
 3534: 		bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
 3535: 
 3536: 		vm_object_reference(robject);
 3537: 
 3538: 		vm_object_pip_wait(robject, "objfrz");
 3539: 
 3540: 		if (robject->ref_count == 1) {
 3541: 			vm_object_deallocate(robject);
 3542: 			continue;
 3543: 		}
 3544: 
 3545: 		vm_object_pip_add(robject, 1);
 3546: 
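		/*
		 * Give the shadow object its own copy of every page it does
		 * not already have, pulling the data from the backing object
		 * and paging it in from the pager if necessary.
		 */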
 3547: 		for (idx = 0; idx < robject->size; idx++) {
 3548: 
 3549: 			m_out = vm_page_grab(robject, idx,
 3550: 					    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 3551: 
 3552: 			if (m_out->valid == 0) {
 3553: 				m_in = vm_page_grab(object, bo_pindex + idx,
 3554: 					    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 3555: 				if (m_in->valid == 0) {
 3556: 					rv = vm_pager_get_pages(object, &m_in, 1, 0);
 3557: 					if (rv != VM_PAGER_OK) {
 3558: 						printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
 3559: 						continue;
 3560: 					}
 3561: 					vm_page_deactivate(m_in);
 3562: 				}
 3563: 
 3564: 				vm_page_protect(m_in, VM_PROT_NONE);
 3565: 				pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
 3566: 				m_out->valid = m_in->valid;
 3567: 				vm_page_dirty(m_out);
 3568: 				vm_page_activate(m_out);
 3569: 				vm_page_wakeup(m_in);
 3570: 			}
 3571: 			vm_page_wakeup(m_out);
 3572: 		}
 3573: 
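		/*
		 * The shadow object is now fully populated, so detach it
		 * from the backing object entirely.
		 */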
 3574: 		object->shadow_count--;
 3575: 		object->ref_count--;
 3576: 		LIST_REMOVE(robject, shadow_list);
 3577: 		robject->backing_object = NULL;
 3578: 		robject->backing_object_offset = 0;
 3579: 
 3580: 		vm_object_pip_wakeup(robject);
 3581: 		vm_object_deallocate(robject);
 3582: 	}
 3583: 
 3584: 	vm_object_clear_flag(object, OBJ_OPT);
 3585: }
 3586: 
 3587: #include "opt_ddb.h"
 3588: #ifdef DDB
 3589: #include <sys/kernel.h>
 3590: 
 3591: #include <ddb/ddb.h>
 3592: 
 3593: /*
 3594:  *	vm_map_print:	[ debug ]
 3595:  */
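/*
 * Invoked from the kernel debugger as "show map <addr>", where <addr> is a
 * vm_map pointer (the address below is illustrative):
 *
 *	db> show map 0xc0dfe000
 */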
 3596: DB_SHOW_COMMAND(map, vm_map_print)
 3597: {
 3598: 	static int nlines;
 3599: 	/* XXX convert args. */
 3600: 	vm_map_t map = (vm_map_t)addr;
 3601: 	boolean_t full = have_addr;
 3602: 
 3603: 	vm_map_entry_t entry;
 3604: 
 3605: 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
 3606: 	    (void *)map,
 3607: 	    (void *)map->pmap, map->nentries, map->timestamp);
 3608: 	nlines++;
 3609: 
 3610: 	if (!full && db_indent)
 3611: 		return;
 3612: 
 3613: 	db_indent += 2;
 3614: 	for (entry = map->header.next; entry != &map->header;
 3615: 	    entry = entry->next) {
 3616: 		db_iprintf("map entry %p: start=%p, end=%p\n",
 3617: 		    (void *)entry, (void *)entry->start, (void *)entry->end);
 3618: 		nlines++;
 3619: 		{
 3620: 			static char *inheritance_name[4] =
 3621: 			{"share", "copy", "none", "donate_copy"};
 3622: 
 3623: 			db_iprintf(" prot=%x/%x/%s",
 3624: 			    entry->protection,
 3625: 			    entry->max_protection,
 3626: 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
 3627: 			if (entry->wired_count != 0)
 3628: 				db_printf(", wired");
 3629: 		}
 3630: 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 3631: 			/* XXX no %qd in kernel.  Truncate entry->offset. */
 3632: 			db_printf(", share=%p, offset=0x%lx\n",
 3633: 			    (void *)entry->object.sub_map,
 3634: 			    (long)entry->offset);
 3635: 			nlines++;
 3636: 			if ((entry->prev == &map->header) ||
 3637: 			    (entry->prev->object.sub_map !=
 3638: 				entry->object.sub_map)) {
 3639: 				db_indent += 2;
 3640: 				vm_map_print((db_expr_t)(intptr_t)
 3641: 					     entry->object.sub_map,
 3642: 					     full, 0, (char *)0);
 3643: 				db_indent -= 2;
 3644: 			}
 3645: 		} else {
 3646: 			/* XXX no %qd in kernel.  Truncate entry->offset. */
 3647: 			db_printf(", object=%p, offset=0x%lx",
 3648: 			    (void *)entry->object.vm_object,
 3649: 			    (long)entry->offset);
 3650: 			if (entry->eflags & MAP_ENTRY_COW)
 3651: 				db_printf(", copy (%s)",
 3652: 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 3653: 			db_printf("\n");
 3654: 			nlines++;
 3655: 
 3656: 			if ((entry->prev == &map->header) ||
 3657: 			    (entry->prev->object.vm_object !=
 3658: 				entry->object.vm_object)) {
 3659: 				db_indent += 2;
 3660: 				vm_object_print((db_expr_t)(intptr_t)
 3661: 						entry->object.vm_object,
 3662: 						full, 0, (char *)0);
 3663: 				nlines += 4;
 3664: 				db_indent -= 2;
 3665: 			}
 3666: 		}
 3667: 	}
 3668: 	db_indent -= 2;
 3669: 	if (db_indent == 0)
 3670: 		nlines = 0;
 3671: }
 3672: 
 3673: 
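/*
 * "show procvm [addr]" prints the vmspace, map and pmap pointers for the
 * given process (curproc when no address is supplied) and then dumps its
 * map.  Example from the ddb prompt (address illustrative):
 *
 *	db> show procvm 0xc16a2000
 */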
 3674: DB_SHOW_COMMAND(procvm, procvm)
 3675: {
 3676: 	struct proc *p;
 3677: 
 3678: 	if (have_addr) {
 3679: 		p = (struct proc *) addr;
 3680: 	} else {
 3681: 		p = curproc;
 3682: 	}
 3683: 
 3684: 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
 3685: 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
 3686: 	    (void *)vmspace_pmap(p->p_vmspace));
 3687: 
 3688: 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
 3689: }
 3690: 
 3691: #endif /* DDB */