File:  [DragonFly] / src / sys / i386 / i386 / Attic / pmap.c
Revision 1.32
Mon Mar 1 06:33:16 2004 UTC by dillon
Branches: MAIN
CVS tags: HEAD
Newtoken commit.  Change the token implementation as follows:  (1) Obtaining
a token no longer enters a critical section.  (2) tokens can be held through
scheduler switches and blocking conditions and are effectively released and
reacquired on resume.  Thus tokens serialize access only while the thread
is actually running.  Serialization is not broken by preemptive interrupts.
That is, interrupt threads which preempt do not release the preempted thread's
tokens.  (3) Unlike spl's, tokens will interlock w/ interrupt threads on
the same or on a different cpu.
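
As a rough illustration of the usage pattern this implies (the signatures are
an assumption based on the tokref-style lwkt_gettoken()/lwkt_reltoken()
interface of this period; check sys/thread.h for the exact API, and
some_token/some_event are placeholders):

	lwkt_tokref ilock;			/* token reference, API assumed */

	lwkt_gettoken(&ilock, &some_token);	/* no critical section is entered */
	/*
	 * The token serializes access only while this thread is running.
	 * Any blocking operation effectively releases it and reacquires it
	 * on resume, so state guarded by the token must be revalidated
	 * after blocking.
	 */
	tsleep(&some_event, 0, "example", hz);
	/* token is held again here; recheck whatever it protects */
	lwkt_reltoken(&ilock);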

The vnode interlock code has been rewritten and the API has changed.  The
mountlist vnode scanning code has been consolidated and all known races have
been fixed.  The vnode interlock is now a pool token.

The code that frees unreferenced vnodes whose last VM page has been freed has
been moved out of the low level vm_page_free() code and into the
periodic filesystem syncer code in vfs_msync().

The SMP startup code and the IPI code have been cleaned up considerably.
Certain early token interactions on AP cpus have been moved to the BSP.

The LWKT rwlock API has been cleaned up and turned on.

Major testing by: David Rhodus

    1: /*
    2:  * Copyright (c) 1991 Regents of the University of California.
    3:  * All rights reserved.
    4:  * Copyright (c) 1994 John S. Dyson
    5:  * All rights reserved.
    6:  * Copyright (c) 1994 David Greenman
    7:  * All rights reserved.
    8:  *
    9:  * This code is derived from software contributed to Berkeley by
   10:  * the Systems Programming Group of the University of Utah Computer
   11:  * Science Department and William Jolitz of UUNET Technologies Inc.
   12:  *
   13:  * Redistribution and use in source and binary forms, with or without
   14:  * modification, are permitted provided that the following conditions
   15:  * are met:
   16:  * 1. Redistributions of source code must retain the above copyright
   17:  *    notice, this list of conditions and the following disclaimer.
   18:  * 2. Redistributions in binary form must reproduce the above copyright
   19:  *    notice, this list of conditions and the following disclaimer in the
   20:  *    documentation and/or other materials provided with the distribution.
   21:  * 3. All advertising materials mentioning features or use of this software
   22:  *    must display the following acknowledgement:
   23:  *	This product includes software developed by the University of
   24:  *	California, Berkeley and its contributors.
   25:  * 4. Neither the name of the University nor the names of its contributors
   26:  *    may be used to endorse or promote products derived from this software
   27:  *    without specific prior written permission.
   28:  *
   29:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39:  * SUCH DAMAGE.
   40:  *
   41:  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
   42:  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
   43:  * $DragonFly: src/sys/i386/i386/pmap.c,v 1.32 2004/03/01 06:33:16 dillon Exp $
   44:  */
   45: 
   46: /*
   47:  *	Manages physical address maps.
   48:  *
   49:  *	In addition to hardware address maps, this
   50:  *	module is called upon to provide software-use-only
   51:  *	maps which may or may not be stored in the same
   52:  *	form as hardware maps.  These pseudo-maps are
   53:  *	used to store intermediate results from copy
   54:  *	operations to and from address spaces.
   55:  *
   56:  *	Since the information managed by this module is
   57:  *	also stored by the logical address mapping module,
   58:  *	this module may throw away valid virtual-to-physical
   59:  *	mappings at almost any time.  However, invalidations
   60:  *	of virtual-to-physical mappings must be done as
   61:  *	requested.
   62:  *
   63:  *	In order to cope with hardware architectures which
   64:  *	make virtual-to-physical map invalidates expensive,
   65:  *	this module may delay invalidate or reduced protection
   66:  *	operations until such time as they are actually
   67:  *	necessary.  This module is given full information as
   68:  *	to which processors are currently using which maps,
   69:  *	and to when physical maps must be made correct.
   70:  */
   71: 
   72: #include "opt_disable_pse.h"
   73: #include "opt_pmap.h"
   74: #include "opt_msgbuf.h"
   75: 
   76: #include <sys/param.h>
   77: #include <sys/systm.h>
   78: #include <sys/kernel.h>
   79: #include <sys/proc.h>
   80: #include <sys/msgbuf.h>
   81: #include <sys/vmmeter.h>
   82: #include <sys/mman.h>
   83: 
   84: #include <vm/vm.h>
   85: #include <vm/vm_param.h>
   86: #include <sys/sysctl.h>
   87: #include <sys/lock.h>
   88: #include <vm/vm_kern.h>
   89: #include <vm/vm_page.h>
   90: #include <vm/vm_map.h>
   91: #include <vm/vm_object.h>
   92: #include <vm/vm_extern.h>
   93: #include <vm/vm_pageout.h>
   94: #include <vm/vm_pager.h>
   95: #include <vm/vm_zone.h>
   96: 
   97: #include <sys/user.h>
   98: #include <sys/thread2.h>
   99: 
  100: #include <machine/cputypes.h>
  101: #include <machine/md_var.h>
  102: #include <machine/specialreg.h>
  103: #if defined(SMP) || defined(APIC_IO)
  104: #include <machine/smp.h>
  105: #include <machine/apicreg.h>
  106: #endif /* SMP || APIC_IO */
  107: #include <machine/globaldata.h>
  108: #include <machine/pmap.h>
  109: #include <machine/pmap_inval.h>
  110: 
  111: #define PMAP_KEEP_PDIRS
  112: #ifndef PMAP_SHPGPERPROC
  113: #define PMAP_SHPGPERPROC 200
  114: #endif
  115: 
  116: #if defined(DIAGNOSTIC)
  117: #define PMAP_DIAGNOSTIC
  118: #endif
  119: 
  120: #define MINPV 2048
  121: 
  122: #if !defined(PMAP_DIAGNOSTIC)
  123: #define PMAP_INLINE __inline
  124: #else
  125: #define PMAP_INLINE
  126: #endif
  127: 
  128: /*
  129:  * Get PDEs and PTEs for user/kernel address space
  130:  */
  131: #define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  132: #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
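/*
 * Illustrative note (assumes the standard non-PAE i386 layout used by this
 * file, where PDRSHIFT is 22, pages are 4KB and NPTEPG is 1024): for
 * va = 0xbfc01234,
 *
 *	page directory index:  va >> PDRSHIFT            = 0x2ff
 *	page table index:      (va >> 12) & (NPTEPG - 1)  = 0x001
 *	offset within page:    va & PAGE_MASK             = 0x234
 *
 * pmap_pde() returns &pm_pdir[0x2ff], while i386_btop(va) = va >> 12 is the
 * flat pte index used against the recursively mapped PTmap/APTmap below.
 */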
  133: 
  134: #define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
  135: #define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
  136: #define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
  137: #define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
  138: #define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
  139: 
  140: 
  141: /*
  142:  * Given a map and a machine independent protection code,
  143:  * convert to a vax protection code.
  144:  */
  145: #define pte_prot(m, p)	(protection_codes[p])
  146: static int protection_codes[8];
  147: 
  148: static struct pmap kernel_pmap_store;
  149: pmap_t kernel_pmap;
  150: 
  151: vm_paddr_t avail_start;	/* PA of first available physical page */
  152: vm_paddr_t avail_end;		/* PA of last available physical page */
  153: vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
  154: vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
  155: static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
  156: static int pgeflag;		/* PG_G or-in */
  157: static int pseflag;		/* PG_PS or-in */
  158: 
  159: static vm_object_t kptobj;
  160: 
  161: static int nkpt;
  162: vm_offset_t kernel_vm_end;
  163: 
  164: /*
  165:  * Data for the pv entry allocation mechanism
  166:  */
  167: static vm_zone_t pvzone;
  168: static struct vm_zone pvzone_store;
  169: static struct vm_object pvzone_obj;
  170: static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
  171: static int pmap_pagedaemon_waken = 0;
  172: static struct pv_entry *pvinit;
  173: 
  174: /*
  175:  * All those kernel PT submaps that BSD is so fond of
  176:  */
  177: pt_entry_t *CMAP1 = 0, *ptmmap;
  178: caddr_t CADDR1 = 0, ptvmmap = 0;
  179: static pt_entry_t *msgbufmap;
  180: struct msgbuf *msgbufp=0;
  181: 
  182: /*
  183:  * Crashdump maps.
  184:  */
  185: static pt_entry_t *pt_crashdumpmap;
  186: static caddr_t crashdumpmap;
  187: 
  188: extern pt_entry_t *SMPpt;
  189: 
  190: static PMAP_INLINE void	free_pv_entry (pv_entry_t pv);
  191: static unsigned * get_ptbase (pmap_t pmap);
  192: static pv_entry_t get_pv_entry (void);
  193: static void	i386_protection_init (void);
  194: static __inline void	pmap_changebit (vm_page_t m, int bit, boolean_t setem);
  195: 
  196: static void	pmap_remove_all (vm_page_t m);
  197: static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va,
  198: 				      vm_page_t m, vm_page_t mpte);
  199: static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, 
  200: 				vm_offset_t sva, pmap_inval_info_t info);
  201: static void pmap_remove_page (struct pmap *pmap, 
  202: 				vm_offset_t va, pmap_inval_info_t info);
  203: static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
  204: 				vm_offset_t va, pmap_inval_info_t info);
  205: static boolean_t pmap_testbit (vm_page_t m, int bit);
  206: static void pmap_insert_entry (pmap_t pmap, vm_offset_t va,
  207: 		vm_page_t mpte, vm_page_t m);
  208: 
  209: static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);
  210: 
  211: static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
  212: static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex);
  213: static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
  214: static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
  215: static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t);
  216: static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  217: 
  218: static unsigned pdir4mb;
  219: 
  220: /*
  221:  * Move the kernel virtual free pointer to the next
  222:  * 4MB.  This is used to help improve performance
  223:  * by using a large (4MB) page for much of the kernel
  224:  * (.text, .data, .bss)
  225:  */
  226: static vm_offset_t
  227: pmap_kmem_choose(vm_offset_t addr)
  228: {
  229: 	vm_offset_t newaddr = addr;
  230: #ifndef DISABLE_PSE
  231: 	if (cpu_feature & CPUID_PSE) {
  232: 		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
  233: 	}
  234: #endif
  235: 	return newaddr;
  236: }
  237: 
  238: /*
  239:  * pmap_pte:
  240:  *
  241:  *	Extract the page table entry associated with the given map/virtual
  242:  *	pair.
  243:  *
  244:  *	This function may NOT be called from an interrupt.
  245:  */
  246: PMAP_INLINE unsigned *
  247: pmap_pte(pmap_t pmap, vm_offset_t va)
  248: {
  249: 	unsigned *pdeaddr;
  250: 
  251: 	if (pmap) {
  252: 		pdeaddr = (unsigned *) pmap_pde(pmap, va);
  253: 		if (*pdeaddr & PG_PS)
  254: 			return pdeaddr;
  255: 		if (*pdeaddr) {
  256: 			return get_ptbase(pmap) + i386_btop(va);
  257: 		}
  258: 	}
  259: 	return (0);
  260: }
  261: 
  262: /*
  263:  * pmap_pte_quick:
  264:  *
  265:  *	Super fast pmap_pte routine best used when scanning the pv lists.
   266:  *	This eliminates many coarse-grained invltlb calls.  Note that many of
  267:  *	the pv list scans are across different pmaps and it is very wasteful
  268:  *	to do an entire invltlb when checking a single mapping.
  269:  *
  270:  *	Should only be called while splvm() is held or from a critical
  271:  *	section.
  272:  */
  273: static unsigned * 
  274: pmap_pte_quick(pmap_t pmap, vm_offset_t va)
  275: {
  276: 	struct mdglobaldata *gd = mdcpu;
  277: 	unsigned pde, newpf;
  278: 
  279: 	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
  280: 		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
  281: 		unsigned index = i386_btop(va);
  282: 		/* are we current address space or kernel? */
  283: 		if ((pmap == kernel_pmap) ||
  284: 			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
  285: 			return (unsigned *) PTmap + index;
  286: 		}
  287: 		newpf = pde & PG_FRAME;
  288: 		if ( ((* (unsigned *) gd->gd_PMAP1) & PG_FRAME) != newpf) {
  289: 			* (unsigned *) gd->gd_PMAP1 = newpf | PG_RW | PG_V;
  290: 			cpu_invlpg(gd->gd_PADDR1);
  291: 		}
  292: 		return gd->gd_PADDR1 + ((unsigned) index & (NPTEPG - 1));
  293: 	}
  294: 	return (0);
  295: }
  296: 
  297: 
  298: /*
  299:  *	Bootstrap the system enough to run with virtual memory.
  300:  *
  301:  *	On the i386 this is called after mapping has already been enabled
  302:  *	and just syncs the pmap module with what has already been done.
  303:  *	[We can't call it easily with mapping off since the kernel is not
  304:  *	mapped with PA == VA, hence we would have to relocate every address
  305:  *	from the linked base (virtual) address "KERNBASE" to the actual
  306:  *	(physical) address starting relative to 0]
  307:  */
  308: void
  309: pmap_bootstrap(firstaddr, loadaddr)
  310: 	vm_paddr_t firstaddr;
  311: 	vm_paddr_t loadaddr;
  312: {
  313: 	vm_offset_t va;
  314: 	pt_entry_t *pte;
  315: 	struct mdglobaldata *gd;
  316: 	int i;
  317: 
  318: 	avail_start = firstaddr;
  319: 
  320: 	/*
  321: 	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  322: 	 * large. It should instead be correctly calculated in locore.s and
  323: 	 * not based on 'first' (which is a physical address, not a virtual
  324: 	 * address, for the start of unused physical memory). The kernel
  325: 	 * page tables are NOT double mapped and thus should not be included
  326: 	 * in this calculation.
  327: 	 */
  328: 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  329: 	virtual_avail = pmap_kmem_choose(virtual_avail);
  330: 
  331: 	virtual_end = VM_MAX_KERNEL_ADDRESS;
  332: 
  333: 	/*
  334: 	 * Initialize protection array.
  335: 	 */
  336: 	i386_protection_init();
  337: 
  338: 	/*
  339: 	 * The kernel's pmap is statically allocated so we don't have to use
  340: 	 * pmap_create, which is unlikely to work correctly at this part of
  341: 	 * the boot sequence (XXX and which no longer exists).
  342: 	 */
  343: 	kernel_pmap = &kernel_pmap_store;
  344: 
  345: 	kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (u_int)IdlePTD);
  346: 	kernel_pmap->pm_count = 1;
  347: 	kernel_pmap->pm_active = (cpumask_t)-1;	/* don't allow deactivation */
  348: 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
  349: 	nkpt = NKPT;
  350: 
  351: 	/*
  352: 	 * Reserve some special page table entries/VA space for temporary
  353: 	 * mapping of pages.
  354: 	 */
  355: #define	SYSMAP(c, p, v, n)	\
  356: 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
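	/*
	 * For illustration (editorial example, derived directly from the
	 * macro above): SYSMAP(caddr_t, CMAP1, CADDR1, 1) expands to
	 *
	 *	CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE);
	 *	CMAP1 = pte; pte += (1);
	 *
	 * i.e. one page of KVA is reserved and the pte that will map it is
	 * recorded, without entering anything into the page table yet.
	 */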
  357: 
  358: 	va = virtual_avail;
  359: 	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
  360: 
  361: 	/*
  362: 	 * CMAP1/CMAP2 are used for zeroing and copying pages.
  363: 	 */
  364: 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  365: 
  366: 	/*
  367: 	 * Crashdump maps.
  368: 	 */
  369: 	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
  370: 
  371: 	/*
  372: 	 * ptvmmap is used for reading arbitrary physical pages via
  373: 	 * /dev/mem.
  374: 	 */
  375: 	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
  376: 
  377: 	/*
  378: 	 * msgbufp is used to map the system message buffer.
  379: 	 * XXX msgbufmap is not used.
  380: 	 */
  381: 	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
  382: 	       atop(round_page(MSGBUF_SIZE)))
  383: 
  384: 	virtual_avail = va;
  385: 
  386: 	*(int *) CMAP1 = 0;
  387: 	for (i = 0; i < NKPT; i++)
  388: 		PTD[i] = 0;
  389: 
  390: 	/*
  391: 	 * PG_G is terribly broken on SMP because we IPI invltlb's in some
   392:  * cases rather than invlpg.  Actually, I don't even know why it
  393: 	 * works under UP because self-referential page table mappings
  394: 	 */
  395: #ifdef SMP
  396: 	pgeflag = 0;
  397: #else
  398: 	if (cpu_feature & CPUID_PGE)
  399: 		pgeflag = PG_G;
  400: #endif
  401: 	
  402: /*
  403:  * Initialize the 4MB page size flag
  404:  */
  405: 	pseflag = 0;
  406: /*
  407:  * The 4MB page version of the initial
  408:  * kernel page mapping.
  409:  */
  410: 	pdir4mb = 0;
  411: 
  412: #if !defined(DISABLE_PSE)
  413: 	if (cpu_feature & CPUID_PSE) {
  414: 		unsigned ptditmp;
  415: 		/*
  416: 		 * Note that we have enabled PSE mode
  417: 		 */
  418: 		pseflag = PG_PS;
  419: 		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
  420: 		ptditmp &= ~(NBPDR - 1);
  421: 		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
  422: 		pdir4mb = ptditmp;
  423: 
  424: #ifndef SMP
  425: 		/*
  426: 		 * Enable the PSE mode.  If we are SMP we can't do this
  427: 		 * now because the APs will not be able to use it when
  428: 		 * they boot up.
  429: 		 */
  430: 		load_cr4(rcr4() | CR4_PSE);
  431: 
  432: 		/*
  433: 		 * We can do the mapping here for the single processor
  434: 		 * case.  We simply ignore the old page table page from
  435: 		 * now on.
  436: 		 */
  437: 		/*
  438: 		 * For SMP, we still need 4K pages to bootstrap APs,
  439: 		 * PSE will be enabled as soon as all APs are up.
  440: 		 */
  441: 		PTD[KPTDI] = (pd_entry_t)ptditmp;
  442: 		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t)ptditmp;
  443: 		cpu_invltlb();
  444: #endif
  445: 	}
  446: #endif
  447: #ifdef APIC_IO
  448: 	if (cpu_apic_address == 0)
  449: 		panic("pmap_bootstrap: no local apic!");
  450: 
  451: 	/* local apic is mapped on last page */
  452: 	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
  453: 	    (cpu_apic_address & PG_FRAME));
  454: #endif
  455: 
  456: 	/* BSP does this itself, AP's get it pre-set */
  457: 	gd = &CPU_prvspace[0].mdglobaldata;
  458: 	gd->gd_CMAP1 = &SMPpt[1];
  459: 	gd->gd_CMAP2 = &SMPpt[2];
  460: 	gd->gd_CMAP3 = &SMPpt[3];
  461: 	gd->gd_PMAP1 = &SMPpt[4];
  462: 	gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1;
  463: 	gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2;
  464: 	gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3;
  465: 	gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1;
  466: 
  467: 	cpu_invltlb();
  468: }
  469: 
  470: #ifdef SMP
  471: /*
  472:  * Set 4mb pdir for mp startup
  473:  */
  474: void
  475: pmap_set_opt(void)
  476: {
  477: 	if (pseflag && (cpu_feature & CPUID_PSE)) {
  478: 		load_cr4(rcr4() | CR4_PSE);
  479: 		if (pdir4mb && mycpu->gd_cpuid == 0) {	/* only on BSP */
  480: 			kernel_pmap->pm_pdir[KPTDI] =
  481: 			    PTD[KPTDI] = (pd_entry_t)pdir4mb;
  482: 			cpu_invltlb();
  483: 		}
  484: 	}
  485: }
  486: #endif
  487: 
  488: /*
  489:  *	Initialize the pmap module.
  490:  *	Called by vm_init, to initialize any structures that the pmap
  491:  *	system needs to map virtual memory.
   492:  *	pmap_init has been enhanced to support discontiguous physical
   493:  *	memory in a fairly consistent way.
  494:  */
  495: void
  496: pmap_init(phys_start, phys_end)
  497: 	vm_paddr_t phys_start, phys_end;
  498: {
  499: 	int i;
  500: 	int initial_pvs;
  501: 
  502: 	/*
  503: 	 * object for kernel page table pages
  504: 	 */
  505: 	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
  506: 
  507: 	/*
  508: 	 * Allocate memory for random pmap data structures.  Includes the
  509: 	 * pv_head_table.
  510: 	 */
  511: 
  512: 	for(i = 0; i < vm_page_array_size; i++) {
  513: 		vm_page_t m;
  514: 
  515: 		m = &vm_page_array[i];
  516: 		TAILQ_INIT(&m->md.pv_list);
  517: 		m->md.pv_list_count = 0;
  518: 	}
  519: 
  520: 	/*
  521: 	 * init the pv free list
  522: 	 */
  523: 	initial_pvs = vm_page_array_size;
  524: 	if (initial_pvs < MINPV)
  525: 		initial_pvs = MINPV;
  526: 	pvzone = &pvzone_store;
  527: 	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
  528: 		initial_pvs * sizeof (struct pv_entry));
  529: 	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
  530: 	    vm_page_array_size);
  531: 
  532: 	/*
  533: 	 * Now it is safe to enable pv_table recording.
  534: 	 */
  535: 	pmap_initialized = TRUE;
  536: }
  537: 
  538: /*
  539:  * Initialize the address space (zone) for the pv_entries.  Set a
  540:  * high water mark so that the system can recover from excessive
  541:  * numbers of pv entries.
  542:  */
  543: void
  544: pmap_init2()
  545: {
  546: 	int shpgperproc = PMAP_SHPGPERPROC;
  547: 
  548: 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  549: 	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
  550: 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  551: 	pv_entry_high_water = 9 * (pv_entry_max / 10);
  552: 	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
  553: }
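
/*
 * Illustrative note (made-up numbers): both values above are fetched from
 * the kernel environment, so they can be overridden at boot time, typically
 * from /boot/loader.conf, e.g.
 *
 *	vm.pmap.shpgperproc="300"
 *	vm.pmap.pv_entries="2000000"
 *
 * With the default shpgperproc of 200, maxproc = 2000 and a
 * vm_page_array_size of 65536 pages, pv_entry_max would be
 * 200 * 2000 + 65536 = 465536 and pv_entry_high_water 9/10 of that.
 */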
  554: 
  555: 
  556: /***************************************************
  557:  * Low level helper routines.....
  558:  ***************************************************/
  559: 
  560: #if defined(PMAP_DIAGNOSTIC)
  561: 
  562: /*
  563:  * This code checks for non-writeable/modified pages.
  564:  * This should be an invalid condition.
  565:  */
  566: static int
  567: pmap_nw_modified(pt_entry_t ptea)
  568: {
  569: 	int pte;
  570: 
  571: 	pte = (int) ptea;
  572: 
  573: 	if ((pte & (PG_M|PG_RW)) == PG_M)
  574: 		return 1;
  575: 	else
  576: 		return 0;
  577: }
  578: #endif
  579: 
  580: 
  581: /*
  582:  * this routine defines the region(s) of memory that should
  583:  * not be tested for the modified bit.
  584:  */
  585: static PMAP_INLINE int
  586: pmap_track_modified(vm_offset_t va)
  587: {
  588: 	if ((va < clean_sva) || (va >= clean_eva)) 
  589: 		return 1;
  590: 	else
  591: 		return 0;
  592: }
  593: 
  594: static unsigned *
  595: get_ptbase(pmap_t pmap)
  596: {
  597: 	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
  598: 	struct globaldata *gd = mycpu;
  599: 
  600: 	/* are we current address space or kernel? */
  601: 	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
  602: 		return (unsigned *) PTmap;
  603: 	}
  604: 
  605: 	/* otherwise, we are alternate address space */
  606: 	KKASSERT(gd->gd_intr_nesting_level == 0 && (gd->gd_curthread->td_flags & TDF_INTTHREAD) == 0);
  607: 
  608: 	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
  609: 		APTDpde = (pd_entry_t)(frame | PG_RW | PG_V);
  610: 		/* The page directory is not shared between CPUs */
  611: 		cpu_invltlb();
  612: 	}
  613: 	return (unsigned *) APTmap;
  614: }
  615: 
  616: /*
  617:  * pmap_extract:
  618:  *
  619:  *	Extract the physical page address associated with the map/VA pair.
  620:  *
  621:  *	This function may not be called from an interrupt if the pmap is
  622:  *	not kernel_pmap.
  623:  */
  624: vm_paddr_t 
  625: pmap_extract(pmap_t pmap, vm_offset_t va)
  626: {
  627: 	vm_offset_t rtval;
  628: 	vm_offset_t pdirindex;
  629: 
  630: 	pdirindex = va >> PDRSHIFT;
  631: 	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
  632: 		unsigned *pte;
  633: 		if ((rtval & PG_PS) != 0) {
  634: 			rtval &= ~(NBPDR - 1);
  635: 			rtval |= va & (NBPDR - 1);
  636: 			return rtval;
  637: 		}
  638: 		pte = get_ptbase(pmap) + i386_btop(va);
  639: 		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
  640: 		return rtval;
  641: 	}
  642: 	return 0;
  643: }
  644: 
  645: /*
  646:  * Extract user accessible page only, return NULL if the page is not
   647:  * present or if its current state is not sufficient.  Caller will
  648:  * generally call vm_fault() on failure and try again.
  649:  */
  650: vm_page_t
  651: pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot)
  652: {
  653: 	vm_offset_t rtval;
  654: 	vm_offset_t pdirindex;
  655: 
  656: 	pdirindex = va >> PDRSHIFT;
  657: 	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
  658: 		unsigned *pte;
  659: 		vm_page_t m;
  660: 
  661: 		if ((rtval & PG_PS) != 0) {
  662: 			if ((rtval & (PG_V|PG_U)) != (PG_V|PG_U))
  663: 				return (NULL);
  664: 			if ((prot & VM_PROT_WRITE) && (rtval & PG_RW) == 0)
  665: 				return (NULL);
  666: 			rtval &= ~(NBPDR - 1);
  667: 			rtval |= va & (NBPDR - 1);
  668: 			m = PHYS_TO_VM_PAGE(rtval);
  669: 		} else {
  670: 			pte = get_ptbase(pmap) + i386_btop(va);
  671: 			if ((*pte & (PG_V|PG_U)) != (PG_V|PG_U))
  672: 				return (NULL);
  673: 			if ((prot & VM_PROT_WRITE) && (*pte & PG_RW) == 0)
  674: 				return (NULL);
  675: 			rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
  676: 			m = PHYS_TO_VM_PAGE(rtval);
  677: 		}
  678: 		return(m);
  679: 	}
  680: 	return (NULL);
  681: }
  682: 
  683: /***************************************************
  684:  * Low level mapping routines.....
  685:  ***************************************************/
  686: 
  687: /*
  688:  * add a wired page to the kva
  689:  * note that in order for the mapping to take effect -- you
   690:  * should do an invltlb after doing the pmap_kenter...
  691:  */
  692: PMAP_INLINE void 
  693: pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  694: {
  695: 	unsigned *pte;
  696: 	unsigned npte;
  697: 	pmap_inval_info info;
  698: 
  699: 	pmap_inval_init(&info);
  700: 	pmap_inval_add(&info, kernel_pmap, va);
  701: 	npte = pa | PG_RW | PG_V | pgeflag;
  702: 	pte = (unsigned *)vtopte(va);
  703: 	*pte = npte;
  704: 	pmap_inval_flush(&info);
  705: }
  706: 
  707: /*
  708:  * remove a page from the kernel pagetables
  709:  */
  710: PMAP_INLINE void
  711: pmap_kremove(vm_offset_t va)
  712: {
  713: 	unsigned *pte;
  714: 	pmap_inval_info info;
  715: 
  716: 	pmap_inval_init(&info);
  717: 	pmap_inval_add(&info, kernel_pmap, va);
  718: 	pte = (unsigned *)vtopte(va);
  719: 	*pte = 0;
  720: 	pmap_inval_flush(&info);
  721: }
  722: 
  723: /*
  724:  *	Used to map a range of physical addresses into kernel
  725:  *	virtual address space.
  726:  *
  727:  *	For now, VM is already on, we only need to map the
  728:  *	specified memory.
  729:  */
  730: vm_offset_t
  731: pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
  732: {
  733: 	while (start < end) {
  734: 		pmap_kenter(virt, start);
  735: 		virt += PAGE_SIZE;
  736: 		start += PAGE_SIZE;
  737: 	}
  738: 	return (virt);
  739: }
  740: 
  741: 
  742: /*
   743:  * Add a list of wired pages to the kva.
   744:  * This routine is only used for temporary
  745:  * kernel mappings that do not need to have
  746:  * page modification or references recorded.
  747:  * Note that old mappings are simply written
  748:  * over.  The page *must* be wired.
  749:  */
  750: void
  751: pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
  752: {
  753: 	vm_offset_t end_va;
  754: 
  755: 	end_va = va + count * PAGE_SIZE;
  756: 		
  757: 	while (va < end_va) {
  758: 		unsigned *pte;
  759: 
  760: 		pte = (unsigned *)vtopte(va);
  761: 		*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
  762: 		cpu_invlpg((void *)va);
  763: 		va += PAGE_SIZE;
  764: 		m++;
  765: 	}
  766: #ifdef SMP
  767: 	smp_invltlb();	/* XXX */
  768: #endif
  769: }
  770: 
  771: /*
  772:  * this routine jerks page mappings from the
  773:  * kernel -- it is meant only for temporary mappings.
  774:  */
  775: void
  776: pmap_qremove(vm_offset_t va, int count)
  777: {
  778: 	vm_offset_t end_va;
  779: 
  780: 	end_va = va + count*PAGE_SIZE;
  781: 
  782: 	while (va < end_va) {
  783: 		unsigned *pte;
  784: 
  785: 		pte = (unsigned *)vtopte(va);
  786: 		*pte = 0;
  787: 		cpu_invlpg((void *)va);
  788: 		va += PAGE_SIZE;
  789: 	}
  790: #ifdef SMP
  791: 	smp_invltlb();
  792: #endif
  793: }
  794: 
  795: static vm_page_t
  796: pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
  797: {
  798: 	vm_page_t m;
  799: retry:
  800: 	m = vm_page_lookup(object, pindex);
  801: 	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
  802: 		goto retry;
  803: 	return m;
  804: }
  805: 
  806: /*
   807:  * Set up the pcb and initial stack pointer for a new thread.
  808:  * NOTE! the new thread's cpu may not equal the current cpu.
  809:  */
  810: void
  811: pmap_init_thread(thread_t td)
  812: {
  813: 	td->td_pcb = (struct pcb *)(td->td_kstack + UPAGES * PAGE_SIZE) - 1;
  814: 	td->td_sp = (char *)td->td_pcb - 16;
  815: }
  816: 
  817: /*
  818:  * Create the UPAGES for a new process.
  819:  * This routine directly affects the fork perf for a process.
  820:  */
  821: void
  822: pmap_init_proc(struct proc *p, struct thread *td)
  823: {
  824: 	p->p_addr = (void *)td->td_kstack;
  825: 	p->p_thread = td;
  826: 	td->td_proc = p;
  827: 	td->td_switch = cpu_heavy_switch;
  828: #ifdef SMP
  829: 	td->td_mpcount = 1;
  830: #endif
  831: 	bzero(p->p_addr, sizeof(*p->p_addr));
  832: }
  833: 
  834: /*
  835:  * Dispose the UPAGES for a process that has exited.
  836:  * This routine directly impacts the exit perf of a process.
  837:  */
  838: struct thread *
  839: pmap_dispose_proc(struct proc *p)
  840: {
  841: 	struct thread *td;
  842: 
  843: 	KASSERT(p->p_lock == 0, ("attempt to dispose referenced proc! %p", p));
  844: 
  845: 	if ((td = p->p_thread) != NULL) {
  846: 	    p->p_thread = NULL;
  847: 	    td->td_proc = NULL;
  848: 	}
  849: 	p->p_addr = NULL;
  850: 	return(td);
  851: }
  852: 
  853: /*
  854:  * Allow the UPAGES for a process to be prejudicially paged out.
  855:  */
  856: void
  857: pmap_swapout_proc(struct proc *p)
  858: {
  859: #if 0
  860: 	int i;
  861: 	vm_object_t upobj;
  862: 	vm_page_t m;
  863: 
  864: 	upobj = p->p_upages_obj;
  865: 	/*
  866: 	 * let the upages be paged
  867: 	 */
  868: 	for(i=0;i<UPAGES;i++) {
  869: 		if ((m = vm_page_lookup(upobj, i)) == NULL)
  870: 			panic("pmap_swapout_proc: upage already missing???");
  871: 		vm_page_dirty(m);
  872: 		vm_page_unwire(m, 0);
  873: 		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
  874: 	}
  875: #endif
  876: }
  877: 
  878: /*
  879:  * Bring the UPAGES for a specified process back in.
  880:  */
  881: void
  882: pmap_swapin_proc(struct proc *p)
  883: {
  884: #if 0
  885: 	int i,rv;
  886: 	vm_object_t upobj;
  887: 	vm_page_t m;
  888: 
  889: 	upobj = p->p_upages_obj;
  890: 	for(i=0;i<UPAGES;i++) {
  891: 
  892: 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
  893: 
  894: 		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
  895: 			VM_PAGE_TO_PHYS(m));
  896: 
  897: 		if (m->valid != VM_PAGE_BITS_ALL) {
  898: 			rv = vm_pager_get_pages(upobj, &m, 1, 0);
  899: 			if (rv != VM_PAGER_OK)
  900: 				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
  901: 			m = vm_page_lookup(upobj, i);
  902: 			m->valid = VM_PAGE_BITS_ALL;
  903: 		}
  904: 
  905: 		vm_page_wire(m);
  906: 		vm_page_wakeup(m);
  907: 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
  908: 	}
  909: #endif
  910: }
  911: 
  912: /***************************************************
  913:  * Page table page management routines.....
  914:  ***************************************************/
  915: 
  916: /*
  917:  * This routine unholds page table pages, and if the hold count
  918:  * drops to zero, then it decrements the wire count.
  919:  */
  920: static int 
  921: _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) 
  922: {
  923: 	pmap_inval_flush(info);
  924: 	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
  925: 		;
  926: 
  927: 	if (m->hold_count == 0) {
  928: 		vm_offset_t pteva;
  929: 		/*
  930: 		 * unmap the page table page
  931: 		 */
  932: 		pmap_inval_add(info, pmap, -1);
  933: 		pmap->pm_pdir[m->pindex] = 0;
  934: 		--pmap->pm_stats.resident_count;
  935: 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
  936: 			(((unsigned) PTDpde) & PG_FRAME)) {
  937: 			/*
   938: 			 * Do an invltlb to make the invalidated mapping
  939: 			 * take effect immediately.
  940: 			 */
  941: 			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
  942: 		}
  943: 
  944: 		if (pmap->pm_ptphint == m)
  945: 			pmap->pm_ptphint = NULL;
  946: 
  947: 		/*
  948: 		 * If the page is finally unwired, simply free it.
  949: 		 */
  950: 		--m->wire_count;
  951: 		if (m->wire_count == 0) {
  952: 			vm_page_flash(m);
  953: 			vm_page_busy(m);
  954: 			vm_page_free_zero(m);
  955: 			--vmstats.v_wire_count;
  956: 		}
  957: 		return 1;
  958: 	}
  959: 	return 0;
  960: }
  961: 
  962: static PMAP_INLINE int
  963: pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
  964: {
  965: 	vm_page_unhold(m);
  966: 	if (m->hold_count == 0)
  967: 		return _pmap_unwire_pte_hold(pmap, m, info);
  968: 	else
  969: 		return 0;
  970: }
  971: 
  972: /*
  973:  * After removing a page table entry, this routine is used to
  974:  * conditionally free the page, and manage the hold/wire counts.
  975:  */
  976: static int
  977: pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
  978: 		pmap_inval_info_t info)
  979: {
  980: 	unsigned ptepindex;
  981: 	if (va >= UPT_MIN_ADDRESS)
  982: 		return 0;
  983: 
  984: 	if (mpte == NULL) {
  985: 		ptepindex = (va >> PDRSHIFT);
  986: 		if (pmap->pm_ptphint &&
  987: 			(pmap->pm_ptphint->pindex == ptepindex)) {
  988: 			mpte = pmap->pm_ptphint;
  989: 		} else {
  990: 			pmap_inval_flush(info);
  991: 			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
  992: 			pmap->pm_ptphint = mpte;
  993: 		}
  994: 	}
  995: 
  996: 	return pmap_unwire_pte_hold(pmap, mpte, info);
  997: }
  998: 
  999: void
 1000: pmap_pinit0(struct pmap *pmap)
 1001: {
 1002: 	pmap->pm_pdir =
 1003: 		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 1004: 	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
 1005: 	pmap->pm_count = 1;
 1006: 	pmap->pm_active = 0;
 1007: 	pmap->pm_ptphint = NULL;
 1008: 	TAILQ_INIT(&pmap->pm_pvlist);
 1009: 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1010: }
 1011: 
 1012: /*
 1013:  * Initialize a preallocated and zeroed pmap structure,
 1014:  * such as one in a vmspace structure.
 1015:  */
 1016: void
 1017: pmap_pinit(struct pmap *pmap)
 1018: {
 1019: 	vm_page_t ptdpg;
 1020: 
 1021: 	/*
 1022: 	 * No need to allocate page table space yet but we do need a valid
 1023: 	 * page directory table.
 1024: 	 */
 1025: 	if (pmap->pm_pdir == NULL) {
 1026: 		pmap->pm_pdir =
 1027: 			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 1028: 	}
 1029: 
 1030: 	/*
 1031: 	 * allocate object for the ptes
 1032: 	 */
 1033: 	if (pmap->pm_pteobj == NULL)
 1034: 		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
 1035: 
 1036: 	/*
 1037: 	 * allocate the page directory page
 1038: 	 */
 1039: 	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
 1040: 			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 1041: 
 1042: 	ptdpg->wire_count = 1;
 1043: 	++vmstats.v_wire_count;
 1044: 
 1045: 
 1046: 	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
 1047: 	ptdpg->valid = VM_PAGE_BITS_ALL;
 1048: 
 1049: 	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
 1050: 	if ((ptdpg->flags & PG_ZERO) == 0)
 1051: 		bzero(pmap->pm_pdir, PAGE_SIZE);
 1052: 
 1053: 	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 1054: 
 1055: 	/* install self-referential address mapping entry */
 1056: 	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
 1057: 		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
 1058: 
 1059: 	pmap->pm_count = 1;
 1060: 	pmap->pm_active = 0;
 1061: 	pmap->pm_ptphint = NULL;
 1062: 	TAILQ_INIT(&pmap->pm_pvlist);
 1063: 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1064: }
 1065: 
 1066: /*
 1067:  * Wire in kernel global address entries.  To avoid a race condition
 1068:  * between pmap initialization and pmap_growkernel, this procedure
 1069:  * should be called after the vmspace is attached to the process
 1070:  * but before this pmap is activated.
 1071:  */
 1072: void
 1073: pmap_pinit2(struct pmap *pmap)
 1074: {
 1075: 	/* XXX copies current process, does not fill in MPPTDI */
 1076: 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
 1077: }
 1078: 
 1079: static int
 1080: pmap_release_free_page(struct pmap *pmap, vm_page_t p)
 1081: {
 1082: 	unsigned *pde = (unsigned *) pmap->pm_pdir;
 1083: 	/*
 1084: 	 * This code optimizes the case of freeing non-busy
 1085: 	 * page-table pages.  Those pages are zero now, and
 1086: 	 * might as well be placed directly into the zero queue.
 1087: 	 */
 1088: 	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
 1089: 		return 0;
 1090: 
 1091: 	vm_page_busy(p);
 1092: 
 1093: 	/*
  1094: 	 * Remove the page table page from the process's address space.
 1095: 	 */
 1096: 	pde[p->pindex] = 0;
 1097: 	pmap->pm_stats.resident_count--;
 1098: 
 1099: 	if (p->hold_count)  {
 1100: 		panic("pmap_release: freeing held page table page");
 1101: 	}
 1102: 	/*
 1103: 	 * Page directory pages need to have the kernel
 1104: 	 * stuff cleared, so they can go into the zero queue also.
 1105: 	 */
 1106: 	if (p->pindex == PTDPTDI) {
 1107: 		bzero(pde + KPTDI, nkpt * PTESIZE);
 1108: 		pde[MPPTDI] = 0;
 1109: 		pde[APTDPTDI] = 0;
 1110: 		pmap_kremove((vm_offset_t) pmap->pm_pdir);
 1111: 	}
 1112: 
 1113: 	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
 1114: 		pmap->pm_ptphint = NULL;
 1115: 
 1116: 	p->wire_count--;
 1117: 	vmstats.v_wire_count--;
 1118: 	vm_page_free_zero(p);
 1119: 	return 1;
 1120: }
 1121: 
 1122: /*
 1123:  * this routine is called if the page table page is not
 1124:  * mapped correctly.
 1125:  */
 1126: static vm_page_t
 1127: _pmap_allocpte(pmap_t pmap, unsigned ptepindex)
 1128: {
 1129: 	vm_offset_t pteva, ptepa;
 1130: 	vm_page_t m;
 1131: 
 1132: 	/*
 1133: 	 * Find or fabricate a new pagetable page
 1134: 	 */
 1135: 	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
 1136: 			VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
 1137: 
 1138: 	KASSERT(m->queue == PQ_NONE,
 1139: 		("_pmap_allocpte: %p->queue != PQ_NONE", m));
 1140: 
 1141: 	if (m->wire_count == 0)
 1142: 		vmstats.v_wire_count++;
 1143: 	m->wire_count++;
 1144: 
 1145: 	/*
 1146: 	 * Increment the hold count for the page table page
 1147: 	 * (denoting a new mapping.)
 1148: 	 */
 1149: 	m->hold_count++;
 1150: 
 1151: 	/*
 1152: 	 * Map the pagetable page into the process address space, if
 1153: 	 * it isn't already there.
 1154: 	 */
 1155: 
 1156: 	pmap->pm_stats.resident_count++;
 1157: 
 1158: 	ptepa = VM_PAGE_TO_PHYS(m);
 1159: 	pmap->pm_pdir[ptepindex] =
 1160: 		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1161: 
 1162: 	/*
 1163: 	 * Set the page table hint
 1164: 	 */
 1165: 	pmap->pm_ptphint = m;
 1166: 
 1167: 	/*
 1168: 	 * Try to use the new mapping, but if we cannot, then
 1169: 	 * do it with the routine that maps the page explicitly.
 1170: 	 */
 1171: 	if ((m->flags & PG_ZERO) == 0) {
 1172: 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
 1173: 			(((unsigned) PTDpde) & PG_FRAME)) {
 1174: 			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
 1175: 			bzero((caddr_t) pteva, PAGE_SIZE);
 1176: 		} else {
 1177: 			pmap_zero_page(ptepa);
 1178: 		}
 1179: 	}
 1180: 
 1181: 	m->valid = VM_PAGE_BITS_ALL;
 1182: 	vm_page_flag_clear(m, PG_ZERO);
 1183: 	vm_page_flag_set(m, PG_MAPPED);
 1184: 	vm_page_wakeup(m);
 1185: 
 1186: 	return m;
 1187: }
 1188: 
 1189: static vm_page_t
 1190: pmap_allocpte(pmap_t pmap, vm_offset_t va)
 1191: {
 1192: 	unsigned ptepindex;
 1193: 	vm_offset_t ptepa;
 1194: 	vm_page_t m;
 1195: 
 1196: 	/*
 1197: 	 * Calculate pagetable page index
 1198: 	 */
 1199: 	ptepindex = va >> PDRSHIFT;
 1200: 
 1201: 	/*
 1202: 	 * Get the page directory entry
 1203: 	 */
 1204: 	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 1205: 
 1206: 	/*
 1207: 	 * This supports switching from a 4MB page to a
 1208: 	 * normal 4K page.
 1209: 	 */
 1210: 	if (ptepa & PG_PS) {
 1211: 		pmap->pm_pdir[ptepindex] = 0;
 1212: 		ptepa = 0;
 1213: 		cpu_invltlb();
 1214: 		smp_invltlb();
 1215: 	}
 1216: 
 1217: 	/*
 1218: 	 * If the page table page is mapped, we just increment the
 1219: 	 * hold count, and activate it.
 1220: 	 */
 1221: 	if (ptepa) {
 1222: 		/*
 1223: 		 * In order to get the page table page, try the
 1224: 		 * hint first.
 1225: 		 */
 1226: 		if (pmap->pm_ptphint &&
 1227: 			(pmap->pm_ptphint->pindex == ptepindex)) {
 1228: 			m = pmap->pm_ptphint;
 1229: 		} else {
 1230: 			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 1231: 			pmap->pm_ptphint = m;
 1232: 		}
 1233: 		m->hold_count++;
 1234: 		return m;
 1235: 	}
 1236: 	/*
 1237: 	 * Here if the pte page isn't mapped, or if it has been deallocated.
 1238: 	 */
 1239: 	return _pmap_allocpte(pmap, ptepindex);
 1240: }
 1241: 
 1242: 
 1243: /***************************************************
  1244:  * Pmap allocation/deallocation routines.
 1245:  ***************************************************/
 1246: 
 1247: /*
 1248:  * Release any resources held by the given physical map.
 1249:  * Called when a pmap initialized by pmap_pinit is being released.
 1250:  * Should only be called if the map contains no valid mappings.
 1251:  */
 1252: void
 1253: pmap_release(struct pmap *pmap)
 1254: {
 1255: 	vm_page_t p,n,ptdpg;
 1256: 	vm_object_t object = pmap->pm_pteobj;
 1257: 	int curgeneration;
 1258: 
 1259: #if defined(DIAGNOSTIC)
 1260: 	if (object->ref_count != 1)
 1261: 		panic("pmap_release: pteobj reference count != 1");
 1262: #endif
 1263: 	
 1264: 	ptdpg = NULL;
 1265: retry:
 1266: 	curgeneration = object->generation;
 1267: 	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
 1268: 		n = TAILQ_NEXT(p, listq);
 1269: 		if (p->pindex == PTDPTDI) {
 1270: 			ptdpg = p;
 1271: 			continue;
 1272: 		}
 1273: 		while (1) {
 1274: 			if (!pmap_release_free_page(pmap, p) &&
 1275: 				(object->generation != curgeneration))
 1276: 				goto retry;
 1277: 		}
 1278: 	}
 1279: 
 1280: 	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
 1281: 		goto retry;
 1282: }
 1283: 
 1284: static int
 1285: kvm_size(SYSCTL_HANDLER_ARGS)
 1286: {
 1287: 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1288: 
 1289:         return sysctl_handle_long(oidp, &ksize, 0, req);
 1290: }
 1291: SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1292:     0, 0, kvm_size, "IU", "Size of KVM");
 1293: 
 1294: static int
 1295: kvm_free(SYSCTL_HANDLER_ARGS)
 1296: {
 1297: 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1298: 
 1299:         return sysctl_handle_long(oidp, &kfree, 0, req);
 1300: }
 1301: SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1302:     0, 0, kvm_free, "IU", "Amount of KVM free");
 1303: 
 1304: /*
 1305:  * grow the number of kernel page table entries, if needed
 1306:  */
 1307: void
 1308: pmap_growkernel(vm_offset_t addr)
 1309: {
 1310: 	struct proc *p;
 1311: 	struct pmap *pmap;
 1312: 	int s;
 1313: 	vm_offset_t ptppaddr;
 1314: 	vm_page_t nkpg;
 1315: 	pd_entry_t newpdir;
 1316: 
 1317: 	s = splhigh();
 1318: 	if (kernel_vm_end == 0) {
 1319: 		kernel_vm_end = KERNBASE;
 1320: 		nkpt = 0;
 1321: 		while (pdir_pde(PTD, kernel_vm_end)) {
 1322: 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1323: 			nkpt++;
 1324: 		}
 1325: 	}
 1326: 	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1327: 	while (kernel_vm_end < addr) {
 1328: 		if (pdir_pde(PTD, kernel_vm_end)) {
 1329: 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1330: 			continue;
 1331: 		}
 1332: 
 1333: 		/*
 1334: 		 * This index is bogus, but out of the way
 1335: 		 */
 1336: 		nkpg = vm_page_alloc(kptobj, nkpt, 
 1337: 			VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT);
 1338: 		if (nkpg == NULL)
 1339: 			panic("pmap_growkernel: no memory to grow kernel");
 1340: 
 1341: 		nkpt++;
 1342: 
 1343: 		vm_page_wire(nkpg);
 1344: 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 1345: 		pmap_zero_page(ptppaddr);
 1346: 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 1347: 		pdir_pde(PTD, kernel_vm_end) = newpdir;
 1348: 
 1349: 		FOREACH_PROC_IN_SYSTEM(p) {
 1350: 			if (p->p_vmspace) {
 1351: 				pmap = vmspace_pmap(p->p_vmspace);
 1352: 				*pmap_pde(pmap, kernel_vm_end) = newpdir;
 1353: 			}
 1354: 		}
 1355: 		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
 1356: 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1357: 	}
 1358: 	splx(s);
 1359: }
 1360: 
 1361: /*
 1362:  *	Retire the given physical map from service.
 1363:  *	Should only be called if the map contains
 1364:  *	no valid mappings.
 1365:  */
 1366: void
 1367: pmap_destroy(pmap_t pmap)
 1368: {
 1369: 	int count;
 1370: 
 1371: 	if (pmap == NULL)
 1372: 		return;
 1373: 
 1374: 	count = --pmap->pm_count;
 1375: 	if (count == 0) {
 1376: 		pmap_release(pmap);
 1377: 		panic("destroying a pmap is not yet implemented");
 1378: 	}
 1379: }
 1380: 
 1381: /*
 1382:  *	Add a reference to the specified pmap.
 1383:  */
 1384: void
 1385: pmap_reference(pmap_t pmap)
 1386: {
 1387: 	if (pmap != NULL) {
 1388: 		pmap->pm_count++;
 1389: 	}
 1390: }
 1391: 
 1392: /***************************************************
  1393:  * Page management routines.
 1394:  ***************************************************/
 1395: 
 1396: /*
 1397:  * free the pv_entry back to the free list.  This function may be
 1398:  * called from an interrupt.
 1399:  */
 1400: static PMAP_INLINE void
 1401: free_pv_entry(pv_entry_t pv)
 1402: {
 1403: 	pv_entry_count--;
 1404: 	zfree(pvzone, pv);
 1405: }
 1406: 
 1407: /*
 1408:  * get a new pv_entry, allocating a block from the system
 1409:  * when needed.  This function may be called from an interrupt.
 1410:  */
 1411: static pv_entry_t
 1412: get_pv_entry(void)
 1413: {
 1414: 	pv_entry_count++;
 1415: 	if (pv_entry_high_water &&
 1416: 		(pv_entry_count > pv_entry_high_water) &&
 1417: 		(pmap_pagedaemon_waken == 0)) {
 1418: 		pmap_pagedaemon_waken = 1;
 1419: 		wakeup (&vm_pages_needed);
 1420: 	}
 1421: 	return zalloc(pvzone);
 1422: }
 1423: 
 1424: /*
 1425:  * This routine is very drastic, but can save the system
 1426:  * in a pinch.
 1427:  */
 1428: void
 1429: pmap_collect(void)
 1430: {
 1431: 	int i;
 1432: 	vm_page_t m;
 1433: 	static int warningdone=0;
 1434: 
 1435: 	if (pmap_pagedaemon_waken == 0)
 1436: 		return;
 1437: 
 1438: 	if (warningdone < 5) {
 1439: 		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
 1440: 		warningdone++;
 1441: 	}
 1442: 
 1443: 	for(i = 0; i < vm_page_array_size; i++) {
 1444: 		m = &vm_page_array[i];
 1445: 		if (m->wire_count || m->hold_count || m->busy ||
 1446: 		    (m->flags & PG_BUSY))
 1447: 			continue;
 1448: 		pmap_remove_all(m);
 1449: 	}
 1450: 	pmap_pagedaemon_waken = 0;
 1451: }
 1452: 	
 1453: 
 1454: /*
 1455:  * If it is the first entry on the list, it is actually
 1456:  * in the header and we must copy the following entry up
 1457:  * to the header.  Otherwise we must search the list for
 1458:  * the entry.  In either case we free the now unused entry.
 1459:  */
 1460: static int
 1461: pmap_remove_entry(struct pmap *pmap, vm_page_t m, 
 1462: 			vm_offset_t va, pmap_inval_info_t info)
 1463: {
 1464: 	pv_entry_t pv;
 1465: 	int rtval;
 1466: 	int s;
 1467: 
 1468: 	s = splvm();
 1469: 	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1470: 		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1471: 			if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1472: 				break;
 1473: 		}
 1474: 	} else {
 1475: 		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1476: 			if (va == pv->pv_va) 
 1477: 				break;
 1478: 		}
 1479: 	}
 1480: 
 1481: 	rtval = 0;
 1482: 	if (pv) {
 1483: 		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info);
 1484: 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1485: 		m->md.pv_list_count--;
 1486: 		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 1487: 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 1488: 		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1489: 		free_pv_entry(pv);
 1490: 	}
 1491: 	splx(s);
 1492: 	return rtval;
 1493: }
 1494: 
 1495: /*
 1496:  * Create a pv entry for page at pa for
 1497:  * (pmap, va).
 1498:  */
 1499: static void
 1500: pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
 1501: {
 1502: 	int s;
 1503: 	pv_entry_t pv;
 1504: 
 1505: 	s = splvm();
 1506: 	pv = get_pv_entry();
 1507: 	pv->pv_va = va;
 1508: 	pv->pv_pmap = pmap;
 1509: 	pv->pv_ptem = mpte;
 1510: 
 1511: 	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1512: 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1513: 	m->md.pv_list_count++;
 1514: 
 1515: 	splx(s);
 1516: }
 1517: 
 1518: /*
 1519:  * pmap_remove_pte: do the things to unmap a page in a process
 1520:  */
 1521: static int
 1522: pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va,
 1523: 	pmap_inval_info_t info)
 1524: {
 1525: 	unsigned oldpte;
 1526: 	vm_page_t m;
 1527: 
 1528: 	pmap_inval_add(info, pmap, va);
 1529: 	oldpte = loadandclear(ptq);
 1530: 	if (oldpte & PG_W)
 1531: 		pmap->pm_stats.wired_count -= 1;
 1532: 	/*
  1533: 	 * Machines that don't support invlpg also don't support
 1534: 	 * PG_G.  XXX PG_G is disabled for SMP so don't worry about
 1535: 	 * the SMP case.
 1536: 	 */
 1537: 	if (oldpte & PG_G)
 1538: 		cpu_invlpg((void *)va);
 1539: 	pmap->pm_stats.resident_count -= 1;
 1540: 	if (oldpte & PG_MANAGED) {
 1541: 		m = PHYS_TO_VM_PAGE(oldpte);
 1542: 		if (oldpte & PG_M) {
 1543: #if defined(PMAP_DIAGNOSTIC)
 1544: 			if (pmap_nw_modified((pt_entry_t) oldpte)) {
 1545: 				printf(
 1546: 	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1547: 				    va, oldpte);
 1548: 			}
 1549: #endif
 1550: 			if (pmap_track_modified(va))
 1551: 				vm_page_dirty(m);
 1552: 		}
 1553: 		if (oldpte & PG_A)
 1554: 			vm_page_flag_set(m, PG_REFERENCED);
 1555: 		return pmap_remove_entry(pmap, m, va, info);
 1556: 	} else {
 1557: 		return pmap_unuse_pt(pmap, va, NULL, info);
 1558: 	}
 1559: 
 1560: 	return 0;
 1561: }
 1562: 
 1563: /*
 1564:  * pmap_remove_page:
 1565:  *
 1566:  *	Remove a single page from a process address space.
 1567:  *
 1568:  *	This function may not be called from an interrupt if the pmap is
 1569:  *	not kernel_pmap.
 1570:  */
 1571: static void
 1572: pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info)
 1573: {
 1574: 	unsigned *ptq;
 1575: 
 1576: 	/*
 1577: 	 * if there is no pte for this address, just skip it!!!  Otherwise
 1578: 	 * get a local va for mappings for this pmap and remove the entry.
 1579: 	 */
 1580: 	if (*pmap_pde(pmap, va) != 0) {
 1581: 		ptq = get_ptbase(pmap) + i386_btop(va);
 1582: 		if (*ptq) {
 1583: 			pmap_remove_pte(pmap, ptq, va, info);
 1584: 		}
 1585: 	}
 1586: }
 1587: 
 1588: /*
 1589:  * pmap_remove:
 1590:  *
 1591:  *	Remove the given range of addresses from the specified map.
 1592:  *
 1593:  *	It is assumed that the start and end are properly
 1594:  *	rounded to the page size.
 1595:  *
 1596:  *	This function may not be called from an interrupt if the pmap is
 1597:  *	not kernel_pmap.
 1598:  */
 1599: void
 1600: pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
 1601: {
 1602: 	unsigned *ptbase;
 1603: 	vm_offset_t pdnxt;
 1604: 	vm_offset_t ptpaddr;
 1605: 	vm_offset_t sindex, eindex;
 1606: 	struct pmap_inval_info info;
 1607: 
 1608: 	if (pmap == NULL)
 1609: 		return;
 1610: 
 1611: 	if (pmap->pm_stats.resident_count == 0)
 1612: 		return;
 1613: 
 1614: 	pmap_inval_init(&info);
 1615: 
 1616: 	/*
  1617: 	 * Special handling for removing a single page.  This is a
  1618: 	 * very common operation for which some code can easily be
  1619: 	 * short circuited.
 1620: 	 */
 1621: 	if (((sva + PAGE_SIZE) == eva) && 
 1622: 		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 1623: 		pmap_remove_page(pmap, sva, &info);
 1624: 		pmap_inval_flush(&info);
 1625: 		return;
 1626: 	}
 1627: 
 1628: 	/*
 1629: 	 * Get a local virtual address for the mappings that are being
 1630: 	 * worked with.
 1631: 	 */
 1632: 	ptbase = get_ptbase(pmap);
 1633: 
 1634: 	sindex = i386_btop(sva);
 1635: 	eindex = i386_btop(eva);
 1636: 
 1637: 	for (; sindex < eindex; sindex = pdnxt) {
 1638: 		unsigned pdirindex;
 1639: 
 1640: 		/*
 1641: 		 * Calculate index for next page table.
 1642: 		 */
 1643: 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 1644: 		if (pmap->pm_stats.resident_count == 0)
 1645: 			break;
 1646: 
 1647: 		pdirindex = sindex / NPDEPG;
 1648: 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 1649: 			pmap_inval_add(&info, pmap, -1);
 1650: 			pmap->pm_pdir[pdirindex] = 0;
 1651: 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1652: 			continue;
 1653: 		}
 1654: 
 1655: 		/*
 1656: 		 * Weed out invalid mappings. Note: we assume that the page
 1657: 		 * directory table is always allocated, and in kernel virtual.
 1658: 		 */
 1659: 		if (ptpaddr == 0)
 1660: 			continue;
 1661: 
 1662: 		/*
 1663: 		 * Limit our scan to either the end of the va represented
 1664: 		 * by the current page table page, or to the end of the
 1665: 		 * range being removed.
 1666: 		 */
 1667: 		if (pdnxt > eindex) {
 1668: 			pdnxt = eindex;
 1669: 		}
 1670: 
 1671: 		for (; sindex != pdnxt; sindex++) {
 1672: 			vm_offset_t va;
 1673: 			if (ptbase[sindex] == 0)
 1674: 				continue;
 1675: 			va = i386_ptob(sindex);
 1676: 			if (pmap_remove_pte(pmap, ptbase + sindex, va, &info))
 1677: 				break;
 1678: 		}
 1679: 	}
 1680: 	pmap_inval_flush(&info);
 1681: }
 1682: 
 1683: /*
 1684:  * pmap_remove_all:
 1685:  *
 1686:  *	Removes this physical page from all physical maps in which it resides.
 1687:  *	Reflects back modify bits to the pager.
 1688:  *
 1689:  *	This routine may not be called from an interrupt.
 1690:  */
 1691: 
 1692: static void
 1693: pmap_remove_all(vm_page_t m)
 1694: {
 1695: 	struct pmap_inval_info info;
 1696: 	unsigned *pte, tpte;
 1697: 	pv_entry_t pv;
 1698: 	int s;
 1699: 
 1700: #if defined(PMAP_DIAGNOSTIC)
 1701: 	/*
 1702: 	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 1703: 	 * pages!
 1704: 	 */
 1705: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 1706: 		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m));
 1707: 	}
 1708: #endif
 1709: 
 1710: 	pmap_inval_init(&info);
 1711: 	s = splvm();
 1712: 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1713: 		pv->pv_pmap->pm_stats.resident_count--;
 1714: 
 1715: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 1716: 		pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
 1717: 
 1718: 		tpte = loadandclear(pte);
 1719: 		if (tpte & PG_W)
 1720: 			pv->pv_pmap->pm_stats.wired_count--;
 1721: 
 1722: 		if (tpte & PG_A)
 1723: 			vm_page_flag_set(m, PG_REFERENCED);
 1724: 
 1725: 		/*
 1726: 		 * Update the vm_page_t clean and reference bits.
 1727: 		 */
 1728: 		if (tpte & PG_M) {
 1729: #if defined(PMAP_DIAGNOSTIC)
 1730: 			if (pmap_nw_modified((pt_entry_t) tpte)) {
 1731: 				printf(
 1732: 	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1733: 				    pv->pv_va, tpte);
 1734: 			}
 1735: #endif
 1736: 			if (pmap_track_modified(pv->pv_va))
 1737: 				vm_page_dirty(m);
 1738: 		}
 1739: 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1740: 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1741: 		m->md.pv_list_count--;
 1742: 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
 1743: 		free_pv_entry(pv);
 1744: 	}
 1745: 
 1746: 	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 1747: 	splx(s);
 1748: 	pmap_inval_flush(&info);
 1749: }
 1750: 
 1751: /*
 1752:  * pmap_protect:
 1753:  *
 1754:  *	Set the physical protection on the specified range of this map
 1755:  *	as requested.
 1756:  *
 1757:  *	This function may not be called from an interrupt if the map is
 1758:  *	not the kernel_pmap.
 1759:  */
 1760: void
 1761: pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1762: {
 1763: 	unsigned *ptbase;
 1764: 	vm_offset_t pdnxt, ptpaddr;
 1765: 	vm_pindex_t sindex, eindex;
 1766: 	pmap_inval_info info;
 1767: 
 1768: 	if (pmap == NULL)
 1769: 		return;
 1770: 
 1771: 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1772: 		pmap_remove(pmap, sva, eva);
 1773: 		return;
 1774: 	}
 1775: 
 1776: 	if (prot & VM_PROT_WRITE)
 1777: 		return;
 1778: 
 1779: 	pmap_inval_init(&info);
 1780: 
 1781: 	ptbase = get_ptbase(pmap);
 1782: 
 1783: 	sindex = i386_btop(sva);
 1784: 	eindex = i386_btop(eva);
 1785: 
 1786: 	for (; sindex < eindex; sindex = pdnxt) {
 1787: 
 1788: 		unsigned pdirindex;
 1789: 
 1790: 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 1791: 
 1792: 		pdirindex = sindex / NPDEPG;
 1793: 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 1794: 			pmap_inval_add(&info, pmap, -1);
 1795: 			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 1796: 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1797: 			continue;
 1798: 		}
 1799: 
 1800: 		/*
 1801: 		 * Weed out invalid mappings. Note: we assume that the page
 1802: 		 * directory table is always allocated, and in kernel virtual.
 1803: 		 */
 1804: 		if (ptpaddr == 0)
 1805: 			continue;
 1806: 
 1807: 		if (pdnxt > eindex) {
 1808: 			pdnxt = eindex;
 1809: 		}
 1810: 
 1811: 		for (; sindex != pdnxt; sindex++) {
 1812: 
 1813: 			unsigned pbits;
 1814: 			vm_page_t m;
 1815: 
 1816: 			/* XXX this isn't optimal */
 1817: 			pmap_inval_add(&info, pmap, i386_ptob(sindex));
 1818: 			pbits = ptbase[sindex];
 1819: 
 1820: 			if (pbits & PG_MANAGED) {
 1821: 				m = NULL;
 1822: 				if (pbits & PG_A) {
 1823: 					m = PHYS_TO_VM_PAGE(pbits);
 1824: 					vm_page_flag_set(m, PG_REFERENCED);
 1825: 					pbits &= ~PG_A;
 1826: 				}
 1827: 				if (pbits & PG_M) {
 1828: 					if (pmap_track_modified(i386_ptob(sindex))) {
 1829: 						if (m == NULL)
 1830: 							m = PHYS_TO_VM_PAGE(pbits);
 1831: 						vm_page_dirty(m);
 1832: 						pbits &= ~PG_M;
 1833: 					}
 1834: 				}
 1835: 			}
 1836: 
 1837: 			pbits &= ~PG_RW;
 1838: 
 1839: 			if (pbits != ptbase[sindex]) {
 1840: 				ptbase[sindex] = pbits;
 1841: 			}
 1842: 		}
 1843: 	}
 1844: 	pmap_inval_flush(&info);
 1845: }
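
/*
 * Usage sketch (hypothetical caller): write-protecting an existing range
 * of user mappings, assuming `pmap', `sva', and `eva' are already known:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * Per the checks above, a request without VM_PROT_READ removes the range
 * instead, and a request that still includes VM_PROT_WRITE is a no-op.
 */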
 1846: 
 1847: /*
 1848:  *	Insert the given physical page (p) at
 1849:  *	the specified virtual address (v) in the
 1850:  *	target physical map with the protection requested.
 1851:  *
 1852:  *	If specified, the page will be wired down, meaning
 1853:  *	that the related pte can not be reclaimed.
 1854:  *
 1855:  *	NB:  This is the only routine which MAY NOT lazy-evaluate
 1856:  *	or lose information.  That is, this routine must actually
 1857:  *	insert this page into the given map NOW.
 1858:  */
 1859: void
 1860: pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1861: 	   boolean_t wired)
 1862: {
 1863: 	vm_paddr_t pa;
 1864: 	unsigned *pte;
 1865: 	vm_paddr_t opa;
 1866: 	vm_offset_t origpte, newpte;
 1867: 	vm_page_t mpte;
 1868: 	pmap_inval_info info;
 1869: 
 1870: 	if (pmap == NULL)
 1871: 		return;
 1872: 
 1873: 	va &= PG_FRAME;
 1874: #ifdef PMAP_DIAGNOSTIC
 1875: 	if (va > VM_MAX_KERNEL_ADDRESS)
 1876: 		panic("pmap_enter: toobig");
 1877: 	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1878: 		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 1879: #endif
 1880: 
 1881: 	mpte = NULL;
 1882: 	/*
 1883: 	 * In the case that a page table page is not
 1884: 	 * resident, we are creating it here.
 1885: 	 */
 1886: 	if (va < UPT_MIN_ADDRESS) {
 1887: 		mpte = pmap_allocpte(pmap, va);
 1888: 	}
 1889: 
 1890: 	pmap_inval_init(&info);
 1891: 	pte = pmap_pte(pmap, va);
 1892: 
 1893: 	/*
 1894: 	 * Page Directory table entry not valid, we need a new PT page
 1895: 	 */
 1896: 	if (pte == NULL) {
 1897: 		panic("pmap_enter: invalid page directory pdir=%x, va=0x%x\n",
 1898: 		     (unsigned) pmap->pm_pdir[PTDPTDI], va);
 1899: 	}
 1900: 
 1901: 	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
 1902: 	pmap_inval_add(&info, pmap, va); /* XXX non-optimal */
 1903: 	origpte = *(vm_offset_t *)pte;
 1904: 	opa = origpte & PG_FRAME;
 1905: 
 1906: 	if (origpte & PG_PS)
 1907: 		panic("pmap_enter: attempted pmap_enter on 4MB page");
 1908: 
 1909: 	/*
 1910: 	 * Mapping has not changed, must be protection or wiring change.
 1911: 	 */
 1912: 	if (origpte && (opa == pa)) {
 1913: 		/*
 1914: 		 * Wiring change, just update stats. We don't worry about
 1915: 		 * wiring PT pages as they remain resident as long as there
 1916: 		 * are valid mappings in them. Hence, if a user page is wired,
 1917: 		 * the PT page will be also.
 1918: 		 */
 1919: 		if (wired && ((origpte & PG_W) == 0))
 1920: 			pmap->pm_stats.wired_count++;
 1921: 		else if (!wired && (origpte & PG_W))
 1922: 			pmap->pm_stats.wired_count--;
 1923: 
 1924: #if defined(PMAP_DIAGNOSTIC)
 1925: 		if (pmap_nw_modified((pt_entry_t) origpte)) {
 1926: 			printf(
 1927: 	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1928: 			    va, origpte);
 1929: 		}
 1930: #endif
 1931: 
 1932: 		/*
 1933: 		 * Remove extra pte reference
 1934: 		 */
 1935: 		if (mpte)
 1936: 			mpte->hold_count--;
 1937: 
 1938: 		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
 1939: 			if ((origpte & PG_RW) == 0)
 1940: 				*pte |= PG_RW;
 1941: 			pmap_inval_flush(&info);
 1942: 			return;
 1943: 		}
 1944: 
 1945: 		/*
 1946: 		 * We might be turning off write access to the page,
 1947: 		 * so we go ahead and sense modify status.
 1948: 		 */
 1949: 		if (origpte & PG_MANAGED) {
 1950: 			if ((origpte & PG_M) && pmap_track_modified(va)) {
 1951: 				vm_page_t om;
 1952: 				om = PHYS_TO_VM_PAGE(opa);
 1953: 				vm_page_dirty(om);
 1954: 			}
 1955: 			pa |= PG_MANAGED;
 1956: 		}
 1957: 		goto validate;
 1958: 	} 
 1959: 	/*
 1960: 	 * Mapping has changed, invalidate old range and fall through to
 1961: 	 * handle validating new mapping.
 1962: 	 */
 1963: 	if (opa) {
 1964: 		int err;
 1965: 		err = pmap_remove_pte(pmap, pte, va, &info);
 1966: 		if (err)
 1967: 			panic("pmap_enter: pte vanished, va: 0x%x", va);
 1968: 	}
 1969: 
 1970: 	/*
 1971: 	 * Enter on the PV list if part of our managed memory. Note that we
 1972: 	 * raise IPL while manipulating pv_table since pmap_enter can be
 1973: 	 * called at interrupt time.
 1974: 	 */
 1975: 	if (pmap_initialized && 
 1976: 	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 1977: 		pmap_insert_entry(pmap, va, mpte, m);
 1978: 		pa |= PG_MANAGED;
 1979: 	}
 1980: 
 1981: 	/*
 1982: 	 * Increment counters
 1983: 	 */
 1984: 	pmap->pm_stats.resident_count++;
 1985: 	if (wired)
 1986: 		pmap->pm_stats.wired_count++;
 1987: 
 1988: validate:
 1989: 	/*
 1990: 	 * Now validate mapping with desired protection/wiring.
 1991: 	 */
 1992: 	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
 1993: 
 1994: 	if (wired)
 1995: 		newpte |= PG_W;
 1996: 	if (va < UPT_MIN_ADDRESS)
 1997: 		newpte |= PG_U;
 1998: 	if (pmap == kernel_pmap)
 1999: 		newpte |= pgeflag;
 2000: 
 2001: 	/*
 2002: 	 * if the mapping or permission bits are different, we need
 2003: 	 * to update the pte.
 2004: 	 */
 2005: 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2006: 		*pte = newpte | PG_A;
 2007: 	}
 2008: 	pmap_inval_flush(&info);
 2009: }
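
/*
 * Usage sketch (hypothetical caller): a vm_fault-style insertion of a
 * busied, managed page `m' at the faulting address `va':
 *
 *	pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, FALSE);
 *
 * Passing wired == TRUE additionally sets PG_W and bumps
 * pm_stats.wired_count so the mapping cannot be reclaimed.
 */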
 2010: 
 2011: /*
 2012:  * this code makes some *MAJOR* assumptions:
 2013:  * 1. The pmap is the current pmap, and it exists.
 2014:  * 2. Not wired.
 2015:  * 3. Read access.
 2016:  * 4. No page table pages.
 2017:  * 5. Tlbflush is deferred to calling procedure.
 2018:  * 6. Page IS managed.
 2019:  * but is *MUCH* faster than pmap_enter...
 2020:  */
 2021: 
 2022: static vm_page_t
 2023: pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 2024: {
 2025: 	unsigned *pte;
 2026: 	vm_paddr_t pa;
 2027: 	pmap_inval_info info;
 2028: 
 2029: 	pmap_inval_init(&info);
 2030: 
 2031: 	/*
 2032: 	 * In the case that a page table page is not
 2033: 	 * resident, we are creating it here.
 2034: 	 */
 2035: 	if (va < UPT_MIN_ADDRESS) {
 2036: 		unsigned ptepindex;
 2037: 		vm_offset_t ptepa;
 2038: 
 2039: 		/*
 2040: 		 * Calculate pagetable page index
 2041: 		 */
 2042: 		ptepindex = va >> PDRSHIFT;
 2043: 		if (mpte && (mpte->pindex == ptepindex)) {
 2044: 			mpte->hold_count++;
 2045: 		} else {
 2046: retry:
 2047: 			/*
 2048: 			 * Get the page directory entry
 2049: 			 */
 2050: 			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 2051: 
 2052: 			/*
 2053: 			 * If the page table page is mapped, we just increment
 2054: 			 * the hold count, and activate it.
 2055: 			 */
 2056: 			if (ptepa) {
 2057: 				if (ptepa & PG_PS)
 2058: 					panic("pmap_enter_quick: unexpected mapping into 4MB page");
 2059: 				if (pmap->pm_ptphint &&
 2060: 					(pmap->pm_ptphint->pindex == ptepindex)) {
 2061: 					mpte = pmap->pm_ptphint;
 2062: 				} else {
 2063: 					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 2064: 					pmap->pm_ptphint = mpte;
 2065: 				}
 2066: 				if (mpte == NULL)
 2067: 					goto retry;
 2068: 				mpte->hold_count++;
 2069: 			} else {
 2070: 				mpte = _pmap_allocpte(pmap, ptepindex);
 2071: 			}
 2072: 		}
 2073: 	} else {
 2074: 		mpte = NULL;
 2075: 	}
 2076: 
 2077: 	/*
 2078: 	 * This call to vtopte makes the assumption that we are
 2079: 	 * entering the page into the current pmap.  In order to support
 2080: 	 * quick entry into any pmap, one would likely use pmap_pte_quick.
 2081: 	 * But that isn't as quick as vtopte.
 2082: 	 */
 2083: 	pte = (unsigned *)vtopte(va);
 2084: 	if (*pte) {
 2085: 		if (mpte)
 2086: 			pmap_unwire_pte_hold(pmap, mpte, &info);
 2087: 		return 0;
 2088: 	}
 2089: 
 2090: 	/*
 2091: 	 * Enter on the PV list if part of our managed memory. Note that we
 2092: 	 * raise IPL while manipulating pv_table since pmap_enter can be
 2093: 	 * called at interrupt time.
 2094: 	 */
 2095: 	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2096: 		pmap_insert_entry(pmap, va, mpte, m);
 2097: 
 2098: 	/*
 2099: 	 * Increment counters
 2100: 	 */
 2101: 	pmap->pm_stats.resident_count++;
 2102: 
 2103: 	pa = VM_PAGE_TO_PHYS(m);
 2104: 
 2105: 	/*
 2106: 	 * Now validate mapping with RO protection
 2107: 	 */
 2108: 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2109: 		*pte = pa | PG_V | PG_U;
 2110: 	else
 2111: 		*pte = pa | PG_V | PG_U | PG_MANAGED;
 2112: 
 2113: 	return mpte;
 2114: }
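
/*
 * Usage sketch: pmap_enter_quick() is intended for read-only prefault
 * loops; the page table page it returns is fed back in on the next call
 * to avoid repeated lookups (assuming pages p0 and p1 map consecutively):
 *
 *	mpte = pmap_enter_quick(pmap, addr, p0, NULL);
 *	mpte = pmap_enter_quick(pmap, addr + PAGE_SIZE, p1, mpte);
 */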
 2115: 
 2116: /*
 2117:  * Make a temporary mapping for a physical address.  This is only intended
 2118:  * to be used for panic dumps.
 2119:  */
 2120: void *
 2121: pmap_kenter_temporary(vm_paddr_t pa, int i)
 2122: {
 2123: 	pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
 2124: 	return ((void *)crashdumpmap);
 2125: }
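
/*
 * Usage sketch (hypothetical dump code): successive physical pages are
 * staged through the crashdumpmap window, assuming `pa' is the physical
 * address of the chunk being written out:
 *
 *	va = pmap_kenter_temporary(pa, 0);
 *	pmap_kenter_temporary(pa + PAGE_SIZE, 1);
 *
 * after which the two pages are visible at va and va + PAGE_SIZE.
 */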
 2126: 
 2127: #define MAX_INIT_PT (96)
 2128: /*
 2129:  * pmap_object_init_pt preloads the ptes for a given object
 2130:  * into the specified pmap.  This eliminates the blast of soft
 2131:  * faults on process startup and immediately after an mmap.
 2132:  */
 2133: void
 2134: pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
 2135: 		    vm_pindex_t pindex, vm_size_t size, int limit)
 2136: {
 2137: 	vm_offset_t tmpidx;
 2138: 	int psize;
 2139: 	vm_page_t p, mpte;
 2140: 	int objpgs;
 2141: 
 2142: 	if (pmap == NULL || object == NULL)
 2143: 		return;
 2144: 
 2145: 	/*
 2146: 	 * This code maps large physical mmap regions into the
 2147: 	 * processor address space.  Note that some shortcuts
 2148: 	 * are taken, but the code works.
 2149: 	 */
 2150: 	if (pseflag &&
 2151: 		(object->type == OBJT_DEVICE) &&
 2152: 		((addr & (NBPDR - 1)) == 0) &&
 2153: 		((size & (NBPDR - 1)) == 0) ) {
 2154: 		int i;
 2155: 		vm_page_t m[1];
 2156: 		unsigned int ptepindex;
 2157: 		int npdes;
 2158: 		vm_offset_t ptepa;
 2159: 
 2160: 		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 2161: 			return;
 2162: 
 2163: retry:
 2164: 		p = vm_page_lookup(object, pindex);
 2165: 		if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
 2166: 			goto retry;
 2167: 
 2168: 		if (p == NULL) {
 2169: 			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2170: 			if (p == NULL)
 2171: 				return;
 2172: 			m[0] = p;
 2173: 
 2174: 			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2175: 				vm_page_free(p);
 2176: 				return;
 2177: 			}
 2178: 
 2179: 			p = vm_page_lookup(object, pindex);
 2180: 			vm_page_wakeup(p);
 2181: 		}
 2182: 
 2183: 		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
 2184: 		if (ptepa & (NBPDR - 1)) {
 2185: 			return;
 2186: 		}
 2187: 
 2188: 		p->valid = VM_PAGE_BITS_ALL;
 2189: 
 2190: 		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 2191: 		npdes = size >> PDRSHIFT;
 2192: 		for(i=0;i<npdes;i++) {
 2193: 			pmap->pm_pdir[ptepindex] =
 2194: 				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
 2195: 			ptepa += NBPDR;
 2196: 			ptepindex += 1;
 2197: 		}
 2198: 		vm_page_flag_set(p, PG_MAPPED);
 2199: 		cpu_invltlb();
 2200: 		smp_invltlb();
 2201: 		return;
 2202: 	}
 2203: 
 2204: 	psize = i386_btop(size);
 2205: 
 2206: 	if ((object->type != OBJT_VNODE) ||
 2207: 		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
 2208: 			(object->resident_page_count > MAX_INIT_PT))) {
 2209: 		return;
 2210: 	}
 2211: 
 2212: 	if (psize + pindex > object->size) {
 2213: 		if (object->size < pindex)
 2214: 			return;		  
 2215: 		psize = object->size - pindex;
 2216: 	}
 2217: 
 2218: 	mpte = NULL;
 2219: 	/*
 2220: 	 * if we are processing a major portion of the object, then scan the
 2221: 	 * entire thing.
 2222: 	 */
 2223: 	if (psize > (object->resident_page_count >> 2)) {
 2224: 		objpgs = psize;
 2225: 
 2226: 		for (p = TAILQ_FIRST(&object->memq);
 2227: 		    ((objpgs > 0) && (p != NULL));
 2228: 		    p = TAILQ_NEXT(p, listq)) {
 2229: 
 2230: 			tmpidx = p->pindex;
 2231: 			if (tmpidx < pindex) {
 2232: 				continue;
 2233: 			}
 2234: 			tmpidx -= pindex;
 2235: 			if (tmpidx >= psize) {
 2236: 				continue;
 2237: 			}
 2238: 			/*
 2239: 			 * don't allow an madvise to blow away our really
 2240: 			 * free pages by allocating pv entries.
 2241: 			 */
 2242: 			if ((limit & MAP_PREFAULT_MADVISE) &&
 2243: 			    vmstats.v_free_count < vmstats.v_free_reserved) {
 2244: 				break;
 2245: 			}
 2246: 			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 2247: 				(p->busy == 0) &&
 2248: 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2249: 				if ((p->queue - p->pc) == PQ_CACHE)
 2250: 					vm_page_deactivate(p);
 2251: 				vm_page_busy(p);
 2252: 				mpte = pmap_enter_quick(pmap, 
 2253: 					addr + i386_ptob(tmpidx), p, mpte);
 2254: 				vm_page_flag_set(p, PG_MAPPED);
 2255: 				vm_page_wakeup(p);
 2256: 			}
 2257: 			objpgs -= 1;
 2258: 		}
 2259: 	} else {
 2260: 		/*
 2261: 		 * else lookup the pages one-by-one.
 2262: 		 */
 2263: 		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
 2264: 			/*
 2265: 			 * don't allow an madvise to blow away our really
 2266: 			 * free pages by allocating pv entries.
 2267: 			 */
 2268: 			if ((limit & MAP_PREFAULT_MADVISE) &&
 2269: 			    vmstats.v_free_count < vmstats.v_free_reserved) {
 2270: 				break;
 2271: 			}
 2272: 			p = vm_page_lookup(object, tmpidx + pindex);
 2273: 			if (p &&
 2274: 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 2275: 				(p->busy == 0) &&
 2276: 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2277: 				if ((p->queue - p->pc) == PQ_CACHE)
 2278: 					vm_page_deactivate(p);
 2279: 				vm_page_busy(p);
 2280: 				mpte = pmap_enter_quick(pmap, 
 2281: 					addr + i386_ptob(tmpidx), p, mpte);
 2282: 				vm_page_flag_set(p, PG_MAPPED);
 2283: 				vm_page_wakeup(p);
 2284: 			}
 2285: 		}
 2286: 	}
 2287: }
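
/*
 * Usage sketch (hypothetical caller, argument derivation assumed): the VM
 * map code can preload a freshly created mapping at mmap time:
 *
 *	pmap_object_init_pt(map->pmap, entry->start, entry->object.vm_object,
 *	    OFF_TO_IDX(entry->offset), entry->end - entry->start,
 *	    MAP_PREFAULT_PARTIAL);
 */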
 2288: 
 2289: /*
 2290:  * pmap_prefault provides a quick way of clustering
 2291:  * pagefaults into a process's address space.  It is a "cousin"
 2292:  * of pmap_object_init_pt, except it runs at page fault time instead
 2293:  * of mmap time.
 2294:  */
 2295: #define PFBAK 4
 2296: #define PFFOR 4
 2297: #define PAGEORDER_SIZE (PFBAK+PFFOR)
 2298: 
 2299: static int pmap_prefault_pageorder[] = {
 2300: 	-PAGE_SIZE, PAGE_SIZE,
 2301: 	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
 2302: 	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
 2303: 	-4 * PAGE_SIZE, 4 * PAGE_SIZE
 2304: };
 2305: 
 2306: void
 2307: pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 2308: {
 2309: 	int i;
 2310: 	vm_offset_t starta;
 2311: 	vm_offset_t addr;
 2312: 	vm_pindex_t pindex;
 2313: 	vm_page_t m, mpte;
 2314: 	vm_object_t object;
 2315: 
 2316: 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
 2317: 		return;
 2318: 
 2319: 	object = entry->object.vm_object;
 2320: 
 2321: 	starta = addra - PFBAK * PAGE_SIZE;
 2322: 	if (starta < entry->start) {
 2323: 		starta = entry->start;
 2324: 	} else if (starta > addra) {
 2325: 		starta = 0;
 2326: 	}
 2327: 
 2328: 	mpte = NULL;
 2329: 	for (i = 0; i < PAGEORDER_SIZE; i++) {
 2330: 		vm_object_t lobject;
 2331: 		unsigned *pte;
 2332: 
 2333: 		addr = addra + pmap_prefault_pageorder[i];
 2334: 		if (addr > addra + (PFFOR * PAGE_SIZE))
 2335: 			addr = 0;
 2336: 
 2337: 		if (addr < starta || addr >= entry->end)
 2338: 			continue;
 2339: 
 2340: 		if ((*pmap_pde(pmap, addr)) == 0)
 2341: 			continue;
 2342: 
 2343: 		pte = (unsigned *) vtopte(addr);
 2344: 		if (*pte)
 2345: 			continue;
 2346: 
 2347: 		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
 2348: 		lobject = object;
 2349: 		for (m = vm_page_lookup(lobject, pindex);
 2350: 		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
 2351: 		    lobject = lobject->backing_object) {
 2352: 			if (lobject->backing_object_offset & PAGE_MASK)
 2353: 				break;
 2354: 			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
 2355: 			m = vm_page_lookup(lobject->backing_object, pindex);
 2356: 		}
 2357: 
 2358: 		/*
 2359: 		 * give-up when a page is not in memory
 2360: 		 */
 2361: 		if (m == NULL)
 2362: 			break;
 2363: 
 2364: 		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 2365: 			(m->busy == 0) &&
 2366: 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2367: 
 2368: 			if ((m->queue - m->pc) == PQ_CACHE) {
 2369: 				vm_page_deactivate(m);
 2370: 			}
 2371: 			vm_page_busy(m);
 2372: 			mpte = pmap_enter_quick(pmap, addr, m, mpte);
 2373: 			vm_page_flag_set(m, PG_MAPPED);
 2374: 			vm_page_wakeup(m);
 2375: 		}
 2376: 	}
 2377: }
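
/*
 * Usage sketch (hypothetical fault path): after resolving a fault at `va'
 * inside map entry `entry' of the current process:
 *
 *	pmap_prefault(vmspace_pmap(curproc->p_vmspace), va, entry);
 *
 * The routine itself bails out unless the pmap is the current process's,
 * so it is cheap to call unconditionally.
 */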
 2378: 
 2379: /*
 2380:  *	Routine:	pmap_change_wiring
 2381:  *	Function:	Change the wiring attribute for a map/virtual-address
 2382:  *			pair.
 2383:  *	In/out conditions:
 2384:  *			The mapping must already exist in the pmap.
 2385:  */
 2386: void
 2387: pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 2388: {
 2389: 	unsigned *pte;
 2390: 
 2391: 	if (pmap == NULL)
 2392: 		return;
 2393: 
 2394: 	pte = pmap_pte(pmap, va);
 2395: 
 2396: 	if (wired && !pmap_pte_w(pte))
 2397: 		pmap->pm_stats.wired_count++;
 2398: 	else if (!wired && pmap_pte_w(pte))
 2399: 		pmap->pm_stats.wired_count--;
 2400: 
 2401: 	/*
 2402: 	 * Wiring is not a hardware characteristic so there is no need to
 2403: 	 * invalidate TLB.  However, in an SMP environment we must use
 2404: 	 * a locked bus cycle to update the pte (if we are not using 
 2405: 	 * the pmap_inval_*() API that is)... it's ok to do this for simple
 2406: 	 * wiring changes.
 2407: 	 */
 2408: #ifdef SMP
 2409: 	if (wired)
 2410: 		atomic_set_int(pte, PG_W);
 2411: 	else
 2412: 		atomic_clear_int(pte, PG_W);
 2413: #else
 2414: 	if (wired)
 2415: 		atomic_set_int_nonlocked(pte, PG_W);
 2416: 	else
 2417: 		atomic_clear_int_nonlocked(pte, PG_W);
 2418: #endif
 2419: }
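
/*
 * Usage sketch: the wiring code flips PG_W one page at a time, assuming
 * the mapping at `va' already exists in `pmap':
 *
 *	pmap_change_wiring(pmap, va, TRUE);	(wire)
 *	pmap_change_wiring(pmap, va, FALSE);	(unwire)
 */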
 2420: 
 2421: 
 2422: 
 2423: /*
 2424:  *	Copy the range specified by src_addr/len
 2425:  *	from the source map to the range dst_addr/len
 2426:  *	in the destination map.
 2427:  *
 2428:  *	This routine is only advisory and need not do anything.
 2429:  */
 2430: void
 2431: pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 
 2432: 	vm_size_t len, vm_offset_t src_addr)
 2433: {
 2434: 	pmap_inval_info info;
 2435: 	vm_offset_t addr;
 2436: 	vm_offset_t end_addr = src_addr + len;
 2437: 	vm_offset_t pdnxt;
 2438: 	unsigned src_frame, dst_frame;
 2439: 	vm_page_t m;
 2440: 
 2441: 	if (dst_addr != src_addr)
 2442: 		return;
 2443: 
 2444: 	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 2445: 	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
 2446: 		return;
 2447: 	}
 2448: 
 2449: 	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 2450: 	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
 2451: 		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
 2452: 		/* The page directory is not shared between CPUs */
 2453: 		cpu_invltlb();
 2454: 	}
 2455: 	pmap_inval_init(&info);
 2456: 	pmap_inval_add(&info, dst_pmap, -1);
 2457: 	pmap_inval_add(&info, src_pmap, -1);
 2458: 
 2459: 	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
 2460: 		unsigned *src_pte, *dst_pte;
 2461: 		vm_page_t dstmpte, srcmpte;
 2462: 		vm_offset_t srcptepaddr;
 2463: 		unsigned ptepindex;
 2464: 
 2465: 		if (addr >= UPT_MIN_ADDRESS)
 2466: 			panic("pmap_copy: invalid to pmap_copy page tables\n");
 2467: 
 2468: 		/*
 2469: 		 * Don't let optional prefaulting of pages make us go
 2470: 		 * way below the low water mark of free pages or way
 2471: 		 * above the high water mark of used pv entries.
 2472: 		 */
 2473: 		if (vmstats.v_free_count < vmstats.v_free_reserved ||
 2474: 		    pv_entry_count > pv_entry_high_water)
 2475: 			break;
 2476: 		
 2477: 		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
 2478: 		ptepindex = addr >> PDRSHIFT;
 2479: 
 2480: 		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
 2481: 		if (srcptepaddr == 0)
 2482: 			continue;
 2483: 			
 2484: 		if (srcptepaddr & PG_PS) {
 2485: 			if (dst_pmap->pm_pdir[ptepindex] == 0) {
 2486: 				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
 2487: 				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 2488: 			}
 2489: 			continue;
 2490: 		}
 2491: 
 2492: 		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
 2493: 		if ((srcmpte == NULL) ||
 2494: 			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
 2495: 			continue;
 2496: 
 2497: 		if (pdnxt > end_addr)
 2498: 			pdnxt = end_addr;
 2499: 
 2500: 		src_pte = (unsigned *) vtopte(addr);
 2501: 		dst_pte = (unsigned *) avtopte(addr);
 2502: 		while (addr < pdnxt) {
 2503: 			unsigned ptetemp;
 2504: 			ptetemp = *src_pte;
 2505: 			/*
 2506: 			 * we only virtual-copy managed pages
 2507: 			 */
 2508: 			if ((ptetemp & PG_MANAGED) != 0) {
 2509: 				/*
 2510: 				 * We have to check after allocpte for the
 2511: 				 * pte still being around...  allocpte can
 2512: 				 * block.
 2513: 				 */
 2514: 				dstmpte = pmap_allocpte(dst_pmap, addr);
 2515: 				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
 2516: 					/*
 2517: 					 * Clear the modified and
 2518: 					 * accessed (referenced) bits
 2519: 					 * during the copy.
 2520: 					 */
 2521: 					m = PHYS_TO_VM_PAGE(ptetemp);
 2522: 					*dst_pte = ptetemp & ~(PG_M | PG_A);
 2523: 					dst_pmap->pm_stats.resident_count++;
 2524: 					pmap_insert_entry(dst_pmap, addr,
 2525: 						dstmpte, m);
 2526: 	 			} else {
 2527: 					pmap_unwire_pte_hold(dst_pmap, dstmpte, &info);
 2528: 				}
 2529: 				if (dstmpte->hold_count >= srcmpte->hold_count)
 2530: 					break;
 2531: 			}
 2532: 			addr += PAGE_SIZE;
 2533: 			src_pte++;
 2534: 			dst_pte++;
 2535: 		}
 2536: 	}
 2537: 	pmap_inval_flush(&info);
 2538: }	
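
/*
 * Usage sketch (hypothetical fork path): duplicating the parent's user
 * mappings into the child at the same addresses:
 *
 *	pmap_copy(child_pmap, parent_pmap, addr, size, addr);
 *
 * The copy is purely advisory; anything skipped here is simply faulted
 * in later by the child.
 */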
 2539: 
 2540: /*
 2541:  *	Routine:	pmap_kernel
 2542:  *	Function:
 2543:  *		Returns the physical map handle for the kernel.
 2544:  */
 2545: pmap_t
 2546: pmap_kernel(void)
 2547: {
 2548: 	return (kernel_pmap);
 2549: }
 2550: 
 2551: /*
 2552:  * pmap_zero_page:
 2553:  *
 2554:  *	Zero the specified PA by mapping the page into KVM and clearing its
 2555:  *	contents.
 2556:  *
 2557:  *	This function may be called from an interrupt and no locking is
 2558:  *	required.
 2559:  */
 2560: void
 2561: pmap_zero_page(vm_paddr_t phys)
 2562: {
 2563: 	struct mdglobaldata *gd = mdcpu;
 2564: 
 2565: 	crit_enter();
 2566: 	if (*(int *)gd->gd_CMAP3)
 2567: 		panic("pmap_zero_page: CMAP3 busy");
 2568: 	*(int *)gd->gd_CMAP3 =
 2569: 		    PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 2570: 	cpu_invlpg(gd->gd_CADDR3);
 2571: 
 2572: #if defined(I686_CPU)
 2573: 	if (cpu_class == CPUCLASS_686)
 2574: 		i686_pagezero(gd->gd_CADDR3);
 2575: 	else
 2576: #endif
 2577: 		bzero(gd->gd_CADDR3, PAGE_SIZE);
 2578: 	*(int *) gd->gd_CMAP3 = 0;
 2579: 	crit_exit();
 2580: }
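
/*
 * Usage sketch: zeroing a freshly allocated page before handing it out,
 * assuming a vm_page_t `m':
 *
 *	pmap_zero_page(VM_PAGE_TO_PHYS(m));
 */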
 2581: 
 2582: /*
 2583:  * pmap_zero_page_area:
 2584:  *
 2585:  *	Zero part of a physical page by mapping it into memory and clearing
 2586:  *	its contents with bzero.
 2587:  *
 2588:  *	off and size may not cover an area beyond a single hardware page.
 2589:  */
 2590: void
 2591: pmap_zero_page_area(vm_paddr_t phys, int off, int size)
 2592: {
 2593: 	struct mdglobaldata *gd = mdcpu;
 2594: 
 2595: 	crit_enter();
 2596: 	if (*(int *) gd->gd_CMAP3)
 2597: 		panic("pmap_zero_page_area: CMAP3 busy");
 2598: 	*(int *) gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 2599: 	cpu_invlpg(gd->gd_CADDR3);
 2600: 
 2601: #if defined(I686_CPU)
 2602: 	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 2603: 		i686_pagezero(gd->gd_CADDR3);
 2604: 	else
 2605: #endif
 2606: 		bzero((char *)gd->gd_CADDR3 + off, size);
 2607: 	*(int *) gd->gd_CMAP3 = 0;
 2608: 	crit_exit();
 2609: }
 2610: 
 2611: /*
 2612:  * pmap_copy_page:
 2613:  *
 2614:  *	Copy the physical page from the source PA to the target PA.
 2615:  *	This function may be called from an interrupt.  No locking
 2616:  *	is required.
 2617:  */
 2618: void
 2619: pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
 2620: {
 2621: 	struct mdglobaldata *gd = mdcpu;
 2622: 
 2623: 	crit_enter();
 2624: 	if (*(int *) gd->gd_CMAP1)
 2625: 		panic("pmap_copy_page: CMAP1 busy");
 2626: 	if (*(int *) gd->gd_CMAP2)
 2627: 		panic("pmap_copy_page: CMAP2 busy");
 2628: 
 2629: 	*(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
 2630: 	*(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 2631: 
 2632: 	cpu_invlpg(gd->gd_CADDR1);
 2633: 	cpu_invlpg(gd->gd_CADDR2);
 2634: 
 2635: 	bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
 2636: 
 2637: 	*(int *) gd->gd_CMAP1 = 0;
 2638: 	*(int *) gd->gd_CMAP2 = 0;
 2639: 	crit_exit();
 2640: }
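
/*
 * Usage sketch: copy-on-write style duplication, assuming source and
 * destination vm_page_t's `src_m' and `dst_m':
 *
 *	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dst_m));
 */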
 2641: 
 2642: /*
 2643:  * pmap_copy_page_frag:
 2644:  *
 2645:  *	Copy `bytes' bytes from the source PA to the target PA, starting
 2646:  *	at the page offset encoded in each address.  This function may be
 2647:  *	called from an interrupt.  No locking is required.
 2648:  */
 2649: void
 2650: pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
 2651: {
 2652: 	struct mdglobaldata *gd = mdcpu;
 2653: 
 2654: 	crit_enter();
 2655: 	if (*(int *) gd->gd_CMAP1)
 2656: 		panic("pmap_copy_page_frag: CMAP1 busy");
 2657: 	if (*(int *) gd->gd_CMAP2)
 2658: 		panic("pmap_copy_page_frag: CMAP2 busy");
 2659: 
 2660: 	*(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
 2661: 	*(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 2662: 
 2663: 	cpu_invlpg(gd->gd_CADDR1);
 2664: 	cpu_invlpg(gd->gd_CADDR2);
 2665: 
 2666: 	bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
 2667: 	      (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
 2668: 	      bytes);
 2669: 
 2670: 	*(int *) gd->gd_CMAP1 = 0;
 2671: 	*(int *) gd->gd_CMAP2 = 0;
 2672: 	crit_exit();
 2673: }
 2674: 
 2675: 
 2676: /*
 2677:  *	Routine:	pmap_pageable
 2678:  *	Function:
 2679:  *		Make the specified pages (by pmap, offset)
 2680:  *		pageable (or not) as requested.
 2681:  *
 2682:  *		A page which is not pageable may not take
 2683:  *		a fault; therefore, its page table entry
 2684:  *		must remain valid for the duration.
 2685:  *
 2686:  *		This routine is merely advisory; pmap_enter
 2687:  *		will specify that these pages are to be wired
 2688:  *		down (or not) as appropriate.
 2689:  */
 2690: void
 2691: pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, boolean_t pageable)
 2692: {
 2693: }
 2694: 
 2695: /*
 2696:  * Returns true if the pmap's pv is one of the first
 2697:  * 16 pvs linked to from this page.  This count may
 2698:  * be changed upwards or downwards in the future; it
 2699:  * is only necessary that true be returned for a small
 2700:  * subset of pmaps for proper page aging.
 2701:  */
 2702: boolean_t
 2703: pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 2704: {
 2705: 	pv_entry_t pv;
 2706: 	int loops = 0;
 2707: 	int s;
 2708: 
 2709: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2710: 		return FALSE;
 2711: 
 2712: 	s = splvm();
 2713: 
 2714: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2715: 		if (pv->pv_pmap == pmap) {
 2716: 			splx(s);
 2717: 			return TRUE;
 2718: 		}
 2719: 		loops++;
 2720: 		if (loops >= 16)
 2721: 			break;
 2722: 	}
 2723: 	splx(s);
 2724: 	return (FALSE);
 2725: }
 2726: 
 2727: #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2728: /*
 2729:  * Remove all pages from the specified address space;
 2730:  * this aids process exit speeds.  Also, this code
 2731:  * is special-cased for the current process only, but
 2732:  * it can have the more generic (and slightly slower)
 2733:  * mode enabled.  This is much faster than pmap_remove
 2734:  * in the case of running down an entire address space.
 2735:  */
 2736: void
 2737: pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2738: {
 2739: 	unsigned *pte, tpte;
 2740: 	pv_entry_t pv, npv;
 2741: 	int s;
 2742: 	vm_page_t m;
 2743: 	pmap_inval_info info;
 2744: 
 2745: #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2746: 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
 2747: 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 2748: 		return;
 2749: 	}
 2750: #endif
 2751: 
 2752: 	pmap_inval_init(&info);
 2753: 	s = splvm();
 2754: 	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
 2755: 		pv;
 2756: 		pv = npv) {
 2757: 
 2758: 		if (pv->pv_va >= eva || pv->pv_va < sva) {
 2759: 			npv = TAILQ_NEXT(pv, pv_plist);
 2760: 			continue;
 2761: 		}
 2762: 
 2763: #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2764: 		pte = (unsigned *)vtopte(pv->pv_va);
 2765: #else
 2766: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2767: #endif
 2768: 		pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
 2769: 		tpte = *pte;
 2770: 
 2771: /*
 2772:  * We cannot remove wired pages from a process' mapping at this time
 2773:  */
 2774: 		if (tpte & PG_W) {
 2775: 			npv = TAILQ_NEXT(pv, pv_plist);
 2776: 			continue;
 2777: 		}
 2778: 		*pte = 0;
 2779: 
 2780: 		m = PHYS_TO_VM_PAGE(tpte);
 2781: 
 2782: 		KASSERT(m < &vm_page_array[vm_page_array_size],
 2783: 			("pmap_remove_pages: bad tpte %x", tpte));
 2784: 
 2785: 		pv->pv_pmap->pm_stats.resident_count--;
 2786: 
 2787: 		/*
 2788: 		 * Update the vm_page_t clean and reference bits.
 2789: 		 */
 2790: 		if (tpte & PG_M) {
 2791: 			vm_page_dirty(m);
 2792: 		}
 2793: 
 2794: 
 2795: 		npv = TAILQ_NEXT(pv, pv_plist);
 2796: 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 2797: 
 2798: 		m->md.pv_list_count--;
 2799: 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2800: 		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
 2801: 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 2802: 		}
 2803: 
 2804: 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
 2805: 		free_pv_entry(pv);
 2806: 	}
 2807: 	pmap_inval_flush(&info);
 2808: 	splx(s);
 2809: }
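
/*
 * Usage sketch (hypothetical exit/exec path, range constants assumed):
 * tearing down all of the current process's user mappings at once:
 *
 *	pmap_remove_pages(vmspace_pmap(curproc->p_vmspace),
 *	    VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
 */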
 2810: 
 2811: /*
 2812:  * pmap_testbit tests bits in ptes.
 2813:  * Note that the testbit/changebit routines are inline,
 2814:  * so a lot of the checks are evaluated at compile time.
 2815:  */
 2816: static boolean_t
 2817: pmap_testbit(vm_page_t m, int bit)
 2818: {
 2819: 	pv_entry_t pv;
 2820: 	unsigned *pte;
 2821: 	int s;
 2822: 
 2823: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2824: 		return FALSE;
 2825: 
 2826: 	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 2827: 		return FALSE;
 2828: 
 2829: 	s = splvm();
 2830: 
 2831: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2832: 		/*
 2833: 		 * If the bit being tested is the modified or accessed
 2834: 		 * bit, skip mappings whose modified state we do not
 2835: 		 * track (those inside the kernel's clean submap).
 2836: 		 */
 2837: 		if (bit & (PG_A|PG_M)) {
 2838: 			if (!pmap_track_modified(pv->pv_va))
 2839: 				continue;
 2840: 		}
 2841: 
 2842: #if defined(PMAP_DIAGNOSTIC)
 2843: 		if (!pv->pv_pmap) {
 2844: 			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 2845: 			continue;
 2846: 		}
 2847: #endif
 2848: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2849: 		if (*pte & bit) {
 2850: 			splx(s);
 2851: 			return TRUE;
 2852: 		}
 2853: 	}
 2854: 	splx(s);
 2855: 	return (FALSE);
 2856: }
 2857: 
 2858: /*
 2859:  * this routine is used to modify bits in ptes
 2860:  */
 2861: static __inline void
 2862: pmap_changebit(vm_page_t m, int bit, boolean_t setem)
 2863: {
 2864: 	struct pmap_inval_info info;
 2865: 	pv_entry_t pv;
 2866: 	unsigned *pte;
 2867: 	int s;
 2868: 
 2869: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2870: 		return;
 2871: 
 2872: 	pmap_inval_init(&info);
 2873: 	s = splvm();
 2874: 
 2875: 	/*
 2876: 	 * Loop over all current mappings, setting or clearing as appropriate.
 2877: 	 * If setting RO, do we need to clear the VAC?
 2878: 	 */
 2879: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2880: 		/*
 2881: 		 * don't write protect pager mappings
 2882: 		 */
 2883: 		if (!setem && (bit == PG_RW)) {
 2884: 			if (!pmap_track_modified(pv->pv_va))
 2885: 				continue;
 2886: 		}
 2887: 
 2888: #if defined(PMAP_DIAGNOSTIC)
 2889: 		if (!pv->pv_pmap) {
 2890: 			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 2891: 			continue;
 2892: 		}
 2893: #endif
 2894: 
 2895: 		/*
 2896: 		 * Careful here.  We can use a locked bus instruction to
 2897: 		 * clear PG_A or PG_M safely but we need to synchronize
 2898: 		 * with the target cpus when we mess with PG_RW.
 2899: 		 */
 2900: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2901: 		if (bit == PG_RW)
 2902: 			pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
 2903: 
 2904: 		if (setem) {
 2905: #ifdef SMP
 2906: 			atomic_set_int(pte, bit);
 2907: #else
 2908: 			atomic_set_int_nonlocked(pte, bit);
 2909: #endif
 2910: 		} else {
 2911: 			vm_offset_t pbits = *(vm_offset_t *)pte;
 2912: 			if (pbits & bit) {
 2913: 				if (bit == PG_RW) {
 2914: 					if (pbits & PG_M) {
 2915: 						vm_page_dirty(m);
 2916: 					}
 2917: #ifdef SMP
 2918: 					atomic_clear_int(pte, PG_M|PG_RW);
 2919: #else
 2920: 					atomic_clear_int_nonlocked(pte, PG_M|PG_RW);
 2921: #endif
 2922: 				} else {
 2923: #ifdef SMP
 2924: 					atomic_clear_int(pte, bit);
 2925: #else
 2926: 					atomic_clear_int_nonlocked(pte, bit);
 2927: #endif
 2928: 				}
 2929: 			}
 2930: 		}
 2931: 	}
 2932: 	pmap_inval_flush(&info);
 2933: 	splx(s);
 2934: }
 2935: 
 2936: /*
 2937:  *      pmap_page_protect:
 2938:  *
 2939:  *      Lower the permission for all mappings to a given page.
 2940:  */
 2941: void
 2942: pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2943: {
 2944: 	if ((prot & VM_PROT_WRITE) == 0) {
 2945: 		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
 2946: 			pmap_changebit(m, PG_RW, FALSE);
 2947: 		} else {
 2948: 			pmap_remove_all(m);
 2949: 		}
 2950: 	}
 2951: }
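
/*
 * Usage sketch: the pageout and object teardown paths lower a page's
 * permissions through this entry point, assuming a managed page `m':
 *
 *	pmap_page_protect(m, VM_PROT_READ);	(write-protect all mappings)
 *	pmap_page_protect(m, VM_PROT_NONE);	(remove all mappings)
 */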
 2952: 
 2953: vm_paddr_t
 2954: pmap_phys_address(int ppn)
 2955: {
 2956: 	return (i386_ptob(ppn));
 2957: }
 2958: 
 2959: /*
 2960:  *	pmap_ts_referenced:
 2961:  *
 2962:  *	Return a count of reference bits for a page, clearing those bits.
 2963:  *	It is not necessary for every reference bit to be cleared, but it
 2964:  *	is necessary that 0 only be returned when there are truly no
 2965:  *	reference bits set.
 2966:  *
 2967:  *	XXX: The exact number of bits to check and clear is a matter that
 2968:  *	should be tested and standardized at some point in the future for
 2969:  *	optimal aging of shared pages.
 2970:  */
 2971: int
 2972: pmap_ts_referenced(vm_page_t m)
 2973: {
 2974: 	pv_entry_t pv, pvf, pvn;
 2975: 	unsigned *pte;
 2976: 	int s;
 2977: 	int rtval = 0;
 2978: 
 2979: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2980: 		return (rtval);
 2981: 
 2982: 	s = splvm();
 2983: 
 2984: 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2985: 
 2986: 		pvf = pv;
 2987: 
 2988: 		do {
 2989: 			pvn = TAILQ_NEXT(pv, pv_list);
 2990: 
 2991: 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2992: 
 2993: 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2994: 
 2995: 			if (!pmap_track_modified(pv->pv_va))
 2996: 				continue;
 2997: 
 2998: 			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2999: 
 3000: 			if (pte && (*pte & PG_A)) {
 3001: #ifdef SMP
 3002: 				atomic_clear_int(pte, PG_A);
 3003: #else
 3004: 				atomic_clear_int_nonlocked(pte, PG_A);
 3005: #endif
 3006: 				rtval++;
 3007: 				if (rtval > 4) {
 3008: 					break;
 3009: 				}
 3010: 			}
 3011: 		} while ((pv = pvn) != NULL && pv != pvf);
 3012: 	}
 3013: 	splx(s);
 3014: 
 3015: 	return (rtval);
 3016: }
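
/*
 * Usage sketch (hypothetical page-aging loop): the pageout daemon can use
 * the returned reference count to decide whether a page stays active:
 *
 *	if (pmap_ts_referenced(m))
 *		vm_page_activate(m);
 *	else
 *		vm_page_deactivate(m);
 */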
 3017: 
 3018: /*
 3019:  *	pmap_is_modified:
 3020:  *
 3021:  *	Return whether or not the specified physical page was modified
 3022:  *	in any physical maps.
 3023:  */
 3024: boolean_t
 3025: pmap_is_modified(vm_page_t m)
 3026: {
 3027: 	return pmap_testbit(m, PG_M);
 3028: }
 3029: 
 3030: /*
 3031:  *	Clear the modify bits on the specified physical page.
 3032:  */
 3033: void
 3034: pmap_clear_modify(vm_page_t m)
 3035: {
 3036: 	pmap_changebit(m, PG_M, FALSE);
 3037: }
 3038: 
 3039: /*
 3040:  *	pmap_clear_reference:
 3041:  *
 3042:  *	Clear the reference bit on the specified physical page.
 3043:  */
 3044: void
 3045: pmap_clear_reference(vm_page_t m)
 3046: {
 3047: 	pmap_changebit(m, PG_A, FALSE);
 3048: }
 3049: 
 3050: /*
 3051:  * Miscellaneous support routines follow
 3052:  */
 3053: 
 3054: static void
 3055: i386_protection_init(void)
 3056: {
 3057: 	int *kp, prot;
 3058: 
 3059: 	kp = protection_codes;
 3060: 	for (prot = 0; prot < 8; prot++) {
 3061: 		switch (prot) {
 3062: 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 3063: 			/*
 3064: 			 * Read access is also 0. There isn't any execute bit,
 3065: 			 * so just make it readable.
 3066: 			 */
 3067: 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 3068: 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 3069: 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 3070: 			*kp++ = 0;
 3071: 			break;
 3072: 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 3073: 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 3074: 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 3075: 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 3076: 			*kp++ = PG_RW;
 3077: 			break;
 3078: 		}
 3079: 	}
 3080: }
 3081: 
 3082: /*
 3083:  * Map a set of physical memory pages into the kernel virtual
 3084:  * address space. Return a pointer to where it is mapped. This
 3085:  * routine is intended to be used for mapping device memory,
 3086:  * NOT real memory.
 3087:  *
 3088:  * NOTE: we can't use pgeflag unless we invalidate the pages one at
 3089:  * a time.
 3090:  */
 3091: void *
 3092: pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 3093: {
 3094: 	vm_offset_t va, tmpva, offset;
 3095: 	unsigned *pte;
 3096: 
 3097: 	offset = pa & PAGE_MASK;
 3098: 	size = roundup(offset + size, PAGE_SIZE);
 3099: 
 3100: 	va = kmem_alloc_pageable(kernel_map, size);
 3101: 	if (!va)
 3102: 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 3103: 
 3104: 	pa = pa & PG_FRAME;
 3105: 	for (tmpva = va; size > 0;) {
 3106: 		pte = (unsigned *)vtopte(tmpva);
 3107: 		*pte = pa | PG_RW | PG_V; /* | pgeflag; */
 3108: 		size -= PAGE_SIZE;
 3109: 		tmpva += PAGE_SIZE;
 3110: 		pa += PAGE_SIZE;
 3111: 	}
 3112: 	cpu_invltlb();
 3113: 	smp_invltlb();
 3114: 
 3115: 	return ((void *)(va + offset));
 3116: }
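
/*
 * Usage sketch (hypothetical driver attach; `pa' and `len' assumed to come
 * from bus resource code): mapping a device register window:
 *
 *	regs = pmap_mapdev(pa, len);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, len);
 */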
 3117: 
 3118: void
 3119: pmap_unmapdev(vm_offset_t va, vm_size_t size)
 3120: {
 3121: 	vm_offset_t base, offset;
 3122: 
 3123: 	base = va & PG_FRAME;
 3124: 	offset = va & PAGE_MASK;
 3125: 	size = roundup(offset + size, PAGE_SIZE);
 3126: 	kmem_free(kernel_map, base, size);
 3127: }
 3128: 
 3129: /*
 3130:  * perform the pmap work for mincore
 3131:  */
 3132: int
 3133: pmap_mincore(pmap_t pmap, vm_offset_t addr)
 3134: {
 3135: 	unsigned *ptep, pte;
 3136: 	vm_page_t m;
 3137: 	int val = 0;
 3138: 	
 3139: 	ptep = pmap_pte(pmap, addr);
 3140: 	if (ptep == 0) {
 3141: 		return 0;
 3142: 	}
 3143: 
 3144: 	if ((pte = *ptep) != 0) {
 3145: 		vm_offset_t pa;
 3146: 
 3147: 		val = MINCORE_INCORE;
 3148: 		if ((pte & PG_MANAGED) == 0)
 3149: 			return val;
 3150: 
 3151: 		pa = pte & PG_FRAME;
 3152: 
 3153: 		m = PHYS_TO_VM_PAGE(pa);
 3154: 
 3155: 		/*
 3156: 		 * Modified by us
 3157: 		 */
 3158: 		if (pte & PG_M)
 3159: 			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 3160: 		/*
 3161: 		 * Modified by someone
 3162: 		 */
 3163: 		else if (m->dirty || pmap_is_modified(m))
 3164: 			val |= MINCORE_MODIFIED_OTHER;
 3165: 		/*
 3166: 		 * Referenced by us
 3167: 		 */
 3168: 		if (pte & PG_A)
 3169: 			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 3170: 
 3171: 		/*
 3172: 		 * Referenced by someone
 3173: 		 */
 3174: 		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
 3175: 			val |= MINCORE_REFERENCED_OTHER;
 3176: 			vm_page_flag_set(m, PG_REFERENCED);
 3177: 		}
 3178: 	} 
 3179: 	return val;
 3180: }
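
/*
 * Usage sketch: the mincore(2) system call queries one page at a time,
 * assuming a page-aligned user address `addr' in the current vmspace:
 *
 *	val = pmap_mincore(vmspace_pmap(curproc->p_vmspace), addr);
 *	if (val & MINCORE_INCORE)
 *		... page is resident ...
 */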
 3181: 
 3182: void
 3183: pmap_activate(struct proc *p)
 3184: {
 3185: 	pmap_t	pmap;
 3186: 
 3187: 	pmap = vmspace_pmap(p->p_vmspace);
 3188: #if defined(SMP)
 3189: 	atomic_set_int(&pmap->pm_active, 1 << mycpu->gd_cpuid);
 3190: #else
 3191: 	pmap->pm_active |= 1;
 3192: #endif
 3193: #if defined(SWTCH_OPTIM_STATS)
 3194: 	tlb_flush_count++;
 3195: #endif
 3196: 	p->p_thread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir);
 3197: 	load_cr3(p->p_thread->td_pcb->pcb_cr3);
 3198: }
 3199: 
 3200: vm_offset_t
 3201: pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 3202: {
 3203: 
 3204: 	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
 3205: 		return addr;
 3206: 	}
 3207: 
 3208: 	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 3209: 	return addr;
 3210: }
 3211: 
 3212: 
 3213: #if defined(PMAP_DEBUG)
 3214: int
 3215: pmap_pid_dump(int pid)
 3216: {
 3217: 	pmap_t pmap;
 3218: 	struct proc *p;
 3219: 	int npte = 0;
 3220: 	int index;
 3221: 	FOREACH_PROC_IN_SYSTEM(p) {
 3222: 		if (p->p_pid != pid)
 3223: 			continue;
 3224: 
 3225: 		if (p->p_vmspace) {
 3226: 			int i,j;
 3227: 			index = 0;
 3228: 			pmap = vmspace_pmap(p->p_vmspace);
 3229: 			for(i=0;i<1024;i++) {
 3230: 				pd_entry_t *pde;
 3231: 				unsigned *pte;
 3232: 				unsigned base = i << PDRSHIFT;
 3233: 				
 3234: 				pde = &pmap->pm_pdir[i];
 3235: 				if (pde && pmap_pde_v(pde)) {
 3236: 					for(j=0;j<1024;j++) {
 3237: 						unsigned va = base + (j << PAGE_SHIFT);
 3238: 						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 3239: 							if (index) {
 3240: 								index = 0;
 3241: 								printf("\n");
 3242: 							}
 3243: 							return npte;
 3244: 						}
 3245: 						pte = pmap_pte_quick( pmap, va);
 3246: 						if (pte && pmap_pte_v(pte)) {
 3247: 							vm_offset_t pa;
 3248: 							vm_page_t m;
 3249: 							pa = *(int *)pte;
 3250: 							m = PHYS_TO_VM_PAGE(pa);
 3251: 							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 3252: 								va, pa, m->hold_count, m->wire_count, m->flags);
 3253: 							npte++;
 3254: 							index++;
 3255: 							if (index >= 2) {
 3256: 								index = 0;
 3257: 								printf("\n");
 3258: 							} else {
 3259: 								printf(" ");
 3260: 							}
 3261: 						}
 3262: 					}
 3263: 				}
 3264: 			}
 3265: 		}
 3266: 	}
 3267: 	return npte;
 3268: }
 3269: #endif
 3270: 
 3271: #if defined(DEBUG)
 3272: 
 3273: static void	pads (pmap_t pm);
 3274: void		pmap_pvdump (vm_paddr_t pa);
 3275: 
 3276: /* print address space of pmap */
 3277: static void
 3278: pads(pmap_t pm)
 3279: {
 3280: 	unsigned va, i, j;
 3281: 	unsigned *ptep;
 3282: 
 3283: 	if (pm == kernel_pmap)
 3284: 		return;
 3285: 	for (i = 0; i < 1024; i++)
 3286: 		if (pm->pm_pdir[i])
 3287: 			for (j = 0; j < 1024; j++) {
 3288: 				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 3289: 				if (pm == kernel_pmap && va < KERNBASE)
 3290: 					continue;
 3291: 				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 3292: 					continue;
 3293: 				ptep = pmap_pte_quick(pm, va);
 3294: 				if (pmap_pte_v(ptep))
 3295: 					printf("%x:%x ", va, *(int *) ptep);
 3296: 			}
 3297: 
 3298: }
 3299: 
 3300: void
 3301: pmap_pvdump(vm_paddr_t pa)
 3302: {
 3303: 	pv_entry_t pv;
 3304: 	vm_page_t m;
 3305: 
 3306: 	printf("pa %08llx", (long long)pa);
 3307: 	m = PHYS_TO_VM_PAGE(pa);
 3308: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 3309: #ifdef used_to_be
 3310: 		printf(" -> pmap %p, va %x, flags %x",
 3311: 		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
 3312: #endif
 3313: 		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 3314: 		pads(pv->pv_pmap);
 3315: 	}
 3316: 	printf(" ");
 3317: }
 3318: #endif