File:  [DragonFly] / src / sys / i386 / i386 / Attic / pmap.c
Revision 1.33
Thu Apr 1 17:58:00 2004 UTC by dillon
Branches: MAIN
CVS tags: HEAD
Enhance the pmap_kenter*() API and friends, separating entries that only
need invalidation on the local cpu from entries that need invalidation
across the entire system, and provide a synchronization abstraction.
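As an illustration only (an editorial sketch, not code from this commit), a
caller that wants a cpu-local mapping can use the new *_quick() entry points
and defer the cross-cpu synchronization.  The pmap_kenter*() signatures are
the ones defined later in this file; the helper name and its arguments are
hypothetical:

    /* Hypothetical helper, for illustration only. */
    static void
    example_map_local_then_global(vm_offset_t va, vm_paddr_t pa)
    {
        pmap_kenter_quick(va, pa);  /* set the pte + local cpu_invlpg(), no IPIs */
        /* ... use the mapping on the current cpu only ... */
        pmap_kenter_sync(va);       /* broadcast the invalidation so the
                                     * mapping may now be used on any cpu */
    }

    /*
     * pmap_kenter(va, pa) remains the one-shot form: it enters the pte
     * and performs the system-wide invalidation itself.
     */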

Enhance sf_buf_alloc() and friends to allow the caller to specify whether the
sf_buf's kernel mapping will be used on just the current cpu or whether it
needs to be valid across all cpus.  This is done by maintaining a cpumask of
known-synchronized cpus in the struct sf_buf.

Optimize sf_buf_alloc() and friends by removing both TAILQ operations from the
critical path.  The TAILQ operations that remove the sf_buf from the free
queue are now done in a lazy fashion.  Most sf_buf operations allocate a buf,
work on it, and free it, so why waste time moving the sf_buf off the free list
if we are only going to move it back onto the free list a microsecond later?

Fix a bug in the sf_buf_alloc() code as it was being used by the PIPE code.
sf_buf_alloc() was unconditionally using PCATCH in its tsleep() call, which
is only correct when called from the sendfile() interface.

Optimize the PIPE code to require only local cpu_invlpg()'s when mapping
sf_buf's, greatly reducing the number of IPIs required.  On a DELL-2550, a
pipe test which explicitly blows out the sf_buf cache by using huge buffers
improves from 350 to 550 MBytes/sec.  However, note that buildworld times
were not observed to change.

Replace the PIPE code's custom 'struct pipemapping' structure with a
struct xio and use the XIO API functions rather than its own.

    1: /*
    2:  * Copyright (c) 1991 Regents of the University of California.
    3:  * All rights reserved.
    4:  * Copyright (c) 1994 John S. Dyson
    5:  * All rights reserved.
    6:  * Copyright (c) 1994 David Greenman
    7:  * All rights reserved.
    8:  *
    9:  * This code is derived from software contributed to Berkeley by
   10:  * the Systems Programming Group of the University of Utah Computer
   11:  * Science Department and William Jolitz of UUNET Technologies Inc.
   12:  *
   13:  * Redistribution and use in source and binary forms, with or without
   14:  * modification, are permitted provided that the following conditions
   15:  * are met:
   16:  * 1. Redistributions of source code must retain the above copyright
   17:  *    notice, this list of conditions and the following disclaimer.
   18:  * 2. Redistributions in binary form must reproduce the above copyright
   19:  *    notice, this list of conditions and the following disclaimer in the
   20:  *    documentation and/or other materials provided with the distribution.
   21:  * 3. All advertising materials mentioning features or use of this software
   22:  *    must display the following acknowledgement:
   23:  *	This product includes software developed by the University of
   24:  *	California, Berkeley and its contributors.
   25:  * 4. Neither the name of the University nor the names of its contributors
   26:  *    may be used to endorse or promote products derived from this software
   27:  *    without specific prior written permission.
   28:  *
   29:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39:  * SUCH DAMAGE.
   40:  *
   41:  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
   42:  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
   43:  * $DragonFly: src/sys/i386/i386/pmap.c,v 1.33 2004/04/01 17:58:00 dillon Exp $
   44:  */
   45: 
   46: /*
   47:  *	Manages physical address maps.
   48:  *
   49:  *	In addition to hardware address maps, this
   50:  *	module is called upon to provide software-use-only
   51:  *	maps which may or may not be stored in the same
   52:  *	form as hardware maps.  These pseudo-maps are
   53:  *	used to store intermediate results from copy
   54:  *	operations to and from address spaces.
   55:  *
   56:  *	Since the information managed by this module is
   57:  *	also stored by the logical address mapping module,
   58:  *	this module may throw away valid virtual-to-physical
   59:  *	mappings at almost any time.  However, invalidations
   60:  *	of virtual-to-physical mappings must be done as
   61:  *	requested.
   62:  *
   63:  *	In order to cope with hardware architectures which
   64:  *	make virtual-to-physical map invalidates expensive,
   65:  *	this module may delay invalidate or reduced protection
   66:  *	operations until such time as they are actually
   67:  *	necessary.  This module is given full information as
   68:  *	to which processors are currently using which maps,
   69:  *	and to when physical maps must be made correct.
   70:  */
   71: 
   72: #include "opt_disable_pse.h"
   73: #include "opt_pmap.h"
   74: #include "opt_msgbuf.h"
   75: 
   76: #include <sys/param.h>
   77: #include <sys/systm.h>
   78: #include <sys/kernel.h>
   79: #include <sys/proc.h>
   80: #include <sys/msgbuf.h>
   81: #include <sys/vmmeter.h>
   82: #include <sys/mman.h>
   83: 
   84: #include <vm/vm.h>
   85: #include <vm/vm_param.h>
   86: #include <sys/sysctl.h>
   87: #include <sys/lock.h>
   88: #include <vm/vm_kern.h>
   89: #include <vm/vm_page.h>
   90: #include <vm/vm_map.h>
   91: #include <vm/vm_object.h>
   92: #include <vm/vm_extern.h>
   93: #include <vm/vm_pageout.h>
   94: #include <vm/vm_pager.h>
   95: #include <vm/vm_zone.h>
   96: 
   97: #include <sys/user.h>
   98: #include <sys/thread2.h>
   99: 
  100: #include <machine/cputypes.h>
  101: #include <machine/md_var.h>
  102: #include <machine/specialreg.h>
  103: #if defined(SMP) || defined(APIC_IO)
  104: #include <machine/smp.h>
  105: #include <machine/apicreg.h>
  106: #endif /* SMP || APIC_IO */
  107: #include <machine/globaldata.h>
  108: #include <machine/pmap.h>
  109: #include <machine/pmap_inval.h>
  110: 
  111: #define PMAP_KEEP_PDIRS
  112: #ifndef PMAP_SHPGPERPROC
  113: #define PMAP_SHPGPERPROC 200
  114: #endif
  115: 
  116: #if defined(DIAGNOSTIC)
  117: #define PMAP_DIAGNOSTIC
  118: #endif
  119: 
  120: #define MINPV 2048
  121: 
  122: #if !defined(PMAP_DIAGNOSTIC)
  123: #define PMAP_INLINE __inline
  124: #else
  125: #define PMAP_INLINE
  126: #endif
  127: 
  128: /*
  129:  * Get PDEs and PTEs for user/kernel address space
  130:  */
  131: #define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  132: #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
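/*
 * Example (editorial note, illustrative values only): with PDRSHIFT == 22,
 * a va of 0xc0400000 selects page directory index 0xc0400000 >> 22 == 0x301,
 * so pmap_pde(kernel_pmap, 0xc0400000) is &kernel_pmap->pm_pdir[0x301].
 */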
  133: 
  134: #define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
  135: #define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
  136: #define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
  137: #define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
  138: #define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
  139: 
  140: 
  141: /*
  142:  * Given a map and a machine independent protection code,
  143:  * convert to a vax protection code.
  144:  */
  145: #define pte_prot(m, p)	(protection_codes[p])
  146: static int protection_codes[8];
  147: 
  148: static struct pmap kernel_pmap_store;
  149: pmap_t kernel_pmap;
  150: 
  151: vm_paddr_t avail_start;	/* PA of first available physical page */
  152: vm_paddr_t avail_end;		/* PA of last available physical page */
  153: vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
  154: vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
  155: static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
  156: static int pgeflag;		/* PG_G or-in */
  157: static int pseflag;		/* PG_PS or-in */
  158: 
  159: static vm_object_t kptobj;
  160: 
  161: static int nkpt;
  162: vm_offset_t kernel_vm_end;
  163: 
  164: /*
  165:  * Data for the pv entry allocation mechanism
  166:  */
  167: static vm_zone_t pvzone;
  168: static struct vm_zone pvzone_store;
  169: static struct vm_object pvzone_obj;
  170: static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
  171: static int pmap_pagedaemon_waken = 0;
  172: static struct pv_entry *pvinit;
  173: 
  174: /*
  175:  * All those kernel PT submaps that BSD is so fond of
  176:  */
  177: pt_entry_t *CMAP1 = 0, *ptmmap;
  178: caddr_t CADDR1 = 0, ptvmmap = 0;
  179: static pt_entry_t *msgbufmap;
  180: struct msgbuf *msgbufp=0;
  181: 
  182: /*
  183:  * Crashdump maps.
  184:  */
  185: static pt_entry_t *pt_crashdumpmap;
  186: static caddr_t crashdumpmap;
  187: 
  188: extern pt_entry_t *SMPpt;
  189: 
  190: static PMAP_INLINE void	free_pv_entry (pv_entry_t pv);
  191: static unsigned * get_ptbase (pmap_t pmap);
  192: static pv_entry_t get_pv_entry (void);
  193: static void	i386_protection_init (void);
  194: static __inline void	pmap_changebit (vm_page_t m, int bit, boolean_t setem);
  195: 
  196: static void	pmap_remove_all (vm_page_t m);
  197: static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va,
  198: 				      vm_page_t m, vm_page_t mpte);
  199: static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, 
  200: 				vm_offset_t sva, pmap_inval_info_t info);
  201: static void pmap_remove_page (struct pmap *pmap, 
  202: 				vm_offset_t va, pmap_inval_info_t info);
  203: static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
  204: 				vm_offset_t va, pmap_inval_info_t info);
  205: static boolean_t pmap_testbit (vm_page_t m, int bit);
  206: static void pmap_insert_entry (pmap_t pmap, vm_offset_t va,
  207: 		vm_page_t mpte, vm_page_t m);
  208: 
  209: static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);
  210: 
  211: static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
  212: static vm_page_t _pmap_allocpte (pmap_t pmap, unsigned ptepindex);
  213: static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
  214: static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
  215: static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t);
  216: static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  217: 
  218: static unsigned pdir4mb;
  219: 
  220: /*
  221:  * Move the kernel virtual free pointer to the next
  222:  * 4MB.  This is used to help improve performance
  223:  * by using a large (4MB) page for much of the kernel
  224:  * (.text, .data, .bss)
  225:  */
  226: static vm_offset_t
  227: pmap_kmem_choose(vm_offset_t addr)
  228: {
  229: 	vm_offset_t newaddr = addr;
  230: #ifndef DISABLE_PSE
  231: 	if (cpu_feature & CPUID_PSE) {
  232: 		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
  233: 	}
  234: #endif
  235: 	return newaddr;
  236: }
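/*
 * Example (editorial sketch, illustrative values only): with 4MB pages
 * NBPDR is 0x400000, so an addr of 0xc0501000 rounds up to the next 4MB
 * boundary:
 *
 *	(0xc0501000 + 0x3fffff) & ~0x3fffff == 0xc0800000
 */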
  237: 
  238: /*
  239:  * pmap_pte:
  240:  *
  241:  *	Extract the page table entry associated with the given map/virtual
  242:  *	pair.
  243:  *
  244:  *	This function may NOT be called from an interrupt.
  245:  */
  246: PMAP_INLINE unsigned *
  247: pmap_pte(pmap_t pmap, vm_offset_t va)
  248: {
  249: 	unsigned *pdeaddr;
  250: 
  251: 	if (pmap) {
  252: 		pdeaddr = (unsigned *) pmap_pde(pmap, va);
  253: 		if (*pdeaddr & PG_PS)
  254: 			return pdeaddr;
  255: 		if (*pdeaddr) {
  256: 			return get_ptbase(pmap) + i386_btop(va);
  257: 		}
  258: 	}
  259: 	return (0);
  260: }
  261: 
  262: /*
  263:  * pmap_pte_quick:
  264:  *
  265:  *	Super fast pmap_pte routine best used when scanning the pv lists.
  266:  *	This eliminates many coarse-grained invltlb calls.  Note that many of
  267:  *	the pv list scans are across different pmaps and it is very wasteful
  268:  *	to do an entire invltlb when checking a single mapping.
  269:  *
  270:  *	Should only be called while splvm() is held or from a critical
  271:  *	section.
  272:  */
  273: static unsigned * 
  274: pmap_pte_quick(pmap_t pmap, vm_offset_t va)
  275: {
  276: 	struct mdglobaldata *gd = mdcpu;
  277: 	unsigned pde, newpf;
  278: 
  279: 	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
  280: 		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
  281: 		unsigned index = i386_btop(va);
  282: 		/* are we current address space or kernel? */
  283: 		if ((pmap == kernel_pmap) ||
  284: 			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
  285: 			return (unsigned *) PTmap + index;
  286: 		}
  287: 		newpf = pde & PG_FRAME;
  288: 		if ( ((* (unsigned *) gd->gd_PMAP1) & PG_FRAME) != newpf) {
  289: 			* (unsigned *) gd->gd_PMAP1 = newpf | PG_RW | PG_V;
  290: 			cpu_invlpg(gd->gd_PADDR1);
  291: 		}
  292: 		return gd->gd_PADDR1 + ((unsigned) index & (NPTEPG - 1));
  293: 	}
  294: 	return (0);
  295: }
  296: 
  297: 
  298: /*
  299:  *	Bootstrap the system enough to run with virtual memory.
  300:  *
  301:  *	On the i386 this is called after mapping has already been enabled
  302:  *	and just syncs the pmap module with what has already been done.
  303:  *	[We can't call it easily with mapping off since the kernel is not
  304:  *	mapped with PA == VA, hence we would have to relocate every address
  305:  *	from the linked base (virtual) address "KERNBASE" to the actual
  306:  *	(physical) address starting relative to 0]
  307:  */
  308: void
  309: pmap_bootstrap(firstaddr, loadaddr)
  310: 	vm_paddr_t firstaddr;
  311: 	vm_paddr_t loadaddr;
  312: {
  313: 	vm_offset_t va;
  314: 	pt_entry_t *pte;
  315: 	struct mdglobaldata *gd;
  316: 	int i;
  317: 
  318: 	avail_start = firstaddr;
  319: 
  320: 	/*
  321: 	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  322: 	 * large. It should instead be correctly calculated in locore.s and
  323: 	 * not based on 'first' (which is a physical address, not a virtual
  324: 	 * address, for the start of unused physical memory). The kernel
  325: 	 * page tables are NOT double mapped and thus should not be included
  326: 	 * in this calculation.
  327: 	 */
  328: 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  329: 	virtual_avail = pmap_kmem_choose(virtual_avail);
  330: 
  331: 	virtual_end = VM_MAX_KERNEL_ADDRESS;
  332: 
  333: 	/*
  334: 	 * Initialize protection array.
  335: 	 */
  336: 	i386_protection_init();
  337: 
  338: 	/*
  339: 	 * The kernel's pmap is statically allocated so we don't have to use
  340: 	 * pmap_create, which is unlikely to work correctly at this part of
  341: 	 * the boot sequence (XXX and which no longer exists).
  342: 	 */
  343: 	kernel_pmap = &kernel_pmap_store;
  344: 
  345: 	kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (u_int)IdlePTD);
  346: 	kernel_pmap->pm_count = 1;
  347: 	kernel_pmap->pm_active = (cpumask_t)-1;	/* don't allow deactivation */
  348: 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
  349: 	nkpt = NKPT;
  350: 
  351: 	/*
  352: 	 * Reserve some special page table entries/VA space for temporary
  353: 	 * mapping of pages.
  354: 	 */
  355: #define	SYSMAP(c, p, v, n)	\
  356: 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
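	/*
	 * Expansion sketch (editorial, not original source):
	 *	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	 * becomes
	 *	CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP1 = pte; pte += (1);
	 * i.e. it reserves one page of KVA at 'va' and records the pte that
	 * maps it.
	 */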
  357: 
  358: 	va = virtual_avail;
  359: 	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
  360: 
  361: 	/*
  362: 	 * CMAP1/CMAP2 are used for zeroing and copying pages.
  363: 	 */
  364: 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  365: 
  366: 	/*
  367: 	 * Crashdump maps.
  368: 	 */
  369: 	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
  370: 
  371: 	/*
  372: 	 * ptvmmap is used for reading arbitrary physical pages via
  373: 	 * /dev/mem.
  374: 	 */
  375: 	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
  376: 
  377: 	/*
  378: 	 * msgbufp is used to map the system message buffer.
  379: 	 * XXX msgbufmap is not used.
  380: 	 */
  381: 	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
  382: 	       atop(round_page(MSGBUF_SIZE)))
  383: 
  384: 	virtual_avail = va;
  385: 
  386: 	*(int *) CMAP1 = 0;
  387: 	for (i = 0; i < NKPT; i++)
  388: 		PTD[i] = 0;
  389: 
  390: 	/*
  391: 	 * PG_G is terribly broken on SMP because we IPI invltlb's in some
  392: 	 * cases rather than invlpg.  Actually, I don't even know why it
  393: 	 * works under UP, given the self-referential page table mappings.
  394: 	 */
  395: #ifdef SMP
  396: 	pgeflag = 0;
  397: #else
  398: 	if (cpu_feature & CPUID_PGE)
  399: 		pgeflag = PG_G;
  400: #endif
  401: 	
  402: /*
  403:  * Initialize the 4MB page size flag
  404:  */
  405: 	pseflag = 0;
  406: /*
  407:  * The 4MB page version of the initial
  408:  * kernel page mapping.
  409:  */
  410: 	pdir4mb = 0;
  411: 
  412: #if !defined(DISABLE_PSE)
  413: 	if (cpu_feature & CPUID_PSE) {
  414: 		unsigned ptditmp;
  415: 		/*
  416: 		 * Note that we have enabled PSE mode
  417: 		 */
  418: 		pseflag = PG_PS;
  419: 		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
  420: 		ptditmp &= ~(NBPDR - 1);
  421: 		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
  422: 		pdir4mb = ptditmp;
  423: 
  424: #ifndef SMP
  425: 		/*
  426: 		 * Enable the PSE mode.  If we are SMP we can't do this
  427: 		 * now because the APs will not be able to use it when
  428: 		 * they boot up.
  429: 		 */
  430: 		load_cr4(rcr4() | CR4_PSE);
  431: 
  432: 		/*
  433: 		 * We can do the mapping here for the single processor
  434: 		 * case.  We simply ignore the old page table page from
  435: 		 * now on.
  436: 		 */
  437: 		/*
  438: 		 * For SMP, we still need 4K pages to bootstrap APs,
  439: 		 * PSE will be enabled as soon as all APs are up.
  440: 		 */
  441: 		PTD[KPTDI] = (pd_entry_t)ptditmp;
  442: 		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t)ptditmp;
  443: 		cpu_invltlb();
  444: #endif
  445: 	}
  446: #endif
  447: #ifdef APIC_IO
  448: 	if (cpu_apic_address == 0)
  449: 		panic("pmap_bootstrap: no local apic!");
  450: 
  451: 	/* local apic is mapped on last page */
  452: 	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
  453: 	    (cpu_apic_address & PG_FRAME));
  454: #endif
  455: 
  456: 	/* BSP does this itself, AP's get it pre-set */
  457: 	gd = &CPU_prvspace[0].mdglobaldata;
  458: 	gd->gd_CMAP1 = &SMPpt[1];
  459: 	gd->gd_CMAP2 = &SMPpt[2];
  460: 	gd->gd_CMAP3 = &SMPpt[3];
  461: 	gd->gd_PMAP1 = &SMPpt[4];
  462: 	gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1;
  463: 	gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2;
  464: 	gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3;
  465: 	gd->gd_PADDR1 = (unsigned *)CPU_prvspace[0].PPAGE1;
  466: 
  467: 	cpu_invltlb();
  468: }
  469: 
  470: #ifdef SMP
  471: /*
  472:  * Set 4mb pdir for mp startup
  473:  */
  474: void
  475: pmap_set_opt(void)
  476: {
  477: 	if (pseflag && (cpu_feature & CPUID_PSE)) {
  478: 		load_cr4(rcr4() | CR4_PSE);
  479: 		if (pdir4mb && mycpu->gd_cpuid == 0) {	/* only on BSP */
  480: 			kernel_pmap->pm_pdir[KPTDI] =
  481: 			    PTD[KPTDI] = (pd_entry_t)pdir4mb;
  482: 			cpu_invltlb();
  483: 		}
  484: 	}
  485: }
  486: #endif
  487: 
  488: /*
  489:  *	Initialize the pmap module.
  490:  *	Called by vm_init, to initialize any structures that the pmap
  491:  *	system needs to map virtual memory.
  492:  *	pmap_init has been enhanced to support discontiguous physical
  493:  *	memory in a fairly consistent way.
  494:  */
  495: void
  496: pmap_init(phys_start, phys_end)
  497: 	vm_paddr_t phys_start, phys_end;
  498: {
  499: 	int i;
  500: 	int initial_pvs;
  501: 
  502: 	/*
  503: 	 * object for kernel page table pages
  504: 	 */
  505: 	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
  506: 
  507: 	/*
  508: 	 * Allocate memory for random pmap data structures.  Includes the
  509: 	 * pv_head_table.
  510: 	 */
  511: 
  512: 	for(i = 0; i < vm_page_array_size; i++) {
  513: 		vm_page_t m;
  514: 
  515: 		m = &vm_page_array[i];
  516: 		TAILQ_INIT(&m->md.pv_list);
  517: 		m->md.pv_list_count = 0;
  518: 	}
  519: 
  520: 	/*
  521: 	 * init the pv free list
  522: 	 */
  523: 	initial_pvs = vm_page_array_size;
  524: 	if (initial_pvs < MINPV)
  525: 		initial_pvs = MINPV;
  526: 	pvzone = &pvzone_store;
  527: 	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
  528: 		initial_pvs * sizeof (struct pv_entry));
  529: 	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
  530: 	    vm_page_array_size);
  531: 
  532: 	/*
  533: 	 * Now it is safe to enable pv_table recording.
  534: 	 */
  535: 	pmap_initialized = TRUE;
  536: }
  537: 
  538: /*
  539:  * Initialize the address space (zone) for the pv_entries.  Set a
  540:  * high water mark so that the system can recover from excessive
  541:  * numbers of pv entries.
  542:  */
  543: void
  544: pmap_init2()
  545: {
  546: 	int shpgperproc = PMAP_SHPGPERPROC;
  547: 
  548: 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  549: 	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
  550: 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  551: 	pv_entry_high_water = 9 * (pv_entry_max / 10);
  552: 	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
  553: }
  554: 
  555: 
  556: /***************************************************
  557:  * Low level helper routines.....
  558:  ***************************************************/
  559: 
  560: #if defined(PMAP_DIAGNOSTIC)
  561: 
  562: /*
  563:  * This code checks for non-writeable/modified pages.
  564:  * This should be an invalid condition.
  565:  */
  566: static int
  567: pmap_nw_modified(pt_entry_t ptea)
  568: {
  569: 	int pte;
  570: 
  571: 	pte = (int) ptea;
  572: 
  573: 	if ((pte & (PG_M|PG_RW)) == PG_M)
  574: 		return 1;
  575: 	else
  576: 		return 0;
  577: }
  578: #endif
  579: 
  580: 
  581: /*
  582:  * this routine defines the region(s) of memory that should
  583:  * not be tested for the modified bit.
  584:  */
  585: static PMAP_INLINE int
  586: pmap_track_modified(vm_offset_t va)
  587: {
  588: 	if ((va < clean_sva) || (va >= clean_eva)) 
  589: 		return 1;
  590: 	else
  591: 		return 0;
  592: }
  593: 
  594: static unsigned *
  595: get_ptbase(pmap_t pmap)
  596: {
  597: 	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
  598: 	struct globaldata *gd = mycpu;
  599: 
  600: 	/* are we current address space or kernel? */
  601: 	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
  602: 		return (unsigned *) PTmap;
  603: 	}
  604: 
  605: 	/* otherwise, we are alternate address space */
  606: 	KKASSERT(gd->gd_intr_nesting_level == 0 && (gd->gd_curthread->td_flags & TDF_INTTHREAD) == 0);
  607: 
  608: 	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
  609: 		APTDpde = (pd_entry_t)(frame | PG_RW | PG_V);
  610: 		/* The page directory is not shared between CPUs */
  611: 		cpu_invltlb();
  612: 	}
  613: 	return (unsigned *) APTmap;
  614: }
  615: 
  616: /*
  617:  * pmap_extract:
  618:  *
  619:  *	Extract the physical page address associated with the map/VA pair.
  620:  *
  621:  *	This function may not be called from an interrupt if the pmap is
  622:  *	not kernel_pmap.
  623:  */
  624: vm_paddr_t 
  625: pmap_extract(pmap_t pmap, vm_offset_t va)
  626: {
  627: 	vm_offset_t rtval;
  628: 	vm_offset_t pdirindex;
  629: 
  630: 	pdirindex = va >> PDRSHIFT;
  631: 	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
  632: 		unsigned *pte;
  633: 		if ((rtval & PG_PS) != 0) {
  634: 			rtval &= ~(NBPDR - 1);
  635: 			rtval |= va & (NBPDR - 1);
  636: 			return rtval;
  637: 		}
  638: 		pte = get_ptbase(pmap) + i386_btop(va);
  639: 		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
  640: 		return rtval;
  641: 	}
  642: 	return 0;
  643: }
  644: 
  645: /*
  646:  * Extract user accessible page only, return NULL if the page is not
  647:  * present or if its current state is not sufficient.  Caller will
  648:  * generally call vm_fault() on failure and try again.
  649:  */
  650: vm_page_t
  651: pmap_extract_vmpage(pmap_t pmap, vm_offset_t va, int prot)
  652: {
  653: 	vm_offset_t rtval;
  654: 	vm_offset_t pdirindex;
  655: 
  656: 	pdirindex = va >> PDRSHIFT;
  657: 	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
  658: 		unsigned *pte;
  659: 		vm_page_t m;
  660: 
  661: 		if ((rtval & PG_PS) != 0) {
  662: 			if ((rtval & (PG_V|PG_U)) != (PG_V|PG_U))
  663: 				return (NULL);
  664: 			if ((prot & VM_PROT_WRITE) && (rtval & PG_RW) == 0)
  665: 				return (NULL);
  666: 			rtval &= ~(NBPDR - 1);
  667: 			rtval |= va & (NBPDR - 1);
  668: 			m = PHYS_TO_VM_PAGE(rtval);
  669: 		} else {
  670: 			pte = get_ptbase(pmap) + i386_btop(va);
  671: 			if ((*pte & (PG_V|PG_U)) != (PG_V|PG_U))
  672: 				return (NULL);
  673: 			if ((prot & VM_PROT_WRITE) && (*pte & PG_RW) == 0)
  674: 				return (NULL);
  675: 			rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
  676: 			m = PHYS_TO_VM_PAGE(rtval);
  677: 		}
  678: 		return(m);
  679: 	}
  680: 	return (NULL);
  681: }
  682: 
  683: /***************************************************
  684:  * Low level mapping routines.....
  685:  ***************************************************/
  686: 
  687: /*
  688:  * Add a wired page to the kva.  The mapping is invalidated on all cpus
  689:  * via the pmap_inval_*() calls below, so the caller no longer needs to
  690:  * issue a separate invltlb; use pmap_kenter_quick() for a local-only mapping.
  691:  */
  692: void 
  693: pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  694: {
  695: 	unsigned *pte;
  696: 	unsigned npte;
  697: 	pmap_inval_info info;
  698: 
  699: 	pmap_inval_init(&info);
  700: 	pmap_inval_add(&info, kernel_pmap, va);
  701: 	npte = pa | PG_RW | PG_V | pgeflag;
  702: 	pte = (unsigned *)vtopte(va);
  703: 	*pte = npte;
  704: 	pmap_inval_flush(&info);
  705: }
  706: 
  707: void
  708: pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
  709: {
  710: 	unsigned *pte;
  711: 	unsigned npte;
  712: 
  713: 	npte = pa | PG_RW | PG_V | pgeflag;
  714: 	pte = (unsigned *)vtopte(va);
  715: 	*pte = npte;
  716: 	cpu_invlpg((void *)va);
  717: }
  718: 
  719: void
  720: pmap_kenter_sync(vm_offset_t va)
  721: {
  722: 	pmap_inval_info info;
  723: 
  724: 	pmap_inval_init(&info);
  725: 	pmap_inval_add(&info, kernel_pmap, va);
  726: 	pmap_inval_flush(&info);
  727: }
  728: 
  729: void
  730: pmap_kenter_sync_quick(vm_offset_t va)
  731: {
  732: 	cpu_invlpg((void *)va);
  733: }
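/*
 * Usage note (editorial sketch, not part of the original source): the
 * *_quick() forms above only invalidate the TLB on the current cpu and
 * are meant for mappings consumed locally, e.g. the per-cpu sf_buf use
 * described in the commit log.  pmap_kenter_sync() can be called later
 * to broadcast the invalidation if the same KVA must become valid on
 * every cpu, while pmap_kenter()/pmap_kremove() always perform the
 * system-wide invalidation themselves.
 */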
  734: 
  735: /*
  736:  * remove a page from the kernel pagetables
  737:  */
  738: void
  739: pmap_kremove(vm_offset_t va)
  740: {
  741: 	unsigned *pte;
  742: 	pmap_inval_info info;
  743: 
  744: 	pmap_inval_init(&info);
  745: 	pmap_inval_add(&info, kernel_pmap, va);
  746: 	pte = (unsigned *)vtopte(va);
  747: 	*pte = 0;
  748: 	pmap_inval_flush(&info);
  749: }
  750: 
  751: void
  752: pmap_kremove_quick(vm_offset_t va)
  753: {
  754: 	unsigned *pte;
  755: 	pte = (unsigned *)vtopte(va);
  756: 	*pte = 0;
  757: 	cpu_invlpg((void *)va);
  758: }
  759: 
  760: /*
  761:  *	Used to map a range of physical addresses into kernel
  762:  *	virtual address space.
  763:  *
  764:  *	For now, VM is already on, we only need to map the
  765:  *	specified memory.
  766:  */
  767: vm_offset_t
  768: pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
  769: {
  770: 	while (start < end) {
  771: 		pmap_kenter(virt, start);
  772: 		virt += PAGE_SIZE;
  773: 		start += PAGE_SIZE;
  774: 	}
  775: 	return (virt);
  776: }
  777: 
  778: 
  779: /*
  780:  * Add a list of wired pages to the kva
  781:  * this routine is only used for temporary
  782:  * kernel mappings that do not need to have
  783:  * page modification or references recorded.
  784:  * Note that old mappings are simply written
  785:  * over.  The page *must* be wired.
  786:  */
  787: void
  788: pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
  789: {
  790: 	vm_offset_t end_va;
  791: 
  792: 	end_va = va + count * PAGE_SIZE;
  793: 		
  794: 	while (va < end_va) {
  795: 		unsigned *pte;
  796: 
  797: 		pte = (unsigned *)vtopte(va);
  798: 		*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
  799: 		cpu_invlpg((void *)va);
  800: 		va += PAGE_SIZE;
  801: 		m++;
  802: 	}
  803: #ifdef SMP
  804: 	smp_invltlb();	/* XXX */
  805: #endif
  806: }
  807: 
  808: /*
  809:  * this routine jerks page mappings from the
  810:  * kernel -- it is meant only for temporary mappings.
  811:  */
  812: void
  813: pmap_qremove(vm_offset_t va, int count)
  814: {
  815: 	vm_offset_t end_va;
  816: 
  817: 	end_va = va + count*PAGE_SIZE;
  818: 
  819: 	while (va < end_va) {
  820: 		unsigned *pte;
  821: 
  822: 		pte = (unsigned *)vtopte(va);
  823: 		*pte = 0;
  824: 		cpu_invlpg((void *)va);
  825: 		va += PAGE_SIZE;
  826: 	}
  827: #ifdef SMP
  828: 	smp_invltlb();
  829: #endif
  830: }
  831: 
  832: static vm_page_t
  833: pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
  834: {
  835: 	vm_page_t m;
  836: retry:
  837: 	m = vm_page_lookup(object, pindex);
  838: 	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
  839: 		goto retry;
  840: 	return m;
  841: }
  842: 
  843: /*
  844:  * Initialize the machine-dependent pcb and initial stack pointer for a
  845:  * newly created thread.  NOTE! the new thread's cpu may not equal the current cpu.
  846:  */
  847: void
  848: pmap_init_thread(thread_t td)
  849: {
  850: 	td->td_pcb = (struct pcb *)(td->td_kstack + UPAGES * PAGE_SIZE) - 1;
  851: 	td->td_sp = (char *)td->td_pcb - 16;
  852: }
  853: 
  854: /*
  855:  * Create the UPAGES for a new process.
  856:  * This routine directly affects the fork perf for a process.
  857:  */
  858: void
  859: pmap_init_proc(struct proc *p, struct thread *td)
  860: {
  861: 	p->p_addr = (void *)td->td_kstack;
  862: 	p->p_thread = td;
  863: 	td->td_proc = p;
  864: 	td->td_switch = cpu_heavy_switch;
  865: #ifdef SMP
  866: 	td->td_mpcount = 1;
  867: #endif
  868: 	bzero(p->p_addr, sizeof(*p->p_addr));
  869: }
  870: 
  871: /*
  872:  * Dispose the UPAGES for a process that has exited.
  873:  * This routine directly impacts the exit perf of a process.
  874:  */
  875: struct thread *
  876: pmap_dispose_proc(struct proc *p)
  877: {
  878: 	struct thread *td;
  879: 
  880: 	KASSERT(p->p_lock == 0, ("attempt to dispose referenced proc! %p", p));
  881: 
  882: 	if ((td = p->p_thread) != NULL) {
  883: 	    p->p_thread = NULL;
  884: 	    td->td_proc = NULL;
  885: 	}
  886: 	p->p_addr = NULL;
  887: 	return(td);
  888: }
  889: 
  890: /*
  891:  * Allow the UPAGES for a process to be prejudicially paged out.
  892:  */
  893: void
  894: pmap_swapout_proc(struct proc *p)
  895: {
  896: #if 0
  897: 	int i;
  898: 	vm_object_t upobj;
  899: 	vm_page_t m;
  900: 
  901: 	upobj = p->p_upages_obj;
  902: 	/*
  903: 	 * let the upages be paged
  904: 	 */
  905: 	for(i=0;i<UPAGES;i++) {
  906: 		if ((m = vm_page_lookup(upobj, i)) == NULL)
  907: 			panic("pmap_swapout_proc: upage already missing???");
  908: 		vm_page_dirty(m);
  909: 		vm_page_unwire(m, 0);
  910: 		pmap_kremove((vm_offset_t)p->p_addr + (PAGE_SIZE * i));
  911: 	}
  912: #endif
  913: }
  914: 
  915: /*
  916:  * Bring the UPAGES for a specified process back in.
  917:  */
  918: void
  919: pmap_swapin_proc(struct proc *p)
  920: {
  921: #if 0
  922: 	int i,rv;
  923: 	vm_object_t upobj;
  924: 	vm_page_t m;
  925: 
  926: 	upobj = p->p_upages_obj;
  927: 	for(i=0;i<UPAGES;i++) {
  928: 
  929: 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
  930: 
  931: 		pmap_kenter((vm_offset_t)p->p_addr + (i * PAGE_SIZE),
  932: 			VM_PAGE_TO_PHYS(m));
  933: 
  934: 		if (m->valid != VM_PAGE_BITS_ALL) {
  935: 			rv = vm_pager_get_pages(upobj, &m, 1, 0);
  936: 			if (rv != VM_PAGER_OK)
  937: 				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
  938: 			m = vm_page_lookup(upobj, i);
  939: 			m->valid = VM_PAGE_BITS_ALL;
  940: 		}
  941: 
  942: 		vm_page_wire(m);
  943: 		vm_page_wakeup(m);
  944: 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
  945: 	}
  946: #endif
  947: }
  948: 
  949: /***************************************************
  950:  * Page table page management routines.....
  951:  ***************************************************/
  952: 
  953: /*
  954:  * This routine unholds page table pages, and if the hold count
  955:  * drops to zero, then it decrements the wire count.
  956:  */
  957: static int 
  958: _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) 
  959: {
  960: 	pmap_inval_flush(info);
  961: 	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
  962: 		;
  963: 
  964: 	if (m->hold_count == 0) {
  965: 		vm_offset_t pteva;
  966: 		/*
  967: 		 * unmap the page table page
  968: 		 */
  969: 		pmap_inval_add(info, pmap, -1);
  970: 		pmap->pm_pdir[m->pindex] = 0;
  971: 		--pmap->pm_stats.resident_count;
  972: 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
  973: 			(((unsigned) PTDpde) & PG_FRAME)) {
  974: 			/*
  975: 			 * Do a invltlb to make the invalidated mapping
  976: 			 * take effect immediately.
  977: 			 */
  978: 			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
  979: 		}
  980: 
  981: 		if (pmap->pm_ptphint == m)
  982: 			pmap->pm_ptphint = NULL;
  983: 
  984: 		/*
  985: 		 * If the page is finally unwired, simply free it.
  986: 		 */
  987: 		--m->wire_count;
  988: 		if (m->wire_count == 0) {
  989: 			vm_page_flash(m);
  990: 			vm_page_busy(m);
  991: 			vm_page_free_zero(m);
  992: 			--vmstats.v_wire_count;
  993: 		}
  994: 		return 1;
  995: 	}
  996: 	return 0;
  997: }
  998: 
  999: static PMAP_INLINE int
 1000: pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
 1001: {
 1002: 	vm_page_unhold(m);
 1003: 	if (m->hold_count == 0)
 1004: 		return _pmap_unwire_pte_hold(pmap, m, info);
 1005: 	else
 1006: 		return 0;
 1007: }
 1008: 
 1009: /*
 1010:  * After removing a page table entry, this routine is used to
 1011:  * conditionally free the page, and manage the hold/wire counts.
 1012:  */
 1013: static int
 1014: pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
 1015: 		pmap_inval_info_t info)
 1016: {
 1017: 	unsigned ptepindex;
 1018: 	if (va >= UPT_MIN_ADDRESS)
 1019: 		return 0;
 1020: 
 1021: 	if (mpte == NULL) {
 1022: 		ptepindex = (va >> PDRSHIFT);
 1023: 		if (pmap->pm_ptphint &&
 1024: 			(pmap->pm_ptphint->pindex == ptepindex)) {
 1025: 			mpte = pmap->pm_ptphint;
 1026: 		} else {
 1027: 			pmap_inval_flush(info);
 1028: 			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 1029: 			pmap->pm_ptphint = mpte;
 1030: 		}
 1031: 	}
 1032: 
 1033: 	return pmap_unwire_pte_hold(pmap, mpte, info);
 1034: }
 1035: 
 1036: void
 1037: pmap_pinit0(struct pmap *pmap)
 1038: {
 1039: 	pmap->pm_pdir =
 1040: 		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 1041: 	pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD);
 1042: 	pmap->pm_count = 1;
 1043: 	pmap->pm_active = 0;
 1044: 	pmap->pm_ptphint = NULL;
 1045: 	TAILQ_INIT(&pmap->pm_pvlist);
 1046: 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1047: }
 1048: 
 1049: /*
 1050:  * Initialize a preallocated and zeroed pmap structure,
 1051:  * such as one in a vmspace structure.
 1052:  */
 1053: void
 1054: pmap_pinit(struct pmap *pmap)
 1055: {
 1056: 	vm_page_t ptdpg;
 1057: 
 1058: 	/*
 1059: 	 * No need to allocate page table space yet but we do need a valid
 1060: 	 * page directory table.
 1061: 	 */
 1062: 	if (pmap->pm_pdir == NULL) {
 1063: 		pmap->pm_pdir =
 1064: 			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 1065: 	}
 1066: 
 1067: 	/*
 1068: 	 * allocate object for the ptes
 1069: 	 */
 1070: 	if (pmap->pm_pteobj == NULL)
 1071: 		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
 1072: 
 1073: 	/*
 1074: 	 * allocate the page directory page
 1075: 	 */
 1076: 	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
 1077: 			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 1078: 
 1079: 	ptdpg->wire_count = 1;
 1080: 	++vmstats.v_wire_count;
 1081: 
 1082: 
 1083: 	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
 1084: 	ptdpg->valid = VM_PAGE_BITS_ALL;
 1085: 
 1086: 	pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
 1087: 	if ((ptdpg->flags & PG_ZERO) == 0)
 1088: 		bzero(pmap->pm_pdir, PAGE_SIZE);
 1089: 
 1090: 	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 1091: 
 1092: 	/* install self-referential address mapping entry */
 1093: 	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
 1094: 		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
 1095: 
 1096: 	pmap->pm_count = 1;
 1097: 	pmap->pm_active = 0;
 1098: 	pmap->pm_ptphint = NULL;
 1099: 	TAILQ_INIT(&pmap->pm_pvlist);
 1100: 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1101: }
 1102: 
 1103: /*
 1104:  * Wire in kernel global address entries.  To avoid a race condition
 1105:  * between pmap initialization and pmap_growkernel, this procedure
 1106:  * should be called after the vmspace is attached to the process
 1107:  * but before this pmap is activated.
 1108:  */
 1109: void
 1110: pmap_pinit2(struct pmap *pmap)
 1111: {
 1112: 	/* XXX copies current process, does not fill in MPPTDI */
 1113: 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
 1114: }
 1115: 
 1116: static int
 1117: pmap_release_free_page(struct pmap *pmap, vm_page_t p)
 1118: {
 1119: 	unsigned *pde = (unsigned *) pmap->pm_pdir;
 1120: 	/*
 1121: 	 * This code optimizes the case of freeing non-busy
 1122: 	 * page-table pages.  Those pages are zero now, and
 1123: 	 * might as well be placed directly into the zero queue.
 1124: 	 */
 1125: 	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
 1126: 		return 0;
 1127: 
 1128: 	vm_page_busy(p);
 1129: 
 1130: 	/*
 1131: 	 * Remove the page table page from the processes address space.
 1132: 	 */
 1133: 	pde[p->pindex] = 0;
 1134: 	pmap->pm_stats.resident_count--;
 1135: 
 1136: 	if (p->hold_count)  {
 1137: 		panic("pmap_release: freeing held page table page");
 1138: 	}
 1139: 	/*
 1140: 	 * Page directory pages need to have the kernel
 1141: 	 * stuff cleared, so they can go into the zero queue also.
 1142: 	 */
 1143: 	if (p->pindex == PTDPTDI) {
 1144: 		bzero(pde + KPTDI, nkpt * PTESIZE);
 1145: 		pde[MPPTDI] = 0;
 1146: 		pde[APTDPTDI] = 0;
 1147: 		pmap_kremove((vm_offset_t)pmap->pm_pdir);
 1148: 	}
 1149: 
 1150: 	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
 1151: 		pmap->pm_ptphint = NULL;
 1152: 
 1153: 	p->wire_count--;
 1154: 	vmstats.v_wire_count--;
 1155: 	vm_page_free_zero(p);
 1156: 	return 1;
 1157: }
 1158: 
 1159: /*
 1160:  * this routine is called if the page table page is not
 1161:  * mapped correctly.
 1162:  */
 1163: static vm_page_t
 1164: _pmap_allocpte(pmap_t pmap, unsigned ptepindex)
 1165: {
 1166: 	vm_offset_t pteva, ptepa;
 1167: 	vm_page_t m;
 1168: 
 1169: 	/*
 1170: 	 * Find or fabricate a new pagetable page
 1171: 	 */
 1172: 	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
 1173: 			VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
 1174: 
 1175: 	KASSERT(m->queue == PQ_NONE,
 1176: 		("_pmap_allocpte: %p->queue != PQ_NONE", m));
 1177: 
 1178: 	if (m->wire_count == 0)
 1179: 		vmstats.v_wire_count++;
 1180: 	m->wire_count++;
 1181: 
 1182: 	/*
 1183: 	 * Increment the hold count for the page table page
 1184: 	 * (denoting a new mapping.)
 1185: 	 */
 1186: 	m->hold_count++;
 1187: 
 1188: 	/*
 1189: 	 * Map the pagetable page into the process address space, if
 1190: 	 * it isn't already there.
 1191: 	 */
 1192: 
 1193: 	pmap->pm_stats.resident_count++;
 1194: 
 1195: 	ptepa = VM_PAGE_TO_PHYS(m);
 1196: 	pmap->pm_pdir[ptepindex] =
 1197: 		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1198: 
 1199: 	/*
 1200: 	 * Set the page table hint
 1201: 	 */
 1202: 	pmap->pm_ptphint = m;
 1203: 
 1204: 	/*
 1205: 	 * Try to use the new mapping, but if we cannot, then
 1206: 	 * do it with the routine that maps the page explicitly.
 1207: 	 */
 1208: 	if ((m->flags & PG_ZERO) == 0) {
 1209: 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
 1210: 			(((unsigned) PTDpde) & PG_FRAME)) {
 1211: 			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
 1212: 			bzero((caddr_t) pteva, PAGE_SIZE);
 1213: 		} else {
 1214: 			pmap_zero_page(ptepa);
 1215: 		}
 1216: 	}
 1217: 
 1218: 	m->valid = VM_PAGE_BITS_ALL;
 1219: 	vm_page_flag_clear(m, PG_ZERO);
 1220: 	vm_page_flag_set(m, PG_MAPPED);
 1221: 	vm_page_wakeup(m);
 1222: 
 1223: 	return m;
 1224: }
 1225: 
 1226: static vm_page_t
 1227: pmap_allocpte(pmap_t pmap, vm_offset_t va)
 1228: {
 1229: 	unsigned ptepindex;
 1230: 	vm_offset_t ptepa;
 1231: 	vm_page_t m;
 1232: 
 1233: 	/*
 1234: 	 * Calculate pagetable page index
 1235: 	 */
 1236: 	ptepindex = va >> PDRSHIFT;
 1237: 
 1238: 	/*
 1239: 	 * Get the page directory entry
 1240: 	 */
 1241: 	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 1242: 
 1243: 	/*
 1244: 	 * This supports switching from a 4MB page to a
 1245: 	 * normal 4K page.
 1246: 	 */
 1247: 	if (ptepa & PG_PS) {
 1248: 		pmap->pm_pdir[ptepindex] = 0;
 1249: 		ptepa = 0;
 1250: 		cpu_invltlb();
 1251: 		smp_invltlb();
 1252: 	}
 1253: 
 1254: 	/*
 1255: 	 * If the page table page is mapped, we just increment the
 1256: 	 * hold count, and activate it.
 1257: 	 */
 1258: 	if (ptepa) {
 1259: 		/*
 1260: 		 * In order to get the page table page, try the
 1261: 		 * hint first.
 1262: 		 */
 1263: 		if (pmap->pm_ptphint &&
 1264: 			(pmap->pm_ptphint->pindex == ptepindex)) {
 1265: 			m = pmap->pm_ptphint;
 1266: 		} else {
 1267: 			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 1268: 			pmap->pm_ptphint = m;
 1269: 		}
 1270: 		m->hold_count++;
 1271: 		return m;
 1272: 	}
 1273: 	/*
 1274: 	 * Here if the pte page isn't mapped, or if it has been deallocated.
 1275: 	 */
 1276: 	return _pmap_allocpte(pmap, ptepindex);
 1277: }
 1278: 
 1279: 
 1280: /***************************************************
 1281:  * Pmap allocation/deallocation routines.
 1282:  ***************************************************/
 1283: 
 1284: /*
 1285:  * Release any resources held by the given physical map.
 1286:  * Called when a pmap initialized by pmap_pinit is being released.
 1287:  * Should only be called if the map contains no valid mappings.
 1288:  */
 1289: void
 1290: pmap_release(struct pmap *pmap)
 1291: {
 1292: 	vm_page_t p,n,ptdpg;
 1293: 	vm_object_t object = pmap->pm_pteobj;
 1294: 	int curgeneration;
 1295: 
 1296: #if defined(DIAGNOSTIC)
 1297: 	if (object->ref_count != 1)
 1298: 		panic("pmap_release: pteobj reference count != 1");
 1299: #endif
 1300: 	
 1301: 	ptdpg = NULL;
 1302: retry:
 1303: 	curgeneration = object->generation;
 1304: 	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
 1305: 		n = TAILQ_NEXT(p, listq);
 1306: 		if (p->pindex == PTDPTDI) {
 1307: 			ptdpg = p;
 1308: 			continue;
 1309: 		}
 1310: 		while (1) {
 1311: 			if (!pmap_release_free_page(pmap, p) &&
 1312: 				(object->generation != curgeneration))
 1313: 				goto retry;
 1314: 		}
 1315: 	}
 1316: 
 1317: 	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
 1318: 		goto retry;
 1319: }
 1320: 
 1321: static int
 1322: kvm_size(SYSCTL_HANDLER_ARGS)
 1323: {
 1324: 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1325: 
 1326:         return sysctl_handle_long(oidp, &ksize, 0, req);
 1327: }
 1328: SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1329:     0, 0, kvm_size, "IU", "Size of KVM");
 1330: 
 1331: static int
 1332: kvm_free(SYSCTL_HANDLER_ARGS)
 1333: {
 1334: 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1335: 
 1336:         return sysctl_handle_long(oidp, &kfree, 0, req);
 1337: }
 1338: SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1339:     0, 0, kvm_free, "IU", "Amount of KVM free");
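/*
 * Editorial note: these appear as the read-only sysctls vm.kvm_size and
 * vm.kvm_free, e.g. "sysctl vm.kvm_size" from userland.
 */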
 1340: 
 1341: /*
 1342:  * grow the number of kernel page table entries, if needed
 1343:  */
 1344: void
 1345: pmap_growkernel(vm_offset_t addr)
 1346: {
 1347: 	struct proc *p;
 1348: 	struct pmap *pmap;
 1349: 	int s;
 1350: 	vm_offset_t ptppaddr;
 1351: 	vm_page_t nkpg;
 1352: 	pd_entry_t newpdir;
 1353: 
 1354: 	s = splhigh();
 1355: 	if (kernel_vm_end == 0) {
 1356: 		kernel_vm_end = KERNBASE;
 1357: 		nkpt = 0;
 1358: 		while (pdir_pde(PTD, kernel_vm_end)) {
 1359: 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1360: 			nkpt++;
 1361: 		}
 1362: 	}
 1363: 	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1364: 	while (kernel_vm_end < addr) {
 1365: 		if (pdir_pde(PTD, kernel_vm_end)) {
 1366: 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1367: 			continue;
 1368: 		}
 1369: 
 1370: 		/*
 1371: 		 * This index is bogus, but out of the way
 1372: 		 */
 1373: 		nkpg = vm_page_alloc(kptobj, nkpt, 
 1374: 			VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT);
 1375: 		if (nkpg == NULL)
 1376: 			panic("pmap_growkernel: no memory to grow kernel");
 1377: 
 1378: 		nkpt++;
 1379: 
 1380: 		vm_page_wire(nkpg);
 1381: 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 1382: 		pmap_zero_page(ptppaddr);
 1383: 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 1384: 		pdir_pde(PTD, kernel_vm_end) = newpdir;
 1385: 
 1386: 		FOREACH_PROC_IN_SYSTEM(p) {
 1387: 			if (p->p_vmspace) {
 1388: 				pmap = vmspace_pmap(p->p_vmspace);
 1389: 				*pmap_pde(pmap, kernel_vm_end) = newpdir;
 1390: 			}
 1391: 		}
 1392: 		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
 1393: 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1394: 	}
 1395: 	splx(s);
 1396: }
 1397: 
 1398: /*
 1399:  *	Retire the given physical map from service.
 1400:  *	Should only be called if the map contains
 1401:  *	no valid mappings.
 1402:  */
 1403: void
 1404: pmap_destroy(pmap_t pmap)
 1405: {
 1406: 	int count;
 1407: 
 1408: 	if (pmap == NULL)
 1409: 		return;
 1410: 
 1411: 	count = --pmap->pm_count;
 1412: 	if (count == 0) {
 1413: 		pmap_release(pmap);
 1414: 		panic("destroying a pmap is not yet implemented");
 1415: 	}
 1416: }
 1417: 
 1418: /*
 1419:  *	Add a reference to the specified pmap.
 1420:  */
 1421: void
 1422: pmap_reference(pmap_t pmap)
 1423: {
 1424: 	if (pmap != NULL) {
 1425: 		pmap->pm_count++;
 1426: 	}
 1427: }
 1428: 
 1429: /***************************************************
 1430:  * Page management routines.
 1431:  ***************************************************/
 1432: 
 1433: /*
 1434:  * free the pv_entry back to the free list.  This function may be
 1435:  * called from an interrupt.
 1436:  */
 1437: static PMAP_INLINE void
 1438: free_pv_entry(pv_entry_t pv)
 1439: {
 1440: 	pv_entry_count--;
 1441: 	zfree(pvzone, pv);
 1442: }
 1443: 
 1444: /*
 1445:  * get a new pv_entry, allocating a block from the system
 1446:  * when needed.  This function may be called from an interrupt.
 1447:  */
 1448: static pv_entry_t
 1449: get_pv_entry(void)
 1450: {
 1451: 	pv_entry_count++;
 1452: 	if (pv_entry_high_water &&
 1453: 		(pv_entry_count > pv_entry_high_water) &&
 1454: 		(pmap_pagedaemon_waken == 0)) {
 1455: 		pmap_pagedaemon_waken = 1;
 1456: 		wakeup (&vm_pages_needed);
 1457: 	}
 1458: 	return zalloc(pvzone);
 1459: }
 1460: 
 1461: /*
 1462:  * This routine is very drastic, but can save the system
 1463:  * in a pinch.
 1464:  */
 1465: void
 1466: pmap_collect(void)
 1467: {
 1468: 	int i;
 1469: 	vm_page_t m;
 1470: 	static int warningdone=0;
 1471: 
 1472: 	if (pmap_pagedaemon_waken == 0)
 1473: 		return;
 1474: 
 1475: 	if (warningdone < 5) {
 1476: 		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
 1477: 		warningdone++;
 1478: 	}
 1479: 
 1480: 	for(i = 0; i < vm_page_array_size; i++) {
 1481: 		m = &vm_page_array[i];
 1482: 		if (m->wire_count || m->hold_count || m->busy ||
 1483: 		    (m->flags & PG_BUSY))
 1484: 			continue;
 1485: 		pmap_remove_all(m);
 1486: 	}
 1487: 	pmap_pagedaemon_waken = 0;
 1488: }
 1489: 	
 1490: 
 1491: /*
 1492:  * Remove the pv_entry for the given (pmap, va) pair.  We search either
 1493:  * the page's pv list or the pmap's pv list, whichever is expected to be
 1494:  * shorter, unhook the entry from both lists, drop the page table page
 1495:  * reference via pmap_unuse_pt(), and free the now unused entry.
 1496:  */
 1497: static int
 1498: pmap_remove_entry(struct pmap *pmap, vm_page_t m, 
 1499: 			vm_offset_t va, pmap_inval_info_t info)
 1500: {
 1501: 	pv_entry_t pv;
 1502: 	int rtval;
 1503: 	int s;
 1504: 
 1505: 	s = splvm();
 1506: 	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1507: 		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1508: 			if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1509: 				break;
 1510: 		}
 1511: 	} else {
 1512: 		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1513: 			if (va == pv->pv_va) 
 1514: 				break;
 1515: 		}
 1516: 	}
 1517: 
 1518: 	rtval = 0;
 1519: 	if (pv) {
 1520: 		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info);
 1521: 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1522: 		m->md.pv_list_count--;
 1523: 		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 1524: 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 1525: 		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1526: 		free_pv_entry(pv);
 1527: 	}
 1528: 	splx(s);
 1529: 	return rtval;
 1530: }
 1531: 
 1532: /*
 1533:  * Create a pv entry for page at pa for
 1534:  * (pmap, va).
 1535:  */
 1536: static void
 1537: pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
 1538: {
 1539: 	int s;
 1540: 	pv_entry_t pv;
 1541: 
 1542: 	s = splvm();
 1543: 	pv = get_pv_entry();
 1544: 	pv->pv_va = va;
 1545: 	pv->pv_pmap = pmap;
 1546: 	pv->pv_ptem = mpte;
 1547: 
 1548: 	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1549: 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1550: 	m->md.pv_list_count++;
 1551: 
 1552: 	splx(s);
 1553: }
 1554: 
 1555: /*
 1556:  * pmap_remove_pte: do the things to unmap a page in a process
 1557:  */
 1558: static int
 1559: pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va,
 1560: 	pmap_inval_info_t info)
 1561: {
 1562: 	unsigned oldpte;
 1563: 	vm_page_t m;
 1564: 
 1565: 	pmap_inval_add(info, pmap, va);
 1566: 	oldpte = loadandclear(ptq);
 1567: 	if (oldpte & PG_W)
 1568: 		pmap->pm_stats.wired_count -= 1;
 1569: 	/*
 1570: 	 * Machines that don't support invlpg, also don't support
 1571: 	 * PG_G.  XXX PG_G is disabled for SMP so don't worry about
 1572: 	 * the SMP case.
 1573: 	 */
 1574: 	if (oldpte & PG_G)
 1575: 		cpu_invlpg((void *)va);
 1576: 	pmap->pm_stats.resident_count -= 1;
 1577: 	if (oldpte & PG_MANAGED) {
 1578: 		m = PHYS_TO_VM_PAGE(oldpte);
 1579: 		if (oldpte & PG_M) {
 1580: #if defined(PMAP_DIAGNOSTIC)
 1581: 			if (pmap_nw_modified((pt_entry_t) oldpte)) {
 1582: 				printf(
 1583: 	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1584: 				    va, oldpte);
 1585: 			}
 1586: #endif
 1587: 			if (pmap_track_modified(va))
 1588: 				vm_page_dirty(m);
 1589: 		}
 1590: 		if (oldpte & PG_A)
 1591: 			vm_page_flag_set(m, PG_REFERENCED);
 1592: 		return pmap_remove_entry(pmap, m, va, info);
 1593: 	} else {
 1594: 		return pmap_unuse_pt(pmap, va, NULL, info);
 1595: 	}
 1596: 
 1597: 	return 0;
 1598: }
 1599: 
 1600: /*
 1601:  * pmap_remove_page:
 1602:  *
 1603:  *	Remove a single page from a process address space.
 1604:  *
 1605:  *	This function may not be called from an interrupt if the pmap is
 1606:  *	not kernel_pmap.
 1607:  */
 1608: static void
 1609: pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info)
 1610: {
 1611: 	unsigned *ptq;
 1612: 
 1613: 	/*
 1614: 	 * if there is no pte for this address, just skip it!!!  Otherwise
 1615: 	 * get a local va for mappings for this pmap and remove the entry.
 1616: 	 */
 1617: 	if (*pmap_pde(pmap, va) != 0) {
 1618: 		ptq = get_ptbase(pmap) + i386_btop(va);
 1619: 		if (*ptq) {
 1620: 			pmap_remove_pte(pmap, ptq, va, info);
 1621: 		}
 1622: 	}
 1623: }
 1624: 
 1625: /*
 1626:  * pmap_remove:
 1627:  *
 1628:  *	Remove the given range of addresses from the specified map.
 1629:  *
 1630:  *	It is assumed that the start and end are properly
 1631:  *	rounded to the page size.
 1632:  *
 1633:  *	This function may not be called from an interrupt if the pmap is
 1634:  *	not kernel_pmap.
 1635:  */
 1636: void
 1637: pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
 1638: {
 1639: 	unsigned *ptbase;
 1640: 	vm_offset_t pdnxt;
 1641: 	vm_offset_t ptpaddr;
 1642: 	vm_offset_t sindex, eindex;
 1643: 	struct pmap_inval_info info;
 1644: 
 1645: 	if (pmap == NULL)
 1646: 		return;
 1647: 
 1648: 	if (pmap->pm_stats.resident_count == 0)
 1649: 		return;
 1650: 
 1651: 	pmap_inval_init(&info);
 1652: 
 1653: 	/*
 1654: 	 * special handling of removing one page.  a very
 1655: 	 * common operation and easy to short circuit some
 1656: 	 * code.
 1657: 	 */
 1658: 	if (((sva + PAGE_SIZE) == eva) && 
 1659: 		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 1660: 		pmap_remove_page(pmap, sva, &info);
 1661: 		pmap_inval_flush(&info);
 1662: 		return;
 1663: 	}
 1664: 
 1665: 	/*
 1666: 	 * Get a local virtual address for the mappings that are being
 1667: 	 * worked with.
 1668: 	 */
 1669: 	ptbase = get_ptbase(pmap);
 1670: 
 1671: 	sindex = i386_btop(sva);
 1672: 	eindex = i386_btop(eva);
 1673: 
 1674: 	for (; sindex < eindex; sindex = pdnxt) {
 1675: 		unsigned pdirindex;
 1676: 
 1677: 		/*
 1678: 		 * Calculate index for next page table.
 1679: 		 */
 1680: 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 1681: 		if (pmap->pm_stats.resident_count == 0)
 1682: 			break;
 1683: 
 1684: 		pdirindex = sindex / NPDEPG;
 1685: 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 1686: 			pmap_inval_add(&info, pmap, -1);
 1687: 			pmap->pm_pdir[pdirindex] = 0;
 1688: 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1689: 			continue;
 1690: 		}
 1691: 
 1692: 		/*
 1693: 		 * Weed out invalid mappings. Note: we assume that the page
 1694: 		 * directory table is always allocated, and in kernel virtual.
 1695: 		 */
 1696: 		if (ptpaddr == 0)
 1697: 			continue;
 1698: 
 1699: 		/*
 1700: 		 * Limit our scan to either the end of the va represented
 1701: 		 * by the current page table page, or to the end of the
 1702: 		 * range being removed.
 1703: 		 */
 1704: 		if (pdnxt > eindex) {
 1705: 			pdnxt = eindex;
 1706: 		}
 1707: 
 1708: 		for (; sindex != pdnxt; sindex++) {
 1709: 			vm_offset_t va;
 1710: 			if (ptbase[sindex] == 0)
 1711: 				continue;
 1712: 			va = i386_ptob(sindex);
 1713: 			if (pmap_remove_pte(pmap, ptbase + sindex, va, &info))
 1714: 				break;
 1715: 		}
 1716: 	}
 1717: 	pmap_inval_flush(&info);
 1718: }
 1719: 
 1720: /*
 1721:  * pmap_remove_all:
 1722:  *
 1723:  *	Removes this physical page from all physical maps in which it resides.
 1724:  *	Reflects back modify bits to the pager.
 1725:  *
 1726:  *	This routine may not be called from an interrupt.
 1727:  */
 1728: 
 1729: static void
 1730: pmap_remove_all(vm_page_t m)
 1731: {
 1732: 	struct pmap_inval_info info;
 1733: 	unsigned *pte, tpte;
 1734: 	pv_entry_t pv;
 1735: 	int s;
 1736: 
 1737: #if defined(PMAP_DIAGNOSTIC)
 1738: 	/*
 1739: 	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 1740: 	 * pages!
 1741: 	 */
 1742: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 1743: 		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m));
 1744: 	}
 1745: #endif
 1746: 
 1747: 	pmap_inval_init(&info);
 1748: 	s = splvm();
 1749: 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1750: 		pv->pv_pmap->pm_stats.resident_count--;
 1751: 
 1752: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 1753: 		pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
 1754: 
 1755: 		tpte = loadandclear(pte);
 1756: 		if (tpte & PG_W)
 1757: 			pv->pv_pmap->pm_stats.wired_count--;
 1758: 
 1759: 		if (tpte & PG_A)
 1760: 			vm_page_flag_set(m, PG_REFERENCED);
 1761: 
 1762: 		/*
 1763: 		 * Update the vm_page_t clean and reference bits.
 1764: 		 */
 1765: 		if (tpte & PG_M) {
 1766: #if defined(PMAP_DIAGNOSTIC)
 1767: 			if (pmap_nw_modified((pt_entry_t) tpte)) {
 1768: 				printf(
 1769: 	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1770: 				    pv->pv_va, tpte);
 1771: 			}
 1772: #endif
 1773: 			if (pmap_track_modified(pv->pv_va))
 1774: 				vm_page_dirty(m);
 1775: 		}
 1776: 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1777: 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1778: 		m->md.pv_list_count--;
 1779: 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
 1780: 		free_pv_entry(pv);
 1781: 	}
 1782: 
 1783: 	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 1784: 	splx(s);
 1785: 	pmap_inval_flush(&info);
 1786: }
 1787: 
 1788: /*
 1789:  * pmap_protect:
 1790:  *
 1791:  *	Set the physical protection on the specified range of this map
 1792:  *	as requested.
 1793:  *
 1794:  *	This function may not be called from an interrupt if the map is
 1795:  *	not the kernel_pmap.
 1796:  */
 1797: void
 1798: pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1799: {
 1800: 	unsigned *ptbase;
 1801: 	vm_offset_t pdnxt, ptpaddr;
 1802: 	vm_pindex_t sindex, eindex;
 1803: 	pmap_inval_info info;
 1804: 
 1805: 	if (pmap == NULL)
 1806: 		return;
 1807: 
 1808: 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1809: 		pmap_remove(pmap, sva, eva);
 1810: 		return;
 1811: 	}
 1812: 
 1813: 	if (prot & VM_PROT_WRITE)
 1814: 		return;
 1815: 
 1816: 	pmap_inval_init(&info);
 1817: 
 1818: 	ptbase = get_ptbase(pmap);
 1819: 
 1820: 	sindex = i386_btop(sva);
 1821: 	eindex = i386_btop(eva);
 1822: 
 1823: 	for (; sindex < eindex; sindex = pdnxt) {
 1824: 
 1825: 		unsigned pdirindex;
 1826: 
 1827: 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 1828: 
 1829: 		pdirindex = sindex / NPDEPG;
 1830: 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 1831: 			pmap_inval_add(&info, pmap, -1);
 1832: 			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 1833: 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1834: 			continue;
 1835: 		}
 1836: 
 1837: 		/*
 1838: 		 * Weed out invalid mappings. Note: we assume that the page
 1839: 		 * directory table is always allocated, and in kernel virtual.
 1840: 		 */
 1841: 		if (ptpaddr == 0)
 1842: 			continue;
 1843: 
 1844: 		if (pdnxt > eindex) {
 1845: 			pdnxt = eindex;
 1846: 		}
 1847: 
 1848: 		for (; sindex != pdnxt; sindex++) {
 1849: 
 1850: 			unsigned pbits;
 1851: 			vm_page_t m;
 1852: 
 1853: 			/* XXX this isn't optimal */
 1854: 			pmap_inval_add(&info, pmap, i386_ptob(sindex));
 1855: 			pbits = ptbase[sindex];
 1856: 
 1857: 			if (pbits & PG_MANAGED) {
 1858: 				m = NULL;
 1859: 				if (pbits & PG_A) {
 1860: 					m = PHYS_TO_VM_PAGE(pbits);
 1861: 					vm_page_flag_set(m, PG_REFERENCED);
 1862: 					pbits &= ~PG_A;
 1863: 				}
 1864: 				if (pbits & PG_M) {
 1865: 					if (pmap_track_modified(i386_ptob(sindex))) {
 1866: 						if (m == NULL)
 1867: 							m = PHYS_TO_VM_PAGE(pbits);
 1868: 						vm_page_dirty(m);
 1869: 						pbits &= ~PG_M;
 1870: 					}
 1871: 				}
 1872: 			}
 1873: 
 1874: 			pbits &= ~PG_RW;
 1875: 
 1876: 			if (pbits != ptbase[sindex]) {
 1877: 				ptbase[sindex] = pbits;
 1878: 			}
 1879: 		}
 1880: 	}
 1881: 	pmap_inval_flush(&info);
 1882: }
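
/*
 * Example (illustrative sketch, not part of the original API surface):
 * a caller such as copy-on-write setup downgrades a page-aligned user
 * range to read-only with a single pmap_protect() call.  A prot without
 * VM_PROT_READ would instead remove the mappings entirely (see the
 * pmap_remove() shortcut above).
 */
static __inline void
example_write_protect_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	/* sva/eva are assumed to be page aligned by the caller */
	pmap_protect(pmap, sva, eva, VM_PROT_READ);
}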
 1883: 
 1884: /*
 1885:  *	Insert the given physical page (p) at
 1886:  *	the specified virtual address (v) in the
 1887:  *	target physical map with the protection requested.
 1888:  *
 1889:  *	If specified, the page will be wired down, meaning
 1890:  *	that the related pte can not be reclaimed.
 1891:  *
 1892:  *	NB:  This is the only routine which MAY NOT lazy-evaluate
 1893:  *	or lose information.  That is, this routine must actually
 1894:  *	insert this page into the given map NOW.
 1895:  */
 1896: void
 1897: pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1898: 	   boolean_t wired)
 1899: {
 1900: 	vm_paddr_t pa;
 1901: 	unsigned *pte;
 1902: 	vm_paddr_t opa;
 1903: 	vm_offset_t origpte, newpte;
 1904: 	vm_page_t mpte;
 1905: 	pmap_inval_info info;
 1906: 
 1907: 	if (pmap == NULL)
 1908: 		return;
 1909: 
 1910: 	va &= PG_FRAME;
 1911: #ifdef PMAP_DIAGNOSTIC
 1912: 	if (va > VM_MAX_KERNEL_ADDRESS)
 1913: 		panic("pmap_enter: toobig");
 1914: 	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1915: 		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 1916: #endif
 1917: 
 1918: 	mpte = NULL;
 1919: 	/*
 1920: 	 * In the case that a page table page is not
 1921: 	 * resident, we are creating it here.
 1922: 	 */
 1923: 	if (va < UPT_MIN_ADDRESS) {
 1924: 		mpte = pmap_allocpte(pmap, va);
 1925: 	}
 1926: 
 1927: 	pmap_inval_init(&info);
 1928: 	pte = pmap_pte(pmap, va);
 1929: 
 1930: 	/*
 1931: 	 * Page Directory table entry not valid, we need a new PT page
 1932: 	 */
 1933: 	if (pte == NULL) {
 1934: 		panic("pmap_enter: invalid page directory pdir=%x, va=0x%x\n",
 1935: 		     (unsigned) pmap->pm_pdir[PTDPTDI], va);
 1936: 	}
 1937: 
 1938: 	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
 1939: 	pmap_inval_add(&info, pmap, va); /* XXX non-optimal */
 1940: 	origpte = *(vm_offset_t *)pte;
 1941: 	opa = origpte & PG_FRAME;
 1942: 
 1943: 	if (origpte & PG_PS)
 1944: 		panic("pmap_enter: attempted pmap_enter on 4MB page");
 1945: 
 1946: 	/*
 1947: 	 * Mapping has not changed, must be protection or wiring change.
 1948: 	 */
 1949: 	if (origpte && (opa == pa)) {
 1950: 		/*
 1951: 		 * Wiring change, just update stats. We don't worry about
 1952: 		 * wiring PT pages as they remain resident as long as there
 1953: 		 * are valid mappings in them. Hence, if a user page is wired,
 1954: 		 * the PT page will be also.
 1955: 		 */
 1956: 		if (wired && ((origpte & PG_W) == 0))
 1957: 			pmap->pm_stats.wired_count++;
 1958: 		else if (!wired && (origpte & PG_W))
 1959: 			pmap->pm_stats.wired_count--;
 1960: 
 1961: #if defined(PMAP_DIAGNOSTIC)
 1962: 		if (pmap_nw_modified((pt_entry_t) origpte)) {
 1963: 			printf(
 1964: 	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1965: 			    va, origpte);
 1966: 		}
 1967: #endif
 1968: 
 1969: 		/*
 1970: 		 * Remove extra pte reference
 1971: 		 */
 1972: 		if (mpte)
 1973: 			mpte->hold_count--;
 1974: 
 1975: 		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
 1976: 			if ((origpte & PG_RW) == 0)
 1977: 				*pte |= PG_RW;
 1978: 			pmap_inval_flush(&info);
 1979: 			return;
 1980: 		}
 1981: 
 1982: 		/*
 1983: 		 * We might be turning off write access to the page,
 1984: 		 * so we go ahead and sense modify status.
 1985: 		 */
 1986: 		if (origpte & PG_MANAGED) {
 1987: 			if ((origpte & PG_M) && pmap_track_modified(va)) {
 1988: 				vm_page_t om;
 1989: 				om = PHYS_TO_VM_PAGE(opa);
 1990: 				vm_page_dirty(om);
 1991: 			}
 1992: 			pa |= PG_MANAGED;
 1993: 		}
 1994: 		goto validate;
 1995: 	} 
 1996: 	/*
 1997: 	 * Mapping has changed, invalidate old range and fall through to
 1998: 	 * handle validating new mapping.
 1999: 	 */
 2000: 	if (opa) {
 2001: 		int err;
 2002: 		err = pmap_remove_pte(pmap, pte, va, &info);
 2003: 		if (err)
 2004: 			panic("pmap_enter: pte vanished, va: 0x%x", va);
 2005: 	}
 2006: 
 2007: 	/*
 2008: 	 * Enter on the PV list if part of our managed memory. Note that we
 2009: 	 * raise IPL while manipulating pv_table since pmap_enter can be
 2010: 	 * called at interrupt time.
 2011: 	 */
 2012: 	if (pmap_initialized && 
 2013: 	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 2014: 		pmap_insert_entry(pmap, va, mpte, m);
 2015: 		pa |= PG_MANAGED;
 2016: 	}
 2017: 
 2018: 	/*
 2019: 	 * Increment counters
 2020: 	 */
 2021: 	pmap->pm_stats.resident_count++;
 2022: 	if (wired)
 2023: 		pmap->pm_stats.wired_count++;
 2024: 
 2025: validate:
 2026: 	/*
 2027: 	 * Now validate mapping with desired protection/wiring.
 2028: 	 */
 2029: 	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
 2030: 
 2031: 	if (wired)
 2032: 		newpte |= PG_W;
 2033: 	if (va < UPT_MIN_ADDRESS)
 2034: 		newpte |= PG_U;
 2035: 	if (pmap == kernel_pmap)
 2036: 		newpte |= pgeflag;
 2037: 
 2038: 	/*
 2039: 	 * if the mapping or permission bits are different, we need
 2040: 	 * to update the pte.
 2041: 	 */
 2042: 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2043: 		*pte = newpte | PG_A;
 2044: 	}
 2045: 	pmap_inval_flush(&info);
 2046: }
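
/*
 * Example (illustrative sketch): entering a page the caller already owns.
 * The vm_page_t and the target va are assumptions supplied by the caller;
 * pmap_enter() itself allocates any missing page table page, updates the
 * wired/resident counts, and flushes the invalidation before returning.
 */
static __inline void
example_enter_wired_rw(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, TRUE);
}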
 2047: 
 2048: /*
 2049:  * this code makes some *MAJOR* assumptions:
 2050:  * 1. The target pmap is the current pmap and it exists.
 2051:  * 2. Not wired.
 2052:  * 3. Read access.
 2053:  * 4. No page table pages.
 2054:  * 5. Tlbflush is deferred to calling procedure.
 2055:  * 6. Page IS managed.
 2056:  * but is *MUCH* faster than pmap_enter...
 2057:  */
 2058: 
 2059: static vm_page_t
 2060: pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 2061: {
 2062: 	unsigned *pte;
 2063: 	vm_paddr_t pa;
 2064: 	pmap_inval_info info;
 2065: 
 2066: 	pmap_inval_init(&info);
 2067: 
 2068: 	/*
 2069: 	 * In the case that a page table page is not
 2070: 	 * resident, we are creating it here.
 2071: 	 */
 2072: 	if (va < UPT_MIN_ADDRESS) {
 2073: 		unsigned ptepindex;
 2074: 		vm_offset_t ptepa;
 2075: 
 2076: 		/*
 2077: 		 * Calculate pagetable page index
 2078: 		 */
 2079: 		ptepindex = va >> PDRSHIFT;
 2080: 		if (mpte && (mpte->pindex == ptepindex)) {
 2081: 			mpte->hold_count++;
 2082: 		} else {
 2083: retry:
 2084: 			/*
 2085: 			 * Get the page directory entry
 2086: 			 */
 2087: 			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 2088: 
 2089: 			/*
 2090: 			 * If the page table page is mapped, we just increment
 2091: 			 * the hold count, and activate it.
 2092: 			 */
 2093: 			if (ptepa) {
 2094: 				if (ptepa & PG_PS)
 2095: 					panic("pmap_enter_quick: unexpected mapping into 4MB page");
 2096: 				if (pmap->pm_ptphint &&
 2097: 					(pmap->pm_ptphint->pindex == ptepindex)) {
 2098: 					mpte = pmap->pm_ptphint;
 2099: 				} else {
 2100: 					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 2101: 					pmap->pm_ptphint = mpte;
 2102: 				}
 2103: 				if (mpte == NULL)
 2104: 					goto retry;
 2105: 				mpte->hold_count++;
 2106: 			} else {
 2107: 				mpte = _pmap_allocpte(pmap, ptepindex);
 2108: 			}
 2109: 		}
 2110: 	} else {
 2111: 		mpte = NULL;
 2112: 	}
 2113: 
 2114: 	/*
 2115: 	 * This call to vtopte makes the assumption that we are
 2116: 	 * entering the page into the current pmap.  In order to support
 2117: 	 * quick entry into any pmap, one would likely use pmap_pte_quick.
 2118: 	 * But that isn't as quick as vtopte.
 2119: 	 */
 2120: 	pte = (unsigned *)vtopte(va);
 2121: 	if (*pte) {
 2122: 		if (mpte)
 2123: 			pmap_unwire_pte_hold(pmap, mpte, &info);
 2124: 		return 0;
 2125: 	}
 2126: 
 2127: 	/*
 2128: 	 * Enter on the PV list if part of our managed memory. Note that we
 2129: 	 * raise IPL while manipulating pv_table since pmap_enter can be
 2130: 	 * called at interrupt time.
 2131: 	 */
 2132: 	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2133: 		pmap_insert_entry(pmap, va, mpte, m);
 2134: 
 2135: 	/*
 2136: 	 * Increment counters
 2137: 	 */
 2138: 	pmap->pm_stats.resident_count++;
 2139: 
 2140: 	pa = VM_PAGE_TO_PHYS(m);
 2141: 
 2142: 	/*
 2143: 	 * Now validate mapping with RO protection
 2144: 	 */
 2145: 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2146: 		*pte = pa | PG_V | PG_U;
 2147: 	else
 2148: 		*pte = pa | PG_V | PG_U | PG_MANAGED;
 2149: 
 2150: 	return mpte;
 2151: }
 2152: 
 2153: /*
 2154:  * Make a temporary mapping for a physical address.  This is only intended
 2155:  * to be used for panic dumps.
 2156:  */
 2157: void *
 2158: pmap_kenter_temporary(vm_paddr_t pa, int i)
 2159: {
 2160: 	pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
 2161: 	return ((void *)crashdumpmap);
 2162: }
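
/*
 * Example (illustrative sketch): panic-dump style code can walk physical
 * memory one page at a time through the crashdumpmap window.  The
 * 'writeout' callback is a hypothetical consumer of the temporary kernel
 * mapping; only the mapping call itself comes from this file.
 */
static __inline void
example_dump_phys_page(vm_paddr_t pa, void (*writeout)(void *va))
{
	void *va = pmap_kenter_temporary(pa, 0);

	writeout(va);
}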
 2163: 
 2164: #define MAX_INIT_PT (96)
 2165: /*
 2166:  * pmap_object_init_pt preloads the ptes for a given object
 2167:  * into the specified pmap.  This eliminates the blast of soft
 2168:  * faults on process startup and immediately after an mmap.
 2169:  */
 2170: void
 2171: pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
 2172: 		    vm_pindex_t pindex, vm_size_t size, int limit)
 2173: {
 2174: 	vm_offset_t tmpidx;
 2175: 	int psize;
 2176: 	vm_page_t p, mpte;
 2177: 	int objpgs;
 2178: 
 2179: 	if (pmap == NULL || object == NULL)
 2180: 		return;
 2181: 
 2182: 	/*
 2183: 	 * This code maps large physical mmap regions into the
 2184: 	 * processor address space.  Note that some shortcuts
 2185: 	 * are taken, but the code works.
 2186: 	 */
 2187: 	if (pseflag &&
 2188: 		(object->type == OBJT_DEVICE) &&
 2189: 		((addr & (NBPDR - 1)) == 0) &&
 2190: 		((size & (NBPDR - 1)) == 0) ) {
 2191: 		int i;
 2192: 		vm_page_t m[1];
 2193: 		unsigned int ptepindex;
 2194: 		int npdes;
 2195: 		vm_offset_t ptepa;
 2196: 
 2197: 		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 2198: 			return;
 2199: 
 2200: retry:
 2201: 		p = vm_page_lookup(object, pindex);
 2202: 		if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
 2203: 			goto retry;
 2204: 
 2205: 		if (p == NULL) {
 2206: 			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2207: 			if (p == NULL)
 2208: 				return;
 2209: 			m[0] = p;
 2210: 
 2211: 			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2212: 				vm_page_free(p);
 2213: 				return;
 2214: 			}
 2215: 
 2216: 			p = vm_page_lookup(object, pindex);
 2217: 			vm_page_wakeup(p);
 2218: 		}
 2219: 
 2220: 		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
 2221: 		if (ptepa & (NBPDR - 1)) {
 2222: 			return;
 2223: 		}
 2224: 
 2225: 		p->valid = VM_PAGE_BITS_ALL;
 2226: 
 2227: 		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 2228: 		npdes = size >> PDRSHIFT;
 2229: 		for(i=0;i<npdes;i++) {
 2230: 			pmap->pm_pdir[ptepindex] =
 2231: 				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
 2232: 			ptepa += NBPDR;
 2233: 			ptepindex += 1;
 2234: 		}
 2235: 		vm_page_flag_set(p, PG_MAPPED);
 2236: 		cpu_invltlb();
 2237: 		smp_invltlb();
 2238: 		return;
 2239: 	}
 2240: 
 2241: 	psize = i386_btop(size);
 2242: 
 2243: 	if ((object->type != OBJT_VNODE) ||
 2244: 		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
 2245: 			(object->resident_page_count > MAX_INIT_PT))) {
 2246: 		return;
 2247: 	}
 2248: 
 2249: 	if (psize + pindex > object->size) {
 2250: 		if (object->size < pindex)
 2251: 			return;		  
 2252: 		psize = object->size - pindex;
 2253: 	}
 2254: 
 2255: 	mpte = NULL;
 2256: 	/*
 2257: 	 * if we are processing a major portion of the object, then scan the
 2258: 	 * entire thing.
 2259: 	 */
 2260: 	if (psize > (object->resident_page_count >> 2)) {
 2261: 		objpgs = psize;
 2262: 
 2263: 		for (p = TAILQ_FIRST(&object->memq);
 2264: 		    ((objpgs > 0) && (p != NULL));
 2265: 		    p = TAILQ_NEXT(p, listq)) {
 2266: 
 2267: 			tmpidx = p->pindex;
 2268: 			if (tmpidx < pindex) {
 2269: 				continue;
 2270: 			}
 2271: 			tmpidx -= pindex;
 2272: 			if (tmpidx >= psize) {
 2273: 				continue;
 2274: 			}
 2275: 			/*
 2276: 			 * don't let an madvise prefault eat into our truly
 2277: 			 * free pages by allocating pv entries.
 2278: 			 */
 2279: 			if ((limit & MAP_PREFAULT_MADVISE) &&
 2280: 			    vmstats.v_free_count < vmstats.v_free_reserved) {
 2281: 				break;
 2282: 			}
 2283: 			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 2284: 				(p->busy == 0) &&
 2285: 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2286: 				if ((p->queue - p->pc) == PQ_CACHE)
 2287: 					vm_page_deactivate(p);
 2288: 				vm_page_busy(p);
 2289: 				mpte = pmap_enter_quick(pmap, 
 2290: 					addr + i386_ptob(tmpidx), p, mpte);
 2291: 				vm_page_flag_set(p, PG_MAPPED);
 2292: 				vm_page_wakeup(p);
 2293: 			}
 2294: 			objpgs -= 1;
 2295: 		}
 2296: 	} else {
 2297: 		/*
 2298: 		 * else lookup the pages one-by-one.
 2299: 		 */
 2300: 		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
 2301: 			/*
 2302: 			 * don't let an madvise prefault eat into our truly
 2303: 			 * free pages by allocating pv entries.
 2304: 			 */
 2305: 			if ((limit & MAP_PREFAULT_MADVISE) &&
 2306: 			    vmstats.v_free_count < vmstats.v_free_reserved) {
 2307: 				break;
 2308: 			}
 2309: 			p = vm_page_lookup(object, tmpidx + pindex);
 2310: 			if (p &&
 2311: 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 2312: 				(p->busy == 0) &&
 2313: 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2314: 				if ((p->queue - p->pc) == PQ_CACHE)
 2315: 					vm_page_deactivate(p);
 2316: 				vm_page_busy(p);
 2317: 				mpte = pmap_enter_quick(pmap, 
 2318: 					addr + i386_ptob(tmpidx), p, mpte);
 2319: 				vm_page_flag_set(p, PG_MAPPED);
 2320: 				vm_page_wakeup(p);
 2321: 			}
 2322: 		}
 2323: 	}
 2324: }
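
/*
 * Example (illustrative sketch): pre-populating the page tables right
 * after an mmap-style mapping has been set up.  'obj', 'addr', 'pindex'
 * and 'size' stand in for whatever the map code already has on hand;
 * MAP_PREFAULT_PARTIAL limits the work done on large vnode objects, as
 * handled above.
 */
static __inline void
example_preload_ptes(pmap_t pmap, vm_offset_t addr, vm_object_t obj,
		     vm_pindex_t pindex, vm_size_t size)
{
	pmap_object_init_pt(pmap, addr, obj, pindex, size,
			    MAP_PREFAULT_PARTIAL);
}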
 2325: 
 2326: /*
 2327:  * pmap_prefault provides a quick way of clustering
 2328:  * pagefaults into a process's address space.  It is a "cousin"
 2329:  * of pmap_object_init_pt, except it runs at page fault time instead
 2330:  * of mmap time.
 2331:  */
 2332: #define PFBAK 4
 2333: #define PFFOR 4
 2334: #define PAGEORDER_SIZE (PFBAK+PFFOR)
 2335: 
 2336: static int pmap_prefault_pageorder[] = {
 2337: 	-PAGE_SIZE, PAGE_SIZE,
 2338: 	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
 2339: 	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
 2340: 	-4 * PAGE_SIZE, 4 * PAGE_SIZE
 2341: };
 2342: 
 2343: void
 2344: pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 2345: {
 2346: 	int i;
 2347: 	vm_offset_t starta;
 2348: 	vm_offset_t addr;
 2349: 	vm_pindex_t pindex;
 2350: 	vm_page_t m, mpte;
 2351: 	vm_object_t object;
 2352: 
 2353: 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
 2354: 		return;
 2355: 
 2356: 	object = entry->object.vm_object;
 2357: 
 2358: 	starta = addra - PFBAK * PAGE_SIZE;
 2359: 	if (starta < entry->start) {
 2360: 		starta = entry->start;
 2361: 	} else if (starta > addra) {
 2362: 		starta = 0;
 2363: 	}
 2364: 
 2365: 	mpte = NULL;
 2366: 	for (i = 0; i < PAGEORDER_SIZE; i++) {
 2367: 		vm_object_t lobject;
 2368: 		unsigned *pte;
 2369: 
 2370: 		addr = addra + pmap_prefault_pageorder[i];
 2371: 		if (addr > addra + (PFFOR * PAGE_SIZE))
 2372: 			addr = 0;
 2373: 
 2374: 		if (addr < starta || addr >= entry->end)
 2375: 			continue;
 2376: 
 2377: 		if (*pmap_pde(pmap, addr) == 0)
 2378: 			continue;
 2379: 
 2380: 		pte = (unsigned *) vtopte(addr);
 2381: 		if (*pte)
 2382: 			continue;
 2383: 
 2384: 		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
 2385: 		lobject = object;
 2386: 		for (m = vm_page_lookup(lobject, pindex);
 2387: 		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
 2388: 		    lobject = lobject->backing_object) {
 2389: 			if (lobject->backing_object_offset & PAGE_MASK)
 2390: 				break;
 2391: 			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
 2392: 			m = vm_page_lookup(lobject->backing_object, pindex);
 2393: 		}
 2394: 
 2395: 		/*
 2396: 		 * give up when a page is not in memory
 2397: 		 */
 2398: 		if (m == NULL)
 2399: 			break;
 2400: 
 2401: 		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 2402: 			(m->busy == 0) &&
 2403: 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2404: 
 2405: 			if ((m->queue - m->pc) == PQ_CACHE) {
 2406: 				vm_page_deactivate(m);
 2407: 			}
 2408: 			vm_page_busy(m);
 2409: 			mpte = pmap_enter_quick(pmap, addr, m, mpte);
 2410: 			vm_page_flag_set(m, PG_MAPPED);
 2411: 			vm_page_wakeup(m);
 2412: 		}
 2413: 	}
 2414: }
 2415: 
 2416: /*
 2417:  *	Routine:	pmap_change_wiring
 2418:  *	Function:	Change the wiring attribute for a map/virtual-address
 2419:  *			pair.
 2420:  *	In/out conditions:
 2421:  *			The mapping must already exist in the pmap.
 2422:  */
 2423: void
 2424: pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 2425: {
 2426: 	unsigned *pte;
 2427: 
 2428: 	if (pmap == NULL)
 2429: 		return;
 2430: 
 2431: 	pte = pmap_pte(pmap, va);
 2432: 
 2433: 	if (wired && !pmap_pte_w(pte))
 2434: 		pmap->pm_stats.wired_count++;
 2435: 	else if (!wired && pmap_pte_w(pte))
 2436: 		pmap->pm_stats.wired_count--;
 2437: 
 2438: 	/*
 2439: 	 * Wiring is not a hardware characteristic so there is no need to
 2440: 	 * invalidate TLB.  However, in an SMP environment we must use
 2441: 	 * a locked bus cycle to update the pte (if we are not using 
 2442: 	 * the pmap_inval_*() API that is)... it's ok to do this for simple
 2443: 	 * wiring changes.
 2444: 	 */
 2445: #ifdef SMP
 2446: 	if (wired)
 2447: 		atomic_set_int(pte, PG_W);
 2448: 	else
 2449: 		atomic_clear_int(pte, PG_W);
 2450: #else
 2451: 	if (wired)
 2452: 		atomic_set_int_nonlocked(pte, PG_W);
 2453: 	else
 2454: 		atomic_clear_int_nonlocked(pte, PG_W);
 2455: #endif
 2456: }
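
/*
 * Example (illustrative sketch): wiring code walks an already-mapped,
 * page-aligned range one page at a time; only the PG_W accounting
 * changes, so no TLB invalidation is needed, per the comment above.
 */
static __inline void
example_change_wiring_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
			    boolean_t wired)
{
	vm_offset_t va;

	for (va = sva; va < eva; va += PAGE_SIZE)
		pmap_change_wiring(pmap, va, wired);
}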
 2457: 
 2458: 
 2459: 
 2460: /*
 2461:  *	Copy the range specified by src_addr/len
 2462:  *	from the source map to the range dst_addr/len
 2463:  *	in the destination map.
 2464:  *
 2465:  *	This routine is only advisory and need not do anything.
 2466:  */
 2467: void
 2468: pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 
 2469: 	vm_size_t len, vm_offset_t src_addr)
 2470: {
 2471: 	pmap_inval_info info;
 2472: 	vm_offset_t addr;
 2473: 	vm_offset_t end_addr = src_addr + len;
 2474: 	vm_offset_t pdnxt;
 2475: 	unsigned src_frame, dst_frame;
 2476: 	vm_page_t m;
 2477: 
 2478: 	if (dst_addr != src_addr)
 2479: 		return;
 2480: 
 2481: 	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 2482: 	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
 2483: 		return;
 2484: 	}
 2485: 
 2486: 	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 2487: 	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
 2488: 		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
 2489: 		/* The page directory is not shared between CPUs */
 2490: 		cpu_invltlb();
 2491: 	}
 2492: 	pmap_inval_init(&info);
 2493: 	pmap_inval_add(&info, dst_pmap, -1);
 2494: 	pmap_inval_add(&info, src_pmap, -1);
 2495: 
 2496: 	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
 2497: 		unsigned *src_pte, *dst_pte;
 2498: 		vm_page_t dstmpte, srcmpte;
 2499: 		vm_offset_t srcptepaddr;
 2500: 		unsigned ptepindex;
 2501: 
 2502: 		if (addr >= UPT_MIN_ADDRESS)
 2503: 			panic("pmap_copy: invalid to pmap_copy page tables\n");
 2504: 
 2505: 		/*
 2506: 		 * Don't let optional prefaulting of pages make us go
 2507: 		 * way below the low water mark of free pages or way
 2508: 		 * above high water mark of used pv entries.
 2509: 		 */
 2510: 		if (vmstats.v_free_count < vmstats.v_free_reserved ||
 2511: 		    pv_entry_count > pv_entry_high_water)
 2512: 			break;
 2513: 		
 2514: 		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
 2515: 		ptepindex = addr >> PDRSHIFT;
 2516: 
 2517: 		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
 2518: 		if (srcptepaddr == 0)
 2519: 			continue;
 2520: 			
 2521: 		if (srcptepaddr & PG_PS) {
 2522: 			if (dst_pmap->pm_pdir[ptepindex] == 0) {
 2523: 				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
 2524: 				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 2525: 			}
 2526: 			continue;
 2527: 		}
 2528: 
 2529: 		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
 2530: 		if ((srcmpte == NULL) ||
 2531: 			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
 2532: 			continue;
 2533: 
 2534: 		if (pdnxt > end_addr)
 2535: 			pdnxt = end_addr;
 2536: 
 2537: 		src_pte = (unsigned *) vtopte(addr);
 2538: 		dst_pte = (unsigned *) avtopte(addr);
 2539: 		while (addr < pdnxt) {
 2540: 			unsigned ptetemp;
 2541: 			ptetemp = *src_pte;
 2542: 			/*
 2543: 			 * we only virtual copy managed pages
 2544: 			 */
 2545: 			if ((ptetemp & PG_MANAGED) != 0) {
 2546: 				/*
 2547: 				 * We have to check after allocpte for the
 2548: 				 * pte still being around...  allocpte can
 2549: 				 * block.
 2550: 				 */
 2551: 				dstmpte = pmap_allocpte(dst_pmap, addr);
 2552: 				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
 2553: 					/*
 2554: 					 * Clear the modified and
 2555: 					 * accessed (referenced) bits
 2556: 					 * during the copy.
 2557: 					 */
 2558: 					m = PHYS_TO_VM_PAGE(ptetemp);
 2559: 					*dst_pte = ptetemp & ~(PG_M | PG_A);
 2560: 					dst_pmap->pm_stats.resident_count++;
 2561: 					pmap_insert_entry(dst_pmap, addr,
 2562: 						dstmpte, m);
 2563: 			} else {
 2564: 					pmap_unwire_pte_hold(dst_pmap, dstmpte, &info);
 2565: 				}
 2566: 				if (dstmpte->hold_count >= srcmpte->hold_count)
 2567: 					break;
 2568: 			}
 2569: 			addr += PAGE_SIZE;
 2570: 			src_pte++;
 2571: 			dst_pte++;
 2572: 		}
 2573: 	}
 2574: 	pmap_inval_flush(&info);
 2575: }	
 2576: 
 2577: /*
 2578:  *	Routine:	pmap_kernel
 2579:  *	Function:
 2580:  *		Returns the physical map handle for the kernel.
 2581:  */
 2582: pmap_t
 2583: pmap_kernel(void)
 2584: {
 2585: 	return (kernel_pmap);
 2586: }
 2587: 
 2588: /*
 2589:  * pmap_zero_page:
 2590:  *
 2591:  *	Zero the specified PA by mapping the page into KVM and clearing its
 2592:  *	contents.
 2593:  *
 2594:  *	This function may be called from an interrupt and no locking is
 2595:  *	required.
 2596:  */
 2597: void
 2598: pmap_zero_page(vm_paddr_t phys)
 2599: {
 2600: 	struct mdglobaldata *gd = mdcpu;
 2601: 
 2602: 	crit_enter();
 2603: 	if (*(int *)gd->gd_CMAP3)
 2604: 		panic("pmap_zero_page: CMAP3 busy");
 2605: 	*(int *)gd->gd_CMAP3 =
 2606: 		    PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 2607: 	cpu_invlpg(gd->gd_CADDR3);
 2608: 
 2609: #if defined(I686_CPU)
 2610: 	if (cpu_class == CPUCLASS_686)
 2611: 		i686_pagezero(gd->gd_CADDR3);
 2612: 	else
 2613: #endif
 2614: 		bzero(gd->gd_CADDR3, PAGE_SIZE);
 2615: 	*(int *) gd->gd_CMAP3 = 0;
 2616: 	crit_exit();
 2617: }
 2618: 
 2619: /*
 2620:  * pmap_zero_page_area:
 2621:  *
 2622:  *	Zero part of a physical page by mapping it into memory and clearing
 2623:  *	its contents with bzero.
 2624:  *
 2625:  *	off and size may not cover an area beyond a single hardware page.
 2626:  */
 2627: void
 2628: pmap_zero_page_area(vm_paddr_t phys, int off, int size)
 2629: {
 2630: 	struct mdglobaldata *gd = mdcpu;
 2631: 
 2632: 	crit_enter();
 2633: 	if (*(int *) gd->gd_CMAP3)
 2634: 		panic("pmap_zero_page_area: CMAP3 busy");
 2635: 	*(int *) gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 2636: 	cpu_invlpg(gd->gd_CADDR3);
 2637: 
 2638: #if defined(I686_CPU)
 2639: 	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 2640: 		i686_pagezero(gd->gd_CADDR3);
 2641: 	else
 2642: #endif
 2643: 		bzero((char *)gd->gd_CADDR3 + off, size);
 2644: 	*(int *) gd->gd_CMAP3 = 0;
 2645: 	crit_exit();
 2646: }
 2647: 
 2648: /*
 2649:  * pmap_copy_page:
 2650:  *
 2651:  *	Copy the physical page from the source PA to the target PA.
 2652:  *	This function may be called from an interrupt.  No locking
 2653:  *	is required.
 2654:  */
 2655: void
 2656: pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
 2657: {
 2658: 	struct mdglobaldata *gd = mdcpu;
 2659: 
 2660: 	crit_enter();
 2661: 	if (*(int *) gd->gd_CMAP1)
 2662: 		panic("pmap_copy_page: CMAP1 busy");
 2663: 	if (*(int *) gd->gd_CMAP2)
 2664: 		panic("pmap_copy_page: CMAP2 busy");
 2665: 
 2666: 	*(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
 2667: 	*(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 2668: 
 2669: 	cpu_invlpg(gd->gd_CADDR1);
 2670: 	cpu_invlpg(gd->gd_CADDR2);
 2671: 
 2672: 	bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
 2673: 
 2674: 	*(int *) gd->gd_CMAP1 = 0;
 2675: 	*(int *) gd->gd_CMAP2 = 0;
 2676: 	crit_exit();
 2677: }
 2678: 
 2679: /*
 2680:  * pmap_copy_page_frag:
 2681:  *
 2682:  *	Copy a portion (bytes) of the physical page at the source PA to
 2683:  *	the target PA; src and dst may carry an intra-page offset.  This
 2684:  *	function may be called from an interrupt.  No locking is required.
 2685:  */
 2686: void
 2687: pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
 2688: {
 2689: 	struct mdglobaldata *gd = mdcpu;
 2690: 
 2691: 	crit_enter();
 2692: 	if (*(int *) gd->gd_CMAP1)
 2693: 		panic("pmap_copy_page_frag: CMAP1 busy");
 2694: 	if (*(int *) gd->gd_CMAP2)
 2695: 		panic("pmap_copy_page_frag: CMAP2 busy");
 2696: 
 2697: 	*(int *) gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
 2698: 	*(int *) gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 2699: 
 2700: 	cpu_invlpg(gd->gd_CADDR1);
 2701: 	cpu_invlpg(gd->gd_CADDR2);
 2702: 
 2703: 	bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
 2704: 	      (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
 2705: 	      bytes);
 2706: 
 2707: 	*(int *) gd->gd_CMAP1 = 0;
 2708: 	*(int *) gd->gd_CMAP2 = 0;
 2709: 	crit_exit();
 2710: }
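
/*
 * Example (illustrative sketch): typical consumers of the helpers above.
 * A copy-on-write style fault duplicates a whole page, while a partially
 * valid page can have just its invalid tail cleared.  The vm_page_t
 * arguments and 'validbytes' are assumptions supplied by the caller.
 */
static __inline void
example_cow_copy(vm_page_t src_m, vm_page_t dst_m)
{
	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dst_m));
}

static __inline void
example_zero_tail(vm_page_t m, int validbytes)
{
	pmap_zero_page_area(VM_PAGE_TO_PHYS(m), validbytes,
			    PAGE_SIZE - validbytes);
}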
 2711: 
 2712: 
 2713: /*
 2714:  *	Routine:	pmap_pageable
 2715:  *	Function:
 2716:  *		Make the specified pages (by pmap, offset)
 2717:  *		pageable (or not) as requested.
 2718:  *
 2719:  *		A page which is not pageable may not take
 2720:  *		a fault; therefore, its page table entry
 2721:  *		must remain valid for the duration.
 2722:  *
 2723:  *		This routine is merely advisory; pmap_enter
 2724:  *		will specify that these pages are to be wired
 2725:  *		down (or not) as appropriate.
 2726:  */
 2727: void
 2728: pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, boolean_t pageable)
 2729: {
 2730: }
 2731: 
 2732: /*
 2733:  * Returns true if the pmap's pv is one of the first
 2734:  * 16 pvs linked to from this page.  This count may
 2735:  * be changed upwards or downwards in the future; it
 2736:  * is only necessary that true be returned for a small
 2737:  * subset of pmaps for proper page aging.
 2738:  */
 2739: boolean_t
 2740: pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 2741: {
 2742: 	pv_entry_t pv;
 2743: 	int loops = 0;
 2744: 	int s;
 2745: 
 2746: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2747: 		return FALSE;
 2748: 
 2749: 	s = splvm();
 2750: 
 2751: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2752: 		if (pv->pv_pmap == pmap) {
 2753: 			splx(s);
 2754: 			return TRUE;
 2755: 		}
 2756: 		loops++;
 2757: 		if (loops >= 16)
 2758: 			break;
 2759: 	}
 2760: 	splx(s);
 2761: 	return (FALSE);
 2762: }
 2763: 
 2764: #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2765: /*
 2766:  * Remove all pages from the specified address space; this aids
 2767:  * process exit speeds.  Also, this code is special-cased for the
 2768:  * current process only, but can have the more generic (and
 2769:  * slightly slower) mode enabled.  This is much faster than
 2770:  * pmap_remove in the case of running down an entire address
 2771:  * space.
 2772:  */
 2773: void
 2774: pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2775: {
 2776: 	unsigned *pte, tpte;
 2777: 	pv_entry_t pv, npv;
 2778: 	int s;
 2779: 	vm_page_t m;
 2780: 	pmap_inval_info info;
 2781: 
 2782: #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2783: 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
 2784: 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 2785: 		return;
 2786: 	}
 2787: #endif
 2788: 
 2789: 	pmap_inval_init(&info);
 2790: 	s = splvm();
 2791: 	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
 2792: 		pv;
 2793: 		pv = npv) {
 2794: 
 2795: 		if (pv->pv_va >= eva || pv->pv_va < sva) {
 2796: 			npv = TAILQ_NEXT(pv, pv_plist);
 2797: 			continue;
 2798: 		}
 2799: 
 2800: #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2801: 		pte = (unsigned *)vtopte(pv->pv_va);
 2802: #else
 2803: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2804: #endif
 2805: 		pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
 2806: 		tpte = *pte;
 2807: 
 2808: /*
 2809:  * We cannot remove wired pages from a process' mapping at this time
 2810:  */
 2811: 		if (tpte & PG_W) {
 2812: 			npv = TAILQ_NEXT(pv, pv_plist);
 2813: 			continue;
 2814: 		}
 2815: 		*pte = 0;
 2816: 
 2817: 		m = PHYS_TO_VM_PAGE(tpte);
 2818: 
 2819: 		KASSERT(m < &vm_page_array[vm_page_array_size],
 2820: 			("pmap_remove_pages: bad tpte %x", tpte));
 2821: 
 2822: 		pv->pv_pmap->pm_stats.resident_count--;
 2823: 
 2824: 		/*
 2825: 		 * Update the vm_page_t clean and reference bits.
 2826: 		 */
 2827: 		if (tpte & PG_M) {
 2828: 			vm_page_dirty(m);
 2829: 		}
 2830: 
 2831: 
 2832: 		npv = TAILQ_NEXT(pv, pv_plist);
 2833: 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 2834: 
 2835: 		m->md.pv_list_count--;
 2836: 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2837: 		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
 2838: 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 2839: 		}
 2840: 
 2841: 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
 2842: 		free_pv_entry(pv);
 2843: 	}
 2844: 	pmap_inval_flush(&info);
 2845: 	splx(s);
 2846: }
 2847: 
 2848: /*
 2849:  * pmap_testbit tests bits in ptes.  Note that the testbit/changebit
 2850:  * routines are inline, and a lot of things compile-time evaluate
 2851:  * to constants.
 2852:  */
 2853: static boolean_t
 2854: pmap_testbit(vm_page_t m, int bit)
 2855: {
 2856: 	pv_entry_t pv;
 2857: 	unsigned *pte;
 2858: 	int s;
 2859: 
 2860: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2861: 		return FALSE;
 2862: 
 2863: 	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 2864: 		return FALSE;
 2865: 
 2866: 	s = splvm();
 2867: 
 2868: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2869: 		/*
 2870: 		 * If the bit being tested is the modified or accessed bit,
 2871: 		 * only consider mappings whose modified state is tracked
 2872: 		 * (ptes in the clean submap are skipped).
 2873: 		 */
 2874: 		if (bit & (PG_A|PG_M)) {
 2875: 			if (!pmap_track_modified(pv->pv_va))
 2876: 				continue;
 2877: 		}
 2878: 
 2879: #if defined(PMAP_DIAGNOSTIC)
 2880: 		if (!pv->pv_pmap) {
 2881: 			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 2882: 			continue;
 2883: 		}
 2884: #endif
 2885: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2886: 		if (*pte & bit) {
 2887: 			splx(s);
 2888: 			return TRUE;
 2889: 		}
 2890: 	}
 2891: 	splx(s);
 2892: 	return (FALSE);
 2893: }
 2894: 
 2895: /*
 2896:  * this routine is used to modify bits in ptes
 2897:  */
 2898: static __inline void
 2899: pmap_changebit(vm_page_t m, int bit, boolean_t setem)
 2900: {
 2901: 	struct pmap_inval_info info;
 2902: 	pv_entry_t pv;
 2903: 	unsigned *pte;
 2904: 	int s;
 2905: 
 2906: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2907: 		return;
 2908: 
 2909: 	pmap_inval_init(&info);
 2910: 	s = splvm();
 2911: 
 2912: 	/*
 2913: 	 * Loop over all current mappings, setting or clearing as
 2914: 	 * appropriate.  (If setting RO, do we need to clear the VAC?)
 2915: 	 */
 2916: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2917: 		/*
 2918: 		 * don't write protect pager mappings
 2919: 		 */
 2920: 		if (!setem && (bit == PG_RW)) {
 2921: 			if (!pmap_track_modified(pv->pv_va))
 2922: 				continue;
 2923: 		}
 2924: 
 2925: #if defined(PMAP_DIAGNOSTIC)
 2926: 		if (!pv->pv_pmap) {
 2927: 			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 2928: 			continue;
 2929: 		}
 2930: #endif
 2931: 
 2932: 		/*
 2933: 		 * Careful here.  We can use a locked bus instruction to
 2934: 		 * clear PG_A or PG_M safely but we need to synchronize
 2935: 		 * with the target cpus when we mess with PG_RW.
 2936: 		 */
 2937: 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2938: 		if (bit == PG_RW)
 2939: 			pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
 2940: 
 2941: 		if (setem) {
 2942: #ifdef SMP
 2943: 			atomic_set_int(pte, bit);
 2944: #else
 2945: 			atomic_set_int_nonlocked(pte, bit);
 2946: #endif
 2947: 		} else {
 2948: 			vm_offset_t pbits = *(vm_offset_t *)pte;
 2949: 			if (pbits & bit) {
 2950: 				if (bit == PG_RW) {
 2951: 					if (pbits & PG_M) {
 2952: 						vm_page_dirty(m);
 2953: 					}
 2954: #ifdef SMP
 2955: 					atomic_clear_int(pte, PG_M|PG_RW);
 2956: #else
 2957: 					atomic_clear_int_nonlocked(pte, PG_M|PG_RW);
 2958: #endif
 2959: 				} else {
 2960: #ifdef SMP
 2961: 					atomic_clear_int(pte, bit);
 2962: #else
 2963: 					atomic_clear_int_nonlocked(pte, bit);
 2964: #endif
 2965: 				}
 2966: 			}
 2967: 		}
 2968: 	}
 2969: 	pmap_inval_flush(&info);
 2970: 	splx(s);
 2971: }
 2972: 
 2973: /*
 2974:  *      pmap_page_protect:
 2975:  *
 2976:  *      Lower the permission for all mappings to a given page.
 2977:  */
 2978: void
 2979: pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2980: {
 2981: 	if ((prot & VM_PROT_WRITE) == 0) {
 2982: 		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
 2983: 			pmap_changebit(m, PG_RW, FALSE);
 2984: 		} else {
 2985: 			pmap_remove_all(m);
 2986: 		}
 2987: 	}
 2988: }
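
/*
 * Example (illustrative sketch): the two common ways the VM system uses
 * pmap_page_protect().  Write-protecting keeps the mappings and routes
 * through pmap_changebit(), while VM_PROT_NONE revokes every mapping via
 * pmap_remove_all().
 */
static __inline void
example_page_protect_cases(vm_page_t m)
{
	pmap_page_protect(m, VM_PROT_READ);	/* downgrade to read-only */
	pmap_page_protect(m, VM_PROT_NONE);	/* remove all mappings */
}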
 2989: 
 2990: vm_paddr_t
 2991: pmap_phys_address(int ppn)
 2992: {
 2993: 	return (i386_ptob(ppn));
 2994: }
 2995: 
 2996: /*
 2997:  *	pmap_ts_referenced:
 2998:  *
 2999:  *	Return a count of reference bits for a page, clearing those bits.
 3000:  *	It is not necessary for every reference bit to be cleared, but it
 3001:  *	is necessary that 0 only be returned when there are truly no
 3002:  *	reference bits set.
 3003:  *
 3004:  *	XXX: The exact number of bits to check and clear is a matter that
 3005:  *	should be tested and standardized at some point in the future for
 3006:  *	optimal aging of shared pages.
 3007:  */
 3008: int
 3009: pmap_ts_referenced(vm_page_t m)
 3010: {
 3011: 	pv_entry_t pv, pvf, pvn;
 3012: 	unsigned *pte;
 3013: 	int s;
 3014: 	int rtval = 0;
 3015: 
 3016: 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 3017: 		return (rtval);
 3018: 
 3019: 	s = splvm();
 3020: 
 3021: 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 3022: 
 3023: 		pvf = pv;
 3024: 
 3025: 		do {
 3026: 			pvn = TAILQ_NEXT(pv, pv_list);
 3027: 
 3028: 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 3029: 
 3030: 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 3031: 
 3032: 			if (!pmap_track_modified(pv->pv_va))
 3033: 				continue;
 3034: 
 3035: 			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 3036: 
 3037: 			if (pte && (*pte & PG_A)) {
 3038: #ifdef SMP
 3039: 				atomic_clear_int(pte, PG_A);
 3040: #else
 3041: 				atomic_clear_int_nonlocked(pte, PG_A);
 3042: #endif
 3043: 				rtval++;
 3044: 				if (rtval > 4) {
 3045: 					break;
 3046: 				}
 3047: 			}
 3048: 		} while ((pv = pvn) != NULL && pv != pvf);
 3049: 	}
 3050: 	splx(s);
 3051: 
 3052: 	return (rtval);
 3053: }
 3054: 
 3055: /*
 3056:  *	pmap_is_modified:
 3057:  *
 3058:  *	Return whether or not the specified physical page was modified
 3059:  *	in any physical maps.
 3060:  */
 3061: boolean_t
 3062: pmap_is_modified(vm_page_t m)
 3063: {
 3064: 	return pmap_testbit(m, PG_M);
 3065: }
 3066: 
 3067: /*
 3068:  *	Clear the modify bits on the specified physical page.
 3069:  */
 3070: void
 3071: pmap_clear_modify(vm_page_t m)
 3072: {
 3073: 	pmap_changebit(m, PG_M, FALSE);
 3074: }
 3075: 
 3076: /*
 3077:  *	pmap_clear_reference:
 3078:  *
 3079:  *	Clear the reference bit on the specified physical page.
 3080:  */
 3081: void
 3082: pmap_clear_reference(vm_page_t m)
 3083: {
 3084: 	pmap_changebit(m, PG_A, FALSE);
 3085: }
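
/*
 * Example (illustrative sketch): how a pageout-style scan might combine
 * the accessors above.  The policy shown here (harvest references, fold
 * PG_M into the vm_page dirty state, then clear it) is an assumption,
 * not the actual vm_pageout algorithm.
 */
static __inline int
example_age_page(vm_page_t m)
{
	int refs = pmap_ts_referenced(m);	/* harvests and clears PG_A */

	if (pmap_is_modified(m)) {
		vm_page_dirty(m);		/* reflect PG_M into m->dirty */
		pmap_clear_modify(m);
	}
	return (refs);
}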
 3086: 
 3087: /*
 3088:  * Miscellaneous support routines follow
 3089:  */
 3090: 
 3091: static void
 3092: i386_protection_init(void)
 3093: {
 3094: 	int *kp, prot;
 3095: 
 3096: 	kp = protection_codes;
 3097: 	for (prot = 0; prot < 8; prot++) {
 3098: 		switch (prot) {
 3099: 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 3100: 			/*
 3101: 			 * A read-only mapping also uses protection code 0;
 3102: 			 * there is no execute bit, so just make it readable.
 3103: 			 */
 3104: 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 3105: 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 3106: 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 3107: 			*kp++ = 0;
 3108: 			break;
 3109: 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 3110: 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 3111: 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 3112: 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 3113: 			*kp++ = PG_RW;
 3114: 			break;
 3115: 		}
 3116: 	}
 3117: }
 3118: 
 3119: /*
 3120:  * Map a set of physical memory pages into the kernel virtual
 3121:  * address space. Return a pointer to where it is mapped. This
 3122:  * routine is intended to be used for mapping device memory,
 3123:  * NOT real memory.
 3124:  *
 3125:  * NOTE: we can't use pgeflag unless we invalidate the pages one at
 3126:  * a time.
 3127:  */
 3128: void *
 3129: pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 3130: {
 3131: 	vm_offset_t va, tmpva, offset;
 3132: 	unsigned *pte;
 3133: 
 3134: 	offset = pa & PAGE_MASK;
 3135: 	size = roundup(offset + size, PAGE_SIZE);
 3136: 
 3137: 	va = kmem_alloc_pageable(kernel_map, size);
 3138: 	if (!va)
 3139: 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 3140: 
 3141: 	pa = pa & PG_FRAME;
 3142: 	for (tmpva = va; size > 0;) {
 3143: 		pte = (unsigned *)vtopte(tmpva);
 3144: 		*pte = pa | PG_RW | PG_V; /* | pgeflag; */
 3145: 		size -= PAGE_SIZE;
 3146: 		tmpva += PAGE_SIZE;
 3147: 		pa += PAGE_SIZE;
 3148: 	}
 3149: 	cpu_invltlb();
 3150: 	smp_invltlb();
 3151: 
 3152: 	return ((void *)(va + offset));
 3153: }
 3154: 
 3155: void
 3156: pmap_unmapdev(vm_offset_t va, vm_size_t size)
 3157: {
 3158: 	vm_offset_t base, offset;
 3159: 
 3160: 	base = va & PG_FRAME;
 3161: 	offset = va & PAGE_MASK;
 3162: 	size = roundup(offset + size, PAGE_SIZE);
 3163: 	kmem_free(kernel_map, base, size);
 3164: }
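
/*
 * Example (illustrative sketch): a driver peeking at a device register.
 * The physical base, window size and register offset are hypothetical
 * values; the pointer returned by pmap_mapdev() already includes any
 * sub-page offset of the physical address.
 */
static __inline unsigned
example_peek_device_reg(vm_paddr_t regbase, vm_size_t regsize, int regoff)
{
	volatile unsigned *regs;
	unsigned val;

	regs = pmap_mapdev(regbase, regsize);
	val = regs[regoff / sizeof(*regs)];
	pmap_unmapdev((vm_offset_t)regs, regsize);
	return (val);
}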
 3165: 
 3166: /*
 3167:  * perform the pmap work for mincore
 3168:  */
 3169: int
 3170: pmap_mincore(pmap_t pmap, vm_offset_t addr)
 3171: {
 3172: 	unsigned *ptep, pte;
 3173: 	vm_page_t m;
 3174: 	int val = 0;
 3175: 	
 3176: 	ptep = pmap_pte(pmap, addr);
 3177: 	if (ptep == 0) {
 3178: 		return 0;
 3179: 	}
 3180: 
 3181: 	if ((pte = *ptep) != 0) {
 3182: 		vm_offset_t pa;
 3183: 
 3184: 		val = MINCORE_INCORE;
 3185: 		if ((pte & PG_MANAGED) == 0)
 3186: 			return val;
 3187: 
 3188: 		pa = pte & PG_FRAME;
 3189: 
 3190: 		m = PHYS_TO_VM_PAGE(pa);
 3191: 
 3192: 		/*
 3193: 		 * Modified by us
 3194: 		 */
 3195: 		if (pte & PG_M)
 3196: 			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 3197: 		/*
 3198: 		 * Modified by someone
 3199: 		 */
 3200: 		else if (m->dirty || pmap_is_modified(m))
 3201: 			val |= MINCORE_MODIFIED_OTHER;
 3202: 		/*
 3203: 		 * Referenced by us
 3204: 		 */
 3205: 		if (pte & PG_A)
 3206: 			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 3207: 
 3208: 		/*
 3209: 		 * Referenced by someone
 3210: 		 */
 3211: 		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
 3212: 			val |= MINCORE_REFERENCED_OTHER;
 3213: 			vm_page_flag_set(m, PG_REFERENCED);
 3214: 		}
 3215: 	} 
 3216: 	return val;
 3217: }
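
/*
 * Example (illustrative sketch): folding the bits returned above into a
 * simple resident/dirty summary for one address.  The real mincore(2)
 * path passes the MINCORE_* flags through to userland unchanged; this
 * condensed encoding is an assumption for illustration only.
 */
static __inline int
example_mincore_summary(pmap_t pmap, vm_offset_t addr)
{
	int val = pmap_mincore(pmap, addr);
	int summary = 0;

	if (val & MINCORE_INCORE)
		summary |= 1;		/* page is resident */
	if (val & (MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER))
		summary |= 2;		/* page is dirty somewhere */
	return (summary);
}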
 3218: 
 3219: void
 3220: pmap_activate(struct proc *p)
 3221: {
 3222: 	pmap_t	pmap;
 3223: 
 3224: 	pmap = vmspace_pmap(p->p_vmspace);
 3225: #if defined(SMP)
 3226: 	atomic_set_int(&pmap->pm_active, 1 << mycpu->gd_cpuid);
 3227: #else
 3228: 	pmap->pm_active |= 1;
 3229: #endif
 3230: #if defined(SWTCH_OPTIM_STATS)
 3231: 	tlb_flush_count++;
 3232: #endif
 3233: 	p->p_thread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir);
 3234: 	load_cr3(p->p_thread->td_pcb->pcb_cr3);
 3235: }
 3236: 
 3237: vm_offset_t
 3238: pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 3239: {
 3240: 
 3241: 	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
 3242: 		return addr;
 3243: 	}
 3244: 
 3245: 	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 3246: 	return addr;
 3247: }
 3248: 
 3249: 
 3250: #if defined(PMAP_DEBUG)
 3251: int
 3252: pmap_pid_dump(int pid)
 3253: {
 3254: 	pmap_t pmap;
 3255: 	struct proc *p;
 3256: 	int npte = 0;
 3257: 	int index;
 3258: 	FOREACH_PROC_IN_SYSTEM(p) {
 3259: 		if (p->p_pid != pid)
 3260: 			continue;
 3261: 
 3262: 		if (p->p_vmspace) {
 3263: 			int i,j;
 3264: 			index = 0;
 3265: 			pmap = vmspace_pmap(p->p_vmspace);
 3266: 			for(i=0;i<1024;i++) {
 3267: 				pd_entry_t *pde;
 3268: 				unsigned *pte;
 3269: 				unsigned base = i << PDRSHIFT;
 3270: 				
 3271: 				pde = &pmap->pm_pdir[i];
 3272: 				if (pde && pmap_pde_v(pde)) {
 3273: 					for(j=0;j<1024;j++) {
 3274: 						unsigned va = base + (j << PAGE_SHIFT);
 3275: 						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 3276: 							if (index) {
 3277: 								index = 0;
 3278: 								printf("\n");
 3279: 							}
 3280: 							return npte;
 3281: 						}
 3282: 						pte = pmap_pte_quick( pmap, va);
 3283: 						if (pte && pmap_pte_v(pte)) {
 3284: 							vm_offset_t pa;
 3285: 							vm_page_t m;
 3286: 							pa = *(int *)pte;
 3287: 							m = PHYS_TO_VM_PAGE(pa);
 3288: 							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 3289: 								va, pa, m->hold_count, m->wire_count, m->flags);
 3290: 							npte++;
 3291: 							index++;
 3292: 							if (index >= 2) {
 3293: 								index = 0;
 3294: 								printf("\n");
 3295: 							} else {
 3296: 								printf(" ");
 3297: 							}
 3298: 						}
 3299: 					}
 3300: 				}
 3301: 			}
 3302: 		}
 3303: 	}
 3304: 	return npte;
 3305: }
 3306: #endif
 3307: 
 3308: #if defined(DEBUG)
 3309: 
 3310: static void	pads (pmap_t pm);
 3311: void		pmap_pvdump (vm_paddr_t pa);
 3312: 
 3313: /* print address space of pmap*/
 3314: static void
 3315: pads(pmap_t pm)
 3316: {
 3317: 	unsigned va, i, j;
 3318: 	unsigned *ptep;
 3319: 
 3320: 	if (pm == kernel_pmap)
 3321: 		return;
 3322: 	for (i = 0; i < 1024; i++)
 3323: 		if (pm->pm_pdir[i])
 3324: 			for (j = 0; j < 1024; j++) {
 3325: 				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 3326: 				if (pm == kernel_pmap && va < KERNBASE)
 3327: 					continue;
 3328: 				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 3329: 					continue;
 3330: 				ptep = pmap_pte_quick(pm, va);
 3331: 				if (pmap_pte_v(ptep))
 3332: 					printf("%x:%x ", va, *(int *) ptep);
 3333: 			}
 3334: 
 3335: }
 3336: 
 3337: void
 3338: pmap_pvdump(vm_paddr_t pa)
 3339: {
 3340: 	pv_entry_t pv;
 3341: 	vm_page_t m;
 3342: 
 3343: 	printf("pa %08llx", (long long)pa);
 3344: 	m = PHYS_TO_VM_PAGE(pa);
 3345: 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 3346: #ifdef used_to_be
 3347: 		printf(" -> pmap %p, va %x, flags %x",
 3348: 		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
 3349: #endif
 3350: 		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 3351: 		pads(pv->pv_pmap);
 3352: 	}
 3353: 	printf(" ");
 3354: }
 3355: #endif