author     John Baldwin <jhb@FreeBSD.org>    2015-04-30 15:48:48 +0000
committer  John Baldwin <jhb@FreeBSD.org>    2015-04-30 15:48:48 +0000
commit     ed95805e90ec0f61683cd402a42e6f915339de7d (patch)
tree       4a1cda02dc46c294f76d64f91257f14bc9f65e38
parent     902945c770ac37b116ea3251aa4b914d8942209c (diff)
Remove support for Xen PV domU kernels. Support for HVM domU kernels
remains. Xen is planning to phase out support for PV upstream since it
is harder to maintain and has more overhead. Modern x86 CPUs include
virtualization extensions that support HVM guests instead of PV guests.
In addition, the PV code was i386-only and not as well maintained
recently as the HVM code.

- Remove the i386-only NATIVE option that was used to disable certain
  components for PV kernels. These components are now standard as they
  are on amd64.
- Remove !XENHVM bits from PV drivers.
- Remove various shims required for XEN (e.g. PT_UPDATES_FLUSH,
  LOAD_CR3, etc.).
- Remove duplicate copy of <xen/features.h>.
- Remove unused, i386-only xenstored.h.

Differential Revision:	https://reviews.freebsd.org/D2362
Reviewed by:	royger
Tested by:	royger (i386/amd64 HVM domU and amd64 PVH dom0)
Relnotes:	yes
Notes:
    svn path=/head/; revision=282274
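For reference, a minimal sketch of the DomU kernel configuration that remains
supported after this commit, based on the updated xen.4 synopsis below. The
unchanged configuration lines themselves fall outside the quoted hunk, so the
exact option names here ("options XENHVM", "device xenpci") are carried over
from the pre-change manual page text rather than confirmed by the new one:

	# HVM Xen guest with para-virtualized drivers (amd64 or i386)
	options 	XENHVM
	device		xenpci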
-rw-r--r--  share/man/man4/xen.4                    |   39
-rw-r--r--  sys/amd64/include/xen/xenfunc.h         |    9
-rw-r--r--  sys/amd64/include/xen/xenpmap.h         |  227
-rw-r--r--  sys/amd64/include/xen/xenvar.h          |   61
-rw-r--r--  sys/conf/files                          |   48
-rw-r--r--  sys/conf/files.amd64                    |    2
-rw-r--r--  sys/conf/files.i386                     |   43
-rw-r--r--  sys/conf/options.i386                   |    2
-rw-r--r--  sys/dev/xen/balloon/balloon.c           |   48
-rw-r--r--  sys/dev/xen/blkback/blkback.c           |   13
-rw-r--r--  sys/dev/xen/control/control.c           |  130
-rw-r--r--  sys/dev/xen/grant_table/grant_table.c   |   70
-rw-r--r--  sys/dev/xen/netback/netback.c           |   14
-rw-r--r--  sys/dev/xen/netfront/netfront.c         |   10
-rw-r--r--  sys/i386/conf/DEFAULTS                  |    1
-rw-r--r--  sys/i386/conf/XEN                       |   96
-rw-r--r--  sys/i386/i386/apic_vector.s             |    5
-rw-r--r--  sys/i386/i386/genassym.c                |    5
-rw-r--r--  sys/i386/i386/machdep.c                 |  318
-rw-r--r--  sys/i386/i386/minidump_machdep.c        |   28
-rw-r--r--  sys/i386/i386/support.s                 |    2
-rw-r--r--  sys/i386/i386/swtch.s                   |   22
-rw-r--r--  sys/i386/i386/sys_machdep.c             |  103
-rw-r--r--  sys/i386/i386/vm_machdep.c              |   27
-rw-r--r--  sys/i386/include/asmacros.h             |   31
-rw-r--r--  sys/i386/include/cpufunc.h              |   46
-rw-r--r--  sys/i386/include/intr_machdep.h         |    8
-rw-r--r--  sys/i386/include/pcpu.h                 |   32
-rw-r--r--  sys/i386/include/pmap.h                 |   81
-rw-r--r--  sys/i386/include/segments.h             |    6
-rw-r--r--  sys/i386/include/smp.h                  |    7
-rw-r--r--  sys/i386/include/vmparam.h              |    8
-rw-r--r--  sys/i386/include/xen/features.h         |   22
-rw-r--r--  sys/i386/include/xen/hypercall.h        |    6
-rw-r--r--  sys/i386/include/xen/xen-os.h           |   99
-rw-r--r--  sys/i386/include/xen/xenfunc.h          |    1
-rw-r--r--  sys/i386/include/xen/xenpmap.h          |  237
-rw-r--r--  sys/i386/include/xen/xenstored.h        |   89
-rw-r--r--  sys/i386/include/xen/xenvar.h           |   85
-rw-r--r--  sys/i386/isa/npx.c                      |    9
-rw-r--r--  sys/i386/pci/pci_cfgreg.c               |   10
-rw-r--r--  sys/i386/pci/pci_pir.c                  |    9
-rw-r--r--  sys/i386/xen/clock.c                    |  570
-rw-r--r--  sys/i386/xen/exception.s                |  494
-rw-r--r--  sys/i386/xen/locore.s                   |  360
-rw-r--r--  sys/i386/xen/mp_machdep.c               | 1292
-rw-r--r--  sys/i386/xen/mptable.c                  |  109
-rw-r--r--  sys/i386/xen/pmap.c                     | 4420
-rw-r--r--  sys/i386/xen/xen_machdep.c              | 1236
-rw-r--r--  sys/kern/kern_intr.c                    |    5
-rw-r--r--  sys/kern/kern_synch.c                   |    9
-rw-r--r--  sys/kern/subr_param.c                   |    4
-rw-r--r--  sys/kern/subr_trap.c                    |    9
-rw-r--r--  sys/vm/vm_page.c                        |    8
-rw-r--r--  sys/x86/include/segments.h              |    8
-rw-r--r--  sys/x86/x86/busdma_bounce.c             |    5
-rw-r--r--  sys/x86/x86/cpu_machdep.c               |   53
-rw-r--r--  sys/x86/x86/identcpu.c                  |    4
-rw-r--r--  sys/x86/x86/intr_machdep.c              |    7
-rw-r--r--  sys/x86/x86/local_apic.c                |    6
-rw-r--r--  sys/x86/xen/xen_intr.c                  |    4
-rw-r--r--  sys/x86/xen/xen_nexus.c                 |    6
62 files changed, 90 insertions, 10628 deletions
diff --git a/share/man/man4/xen.4 b/share/man/man4/xen.4
index 2eea69225836..c2b1f5be2cbf 100644
--- a/share/man/man4/xen.4
+++ b/share/man/man4/xen.4
@@ -35,14 +35,6 @@
.Nm xen
.Nd Xen Hypervisor Guest (DomU) Support
.Sh SYNOPSIS
-To compile para-virtualized (PV) Xen guest support into an i386 kernel, place
-the following lines in your kernel configuration file:
-.Bd -ragged -offset indent
-.Cd "options PAE"
-.Cd "options XEN"
-.Cd "nooptions NATIVE"
-.Ed
-.Pp
To compile hardware-assisted virtualization (HVM) Xen guest support with
para-virtualized drivers into an amd64 or i386 kernel,
place the following lines in your kernel configuration file:
@@ -69,34 +61,14 @@ and hence able to optimize certain behaviors to improve performance or
semantics.
.Pp
.Fx
-supports a fully para-virtualized (PV) kernel on the i386 architecture using
-.Cd "options XEN"
-and
-.Cd "nooptions NATIVE" ;
-currently, this requires use of a PAE kernel, enabled via
-.Cd "options PAE" .
-.Pp
-.Fx
-supports hardware-assisted virtualization (HVM) on both the i386 and amd64
-kernels; however, PV device drivers with an HVM kernel are only supported on
-the amd64 architecture, and require
-.Cd "options XENHVM"
-and
-.Cd "device xenpci" .
+supports hardware-assisted virtualization (HVM) on both i386 and amd64
+kernels.
.Pp
Para-virtualized device drivers are required in order to support certain
functionality, such as processing management requests, returning idle
physical memory pages to the hypervisor, etc.
.Ss Xen DomU device drivers
-Xen para-virtualized drivers are automatically added to the kernel if a PV
-kernel is compiled using
-.Cd "options XEN" ;
-for HVM environments,
-.Cd "options XENHVM"
-and
-.Cd "device xenpci"
-are required.
-The follow drivers are supported:
+These para-virtualized drivers are supported:
.Bl -hang -offset indent -width blkfront
.It Nm balloon
Allow physical memory pages to be returned to the hypervisor as a result of
@@ -148,8 +120,6 @@ It is recommended that adaptive locking be disabled when using Xen:
.Cd "options NO_ADAPTIVE_RWLOCKS"
.Cd "options NO_ADAPTIVE_SX"
.Ed
-.Sh SEE ALSO
-.Xr pae 4
.Sh HISTORY
Support for
.Nm
@@ -173,9 +143,6 @@ This manual page was written by
.Fx
is only able to run as a Xen guest (DomU) and not as a Xen host (Dom0).
.Pp
-A fully para-virtualized (PV) kernel is only supported on i386, and not
-amd64.
-.Pp
As of this release, Xen PV DomU support is not heavily tested; instability
has been reported during VM migration of PV kernels.
.Pp
diff --git a/sys/amd64/include/xen/xenfunc.h b/sys/amd64/include/xen/xenfunc.h
index d03d4f685e24..d8a6b5c5c99b 100644
--- a/sys/amd64/include/xen/xenfunc.h
+++ b/sys/amd64/include/xen/xenfunc.h
@@ -29,12 +29,7 @@
#ifndef _XEN_XENFUNC_H_
#define _XEN_XENFUNC_H_
-#ifdef XENHVM
#include <machine/xen/xenvar.h>
-#else
-#include <machine/xen/xenpmap.h>
-#include <machine/segments.h>
-#endif
#define BKPT __asm__("int3");
#define XPQ_CALL_DEPTH 5
@@ -64,10 +59,6 @@ void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
#endif
-#ifndef XENHVM
-void xen_update_descriptor(union descriptor *, union descriptor *);
-#endif
-
extern struct mtx balloon_lock;
#if 0
#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags)
diff --git a/sys/amd64/include/xen/xenpmap.h b/sys/amd64/include/xen/xenpmap.h
deleted file mode 100644
index d768dad5f311..000000000000
--- a/sys/amd64/include/xen/xenpmap.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-
-#include <machine/xen/features.h>
-
-void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
-void xen_pt_switch(vm_paddr_t);
-void xen_set_ldt(vm_paddr_t, unsigned long);
-void xen_pgdpt_pin(vm_paddr_t);
-void xen_pgd_pin(vm_paddr_t);
-void xen_pgd_unpin(vm_paddr_t);
-void xen_pt_pin(vm_paddr_t);
-void xen_pt_unpin(vm_paddr_t);
-void xen_flush_queue(void);
-void xen_check_queue(void);
-#if 0
-void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
-#endif
-
-#ifdef INVARIANTS
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
-#else
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
-#endif
-
-#ifdef PMAP_DEBUG
-#define PMAP_REF pmap_ref
-#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
-#define PMAP_MARK_PRIV pmap_mark_privileged
-#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
-#else
-#define PMAP_MARK_PRIV(a)
-#define PMAP_MARK_UNPRIV(a)
-#define PMAP_REF(a, b)
-#define PMAP_DEC_REF_PAGE(a)
-#endif
-
-#define ALWAYS_SYNC 0
-
-#ifdef PT_DEBUG
-#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
-#else
-#define PT_LOG()
-#endif
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-#define SH_PD_SET_VA 1
-#define SH_PD_SET_VA_MA 2
-#define SH_PD_SET_VA_CLEAR 3
-
-struct pmap;
-void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
-#ifdef notyet
-static vm_paddr_t
-vptetomachpte(vm_paddr_t *pte)
-{
- vm_offset_t offset, ppte;
- vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
- int pgindex;
-
- ppte = (vm_offset_t)pte;
- pgoffset = (ppte & PAGE_MASK);
- offset = ppte - (vm_offset_t)PTmap;
- pgindex = ppte >> PDRSHIFT;
-
- pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
- retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
- return (retval);
-}
-#endif
-#define PT_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
-
-#ifdef WRITABLE_PAGETABLES
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- PT_LOG(); \
- *(_ptp) = xpmap_ptom((_npte)); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- PT_LOG(); \
- *(_ptp) = (_npte); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- PT_LOG(); \
- *(_ptp) = 0; \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#else /* !WRITABLE_PAGETABLES */
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xen_queue_pt_update(vtomach(_ptp), \
- xpmap_ptom(_npte)); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- xen_queue_pt_update(vtomach(_ptp), _npte); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xen_queue_pt_update(vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
-
-#define PT_SET_MA(_va, _ma) \
-do { \
- PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
- (_ma), \
- UVMF_INVLPG| UVMF_ALL) < 0); \
-} while (/*CONSTCOND*/0)
-
-#define PT_UPDATES_FLUSH() do { \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-static __inline vm_paddr_t
-xpmap_mtop(vm_paddr_t mpa)
-{
- vm_paddr_t tmp = (mpa & PG_FRAME);
-
- return machtophys(tmp) | (mpa & ~PG_FRAME);
-}
-
-static __inline vm_paddr_t
-xpmap_ptom(vm_paddr_t ppa)
-{
- vm_paddr_t tmp = (ppa & PG_FRAME);
-
- return phystomach(tmp) | (ppa & ~PG_FRAME);
-}
-
-static __inline void
-set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-#ifdef notyet
- PANIC_IF(max_mapnr && pfn >= max_mapnr);
-#endif
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef notyet
- PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
-#endif
- return;
- }
- xen_phys_machine[pfn] = mfn;
-}
-
-
-
-
-#endif /* _XEN_XENPMAP_H_ */
diff --git a/sys/amd64/include/xen/xenvar.h b/sys/amd64/include/xen/xenvar.h
index d9dbc5d9186a..110a351bac62 100644
--- a/sys/amd64/include/xen/xenvar.h
+++ b/sys/amd64/include/xen/xenvar.h
@@ -48,68 +48,7 @@ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__
#define TRACE_DEBUG(argflags, _f, _a...)
#endif
-#ifdef XENHVM
-
-static inline vm_paddr_t
-phystomach(vm_paddr_t pa)
-{
-
- return (pa);
-}
-
-static inline vm_paddr_t
-machtophys(vm_paddr_t ma)
-{
-
- return (ma);
-}
-
#define vtomach(va) pmap_kextract((vm_offset_t) (va))
-#define PFNTOMFN(pa) (pa)
-#define MFNTOPFN(ma) (ma)
-
-#define set_phys_to_machine(pfn, mfn) ((void)0)
-#define phys_to_machine_mapping_valid(pfn) (TRUE)
-#define PT_UPDATES_FLUSH() ((void)0)
-
-#else
-
-extern xen_pfn_t *xen_phys_machine;
-
-
-extern xen_pfn_t *xen_machine_phys;
-/* Xen starts physical pages after the 4MB ISA hole -
- * FreeBSD doesn't
- */
-
-
-#undef ADD_ISA_HOLE /* XXX */
-
-#ifdef ADD_ISA_HOLE
-#define ISA_INDEX_OFFSET 1024
-#define ISA_PDR_OFFSET 1
-#else
-#define ISA_INDEX_OFFSET 0
-#define ISA_PDR_OFFSET 0
-#endif
-
-
-#define PFNTOMFN(i) (xen_phys_machine[(i)])
-#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
-
-#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
-#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
-
-#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
-#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
-
-#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
-#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
-#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
-
-#endif
void xpq_init(void);
diff --git a/sys/conf/files b/sys/conf/files
index 7831c67eeaab..e91a97bcdafb 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2676,24 +2676,24 @@ wpi.fw optional wpifw \
clean "wpi.fw"
dev/xe/if_xe.c optional xe
dev/xe/if_xe_pccard.c optional xe pccard
-dev/xen/balloon/balloon.c optional xen | xenhvm
-dev/xen/blkfront/blkfront.c optional xen | xenhvm
-dev/xen/blkback/blkback.c optional xen | xenhvm
-dev/xen/console/console.c optional xen | xenhvm
-dev/xen/console/xencons_ring.c optional xen | xenhvm
-dev/xen/control/control.c optional xen | xenhvm
-dev/xen/grant_table/grant_table.c optional xen | xenhvm
-dev/xen/netback/netback.c optional xen | xenhvm
-dev/xen/netfront/netfront.c optional xen | xenhvm
+dev/xen/balloon/balloon.c optional xenhvm
+dev/xen/blkfront/blkfront.c optional xenhvm
+dev/xen/blkback/blkback.c optional xenhvm
+dev/xen/console/console.c optional xenhvm
+dev/xen/console/xencons_ring.c optional xenhvm
+dev/xen/control/control.c optional xenhvm
+dev/xen/grant_table/grant_table.c optional xenhvm
+dev/xen/netback/netback.c optional xenhvm
+dev/xen/netfront/netfront.c optional xenhvm
dev/xen/xenpci/xenpci.c optional xenpci
-dev/xen/timer/timer.c optional xen | xenhvm
-dev/xen/pvcpu/pvcpu.c optional xen | xenhvm
-dev/xen/xenstore/xenstore.c optional xen | xenhvm
-dev/xen/xenstore/xenstore_dev.c optional xen | xenhvm
-dev/xen/xenstore/xenstored_dev.c optional xen | xenhvm
-dev/xen/evtchn/evtchn_dev.c optional xen | xenhvm
-dev/xen/privcmd/privcmd.c optional xen | xenhvm
-dev/xen/debug/debug.c optional xen | xenhvm
+dev/xen/timer/timer.c optional xenhvm
+dev/xen/pvcpu/pvcpu.c optional xenhvm
+dev/xen/xenstore/xenstore.c optional xenhvm
+dev/xen/xenstore/xenstore_dev.c optional xenhvm
+dev/xen/xenstore/xenstored_dev.c optional xenhvm
+dev/xen/evtchn/evtchn_dev.c optional xenhvm
+dev/xen/privcmd/privcmd.c optional xenhvm
+dev/xen/debug/debug.c optional xenhvm
dev/xl/if_xl.c optional xl pci
dev/xl/xlphy.c optional xl pci
fs/autofs/autofs.c optional autofs
@@ -4043,13 +4043,13 @@ vm/vm_reserv.c standard
vm/vm_unix.c standard
vm/vm_zeroidle.c standard
vm/vnode_pager.c standard
-xen/features.c optional xen | xenhvm
-xen/xenbus/xenbus_if.m optional xen | xenhvm
-xen/xenbus/xenbus.c optional xen | xenhvm
-xen/xenbus/xenbusb_if.m optional xen | xenhvm
-xen/xenbus/xenbusb.c optional xen | xenhvm
-xen/xenbus/xenbusb_front.c optional xen | xenhvm
-xen/xenbus/xenbusb_back.c optional xen | xenhvm
+xen/features.c optional xenhvm
+xen/xenbus/xenbus_if.m optional xenhvm
+xen/xenbus/xenbus.c optional xenhvm
+xen/xenbus/xenbusb_if.m optional xenhvm
+xen/xenbus/xenbusb.c optional xenhvm
+xen/xenbus/xenbusb_front.c optional xenhvm
+xen/xenbus/xenbusb_back.c optional xenhvm
xdr/xdr.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_array.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_mbuf.c optional krpc | nfslockd | nfscl | nfsd
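The sys/conf/files syntax changed above ties source files to kernel options:
a file marked "optional xenhvm" is compiled only into kernels configured with
"options XENHVM", whereas the old "optional xen | xenhvm" spelling pulled the
file in for either a PV (XEN) or an HVM (XENHVM) kernel. With the XEN option
gone, for example:

	# built for XENHVM kernels only; no PV-only spelling remains
	dev/xen/netfront/netfront.c	optional xenhvm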
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 2b6d165fbba9..9f57a12ca9ac 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -577,7 +577,7 @@ x86/x86/pvclock.c standard
x86/x86/tsc.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/pv.c optional xenhvm
x86/xen/pvcpu_enum.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 7aba0f433c0a..823475a49dac 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -428,16 +428,15 @@ i386/bios/smapi_bios.S optional smapi
i386/i386/atomic.c standard \
compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
i386/i386/autoconf.c standard
-i386/i386/bios.c optional native
-i386/i386/bioscall.s optional native
+i386/i386/bios.c standard
+i386/i386/bioscall.s standard
i386/i386/bpf_jit_machdep.c optional bpf_jitter
i386/i386/db_disasm.c optional ddb
i386/i386/db_interface.c optional ddb
i386/i386/db_trace.c optional ddb
i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris
i386/i386/elf_machdep.c standard
-i386/i386/exception.s optional native
-i386/xen/exception.s optional xen
+i386/i386/exception.s standard
i386/i386/gdb_machdep.c optional gdb
i386/i386/geode.c optional cpu_geode
i386/i386/i686_mem.c optional mem
@@ -445,22 +444,17 @@ i386/i386/in_cksum.c optional inet | inet6
i386/i386/initcpu.c standard
i386/i386/io.c optional io
i386/i386/k6_mem.c optional mem
-i386/i386/locore.s optional native no-obj
-i386/xen/locore.s optional xen no-obj
+i386/i386/locore.s standard no-obj
i386/i386/longrun.c optional cpu_enable_longrun
i386/i386/machdep.c standard
-i386/xen/xen_machdep.c optional xen
i386/i386/mem.c optional mem
i386/i386/minidump_machdep.c standard
i386/i386/mp_clock.c optional smp
-i386/i386/mp_machdep.c optional native smp
-i386/xen/mp_machdep.c optional xen smp
+i386/i386/mp_machdep.c optional smp
i386/i386/mp_watchdog.c optional mp_watchdog smp
-i386/i386/mpboot.s optional smp native
-i386/xen/mptable.c optional apic xen
+i386/i386/mpboot.s optional smp
i386/i386/perfmon.c optional perfmon
-i386/i386/pmap.c optional native
-i386/xen/pmap.c optional xen
+i386/i386/pmap.c standard
i386/i386/ptrace_machdep.c standard
i386/i386/stack_machdep.c optional ddb | stack
i386/i386/support.s standard
@@ -489,7 +483,6 @@ i386/ibcs2/ibcs2_util.c optional ibcs2
i386/ibcs2/ibcs2_xenix.c optional ibcs2
i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2
i386/ibcs2/imgact_coff.c optional ibcs2
-i386/xen/clock.c optional xen
i386/isa/elink.c optional ep | ie
i386/isa/npx.c optional npx
i386/isa/pmtimer.c optional pmtimer
@@ -566,9 +559,9 @@ x86/iommu/intel_qi.c optional acpi acpi_dmar pci
x86/iommu/intel_quirks.c optional acpi acpi_dmar pci
x86/iommu/intel_utils.c optional acpi acpi_dmar pci
x86/isa/atpic.c optional atpic
-x86/isa/atrtc.c optional native
-x86/isa/clock.c optional native
-x86/isa/elcr.c optional atpic | apic native
+x86/isa/atrtc.c standard
+x86/isa/clock.c standard
+x86/isa/elcr.c optional atpic | apic
x86/isa/isa.c optional isa
x86/isa/isa_dma.c optional isa
x86/isa/nmi.c standard
@@ -583,20 +576,20 @@ x86/x86/fdt_machdep.c optional fdt
x86/x86/identcpu.c standard
x86/x86/intr_machdep.c standard
x86/x86/io_apic.c optional apic
-x86/x86/legacy.c optional native
+x86/x86/legacy.c standard
x86/x86/local_apic.c optional apic
x86/x86/mca.c standard
-x86/x86/mptable.c optional apic native
-x86/x86/mptable_pci.c optional apic native pci
-x86/x86/mp_x86.c optional native smp
+x86/x86/mptable.c optional apic
+x86/x86/mptable_pci.c optional apic pci
+x86/x86/mp_x86.c optional smp
x86/x86/msi.c optional apic pci
x86/x86/nexus.c standard
x86/x86/tsc.c standard
x86/x86/pvclock.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
-x86/xen/xenpv.c optional xen | xenhvm
-x86/xen/xen_nexus.c optional xen | xenhvm
-x86/xen/xen_msi.c optional xen | xenhvm
+x86/xen/xenpv.c optional xenhvm
+x86/xen/xen_nexus.c optional xenhvm
+x86/xen/xen_msi.c optional xenhvm
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index 6f5c45b7e1b2..69eb7e374a21 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -121,8 +121,6 @@ NPX_DEBUG opt_npx.h
# BPF just-in-time compiler
BPF_JITTER opt_bpf.h
-NATIVE opt_global.h
-XEN opt_global.h
XENHVM opt_global.h
HYPERV opt_global.h
diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
index e113e2ce9256..a6036d84961a 100644
--- a/sys/dev/xen/balloon/balloon.c
+++ b/sys/dev/xen/balloon/balloon.c
@@ -118,11 +118,6 @@ current_target(void)
static unsigned long
minimum_target(void)
{
-#ifdef XENHVM
-#define max_pfn realmem
-#else
-#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
-#endif
unsigned long min_pages, curr_pages = current_target();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
@@ -139,16 +134,15 @@ minimum_target(void)
* 32768 1320
* 131072 4392
*/
- if (max_pfn < MB2PAGES(128))
- min_pages = MB2PAGES(8) + (max_pfn >> 1);
- else if (max_pfn < MB2PAGES(512))
- min_pages = MB2PAGES(40) + (max_pfn >> 2);
- else if (max_pfn < MB2PAGES(2048))
- min_pages = MB2PAGES(104) + (max_pfn >> 3);
+ if (realmem < MB2PAGES(128))
+ min_pages = MB2PAGES(8) + (realmem >> 1);
+ else if (realmem < MB2PAGES(512))
+ min_pages = MB2PAGES(40) + (realmem >> 2);
+ else if (realmem < MB2PAGES(2048))
+ min_pages = MB2PAGES(104) + (realmem >> 3);
else
- min_pages = MB2PAGES(296) + (max_pfn >> 5);
+ min_pages = MB2PAGES(296) + (realmem >> 5);
#undef MB2PAGES
-#undef max_pfn
/* Don't enforce growth */
return (min(min_pages, curr_pages));
@@ -204,12 +198,9 @@ increase_reservation(unsigned long nr_pages)
bs.balloon_low--;
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
- KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
- !phys_to_machine_mapping_valid(pfn)),
+ KASSERT(xen_feature(XENFEAT_auto_translated_physmap),
("auto translated physmap but mapping is valid"));
- set_phys_to_machine(pfn, frame_list[i]);
-
vm_page_free(page);
}
@@ -258,9 +249,8 @@ decrease_reservation(unsigned long nr_pages)
}
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
- frame_list[i] = PFNTOMFN(pfn);
+ frame_list[i] = pfn;
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
bs.balloon_low++;
}
@@ -393,21 +383,11 @@ static int
xenballoon_attach(device_t dev)
{
int err;
-#ifndef XENHVM
- vm_page_t page;
- unsigned long pfn;
-
-#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
-#endif
mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
-#ifndef XENHVM
- bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
-#else
bs.current_pages = xen_pv_domain() ?
HYPERVISOR_start_info->nr_pages : realmem;
-#endif
bs.target_pages = bs.current_pages;
bs.balloon_low = 0;
bs.balloon_high = 0;
@@ -416,16 +396,6 @@ xenballoon_attach(device_t dev)
kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
-#ifndef XENHVM
- /* Initialise the balloon with excess memory space. */
- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
- page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
- TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
- bs.balloon_low++;
- }
-#undef max_pfn
-#endif
-
target_watch.callback = watch_target;
err = xs_register_watch(&target_watch);
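A worked example of the minimum_target() heuristic patched above, assuming
4 KiB pages (PAGE_SHIFT = 12, so MB2PAGES(mb) = (mb) << 8):

	/*
	 * realmem = 256 MB = 65536 pages, which falls in the
	 * MB2PAGES(128) <= realmem < MB2PAGES(512) bracket:
	 *
	 *	min_pages = MB2PAGES(40) + (realmem >> 2)
	 *		  = 10240 + 16384 = 26624 pages (= 104 MB)
	 *
	 * i.e. the balloon will not shrink a 256 MB guest below 104 MB.
	 */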
diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c
index 1273961a3ca8..b647fec8b5c2 100644
--- a/sys/dev/xen/blkback/blkback.c
+++ b/sys/dev/xen/blkback/blkback.c
@@ -742,7 +742,6 @@ struct xbb_softc {
/** Mutex protecting per-instance data. */
struct mtx lock;
-#ifdef XENHVM
/**
* Resource representing allocated physical address space
* associated with our per-instance kva region.
@@ -751,7 +750,6 @@ struct xbb_softc {
/** Resource id for allocated physical address space. */
int pseudo_phys_res_id;
-#endif
/**
* I/O statistics from BlockBack dispatch down. These are
@@ -2818,16 +2816,12 @@ static void
xbb_free_communication_mem(struct xbb_softc *xbb)
{
if (xbb->kva != 0) {
-#ifndef XENHVM
- kva_free(xbb->kva, xbb->kva_size);
-#else
if (xbb->pseudo_phys_res != NULL) {
bus_release_resource(xbb->dev, SYS_RES_MEMORY,
xbb->pseudo_phys_res_id,
xbb->pseudo_phys_res);
xbb->pseudo_phys_res = NULL;
}
-#endif
}
xbb->kva = 0;
xbb->gnt_base_addr = 0;
@@ -3055,12 +3049,6 @@ xbb_alloc_communication_mem(struct xbb_softc *xbb)
DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n",
device_get_nameunit(xbb->dev), xbb->kva_size,
xbb->reqlist_kva_size);
-#ifndef XENHVM
- xbb->kva = kva_alloc(xbb->kva_size);
- if (xbb->kva == 0)
- return (ENOMEM);
- xbb->gnt_base_addr = xbb->kva;
-#else /* XENHVM */
/*
* Reserve a range of pseudo physical memory that we can map
* into kva. These pages will only be backed by machine
@@ -3078,7 +3066,6 @@ xbb_alloc_communication_mem(struct xbb_softc *xbb)
}
xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res);
xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res);
-#endif /* XENHVM */
DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n",
device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva,
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index 665a5acdbe29..2a0d459ee621 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -138,9 +138,7 @@ __FBSDID("$FreeBSD$");
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
-#ifdef XENHVM
#include <xen/hvm.h>
-#endif
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
@@ -192,133 +190,6 @@ xctrl_reboot()
shutdown_nice(0);
}
-#ifndef XENHVM
-extern void xencons_suspend(void);
-extern void xencons_resume(void);
-
-/* Full PV mode suspension. */
-static void
-xctrl_suspend()
-{
- int i, j, k, fpp, suspend_cancelled;
- unsigned long max_pfn, start_info_mfn;
-
- EVENTHANDLER_INVOKE(power_suspend);
-
-#ifdef SMP
- struct thread *td;
- cpuset_t map;
- u_int cpuid;
-
- /*
- * Bind us to CPU 0 and stop any other VCPUs.
- */
- td = curthread;
- thread_lock(td);
- sched_bind(td, 0);
- thread_unlock(td);
- cpuid = PCPU_GET(cpuid);
- KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0"));
-
- map = all_cpus;
- CPU_CLR(cpuid, &map);
- CPU_NAND(&map, &stopped_cpus);
- if (!CPU_EMPTY(&map))
- stop_cpus(map);
-#endif
-
- /*
- * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
- * drivers need this.
- */
- mtx_lock(&Giant);
- if (DEVICE_SUSPEND(root_bus) != 0) {
- mtx_unlock(&Giant);
- printf("%s: device_suspend failed\n", __func__);
-#ifdef SMP
- if (!CPU_EMPTY(&map))
- restart_cpus(map);
-#endif
- return;
- }
- mtx_unlock(&Giant);
-
- local_irq_disable();
-
- xencons_suspend();
- gnttab_suspend();
- intr_suspend();
-
- max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
-
- void *shared_info = HYPERVISOR_shared_info;
- HYPERVISOR_shared_info = NULL;
- pmap_kremove((vm_offset_t) shared_info);
- PT_UPDATES_FLUSH();
-
- xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
- xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);
-
- /*
- * We'll stop somewhere inside this hypercall. When it returns,
- * we'll start resuming after the restore.
- */
- start_info_mfn = VTOMFN(xen_start_info);
- pmap_suspend();
- suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
- pmap_resume();
-
- pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
- HYPERVISOR_shared_info = shared_info;
-
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- VTOMFN(xen_pfn_to_mfn_frame_list_list);
-
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
- if ((j % fpp) == 0) {
- k++;
- xen_pfn_to_mfn_frame_list_list[k] =
- VTOMFN(xen_pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- xen_pfn_to_mfn_frame_list[k][j] =
- VTOMFN(&xen_phys_machine[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-
- gnttab_resume(NULL);
- intr_resume(suspend_cancelled != 0);
- local_irq_enable();
- xencons_resume();
-
-#ifdef CONFIG_SMP
- for_each_cpu(i)
- vcpu_prepare(i);
-
-#endif
-
- /*
- * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
- * the VCPU hotplug callback can race with our vcpu_prepare
- */
- mtx_lock(&Giant);
- DEVICE_RESUME(root_bus);
- mtx_unlock(&Giant);
-
-#ifdef SMP
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- if (!CPU_EMPTY(&map))
- restart_cpus(map);
-#endif
- EVENTHANDLER_INVOKE(power_resume);
-}
-
-#else
-
-/* HVM mode suspension. */
static void
xctrl_suspend()
{
@@ -417,7 +288,6 @@ xctrl_suspend()
printf("System resumed after suspension\n");
}
-#endif
static void
xctrl_crash()
diff --git a/sys/dev/xen/grant_table/grant_table.c b/sys/dev/xen/grant_table/grant_table.c
index 25116575c023..ad65fe0113f0 100644
--- a/sys/dev/xen/grant_table/grant_table.c
+++ b/sys/dev/xen/grant_table/grant_table.c
@@ -53,7 +53,6 @@ static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static struct mtx gnttab_list_lock;
-#ifdef XENHVM
/*
* Resource representing allocated physical address space
* for the grant table metainfo
@@ -62,7 +61,6 @@ static struct resource *gnttab_pseudo_phys_res;
/* Resource id for allocated physical address space. */
static int gnttab_pseudo_phys_res_id;
-#endif
static grant_entry_t *shared;
@@ -510,72 +508,6 @@ unmap_pte_fn(pte_t *pte, struct page *pmd_page,
}
#endif
-#ifndef XENHVM
-
-static int
-gnttab_map(unsigned int start_idx, unsigned int end_idx)
-{
- struct gnttab_setup_table setup;
- u_long *frames;
-
- unsigned int nr_gframes = end_idx + 1;
- int i, rc;
-
- frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT);
- if (!frames)
- return (ENOMEM);
-
- setup.dom = DOMID_SELF;
- setup.nr_frames = nr_gframes;
- set_xen_guest_handle(setup.frame_list, frames);
-
- rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
- if (rc == -ENOSYS) {
- free(frames, M_DEVBUF);
- return (ENOSYS);
- }
- KASSERT(!(rc || setup.status),
- ("unexpected result from grant_table_op"));
-
- if (shared == NULL) {
- vm_offset_t area;
-
- area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
- KASSERT(area, ("can't allocate VM space for grant table"));
- shared = (grant_entry_t *)area;
- }
-
- for (i = 0; i < nr_gframes; i++)
- PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE,
- ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
-
- free(frames, M_DEVBUF);
-
- return (0);
-}
-
-int
-gnttab_resume(device_t dev)
-{
-
- if (max_nr_grant_frames() < nr_grant_frames)
- return (ENOSYS);
- return (gnttab_map(0, nr_grant_frames - 1));
-}
-
-int
-gnttab_suspend(void)
-{
- int i;
-
- for (i = 0; i < nr_grant_frames; i++)
- pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE);
-
- return (0);
-}
-
-#else /* XENHVM */
-
static vm_paddr_t resume_frames;
static int
@@ -638,8 +570,6 @@ gnttab_resume(device_t dev)
return (gnttab_map(0, nr_gframes - 1));
}
-#endif
-
static int
gnttab_expand(unsigned int req_entries)
{
diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
index 63337ad4fdbf..b5c1c1362821 100644
--- a/sys/dev/xen/netback/netback.c
+++ b/sys/dev/xen/netback/netback.c
@@ -473,7 +473,6 @@ struct xnb_softc {
*/
gnttab_copy_table tx_gnttab;
-#ifdef XENHVM
/**
* Resource representing allocated physical address space
* associated with our per-instance kva region.
@@ -482,7 +481,6 @@ struct xnb_softc {
/** Resource id for allocated physical address space. */
int pseudo_phys_res_id;
-#endif
/** Ring mapping and interrupt configuration data. */
struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES];
@@ -626,16 +624,12 @@ static void
xnb_free_communication_mem(struct xnb_softc *xnb)
{
if (xnb->kva != 0) {
-#ifndef XENHVM
- kva_free(xnb->kva, xnb->kva_size);
-#else
if (xnb->pseudo_phys_res != NULL) {
bus_release_resource(xnb->dev, SYS_RES_MEMORY,
xnb->pseudo_phys_res_id,
xnb->pseudo_phys_res);
xnb->pseudo_phys_res = NULL;
}
-#endif /* XENHVM */
}
xnb->kva = 0;
xnb->gnt_base_addr = 0;
@@ -816,12 +810,7 @@ xnb_alloc_communication_mem(struct xnb_softc *xnb)
for (i=0; i < XNB_NUM_RING_TYPES; i++) {
xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE;
}
-#ifndef XENHVM
- xnb->kva = kva_alloc(xnb->kva_size);
- if (xnb->kva == 0)
- return (ENOMEM);
- xnb->gnt_base_addr = xnb->kva;
-#else /* defined XENHVM */
+
/*
* Reserve a range of pseudo physical memory that we can map
* into kva. These pages will only be backed by machine
@@ -840,7 +829,6 @@ xnb_alloc_communication_mem(struct xnb_softc *xnb)
}
xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res);
xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res);
-#endif /* !defined XENHVM */
return (0);
}
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
index 27900ba69bb7..3c1f9527d65c 100644
--- a/sys/dev/xen/netfront/netfront.c
+++ b/sys/dev/xen/netfront/netfront.c
@@ -879,12 +879,11 @@ refill:
if (sc->copying_receiver == 0) {
gnttab_grant_foreign_transfer_ref(ref,
otherend_id, pfn);
- sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
+ sc->rx_pfn_array[nr_flips] = pfn;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Remove this page before passing
* back to Xen.
*/
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
MULTI_update_va_mapping(&sc->rx_mcl[i],
vaddr, 0, 0);
}
@@ -892,7 +891,7 @@ refill:
} else {
gnttab_grant_foreign_access_ref(ref,
otherend_id,
- PFNTOMFN(pfn), 0);
+ pfn, 0);
}
req->id = id;
req->gref = ref;
@@ -907,7 +906,6 @@ refill:
* We may have allocated buffers which have entries outstanding
* in the page * update queue -- make sure we flush those first!
*/
- PT_UPDATES_FLUSH();
if (nr_flips != 0) {
#ifdef notyet
/* Tell the ballon driver what is going on. */
@@ -1361,8 +1359,6 @@ xennet_get_responses(struct netfront_info *np,
mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
MMU_MACHPHYS_UPDATE;
mmu->val = pfn;
-
- set_phys_to_machine(pfn, mfn);
}
pages_flipped++;
} else {
@@ -1927,7 +1923,7 @@ network_connect(struct netfront_info *np)
} else {
gnttab_grant_foreign_access_ref(ref,
xenbus_get_otherend_id(np->xbdev),
- PFNTOMFN(pfn), 0);
+ pfn, 0);
}
req->gref = ref;
req->id = requeue_idx;
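The PFNTOMFN() removals in netfront rely on the same invariant asserted in
the balloon driver above: an HVM guest always runs with an auto-translated
physmap (XENFEAT_auto_translated_physmap), so the frame numbers the guest
hands to grant operations need no p2m lookup. A sketch of the identity this
reduces to:

	/*
	 * PV (removed):  mfn = xen_phys_machine[pfn];  explicit p2m lookup
	 * HVM (kept):    the guest-physical pfn is already what the
	 *                hypervisor expects, so pfn is passed directly.
	 */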
diff --git a/sys/i386/conf/DEFAULTS b/sys/i386/conf/DEFAULTS
index 78d807c3d344..d5bdb1df2522 100644
--- a/sys/i386/conf/DEFAULTS
+++ b/sys/i386/conf/DEFAULTS
@@ -26,7 +26,6 @@ options GEOM_PART_EBR_COMPAT
options GEOM_PART_MBR
# enable support for native hardware
-options NATIVE
device atpic
options NEW_PCIB
diff --git a/sys/i386/conf/XEN b/sys/i386/conf/XEN
deleted file mode 100644
index dd83670e21b4..000000000000
--- a/sys/i386/conf/XEN
+++ /dev/null
@@ -1,96 +0,0 @@
-#
-# XEN -- Kernel configuration for i386 XEN DomU
-#
-# $FreeBSD$
-
-cpu I686_CPU
-ident XEN
-
-makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
-
-# The following drivers don't build with PAE or XEN enabled.
-makeoptions WITHOUT_MODULES="ctl dpt drm drm2 hptmv ida"
-
-# The following drivers don't work with PAE enabled.
-makeoptions WITHOUT_MODULES+="ncr pst"
-
-options SCHED_ULE # ULE scheduler
-options PREEMPTION # Enable kernel thread preemption
-
-options INET # InterNETworking
-options INET6 # IPv6 communications protocols
-options SCTP # Stream Control Transmission Protocol
-options FFS # Berkeley Fast Filesystem
-options SOFTUPDATES # Enable FFS soft updates support
-options UFS_ACL # Support for access control lists
-options UFS_DIRHASH # Improve performance on big directories
-options UFS_GJOURNAL # Enable gjournal-based UFS journaling
-options NFSCL # Network Filesystem Client
-options NFSD # Network Filesystem Server
-options NFSLOCKD # Network Lock Manager
-options NFS_ROOT # NFS usable as /, requires NFSCL
-options MSDOSFS # MSDOS Filesystem
-options CD9660 # ISO 9660 Filesystem
-options PROCFS # Process filesystem (requires PSEUDOFS)
-options PSEUDOFS # Pseudo-filesystem framework
-options GEOM_PART_GPT # GUID Partition Tables.
-options GEOM_LABEL # Provides labelization
-options COMPAT_FREEBSD4 # Compatible with FreeBSD4
-options COMPAT_FREEBSD5 # Compatible with FreeBSD5
-options COMPAT_FREEBSD6 # Compatible with FreeBSD6
-options COMPAT_FREEBSD7 # Compatible with FreeBSD7
-options COMPAT_FREEBSD9 # Compatible with FreeBSD9
-options COMPAT_FREEBSD10 # Compatible with FreeBSD10
-options KTRACE # ktrace(1) support
-options STACK # stack(9) support
-options SYSVSHM # SYSV-style shared memory
-options SYSVMSG # SYSV-style message queues
-options SYSVSEM # SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options AUDIT # Security event auditing
-
-# Debugging for use in -current
-options KDB # Enable kernel debugger support.
-options DDB # Support DDB.
-options GDB # Support remote GDB.
-options DEADLKRES # Enable the deadlock resolver
-options INVARIANTS # Enable calls of extra sanity checking
-options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
-options WITNESS # Enable checks to detect deadlocks and cycles
-options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
-
-options PAE
-nooption NATIVE
-option XEN
-nodevice atpic
-nodevice isa
-options MCLSHIFT=12
-
-# To make an SMP kernel, the next two lines are needed
-options SMP # Symmetric MultiProcessor Kernel
-device apic # I/O APIC
-
-#device atkbdc # AT keyboard controller
-#device atkbd # AT keyboard
-device psm # PS/2 mouse
-device pci
-
-#device kbdmux # keyboard multiplexer
-
-# Pseudo devices.
-device loop # Network loopback
-device random # Entropy device
-device ether # Ethernet support
-device tun # Packet tunnel.
-device md # Memory "disks"
-device gif # IPv6 and IPv4 tunneling
-
-# Wireless cards
-options IEEE80211_SUPPORT_MESH
-options AH_SUPPORT_AR5416
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-# Note that 'bpf' is required for DHCP.
-device bpf # Berkeley packet filter
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index 3ad10b916d82..18b3c5dc2d64 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -266,7 +266,6 @@ IDTVEC(invlcache)
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
*/
-#ifndef XEN
.text
SUPERALIGN_TEXT
IDTVEC(ipi_intr_bitmap_handler)
@@ -281,7 +280,7 @@ IDTVEC(ipi_intr_bitmap_handler)
call ipi_bitmap_handler
MEXITCOUNT
jmp doreti
-#endif
+
/*
* Executed by a CPU when it receives an IPI_STOP from another CPU.
*/
@@ -301,7 +300,6 @@ IDTVEC(cpustop)
/*
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
*/
-#ifndef XEN
.text
SUPERALIGN_TEXT
IDTVEC(cpususpend)
@@ -314,7 +312,6 @@ IDTVEC(cpususpend)
POP_FRAME
jmp doreti_iret
-#endif
/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 97e2e979bd20..7a00740a998c 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -238,11 +238,6 @@ ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
#endif
-#ifdef XEN
-ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
-ASSYM(XEN_HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_START);
-#endif
-
#ifdef HWPMC_HOOKS
ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
#endif
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 72f76850b7c0..a5867c43057a 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -160,24 +160,6 @@ int arch_i386_is_xbox = 0;
uint32_t arch_i386_xbox_memsize = 0;
#endif
-#ifdef XEN
-/* XEN includes */
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/xen_intr.h>
-
-void Xhypervisor_callback(void);
-void failsafe_callback(void);
-
-extern trap_info_t trap_table[];
-struct proc_ldt default_proc_ldt;
-extern int init_first;
-int running_xen = 1;
-extern unsigned long physfree;
-#endif /* XEN */
-
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
@@ -356,9 +338,7 @@ cpu_startup(dummy)
*/
bufinit();
vm_pager_bufferinit();
-#ifndef XEN
cpu_setregs();
-#endif
}
/*
@@ -1291,13 +1271,8 @@ SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
int _default_ldt;
-#ifdef XEN
-union descriptor *gdt;
-union descriptor *ldt;
-#else
union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
union descriptor ldt[NLDT]; /* local descriptor table */
-#endif
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
struct region_descriptor r_gdt, r_idt; /* table descriptors */
@@ -1397,7 +1372,6 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_xx = 0, .ssd_xx1 = 0,
.ssd_def32 = 1,
.ssd_gran = 1 },
-#ifndef XEN
/* GPROC0_SEL 9 Proc 0 Tss Descriptor */
{
.ssd_base = 0x0,
@@ -1489,7 +1463,6 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_xx = 0, .ssd_xx1 = 0,
.ssd_def32 = 0,
.ssd_gran = 0 },
-#endif /* !XEN */
};
static struct soft_segment_descriptor ldt_segs[] = {
@@ -1641,7 +1614,7 @@ sdtossd(sd, ssd)
ssd->ssd_gran = sd->sd_gran;
}
-#if !defined(PC98) && !defined(XEN)
+#if !defined(PC98)
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
int *physmap_idxp)
@@ -1748,9 +1721,8 @@ add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
if (!add_smap_entry(smap, physmap, physmap_idxp))
break;
}
-#endif /* !PC98 && !XEN */
+#endif /* !PC98 */
-#ifndef XEN
static void
basemem_setup(void)
{
@@ -1798,7 +1770,6 @@ basemem_setup(void)
for (i = basemem / 4; i < 160; i++)
pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
}
-#endif /* !XEN */
/*
* Populate the (physmap) array with base/bound pairs describing the
@@ -2074,8 +2045,6 @@ do_next:
for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
off);
-
- PT_UPDATES_FLUSH();
}
#else /* PC98 */
static void
@@ -2086,7 +2055,6 @@ getmemsize(int first)
vm_paddr_t physmap[PHYSMAP_SIZE];
pt_entry_t *pte;
quad_t dcons_addr, dcons_size, physmem_tunable;
-#ifndef XEN
int hasbrokenint12, i, res;
u_int extmem;
struct vm86frame vmf;
@@ -2094,17 +2062,8 @@ getmemsize(int first)
vm_paddr_t pa;
struct bios_smap *smap, *smapbase;
caddr_t kmdp;
-#endif
has_smap = 0;
-#if defined(XEN)
- Maxmem = xen_start_info->nr_pages - init_first;
- physmem = Maxmem;
- basemem = 0;
- physmap[0] = init_first << PAGE_SHIFT;
- physmap[1] = ptoa(Maxmem) - round_page(msgbufsize);
- physmap_idx = 0;
-#else
#ifdef XBOX
if (arch_i386_is_xbox) {
/*
@@ -2247,7 +2206,6 @@ have_smap:
physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
physmap_done:
-#endif
/*
* Now, physmap contains a map of physical memory.
*/
@@ -2321,7 +2279,6 @@ physmap_done:
getenv_quad("dcons.size", &dcons_size) == 0)
dcons_addr = 0;
-#ifndef XEN
/*
* physmap is in bytes, so when converting to page boundaries,
* round up the start address and round down the end address.
@@ -2442,13 +2399,6 @@ do_next:
}
*pte = 0;
invltlb();
-#else
- phys_avail[0] = physfree;
- phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
- dump_avail[0] = 0;
- dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
-
-#endif
/*
* XXX
@@ -2472,272 +2422,9 @@ do_next:
for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
off);
-
- PT_UPDATES_FLUSH();
}
#endif /* PC98 */
-#ifdef XEN
-#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-
-register_t
-init386(first)
- int first;
-{
- unsigned long gdtmachpfn;
- int error, gsel_tss, metadata_missing, x, pa;
- struct pcpu *pc;
-#ifdef CPU_ENABLE_SSE
- struct xstate_hdr *xhdr;
-#endif
- struct callback_register event = {
- .type = CALLBACKTYPE_event,
- .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
- };
- struct callback_register failsafe = {
- .type = CALLBACKTYPE_failsafe,
- .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback },
- };
-
- thread0.td_kstack = proc0kstack;
- thread0.td_kstack_pages = KSTACK_PAGES;
-
- /*
- * This may be done better later if it gets more high level
- * components in it. If so just link td->td_proc here.
- */
- proc_linkup0(&proc0, &thread0);
-
- metadata_missing = 0;
- if (xen_start_info->mod_start) {
- preload_metadata = (caddr_t)xen_start_info->mod_start;
- preload_bootstrap_relocate(KERNBASE);
- } else {
- metadata_missing = 1;
- }
- if (envmode == 1)
- kern_envp = static_env;
- else if ((caddr_t)xen_start_info->cmd_line)
- kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
-
- boothowto |= xen_boothowto(kern_envp);
-
- /* Init basic tunables, hz etc */
- init_param1();
-
- /*
- * XEN occupies a portion of the upper virtual address space
- * At its base it manages an array mapping machine page frames
- * to physical page frames - hence we need to be able to
- * access 4GB - (64MB - 4MB + 64k)
- */
- gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
-
- pc = &__pcpu[0];
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
-
- PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW);
- bzero(gdt, PAGE_SIZE);
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &gdt[x].sd);
-
- mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
-
- gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
- PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V);
- PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
- lgdt(&r_gdt);
- gdtset = 1;
-
- if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
- panic("set_trap_table failed - error %d\n", error);
- }
-
- error = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
- if (error == 0)
- error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
-#if CONFIG_XEN_COMPAT <= 0x030002
- if (error == -ENOXENSYS)
- HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL),
- (unsigned long)Xhypervisor_callback,
- GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
-#endif
- pcpu_init(pc, 0, sizeof(struct pcpu));
- for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
- pmap_kenter(pa + KERNBASE, pa);
- dpcpu_init((void *)(first + KERNBASE), 0);
- first += DPCPU_SIZE;
- physfree += DPCPU_SIZE;
- init_first += DPCPU_SIZE / PAGE_SIZE;
-
- PCPU_SET(prvspace, pc);
- PCPU_SET(curthread, &thread0);
-
- /*
- * Initialize mutexes.
- *
- * icu_lock: in order to allow an interrupt to occur in a critical
- * section, to set pcpu->ipending (etc...) properly, we
- * must be able to get the icu lock, so it can't be
- * under witness.
- */
- mutex_init();
- mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
-
- /* make ldt memory segments */
- PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW);
- bzero(ldt, PAGE_SIZE);
- ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
- ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
- for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
- ssdtosd(&ldt_segs[x], &ldt[x].sd);
-
- default_proc_ldt.ldt_base = (caddr_t)ldt;
- default_proc_ldt.ldt_len = 6;
- _default_ldt = (int)&default_proc_ldt;
- PCPU_SET(currentldt, _default_ldt);
- PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
- xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
-
-#if defined(XEN_PRIVILEGED)
- /*
- * Initialize the i8254 before the console so that console
- * initialization can use DELAY().
- */
- i8254_init();
-#endif
-
- /*
- * Initialize the console before we print anything out.
- */
- cninit();
-
- if (metadata_missing)
- printf("WARNING: loader(8) metadata is missing!\n");
-
-#ifdef DEV_ISA
-#ifdef DEV_ATPIC
- elcr_probe();
- atpic_startup();
-#else
- /* Reset and mask the atpics and leave them shut down. */
- atpic_reset();
-
- /*
- * Point the ICU spurious interrupt vectors at the APIC spurious
- * interrupt handler.
- */
- setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-#endif
-#endif
-
-#ifdef DDB
- db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
-#endif
-
- kdb_init();
-
-#ifdef KDB
- if (boothowto & RB_KDB)
- kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
-#endif
-
- finishidentcpu(); /* Final stage of CPU initialization */
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- initializecpu(); /* Initialize CPU registers */
- initializecpucache();
-
- /* pointer to selector slot for %fs/%gs */
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
- dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
- dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
- dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#if defined(PAE) || defined(PAE_TABLES)
- dblfault_tss.tss_cr3 = (int)IdlePDPT;
-#else
- dblfault_tss.tss_cr3 = (int)IdlePTD;
-#endif
- dblfault_tss.tss_eip = (int)dblfault_handler;
- dblfault_tss.tss_eflags = PSL_KERNEL;
- dblfault_tss.tss_ds = dblfault_tss.tss_es =
- dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
- dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
- dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
- dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
- vm86_initialize();
- getmemsize(first);
- init_param2(physmem);
-
- /* now running on new page tables, configured,and u/iom is accessible */
-
- msgbufinit(msgbufp, msgbufsize);
-#ifdef DEV_NPX
- npxinit(true);
-#endif
- /*
- * Set up thread0 pcb after npxinit calculated pcb + fpu save
- * area size. Zero out the extended state header in fpu save
- * area.
- */
- thread0.td_pcb = get_pcb_td(&thread0);
- bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
-#ifdef CPU_ENABLE_SSE
- if (use_xsave) {
- xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
- 1);
- xhdr->xstate_bv = xsave_mask;
- }
-#endif
- PCPU_SET(curpcb, thread0.td_pcb);
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
- PCPU_GET(common_tss.tss_esp0));
-
- /* transfer to user mode */
-
- _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
- _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
-
- /* setup proc 0's pcb */
- thread0.td_pcb->pcb_flags = 0;
-#if defined(PAE) || defined(PAE_TABLES)
- thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
-#else
- thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
-#endif
- thread0.td_pcb->pcb_ext = 0;
- thread0.td_frame = &proc0_tf;
- thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
- thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
-
- cpu_probe_amdc1e();
-
- /* Location of kernel stack for locore */
- return ((register_t)thread0.td_pcb);
-}
-
-#else
register_t
init386(first)
int first;
@@ -3061,7 +2748,6 @@ init386(first)
/* Location of kernel stack for locore */
return ((register_t)thread0.td_pcb);
}
-#endif
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
diff --git a/sys/i386/i386/minidump_machdep.c b/sys/i386/i386/minidump_machdep.c
index a7d2a69e3901..8f78abcb585e 100644
--- a/sys/i386/i386/minidump_machdep.c
+++ b/sys/i386/i386/minidump_machdep.c
@@ -68,10 +68,6 @@ static void *dump_va;
static uint64_t counter, progress;
CTASSERT(sizeof(*vm_page_dump) == 4);
-#ifndef XEN
-#define xpmap_mtop(x) (x)
-#define xpmap_ptom(x) (x)
-#endif
static int
@@ -205,7 +201,7 @@ minidumpsys(struct dumperinfo *di)
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
- pa = xpmap_mtop(pd[j] & PG_PS_FRAME);
+ pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
if (is_dumpable(pa))
dump_add_page(pa);
@@ -215,10 +211,10 @@ minidumpsys(struct dumperinfo *di)
}
if ((pd[j] & PG_V) == PG_V) {
/* set bit for each valid page in this 2MB block */
- pt = pmap_kenter_temporary(xpmap_mtop(pd[j] & PG_FRAME), 0);
+ pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
for (k = 0; k < NPTEPG; k++) {
if ((pt[k] & PG_V) == PG_V) {
- pa = xpmap_mtop(pt[k] & PG_FRAME);
+ pa = pt[k] & PG_FRAME;
if (is_dumpable(pa))
dump_add_page(pa);
}
@@ -318,24 +314,8 @@ minidumpsys(struct dumperinfo *di)
continue;
}
if ((pd[j] & PG_V) == PG_V) {
- pa = xpmap_mtop(pd[j] & PG_FRAME);
-#ifndef XEN
+ pa = pd[j] & PG_FRAME;
error = blk_write(di, 0, pa, PAGE_SIZE);
-#else
- pt = pmap_kenter_temporary(pa, 0);
- memcpy(fakept, pt, PAGE_SIZE);
- for (i = 0; i < NPTEPG; i++)
- fakept[i] = xpmap_mtop(fakept[i]);
- error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
- if (error)
- goto fail;
- /* flush, in case we reuse fakept in the same block */
- error = blk_flush(di);
- if (error)
- goto fail;
- bzero(fakept, sizeof(fakept));
-#endif
-
if (error)
goto fail;
} else {
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 0a08012e0ec0..78d76efd5c42 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -695,11 +695,9 @@ END(bcmp)
*/
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
-#ifndef XEN
/* reload the descriptor table */
movl 4(%esp),%eax
lgdt (%eax)
-#endif
/* flush the prefetch q */
jmp 1f
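
Under PV the lgdt instruction was skipped because the hypervisor owned the descriptor tables; the reload is now unconditional. The operand lgdt consumes is a 6-byte limit/base pair, which a runnable sketch with invented values makes concrete:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>
#include <stdio.h>

struct region_descriptor {
    uint16_t rd_limit;               /* table size in bytes, minus one */
    uint32_t rd_base;                /* linear base address */
} __attribute__((__packed__));

int main(void)
{
    struct region_descriptor rd = {
        .rd_limit = 32 * 8 - 1,      /* 32 descriptors of 8 bytes each */
        .rd_base  = 0xc0400000u,     /* made-up kernel address */
    };

    printf("lgdt operand: limit=%u base=0x%08x (%zu bytes)\n",
        rd.rd_limit, rd.rd_base, sizeof(rd));
    return 0;
}
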
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index 26bfd3b1e120..6bedb48c3647 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -88,7 +88,7 @@ ENTRY(cpu_throw)
movl 8(%esp),%ecx /* New thread */
movl TD_PCB(%ecx),%edx
movl PCB_CR3(%edx),%eax
- LOAD_CR3(%eax)
+ movl %eax,%cr3
/* set bit in new pm_active */
movl TD_PROC(%ecx),%eax
movl P_VMSPACE(%eax), %ebx
@@ -174,10 +174,10 @@ ENTRY(cpu_switch)
/* switch address space */
movl PCB_CR3(%edx),%eax
- READ_CR3(%ebx) /* The same address space? */
+ movl %cr3,%ebx /* The same address space? */
cmpl %ebx,%eax
je sw0
- LOAD_CR3(%eax) /* new address space */
+ movl %eax,%cr3 /* new address space */
movl %esi,%eax
movl PCPU(CPUID),%esi
SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */
@@ -204,18 +204,6 @@ sw0:
SETOP %esi,TD_LOCK(%edi) /* Switchout td_lock */
sw1:
BLOCK_SPIN(%ecx)
-#ifdef XEN
- pushl %eax
- pushl %ecx
- pushl %edx
- call xen_handle_thread_switch
- popl %edx
- popl %ecx
- popl %eax
- /*
- * XXX set IOPL
- */
-#else
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
@@ -264,7 +252,7 @@ sw1:
movl 12(%esi), %ebx
movl %eax, 8(%edi)
movl %ebx, 12(%edi)
-#endif
+
/* Restore context. */
movl PCB_EBX(%edx),%ebx
movl PCB_ESP(%edx),%esp
@@ -290,7 +278,7 @@ sw1:
movl _default_ldt,%eax
cmpl PCPU(CURRENTLDT),%eax
je 2f
- LLDT(_default_ldt)
+ lldt _default_ldt
movl %eax,PCPU(CURRENTLDT)
jmp 2f
1:
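
The removed READ_CR3/LOAD_CR3 wrappers routed %cr3 accesses through xen_load_cr3() and a per-CPU shadow; natively each is a single move, as the hunk above shows. A ring-0-only C rendering of the fast path, a sketch assuming the usual i386 inline style:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>

static __inline uint32_t
rcr3(void)
{
    uint32_t cr3;

    __asm __volatile("movl %%cr3,%0" : "=r" (cr3));
    return (cr3);
}

static __inline void
load_cr3(uint32_t cr3)
{
    __asm __volatile("movl %0,%%cr3" : : "r" (cr3) : "memory");
}

static __inline void
switch_address_space(uint32_t new_cr3)
{
    if (rcr3() != new_cr3)      /* same address space? skip the reload */
        load_cr3(new_cr3);      /* implicit TLB flush otherwise */
}
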
diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c
index 951886273107..9044d19b2654 100644
--- a/sys/i386/i386/sys_machdep.c
+++ b/sys/i386/i386/sys_machdep.c
@@ -59,20 +59,6 @@ __FBSDID("$FreeBSD$");
#include <security/audit/audit.h>
-#ifdef XEN
-#include <machine/xen/xenfunc.h>
-
-void i386_reset_ldt(struct proc_ldt *pldt);
-
-void
-i386_reset_ldt(struct proc_ldt *pldt)
-{
- xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
-}
-#else
-#define i386_reset_ldt(x)
-#endif
-
#include <vm/vm_kern.h> /* for kernel_map */
#define MAX_LD 8192
@@ -211,12 +197,7 @@ sysarch(td, uap)
*/
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -226,12 +207,7 @@ sysarch(td, uap)
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_fsd = sd;
-#ifdef XEN
- HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
- *(uint64_t *)&sd);
-#else
PCPU_GET(fsgs_gdt)[0] = sd;
-#endif
critical_exit();
td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
}
@@ -252,12 +228,7 @@ sysarch(td, uap)
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -267,12 +238,7 @@ sysarch(td, uap)
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_gsd = sd;
-#ifdef XEN
- HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
- *(uint64_t *)&sd);
-#else
PCPU_GET(fsgs_gdt)[1] = sd;
-#endif
critical_exit();
load_gs(GSEL(GUGS_SEL, SEL_UPL));
}
@@ -434,10 +400,6 @@ set_user_ldt(struct mdproc *mdp)
}
pldt = mdp->md_ldt;
-#ifdef XEN
- i386_reset_ldt(pldt);
- PCPU_SET(currentldt, (int)pldt);
-#else
#ifdef SMP
gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
@@ -445,7 +407,6 @@ set_user_ldt(struct mdproc *mdp)
#endif
lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
-#endif /* XEN */
if (dtlocked)
mtx_unlock_spin(&dt_lock);
}
@@ -464,43 +425,6 @@ set_user_ldt_rv(struct vmspace *vmsp)
}
#endif
-#ifdef XEN
-
-/*
- * dt_lock must be held. Returns with dt_lock held.
- */
-struct proc_ldt *
-user_ldt_alloc(struct mdproc *mdp, int len)
-{
- struct proc_ldt *pldt, *new_ldt;
-
- mtx_assert(&dt_lock, MA_OWNED);
- mtx_unlock_spin(&dt_lock);
- new_ldt = malloc(sizeof(struct proc_ldt),
- M_SUBPROC, M_WAITOK);
-
- new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
- round_page(len * sizeof(union descriptor)), M_WAITOK);
- new_ldt->ldt_refcnt = 1;
- new_ldt->ldt_active = 0;
-
- mtx_lock_spin(&dt_lock);
- if ((pldt = mdp->md_ldt)) {
- if (len > pldt->ldt_len)
- len = pldt->ldt_len;
- bcopy(pldt->ldt_base, new_ldt->ldt_base,
- len * sizeof(union descriptor));
- } else {
- bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
- }
- mtx_unlock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */
- pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
- new_ldt->ldt_len*sizeof(union descriptor));
- mtx_lock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */
- return (new_ldt);
-}
-#else
/*
* dt_lock must be held. Returns with dt_lock held.
*/
@@ -535,7 +459,6 @@ user_ldt_alloc(struct mdproc *mdp, int len)
return (new_ldt);
}
-#endif /* !XEN */
/*
* Must be called with dt_lock held. Returns with dt_lock unheld.
@@ -553,13 +476,8 @@ user_ldt_free(struct thread *td)
}
if (td == curthread) {
-#ifdef XEN
- i386_reset_ldt(&default_proc_ldt);
- PCPU_SET(currentldt, (int)&default_proc_ldt);
-#else
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
-#endif
}
mdp->md_ldt = NULL;
@@ -785,27 +703,7 @@ again:
td->td_retval[0] = uap->start;
return (error);
}
-#ifdef XEN
-static int
-i386_set_ldt_data(struct thread *td, int start, int num,
- union descriptor *descs)
-{
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = mdp->md_ldt;
- mtx_assert(&dt_lock, MA_OWNED);
-
- while (num) {
- xen_update_descriptor(
- &((union descriptor *)(pldt->ldt_base))[start],
- descs);
- num--;
- start++;
- descs++;
- }
- return (0);
-}
-#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
union descriptor *descs)
@@ -821,7 +719,6 @@ i386_set_ldt_data(struct thread *td, int start, int num,
num * sizeof(union descriptor));
return (0);
}
-#endif /* !XEN */
static int
i386_ldt_grow(struct thread *td, int len)
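
Both sysarch() branches above now always build a flat descriptor with a 4GB wrap-around limit; the PV "nosegneg" clamp below HYPERVISOR_VIRT_START is gone. The 32-bit base still splits across the descriptor's low 24 and high 8 bits, which a short runnable sketch makes concrete (values invented):

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t base = 0xbfc01234u;                 /* arbitrary TLS base */
    uint32_t lobase = base & 0xffffff;           /* sd_lobase: low 24 bits */
    uint32_t hibase = (base >> 24) & 0xff;       /* sd_hibase: high 8 bits */

    printf("lobase=0x%06x hibase=0x%02x\n", lobase, hibase);
    printf("reassembled=0x%08x\n", (hibase << 24) | lobase);  /* lossless */
    return 0;
}
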
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 5fd4a16030c4..5671dc916b78 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -89,9 +89,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_param.h>
-#ifdef XEN
-#include <xen/hypervisor.h>
-#endif
#ifdef PC98
#include <pc98/cbus/cbus.h>
#else
@@ -304,10 +301,8 @@ cpu_fork(td1, p2, td2, flags)
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
- /*
- * XXX XEN need to check on PSL_USER is handled
- */
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+
/*
* Now, cpu_switch() can schedule the new process.
* pcb_esp is loaded pointing to the cpu_switch() stack frame
@@ -698,12 +693,6 @@ cpu_reset_real()
#endif
disable_intr();
-#ifdef XEN
- if (smp_processor_id() == 0)
- HYPERVISOR_shutdown(SHUTDOWN_reboot);
- else
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-#endif
#ifdef CPU_ELAN
if (elan_mmcr != NULL)
elan_mmcr->RESCFG = 1;
@@ -797,13 +786,8 @@ sf_buf_map(struct sf_buf *sf, int flags)
*/
ptep = vtopte(sf->kva);
opte = *ptep;
-#ifdef XEN
- PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
- | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
-#else
*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
-#endif
/*
* Avoid unnecessary TLB invalidations: If the sf_buf's old
@@ -854,15 +838,8 @@ sf_buf_shootdown(struct sf_buf *sf, int flags)
int
sf_buf_unmap(struct sf_buf *sf)
{
-#ifdef XEN
- /*
- * Xen doesn't like having dangling R/W mappings
- */
- pmap_qremove(sf->kva, 1);
- return (1);
-#else
+
return (0);
-#endif
}
static void
diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h
index c1c3f645cbf5..716915c36616 100644
--- a/sys/i386/include/asmacros.h
+++ b/sys/i386/include/asmacros.h
@@ -176,37 +176,6 @@
movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
movl %eax, %fs
-#ifdef XEN
-#define LOAD_CR3(reg) \
- movl reg,PCPU(CR3); \
- pushl %ecx ; \
- pushl %edx ; \
- pushl %esi ; \
- pushl reg ; \
- call xen_load_cr3 ; \
- addl $4,%esp ; \
- popl %esi ; \
- popl %edx ; \
- popl %ecx ; \
-
-#define READ_CR3(reg) movl PCPU(CR3),reg;
-#define LLDT(arg) \
- pushl %edx ; \
- pushl %eax ; \
- xorl %eax,%eax ; \
- movl %eax,%gs ; \
- call i386_reset_ldt ; \
- popl %eax ; \
- popl %edx
-#define CLI call ni_cli
-#else
-#define LOAD_CR3(reg) movl reg,%cr3;
-#define READ_CR3(reg) movl %cr3,reg;
-#define LLDT(arg) lldt arg;
-#define CLI cli
-#endif /* !XEN */
-
-
#endif /* LOCORE */
#ifdef __STDC__
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index f80a8983ab29..3bc25d42116e 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -42,17 +42,6 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
-#ifdef XEN
-extern void xen_cli(void);
-extern void xen_sti(void);
-extern u_int xen_rcr2(void);
-extern void xen_load_cr3(u_int data);
-extern void xen_tlb_flush(void);
-extern void xen_invlpg(u_int addr);
-extern void write_eflags(u_int eflags);
-extern u_int read_eflags(void);
-#endif
-
struct region_descriptor;
#define readb(va) (*(volatile uint8_t *) (va))
@@ -106,11 +95,8 @@ clts(void)
static __inline void
disable_intr(void)
{
-#ifdef XEN
- xen_cli();
-#else
+
__asm __volatile("cli" : : : "memory");
-#endif
}
static __inline void
@@ -132,11 +118,8 @@ cpuid_count(u_int ax, u_int cx, u_int *p)
static __inline void
enable_intr(void)
{
-#ifdef XEN
- xen_sti();
-#else
+
__asm __volatile("sti");
-#endif
}
static __inline void
@@ -325,11 +308,7 @@ ia32_pause(void)
}
static __inline u_int
-#ifdef XEN
-_read_eflags(void)
-#else
read_eflags(void)
-#endif
{
u_int ef;
@@ -389,11 +368,7 @@ wbinvd(void)
}
static __inline void
-#ifdef XEN
-_write_eflags(u_int ef)
-#else
write_eflags(u_int ef)
-#endif
{
__asm __volatile("pushl %0; popfl" : : "r" (ef));
}
@@ -425,9 +400,6 @@ rcr2(void)
{
u_int data;
-#ifdef XEN
- return (xen_rcr2());
-#endif
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
@@ -435,11 +407,8 @@ rcr2(void)
static __inline void
load_cr3(u_int data)
{
-#ifdef XEN
- xen_load_cr3(data);
-#else
+
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
-#endif
}
static __inline u_int
@@ -491,11 +460,8 @@ load_xcr(u_int reg, uint64_t val)
static __inline void
invltlb(void)
{
-#ifdef XEN
- xen_tlb_flush();
-#else
+
load_cr3(rcr3());
-#endif
}
/*
@@ -506,11 +472,7 @@ static __inline void
invlpg(u_int addr)
{
-#ifdef XEN
- xen_invlpg(addr);
-#else
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-#endif
}
static __inline u_short
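
invlpg() and invltlb() likewise drop their xen_invlpg()/xen_tlb_flush() detours. A ring-0-only sketch of the two native primitives, written as equivalent standalone inlines rather than the file's exact bodies:

/* --- illustrative sketch, not part of the commit --- */
typedef unsigned int u_int;     /* as in <sys/types.h> */

static __inline void
invlpg(u_int addr)
{
    /* evict the translation for a single page */
    __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}

static __inline void
invltlb(void)
{
    /* reloading %cr3 flushes every non-global TLB entry */
    __asm __volatile("movl %%cr3,%%eax; movl %%eax,%%cr3"
        : : : "eax", "memory");
}
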
diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h
index 082b649242c6..96ac06a5c514 100644
--- a/sys/i386/include/intr_machdep.h
+++ b/sys/i386/include/intr_machdep.h
@@ -58,13 +58,7 @@
(FIRST_MSI_INT + NUM_MSI_INTS)
#define LAST_EVTCHN_INT \
(FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#elif defined(XEN)
-#include <xen/xen-os.h>
-#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS
-#define FIRST_EVTCHN_INT 0
-#define LAST_EVTCHN_INT \
- (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#else /* !XEN && !XENHVM */
+#else /* !XENHVM */
#define NUM_EVTCHN_INTS 0
#endif
#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)
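
The interrupt-source namespace now only widens for event channels when XENHVM is configured. A toy compile-time illustration with invented numbers (the real FIRST_MSI_INT/NUM_MSI_INTS values differ):

/* --- illustrative sketch, not part of the commit --- */
#include <stdio.h>

#define FIRST_MSI_INT   256
#define NUM_MSI_INTS    128
#ifdef XENHVM
#define NUM_EVTCHN_INTS 64              /* hypothetical */
#else
#define NUM_EVTCHN_INTS 0               /* the PV-only branch is gone */
#endif
#define NUM_IO_INTS     (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)

int main(void)
{
    printf("NUM_IO_INTS = %d\n", NUM_IO_INTS);
    return 0;
}
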
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index dc29b6dad247..231f80f6a303 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -44,34 +44,6 @@
* other processors"
*/
-#if defined(XEN)
-
-/* These are periodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
- uint64_t tsc_timestamp; /* TSC at last update of time vals. */
- uint64_t system_timestamp; /* Time, in nanosecs, since boot. */
- uint32_t tsc_to_nsec_mul;
- uint32_t tsc_to_usec_mul;
- int tsc_shift;
- uint32_t version;
-};
-
-#define PCPU_XEN_FIELDS \
- ; \
- u_int pc_cr3; /* track cr3 for R1/R3*/ \
- vm_paddr_t *pc_pdir_shadow; \
- uint64_t pc_processed_system_time; \
- struct shadow_time_info pc_shadow_time; \
- char __pad[185]
-
-#else /* !XEN */
-
-#define PCPU_XEN_FIELDS \
- ; \
- char __pad[233]
-
-#endif
-
#define PCPU_MD_FIELDS \
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
@@ -85,8 +57,8 @@ struct shadow_time_info {
u_int pc_apic_id; \
int pc_private_tss; /* Flag indicating private tss*/\
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
- u_int pc_vcpu_id /* Xen vCPU ID */ \
- PCPU_XEN_FIELDS
+ u_int pc_vcpu_id; /* Xen vCPU ID */ \
+ char __pad[233]
#ifdef _KERNEL
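
The two PCPU_XEN_FIELDS variants collapse into one unconditional pad, so struct pcpu's machine-dependent tail keeps a fixed size whatever options are set. A hypothetical sketch of the invariant such a pad preserves; PCPU_MD_SIZE and the struct here are invented for the demonstration, not taken from the tree:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>

#define PCPU_MD_SIZE 240                /* assumed slot size */

struct pcpu_md_tail {
    uint32_t pc_vcpu_id;                /* Xen vCPU ID, now unconditional */
    char     __pad[233];
};

_Static_assert(sizeof(struct pcpu_md_tail) <= PCPU_MD_SIZE,
    "machine-dependent pcpu fields overflow their slot");
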
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 0d8057f7c481..76822b142338 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -219,76 +219,6 @@ extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
*/
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
-#if defined(XEN)
-#include <sys/param.h>
-
-#include <xen/xen-os.h>
-
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenpmap.h>
-
-extern pt_entry_t pg_nx;
-
-#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M)
-
-#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma)))
-#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m)))
-
-#define VTOM(va) xpmap_ptom(VTOP(va))
-
-static __inline vm_paddr_t
-pmap_kextract_ma(vm_offset_t va)
-{
- vm_paddr_t ma;
- if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) {
- ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1));
- } else {
- ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK);
- }
- return ma;
-}
-
-static __inline vm_paddr_t
-pmap_kextract(vm_offset_t va)
-{
- return xpmap_mtop(pmap_kextract_ma(va));
-}
-#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va)))
-
-vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va);
-
-void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
-void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len);
-void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len);
-
-static __inline pt_entry_t
-pte_load_store(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
-
- r = *ptep;
- PT_SET_VA(ptep, v, TRUE);
- return (r);
-}
-
-static __inline pt_entry_t
-pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
-
- r = *ptep;
- PT_SET_VA_MA(ptep, v, TRUE);
- return (r);
-}
-
-#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL)
-
-#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte)
-#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
-#define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
-
-#elif !defined(XEN)
-
/*
* KPTmap is a linear mapping of the kernel page table. It differs from the
* recursive mapping in two ways: (1) it only provides access to kernel page
@@ -328,13 +258,8 @@ pmap_kextract(vm_offset_t va)
}
return (pa);
}
-#endif
-
-#if !defined(XEN)
-#define PT_UPDATES_FLUSH()
-#endif
-#if (defined(PAE) || defined(PAE_TABLES)) && !defined(XEN)
+#if (defined(PAE) || defined(PAE_TABLES))
#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
@@ -343,7 +268,7 @@ pmap_kextract(vm_offset_t va)
extern pt_entry_t pg_nx;
-#elif !defined(PAE) && !defined(PAE_TABLES) && !defined(XEN)
+#else /* !(PAE || PAE_TABLES) */
#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte)
@@ -352,7 +277,7 @@ extern pt_entry_t pg_nx;
*(u_int *)(ptep) = (u_int)(pte); \
} while (0)
-#endif /* PAE */
+#endif /* !(PAE || PAE_TABLES) */
#define pte_clear(ptep) pte_store(ptep, 0)
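
The surviving pte_load_store() definitions differ only in width: a PAE entry is 64 bits and must be replaced with a single atomic exchange (the i586 primitive uses cmpxchg8b), while a non-PAE entry is a plain 32-bit word. A runnable model with C11 atomics standing in for the MD primitives:

/* --- illustrative sketch, not part of the commit --- */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef _Atomic uint64_t pae_pte_t;     /* PAE: 64-bit entries */
typedef _Atomic uint32_t pte_t;         /* non-PAE: 32-bit entries */

int main(void)
{
    pae_pte_t pae = 0x8000000012345067ULL;  /* NX | frame | flags */
    pte_t     pte = 0x12345067u;

    /* pte_load_clear(): swap in zero, get the old entry back atomically */
    uint64_t old64 = atomic_exchange(&pae, 0);
    uint32_t old32 = atomic_exchange(&pte, 0);

    printf("old PAE pte 0x%llx, old pte 0x%x\n",
        (unsigned long long)old64, old32);
    return 0;
}
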
diff --git a/sys/i386/include/segments.h b/sys/i386/include/segments.h
index d67f2e0dc3e2..635dffc48f34 100644
--- a/sys/i386/include/segments.h
+++ b/sys/i386/include/segments.h
@@ -82,14 +82,8 @@ struct region_descriptor {
#ifdef _KERNEL
extern int _default_ldt;
-#ifdef XEN
-extern struct proc_ldt default_proc_ldt;
-extern union descriptor *gdt;
-extern union descriptor *ldt;
-#else
extern union descriptor gdt[];
extern union descriptor ldt[NLDT];
-#endif
extern struct soft_segment_descriptor gdt_segs[];
extern struct gate_descriptor *idt;
extern struct region_descriptor r_gdt, r_idt;
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 71f31217a0ea..71c830ea895a 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -90,9 +90,7 @@ inthand_t
void assign_cpu_ids(void);
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
-#ifndef XEN
void cpususpend_handler(void);
-#endif
void init_secondary_tail(void);
void invltlb_handler(void);
void invlpg_handler(void);
@@ -101,9 +99,7 @@ void invlcache_handler(void);
void init_secondary(void);
void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
-#ifndef XEN
void ipi_bitmap_handler(struct trapframe frame);
-#endif
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
@@ -121,9 +117,6 @@ void mem_range_AP_init(void);
void topo_probe(void);
void ipi_send_cpu(int cpu, u_int ipi);
-#ifdef XEN
-void ipi_to_irq_init(void);
-#endif
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index 777566914556..cb23b0595649 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -135,11 +135,7 @@
* Kernel physical load address.
*/
#ifndef KERNLOAD
-#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST)
-#define KERNLOAD 0
-#else
#define KERNLOAD (1 << PDRSHIFT)
-#endif
#endif /* !defined(KERNLOAD) */
/*
@@ -149,11 +145,7 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
-#ifdef XEN
-#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START
-#else
#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
-#endif
#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
diff --git a/sys/i386/include/xen/features.h b/sys/i386/include/xen/features.h
deleted file mode 100644
index fb4f68008144..000000000000
--- a/sys/i386/include/xen/features.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/******************************************************************************
- * features.h
- *
- * Query the features reported by Xen.
- *
- * Copyright (c) 2006, Ian Campbell
- *
- * $FreeBSD$
- */
-
-#ifndef __ASM_XEN_FEATURES_H__
-#define __ASM_XEN_FEATURES_H__
-
-#include <xen/interface/version.h>
-
-extern void setup_xen_features(void);
-
-extern uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32];
-
-#define xen_feature(flag) (xen_features[flag])
-
-#endif /* __ASM_XEN_FEATURES_H__ */
diff --git a/sys/i386/include/xen/hypercall.h b/sys/i386/include/xen/hypercall.h
index c7e2a00ef23d..1c4d52904a46 100644
--- a/sys/i386/include/xen/hypercall.h
+++ b/sys/i386/include/xen/hypercall.h
@@ -246,14 +246,8 @@ HYPERVISOR_memory_op(
return _hypercall2(int, memory_op, cmd, arg);
}
-#if defined(XEN)
-int HYPERVISOR_multicall(multicall_entry_t *, int);
-static inline int
-_HYPERVISOR_multicall(
-#else /* XENHVM */
static inline int
HYPERVISOR_multicall(
-#endif
void *call_list, int nr_calls)
{
return _hypercall2(int, multicall, call_list, nr_calls);
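
HYPERVISOR_multicall() exists to batch several hypercalls into a single guest/hypervisor transition, which mattered most under PV; the HVM-only inline is now the sole spelling. A minimal sketch of building such a batch; the entry layout follows the Xen public ABI (xen/interface/xen.h), but treat the details as illustrative:

/* --- illustrative sketch, not part of the commit --- */
#include <stddef.h>

struct multicall_entry {
    unsigned long op;                   /* __HYPERVISOR_* operation number */
    long          result;               /* filled in by the hypervisor */
    unsigned long args[6];
};

static size_t
queue_call(struct multicall_entry *list, size_t n,
    unsigned long op, unsigned long arg0, unsigned long arg1)
{
    list[n].op = op;
    list[n].args[0] = arg0;
    list[n].args[1] = arg1;
    /* after queuing, one HYPERVISOR_multicall(list, n) issues the batch */
    return (n + 1);
}
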
diff --git a/sys/i386/include/xen/xen-os.h b/sys/i386/include/xen/xen-os.h
index 3d1ef049cca7..9b9b63ff0a58 100644
--- a/sys/i386/include/xen/xen-os.h
+++ b/sys/i386/include/xen/xen-os.h
@@ -44,105 +44,6 @@ static inline void rep_nop(void)
}
#define cpu_relax() rep_nop()
-#ifndef XENHVM
-
-#ifdef SMP
-extern int gdtset;
-
-#include <sys/time.h> /* XXX for pcpu.h */
-#include <sys/pcpu.h> /* XXX for PCPU_GET */
-static inline int
-smp_processor_id(void)
-{
- if (__predict_true(gdtset))
- return PCPU_GET(cpuid);
- return 0;
-}
-
-#else
-#define smp_processor_id() 0
-#endif
-
-#ifndef PANIC_IF
-#define PANIC_IF(exp) if (__predict_false(exp)) {printf("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);}
-#endif
-
-/*
- * Crude memory allocator for memory allocation early in boot.
- */
-void *bootmem_alloc(unsigned int size);
-void bootmem_free(void *ptr, unsigned int size);
-
-/*
- * STI/CLI equivalents. These basically set and clear the virtual
- * event_enable flag in the shared_info structure. Note that when
- * the enable bit is set, there may be pending events to be handled.
- * We may therefore call into do_hypervisor_callback() directly.
- */
-
-#define __cli() \
-do { \
- vcpu_info_t *_vcpu; \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = 1; \
- barrier(); \
-} while (0)
-
-#define __sti() \
-do { \
- vcpu_info_t *_vcpu; \
- barrier(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if (__predict_false(_vcpu->evtchn_upcall_pending)) \
- force_evtchn_callback(); \
-} while (0)
-
-#define __restore_flags(x) \
-do { \
- vcpu_info_t *_vcpu; \
- barrier(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
- barrier(); /* unmask then check (avoid races) */ \
- if (__predict_false(_vcpu->evtchn_upcall_pending)) \
- force_evtchn_callback(); \
- } \
-} while (0)
-
-/*
- * Add critical_{enter, exit}?
- *
- */
-#define __save_and_cli(x) \
-do { \
- vcpu_info_t *_vcpu; \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- (x) = _vcpu->evtchn_upcall_mask; \
- _vcpu->evtchn_upcall_mask = 1; \
- barrier(); \
-} while (0)
-
-
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-#define save_and_cli(x) __save_and_cli(x)
-
-#define local_irq_save(x) __save_and_cli(x)
-#define local_irq_restore(x) __restore_flags(x)
-#define local_irq_disable() __cli()
-#define local_irq_enable() __sti()
-
-#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
-#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); }
-#define spin_lock_irqsave mtx_lock_irqsave
-#define spin_unlock_irqrestore mtx_unlock_irqrestore
-
-#endif /* !XENHVM */
-
/* This is a barrier for the compiler only, NOT the processor! */
#define barrier() __asm__ __volatile__("": : :"memory")
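
The deleted __cli/__sti machinery replaced the privileged cli/sti instructions with writes to a per-vCPU mask byte in memory shared with the hypervisor, re-checking for events that raced in while masked. A runnable single-vCPU toy model of that unmask-then-check protocol (the vcpu_info layout is simplified):

/* --- illustrative sketch, not part of the commit --- */
#include <stdio.h>

#define barrier() __asm__ __volatile__("": : :"memory")

struct vcpu_info {                      /* stand-in for the shared page */
    volatile unsigned char evtchn_upcall_pending;
    volatile unsigned char evtchn_upcall_mask;
};

static struct vcpu_info vcpu;

static void
force_evtchn_callback(void)
{
    printf("handling event that arrived while masked\n");
    vcpu.evtchn_upcall_pending = 0;
}

static void
pv_cli(void)                            /* was __cli() */
{
    vcpu.evtchn_upcall_mask = 1;
    barrier();
}

static void
pv_sti(void)                            /* was __sti() */
{
    barrier();
    vcpu.evtchn_upcall_mask = 0;
    barrier();                          /* unmask, then check: avoids races */
    if (vcpu.evtchn_upcall_pending)
        force_evtchn_callback();
}

int main(void)
{
    pv_cli();
    vcpu.evtchn_upcall_pending = 1;     /* event arrives while "disabled" */
    pv_sti();
    return 0;
}
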
diff --git a/sys/i386/include/xen/xenfunc.h b/sys/i386/include/xen/xenfunc.h
index f02ee1212e32..f48b1f14719d 100644
--- a/sys/i386/include/xen/xenfunc.h
+++ b/sys/i386/include/xen/xenfunc.h
@@ -34,7 +34,6 @@
#include <vm/pmap.h>
-#include <machine/xen/xenpmap.h>
#include <machine/segments.h>
#include <sys/pcpu.h>
diff --git a/sys/i386/include/xen/xenpmap.h b/sys/i386/include/xen/xenpmap.h
deleted file mode 100644
index 8287e723a7e9..000000000000
--- a/sys/i386/include/xen/xenpmap.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * $FreeBSD$
- */
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-
-#if defined(XEN)
-void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
-void xen_pt_switch(vm_paddr_t);
-void xen_set_ldt(vm_paddr_t, unsigned long);
-void xen_pgdpt_pin(vm_paddr_t);
-void xen_pgd_pin(vm_paddr_t);
-void xen_pgd_unpin(vm_paddr_t);
-void xen_pt_pin(vm_paddr_t);
-void xen_pt_unpin(vm_paddr_t);
-void xen_flush_queue(void);
-void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
-void pmap_suspend(void);
-void pmap_resume(void);
-void xen_check_queue(void);
-
-#ifdef INVARIANTS
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
-#else
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
-#endif
-
-
-#include <sys/param.h>
-#include <sys/pcpu.h>
-
-#ifdef PMAP_DEBUG
-#define PMAP_REF pmap_ref
-#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
-#define PMAP_MARK_PRIV pmap_mark_privileged
-#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
-#else
-#define PMAP_MARK_PRIV(a)
-#define PMAP_MARK_UNPRIV(a)
-#define PMAP_REF(a, b)
-#define PMAP_DEC_REF_PAGE(a)
-#endif
-
-#define ALWAYS_SYNC 0
-
-#ifdef PT_DEBUG
-#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
-#else
-#define PT_LOG()
-#endif
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-#define SH_PD_SET_VA 1
-#define SH_PD_SET_VA_MA 2
-#define SH_PD_SET_VA_CLEAR 3
-
-struct pmap;
-void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
-#ifdef notyet
-static vm_paddr_t
-vptetomachpte(vm_paddr_t *pte)
-{
- vm_offset_t offset, ppte;
- vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
- int pgindex;
-
- ppte = (vm_offset_t)pte;
- pgoffset = (ppte & PAGE_MASK);
- offset = ppte - (vm_offset_t)PTmap;
- pgindex = ppte >> PDRSHIFT;
-
- pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
- retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
- return (retval);
-}
-#endif
-#define PT_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
-
-#ifdef WRITABLE_PAGETABLES
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- PT_LOG(); \
- *(_ptp) = xpmap_ptom((_npte)); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- PT_LOG(); \
- *(_ptp) = (_npte); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- PT_LOG(); \
- *(_ptp) = 0; \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#else /* !WRITABLE_PAGETABLES */
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xen_queue_pt_update(vtomach(_ptp), \
- xpmap_ptom(_npte)); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- xen_queue_pt_update(vtomach(_ptp), _npte); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xen_queue_pt_update(vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
-
-#define PT_SET_MA(_va, _ma) \
-do { \
- PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
- (_ma), \
- UVMF_INVLPG| UVMF_ALL) < 0); \
-} while (/*CONSTCOND*/0)
-
-#define PT_UPDATES_FLUSH() do { \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-static __inline vm_paddr_t
-xpmap_mtop(vm_paddr_t mpa)
-{
- vm_paddr_t tmp = (mpa & PG_FRAME);
-
- return machtophys(tmp) | (mpa & ~PG_FRAME);
-}
-
-static __inline vm_paddr_t
-xpmap_ptom(vm_paddr_t ppa)
-{
- vm_paddr_t tmp = (ppa & PG_FRAME);
-
- return phystomach(tmp) | (ppa & ~PG_FRAME);
-}
-
-static __inline void
-set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-#ifdef notyet
- PANIC_IF(max_mapnr && pfn >= max_mapnr);
-#endif
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef notyet
- PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
-#endif
- return;
- }
- xen_phys_machine[pfn] = mfn;
-}
-
-static __inline int
-phys_to_machine_mapping_valid(unsigned long pfn)
-{
- return xen_phys_machine[pfn] != INVALID_P2M_ENTRY;
-}
-
-#endif /* !XEN */
-
-#endif /* _XEN_XENPMAP_H_ */
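
The heart of the deleted header was the pfn<->mfn frame translation a PV guest applied around every page-table access: page tables held machine frames while the rest of the VM layer thought in pseudo-physical frames. A runnable toy model with invented four-entry tables:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PG_FRAME   0xfffff000u

static uint32_t phys_to_mach[4] = { 7, 3, 9, 1 };   /* pfn -> mfn */
static uint32_t mach_to_phys[16];                   /* mfn -> pfn */

static uint32_t
ptom(uint32_t pa)       /* cf. the deleted xpmap_ptom() */
{
    return (phys_to_mach[pa >> PAGE_SHIFT] << PAGE_SHIFT) | (pa & ~PG_FRAME);
}

static uint32_t
mtop(uint32_t ma)       /* cf. the deleted xpmap_mtop() */
{
    return (mach_to_phys[ma >> PAGE_SHIFT] << PAGE_SHIFT) | (ma & ~PG_FRAME);
}

int main(void)
{
    for (uint32_t pfn = 0; pfn < 4; pfn++)
        mach_to_phys[phys_to_mach[pfn]] = pfn;      /* build the inverse */

    uint32_t pa = (2u << PAGE_SHIFT) | 0x123;
    uint32_t ma = ptom(pa);
    printf("pa 0x%x -> ma 0x%x -> pa 0x%x\n", pa, ma, mtop(ma));
    return 0;
}
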
diff --git a/sys/i386/include/xen/xenstored.h b/sys/i386/include/xen/xenstored.h
deleted file mode 100644
index e584fa523e39..000000000000
--- a/sys/i386/include/xen/xenstored.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Simple prototype Xen Store Daemon providing a simple tree-like database.
- * Copyright (C) 2005 Rusty Russell IBM Corporation
- *
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _XENSTORED_H
-#define _XENSTORED_H
-
-enum xsd_sockmsg_type
-{
- XS_DEBUG,
- XS_SHUTDOWN,
- XS_DIRECTORY,
- XS_READ,
- XS_GET_PERMS,
- XS_WATCH,
- XS_WATCH_ACK,
- XS_UNWATCH,
- XS_TRANSACTION_START,
- XS_TRANSACTION_END,
- XS_OP_READ_ONLY = XS_TRANSACTION_END,
- XS_INTRODUCE,
- XS_RELEASE,
- XS_GETDOMAINPATH,
- XS_WRITE,
- XS_MKDIR,
- XS_RM,
- XS_SET_PERMS,
- XS_WATCH_EVENT,
- XS_ERROR,
-};
-
-#define XS_WRITE_NONE "NONE"
-#define XS_WRITE_CREATE "CREATE"
-#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
-
-/* We hand errors as strings, for portability. */
-struct xsd_errors
-{
- int errnum;
- const char *errstring;
-};
-#define XSD_ERROR(x) { x, #x }
-static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
- XSD_ERROR(EINVAL),
- XSD_ERROR(EACCES),
- XSD_ERROR(EEXIST),
- XSD_ERROR(EISDIR),
- XSD_ERROR(ENOENT),
- XSD_ERROR(ENOMEM),
- XSD_ERROR(ENOSPC),
- XSD_ERROR(EIO),
- XSD_ERROR(ENOTEMPTY),
- XSD_ERROR(ENOSYS),
- XSD_ERROR(EROFS),
- XSD_ERROR(EBUSY),
- XSD_ERROR(ETIMEDOUT),
- XSD_ERROR(EISCONN),
-};
-struct xsd_sockmsg
-{
- uint32_t type;
- uint32_t len; /* Length of data following this. */
-
- /* Generally followed by nul-terminated string(s). */
-};
-
-#endif /* _XENSTORED_H */
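
Besides the message-type enum, the deleted prototype header pinned down the xenstore wire framing: a fixed type/len header followed by len bytes of payload, usually NUL-terminated strings. A runnable framing sketch; XS_READ's value of 3 matches the old enum above, and the path is arbitrary:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct xsd_sockmsg {
    uint32_t type;
    uint32_t len;       /* length of data following this */
};

int main(void)
{
    const char path[] = "device/vif/0/mac";         /* includes the NUL */
    unsigned char buf[sizeof(struct xsd_sockmsg) + sizeof(path)];
    struct xsd_sockmsg hdr = { .type = 3 /* XS_READ */, .len = sizeof(path) };

    memcpy(buf, &hdr, sizeof(hdr));
    memcpy(buf + sizeof(hdr), path, sizeof(path));
    printf("framed %zu bytes\n", sizeof(buf));
    return 0;
}
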
diff --git a/sys/i386/include/xen/xenvar.h b/sys/i386/include/xen/xenvar.h
index 569460723240..484c279c0ff1 100644
--- a/sys/i386/include/xen/xenvar.h
+++ b/sys/i386/include/xen/xenvar.h
@@ -29,91 +29,8 @@
#ifndef XENVAR_H_
#define XENVAR_H_
-#include <machine/xen/features.h>
-
-#if defined(XEN)
-
-#define XBOOTUP 0x1
-#define XPMAP 0x2
-extern int xendebug_flags;
-#ifndef NOXENDEBUG
-/* Print directly to the Xen console during debugging. */
-#define XENPRINTF xc_printf
-#else
-#define XENPRINTF printf
-#endif
-
-extern xen_pfn_t *xen_phys_machine;
-extern xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
-extern xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
-
-#if 0
-#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
-#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
-#define TRACE_DEBUG(argflags, _f, _a...) \
-if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
-#else
-#define TRACE_ENTER
-#define TRACE_EXIT
-#define TRACE_DEBUG(argflags, _f, _a...)
-#endif
-
-extern xen_pfn_t *xen_machine_phys;
-/* Xen starts physical pages after the 4MB ISA hole -
- * FreeBSD doesn't
- */
-
-
-#undef ADD_ISA_HOLE /* XXX */
-
-#ifdef ADD_ISA_HOLE
-#define ISA_INDEX_OFFSET 1024
-#define ISA_PDR_OFFSET 1
-#else
-#define ISA_INDEX_OFFSET 0
-#define ISA_PDR_OFFSET 0
-#endif
-
-
-#define PFNTOMFN(i) (xen_phys_machine[(i)])
-#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
-
-#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
-#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
-
-#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
-#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
-
-#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
-#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
-#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
-
-
-void xpq_init(void);
-
-#define BITS_PER_LONG 32
-#define NR_CPUS XEN_LEGACY_MAX_VCPUS
-
-#define BITS_TO_LONGS(bits) \
- (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-#define DECLARE_BITMAP(name,bits) \
- unsigned long name[BITS_TO_LONGS(bits)]
-
-int xen_create_contiguous_region(vm_page_t pages, int npages);
-
-void xen_destroy_contiguous_region(void * addr, int npages);
-
-#elif defined(XENHVM)
+#include <xen/features.h>
#define vtomach(va) pmap_kextract((vm_offset_t) (va))
-#define PFNTOMFN(pa) (pa)
-#define MFNTOPFN(ma) (ma)
-
-#define set_phys_to_machine(pfn, mfn) ((void)0)
-#define phys_to_machine_mapping_valid(pfn) (TRUE)
-
-#endif /* !XEN && !XENHVM */
#endif
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c
index b0ea4e762474..a7113e20da27 100644
--- a/sys/i386/isa/npx.c
+++ b/sys/i386/isa/npx.c
@@ -69,10 +69,6 @@ __FBSDID("$FreeBSD$");
#include <machine/ucontext.h>
#include <machine/intr_machdep.h>
-#ifdef XEN
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#endif
#ifdef DEV_ISA
#include <isa/isavar.h>
@@ -157,13 +153,8 @@ void xsaveopt(char *addr, uint64_t mask);
#endif /* __GNUCLIKE_ASM && !lint */
-#ifdef XEN
-#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
-#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
-#else
#define start_emulating() load_cr0(rcr0() | CR0_TS)
#define stop_emulating() clts()
-#endif
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
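
start_emulating()/stop_emulating() implement lazy FPU context switching: setting CR0.TS makes the next FPU instruction raise #NM so the kernel can save and restore state on demand, and clts clears the bit; under PV the same toggle went through HYPERVISOR_fpu_taskswitch(). A ring-0-only sketch of the native pair:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>

#define CR0_TS 0x00000008u  /* task switched: next FPU insn traps (#NM) */

static __inline uint32_t
rcr0(void)
{
    uint32_t cr0;

    __asm __volatile("movl %%cr0,%0" : "=r" (cr0));
    return (cr0);
}

static __inline void
start_emulating(void)
{
    __asm __volatile("movl %0,%%cr0" : : "r" (rcr0() | CR0_TS));
}

static __inline void
stop_emulating(void)
{
    __asm __volatile("clts");
}
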
diff --git a/sys/i386/pci/pci_cfgreg.c b/sys/i386/pci/pci_cfgreg.c
index 5d57e89334a1..2716a7a23ce6 100644
--- a/sys/i386/pci/pci_cfgreg.c
+++ b/sys/i386/pci/pci_cfgreg.c
@@ -93,9 +93,7 @@ static uint32_t pci_docfgregread(int bus, int slot, int func, int reg,
int bytes);
static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
-#ifndef XEN
static int pcireg_cfgopen(void);
-#endif
static int pciereg_cfgread(int bus, unsigned slot, unsigned func,
unsigned reg, unsigned bytes);
static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func,
@@ -116,7 +114,6 @@ pci_i386_map_intline(int line)
return (line);
}
-#ifndef XEN
static u_int16_t
pcibios_get_version(void)
{
@@ -137,7 +134,6 @@ pcibios_get_version(void)
}
return (args.ebx & 0xffff);
}
-#endif
/*
* Initialise access to PCI configuration space
@@ -145,9 +141,6 @@ pcibios_get_version(void)
int
pci_cfgregopen(void)
{
-#ifdef XEN
- return (0);
-#else
static int opened = 0;
uint64_t pciebar;
u_int16_t vid, did;
@@ -202,7 +195,6 @@ pci_cfgregopen(void)
}
return(1);
-#endif
}
static uint32_t
@@ -390,7 +382,6 @@ pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
mtx_unlock_spin(&pcicfg_mtx);
}
-#ifndef XEN
/* check whether the configuration mechanism has been correctly identified */
static int
pci_cfgcheck(int maxdev)
@@ -607,7 +598,6 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
return (1);
}
-#endif /* !XEN */
#define PCIE_PADDR(base, reg, bus, slot, func) \
((base) + \
diff --git a/sys/i386/pci/pci_pir.c b/sys/i386/pci/pci_pir.c
index 0d64cabf4aab..6aeaae3502be 100644
--- a/sys/i386/pci/pci_pir.c
+++ b/sys/i386/pci/pci_pir.c
@@ -137,9 +137,6 @@ pci_pir_open(void)
int i;
uint8_t ck, *cv;
-#ifdef XEN
- return;
-#else
/* Don't try if we've already found a table. */
if (pci_route_table != NULL)
return;
@@ -150,7 +147,7 @@ pci_pir_open(void)
sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0);
if (sigaddr == 0)
return;
-#endif
+
/* If we found something, check the checksum and length. */
/* XXX - Use pmap_mapdev()? */
pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr);
@@ -481,11 +478,7 @@ pci_pir_biosroute(int bus, int device, int func, int pin, int irq)
args.eax = PCIBIOS_ROUTE_INTERRUPT;
args.ebx = (bus << 8) | (device << 3) | func;
args.ecx = (irq << 8) | (0xa + pin);
-#ifdef XEN
- return (0);
-#else
return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL)));
-#endif
}
diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c
deleted file mode 100644
index ffb436e4d15d..000000000000
--- a/sys/i386/xen/clock.c
+++ /dev/null
@@ -1,570 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz and Don Ahn.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/* #define DELAYDEBUG */
-/*
- * Routines to handle clock hardware.
- */
-
-#include "opt_ddb.h"
-#include "opt_clock.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/clock.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/time.h>
-#include <sys/timeet.h>
-#include <sys/timetc.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/sysctl.h>
-#include <sys/cons.h>
-#include <sys/power.h>
-
-#include <machine/clock.h>
-#include <machine/cputypes.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/psl.h>
-#include <machine/pvclock.h>
-#if defined(SMP)
-#include <machine/smp.h>
-#endif
-#include <machine/specialreg.h>
-#include <machine/timerreg.h>
-
-#include <x86/isa/icu.h>
-#include <isa/isareg.h>
-#include <isa/rtc.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/pmap.h>
-#include <xen/hypervisor.h>
-#include <xen/xen-os.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/interface/vcpu.h>
-#include <machine/cpu.h>
-#include <xen/xen_intr.h>
-
-/*
- * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
- * can use a simple formula for leap years.
- */
-#define LEAPYEAR(y) (!((y) % 4))
-#define DAYSPERYEAR (28+30*4+31*7)
-
-#ifndef TIMER_FREQ
-#define TIMER_FREQ 1193182
-#endif
-
-#ifdef CYC2NS_SCALE_FACTOR
-#undef CYC2NS_SCALE_FACTOR
-#endif
-#define CYC2NS_SCALE_FACTOR 10
-
-/* Values for timerX_state: */
-#define RELEASED 0
-#define RELEASE_PENDING 1
-#define ACQUIRED 2
-#define ACQUIRE_PENDING 3
-
-struct mtx clock_lock;
-#define RTC_LOCK_INIT \
- mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE)
-#define RTC_LOCK mtx_lock_spin(&clock_lock)
-#define RTC_UNLOCK mtx_unlock_spin(&clock_lock)
-#define NS_PER_TICK (1000000000ULL/hz)
-
-int adjkerntz; /* local offset from UTC in seconds */
-int clkintr_pending;
-int pscnt = 1;
-int psdiv = 1;
-int wall_cmos_clock;
-u_int timer_freq = TIMER_FREQ;
-static u_long cyc2ns_scale;
-static uint64_t processed_system_time; /* stime (ns) at last processing. */
-
-#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod, __base; \
- __base = (base); \
- __asm("":"=a" (__low), "=d" (__high):"A" (n)); \
- __upper = __high; \
- if (__high) { \
- __upper = __high % (__base); \
- __high = __high / (__base); \
- } \
- __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
- __asm("":"=A" (n):"a" (__low),"d" (__high)); \
- __mod; \
-})
-
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
-}
-
-static uint32_t
-getit(void)
-{
- return (pvclock_get_last_cycles());
-}
-
-
-/*
- * XXX: timer needs more SMP work.
- */
-void
-i8254_init(void)
-{
-
- RTC_LOCK_INIT;
-}
-
-/*
- * Wait "n" microseconds.
- * Relies on timer 1 counting down from (timer_freq / hz)
- * Note: timer had better have been programmed before this is first used!
- */
-void
-i8254_delay(int n)
-{
- int delta, ticks_left;
- uint32_t tick, prev_tick;
-#ifdef DELAYDEBUG
- int getit_calls = 1;
- int n1;
- static int state = 0;
-
- if (state == 0) {
- state = 1;
- for (n1 = 1; n1 <= 10000000; n1 *= 10)
- DELAY(n1);
- state = 2;
- }
- if (state == 1)
- printf("DELAY(%d)...", n);
-#endif
- /*
- * Read the counter first, so that the rest of the setup overhead is
- * counted. Guess the initial overhead is 20 usec (on most systems it
- * takes about 1.5 usec for each of the i/o's in getit(). The loop
- * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The
- * multiplications and divisions to scale the count take a while).
- *
- * However, if ddb is active then use a fake counter since reading
- * the i8254 counter involves acquiring a lock. ddb must not go
- * locking for many reasons, but it calls here for at least atkbd
- * input.
- */
- prev_tick = getit();
-
- n -= 0; /* XXX actually guess no initial overhead */
- /*
- * Calculate (n * (timer_freq / 1e6)) without using floating point
- * and without any avoidable overflows.
- */
- if (n <= 0)
- ticks_left = 0;
- else if (n < 256)
- /*
- * Use fixed point to avoid a slow division by 1000000.
- * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
- * 2^15 is the first power of 2 that gives exact results
- * for n between 0 and 256.
- */
- ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
- else
- /*
- * Don't bother using fixed point, although gcc-2.7.2
- * generates particularly poor code for the long long
- * division, since even the slow way will complete long
- * before the delay is up (unless we're interrupted).
- */
- ticks_left = ((u_int)n * (long long)timer_freq + 999999)
- / 1000000;
-
- while (ticks_left > 0) {
- tick = getit();
-#ifdef DELAYDEBUG
- ++getit_calls;
-#endif
- delta = tick - prev_tick;
- prev_tick = tick;
- if (delta < 0) {
- /*
- * Guard against timer0_max_count being wrong.
- * This shouldn't happen in normal operation,
- * but it may happen if set_timer_freq() is
- * traced.
- */
- /* delta += timer0_max_count; ??? */
- if (delta < 0)
- delta = 0;
- }
- ticks_left -= delta;
- }
-#ifdef DELAYDEBUG
- if (state == 1)
- printf(" %d calls to getit() at %d usec each\n",
- getit_calls, (n + 5) / getit_calls);
-#endif
-}
-
-void
-startrtclock()
-{
- uint64_t __cpu_khz;
- uint32_t cpu_khz;
- struct vcpu_time_info *info;
-
- __cpu_khz = 1000000ULL << 32;
- info = &HYPERVISOR_shared_info->vcpu_info[0].time;
-
- (void)do_div(__cpu_khz, info->tsc_to_system_mul);
- if ( info->tsc_shift < 0 )
- cpu_khz = __cpu_khz << -info->tsc_shift;
- else
- cpu_khz = __cpu_khz >> info->tsc_shift;
-
- printf("Xen reported: %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
-
- /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
- (2^32 * 1 / (clocks/us)) */
-
- set_cyc2ns_scale(cpu_khz/1000);
- tsc_freq = cpu_khz * 1000;
-}
-
-/*
- * RTC support routines
- */
-
-
-static __inline int
-readrtc(int port)
-{
- return(bcd2bin(rtcin(port)));
-}
-
-
-#ifdef XEN_PRIVILEGED_GUEST
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-static void
-domu_inittodr(time_t base)
-{
- unsigned long sec;
- int s, y;
- struct timespec ts;
-
- update_wallclock();
- add_uptime_to_wallclock();
-
- RTC_LOCK;
-
- if (base) {
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
-
- sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- y = time_second - shadow_tv.tv_sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- tc_setclock(&shadow_tv);
- }
- RTC_UNLOCK;
-}
-
-/*
- * Write system time back to RTC.
- */
-static void
-domu_resettodr(void)
-{
- unsigned long tm;
- int s;
- dom0_op_t op;
- struct shadow_time_info *shadow;
- struct pcpu *pc;
-
- pc = pcpu_find(smp_processor_id());
- shadow = &pc->pc_shadow_time;
- if (xen_disable_rtc_set)
- return;
-
- s = splclock();
- tm = time_second;
- splx(s);
-
- tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- if ((xen_start_info->flags & SIF_INITDOMAIN) &&
- !independent_wallclock)
- {
- op.cmd = DOM0_SETTIME;
- op.u.settime.secs = tm;
- op.u.settime.nsecs = 0;
- op.u.settime.system_time = shadow->system_timestamp;
- HYPERVISOR_dom0_op(&op);
- update_wallclock();
- add_uptime_to_wallclock();
- } else if (independent_wallclock) {
- /* notyet */
- ;
- }
-}
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-void
-inittodr(time_t base)
-{
- unsigned long sec, days;
- int year, month;
- int y, m, s;
- struct timespec ts;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- domu_inittodr(base);
- return;
- }
-
- if (base) {
- s = splclock();
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- splx(s);
- }
-
- /* Look if we have a RTC present and the time is valid */
- if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
- goto wrong_time;
-
- /* wait for time update to complete */
- /* If RTCSA_TUP is zero, we have at least 244us before next update */
- s = splhigh();
- while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
- splx(s);
- s = splhigh();
- }
-
- days = 0;
-#ifdef USE_RTC_CENTURY
- year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
-#else
- year = readrtc(RTC_YEAR) + 1900;
- if (year < 1970)
- year += 100;
-#endif
- if (year < 1970) {
- splx(s);
- goto wrong_time;
- }
- month = readrtc(RTC_MONTH);
- for (m = 1; m < month; m++)
- days += daysinmonth[m-1];
- if ((month > 2) && LEAPYEAR(year))
- days ++;
- days += readrtc(RTC_DAY) - 1;
- for (y = 1970; y < year; y++)
- days += DAYSPERYEAR + LEAPYEAR(y);
- sec = ((( days * 24 +
- readrtc(RTC_HRS)) * 60 +
- readrtc(RTC_MIN)) * 60 +
- readrtc(RTC_SEC));
- /* sec now contains the number of seconds, since Jan 1 1970,
- in the local time zone */
-
- sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- y = time_second - sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- ts.tv_sec = sec;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
- splx(s);
- return;
-
- wrong_time:
- printf("Invalid time in real time clock.\n");
- printf("Check and reset the date immediately!\n");
-}
-
-
-/*
- * Write system time back to RTC
- */
-void
-resettodr()
-{
- unsigned long tm;
- int y, m, s;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- domu_resettodr();
- return;
- }
-
- if (xen_disable_rtc_set)
- return;
-
- s = splclock();
- tm = time_second;
- splx(s);
-
- /* Disable RTC updates and interrupts. */
- writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
-
- /* Calculate local time to put in RTC */
-
- tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */
- writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */
- writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */
-
- /* We have now the days since 01-01-1970 in tm */
- writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */
- for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
- tm >= m;
- y++, m = DAYSPERYEAR + LEAPYEAR(y))
- tm -= m;
-
- /* Now we have the years in y and the day-of-the-year in tm */
- writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */
-#ifdef USE_RTC_CENTURY
- writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */
-#endif
- for (m = 0; ; m++) {
- int ml;
-
- ml = daysinmonth[m];
- if (m == 1 && LEAPYEAR(y))
- ml++;
- if (tm < ml)
- break;
- tm -= ml;
- }
-
- writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */
- writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */
-
- /* Reenable RTC updates and interrupts. */
- writertc(RTC_STATUSB, RTCSB_24HR);
- rtcin(RTC_INTR);
-}
-#endif
-
-/*
- * Start clocks running.
- */
-void
-cpu_initclocks(void)
-{
- cpu_initclocks_bsp();
-}
-
-/* Return system time offset by ticks */
-uint64_t
-get_system_time(int ticks)
-{
- return (processed_system_time + (ticks * NS_PER_TICK));
-}
-
-int
-timer_spkr_acquire(void)
-{
-
- return (0);
-}
-
-int
-timer_spkr_release(void)
-{
-
- return (0);
-}
-
-void
-timer_spkr_setfreq(int freq)
-{
-
-}
-
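
Among the deleted clock code is the fixed-point cycles-to-nanoseconds conversion described in its comment block: precompute cyc2ns_scale = (1000 << SC) / cpu_mhz once, then each conversion is a multiply and a shift. A runnable extraction of that math:

/* --- illustrative sketch, not part of the commit --- */
#include <stdint.h>
#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10

static uint64_t cyc2ns_scale;

static void
set_cyc2ns_scale(unsigned long cpu_mhz)
{
    cyc2ns_scale = (1000ULL << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}

static uint64_t
cycles_2_ns(uint64_t cyc)
{
    return ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
}

int main(void)
{
    set_cyc2ns_scale(2400);                 /* assume a 2.4 GHz CPU */
    printf("%llu cycles = %llu ns\n", 2400000ULL,
        (unsigned long long)cycles_2_ns(2400000ULL));
    return 0;
}

For 2.4 million cycles (one millisecond at 2.4 GHz) this prints 998437 ns; the roughly 0.16% shortfall is the truncation of the scale factor that the shift-based divide trades for speed.
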
diff --git a/sys/i386/xen/exception.s b/sys/i386/xen/exception.s
deleted file mode 100644
index 95f1c0e6703f..000000000000
--- a/sys/i386/xen/exception.s
+++ /dev/null
@@ -1,494 +0,0 @@
-/*-
- * Copyright (c) 1989, 1990 William F. Jolitz.
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include "opt_apic.h"
-#include "opt_npx.h"
-
-#include <machine/asmacros.h>
-#include <machine/psl.h>
-#include <machine/trap.h>
-
-#include "assym.s"
-
-#define SEL_RPL_MASK 0x0002
-#define __HYPERVISOR_iret 23
-
-/* Offsets into shared_info_t. */
-
-#define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask 1
-
-#define sizeof_vcpu_shift 6
-
-
-#ifdef SMP
-#define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \
- shl $sizeof_vcpu_shift,reg ; \
- addl HYPERVISOR_shared_info,reg
-#else
-#define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
-#endif
-
-#define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
- __DISABLE_INTERRUPTS(reg)
-#define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
- __ENABLE_INTERRUPTS(reg)
-#define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
-
-#define POPA \
- popl %edi; \
- popl %esi; \
- popl %ebp; \
- popl %ebx; \
- popl %ebx; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
- .text
-
-/*****************************************************************************/
-/* Trap handling */
-/*****************************************************************************/
-/*
- * Trap and fault vector routines.
- *
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
- *
- * The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
- * the interrupt disable state prior to the trap), the code segment register,
- * and the return instruction pointer are pushed by the cpu. The cpu
- * will also push an 'error' code for certain traps. We push a dummy
- * error code for those traps where the cpu doesn't in order to maintain
- * a consistent frame. We also push a contrived 'trap number'.
- *
- * The cpu does not push the general registers, we must do that, and we
- * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
- * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
- * must load them with appropriate values for supervisor mode operation.
- */
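Concretely, the state described above is laid out as the i386 struct trapframe. For reference, a simplified copy (field order as in i386/include/frame.h to the best of our reading, lowest address first; the vm86 segment tail is omitted):

    struct trapframe_sketch {
            int     tf_fs;          /* pushed last by the stubs */
            int     tf_es;
            int     tf_ds;
            int     tf_edi;         /* the 'pushal' block */
            int     tf_esi;
            int     tf_ebp;
            int     tf_isp;
            int     tf_ebx;
            int     tf_edx;
            int     tf_ecx;
            int     tf_eax;
            int     tf_trapno;      /* the contrived trap number */
            int     tf_err;         /* dummy if the CPU pushed none */
            int     tf_eip;         /* pushed by the CPU... */
            int     tf_cs;
            int     tf_eflags;
            int     tf_esp;         /* ...these two only on a ring crossing */
            int     tf_ss;
    };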
-
-MCOUNT_LABEL(user)
-MCOUNT_LABEL(btrap)
-
-#define TRAP(a) pushl $(a) ; jmp alltraps
-
-IDTVEC(div)
- pushl $0; TRAP(T_DIVIDE)
-IDTVEC(dbg)
- pushl $0; TRAP(T_TRCTRAP)
-IDTVEC(nmi)
- pushl $0; TRAP(T_NMI)
-IDTVEC(bpt)
- pushl $0; TRAP(T_BPTFLT)
-IDTVEC(ofl)
- pushl $0; TRAP(T_OFLOW)
-IDTVEC(bnd)
- pushl $0; TRAP(T_BOUND)
-IDTVEC(ill)
- pushl $0; TRAP(T_PRIVINFLT)
-IDTVEC(dna)
- pushl $0; TRAP(T_DNA)
-IDTVEC(fpusegm)
- pushl $0; TRAP(T_FPOPFLT)
-IDTVEC(tss)
- TRAP(T_TSSFLT)
-IDTVEC(missing)
- TRAP(T_SEGNPFLT)
-IDTVEC(stk)
- TRAP(T_STKFLT)
-IDTVEC(prot)
- TRAP(T_PROTFLT)
-IDTVEC(page)
- TRAP(T_PAGEFLT)
-IDTVEC(mchk)
- pushl $0; TRAP(T_MCHK)
-IDTVEC(rsvd)
- pushl $0; TRAP(T_RESERVED)
-IDTVEC(fpu)
- pushl $0; TRAP(T_ARITHTRAP)
-IDTVEC(align)
- TRAP(T_ALIGNFLT)
-IDTVEC(xmm)
- pushl $0; TRAP(T_XMMFLT)
-
-IDTVEC(hypervisor_callback)
- pushl $0;
- pushl $0;
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-upcall_with_regs_pushed:
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
-call_evtchn_upcall:
- movl TF_EIP(%esp),%eax
- cmpl $scrit,%eax
- jb 10f
- cmpl $ecrit,%eax
- jb critical_region_fixup
-
-10: pushl %esp
- call xen_intr_handle_upcall
- addl $4,%esp
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-hypervisor_callback_pending:
- DISABLE_INTERRUPTS(%esi) /* cli */
- jmp 10b
- /*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
- */
- SUPERALIGN_TEXT
- .globl alltraps
- .type alltraps,@function
-alltraps:
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-
-alltraps_with_regs_pushed:
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
-
-calltrap:
- push %esp
- call trap
- add $4, %esp
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-/*
- * SYSCALL CALL GATE (old entry point for a.out binaries)
- *
- * The intersegment call has been set up to specify one dummy parameter.
- *
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
- */
- SUPERALIGN_TEXT
-IDTVEC(lcall_syscall)
- pushfl /* save eflags */
- popl 8(%esp) /* shuffle into tf_eflags */
- pushl $7 /* sizeof "lcall 7,0" */
- subl $4,%esp /* skip over tf_trapno */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
-
-/*
- * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
- *
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather than an IGT (interrupt gate).  Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
- */
- SUPERALIGN_TEXT
-IDTVEC(int0x80_syscall)
- pushl $2 /* sizeof "int 0x80" */
- pushl $0xBEEF /* for debug */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
-
-ENTRY(fork_trampoline)
- pushl %esp /* trapframe pointer */
- pushl %ebx /* arg1 */
- pushl %esi /* function */
- call fork_exit
- addl $12,%esp
- /* cut from syscall */
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-/*
- * To efficiently implement classification of trap and interrupt handlers
- * for profiling, there must be only trap handlers between the labels btrap
- * and bintr, and only interrupt handlers between the labels bintr and
- * eintr. This is implemented (partly) by including files that contain
- * some of the handlers. Before including the files, set up a normal asm
- * environment so that the included files don't need to know that they are
- * included.
- */
-
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-MCOUNT_LABEL(bintr)
-
-#ifdef DEV_APIC
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-
-#include <i386/i386/apic_vector.s>
-#endif
-
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-#include <i386/i386/vm86bios.s>
-
- .text
-MCOUNT_LABEL(eintr)
-
-/*
- * void doreti(struct trapframe)
- *
- * Handle return from interrupts, traps and syscalls.
- */
- .text
- SUPERALIGN_TEXT
- .type doreti,@function
-doreti:
- FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
-doreti_next:
-#ifdef notyet
- /*
- * Check if ASTs can be handled now. PSL_VM must be checked first
- * since segment registers only have an RPL in non-VM86 mode.
- */
- testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */
- jz doreti_notvm86
- movl PCPU(CURPCB),%ecx
- testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */
- jz doreti_ast /* can handle ASTS now if not */
- jmp doreti_exit
-
-doreti_notvm86:
-#endif
- testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
- jz doreti_exit /* can't handle ASTs now if not */
-
-doreti_ast:
- /*
- * Check for ASTs atomically with returning. Disabling CPU
- * interrupts provides sufficient locking even in the SMP case,
- * since we will be informed of any new ASTs by an IPI.
- */
- DISABLE_INTERRUPTS(%esi) /* cli */
- movl PCPU(CURTHREAD),%eax
- testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
- je doreti_exit
- ENABLE_INTERRUPTS(%esi) /* sti */
- pushl %esp /* pass a pointer to the trapframe */
- call ast
- add $4,%esp
- jmp doreti_ast
-
- /*
- * doreti_exit: pop registers, iret.
- *
- * The segment register pop is a special case, since it may
- * fault if (for example) a sigreturn specifies bad segment
- * registers. The fault is handled in trap.c.
- */
-doreti_exit:
- ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti)
-
- .globl scrit
-scrit:
- __TEST_PENDING(%esi)
- jnz hypervisor_callback_pending /* More to go */
-
- MEXITCOUNT
-
- .globl doreti_popl_fs
-doreti_popl_fs:
- popl %fs
- .globl doreti_popl_es
-doreti_popl_es:
- popl %es
- .globl doreti_popl_ds
-doreti_popl_ds:
- popl %ds
-
- /*
- * This is important: as nothing is atomic over here (we can get
- * interrupted any time), we use the critical_region_fixup() in
-	 * order to figure out where our stack is.  Therefore, do NOT use
- * 'popal' here without fixing up the table!
- */
- POPA
- addl $8,%esp
- .globl doreti_iret
-doreti_iret:
- jmp hypercall_page + (__HYPERVISOR_iret * 32)
- .globl ecrit
-ecrit:
- /*
- * doreti_iret_fault and friends. Alternative return code for
- * the case where we get a fault in the doreti_exit code
- * above. trap() (i386/i386/trap.c) catches this specific
- * case, sends the process a signal and continues in the
- * corresponding place in the code below.
- */
- ALIGN_TEXT
- .globl doreti_iret_fault
-doreti_iret_fault:
- subl $8,%esp
- pushal
- pushl %ds
- .globl doreti_popl_ds_fault
-doreti_popl_ds_fault:
- pushl %es
- .globl doreti_popl_es_fault
-doreti_popl_es_fault:
- pushl %fs
- .globl doreti_popl_fs_fault
-doreti_popl_fs_fault:
- movl $0,TF_ERR(%esp) /* XXX should be the error code */
- movl $T_PROTFLT,TF_TRAPNO(%esp)
- jmp alltraps_with_regs_pushed
-
-	/*
-	 * [How we do the fixup].  We want to merge the current stack frame
-	 * with the just-interrupted frame.  How we do this depends on where
-	 * in the critical region the interrupted handler was executing, and
-	 * so how many saved registers are in each frame.  We do this quickly
-	 * using the lookup table 'critical_fixup_table'.  For each byte
-	 * offset in the critical region, it provides the number of bytes
-	 * which have already been popped from the interrupted stack frame.
-	 */
-
-.globl critical_region_fixup
-critical_region_fixup:
- addl $critical_fixup_table-scrit,%eax
- movzbl (%eax),%eax # %eax contains num bytes popped
- movl %esp,%esi
- add %eax,%esi # %esi points at end of src region
- movl %esp,%edi
- add $0x40,%edi # %edi points at end of dst region
- movl %eax,%ecx
- shr $2,%ecx # convert bytes to words
- je 16f # skip loop if nothing to copy
-15: subl $4,%esi # pre-decrementing copy loop
- subl $4,%edi
- movl (%esi),%eax
- movl %eax,(%edi)
- loop 15b
-16: movl %edi,%esp # final %edi is top of merged stack
- jmp hypervisor_callback_pending
-
-
-critical_fixup_table:
-.byte 0x0,0x0,0x0 #testb $0x1,(%esi)
-.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea
-.byte 0x0,0x0 #pop %fs
-.byte 0x04 #pop %es
-.byte 0x08 #pop %ds
-.byte 0x0c #pop %edi
-.byte 0x10 #pop %esi
-.byte 0x14 #pop %ebp
-.byte 0x18 #pop %ebx
-.byte 0x1c #pop %ebx
-.byte 0x20 #pop %edx
-.byte 0x24 #pop %ecx
-.byte 0x28 #pop %eax
-.byte 0x2c,0x2c,0x2c #add $0x8,%esp
-#if 0
- .byte 0x34 #iret
-#endif
-.byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret
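In C terms the fixup is a backwards word copy: the table gives the number of bytes of the interrupted frame already popped, and that many bytes are copied from the end of the source region down over the end of the 0x40-byte destination region. A standalone sketch of the loop at labels 15/16 above (illustrative, not kernel code):

    #include <stdint.h>
    #include <string.h>

    #define FRAME_TAIL 0x40                 /* bytes covered by the region */

    /* Merge the interrupted frame into the new one; returns the new %esp. */
    static uint8_t *
    critical_fixup(uint8_t *sp, unsigned popped)
    {
            uint8_t *src = sp + popped;     /* end of source region (%esi) */
            uint8_t *dst = sp + FRAME_TAIL; /* end of dest region (%edi) */

            for (unsigned n = popped / 4; n > 0; n--) {
                    src -= 4;               /* pre-decrementing copy loop */
                    dst -= 4;
                    memcpy(dst, src, 4);
            }
            return (dst);                   /* top of the merged stack */
    }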
-
-
-/* Hypervisor uses this for application faults while it executes. */
-ENTRY(failsafe_callback)
- pushal
- call xen_failsafe_handler
-/*# call install_safe_pf_handler */
- movl 28(%esp),%ebx
-1: movl %ebx,%ds
- movl 32(%esp),%ebx
-2: movl %ebx,%es
- movl 36(%esp),%ebx
-3: movl %ebx,%fs
- movl 40(%esp),%ebx
-4: movl %ebx,%gs
-/*# call install_normal_pf_handler */
- popal
- addl $12,%esp
- iret
-
-
diff --git a/sys/i386/xen/locore.s b/sys/i386/xen/locore.s
deleted file mode 100644
index 7e6768463213..000000000000
--- a/sys/i386/xen/locore.s
+++ /dev/null
@@ -1,360 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $FreeBSD$
- *
- * originally from: locore.s, by William F. Jolitz
- *
- * Substantially rewritten by David Greenman, Rod Grimes,
- * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
- * and many others.
- */
-
-#include "opt_bootp.h"
-#include "opt_compat.h"
-#include "opt_nfsroot.h"
-#include "opt_pmap.h"
-
-#include <sys/syscall.h>
-#include <sys/reboot.h>
-
-#include <machine/asmacros.h>
-#include <machine/cputypes.h>
-#include <machine/psl.h>
-#include <machine/pmap.h>
-#include <machine/specialreg.h>
-
-#define __ASSEMBLY__
-#include <xen/interface/elfnote.h>
-
-/* The defines below have been lifted out of <machine/xen-public/arch-x86_32.h> */
-#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
-#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
-#define KERNEL_CS FLAT_RING1_CS
-#define KERNEL_DS FLAT_RING1_DS
-
-#include "assym.s"
-
-.section __xen_guest
- .ascii "LOADER=generic,GUEST_OS=freebsd,GUEST_VER=7.0,XEN_VER=xen-3.0,BSD_SYMTAB,VIRT_BASE=0xc0000000"
- .byte 0
-
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "FreeBSD")
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "HEAD")
- ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, KERNBASE)
- ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, KERNBASE)
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, btext)
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, XEN_HYPERVISOR_VIRT_START)
-#if 0
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|supervisor_mode_kernel|writable_descriptor_tables")
-
-#ifdef PAE
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
- ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
-
-
-
-/*
- * XXX
- *
- * Note: This version has been greatly munged to avoid various assembler
- * errors that may be fixed in newer versions of gas.  Perhaps newer
- * versions will have a more pleasant appearance.
- */
-
-/*
- * PTmap is recursive pagemap at top of virtual address space.
- * Within PTmap, the page directory can be found (third indirection).
- */
- .globl PTmap,PTD,PTDpde
- .set PTmap,(PTDPTDI << PDRSHIFT)
- .set PTD,PTmap + (PTDPTDI * PAGE_SIZE)
- .set PTDpde,PTD + (PTDPTDI * PDESIZE)
-
-/*
- * Compiled KERNBASE location and the kernel load address
- */
- .globl kernbase
- .set kernbase,KERNBASE
- .globl kernload
- .set kernload,KERNLOAD
-
-/*
- * Globals
- */
- .data
- ALIGN_DATA /* just to be sure */
-
- .space 0x2000 /* space for tmpstk - temporary stack */
-tmpstk:
-
- .globl bootinfo
-bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
-
- .globl KERNend
-KERNend: .long 0 /* phys addr end of kernel (just after bss) */
- .globl physfree
-physfree: .long 0 /* phys addr of next free page */
-
- .globl IdlePTD
-IdlePTD: .long 0 /* phys addr of kernel PTD */
-
-#ifdef PAE
- .globl IdlePDPT
-IdlePDPT: .long 0 /* phys addr of kernel PDPT */
-#endif
-
-#ifdef SMP
- .globl KPTphys
-#endif
-KPTphys: .long 0 /* phys addr of kernel page tables */
- .globl gdtset
-gdtset: .long 0 /* GDT is valid */
-
- .globl proc0kstack
-proc0kstack: .long 0 /* address of proc 0 kstack space */
-p0kpa: .long 0 /* phys addr of proc0's STACK */
-
-vm86phystk: .long 0 /* PA of vm86/bios stack */
-
- .globl vm86paddr, vm86pa
-vm86paddr: .long 0 /* address of vm86 region */
-vm86pa: .long 0 /* phys addr of vm86 region */
-
-#ifdef PC98
- .globl pc98_system_parameter
-pc98_system_parameter:
- .space 0x240
-#endif
-
- .globl avail_space
-avail_space: .long 0
-
-/**********************************************************************
- *
- * Some handy macros
- *
- */
-
-/*
- * We're already in protected mode, so no remapping is needed.
- */
-#define R(foo) (foo)
-
-#define ALLOCPAGES(foo) \
- movl R(physfree), %esi ; \
- movl $((foo)*PAGE_SIZE), %eax ; \
- addl %esi, %eax ; \
- movl %eax, R(physfree) ; \
- movl %esi, %edi ; \
- movl $((foo)*PAGE_SIZE),%ecx ; \
- xorl %eax,%eax ; \
- cld ; \
- rep ; \
- stosb
-
-/*
- * fillkpt
- * eax = page frame address
- * ebx = index into page table
- * ecx = how many pages to map
- * base = base address of page dir/table
- * prot = protection bits
- */
-#define fillkpt(base, prot) \
- shll $PTESHIFT,%ebx ; \
- addl base,%ebx ; \
- orl $PG_V,%eax ; \
- orl prot,%eax ; \
-1: movl %eax,(%ebx) ; \
- addl $PAGE_SIZE,%eax ; /* increment physical address */ \
- addl $PTESIZE,%ebx ; /* next pte */ \
- loop 1b
-
-/*
- * fillkptphys(prot)
- * eax = physical address
- * ecx = how many pages to map
- * prot = protection bits
- */
-#define fillkptphys(prot) \
- movl %eax, %ebx ; \
- shrl $PAGE_SHIFT, %ebx ; \
- fillkpt(R(KPTphys), prot)
-
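The register conventions above amount to a plain PTE fill loop. A C equivalent for the non-PAE case, where PTESIZE is 4 and a PTE fits in a uint32_t (constants renamed to avoid clashing with the real headers; a sketch, not the kernel macro):

    #include <stdint.h>

    #define PAGE_SIZE_C 4096
    #define PG_V_C      0x001

    /* Fill 'count' PTEs at index 'idx', mapping pages from physical 'pa'. */
    static void
    fillkpt_c(uint32_t *base, uint32_t idx, uint32_t pa, uint32_t count,
        uint32_t prot)
    {
            uint32_t pte = pa | PG_V_C | prot;

            while (count-- > 0) {
                    base[idx++] = pte;
                    pte += PAGE_SIZE_C;     /* next physical page */
            }
    }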
-/* Temporary stack */
-.space 8192
-tmpstack:
- .long tmpstack, KERNEL_DS
-
- .text
-
-.p2align 12, 0x90
-
-#define HYPERCALL_PAGE_OFFSET 0x1000
-.org HYPERCALL_PAGE_OFFSET
-ENTRY(hypercall_page)
- .cfi_startproc
- .skip 0x1000
- .cfi_endproc
-
-/**********************************************************************
- *
- * This is where the bootblocks start us, set the ball rolling...
- *
- */
-NON_GPROF_ENTRY(btext)
- /* At the end of our stack, we shall have free space - so store it */
- movl %esp,%ebx
- movl %ebx,R(avail_space)
-
- lss tmpstack,%esp
-
- pushl %esi
- call initvalues
- popl %esi
-
- /* Store the CPUID information */
- xorl %eax,%eax
- cpuid # cpuid 0
- movl %eax,R(cpu_high) # highest capability
- movl %ebx,R(cpu_vendor) # store vendor string
- movl %edx,R(cpu_vendor+4)
- movl %ecx,R(cpu_vendor+8)
- movb $0,R(cpu_vendor+12)
-
- movl $1,%eax
- cpuid # cpuid 1
- movl %eax,R(cpu_id) # store cpu_id
- movl %ebx,R(cpu_procinfo) # store cpu_procinfo
- movl %edx,R(cpu_feature) # store cpu_feature
- movl %ecx,R(cpu_feature2) # store cpu_feature2
- rorl $8,%eax # extract family type
- andl $15,%eax
- cmpl $5,%eax
- movl $CPU_686,R(cpu)
-
- movl proc0kstack,%eax
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
- xorl %ebp,%ebp /* mark end of frames */
-#ifdef PAE
- movl IdlePDPT,%esi
-#else
- movl IdlePTD,%esi
-#endif
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- pushl physfree
- call init386
- addl $4, %esp
- call mi_startup
- /* NOTREACHED */
- int $3
-
-/*
- * Signal trampoline, copied to top of user stack
- */
-NON_GPROF_ENTRY(sigcode)
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC_EFLAGS(%eax)
- jne 1f
- mov UC_GS(%eax), %gs /* restore %gs */
-1:
- movl $SYS_sigreturn,%eax
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-
-#ifdef COMPAT_FREEBSD4
- ALIGN_TEXT
-freebsd4_sigcode:
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC4(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC4_EFLAGS(%eax)
- jne 1f
- mov UC4_GS(%eax),%gs /* restore %gs */
-1:
- movl $344,%eax /* 4.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-#endif
-
-#ifdef COMPAT_43
- ALIGN_TEXT
-osigcode:
- call *SIGF_HANDLER(%esp) /* call signal handler */
- lea SIGF_SC(%esp),%eax /* get sigcontext */
- pushl %eax
- testl $PSL_VM,SC_PS(%eax)
- jne 9f
- movl SC_GS(%eax),%gs /* restore %gs */
-9:
- movl $103,%eax /* 3.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
-0: jmp 0b
-#endif /* COMPAT_43 */
-
- ALIGN_TEXT
-esigcode:
-
- .data
- .globl szsigcode
-szsigcode:
- .long esigcode-sigcode
-#ifdef COMPAT_FREEBSD4
- .globl szfreebsd4_sigcode
-szfreebsd4_sigcode:
- .long esigcode-freebsd4_sigcode
-#endif
-#ifdef COMPAT_43
- .globl szosigcode
-szosigcode:
- .long esigcode-osigcode
-#endif
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
deleted file mode 100644
index 6556d25f4bc6..000000000000
--- a/sys/i386/xen/mp_machdep.c
+++ /dev/null
@@ -1,1292 +0,0 @@
-/*-
- * Copyright (c) 1996, by Steve Passe
- * Copyright (c) 2008, by Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_apic.h"
-#include "opt_cpu.h"
-#include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
-#include "opt_pmap.h"
-#include "opt_sched.h"
-#include "opt_smp.h"
-
-#if !defined(lint)
-#if !defined(SMP)
-#error How did you get here?
-#endif
-
-#ifndef DEV_APIC
-#error The apic device is required for SMP, add "device apic" to your config file.
-#endif
-#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
-#error SMP not supported with CPU_DISABLE_CMPXCHG
-#endif
-#endif /* not lint */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/cons.h> /* cngetc() */
-#include <sys/cpuset.h>
-#ifdef GPROF
-#include <sys/gmon.h>
-#endif
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/mutex.h>
-#include <sys/pcpu.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/sched.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-
-#include <x86/apicreg.h>
-#include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
-#include <machine/pcb.h>
-#include <machine/psl.h>
-#include <machine/smp.h>
-#include <machine/specialreg.h>
-#include <machine/pcpu.h>
-
-#include <xen/xen-os.h>
-#include <xen/evtchn.h>
-#include <xen/xen_intr.h>
-#include <xen/hypervisor.h>
-#include <xen/interface/vcpu.h>
-
-/*---------------------------- Extern Declarations ---------------------------*/
-extern struct pcpu __pcpu[];
-
-extern void Xhypervisor_callback(void);
-extern void failsafe_callback(void);
-
-/*--------------------------- Forward Declarations ---------------------------*/
-static driver_filter_t smp_reschedule_interrupt;
-static driver_filter_t smp_call_function_interrupt;
-static int start_all_aps(void);
-static int start_ap(int apic_id);
-static void release_aps(void *dummy);
-
-/*---------------------------------- Macros ----------------------------------*/
-#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
-
-/*-------------------------------- Local Types -------------------------------*/
-typedef void call_data_func_t(uintptr_t , uintptr_t);
-
-struct xen_ipi_handler
-{
- driver_filter_t *filter;
- const char *description;
-};
-
-enum {
- RESCHEDULE_VECTOR,
- CALL_FUNCTION_VECTOR,
-};
-
-/*-------------------------------- Global Data -------------------------------*/
-static u_int hyperthreading_cpus;
-static cpuset_t hyperthreading_cpus_mask;
-
-int mp_naps;			/* # of Application Processors */
-int boot_cpu_id = -1; /* designated BSP */
-
-int bootAP;
-static union descriptor *bootAPgdt;
-
-/* Free these after use */
-void *bootstacks[MAXCPU];
-
-struct pcb stoppcbs[MAXCPU];
-
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
-vm_offset_t smp_tlb_addr2;
-volatile int smp_tlb_wait;
-
-static u_int logical_cpus;
-static volatile cpuset_t ipi_nmi_pending;
-
-/* used to hold the APs until we are ready to release them */
-struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually set up
- * the APs.
- */
-struct cpu_info cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
-int apic_cpuids[MAX_APIC_ID + 1];
-
-/* Holds pending bitmap based IPIs per CPU */
-volatile u_int cpu_ipi_pending[MAXCPU];
-
-int cpu_logical;
-int cpu_cores;
-
-static const struct xen_ipi_handler xen_ipis[] =
-{
- [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" },
- [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" }
-};
-
-/*------------------------------- Per-CPU Data -------------------------------*/
-DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
-DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
-
-/*------------------------------ Implementation ------------------------------*/
-struct cpu_group *
-cpu_topo(void)
-{
- if (cpu_cores == 0)
- cpu_cores = 1;
- if (cpu_logical == 0)
- cpu_logical = 1;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
- }
- /*
- * No multi-core or hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
- return (smp_topo_none());
- /*
- * Only HTT no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
- /*
- * Only multi-core no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
- CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
-}
-
-/*
- * Calculate usable address in base memory for AP trampoline code.
- */
-u_int
-mp_bootaddress(u_int basemem)
-{
-
- return (basemem);
-}
-
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id > MAX_APIC_ID) {
- panic("SMP: APIC ID %d too high", apic_id);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- if (mp_ncpus < MAXCPU)
- mp_ncpus++;
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- mp_maxid = MAXCPU - 1;
-}
-
-int
-cpu_mp_probe(void)
-{
-
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
-/*
- * Initialize the IPI handlers and start up the AP's.
- */
-void
-cpu_mp_start(void)
-{
- int i;
-
- /* Initialize the logical ID to APIC ID table. */
- for (i = 0; i < MAXCPU; i++) {
- cpu_apic_ids[i] = -1;
- cpu_ipi_pending[i] = 0;
- }
-
- /* Set boot_cpu_id if needed. */
- if (boot_cpu_id == -1) {
- boot_cpu_id = PCPU_GET(apic_id);
- cpu_info[boot_cpu_id].cpu_bsp = 1;
- } else
- KASSERT(boot_cpu_id == PCPU_GET(apic_id),
- ("BSP's APIC ID doesn't match boot_cpu_id"));
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
-
- assign_cpu_ids();
-
- /* Start each Application Processor */
- start_all_aps();
-
- /* Setup the initial logical CPUs info. */
- logical_cpus = 0;
- CPU_ZERO(&logical_cpus_mask);
- if (cpu_feature & CPUID_HTT)
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
-
- set_interrupt_apic_ids();
-}
-
-
-static void
-iv_rendezvous(uintptr_t a, uintptr_t b)
-{
- smp_rendezvous_action();
-}
-
-static void
-iv_invltlb(uintptr_t a, uintptr_t b)
-{
- xen_tlb_flush();
-}
-
-static void
-iv_invlpg(uintptr_t a, uintptr_t b)
-{
- xen_invlpg(a);
-}
-
-static void
-iv_invlrng(uintptr_t a, uintptr_t b)
-{
- vm_offset_t start = (vm_offset_t)a;
- vm_offset_t end = (vm_offset_t)b;
-
- while (start < end) {
- xen_invlpg(start);
- start += PAGE_SIZE;
- }
-}
-
-
-static void
-iv_invlcache(uintptr_t a, uintptr_t b)
-{
-
- wbinvd();
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-/*
- * These start from "IPI offset" APIC_IPI_INTS
- */
-static call_data_func_t *ipi_vectors[5] =
-{
- iv_rendezvous,
- iv_invltlb,
- iv_invlpg,
- iv_invlrng,
- iv_invlcache,
-};
-
-/*
- * Reschedule callback.  Nothing to do; all the work is done
- * automatically when we return from the interrupt.
- */
-static int
-smp_reschedule_interrupt(void *unused)
-{
- int cpu = PCPU_GET(cpuid);
- u_int ipi_bitmap;
-
- ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
- if (ipi_bitmap & (1 << IPI_PREEMPT)) {
-#ifdef COUNT_IPIS
- (*ipi_preempt_counts[cpu])++;
-#endif
- sched_preempt(curthread);
- }
-
- if (ipi_bitmap & (1 << IPI_AST)) {
-#ifdef COUNT_IPIS
- (*ipi_ast_counts[cpu])++;
-#endif
- /* Nothing to do for AST */
- }
- return (FILTER_HANDLED);
-}
-
-struct _call_data {
- uint16_t func_id;
- uint16_t wait;
- uintptr_t arg1;
- uintptr_t arg2;
- atomic_t started;
- atomic_t finished;
-};
-
-static struct _call_data *call_data;
-
-static int
-smp_call_function_interrupt(void *unused)
-{
- call_data_func_t *func;
- uintptr_t arg1 = call_data->arg1;
- uintptr_t arg2 = call_data->arg2;
- int wait = call_data->wait;
- atomic_t *started = &call_data->started;
- atomic_t *finished = &call_data->finished;
-
- /* We only handle function IPIs, not bitmap IPIs */
- if (call_data->func_id < APIC_IPI_INTS ||
- call_data->func_id > IPI_BITMAP_VECTOR)
- panic("invalid function id %u", call_data->func_id);
-
- func = ipi_vectors[IPI_TO_IDX(call_data->func_id)];
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(arg1, arg2);
-
- if (wait) {
- mb();
- atomic_inc(finished);
- }
- atomic_add_int(&smp_tlb_wait, 1);
- return (FILTER_HANDLED);
-}
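The started/finished counters form a small rendezvous: the initiator publishes call_data, raises the IPI, and spins; each receiver acknowledges once it has copied the data and, when wait is set, again after running the function. A userland model of the handshake (C11 atomics; the names are illustrative, not the kernel API):

    #include <stdatomic.h>
    #include <stdint.h>

    struct call_sketch {
            void      (*func)(uintptr_t, uintptr_t);
            uintptr_t   arg1, arg2;
            int         wait;
            atomic_int  started, finished;
    };

    /* Receiver side: mirrors smp_call_function_interrupt(). */
    static void
    receiver(struct call_sketch *cd)
    {
            void (*func)(uintptr_t, uintptr_t) = cd->func;
            uintptr_t a1 = cd->arg1, a2 = cd->arg2;
            int wait = cd->wait;

            atomic_fetch_add(&cd->started, 1);  /* "data copied" */
            func(a1, a2);                       /* cd may be gone if !wait */
            if (wait)
                    atomic_fetch_add(&cd->finished, 1);
    }

    /* Initiator side: publish, signal, spin until all 'ncpu' peers report. */
    static void
    initiator(struct call_sketch *cd, int ncpu)
    {
            atomic_store(&cd->started, 0);
            atomic_store(&cd->finished, 0);
            /* ...send the CALL_FUNCTION IPI to the other CPUs here... */
            while (atomic_load(&cd->started) < ncpu)
                    ;
            if (cd->wait)
                    while (atomic_load(&cd->finished) < ncpu)
                            ;
    }

The first acknowledgement is what lets the initiator reuse its stack-allocated data once every receiver has copied it; the second is only needed when the caller must know the function has finished everywhere.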
-
-/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- int i, x;
-
- /* List CPUs */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
- continue;
- if (cpu_info[x].cpu_disabled)
- printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
- else {
- KASSERT(i < mp_ncpus,
- ("mp_ncpus and actual cpus are out of whack"));
- printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
- }
- }
-}
-
-static int
-xen_smp_cpu_init(unsigned int cpu)
-{
- xen_intr_handle_t *ipi_handle;
- const struct xen_ipi_handler *ipi;
- int idx, rc;
-
- ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
- for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
-
- /*
- * The PCPU variable pc_device is not initialized on i386 PV,
-		 * so we have to use the root_bus device in order to set up
- * the IPIs.
- */
- rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu,
- ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]);
- if (rc != 0) {
- printf("Unable to allocate a XEN IPI port. "
- "Error %d\n", rc);
- break;
- }
- xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
- }
-
- for (;idx < nitems(xen_ipis); idx++)
- ipi_handle[idx] = NULL;
-
- if (rc == 0)
- return (0);
-
- /* Either all are successfully mapped, or none at all. */
- for (idx = 0; idx < nitems(xen_ipis); idx++) {
- if (ipi_handle[idx] == NULL)
- continue;
-
- xen_intr_unbind(ipi_handle[idx]);
- ipi_handle[idx] = NULL;
- }
-
- return (rc);
-}
-
-static void
-xen_smp_intr_init_cpus(void *unused)
-{
- int i;
-
- for (i = 0; i < mp_ncpus; i++)
- xen_smp_cpu_init(i);
-}
-
-static void
-xen_smp_intr_setup_cpus(void *unused)
-{
- int i;
-
- for (i = 0; i < mp_ncpus; i++)
- DPCPU_ID_SET(i, vcpu_info,
- &HYPERVISOR_shared_info->vcpu_info[i]);
-}
-
-#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-
-/*
- * AP CPU's call this to initialize themselves.
- */
-void
-init_secondary(void)
-{
- vm_offset_t addr;
- u_int cpuid;
- int gsel_tss;
-
-
- /* bootAP is set in start_ap() to our ID. */
- PCPU_SET(currentldt, _default_ldt);
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-#if 0
- gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
-#endif
- PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
-#if 0
- PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
-
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
-#endif
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- /*
- * Set to a known state:
- * Set by mpboot.s: CR0_PG, CR0_PE
- * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
- */
- /*
- * signal our startup to the BSP.
- */
- mp_naps++;
-
-	/* Spin until the BSP releases the APs. */
- while (!aps_ready)
- ia32_pause();
-
- /* BSP may have changed PTD while we were waiting */
- invltlb();
- for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
- invlpg(addr);
-
-#if 0
- /* set up SSE/NX */
- initializecpu();
-#endif
-
- /* set up FPU state on the AP */
- npxinit(false);
-#if 0
- /* A quick check from sanity claus */
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- panic("cpuid mismatch! boom!!");
- }
-#endif
-
- /* Initialize curthread. */
- KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- mtx_lock_spin(&ap_boot_mtx);
-#if 0
-
- /* Init local apic for irq's */
- lapic_setup(1);
-#endif
- smp_cpus++;
-
- cpuid = PCPU_GET(cpuid);
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
- printf("SMP: AP CPU #%d Launched!\n", cpuid);
-
- /* Determine if we are a logical CPU. */
- if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
- CPU_SET(cpuid, &logical_cpus_mask);
-
- /* Determine if we are a hyperthread. */
- if (hyperthreading_cpus > 1 &&
- PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- CPU_SET(cpuid, &hyperthreading_cpus_mask);
-#if 0
- if (bootverbose)
- lapic_dump("AP");
-#endif
- if (smp_cpus == mp_ncpus) {
- /* enable IPI's, tlb shootdown, freezes etc */
- atomic_store_rel_int(&smp_started, 1);
- }
-
- mtx_unlock_spin(&ap_boot_mtx);
-
-	/* wait until all the APs are up */
- while (smp_started == 0)
- ia32_pause();
-
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- /* Start per-CPU event timers. */
- cpu_initclocks_ap();
-
- /* enter the scheduler */
- sched_throw(NULL);
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
-}
-
-/*******************************************************************
- * local functions and data
- */
-
-/*
- * We tell the I/O APIC code about all the CPUs we want to receive
- * interrupts. If we don't want certain CPUs to receive IRQs we
- * can simply not tell the I/O APIC code about them in this function.
- * We also do not tell it about the BSP since it tells itself about
- * the BSP internally to work with UP kernels and on UP machines.
- */
-void
-set_interrupt_apic_ids(void)
-{
- u_int i, apic_id;
-
- for (i = 0; i < MAXCPU; i++) {
- apic_id = cpu_apic_ids[i];
- if (apic_id == -1)
- continue;
- if (cpu_info[apic_id].cpu_bsp)
- continue;
- if (cpu_info[apic_id].cpu_disabled)
- continue;
-
- /* Don't let hyperthreads service interrupts. */
- if (hyperthreading_cpus > 1 &&
- apic_id % hyperthreading_cpus != 0)
- continue;
-
- intr_add_cpu(i);
- }
-}
-
-/*
- * Assign logical CPU IDs to local APICs.
- */
-void
-assign_cpu_ids(void)
-{
- u_int i;
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 has already been assigned to the BSP,
- * so we only have to assign IDs for APs.
- */
- mp_ncpus = 1;
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
-
-/*
- * start each AP in our list
- */
-/* Lowest 1MB is already mapped: don't touch */
-#define TMPMAP_START 1
-int
-start_all_aps(void)
-{
- int x,apic_id, cpu;
- struct pcpu *pc;
-
- mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-
- /* set up temporary P==V mapping for AP boot */
- /* XXX this is a hack, we should boot the AP on its own stack/PTD */
-
- /* start each AP */
- for (cpu = 1; cpu < mp_ncpus; cpu++) {
- apic_id = cpu_apic_ids[cpu];
-
-
- bootAP = cpu;
- bootAPgdt = gdt + (512*cpu);
-
- /* Get per-cpu data */
- pc = &__pcpu[bootAP];
- pcpu_init(pc, bootAP, sizeof(struct pcpu));
- dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
- M_WAITOK | M_ZERO), bootAP);
- pc->pc_apic_id = cpu_apic_ids[bootAP];
- pc->pc_vcpu_id = cpu_apic_ids[bootAP];
- pc->pc_prvspace = pc;
- pc->pc_curthread = 0;
-
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
-
- PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW);
- bzero(bootAPgdt, PAGE_SIZE);
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
- PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
-#ifdef notyet
-
- if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
- apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
- acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
-#ifdef CONFIG_ACPI
- if (acpiid != 0xff)
- x86_acpiid_to_apicid[acpiid] = apicid;
-#endif
- }
-#endif
-
- /* attempt to start the Application Processor */
- if (!start_ap(cpu)) {
- printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
- /* better panic as the AP may be running loose */
- printf("panic y/n? [y] ");
- if (cngetc() != 'n')
- panic("bye-bye");
- }
-
- CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
- }
-
-
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
- /* number of APs actually started */
- return (mp_naps);
-}
-
-extern uint8_t *pcpu_boot_stack;
-extern trap_info_t trap_table[];
-
-static void
-smp_trap_init(trap_info_t *trap_ctxt)
-{
- const trap_info_t *t = trap_table;
-
- for (t = trap_table; t->address; t++) {
- trap_ctxt[t->vector].flags = t->flags;
- trap_ctxt[t->vector].cs = t->cs;
- trap_ctxt[t->vector].address = t->address;
- }
-}
-
-extern struct rwlock pvh_global_lock;
-extern int nkpt;
-static void
-cpu_initialize_context(unsigned int cpu)
-{
-	/*
-	 * vcpu_guest_context_t is too large to allocate on the stack.
-	 * Hence we allocate it statically and protect it with a lock.
-	 */
- vm_page_t m[NPGPTD + 2];
- static vcpu_guest_context_t ctxt;
- vm_offset_t boot_stack;
- vm_offset_t newPTD;
- vm_paddr_t ma[NPGPTD];
- int i;
-
- /*
-	 * Pages [0-3]: PTD
-	 * Page  [4]:  boot stack
-	 * Page  [5]:  PDPT
- */
- for (i = 0; i < NPGPTD + 2; i++) {
- m[i] = vm_page_alloc(NULL, 0,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
-
- pmap_zero_page(m[i]);
-
- }
- boot_stack = kva_alloc(PAGE_SIZE);
- newPTD = kva_alloc(NPGPTD * PAGE_SIZE);
- ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;
-
-#ifdef PAE
- pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
- for (i = 0; i < NPGPTD; i++) {
- ((vm_paddr_t *)boot_stack)[i] =
- ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V;
- }
-#endif
-
- /*
- * Copy cpu0 IdlePTD to new IdlePTD - copying only
- * kernel mappings
- */
- pmap_qenter(newPTD, m, 4);
-
- memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
- (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
- nkpt*sizeof(vm_paddr_t));
-
- pmap_qremove(newPTD, 4);
- kva_free(newPTD, 4 * PAGE_SIZE);
- /*
- * map actual idle stack to boot_stack
- */
- pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));
-
-
- xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
- rw_wlock(&pvh_global_lock);
- for (i = 0; i < 4; i++) {
- int pdir = (PTDPTDI + i) / NPDEPG;
- int curoffset = (PTDPTDI + i) % NPDEPG;
-
- xen_queue_pt_update((vm_paddr_t)
- ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
- ma[i]);
- }
- PT_UPDATES_FLUSH();
- rw_wunlock(&pvh_global_lock);
-
- memset(&ctxt, 0, sizeof(ctxt));
- ctxt.flags = VGCF_IN_KERNEL;
- ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
- ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.eip = (unsigned long)init_secondary;
- ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */
-
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- smp_trap_init(ctxt.trap_ctxt);
-
- ctxt.ldt_ents = 0;
- ctxt.gdt_frames[0] =
- (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
- ctxt.gdt_ents = 512;
-
-#ifdef __i386__
- ctxt.user_regs.esp = boot_stack + PAGE_SIZE;
-
- ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.kernel_sp = boot_stack + PAGE_SIZE;
-
- ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
- ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
-#else /* __x86_64__ */
- ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
- ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.kernel_sp = idle->thread.rsp0;
-
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
- ctxt.syscall_callback_eip = (unsigned long)system_call;
-
- ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
-
- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
-#endif
-
- printf("gdtpfn=%lx pdptpfn=%lx\n",
- ctxt.gdt_frames[0],
- ctxt.ctrlreg[3] >> PAGE_SHIFT);
-
- PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
- DELAY(3000);
- PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
-}
-
-/*
- * This function starts the AP (application processor) identified
- * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
- * to accomplish this. This is necessary because of the nuances
- * of the different hardware we might encounter. It isn't pretty,
- * but it seems to work.
- */
-
-int cpus;
-static int
-start_ap(int apic_id)
-{
- int ms;
-
- /* used as a watchpoint to signal AP startup */
- cpus = mp_naps;
-
- cpu_initialize_context(apic_id);
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (mp_naps > cpus)
- return (1); /* return SUCCESS */
- DELAY(1000);
- }
- return (0); /* return FAILURE */
-}
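start_ap() and init_secondary() share only a counter: the AP bumps mp_naps once it is alive, and the BSP polls for the bump with a 5-second timeout. A minimal userland model (usleep() stands in for DELAY(); a sketch, not kernel code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <unistd.h>

    static atomic_int naps;     /* each AP increments this as it comes up */

    /* Poll up to 5000 ms for one more AP to check in. */
    static bool
    wait_for_ap(int prev)
    {
            for (int ms = 0; ms < 5000; ms++) {
                    if (atomic_load(&naps) > prev)
                            return (true);
                    usleep(1000);
            }
            return (false);
    }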
-
-static void
-ipi_pcpu(int cpu, u_int ipi)
-{
- KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI"));
- xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi]));
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (!old_pending)
- ipi_pcpu(cpu, RESCHEDULE_VECTOR);
- } else {
- KASSERT(call_data != NULL, ("call_data not set"));
- ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
- }
-}
-
-/*
- * Flush the TLB on all other CPU's
- */
-static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
-{
- u_int ncpu;
- struct _call_data data;
-
- ncpu = mp_ncpus - 1; /* does not shootdown self */
- if (ncpu < 1)
- return; /* no other cpus */
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- KASSERT(call_data == NULL, ("call_data isn't null?!"));
- call_data = &data;
- call_data->func_id = vector;
- call_data->arg1 = addr1;
- call_data->arg2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- call_data = NULL;
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1,
- vm_offset_t addr2)
-{
- int cpu, ncpu, othercpus;
- struct _call_data data;
-
- othercpus = mp_ncpus - 1;
- if (CPU_ISFULLSET(&mask)) {
- if (othercpus < 1)
- return;
- } else {
- CPU_CLR(PCPU_GET(cpuid), &mask);
- if (CPU_EMPTY(&mask))
- return;
- }
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- KASSERT(call_data == NULL, ("call_data isn't null?!"));
- call_data = &data;
- call_data->func_id = vector;
- call_data->arg1 = addr1;
- call_data->arg2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- if (CPU_ISFULLSET(&mask)) {
- ncpu = othercpus;
- ipi_all_but_self(vector);
- } else {
- ncpu = 0;
- while ((cpu = CPU_FFS(&mask)) != 0) {
- cpu--;
- CPU_CLR(cpu, &mask);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
- vector);
- ipi_send_cpu(cpu, vector);
- ncpu++;
- }
- }
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- call_data = NULL;
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
-}
-
-void
-smp_invltlb(void)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
- }
-}
-
-void
-smp_invlpg(vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, addr, 0);
- }
-}
-
-void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
- }
-}
-
-void
-smp_masked_invltlb(cpuset_t mask)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
- }
-}
-
-void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
- }
-}
-
-void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
- }
-}
-
-/*
- * send an IPI to a set of cpus.
- */
-void
-ipi_selected(cpuset_t cpus, u_int ipi)
-{
- int cpu;
-
- /*
-	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
-	 * of help to understand what the source is.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
-
- while ((cpu = CPU_FFS(&cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &cpus);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
- }
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_cpu(int cpu, u_int ipi)
-{
-
- /*
-	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
-	 * of help to understand what the source is.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
-
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
-}
-
-/*
- * send an IPI to all CPUs EXCEPT myself
- */
-void
-ipi_all_but_self(u_int ipi)
-{
- cpuset_t other_cpus;
-
- /*
-	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
-	 * of help to understand what the source is.
- * Set the mask of receiving CPUs for this purpose.
- */
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- ipi_selected(other_cpus, ipi);
-}
-
-int
-ipi_nmi_handler()
-{
- u_int cpuid;
-
- /*
-	 * As long as there is no simple way to know about an NMI's
- * source, if the bitmask for the current CPU is present in
- * the global pending bitword an IPI_STOP_HARD has been issued
- * and should be handled.
- */
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
- return (1);
-
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
- cpustop_handler();
- return (0);
-}
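Because an NMI carries no indication of its sender, the sender leaves a calling card instead: a per-CPU bit in ipi_nmi_pending, set before raising the NMI-backed IPI_STOP_HARD and atomically cleared by the handler that claims it. A compact model (C11 atomics, one 32-bit word, so CPU ids below 32; illustrative only):

    #include <stdatomic.h>

    static atomic_uint nmi_pending;     /* one bit per CPU */

    /* Sender: mark 'cpu' before raising the NMI-backed IPI_STOP_HARD. */
    static void
    mark_stop_hard(int cpu)
    {
            atomic_fetch_or(&nmi_pending, 1u << cpu);
    }

    /* Handler: returns 0 if this NMI was ours (and claims it), 1 otherwise. */
    static int
    nmi_is_ipi(int cpu)
    {
            unsigned bit = 1u << cpu;

            if (!(atomic_load(&nmi_pending) & bit))
                    return (1);
            atomic_fetch_and(&nmi_pending, ~bit);
            return (0);
    }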
-
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpustop_handler(void)
-{
- int cpu;
-
- cpu = PCPU_GET(cpuid);
-
- savectx(&stoppcbs[cpu]);
-
- /* Indicate that we are stopped */
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
-
- /* Wait for restart */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
-
- if (cpu == 0 && cpustop_restartfunc != NULL) {
- cpustop_restartfunc();
- cpustop_restartfunc = NULL;
- }
-}
-
-/*
- * Handlers for TLB-related IPIs
- *
- * On i386 Xen PV these are no-ops since this port doesn't support SMP.
- */
-void
-invltlb_handler(void)
-{
-}
-
-void
-invlpg_handler(void)
-{
-}
-
-void
-invlrng_handler(void)
-{
-}
-
-void
-invlcache_handler(void)
-{
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the APs out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);
-SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL);
diff --git a/sys/i386/xen/mptable.c b/sys/i386/xen/mptable.c
deleted file mode 100644
index 81d7c1bafc64..000000000000
--- a/sys/i386/xen/mptable.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
- * Copyright (c) 1996, by Steve Passe
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <x86/apicvar.h>
-
-#include <xen/hypervisor.h>
-#include <xen/xen-os.h>
-#include <machine/smp.h>
-#include <xen/interface/vcpu.h>
-
-
-static int mptable_probe(void);
-static int mptable_probe_cpus(void);
-static void mptable_register(void *dummy);
-static int mptable_setup_local(void);
-static int mptable_setup_io(void);
-
-static struct apic_enumerator mptable_enumerator = {
- "MPTable",
- mptable_probe,
- mptable_probe_cpus,
- mptable_setup_local,
- mptable_setup_io
-};
-
-static int
-mptable_probe(void)
-{
-
- return (-100);
-}
-
-static int
-mptable_probe_cpus(void)
-{
- int i, rc;
-
- for (i = 0; i < MAXCPU; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0)
- cpu_add(i, (i == 0));
- }
-
- return (0);
-}
-
-/*
- * Initialize the local APIC on the BSP.
- */
-static int
-mptable_setup_local(void)
-{
-
- PCPU_SET(apic_id, 0);
- PCPU_SET(vcpu_id, 0);
- return (0);
-}
-
-static int
-mptable_setup_io(void)
-{
-
- return (0);
-}
-
-static void
-mptable_register(void *dummy __unused)
-{
-
- apic_register_enumerator(&mptable_enumerator);
-}
-SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register,
- NULL);
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
deleted file mode 100644
index 757fc36bb3d4..000000000000
--- a/sys/i386/xen/pmap.c
+++ /dev/null
@@ -1,4420 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- * Copyright (c) 1994 John S. Dyson
- * All rights reserved.
- * Copyright (c) 1994 David Greenman
- * All rights reserved.
- * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department and William Jolitz of UUNET Technologies Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- */
-/*-
- * Copyright (c) 2003 Networks Associates Technology, Inc.
- * All rights reserved.
- *
- * This software was developed for the FreeBSD Project by Jake Burkholder,
- * Safeport Network Services, and Network Associates Laboratories, the
- * Security Research Division of Network Associates, Inc. under
- * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
- * CHATS research program.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Manages physical address maps.
- *
- * Since the information managed by this module is
- * also stored by the logical address mapping module,
- * this module may throw away valid virtual-to-physical
- * mappings at almost any time. However, invalidations
- * of virtual-to-physical mappings must be done as
- * requested.
- *
- * In order to cope with hardware architectures which
- * make virtual-to-physical map invalidates expensive,
- * this module may delay invalidation or reduced-protection
- * operations until such time as they are actually
- * necessary. This module is given full information as
- * to which processors are currently using which maps,
- * and to when physical maps must be made correct.
- */
-
-#include "opt_cpu.h"
-#include "opt_pmap.h"
-#include "opt_smp.h"
-#include "opt_xbox.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mman.h>
-#include <sys/msgbuf.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/sf_buf.h>
-#include <sys/sx.h>
-#include <sys/vmmeter.h>
-#include <sys/sched.h>
-#include <sys/sysctl.h>
-#ifdef SMP
-#include <sys/smp.h>
-#else
-#include <sys/cpuset.h>
-#endif
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_object.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
-#include <vm/uma.h>
-
-#include <machine/cpu.h>
-#include <machine/cputypes.h>
-#include <machine/md_var.h>
-#include <machine/pcb.h>
-#include <machine/specialreg.h>
-#ifdef SMP
-#include <machine/smp.h>
-#endif
-
-#ifdef XBOX
-#include <machine/xbox.h>
-#endif
-
-#include <xen/interface/xen.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/hypercall.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-
-#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-
-#ifndef PMAP_SHPGPERPROC
-#define PMAP_SHPGPERPROC 200
-#endif
-
-#define DIAGNOSTIC
-
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINE extern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
-#ifdef PV_STATS
-#define PV_STAT(x) do { x ; } while (0)
-#else
-#define PV_STAT(x) do { } while (0)
-#endif
-
-/*
- * Get PDEs and PTEs for user/kernel address space
- */
-#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
-#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
-
-#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
-#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
-#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
-#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
-#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
-
-#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
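-
-/*
- * Illustrative example (not part of the original file): on non-PAE i386
- * a 32-bit VA splits into a 10-bit page directory index, a 10-bit page
- * table index and a 12-bit offset, which is all pmap_pde() above relies
- * on.  For va = 0xc0401234:
- *
- *	pde index = va >> PDRSHIFT                    = 0x301 (PDRSHIFT == 22)
- *	pte index = (va >> PAGE_SHIFT) & (NPTEPG - 1) = 0x001
- *	offset    = va & PAGE_MASK                    = 0x234
- */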
-
-#define HAMFISTED_LOCKING
-#ifdef HAMFISTED_LOCKING
-static struct mtx createdelete_lock;
-#endif
-
-struct pmap kernel_pmap_store;
-LIST_HEAD(pmaplist, pmap);
-static struct pmaplist allpmaps;
-static struct mtx allpmaps_lock;
-
-vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
-vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
-int pgeflag = 0; /* PG_G or-in */
-int pseflag = 0; /* PG_PS or-in */
-
-int nkpt;
-vm_offset_t kernel_vm_end;
-extern u_int32_t KERNend;
-
-#ifdef PAE
-pt_entry_t pg_nx;
-#endif
-
-static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-static int pat_works; /* Is page attribute table sane? */
-
-/*
- * This lock is defined as static in other pmap implementations. It cannot,
- * however, be defined as static here, because it is (ab)used to serialize
- * queued page table changes in other sources files.
- */
-struct rwlock pvh_global_lock;
-
-/*
- * Data for the pv entry allocation mechanism
- */
-static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
-static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-static int shpgperproc = PMAP_SHPGPERPROC;
-
-struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
-int pv_maxchunks; /* How many chunks we have KVA for */
-vm_offset_t pv_vafree; /* freelist stored in the PTE */
-
-/*
- * All those kernel PT submaps that BSD is so fond of
- */
-struct sysmaps {
- struct mtx lock;
- pt_entry_t *CMAP1;
- pt_entry_t *CMAP2;
- caddr_t CADDR1;
- caddr_t CADDR2;
-};
-static struct sysmaps sysmaps_pcpu[MAXCPU];
-pt_entry_t *CMAP3;
-caddr_t ptvmmap = 0;
-caddr_t CADDR3;
-struct msgbuf *msgbufp = 0;
-
-/*
- * Crashdump maps.
- */
-static caddr_t crashdumpmap;
-
-static pt_entry_t *PMAP1 = 0, *PMAP2;
-static pt_entry_t *PADDR1 = 0, *PADDR2;
-#ifdef SMP
-static int PMAP1cpu;
-static int PMAP1changedcpu;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
- &PMAP1changedcpu, 0,
- "Number of times pmap_pte_quick changed CPU with same PMAP1");
-#endif
-static int PMAP1changed;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
- &PMAP1changed, 0,
- "Number of times pmap_pte_quick changed PMAP1");
-static int PMAP1unchanged;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
- &PMAP1unchanged, 0,
- "Number of times pmap_pte_quick didn't change PMAP1");
-static struct mtx PMAP2mutex;
-
-static void free_pv_chunk(struct pv_chunk *pc);
-static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
-static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
-static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
- vm_offset_t va);
-
-static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static void pmap_flush_page(vm_page_t m);
-static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
-static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- vm_page_t *free);
-static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
- vm_page_t *free);
-static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
- vm_offset_t va);
-static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m);
-
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
-
-static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags);
-static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
-static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
-static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
-
-static __inline void pagezero(void *page);
-
-CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
-CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
-
-/*
- * If you get an error here, then you set KVA_PAGES wrong! See the
- * description of KVA_PAGES in sys/i386/include/pmap.h. It must be a
- * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
- */
-CTASSERT(KERNBASE % (1 << 24) == 0);
-
-void
-pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
-{
- vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);
-
- switch (type) {
- case SH_PD_SET_VA:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma,
- xpmap_ptom(val & ~(PG_RW)));
-#endif
- xen_queue_pt_update(pdir_ma,
- xpmap_ptom(val));
- break;
- case SH_PD_SET_VA_MA:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma,
- val & ~(PG_RW));
-#endif
- xen_queue_pt_update(pdir_ma, val);
- break;
- case SH_PD_SET_VA_CLEAR:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma, 0);
-#endif
- xen_queue_pt_update(pdir_ma, 0);
- break;
- }
-}
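-
-/*
- * Hedged usage sketch (added for illustration, not in the original
- * file): a caller installing a new PDE under Xen queues the update and
- * then flushes, e.g.:
- *
- *	pd_set(pmap, ptepindex, VM_PAGE_TO_PHYS(m) | PG_V | PG_A | PG_RW,
- *	    SH_PD_SET_VA);
- *	xen_flush_queue();
- *
- * SH_PD_SET_VA translates the physical address to a machine address via
- * xpmap_ptom(); SH_PD_SET_VA_MA expects a machine address directly.
- */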
-
-/*
- * Bootstrap the system enough to run with virtual memory.
- *
- * On the i386 this is called after mapping has already been enabled
- * and just syncs the pmap module with what has already been done.
- * [We can't call it easily with mapping off since the kernel is not
- * mapped with PA == VA, hence we would have to relocate every address
- * from the linked base (virtual) address "KERNBASE" to the actual
- * (physical) address starting relative to 0]
- */
-void
-pmap_bootstrap(vm_paddr_t firstaddr)
-{
- vm_offset_t va;
- pt_entry_t *pte, *unused;
- struct sysmaps *sysmaps;
- int i;
-
- /*
- * Initialize the first available kernel virtual address. However,
- * using "firstaddr" may waste a few pages of the kernel virtual
- * address space, because locore may not have mapped every physical
- * page that it allocated. Preferably, locore would provide a first
- * unused virtual address in addition to "firstaddr".
- */
- virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
-
- virtual_end = VM_MAX_KERNEL_ADDRESS;
-
- /*
- * Initialize the kernel pmap (which is statically allocated).
- */
- PMAP_LOCK_INIT(kernel_pmap);
- kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
-#ifdef PAE
- kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
-#endif
- CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
- TAILQ_INIT(&kernel_pmap->pm_pvchunk);
-
- /*
- * Initialize the global pv list lock.
- */
- rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE);
-
- LIST_INIT(&allpmaps);
- mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
- if (nkpt == 0)
- nkpt = NKPT;
-
- /*
- * Reserve some special page table entries/VA space for temporary
- * mapping of pages.
- */
-#define SYSMAP(c, p, v, n) \
- v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
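-
-/*
- * For example (illustrative only), SYSMAP(caddr_t, CMAP3, CADDR3, 1)
- * expands to
- *
- *	CADDR3 = (caddr_t)va; va += (1 * PAGE_SIZE); CMAP3 = pte; pte += 1;
- *
- * reserving one page of KVA at "va" and remembering its PTE slot in
- * CMAP3.
- */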
-
- va = virtual_avail;
- pte = vtopte(va);
-
- /*
- * CMAP1/CMAP2 are used for zeroing and copying pages.
- * CMAP3 is used for the idle process page zeroing.
- */
- for (i = 0; i < MAXCPU; i++) {
- sysmaps = &sysmaps_pcpu[i];
- mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
- SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
- SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- }
- SYSMAP(caddr_t, CMAP3, CADDR3, 1)
- PT_SET_MA(CADDR3, 0);
-
- /*
- * Crashdump maps.
- */
- SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
-
- /*
- * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
- */
- SYSMAP(caddr_t, unused, ptvmmap, 1)
-
- /*
- * msgbufp is used to map the system message buffer.
- */
- SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))
-
- /*
- * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(),
- * respectively.
- */
- SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
- SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
-
- mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
-
- virtual_avail = va;
-
- /*
- * Leave in place an identity mapping (virt == phys) for the low 1 MB
- * physical memory region that is used by the ACPI wakeup code. This
- * mapping must not have PG_G set.
- */
-#ifndef XEN
- /*
-	 * Left in place deliberately to show that this is not supported
-	 * under Xen.
- */
-#ifdef XBOX
-	/* FIXME: This is gross, but needed for the XBOX. Since we are at such
-	 * an early stage, we cannot yet neatly map video memory ... :-(
- * Better fixes are very welcome! */
- if (!arch_i386_is_xbox)
-#endif
- for (i = 1; i < NKPT; i++)
- PTD[i] = 0;
-
- /* Initialize the PAT MSR if present. */
- pmap_init_pat();
-
- /* Turn on PG_G on kernel page(s) */
- pmap_set_pg();
-#endif
-
-#ifdef HAMFISTED_LOCKING
- mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
-#endif
-}
-
-/*
- * Setup the PAT MSR.
- */
-void
-pmap_init_pat(void)
-{
- uint64_t pat_msr;
-
- /* Bail if this CPU doesn't implement PAT. */
- if (!(cpu_feature & CPUID_PAT))
- return;
-
- if (cpu_vendor_id != CPU_VENDOR_INTEL ||
- (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
- /*
-		 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
-		 * Program 4 and 5 as WP and WC.
-		 * Leave 6 and 7 as UC- and UC.
- */
- pat_msr = rdmsr(MSR_PAT);
- pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
- pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
- PAT_VALUE(5, PAT_WRITE_COMBINING);
- pat_works = 1;
- } else {
- /*
- * Due to some Intel errata, we can only safely use the lower 4
- * PAT entries. Thus, just replace PAT Index 2 with WC instead
- * of UC-.
- *
- * Intel Pentium III Processor Specification Update
- * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
- * or Mode C Paging)
- *
- * Intel Pentium IV Processor Specification Update
- * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
- */
- pat_msr = rdmsr(MSR_PAT);
- pat_msr &= ~PAT_MASK(2);
- pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
- pat_works = 0;
- }
- wrmsr(MSR_PAT, pat_msr);
-}
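-
-/*
- * Illustrative summary (added for clarity, not in the original file):
- * with pat_works set, the PAT indices end up programmed as
- *
- *	0 WB   1 WT   2 UC-   3 UC   4 WP   5 WC   6 UC-   7 UC
- *
- * while on the erratum-afflicted CPUs only index 2 is changed:
- *
- *	0 WB   1 WT   2 WC    3 UC   (indices 4-7 unused)
- */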
-
-/*
- * Initialize a vm_page's machine-dependent fields.
- */
-void
-pmap_page_init(vm_page_t m)
-{
-
- TAILQ_INIT(&m->md.pv_list);
- m->md.pat_mode = PAT_WRITE_BACK;
-}
-
-/*
- * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
- * Requirements:
- * - Must deal with pages in order to ensure that none of the PG_* bits
- * are ever set, PG_V in particular.
- * - Assumes we can write to ptes without pte_store() atomic ops, even
- * on PAE systems. This should be ok.
- * - Assumes nothing will ever test these addresses for 0 to indicate
- * no mapping instead of correctly checking PG_V.
- * - Assumes a vm_offset_t will fit in a pte (true for i386).
- * Because PG_V is never set, there can be no mappings to invalidate.
- */
-static int ptelist_count = 0;
-static vm_offset_t
-pmap_ptelist_alloc(vm_offset_t *head)
-{
- vm_offset_t va;
- vm_offset_t *phead = (vm_offset_t *)*head;
-
- if (ptelist_count == 0) {
-		printf("pmap_ptelist_alloc: out of pte list entries\n");
- return (0); /* Out of memory */
- }
- ptelist_count--;
- va = phead[ptelist_count];
- return (va);
-}
-
-static void
-pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
-{
- vm_offset_t *phead = (vm_offset_t *)*head;
-
- phead[ptelist_count++] = va;
-}
-
-static void
-pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
-{
- int i, nstackpages;
- vm_offset_t va;
- vm_page_t m;
-
-	nstackpages = (npages + PAGE_SIZE / sizeof(vm_offset_t) - 1) /
-	    (PAGE_SIZE / sizeof(vm_offset_t));
- for (i = 0; i < nstackpages; i++) {
- va = (vm_offset_t)base + i * PAGE_SIZE;
- m = vm_page_alloc(NULL, i,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- pmap_qenter(va, &m, 1);
- }
-
- *head = (vm_offset_t)base;
- for (i = npages - 1; i >= nstackpages; i--) {
- va = (vm_offset_t)base + i * PAGE_SIZE;
- pmap_ptelist_free(head, va);
- }
-}
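-
-/*
- * Hedged usage sketch (added for illustration, not in the original
- * file): pmap_init() and the pv entry code below use this freelist to
- * hand out single-page KVA slots for pv chunks, roughly:
- *
- *	va = pmap_ptelist_alloc(&pv_vafree);	(grab a free KVA slot)
- *	pmap_qenter(va, &m, 1);			(map the backing page)
- *	...
- *	pmap_qremove(va, 1);			(unmap it again)
- *	pmap_ptelist_free(&pv_vafree, va);	(return the KVA slot)
- */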
-
-
-/*
- * Initialize the pmap module.
- * Called by vm_init, to initialize any structures that the pmap
- * system needs to map virtual memory.
- */
-void
-pmap_init(void)
-{
-
- /*
- * Initialize the address space (zone) for the pv entries. Set a
- * high water mark so that the system can recover from excessive
- * numbers of pv entries.
- */
- TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
- pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
- TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
- pv_entry_max = roundup(pv_entry_max, _NPCPV);
- pv_entry_high_water = 9 * (pv_entry_max / 10);
-
- pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
- pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
- if (pv_chunkbase == NULL)
- panic("pmap_init: not enough kvm for pv chunks");
- pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
-}
-
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
- "Max number of PV entries");
-SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
- "Page share factor per proc");
-
-static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
- "2/4MB page mapping counters");
-
-static u_long pmap_pde_mappings;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
- &pmap_pde_mappings, 0, "2/4MB page mappings");
-
-/***************************************************
- * Low level helper routines.....
- ***************************************************/
-
-/*
- * Determine the appropriate bits to set in a PTE or PDE for a specified
- * caching mode.
- */
-int
-pmap_cache_bits(int mode, boolean_t is_pde)
-{
- int pat_flag, pat_index, cache_bits;
-
-	/* The PAT bit is different for PTEs and PDEs. */
- pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
-
- /* If we don't support PAT, map extended modes to older ones. */
- if (!(cpu_feature & CPUID_PAT)) {
- switch (mode) {
- case PAT_UNCACHEABLE:
- case PAT_WRITE_THROUGH:
- case PAT_WRITE_BACK:
- break;
- case PAT_UNCACHED:
- case PAT_WRITE_COMBINING:
- case PAT_WRITE_PROTECTED:
- mode = PAT_UNCACHEABLE;
- break;
- }
- }
-
- /* Map the caching mode to a PAT index. */
- if (pat_works) {
- switch (mode) {
- case PAT_UNCACHEABLE:
- pat_index = 3;
- break;
- case PAT_WRITE_THROUGH:
- pat_index = 1;
- break;
- case PAT_WRITE_BACK:
- pat_index = 0;
- break;
- case PAT_UNCACHED:
- pat_index = 2;
- break;
- case PAT_WRITE_COMBINING:
- pat_index = 5;
- break;
- case PAT_WRITE_PROTECTED:
- pat_index = 4;
- break;
- default:
- panic("Unknown caching mode %d\n", mode);
- }
- } else {
- switch (mode) {
- case PAT_UNCACHED:
- case PAT_UNCACHEABLE:
- case PAT_WRITE_PROTECTED:
- pat_index = 3;
- break;
- case PAT_WRITE_THROUGH:
- pat_index = 1;
- break;
- case PAT_WRITE_BACK:
- pat_index = 0;
- break;
- case PAT_WRITE_COMBINING:
- pat_index = 2;
- break;
- default:
- panic("Unknown caching mode %d\n", mode);
- }
- }
-
- /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
- cache_bits = 0;
- if (pat_index & 0x4)
- cache_bits |= pat_flag;
- if (pat_index & 0x2)
- cache_bits |= PG_NC_PCD;
- if (pat_index & 0x1)
- cache_bits |= PG_NC_PWT;
- return (cache_bits);
-}
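-
-/*
- * Worked example (added for clarity, not in the original file): with a
- * working PAT, PAT_WRITE_COMBINING maps to index 5 (binary 101), so
- * pmap_cache_bits() returns pat_flag | PG_NC_PWT, i.e.
- * PG_PTE_PAT | PG_NC_PWT for a PTE.
- */
-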
-#ifdef SMP
-/*
- * For SMP, these functions have to use the IPI mechanism for coherence.
- *
- * N.B.: Before calling any of the following TLB invalidation functions,
- * the calling processor must ensure that all stores updating a non-
- * kernel page table are globally performed. Otherwise, another
- * processor could cache an old, pre-update entry without being
- * invalidated. This can happen one of two ways: (1) The pmap becomes
- * active on another processor after its pm_active field is checked by
- * one of the following functions but before a store updating the page
- * table is globally performed. (2) The pmap becomes active on another
- * processor before its pm_active field is checked but due to
- * speculative loads one of the following functions still reads the
- * pmap as inactive on the other processor.
- *
- * The kernel page table is exempt because its pm_active field is
- * immutable. The kernel page table is always active on every
- * processor.
- */
-void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- cpuset_t other_cpus;
- u_int cpuid;
-
- CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
- pmap, va);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invlpg(va);
- smp_invlpg(va);
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invlpg(va);
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg(other_cpus, va);
- }
- sched_unpin();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- cpuset_t other_cpus;
- vm_offset_t addr;
- u_int cpuid;
-
-	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
-	    pmap, sva, eva);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- smp_invlpg_range(sva, eva);
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg_range(other_cpus, sva, eva);
- }
- sched_unpin();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_all(pmap_t pmap)
-{
- cpuset_t other_cpus;
- u_int cpuid;
-
-	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invltlb();
- smp_invltlb();
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invltlb();
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invltlb(other_cpus);
- }
- sched_unpin();
-}
-
-void
-pmap_invalidate_cache(void)
-{
-
- sched_pin();
- wbinvd();
- smp_cache_flush();
- sched_unpin();
-}
-#else /* !SMP */
-/*
- * Normal, non-SMP, 486+ invalidation functions.
- * We inline these within pmap.c for speed.
- */
-PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
- pmap, va);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invlpg(va);
- PT_UPDATES_FLUSH();
-}
-
-PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t addr;
-
- if (eva - sva > PAGE_SIZE)
- CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- PT_UPDATES_FLUSH();
-}
-
-PMAP_INLINE void
-pmap_invalidate_all(pmap_t pmap)
-{
-
- CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invltlb();
-}
-
-PMAP_INLINE void
-pmap_invalidate_cache(void)
-{
-
- wbinvd();
-}
-#endif /* !SMP */
-
-#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
-
-void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
-{
-
- if (force) {
-		sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
- }
-
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported, do nothing. */
- else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
- /*
- * Otherwise, do per-cache line flush. Use the mfence
-	 * instruction to ensure that previous stores are
- * included in the write-back. The processor
- * propagates flush to other processors in the cache
- * coherence domain.
- */
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
- } else {
-
- /*
-		 * No targeted cache flush methods are supported by the CPU,
-		 * or the supplied range is larger than 2 MB.
-		 * Globally invalidate the cache.
- */
- pmap_invalidate_cache();
- }
-}
-
-void
-pmap_invalidate_cache_pages(vm_page_t *pages, int count)
-{
- int i;
-
- if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
- (cpu_feature & CPUID_CLFSH) == 0) {
- pmap_invalidate_cache();
- } else {
- for (i = 0; i < count; i++)
- pmap_flush_page(pages[i]);
- }
-}
-
-/*
- * Are we current address space or kernel? N.B. We return FALSE when
- * a pmap's page table is in use because a kernel thread is borrowing
- * it. The borrowed page table can change spontaneously, making any
- * dependence on its continued use subject to a race condition.
- */
-static __inline int
-pmap_is_current(pmap_t pmap)
-{
-
- return (pmap == kernel_pmap ||
- (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
- (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
-}
-
-/*
- * If the given pmap is not the current or kernel pmap, the returned pte must
- * be released by passing it to pmap_pte_release().
- */
-pt_entry_t *
-pmap_pte(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- mtx_lock(&PMAP2mutex);
- newpf = *pde & PG_FRAME;
- if ((*PMAP2 & PG_FRAME) != newpf) {
- PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
- CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
- pmap, va, (*PMAP2 & 0xffffffff));
- }
- return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (NULL);
-}
-
-/*
- * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte
- * being NULL.
- */
-static __inline void
-pmap_pte_release(pt_entry_t *pte)
-{
-
- if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
- CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
- *PMAP2);
- rw_wlock(&pvh_global_lock);
- PT_SET_VA(PMAP2, 0, TRUE);
- rw_wunlock(&pvh_global_lock);
- mtx_unlock(&PMAP2mutex);
- }
-}
-
-static __inline void
-invlcaddr(void *caddr)
-{
-
- invlpg((u_int)caddr);
- PT_UPDATES_FLUSH();
-}
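-
-/*
- * Note (added for clarity, not in the original file): under Xen/PV,
- * page table writes are queued as hypervisor mmu updates instead of
- * being issued directly, so PT_UPDATES_FLUSH() follows the TLB
- * operations above and below to push any queued updates to the
- * hypervisor before the invalidation is relied upon.
- */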
-
-/*
- * Super fast pmap_pte routine best used when scanning
- * the pv lists. This eliminates many coarse-grained
- * invltlb calls. Note that many of the pv list
- * scans are across different pmaps. It is very wasteful
- * to do an entire invltlb for checking a single mapping.
- *
- * If the given pmap is not the current pmap, pvh_global_lock
- * must be held and curthread pinned to a CPU.
- */
-static pt_entry_t *
-pmap_pte_quick(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- newpf = *pde & PG_FRAME;
- if ((*PMAP1 & PG_FRAME) != newpf) {
- PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
- CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
- pmap, va, (u_long)*PMAP1);
-
-#ifdef SMP
- PMAP1cpu = PCPU_GET(cpuid);
-#endif
- PMAP1changed++;
- } else
-#ifdef SMP
- if (PMAP1cpu != PCPU_GET(cpuid)) {
- PMAP1cpu = PCPU_GET(cpuid);
- invlcaddr(PADDR1);
- PMAP1changedcpu++;
- } else
-#endif
- PMAP1unchanged++;
- return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (0);
-}
-
-/*
- * Routine: pmap_extract
- * Function:
- * Extract the physical page address associated
- * with the given map/virtual_address pair.
- */
-vm_paddr_t
-pmap_extract(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
- pt_entry_t pteval;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- pteval = *pte ? xpmap_mtop(*pte) : 0;
- rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_ma
- * Function:
- * Like pmap_extract, but returns machine address
- */
-vm_paddr_t
-pmap_extract_ma(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = (pde & ~PDRMASK) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_and_hold
- * Function:
- * Atomically extract and hold the physical page
- * with the given pmap and virtual address pair
- * if that mapping permits the given protection.
- */
-vm_page_t
-pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
-{
- pd_entry_t pde;
- pt_entry_t pte, *ptep;
- vm_page_t m;
- vm_paddr_t pa;
-
- pa = 0;
- m = NULL;
- PMAP_LOCK(pmap);
-retry:
- pde = PT_GET(pmap_pde(pmap, va));
- if (pde != 0) {
- if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
- (va & PDRMASK));
- vm_page_hold(m);
- }
- } else {
- ptep = pmap_pte(pmap, va);
- pte = PT_GET(ptep);
- pmap_pte_release(ptep);
- if (pte != 0 &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
- vm_page_hold(m);
- }
- }
- }
- PA_UNLOCK_COND(pa);
- PMAP_UNLOCK(pmap);
- return (m);
-}
-
-/***************************************************
- * Low level mapping routines.....
- ***************************************************/
-
-/*
- * Add a wired page to the kva.
- * Note: not SMP coherent.
- *
- * This function may be used before pmap_bootstrap() is called.
- */
-void
-pmap_kenter(vm_offset_t va, vm_paddr_t pa)
-{
-
- PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
-}
-
-void
-pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
-}
-
-static __inline void
-pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
-{
-
- PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
-}
-
-/*
- * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
- *
- * This function may be used before pmap_bootstrap() is called.
- */
-PMAP_INLINE void
-pmap_kremove(vm_offset_t va)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- PT_CLEAR_VA(pte, FALSE);
-}
-
-/*
- * Used to map a range of physical addresses into kernel
- * virtual address space.
- *
- * The value passed in '*virt' is a suggested virtual address for
- * the mapping. Architectures which can support a direct-mapped
- * physical to virtual region can return the appropriate address
- * within that region, leaving '*virt' unchanged. Other
- * architectures should map the pages starting at '*virt' and
- * update '*virt' with the first usable address after the mapped
- * region.
- */
-vm_offset_t
-pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
-{
- vm_offset_t va, sva;
-
- va = sva = *virt;
- CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
- va, start, end, prot);
- while (start < end) {
- pmap_kenter(va, start);
- va += PAGE_SIZE;
- start += PAGE_SIZE;
- }
- pmap_invalidate_range(kernel_pmap, sva, va);
- *virt = va;
- return (sva);
-}
-
-
-/*
- * Add a list of wired pages to the kva.  This routine is only used for
- * temporary kernel mappings that do not need to have page modification
- * or references recorded.  Note that old mappings are simply written
- * over.  The page *must* be wired.
- * Note: SMP coherent.  Uses a ranged shootdown IPI.
- */
-void
-pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
-{
- pt_entry_t *endpte, *pte;
- vm_paddr_t pa;
- vm_offset_t va = sva;
- int mclcount = 0;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
- int error;
-
- CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
- pte = vtopte(sva);
- endpte = pte + count;
- while (pte < endpte) {
- pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;
-
- mclp->op = __HYPERVISOR_update_va_mapping;
- mclp->args[0] = va;
- mclp->args[1] = (uint32_t)(pa & 0xffffffff);
- mclp->args[2] = (uint32_t)(pa >> 32);
- mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;
-
- va += PAGE_SIZE;
- pte++;
- ma++;
- mclp++;
- mclcount++;
- if (mclcount == 16) {
- error = HYPERVISOR_multicall(mcl, mclcount);
- mclp = mcl;
- mclcount = 0;
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- }
- if (mclcount) {
- error = HYPERVISOR_multicall(mcl, mclcount);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
-
-#ifdef INVARIANTS
- for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++)
- KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE));
-#endif
-}
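-
-/*
- * Design note (added for clarity, not in the original file): the
- * updates above are batched 16 at a time into a single multicall, so a
- * large pmap_qenter() costs roughly count/16 hypercalls rather than one
- * hypervisor entry per page.
- */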
-
-/*
- * This routine tears out page mappings from the
- * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent. Uses a ranged shootdown IPI.
- */
-void
-pmap_qremove(vm_offset_t sva, int count)
-{
- vm_offset_t va;
-
- CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
- va = sva;
- rw_wlock(&pvh_global_lock);
- critical_enter();
- while (count-- > 0) {
- pmap_kremove(va);
- va += PAGE_SIZE;
- }
- PT_UPDATES_FLUSH();
- pmap_invalidate_range(kernel_pmap, sva, va);
- critical_exit();
- rw_wunlock(&pvh_global_lock);
-}
-
-/***************************************************
- * Page table page management routines.....
- ***************************************************/
-static __inline void
-pmap_free_zero_pages(vm_page_t free)
-{
- vm_page_t m;
-
- while (free != NULL) {
- m = free;
- free = (void *)m->object;
- m->object = NULL;
- vm_page_free_zero(m);
- }
-}
-
-/*
- * Decrements a page table page's wire count, which is used to record the
- * number of valid page table entries within the page. If the wire count
- * drops to zero, then the page table page is unmapped. Returns TRUE if the
- * page table page was unmapped and FALSE otherwise.
- */
-static inline boolean_t
-pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
-{
-
- --m->wire_count;
- if (m->wire_count == 0) {
- _pmap_unwire_ptp(pmap, m, free);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-static void
-_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
-{
- vm_offset_t pteva;
-
- PT_UPDATES_FLUSH();
- /*
- * unmap the page table page
- */
- xen_pt_unpin(pmap->pm_pdir[m->pindex]);
-	/*
-	 * The page may still contain a residual mapping.
-	 */
- PD_CLEAR_VA(pmap, m->pindex, TRUE);
- pmap_zero_page(m);
- --pmap->pm_stats.resident_count;
-
- /*
- * This is a release store so that the ordinary store unmapping
- * the page table page is globally performed before TLB shoot-
- * down is begun.
- */
- atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
-
- /*
- * Do an invltlb to make the invalidated mapping
- * take effect immediately.
- */
- pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
- pmap_invalidate_page(pmap, pteva);
-
- /*
- * Put page on a list so that it is released after
- * *ALL* TLB shootdown is done
- */
- m->object = (void *)*free;
- *free = m;
-}
-
-/*
- * After removing a page table entry, this routine is used to
- * conditionally free the page, and manage the hold/wire counts.
- */
-static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
-{
- pd_entry_t ptepde;
- vm_page_t mpte;
-
- if (va >= VM_MAXUSER_ADDRESS)
- return (0);
- ptepde = PT_GET(pmap_pde(pmap, va));
- mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return (pmap_unwire_ptp(pmap, mpte, free));
-}
-
-/*
- * Initialize the pmap for the swapper process.
- */
-void
-pmap_pinit0(pmap_t pmap)
-{
-
- PMAP_LOCK_INIT(pmap);
- /*
- * Since the page table directory is shared with the kernel pmap,
- * which is already included in the list "allpmaps", this pmap does
- * not need to be inserted into that list.
- */
- pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
-#ifdef PAE
- pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
-#endif
- CPU_ZERO(&pmap->pm_active);
- PCPU_SET(curpmap, pmap);
- TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-}
-
-/*
- * Initialize a preallocated and zeroed pmap structure,
- * such as one in a vmspace structure.
- */
-int
-pmap_pinit(pmap_t pmap)
-{
- vm_page_t m, ptdpg[NPGPTD + 1];
- int npgptd = NPGPTD + 1;
- int i;
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- /*
- * No need to allocate page table space yet but we do need a valid
- * page directory table.
- */
- if (pmap->pm_pdir == NULL) {
- pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
- if (pmap->pm_pdir == NULL) {
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
- return (0);
- }
-#ifdef PAE
- pmap->pm_pdpt = (pd_entry_t *)kva_alloc(1);
-#endif
- }
-
- /*
- * allocate the page directory page(s)
- */
- for (i = 0; i < npgptd;) {
- m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO);
- if (m == NULL)
- VM_WAIT;
- else {
- ptdpg[i++] = m;
- }
- }
-
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-
- for (i = 0; i < NPGPTD; i++)
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
- pagezero(pmap->pm_pdir + (i * NPDEPG));
-
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- /* Copy the kernel page table directory entries. */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
- mtx_unlock_spin(&allpmaps_lock);
-
-#ifdef PAE
- pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
- if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdpt, PAGE_SIZE);
- for (i = 0; i < NPGPTD; i++) {
- vm_paddr_t ma;
-
- ma = VM_PAGE_TO_MACH(ptdpg[i]);
- pmap->pm_pdpt[i] = ma | PG_V;
-
- }
-#endif
- for (i = 0; i < NPGPTD; i++) {
- pt_entry_t *pd;
- vm_paddr_t ma;
-
- ma = VM_PAGE_TO_MACH(ptdpg[i]);
- pd = pmap->pm_pdir + (i * NPDEPG);
- PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
-#if 0
- xen_pgd_pin(ma);
-#endif
- }
-
-#ifdef PAE
- PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
-#endif
- rw_wlock(&pvh_global_lock);
- xen_flush_queue();
- xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
- for (i = 0; i < NPGPTD; i++) {
- vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
- PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
- }
- xen_flush_queue();
- rw_wunlock(&pvh_global_lock);
- CPU_ZERO(&pmap->pm_active);
- TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
- return (1);
-}
-
-/*
- * This routine is called if the page table page is not
- * mapped correctly.
- */
-static vm_page_t
-_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags)
-{
- vm_paddr_t ptema;
- vm_page_t m;
-
- /*
- * Allocate a page table page.
- */
- if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
- if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
- PMAP_UNLOCK(pmap);
- rw_wunlock(&pvh_global_lock);
- VM_WAIT;
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- }
-
- /*
- * Indicate the need to retry. While waiting, the page table
- * page may have been allocated.
- */
- return (NULL);
- }
- if ((m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
-
- /*
- * Map the pagetable page into the process address space, if
- * it isn't already there.
- */
-
- pmap->pm_stats.resident_count++;
-
- ptema = VM_PAGE_TO_MACH(m);
- xen_pt_pin(ptema);
- PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
- (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
-
- KASSERT(pmap->pm_pdir[ptepindex],
- ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
- return (m);
-}
-
-static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
-{
- u_int ptepindex;
- pd_entry_t ptema;
- vm_page_t m;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
-retry:
- /*
- * Get the page directory entry
- */
- ptema = pmap->pm_pdir[ptepindex];
-
- /*
- * This supports switching from a 4MB page to a
- * normal 4K page.
- */
- if (ptema & PG_PS) {
- /*
- * XXX
- */
- pmap->pm_pdir[ptepindex] = 0;
- ptema = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_invalidate_all(kernel_pmap);
- }
-
- /*
- * If the page table page is mapped, we just increment the
- * hold count, and activate it.
- */
- if (ptema & PG_V) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
- m->wire_count++;
- } else {
- /*
-		 * We get here if the pte page isn't mapped, or if it has
- * been deallocated.
- */
- CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
- pmap, va, flags);
- m = _pmap_allocpte(pmap, ptepindex, flags);
- if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
- goto retry;
-
- KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
- }
- return (m);
-}
-
-
-/***************************************************
- * Pmap allocation/deallocation routines.
- ***************************************************/
-
-
-/*
- * Release any resources held by the given physical map.
- * Called when a pmap initialized by pmap_pinit is being released.
- * Should only be called if the map contains no valid mappings.
- */
-void
-pmap_release(pmap_t pmap)
-{
- vm_page_t m, ptdpg[2*NPGPTD+1];
- vm_paddr_t ma;
- int i;
-#ifdef PAE
- int npgptd = NPGPTD + 1;
-#else
- int npgptd = NPGPTD;
-#endif
-
- KASSERT(pmap->pm_stats.resident_count == 0,
- ("pmap_release: pmap resident count %ld != 0",
- pmap->pm_stats.resident_count));
- PT_UPDATES_FLUSH();
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- KASSERT(CPU_EMPTY(&pmap->pm_active),
- ("releasing active pmap %p", pmap));
- mtx_lock_spin(&allpmaps_lock);
- LIST_REMOVE(pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-
- for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
- pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
-#ifdef PAE
- ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
-#endif
-
- for (i = 0; i < npgptd; i++) {
- m = ptdpg[i];
- ma = VM_PAGE_TO_MACH(m);
- /* unpinning L1 and L2 treated the same */
-#if 0
- xen_pgd_unpin(ma);
-#else
- if (i == NPGPTD)
- xen_pgd_unpin(ma);
-#endif
-#ifdef PAE
- if (i < NPGPTD)
- KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
- ("pmap_release: got wrong ptd page"));
-#endif
- m->wire_count--;
- atomic_subtract_int(&vm_cnt.v_wire_count, 1);
- vm_page_free(m);
- }
-#ifdef PAE
- pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
-#endif
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
-}
-
-static int
-kvm_size(SYSCTL_HANDLER_ARGS)
-{
- unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
-
- return (sysctl_handle_long(oidp, &ksize, 0, req));
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_size, "IU", "Size of KVM");
-
-static int
-kvm_free(SYSCTL_HANDLER_ARGS)
-{
- unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
-
- return (sysctl_handle_long(oidp, &kfree, 0, req));
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_free, "IU", "Amount of KVM free");
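-
-/*
- * Usage note (added for clarity, not in the original file): these
- * handlers export as the read-only sysctls vm.kvm_size and vm.kvm_free,
- * e.g.:
- *
- *	$ sysctl vm.kvm_size vm.kvm_free
- */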
-
-/*
- * grow the number of kernel page table entries, if needed
- */
-void
-pmap_growkernel(vm_offset_t addr)
-{
- struct pmap *pmap;
- vm_paddr_t ptppaddr;
- vm_page_t nkpg;
- pd_entry_t newpdir;
-
- mtx_assert(&kernel_map->system_mtx, MA_OWNED);
- if (kernel_vm_end == 0) {
- kernel_vm_end = KERNBASE;
- nkpt = 0;
- while (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
- nkpt++;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- }
- }
- addr = roundup2(addr, NBPDR);
- if (addr - 1 >= kernel_map->max_offset)
- addr = kernel_map->max_offset;
- while (kernel_vm_end < addr) {
- if (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- continue;
- }
-
- nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
- VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- if (nkpg == NULL)
- panic("pmap_growkernel: no memory to grow kernel");
-
- nkpt++;
-
- if ((nkpg->flags & PG_ZERO) == 0)
- pmap_zero_page(nkpg);
- ptppaddr = VM_PAGE_TO_PHYS(nkpg);
- newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- rw_wlock(&pvh_global_lock);
- PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list)
- PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
-
- mtx_unlock_spin(&allpmaps_lock);
- rw_wunlock(&pvh_global_lock);
-
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- }
-}
-
-
-/***************************************************
- * page management routines.
- ***************************************************/
-
-CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
-CTASSERT(_NPCM == 11);
-CTASSERT(_NPCPV == 336);
-
-static __inline struct pv_chunk *
-pv_to_chunk(pv_entry_t pv)
-{
-
- return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
-}
-
-#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
-
-#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
-#define PC_FREE10 0x0000fffful /* Free values for index 10 */
-
-static const uint32_t pc_freemask[_NPCM] = {
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE10
-};
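-
-/*
- * Worked arithmetic (added for clarity, not in the original file): a pv
- * chunk occupies one page and holds _NPCPV == 336 pv entries, tracked
- * by _NPCM == 11 32-bit bitmap words; ten full words cover 320 entries
- * and the eleventh covers the remaining 16, hence PC_FREE10 == 0xffff.
- */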
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
- "Current number of pv entries");
-
-#ifdef PV_STATS
-static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
- "Current number of pv entry chunks");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
- "Current number of pv entry chunks allocated");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
- "Current number of pv entry chunks frees");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
- "Number of times tried to get a chunk page but failed.");
-
-static long pv_entry_frees, pv_entry_allocs;
-static int pv_entry_spare;
-
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
- "Current number of pv entry frees");
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
- "Current number of pv entry allocs");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
- "Current number of spare pv entries");
-#endif
-
-/*
- * We are in a serious low memory condition. Resort to
- * drastic measures to free some pages so we can allocate
- * another pv entry chunk.
- */
-static vm_page_t
-pmap_pv_reclaim(pmap_t locked_pmap)
-{
- struct pch newtail;
- struct pv_chunk *pc;
- pmap_t pmap;
- pt_entry_t *pte, tpte;
- pv_entry_t pv;
- vm_offset_t va;
- vm_page_t free, m, m_pc;
- uint32_t inuse;
- int bit, field, freed;
-
- PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
- pmap = NULL;
- free = m_pc = NULL;
- TAILQ_INIT(&newtail);
- while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
- free == NULL)) {
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- if (pmap != pc->pc_pmap) {
- if (pmap != NULL) {
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- }
- pmap = pc->pc_pmap;
- /* Avoid deadlock and lock recursion. */
- if (pmap > locked_pmap)
- PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
- pmap = NULL;
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
- continue;
- }
- }
-
- /*
- * Destroy every non-wired, 4 KB page mapping in the chunk.
- */
- freed = 0;
- for (field = 0; field < _NPCM; field++) {
- for (inuse = ~pc->pc_map[field] & pc_freemask[field];
- inuse != 0; inuse &= ~(1UL << bit)) {
- bit = bsfl(inuse);
- pv = &pc->pc_pventry[field * 32 + bit];
- va = pv->pv_va;
- pte = pmap_pte(pmap, va);
- tpte = *pte;
- if ((tpte & PG_W) == 0)
- tpte = pte_load_clear(pte);
- pmap_pte_release(pte);
- if ((tpte & PG_W) != 0)
- continue;
- KASSERT(tpte != 0,
- ("pmap_pv_reclaim: pmap %p va %x zero pte",
- pmap, va));
- if ((tpte & PG_G) != 0)
- pmap_invalidate_page(pmap, va);
- m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if ((tpte & PG_A) != 0)
- vm_page_aflag_set(m, PGA_REFERENCED);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- pc->pc_map[field] |= 1UL << bit;
- pmap_unuse_pt(pmap, va, &free);
- freed++;
- }
- }
- if (freed == 0) {
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
- continue;
- }
- /* Every freed mapping is for a 4 KB page. */
- pmap->pm_stats.resident_count -= freed;
- PV_STAT(pv_entry_frees += freed);
- PV_STAT(pv_entry_spare += freed);
- pv_entry_count -= freed;
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- for (field = 0; field < _NPCM; field++)
- if (pc->pc_map[field] != pc_freemask[field]) {
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
- pc_list);
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
-
- /*
- * One freed pv entry in locked_pmap is
- * sufficient.
- */
- if (pmap == locked_pmap)
- goto out;
- break;
- }
- if (field == _NPCM) {
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
- /* Entire chunk is free; return it. */
- m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
- break;
- }
- }
-out:
- TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
- if (pmap != NULL) {
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- }
- if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
- m_pc = free;
- free = (void *)m_pc->object;
- /* Recycle a freed page table page. */
- m_pc->wire_count = 1;
- atomic_add_int(&vm_cnt.v_wire_count, 1);
- }
- pmap_free_zero_pages(free);
- return (m_pc);
-}
-
-/*
- * free the pv_entry back to the free list
- */
-static void
-free_pv_entry(pmap_t pmap, pv_entry_t pv)
-{
- struct pv_chunk *pc;
- int idx, field, bit;
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
- pc = pv_to_chunk(pv);
- idx = pv - &pc->pc_pventry[0];
- field = idx / 32;
- bit = idx % 32;
- pc->pc_map[field] |= 1ul << bit;
- for (idx = 0; idx < _NPCM; idx++)
- if (pc->pc_map[idx] != pc_freemask[idx]) {
- /*
- * 98% of the time, pc is already at the head of the
- * list. If it isn't already, move it to the head.
- */
- if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
- pc)) {
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
- pc_list);
- }
- return;
- }
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
-}
-
-static void
-free_pv_chunk(struct pv_chunk *pc)
-{
- vm_page_t m;
-
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
- /* entire chunk is free, return it */
- m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- vm_page_unwire(m, PQ_INACTIVE);
- vm_page_free(m);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
-}
-
-/*
- * get a new pv_entry, allocating a block from the system
- * when needed.
- */
-static pv_entry_t
-get_pv_entry(pmap_t pmap, boolean_t try)
-{
- static const struct timeval printinterval = { 60, 0 };
- static struct timeval lastprint;
- int bit, field;
- pv_entry_t pv;
- struct pv_chunk *pc;
- vm_page_t m;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PV_STAT(pv_entry_allocs++);
- pv_entry_count++;
- if (pv_entry_count > pv_entry_high_water)
- if (ratecheck(&lastprint, &printinterval))
- printf("Approaching the limit on PV entries, consider "
- "increasing either the vm.pmap.shpgperproc or the "
- "vm.pmap.pv_entry_max tunable.\n");
-retry:
- pc = TAILQ_FIRST(&pmap->pm_pvchunk);
- if (pc != NULL) {
- for (field = 0; field < _NPCM; field++) {
- if (pc->pc_map[field]) {
- bit = bsfl(pc->pc_map[field]);
- break;
- }
- }
- if (field < _NPCM) {
- pv = &pc->pc_pventry[field * 32 + bit];
- pc->pc_map[field] &= ~(1ul << bit);
- /* If this was the last item, move it to tail */
- for (field = 0; field < _NPCM; field++)
- if (pc->pc_map[field] != 0) {
- PV_STAT(pv_entry_spare--);
- return (pv); /* not full, return */
- }
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare--);
- return (pv);
- }
- }
- /*
-	 * Access to the ptelist "pv_vafree" is synchronized by the pvh
-	 * global lock.  If "pv_vafree" is currently non-empty, it will
- * remain non-empty until pmap_ptelist_alloc() completes.
- */
- if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
- VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
- if (try) {
- pv_entry_count--;
- PV_STAT(pc_chunk_tryfail++);
- return (NULL);
- }
- m = pmap_pv_reclaim(pmap);
- if (m == NULL)
- goto retry;
- }
- PV_STAT(pc_chunk_count++);
- PV_STAT(pc_chunk_allocs++);
- pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
- pmap_qenter((vm_offset_t)pc, &m, 1);
- if ((m->flags & PG_ZERO) == 0)
- pagezero(pc);
- pc->pc_pmap = pmap;
- pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
- for (field = 1; field < _NPCM; field++)
- pc->pc_map[field] = pc_freemask[field];
- TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
- pv = &pc->pc_pventry[0];
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare += _NPCPV - 1);
- return (pv);
-}
-
-static __inline pv_entry_t
-pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
-{
- pv_entry_t pv;
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
- if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
- TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
- break;
- }
- }
- return (pv);
-}
-
-static void
-pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
-{
- pv_entry_t pv;
-
- pv = pmap_pvh_remove(pvh, pmap, va);
- KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
- free_pv_entry(pmap, pv);
-}
-
-static void
-pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
-{
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- pmap_pvh_free(&m->md, pmap, va);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
-}
-
-/*
- * Conditionally create a pv entry.
- */
-static boolean_t
-pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- if (pv_entry_count < pv_entry_high_water &&
- (pv = get_pv_entry(pmap, TRUE)) != NULL) {
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-/*
- * pmap_remove_pte: do the work to unmap a page in a process
- */
-static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
-{
- pt_entry_t oldpte;
- vm_page_t m;
-
- CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
- pmap, (u_long)*ptq, va);
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- oldpte = *ptq;
- PT_SET_VA_MA(ptq, 0, TRUE);
- KASSERT(oldpte != 0,
- ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va));
- if (oldpte & PG_W)
- pmap->pm_stats.wired_count -= 1;
- /*
- * Machines that don't support invlpg, also don't support
- * PG_G.
- */
- if (oldpte & PG_G)
- pmap_invalidate_page(kernel_pmap, va);
- pmap->pm_stats.resident_count -= 1;
- if (oldpte & PG_MANAGED) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
- if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if (oldpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- pmap_remove_entry(pmap, m, va);
- }
- return (pmap_unuse_pt(pmap, va, free));
-}
-
-/*
- * Remove a single page from a process address space
- */
-static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
-{
- pt_entry_t *pte;
-
- CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
- pmap, va);
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
- return;
- pmap_remove_pte(pmap, pte, va, free);
- pmap_invalidate_page(pmap, va);
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
-
-}
-
-/*
- * Remove the given range of addresses from the specified map.
- *
- * It is assumed that the start and end are properly
- * rounded to the page size.
- */
-void
-pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- vm_page_t free = NULL;
- int anyvalid;
-
- CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- /*
- * Perform an unsynchronized read. This is, however, safe.
- */
- if (pmap->pm_stats.resident_count == 0)
- return;
-
- anyvalid = 0;
-
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
-
-	/*
-	 * Special handling for removing a single page: this is a very
-	 * common operation and easy to short circuit some code.
-	 */
- if ((sva + PAGE_SIZE == eva) &&
- ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
- pmap_remove_page(pmap, sva, &free);
- goto out;
- }
-
- for (; sva < eva; sva = pdnxt) {
- u_int pdirindex;
-
- /*
- * Calculate index for next page table.
- */
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- if (pmap->pm_stats.resident_count == 0)
- break;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- PD_CLEAR_VA(pmap, pdirindex, TRUE);
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anyvalid = 1;
- continue;
- }
-
- /*
- * Limit our scan to either the end of the va represented
- * by the current page table page, or to the end of the
- * range being removed.
- */
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & PG_V) == 0)
- continue;
-
- /*
- * The TLB entry for a PG_G mapping is invalidated
- * by pmap_remove_pte().
- */
- if ((*pte & PG_G) == 0)
- anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva, &free))
- break;
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
-out:
- if (anyvalid)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
-}
-
-/*
- * Routine: pmap_remove_all
- * Function:
- * Removes this physical page from
- * all physical maps in which it resides.
- * Reflects back modify bits to the pager.
- *
- * Notes:
- * Original versions of this routine were very
- * inefficient because they iteratively called
- * pmap_remove (slow...)
- */
-
-void
-pmap_remove_all(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *pte, tpte;
- vm_page_t free;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_remove_all: page %p is not managed", m));
- free = NULL;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pmap, pv->pv_va);
- tpte = *pte;
- PT_SET_VA_MA(pte, 0, TRUE);
- KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte",
- pmap, pv->pv_va));
- if (tpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (tpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
-
- /*
- * Update the vm_page_t clean and reference bits.
- */
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- pmap_unuse_pt(pmap, pv->pv_va, &free);
- pmap_invalidate_page(pmap, pv->pv_va);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- free_pv_entry(pmap, pv);
- PMAP_UNLOCK(pmap);
- }
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- pmap_free_zero_pages(free);
-}
-
-/*
- * Set the physical protection on the
- * specified range of this map as requested.
- */
-void
-pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- int anychanged;
-
- CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
- pmap, sva, eva, prot);
-
- if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
- pmap_remove(pmap, sva, eva);
- return;
- }
-
-#ifdef PAE
- if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
- (VM_PROT_WRITE|VM_PROT_EXECUTE))
- return;
-#else
- if (prot & VM_PROT_WRITE)
- return;
-#endif
-
- anychanged = 0;
-
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pt_entry_t obits, pbits;
- u_int pdirindex;
-
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- if ((prot & VM_PROT_WRITE) == 0)
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pmap->pm_pdir[pdirindex] |= pg_nx;
-#endif
- anychanged = 1;
- continue;
- }
-
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- vm_page_t m;
-
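-			/*
-			 * The accessed and modified bits may be set
-			 * concurrently while this loop runs, so if the PTE
-			 * read back after the update below does not match
-			 * the value that was written, the new bits are
-			 * recomputed and the update is retried.
-			 */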
-retry:
- /*
- * Regardless of whether a pte is 32 or 64 bits in
- * size, PG_RW, PG_A, and PG_M are among the least
- * significant 32 bits.
- */
- obits = pbits = *pte;
- if ((pbits & PG_V) == 0)
- continue;
-
- if ((prot & VM_PROT_WRITE) == 0) {
- if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
- (PG_MANAGED | PG_M | PG_RW)) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
- PG_FRAME);
- vm_page_dirty(m);
- }
- pbits &= ~(PG_RW | PG_M);
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pbits |= pg_nx;
-#endif
-
- if (pbits != obits) {
- obits = *pte;
- PT_SET_VA_MA(pte, pbits, TRUE);
- if (*pte != pbits)
- goto retry;
- if (obits & PG_G)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = 1;
- }
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- if (anychanged)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte cannot be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-int
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- u_int flags, int8_t psind __unused)
-{
- pd_entry_t *pde;
- pt_entry_t *pte;
- pt_entry_t newpte, origpte;
- pv_entry_t pv;
- vm_paddr_t opa, pa;
- vm_page_t mpte, om;
- boolean_t invlva, wired;
-
- CTR5(KTR_PMAP,
- "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x flags=0x%x",
- pmap, va, VM_PAGE_TO_MACH(m), prot, flags);
- va = trunc_page(va);
- KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
- KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
- ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
- va));
- if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
- VM_OBJECT_ASSERT_LOCKED(m->object);
-
- mpte = NULL;
- wired = (flags & PMAP_ENTER_WIRED) != 0;
-
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- sched_pin();
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- mpte = pmap_allocpte(pmap, va, flags);
- if (mpte == NULL) {
- KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
- ("pmap_allocpte failed with sleep allowed"));
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- return (KERN_RESOURCE_SHORTAGE);
- }
- }
-
- pde = pmap_pde(pmap, va);
- if ((*pde & PG_PS) != 0)
- panic("pmap_enter: attempted pmap_enter on 4MB page");
- pte = pmap_pte_quick(pmap, va);
-
- /*
- * Page Directory table entry not valid, we need a new PT page
- */
- if (pte == NULL) {
- panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
- (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
- }
-
- pa = VM_PAGE_TO_PHYS(m);
- om = NULL;
- opa = origpte = 0;
-
-#if 0
- KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
- pte, *pte));
-#endif
- origpte = *pte;
- if (origpte)
- origpte = xpmap_mtop(origpte);
- opa = origpte & PG_FRAME;
-
- /*
- * Mapping has not changed, must be protection or wiring change.
- */
- if (origpte && (opa == pa)) {
- /*
- * Wiring change, just update stats. We don't worry about
- * wiring PT pages as they remain resident as long as there
- * are valid mappings in them. Hence, if a user page is wired,
- * the PT page will be also.
- */
- if (wired && ((origpte & PG_W) == 0))
- pmap->pm_stats.wired_count++;
- else if (!wired && (origpte & PG_W))
- pmap->pm_stats.wired_count--;
-
- /*
- * Remove extra pte reference
- */
- if (mpte)
- mpte->wire_count--;
-
- if (origpte & PG_MANAGED) {
- om = m;
- pa |= PG_MANAGED;
- }
- goto validate;
- }
-
- pv = NULL;
-
- /*
- * Mapping has changed, invalidate old range and fall through to
- * handle validating new mapping.
- */
- if (opa) {
- if (origpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (origpte & PG_MANAGED) {
- om = PHYS_TO_VM_PAGE(opa);
- pv = pmap_pvh_remove(&om->md, pmap, va);
- } else if (va < VM_MAXUSER_ADDRESS)
- printf("va=0x%x is unmanaged :-( \n", va);
-
- if (mpte != NULL) {
- mpte->wire_count--;
- KASSERT(mpte->wire_count > 0,
- ("pmap_enter: missing reference to page table page,"
- " va: 0x%x", va));
- }
- } else
- pmap->pm_stats.resident_count++;
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
- ("pmap_enter: managed mapping within the clean submap"));
- if (pv == NULL)
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- pa |= PG_MANAGED;
- } else if (pv != NULL)
- free_pv_entry(pmap, pv);
-
- /*
- * Increment counters
- */
- if (wired)
- pmap->pm_stats.wired_count++;
-
-validate:
- /*
- * Now validate mapping with desired protection/wiring.
- */
- newpte = (pt_entry_t)(pa | PG_V);
- if ((prot & VM_PROT_WRITE) != 0) {
- newpte |= PG_RW;
- if ((newpte & PG_MANAGED) != 0)
- vm_page_aflag_set(m, PGA_WRITEABLE);
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- newpte |= pg_nx;
-#endif
- if (wired)
- newpte |= PG_W;
- if (va < VM_MAXUSER_ADDRESS)
- newpte |= PG_U;
- if (pmap == kernel_pmap)
- newpte |= pgeflag;
-
- critical_enter();
- /*
- * if the mapping or permission bits are different, we need
- * to update the pte.
- */
- if ((origpte & ~(PG_M|PG_A)) != newpte) {
- if (origpte) {
- invlva = FALSE;
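-			/*
-			 * invlva tracks whether the old mapping may still be
-			 * cached in the TLB: a TLB entry can exist only if
-			 * PG_A was set, and it needs to be flushed only if
-			 * the frame, the NX status, or the write permission
-			 * changed.
-			 */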
- origpte = *pte;
- PT_SET_VA(pte, newpte | PG_A, FALSE);
- if (origpte & PG_A) {
- if (origpte & PG_MANAGED)
- vm_page_aflag_set(om, PGA_REFERENCED);
- if (opa != VM_PAGE_TO_PHYS(m))
- invlva = TRUE;
-#ifdef PAE
- if ((origpte & PG_NX) == 0 &&
- (newpte & PG_NX) != 0)
- invlva = TRUE;
-#endif
- }
- if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if ((origpte & PG_MANAGED) != 0)
- vm_page_dirty(om);
- if ((prot & VM_PROT_WRITE) == 0)
- invlva = TRUE;
- }
- if ((origpte & PG_MANAGED) != 0 &&
- TAILQ_EMPTY(&om->md.pv_list))
- vm_page_aflag_clear(om, PGA_WRITEABLE);
- if (invlva)
- pmap_invalidate_page(pmap, va);
-		} else {
- PT_SET_VA(pte, newpte | PG_A, FALSE);
- }
-	}
- PT_UPDATES_FLUSH();
- critical_exit();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- return (KERN_SUCCESS);
-}
-
-/*
- * Maps a sequence of resident pages belonging to the same object.
- * The sequence begins with the given page m_start. This page is
- * mapped at the given virtual address start. Each subsequent page is
- * mapped at a virtual address that is offset from start by the same
- * amount as the page is offset from m_start within the object. The
- * last page in the sequence is the page with the largest offset from
- * m_start that can be mapped at a virtual address less than the given
- * virtual address end. Not every virtual page between start and end
- * is mapped; only those for which a resident page exists with the
- * corresponding offset from m_start are mapped.
- */
-void
-pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
- vm_page_t m_start, vm_prot_t prot)
-{
- vm_page_t m, mpte;
- vm_pindex_t diff, psize;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
- int error, count = 0;
-
- VM_OBJECT_ASSERT_LOCKED(m_start->object);
-
- psize = atop(end - start);
- mpte = NULL;
- m = m_start;
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
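-	/*
-	 * Batch the page table updates: pmap_enter_quick_locked() queues
-	 * each mapping as an update_va_mapping multicall entry, and the
-	 * queue is flushed to the hypervisor 16 entries at a time, which
-	 * amortizes the hypercall cost over many mappings.
-	 */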
- while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
- mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m,
- prot, mpte);
- m = TAILQ_NEXT(m, listq);
- if (count == 16) {
- error = HYPERVISOR_multicall(mcl, count);
- KASSERT(error == 0, ("bad multicall %d", error));
- mclp = mcl;
- count = 0;
- }
- }
- if (count) {
- error = HYPERVISOR_multicall(mcl, count);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * This code makes some *MAJOR* assumptions:
- * 1. The pmap is the current pmap and it exists.
- * 2. The mapping is not wired.
- * 3. Read access only.
- * 4. No page table pages.
- * Given these, it is *MUCH* faster than pmap_enter...
- */
-
-void
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
-{
- multicall_entry_t mcl, *mclp;
- int count = 0;
- mclp = &mcl;
-
- CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
- pmap, va, m, prot);
-
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL);
- if (count)
- HYPERVISOR_multicall(&mcl, count);
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-#ifdef notyet
-void
-pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count)
-{
- int i, error, index = 0;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
-
- PMAP_LOCK(pmap);
- for (i = 0; i < count; i++, addrs++, pages++, prots++) {
- if (!pmap_is_prefaultable_locked(pmap, *addrs))
- continue;
-
- (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL);
- if (index == 16) {
- error = HYPERVISOR_multicall(mcl, index);
- mclp = mcl;
- index = 0;
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- }
- if (index) {
- error = HYPERVISOR_multicall(mcl, index);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
-
- PMAP_UNLOCK(pmap);
-}
-#endif
-
-static vm_page_t
-pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte)
-{
- pt_entry_t *pte;
- vm_paddr_t pa;
- vm_page_t free;
- multicall_entry_t *mcl = *mclpp;
-
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
- (m->oflags & VPO_UNMANAGED) != 0,
- ("pmap_enter_quick_locked: managed mapping within the clean submap"));
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- u_int ptepindex;
- pd_entry_t ptema;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
- if (mpte && (mpte->pindex == ptepindex)) {
- mpte->wire_count++;
- } else {
- /*
- * Get the page directory entry
- */
- ptema = pmap->pm_pdir[ptepindex];
-
- /*
- * If the page table page is mapped, we just increment
- * the hold count, and activate it.
- */
- if (ptema & PG_V) {
- if (ptema & PG_PS)
- panic("pmap_enter_quick: unexpected mapping into 4MB page");
- mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
- mpte->wire_count++;
- } else {
- mpte = _pmap_allocpte(pmap, ptepindex,
- PMAP_ENTER_NOSLEEP);
- if (mpte == NULL)
- return (mpte);
- }
- }
- } else {
- mpte = NULL;
- }
-
- /*
- * This call to vtopte makes the assumption that we are
- * entering the page into the current pmap. In order to support
- * quick entry into any pmap, one would likely use pmap_pte_quick.
- * But that isn't as quick as vtopte.
- */
- KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
- pte = vtopte(va);
- if (*pte & PG_V) {
- if (mpte != NULL) {
- mpte->wire_count--;
- mpte = NULL;
- }
- return (mpte);
- }
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0 &&
- !pmap_try_insert_pv_entry(pmap, va, m)) {
- if (mpte != NULL) {
- free = NULL;
- if (pmap_unwire_ptp(pmap, mpte, &free)) {
- pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
- }
-
- mpte = NULL;
- }
- return (mpte);
- }
-
- /*
- * Increment counters
- */
- pmap->pm_stats.resident_count++;
-
- pa = VM_PAGE_TO_PHYS(m);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pa |= pg_nx;
-#endif
-
-#if 0
- /*
- * Now validate mapping with RO protection
- */
- if ((m->oflags & VPO_UNMANAGED) != 0)
- pte_store(pte, pa | PG_V | PG_U);
- else
- pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
-#else
- /*
- * Now validate mapping with RO protection
- */
- if ((m->oflags & VPO_UNMANAGED) != 0)
- pa = xpmap_ptom(pa | PG_V | PG_U);
- else
- pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);
-
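-	/*
-	 * Under Xen PV, page table entries hold machine addresses, so the
-	 * pseudo-physical address is translated with xpmap_ptom() above
-	 * and the update is queued as a multicall entry for the caller to
-	 * flush.
-	 */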
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = va;
- mcl->args[1] = (uint32_t)(pa & 0xffffffff);
- mcl->args[2] = (uint32_t)(pa >> 32);
- mcl->args[3] = 0;
- *mclpp = mcl + 1;
- *count = *count + 1;
-#endif
- return (mpte);
-}
-
-/*
- * Make a temporary mapping for a physical address. This is only intended
- * to be used for panic dumps.
- */
-void *
-pmap_kenter_temporary(vm_paddr_t pa, int i)
-{
- vm_offset_t va;
- vm_paddr_t ma = xpmap_ptom(pa);
-
- va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
- PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag);
- invlpg(va);
- return ((void *)crashdumpmap);
-}
-
-/*
- * This code maps large physical mmap regions into the
- * processor address space. Note that some shortcuts
- * are taken, but the code works.
- */
-void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
- vm_pindex_t pindex, vm_size_t size)
-{
- pd_entry_t *pde;
- vm_paddr_t pa, ptepa;
- vm_page_t p;
- int pat_mode;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
- ("pmap_object_init_pt: non-device object"));
- if (pseflag &&
- (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
- if (!vm_object_populate(object, pindex, pindex + atop(size)))
- return;
- p = vm_page_lookup(object, pindex);
- KASSERT(p->valid == VM_PAGE_BITS_ALL,
- ("pmap_object_init_pt: invalid page %p", p));
- pat_mode = p->md.pat_mode;
-
- /*
- * Abort the mapping if the first page is not physically
- * aligned to a 2/4MB page boundary.
- */
- ptepa = VM_PAGE_TO_PHYS(p);
- if (ptepa & (NBPDR - 1))
- return;
-
- /*
- * Skip the first page. Abort the mapping if the rest of
- * the pages are not physically contiguous or have differing
- * memory attributes.
- */
- p = TAILQ_NEXT(p, listq);
- for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
- pa += PAGE_SIZE) {
- KASSERT(p->valid == VM_PAGE_BITS_ALL,
- ("pmap_object_init_pt: invalid page %p", p));
- if (pa != VM_PAGE_TO_PHYS(p) ||
- pat_mode != p->md.pat_mode)
- return;
- p = TAILQ_NEXT(p, listq);
- }
-
- /*
- * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and
- * "size" is a multiple of 2/4M, adding the PAT setting to
- * "pa" will not affect the termination of this loop.
- */
- PMAP_LOCK(pmap);
- for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
- size; pa += NBPDR) {
- pde = pmap_pde(pmap, addr);
- if (*pde == 0) {
- pde_store(pde, pa | PG_PS | PG_M | PG_A |
- PG_U | PG_RW | PG_V);
- pmap->pm_stats.resident_count += NBPDR /
- PAGE_SIZE;
- pmap_pde_mappings++;
- }
- /* Else continue on if the PDE is already valid. */
- addr += NBPDR;
- }
- PMAP_UNLOCK(pmap);
- }
-}
-
-/*
- * Clear the wired attribute from the mappings for the specified range of
- * addresses in the given pmap. Every valid mapping within that range
- * must have the wired attribute set. In contrast, invalid mappings
- * cannot have the wired attribute set, so they are ignored.
- *
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
- */
-void
-pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t *pde;
- pt_entry_t *pte;
-
- CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva,
- eva);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- pde = pmap_pde(pmap, sva);
- if ((*pde & PG_V) == 0)
- continue;
- if ((*pde & PG_PS) != 0)
- panic("pmap_unwire: unexpected PG_PS in pde %#jx",
- (uintmax_t)*pde);
- if (pdnxt > eva)
- pdnxt = eva;
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & PG_V) == 0)
- continue;
- if ((*pte & PG_W) == 0)
- panic("pmap_unwire: pte %#jx is missing PG_W",
- (uintmax_t)*pte);
- PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE);
- pmap->pm_stats.wired_count--;
- }
- }
- if (*PMAP1)
- PT_CLEAR_VA(PMAP1, FALSE);
- PT_UPDATES_FLUSH();
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Copy the range specified by src_addr/len
- * from the source map to the range dst_addr/len
- * in the destination map.
- *
- * This routine is only advisory and need not do anything.
- */
-
-void
-pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
- vm_offset_t src_addr)
-{
- vm_page_t free;
- vm_offset_t addr;
- vm_offset_t end_addr = src_addr + len;
- vm_offset_t pdnxt;
-
- if (dst_addr != src_addr)
- return;
-
- if (!pmap_is_current(src_pmap)) {
- CTR2(KTR_PMAP,
- "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
- (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));
-
- return;
- }
- CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
- dst_pmap, src_pmap, dst_addr, len, src_addr);
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- rw_wlock(&pvh_global_lock);
- if (dst_pmap < src_pmap) {
- PMAP_LOCK(dst_pmap);
- PMAP_LOCK(src_pmap);
- } else {
- PMAP_LOCK(src_pmap);
- PMAP_LOCK(dst_pmap);
- }
- sched_pin();
- for (addr = src_addr; addr < end_addr; addr = pdnxt) {
- pt_entry_t *src_pte, *dst_pte;
- vm_page_t dstmpte, srcmpte;
- pd_entry_t srcptepaddr;
- u_int ptepindex;
-
- KASSERT(addr < UPT_MIN_ADDRESS,
- ("pmap_copy: invalid to pmap_copy page tables"));
-
- pdnxt = (addr + NBPDR) & ~PDRMASK;
- if (pdnxt < addr)
- pdnxt = end_addr;
- ptepindex = addr >> PDRSHIFT;
-
- srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
- if (srcptepaddr == 0)
- continue;
-
- if (srcptepaddr & PG_PS) {
- if (dst_pmap->pm_pdir[ptepindex] == 0) {
- PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
- dst_pmap->pm_stats.resident_count +=
- NBPDR / PAGE_SIZE;
- }
- continue;
- }
-
- srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
- KASSERT(srcmpte->wire_count > 0,
- ("pmap_copy: source page table page is unused"));
-
- if (pdnxt > end_addr)
- pdnxt = end_addr;
-
- src_pte = vtopte(addr);
- while (addr < pdnxt) {
- pt_entry_t ptetemp;
- ptetemp = *src_pte;
- /*
- * we only virtual copy managed pages
- */
- if ((ptetemp & PG_MANAGED) != 0) {
- dstmpte = pmap_allocpte(dst_pmap, addr,
- PMAP_ENTER_NOSLEEP);
- if (dstmpte == NULL)
- goto out;
- dst_pte = pmap_pte_quick(dst_pmap, addr);
- if (*dst_pte == 0 &&
- pmap_try_insert_pv_entry(dst_pmap, addr,
- PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
- /*
- * Clear the wired, modified, and
- * accessed (referenced) bits
- * during the copy.
- */
- KASSERT(ptetemp != 0, ("src_pte not set"));
- PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
- KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
- ("no pmap copy expected: 0x%jx saw: 0x%jx",
- ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte));
- dst_pmap->pm_stats.resident_count++;
- } else {
- free = NULL;
- if (pmap_unwire_ptp(dst_pmap, dstmpte,
- &free)) {
- pmap_invalidate_page(dst_pmap,
- addr);
- pmap_free_zero_pages(free);
- }
- goto out;
- }
- if (dstmpte->wire_count >= srcmpte->wire_count)
- break;
- }
- addr += PAGE_SIZE;
- src_pte++;
- }
- }
-out:
- PT_UPDATES_FLUSH();
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(src_pmap);
- PMAP_UNLOCK(dst_pmap);
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
-}
-
-static __inline void
-pagezero(void *page)
-{
-#if defined(I686_CPU)
- if (cpu_class == CPUCLASS_686) {
-#if defined(CPU_ENABLE_SSE)
- if (cpu_feature & CPUID_SSE2)
- sse2_pagezero(page);
- else
-#endif
- i686_pagezero(page);
- } else
-#endif
- bzero(page, PAGE_SIZE);
-}
-
-/*
- * pmap_zero_page zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- */
-void
-pmap_zero_page(vm_page_t m)
-{
- struct sysmaps *sysmaps;
-
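-	/*
-	 * The page is zeroed through a per-CPU transient mapping (CADDR2);
-	 * sched_pin() keeps the thread on this CPU while the CMAP2 PTE is
-	 * in use, and the PTE is cleared again when done.
-	 */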
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
- pagezero(sysmaps->CADDR2);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * pmap_zero_page_area zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- *
- * off and size may not cover an area beyond a single hardware page.
- */
-void
-pmap_zero_page_area(vm_page_t m, int off, int size)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_zero_page_area: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
-
- if (off == 0 && size == PAGE_SIZE)
- pagezero(sysmaps->CADDR2);
- else
- bzero((char *)sysmaps->CADDR2 + off, size);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * pmap_zero_page_idle zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents. This
- * is intended to be called from the vm_pagezero process only and
- * outside of Giant.
- */
-void
-pmap_zero_page_idle(vm_page_t m)
-{
-
- if (*CMAP3)
- panic("pmap_zero_page_idle: CMAP3 busy");
- sched_pin();
- PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
- pagezero(CADDR3);
- PT_SET_MA(CADDR3, 0);
- sched_unpin();
-}
-
-/*
- * pmap_copy_page copies the specified (machine independent)
- * page by mapping the page into virtual memory and using
- * bcopy to copy the page, one machine dependent page at a
- * time.
- */
-void
-pmap_copy_page(vm_page_t src, vm_page_t dst)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP1)
- panic("pmap_copy_page: CMAP1 busy");
- if (*sysmaps->CMAP2)
- panic("pmap_copy_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A);
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M);
- bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-int unmapped_buf_allowed = 1;
-
-void
-pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
- vm_offset_t b_offset, int xfersize)
-{
- struct sysmaps *sysmaps;
- vm_page_t a_pg, b_pg;
- char *a_cp, *b_cp;
- vm_offset_t a_pg_offset, b_pg_offset;
- int cnt;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP1 != 0)
- panic("pmap_copy_pages: CMAP1 busy");
- if (*sysmaps->CMAP2 != 0)
- panic("pmap_copy_pages: CMAP2 busy");
- sched_pin();
- while (xfersize > 0) {
- a_pg = ma[a_offset >> PAGE_SHIFT];
- a_pg_offset = a_offset & PAGE_MASK;
- cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
- b_pg = mb[b_offset >> PAGE_SHIFT];
- b_pg_offset = b_offset & PAGE_MASK;
- cnt = min(cnt, PAGE_SIZE - b_pg_offset);
- PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(a_pg) | PG_A);
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
- VM_PAGE_TO_MACH(b_pg) | PG_A | PG_M);
- a_cp = sysmaps->CADDR1 + a_pg_offset;
- b_cp = sysmaps->CADDR2 + b_pg_offset;
- bcopy(a_cp, b_cp, cnt);
- a_offset += cnt;
- b_offset += cnt;
- xfersize -= cnt;
- }
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * Returns true if the pmap's pv is one of the first
- * 16 pvs linked to from this page. This count may
- * be changed upwards or downwards in the future; it
- * is only necessary that true be returned for a small
- * subset of pmaps for proper page aging.
- */
-boolean_t
-pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
-{
- pv_entry_t pv;
- int loops = 0;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_page_exists_quick: page %p is not managed", m));
- rv = FALSE;
- rw_wlock(&pvh_global_lock);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- if (PV_PMAP(pv) == pmap) {
- rv = TRUE;
- break;
- }
- loops++;
- if (loops >= 16)
- break;
- }
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-/*
- * pmap_page_wired_mappings:
- *
- * Return the number of managed mappings to the given physical page
- * that are wired.
- */
-int
-pmap_page_wired_mappings(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- int count;
-
- count = 0;
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (count);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_W) != 0)
- count++;
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (count);
-}
-
-/*
- * Returns TRUE if the given page is mapped. Otherwise, returns FALSE.
- */
-boolean_t
-pmap_page_is_mapped(vm_page_t m)
-{
-
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (FALSE);
- return (!TAILQ_EMPTY(&m->md.pv_list));
-}
-
-/*
- * Remove all pages from the specified address space;
- * this aids process exit speeds. Also, this code is
- * special cased for the current process only, but
- * can have the more generic (and slightly slower)
- * mode enabled. This is much faster than pmap_remove
- * in the case of running down an entire address space.
-void
-pmap_remove_pages(pmap_t pmap)
-{
- pt_entry_t *pte, tpte;
- vm_page_t m, free = NULL;
- pv_entry_t pv;
- struct pv_chunk *pc, *npc;
- int field, idx;
- int32_t bit;
- uint32_t inuse, bitmask;
- int allfree;
-
- CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
-
- if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
- printf("warning: pmap_remove_pages called with non-current pmap\n");
- return;
- }
- rw_wlock(&pvh_global_lock);
- KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
- PMAP_LOCK(pmap);
- sched_pin();
- TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
- KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap,
- pc->pc_pmap));
- allfree = 1;
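-		/*
-		 * Walk the chunk's allocation bitmap: the inverted mask
-		 * selects the pv entries still in use, bsfl() finds the
-		 * lowest such bit, and each freed entry sets its bit back
-		 * in pc_map so an entirely free chunk can be released
-		 * afterwards.
-		 */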
- for (field = 0; field < _NPCM; field++) {
- inuse = ~pc->pc_map[field] & pc_freemask[field];
- while (inuse != 0) {
- bit = bsfl(inuse);
- bitmask = 1UL << bit;
- idx = field * 32 + bit;
- pv = &pc->pc_pventry[idx];
- inuse &= ~bitmask;
-
- pte = vtopte(pv->pv_va);
- tpte = *pte ? xpmap_mtop(*pte) : 0;
-
- if (tpte == 0) {
- printf(
- "TPTE at %p IS ZERO @ VA %08x\n",
- pte, pv->pv_va);
- panic("bad pte");
- }
-
-				/*
-				 * We cannot remove wired pages from a
-				 * process' mapping at this time.
-				 */
- if (tpte & PG_W) {
- allfree = 0;
- continue;
- }
-
- m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
- KASSERT(m->phys_addr == (tpte & PG_FRAME),
- ("vm_page_t %p phys_addr mismatch %016jx %016jx",
- m, (uintmax_t)m->phys_addr,
- (uintmax_t)tpte));
-
- KASSERT(m < &vm_page_array[vm_page_array_size],
- ("pmap_remove_pages: bad tpte %#jx",
- (uintmax_t)tpte));
-
- PT_CLEAR_VA(pte, FALSE);
-
- /*
- * Update the vm_page_t clean/reference bits.
- */
- if (tpte & PG_M)
- vm_page_dirty(m);
-
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
-
- pmap_unuse_pt(pmap, pv->pv_va, &free);
-
- /* Mark free */
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
- pc->pc_map[field] |= bitmask;
- pmap->pm_stats.resident_count--;
- }
- }
- PT_UPDATES_FLUSH();
- if (allfree) {
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
-
- sched_unpin();
- pmap_invalidate_all(pmap);
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
-}
-
-/*
- * pmap_is_modified:
- *
- * Return whether or not the specified physical page was modified
- * in any physical maps.
- */
-boolean_t
-pmap_is_modified(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_modified: page %p is not managed", m));
- rv = FALSE;
-
- /*
- * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
- * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
- * is clear, no PTEs can have PG_M set.
- */
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
- return (rv);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & PG_M) != 0;
- PMAP_UNLOCK(pmap);
- if (rv)
- break;
- }
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-/*
- * pmap_is_prefaultable:
- *
- * Return whether or not the specified virtual address is eligible
- * for prefault.
- */
-static boolean_t
-pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
-{
- pt_entry_t *pte;
- boolean_t rv = FALSE;
-
-	/*
-	 * XXX: Prefaulting is effectively disabled here; the unconditional
-	 * return below makes the remaining code unreachable.
-	 */
-	return (rv);
-
- if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
- pte = vtopte(addr);
- rv = (*pte == 0);
- }
- return (rv);
-}
-
-boolean_t
-pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
-{
- boolean_t rv;
-
- PMAP_LOCK(pmap);
- rv = pmap_is_prefaultable_locked(pmap, addr);
- PMAP_UNLOCK(pmap);
- return (rv);
-}
-
-boolean_t
-pmap_is_referenced(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_referenced: page %p is not managed", m));
- rv = FALSE;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
- PMAP_UNLOCK(pmap);
- if (rv)
- break;
- }
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-void
-pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
-{
-	int i, npages = round_page(len) >> PAGE_SHIFT;
-
-	for (i = 0; i < npages; i++) {
-		pt_entry_t *pte;
-
-		pte = pmap_pte(pmap, (vm_offset_t)(va + i * PAGE_SIZE));
- rw_wlock(&pvh_global_lock);
- pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
- rw_wunlock(&pvh_global_lock);
- PMAP_MARK_PRIV(xpmap_mtop(*pte));
- pmap_pte_release(pte);
- }
-}
-
-void
-pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
-{
-	int i, npages = round_page(len) >> PAGE_SHIFT;
-
-	for (i = 0; i < npages; i++) {
-		pt_entry_t *pte;
-
-		pte = pmap_pte(pmap, (vm_offset_t)(va + i * PAGE_SIZE));
- PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
- rw_wlock(&pvh_global_lock);
- pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
- rw_wunlock(&pvh_global_lock);
- pmap_pte_release(pte);
- }
-}
-
-/*
- * Clear the write and modified bits in each of the given page's mappings.
- */
-void
-pmap_remove_write(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t oldpte, *pte;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_remove_write: page %p is not managed", m));
-
- /*
- * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
- * set by another thread while the object is locked. Thus,
- * if PGA_WRITEABLE is clear, no page table entries need updating.
- */
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
- return;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
-retry:
- oldpte = *pte;
- if ((oldpte & PG_RW) != 0) {
- vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);
-
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_RW and PG_M are among the least
- * significant 32 bits.
- */
- PT_SET_VA_MA(pte, newpte, TRUE);
- if (*pte != newpte)
- goto retry;
-
- if ((oldpte & PG_M) != 0)
- vm_page_dirty(m);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
- * pmap_ts_referenced:
- *
- * Return a count of reference bits for a page, clearing those bits.
- * It is not necessary for every reference bit to be cleared, but it
- * is necessary that 0 only be returned when there are truly no
- * reference bits set.
- *
- * XXX: The exact number of bits to check and clear is a matter that
- * should be tested and standardized at some point in the future for
- * optimal aging of shared pages.
- */
-int
-pmap_ts_referenced(vm_page_t m)
-{
- pv_entry_t pv, pvf, pvn;
- pmap_t pmap;
- pt_entry_t *pte;
- int rtval = 0;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_ts_referenced: page %p is not managed", m));
- rw_wlock(&pvh_global_lock);
- sched_pin();
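-	/*
-	 * Each examined pv entry is rotated to the tail of the list so
-	 * that successive calls start with different mappings, and the
-	 * scan stops early once more than four referenced mappings have
-	 * been found.
-	 */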
- if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pvf = pv;
- do {
- pvn = TAILQ_NEXT(pv, pv_next);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_A) != 0) {
- PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
- pmap_invalidate_page(pmap, pv->pv_va);
- rtval++;
- if (rtval > 4)
- pvn = NULL;
- }
- PMAP_UNLOCK(pmap);
- } while ((pv = pvn) != NULL && pv != pvf);
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rtval);
-}
-
-/*
- * Apply the given advice to the specified range of addresses within the
- * given pmap. Depending on the advice, clear the referenced and/or
- * modified flags in each mapping and set the mapped page's dirty field.
- */
-void
-pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
-{
- pd_entry_t oldpde;
- pt_entry_t *pte;
- vm_offset_t pdnxt;
- vm_page_t m;
- boolean_t anychanged;
-
- if (advice != MADV_DONTNEED && advice != MADV_FREE)
- return;
- anychanged = FALSE;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- oldpde = pmap->pm_pdir[sva >> PDRSHIFT];
- if ((oldpde & (PG_PS | PG_V)) != PG_V)
- continue;
- if (pdnxt > eva)
- pdnxt = eva;
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
- PG_V))
- continue;
- else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if (advice == MADV_DONTNEED) {
- /*
- * Future calls to pmap_is_modified()
- * can be avoided by making the page
- * dirty now.
- */
- m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) &
- PG_FRAME);
- vm_page_dirty(m);
- }
- PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE);
- } else if ((*pte & PG_A) != 0)
- PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE);
- else
- continue;
- if ((*pte & PG_G) != 0)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = TRUE;
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- if (anychanged)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Clear the modify bits on the specified physical page.
- */
-void
-pmap_clear_modify(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *pte;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_clear_modify: page %p is not managed", m));
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT(!vm_page_xbusied(m),
- ("pmap_clear_modify: page %p is exclusive busied", m));
-
- /*
- * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
- * If the object containing the page is locked and the page is not
- * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
- */
- if ((m->aflags & PGA_WRITEABLE) == 0)
- return;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_M is among the least significant
- * 32 bits.
- */
- PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
- * Miscellaneous support routines follow
- */
-
-/*
- * Map a set of physical memory pages into the kernel virtual
- * address space. Return a pointer to where it is mapped. This
- * routine is intended to be used for mapping device memory,
- * NOT real memory.
- */
-void *
-pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
-{
- vm_offset_t va, offset;
- vm_size_t tmpsize;
-
- offset = pa & PAGE_MASK;
- size = round_page(offset + size);
- pa = pa & PG_FRAME;
-
- if (pa < KERNLOAD && pa + size <= KERNLOAD)
- va = KERNBASE + pa;
- else
- va = kva_alloc(size);
- if (!va)
- panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
-
- for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
- pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
- pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + size, FALSE);
- return ((void *)(va + offset));
-}
-
-void *
-pmap_mapdev(vm_paddr_t pa, vm_size_t size)
-{
-
- return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
-}
-
-void *
-pmap_mapbios(vm_paddr_t pa, vm_size_t size)
-{
-
- return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
-}
-
-void
-pmap_unmapdev(vm_offset_t va, vm_size_t size)
-{
- vm_offset_t base, offset;
-
- if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
- return;
- base = trunc_page(va);
- offset = va & PAGE_MASK;
- size = round_page(offset + size);
- kva_free(base, size);
-}
-
-/*
- * Sets the memory attribute for the specified page.
- */
-void
-pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
-{
-
- m->md.pat_mode = ma;
- if ((m->flags & PG_FICTITIOUS) != 0)
- return;
-
- /*
- * If "m" is a normal page, flush it from the cache.
- * See pmap_invalidate_cache_range().
- *
- * First, try to find an existing mapping of the page by sf
- * buffer. sf_buf_invalidate_cache() modifies mapping and
- * flushes the cache.
- */
- if (sf_buf_invalidate_cache(m))
- return;
-
- /*
- * If the page is not mapped by an sf buffer and the CPU does
- * not support self snoop, map the page transiently and do the
- * invalidation. In the worst case, the whole cache is flushed by
- * pmap_invalidate_cache_range().
- */
- if ((cpu_feature & CPUID_SS) == 0)
- pmap_flush_page(m);
-}
-
-static void
-pmap_flush_page(vm_page_t m)
-{
- struct sysmaps *sysmaps;
- vm_offset_t sva, eva;
-
- if ((cpu_feature & CPUID_CLFSH) != 0) {
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_flush_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
- VM_PAGE_TO_MACH(m) | PG_A | PG_M |
- pmap_cache_bits(m->md.pat_mode, 0));
- invlcaddr(sysmaps->CADDR2);
- sva = (vm_offset_t)sysmaps->CADDR2;
- eva = sva + PAGE_SIZE;
-
- /*
- * Use mfence despite the ordering implied by
- * mtx_{un,}lock() because clflush is not guaranteed
- * to be ordered by any other instruction.
- */
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
- } else
- pmap_invalidate_cache();
-}
-
-/*
- * Changes the specified virtual address range's memory type to that given by
- * the parameter "mode". The specified virtual address range must be
- * completely contained within the kernel map.
- *
- * Returns zero if the change completed successfully, and either EINVAL or
- * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
- * of the virtual address range was not mapped, and ENOMEM is returned if
- * there was insufficient memory available to complete the change.
- */
-int
-pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
-{
- vm_offset_t base, offset, tmpva;
- pt_entry_t *pte;
- u_int opte, npte;
- pd_entry_t *pde;
- boolean_t changed;
-
- base = trunc_page(va);
- offset = va & PAGE_MASK;
- size = round_page(offset + size);
-
- /* Only supported on kernel virtual addresses. */
- if (base <= VM_MAXUSER_ADDRESS)
- return (EINVAL);
-
- /* 4MB pages and pages that aren't mapped aren't supported. */
- for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
- pde = pmap_pde(kernel_pmap, tmpva);
- if (*pde & PG_PS)
- return (EINVAL);
- if ((*pde & PG_V) == 0)
- return (EINVAL);
-		pte = vtopte(tmpva);
- if ((*pte & PG_V) == 0)
- return (EINVAL);
- }
-
- changed = FALSE;
-
- /*
- * Ok, all the pages exist and are 4k, so run through them updating
- * their cache mode.
- */
- for (tmpva = base; size > 0; ) {
- pte = vtopte(tmpva);
-
- /*
- * The cache mode bits are all in the low 32-bits of the
- * PTE, so we can just spin on updating the low 32-bits.
- */
- do {
- opte = *(u_int *)pte;
- npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
- npte |= pmap_cache_bits(mode, 0);
- PT_SET_VA_MA(pte, npte, TRUE);
- } while (npte != opte && (*pte != npte));
- if (npte != opte)
- changed = TRUE;
- tmpva += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
-
- /*
- * Flush CPU caches to make sure any data isn't cached that
- * shouldn't be, etc.
- */
- if (changed) {
- pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
- }
- return (0);
-}
-
-/*
- * perform the pmap work for mincore
- */
-int
-pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
-{
- pt_entry_t *ptep, pte;
- vm_paddr_t pa;
- int val;
-
- PMAP_LOCK(pmap);
-retry:
- ptep = pmap_pte(pmap, addr);
- pte = (ptep != NULL) ? PT_GET(ptep) : 0;
- pmap_pte_release(ptep);
- val = 0;
- if ((pte & PG_V) != 0) {
- val |= MINCORE_INCORE;
- if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
- if ((pte & PG_A) != 0)
- val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
- }
- if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
- (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
- (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
- pa = pte & PG_FRAME;
- /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
- if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
- goto retry;
- } else
- PA_UNLOCK_COND(*locked_pa);
- PMAP_UNLOCK(pmap);
- return (val);
-}
-
-void
-pmap_activate(struct thread *td)
-{
- pmap_t pmap, oldpmap;
- u_int cpuid;
- u_int32_t cr3;
-
- critical_enter();
- pmap = vmspace_pmap(td->td_proc->p_vmspace);
- oldpmap = PCPU_GET(curpmap);
- cpuid = PCPU_GET(cpuid);
-#if defined(SMP)
- CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
- CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
-#else
- CPU_CLR(cpuid, &oldpmap->pm_active);
- CPU_SET(cpuid, &pmap->pm_active);
-#endif
-#ifdef PAE
- cr3 = vtophys(pmap->pm_pdpt);
-#else
- cr3 = vtophys(pmap->pm_pdir);
-#endif
- /*
- * pmap_activate is for the current thread on the current cpu
- */
- td->td_pcb->pcb_cr3 = cr3;
- PT_UPDATES_FLUSH();
- load_cr3(cr3);
- PCPU_SET(curpmap, pmap);
- critical_exit();
-}
-
-void
-pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
-{
-}
-
-/*
- * Increase the starting virtual address of the given mapping if a
- * different alignment might result in more superpage mappings.
- */
-void
-pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
- vm_offset_t *addr, vm_size_t size)
-{
- vm_offset_t superpage_offset;
-
- if (size < NBPDR)
- return;
- if (object != NULL && (object->flags & OBJ_COLORED) != 0)
- offset += ptoa(object->pg_color);
- superpage_offset = offset & PDRMASK;
- if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
- (*addr & PDRMASK) == superpage_offset)
- return;
- if ((*addr & PDRMASK) < superpage_offset)
- *addr = (*addr & ~PDRMASK) + superpage_offset;
- else
- *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
-}
-
-void
-pmap_suspend(void)
-{
- pmap_t pmap;
- int i, pdir, offset;
- vm_paddr_t pdirma;
- mmu_update_t mu[4];
-
- /*
- * We need to remove the recursive mapping structure from all
- * our pmaps so that Xen doesn't get confused when it restores
- * the page tables. The recursive map lives at page directory
- * index PTDPTDI. We assume that the suspend code has stopped
- * the other vcpus (if any).
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- for (i = 0; i < 4; i++) {
- /*
- * Figure out which page directory (L2) page
- * contains this bit of the recursive map and
- * the offset within that page of the map
- * entry
- */
- pdir = (PTDPTDI + i) / NPDEPG;
- offset = (PTDPTDI + i) % NPDEPG;
- pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
- mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
- mu[i].val = 0;
- }
- HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
- }
-}
-
-void
-pmap_resume(void)
-{
- pmap_t pmap;
- int i, pdir, offset;
- vm_paddr_t pdirma;
- mmu_update_t mu[4];
-
- /*
- * Restore the recursive map that we removed on suspend.
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- for (i = 0; i < 4; i++) {
- /*
- * Figure out which page directory (L2) page
- * contains this bit of the recursive map and
- * the offset within that page of the map
- * entry
- */
- pdir = (PTDPTDI + i) / NPDEPG;
- offset = (PTDPTDI + i) % NPDEPG;
- pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
- mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
- mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
- }
- HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
- }
-}
-
-#if defined(PMAP_DEBUG)
-int
-pmap_pid_dump(int pid)
-{
- pmap_t pmap;
- struct proc *p;
- int npte = 0;
- int index;
-
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_pid != pid)
- continue;
-
- if (p->p_vmspace) {
-			int i, j;
- index = 0;
- pmap = vmspace_pmap(p->p_vmspace);
- for (i = 0; i < NPDEPTD; i++) {
- pd_entry_t *pde;
- pt_entry_t *pte;
- vm_offset_t base = i << PDRSHIFT;
-
- pde = &pmap->pm_pdir[i];
- if (pde && pmap_pde_v(pde)) {
- for (j = 0; j < NPTEPG; j++) {
- vm_offset_t va = base + (j << PAGE_SHIFT);
- if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
- if (index) {
- index = 0;
- printf("\n");
- }
- sx_sunlock(&allproc_lock);
- return (npte);
- }
- pte = pmap_pte(pmap, va);
- if (pte && pmap_pte_v(pte)) {
- pt_entry_t pa;
- vm_page_t m;
- pa = PT_GET(pte);
- m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
- printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
- va, pa, m->hold_count, m->wire_count, m->flags);
- npte++;
- index++;
- if (index >= 2) {
- index = 0;
- printf("\n");
- } else {
- printf(" ");
- }
- }
- }
- }
- }
- }
- }
- sx_sunlock(&allproc_lock);
- return (npte);
-}
-#endif
-
-#if defined(DEBUG)
-
-static void pads(pmap_t pm);
-void pmap_pvdump(vm_paddr_t pa);
-
-/* Print the address space of a pmap. */
-static void
-pads(pmap_t pm)
-{
- int i, j;
- vm_paddr_t va;
- pt_entry_t *ptep;
-
- if (pm == kernel_pmap)
- return;
- for (i = 0; i < NPDEPTD; i++)
- if (pm->pm_pdir[i])
- for (j = 0; j < NPTEPG; j++) {
- va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
- if (pm == kernel_pmap && va < KERNBASE)
- continue;
- if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
- continue;
- ptep = pmap_pte(pm, va);
- if (pmap_pte_v(ptep))
- printf("%x:%x ", va, *ptep);
-			}
-
-}
-
-void
-pmap_pvdump(vm_paddr_t pa)
-{
- pv_entry_t pv;
- pmap_t pmap;
- vm_page_t m;
-
- printf("pa %x", pa);
- m = PHYS_TO_VM_PAGE(pa);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
- pads(pmap);
- }
- printf(" ");
-}
-#endif
diff --git a/sys/i386/xen/xen_machdep.c b/sys/i386/xen/xen_machdep.c
deleted file mode 100644
index dbaa7ad7cadb..000000000000
--- a/sys/i386/xen/xen_machdep.c
+++ /dev/null
@@ -1,1236 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004-2006,2008 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/reboot.h>
-#include <sys/rwlock.h>
-#include <sys/sysproto.h>
-#include <sys/boot.h>
-
-#include <xen/xen-os.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/segments.h>
-#include <machine/pcb.h>
-#include <machine/stdarg.h>
-#include <machine/vmparam.h>
-#include <machine/cpu.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/asmacros.h>
-
-#include <xen/hypervisor.h>
-#include <xen/xenstore/xenstorevar.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <machine/xen/xenpmap.h>
-#include <xen/interface/memory.h>
-#include <machine/xen/features.h>
-#ifdef SMP
-#include <machine/privatespace.h>
-#endif
-
-#include <vm/vm_page.h>
-
-#define IDTVEC(name) __CONCAT(X,name)
-
-extern inthand_t
-IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
- IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
- IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
- IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
-
-int xendebug_flags;
-start_info_t *xen_start_info;
-start_info_t *HYPERVISOR_start_info;
-shared_info_t *HYPERVISOR_shared_info;
-xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
-xen_pfn_t *xen_phys_machine;
-xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
-xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
-int preemptable, init_first;
-extern unsigned int avail_space;
-int xen_vector_callback_enabled = 0;
-enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;
-
-void ni_cli(void);
-void ni_sti(void);
-
-void
-ni_cli(void)
-{
- CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
- __asm__("pushl %edx;"
- "pushl %eax;"
- );
- __cli();
- __asm__("popl %eax;"
- "popl %edx;"
- );
-}
-
-void
-ni_sti(void)
-{
- __asm__("pushl %edx;"
- "pushl %esi;"
- "pushl %eax;"
- );
- __sti();
- __asm__("popl %eax;"
- "popl %esi;"
- "popl %edx;"
- );
-}
-
-void
-force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0, NULL);
-}
-
-/*
- * Modify the cmd_line by converting ',' to NULs so that it is in a format
- * suitable for the static env vars.
- */
-char *
-xen_setbootenv(char *cmd_line)
-{
- char *cmd_line_next;
-
- /* Skip leading spaces */
- for (; *cmd_line == ' '; cmd_line++);
-
- xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
-
- for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
- return cmd_line;
-}
-
-int
-xen_boothowto(char *envp)
-{
- int i, howto = 0;
-
- /* get equivalents from the environment */
- for (i = 0; howto_names[i].ev != NULL; i++)
- if (kern_getenv(howto_names[i].ev) != NULL)
- howto |= howto_names[i].mask;
- return howto;
-}
-
-
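-/*
- * Page-table updates are batched in a fixed-size queue and handed to the
- * hypervisor in a single HYPERVISOR_mmu_update() call. Under SMP each
- * vCPU has its own queue and index, so a critical section around queue
- * manipulation is the only serialization required.
- */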
-#define XPQUEUE_SIZE 128
-
-struct mmu_log {
- char *file;
- int line;
-};
-
-#ifdef SMP
-/* per-cpu queues and indices */
-#ifdef INVARIANTS
-static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
-#endif
-
-static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
-static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
-
-#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
-#define XPQ_QUEUE xpq_queue[vcpu]
-#define XPQ_IDX xpq_idx[vcpu]
-#define SET_VCPU() int vcpu = smp_processor_id()
-#else
-
-static mmu_update_t xpq_queue[XPQUEUE_SIZE];
-#ifdef INVARIANTS
-static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
-#endif
-static int xpq_idx = 0;
-
-#define XPQ_QUEUE_LOG xpq_queue_log
-#define XPQ_QUEUE xpq_queue
-#define XPQ_IDX xpq_idx
-#define SET_VCPU()
-#endif /* !SMP */
-
-#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
-
-#if 0
-static void
-xen_dump_queue(void)
-{
- int _xpq_idx = XPQ_IDX;
- int i;
-
- if (_xpq_idx <= 1)
- return;
-
- xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
- for (i = 0; i < _xpq_idx; i++) {
- xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
- XPQ_QUEUE[i].ptr);
- }
-}
-#endif
-
-
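-/*
- * Submit the queued updates. XPQ_IDX is reset before the hypercall is
- * issued so that a nested flush cannot replay the same entries; if the
- * hypercall fails, the pending entries are dumped and we panic.
- */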
-static __inline void
-_xen_flush_queue(void)
-{
- SET_VCPU();
- int _xpq_idx = XPQ_IDX;
- int error, i;
-
-#ifdef INVARIANTS
- if (__predict_true(gdtset))
- CRITICAL_ASSERT(curthread);
-#endif
-
- XPQ_IDX = 0;
- /* Make sure index is cleared first to avoid double updates. */
- error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
- _xpq_idx, NULL, DOMID_SELF);
-
-#if 0
- if (__predict_true(gdtset))
- for (i = _xpq_idx; i > 0;) {
- if (i >= 3) {
- CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
- "ptr: %lx val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff),
- (XPQ_QUEUE[i-2].val & 0xffffffff),
- (XPQ_QUEUE[i-2].ptr & 0xffffffff),
- (XPQ_QUEUE[i-3].val & 0xffffffff),
- (XPQ_QUEUE[i-3].ptr & 0xffffffff));
- i -= 3;
- } else if (i == 2) {
- CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff),
- (XPQ_QUEUE[i-2].val & 0xffffffff),
- (XPQ_QUEUE[i-2].ptr & 0xffffffff));
- i = 0;
- } else {
- CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff));
- i = 0;
- }
- }
-#endif
- if (__predict_false(error < 0)) {
- for (i = 0; i < _xpq_idx; i++)
- printf("val: %llx ptr: %llx\n",
- XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
- panic("Failed to execute MMU updates: %d", error);
- }
-
-}
-
-void
-xen_flush_queue(void)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- critical_enter();
- if (XPQ_IDX != 0) _xen_flush_queue();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-static __inline void
-xen_increment_idx(void)
-{
- SET_VCPU();
-
- XPQ_IDX++;
- if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
- xen_flush_queue();
-}
-
-void
-xen_check_queue(void)
-{
-#ifdef INVARIANTS
- SET_VCPU();
-
- KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
-#endif
-}
-
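-/*
- * Despite taking a single VA, this issues MMUEXT_INVLPG_ALL, which
- * invalidates the address on every vCPU of the domain rather than just
- * the local one.
- */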
-void
-xen_invlpg(vm_offset_t va)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_ALL;
- op.arg1.linear_addr = va & ~PAGE_MASK;
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_load_cr3(u_int val)
-{
- struct mmuext_op op;
-#ifdef INVARIANTS
- SET_VCPU();
-
- KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
-#endif
- op.cmd = MMUEXT_NEW_BASEPTR;
- op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-#ifdef KTR
-static __inline u_int
-rebp(void)
-{
- u_int data;
-
- __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
- return (data);
-}
-#endif
-
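-/*
- * Under PV the hardware PSL_I bit is not meaningful to the guest; event
- * (interrupt) delivery is controlled by the per-vCPU evtchn_upcall_mask
- * in the shared info page. These wrappers fold that mask into and out of
- * the flags word so callers see conventional i386 semantics.
- */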
-u_int
-read_eflags(void)
-{
- vcpu_info_t *_vcpu;
- u_int eflags;
-
- eflags = _read_eflags();
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
- if (_vcpu->evtchn_upcall_mask)
- eflags &= ~PSL_I;
-
- return (eflags);
-}
-
-void
-write_eflags(u_int eflags)
-{
- u_int intr;
-
- CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
- intr = ((eflags & PSL_I) == 0);
- __restore_flags(intr);
- _write_eflags(eflags);
-}
-
-void
-xen_cli(void)
-{
- CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
- __cli();
-}
-
-void
-xen_sti(void)
-{
- CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
- __sti();
-}
-
-u_int
-xen_rcr2(void)
-{
-
- return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
-}
-
-void
-_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- critical_enter();
- XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = pfn;
-#ifdef INVARIANTS
- XPQ_QUEUE_LOG[XPQ_IDX].file = file;
- XPQ_QUEUE_LOG[XPQ_IDX].line = line;
-#endif
- xen_increment_idx();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-extern struct rwlock pvh_global_lock;
-
-void
-_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- rw_assert(&pvh_global_lock, RA_WLOCKED);
-
- KASSERT((ptr & 7) == 0, ("misaligned update"));
-
- if (__predict_true(gdtset))
- critical_enter();
-
- XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
-#ifdef INVARIANTS
- XPQ_QUEUE_LOG[XPQ_IDX].file = file;
- XPQ_QUEUE_LOG[XPQ_IDX].line = line;
-#endif
- xen_increment_idx();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
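-/*
- * PV page-table pages must be pinned (handed to the hypervisor for
- * validation and mapped read-only) before they may be installed, and
- * unpinned before being reused as ordinary memory. Each helper flushes
- * the pending update queue first so the pin/unpin is ordered after any
- * queued PTE writes.
- */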
-void
-xen_pgdpt_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L3_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L2_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_unpin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L1_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_unpin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_set_ldt(vm_paddr_t ptr, unsigned long len)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_SET_LDT;
- op.arg1.linear_addr = ptr;
- op.arg2.nr_ents = len;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_tlb_flush(void)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_update_descriptor(union descriptor *table, union descriptor *entry)
-{
- vm_paddr_t pa;
- pt_entry_t *ptp;
-
- ptp = vtopte((vm_offset_t)table);
- pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
- if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
- panic("HYPERVISOR_update_descriptor failed\n");
-}
-
-
-#if 0
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-static void
-contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] |=
- ((1UL<<end_off)-1) & -(1UL<<start_off);
- } else {
- contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
- while ( ++curr_idx < end_idx )
- contiguous_bitmap[curr_idx] = ~0UL;
- contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
- }
-}
-
-static void
-contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] &=
- -(1UL<<end_off) | ((1UL<<start_off)-1);
- } else {
- contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
- while ( ++curr_idx != end_idx )
- contiguous_bitmap[curr_idx] = 0;
- contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
- }
-}
-#endif
-
-/* Ensure multi-page extents are contiguous in machine memory. */
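-/*
- * This works by returning the backing machine pages to the hypervisor
- * one at a time (decrease_reservation), requesting a single order-n
- * extent in exchange (increase_reservation), and then rewiring the P2M
- * entries so the original pseudo-physical pages map the new contiguous
- * machine frames. Only power-of-two page counts are supported.
- */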
-int
-xen_create_contiguous_region(vm_page_t pages, int npages)
-{
- unsigned long mfn, i, flags;
- int order;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
-
- balloon_lock(flags);
-
- /* can currently only handle power of two allocation */
- PANIC_IF(ffs(npages) != fls(npages));
-
- /* 0. determine order */
- order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
-
- /* 1. give away machine pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- mfn = PFNTOMFN(pfn);
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
- }
-
-
- /* 2. Get a new contiguous memory extent. */
- reservation.extent_order = order;
- /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not
- * running with a broken driver XXXEN
- */
- reservation.address_bits = 31;
- if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
- goto fail;
-
- /* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- xen_machphys_update(mfn+i, pfn);
- PFNTOMFN(pfn) = mfn+i;
- }
-
- xen_tlb_flush();
-
-#if 0
- contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
-#endif
-
- balloon_unlock(flags);
-
- return 0;
-
- fail:
- reservation.extent_order = 0;
- reservation.address_bits = 0;
-
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation) != 1);
- xen_machphys_update(mfn, pfn);
- PFNTOMFN(pfn) = mfn;
- }
-
- xen_tlb_flush();
-
- balloon_unlock(flags);
-
- return ENOMEM;
-}
-
-void
-xen_destroy_contiguous_region(void *addr, int npages)
-{
- unsigned long mfn, i, flags, order, pfn0;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
-
- pfn0 = vtophys(addr) >> PAGE_SHIFT;
-#if 0
- scrub_pages(vstart, 1 << order);
-#endif
- /* can currently only handle power of two allocation */
- PANIC_IF(ffs(npages) != fls(npages));
-
- /* 0. determine order */
- order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
-
- balloon_lock(flags);
-
-#if 0
- contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
-#endif
-
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- uint64_t new_val = 0;
- pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
-
- PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
- PANIC_IF(HYPERVISOR_memory_op(
- XENMEM_decrease_reservation, &reservation) != 1);
- }
-
- /* 2. Map new pages in place of old pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- uint64_t new_val;
- pfn = pfn0 + i;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
-
- new_val = mfn << PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
- new_val, PG_KERNEL));
- xen_machphys_update(mfn, pfn);
- PFNTOMFN(pfn) = mfn;
- }
-
- xen_tlb_flush();
-
- balloon_unlock(flags);
-}
-
-extern vm_offset_t proc0kstack;
-extern int vm86paddr, vm86phystk;
-char *bootmem_start, *bootmem_current, *bootmem_end;
-
-pteinfo_t *pteinfo_list;
-void initvalues(start_info_t *startinfo);
-
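-/*
- * Minimal bump allocator over the boot-time scratch window carved out in
- * initvalues(). Frees are only supported in LIFO order, which the
- * PANIC_IF in bootmem_free() enforces.
- */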
-void *
-bootmem_alloc(unsigned int size)
-{
- char *retptr;
-
- retptr = bootmem_current;
- PANIC_IF(retptr + size > bootmem_end);
- bootmem_current += size;
-
- return retptr;
-}
-
-void
-bootmem_free(void *ptr, unsigned int size)
-{
- char *tptr;
-
- tptr = ptr;
- PANIC_IF(tptr != bootmem_current - size ||
- bootmem_current - size < bootmem_start);
-
- bootmem_current -= size;
-}
-
-#if 0
-static vm_paddr_t
-xpmap_mtop2(vm_paddr_t mpa)
-{
- return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
- ) | (mpa & ~PG_FRAME);
-}
-
-static pd_entry_t
-xpmap_get_bootpde(vm_paddr_t va)
-{
-
- return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
-}
-
-static pd_entry_t
-xpmap_get_vbootpde(vm_paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_bootpde(va);
- if ((pde & PG_V) == 0)
- return (pde & ~PG_FRAME);
- return (pde & ~PG_FRAME) |
- (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
-}
-
-static pt_entry_t *
-xpmap_get_bootptep(vm_paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_vbootpde(va);
- if ((pde & PG_V) == 0)
- return (void *)-1;
-#define PT_MASK 0x003ff000 /* page table address bits */
- return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
-}
-
-static pt_entry_t
-xpmap_get_bootpte(vm_paddr_t va)
-{
-
- return xpmap_get_bootptep(va)[0];
-}
-#endif
-
-
-#ifdef ADD_ISA_HOLE
-static void
-shift_phys_machine(unsigned long *phys_machine, int nr_pages)
-{
-
- unsigned long *tmp_page, *current_page, *next_page;
- int i;
-
- tmp_page = bootmem_alloc(PAGE_SIZE);
- current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
- next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
- bcopy(phys_machine, tmp_page, PAGE_SIZE);
-
- while (current_page > phys_machine) {
- /* save next page */
- bcopy(next_page, tmp_page, PAGE_SIZE);
- /* shift down page */
- bcopy(current_page, next_page, PAGE_SIZE);
- /* finish swap */
- bcopy(tmp_page, current_page, PAGE_SIZE);
-
- current_page -= (PAGE_SIZE/sizeof(unsigned long));
- next_page -= (PAGE_SIZE/sizeof(unsigned long));
- }
- bootmem_free(tmp_page, PAGE_SIZE);
-
- for (i = 0; i < nr_pages; i++) {
- xen_machphys_update(phys_machine[i], i);
- }
- memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
-
-}
-#endif /* ADD_ISA_HOLE */
-
-/*
- * Build a directory of the pages that make up our Physical to Machine
- * mapping table. The Xen suspend/restore code uses this to find our
- * mapping table.
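- *
- * Each frame-list page holds FPP entries (PAGE_SIZE/sizeof(xen_pfn_t);
- * 1024 with a 4-byte xen_pfn_t), and each entry names the machine frame
- * of one page of the P2M array, so a single frame-list page covers
- * FPP * FPP pfns.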
- */
-static void
-init_frame_list_list(void *arg)
-{
- unsigned long nr_pages = xen_start_info->nr_pages;
-#define FPP (PAGE_SIZE/sizeof(xen_pfn_t))
- int i, j, k;
-
- xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- for (i = 0, j = 0, k = -1; i < nr_pages;
- i += FPP, j++) {
- if ((j & (FPP - 1)) == 0) {
- k++;
- xen_pfn_to_mfn_frame_list[k] =
- malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- xen_pfn_to_mfn_frame_list_list[k] =
- VTOMFN(xen_pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- xen_pfn_to_mfn_frame_list[k][j] =
- VTOMFN(&xen_phys_machine[i]);
- }
-
- HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
- = VTOMFN(xen_pfn_to_mfn_frame_list_list);
-}
-SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
-
-extern unsigned long physfree;
-
-int pdir, curoffset;
-extern int nkpt;
-
-extern uint32_t kernbase;
-
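-/*
- * Early PV bootstrap, run on the start_info page handed over by Xen: it
- * sizes nkpt, builds a fresh set of page tables (new PDPT and PTDs under
- * PAE), carves the proc0 kstack, vm86 area, bootmem window, GDT and LDT
- * out of the space above the boot page tables, maps the shared-info,
- * xenstore and console pages, and finally remaps the kernel text
- * read-only.
- */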
-void
-initvalues(start_info_t *startinfo)
-{
- vm_offset_t cur_space, cur_space_pt;
- struct physdev_set_iopl set_iopl;
-
- int l3_pages, l2_pages, l1_pages, offset;
- vm_paddr_t console_page_ma, xen_store_ma;
- vm_offset_t tmpva;
- vm_paddr_t shinfo;
-#ifdef PAE
- vm_paddr_t IdlePDPTma, IdlePDPTnewma;
- vm_paddr_t IdlePTDnewma[4];
- pd_entry_t *IdlePDPTnew, *IdlePTDnew;
- vm_paddr_t IdlePTDma[4];
-#else
- vm_paddr_t IdlePTDma[1];
-#endif
- unsigned long i;
- int ncpus = MAXCPU;
-
- nkpt = min(
- min(
- max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
- NPGPTD*NPDEPG - KPTDI),
- (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
-
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
-#ifdef notyet
- /*
- * need to install handler
- */
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
-#endif
- xen_start_info = startinfo;
- HYPERVISOR_start_info = startinfo;
- xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
-
- IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
- l1_pages = 0;
-
-#ifdef PAE
- l3_pages = 1;
- l2_pages = 0;
- IdlePDPT = (pd_entry_t *)startinfo->pt_base;
- IdlePDPTma = VTOM(startinfo->pt_base);
- for (i = (KERNBASE >> 30);
- (i < 4) && (IdlePDPT[i] != 0); i++)
- l2_pages++;
- /*
- * Note that only one page directory has been allocated at this point.
- * Thus, if KERNBASE
- */
- for (i = 0; i < l2_pages; i++)
- IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
-
- l2_pages = (l2_pages == 0) ? 1 : l2_pages;
-#else
- l3_pages = 0;
- l2_pages = 1;
-#endif
- for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
- (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
-
- if (IdlePTD[i] == 0)
- break;
- l1_pages++;
- }
-
-	/* Number of pages allocated after the page tables, plus one. */
- cur_space = xen_start_info->pt_base +
- (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
-
- xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
- cur_space);
-
- xc_printf("KERNBASE=%x,pt_base=%lx, VTOPFN(base)=%x, nr_pt_frames=%lx\n",
- KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames);
- xendebug_flags = 0; /* 0xffffffff; */
-
-#ifdef ADD_ISA_HOLE
- shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
-#endif
- XENPRINTF("IdlePTD %p\n", IdlePTD);
- XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%x pt_base: 0x%lx "
- "mod_start: 0x%lx mod_len: 0x%lx\n",
- xen_start_info->nr_pages, xen_start_info->shared_info,
- xen_start_info->flags, xen_start_info->pt_base,
- xen_start_info->mod_start, xen_start_info->mod_len);
-
-#ifdef PAE
- IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
- bzero(IdlePDPTnew, PAGE_SIZE);
-
- IdlePDPTnewma = VTOM(IdlePDPTnew);
- IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
- bzero(IdlePTDnew, 4*PAGE_SIZE);
-
- for (i = 0; i < 4; i++)
- IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
- /*
- * L3
- *
- * Copy the 4 machine addresses of the new PTDs in to the PDPT
- *
- */
- for (i = 0; i < 4; i++)
- IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
-
- __asm__("nop;");
- /*
- *
- * re-map the new PDPT read-only
- */
- PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
- /*
- *
- * Unpin the current PDPT
- */
- xen_pt_unpin(IdlePDPTma);
-
-#endif /* PAE */
-
- /* Map proc0's KSTACK */
- proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
- xc_printf("proc0kstack=%u\n", proc0kstack);
-
- /* vm86/bios stack */
- cur_space += PAGE_SIZE;
-
- /* Map space for the vm86 region */
- vm86paddr = (vm_offset_t)cur_space;
- cur_space += (PAGE_SIZE * 3);
-
- /* allocate 4 pages for bootmem allocator */
- bootmem_start = bootmem_current = (char *)cur_space;
- cur_space += (4 * PAGE_SIZE);
- bootmem_end = (char *)cur_space;
-
- /* allocate pages for gdt */
- gdt = (union descriptor *)cur_space;
- cur_space += PAGE_SIZE*ncpus;
-
- /* allocate page for ldt */
- ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
- cur_space += PAGE_SIZE;
-
- /* unmap remaining pages from initial chunk
- *
- */
- for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
- tmpva += PAGE_SIZE) {
- bzero((char *)tmpva, PAGE_SIZE);
- PT_SET_MA(tmpva, (vm_paddr_t)0);
- }
-
- PT_UPDATES_FLUSH();
-
- memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
- ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
- l1_pages*sizeof(pt_entry_t));
-
- for (i = 0; i < 4; i++) {
- PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
- IdlePTDnewma[i] | PG_V);
- }
- xen_load_cr3(VTOP(IdlePDPTnew));
- xen_pgdpt_pin(VTOM(IdlePDPTnew));
-
- /* allocate remainder of nkpt pages */
- cur_space_pt = cur_space;
- for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
- i++, cur_space += PAGE_SIZE) {
- pdir = (offset + i) / NPDEPG;
- curoffset = ((offset + i) % NPDEPG);
- if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
- break;
-
- /*
- * make sure that all the initial page table pages
- * have been zeroed
- */
- PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
- bzero((char *)cur_space, PAGE_SIZE);
- PT_SET_MA(cur_space, (vm_paddr_t)0);
- xen_pt_pin(VTOM(cur_space));
- xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
- curoffset*sizeof(vm_paddr_t)),
- VTOM(cur_space) | PG_KERNEL);
- PT_UPDATES_FLUSH();
- }
-
- for (i = 0; i < 4; i++) {
- pdir = (PTDPTDI + i) / NPDEPG;
- curoffset = (PTDPTDI + i) % NPDEPG;
-
- xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
- curoffset*sizeof(vm_paddr_t)),
- IdlePTDnewma[i] | PG_V);
- }
-
- PT_UPDATES_FLUSH();
-
- IdlePTD = IdlePTDnew;
- IdlePDPT = IdlePDPTnew;
- IdlePDPTma = IdlePDPTnewma;
-
- HYPERVISOR_shared_info = (shared_info_t *)cur_space;
- cur_space += PAGE_SIZE;
-
- xen_store = (struct xenstore_domain_interface *)cur_space;
- cur_space += PAGE_SIZE;
-
- console_page = (char *)cur_space;
- cur_space += PAGE_SIZE;
-
- /*
- * shared_info is an unsigned long so this will randomly break if
- * it is allocated above 4GB - I guess people are used to that
- * sort of thing with Xen ... sigh
- */
- shinfo = xen_start_info->shared_info;
- PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
-
- xc_printf("#4\n");
-
- xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
- PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
- console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
- PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
-
- xc_printf("#5\n");
-
- set_iopl.iopl = 1;
- PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
- xc_printf("#6\n");
-#if 0
- /* add page table for KERNBASE */
- xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_KERNEL);
- xen_flush_queue();
-#ifdef PAE
- xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_V | PG_A);
-#else
- xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_V | PG_A);
-#endif
- xen_flush_queue();
- cur_space += PAGE_SIZE;
- xc_printf("#6\n");
-#endif /* 0 */
-#ifdef notyet
- if (xen_start_info->flags & SIF_INITDOMAIN) {
- /* Map first megabyte */
- for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
- PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
- xen_flush_queue();
- }
-#endif
- /*
- * re-map kernel text read-only
- *
- */
- for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
- i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
- PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
-
- xc_printf("#7\n");
- physfree = VTOP(cur_space);
- init_first = physfree >> PAGE_SHIFT;
- IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
- IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
- setup_xen_features();
- xc_printf("#8, proc0kstack=%u\n", proc0kstack);
-}
-
-
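-/*
- * PV trap table. The second field encodes the DPL in its low two bits,
- * and the |4 requests that event delivery be disabled on entry (the PV
- * analogue of an interrupt gate). The table is terminated by the
- * all-zero sentinel entry.
- */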
-trap_info_t trap_table[] = {
- { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
- { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
- { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
- { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
- /* This is UPL on Linux and KPL on BSD */
- { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
- { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
- { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
- /*
- * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
- * no handler for double fault
- */
- { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
- {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
- {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
- {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
- {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
- {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
- {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
- {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
- {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
- {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
- {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
- {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
- { 0, 0, 0, 0 }
-};
-
-/* Perform a multicall and check that individual calls succeeded. */
-int
-HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
-{
- int ret = 0;
- int i;
-
- /* Perform the multicall. */
- PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
-
- /* Check the results of individual hypercalls. */
- for (i = 0; i < nr_calls; i++)
- if (__predict_false(call_list[i].result < 0))
- ret++;
- if (__predict_false(ret > 0))
- panic("%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
-
- /* If we didn't panic already, everything succeeded. */
- return (0);
-}
-
-/********** CODE WORTH KEEPING ABOVE HERE *****************/
-
-void xen_failsafe_handler(void);
-
-void
-xen_failsafe_handler(void)
-{
-
- panic("xen_failsafe_handler called!\n");
-}
-
-void xen_handle_thread_switch(struct pcb *pcb);
-
-/* This is called by cpu_switch() when switching threads. */
-/* The pcb arg refers to the process control block of the */
-/* next thread which is to run */
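-/* It batches one stack-switch hypercall together with up to */
-/* two GDT descriptor updates (%fs/%gs) into a single multicall. */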
-void
-xen_handle_thread_switch(struct pcb *pcb)
-{
- uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
- uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
- multicall_entry_t mcl[3];
- int i = 0;
-
- /* Notify Xen of task switch */
- mcl[i].op = __HYPERVISOR_stack_switch;
- mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
- mcl[i++].args[1] = (unsigned long)pcb;
-
- /* Check for update of fsd */
- if (*a != *b || *(a+1) != *(b+1)) {
- mcl[i].op = __HYPERVISOR_update_descriptor;
- *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
- *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
- }
-
- a += 2;
- b += 2;
-
- /* Check for update of gsd */
- if (*a != *b || *(a+1) != *(b+1)) {
- mcl[i].op = __HYPERVISOR_update_descriptor;
- *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
- *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
- }
-
- (void)HYPERVISOR_multicall(mcl, i);
-}
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 7a5d93659913..a84019ad8241 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -1455,12 +1455,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
/* Schedule the ithread if needed. */
if (thread) {
error = intr_event_schedule_thread(ie);
-#ifndef XEN
KASSERT(error == 0, ("bad stray interrupt"));
-#else
- if (error != 0)
- log(LOG_WARNING, "bad stray interrupt");
-#endif
}
critical_exit();
td->td_intr_nesting_level--;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index ad10dc786371..a238a0944fbc 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -66,12 +66,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
-#ifdef XEN
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#endif
-
#define KTDSTATE(td) \
(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \
((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \
@@ -475,9 +469,6 @@ mi_switch(int flags, struct thread *newtd)
"lockname:\"%s\"", td->td_lockname);
#endif
SDT_PROBE0(sched, , , preempt);
-#ifdef XEN
- PT_UPDATES_FLUSH();
-#endif
sched_switch(td, newtd, flags);
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index f662ec268077..cba656aa1884 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -99,11 +99,7 @@ pid_t pid_max = PID_MAX;
long maxswzone; /* max swmeta KVA storage */
long maxbcache; /* max buffer cache KVA storage */
long maxpipekva; /* Limit on pipe KVA */
-#ifdef XEN
-int vm_guest = VM_GUEST_XEN;
-#else
int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */
-#endif
u_long maxtsiz; /* max text size */
u_long dfldsiz; /* initial data size limit */
u_long maxdsiz; /* max data size */
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 744c064f3a1c..93f7557aef63 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -80,12 +80,6 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#endif
-#ifdef XEN
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#endif
-
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
@@ -136,9 +130,6 @@ userret(struct thread *td, struct trapframe *frame)
* Let the scheduler adjust our priority etc.
*/
sched_userret(td);
-#ifdef XEN
- PT_UPDATES_FLUSH();
-#endif
/*
* Check for misbehavior.
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6cdbbc7520bb..0aec01f64bc2 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -431,14 +431,6 @@ vm_page_startup(vm_offset_t vaddr)
phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
}
-#ifdef XEN
- /*
- * There is no obvious reason why i386 PV Xen needs vm_page structs
- * created for these pseudo-physical addresses. XXX
- */
- vm_phys_add_seg(0, phys_avail[0]);
-#endif
-
low_water = phys_avail[0];
high_water = phys_avail[1];
diff --git a/sys/x86/include/segments.h b/sys/x86/include/segments.h
index a5c1ea46b7b4..65b5870d4a82 100644
--- a/sys/x86/include/segments.h
+++ b/sys/x86/include/segments.h
@@ -46,11 +46,7 @@
*/
#define SEL_RPL_MASK 3 /* requester priv level */
#define ISPL(s) ((s)&3) /* priority level of a selector */
-#ifdef XEN
-#define SEL_KPL 1 /* kernel priority level */
-#else
#define SEL_KPL 0 /* kernel priority level */
-#endif
#define SEL_UPL 3 /* user priority level */
#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
#define SEL_LDT 4 /* local descriptor table */
@@ -244,11 +240,7 @@ union descriptor {
#define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */
#define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */
#define GNDIS_SEL 18 /* For the NDIS layer */
-#ifdef XEN
-#define NGDT 9
-#else
#define NGDT 19
-#endif
/*
* Entries in the Local Descriptor Table (LDT)
diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c
index f5c1b926a97a..dcdeafa733ca 100644
--- a/sys/x86/x86/busdma_bounce.c
+++ b/sys/x86/x86/busdma_bounce.c
@@ -147,11 +147,6 @@ static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
int flags);
-#ifdef XEN
-#undef pmap_kextract
-#define pmap_kextract pmap_kextract_ma
-#endif
-
/*
* Allocate a device specific dma_tag.
*/
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
index 846a1236c3ea..f8d1f083c244 100644
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -100,15 +100,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pager.h>
#include <vm/vm_param.h>
-#ifdef XEN
-/* XEN includes */
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/xen_intr.h>
-#endif
-
/*
* Machine dependent boot() routine
*
@@ -193,33 +184,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
return (0);
}
-#if defined(__i386__) && defined(XEN)
-
-static void
-idle_block(void)
-{
-
- HYPERVISOR_sched_op(SCHEDOP_block, 0);
-}
-
-void
-cpu_halt(void)
-{
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-
-int scheduler_running;
-
-static void
-cpu_idle_hlt(sbintime_t sbt)
-{
-
- scheduler_running = 1;
- enable_intr();
- idle_block();
-}
-
-#else
/*
* Shutdown the CPU as much as possible
*/
@@ -230,8 +194,6 @@ cpu_halt(void)
halt();
}
-#endif
-
void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
@@ -263,7 +225,6 @@ cpu_idle_acpi(sbintime_t sbt)
}
#endif /* !PC98 */
-#if !defined(__i386__) || !defined(XEN)
static void
cpu_idle_hlt(sbintime_t sbt)
{
@@ -295,7 +256,6 @@ cpu_idle_hlt(sbintime_t sbt)
__asm __volatile("sti; hlt");
*state = STATE_RUNNING;
}
-#endif
static void
cpu_idle_mwait(sbintime_t sbt)
@@ -370,7 +330,7 @@ cpu_probe_amdc1e(void)
}
}
-#if defined(__i386__) && (defined(PC98) || defined(XEN))
+#if defined(__i386__) && defined(PC98)
void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt;
#else
void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
@@ -379,17 +339,15 @@ void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
-#if !defined(__i386__) || !defined(XEN)
uint64_t msr;
-#endif
sbintime_t sbt = -1;
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
-#if defined(MP_WATCHDOG) && (!defined(__i386__) || !defined(XEN))
+#ifdef MP_WATCHDOG
ap_watchdog(PCPU_GET(cpuid));
#endif
-#if !defined(__i386__) || !defined(XEN)
+
/* If we are busy - try to use fast methods. */
if (busy) {
if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
@@ -397,7 +355,6 @@ cpu_idle(int busy)
goto out;
}
}
-#endif
/* If we have time - switch timers into idle mode. */
if (!busy) {
@@ -405,14 +362,12 @@ cpu_idle(int busy)
sbt = cpu_idleclock();
}
-#if !defined(__i386__) || !defined(XEN)
/* Apply AMD APIC timer C1E workaround. */
if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
msr = rdmsr(MSR_AMDK8_IPM);
if (msr & AMDK8_CMPHALT)
wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
}
-#endif
/* Call main idle method. */
cpu_idle_fn(sbt);
@@ -422,9 +377,7 @@ cpu_idle(int busy)
cpu_activeclock();
critical_exit();
}
-#if !defined(__i386__) || !defined(XEN)
out:
-#endif
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
busy, curcpu);
}
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index ca2b5ca92725..9f39d1c82af8 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -1190,7 +1190,6 @@ hook_tsc_freq(void *arg __unused)
SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL);
-#ifndef XEN
static const char *const vm_bnames[] = {
"QEMU", /* QEMU */
"Plex86", /* Plex86 */
@@ -1281,7 +1280,6 @@ identify_hypervisor(void)
freeenv(p);
}
}
-#endif
/*
* Final stage of CPU identification.
@@ -1314,9 +1312,7 @@ identify_cpu(void)
cpu_feature2 = regs[2];
#endif
-#ifndef XEN
identify_hypervisor();
-#endif
cpu_vendor_id = find_cpu_vendor_id();
/*
diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c
index b27df4a36c34..d81c9132dd8c 100644
--- a/sys/x86/x86/intr_machdep.c
+++ b/sys/x86/x86/intr_machdep.c
@@ -532,13 +532,6 @@ intr_shuffle_irqs(void *arg __unused)
struct intsrc *isrc;
int i;
-#ifdef XEN
- /*
- * Doesn't work yet
- */
- return;
-#endif
-
/* Don't bother on UP. */
if (mp_ncpus == 1)
return;
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index e0656ef1b8dc..d75a45236ea2 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -1579,17 +1579,13 @@ apic_setup_io(void *dummy __unused)
* Local APIC must be registered before other PICs and pseudo PICs
* for proper suspend/resume order.
*/
-#ifndef XEN
intr_register_pic(&lapic_pic);
-#endif
retval = best_enum->apic_setup_io();
if (retval != 0)
printf("%s: Failed to setup I/O APICs: returned %d\n",
best_enum->apic_name, retval);
-#ifdef XEN
- return;
-#endif
+
/*
* Finish setting up the local APIC on the BSP once we know
* how to properly program the LINT pins. In particular, this
diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c
index 64979b136ead..3bc4b4399f5c 100644
--- a/sys/x86/xen/xen_intr.c
+++ b/sys/x86/xen/xen_intr.c
@@ -1,7 +1,7 @@
/******************************************************************************
* xen_intr.c
*
- * Xen event and interrupt services for x86 PV and HVM guests.
+ * Xen event and interrupt services for x86 HVM guests.
*
* Copyright (c) 2002-2005, K A Fraser
* Copyright (c) 2005, Intel Corporation <xiaofeng.ling@intel.com>
@@ -864,10 +864,8 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
u_int to_cpu, vcpu_id;
int error, masked;
-#ifdef XENHVM
if (xen_vector_callback_enabled == 0)
return (EOPNOTSUPP);
-#endif
to_cpu = apic_cpuid(apic_id);
vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id;
diff --git a/sys/x86/xen/xen_nexus.c b/sys/x86/xen/xen_nexus.c
index f25f970c609b..73506fc955f0 100644
--- a/sys/x86/xen/xen_nexus.c
+++ b/sys/x86/xen/xen_nexus.c
@@ -66,14 +66,11 @@ static int
nexus_xen_attach(device_t dev)
{
int error;
-#ifndef XEN
device_t acpi_dev = NULL;
-#endif
nexus_init_resources();
bus_generic_probe(dev);
-#ifndef XEN
if (xen_initial_domain()) {
/* Disable some ACPI devices that are not usable by Dom0 */
acpi_cpu_disabled = true;
@@ -84,13 +81,10 @@ nexus_xen_attach(device_t dev)
if (acpi_dev == NULL)
panic("Unable to add ACPI bus to Xen Dom0");
}
-#endif
error = bus_generic_attach(dev);
-#ifndef XEN
if (xen_initial_domain() && (error == 0))
acpi_install_wakeup_handler(device_get_softc(acpi_dev));
-#endif
return (error);
}