Subject: xen3 arch-x86
From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1073:8fe973d8fb98)
Patch-mainline: n/a
Acked-by: jbeulich@novell.com

List of files that don't require modification anymore (and hence
removed from this patch), for reference and in case upstream wants to
take the forward porting patches:
2.6.26/arch/x86/kernel/crash.c
2.6.30/arch/x86/kernel/acpi/boot.c

Summary of the hunks below:
 - Makefiles: add the Xen include path and Xen-only objects, and add a
   gzip'ed "vmlinuz" image target that becomes the default goal instead
   of bzImage when CONFIG_XEN is set.
 - mce.c: set check_interval to 0 under CONFIG_X86_XEN_MCE and bind a
   vIRQ for MCE logging when running as the initial domain.
 - acpi.h: add acpi_notify_hypervisor_state(), which issues a
   XENPF_enter_acpi_sleep platform op.
 - kexec.h: kexec page/address macros that work on machine addresses.
 - vdso32-setup.c: under CONFIG_XEN, register the sysenter entry point
   through HYPERVISOR_callback_op instead of writing the SYSENTER MSRs.

NOTE(review): in the CONFIG_XEN branch of enable_sep_cpu() get_cpu() is
called without a matching put_cpu(); confirm that this is intended.
NOTE(review): several #include lines (acpi.h, apic.h, vdso32-setup.c)
carry no header name in this copy of the patch; restore them from the
pristine patch before applying.
NOTE(review): leading indentation was reconstructed; if a hunk does not
match the tree, compare against the pristine patch or try "patch -l".

---
 arch/x86/Makefile | 24 +++++++++++++++++++++++-
 arch/x86/boot/Makefile | 9 +++++++++
 arch/x86/include/asm/acpi.h | 27 +++++++++++++++++++++++++++
 arch/x86/include/asm/apic.h | 2 ++
 arch/x86/include/asm/kexec.h | 13 +++++++++++++
 arch/x86/include/asm/topology.h | 2 +-
 arch/x86/include/asm/types.h | 2 +-
 arch/x86/kernel/Makefile | 9 +++++++++
 arch/x86/kernel/acpi/Makefile | 4 ++++
 arch/x86/kernel/cpu/mcheck/Makefile | 1 +
 arch/x86/kernel/cpu/mcheck/mce.c | 21 +++++++++++++++++++++
 arch/x86/kernel/cpu/mtrr/Makefile | 1 +
 arch/x86/lib/Makefile | 2 ++
 arch/x86/mm/Makefile | 2 ++
 arch/x86/oprofile/Makefile | 7 +++++++
 arch/x86/pci/Makefile | 3 +++
 arch/x86/power/cpu.c | 4 ++++
 arch/x86/vdso/Makefile | 2 ++
 arch/x86/vdso/vdso32-setup.c | 34 ++++++++++++++++++++++++++++++++++
 19 files changed, 166 insertions(+), 3 deletions(-)

Index: linux-2.6.38-master/arch/x86/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/Makefile
+++ linux-2.6.38-master/arch/x86/Makefile
@@ -116,6 +116,10 @@ endif
 # prevent gcc from generating any FP code by mistake
 KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 
+# Xen subarch support
+mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen
+mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/
+
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
@@ -151,9 +155,26 @@ boot := arch/x86/boot
 
 BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
 
-PHONY += bzImage $(BOOT_TARGETS)
+PHONY += bzImage vmlinuz $(BOOT_TARGETS)
+
+ifdef CONFIG_XEN
+CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
+	-Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+
+ifdef CONFIG_X86_64
+LDFLAGS_vmlinux := -e startup_64
+endif
 
 # Default kernel to build
+all: vmlinuz
+
+# KBUILD_IMAGE specifies the target image being built
+KBUILD_IMAGE := $(boot)/vmlinuz
+
+vmlinuz: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+else
+# Default kernel to build
 all: bzImage
 
 # KBUILD_IMAGE specify target image being built
@@ -169,6 +190,7 @@ endif
 
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
+endif
 
 PHONY += install
 install:
Index: linux-2.6.38-master/arch/x86/boot/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/boot/Makefile
+++ linux-2.6.38-master/arch/x86/boot/Makefile
@@ -23,6 +23,7 @@ ROOT_DEV := CURRENT
 SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 
 targets := vmlinux.bin setup.bin setup.elf bzImage
+targets += vmlinuz vmlinux-stripped
 targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir- := compressed
 
@@ -195,6 +196,14 @@ bzlilo: $(obj)/bzImage
 	cp System.map $(INSTALL_PATH)/
 	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
 
+$(obj)/vmlinuz: $(obj)/vmlinux-stripped FORCE
+	$(call if_changed,gzip)
+	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+
+$(obj)/vmlinux-stripped: OBJCOPYFLAGS := -g --strip-unneeded
+$(obj)/vmlinux-stripped: vmlinux FORCE
+	$(call if_changed,objcopy)
+
 install:
 	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
 		System.map "$(INSTALL_PATH)"
Index: linux-2.6.38-master/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/kernel/Makefile
+++ linux-2.6.38-master/arch/x86/kernel/Makefile
@@ -110,9 +110,12 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
 
 obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 
+obj-$(CONFIG_X86_XEN) += fixup.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
+	obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o
 	obj-$(CONFIG_AUDIT) += audit_64.o
 
 	obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
@@ -121,4 +124,10 @@ ifeq ($(CONFIG_X86_64),y)
 
 	obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
 	obj-y += vsmp_64.o
+
+	time_64-$(CONFIG_XEN) += time_32.o
+	pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
 endif
+
+disabled-obj-$(CONFIG_XEN) := i8259_$(BITS).o reboot.o smpboot_$(BITS).o
+%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
Index: linux-2.6.38-master/arch/x86/kernel/acpi/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/kernel/acpi/Makefile
+++ linux-2.6.38-master/arch/x86/kernel/acpi/Makefile
@@ -5,6 +5,9 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wake
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y += cstate.o
+ifneq ($(CONFIG_PROCESSOR_EXTERNAL_CONTROL),)
+obj-$(CONFIG_XEN) += processor_extcntl_xen.o
+endif
 endif
 
 $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
@@ -12,3 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w
 $(obj)/realmode/wakeup.bin: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/realmode
 
+disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o
Index: linux-2.6.38-master/arch/x86/kernel/cpu/mcheck/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/kernel/cpu/mcheck/Makefile
+++ linux-2.6.38-master/arch/x86/kernel/cpu/mcheck/Makefile
@@ -3,6 +3,7 @@ obj-y = mce.o mce-severity.o
 obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
+obj-$(CONFIG_X86_XEN_MCE) += mce_dom0.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
 
Index: linux-2.6.38-master/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux-2.6.38-master.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ linux-2.6.38-master/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1146,8 +1146,15 @@ void mce_log_therm_throt_event(__u64 sta
  * Periodic polling timer for "silent" machine check errors. If the
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
+ *
+ * We will disable polling in DOM0 since all CMCI/Polling
+ * mechanism will be done in XEN for Intel CPUs
  */
+#if defined (CONFIG_X86_XEN_MCE)
+static int check_interval = 0; /* disable polling */
+#else
 static int check_interval = 5 * 60; /* 5 minutes */
+#endif
 
 static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1312,6 +1319,7 @@ static int __cpuinit __mcheck_cpu_apply_
 
 	/* This should be disabled by the BIOS, but isn't always */
 	if (c->x86_vendor == X86_VENDOR_AMD) {
+#ifndef CONFIG_XEN
 		if (c->x86 == 15 && banks > 4) {
 			/*
 			 * disable GART TBL walk error reporting, which
@@ -1320,6 +1328,7 @@ static int __cpuinit __mcheck_cpu_apply_
 			 */
 			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
 		}
+#endif
 		if (c->x86 <= 17 && mce_bootlog < 0) {
 			/*
 			 * Lots of broken BIOS around that don't clear them
@@ -1387,6 +1396,7 @@ static void __cpuinit __mcheck_cpu_ancie
 
 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
+#ifndef CONFIG_X86_64_XEN
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
@@ -1397,6 +1407,7 @@ static void __mcheck_cpu_init_vendor(str
 	default:
 		break;
 	}
+#endif
 }
 
 static void __mcheck_cpu_init_timer(void)
@@ -2142,6 +2153,16 @@ static __init int mcheck_init_device(voi
 	register_hotcpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
 
+#ifdef CONFIG_X86_XEN_MCE
+	if (is_initial_xendomain()) {
+		/* Register vIRQ handler for MCE LOG processing */
+		extern void bind_virq_for_mce(void);
+
+		printk(KERN_DEBUG "MCE: bind virq for DOM0 logging\n");
+		bind_virq_for_mce();
+	}
+#endif
+
 	return err;
 }
 
Index: linux-2.6.38-master/arch/x86/kernel/cpu/mtrr/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/kernel/cpu/mtrr/Makefile
+++ linux-2.6.38-master/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,4 @@
 obj-y := main.o if.o generic.o cleanup.o
 obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
 
+obj-$(CONFIG_XEN) := main.o if.o
Index: linux-2.6.38-master/arch/x86/lib/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/lib/Makefile
+++ linux-2.6.38-master/arch/x86/lib/Makefile
@@ -43,3 +43,5 @@ else
 	lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
 	lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
 endif
+
+lib-$(CONFIG_XEN_SCRUB_PAGES) += scrub.o
Index: linux-2.6.38-master/arch/x86/mm/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/mm/Makefile
+++ linux-2.6.38-master/arch/x86/mm/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_NUMA) += numa.o numa_$(BIT
 obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
 obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
 
+obj-$(CONFIG_XEN) += hypervisor.o
+
 obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
 
 obj-$(CONFIG_MEMTEST) += memtest.o
Index: linux-2.6.38-master/arch/x86/oprofile/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/oprofile/Makefile
+++ linux-2.6.38-master/arch/x86/oprofile/Makefile
@@ -6,7 +6,14 @@ DRIVER_OBJS = $(addprefix ../../../drive
 		oprofilefs.o oprofile_stats.o \
 		timer_int.o )
 
+ifdef CONFIG_XEN
+XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \
+			xenoprofile.o)
+oprofile-y := $(DRIVER_OBJS) \
+					$(XENOPROF_COMMON_OBJS) xenoprof.o
+else
 oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
 oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
 					op_model_ppro.o op_model_p4.o
 oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
+endif
Index: linux-2.6.38-master/arch/x86/pci/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/pci/Makefile
+++ linux-2.6.38-master/arch/x86/pci/Makefile
@@ -5,6 +5,9 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$
 obj-$(CONFIG_PCI_DIRECT) += direct.o
 obj-$(CONFIG_PCI_OLPC) += olpc.o
 obj-$(CONFIG_PCI_XEN) += xen.o
+# pcifront should be after mmconfig.o and direct.o as it should only
+# take over if direct access to the PCI bus is unavailable
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o
 
 obj-y += fixup.o
 obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
Index: linux-2.6.38-master/arch/x86/power/cpu.c
===================================================================
--- linux-2.6.38-master.orig/arch/x86/power/cpu.c
+++ linux-2.6.38-master/arch/x86/power/cpu.c
@@ -129,6 +129,7 @@ static void do_fpu_end(void)
 
 static void fix_processor_context(void)
 {
+#ifndef CONFIG_X86_NO_TSS
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
 
@@ -141,7 +142,10 @@ static void fix_processor_context(void)
 
 #ifdef CONFIG_X86_64
 	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
+#endif
+#endif
 
+#ifdef CONFIG_X86_64
 	syscall_init(); /* This sets MSR_*STAR and related */
 #endif
 	load_TR_desc(); /* This does ltr */
Index: linux-2.6.38-master/arch/x86/include/asm/acpi.h
===================================================================
--- linux-2.6.38-master.orig/arch/x86/include/asm/acpi.h
+++ linux-2.6.38-master/arch/x86/include/asm/acpi.h
@@ -30,6 +30,10 @@
 #include
 #include
 
+#ifdef CONFIG_XEN
+#include
+#endif
+
 #define COMPILER_DEPENDENT_INT64 long long
 #define COMPILER_DEPENDENT_UINT64 unsigned long long
 
@@ -122,6 +126,27 @@ extern unsigned long acpi_wakeup_address
 /* early initialization routine */
 extern void acpi_reserve_wakeup_memory(void);
 
+#ifdef CONFIG_XEN
+static inline int acpi_notify_hypervisor_state(u8 sleep_state,
+					       u32 pm1a_cnt_val,
+					       u32 pm1b_cnt_val)
+{
+	struct xen_platform_op op = {
+		.cmd = XENPF_enter_acpi_sleep,
+		.interface_version = XENPF_INTERFACE_VERSION,
+		.u = {
+			.enter_acpi_sleep = {
+				.pm1a_cnt_val = pm1a_cnt_val,
+				.pm1b_cnt_val = pm1b_cnt_val,
+				.sleep_state = sleep_state,
+			},
+		},
+	};
+
+	return HYPERVISOR_platform_op(&op);
+}
+#endif /* CONFIG_XEN */
+
 /*
  * Check if the CPU can handle C2 and deeper
  */
@@ -180,7 +205,9 @@ static inline void disable_acpi(void) {
 
 #endif /* !CONFIG_ACPI */
 
+#ifndef CONFIG_XEN
 #define ARCH_HAS_POWER_INIT 1
+#endif
 
 struct bootnode;
 
Index: linux-2.6.38-master/arch/x86/include/asm/apic.h
===================================================================
--- linux-2.6.38-master.orig/arch/x86/include/asm/apic.h
+++ linux-2.6.38-master/arch/x86/include/asm/apic.h
@@ -15,7 +15,9 @@
 #include
 #include
 
+#ifndef CONFIG_XEN
 #define ARCH_APICTIMER_STOPS_ON_C3 1
+#endif
 
 /*
  * Debugging macros
Index: linux-2.6.38-master/arch/x86/include/asm/kexec.h
===================================================================
--- linux-2.6.38-master.orig/arch/x86/include/asm/kexec.h
+++ linux-2.6.38-master/arch/x86/include/asm/kexec.h
@@ -163,6 +163,19 @@ struct kimage_arch {
 };
 #endif
 
+/* Under Xen we need to work with machine addresses. These macros give the
+ * machine address of a certain page to the generic kexec code instead of
+ * the pseudo physical address which would be given by the default macros.
+ */
+
+#ifdef CONFIG_XEN
+#define KEXEC_ARCH_HAS_PAGE_MACROS
+#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page))
+#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn))
+#define kexec_virt_to_phys(addr) virt_to_machine(addr)
+#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_KEXEC_H */
Index: linux-2.6.38-master/arch/x86/include/asm/topology.h
===================================================================
--- linux-2.6.38-master.orig/arch/x86/include/asm/topology.h
+++ linux-2.6.38-master/arch/x86/include/asm/topology.h
@@ -30,7 +30,7 @@
 # define ENABLE_TOPO_DEFINES
 # endif
 #else
-# ifdef CONFIG_SMP
+# if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
 # define ENABLE_TOPO_DEFINES
 # endif
 #endif
Index: linux-2.6.38-master/arch/x86/include/asm/types.h
===================================================================
--- linux-2.6.38-master.orig/arch/x86/include/asm/types.h
+++ linux-2.6.38-master/arch/x86/include/asm/types.h
@@ -9,7 +9,7 @@
 #ifndef __ASSEMBLY__
 
 typedef u64 dma64_addr_t;
-#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G)
+#if defined(CONFIG_X86_64) || defined(CONFIG_XEN) || defined(CONFIG_HIGHMEM64G)
 /* DMA addresses come in 32-bit and 64-bit flavours. */
 typedef u64 dma_addr_t;
 #else
Index: linux-2.6.38-master/arch/x86/vdso/Makefile
===================================================================
--- linux-2.6.38-master.orig/arch/x86/vdso/Makefile
+++ linux-2.6.38-master/arch/x86/vdso/Makefile
@@ -65,6 +65,8 @@ obj-$(VDSO32-y) += vdso32-syms.lds
 vdso32.so-$(VDSO32-y) += int80
 vdso32.so-$(CONFIG_COMPAT) += syscall
 vdso32.so-$(VDSO32-y) += sysenter
+xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80
+vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y)
 
 vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
 
Index: linux-2.6.38-master/arch/x86/vdso/vdso32-setup.c
===================================================================
--- linux-2.6.38-master.orig/arch/x86/vdso/vdso32-setup.c
+++ linux-2.6.38-master/arch/x86/vdso/vdso32-setup.c
@@ -26,6 +26,10 @@
 #include
 #include
 
+#ifdef CONFIG_XEN
+#include
+#endif
+
 enum {
 	VDSO_DISABLED = 0,
 	VDSO_ENABLED = 1,
@@ -225,6 +229,7 @@ static inline void map_compat_vdso(int m
 
 void enable_sep_cpu(void)
 {
+#ifndef CONFIG_XEN
 	int cpu = get_cpu();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
@@ -239,6 +244,35 @@ void enable_sep_cpu(void)
 	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
 	put_cpu();
+#else
+	extern asmlinkage void ia32pv_sysenter_target(void);
+	static struct callback_register sysenter = {
+		.type = CALLBACKTYPE_sysenter,
+		.address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target },
+	};
+
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		return;
+
+	get_cpu();
+
+	if (xen_feature(XENFEAT_supervisor_mode_kernel))
+		sysenter.address.eip = (unsigned long)ia32_sysenter_target;
+
+	switch (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter)) {
+	case 0:
+		break;
+#if CONFIG_XEN_COMPAT < 0x030200
+	case -ENOSYS:
+		sysenter.type = CALLBACKTYPE_sysenter_deprecated;
+		if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0)
+			break;
+#endif
+	default:
+		clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
+		break;
+	}
+#endif
 }
 
 static struct vm_area_struct gate_vma;