Subject: xen3 arch-x86 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1017:948c933f8839) Patch-mainline: n/a Acked-by: jbeulich@novell.com List of files that don't require modification anymore (and hence removed from this patch), for reference and in case upstream wants to take the forward porting patches: 2.6.26/arch/x86/kernel/crash.c 2.6.30/arch/x86/kernel/acpi/boot.c --- head-2010-04-15.orig/arch/x86/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -111,6 +111,10 @@ endif # prevent gcc from generating any FP code by mistake KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +# Xen subarch support +mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen +mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/ + KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) @@ -149,9 +153,26 @@ boot := arch/x86/boot BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage -PHONY += bzImage $(BOOT_TARGETS) +PHONY += bzImage vmlinuz $(BOOT_TARGETS) + +ifdef CONFIG_XEN +CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ + -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) + +ifdef CONFIG_X86_64 +LDFLAGS_vmlinux := -e startup_64 +endif # Default kernel to build +all: vmlinuz + +# KBUILD_IMAGE specifies the target image being built +KBUILD_IMAGE := $(boot)/vmlinuz + +vmlinuz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) +else +# Default kernel to build all: bzImage # KBUILD_IMAGE specify target image being built @@ -167,6 +188,7 @@ endif $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ +endif PHONY += install install: --- head-2010-04-15.orig/arch/x86/boot/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/boot/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -23,6 +23,7 @@ ROOT_DEV := CURRENT SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets := vmlinux.bin setup.bin setup.elf bzImage +targets += vmlinuz 
vmlinux-stripped targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed @@ -195,6 +196,14 @@ bzlilo: $(obj)/bzImage cp System.map $(INSTALL_PATH)/ if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi +$(obj)/vmlinuz: $(obj)/vmlinux-stripped FORCE + $(call if_changed,gzip) + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + +$(obj)/vmlinux-stripped: OBJCOPYFLAGS := -g --strip-unneeded +$(obj)/vmlinux-stripped: vmlinux FORCE + $(call if_changed,objcopy) + install: sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" --- head-2010-04-15.orig/arch/x86/kernel/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/kernel/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -117,9 +117,12 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-$(CONFIG_X86_XEN) += fixup.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) + obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o @@ -130,4 +133,10 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o + + time_64-$(CONFIG_XEN) += time_32.o + pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o endif + +disabled-obj-$(CONFIG_XEN) := i8259_$(BITS).o reboot.o smpboot_$(BITS).o +%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := --- head-2010-04-15.orig/arch/x86/kernel/acpi/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/kernel/acpi/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -5,6 +5,9 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wake ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o +ifneq ($(CONFIG_PROCESSOR_EXTERNAL_CONTROL),) +obj-$(CONFIG_XEN) += processor_extcntl_xen.o +endif endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin @@ -12,3 +15,4 @@ 
$(obj)/wakeup_rm.o: $(obj)/realmode/w $(obj)/realmode/wakeup.bin: FORCE $(Q)$(MAKE) $(build)=$(obj)/realmode +disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o --- head-2010-04-15.orig/arch/x86/kernel/cpu/mcheck/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/kernel/cpu/mcheck/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -4,6 +4,7 @@ obj-$(CONFIG_X86_ANCIENT_MCE) += winchip obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_X86_MCE_XEON75XX) += mce-xeon75xx.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o +obj-$(CONFIG_X86_XEN_MCE) += mce_dom0.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o --- head-2010-04-15.orig/arch/x86/kernel/cpu/mcheck/mce.c 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/kernel/cpu/mcheck/mce.c 2010-04-15 09:44:40.000000000 +0200 @@ -1149,8 +1149,15 @@ void mce_log_therm_throt_event(__u64 sta * Periodic polling timer for "silent" machine check errors. If the * poller finds an MCE, poll 2x faster. When the poller finds no more * errors, poll 2x slower (up to check_interval seconds). 
+ * + * We disable polling in Dom0, since all CMCI/polling + * mechanisms are handled by Xen for Intel CPUs */ +#if defined (CONFIG_X86_XEN_MCE) +static int check_interval = 0; /* disable polling */ +#else static int check_interval = 5 * 60; /* 5 minutes */ +#endif static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); @@ -1315,6 +1322,7 @@ static int __cpuinit __mcheck_cpu_apply_ /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { +#ifndef CONFIG_XEN if (c->x86 == 15 && banks > 4) { /* * disable GART TBL walk error reporting, which @@ -1323,6 +1331,7 @@ static int __cpuinit __mcheck_cpu_apply_ */ clear_bit(10, (unsigned long *)&mce_banks[4].ctl); } +#endif if (c->x86 <= 17 && mce_bootlog < 0) { /* * Lots of broken BIOS around that don't clear them @@ -1390,6 +1399,7 @@ static void __cpuinit __mcheck_cpu_ancie static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { +#ifndef CONFIG_X86_64_XEN switch (c->x86_vendor) { case X86_VENDOR_INTEL: mce_intel_feature_init(c); @@ -1400,6 +1410,7 @@ static void __mcheck_cpu_init_vendor(str default: break; } +#endif } static void __mcheck_cpu_init_timer(void) @@ -2096,6 +2107,16 @@ static __init int mcheck_init_device(voi register_hotcpu_notifier(&mce_cpu_notifier); misc_register(&mce_log_device); +#ifdef CONFIG_X86_XEN_MCE + if (is_initial_xendomain()) { + /* Register vIRQ handler for MCE LOG processing */ + extern void bind_virq_for_mce(void); + + printk(KERN_DEBUG "MCE: bind virq for DOM0 logging\n"); + bind_virq_for_mce(); + } +#endif + return err; } --- head-2010-04-15.orig/arch/x86/kernel/cpu/mtrr/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/kernel/cpu/mtrr/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -1,3 +1,4 @@ obj-y := main.o if.o generic.o cleanup.o obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o +obj-$(CONFIG_XEN) := main.o if.o --- 
head-2010-04-15.orig/arch/x86/lib/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/lib/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -41,3 +41,5 @@ else lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o endif + +lib-$(CONFIG_XEN_SCRUB_PAGES) += scrub.o --- head-2010-04-15.orig/arch/x86/mm/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/mm/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -25,4 +25,6 @@ obj-$(CONFIG_NUMA) += numa.o numa_$(BIT obj-$(CONFIG_K8_NUMA) += k8topology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o +obj-$(CONFIG_XEN) += hypervisor.o + obj-$(CONFIG_MEMTEST) += memtest.o --- head-2010-04-15.orig/arch/x86/oprofile/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/oprofile/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -6,7 +6,14 @@ DRIVER_OBJS = $(addprefix ../../../drive oprofilefs.o oprofile_stats.o \ timer_int.o ) +ifdef CONFIG_XEN +XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \ + xenoprofile.o) +oprofile-y := $(DRIVER_OBJS) \ + $(XENOPROF_COMMON_OBJS) xenoprof.o +else oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o +endif --- head-2010-04-15.orig/arch/x86/pci/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/pci/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -4,6 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o +# pcifront should be after mmconfig.o and direct.o as it should only +# take over if direct access to the PCI bus is unavailable +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o --- head-2010-04-15.orig/arch/x86/power/cpu.c 
2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/power/cpu.c 2010-03-24 15:01:37.000000000 +0100 @@ -126,6 +126,7 @@ static void do_fpu_end(void) static void fix_processor_context(void) { +#ifndef CONFIG_X86_NO_TSS int cpu = smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); @@ -138,7 +139,10 @@ static void fix_processor_context(void) #ifdef CONFIG_X86_64 get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; +#endif +#endif +#ifdef CONFIG_X86_64 syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ --- head-2010-04-15.orig/arch/x86/include/asm/acpi.h 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/include/asm/acpi.h 2010-03-24 15:01:37.000000000 +0100 @@ -30,6 +30,10 @@ #include #include +#ifdef CONFIG_XEN +#include +#endif + #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long @@ -120,6 +124,27 @@ extern unsigned long acpi_wakeup_address /* early initialization routine */ extern void acpi_reserve_wakeup_memory(void); +#ifdef CONFIG_XEN +static inline int acpi_notify_hypervisor_state(u8 sleep_state, + u32 pm1a_cnt_val, + u32 pm1b_cnt_val) +{ + struct xen_platform_op op = { + .cmd = XENPF_enter_acpi_sleep, + .interface_version = XENPF_INTERFACE_VERSION, + .u = { + .enter_acpi_sleep = { + .pm1a_cnt_val = pm1a_cnt_val, + .pm1b_cnt_val = pm1b_cnt_val, + .sleep_state = sleep_state, + }, + }, + }; + + return HYPERVISOR_platform_op(&op); +} +#endif /* CONFIG_XEN */ + /* * Check if the CPU can handle C2 and deeper */ @@ -178,7 +203,9 @@ static inline void disable_acpi(void) { #endif /* !CONFIG_ACPI */ +#ifndef CONFIG_XEN #define ARCH_HAS_POWER_INIT 1 +#endif struct bootnode; --- head-2010-04-15.orig/arch/x86/include/asm/apic.h 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/include/asm/apic.h 2010-03-24 15:01:37.000000000 +0100 @@ -15,7 +15,9 @@ #include #include +#ifndef CONFIG_XEN #define ARCH_APICTIMER_STOPS_ON_C3 1 +#endif 
/* * Debugging macros --- head-2010-04-15.orig/arch/x86/include/asm/kexec.h 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/include/asm/kexec.h 2010-03-24 15:01:37.000000000 +0100 @@ -163,6 +163,19 @@ struct kimage_arch { }; #endif +/* Under Xen we need to work with machine addresses. These macros give the + * machine address of a certain page to the generic kexec code instead of + * the pseudo physical address which would be given by the default macros. + */ + +#ifdef CONFIG_XEN +#define KEXEC_ARCH_HAS_PAGE_MACROS +#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) +#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) +#define kexec_virt_to_phys(addr) virt_to_machine(addr) +#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ --- head-2010-04-15.orig/arch/x86/include/asm/types.h 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/include/asm/types.h 2010-03-24 15:01:37.000000000 +0100 @@ -9,7 +9,7 @@ #ifndef __ASSEMBLY__ typedef u64 dma64_addr_t; -#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) +#if defined(CONFIG_X86_64) || defined(CONFIG_XEN) || defined(CONFIG_HIGHMEM64G) /* DMA addresses come in 32-bit and 64-bit flavours. 
*/ typedef u64 dma_addr_t; #else --- head-2010-04-15.orig/arch/x86/vdso/Makefile 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/vdso/Makefile 2010-03-24 15:01:37.000000000 +0100 @@ -65,6 +65,8 @@ obj-$(VDSO32-y) += vdso32-syms.lds vdso32.so-$(VDSO32-y) += int80 vdso32.so-$(CONFIG_COMPAT) += syscall vdso32.so-$(VDSO32-y) += sysenter +xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80 +vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y) vdso32-images = $(vdso32.so-y:%=vdso32-%.so) --- head-2010-04-15.orig/arch/x86/vdso/vdso32-setup.c 2010-04-15 09:37:46.000000000 +0200 +++ head-2010-04-15/arch/x86/vdso/vdso32-setup.c 2010-03-24 15:01:37.000000000 +0100 @@ -26,6 +26,10 @@ #include #include +#ifdef CONFIG_XEN +#include +#endif + enum { VDSO_DISABLED = 0, VDSO_ENABLED = 1, @@ -225,6 +229,7 @@ static inline void map_compat_vdso(int m void enable_sep_cpu(void) { +#ifndef CONFIG_XEN int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -239,6 +244,35 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); put_cpu(); +#else + extern asmlinkage void ia32pv_sysenter_target(void); + static struct callback_register sysenter = { + .type = CALLBACKTYPE_sysenter, + .address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target }, + }; + + if (!boot_cpu_has(X86_FEATURE_SEP)) + return; + + get_cpu(); + + if (xen_feature(XENFEAT_supervisor_mode_kernel)) + sysenter.address.eip = (unsigned long)ia32_sysenter_target; + + switch (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter)) { + case 0: + break; +#if CONFIG_XEN_COMPAT < 0x030200 + case -ENOSYS: + sysenter.type = CALLBACKTYPE_sysenter_deprecated; + if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0) + break; +#endif + default: + clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); + break; + } +#endif } static struct vm_area_struct gate_vma;