Keeps patches unpacked for easier history browsing

devel-3.9
Joanna Rutkowska 14 years ago
parent f07ec3f05e
commit d1298d1dfb

@@ -60,14 +60,14 @@ Source17: apply-patches
Source33: check-for-config-changes
Source60: config.sh
Source100: config-%{build_flavor}
-Source102: patches.arch.tar.bz2
-Source103: patches.drivers.tar.bz2
-Source104: patches.fixes.tar.bz2
-Source105: patches.rpmify.tar.bz2
-Source106: patches.suse.tar.bz2
-Source107: patches.xen.tar.bz2
-Source108: patches.addon.tar.bz2
-Source109: patches.kernel.org.tar.bz2
+Source200: patches.arch
+Source201: patches.drivers
+Source202: patches.fixes
+Source203: patches.rpmify
+Source204: patches.suse
+Source205: patches.xen
+Source206: patches.addon
+Source207: patches.kernel.org
BuildRoot: %{_tmppath}/%{name}-%{version}-build
ExclusiveArch: x86_64
@@ -84,13 +84,13 @@ fi
SYMBOLS="xen-dom0 xenlinux"
# Unpack all sources and patches
-%setup -q -c -T -a 0 -a 102 -a 103 -a 104 -a 105 -a 106 -a 107 -a 108 -a 109
+%setup -q -c -T -a 0
mkdir -p %kernel_build_dir
cd linux-%version
-%_sourcedir/apply-patches %_sourcedir/series.conf .. $SYMBOLS
+%_sourcedir/apply-patches %_sourcedir/series.conf %_sourcedir $SYMBOLS
cd %kernel_build_dir

Binary file not shown.

Binary file not shown.

@@ -0,0 +1,69 @@
From: Dimitri Sivanich <sivanich@sgi.com>
Subject: Expose the irq_desc node as /proc/irq/*/node.
References: bnc#566745, fate#306952
Patch-mainline: not yet
This file provides device hardware locality information for apps desiring
to include hardware locality in irq mapping decisions.
Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rafael J. Wysocki <rjw@suse.de>
---
Documentation/filesystems/proc.txt | 4 ++++
kernel/irq/proc.c | 23 +++++++++++++++++++++++
2 files changed, 27 insertions(+)
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -566,6 +566,10 @@ The default_smp_affinity mask applies to
IRQs which have not yet been allocated/activated, and hence which lack a
/proc/irq/[0-9]* directory.
+The node file on an SMP system shows the node to which the device using the IRQ
+reports itself as being attached. This hardware locality information does not
+include information about any possible driver locality preference.
+
prof_cpu_mask specifies which CPUs are to be profiled by the system wide
profiler. Default value is ffffffff (all cpus).
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -146,6 +146,26 @@ static const struct file_operations defa
.release = single_release,
.write = default_affinity_write,
};
+
+static int irq_node_proc_show(struct seq_file *m, void *v)
+{
+ struct irq_desc *desc = irq_to_desc((long) m->private);
+
+ seq_printf(m, "%d\n", desc->node);
+ return 0;
+}
+
+static int irq_node_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_node_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations irq_node_proc_fops = {
+ .open = irq_node_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif
static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -230,6 +250,9 @@ void register_irq_proc(unsigned int irq,
/* create /proc/irq/<irq>/smp_affinity */
proc_create_data("smp_affinity", 0600, desc->dir,
&irq_affinity_proc_fops, (void *)(long)irq);
+
+ proc_create_data("node", 0444, desc->dir,
+ &irq_node_proc_fops, (void *)(long)irq);
#endif
proc_create_data("spurious", 0444, desc->dir,

@@ -0,0 +1,24 @@
From: Jeff Mahoney <jeffm@suse.com>
Subject: acpi: export acpi_os_hotplug_execute
Patch-mainline: not yet
The ACPI dock driver changes require acpi_os_hotplug_execute,
which wasn't exported.
This patch exports it.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
drivers/acpi/osl.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -782,6 +782,7 @@ acpi_status acpi_os_hotplug_execute(acpi
{
return __acpi_os_execute(0, function, context, 1);
}
+EXPORT_SYMBOL(acpi_os_hotplug_execute);
void acpi_os_wait_events_complete(void *context)
{

@@ -0,0 +1,67 @@
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Subject: ACPI: EC: Don't degrade to poll mode at storm automatically.
References: bnc#446142
Patch-Mainline: no
Signed-off-by: Thomas Renninger <trenn@suse.de>
Not all users of semi-broken EC devices want to degrade to poll mode, so
give them the right to choose.
Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
---
Documentation/kernel-parameters.txt | 5 +++++
drivers/acpi/ec.c | 15 +++++++++++++++
2 files changed, 20 insertions(+)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -691,6 +691,11 @@ and is between 256 and 4096 characters.
eata= [HW,SCSI]
+ ec_intr= [HW,ACPI] ACPI Embedded Controller interrupt mode
+ Format: <int>
+ 0: polling mode
+ non-0: interrupt mode (default)
+
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -118,6 +118,8 @@ static struct acpi_ec {
spinlock_t curr_lock;
} *boot_ec, *first_ec;
+int acpi_ec_intr = 1; /* Default is interrupt mode */
+
static int EC_FLAGS_MSI; /* Out-of-spec MSI controller */
/* --------------------------------------------------------------------------
@@ -754,6 +756,8 @@ static int ec_install_handlers(struct ac
&acpi_ec_gpe_handler, ec);
if (ACPI_FAILURE(status))
return -ENODEV;
+ if (!acpi_ec_intr)
+ set_bit(EC_FLAGS_NO_GPE, &ec->flags);
acpi_set_gpe_type(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
acpi_enable_gpe(NULL, ec->gpe);
status = acpi_install_address_space_handler(ec->handle,
@@ -1034,3 +1038,14 @@ static void __exit acpi_ec_exit(void)
return;
}
#endif /* 0 */
+
+static int __init acpi_ec_set_intr_mode(char *str)
+{
+ if (!get_option(&str, &acpi_ec_intr)) {
+ acpi_ec_intr = 0;
+ return 0;
+ }
+ return 1;
+}
+
+__setup("ec_intr=", acpi_ec_set_intr_mode);

@@ -0,0 +1,59 @@
From: Kurt Garloff <garloff@suse.de>
Subject: Use SRAT table rev to use 8bit or 16/32bit PXM fields (ia64)
References: bnc#503038
Patch-mainline: not yet
In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides
32bits for these. The new fields were reserved before.
According to the ACPI spec, the OS must disregard reserved fields.
ia64 handled the PXM fields almost consistently, though the handling
depended on SGI's sn2 platform. This patch leaves the sn2 logic in, but
also uses 16/32 bits for PXM if the SRAT has rev 2 or higher.
The patch also adds __init to the two pxm accessor functions, as they
access __initdata now and are called from an __init function only anyway.
Note that the code only uses 16 bits of the PXM field in the processor
proximity entry; the patch does not address this, as 16 bits are more than
enough.
This is patch 3/3.
Signed-off-by: Kurt Garloff <garloff@suse.de>
---
arch/ia64/kernel/acpi.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -428,22 +428,24 @@ static u32 __devinitdata pxm_flag[PXM_FL
static struct acpi_table_slit __initdata *slit_table;
cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
-static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
+static int __init
+get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
int pxm;
pxm = pa->proximity_domain_lo;
- if (ia64_platform_is("sn2"))
+ if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
pxm += pa->proximity_domain_hi[0] << 8;
return pxm;
}
-static int get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
+static int __init
+get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
{
int pxm;
pxm = ma->proximity_domain;
- if (!ia64_platform_is("sn2"))
+ if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
pxm &= 0xff;
return pxm;

@@ -0,0 +1,52 @@
From: Kurt Garloff <garloff@suse.de>
Subject: Store SRAT table revision
References: bnc#503038
Patch-mainline: not yet
In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides
32bits for these. The new fields were reserved before.
According to the ACPI spec, the OS must disregard reserved fields.
In order to know whether or not to honor them, we must know which
revision the SRAT table has.
This patch stores the SRAT table revision for later consumption
by arch specific __init functions.
This is patch 1/3.
Signed-off-by: Kurt Garloff <garloff@suse.de>
---
drivers/acpi/numa.c | 3 +++
include/acpi/acpi_numa.h | 1 +
2 files changed, 4 insertions(+)
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -45,6 +45,8 @@ static int pxm_to_node_map[MAX_PXM_DOMAI
static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
+unsigned char acpi_srat_revision __initdata;
+
int pxm_to_node(int pxm)
{
if (pxm < 0)
@@ -259,6 +261,7 @@ static int __init acpi_parse_srat(struct
return -EINVAL;
srat = (struct acpi_table_srat *)table;
+ acpi_srat_revision = srat->header.revision;
return 0;
}
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -15,6 +15,7 @@ extern int pxm_to_node(int);
extern int node_to_pxm(int);
extern void __acpi_map_pxm_to_node(int, int);
extern int acpi_map_pxm_to_node(int);
+extern unsigned char acpi_srat_revision;
#endif /* CONFIG_ACPI_NUMA */
#endif /* __ACP_NUMA_H */

@@ -0,0 +1,42 @@
From: Kurt Garloff <garloff@suse.de>
Subject: Use SRAT table rev to use 8bit or 32bit PXM fields (x86-64)
References: bnc#503038
Patch-mainline: not yet
In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides
32bits for these. The new fields were reserved before.
According to the ACPI spec, the OS must disregard reserved fields.
x86-64 was rather inconsistent prior to this patch; it used 8 bits
for the pxm field in cpu_affinity, but 32 bits in mem_affinity.
This patch makes it consistent: Either use 8 bits consistently (SRAT
rev 1 or lower) or 32 bits (SRAT rev 2 or higher).
This is patch 2/3.
Signed-off-by: Kurt Garloff <garloff@suse.de>
---
arch/x86/mm/srat_64.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -156,6 +156,8 @@ acpi_numa_processor_affinity_init(struct
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain_lo;
+ if (acpi_srat_revision >= 2)
+ pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -259,6 +261,8 @@ acpi_numa_memory_affinity_init(struct ac
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
+ if (acpi_srat_revision <= 1)
+ pxm &= 0xff;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");

@@ -0,0 +1,105 @@
From: Thomas Renninger <trenn@suse.de>
Subject: Avoid critical temp shutdowns on specific ThinkPad T4x(p) and R40
References: https://bugzilla.novell.com/show_bug.cgi?id=333043
Patch-mainline: not yet
---
drivers/acpi/thermal.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -42,6 +42,7 @@
#include <linux/seq_file.h>
#include <linux/reboot.h>
#include <linux/device.h>
+#include <linux/dmi.h>
#include <asm/uaccess.h>
#include <linux/thermal.h>
#include <acpi/acpi_bus.h>
@@ -1383,6 +1384,66 @@ static void acpi_thermal_guess_offset(st
tz->kelvin_offset = 2732;
}
+static struct dmi_system_id thermal_psv_dmi_table[] = {
+ {
+ .ident = "IBM ThinkPad T41",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad T41"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad T42",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad T42"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad T43",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad T43"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad T41p",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad T41p"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad T42p",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad T42p"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad T43p",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad T43p"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad R40",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad R40"),
+ },
+ },
+ {
+ .ident = "IBM ThinkPad R50p",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,"ThinkPad R50p"),
+ },
+ },
+ {},
+};
+
static int acpi_thermal_add(struct acpi_device *device)
{
int result = 0;
@@ -1414,6 +1475,18 @@ static int acpi_thermal_add(struct acpi_
if (result)
goto free_memory;
+ if (dmi_check_system(thermal_psv_dmi_table)) {
+ if (tz->trips.passive.flags.valid &&
+ tz->trips.passive.temperature > CELSIUS_TO_KELVIN(85)) {
+ printk (KERN_INFO "Adjust passive trip point from %lu"
+ " to %lu\n",
+ KELVIN_TO_CELSIUS(tz->trips.passive.temperature),
+ KELVIN_TO_CELSIUS(tz->trips.passive.temperature - 150));
+ tz->trips.passive.temperature -= 150;
+ acpi_thermal_set_polling(tz, 5);
+ }
+ }
+
result = acpi_thermal_add_fs(device);
if (result)
goto unregister_thermal_zone;
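
ACPI thermal trip points are stored in tenths of a kelvin (note the
kelvin_offset of 2732 above), so "temperature -= 150" lowers the passive
trip by exactly 15 C. A worked sketch of the arithmetic, with the macros
approximated from that convention:

#include <stdio.h>

#define CELSIUS_TO_KELVIN(c)	((c) * 10 + 2732)	/* deci-kelvin */
#define KELVIN_TO_CELSIUS(dk)	(((dk) - 2732) / 10)

int main(void)
{
	unsigned long psv = CELSIUS_TO_KELVIN(95);  /* e.g. a 95 C trip */

	printf("passive trip adjusted from %lu C to %lu C\n",
	       KELVIN_TO_CELSIUS(psv), KELVIN_TO_CELSIUS(psv - 150));
	return 0;
}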

@@ -0,0 +1,118 @@
From: Thomas Renninger <trenn@suse.de>
Subject: Introduce acpi_root_table=rsdt boot param and dmi list to force rsdt
Patch-mainline: not yet
References: http://bugzilla.kernel.org/show_bug.cgi?id=8246
This one is part of a patch series:
acpi_thinkpad_introduce_acpi_root_table_boot_param.patch
acpi_thinkpad_introduce_acpica_rsdt_global_variable.patch
acpi_thinkpad_remove_R40e_c-state_blacklist.patch
Blacklist R40e, R51e and T40, T40p, T41, T41p, T42, T42p, R50 and R50p
ThinkPads to use the RSDT instead of the XSDT.
Update: Jan 12 2009 jeffm
* 2.6.29-rc1 introduced acpi_rsdt_forced. I've updated the patch to issue
a warning that acpi=rsdt is the preferred method of forcing.
* Moved the dmi table stuff to the main dmi table in x86/kernel/acpi/boot.
Update: Apr 10 2009 jeffm
* Removed documentation, since it's deprecated.
Signed-off-by: Thomas Renninger <trenn@suse.de>
Tested-by: Mark Doughty <me@markdoughty.co.uk>
CC: Yakui Zhao <yakui.zhao@intel.com>
---
arch/x86/kernel/acpi/boot.c | 53 ++++++++++++++++++++++++++++++++++++++++++++
drivers/acpi/tables.c | 3 ++
2 files changed, 56 insertions(+)
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1313,6 +1313,21 @@ static int __init dmi_ignore_irq0_timer_
return 0;
}
+static int __init force_acpi_rsdt(const struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=rsdt\n",
+ d->ident);
+ acpi_rsdt_forced = 1;
+ } else {
+ printk(KERN_NOTICE
+ "Warning: acpi=force overrules DMI blacklist: "
+ "acpi=rsdt\n");
+ }
+ return 0;
+
+}
+
/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
@@ -1388,6 +1403,32 @@ static struct dmi_system_id __initdata a
DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
},
},
+
+ /*
+ * Boxes that need RSDT as ACPI root table
+ */
+ {
+ .callback = force_acpi_rsdt,
+ .ident = "ThinkPad ", /* R40e, broken C-states */
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "1SET")},
+ },
+ {
+ .callback = force_acpi_rsdt,
+ .ident = "ThinkPad ", /* R50e, slow booting */
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "1WET")},
+ },
+ {
+ .callback = force_acpi_rsdt,
+ .ident = "ThinkPad ", /* T40, T40p, T41, T41p, T42, T42p
+ R50, R50p */
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "1RET")},
+ },
{}
};
@@ -1583,6 +1624,18 @@ static int __init parse_acpi(char *arg)
}
early_param("acpi", parse_acpi);
+/* Alias for acpi=rsdt for compatibility with openSUSE 11.1 and SLE11 */
+static int __init parse_acpi_root_table(char *opt)
+{
+ if (!strcmp(opt, "rsdt")) {
+ acpi_rsdt_forced = 1;
+ printk(KERN_WARNING "acpi_root_table=rsdt is deprecated. "
+ "Please use acpi=rsdt instead.\n");
+ }
+ return 0;
+}
+early_param("acpi_root_table", parse_acpi_root_table);
+
/* FIXME: Using pci= for an ACPI parameter is a travesty. */
static int __init parse_pci(char *arg)
{
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -339,6 +339,9 @@ int __init acpi_table_init(void)
{
acpi_status status;
+ if (acpi_rsdt_forced)
+ printk(KERN_INFO "Using RSDT as ACPI root table\n");
+
status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
if (ACPI_FAILURE(status))
return 1;

@@ -0,0 +1,603 @@
From: Russ Anderson <rja@sgi.com>
Subject: ia64: Call migration code on correctable errors v8
References: 415829
Acked-by: schwab@suse.de
Patch-mainline: not yet
Migrate data off pages with correctable memory errors. This patch is the
ia64 specific piece. It connects the CPE handler to the page migration
code. It is implemented as a kernel loadable module, similar to the mca
recovery code (mca_recovery.ko). This allows the feature to be turned off
by uninstalling the module.
Update Jan 19 2009 jeffm:
- isolate_lru_page doesn't put the page on a list anymore
Signed-off-by: Russ Anderson <rja@sgi.com>
---
arch/ia64/Kconfig | 9
arch/ia64/include/asm/mca.h | 6
arch/ia64/include/asm/page.h | 1
arch/ia64/kernel/Makefile | 1
arch/ia64/kernel/cpe_migrate.c | 434 +++++++++++++++++++++++++++++++++++++++++
arch/ia64/kernel/mca.c | 37 +++
6 files changed, 487 insertions(+), 1 deletion(-)
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -505,6 +505,15 @@ config ARCH_PROC_KCORE_TEXT
config IA64_MCA_RECOVERY
tristate "MCA recovery from errors other than TLB."
+config IA64_CPE_MIGRATE
+ tristate "Migrate data off pages with correctable errors"
+ default m
+ help
+ Migrate data off pages with correctable memory errors. Selecting
+ Y will build this functionality into the kernel. Selecting M will
+ build this functionality as a kernel loadable module. Installing
+ the module will turn on the functionality.
+
config PERFMON
bool "Performance monitor support"
help
--- a/arch/ia64/include/asm/mca.h
+++ b/arch/ia64/include/asm/mca.h
@@ -142,6 +142,7 @@ extern unsigned long __per_cpu_mca[NR_CP
extern int cpe_vector;
extern int ia64_cpe_irq;
+extern int cpe_poll_enabled;
extern void ia64_mca_init(void);
extern void ia64_mca_cpu_init(void *);
extern void ia64_os_mca_dispatch(void);
@@ -156,11 +157,16 @@ extern void ia64_slave_init_handler(void
extern void ia64_mca_cmc_vector_setup(void);
extern int ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
extern void ia64_unreg_MCA_extension(void);
+extern int ia64_reg_CE_extension(int (*fn)(void *));
+extern void ia64_unreg_CE_extension(void);
extern unsigned long ia64_get_rnat(unsigned long *);
extern void ia64_set_psr_mc(void);
extern void ia64_mca_printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
+extern struct list_head badpagelist;
+extern unsigned int total_badpages;
+
struct ia64_mca_notify_die {
struct ia64_sal_os_state *sos;
int *monarch_cpu;
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -121,6 +121,7 @@ extern unsigned long max_low_pfn;
#endif
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+#define phys_to_page(kaddr) (pfn_to_page(kaddr >> PAGE_SHIFT))
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_PERFMON) += perfmon_defaul
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
+obj-$(CONFIG_IA64_CPE_MIGRATE) += cpe_migrate.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
--- /dev/null
+++ b/arch/ia64/kernel/cpe_migrate.c
@@ -0,0 +1,434 @@
+/*
+ * File: cpe_migrate.c
+ * Purpose: Migrate data from physical pages with excessive correctable
+ * errors to new physical pages. Keep the old pages on a discard
+ * list.
+ *
+ * Copyright (C) 2008 SGI - Silicon Graphics Inc.
+ * Copyright (C) 2008 Russ Anderson <rja@sgi.com>
+ */
+
+#include <linux/sysdev.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/vmalloc.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
+#include <linux/memcontrol.h>
+#include <linux/kobject.h>
+
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/mca.h>
+
+#define BADRAM_BASENAME "badram"
+#define CE_HISTORY_LENGTH 30
+
+struct cpe_info {
+ u64 paddr;
+ u16 node;
+};
+static struct cpe_info cpe[CE_HISTORY_LENGTH];
+
+static int cpe_polling_enabled = 1;
+static int cpe_head;
+static int cpe_tail;
+static int work_scheduled;
+static int mstat_cannot_isolate;
+static int mstat_failed_to_discard;
+static int mstat_already_marked;
+static int mstat_already_on_list;
+
+DEFINE_SPINLOCK(cpe_migrate_lock);
+
+static void
+get_physical_address(void *buffer, u64 *paddr, u16 *node)
+{
+ sal_log_record_header_t *rh;
+ sal_log_mem_dev_err_info_t *mdei;
+ ia64_err_rec_t *err_rec;
+ sal_log_platform_err_info_t *plat_err;
+ efi_guid_t guid;
+
+ err_rec = buffer;
+ rh = &err_rec->sal_elog_header;
+ *paddr = 0;
+ *node = 0;
+
+ /*
+ * Make sure it is a corrected error.
+ */
+ if (rh->severity != sal_log_severity_corrected)
+ return;
+
+ plat_err = (sal_log_platform_err_info_t *)&err_rec->proc_err;
+
+ guid = plat_err->mem_dev_err.header.guid;
+ if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
+ /*
+ * Memory cpe
+ */
+ mdei = &plat_err->mem_dev_err;
+ if (mdei->valid.oem_data) {
+ if (mdei->valid.physical_addr)
+ *paddr = mdei->physical_addr;
+
+ if (mdei->valid.node) {
+ if (ia64_platform_is("sn2"))
+ *node = nasid_to_cnodeid(mdei->node);
+ else
+ *node = mdei->node;
+ }
+ }
+ }
+}
+
+static struct page *
+alloc_migrate_page(struct page *ignored, unsigned long node, int **x)
+{
+
+ return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
+}
+
+static int
+validate_paddr_page(u64 paddr)
+{
+ struct page *page;
+
+ if (!paddr)
+ return -EINVAL;
+
+ if (!ia64_phys_addr_valid(paddr))
+ return -EINVAL;
+
+ if (!pfn_valid(paddr >> PAGE_SHIFT))
+ return -EINVAL;
+
+ page = phys_to_page(paddr);
+ if (PageMemError(page))
+ mstat_already_marked++;
+ return 0;
+}
+
+extern int isolate_lru_page(struct page *);
+static int
+ia64_mca_cpe_move_page(u64 paddr, u32 node)
+{
+ LIST_HEAD(pagelist);
+ struct page *page;
+ int ret;
+
+ ret = validate_paddr_page(paddr);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * convert physical address to page number
+ */
+ page = phys_to_page(paddr);
+
+ migrate_prep();
+ ret = isolate_lru_page(page);
+ if (ret) {
+ mstat_cannot_isolate++;
+ return ret;
+ }
+
+ list_add(&page->lru, &pagelist);
+ ret = migrate_pages(&pagelist, alloc_migrate_page, node, 0);
+ if (ret == 0) {
+ total_badpages++;
+ list_add_tail(&page->lru, &badpagelist);
+ } else {
+ mstat_failed_to_discard++;
+ /*
+ * The page failed to migrate and is not on the bad page list.
+ * Clearing the error bit will allow another attempt to migrate
+ * if it gets another correctable error.
+ */
+ ClearPageMemError(page);
+ }
+
+ return 0;
+}
+
+/*
+ * ia64_mca_cpe_migrate
+ * The worker that does the actual migration. It pulls a
+ * physical address off the list and calls the migration code.
+ */
+static void
+ia64_mca_cpe_migrate(struct work_struct *unused)
+{
+ int ret;
+ u64 paddr;
+ u16 node;
+
+ do {
+ paddr = cpe[cpe_tail].paddr;
+ if (paddr) {
+ /*
+ * There is a valid entry that needs processing.
+ */
+ node = cpe[cpe_tail].node;
+
+ ret = ia64_mca_cpe_move_page(paddr, node);
+ if (ret <= 0)
+ /*
+ * Even though the return status is negative,
+ * clear the entry. If the same address has
+ * another CPE it will be re-added to the list.
+ */
+ cpe[cpe_tail].paddr = 0;
+
+ }
+ if (++cpe_tail >= CE_HISTORY_LENGTH)
+ cpe_tail = 0;
+
+ } while (cpe_tail != cpe_head);
+ work_scheduled = 0;
+}
+
+static DECLARE_WORK(cpe_enable_work, ia64_mca_cpe_migrate);
+DEFINE_SPINLOCK(cpe_list_lock);
+
+/*
+ * cpe_setup_migrate
+ * Get the physical address out of the CPE record, add it
+ * to the list of addresses to migrate (if not already on),
+ * and schedule the back end worker task. This is called
+ * in interrupt context so cannot directly call the migration
+ * code.
+ *
+ * Inputs
+ * rec The CPE record
+ * Outputs
+ * 1 on Success, -1 on failure
+ */
+static int
+cpe_setup_migrate(void *rec)
+{
+ u64 paddr;
+ u16 node;
+ /* int head, tail; */
+ int i, ret;
+
+ if (!rec)
+ return -EINVAL;
+
+ get_physical_address(rec, &paddr, &node);
+ ret = validate_paddr_page(paddr);
+ if (ret < 0)
+ return -EINVAL;
+
+ if ((cpe_head != cpe_tail) || (cpe[cpe_head].paddr != 0))
+ /*
+ * List not empty
+ */
+ for (i = 0; i < CE_HISTORY_LENGTH; i++) {
+ if (PAGE_ALIGN(cpe[i].paddr) == PAGE_ALIGN(paddr)) {
+ mstat_already_on_list++;
+ return 1; /* already on the list */
+ }
+ }
+
+ if (!spin_trylock(&cpe_list_lock)) {
+ /*
+ * Someone else has the lock. To avoid spinning in interrupt
+ * handler context, bail.
+ */
+ return 1;
+ }
+
+ if (cpe[cpe_head].paddr == 0) {
+ cpe[cpe_head].node = node;
+ cpe[cpe_head].paddr = paddr;
+
+ if (++cpe_head >= CE_HISTORY_LENGTH)
+ cpe_head = 0;
+ }
+ spin_unlock(&cpe_list_lock);
+
+ if (!work_scheduled) {
+ work_scheduled = 1;
+ schedule_work(&cpe_enable_work);
+ }
+
+ return 1;
+}
+
+/*
+ * =============================================================================
+ */
+
+/*
+ * free_one_bad_page
+ * Free one page from the list of bad pages.
+ */
+static int
+free_one_bad_page(unsigned long paddr)
+{
+ LIST_HEAD(pagelist);
+ struct page *page, *page2, *target;
+
+ /*
+ * Verify page address
+ */
+ target = phys_to_page(paddr);
+ list_for_each_entry_safe(page, page2, &badpagelist, lru) {
+ if (page != target)
+ continue;
+
+ ClearPageMemError(page); /* Mark the page as good */
+ total_badpages--;
+ list_move_tail(&page->lru, &pagelist);
+ putback_lru_pages(&pagelist);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * free_all_bad_pages
+ * Free all of the pages on the bad pages list.
+ */
+static int
+free_all_bad_pages(void)
+{
+ struct page *page, *page2;
+
+ list_for_each_entry_safe(page, page2, &badpagelist, lru) {
+ ClearPageMemError(page); /* Mark the page as good */
+ total_badpages--;
+ }
+ putback_lru_pages(&badpagelist);
+ return 0;
+}
+
+#define OPT_LEN 16
+
+static ssize_t
+badpage_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ char optstr[OPT_LEN];
+ unsigned long opt;
+ int len = OPT_LEN;
+ int err;
+
+ if (count < len)
+ len = count;
+
+ strlcpy(optstr, buf, len);
+
+ err = strict_strtoul(optstr, 16, &opt);
+ if (err)
+ return err;
+
+ if (opt == 0)
+ free_all_bad_pages();
+ else
+ free_one_bad_page(opt);
+
+ return count;
+}
+
+/*
+ * badpage_show
+ * Display the number, size, and addresses of all the pages on the
+ * bad page list.
+ *
+ * Note that sysfs provides buf of PAGE_SIZE length. bufend tracks
+ * the remaining space in buf to avoid overflowing.
+ */
+static ssize_t
+badpage_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+
+{
+ struct page *page, *page2;
+ int i = 0, cnt = 0;
+ char *bufend = buf + PAGE_SIZE;
+
+ cnt = snprintf(buf, bufend - (buf + cnt),
+ "Memory marked bad: %d kB\n"
+ "Pages marked bad: %d\n"
+ "Unable to isolate on LRU: %d\n"
+ "Unable to migrate: %d\n"
+ "Already marked bad: %d\n"
+ "Already on list: %d\n"
+ "List of bad physical pages\n",
+ total_badpages << (PAGE_SHIFT - 10), total_badpages,
+ mstat_cannot_isolate, mstat_failed_to_discard,
+ mstat_already_marked, mstat_already_on_list
+ );
+
+ list_for_each_entry_safe(page, page2, &badpagelist, lru) {
+ if (bufend - (buf + cnt) < 20)
+ break; /* Avoid overflowing the buffer */
+ cnt += snprintf(buf + cnt, bufend - (buf + cnt),
+ " 0x%011lx", page_to_phys(page));
+ if (!(++i % 5))
+ cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
+ }
+ cnt += snprintf(buf + cnt, bufend - (buf + cnt), "\n");
+
+ return cnt;
+}
+
+static struct kobj_attribute badram_attr = {
+ .attr = {
+ .name = "badram",
+ .mode = S_IWUSR | S_IRUGO,
+ },
+ .show = badpage_show,
+ .store = badpage_store,
+};
+
+static int __init
+cpe_migrate_external_handler_init(void)
+{
+ int error;
+
+ error = sysfs_create_file(kernel_kobj, &badram_attr.attr);
+ if (error)
+ return -EINVAL;
+
+ /*
+ * register external ce handler
+ */
+ if (ia64_reg_CE_extension(cpe_setup_migrate)) {
+ printk(KERN_ERR "ia64_reg_CE_extension failed.\n");
+ return -EFAULT;
+ }
+ cpe_poll_enabled = cpe_polling_enabled;
+
+ printk(KERN_INFO "Registered badram Driver\n");
+ return 0;
+}
+
+static void __exit
+cpe_migrate_external_handler_exit(void)
+{
+ /* unregister external mca handlers */
+ ia64_unreg_CE_extension();
+
+ sysfs_remove_file(kernel_kobj, &badram_attr.attr);
+}
+
+module_init(cpe_migrate_external_handler_init);
+module_exit(cpe_migrate_external_handler_exit);
+
+module_param(cpe_polling_enabled, int, 0644);
+MODULE_PARM_DESC(cpe_polling_enabled,
+ "Enable polling with migration");
+
+MODULE_AUTHOR("Russ Anderson <rja@sgi.com>");
+MODULE_DESCRIPTION("ia64 Corrected Error page migration driver");
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -68,6 +68,9 @@
*
* 2007-04-27 Russ Anderson <rja@sgi.com>
* Support multiple cpus going through OS_MCA in the same event.
+ *
+ * 2008-04-22 Russ Anderson <rja@sgi.com>
+ * Migrate data off pages with correctable memory errors.
*/
#include <linux/jiffies.h>
#include <linux/types.h>
@@ -163,7 +166,14 @@ static int cmc_polling_enabled = 1;
* but encounters problems retrieving CPE logs. This should only be
* necessary for debugging.
*/
-static int cpe_poll_enabled = 1;
+int cpe_poll_enabled = 1;
+EXPORT_SYMBOL(cpe_poll_enabled);
+
+unsigned int total_badpages;
+EXPORT_SYMBOL(total_badpages);
+
+LIST_HEAD(badpagelist);
+EXPORT_SYMBOL(badpagelist);
extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
@@ -523,6 +533,28 @@ int mca_recover_range(unsigned long addr
}
EXPORT_SYMBOL_GPL(mca_recover_range);
+/* Function pointer to Corrected Error memory migration driver */
+int (*ia64_mca_ce_extension)(void *);
+
+int
+ia64_reg_CE_extension(int (*fn)(void *))
+{
+ if (ia64_mca_ce_extension)
+ return 1;
+
+ ia64_mca_ce_extension = fn;
+ return 0;
+}
+EXPORT_SYMBOL(ia64_reg_CE_extension);
+
+void
+ia64_unreg_CE_extension(void)
+{
+ if (ia64_mca_ce_extension)
+ ia64_mca_ce_extension = NULL;
+}
+EXPORT_SYMBOL(ia64_unreg_CE_extension);
+
#ifdef CONFIG_ACPI
int cpe_vector = -1;
@@ -534,6 +566,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, v
static unsigned long cpe_history[CPE_HISTORY_LENGTH];
static int index;
static DEFINE_SPINLOCK(cpe_history_lock);
+ int recover;
IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
__func__, cpe_irq, smp_processor_id());
@@ -580,6 +613,8 @@ ia64_mca_cpe_int_handler (int cpe_irq, v
out:
/* Get the CPE error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+ recover = (ia64_mca_ce_extension && ia64_mca_ce_extension(
+ IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_CPE)));
return IRQ_HANDLED;
}
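
The module's only userspace interface is the badram attribute created on
kernel_kobj, i.e. /sys/kernel/badram. A minimal sketch of driving it (the
path follows the patch; the hex address is a made-up example):

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/badram", "r");

	if (!f) {
		perror("fopen /sys/kernel/badram");
		return 1;
	}
	/* badpage_show(): counters plus the list of bad physical pages */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	/* badpage_store(): writing a hex physical address frees that one
	 * page, writing 0 frees the whole list, e.g. (as root):
	 *   echo 0x4000000 > /sys/kernel/badram
	 *   echo 0 > /sys/kernel/badram
	 */
	return 0;
}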

@@ -0,0 +1,159 @@
From: Russ Anderson <rja@sgi.com>
Subject: ia64: cpe_migrate.ko causes deadlock.
References: bnc#464676
Patch-mainline: not yet, depends on patches.arch/ia64-page-migration
schedule_on_each_cpu() deadlocks when called from an event thread.
Change cpe_migrate to use a kthread to avoid the problem.
Signed-off-by: Russ Anderson <rja@sgi.com>
Acked-by: Raymund Will <rw@suse.de>
---
arch/ia64/kernel/cpe_migrate.c | 72 +++++++++++++++++++++++++++++++----------
1 file changed, 56 insertions(+), 16 deletions(-)
--- a/arch/ia64/kernel/cpe_migrate.c
+++ b/arch/ia64/kernel/cpe_migrate.c
@@ -22,6 +22,7 @@
#include <linux/page-isolation.h>
#include <linux/memcontrol.h>
#include <linux/kobject.h>
+#include <linux/kthread.h>
#include <asm/page.h>
#include <asm/system.h>
@@ -40,12 +41,15 @@ static struct cpe_info cpe[CE_HISTORY_LE
static int cpe_polling_enabled = 1;
static int cpe_head;
static int cpe_tail;
-static int work_scheduled;
static int mstat_cannot_isolate;
static int mstat_failed_to_discard;
static int mstat_already_marked;
static int mstat_already_on_list;
+/* IRQ handler notifies this wait queue on receipt of an IRQ */
+DECLARE_WAIT_QUEUE_HEAD(cpe_activate_IRQ_wq);
+static DECLARE_COMPLETION(kthread_cpe_migrated_exited);
+int cpe_active;
DEFINE_SPINLOCK(cpe_migrate_lock);
static void
@@ -160,12 +164,12 @@ ia64_mca_cpe_move_page(u64 paddr, u32 no
}
/*
- * ia64_mca_cpe_migrate
- * The worker that does the actual migration. It pulls a
- * physical address off the list and calls the migration code.
+ * cpe_process_queue
+ * Pulls the physical address off the list and calls the migration code.
+ * Will process all the addresses on the list.
*/
-static void
-ia64_mca_cpe_migrate(struct work_struct *unused)
+void
+cpe_process_queue(void)
{
int ret;
u64 paddr;
@@ -193,10 +197,36 @@ ia64_mca_cpe_migrate(struct work_struct
cpe_tail = 0;
} while (cpe_tail != cpe_head);
- work_scheduled = 0;
+ return;
+}
+
+inline int
+cpe_list_empty(void)
+{
+ return (cpe_head == cpe_tail) && (!cpe[cpe_head].paddr);
+}
+
+/*
+ * kthread_cpe_migrate
+ * kthread_cpe_migrate is created at module load time and lives
+ * until the module is removed. When not active, it will sleep.
+ */
+static int
+kthread_cpe_migrate(void *ignore)
+{
+ while (cpe_active) {
+ /*
+ * wait for work
+ */
+ (void)wait_event_interruptible(cpe_activate_IRQ_wq,
+ (!cpe_list_empty() ||
+ !cpe_active));
+ cpe_process_queue(); /* process work */
+ }
+ complete(&kthread_cpe_migrated_exited);
+ return 0;
}
-static DECLARE_WORK(cpe_enable_work, ia64_mca_cpe_migrate);
DEFINE_SPINLOCK(cpe_list_lock);
/*
@@ -228,10 +258,7 @@ cpe_setup_migrate(void *rec)
if (ret < 0)
return -EINVAL;
- if ((cpe_head != cpe_tail) || (cpe[cpe_head].paddr != 0))
- /*
- * List not empty
- */
+ if (!cpe_list_empty())
for (i = 0; i < CE_HISTORY_LENGTH; i++) {
if (PAGE_ALIGN(cpe[i].paddr) == PAGE_ALIGN(paddr)) {
mstat_already_on_list++;
@@ -256,10 +283,7 @@ cpe_setup_migrate(void *rec)
}
spin_unlock(&cpe_list_lock);
- if (!work_scheduled) {
- work_scheduled = 1;
- schedule_work(&cpe_enable_work);
- }
+ wake_up_interruptible(&cpe_activate_IRQ_wq);
return 1;
}
@@ -396,12 +420,23 @@ static int __init
cpe_migrate_external_handler_init(void)
{
int error;
+ struct task_struct *kthread;
error = sysfs_create_file(kernel_kobj, &badram_attr.attr);
if (error)
return -EINVAL;
/*
+ * set up the kthread
+ */
+ cpe_active = 1;
+ kthread = kthread_run(kthread_cpe_migrate, NULL, "cpe_migrate");
+ if (IS_ERR(kthread)) {
+ complete(&kthread_cpe_migrated_exited);
+ return -EFAULT;
+ }
+
+ /*
* register external ce handler
*/
if (ia64_reg_CE_extension(cpe_setup_migrate)) {
@@ -420,6 +455,11 @@ cpe_migrate_external_handler_exit(void)
/* unregister external mca handlers */
ia64_unreg_CE_extension();
+ /* Stop kthread */
+ cpe_active = 0; /* tell kthread_cpe_migrate to exit */
+ wake_up_interruptible(&cpe_activate_IRQ_wq);
+ wait_for_completion(&kthread_cpe_migrated_exited);
+
sysfs_remove_file(kernel_kobj, &badram_attr.attr);
}

@@ -0,0 +1,54 @@
From: Jeff Mahoney <jeffm@suse.com>
Subject: kmsg: Fix parameter limitations
Patch-mainline: Whenever kmsg is upstream
The kmsg infrastructure, currently only employed on s/390, has limitations
with the parameters it can handle due to the way it assembles the
magic string for parsing with scripts/kmsg-doc.
cpp expects the result to be a valid expression and exits with an error
if it is not.
The netfilter ipvs code causes this error, though there are more examples:
error: pasting "_ARGS_" and "&" does not give a valid preprocessing token
This stems from an otherwise valid expression:
pr_info("Registered protocols (%s)\n", &protocols[2]);
It tries to concatenate _ARGS_ and &protocols[2] and fails.
This patch fixes the issue by stringifying the entire parameter list
and allowing kmsg-doc to unquote the resultant expression.
The dev_* expressions that evaluate to __KMSG_DEV are unaffected because
the insertion of the "dev, " between _ARGS_ and the parameter list ends
up creating a valid expression.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
include/linux/kernel.h | 2 +-
scripts/kmsg-doc | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -391,7 +391,7 @@ static inline char *pack_hex_byte(char *
/* generate magic string for scripts/kmsg-doc to parse */
#define pr_printk_hash(level, format, ...) \
- __KMSG_PRINT(level _FMT_ format _ARGS_ ##__VA_ARGS__ _END_)
+ __KMSG_PRINT(level _FMT_ format _ARGS_ #__VA_ARGS__ _END_)
#elif defined(CONFIG_KMSG_IDS) && defined(KMSG_COMPONENT)
--- a/scripts/kmsg-doc
+++ b/scripts/kmsg-doc
@@ -307,7 +307,7 @@ sub process_cpp_file($$$$)
while (<FD>) {
chomp;
- if (/.*__KMSG_PRINT\(\s*(\S*)\s*_FMT_(.*)_ARGS_\s*(.*)?_END_\s*\)/o) {
+ if (/.*__KMSG_PRINT\(\s*(\S*)\s*_FMT_(.*)_ARGS_\s*"(.*)"\s*_END_\s*\)/o) {
if ($component ne "") {
add_kmsg_print($component, $1, $2, $3);
} else {
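
The fix works because #__VA_ARGS__ stringifies the whole argument list
instead of token-pasting into it, and kmsg-doc strips the quotes back
off. A compilable toy version of the two variants (macro names invented;
only the # versus ## distinction matters):

#include <stdio.h>

/* #define KMSG_BAD(fmt, ...) EMIT(_FMT_ fmt _ARGS_ ##__VA_ARGS__ _END_)
 *   -> error: pasting "_ARGS_" and "&" does not give a valid token */
#define KMSG_GOOD(fmt, ...) puts("_FMT_ " fmt " _ARGS_ " #__VA_ARGS__ " _END_")

int main(void)
{
	char protocols[8] = "()TCP";

	/* expands to one string literal containing "&protocols[2]" */
	KMSG_GOOD("Registered protocols (%s)", &protocols[2]);
	return 0;
}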

@@ -0,0 +1,139 @@
From: Alexander Graf <agraf@suse.de>
Date: Wed, 18 Nov 2009 00:39:12 +0100
Subject: Only export selected pv-ops feature structs
References: bnc#556135, FATE#306453
Patch-Mainline: Submitted to virtualization list
To be really sure that we're not using any pv-ops code by accident, we
should make sure that we don't even export the structures used to access
pv-ops functions.
So let's surround the pv-ops structs by #ifdefs.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/x86/kernel/paravirt.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -124,11 +124,21 @@ static void *get_call_destination(u8 typ
{
struct paravirt_patch_template tmpl = {
.pv_init_ops = pv_init_ops,
+#ifdef CONFIG_PARAVIRT_TIME
.pv_time_ops = pv_time_ops,
+#endif
+#ifdef CONFIG_PARAVIRT_CPU
.pv_cpu_ops = pv_cpu_ops,
+#endif
+#ifdef CONFIG_PARAVIRT_IRQ
.pv_irq_ops = pv_irq_ops,
+#endif
+#ifdef CONFIG_PARAVIRT_APIC
.pv_apic_ops = pv_apic_ops,
+#endif
+#ifdef CONFIG_PARAVIRT_MMU
.pv_mmu_ops = pv_mmu_ops,
+#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
#endif
@@ -185,6 +195,7 @@ unsigned paravirt_patch_insns(void *insn
return insn_len;
}
+#ifdef CONFIG_PARAVIRT_MMU
static void native_flush_tlb(void)
{
__native_flush_tlb();
@@ -203,6 +214,7 @@ static void native_flush_tlb_single(unsi
{
__native_flush_tlb_single(addr);
}
+#endif /* CONFIG_PARAVIRT_MMU */
/* These are in entry.S */
extern void native_iret(void);
@@ -284,6 +296,7 @@ enum paravirt_lazy_mode paravirt_get_laz
return percpu_read(paravirt_lazy_mode);
}
+#ifdef CONFIG_PARAVIRT_MMU
void arch_flush_lazy_mmu_mode(void)
{
preempt_disable();
@@ -295,6 +308,7 @@ void arch_flush_lazy_mmu_mode(void)
preempt_enable();
}
+#endif /* CONFIG_PARAVIRT_MMU */
struct pv_info pv_info = {
.name = "bare hardware",
@@ -306,11 +320,16 @@ struct pv_info pv_info = {
struct pv_init_ops pv_init_ops = {
.patch = native_patch,
};
+EXPORT_SYMBOL_GPL(pv_info);
+#ifdef CONFIG_PARAVIRT_TIME
struct pv_time_ops pv_time_ops = {
.sched_clock = native_sched_clock,
};
+EXPORT_SYMBOL_GPL(pv_time_ops);
+#endif
+#ifdef CONFIG_PARAVIRT_IRQ
struct pv_irq_ops pv_irq_ops = {
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
@@ -322,7 +341,10 @@ struct pv_irq_ops pv_irq_ops = {
.adjust_exception_frame = paravirt_nop,
#endif
};
+EXPORT_SYMBOL (pv_irq_ops);
+#endif
+#ifdef CONFIG_PARAVIRT_CPU
struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
@@ -383,12 +405,17 @@ struct pv_cpu_ops pv_cpu_ops = {
.start_context_switch = paravirt_nop,
.end_context_switch = paravirt_nop,
};
+EXPORT_SYMBOL (pv_cpu_ops);
+#endif
+#ifdef CONFIG_PARAVIRT_APIC
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
.startup_ipi_hook = paravirt_nop,
#endif
};
+EXPORT_SYMBOL_GPL(pv_apic_ops);
+#endif
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
@@ -398,6 +425,7 @@ struct pv_apic_ops pv_apic_ops = {
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
+#ifdef CONFIG_PARAVIRT_MMU
struct pv_mmu_ops pv_mmu_ops = {
.read_cr2 = native_read_cr2,
@@ -466,10 +494,5 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_fixmap = native_set_fixmap,
};
-
-EXPORT_SYMBOL_GPL(pv_time_ops);
-EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
-EXPORT_SYMBOL_GPL(pv_apic_ops);
-EXPORT_SYMBOL_GPL(pv_info);
-EXPORT_SYMBOL (pv_irq_ops);
+#endif

@@ -0,0 +1,80 @@
From: Alexander Graf <agraf@suse.de>
Date: Wed, 18 Nov 2009 12:58:00 +0100
Subject: Replace kvm io delay pv-ops with linux magic
References: bnc#556135, FATE#306453
Patch-Mainline: Submitted to virtualization list
Currently we use pv-ops to tell linux not to do anything on io_delay.
While the basic idea is good IMHO, I don't see why we would need pv-ops
for that. The io delay function already has a switch that can do nothing
if you're so inclined.
So here's a patch (stacked on top of the previous pv-ops series) that
removes the io delay pv-ops hook and just sets the native io delay
variable instead.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/x86/Kconfig | 14 --------------
arch/x86/kernel/kvm.c | 16 +++-------------
2 files changed, 3 insertions(+), 27 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -544,20 +544,6 @@ config KVM_GUEST
This option enables various optimizations for running under the KVM
hypervisor.
-config KVM_IODELAY
- bool "KVM IO-delay support"
- depends on KVM_GUEST
- select PARAVIRT_CPU
- ---help---
- Usually we wait for PIO access to complete. When inside KVM there's
- no need to do that, as we know that we're not going through a bus,
- but process PIO requests instantly.
-
- This option disables PIO waits, but drags in CPU-bound pv-ops. Thus
- you will probably get more speed loss than speedup using this option.
-
- If in doubt, say N.
-
config KVM_MMU
bool "KVM PV MMU support"
depends on KVM_GUEST
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -29,15 +29,6 @@
#include <linux/hardirq.h>
#include <asm/timer.h>
-#ifdef CONFIG_KVM_IODELAY
-/*
- * No need for any "IO delay" on KVM
- */
-static void kvm_io_delay(void)
-{
-}
-#endif /* CONFIG_KVM_IODELAY */
-
#ifdef CONFIG_KVM_MMU
#define MMU_QUEUE_SIZE 1024
@@ -201,13 +192,12 @@ static void kvm_leave_lazy_mmu(void)
static void __init paravirt_ops_setup(void)
{
+ extern int io_delay_type;
pv_info.name = "KVM";
pv_info.paravirt_enabled = 1;
-#ifdef CONFIG_KVM_IODELAY
- if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
- pv_cpu_ops.io_delay = kvm_io_delay;
-#endif
+ /* Disable IO delay */
+ io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
#ifdef CONFIG_KVM_MMU
if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
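
The "switch that can do nothing" is native_io_delay() dispatching on
io_delay_type; a self-contained model of that behavior (constant names
and values here are assumptions, not the kernel's):

#include <stdio.h>

enum { IO_DELAY_0X80, IO_DELAY_0XED, IO_DELAY_UDELAY, IO_DELAY_NONE };
static int io_delay_type = IO_DELAY_NONE;   /* what the patch selects */

static void io_delay(void)
{
	switch (io_delay_type) {
	default:
	case IO_DELAY_0X80:	/* dummy write to port 0x80 */
	case IO_DELAY_0XED:	/* dummy write to port 0xed */
	case IO_DELAY_UDELAY:	/* ~2us busy wait */
		puts("would stall waiting for the PIO to settle");
		break;
	case IO_DELAY_NONE:	/* no-op: KVM completes PIO instantly */
		break;
	}
}

int main(void)
{
	io_delay();		/* prints nothing with IO_DELAY_NONE */
	return 0;
}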

@@ -0,0 +1,743 @@
From: Alexander Graf <agraf@suse.de>
Date: Wed, 18 Nov 2009 00:27:59 +0100
Subject: Split paravirt ops by functionality
References: bnc#556135, FATE#306453
Patch-Mainline: Submitted to virtualization list
Currently when using paravirt ops it's an all-or-nothing option. We can either
use pv-ops for CPU, MMU, timing, etc. or not at all.
Now there are some use cases where we don't need the full feature set, but only
a small chunk of it. KVM is a pretty prominent example for this.
So let's make everything a bit more fine-grained. We already have a splitting
by function groups, namely "cpu", "mmu", "time", "irq", "apic" and "spinlock".
Taking that existing splitting and extending it to only compile in the PV
capable bits sounded like a natural fit. That way we don't get performance hits
in MMU code from using the KVM PV clock which only needs the TIME parts of
pv-ops.
We define a new CONFIG_PARAVIRT_ALL option that basically does the same thing
the CONFIG_PARAVIRT did before this splitting. We move all users of
CONFIG_PARAVIRT to CONFIG_PARAVIRT_ALL, so they behave the same way they did
before.
So here it is - the splitting! I would have made the patch smaller, but this
was the closest I could get to atomic (for bisect) while staying sane.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/x86/Kconfig | 47 +++++++++++++++++++++++++---
arch/x86/include/asm/apic.h | 2 -
arch/x86/include/asm/desc.h | 4 +-
arch/x86/include/asm/fixmap.h | 2 -
arch/x86/include/asm/io.h | 2 -
arch/x86/include/asm/irqflags.h | 21 +++++++++---
arch/x86/include/asm/mmu_context.h | 4 +-
arch/x86/include/asm/msr.h | 4 +-
arch/x86/include/asm/paravirt.h | 44 +++++++++++++++++++++++++-
arch/x86/include/asm/paravirt_types.h | 12 +++++++
arch/x86/include/asm/pgalloc.h | 2 -
arch/x86/include/asm/pgtable-3level_types.h | 2 -
arch/x86/include/asm/pgtable.h | 2 -
arch/x86/include/asm/processor.h | 2 -
arch/x86/include/asm/required-features.h | 2 -
arch/x86/include/asm/smp.h | 2 -
arch/x86/include/asm/system.h | 13 +++++--
arch/x86/include/asm/tlbflush.h | 4 +-
arch/x86/kernel/head_64.S | 2 -
arch/x86/kernel/paravirt.c | 2 +
arch/x86/kernel/tsc.c | 2 -
arch/x86/kernel/vsmp_64.c | 2 -
arch/x86/kernel/x8664_ksyms_64.c | 2 -
arch/x86/xen/Kconfig | 2 -
24 files changed, 146 insertions(+), 37 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -362,7 +362,7 @@ endif
config X86_VSMP
bool "ScaleMP vSMP"
- select PARAVIRT
+ select PARAVIRT_ALL
depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM
---help---
@@ -510,7 +510,7 @@ source "arch/x86/xen/Kconfig"
config VMI
bool "VMI Guest support (DEPRECATED)"
- select PARAVIRT
+ select PARAVIRT_ALL
depends on X86_32
---help---
VMI provides a paravirtualized interface to the VMware ESX server
@@ -529,7 +529,6 @@ config VMI
config KVM_CLOCK
bool "KVM paravirtualized clock"
- select PARAVIRT
select PARAVIRT_CLOCK
---help---
Turning on this option will allow you to run a paravirtualized clock
@@ -540,7 +539,7 @@ config KVM_CLOCK
config KVM_GUEST
bool "KVM Guest support"
- select PARAVIRT
+ select PARAVIRT_ALL
---help---
This option enables various optimizations for running under the KVM
hypervisor.
@@ -568,8 +567,48 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer N.
+config PARAVIRT_CPU
+ bool
+ select PARAVIRT
+ default n
+
+config PARAVIRT_TIME
+ bool
+ select PARAVIRT
+ default n
+
+config PARAVIRT_IRQ
+ bool
+ select PARAVIRT
+ default n
+
+config PARAVIRT_APIC
+ bool
+ select PARAVIRT
+ default n
+
+config PARAVIRT_MMU
+ bool
+ select PARAVIRT
+ default n
+
+#
+# This is a placeholder to activate the old "include all pv-ops functionality"
+# behavior. If you're using this I'd recommend looking through your code to see
+# if you can be more specific. It probably saves you a few cycles!
+#
+config PARAVIRT_ALL
+ bool
+ select PARAVIRT_CPU
+ select PARAVIRT_TIME
+ select PARAVIRT_IRQ
+ select PARAVIRT_APIC
+ select PARAVIRT_MMU
+ default n
+
config PARAVIRT_CLOCK
bool
+ select PARAVIRT_TIME
default n
endif
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -81,7 +81,7 @@ static inline bool apic_from_smp_config(
/*
* Basic functions accessing APICs.
*/
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_APIC
#include <asm/paravirt.h>
#endif
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -78,7 +78,7 @@ static inline int desc_empty(const void
return !(desc[0] | desc[1]);
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_CPU
#include <asm/paravirt.h>
#else
#define load_TR_desc() native_load_tr_desc()
@@ -108,7 +108,7 @@ static inline void paravirt_alloc_ldt(st
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
-#endif /* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_CPU */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -162,7 +162,7 @@ void __native_set_fixmap(enum fixed_addr
void native_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags);
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_MMU
static inline void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -268,7 +268,7 @@ extern void native_io_delay(void);
extern int io_delay_type;
extern void io_delay_init(void);
-#if defined(CONFIG_PARAVIRT)
+#if defined(CONFIG_PARAVIRT_CPU)
#include <asm/paravirt.h>
#else
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -58,9 +58,11 @@ static inline void native_halt(void)
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
-#else
+#endif
+
#ifndef __ASSEMBLY__
+#ifndef CONFIG_PARAVIRT_IRQ
static inline unsigned long __raw_local_save_flags(void)
{
return native_save_fl();
@@ -110,12 +112,17 @@ static inline unsigned long __raw_local_
return flags;
}
-#else
+#endif /* CONFIG_PARAVIRT_IRQ */
+
+#else /* __ASSEMBLY__ */
+#ifndef CONFIG_PARAVIRT_IRQ
#define ENABLE_INTERRUPTS(x) sti
#define DISABLE_INTERRUPTS(x) cli
+#endif /* !CONFIG_PARAVIRT_IRQ */
#ifdef CONFIG_X86_64
+#ifndef CONFIG_PARAVIRT_CPU
#define SWAPGS swapgs
/*
* Currently paravirt can't handle swapgs nicely when we
@@ -128,8 +135,6 @@ static inline unsigned long __raw_local_
*/
#define SWAPGS_UNSAFE_STACK swapgs
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */
-
#define INTERRUPT_RETURN iretq
#define USERGS_SYSRET64 \
swapgs; \
@@ -141,16 +146,22 @@ static inline unsigned long __raw_local_
swapgs; \
sti; \
sysexit
+#endif /* !CONFIG_PARAVIRT_CPU */
+
+#ifndef CONFIG_PARAVIRT_IRQ
+#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */
+#endif /* !CONFIG_PARAVIRT_IRQ */
#else
+#ifndef CONFIG_PARAVIRT_CPU
#define INTERRUPT_RETURN iret
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
#define GET_CR0_INTO_EAX movl %cr0, %eax
+#endif /* !CONFIG_PARAVIRT_CPU */
#endif
#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_PARAVIRT */
#ifndef __ASSEMBLY__
#define raw_local_save_flags(flags) \
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -6,14 +6,14 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_MMU
#include <asm-generic/mm_hooks.h>
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
}
-#endif /* !CONFIG_PARAVIRT */
+#endif /* !CONFIG_PARAVIRT_MMU */
/*
* Used for LDT copy/destruction.
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -135,7 +135,7 @@ static inline unsigned long long native_
return EAX_EDX_VAL(val, low, high);
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_CPU
#include <asm/paravirt.h>
#else
#include <linux/errno.h>
@@ -246,7 +246,7 @@ do {
#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
-#endif /* !CONFIG_PARAVIRT */
+#endif /* !CONFIG_PARAVIRT_CPU */
#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -18,6 +18,7 @@ static inline int paravirt_enabled(void)
return pv_info.paravirt_enabled;
}
+#ifdef CONFIG_PARAVIRT_CPU
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
@@ -58,7 +59,9 @@ static inline void write_cr0(unsigned lo
{
PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}
+#endif /* CONFIG_PARAVIRT_CPU */
+#ifdef CONFIG_PARAVIRT_MMU
static inline unsigned long read_cr2(void)
{
return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
@@ -78,7 +81,9 @@ static inline void write_cr3(unsigned lo
{
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}
+#endif /* CONFIG_PARAVIRT_MMU */
+#ifdef CONFIG_PARAVIRT_CPU
static inline unsigned long read_cr4(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
@@ -92,8 +97,9 @@ static inline void write_cr4(unsigned lo
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
+#endif /* CONFIG_PARAVIRT_CPU */
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_CPU)
static inline unsigned long read_cr8(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
@@ -105,6 +111,7 @@ static inline void write_cr8(unsigned lo
}
#endif
+#ifdef CONFIG_PARAVIRT_IRQ
static inline void raw_safe_halt(void)
{
PVOP_VCALL0(pv_irq_ops.safe_halt);
@@ -114,14 +121,18 @@ static inline void halt(void)
{
PVOP_VCALL0(pv_irq_ops.safe_halt);
}
+#endif /* CONFIG_PARAVIRT_IRQ */
+#ifdef CONFIG_PARAVIRT_CPU
static inline void wbinvd(void)
{
PVOP_VCALL0(pv_cpu_ops.wbinvd);
}
+#endif
#define get_kernel_rpl() (pv_info.kernel_rpl)
+#ifdef CONFIG_PARAVIRT_CPU
static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
@@ -224,12 +235,16 @@ do { \
} while (0)
#define rdtscll(val) (val = paravirt_read_tsc())
+#endif /* CONFIG_PARAVIRT_CPU */
+#ifdef CONFIG_PARAVIRT_TIME
static inline unsigned long long paravirt_sched_clock(void)
{
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
+#endif /* CONFIG_PARAVIRT_TIME */
+#ifdef CONFIG_PARAVIRT_CPU
static inline unsigned long long paravirt_read_pmc(int counter)
{
return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
@@ -345,8 +360,9 @@ static inline void slow_down_io(void)
pv_cpu_ops.io_delay();
#endif
}
+#endif /* CONFIG_PARAVIRT_CPU */
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_APIC)
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
unsigned long start_esp)
{
@@ -355,6 +371,7 @@ static inline void startup_ipi_hook(int
}
#endif
+#ifdef CONFIG_PARAVIRT_MMU
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
@@ -689,7 +706,9 @@ static inline void pmd_clear(pmd_t *pmdp
set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
+#endif /* CONFIG_PARAVIRT_MMU */
+#ifdef CONFIG_PARAVIRT_CPU
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
@@ -700,7 +719,9 @@ static inline void arch_end_context_swit
{
PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
}
+#endif /* CONFIG_PARAVIRT_CPU */
+#ifdef CONFIG_PARAVIRT_MMU
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
@@ -719,6 +740,7 @@ static inline void __set_fixmap(unsigned
{
pv_mmu_ops.set_fixmap(idx, phys, flags);
}
+#endif /* CONFIG_PARAVIRT_MMU */
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
@@ -829,6 +851,7 @@ static __always_inline void arch_spin_un
#define __PV_IS_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { func })
+#ifdef CONFIG_PARAVIRT_IRQ
static inline unsigned long __raw_local_save_flags(void)
{
return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
@@ -857,6 +880,7 @@ static inline unsigned long __raw_local_
raw_local_irq_disable();
return f;
}
+#endif /* CONFIG_PARAVIRT_IRQ */
/* Make sure as little as possible of this mess escapes. */
@@ -939,10 +963,13 @@ extern void default_banner(void);
#define PARA_INDIRECT(addr) *%cs:addr
#endif
+#ifdef CONFIG_PARAVIRT_CPU
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+#endif /* CONFIG_PARAVIRT_CPU */
+#ifdef CONFIG_PARAVIRT_IRQ
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
@@ -954,13 +981,17 @@ extern void default_banner(void);
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif /* CONFIG_PARAVIRT_IRQ */
+#ifdef CONFIG_PARAVIRT_CPU
#define USERGS_SYSRET32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
+#endif /* CONFIG_PARAVIRT_CPU */
#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT_CPU
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
@@ -970,10 +1001,12 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+#endif /* CONFIG_PARAVIRT_CPU */
#else /* !CONFIG_X86_32 */
+#ifdef CONFIG_PARAVIRT_CPU
/*
* If swapgs is used while the userspace stack is still current,
* there's no way to call a pvop. The PV replacement *must* be
@@ -993,17 +1026,23 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
)
+#endif /* CONFIG_PARAVIRT_CPU */
+#ifdef CONFIG_PARAVIRT_MMU
#define GET_CR2_INTO_RCX \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
movq %rax, %rcx; \
xorq %rax, %rax;
+#endif /* CONFIG_PARAVIRT_MMU */
+#ifdef CONFIG_PARAVIRT_IRQ
#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
CLBR_NONE, \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
+#endif /* CONFIG_PARAVIRT_IRQ */
+#ifdef CONFIG_PARAVIRT_CPU
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
@@ -1013,6 +1052,7 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+#endif /* CONFIG_PARAVIRT_CPU */
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -339,12 +339,24 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
+#ifdef CONFIG_PARAVIRT_TIME
extern struct pv_time_ops pv_time_ops;
+#endif
+#ifdef CONFIG_PARAVIRT_CPU
extern struct pv_cpu_ops pv_cpu_ops;
+#endif
+#ifdef CONFIG_PARAVIRT_IRQ
extern struct pv_irq_ops pv_irq_ops;
+#endif
+#ifdef CONFIG_PARAVIRT_APIC
extern struct pv_apic_ops pv_apic_ops;
+#endif
+#ifdef CONFIG_PARAVIRT_MMU
extern struct pv_mmu_ops pv_mmu_ops;
+#endif
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
extern struct pv_lock_ops pv_lock_ops;
+#endif
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -7,7 +7,7 @@
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_MMU
#include <asm/paravirt.h>
#else
#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm)
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -18,7 +18,7 @@ typedef union {
} pte_t;
#endif /* !__ASSEMBLY__ */
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_MMU
#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd)
#else
#define SHARED_KERNEL_PMD 1
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,7 +28,7 @@ extern unsigned long empty_zero_page[PAG
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_MMU
#include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -573,7 +573,7 @@ static inline void native_swapgs(void)
#endif
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_CPU
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,7 +48,7 @@
#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_MMU
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
#define NEED_PGE 0
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -66,7 +66,7 @@ struct smp_ops {
extern void set_cpu_sibling_map(int cpu);
#ifdef CONFIG_SMP
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_APIC
#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
#endif
extern struct smp_ops smp_ops;
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -304,13 +304,18 @@ static inline void native_wbinvd(void)
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
-#else
-#define read_cr0() (native_read_cr0())
-#define write_cr0(x) (native_write_cr0(x))
+#endif/* CONFIG_PARAVIRT */
+
+#ifndef CONFIG_PARAVIRT_MMU
#define read_cr2() (native_read_cr2())
#define write_cr2(x) (native_write_cr2(x))
#define read_cr3() (native_read_cr3())
#define write_cr3(x) (native_write_cr3(x))
+#endif /* CONFIG_PARAVIRT_MMU */
+
+#ifndef CONFIG_PARAVIRT_CPU
+#define read_cr0() (native_read_cr0())
+#define write_cr0(x) (native_write_cr0(x))
#define read_cr4() (native_read_cr4())
#define read_cr4_safe() (native_read_cr4_safe())
#define write_cr4(x) (native_write_cr4(x))
@@ -324,7 +329,7 @@ static inline void native_wbinvd(void)
/* Clear the 'TS' bit */
#define clts() (native_clts())
-#endif/* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_CPU */
#define stts() write_cr0(read_cr0() | X86_CR0_TS)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -7,7 +7,7 @@
#include <asm/processor.h>
#include <asm/system.h>
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_MMU
#include <asm/paravirt.h>
#else
#define __flush_tlb() __native_flush_tlb()
@@ -162,7 +162,7 @@ static inline void reset_lazy_tlbstate(v
#endif /* SMP */
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_MMU
#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va)
#endif
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -20,7 +20,7 @@
#include <asm/processor-flags.h>
#include <asm/percpu.h>
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_MMU
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
#else
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -155,12 +155,14 @@ unsigned paravirt_patch_default(u8 type,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insnbuf, len);
+#ifdef CONFIG_PARAVIRT_CPU
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
+#endif
else
/* Otherwise call the function; assume target could
clobber any caller-save reg */
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -66,7 +66,7 @@ u64 native_sched_clock(void)
/* We need to define a real function for sched_clock, to override the
weak default version */
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_TIME
unsigned long long sched_clock(void)
{
return paravirt_sched_clock();
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -22,7 +22,7 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
+#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_IRQ
/*
* Interrupt control on vSMPowered systems:
* ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -55,6 +55,6 @@ EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(init_level4_pgt);
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_CPU
EXPORT_SYMBOL(native_load_gs_index);
#endif
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -4,7 +4,7 @@
config XEN
bool "Xen guest support"
- select PARAVIRT
+ select PARAVIRT_ALL
select PARAVIRT_CLOCK
depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
depends on X86_CMPXCHG && X86_TSC

@ -0,0 +1,125 @@
From: Alexander Graf <agraf@suse.de>
Date: Wed, 18 Nov 2009 00:45:10 +0100
Subject: Split the KVM pv-ops support by feature
References: bnc#556135, FATE#306453
Patch-Mainline: Submitted to virtualization list
Currently, selecting KVM guest support enables multiple features at once
that not everyone necessarily wants to have, namely:
- PV MMU
- zero io delay
- apic detection workaround
Let's split them off so we don't drag in the full pv-ops framework just to
detect we're running on KVM. That gives us more chances to tweak performance!
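To illustrate the "just detect we're running on KVM" case that should not
drag in pv-ops, here is a minimal userspace sketch (illustrative, not part
of this patch; it mirrors the CPUID-based check kvm_para_available() does
in-kernel):

#include <stdio.h>
#include <string.h>
#include <cpuid.h>              /* GCC helper; assumes an x86 build */

static int running_on_kvm(void)
{
        unsigned int eax, ebx, ecx, edx;
        char sig[13];

        __cpuid(1, eax, ebx, ecx, edx);
        if (!(ecx & (1u << 31)))        /* hypervisor-present bit */
                return 0;
        /* Leaf 0x40000000 returns the hypervisor signature in ebx/ecx/edx */
        __cpuid(0x40000000, eax, ebx, ecx, edx);
        memcpy(sig + 0, &ebx, 4);
        memcpy(sig + 4, &ecx, 4);
        memcpy(sig + 8, &edx, 4);
        sig[12] = '\0';
        return !strcmp(sig, "KVMKVMKVM");
}

int main(void)
{
        printf("KVM guest: %s\n", running_on_kvm() ? "yes" : "no");
        return 0;
}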
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/x86/Kconfig | 29 ++++++++++++++++++++++++++++-
arch/x86/kernel/kvm.c | 22 +++++++++++++++-------
2 files changed, 43 insertions(+), 8 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -539,11 +539,38 @@ config KVM_CLOCK
config KVM_GUEST
bool "KVM Guest support"
- select PARAVIRT_ALL
+ select PARAVIRT
---help---
This option enables various optimizations for running under the KVM
hypervisor.
+config KVM_IODELAY
+ bool "KVM IO-delay support"
+ depends on KVM_GUEST
+ select PARAVIRT_CPU
+ ---help---
+ Usually we wait for PIO access to complete. When inside KVM there's
+ no need to do that, as we know that we're not going through a bus,
+ but process PIO requests instantly.
+
+ This option disables PIO waits, but drags in CPU-bound pv-ops. Thus
+ you will probably get more speed loss than speedup using this option.
+
+ If in doubt, say N.
+
+config KVM_MMU
+ bool "KVM PV MMU support"
+ depends on KVM_GUEST
+ select PARAVIRT_MMU
+ ---help---
+ This option enables the paravirtualized MMU for KVM. In most cases
+ it's pretty useless and shouldn't be used.
+
+ It will only cost you performance, because it drags in pv-ops for
+ memory management.
+
+ If in doubt, say N.
+
source "arch/x86/lguest/Kconfig"
config PARAVIRT
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -29,6 +29,16 @@
#include <linux/hardirq.h>
#include <asm/timer.h>
+#ifdef CONFIG_KVM_IODELAY
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+#endif /* CONFIG_KVM_IODELAY */
+
+#ifdef CONFIG_KVM_MMU
#define MMU_QUEUE_SIZE 1024
struct kvm_para_state {
@@ -43,13 +53,6 @@ static struct kvm_para_state *kvm_para_s
return &per_cpu(para_state, raw_smp_processor_id());
}
-/*
- * No need for any "IO delay" on KVM
- */
-static void kvm_io_delay(void)
-{
-}
-
static void kvm_mmu_op(void *buffer, unsigned len)
{
int r;
@@ -194,15 +197,19 @@ static void kvm_leave_lazy_mmu(void)
mmu_queue_flush(state);
paravirt_leave_lazy_mmu();
}
+#endif /* CONFIG_KVM_MMU */
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
pv_info.paravirt_enabled = 1;
+#ifdef CONFIG_KVM_IODELAY
if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
pv_cpu_ops.io_delay = kvm_io_delay;
+#endif
+#ifdef CONFIG_KVM_MMU
if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
pv_mmu_ops.set_pte = kvm_set_pte;
pv_mmu_ops.set_pte_at = kvm_set_pte_at;
@@ -226,6 +233,7 @@ static void __init paravirt_ops_setup(vo
pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
}
+#endif /* CONFIG_KVM_MMU */
#ifdef CONFIG_X86_IO_APIC
no_timer_check = 1;
#endif

@ -0,0 +1,148 @@
From: Russ Anderson <rja@sgi.com>
Subject: mm: Avoid putting a bad page back on the LRU v8
References: 415829
Acked-by: schwab@suse.de
Patch-mainline: not yet
Prevent a page with a physical memory error from being placed back
on the LRU. A new page flag (PG_memerror) is added if
CONFIG_PAGEFLAGS_EXTENDED is defined.
Version 8 change: Removed hot path check for pages with memory
errors on the free list.
Signed-off-by: Russ Anderson <rja@sgi.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
---
include/linux/page-flags.h | 16 +++++++++++++++-
mm/migrate.c | 33 +++++++++++++++++++++++++++++++++
mm/vmscan.c | 1 +
3 files changed, 49 insertions(+), 1 deletion(-)
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -88,6 +88,7 @@ enum pageflags {
PG_private_2, /* If pagecache, has fs aux data */
PG_writeback, /* Page is under writeback */
#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ PG_memerror, /* Page has a physical memory error */
PG_head, /* A head page */
PG_tail, /* A tail page */
#else
@@ -168,14 +169,21 @@ static inline int TestClearPage##uname(s
static inline int __TestClearPage##uname(struct page *page) \
{ return __test_and_clear_bit(PG_##lname, &page->flags); }
+#define PAGEFLAGMASK(uname, lname) \
+static inline int PAGEMASK_##uname(void) \
+ { return (1 << PG_##lname); }
+
#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
- SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
+ SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) \
+ PAGEFLAGMASK(uname, lname)
#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
__SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
#define PAGEFLAG_FALSE(uname) \
static inline int Page##uname(struct page *page) \
+ { return 0; } \
+static inline int PAGEMASK_##uname(void) \
{ return 0; }
#define TESTSCFLAG(uname, lname) \
@@ -393,6 +401,12 @@ static inline void __ClearPageTail(struc
#endif /* !PAGEFLAGS_EXTENDED */
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+PAGEFLAG(MemError, memerror)
+#else
+PAGEFLAG_FALSE(MemError)
+#endif
+
#ifdef CONFIG_MMU
#define __PG_MLOCKED (1 << PG_mlocked)
#else
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -53,6 +53,7 @@ int migrate_prep(void)
return 0;
}
+EXPORT_SYMBOL(migrate_prep);
/*
* Add isolated pages on the list back to the LRU under page lock
@@ -75,6 +76,7 @@ int putback_lru_pages(struct list_head *
}
return count;
}
+EXPORT_SYMBOL(putback_lru_pages);
/*
* Restore a potential migration pte to a working pte entry
@@ -658,6 +660,25 @@ unlock:
* restored.
*/
list_del(&page->lru);
+ if (PageMemError(page)) {
+ if (rc == 0)
+ /*
+ * A page with a memory error that has
+ * been migrated will not be moved to
+ * the LRU.
+ */
+ goto move_newpage;
+ else
+ /*
+ * The page failed to migrate and will not
+ * be added to the bad page list. Clearing
+ * the error bit will allow another attempt
+ * to migrate if it gets another correctable
+ * error.
+ */
+ ClearPageMemError(page);
+ }
+
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
putback_lru_page(page);
@@ -732,6 +753,17 @@ int migrate_pages(struct list_head *from
}
}
}
+
+ if (rc != 0)
+ list_for_each_entry_safe(page, page2, from, lru)
+ if (PageMemError(page))
+ /*
+ * The page failed to migrate. Clearing
+ * the error bit will allow another attempt
+ * to migrate if it gets another correctable
+ * error.
+ */
+ ClearPageMemError(page);
rc = 0;
out:
if (!swapwrite)
@@ -744,6 +776,7 @@ out:
return nr_failed + retry;
}
+EXPORT_SYMBOL(migrate_pages);
#ifdef CONFIG_NUMA
/*
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1091,6 +1091,7 @@ int isolate_lru_page(struct page *page)
}
return ret;
}
+EXPORT_SYMBOL(isolate_lru_page);
/*
* Are there way too many processes in the direct reclaim path already?

@ -0,0 +1,266 @@
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 31 Mar 2010 19:56:42 +0000 (+0200)
Subject: powernow-k8: Add core performance boost support
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip.git
Git-commit: 73860c6b2fd159a35637e233d735e36887c266ad
References: bnc#602209
Patch-mainline: 2.6.35-rc1
powernow-k8: Add core performance boost support
Starting with F10h, revE, AMD processors add support for a dynamic
core boosting feature called Core Performance Boost. When a specific
condition is present, a subset of the cores on a system are boosted
beyond their P0 operating frequency to speed up the performance of
single-threaded applications.
In the normal case, the system comes out of reset with core boosting
enabled. This patch adds a sysfs knob with which core boosting can be
switched on or off for benchmarking purposes.
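A minimal usage sketch for the new knob (illustrative; the exact sysfs
path is an assumption based on cpufreq conventions, under which per-policy
freq_attrs such as "cpb" show up below each CPU's cpufreq directory):

#include <stdio.h>

int main(void)
{
        /* Assumed path; requires root and the powernow-k8 driver loaded */
        const char *path = "/sys/devices/system/cpu/cpu0/cpufreq/cpb";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return 1;
        }
        fputs("0\n", f);        /* 0 = disable boosting, 1 = enable */
        fclose(f);
        return 0;
}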
While at it, make the CPB code hotplug-aware so that taking cores
offline wouldn't interfere with boosting the remaining online cores.
Furthermore, add cpu_online_mask hotplug protection as suggested by
Andrew.
Finally, cleanup the driver init codepath and update copyrights.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <1270065406-1814-3-git-send-email-bp@amd64.org>
Reviewed-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 161 ++++++++++++++++++++++++++++--
arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 2
2 files changed, 151 insertions(+), 12 deletions(-)
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,6 +1,5 @@
-
/*
- * (c) 2003-2006 Advanced Micro Devices, Inc.
+ * (c) 2003-2010 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -54,6 +53,10 @@ static DEFINE_PER_CPU(struct powernow_k8
static int cpu_family = CPU_OPTERON;
+/* core performance boost */
+static bool cpb_capable, cpb_enabled;
+static struct msr *msrs;
+
#ifndef CONFIG_SMP
static inline const struct cpumask *cpu_core_mask(int cpu)
{
@@ -1394,8 +1397,77 @@ out:
return khz;
}
+static void _cpb_toggle_msrs(bool t)
+{
+ int cpu;
+
+ get_online_cpus();
+
+ rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ if (t)
+ reg->l &= ~BIT(25);
+ else
+ reg->l |= BIT(25);
+ }
+ wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ put_online_cpus();
+}
+
+/*
+ * Switch on/off core performance boosting.
+ *
+ * 0=disable
+ * 1=enable.
+ */
+static void cpb_toggle(bool t)
+{
+ if (!cpb_capable)
+ return;
+
+ if (t && !cpb_enabled) {
+ cpb_enabled = true;
+ _cpb_toggle_msrs(t);
+ printk(KERN_INFO PFX "Core Boosting enabled.\n");
+ } else if (!t && cpb_enabled) {
+ cpb_enabled = false;
+ _cpb_toggle_msrs(t);
+ printk(KERN_INFO PFX "Core Boosting disabled.\n");
+ }
+}
+
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+ size_t count)
+{
+ int ret = -EINVAL;
+ unsigned long val = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (!ret && (val == 0 || val == 1) && cpb_capable)
+ cpb_toggle(val);
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
+{
+ return sprintf(buf, "%u\n", cpb_enabled);
+}
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+define_one_rw(cpb);
+
static struct freq_attr *powernow_k8_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
+ &cpb,
NULL,
};
@@ -1411,10 +1483,51 @@ static struct cpufreq_driver cpufreq_amd
.attr = powernow_k8_attr,
};
+/*
+ * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
+ * cannot block the remaining ones from boosting. On the CPU_UP path we
+ * simply keep the boost-disable flag in sync with the current global
+ * state.
+ */
+static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action,
+ void *hcpu)
+{
+ unsigned cpu = (long)hcpu;
+ u32 lo, hi;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+
+ if (!cpb_enabled) {
+ rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+ lo |= BIT(25);
+ wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+ }
+ break;
+
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+ lo &= ~BIT(25);
+ wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpb_nb = {
+ .notifier_call = cpb_notify,
+};
+
/* driver entry point for init */
static int __cpuinit powernowk8_init(void)
{
- unsigned int i, supported_cpus = 0;
+ unsigned int i, supported_cpus = 0, cpu;
for_each_online_cpu(i) {
int rc;
@@ -1423,15 +1536,36 @@ static int __cpuinit powernowk8_init(voi
supported_cpus++;
}
- if (supported_cpus == num_online_cpus()) {
- printk(KERN_INFO PFX "Found %d %s "
- "processors (%d cpu cores) (" VERSION ")\n",
- num_online_nodes(),
- boot_cpu_data.x86_model_id, supported_cpus);
- return cpufreq_register_driver(&cpufreq_amd64_driver);
+ if (supported_cpus != num_online_cpus())
+ return -ENODEV;
+
+ printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
+ num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
+
+ if (boot_cpu_has(X86_FEATURE_CPB)) {
+
+ cpb_capable = true;
+
+ register_cpu_notifier(&cpb_nb);
+
+ msrs = msrs_alloc();
+ if (!msrs) {
+ printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
+ return -ENOMEM;
+ }
+
+ rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ cpb_enabled |= !(!!(reg->l & BIT(25)));
+ }
+
+ printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
+ (cpb_enabled ? "on" : "off"));
}
- return -ENODEV;
+ return cpufreq_register_driver(&cpufreq_amd64_driver);
}
/* driver entry point for term */
@@ -1439,6 +1573,13 @@ static void __exit powernowk8_exit(void)
{
dprintk("exit\n");
+ if (boot_cpu_has(X86_FEATURE_CPB)) {
+ msrs_free(msrs);
+ msrs = NULL;
+
+ unregister_cpu_notifier(&cpb_nb);
+ }
+
cpufreq_unregister_driver(&cpufreq_amd64_driver);
}
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,7 +5,6 @@
* http://www.gnu.org/licenses/gpl.html
*/
-
enum pstate {
HW_PSTATE_INVALID = 0xff,
HW_PSTATE_0 = 0,
@@ -55,7 +54,6 @@ struct powernow_k8_data {
struct cpumask *available_cores;
};
-
/* processor's cpuid instruction support */
#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */
#define CPUID_XFAM 0x0ff00000 /* extended family */

@ -0,0 +1,33 @@
From: Takashi Iwai <tiwai@suse.de>
Subject: [PATCH] Fix build error without CONFIG_PPC_83xx
Patch-mainline:
References:
fsl_deep_sleep() is defined only with CONFIG_PPC_83xx although
CONFIG_IPIC is set for CONFIG_PPC_MPC512x, too.
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
---
arch/powerpc/sysdev/ipic.c | 2 ++
1 file changed, 2 insertions(+)
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -921,6 +921,7 @@ static int ipic_suspend(struct sys_devic
ipic_saved_state.sermr = ipic_read(ipic->regs, IPIC_SERMR);
ipic_saved_state.sercr = ipic_read(ipic->regs, IPIC_SERCR);
+#ifdef CONFIG_PPC_83xx
if (fsl_deep_sleep()) {
/* In deep sleep, make sure there can be no
* pending interrupts, as this can cause
@@ -931,6 +932,7 @@ static int ipic_suspend(struct sys_devic
ipic_write(ipic->regs, IPIC_SEMSR, 0);
ipic_write(ipic->regs, IPIC_SERMR, 0);
}
+#endif
return 0;
}

@ -0,0 +1,19 @@
From: olh@suse.de
Subject: force serial console speed to fix autodetection on pegasos2
Patch-mainline: never
---
arch/powerpc/platforms/chrp/setup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -294,7 +294,7 @@ static void chrp_init_early(void)
if (!property)
goto out_put;
if (!strcmp(property, "failsafe") || !strcmp(property, "serial"))
- add_preferred_console("ttyS", 0, NULL);
+ add_preferred_console("ttyS", 0, "115200");
out_put:
of_node_put(node);
}

@ -0,0 +1,77 @@
From: Olaf Hering <olh@suse.de>
Subject: new prom=nodisplay option to avoid crash in firmware on B50
Patch-mainline: not yet
Add a prom=nodisplay option to avoid a crash in firmware on the IBM
B50 when OF stdout is on serial.
0 > boot scsi/sd@4:1,yaboot |
yaboot starting: loaded at 00200000 00222530 (0/0/00c1a078; sp: 00efffd0)
brokenfirmware did not claim executable memory, fixed it myself
Config file 'yaboot.cnf' read, 213 bytes
Welcome to yaboot version 10.1.22-r945.SuSE
booted from '/pci@80000000/scsi@10/sd@4:1,yaboot'
Enter "help" to get some basic usage information
boot:
* linux
boot: linux 3
Please wait, loading kernel...
Allocated 00600000 bytes for executable @ 02000000
Elf32 kernel loaded...
Loading ramdisk...
ramdisk loaded 0030e057 @ 04100000
OF stdout device is: /pci@80000000/isa@b/serial@i3f8
command line: root=/dev/system/root xmon=on sysrq=1 quiet panic=12 3
memory layout at init:
memory_limit : 00000000 (16 MB aligned)
alloc_bottom : 0440f000
alloc_top : 30000000
alloc_top_hi : 40000000
rmo_top : 30000000
ram_top : 40000000
Looking for displays
found display : /pci@80000000/display@16, opening ...
Unexpected Firmware Error:
DEFAULT CATCH!, code=fff00300 at %SRR0: 00c18ccc %SRR1: 00003030
ok
0 > reset-all
---
arch/powerpc/kernel/prom_init.c | 10 ++++++++++
1 file changed, 10 insertions(+)
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -169,6 +169,7 @@ static unsigned long __initdata dt_strin
static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+static int __initdata prom_no_display;
#ifdef CONFIG_PPC64
static int __initdata prom_iommu_force_on;
static int __initdata prom_iommu_off;
@@ -570,6 +571,14 @@ static void __init early_cmdline_parse(v
#endif /* CONFIG_CMDLINE */
prom_printf("command line: %s\n", RELOC(prom_cmd_line));
+ opt = strstr(RELOC(prom_cmd_line), RELOC("prom="));
+ if (opt) {
+ opt += 5;
+ while (*opt && *opt == ' ')
+ opt++;
+ if (!strncmp(opt, RELOC("nodisplay"), 9))
+ RELOC(prom_no_display) = 1;
+ }
#ifdef CONFIG_PPC64
opt = strstr(RELOC(prom_cmd_line), RELOC("iommu="));
if (opt) {
@@ -2546,6 +2555,7 @@ unsigned long __init prom_init(unsigned
/*
* Initialize display devices
*/
+ if (RELOC(prom_no_display) == 0)
prom_check_displays();
#ifdef CONFIG_PPC64

@ -0,0 +1,36 @@
From: Olaf Hering <olh@novell.com>
Subject: [PATCH] powerpc: vio modalias
Patch-mainline: not yet
Acked-by: Olaf Hering <olh@novell.com>
---
arch/powerpc/kernel/vio.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1319,9 +1319,24 @@ static ssize_t devspec_show(struct devic
return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none");
}
+static ssize_t modalias_show (struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct device_node *of_node = dev->archdata.of_node;
+ const char *compat;
+ int i = 0;
+
+ if (of_node) {
+ compat = of_get_property(of_node, "compatible", &i);
+ i = sprintf (buf, "vio:T%sS%s\n", of_node->type, compat);
+ }
+ return i;
+}
+
static struct device_attribute vio_dev_attrs[] = {
__ATTR_RO(name),
__ATTR_RO(devspec),
+ __ATTR_RO(modalias),
__ATTR_NULL
};

@ -0,0 +1,42 @@
Date: Thu, 9 Oct 2008 11:20:27 -0400
From: Neil Horman <nhorman@tuxdriver.com>
To: linux-kernel@vger.kernel.org, kexec@lists.infradead.org,
vgoyal@redhat.com, hbabu@us.ibm.com
Subject: [PATCH] add additional symbols to /sys/kernel/vmcoreinfo data for
ppc(64)
Cc: nhorman@tuxdriver.com
Patch-mainline: not yet
Hey-
The makedumpfile dump filtering program, in some modes of operation,
needs the node_data and/or contig_page_data symbols to function
properly. These symbols are missing from the powerpc kernel. This
patch adds those symbols properly. Tested successfully by myself and
the reporter.
Regards
Neil
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Bernhard Walle <bwalle@suse.de>
arch/powerpc/kernel/machine_kexec.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -45,6 +45,14 @@ void machine_kexec_cleanup(struct kimage
ppc_md.machine_kexec_cleanup(image);
}
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+}
+
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.

@ -0,0 +1,132 @@
Subject: [PATCH] add syslog printing to xmon debugger.
From: Linas Vepstas <linas@austin.ibm.com>
Patch-mainline: Not yet
This patch adds 'dmesg'/printk log buffer printing to xmon. I find this
useful because crashes are almost always preceded by interesting
printk's. This patch is simple & straightforward, except for one
possibly controversial aspect: it embeds a small snippet in
kernel/printk.c to return the location of the syslog. This is
needed because kallsyms and even CONFIG_KALLSYMS_ALL is not enough
to reveal the location of log_buf. This code is about 90%
cut-n-paste of earlier code from Keith Owens.
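Usage note (the workflow is assumed, not spelled out by the patch): with
xmon enabled, e.g. booted with xmon=on, break into the debugger (for
example via sysrq-x on powerpc) and type 'D' at the mon> prompt to dump
the printk buffer.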
Signed-off-by: Olaf Hering <olh@suse.de>
---
arch/powerpc/xmon/xmon.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
kernel/printk.c | 15 ++++++++++++
2 files changed, 72 insertions(+)
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -138,6 +138,7 @@ static struct bpt *in_breakpoint_table(u
static int do_step(struct pt_regs *);
static void bpt_cmds(void);
static void cacheflush(void);
+static void xmon_show_dmesg(void);
static int cpu_cmd(void);
static void csum(void);
static void bootcmds(void);
@@ -194,6 +195,7 @@ Commands:\n\
#endif
"\
C checksum\n\
+ D show dmesg (printk) buffer\n\
d dump bytes\n\
di dump instructions\n\
df dump float values\n\
@@ -828,6 +830,9 @@ cmds(struct pt_regs *excp)
case 'd':
dump();
break;
+ case 'D':
+ xmon_show_dmesg();
+ break;
case 'l':
symbol_lookup();
break;
@@ -2599,6 +2604,58 @@ static void xmon_print_symbol(unsigned l
printf("%s", after);
}
+extern void debugger_syslog_data(char *syslog_data[4]);
+#define SYSLOG_WRAP(p) if (p < syslog_data[0]) p = syslog_data[1]-1; \
+ else if (p >= syslog_data[1]) p = syslog_data[0];
+
+static void xmon_show_dmesg(void)
+{
+ char *syslog_data[4], *start, *end, c;
+ int logsize;
+
+ /* syslog_data[0,1] physical start, end+1.
+ * syslog_data[2,3] logical start, end+1.
+ */
+ debugger_syslog_data(syslog_data);
+ if (syslog_data[2] == syslog_data[3])
+ return;
+ logsize = syslog_data[1] - syslog_data[0];
+ start = syslog_data[0] + (syslog_data[2] - syslog_data[0]) % logsize;
+ end = syslog_data[0] + (syslog_data[3] - syslog_data[0]) % logsize;
+
+ /* Do a line at a time (max 200 chars) to reduce overhead */
+ c = '\0';
+ while(1) {
+ char *p;
+ int chars = 0;
+ if (!*start) {
+ while (!*start) {
+ ++start;
+ SYSLOG_WRAP(start);
+ if (start == end)
+ break;
+ }
+ if (start == end)
+ break;
+ }
+ p = start;
+ while (*start && chars < 200) {
+ c = *start;
+ ++chars;
+ ++start;
+ SYSLOG_WRAP(start);
+ if (start == end || c == '\n')
+ break;
+ }
+ if (chars)
+ printf("%.*s", chars, p);
+ if (start == end)
+ break;
+ }
+ if (c != '\n')
+ printf("\n");
+}
+
#ifdef CONFIG_PPC_BOOK3S_64
static void dump_slb(void)
{
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -413,6 +413,21 @@ SYSCALL_DEFINE3(syslog, int, type, char
return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
}
+#ifdef CONFIG_DEBUG_KERNEL
+/* Its very handy to be able to view the syslog buffer during debug.
+ * But do_syslog() uses locks so it cannot be used during debugging.
+ * Instead, provide the start and end of the physical and logical logs.
+ * This is equivalent to do_syslog(3).
+ */
+void debugger_syslog_data(char *syslog_data[4])
+{
+ syslog_data[0] = log_buf;
+ syslog_data[1] = log_buf + log_buf_len;
+ syslog_data[2] = log_buf + log_end - (logged_chars < log_buf_len ? logged_chars : log_buf_len);
+ syslog_data[3] = log_buf + log_end;
+}
+#endif /* CONFIG_DEBUG_KERNEL */
+
/*
* Call the console drivers on a range of log_buf
*/

@ -0,0 +1,43 @@
From: Jeff Mahoney <jeffm@suse.com>
Subject: [PATCH] s390: Define FREE_PTE_NR
Patch-mainline: Never, unless FREE_PTE_NR is used in generic code
Commit ba8a9229ab9e80278c28ad68b15053f65b2b0a7c from
Martin Schwidefsky <schwidefsky@de.ibm.com> removed the
#include <asm-generic/tlb.h> from asm-s390/tlb.h when he defined the
s390-specific TLB operations.
FREE_PTE_NR is generally an internal-only value, but our unmap_vmas-lat
patch uses it to make smarter decisions about dumping PTEs in chunks.
This patch restores the generic value in asm-s390/tlb.h. Since it's only
used for an optimization, this should be safe.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
arch/s390/include/asm/tlb.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -34,6 +34,19 @@
#define TLB_NR_PTRS 508
#endif
+/* Lifted from asm-generic/tlb.h; Is used by patches.suse/unmap_vmas-lat */
+/*
+ * For UP we don't need to worry about TLB flush
+ * and page free order so much..
+ */
+#ifdef CONFIG_SMP
+ #define FREE_PTE_NR 506
+ #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
+#else
+ #define FREE_PTE_NR 1
+ #define tlb_fast_mode(tlb) 1
+#endif
+
struct mmu_gather {
struct mm_struct *mm;
unsigned int fullmm;

File diff suppressed because it is too large

@ -0,0 +1,87 @@
From: IBM <lcm@us.ibm.com>
Subject: Use apic=bigsmp on specific xseries machines
References: bnc#440497
Patch-Mainline: not yet
Signed-off-by: Thomas Renninger <trenn@suse.de>
arch/x86/kernel/apic/bigsmp_32.c | 30 +++++++++++++++++++++++++++---
arch/x86/kernel/apic/probe_32.c | 4 ++--
2 files changed, 29 insertions(+), 5 deletions(-)
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -156,7 +156,7 @@ static void bigsmp_send_IPI_all(int vect
static int dmi_bigsmp; /* can be set by dmi scanners */
-static int hp_ht_bigsmp(const struct dmi_system_id *d)
+static int force_bigsmp_apic(const struct dmi_system_id *d)
{
printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
dmi_bigsmp = 1;
@@ -166,17 +166,41 @@ static int hp_ht_bigsmp(const struct dmi
static const struct dmi_system_id bigsmp_dmi_table[] = {
- { hp_ht_bigsmp, "HP ProLiant DL760 G2",
+ { force_bigsmp_apic, "HP ProLiant DL760 G2",
{ DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
}
},
- { hp_ht_bigsmp, "HP ProLiant DL740",
+ { force_bigsmp_apic, "HP ProLiant DL740",
{ DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
}
},
+
+ { force_bigsmp_apic, "IBM x260 / x366 / x460",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "-[ZT"),
+ }
+ },
+
+ { force_bigsmp_apic, "IBM x3800 / x3850 / x3950",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "-[ZU"),
+ }
+ },
+
+ { force_bigsmp_apic, "IBM x3800 / x3850 / x3950",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "-[ZS"),
+ }
+ },
+
+ { force_bigsmp_apic, "IBM x3850 M2 / x3950 M2",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BIOS_VERSION, "-[A3"),
+ }
+ },
{ } /* NULL entry stops DMI scanning */
};
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -267,7 +267,7 @@ generic_mps_oem_check(struct mpc_table *
if (!apic_probe[i]->mps_oem_check(mpc, oem, productid))
continue;
- if (!cmdline_apic) {
+ if (!cmdline_apic && apic == &apic_default) {
apic = apic_probe[i];
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
apic->name);
@@ -287,7 +287,7 @@ int __init default_acpi_madt_oem_check(c
if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id))
continue;
- if (!cmdline_apic) {
+ if (!cmdline_apic && apic == &apic_default) {
apic = apic_probe[i];
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
apic->name);

@ -0,0 +1,49 @@
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 31 Mar 2010 19:56:41 +0000 (+0200)
Subject: x86, cpu: Add AMD core boosting feature flag to /proc/cpuinfo
Git-commit: 5958f1d5d722df7a9e5d129676614a8e5219bacd
Patch-mainline: 2.6.35-rc1
x86, cpu: Add AMD core boosting feature flag to /proc/cpuinfo
By semi-popular demand, this adds the Core Performance Boost feature
flag to /proc/cpuinfo. A possible use case is userspace tools such as
cpufreq-aperf, so that they don't have to jump through the hoops of
accessing "/dev/cpu/%d/cpuid" to check for CPB hw support, or of
calling cpuid from userspace.
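As a sketch of the simpler check this makes possible (illustrative, not
part of the commit), a tool can just scan the flags line of /proc/cpuinfo:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[4096];
        FILE *f = fopen("/proc/cpuinfo", "r");

        if (!f)
                return 1;
        /* A robust parser would tokenize the flag list; this is a sketch */
        while (fgets(line, sizeof(line), f)) {
                if (!strncmp(line, "flags", 5) && strstr(line, " cpb")) {
                        puts("CPB supported");
                        break;
                }
        }
        fclose(f);
        return 0;
}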
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <1270065406-1814-2-git-send-email-bp@amd64.org>
Reviewed-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/kernel/cpu/addon_cpuid_features.c | 5 +++--
2 files changed, 4 insertions(+), 2 deletions(-)
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -161,6 +161,7 @@
*/
#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */
+#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */
/* Virtualization flags: Linux defined */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -30,8 +30,9 @@ void __cpuinit init_scattered_cpuid_feat
const struct cpuid_bit *cb;
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
- { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
- { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
+ { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+ { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
+ { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a },

@ -0,0 +1,222 @@
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Thu, 18 Mar 2010 17:41:46 +0000 (+0100)
Subject: x86, cpufreq: Add APERF/MPERF support for AMD processors
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip.git
Git-commit: a2fed573f065e526bfd5cbf26e5491973d9e9aaa
References: bnc#602209
Patch-mainline: 2.6.35-rc1
x86, cpufreq: Add APERF/MPERF support for AMD processors
Starting with model 10 of Family 0x10, AMD processors may have
support for APERF/MPERF. Add support for identifying it and using
it within cpufreq. Move the APERF/MPERF functions out of the
acpi-cpufreq code and into their own file so they can easily be
shared.
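As a worked example of the ratio the shared helper computes: MPERF counts
at the maximum advertised (P0) rate and APERF at the actual rate, so the
average frequency over a sampling interval is max_freq * dAPERF / dMPERF.
On a 2.6 GHz part where APERF advances by 1.3e9 while MPERF advances by
2.6e9, that yields 2.6 GHz * 0.5 = 1.3 GHz.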
Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
LKML-Reference: <20100401141956.GA1930@aftab>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Reviewed-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
arch/x86/kernel/cpu/cpufreq/Makefile | 4 +-
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 44 +------------------------
arch/x86/kernel/cpu/cpufreq/mperf.c | 51 +++++++++++++++++++++++++++++
arch/x86/kernel/cpu/cpufreq/mperf.h | 9 +++++
arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 8 ++++
5 files changed, 72 insertions(+), 44 deletions(-)
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -2,8 +2,8 @@
# K8 systems. ACPI is preferred to all other hardware-specific drivers.
# speedstep-* is preferred over p4-clockmod.
-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
+obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -45,6 +45,7 @@
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
+#include "mperf.h"
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
"acpi-cpufreq", msg)
@@ -70,8 +71,6 @@ struct acpi_cpufreq_data {
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
-static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
-
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -239,45 +238,6 @@ static u32 get_cur_val(const struct cpum
return cmd.val;
}
-/* Called via smp_call_function_single(), on the target CPU */
-static void read_measured_perf_ctrs(void *_cur)
-{
- struct aperfmperf *am = _cur;
-
- get_aperfmperf(am);
-}
-
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
-static unsigned int get_measured_perf(struct cpufreq_policy *policy,
- unsigned int cpu)
-{
- struct aperfmperf perf;
- unsigned long ratio;
- unsigned int retval;
-
- if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
- return 0;
-
- ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
- per_cpu(acfreq_old_perf, cpu) = perf;
-
- retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
-
- return retval;
-}
-
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
@@ -699,7 +659,7 @@ static int acpi_cpufreq_cpu_init(struct
/* Check for APERF/MPERF support in hardware */
if (cpu_has(c, X86_FEATURE_APERFMPERF))
- acpi_cpufreq_driver.getavg = get_measured_perf;
+ acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
dprintk("CPU%u - ACPI performance management activated.\n", cpu);
for (i = 0; i < perf->state_count; i++)
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/mperf.c
@@ -0,0 +1,51 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include "mperf.h"
+
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
+
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
+{
+ struct aperfmperf *am = _cur;
+
+ get_aperfmperf(am);
+}
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+ unsigned int cpu)
+{
+ struct aperfmperf perf;
+ unsigned long ratio;
+ unsigned int retval;
+
+ if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
+ return 0;
+
+ ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+ per_cpu(acfreq_old_perf, cpu) = perf;
+
+ retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
+MODULE_LICENSE("GPL");
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/mperf.h
@@ -0,0 +1,9 @@
+/*
+ * (c) 2010 Advanced Micro Devices, Inc.
+ * Your use of this code is subject to the terms and conditions of the
+ * GNU general public license version 2. See "COPYING" or
+ * http://www.gnu.org/licenses/gpl.html
+ */
+
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+ unsigned int cpu);
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -45,6 +45,7 @@
#define PFX "powernow-k8: "
#define VERSION "version 2.20.00"
#include "powernow-k8.h"
+#include "mperf.h"
/* serialize freq changes */
static DEFINE_MUTEX(fidvid_mutex);
@@ -57,6 +58,8 @@ static int cpu_family = CPU_OPTERON;
static bool cpb_capable, cpb_enabled;
static struct msr *msrs;
+static struct cpufreq_driver cpufreq_amd64_driver;
+
#ifndef CONFIG_SMP
static inline const struct cpumask *cpu_core_mask(int cpu)
{
@@ -1252,6 +1255,7 @@ static int __cpuinit powernowk8_cpu_init
struct powernow_k8_data *data;
struct init_on_cpu init_on_cpu;
int rc;
+ struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
if (!cpu_online(pol->cpu))
return -ENODEV;
@@ -1326,6 +1330,10 @@ static int __cpuinit powernowk8_cpu_init
return -EINVAL;
}
+ /* Check for APERF/MPERF support in hardware */
+ if (cpu_has(c, X86_FEATURE_APERFMPERF))
+ cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
+
cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
if (cpu_family == CPU_HW_PSTATE)

@ -0,0 +1,26 @@
From: Takashi Iwai <tiwai@suse.de>
Subject: x86: workaround for mccreary HPET read problem
Patch-mainline: not yet
References: bnc#433746
On the mccreary platform, a read of the HPET CMP register does not
appear to be updated immediately after a write and returns the
previous value instead.
A workaround is to read the register twice.
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
---
arch/x86/kernel/hpet.c | 1 +
1 file changed, 1 insertion(+)
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -385,6 +385,7 @@ static int hpet_next_event(unsigned long
cnt += (u32) delta;
hpet_writel(cnt, HPET_Tn_CMP(timer));
+ hpet_readl(HPET_Tn_CMP(timer)); /* pre-read for bnc#433746 */
/*
* We need to read back the CMP register on certain HPET
* implementations (ATI chipsets) which seem to delay the

@ -0,0 +1,38 @@
From: Tejun Heo <tj@kernel.org>
Subject: x86: disallow DAC for MCP51 PCI bridge
References: bnc#463829
Patch-mainline: not yet
MCP51 corrupts DAC transfers. Disallow it. Reported by pgnet on
bnc#463829.
https://bugzilla.novell.com/show_bug.cgi?id=463829
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: pgnet <pgnet.trash@gmail.com>
Signed-off-by: Tejun Heo <teheo@suse.de>
---
arch/x86/kernel/pci-dma.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -318,4 +318,18 @@ static __devinit void via_no_dac(struct
}
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+
+/*
+ * MCP51 PCI bridge corrupts data for DAC. Disable it. Reported in
+ * bnc#463829.
+ */
+static __devinit void mcp51_no_dac(struct pci_dev *dev)
+{
+ if (forbid_dac == 0) {
+ printk(KERN_INFO
+ "PCI: MCP51 PCI bridge detected. Disabling DAC.\n");
+ forbid_dac = 1;
+ }
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x026f, mcp51_no_dac);
#endif

@ -0,0 +1,223 @@
From: Jiri Bohac <jbohac@suse.cz>
Subject: allow 64-bit mode for HPET Timer0
References: bnc#456700
The kernel uses the HPET timers in 32-bit mode for clock-events.
While 32 bits, with a wrap-around time of >4 minutes, is probably
good enough for the clock-event purposes, on some chipsets this
has a negative side-effect on the HPET main counter.
Unlike the original HPET specification 1.0 from 2004, which does not
mention any side-effects of setting TN_32MODE_CNF on the
individual timers, the ICH9 documentation, for example, says:
NOTE: When this bit is set to 1, the hardware counter will
do a 32-bit operation on comparator match and rollovers, thus
the upper 32-bit of the Timer 0 Comparator Value register is
ignored. The upper 32-bit of the main counter is not involved
in any rollover from lower 32-bit of the main counter and
becomes all zeros.
(see http://www.intel.com/assets/pdf/datasheet/316972.pdf, page
819, section 21.1.5, Bit 8). I've seen this behaviour also on
ICH8. I have no idea what other chipsets are affected. But I have
seen AMD chipsets that Do The Right Thing.
This means, that when the kernel configures the Timer 0 to 32-bit
mode, on these chipsets it also cripples the 64-bit main counter
to 32 bits.
The HPET may be mmapped in userspace and the main counter
accessed directly by applications, expecting a 64-bit main
counter.
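As a sketch of the userspace access pattern in question (illustrative; it
assumes /dev/hpet exists and permits mmap, and that the platform is
x86-64 so the 64-bit read is atomic), the main counter sits at the
spec-defined offset 0xf0:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
        int fd = open("/dev/hpet", O_RDONLY);
        volatile uint64_t *regs;

        if (fd < 0) {
                perror("/dev/hpet");
                return 1;
        }
        regs = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
        if (regs == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }
        /* Only a true 64-bit value when Timer 0 is not in 32-bit mode */
        printf("HPET main counter: %llu\n",
               (unsigned long long)regs[0xf0 / 8]);
        munmap((void *)regs, 4096);
        close(fd);
        return 0;
}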
This patch allows the Timer0 to be configured in 64-bit mode
on x86_64 when a hpet64 command-line option is specified.
Updated-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
---
Documentation/kernel-parameters.txt | 2
arch/x86/kernel/hpet.c | 88 ++++++++++++++++++++++++++++++++----
2 files changed, 81 insertions(+), 9 deletions(-)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -497,6 +497,8 @@ and is between 256 and 4096 characters.
Range: 0 - 8192
Default: 64
+ hpet64 [X86-64,HPET] enable 64-bit mode of the HPET timer (bnc#456700)
+
com20020= [HW,NET] ARCnet - COM20020 chipset
Format:
<io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -37,6 +37,7 @@ unsigned long hpet_address;
static unsigned long hpet_num_timers;
#endif
static void __iomem *hpet_virt_address;
+static int hpet_legacy_use_64_bits;
struct hpet_dev {
struct clock_event_device evt;
@@ -59,6 +60,33 @@ static inline void hpet_writel(unsigned
#ifdef CONFIG_X86_64
#include <asm/pgtable.h>
+static inline unsigned long hpet_read_value(unsigned long a)
+{
+ if (hpet_legacy_use_64_bits)
+ return readq(hpet_virt_address + a);
+ else
+ return readl(hpet_virt_address + a);
+}
+
+static void hpet_write_value(unsigned long d, unsigned long a)
+{
+ if (hpet_legacy_use_64_bits)
+ writeq(d, hpet_virt_address + a);
+ else
+ writel(d, hpet_virt_address + a);
+}
+
+#else
+
+static inline unsigned long hpet_read_value(unsigned long a)
+{
+ return readl(hpet_virt_address + a);
+}
+
+static void hpet_write_value(unsigned long d, unsigned long a)
+{
+ writel(d, hpet_virt_address + a);
+}
#endif
static inline void hpet_set_mapping(void)
@@ -103,6 +131,17 @@ static int __init disable_hpet(char *str
}
__setup("nohpet", disable_hpet);
+#ifdef CONFIG_X86_64
+static int hpet64 = 0;
+static int __init hpet64_setup(char *str)
+{
+ hpet64 = 1;
+ return 1;
+}
+__setup("hpet64", hpet64_setup);
+#endif
+
+
static inline int is_hpet_capable(void)
{
return !boot_hpet_disable && hpet_address;
@@ -212,6 +251,7 @@ static void hpet_reserve_platform_timers
* Common hpet info
*/
static unsigned long hpet_period;
+static int hpet_legacy_use_64_bits; /* configure T0 in 64-bit mode? */
static void hpet_legacy_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt);
@@ -278,10 +318,38 @@ static void hpet_enable_legacy_int(void)
hpet_legacy_int_enabled = 1;
}
+static int timer0_use_64_bits(void)
+{
+#ifndef CONFIG_X86_64
+ /* using the HPET in 64-bit mode without atomic 64-bit
+ * accesses is too inefficient
+ */
+ return 0;
+#else
+
+ if (unlikely(hpet64)) {
+ u32 id, t0_cfg;
+ id = hpet_readl(HPET_ID);
+ t0_cfg = hpet_readl(HPET_Tn_CFG(0));
+
+ if ((id & HPET_ID_64BIT) && (t0_cfg & HPET_TN_64BIT_CAP)) {
+ printk(KERN_DEBUG "hpet timer0 configured in 64-bit mode\n");
+ return 1;
+ }
+ else {
+ printk(KERN_DEBUG "hpet timer0 does not support 64-bit mode\n");
+ return 0;
+ }
+ }
+ else return 0;
+#endif
+}
+
static void hpet_legacy_clockevent_register(void)
{
/* Start HPET legacy interrupts */
hpet_enable_legacy_int();
+ hpet_legacy_use_64_bits = timer0_use_64_bits();
/*
* The mult factor is defined as (include/linux/clockchips.h)
@@ -328,9 +396,10 @@ static void hpet_set_mode(enum clock_eve
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
- HPET_TN_SETVAL | HPET_TN_32BIT;
+ HPET_TN_SETVAL |
+ (hpet_legacy_use_64_bits ? 0 : HPET_TN_32BIT);
hpet_writel(cfg, HPET_Tn_CFG(timer));
- hpet_writel(cmp, HPET_Tn_CMP(timer));
+ hpet_write_value(cmp, HPET_Tn_CMP(timer));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
@@ -339,7 +408,7 @@ static void hpet_set_mode(enum clock_eve
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
- hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+ hpet_write_value((unsigned long) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
break;
@@ -347,7 +416,8 @@ static void hpet_set_mode(enum clock_eve
case CLOCK_EVT_MODE_ONESHOT:
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ cfg |= HPET_TN_ENABLE |
+ (hpet_legacy_use_64_bits ? 0 : HPET_TN_32BIT);
hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
@@ -376,11 +446,11 @@ static void hpet_set_mode(enum clock_eve
static int hpet_next_event(unsigned long delta,
struct clock_event_device *evt, int timer)
{
- u32 cnt;
+ unsigned long cnt;
- cnt = hpet_readl(HPET_COUNTER);
+ cnt = hpet_read_value(HPET_COUNTER);
cnt += (u32) delta;
- hpet_writel(cnt, HPET_Tn_CMP(timer));
+ hpet_write_value(cnt, HPET_Tn_CMP(timer));
hpet_readl(HPET_Tn_CMP(timer)); /* pre-read for bnc#433746 */
/*
@@ -388,9 +458,9 @@ static int hpet_next_event(unsigned long
* what we wrote hit the chip before we compare it to the
* counter.
*/
- WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
+ WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != (u32)cnt);
- return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
+ return (s32)((u32)hpet_readl(HPET_COUNTER) - (u32)cnt) >= 0 ? -ETIME : 0;
}
static void hpet_legacy_set_mode(enum clock_event_mode mode,

@ -0,0 +1,447 @@
From: jbeulich@novell.com
Subject: fix unwind annotations
Patch-mainline: tbd
References: bnc#472783, bnc#588458
---
arch/x86/kernel/entry_64.S | 193 +++++++++++++++++++++++----------------------
arch/x86/kernel/head_64.S | 13 +++
2 files changed, 115 insertions(+), 91 deletions(-)
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -38,6 +38,7 @@
*/
#include <linux/linkage.h>
+#include <linux/stringify.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
@@ -240,21 +241,21 @@ ENDPROC(native_usergs_sysret64)
/*
* initial frame state for interrupts (and exceptions without error code)
*/
- .macro EMPTY_FRAME start=1 offset=0
- .if \start
+ .macro EMPTY_FRAME offset=0
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,8+\offset
- .else
- CFI_DEF_CFA_OFFSET 8+\offset
- .endif
+ CFI_DEF_CFA rsp,\offset
.endm
/*
* initial frame state for interrupts (and exceptions without error code)
*/
.macro INTR_FRAME start=1 offset=0
- EMPTY_FRAME \start, SS+8+\offset-RIP
+ .if \start
+ EMPTY_FRAME __stringify(SS+8+\offset-RIP)
+ .else
+ CFI_DEF_CFA_OFFSET SS+8+\offset-RIP
+ .endif
/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
CFI_REL_OFFSET rsp, RSP+\offset-RIP
/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
@@ -267,15 +268,16 @@ ENDPROC(native_usergs_sysret64)
* with vector already pushed)
*/
.macro XCPT_FRAME start=1 offset=0
- INTR_FRAME \start, RIP+\offset-ORIG_RAX
- /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+ INTR_FRAME \start, __stringify(RIP+\offset-ORIG_RAX)
.endm
/*
* frame that enables calling into C.
*/
.macro PARTIAL_FRAME start=1 offset=0
- XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+ .if \start >= 0
+ XCPT_FRAME \start, __stringify(ORIG_RAX+\offset-ARGOFFSET)
+ .endif
CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
@@ -291,7 +293,9 @@ ENDPROC(native_usergs_sysret64)
* frame that enables passing a complete pt_regs to a C function.
*/
.macro DEFAULT_FRAME start=1 offset=0
- PARTIAL_FRAME \start, R11+\offset-R15
+ .if \start >= -1
+ PARTIAL_FRAME \start, __stringify(R11+\offset-R15)
+ .endif
CFI_REL_OFFSET rbx, RBX+\offset
CFI_REL_OFFSET rbp, RBP+\offset
CFI_REL_OFFSET r12, R12+\offset
@@ -302,21 +306,23 @@ ENDPROC(native_usergs_sysret64)
/* save partial stack frame */
ENTRY(save_args)
- XCPT_FRAME
+ XCPT_FRAME offset=__stringify(ORIG_RAX-ARGOFFSET+16)
cld
- movq_cfi rdi, RDI+16-ARGOFFSET
- movq_cfi rsi, RSI+16-ARGOFFSET
- movq_cfi rdx, RDX+16-ARGOFFSET
- movq_cfi rcx, RCX+16-ARGOFFSET
- movq_cfi rax, RAX+16-ARGOFFSET
- movq_cfi r8, R8+16-ARGOFFSET
- movq_cfi r9, R9+16-ARGOFFSET
- movq_cfi r10, R10+16-ARGOFFSET
- movq_cfi r11, R11+16-ARGOFFSET
+ movq %rdi, RDI+16-ARGOFFSET(%rsp)
+ movq %rsi, RSI+16-ARGOFFSET(%rsp)
+ movq %rdx, RDX+16-ARGOFFSET(%rsp)
+ movq %rcx, RCX+16-ARGOFFSET(%rsp)
+ movq_cfi rax, __stringify(RAX+16-ARGOFFSET)
+ movq %r8, R8+16-ARGOFFSET(%rsp)
+ movq %r9, R9+16-ARGOFFSET(%rsp)
+ movq %r10, R10+16-ARGOFFSET(%rsp)
+ movq_cfi r11, __stringify(R11+16-ARGOFFSET)
leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
movq_cfi rbp, 8 /* push %rbp */
leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
+ CFI_DEF_CFA_REGISTER rbp
+ CFI_ADJUST_CFA_OFFSET -8
testl $3, CS(%rdi)
je 1f
SWAPGS
@@ -328,11 +334,10 @@ ENTRY(save_args)
*/
1: incl PER_CPU_VAR(irq_count)
jne 2f
- popq_cfi %rax /* move return address... */
+ popq %rax /* move return address... */
mov PER_CPU_VAR(irq_stack_ptr),%rsp
- EMPTY_FRAME 0
- pushq_cfi %rbp /* backlink for unwinder */
- pushq_cfi %rax /* ... to the new stack */
+ pushq %rbp /* backlink for unwinder */
+ pushq %rax /* ... to the new stack */
/*
* We entered an interrupt context - irqs are off:
*/
@@ -342,14 +347,14 @@ ENTRY(save_args)
END(save_args)
ENTRY(save_rest)
- PARTIAL_FRAME 1 REST_SKIP+8
+ CFI_STARTPROC
movq 5*8+16(%rsp), %r11 /* save return address */
- movq_cfi rbx, RBX+16
- movq_cfi rbp, RBP+16
- movq_cfi r12, R12+16
- movq_cfi r13, R13+16
- movq_cfi r14, R14+16
- movq_cfi r15, R15+16
+ movq %rbx, RBX+16(%rsp)
+ movq %rbp, RBP+16(%rsp)
+ movq %r12, R12+16(%rsp)
+ movq %r13, R13+16(%rsp)
+ movq %r14, R14+16(%rsp)
+ movq %r15, R15+16(%rsp)
movq %r11, 8(%rsp) /* return address */
FIXUP_TOP_OF_STACK %r11, 16
ret
@@ -359,23 +364,23 @@ END(save_rest)
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
- XCPT_FRAME 1 RDI+8
+ XCPT_FRAME offset=__stringify(ORIG_RAX-R15+8)
cld
- movq_cfi rdi, RDI+8
- movq_cfi rsi, RSI+8
- movq_cfi rdx, RDX+8
- movq_cfi rcx, RCX+8
- movq_cfi rax, RAX+8
- movq_cfi r8, R8+8
- movq_cfi r9, R9+8
- movq_cfi r10, R10+8
- movq_cfi r11, R11+8
- movq_cfi rbx, RBX+8
- movq_cfi rbp, RBP+8
- movq_cfi r12, R12+8
- movq_cfi r13, R13+8
- movq_cfi r14, R14+8
- movq_cfi r15, R15+8
+ movq %rdi, RDI+8(%rsp)
+ movq %rsi, RSI+8(%rsp)
+ movq_cfi rdx, __stringify(RDX+8)
+ movq_cfi rcx, __stringify(RCX+8)
+ movq_cfi rax, __stringify(RAX+8)
+ movq %r8, R8+8(%rsp)
+ movq %r9, R9+8(%rsp)
+ movq %r10, R10+8(%rsp)
+ movq %r11, R11+8(%rsp)
+ movq_cfi rbx, __stringify(RBX+8)
+ movq %rbp, RBP+8(%rsp)
+ movq %r12, R12+8(%rsp)
+ movq %r13, R13+8(%rsp)
+ movq %r14, R14+8(%rsp)
+ movq %r15, R15+8(%rsp)
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
@@ -685,7 +690,7 @@ ENTRY(\label)
subq $REST_SKIP, %rsp
CFI_ADJUST_CFA_OFFSET REST_SKIP
call save_rest
- DEFAULT_FRAME 0 8 /* offset 8: return address */
+ DEFAULT_FRAME -2 8 /* offset 8: return address */
leaq 8(%rsp), \arg /* pt_regs pointer */
call \func
jmp ptregscall_common
@@ -702,12 +707,12 @@ END(\label)
ENTRY(ptregscall_common)
DEFAULT_FRAME 1 8 /* offset 8: return address */
RESTORE_TOP_OF_STACK %r11, 8
- movq_cfi_restore R15+8, r15
- movq_cfi_restore R14+8, r14
- movq_cfi_restore R13+8, r13
- movq_cfi_restore R12+8, r12
- movq_cfi_restore RBP+8, rbp
- movq_cfi_restore RBX+8, rbx
+ movq_cfi_restore __stringify(R15+8), r15
+ movq_cfi_restore __stringify(R14+8), r14
+ movq_cfi_restore __stringify(R13+8), r13
+ movq_cfi_restore __stringify(R12+8), r12
+ movq_cfi_restore __stringify(RBP+8), rbp
+ movq_cfi_restore __stringify(RBX+8), rbx
ret $REST_SKIP /* pop extended registers */
CFI_ENDPROC
END(ptregscall_common)
@@ -719,9 +724,8 @@ END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
+ addq $8, %rsp
+ PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
@@ -740,7 +744,7 @@ END(stub_execve)
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
+ PARTIAL_FRAME 0
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
@@ -796,10 +805,12 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- subq $10*8, %rsp
- CFI_ADJUST_CFA_OFFSET 10*8
+ subq $ORIG_RAX-ARGOFFSET+8, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
call save_args
- PARTIAL_FRAME 0
+ PARTIAL_FRAME -1 8
+ CFI_REL_OFFSET rbp, 0
+ CFI_DEF_CFA_REGISTER rbp
call \func
.endm
@@ -1036,10 +1047,10 @@ ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
- subq $15*8,%rsp
- CFI_ADJUST_CFA_OFFSET 15*8
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call error_entry
- DEFAULT_FRAME 0
+ DEFAULT_FRAME -1
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
call \do_sym
@@ -1054,8 +1065,10 @@ ENTRY(\sym)
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
- subq $15*8, %rsp
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
+ DEFAULT_FRAME -1
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
@@ -1071,8 +1084,10 @@ ENTRY(\sym)
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
- subq $15*8, %rsp
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
+ DEFAULT_FRAME -1
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
@@ -1089,10 +1104,10 @@ END(\sym)
ENTRY(\sym)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- subq $15*8,%rsp
- CFI_ADJUST_CFA_OFFSET 15*8
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call error_entry
- DEFAULT_FRAME 0
+ DEFAULT_FRAME -1
movq %rsp,%rdi /* pt_regs pointer */
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
@@ -1107,10 +1122,10 @@ END(\sym)
ENTRY(\sym)
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- subq $15*8,%rsp
- CFI_ADJUST_CFA_OFFSET 15*8
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
- DEFAULT_FRAME 0
+ DEFAULT_FRAME -1
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
movq ORIG_RAX(%rsp),%rsi /* get error code */
@@ -1426,7 +1441,7 @@ paranoidzeroentry machine_check *machine
/* ebx: no swapgs flag */
ENTRY(paranoid_exit)
- INTR_FRAME
+ DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %ebx,%ebx /* swapgs needed? */
@@ -1476,25 +1491,24 @@ END(paranoid_exit)
* returns in "no swapgs flag" in %ebx.
*/
ENTRY(error_entry)
- XCPT_FRAME
- CFI_ADJUST_CFA_OFFSET 15*8
+ XCPT_FRAME offset=__stringify(ORIG_RAX-R15+8)
/* oldrax contains error code */
cld
- movq_cfi rdi, RDI+8
- movq_cfi rsi, RSI+8
- movq_cfi rdx, RDX+8
- movq_cfi rcx, RCX+8
- movq_cfi rax, RAX+8
- movq_cfi r8, R8+8
- movq_cfi r9, R9+8
- movq_cfi r10, R10+8
- movq_cfi r11, R11+8
- movq_cfi rbx, RBX+8
- movq_cfi rbp, RBP+8
- movq_cfi r12, R12+8
- movq_cfi r13, R13+8
- movq_cfi r14, R14+8
- movq_cfi r15, R15+8
+ movq %rdi, RDI+8(%rsp)
+ movq %rsi, RSI+8(%rsp)
+ movq %rdx, RDX+8(%rsp)
+ movq %rcx, RCX+8(%rsp)
+ movq %rax, RAX+8(%rsp)
+ movq %r8, R8+8(%rsp)
+ movq %r9, R9+8(%rsp)
+ movq %r10, R10+8(%rsp)
+ movq %r11, R11+8(%rsp)
+ movq_cfi rbx, __stringify(RBX+8)
+ movq %rbp, RBP+8(%rsp)
+ movq %r12, R12+8(%rsp)
+ movq %r13, R13+8(%rsp)
+ movq %r14, R14+8(%rsp)
+ movq %r15, R15+8(%rsp)
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
@@ -1503,7 +1517,6 @@ error_swapgs:
error_sti:
TRACE_IRQS_OFF
ret
- CFI_ENDPROC
/*
* There are two places in the kernel that can potentially fault with
@@ -1513,6 +1522,7 @@ error_sti:
* compat mode. Check for these here too.
*/
error_kernelspace:
+ CFI_REL_OFFSET rcx, RCX+8
incl %ebx
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
@@ -1528,6 +1542,7 @@ bstep_iret:
/* Fix truncated RIP */
movq %rcx,RIP+8(%rsp)
jmp error_swapgs
+ CFI_ENDPROC
END(error_entry)
@@ -1556,10 +1571,10 @@ ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1
- subq $15*8, %rsp
- CFI_ADJUST_CFA_OFFSET 15*8
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
- DEFAULT_FRAME 0
+ DEFAULT_FRAME -1
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -285,6 +285,8 @@ early_idt_handlers:
ENTRY(early_idt_handler)
#ifdef CONFIG_EARLY_PRINTK
+#include <asm/calling.h>
+#include <asm/dwarf2.h>
cmpl $2,early_recursion_flag(%rip)
jz 1f
incl early_recursion_flag(%rip)
@@ -300,6 +302,16 @@ ENTRY(early_idt_handler)
testl $0x27d00,%eax
je 0f
popq %r8 # get error code
+
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp, SS+8-RIP
+# CFI_REL_OFFSET ss, SS-RIP
+ CFI_REL_OFFSET rsp, RSP-RIP
+# CFI_REL_OFFSET rflags, EFLAGS-RIP
+# CFI_REL_OFFSET cs, CS-RIP
+ CFI_REL_OFFSET rip, RIP-RIP
+
0: movq 0(%rsp),%rcx # get ip
movq 8(%rsp),%rdx # get cs
xorl %eax,%eax
@@ -313,6 +325,7 @@ ENTRY(early_idt_handler)
movq 0(%rsp),%rsi # get rip again
call __print_symbol
#endif
+ CFI_ENDPROC
#endif /* EARLY_PRINTK */
1: hlt
jmp 1b

@ -0,0 +1,74 @@
From: Brandon Philips <bphilips@suse.de>
Subject: Avoid oops on G33 in 1MB stolen Mem case
References: bnc#391261
Patch-Mainline: soon (see bug for ref)
This is similar to f443675affe3f16dd428e46f0f7fd3f4d703eeab, which was
reverted because it broke older X.Org drivers. This patch fixes only
the 1MB stolen case, since that case causes an oops due to a calculation
problem.
This will not work with older X drivers without the accompanying patch,
but I think avoiding an oops and making it possible to work with an
up-to-date X.org driver is reasonable.
Explanation of the oops:
> static void intel_i830_init_gtt_entries(void)
...
> } else if (IS_G33) {
> /* G33's GTT size defined in gmch_ctrl */
> switch (gmch_ctrl & G33_PGETBL_SIZE_MASK) {
> case G33_PGETBL_SIZE_1M:
> size = 1024;
> break;
...
> size += 4;
size = 1028
Then, since the BIOS set 1MB for the device in the GMCH
control, we get here:
> } else {
> switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
> case I855_GMCH_GMS_STOLEN_1M:
> gtt_entries = MB(1) - KB(size);
> break;
MB(1) = 1 * 1024 * 1024
KB(1028) = 1028 * 1024
MB(1) - KB(1028) = -4096
> gtt_entries /= KB(4);
> intel_private.gtt_entries = gtt_entries;
We end up with -1 in gtt_entries.
This leads to intel_i915_configure reading/writing to areas outside of
mapped memory and the oops.
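For illustration, here is the failing arithmetic as a standalone sketch.
The KB/MB macros match the definitions quoted above; this is not the
driver code itself:

#include <stdio.h>

#define KB(x) ((x) * 1024)
#define MB(x) (KB(KB(x)))

int main(void)
{
        int size = 1024 + 4;                /* G33_PGETBL_SIZE_1M case */
        int gtt_entries = MB(1) - KB(size); /* 1048576 - 1052672 = -4096 */

        gtt_entries /= KB(4);               /* -4096 / 4096 = -1 */
        printf("gtt_entries = %d\n", gtt_entries);
        return 0;
}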
Signed-off-by: Brandon Philips <bphilips@suse.de>
Acked-by: Thomas Renninger <trenn@suse.de>
---
drivers/char/agp/intel-agp.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -801,6 +801,13 @@ static void intel_i830_init_gtt_entries(
} else {
switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
case I855_GMCH_GMS_STOLEN_1M:
+ if (IS_G33) {
+ size = 0;
+ printk(KERN_WARNING PFX
+ "Warning: G33 chipset with 1MB"
+ " allocated. Older X.org Intel drivers"
+ " will not work.\n");
+ }
gtt_entries = MB(1) - KB(size);
break;
case I855_GMCH_GMS_STOLEN_4M:

@ -0,0 +1,581 @@
From: Andi Kleen <andi@firstfloor.org>
Subject: x86, mce: Xeon75xx specific interface to get corrected memory error information
Patch-Mainline: submitted to x86-tip, added but reverted due to a minor compile issue
which gets fixed by an incremental patch
References: bnc#573380, fate#307738
http://lkml.org/lkml/2010/1/22/98
The Xeon 75xx doesn't log physical addresses for corrected machine check
events in the standard architectural MSRs; instead the address has to
be retrieved in a model-specific way. Without the address, predictive
failure analysis is impossible.
Implement cpu model specific code to do this in mce-xeon75xx.c using a
new hook that is called from the generic poll code. The code retrieves
the physical address/DIMM of the last corrected error from the
platform and makes the address look like a standard architectural MCA
address for further processing.
In addition, the DIMM information is retrieved and put into two new
aux0/aux1 fields in struct mce. These fields are specific to a given
CPU and can be decoded by mcelog into specific DIMM information;
the latest mcelog version has support for this.
Longer term this will likely be in a different output format, but
short term this seemed like the least intrusive solution. Older mcelog
can deal with an extended record.
There's no code to print this information on a panic because this only
works for corrected errors, and corrected errors do not usually result
in panics.
The act of retrieving the DIMM/PA information can take some time, so
this code has a rate limit to avoid taking too much CPU time during an
error flood.
The whole thing can be loaded as a module and has suitable PCI-IDs so
that it can be auto-loaded by a distribution. The code also checks
explicitly for the expected CPU model number to make sure this code
doesn't run anywhere else.
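For reference, a userspace consumer such as mcelog could unpack one of
these values along these lines. This is a hypothetical sketch mirroring
struct aux_pfa_dimm from the patch, assuming a little-endian host; it is
not part of the patch itself:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same layout as struct aux_pfa_dimm below: one packed u64 per DIMM. */
struct aux_pfa_dimm {
        uint8_t fbd_channel_id;
        uint8_t ddr_channel_id;
        uint8_t ddr_dimm_id;
        uint8_t ddr_rank_id;
        uint8_t ddr_dimm_bank_id;
        uint8_t ddr_dimm_row_id;
        uint8_t ddr_dimm_column_id;
        uint8_t valid;                  /* DIMM_VALID_* bits */
} __attribute__((packed));

static void print_dimm(uint64_t aux)
{
        struct aux_pfa_dimm d;

        memcpy(&d, &aux, sizeof(d));    /* little-endian x86 host assumed */
        if (d.valid & (1 << 0))         /* DIMM_VALID_FBD_CHAN */
                printf("fbd channel %u\n", d.fbd_channel_id);
        if (d.valid & (1 << 2))         /* DIMM_VALID_DDR_DIMM */
                printf("dimm %u\n", d.ddr_dimm_id);
}

int main(void)
{
        print_dimm(0x0400000000020001ULL); /* valid = DIMM_VALID_DDR_DIMM */
        return 0;
}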
Signed-off-by: Thomas Renninger <trenn@suse.de>
---
arch/x86/Kconfig | 8
arch/x86/include/asm/mce.h | 2
arch/x86/kernel/cpu/mcheck/Makefile | 1
arch/x86/kernel/cpu/mcheck/mce-internal.h | 1
arch/x86/kernel/cpu/mcheck/mce-xeon75xx.c | 427 ++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/mcheck/mce.c | 11
arch/x86/kernel/e820.c | 3
7 files changed, 452 insertions(+), 1 deletion(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -919,6 +919,14 @@ config X86_MCE_INTEL
Additional support for intel specific MCE features such as
the thermal monitor.
+config X86_MCE_XEON75XX
+ tristate "Intel Xeon 7500 series corrected memory error driver"
+ depends on X86_MCE_INTEL
+ ---help---
+ Add support for a Intel Xeon 7500 series specific memory error driver.
+ This allows to report the DIMM and physical address on a corrected
+ memory error machine check event.
+
config X86_MCE_AMD
def_bool y
prompt "AMD MCE features"
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -67,6 +67,8 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ __u64 aux0; /* model specific */
+ __u64 aux1; /* model specific */
};
/*
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -2,6 +2,7 @@ obj-y = mce.o mce-severity.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_XEON75XX) += mce-xeon75xx.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,4 @@ extern int mce_ser;
extern struct mce_bank *mce_banks;
+extern void (*cpu_specific_poll)(struct mce *);
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-xeon75xx.c
@@ -0,0 +1,427 @@
+/*
+ * Xeon 7500 series specific machine check support code.
+ * Copyright 2009, 2010 Intel Corporation
+ * Author: Andi Kleen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Implement Xeon 7500 series specific code to retrieve the physical address
+ * and DIMM information for corrected memory errors.
+ *
+ * Interface: mce->aux0/aux1 is mapped to a struct pfa_dimm with pad
+ * redefined to DIMM valid bits. Consumers check CPUID and bank and
+ * then interpret aux0/aux1
+ */
+
+/* #define DEBUG 1 */ /* disable for production */
+#define pr_fmt(x) "MCE: " x
+
+#include <linux/moduleparam.h>
+#include <linux/pci_ids.h>
+#include <linux/hrtimer.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <asm/processor.h>
+#include <asm/e820.h>
+#include <asm/mce.h>
+#include <asm/io.h>
+
+#include "mce-internal.h"
+
+#define PFA_SIG "$PFA"
+#define PFA_SIG_LEN 4
+
+/* DIMM description */
+struct aux_pfa_dimm {
+ u8 fbd_channel_id;
+ u8 ddr_channel_id;
+ u8 ddr_dimm_id;
+ u8 ddr_rank_id;
+ u8 ddr_dimm_bank_id;
+ u8 ddr_dimm_row_id;
+ u8 ddr_dimm_column_id;
+ u8 valid;
+} __attribute__((packed));
+
+struct pfa_dimm {
+ u8 fbd_channel_id;
+ u8 ddr_channel_id;
+ u8 ddr_dimm_id;
+ u8 ddr_rank_id;
+ u8 ddr_dimm_bank_id;
+ u32 ddr_dimm_row_id;
+ u32 ddr_dimm_column_id;
+} __attribute__((packed));
+
+/* Memory translation table in memory. */
+struct pfa_table {
+ u8 sig[PFA_SIG_LEN]; /* Signature: '$PFA' */
+ u16 len; /* total length */
+ u16 revision; /* 0x11 */
+ u8 checksum; /* 8bit sum to zero */
+ u8 db_value; /* mailbox port command value */
+ u8 db_port; /* mailbox port */
+ /* end of header; end of checksum */
+ u8 command; /* input command */
+ u32 valid; /* valid input/output bits */
+ u16 status; /* output status */
+ u8 socket_id; /* input socket id*/
+ u8 bank_id; /* input MCE bank id */
+ u32 pad1;
+ u64 mbox_address;
+ u64 physical_addr; /* physical address */
+ struct pfa_dimm dimm[2];
+ /*
+ * topology information follows: not used for now.
+ */
+} __attribute__((packed));
+
+/* DIMM valid bits in valid: DIMM0: 8..12; DIMM1 16..20 */
+#define DIMM_VALID_BITS(val, num) (((val) >> (4 + (num) * 8)) & DIMM_VALID_ALL)
+#define DIMM_SET_VALID(val, num) ((val) << (4 + (num) * 8))
+
+enum {
+ MCE_BANK_MBOX0 = 8,
+ MCE_BANK_MBOX1 = 9,
+
+ PFA_REVISION = 0x11, /* v1.1 */
+
+ /* Status bits for valid field */
+ PFA_VALID_MA = (1 << 0),
+ PFA_VALID_SOCKETID = (1 << 1),
+ PFA_VALID_BANKID = (1 << 2),
+ PFA_VALID_PA = (1 << 3),
+
+ /* DIMM valid bits in valid */
+ /* use with DIMM_VALID_BITS/DIMM_SET_VALID for pfa->valid */
+ DIMM_VALID_FBD_CHAN = (1 << 0),
+ DIMM_VALID_DDR_CHAN = (1 << 1),
+ DIMM_VALID_DDR_DIMM = (1 << 2),
+ DIMM_VALID_DDR_RANK = (1 << 3),
+ DIMM_VALID_DIMM_BANK = (1 << 4),
+ DIMM_VALID_DIMM_ROW = (1 << 5),
+ DIMM_VALID_DIMM_COLUMN = (1 << 6),
+ DIMM_VALID_ALL = 0x7f,
+
+ PFA_DIMM_VALID_MASK = DIMM_SET_VALID(DIMM_VALID_ALL, 0)
+ | DIMM_SET_VALID(DIMM_VALID_ALL, 1),
+
+ /* Values for status field */
+ PFA_STATUS_SUCCESS = 0,
+ PFA_STATUS_SOCKET_INVALID = (1 << 1),
+ PFA_STATUS_MBOX_INVALID = (1 << 2),
+ PFA_STATUS_MA_INVALID = (1 << 3),
+ PFA_STATUS_PA_INVALID = (1 << 4),
+
+ /* Values for command field */
+ PFA_CMD_GET_MEM_CORR_ERR_PA = 0,
+ PFA_CMD_PA_TO_DIMM_ADDR = 1,
+ PFA_CMD_DIMM_TO_PA = 2,
+ PFA_CMD_GET_TOPOLOGY = 3,
+
+ /* PCI device IDs and the base register */
+ ICH_PFA_CFG = 0x8c, /* SCRATCH4 */
+ PCI_DEVICE_ID_BXB_ICH_LEGACY0 = 0x3422,
+};
+
+static struct pfa_table *pfa_table __read_mostly;
+static int memerr_max_conv_rate __read_mostly = 100;
+static int memerr_min_interval __read_mostly = 500;
+static int pfa_lost; /* for diagnosis */
+
+enum {
+ RATE_LIMIT_PERIOD = USEC_PER_SEC, /* in us; period of rate limit */
+};
+
+module_param(memerr_max_conv_rate, int, 0644);
+MODULE_PARM_DESC(memerr_max_conv_rate,
+ "Maximum number of memory error conversions each second; 0 to disable");
+module_param(memerr_min_interval, int, 0644);
+MODULE_PARM_DESC(memerr_min_interval,
+ "Minimum time delta between two memory conversions; in us; default 500");
+
+static int notest;
+static int nocsum;
+module_param(notest, int, 0);
+module_param(nocsum, int, 0);
+
+static u64 encode_dimm(struct pfa_dimm *d, u8 valid)
+{
+ union {
+ struct aux_pfa_dimm d;
+ u64 v;
+ } p;
+
+ BUILD_BUG_ON(sizeof(struct aux_pfa_dimm) != sizeof(u64));
+ p.d.fbd_channel_id = d->fbd_channel_id;
+ p.d.ddr_channel_id = d->ddr_channel_id;
+ p.d.ddr_dimm_id = d->ddr_dimm_id;
+ p.d.ddr_rank_id = d->ddr_rank_id;
+ p.d.ddr_dimm_bank_id = d->ddr_dimm_bank_id;
+ p.d.ddr_dimm_row_id = d->ddr_dimm_row_id;
+ if (p.d.ddr_dimm_row_id != d->ddr_dimm_row_id) /* truncated? */
+ valid &= ~DIMM_VALID_DIMM_ROW;
+ p.d.ddr_dimm_column_id = d->ddr_dimm_column_id;
+ if (p.d.ddr_dimm_column_id != d->ddr_dimm_column_id)
+ valid &= ~DIMM_VALID_DIMM_COLUMN;
+ p.d.valid = valid;
+ pr_debug("PFA fbd_ch %u ddr_ch %u dimm %u rank %u bank %u valid %x\n",
+ d->fbd_channel_id,
+ d->ddr_channel_id,
+ d->ddr_dimm_id,
+ d->ddr_rank_id,
+ d->ddr_dimm_bank_id,
+ valid);
+ return p.v;
+}
+
+static u8 csum(u8 *table, u16 len)
+{
+ u8 sum = 0;
+ int i;
+ for (i = 0; i < len; i++)
+ sum += *table++;
+ return sum;
+}
+
+/*
+ * Execute a command through the mailbox interface.
+ */
+static int
+pfa_command(unsigned bank, unsigned socketid, unsigned command, unsigned valid)
+{
+ pfa_table->bank_id = bank;
+ pfa_table->socket_id = socketid;
+ pfa_table->valid = valid | PFA_VALID_SOCKETID;
+ pfa_table->command = command;
+
+ outb(pfa_table->db_value, pfa_table->db_port);
+
+ mb(); /* Reread fields after they got changed */
+
+ if (pfa_table->status != PFA_STATUS_SUCCESS) {
+ pr_debug("Memory PFA command %d failed: socket:%d bank:%d status:%x\n",
+ command, socketid, bank, pfa_table->status);
+ return -pfa_table->status;
+ }
+ return 0;
+}
+
+/*
+ * Retrieve physical address and DIMMs.
+ */
+static int translate_memory_error(struct mce *m)
+{
+ struct pfa_table *pfa = pfa_table;
+ u64 status;
+ int ret;
+ u32 valid;
+ int cpu = smp_processor_id();
+
+ /* Make sure our structures match the specification */
+ BUILD_BUG_ON(offsetof(struct pfa_table, physical_addr) != 0x20);
+ BUILD_BUG_ON(offsetof(struct pfa_table, status) != 0x10);
+ BUILD_BUG_ON(offsetof(struct pfa_table, physical_addr) != 0x20);
+ BUILD_BUG_ON(offsetof(struct pfa_table, dimm[1].ddr_dimm_column_id) !=
+ 0x3e);
+
+ /* Ask for PA/DIMMs of last error */
+ if (pfa_command(m->bank, m->socketid,
+ PFA_CMD_GET_MEM_CORR_ERR_PA, PFA_VALID_BANKID) < 0)
+ return -1;
+
+ /*
+ * Recheck machine check bank. If the overflow bit was set
+ * there was a race. Don't use the information in this case.
+ */
+ rdmsrl(MSR_IA32_MCx_STATUS(m->bank), status);
+ if (status & MCI_STATUS_OVER) {
+ pr_debug("%d: overflow race on bank %d\n", cpu, m->bank);
+ return -1;
+ }
+
+ ret = -1;
+ valid = pfa->valid;
+ if (valid & PFA_VALID_PA) {
+ m->status |= MCI_STATUS_ADDRV;
+ m->addr = pfa_table->physical_addr;
+ pr_debug("%d: got physical address %llx valid %x\n",
+ cpu, m->addr, valid);
+ ret = 0;
+ }
+
+ /* When DIMM information was supplied pass it out */
+ if (valid & PFA_DIMM_VALID_MASK) {
+ m->aux0 = encode_dimm(&pfa->dimm[0], DIMM_VALID_BITS(valid, 0));
+ m->aux1 = encode_dimm(&pfa->dimm[1], DIMM_VALID_BITS(valid, 1));
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * Xeon 75xx specific mce poll method to retrieve the physical address
+ * and DIMM information.
+ */
+static void xeon75xx_mce_poll(struct mce *m)
+{
+ static DEFINE_SPINLOCK(convert_lock); /* Protect table and static */
+ static unsigned long cperm;
+ static ktime_t last, last_int;
+ unsigned long flags;
+ ktime_t now;
+ s64 delta;
+
+ /* Memory error? */
+ if (m->bank != MCE_BANK_MBOX0 && m->bank != MCE_BANK_MBOX1)
+ return;
+ if (m->status & MCI_STATUS_OVER)
+ return;
+ if (memerr_max_conv_rate == 0)
+ return;
+
+ spin_lock_irqsave(&convert_lock, flags);
+ /*
+ * Rate limit conversions. The conversion takes some time,
+ * but it's not good to use all the CPU time during a error
+ * flood.
+ * Enforce maximum number per second and minimum interval.
+ * The ktime call should use TSC on this machine and be fast.
+ */
+ now = ktime_get();
+ delta = ktime_us_delta(now, last);
+ if (delta >= RATE_LIMIT_PERIOD) {
+ cperm = 0;
+ last = now;
+ }
+ if (ktime_us_delta(now, last_int) >= memerr_min_interval &&
+ ++cperm <= memerr_max_conv_rate) {
+ if (translate_memory_error(m) < 0) {
+ /* On error stop converting for the next second */
+ cperm = memerr_max_conv_rate;
+ pr_debug("PFA translation failed\n");
+ }
+ } else
+ pfa_lost++;
+ last_int = now;
+ spin_unlock_irqrestore(&convert_lock, flags);
+}
+
+static struct pci_device_id bxb_mce_pciids[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_BXB_ICH_LEGACY0) },
+ {}
+};
+
+static int __init xeon75xx_mce_init(void)
+{
+ u32 addr = 0;
+ struct pci_dev *dev;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6 ||
+ boot_cpu_data.x86_model != 0x2e)
+ return -ENODEV;
+
+ /*
+ * Get table address from register in IOH.
+ * This just looks up the device, because we don't want to "own" it.
+ */
+ dev = NULL;
+ while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, dev))
+ != NULL) {
+ if (!pci_match_id(bxb_mce_pciids, dev))
+ continue;
+ pci_read_config_dword(dev, ICH_PFA_CFG, &addr);
+ if (addr)
+ break;
+ }
+ pci_dev_put(dev);
+ if (!addr)
+ return -ENODEV;
+
+ if (!e820_all_mapped(addr, addr + PAGE_SIZE, E820_RESERVED)) {
+ pr_info("PFA table at %x not e820 reserved\n", addr);
+ return -ENODEV;
+ }
+
+ pfa_table = (__force struct pfa_table *)ioremap_cache(addr, PAGE_SIZE);
+ if (!pfa_table) {
+ pr_err("Cannot map PFA table at %x\n", addr);
+ return -EIO;
+ }
+
+ if (memcmp(&pfa_table->sig, PFA_SIG, PFA_SIG_LEN) ||
+ pfa_table->len < sizeof(struct pfa_table) ||
+ /* assume newer versions are compatible */
+ pfa_table->revision < PFA_REVISION) {
+ pr_info("PFA table at %x invalid\n", addr);
+ goto error_unmap;
+ }
+
+ if (!nocsum && csum((u8 *)pfa_table,
+ offsetof(struct pfa_table, command))) {
+ pr_info("PFA table at %x length %u has invalid checksum\n",
+ addr, pfa_table->len);
+ goto error_unmap;
+ }
+
+ /* Not strictly needed today */
+ if (pfa_table->len > PAGE_SIZE) {
+ unsigned len = roundup(pfa_table->len, PAGE_SIZE);
+ iounmap(pfa_table);
+ pfa_table = (__force void *)ioremap_cache(addr, len);
+ if (!pfa_table) {
+ pr_err("Cannot remap %u bytes PFA table at %x\n",
+ len, addr);
+ return -EIO;
+ }
+ }
+
+ if (!notest) {
+ int status = pfa_command(0, 0, PFA_CMD_GET_TOPOLOGY, 0);
+ if (status < 0) {
+ pr_err("Test of PFA table failed: %x\n", -status);
+ goto error_unmap;
+ }
+ }
+
+ pr_info("Found Xeon75xx PFA memory error translation table at %x\n",
+ addr);
+ mb();
+ cpu_specific_poll = xeon75xx_mce_poll;
+ return 0;
+
+error_unmap:
+ iounmap(pfa_table);
+ return -ENODEV;
+}
+
+MODULE_DEVICE_TABLE(pci, bxb_mce_pciids);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Andi Kleen");
+MODULE_DESCRIPTION("Intel Xeon 75xx specific DIMM error reporting");
+
+#ifdef CONFIG_MODULE
+static void __exit xeon75xx_mce_exit(void)
+{
+ cpu_specific_poll = NULL;
+ wmb();
+ /* Wait for all machine checks to finish before really unloading */
+ synchronize_rcu();
+ iounmap(pfa_table);
+}
+
+module_init(xeon75xx_mce_init);
+module_exit(xeon75xx_mce_exit);
+#else
+/* When built-in run as soon as the PCI subsystem is up */
+fs_initcall(xeon75xx_mce_init);
+#endif
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -94,6 +94,8 @@ static char *mce_helper_argv[2] = { mc
static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+void (*cpu_specific_poll)(struct mce *);
+EXPORT_SYMBOL_GPL(cpu_specific_poll);
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
@@ -371,6 +373,11 @@ static void mce_wrmsrl(u32 msr, u64 v)
wrmsrl(msr, v);
}
+static int under_injection(void)
+{
+ return __get_cpu_var(injectm).finished;
+}
+
/*
* Simple lockless ring to communicate PFNs from the exception handler with the
* process context work function. This is vastly simplified because there's
@@ -574,6 +581,10 @@ void machine_check_poll(enum mcp_flags f
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+ if (cpu_specific_poll && !under_injection() && !mce_dont_log_ce)
+ cpu_specific_poll(&m);
+
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(e820_any_mapped);
* Note: this function only works correct if the e820 table is sorted and
* not-overlapping, which is the case
*/
-int __init e820_all_mapped(u64 start, u64 end, unsigned type)
+int e820_all_mapped(u64 start, u64 end, unsigned type)
{
int i;
@@ -98,6 +98,7 @@ int __init e820_all_mapped(u64 start, u6
}
return 0;
}
+EXPORT_SYMBOL_GPL(e820_all_mapped);
/*
* Add a memory region to the kernel e820 map.

@ -0,0 +1,25 @@
From: Andi Kleen <andi@firstfloor.org>
Subject: x86, mce: Xeon75xx specific interface to get corrected memory error information
Patch-Mainline: submitted to x86-tip, added but reverted due to a minor compile issue
which gets fixed by this patch
References: bnc#573380, fate#307738
http://lkml.org/lkml/2010/1/23/50
Signed-off-by: Thomas Renninger <trenn@suse.de>
---
arch/x86/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -921,7 +921,7 @@ config X86_MCE_INTEL
config X86_MCE_XEON75XX
tristate "Intel Xeon 7500 series corrected memory error driver"
- depends on X86_MCE_INTEL
+ depends on X86_MCE_INTEL && PCI
---help---
Add support for a Intel Xeon 7500 series specific memory error driver.
This allows to report the DIMM and physical address on a corrected

@ -0,0 +1,72 @@
From: H. Peter Anvin <hpa@zytor.com>
Subject: x86, mce: Rename cpu_specific_poll to mce_cpu_specific_poll
Patch-Mainline: submitted to x86-tip, added but reverted due to a minor compile issue
which gets fixed by an incremental patch
References: bnc#573380, fate#307738
http://lkml.org/lkml/2010/1/22/99
cpu_specific_poll is a global variable, and it should have a global
namespace name. Since it is MCE-specific (it takes a struct mce *),
rename it mce_cpu_specific_poll.
Signed-off-by: Thomas Renninger <trenn@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 +-
arch/x86/kernel/cpu/mcheck/mce-xeon75xx.c | 4 ++--
arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++----
3 files changed, 7 insertions(+), 7 deletions(-)
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,4 +28,4 @@ extern int mce_ser;
extern struct mce_bank *mce_banks;
-extern void (*cpu_specific_poll)(struct mce *);
+extern void (*mce_cpu_specific_poll)(struct mce *);
--- a/arch/x86/kernel/cpu/mcheck/mce-xeon75xx.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-xeon75xx.c
@@ -396,7 +396,7 @@ static int __init xeon75xx_mce_init(void
pr_info("Found Xeon75xx PFA memory error translation table at %x\n",
addr);
mb();
- cpu_specific_poll = xeon75xx_mce_poll;
+ mce_cpu_specific_poll = xeon75xx_mce_poll;
return 0;
error_unmap:
@@ -412,7 +412,7 @@ MODULE_DESCRIPTION("Intel Xeon 75xx spec
#ifdef CONFIG_MODULE
static void __exit xeon75xx_mce_exit(void)
{
- cpu_specific_poll = NULL;
+ mce_cpu_specific_poll = NULL;
wmb();
/* Wait for all machine checks to finish before really unloading */
synchronize_rcu();
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -94,8 +94,8 @@ static char *mce_helper_argv[2] = { mc
static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-void (*cpu_specific_poll)(struct mce *);
-EXPORT_SYMBOL_GPL(cpu_specific_poll);
+void (*mce_cpu_specific_poll)(struct mce *);
+EXPORT_SYMBOL_GPL(mce_cpu_specific_poll);
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
@@ -582,8 +582,8 @@ void machine_check_poll(enum mcp_flags f
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
- if (cpu_specific_poll && !under_injection() && !mce_dont_log_ce)
- cpu_specific_poll(&m);
+ if (mce_cpu_specific_poll && !under_injection() && !mce_dont_log_ce)
+ mce_cpu_specific_poll(&m);
/*
* Don't get the IP here because it's unlikely to

Binary file not shown.

@ -0,0 +1,77 @@
From 9286a0bc63de32c66d894b45dcf048a072a84cd7 Mon Sep 17 00:00:00 2001
From: Adam Jackson <ajax@redhat.com>
Date: Mon, 19 Apr 2010 15:57:25 -0400
Subject: [PATCH 1/4] drm/i915: Use spatio-temporal dithering on PCH
Spatial dither is better than nothing, but ST is even better.
(from ajax's followup message:)
I noticed this with:
http://ajax.fedorapeople.org/YellowFlower.jpg
set as my desktop background in Gnome on a 1280x800 machine (in
particular, a Sony Vaio VPCB1 with 6-bit panel and a rather bright black
level). Easiest way to test this is by poking at PIPEACONF with
intel_reg_write directly:
% sudo intel_reg_write 0x70008 0xc0000040 # no dither
% sudo intel_reg_write 0x70008 0xc0000050 # spatial
% sudo intel_reg_write 0x70008 0xc0000054 # ST
I notice it especially strongly in the relatively flat dark area in the
top left. Closer than about 18" I can see a noticeable checkerboard
pattern with plain spatial dithering. ST smooths that out; I can still
tell that it's lacking color precision, but it's not offensive.
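(Relating those values to the new register bits below: 0xc0000040 ->
0xc0000050 sets bit 4, PIPE_ENABLE_DITHER; 0xc0000050 -> 0xc0000054
additionally sets bit 2, PIPE_DITHER_TYPE_ST01.)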
Signed-off-by: Adam Jackson <ajax@redhat.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
drivers/gpu/drm/i915/i915_reg.h | 5 ++++-
drivers/gpu/drm/i915/intel_display.c | 10 ++++++----
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4cbc521..89b6efc 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1924,7 +1924,10 @@
/* Display & cursor control */
/* dithering flag on Ironlake */
-#define PIPE_ENABLE_DITHER (1 << 4)
+#define PIPE_ENABLE_DITHER (1 << 4)
+#define PIPE_DITHER_TYPE_MASK (3 << 2)
+#define PIPE_DITHER_TYPE_SPATIAL (0 << 2)
+#define PIPE_DITHER_TYPE_ST01 (1 << 2)
/* Pipe A */
#define PIPEADSL 0x70000
#define PIPEACONF 0x70008
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c7502b6..f1a37d9 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3321,14 +3321,16 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
/* set the dithering flag */
if (IS_I965G(dev)) {
if (dev_priv->lvds_dither) {
- if (HAS_PCH_SPLIT(dev))
+ if (HAS_PCH_SPLIT(dev)) {
pipeconf |= PIPE_ENABLE_DITHER;
- else
+ pipeconf |= PIPE_DITHER_TYPE_ST01;
+ } else
lvds |= LVDS_ENABLE_DITHER;
} else {
- if (HAS_PCH_SPLIT(dev))
+ if (HAS_PCH_SPLIT(dev)) {
pipeconf &= ~PIPE_ENABLE_DITHER;
- else
+ pipeconf &= ~PIPE_DITHER_TYPE_MASK;
+ } else
lvds &= ~LVDS_ENABLE_DITHER;
}
}
--
1.7.0.1

@ -0,0 +1,37 @@
From 7f588d4ca94f4efd146b47cdcb6483edda4886f4 Mon Sep 17 00:00:00 2001
From: Adam Jackson <ajax@redhat.com>
Date: Fri, 28 May 2010 17:17:37 -0400
Subject: [PATCH 2/4] drm/i915: Honor sync polarity from VBT panel timing descriptors
I'm actually kind of shocked that it works at all otherwise.
Signed-off-by: Adam Jackson <ajax@redhat.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
drivers/gpu/drm/i915/intel_bios.c | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index f9ba452..8905070 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -95,6 +95,16 @@ fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode,
panel_fixed_mode->clock = dvo_timing->clock * 10;
panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
+ if (dvo_timing->hsync_positive)
+ panel_fixed_mode->flags |= DRM_MODE_FLAG_PHSYNC;
+ else
+ panel_fixed_mode->flags |= DRM_MODE_FLAG_NHSYNC;
+
+ if (dvo_timing->vsync_positive)
+ panel_fixed_mode->flags |= DRM_MODE_FLAG_PVSYNC;
+ else
+ panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC;
+
/* Some VBTs have bogus h/vtotal values */
if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal)
panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1;
--
1.7.0.1

@ -0,0 +1,266 @@
From 2fb8b53110fdf163eae9e8a506bf769449e2ee4b Mon Sep 17 00:00:00 2001
From: Joanna Rutkowska <joanna@invisiblethingslab.com>
Date: Tue, 29 Jun 2010 08:34:37 +0200
Subject: [PATCH 3/4] drm/i915: Add the support of eDP on DP-D for Ibex/CPT
On some machines the eDP panel is connected to the PCH DP-D port instead of DP-A.
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Conflicts:
drivers/gpu/drm/i915/intel_dp.c
---
drivers/gpu/drm/i915/intel_display.c | 2 +-
drivers/gpu/drm/i915/intel_dp.c | 99 ++++++++++++++++++++++++++++++---
drivers/gpu/drm/i915/intel_drv.h | 1 +
3 files changed, 92 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f1a37d9..32ae849 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3073,7 +3073,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
temp |= PIPE_8BPC;
else
temp |= PIPE_6BPC;
- } else if (is_edp) {
+ } else if (is_edp || intel_edp_is_pch(crtc)) {
switch (dev_priv->edp_bpp/3) {
case 8:
temp |= PIPE_8BPC;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 77e40cf..c13c3bf 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -43,6 +43,7 @@
#define DP_LINK_CONFIGURATION_SIZE 9
#define IS_eDP(i) ((i)->type == INTEL_OUTPUT_EDP)
+#define IS_PCH_eDP(dp_priv) ((dp_priv)->is_edpd)
struct intel_dp_priv {
uint32_t output_reg;
@@ -58,6 +59,7 @@ struct intel_dp_priv {
struct intel_encoder *intel_encoder;
struct i2c_adapter adapter;
struct i2c_algo_dp_aux_data algo;
+ bool is_edpd;
};
static void
@@ -130,8 +132,9 @@ intel_dp_link_required(struct drm_device *dev,
struct intel_encoder *intel_encoder, int pixel_clock)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_dp_priv *dp_priv = intel_encoder->dev_priv;
- if (IS_eDP(intel_encoder))
+ if (IS_eDP(intel_encoder) || IS_PCH_eDP(dp_priv))
return (pixel_clock * dev_priv->edp_bpp) / 8;
else
return pixel_clock * 3;
@@ -534,14 +537,14 @@ intel_reduce_ratio(uint32_t *num, uint32_t *den)
}
static void
-intel_dp_compute_m_n(int bytes_per_pixel,
+intel_dp_compute_m_n(int bpp,
int nlanes,
int pixel_clock,
int link_clock,
struct intel_dp_m_n *m_n)
{
m_n->tu = 64;
- m_n->gmch_m = pixel_clock * bytes_per_pixel;
+ m_n->gmch_m = (pixel_clock * bpp) >> 3;
m_n->gmch_n = link_clock * nlanes;
intel_reduce_ratio(&m_n->gmch_m, &m_n->gmch_n);
m_n->link_m = pixel_clock;
@@ -549,6 +552,31 @@ intel_dp_compute_m_n(int bytes_per_pixel,
intel_reduce_ratio(&m_n->link_m, &m_n->link_n);
}
+bool intel_edp_is_pch(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_encoder *encoder;
+ bool ret = false;
+
+ list_for_each_entry(encoder, &mode_config->encoder_list, head) {
+ struct intel_encoder *intel_encoder;
+ struct intel_dp_priv *dp_priv;
+
+ if (!encoder || encoder->crtc != crtc)
+ continue;
+
+ intel_encoder = enc_to_intel_encoder(encoder);
+ dp_priv = intel_encoder->dev_priv;
+
+ if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) {
+ ret = IS_PCH_eDP(dp_priv);
+ break;
+ }
+ }
+ return ret;
+}
+
void
intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
@@ -558,7 +586,7 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
struct drm_connector *connector;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- int lane_count = 4;
+ int lane_count = 4, bpp = 24;
struct intel_dp_m_n m_n;
/*
@@ -573,6 +601,8 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) {
lane_count = dp_priv->lane_count;
+ if (IS_PCH_eDP(dp_priv))
+ bpp = dev_priv->edp_bpp;
break;
}
}
@@ -582,7 +612,7 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
* the number of bytes_per_pixel post-LUT, which we always
* set up for 8-bits of R/G/B, or 3 bytes total.
*/
- intel_dp_compute_m_n(3, lane_count,
+ intel_dp_compute_m_n(bpp, lane_count,
mode->clock, adjusted_mode->clock, &m_n);
if (HAS_PCH_SPLIT(dev)) {
@@ -711,13 +741,13 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode)
if (mode != DRM_MODE_DPMS_ON) {
if (dp_reg & DP_PORT_EN) {
intel_dp_link_down(intel_encoder, dp_priv->DP);
- if (IS_eDP(intel_encoder))
+ if (IS_eDP(intel_encoder) || IS_PCH_eDP(dp_priv))
ironlake_edp_backlight_off(dev);
}
} else {
if (!(dp_reg & DP_PORT_EN)) {
intel_dp_link_train(intel_encoder, dp_priv->DP, dp_priv->link_configuration);
- if (IS_eDP(intel_encoder))
+ if (IS_eDP(intel_encoder) || IS_PCH_eDP(dp_priv))
ironlake_edp_backlight_on(dev);
}
}
@@ -1225,6 +1255,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
struct intel_encoder *intel_encoder = to_intel_encoder(connector);
struct drm_device *dev = intel_encoder->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_dp_priv *dp_priv = intel_encoder->dev_priv;
int ret;
/* We should parse the EDID data and find out if it has an audio sink
@@ -1235,7 +1266,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
return ret;
/* if eDP has no EDID, try to use fixed panel mode from VBT */
- if (IS_eDP(intel_encoder)) {
+ if (IS_eDP(intel_encoder) || IS_PCH_eDP(dp_priv)) {
if (dev_priv->panel_fixed_mode != NULL) {
struct drm_display_mode *mode;
mode = drm_mode_duplicate(dev, dev_priv->panel_fixed_mode);
@@ -1299,6 +1330,50 @@ intel_dp_hot_plug(struct intel_encoder *intel_encoder)
intel_dp_check_link_status(intel_encoder);
}
+/* Return which DP Port should be selected for Transcoder DP control */
+int
+intel_trans_dp_port_sel (struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_encoder *encoder;
+ struct intel_encoder *intel_encoder = NULL;
+
+ list_for_each_entry(encoder, &mode_config->encoder_list, head) {
+ if (encoder->crtc != crtc)
+ continue;
+
+ intel_encoder = enc_to_intel_encoder(encoder);
+ if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) {
+ struct intel_dp_priv *dp_priv = intel_encoder->dev_priv;
+ return dp_priv->output_reg;
+ }
+ }
+ return -1;
+}
+
+static bool intel_dpd_is_edp(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct child_device_config *p_child;
+ int i, ret = false;
+
+ if (!dev_priv->child_dev_num)
+ return false;
+
+ for (i = 0; i < dev_priv->child_dev_num; i++) {
+ p_child = dev_priv->child_dev + i;
+ if (p_child->device_type != DEVICE_TYPE_eDP)
+ continue;
+
+ if (p_child->dvo_port == PORT_IDPD) {
+ ret = true;
+ break;
+ }
+ }
+ return ret;
+}
+
void
intel_dp_init(struct drm_device *dev, int output_reg)
{
@@ -1320,6 +1395,7 @@ intel_dp_init(struct drm_device *dev, int output_reg)
DRM_MODE_CONNECTOR_DisplayPort);
drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
+ dp_priv->is_edpd = false;
if (output_reg == DP_A)
intel_encoder->type = INTEL_OUTPUT_EDP;
else
@@ -1335,6 +1411,11 @@ intel_dp_init(struct drm_device *dev, int output_reg)
if (IS_eDP(intel_encoder))
intel_encoder->clone_mask = (1 << INTEL_EDP_CLONE_BIT);
+ if (HAS_PCH_SPLIT(dev) && (output_reg == PCH_DP_D)) {
+ if (intel_dpd_is_edp(dev))
+ dp_priv->is_edpd = true;
+ }
+
intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
connector->interlace_allowed = true;
connector->doublescan_allowed = 0;
@@ -1383,7 +1464,7 @@ intel_dp_init(struct drm_device *dev, int output_reg)
intel_encoder->ddc_bus = &dp_priv->adapter;
intel_encoder->hot_plug = intel_dp_hot_plug;
- if (output_reg == DP_A) {
+ if ((output_reg == DP_A) || IS_PCH_eDP(dp_priv)) {
/* initialize panel mode from VBT if available for eDP */
if (dev_priv->lfp_lvds_vbt_mode) {
dev_priv->panel_fixed_mode =
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index e302537..0858a17 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -175,6 +175,7 @@ extern void intel_dp_init(struct drm_device *dev, int dp_reg);
void
intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode);
+extern bool intel_edp_is_pch(struct drm_crtc *crtc);
extern void intel_edp_link_config (struct intel_encoder *, int *, int *);
--
1.7.0.1

@ -0,0 +1,38 @@
From 46e3e699294d3fe4fecb08d697bb29addab29576 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Fri, 28 May 2010 20:28:41 +0800
Subject: [PATCH 4/4] drm/i915: Configure the PIPECONF dither correctly for eDP
A non-8 BPC mode can be used for an eDP output device connected through
DP-A or DP-D on the PCH. In that case we should set the PIPECONF dither correctly.
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
---
drivers/gpu/drm/i915/intel_display.c | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 32ae849..49c9663 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3239,6 +3239,17 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
/* setup pipeconf */
pipeconf = I915_READ(pipeconf_reg);
+ if (HAS_PCH_SPLIT(dev) && (is_edp || intel_edp_is_pch(crtc))) {
+ /* configure the dither correctly for eDP */
+ pipeconf &= ~PIPE_DITHER_TYPE_MASK;
+ if ((pipeconf & PIPE_BPC_MASK) != PIPE_8BPC) {
+ pipeconf |= PIPE_ENABLE_DITHER;
+ pipeconf |= PIPE_DITHER_TYPE_ST01;
+ } else {
+ pipeconf &= ~PIPE_ENABLE_DITHER;
+ }
+ }
+
/* Set up the display plane register */
dspcntr = DISPPLANE_GAMMA_ENABLE;
--
1.7.0.1

@ -0,0 +1,40 @@
From: Brandon Philips <bphilips@suse.de>
Subject: [PATCH] bnx2: entropy source
Patch-mainline: never
References: FATE#307517
Current disk-less systems have no entropy source whatsoever. Therefore, the
network drivers tg3, bnx2, e1000, e1000e, igb and ixgbe should be enabled to
feed entropy to the kernel via the IRQF_SAMPLE_RANDOM flag when loaded. This
option shall not be enabled by default but implemented via a module option to
be activated by the administrator.
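With the patch applied, an administrator opts in at module load time, e.g.:

  modprobe bnx2 entropy=1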
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/bnx2.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -84,6 +84,10 @@ MODULE_FIRMWARE(FW_MIPS_FILE_09);
MODULE_FIRMWARE(FW_RV2P_FILE_09);
MODULE_FIRMWARE(FW_RV2P_FILE_09_Ax);
+static int entropy = 0;
+module_param(entropy, int, 0);
+MODULE_PARM_DESC(entropy, "Allow bnx2 to populate the /dev/random entropy pool");
+
static int disable_msi = 0;
module_param(disable_msi, int, 0);
@@ -6081,6 +6085,9 @@ bnx2_request_irq(struct bnx2 *bp)
else
flags = IRQF_SHARED;
+ if (entropy)
+ flags |= IRQF_SAMPLE_RANDOM;
+
for (i = 0; i < bp->irq_nvecs; i++) {
irq = &bp->irq_tbl[i];
rc = request_irq(irq->vector, irq->handler, flags, irq->name,

@ -0,0 +1,45 @@
From: Xiuling Ma <xma@us.ibm.com>
Subject: [PATCH] disable catas_reset by default to avoid problems with EEH
References: bnc#456389
Patch-mainline: not yet
PPC machines with EEH and Mellanox ib/net cards with catastrophic error
recovery that encounter a PCI bus error can crash and become
unresponsive.
Disable the card reset to avoid this.
NOTE: an upstream fix will come later, once IBM can review a couple of
approaches I suggested, since this fix is brute force. This driver didn't have
this reset-on-error feature in SLES10, so it isn't a feature removal.
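To get the old behaviour back, an administrator can flip the parameters at
load time (module names ib_mthca and mlx4_core assumed here):

  modprobe ib_mthca catas_reset_disable=0
  modprobe mlx4_core internal_err_reset=1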
Signed-off-by: Xiuling Ma <xma@us.ibm.com>
Acked-by: Brandon Philips <bphilips@suse.de>
---
drivers/infiniband/hw/mthca/mthca_catas.c | 2 +-
drivers/net/mlx4/catas.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -51,7 +51,7 @@ static LIST_HEAD(catas_list);
static struct workqueue_struct *catas_wq;
static struct work_struct catas_work;
-static int catas_reset_disable;
+static int catas_reset_disable = 1;
module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(catas_lock);
static LIST_HEAD(catas_list);
static struct work_struct catas_work;
-static int internal_err_reset = 1;
+static int internal_err_reset = 0;
module_param(internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
"Reset device on internal errors if non-zero (default 1)");

@ -0,0 +1,178 @@
From 578454ff7eab61d13a26b568f99a89a2c9edc881 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 20 May 2010 18:07:20 +0200
Subject: driver core: add devname module aliases to allow module on-demand auto-loading
Patch-mainline: 2.6.35
From: Kay Sievers <kay.sievers@vrfy.org>
commit 578454ff7eab61d13a26b568f99a89a2c9edc881 upstream.
This adds:
alias: devname:<name>
to some common kernel modules, which will allow the on-demand loading
of the kernel module when the device node is accessed.
Ideally all these modules would be compiled in, but distros seem so
much in love with their modularization that we need to cover the common
cases with this new facility. It will allow us to remove a bunch of pretty
useless init scripts and modprobe calls from init scripts.
The static device node aliases will be carried in the module itself. The
program depmod will extract this information to a file in the module directory:
$ cat /lib/modules/2.6.34-00650-g537b60d-dirty/modules.devname
# Device nodes to trigger on-demand module loading.
microcode cpu/microcode c10:184
fuse fuse c10:229
ppp_generic ppp c108:0
tun net/tun c10:200
dm_mod mapper/control c10:235
Udev will pick up the depmod created file on startup and create all the
static device nodes which the kernel modules specify, so that these modules
get automatically loaded when the device node is accessed:
$ /sbin/udevd --debug
...
static_dev_create_from_modules: mknod '/dev/cpu/microcode' c10:184
static_dev_create_from_modules: mknod '/dev/fuse' c10:229
static_dev_create_from_modules: mknod '/dev/ppp' c108:0
static_dev_create_from_modules: mknod '/dev/net/tun' c10:200
static_dev_create_from_modules: mknod '/dev/mapper/control' c10:235
udev_rules_apply_static_dev_perms: chmod '/dev/net/tun' 0666
udev_rules_apply_static_dev_perms: chmod '/dev/fuse' 0666
A few device nodes are switched to statically allocated numbers, to allow
the static nodes to work. This might also be useful for systems which still run
a plain static /dev, which is completely unsafe to use with any dynamic minor
numbers.
Note:
The devname aliases must be limited to the *common* and *single*instance*
device nodes, like the misc devices, and never be used for conceptually limited
systems like the loop devices, which should rather get fixed properly and get a
control node for losetup to talk to, instead of creating a random number of
device nodes in advance, regardless if they are ever used.
This facility is to hide the mess distros are creating with overly modularized
kernels, and just to hide that these modules are not compiled in, not to
paper over broken concepts. Thanks! :)
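For illustration, the pattern a driver opts into here is a static minor
from the reserved-for-local-use range plus a devname alias. A minimal
hypothetical "foo" module (made-up names, not from this patch) would look
like:

#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>

#define FOO_MINOR 240                   /* 240-254: reserved for local use */

static const struct file_operations foo_fops = {
        .owner = THIS_MODULE,
};

static struct miscdevice foo_misc = {
        .minor = FOO_MINOR,             /* static, not MISC_DYNAMIC_MINOR */
        .name  = "foo",
        .fops  = &foo_fops,
};

MODULE_ALIAS_MISCDEV(FOO_MINOR);
MODULE_ALIAS("devname:foo");            /* ends up in modules.devname */

static int __init foo_init(void)
{
        return misc_register(&foo_misc);
}

static void __exit foo_exit(void)
{
        misc_deregister(&foo_misc);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");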
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Cc: Ian Kent <raven@themaw.net>
Signed-Off-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -443,6 +443,8 @@ Your cooperation is appreciated.
231 = /dev/snapshot System memory snapshot device
232 = /dev/kvm Kernel-based virtual machine (hardware virtualization extensions)
233 = /dev/kmview View-OS A process with a view
+ 234 = /dev/btrfs-control Btrfs control device
+ 235 = /dev/autofs Autofs control device
240-254 Reserved for local use
255 Reserved for MISC_DYNAMIC_MINOR
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 2cd8c54..fa6551d 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -260,6 +260,7 @@ static void microcode_dev_exit(void)
}
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+MODULE_ALIAS("devname:cpu/microcode");
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while (0)
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 5441688..c5f8eb1 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -2926,5 +2926,5 @@ EXPORT_SYMBOL(ppp_output_wakeup);
EXPORT_SYMBOL(ppp_register_compressor);
EXPORT_SYMBOL(ppp_unregister_compressor);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV_MAJOR(PPP_MAJOR);
-MODULE_ALIAS("/dev/ppp");
+MODULE_ALIAS_CHARDEV(PPP_MAJOR, 0);
+MODULE_ALIAS("devname:ppp");
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 97b2553..005cad6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1649,3 +1649,4 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(TUN_MINOR);
+MODULE_ALIAS("devname:net/tun");
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index d29b7f6..d832062 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -736,11 +736,14 @@ static const struct file_operations _dev_ioctl_fops = {
};
static struct miscdevice _autofs_dev_ioctl_misc = {
- .minor = MISC_DYNAMIC_MINOR,
+ .minor = AUTOFS_MINOR,
.name = AUTOFS_DEVICE_NAME,
.fops = &_dev_ioctl_fops
};
+MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
+MODULE_ALIAS("devname:autofs");
+
/* Register/deregister misc character device */
int autofs_dev_ioctl_init(void)
{
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1866dff..2909a03 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -832,11 +832,14 @@ static const struct file_operations btrfs_ctl_fops = {
};
static struct miscdevice btrfs_misc = {
- .minor = MISC_DYNAMIC_MINOR,
+ .minor = BTRFS_MINOR,
.name = "btrfs-control",
.fops = &btrfs_ctl_fops
};
+MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
+MODULE_ALIAS("devname:btrfs-control");
+
static int btrfs_interface_init(void)
{
return misc_register(&btrfs_misc);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index eb7e942..e53df5e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+MODULE_ALIAS("devname:fuse");
static struct kmem_cache *fuse_req_cachep;
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 8b5f7cc..b631c46 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,8 @@
#define FUSE_MINOR 229
#define KVM_MINOR 232
#define VHOST_NET_MINOR 233
+#define BTRFS_MINOR 234
+#define AUTOFS_MINOR 235
#define MISC_DYNAMIC_MINOR 255
struct device;

@ -0,0 +1,35 @@
From feacc14de65224ccda1d8fae5140cdf043a151b0 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 17 Jun 2010 12:42:14 +0200
Subject: [PATCH] drm/nouveau: Don't clear AGPCMD completely on INIT_RESET.
We just need to clear the SBA and ENABLE bits to reset the AGP
controller: if the AGP bridge was configured to use "fast writes",
clearing the FW bit would break the subsequent MMIO writes and
eventually end in a lockup.
Note that all the BIOSes I've seen do the same as we did (it works for
them because they don't use MMIO), OTOH the blob leaves FW untouched.
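(In the hunk below, the ~0xf00 mask clears bits 8-11, which include the
ENABLE (bit 8) and SBA (bit 9) enables, while leaving FW (bit 4) untouched;
this assumes the standard AGP command register bit layout.)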
Signed-off-by: Francisco Jerez <currojerez@riseup.net>
---
drivers/gpu/drm/nouveau/nouveau_bios.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index abc382a..7c983d8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -1910,7 +1910,8 @@ init_reset(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
/* no iexec->execute check by design */
pci_nv_19 = bios_rd32(bios, NV_PBUS_PCI_NV_19);
- bios_wr32(bios, NV_PBUS_PCI_NV_19, 0);
+ bios_wr32(bios, NV_PBUS_PCI_NV_19, pci_nv_19 & ~0xf00);
+
bios_wr32(bios, reg, value1);
udelay(10);
--
1.7.0.1

@ -0,0 +1,110 @@
From d83809c6fdb908ba708382c9a506f6647d1fa86d Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sun, 9 May 2010 14:49:52 +0200
Subject: [PATCH] drm/nouveau: allow cursor image and position to survive suspend
- This isn't triggered yet on a normal kernel, because it still does a VT
switch, but it seemed like a good idea to fix this now.
Tested-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
---
drivers/gpu/drm/nouveau/nouveau_crtc.h | 2 ++
drivers/gpu/drm/nouveau/nouveau_drv.c | 29 +++++++++++++++++++++++++++++
drivers/gpu/drm/nouveau/nv04_cursor.c | 1 +
drivers/gpu/drm/nouveau/nv50_cursor.c | 1 +
4 files changed, 33 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index 49fa7b2..cb1ce2a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -40,6 +40,8 @@ struct nouveau_crtc {
int sharpness;
int last_dpms;
+ int cursor_saved_x, cursor_saved_y;
+
struct {
int cpp;
bool blanked;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 1de974a..4bccba3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -177,6 +177,13 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
nouveau_bo_unpin(nouveau_fb->nvbo);
}
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+
+ nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+ nouveau_bo_unpin(nv_crtc->cursor.nvbo);
+ }
+
NV_INFO(dev, "Evicting buffers...\n");
ttm_bo_evict_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
@@ -318,12 +325,34 @@ nouveau_pci_resume(struct pci_dev *pdev)
nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM);
}
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+ int ret;
+
+ ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
+ if (!ret)
+ ret = nouveau_bo_map(nv_crtc->cursor.nvbo);
+ if (ret)
+ NV_ERROR(dev, "Could not pin/map cursor.\n");
+ }
+
if (dev_priv->card_type < NV_50) {
nv04_display_restore(dev);
NVLockVgaCrtcs(dev, false);
} else
nv50_display_init(dev);
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+
+ nv_crtc->cursor.set_offset(nv_crtc,
+ nv_crtc->cursor.nvbo->bo.offset -
+ dev_priv->vm_vram_base);
+
+ nv_crtc->cursor.set_pos(nv_crtc, nv_crtc->cursor_saved_x,
+ nv_crtc->cursor_saved_y);
+ }
+
/* Force CLUT to get re-loaded during modeset */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
diff --git a/drivers/gpu/drm/nouveau/nv04_cursor.c b/drivers/gpu/drm/nouveau/nv04_cursor.c
index 89a91b9..aaf3de3 100644
--- a/drivers/gpu/drm/nouveau/nv04_cursor.c
+++ b/drivers/gpu/drm/nouveau/nv04_cursor.c
@@ -20,6 +20,7 @@ nv04_cursor_hide(struct nouveau_crtc *nv_crtc, bool update)
static void
nv04_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
{
+ nv_crtc->cursor_saved_x = x; nv_crtc->cursor_saved_y = y;
NVWriteRAMDAC(nv_crtc->base.dev, nv_crtc->index,
NV_PRAMDAC_CU_START_POS,
XLATE(y, 0, NV_PRAMDAC_CU_START_POS_Y) |
diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
index 753e723..03ad7ab 100644
--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
+++ b/drivers/gpu/drm/nouveau/nv50_cursor.c
@@ -107,6 +107,7 @@ nv50_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
{
struct drm_device *dev = nv_crtc->base.dev;
+ nv_crtc->cursor_saved_x = x; nv_crtc->cursor_saved_y = y;
nv_wr32(dev, NV50_PDISPLAY_CURSOR_USER_POS(nv_crtc->index),
((y & 0xFFFF) << 16) | (x & 0xFFFF));
/* Needed to make the cursor move. */
--
1.7.0.1

@ -0,0 +1,68 @@
From: Brandeburg, Jesse <jesse.brandeburg@intel.com>
Subject: [PATCH] e1000: enhance frame fragment detection
References: bnc#567376, CVE-2009-4536
Patch-Mainline: Yes
A security discussion was recently given:
http://events.ccc.de/congress/2009/Fahrplan//events/3596.en.html
During that talk, a patch that I submitted a while back was brought up. Apparently some of their testing
revealed that they were able to force a buffer fragment in e1000 in which the
trailing fragment was greater than 4 bytes. As a result the fragment check I
introduced failed to detect the fragment and a partial invalid frame was
passed up into the network stack. I've written this patch to correct it. I'm
in the process of testing it now, but it makes good logical sense to me.
Effectively it maintains a per-adapter state variable which detects a non-EOP
frame, and discards it and subsequent non-EOP frames leading up to _and_
_including_ the next positive-EOP frame (as it is by definition the last
fragment). This should prevent any and all partial frames from entering the
network stack from e1000.
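For illustration only, the logic described above boils down to the following
helper (a sketch, not part of the patch; the driver open-codes this test in
e1000_clean_rx_irq() using the new adapter->discarding flag):

    /* Decide whether an rx descriptor's frame may enter the stack.
     * Returns false for every fragment of a multi-descriptor frame,
     * including the final one carrying the EOP bit. */
    static bool e1000_keep_frame(struct e1000_adapter *adapter, u8 status)
    {
            if (unlikely(!(status & E1000_RXD_STAT_EOP)))
                    adapter->discarding = true;      /* fragment run begins */

            if (adapter->discarding) {
                    if (status & E1000_RXD_STAT_EOP)
                            adapter->discarding = false; /* run ends here */
                    return false;                        /* drop it */
            }

            return true;    /* normal single-buffer frame */
    }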
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/e1000/e1000.h | 2 ++
drivers/net/e1000/e1000_main.c | 13 +++++++++++--
2 files changed, 13 insertions(+), 2 deletions(-)
--- a/drivers/net/e1000/e1000.h
+++ b/drivers/net/e1000/e1000.h
@@ -326,6 +326,8 @@ struct e1000_adapter {
/* for ioport free */
int bars;
int need_ioport;
+
+ bool discarding;
};
enum e1000_state_t {
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3834,13 +3834,22 @@ static bool e1000_clean_rx_irq(struct e1
length = le16_to_cpu(rx_desc->length);
/* !EOP means multiple descriptors were used to store a single
- * packet, also make sure the frame isn't just CRC only */
- if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) {
+ * packet, if that's the case we need to toss it. In fact, we need
+ * to toss every packet with the EOP bit clear and the next
+ * frame that _does_ have the EOP bit set, as it is by
+ * definition only a frame fragment
+ */
+ if (unlikely(!(status & E1000_RXD_STAT_EOP)))
+ adapter->discarding = true;
+
+ if (adapter->discarding) {
/* All receives must fit into a single buffer */
E1000_DBG("%s: Receive packet consumed multiple"
" buffers\n", netdev->name);
/* recycle */
buffer_info->skb = skb;
+ if (status & E1000_RXD_STAT_EOP)
+ adapter->discarding = false;
goto next_desc;
}

@ -0,0 +1,47 @@
From: Jiri Benc <jbenc@suse.cz>
Subject: Enable e1000 as entropy source (disabled by default)
References: FATE#307517
Patch-mainline: never
Based on the patch by Oracle:
> e1000: Add IRQF_SAMPLE_RANDOM flag to e1000 as a module option
>
> This patch allows for the bnx2 to add to the /dev/random entropy pool
> via a module parameter, entropy.
>
> 0 - default for EL5 - do not populate the entropy pool
> 1 - optional - Uses IRQF_SAMPLE_RANDOM flag on request_irq calls to populate
> the /dev/random pool
>
> Signed-off-by: John Sobecki <john.sobecki@oracle.com>
Signed-off-by: Brandon Philips <bphilips@suse.de>
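As a usage sketch (not part of the patch), an administrator could enable the
non-default behaviour persistently via a modprobe options file; the file name
here is hypothetical:

    # /etc/modprobe.d/99-local.conf
    options e1000 entropy=1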
---
drivers/net/e1000/e1000_main.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -213,6 +213,10 @@ static int debug = NETIF_MSG_DRV | NETIF
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+static int entropy = 0;
+module_param(entropy, int, 0);
+MODULE_PARM_DESC(entropy, "Allow e1000 to populate the /dev/random entropy pool");
+
/**
* e1000_init_module - Driver Registration Routine
*
@@ -262,6 +266,9 @@ static int e1000_request_irq(struct e100
int irq_flags = IRQF_SHARED;
int err;
+ if (entropy)
+ irq_flags |= IRQF_SAMPLE_RANDOM;
+
err = request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,
netdev);
if (err) {

@ -0,0 +1,142 @@
From: Neil Horman <nhorman@tuxdriver.com>
Subject: [PATCH] e1000e: enhance frame fragment detection
References: bnc#567376, CVE-2009-4538
A security discussion was recently given:
http://events.ccc.de/congress/2009/Fahrplan//events/3596.en.html And a patch
that I submitted a while back was brought up. Apparently some of their testing
revealed that they were able to force a buffer fragment in e1000e in which the
trailing fragment was greater than 4 bytes. As a result the fragment check I
introduced failed to detect the fragment and a partial invalid frame was
passed up into the network stack. I've written this patch to correct it. I'm
in the process of testing it now, but it makes good logical sense to me.
Effectively it maintains a per-adapter state variable which detects a non-EOP
frame, and discards it and subsequent non-EOP frames leading up to _and_
_including_ the next positive-EOP frame (as it is by definition the last
fragment). This should prevent any and all partial frames from entering the
network stack from e1000e.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/e1000e/e1000.h | 3 ++-
drivers/net/e1000e/netdev.c | 13 +++++++++++--
2 files changed, 13 insertions(+), 3 deletions(-)
Index: linux-2.6.31-openSUSE-11.2/drivers/net/e1000e/e1000.h
===================================================================
--- linux-2.6.31-openSUSE-11.2.orig/drivers/net/e1000e/e1000.h
+++ linux-2.6.31-openSUSE-11.2/drivers/net/e1000e/e1000.h
@@ -412,7 +412,8 @@ struct e1000_info {
enum e1000_state_t {
__E1000_TESTING,
__E1000_RESETTING,
- __E1000_DOWN
+ __E1000_DOWN,
+ __E1000_DISCARDING
};
enum latency_range {
Index: linux-2.6.31-openSUSE-11.2/drivers/net/e1000e/netdev.c
===================================================================
--- linux-2.6.31-openSUSE-11.2.orig/drivers/net/e1000e/netdev.c
+++ linux-2.6.31-openSUSE-11.2/drivers/net/e1000e/netdev.c
@@ -483,12 +483,21 @@ static bool e1000_clean_rx_irq(struct e1
length = le16_to_cpu(rx_desc->length);
/* !EOP means multiple descriptors were used to store a single
- * packet, also make sure the frame isn't just CRC only */
- if (!(status & E1000_RXD_STAT_EOP) || (length <= 4)) {
+ * packet, if that's the case we need to toss it. In fact, we need
+ * to toss every packet with the EOP bit clear and the next
+ * frame that _does_ have the EOP bit set, as it is by
+ * definition only a frame fragment
+ */
+ if (unlikely(!(status & E1000_RXD_STAT_EOP)))
+ set_bit(__E1000_DISCARDING, &adapter->state);
+
+ if (test_bit(__E1000_DISCARDING, &adapter->state)) {
/* All receives must fit into a single buffer */
e_dbg("Receive packet consumed multiple buffers\n");
/* recycle */
buffer_info->skb = skb;
+ if (status & E1000_RXD_STAT_EOP)
+ clear_bit(__E1000_DISCARDING, &adapter->state);
goto next_desc;
}

@ -0,0 +1,92 @@
From: Jiri Benc <jbenc@suse.cz>
Subject: Enable e1000e as entropy source (disabled by default)
References: FATE#307517
Patch-mainline: never
Current disk-less systems have no entropy source whatsoever. Therefore, the
network drivers tg3, bnx2, e1000, e1000e, igb and ixgbe should be enabled to
feed entropy to the kernel via the IRQF_SAMPLE_RANDOM flag when loaded. This
option shall not be enabled by default but implemented via a module option to
be activated by the administrator.
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/e1000e/e1000.h | 1 +
drivers/net/e1000e/netdev.c | 14 +++++++++-----
drivers/net/e1000e/param.c | 4 ++++
3 files changed, 14 insertions(+), 5 deletions(-)
Index: linux-2.6.34-master/drivers/net/e1000e/e1000.h
===================================================================
--- linux-2.6.34-master.orig/drivers/net/e1000e/e1000.h
+++ linux-2.6.34-master/drivers/net/e1000e/e1000.h
@@ -466,6 +466,7 @@ extern void e1000e_reset_interrupt_capab
extern void e1000e_disable_aspm(struct pci_dev *pdev, u16 state);
extern unsigned int copybreak;
+extern int entropy;
extern char *e1000e_get_hw_dev_name(struct e1000_hw *hw);
Index: linux-2.6.34-master/drivers/net/e1000e/netdev.c
===================================================================
--- linux-2.6.34-master.orig/drivers/net/e1000e/netdev.c
+++ linux-2.6.34-master/drivers/net/e1000e/netdev.c
@@ -1496,8 +1496,8 @@ static int e1000_request_msix(struct e10
else
memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
err = request_irq(adapter->msix_entries[vector].vector,
- e1000_intr_msix_rx, 0, adapter->rx_ring->name,
- netdev);
+ e1000_intr_msix_rx, entropy ? IRQF_SAMPLE_RANDOM : 0,
+ adapter->rx_ring->name, netdev);
if (err)
goto out;
adapter->rx_ring->itr_register = E1000_EITR_82574(vector);
@@ -1538,6 +1538,7 @@ static int e1000_request_irq(struct e100
{
struct net_device *netdev = adapter->netdev;
int err;
+ int irq_flags = 0;
if (adapter->msix_entries) {
err = e1000_request_msix(adapter);
@@ -1549,7 +1550,8 @@ static int e1000_request_irq(struct e100
e1000e_set_interrupt_capability(adapter);
}
if (adapter->flags & FLAG_MSI_ENABLED) {
- err = request_irq(adapter->pdev->irq, e1000_intr_msi, 0,
+ err = request_irq(adapter->pdev->irq, e1000_intr_msi,
+ entropy ? IRQF_SAMPLE_RANDOM : 0,
netdev->name, netdev);
if (!err)
return err;
@@ -1559,8 +1561,10 @@ static int e1000_request_irq(struct e100
adapter->int_mode = E1000E_INT_MODE_LEGACY;
}
- err = request_irq(adapter->pdev->irq, e1000_intr, IRQF_SHARED,
- netdev->name, netdev);
+ if (entropy)
+ irq_flags |= IRQF_SAMPLE_RANDOM;
+ err = request_irq(adapter->pdev->irq, e1000_intr,
+ irq_flags | IRQF_SHARED, netdev->name, netdev);
if (err)
e_err("Unable to allocate interrupt, Error: %d\n", err);
Index: linux-2.6.34-master/drivers/net/e1000e/param.c
===================================================================
--- linux-2.6.34-master.orig/drivers/net/e1000e/param.c
+++ linux-2.6.34-master/drivers/net/e1000e/param.c
@@ -31,6 +31,10 @@
#include "e1000.h"
+int entropy = 0;
+module_param(entropy, int, 0);
+MODULE_PARM_DESC(entropy, "Allow e1000e to populate the /dev/random entropy pool");
+
/*
* This is the only thing that needs to be changed to adjust the
* maximum number of ports that the driver can manage.

@ -0,0 +1,43 @@
Subject: add alias entry for portN properties
From: olh@suse.de
References: 435215 - LTC48564
Patch-mainline: not yet
Use a separate table for alias entries in the ehea module;
otherwise the probe() function will operate on the separate ports
instead of on the lhea "root" entry of the device tree.
---
drivers/net/ehea/ehea_main.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -111,6 +111,19 @@ static int __devinit ehea_probe_adapter(
static int __devexit ehea_remove(struct of_device *dev);
+static struct of_device_id ehea_module_device_table[] = {
+ {
+ .name = "lhea",
+ .compatible = "IBM,lhea",
+ },
+ {
+ .type = "network",
+ .compatible = "IBM,lhea-ethernet",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ehea_module_device_table);
+
static struct of_device_id ehea_device_table[] = {
{
.name = "lhea",
@@ -118,7 +131,6 @@ static struct of_device_id ehea_device_t
},
{},
};
-MODULE_DEVICE_TABLE(of, ehea_device_table);
static struct of_platform_driver ehea_driver = {
.name = "ehea",

@ -0,0 +1,380 @@
From: Jiri Kosina <jkosina@suse.cz>
Subject: Elo USB touchscreen driver
Patch-mainline: will be submitted for 2.6.28
References: FATE#304972
This is a driver for Elo USB touchscreen devices.
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Acked-by: Jiri Kosina <jkosina@suse.cz>
---
drivers/hid/hid-core.c | 2
drivers/hid/hid-ids.h | 2
drivers/input/touchscreen/Kconfig | 12 +
drivers/input/touchscreen/Makefile | 1
drivers/input/touchscreen/elousb.c | 305 +++++++++++++++++++++++++++++++++++++
5 files changed, 322 insertions(+)
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1567,6 +1567,8 @@ static const struct hid_device_id hid_ig
{ HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_4000U) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_4500U) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0001) },
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -171,7 +171,9 @@
#define USB_VENDOR_ID_DRAGONRISE 0x0079
#define USB_VENDOR_ID_ELO 0x04E7
+#define USB_DEVICE_ID_ELO_4000U 0x0009
#define USB_DEVICE_ID_ELO_TS2700 0x0020
+#define USB_DEVICE_ID_ELO_4500U 0x0030
#define USB_VENDOR_ID_ESSENTIAL_REALITY 0x0d7f
#define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -193,6 +193,18 @@ config TOUCHSCREEN_ELO
To compile this driver as a module, choose M here: the
module will be called elo.
+config TOUCHSCREEN_ELOUSB
+ tristate "Elo USB touchscreens"
+ select USB
+ help
+ Say Y here if you have an Elo USB touchscreen connected to
+ your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called elousb.
+
config TOUCHSCREEN_WACOM_W8001
tristate "Wacom W8001 penabled serial touchscreen"
select SERIO
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_TOUCHSCREEN_DYNAPRO) += dyn
obj-$(CONFIG_TOUCHSCREEN_GUNZE) += gunze.o
obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o
obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o
+obj-$(CONFIG_TOUCHSCREEN_ELOUSB) += elousb.o
obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o
obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o
obj-$(CONFIG_TOUCHSCREEN_MC13783) += mc13783_ts.o
--- /dev/null
+++ b/drivers/input/touchscreen/elousb.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 1999-2001 Vojtech Pavlik
+ *
+ * Elo USB touchscreen support
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Should you need to contact me, the author, you can do so either by
+ * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail:
+ * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/usb.h>
+#include <linux/usb/input.h>
+#include <linux/hid.h>
+#include <linux/input.h>
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.1"
+#define DRIVER_AUTHOR "Vojtech Pavlik <vojtech@suse.cz>"
+#define DRIVER_DESC "Elo USB touchscreen driver"
+#define DRIVER_LICENSE "GPL"
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE(DRIVER_LICENSE);
+
+struct elousb {
+ char name[128];
+ char phys[64];
+ struct usb_device *usbdev;
+ struct input_dev *dev;
+ struct urb *irq;
+
+ unsigned char *data;
+ dma_addr_t data_dma;
+};
+
+static void elousb_irq(struct urb *urb)
+{
+ struct elousb *elo = urb->context;
+ unsigned char *data = elo->data;
+ struct input_dev *dev = elo->dev;
+ int status;
+
+ switch (urb->status) {
+ case 0: /* success */
+ break;
+ case -ECONNRESET: /* unlink */
+ case -ENOENT:
+ case -ESHUTDOWN:
+ return;
+ /* -EPIPE: should clear the halt */
+ default: /* error */
+ goto resubmit;
+ }
+
+ if (data[0] != 'T') /* Mandatory ELO packet marker */
+ return;
+
+
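+ /* Bytes 2-7 carry little-endian X, Y and pressure values;
+ * data[1] holds the touch and pressure-valid flag bits. */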
+ input_report_abs(dev, ABS_X, ((u32)data[3] << 8) | data[2]);
+ input_report_abs(dev, ABS_Y, ((u32)data[5] << 8) | data[4]);
+
+ input_report_abs(dev, ABS_PRESSURE,
+ (data[1] & 0x80) ? (((u32)data[7] << 8) | data[6]): 0);
+
+ if (data[1] & 0x03) {
+ input_report_key(dev, BTN_TOUCH, 1);
+ input_sync(dev);
+ }
+
+ if (data[1] & 0x04)
+ input_report_key(dev, BTN_TOUCH, 0);
+
+ input_sync(dev);
+
+resubmit:
+ status = usb_submit_urb (urb, GFP_ATOMIC);
+ if (status)
+ err ("can't resubmit intr, %s-%s/input0, status %d",
+ elo->usbdev->bus->bus_name,
+ elo->usbdev->devpath, status);
+}
+
+static int elousb_open(struct input_dev *dev)
+{
+ struct elousb *elo = input_get_drvdata(dev);
+
+ elo->irq->dev = elo->usbdev;
+ if (usb_submit_urb(elo->irq, GFP_KERNEL))
+ return -EIO;
+
+ return 0;
+}
+
+static void elousb_close(struct input_dev *dev)
+{
+ struct elousb *elo = input_get_drvdata(dev);
+
+ usb_kill_urb(elo->irq);
+}
+
+static int elousb_probe(struct usb_interface *intf, const struct usb_device_id *id)
+{
+ struct usb_device *dev = interface_to_usbdev(intf);
+ struct usb_host_interface *interface;
+ struct usb_endpoint_descriptor *endpoint;
+ struct hid_descriptor *hdesc;
+ struct elousb *elo;
+ struct input_dev *input_dev;
+ int pipe, i;
+ unsigned int rsize = 0;
+ int error = -ENOMEM;
+ char *rdesc;
+
+ interface = intf->cur_altsetting;
+
+ if (interface->desc.bNumEndpoints != 1)
+ return -ENODEV;
+
+ endpoint = &interface->endpoint[0].desc;
+ if (!(endpoint->bEndpointAddress & USB_DIR_IN))
+ return -ENODEV;
+ if ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_INT)
+ return -ENODEV;
+
+ if (usb_get_extra_descriptor(interface, HID_DT_HID, &hdesc) &&
+ (!interface->desc.bNumEndpoints ||
+ usb_get_extra_descriptor(&interface->endpoint[0], HID_DT_HID, &hdesc))) {
+ err("HID class descriptor not present");
+ return -ENODEV;
+ }
+
+ for (i = 0; i < hdesc->bNumDescriptors; i++)
+ if (hdesc->desc[i].bDescriptorType == HID_DT_REPORT)
+ rsize = le16_to_cpu(hdesc->desc[i].wDescriptorLength);
+
+ if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) {
+ err("weird size of report descriptor (%u)", rsize);
+ return -ENODEV;
+ }
+
+
+ pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress);
+
+ elo = kzalloc(sizeof(struct elousb), GFP_KERNEL);
+ input_dev = input_allocate_device();
+ if (!elo || !input_dev)
+ goto fail1;
+
+ elo->data = usb_buffer_alloc(dev, 8, GFP_ATOMIC, &elo->data_dma);
+ if (!elo->data)
+ goto fail1;
+
+ elo->irq = usb_alloc_urb(0, GFP_KERNEL);
+ if (!elo->irq)
+ goto fail2;
+
+ if (!(rdesc = kmalloc(rsize, GFP_KERNEL)))
+ goto fail3;
+
+ elo->usbdev = dev;
+ elo->dev = input_dev;
+
+ if ((error = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
+ HID_REQ_SET_IDLE, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0,
+ interface->desc.bInterfaceNumber,
+ NULL, 0, USB_CTRL_SET_TIMEOUT)) < 0) {
+ err("setting HID idle timeout failed, error %d", error);
+ error = -ENODEV;
+ goto fail4;
+ }
+
+ if ((error = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
+ USB_REQ_GET_DESCRIPTOR, USB_RECIP_INTERFACE | USB_DIR_IN,
+ HID_DT_REPORT << 8, interface->desc.bInterfaceNumber,
+ rdesc, rsize, USB_CTRL_GET_TIMEOUT)) < rsize) {
+ err("reading HID report descriptor failed, error %d", error);
+ error = -ENODEV;
+ goto fail4;
+ }
+
+ if (dev->manufacturer)
+ strlcpy(elo->name, dev->manufacturer, sizeof(elo->name));
+
+ if (dev->product) {
+ if (dev->manufacturer)
+ strlcat(elo->name, " ", sizeof(elo->name));
+ strlcat(elo->name, dev->product, sizeof(elo->name));
+ }
+
+ if (!strlen(elo->name))
+ snprintf(elo->name, sizeof(elo->name),
+ "Elo touchscreen %04x:%04x",
+ le16_to_cpu(dev->descriptor.idVendor),
+ le16_to_cpu(dev->descriptor.idProduct));
+
+ usb_make_path(dev, elo->phys, sizeof(elo->phys));
+ strlcat(elo->phys, "/input0", sizeof(elo->phys));
+
+ input_dev->name = elo->name;
+ input_dev->phys = elo->phys;
+ usb_to_input_id(dev, &input_dev->id);
+ input_dev->dev.parent = &intf->dev;
+
+ input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+ set_bit(BTN_TOUCH, input_dev->keybit);
+ input_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
+ set_bit(ABS_PRESSURE, input_dev->absbit);
+
+ input_set_abs_params(input_dev, ABS_X, 0, 4000, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 0, 3840, 0, 0);
+ input_set_abs_params(input_dev, ABS_PRESSURE, 0, 256, 0, 0);
+
+ input_set_drvdata(input_dev, elo);
+
+ input_dev->open = elousb_open;
+ input_dev->close = elousb_close;
+
+ usb_fill_int_urb(elo->irq, dev, pipe, elo->data, 8,
+ elousb_irq, elo, endpoint->bInterval);
+ elo->irq->transfer_dma = elo->data_dma;
+ elo->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+ input_register_device(elo->dev);
+
+ usb_set_intfdata(intf, elo);
+ return 0;
+
+fail4:
+ kfree(rdesc);
+fail3:
+ usb_free_urb(elo->irq);
+fail2:
+ usb_buffer_free(dev, 8, elo->data, elo->data_dma);
+fail1:
+ input_free_device(input_dev);
+ kfree(elo);
+ return -ENOMEM;
+}
+
+static void elousb_disconnect(struct usb_interface *intf)
+{
+ struct elousb *elo = usb_get_intfdata (intf);
+
+ usb_set_intfdata(intf, NULL);
+ if (elo) {
+ usb_kill_urb(elo->irq);
+ input_unregister_device(elo->dev);
+ usb_free_urb(elo->irq);
+ usb_buffer_free(interface_to_usbdev(intf), 8, elo->data, elo->data_dma);
+ kfree(elo);
+ }
+}
+
+static struct usb_device_id elousb_id_table [] = {
+ { USB_DEVICE(0x04e7, 0x0009) }, /* CarrolTouch 4000U */
+ { USB_DEVICE(0x04e7, 0x0030) }, /* CarrolTouch 4500U */
+ { } /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE (usb, elousb_id_table);
+
+static struct usb_driver elousb_driver = {
+ .name = "elousb",
+ .probe = elousb_probe,
+ .disconnect = elousb_disconnect,
+ .id_table = elousb_id_table,
+};
+
+static int __init elousb_init(void)
+{
+ int retval = usb_register(&elousb_driver);
+ if (retval == 0)
+ printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_VERSION ":" DRIVER_DESC);
+ return retval;
+}
+
+static void __exit elousb_exit(void)
+{
+ usb_deregister(&elousb_driver);
+}
+
+module_init(elousb_init);
+module_exit(elousb_exit);

@ -0,0 +1,70 @@
From: Jiri Benc <jbenc@suse.cz>
Subject: Enable igb as entropy source (disabled by default)
References: FATE#307517
Patch-mainline: never
Current disk-less systems have no entropy source whatsoever. Therefore, the
network drivers tg3, bnx2, e1000, e1000e, igb and ixgbe should be enabled to
feed entropy to the kernel via the IRQF_SAMPLE_RANDOM flag when loaded. This
option shall not be enabled by default but implemented via a module option to
be activated by the administrator.
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/igb/igb_main.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -60,6 +60,10 @@ static const struct e1000_info *igb_info
[board_82575] = &e1000_82575_info,
};
+static int entropy = 0;
+module_param(entropy, int, 0);
+MODULE_PARM_DESC(entropy, "Allow igb to populate the /dev/random entropy pool");
+
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
@@ -587,7 +591,8 @@ static int igb_request_msix(struct igb_a
int i, err = 0, vector = 0;
err = request_irq(adapter->msix_entries[vector].vector,
- igb_msix_other, 0, netdev->name, adapter);
+ igb_msix_other, entropy ? IRQF_SAMPLE_RANDOM : 0,
+ netdev->name, adapter);
if (err)
goto out;
vector++;
@@ -882,6 +887,10 @@ static int igb_request_irq(struct igb_ad
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
int err = 0;
+ int irq_flags = 0;
+
+ if (entropy)
+ irq_flags = IRQF_SAMPLE_RANDOM;
if (adapter->msix_entries) {
err = igb_request_msix(adapter);
@@ -916,7 +925,7 @@ static int igb_request_irq(struct igb_ad
}
if (adapter->flags & IGB_FLAG_HAS_MSI) {
- err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
+ err = request_irq(adapter->pdev->irq, igb_intr_msi, irq_flags,
netdev->name, adapter);
if (!err)
goto request_done;
@@ -926,7 +935,8 @@ static int igb_request_irq(struct igb_ad
adapter->flags &= ~IGB_FLAG_HAS_MSI;
}
- err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
+ irq_flags |= IRQF_SHARED;
+ err = request_irq(adapter->pdev->irq, igb_intr, irq_flags,
netdev->name, adapter);
if (err)

@ -0,0 +1,218 @@
From: Takashi Iwai <tiwai@suse.de>
Subject: [PATCH 2/2] input: Add LED support to Synaptics device
Patch-mainline: Submitted
References: bnc#547370,bnc#582529,bnc#589014
The new Synaptics devices have an LED on the top-left corner.
This patch adds a new LED class device to control it. It's created
dynamically upon synaptics device probing.
The LED is controlled via the command 0x0a with parameters 0x88 or 0x10.
This seems to be on/off control only, although other values might be accepted.
How to detect the LED isn't fully clear yet. The new capability bits should
indicate its presence, but they don't match on real machines. So, for the
time being, the driver checks the product id in the ext capability bits and
assumes that the LED exists on the known devices.
Signed-off-by: Takashi Iwai <tiwai@suse.de>
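Once registered, the LED class device can be driven from userspace through the
standard sysfs interface (illustrative path, derived from the cdev.name set in
the patch):

    echo 1 > /sys/class/leds/psmouse::synaptics/brightness   # LED on
    echo 0 > /sys/class/leds/psmouse::synaptics/brightness   # LED off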
---
drivers/input/mouse/Kconfig | 9 +++
drivers/input/mouse/synaptics.c | 111 ++++++++++++++++++++++++++++++++++++++++
drivers/input/mouse/synaptics.h | 3 +
3 files changed, 123 insertions(+)
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -19,6 +19,7 @@
select SERIO_LIBPS2
select SERIO_I8042 if X86
select SERIO_GSCPS2 if GSC
+ select LEDS_CLASS if MOUSE_PS2_SYNAPTICS_LED
help
Say Y here if you have a PS/2 mouse connected to your system. This
includes the standard 2 or 3-button PS/2 mouse, as well as PS/2
@@ -67,6 +68,14 @@
If unsure, say Y.
+config MOUSE_PS2_SYNAPTICS_LED
+ bool "Support embedded LED on Synaptics devices"
+ depends on MOUSE_PS2_SYNAPTICS
+ select NEW_LEDS
+ help
+ Say Y here if you have a Synaptics device with an embedded LED.
+ This will enable LED class driver to control the LED device.
+
config MOUSE_PS2_LIFEBOOK
bool "Fujitsu Lifebook PS/2 mouse protocol extension" if EMBEDDED
default y
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -28,6 +28,7 @@
#include <linux/input.h>
#include <linux/serio.h>
#include <linux/libps2.h>
+#include <linux/leds.h>
#include <linux/slab.h>
#include "psmouse.h"
#include "synaptics.h"
@@ -335,6 +336,110 @@
serio_register_port(serio);
}
+#ifdef CONFIG_MOUSE_PS2_SYNAPTICS_LED
+/*
+ * LED handling:
+ * Some Synaptics devices have an embedded LED at the top-left corner.
+ */
+
+struct synaptics_led {
+ struct psmouse *psmouse;
+ struct work_struct work;
+ struct led_classdev cdev;
+};
+
+static void synaptics_set_led(struct psmouse *psmouse, int on)
+{
+ int i;
+ unsigned char cmd = on ? 0x88 : 0x10;
+
+ ps2_begin_command(&psmouse->ps2dev);
+ if (__ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_SETSCALE11))
+ goto out;
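+ /* Clock the 8-bit command out two bits at a time via SETRES,
+ * then latch it by sending command 0x0a as a SETRATE argument. */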
+ for (i = 6; i >= 0; i -= 2) {
+ unsigned char d = (cmd >> i) & 3;
+ if (__ps2_command(&psmouse->ps2dev, &d, PSMOUSE_CMD_SETRES))
+ goto out;
+ }
+ cmd = 0x0a;
+ __ps2_command(&psmouse->ps2dev, &cmd, PSMOUSE_CMD_SETRATE);
+ out:
+ ps2_end_command(&psmouse->ps2dev);
+}
+
+static void synaptics_led_work(struct work_struct *work)
+{
+ struct synaptics_led *led;
+
+ led = container_of(work, struct synaptics_led, work);
+ synaptics_set_led(led->psmouse, led->cdev.brightness);
+}
+
+static void synaptics_led_cdev_brightness_set(struct led_classdev *cdev,
+ enum led_brightness value)
+{
+ struct synaptics_led *led;
+
+ led = container_of(cdev, struct synaptics_led, cdev);
+ schedule_work(&led->work);
+}
+
+static void synaptics_sync_led(struct psmouse *psmouse)
+{
+ struct synaptics_data *priv = psmouse->private;
+
+ if (priv->led)
+ synaptics_set_led(psmouse, priv->led->cdev.brightness);
+}
+
+static int synaptics_init_led(struct psmouse *psmouse)
+{
+ struct synaptics_data *priv = psmouse->private;
+ struct synaptics_led *led;
+ int err;
+
+ /* FIXME: LED is supposedly detectable in cap0c[1] 0x20, but it seems
+ * not working on real machines.
+ * So we check the product id to be sure.
+ */
+ if (!priv->ext_cap_0c || SYN_CAP_PRODUCT_ID(priv->ext_cap) != 0xe4)
+ return 0;
+
+ printk(KERN_INFO "synaptics: support LED control\n");
+ led = kzalloc(sizeof(struct synaptics_led), GFP_KERNEL);
+ if (!led)
+ return -ENOMEM;
+ led->psmouse = psmouse;
+ INIT_WORK(&led->work, synaptics_led_work);
+ led->cdev.name = "psmouse::synaptics";
+ led->cdev.brightness_set = synaptics_led_cdev_brightness_set;
+ led->cdev.flags = LED_CORE_SUSPENDRESUME;
+ err = led_classdev_register(NULL, &led->cdev);
+ if (err < 0) {
+ kfree(led);
+ return err;
+ }
+ priv->led = led;
+ return 0;
+}
+
+static void synaptics_free_led(struct psmouse *psmouse)
+{
+ struct synaptics_data *priv = psmouse->private;
+
+ if (!priv->led)
+ return;
+ cancel_work_sync(&priv->led->work);
+ synaptics_set_led(psmouse, 0);
+ led_classdev_unregister(&priv->led->cdev);
+ kfree(priv->led);
+}
+#else
+#define synaptics_init_led(ps) 0
+#define synaptics_free_led(ps) do {} while (0)
+#define synaptics_sync_led(ps) do {} while (0)
+#endif
+
/*****************************************************************************
* Functions to interpret the absolute mode packets
****************************************************************************/
@@ -622,6 +727,7 @@
static void synaptics_disconnect(struct psmouse *psmouse)
{
+ synaptics_free_led(psmouse);
synaptics_reset(psmouse);
kfree(psmouse->private);
psmouse->private = NULL;
@@ -653,6 +759,8 @@
return -1;
}
+ synaptics_sync_led(psmouse);
+
return 0;
}
@@ -727,6 +835,9 @@
SYN_ID_MAJOR(priv->identity), SYN_ID_MINOR(priv->identity),
priv->model_id, priv->capabilities, priv->ext_cap, priv->ext_cap_0c);
+ if (synaptics_init_led(psmouse) < 0)
+ goto init_fail;
+
set_input_params(psmouse->dev, priv);
/*
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -94,6 +94,8 @@
signed char scroll;
};
+struct synaptics_led;
+
struct synaptics_data {
/* Data read from the touchpad */
unsigned long int model_id; /* Model-ID */
@@ -107,6 +109,7 @@
unsigned char pkt_type; /* packet type - old, new, etc */
unsigned char mode; /* current mode byte */
int scroll;
+ struct synaptics_led *led;
};
void synaptics_module_init(void);

@ -0,0 +1,90 @@
From: Jiri Benc <jbenc@suse.cz>
Subject: Enable ixgbe as entropy source (disabled by default)
References: FATE#307517
Patch-mainline: never
Current disk-less systems have no entropy source whatsoever. Therefore, the
network drivers tg3, bnx2, e1000, e1000e, igb and ixgbe should be enabled to
feed entropy to the kernel via the IRQF_SAMPLE_RANDOM flag when loaded. This
option shall not be enabled by default but implemented via a module option to
be activated by the administrator.
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/ixgbe/ixgbe_main.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -55,6 +55,11 @@ static const char ixgbe_driver_string[]
const char ixgbe_driver_version[] = DRV_VERSION;
static char ixgbe_copyright[] = "Copyright (c) 1999-2010 Intel Corporation.";
+static int entropy = 0;
+module_param(entropy, int, 0);
+MODULE_PARM_DESC(entropy, "Allow ixgbe to populate the /dev/random entropy pool");
+
+
static const struct ixgbe_info *ixgbe_info_tbl[] = {
[board_82598] = &ixgbe_82598_info,
[board_82599] = &ixgbe_82599_info,
@@ -1717,6 +1722,7 @@ static int ixgbe_request_msix_irqs(struc
irqreturn_t (*handler)(int, void *);
int i, vector, q_vectors, err;
int ri=0, ti=0;
+ int irq_flags;
/* Decrement for Other and TCP Timer vectors */
q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
@@ -1732,20 +1738,26 @@ static int ixgbe_request_msix_irqs(struc
for (vector = 0; vector < q_vectors; vector++) {
handler = SET_HANDLER(adapter->q_vector[vector]);
+ irq_flags = 0;
if(handler == &ixgbe_msix_clean_rx) {
sprintf(adapter->name[vector], "%s-%s-%d",
netdev->name, "rx", ri++);
+ if (entropy)
+ irq_flags = IRQF_SAMPLE_RANDOM;
}
else if(handler == &ixgbe_msix_clean_tx) {
sprintf(adapter->name[vector], "%s-%s-%d",
netdev->name, "tx", ti++);
}
- else
+ else {
sprintf(adapter->name[vector], "%s-%s-%d",
netdev->name, "TxRx", vector);
+ if (entropy)
+ irq_flags = IRQF_SAMPLE_RANDOM;
+ }
err = request_irq(adapter->msix_entries[vector].vector,
- handler, 0, adapter->name[vector],
+ handler, irq_flags, adapter->name[vector],
adapter->q_vector[vector]);
if (err) {
DPRINTK(PROBE, ERR,
@@ -1931,14 +1943,19 @@ static int ixgbe_request_irq(struct ixgb
{
struct net_device *netdev = adapter->netdev;
int err;
+ int irq_flags = 0;
+
+ if (entropy)
+ irq_flags = IRQF_SAMPLE_RANDOM;
if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
err = ixgbe_request_msix_irqs(adapter);
} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
- err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
+ err = request_irq(adapter->pdev->irq, ixgbe_intr, irq_flags,
netdev->name, netdev);
} else {
- err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
+ irq_flags |= IRQF_SHARED;
+ err = request_irq(adapter->pdev->irq, ixgbe_intr, irq_flags,
netdev->name, netdev);
}

@ -0,0 +1,35 @@
From: Tejun Heo <teheo@suse.de>
Date: Wed, 7 Feb 2007 12:37:41 -0800
Subject: [PATCH] libata: add waits for GoVault
References: 246451
Patch-mainline: not yet
Iomega GoVault drives need specific waits here and there. Upstream
approach hasn't been determined yet. This is a temporary solution from
Gary Hade. Read the following thread for details.
http://thread.gmane.org/gmane.linux.ide/14545/focus=14663
With recent changes in the reset sequence (ATA_TMOUT_FF_WAIT and
prefer-hardreset), the only thing which needs adjustment is
ATA_TMOUT_FF_WAIT (the prereset wait part is unnecessary as the wait
is necessary only for softreset when SCR registers are accessible and
in those cases libata now always uses hardreset which doesn't require
such wait).
Signed-off-by: Tejun Heo <teheo@suse.de>
---
include/linux/libata.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -260,7 +260,7 @@ enum {
* HHD424020F7SV00. Increase to 2secs when parallel probing
* is in place.
*/
- ATA_TMOUT_FF_WAIT = 800,
+ ATA_TMOUT_FF_WAIT = 2000,
/* Spec mandates to wait for ">= 2ms" before checking status
* after reset. We wait 150ms, because that was the magic

@ -0,0 +1,24 @@
From: Tejun Heo <teheo@suse.de>
Subject: [PATCH] libata: unlock HPA by default
References: 299267
Patch-mainline: not yet
Unlock HPA by default. This is to stay compatible with the old IDE
drivers.
Signed-off-by: Tejun Heo <teheo@suse.de>
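For users who want the old behaviour back, the parameter remains overridable
(illustrative invocations; the sysfs path follows from module_param_named):

    libata.ignore_hpa=0        # on the kernel command line
    echo 0 > /sys/module/libata/parameters/ignore_hpa   # at runtime, before a (re)probe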
---
drivers/ata/libata-core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -139,7 +139,7 @@ int libata_fua = 0;
module_param_named(fua, libata_fua, int, 0444);
MODULE_PARM_DESC(fua, "FUA support (0=off [default], 1=on)");
-static int ata_ignore_hpa;
+static int ata_ignore_hpa = 1;
module_param_named(ignore_hpa, ata_ignore_hpa, int, 0644);
MODULE_PARM_DESC(ignore_hpa, "Ignore HPA limit (0=keep BIOS limits, 1=ignore limits, using full disk)");

@ -0,0 +1,70 @@
From: Martin Wilck <martin.wilck@fujitsu-siemens.com>
Subject: megaraid_mbox: Oops on SG_IO
References: bnc#475619
Patch-mainline: not yet
This patch fixes an Oops in megaraid_mbox that happens when a
MODE_SENSE command for a logical drive is started via ioctl(SG_IO).
The problem only occurs if the buffer specified by the user to receive
the mode data resides in highmem and if the buffer is aligned for
direct dma (no bounce buffer necessary). megaraid_mbox emulates
the MODE_SENSE command and writes the data using memset() directly
into the user buffer. If the buffer is at a currently unmapped highmem
page, this leads to an Oops.
Signed-off-by: Hannes Reinecke <hare@suse.de>
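For reference, the fix below follows the standard pattern for touching a
scatterlist page that may live in highmem (a sketch; `len` stands in for the
request-specific length):

    struct page *pg = sg_page(sgl);
    unsigned char *vaddr;
    unsigned long flags;

    local_irq_save(flags);
    vaddr = kmap_atomic(pg, KM_BIO_SRC_IRQ) + sgl->offset;
    memset(vaddr, 0, len);                  /* page is mapped here */
    kunmap_atomic(vaddr, KM_BIO_SRC_IRQ);
    local_irq_restore(flags);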
---
drivers/scsi/megaraid/megaraid_mbox.c | 28 +++++++++++++++++++++++-----
1 file changed, 23 insertions(+), 5 deletions(-)
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -1585,13 +1585,20 @@ megaraid_mbox_build_cmd(adapter_t *adapt
case MODE_SENSE:
{
struct scatterlist *sgl;
- caddr_t vaddr;
+ struct page *pg;
+ unsigned char *vaddr;
+ unsigned long flags;
sgl = scsi_sglist(scp);
- if (sg_page(sgl)) {
- vaddr = (caddr_t) sg_virt(&sgl[0]);
+ pg = sg_page(sgl);
+ if (pg) {
+ local_irq_save(flags);
+ vaddr = kmap_atomic(pg, KM_BIO_SRC_IRQ) + sgl->offset;
memset(vaddr, 0, scp->cmnd[4]);
+
+ kunmap_atomic(vaddr, KM_BIO_SRC_IRQ);
+ local_irq_restore(flags);
}
else {
con_log(CL_ANN, (KERN_WARNING
@@ -2329,9 +2336,20 @@ megaraid_mbox_dpc(unsigned long devp)
if (scp->cmnd[0] == INQUIRY && status == 0 && islogical == 0
&& IS_RAID_CH(raid_dev, scb->dev_channel)) {
+ struct page *pg;
+ unsigned char *vaddr;
+ unsigned long flags;
+
sgl = scsi_sglist(scp);
- if (sg_page(sgl)) {
- c = *(unsigned char *) sg_virt(&sgl[0]);
+ pg = sg_page(sgl);
+ if (pg) {
+ local_irq_save(flags);
+ vaddr = kmap_atomic(pg, KM_BIO_SRC_IRQ) + sgl->offset;
+
+ c = *vaddr;
+
+ kunmap_atomic(vaddr, KM_BIO_SRC_IRQ);
+ local_irq_restore(flags);
} else {
con_log(CL_ANN, (KERN_WARNING
"megaraid mailbox: invalid sg:%d\n",

File diff suppressed because it is too large.

@ -0,0 +1,30 @@
From: Tejun Heo <tj@kernel.org>
Subject: [PATCH] pci: disable MSI on VIA K8M800
References: bnc#599508
Patch-Mainline: Pending for 2.6.35 and -stable
MSI delivery from the on-board ahci controller doesn't work on the K8M800. At
this point, it's unclear whether the culprit is with the ahci
controller or the host bridge. Given the track record and considering
the rather minimal impact of MSI, disabling it seems reasonable.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Rainer Hurtado Navarro <publio.escipion.el.africano@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Tejun Heo <teheo@suse.de>
---
drivers/pci/quirks.c | 1 +
1 file changed, 1 insertion(+)
Index: linux-2.6.34-master/drivers/pci/quirks.c
===================================================================
--- linux-2.6.34-master.orig/drivers/pci/quirks.c
+++ linux-2.6.34-master/drivers/pci/quirks.c
@@ -2112,6 +2112,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AT
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3336, quirk_disable_all_msi);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3351, quirk_disable_all_msi);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3364, quirk_disable_all_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8380_0, quirk_disable_all_msi);
/* Disable MSI on chipsets that are known to not support it */
static void __devinit quirk_disable_msi(struct pci_dev *dev)

@ -0,0 +1,53 @@
From: Olaf Hering <olh@suse.de>
Subject: enable mouse button emulation also for G5
Patch-mainline: never
fix compile errors
drivers/macintosh/Kconfig | 2 +-
drivers/macintosh/adb.c | 4 ++++
drivers/macintosh/adbhid.c | 6 +++++-
3 files changed, 10 insertions(+), 2 deletions(-)
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -13,7 +13,7 @@ if MACINTOSH_DRIVERS
config ADB
bool "Apple Desktop Bus (ADB) support"
- depends on MAC || (PPC_PMAC && PPC32)
+ depends on MAC || PPC_PMAC
help
Apple Desktop Bus (ADB) support is for support of devices which
are connected to an ADB port. ADB devices tend to have 4 pins.
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -298,6 +298,10 @@ static int __init adb_init(void)
if (!machine_is(chrp) && !machine_is(powermac))
return 0;
#endif
+#ifdef CONFIG_PPC64
+ if (!machine_is(powermac))
+ return 0;
+#endif
#ifdef CONFIG_MAC
if (!MACH_IS_MAC)
return 0;
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -1264,10 +1264,14 @@ init_ms_a3(int id)
static int __init adbhid_init(void)
{
-#ifndef CONFIG_MAC
+#ifdef CONFIG_PPC32
if (!machine_is(chrp) && !machine_is(powermac))
return 0;
#endif
+#ifdef CONFIG_PPC64
+ if (!machine_is(powermac))
+ return 0;
+#endif
led_request.complete = 1;

@ -0,0 +1,197 @@
From: Matt Carlson <mcarlson@broadcom.com>
Subject: tg3: 5785 and 57780 asic revs not working
References: bnc#580780
Patch-mainline: Never
There is a known phylib limitation that causes a lot of problems:
phylib does not load phy modules as it detects devices on the MDIO bus.
If the phylib module gets loaded as a dependency of tg3, there will be
no opportunity to load the needed broadcom.ko module before tg3 requests
phylib to probe the MDIO bus. The result will be that tg3 will fail to
attach to 5785 and 57780 devices.
There are several known solutions to this problem. (None of these
should go upstream. The upstream fix should be to get phylib to load
modules for devices it encounters.) Only one of them need be applied.
1) Statically link in the broadcom.ko module into the kernel.
2) Add the following to /etc/modprobe.d/local.conf or its equivalent:
install tg3 /sbin/modprobe broadcom; /sbin/modprobe --ignore-install tg3
3) Apply the following patch:
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/tg3.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/net/tg3.h | 9 +++++
2 files changed, 92 insertions(+)
Index: linux-2.6.34-master/drivers/net/tg3.c
===================================================================
--- linux-2.6.34-master.orig/drivers/net/tg3.c
+++ linux-2.6.34-master/drivers/net/tg3.c
@@ -1956,6 +1956,58 @@ static int tg3_phy_reset(struct tg3 *tp)
tg3_phy_toggle_apd(tp, false);
out:
+ if ((tp->phy_id & TG3_PHY_ID_MASK) == TG3_PHY_ID_BCM50610 ||
+ (tp->phy_id & TG3_PHY_ID_MASK) == TG3_PHY_ID_BCM50610M) {
+ u32 reg;
+
+ /* Enable SM_DSP clock and tx 6dB coding. */
+ reg = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
+ MII_TG3_AUXCTL_ACTL_SMDSP_ENA |
+ MII_TG3_AUXCTL_ACTL_TX_6DB;
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, reg);
+
+ reg = MII_TG3_DSP_EXP8_REJ2MHz;
+ tg3_phydsp_write(tp, MII_TG3_DSP_EXP8, reg);
+
+ /* Apply workaround to A0 revision parts only. */
+ if (tp->phy_id == TG3_PHY_ID_BCM50610 ||
+ tp->phy_id == TG3_PHY_ID_BCM50610M) {
+ tg3_phydsp_write(tp, 0x001F, 0x0300);
+ tg3_phydsp_write(tp, 0x601F, 0x0002);
+ tg3_phydsp_write(tp, 0x0F75, 0x003C);
+ tg3_phydsp_write(tp, 0x0F96, 0x0010);
+ tg3_phydsp_write(tp, 0x0F97, 0x0C0C);
+ }
+
+ /* Turn off SM_DSP clock. */
+ reg = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
+ MII_TG3_AUXCTL_ACTL_TX_6DB;
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, reg);
+
+ /* Clear all mode configuration bits. */
+ reg = MII_TG3_MISC_SHDW_WREN |
+ MII_TG3_MISC_SHDW_RGMII_SEL;
+ tg3_writephy(tp, MII_TG3_MISC_SHDW, reg);
+ }
+ if ((tp->phy_id & TG3_PHY_ID_MASK) == TG3_PHY_ID_BCM57780) {
+ u32 reg;
+
+ /* Enable SM_DSP clock and tx 6dB coding. */
+ reg = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
+ MII_TG3_AUXCTL_ACTL_SMDSP_ENA |
+ MII_TG3_AUXCTL_ACTL_TX_6DB;
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, reg);
+
+ tg3_writephy(tp, MII_TG3_DSP_ADDRESS, MII_TG3_DSP_EXP75);
+ tg3_readphy(tp, MII_TG3_DSP_RW_PORT, &reg);
+ reg |= MII_TG3_DSP_EXP75_SUP_CM_OSC;
+ tg3_phydsp_write(tp, MII_TG3_DSP_EXP75, reg);
+
+ /* Turn off SM_DSP clock. */
+ reg = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
+ MII_TG3_AUXCTL_ACTL_TX_6DB;
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, reg);
+ }
if (tp->tg3_flags2 & TG3_FLG2_PHY_ADC_BUG) {
tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x201f);
@@ -2018,6 +2070,22 @@ out:
/* adjust output voltage */
tg3_writephy(tp, MII_TG3_FET_PTEST, 0x12);
}
+ else if (tp->tg3_flags3 & TG3_FLG3_PHY_IS_FET) {
+ u32 brcmtest;
+ if (!tg3_readphy(tp, MII_TG3_FET_TEST, &brcmtest) &&
+ !tg3_writephy(tp, MII_TG3_FET_TEST,
+ brcmtest | MII_TG3_FET_SHADOW_EN)) {
+ u32 val, reg = MII_TG3_FET_SHDW_AUXMODE4;
+
+ if (!tg3_readphy(tp, reg, &val)) {
+ val &= ~MII_TG3_FET_SHDW_AM4_LED_MASK;
+ val |= MII_TG3_FET_SHDW_AM4_LED_MODE1;
+ tg3_writephy(tp, reg, val);
+ }
+
+ tg3_writephy(tp, MII_TG3_FET_TEST, brcmtest);
+ }
+ }
tg3_phy_toggle_automdix(tp, 1);
tg3_phy_set_wirespeed(tp);
@@ -3260,6 +3328,15 @@ relink:
tw32_f(MAC_MODE, tp->mac_mode);
udelay(40);
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) {
+ if (tp->link_config.active_speed == SPEED_10)
+ tw32(MAC_MI_STAT,
+ MAC_MI_STAT_10MBPS_MODE |
+ MAC_MI_STAT_LNKSTAT_ATTN_ENAB);
+ else
+ tw32(MAC_MI_STAT, MAC_MI_STAT_LNKSTAT_ATTN_ENAB);
+ }
+
if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) {
/* Polled via timer. */
tw32_f(MAC_EVENT, 0);
@@ -13505,9 +13582,11 @@ static int __devinit tg3_get_invariants(
GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX)
tp->coalesce_mode |= HOSTCC_MODE_32BYTE;
+#if 0
if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 ||
GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780)
tp->tg3_flags3 |= TG3_FLG3_USE_PHYLIB;
+#endif
err = tg3_mdio_init(tp);
if (err)
@@ -14293,6 +14372,10 @@ static char * __devinit tg3_phy_string(s
case TG3_PHY_ID_BCM5718C: return "5718C";
case TG3_PHY_ID_BCM5718S: return "5718S";
case TG3_PHY_ID_BCM57765: return "57765";
+ case TG3_PHY_ID_BCM50610: return "50610";
+ case TG3_PHY_ID_BCM50610M: return "50610M";
+ case TG3_PHY_ID_BCMAC131: return "AC131";
+ case TG3_PHY_ID_BCM57780: return "57780";
case TG3_PHY_ID_BCM8002: return "8002/serdes";
case 0: return "serdes";
default: return "unknown";
Index: linux-2.6.34-master/drivers/net/tg3.h
===================================================================
--- linux-2.6.34-master.orig/drivers/net/tg3.h
+++ linux-2.6.34-master/drivers/net/tg3.h
@@ -2086,6 +2086,7 @@
#define MII_TG3_DSP_EXP8_REJ2MHz 0x0001
#define MII_TG3_DSP_EXP8_AEDW 0x0200
#define MII_TG3_DSP_EXP75 0x0f75
+#define MII_TG3_DSP_EXP75_SUP_CM_OSC 0x0001
#define MII_TG3_DSP_EXP96 0x0f96
#define MII_TG3_DSP_EXP97 0x0f97
@@ -2141,6 +2142,8 @@
#define MII_TG3_MISC_SHDW_SCR5_LPED 0x0010
#define MII_TG3_MISC_SHDW_SCR5_SEL 0x1400
+#define MII_TG3_MISC_SHDW_RGMII_SEL 0x2c00
+
#define MII_TG3_TEST1 0x1e
#define MII_TG3_TEST1_TRIM_EN 0x0010
#define MII_TG3_TEST1_CRC_EN 0x8000
@@ -2158,6 +2161,8 @@
#define MII_TG3_FET_SHDW_MISCCTRL_MDIX 0x4000
#define MII_TG3_FET_SHDW_AUXMODE4 0x1a
+#define MII_TG3_FET_SHDW_AM4_LED_MODE1 0x0001
+#define MII_TG3_FET_SHDW_AM4_LED_MASK 0x0003
#define MII_TG3_FET_SHDW_AUXMODE4_SBPD 0x0008
#define MII_TG3_FET_SHDW_AUXSTAT2 0x1b
@@ -2943,6 +2948,10 @@ struct tg3 {
#define TG3_PHY_ID_BCM57765 0x5c0d8a40
#define TG3_PHY_ID_BCM5906 0xdc00ac40
#define TG3_PHY_ID_BCM8002 0x60010140
+#define TG3_PHY_ID_BCM50610 0xbc050d60
+#define TG3_PHY_ID_BCM50610M 0xbc050d70
+#define TG3_PHY_ID_BCMAC131 0xbc050c70
+#define TG3_PHY_ID_BCM57780 0x5c0d8990
#define TG3_PHY_ID_INVALID 0xffffffff
#define PHY_ID_RTL8211C 0x001cc910

@ -0,0 +1,61 @@
From: Brandon Philips <bphilips@suse.de>
Subject: [PATCH] tg3: entropy source
Patch-mainline: never
References: FATE#307517
Signed-off-by: Brandon Philips <bphilips@suse.de>
---
drivers/net/tg3.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -15,7 +15,6 @@
* notice is accompanying it.
*/
-
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
@@ -66,6 +65,10 @@
#include "tg3.h"
+static int entropy = 0;
+module_param(entropy, int, 0);
+MODULE_PARM_DESC(entropy, "Allow tg3 to populate the /dev/random entropy pool");
+
#define DRV_MODULE_NAME "tg3"
#define DRV_MODULE_VERSION "3.108"
#define DRV_MODULE_RELDATE "February 17, 2010"
@@ -8494,10 +8497,13 @@ restart_timer:
static int tg3_request_irq(struct tg3 *tp, int irq_num)
{
irq_handler_t fn;
- unsigned long flags;
+ unsigned long flags = 0;
char *name;
struct tg3_napi *tnapi = &tp->napi[irq_num];
+ if (entropy)
+ flags = IRQF_SAMPLE_RANDOM;
+
if (tp->irq_cnt == 1)
name = tp->dev->name;
else {
@@ -8510,12 +8516,11 @@ static int tg3_request_irq(struct tg3 *t
fn = tg3_msi;
if (tp->tg3_flags2 & TG3_FLG2_1SHOT_MSI)
fn = tg3_msi_1shot;
- flags = IRQF_SAMPLE_RANDOM;
} else {
fn = tg3_interrupt;
if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
fn = tg3_interrupt_tagged;
- flags = IRQF_SHARED | IRQF_SAMPLE_RANDOM;
+ flags |= IRQF_SHARED;
}
return request_irq(tnapi->irq_vec, fn, flags, name, tnapi);

Binary file not shown.

@ -0,0 +1,40 @@
From: Thomas Renninger <trenn@suse.de>
Subject: acpi-cpufreq: Fix CPU_ANY CPUFREQ_{PRE,POST}CHANGE notification
Patch-Mainline: submitted - please revert after 2.6.35
References: none
Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: venki@google.com
CC: davej@redhat.com
CC: arjan@infradead.org
CC: davej@redhat.com
CC: linux-kernel@vger.kernel.org
---
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4591680..c6de3a9 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -391,7 +391,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
freqs.old = perf->states[perf->state].core_frequency * 1000;
freqs.new = data->freq_table[next_state].frequency;
- for_each_cpu(i, cmd.mask) {
+ for_each_cpu(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -407,7 +407,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
- for_each_cpu(i, cmd.mask) {
+ for_each_cpu(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
--
1.6.3

@ -0,0 +1,42 @@
From: Thomas Renninger <trenn@suse.de>
Subject: Do not try to set up acpi processor stuff on cores exceeding maxcpus=
References: bnc#601520
Patch-Mainline: Not yet
Signed-off-by: Thomas Renninger <trenn@suse.de>
---
drivers/acpi/processor_driver.c | 5 +++++
init/main.c | 3 ++-
2 files changed, 7 insertions(+), 1 deletion(-)
Index: linux-2.6.34-master/init/main.c
===================================================================
--- linux-2.6.34-master.orig/init/main.c
+++ linux-2.6.34-master/init/main.c
@@ -124,7 +124,8 @@ static char *ramdisk_execute_command;
#ifdef CONFIG_SMP
/* Setup configured maximum number of CPUs to activate */
-unsigned int __initdata setup_max_cpus = NR_CPUS;
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
/*
* Setup routine for controlling SMP activation
Index: linux-2.6.34-master/drivers/acpi/processor_driver.c
===================================================================
--- linux-2.6.34-master.orig/drivers/acpi/processor_driver.c
+++ linux-2.6.34-master/drivers/acpi/processor_driver.c
@@ -581,6 +581,11 @@ static int __cpuinit acpi_processor_add(
return 0;
}
+#ifdef CONFIG_SMP
+ if (pr->id >= setup_max_cpus && pr->id != 0)
+ return 0;
+#endif
+
BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
/*

@ -0,0 +1,67 @@
From: Nick Piggin <npiggin@suse.de>
Subject: be more aggressive with zone reclaims
References: bnc#476525
Patch-mainline: no
The zone reclaim design is not very good for parallel allocations.
The primary problem is that only one thread is allowed to perform
zone-reclaim at a time. If another thread needs memory from that
zone/node, then its zone-reclaim will fail and it will be forced
to fall back to allocating from another zone.
Additionally, the default zone reclaim priority is insufficient
for massively parallel allocations. Lower ZONE_RECLAIM_PRIORITY
to fix it. This can result in higher latency spikes, but a similar
kind of page allocation latency can often be encountered as a
normal part of page reclaim when pagecache fills memory.
Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
---
mm/vmscan.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2501,7 +2501,7 @@ int zone_reclaim_mode __read_mostly;
* of a node considered for each zone_reclaim. 4 scans 1/16th of
* a zone.
*/
-#define ZONE_RECLAIM_PRIORITY 4
+#define ZONE_RECLAIM_PRIORITY 0
/*
* Percentage of pages in a zone that must be unmapped for zone_reclaim to
@@ -2607,6 +2607,8 @@ static int __zone_reclaim(struct zone *z
slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
if (slab_reclaimable > zone->min_slab_pages) {
+ unsigned long lru_pages = zone_reclaimable_pages(zone);
+
/*
* shrink_slab() does not currently allow us to determine how
* many pages were freed in this zone. So we take the current
@@ -2617,10 +2619,7 @@ static int __zone_reclaim(struct zone *z
* Note that shrink_slab will free memory on all zones and may
* take a long time.
*/
- while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
- zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
- slab_reclaimable - nr_pages)
- ;
+ shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
/*
* Update nr_reclaimed by the number of slab pages we
@@ -2674,11 +2673,7 @@ int zone_reclaim(struct zone *zone, gfp_
if (node_state(node_id, N_CPU) && node_id != numa_node_id())
return ZONE_RECLAIM_NOSCAN;
- if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
- return ZONE_RECLAIM_NOSCAN;
-
ret = __zone_reclaim(zone, gfp_mask, order);
- zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
if (!ret)
count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);

@ -0,0 +1,45 @@
From: jbeulich@novell.com
Subject: Module use count must be updated as bridges are created/destroyed
Patch-mainline: unknown
References: 267651
Otherwise 'modprobe -r' on a module having a dependency on bridge will
implicitly unload bridge, bringing down all connectivity that was using
bridges.
---
net/bridge/br_if.c | 9 +++++++++
1 file changed, 9 insertions(+)
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -279,6 +279,11 @@ int br_add_bridge(struct net *net, const
if (!dev)
return -ENOMEM;
+ if (!try_module_get(THIS_MODULE)) {
+ free_netdev(dev);
+ return -ENOENT;
+ }
+
rtnl_lock();
if (strchr(dev->name, '%')) {
ret = dev_alloc_name(dev, dev->name);
@@ -297,6 +302,8 @@ int br_add_bridge(struct net *net, const
unregister_netdevice(dev);
out:
rtnl_unlock();
+ if (ret)
+ module_put(THIS_MODULE);
return ret;
out_free:
@@ -328,6 +335,8 @@ int br_del_bridge(struct net *net, const
del_br(netdev_priv(dev), NULL);
rtnl_unlock();
+ if (ret == 0)
+ module_put(THIS_MODULE);
return ret;
}

@ -0,0 +1,136 @@
From: Steve French <sfrench@us.ibm.com>
Subject: [CIFS] Allow null nd (as nfs server uses) on create
References: bnc#593940
Patch-mainline: queued (in cifs devel git)
commit fa588e0c57048b3d4bfcd772d80dc0615f83fd35 in cifs-2.6.git
While creating a file on a server which supports unix extensions
such as Samba, if a file is being created which does not supply
nameidata (i.e. nd is null), cifs client can oops when calling
cifs_posix_open.
Signed-off-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
---
fs/cifs/cifsproto.h | 6 ++++--
fs/cifs/dir.c | 20 ++++++++++++--------
fs/cifs/file.c | 11 +++++++----
3 files changed, 23 insertions(+), 14 deletions(-)
Index: linux-2.6.33-master/fs/cifs/cifsproto.h
===================================================================
--- linux-2.6.33-master.orig/fs/cifs/cifsproto.h
+++ linux-2.6.33-master/fs/cifs/cifsproto.h
@@ -95,8 +95,10 @@ extern struct cifsFileInfo *cifs_new_fil
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, int mode, int oflags,
- __u32 *poplock, __u16 *pnetfid, int xid);
+ struct vfsmount *mnt,
+ struct super_block *sb,
+ int mode, int oflags,
+ __u32 *poplock, __u16 *pnetfid, int xid);
extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
FILE_UNIX_BASIC_INFO *info,
struct cifs_sb_info *cifs_sb);
Index: linux-2.6.33-master/fs/cifs/dir.c
===================================================================
--- linux-2.6.33-master.orig/fs/cifs/dir.c
+++ linux-2.6.33-master/fs/cifs/dir.c
@@ -183,13 +183,14 @@ cifs_new_fileinfo(struct inode *newinode
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, int mode, int oflags,
- __u32 *poplock, __u16 *pnetfid, int xid)
+ struct vfsmount *mnt, struct super_block *sb,
+ int mode, int oflags,
+ __u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
FILE_UNIX_BASIC_INFO *presp_data;
__u32 posix_flags = 0;
- struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_fattr fattr;
cFYI(1, ("posix open %s", full_path));
@@ -242,7 +243,7 @@ int cifs_posix_open(char *full_path, str
/* get new inode and set it up */
if (*pinode == NULL) {
- *pinode = cifs_iget(mnt->mnt_sb, &fattr);
+ *pinode = cifs_iget(sb, &fattr);
if (!*pinode) {
rc = -ENOMEM;
goto posix_open_ret;
@@ -251,7 +252,8 @@ int cifs_posix_open(char *full_path, str
cifs_fattr_to_inode(*pinode, &fattr);
}
- cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
+ if (mnt)
+ cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
posix_open_ret:
kfree(presp_data);
@@ -315,13 +317,14 @@ cifs_create(struct inode *inode, struct
if (nd && (nd->flags & LOOKUP_OPEN))
oflags = nd->intent.open.flags;
else
- oflags = FMODE_READ;
+ oflags = FMODE_READ | SMB_O_CREAT;
if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
- rc = cifs_posix_open(full_path, &newinode, nd->path.mnt,
- mode, oflags, &oplock, &fileHandle, xid);
+ rc = cifs_posix_open(full_path, &newinode,
+ nd ? nd->path.mnt : NULL,
+ inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
negotation. EREMOTE indicates DFS junction, which is not
@@ -678,6 +681,7 @@ cifs_lookup(struct inode *parent_dir_ino
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
&fileHandle, xid);
Index: linux-2.6.33-master/fs/cifs/file.c
===================================================================
--- linux-2.6.33-master.orig/fs/cifs/file.c
+++ linux-2.6.33-master/fs/cifs/file.c
@@ -298,10 +298,12 @@ int cifs_open(struct inode *inode, struc
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+ oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- cifs_sb->mnt_file_mode /* ignored */,
- oflags, &oplock, &netfid, xid);
+ inode->i_sb,
+ cifs_sb->mnt_file_mode /* ignored */,
+ oflags, &oplock, &netfid, xid);
if (rc == 0) {
cFYI(1, ("posix open succeeded"));
/* no need for special case handling of setting mode
@@ -513,8 +515,9 @@ reopen_error_exit:
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- cifs_sb->mnt_file_mode /* ignored */,
- oflags, &oplock, &netfid, xid);
+ inode->i_sb,
+ cifs_sb->mnt_file_mode /* ignored */,
+ oflags, &oplock, &netfid, xid);
if (rc == 0) {
cFYI(1, ("posix reopen succeeded"));
goto reopen_success;

@ -0,0 +1,65 @@
From: Thomas Renninger <trenn@suse.de>
Subject: CPUFREQ: ondemand: Limit default sampling rate to 300ms max.
References: bnc#464461
Patch-Mainline: never, SLE11 only
Modified for SP1 by Jiri Bohac <jbohac@suse.cz>
HW cpufreq drivers (e.g. all non-acpi AMD) may report overly high latency values.
The default sampling rate (how often the ondemand/conservative governor
checks for frequency adjustments) may therefore be much too high,
resulting in performance loss.
Restrict the default sampling rate to 300ms. A 333ms sampling rate is field
tested with userspace governors; 300ms should be a fine maximum default
value for the ondemand kernel governor for all HW out there.
Set the default up_threshold to 40 on multi-core systems.
This should avoid effects where two CPU-intensive threads wait
on each other on separate cores. On a single-core machine these would all be
processed on one core, resulting in higher utilization of that core.
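To make the clamping arithmetic concrete, here is a standalone user-space sketch of the rule the hunk below implements; the latency and floor values are assumed for illustration, not taken from any particular machine:
#include <stdio.h>

#define LATENCY_MULTIPLIER		1000
#define MAX_DEFAULT_SAMPLING_RATE	(300 * 1000U)	/* 300 ms, in us */

int main(void)
{
	unsigned int min_sampling_rate = 10000;	/* assumed 10 ms floor */
	unsigned int latency = 1000;		/* assumed 1 ms transition latency, in us */
	unsigned int rate;

	rate = latency * LATENCY_MULTIPLIER;	/* 1,000,000 us = 1 s: far too slow to react */
	if (rate < min_sampling_rate)
		rate = min_sampling_rate;
	if (rate > MAX_DEFAULT_SAMPLING_RATE)	/* the new cap from this patch */
		rate = MAX_DEFAULT_SAMPLING_RATE;

	printf("default sampling rate: %u ms\n", rate / 1000);	/* 300 ms */
	return 0;
}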
---
drivers/cpufreq/cpufreq_ondemand.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -35,6 +35,7 @@
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (11)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
+#define MAX_DEFAULT_SAMPLING_RATE (300 * 1000U)
/*
* The polling frequency of this governor depends on the capability of
@@ -679,6 +680,29 @@ static int cpufreq_governor_dbs(struct c
dbs_tuners_ins.sampling_rate =
max(min_sampling_rate,
latency * LATENCY_MULTIPLIER);
+ /*
+ * Cut def_sampling rate to 300ms if it was above,
+ * still consider to not set it above latency
+ * transition * 100
+ */
+ if (dbs_tuners_ins.sampling_rate > MAX_DEFAULT_SAMPLING_RATE) {
+ dbs_tuners_ins.sampling_rate =
+ max(min_sampling_rate, MAX_DEFAULT_SAMPLING_RATE);
+ printk(KERN_INFO "CPUFREQ: ondemand sampling "
+ "rate set to %d ms\n",
+ dbs_tuners_ins.sampling_rate / 1000);
+ }
+ /*
+ * Be conservative in respect to performance.
+ * If an application calculates using two threads
+ * depending on each other, they will be run on several
+ * CPU cores resulting on 50% load on both.
+ * SLED might still want to prefer 80% up_threshold
+ * by default, but we cannot differ that here.
+ */
+ if (num_online_cpus() > 1)
+ dbs_tuners_ins.up_threshold =
+ DEF_FREQUENCY_UP_THRESHOLD / 2;
}
mutex_unlock(&dbs_mutex);

@ -0,0 +1,29 @@
From: Hannes Reinecke <hare@suse.de>
Subject: Reattach device handler for multipath devices
References: bnc#435688
Patch-mainline: not yet
The multipath daemon might have specified a different device_handler
than the one a device is attached to by default.
So we should try to re-attach with the user-specified device_handler
and only return an error if that fails.
And we should _not_ detach existing hardware handlers: detaching
would set the path to failed during failover.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/md/dm-mpath.c | 2 --
1 file changed, 2 deletions(-)
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -168,8 +168,6 @@ static void free_pgpaths(struct list_hea
list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
list_del(&pgpath->list);
- if (m->hw_handler_name)
- scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
free_pgpath(pgpath);
}

@ -0,0 +1,39 @@
From: Nikanth Karthikesan <knikanth@suse.de>
Subject: Release md->map_lock before set_disk_ro
Patch-mainline: No
References: bnc#556899 bnc#479784
Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Calling set_disk_ro() with irqs disabled triggers a warning, so call
set_disk_ro() outside the write_lock_irqsave(&md->map_lock) region.
To read dm_table_get_mode(md->map) there, it is enough to hold a
reference with dm_table_get() and dm_table_put().
---
drivers/md/dm.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6.33-master/drivers/md/dm.c
===================================================================
--- linux-2.6.33-master.orig/drivers/md/dm.c
+++ linux-2.6.33-master/drivers/md/dm.c
@@ -2102,12 +2102,15 @@ static struct dm_table *__bind(struct ma
old_map = md->map;
md->map = t;
dm_table_set_restrictions(t, q, limits);
+ write_unlock_irqrestore(&md->map_lock, flags);
+
+ dm_table_get(md->map);
if (!(dm_table_get_mode(t) & FMODE_WRITE)) {
set_disk_ro(md->disk, 1);
} else {
set_disk_ro(md->disk, 0);
}
- write_unlock_irqrestore(&md->map_lock, flags);
+ dm_table_put(md->map);
return old_map;
}

@ -0,0 +1,90 @@
From: Hannes Reinecke <hare@suse.de>
Subject: dm multipath devices are not getting created for readonly devices
References: bnc#382705
Patch-mainline: not yet
Currently we cannot create device-mapper tables for multipath devices
when they are read-only.
This patch modifies the device-mapper to set the 'READ-ONLY' flag
automatically whenever a read-only device is added to the table.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/md/dm-table.c | 10 +++++++++-
drivers/md/dm.c | 18 ++++++++++++++++--
2 files changed, 25 insertions(+), 3 deletions(-)
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -462,11 +462,19 @@ static int __table_get_device(struct dm_
dd->dm_dev.mode = mode;
dd->dm_dev.bdev = NULL;
- if ((r = open_dev(dd, dev, t->md))) {
+ r = open_dev(dd, dev, t->md);
+ if (r == -EROFS) {
+ dd->dm_dev.mode &= ~FMODE_WRITE;
+ r = open_dev(dd, dev, t->md);
+ }
+ if (r) {
kfree(dd);
return r;
}
+ if (dd->dm_dev.mode != mode)
+ t->mode = dd->dm_dev.mode;
+
format_dev_t(dd->dm_dev.name, dev);
atomic_set(&dd->count, 0);
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -337,16 +337,25 @@ int dm_deleting_md(struct mapped_device
static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mapped_device *md;
+ int retval = 0;
spin_lock(&_minor_lock);
md = bdev->bd_disk->private_data;
- if (!md)
+ if (!md) {
+ retval = -ENXIO;
goto out;
+ }
if (test_bit(DMF_FREEING, &md->flags) ||
dm_deleting_md(md)) {
md = NULL;
+ retval = -ENXIO;
+ goto out;
+ }
+ if (get_disk_ro(md->disk) && (mode & FMODE_WRITE)) {
+ md = NULL;
+ retval = -EROFS;
goto out;
}
@@ -356,7 +365,7 @@ static int dm_blk_open(struct block_devi
out:
spin_unlock(&_minor_lock);
- return md ? 0 : -ENXIO;
+ return retval;
}
static int dm_blk_close(struct gendisk *disk, fmode_t mode)
@@ -2093,6 +2102,11 @@ static struct dm_table *__bind(struct ma
old_map = md->map;
md->map = t;
dm_table_set_restrictions(t, q, limits);
+ if (!(dm_table_get_mode(t) & FMODE_WRITE)) {
+ set_disk_ro(md->disk, 1);
+ } else {
+ set_disk_ro(md->disk, 0);
+ }
write_unlock_irqrestore(&md->map_lock, flags);
return old_map;

@ -0,0 +1,25 @@
From: Jeff Mahoney <jeffm@suse.com>
Subject: dmar: Fix oops with no DMAR table
References: bnc#548108
Patch-mainline: submitted 17 Mar 2010
On systems without a DMAR table and with DMAR enabled, we will oops
in dmar_ir_support(). This patch makes sure we actually have a DMAR
table before checking it.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
drivers/pci/dmar.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1460,5 +1460,5 @@ int __init dmar_ir_support(void)
{
struct acpi_table_dmar *dmar;
dmar = (struct acpi_table_dmar *)dmar_tbl;
- return dmar->flags & 0x1;
+ return dmar && dmar->flags & 0x1;
}

@ -0,0 +1,41 @@
From: Jeff Mahoney <jeffm@suse.com>
Subject: [PATCH] ext3: always mark super uptodate before dirty
References: bnc#457043
Patch-mainline: not yet
The superblock's bh is something of an exception. It is only read
during mount and is only released during unmount. The in-memory
copy is invariably the most recent one.
If a write error occurs while syncing the superblock, it will be marked
!uptodate. When another error occurs, ext3_error will invoke
ext3_commit_super, which will mark the superblock dirty and try to
sync it out again. If the buffer is !uptodate, then mark_buffer_dirty
will issue a warning, but continue anyway.
This patch marks it uptodate before writing it out. This doesn't really
change anything other than silencing the warning in mark_buffer_dirty.
If the write succeeds, good. Otherwise, it will just have uptodate
cleared again.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
fs/ext3/super.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2382,6 +2382,13 @@ static int ext3_commit_super(struct supe
es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
BUFFER_TRACE(sbh, "marking dirty");
+
+ /* We only read the superblock once. The in-memory version is
+ * always the most recent. If ext3_error is called after a
+ * superblock write failure, it will be !uptodate. This write
+ * will likely fail also, but it avoids the WARN_ON in
+ * mark_buffer_dirty. */
+ set_buffer_uptodate(sbh);
mark_buffer_dirty(sbh);
if (sync)
error = sync_dirty_buffer(sbh);

@ -0,0 +1,63 @@
From: Ludwig Nussel <lnussel@novell.com>
Subject: make nf_conntrack_slp actually work
References: bnc#470963
Patch-mainline: not yet, depends on patches.suse/netfilter-ip_conntrack_slp.patch
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
net/netfilter/nf_conntrack_slp.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
--- a/net/netfilter/nf_conntrack_slp.c
+++ b/net/netfilter/nf_conntrack_slp.c
@@ -47,15 +47,15 @@ static int help(struct sk_buff *skb, uns
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nf_conntrack_expect *exp;
- struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
struct in_device *in_dev;
__be32 mask = 0;
+ __be32 src = 0;
/* we're only interested in locally generated packets */
if (skb->sk == NULL)
goto out;
- if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+ if (rt == NULL || !(rt->rt_flags & (RTCF_MULTICAST|RTCF_BROADCAST)))
goto out;
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
@@ -64,15 +64,18 @@ static int help(struct sk_buff *skb, uns
in_dev = __in_dev_get_rcu(rt->u.dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
- if (ifa->ifa_broadcast == iph->daddr) {
- mask = ifa->ifa_mask;
- break;
- }
+ /* this is a hack as slp uses multicast we can't match
+ * the destination address to some broadcast address. So
+ * just take the first one. Better would be to install
+ * expectations for all addresses */
+ mask = ifa->ifa_mask;
+ src = ifa->ifa_broadcast;
+ break;
} endfor_ifa(in_dev);
}
rcu_read_unlock();
- if (mask == 0)
+ if (mask == 0 || src == 0)
goto out;
exp = nf_ct_expect_alloc(ct);
@@ -80,6 +83,7 @@ static int help(struct sk_buff *skb, uns
goto out;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ exp->tuple.src.u3.ip = src;
exp->tuple.src.u.udp.port = htons(SLP_PORT);
exp->mask.src.u3.ip = mask;

@ -0,0 +1,30 @@
From: Dean Roe <roe@sgi.com>
Subject: Prevent NULL pointer deref in grab_swap_token
References: 159260
Patch-mainline: not yet
grab_swap_token() assumes that the current process has an mm struct,
which is not true for kernel threads invoking get_user_pages(). Since
this should be extremely rare, just return from grab_swap_token()
without doing anything.
Signed-off-by: Dean Roe <roe@sgi.com>
Acked-by: mason@suse.de
Acked-by: okir@suse.de
---
mm/thrash.c | 3 +++
1 file changed, 3 insertions(+)
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -31,6 +31,9 @@ void grab_swap_token(struct mm_struct *m
int current_interval;
global_faults++;
+ if (mm == NULL)
+ return;
+
current_interval = global_faults - mm->faultstamp;

@ -0,0 +1,110 @@
From: Thomas Renninger <trenn@suse.de>
Subject: x86 platform drivers: hp-wmi Reorder event id processing
References: bnc#598059
Patch-Mainline: submitted
Event id 0x4 defines the hotkey event.
There is no need (it is even wrong) to query HPWMI_HOTKEY_QUERY if the event id is not 0x4.
Reorder the eventcode conditionals and use a switch statement instead of if/else.
Use an enum for the event id cases.
Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: mjg@redhat.com
CC: linux-acpi@vger.kernel.org
---
drivers/platform/x86/hp-wmi.c | 51 ++++++++++++++++++++++++++----------------
1 file changed, 32 insertions(+), 19 deletions(-)
Index: linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
===================================================================
--- linux-2.6.33-master.orig/drivers/platform/x86/hp-wmi.c
+++ linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
@@ -58,6 +58,12 @@ enum hp_wmi_radio {
HPWMI_WWAN = 2,
};
+enum hp_wmi_event_ids {
+ HPWMI_DOCK_EVENT = 1,
+ HPWMI_BEZEL_BUTTON = 4,
+ HPWMI_WIRELESS = 5,
+};
+
static int __devinit hp_wmi_bios_setup(struct platform_device *device);
static int __exit hp_wmi_bios_remove(struct platform_device *device);
static int hp_wmi_resume_handler(struct device *device);
@@ -338,7 +344,7 @@ static void hp_wmi_notify(u32 value, voi
struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
static struct key_entry *key;
union acpi_object *obj;
- int eventcode;
+ int eventcode, key_code;
acpi_status status;
status = wmi_get_event_data(value, &response);
@@ -357,28 +363,32 @@ static void hp_wmi_notify(u32 value, voi
eventcode = *((u8 *) obj->buffer.pointer);
kfree(obj);
- if (eventcode == 0x4)
- eventcode = hp_wmi_perform_query(HPWMI_HOTKEY_QUERY, 0,
- 0);
- key = hp_wmi_get_entry_by_scancode(eventcode);
- if (key) {
- switch (key->type) {
- case KE_KEY:
- input_report_key(hp_wmi_input_dev,
- key->keycode, 1);
- input_sync(hp_wmi_input_dev);
- input_report_key(hp_wmi_input_dev,
- key->keycode, 0);
- input_sync(hp_wmi_input_dev);
- break;
- }
- } else if (eventcode == 0x1) {
+ switch (eventcode) {
+ case HPWMI_DOCK_EVENT:
input_report_switch(hp_wmi_input_dev, SW_DOCK,
hp_wmi_dock_state());
input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
hp_wmi_tablet_state());
input_sync(hp_wmi_input_dev);
- } else if (eventcode == 0x5) {
+ break;
+ case HPWMI_BEZEL_BUTTON:
+ key_code = hp_wmi_perform_query(HPWMI_HOTKEY_QUERY, 0,
+ 0);
+ key = hp_wmi_get_entry_by_scancode(key_code);
+ if (key) {
+ switch (key->type) {
+ case KE_KEY:
+ input_report_key(hp_wmi_input_dev,
+ key->keycode, 1);
+ input_sync(hp_wmi_input_dev);
+ input_report_key(hp_wmi_input_dev,
+ key->keycode, 0);
+ input_sync(hp_wmi_input_dev);
+ break;
+ }
+ }
+ break;
+ case HPWMI_WIRELESS:
if (wifi_rfkill)
rfkill_set_states(wifi_rfkill,
hp_wmi_get_sw_state(HPWMI_WIFI),
@@ -391,9 +401,12 @@ static void hp_wmi_notify(u32 value, voi
rfkill_set_states(wwan_rfkill,
hp_wmi_get_sw_state(HPWMI_WWAN),
hp_wmi_get_hw_state(HPWMI_WWAN));
- } else
+ break;
+ default:
printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
eventcode);
+ break;
+ }
}
static int __init hp_wmi_input_setup(void)

@ -0,0 +1,25 @@
From: Thomas Renninger <trenn@suse.de>
Subject: x86 platform drivers: hp-wmi Add media key 0x20e8
References: bnc#598059
Patch-Mainline: submitted
Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: mjg@redhat.com
CC: linux-acpi@vger.kernel.org
---
drivers/platform/x86/hp-wmi.c | 1 +
1 file changed, 1 insertion(+)
Index: linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
===================================================================
--- linux-2.6.33-master.orig/drivers/platform/x86/hp-wmi.c
+++ linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
@@ -96,6 +96,7 @@ static struct key_entry hp_wmi_keymap[]
{KE_KEY, 0x02, KEY_BRIGHTNESSUP},
{KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
{KE_KEY, 0x20e6, KEY_PROG1},
+ {KE_KEY, 0x20e8, KEY_MEDIA},
{KE_KEY, 0x2142, KEY_MEDIA},
{KE_KEY, 0x213b, KEY_INFO},
{KE_KEY, 0x2169, KEY_DIRECTION},

@ -0,0 +1,39 @@
From: Thomas Renninger <trenn@suse.de>
Subject: x86 platform drivers: hp-wmi Catch and log unknown event and key codes correctly
References: bnc#598059
Patch-Mainline: submitted
Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: mjg@redhat.com
CC: linux-acpi@vger.kernel.org
---
drivers/platform/x86/hp-wmi.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
Index: linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
===================================================================
--- linux-2.6.33-master.orig/drivers/platform/x86/hp-wmi.c
+++ linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
@@ -386,7 +386,9 @@ static void hp_wmi_notify(u32 value, voi
input_sync(hp_wmi_input_dev);
break;
}
- }
+ } else
+ printk(KERN_INFO "HP WMI: Unknown key code - 0x%x\n",
+ key_code);
break;
case HPWMI_WIRELESS:
if (wifi_rfkill)
@@ -403,8 +405,8 @@ static void hp_wmi_notify(u32 value, voi
hp_wmi_get_hw_state(HPWMI_WWAN));
break;
default:
- printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
- eventcode);
+ printk(KERN_INFO "HP WMI: Unknown eventcode - %d\n",
+ eventcode);
break;
}
}

@ -0,0 +1,61 @@
From: Thomas Renninger <trenn@suse.de>
Subject: x86 platform drivers: hp-wmi Use consistent prefix string for messages.
References: bnc#598059
Patch-Mainline: submitted
Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: mjg@redhat.com
CC: linux-acpi@vger.kernel.org
---
drivers/platform/x86/hp-wmi.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
Index: linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
===================================================================
--- linux-2.6.33-master.orig/drivers/platform/x86/hp-wmi.c
+++ linux-2.6.33-master/drivers/platform/x86/hp-wmi.c
@@ -52,6 +52,8 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE9
#define HPWMI_WIRELESS_QUERY 0x5
#define HPWMI_HOTKEY_QUERY 0xc
+#define PREFIX "HP WMI: "
+
enum hp_wmi_radio {
HPWMI_WIFI = 0,
HPWMI_BLUETOOTH = 1,
@@ -349,14 +351,14 @@ static void hp_wmi_notify(u32 value, voi
status = wmi_get_event_data(value, &response);
if (status != AE_OK) {
- printk(KERN_INFO "hp-wmi: bad event status 0x%x\n", status);
+ printk(KERN_INFO PREFIX "bad event status 0x%x\n", status);
return;
}
obj = (union acpi_object *)response.pointer;
if (!obj || obj->type != ACPI_TYPE_BUFFER || obj->buffer.length != 8) {
- printk(KERN_INFO "HP WMI: Unknown response received\n");
+ printk(KERN_INFO PREFIX "Unknown response received\n");
kfree(obj);
return;
}
@@ -387,7 +389,7 @@ static void hp_wmi_notify(u32 value, voi
break;
}
} else
- printk(KERN_INFO "HP WMI: Unknown key code - 0x%x\n",
+ printk(KERN_INFO PREFIX "Unknown key code - 0x%x\n",
key_code);
break;
case HPWMI_WIRELESS:
@@ -405,7 +407,7 @@ static void hp_wmi_notify(u32 value, voi
hp_wmi_get_hw_state(HPWMI_WWAN));
break;
default:
- printk(KERN_INFO "HP WMI: Unknown eventcode - %d\n",
+ printk(KERN_INFO PREFIX "Unknown eventcode - %d\n",
eventcode);
break;
}

@ -0,0 +1,44 @@
Date: Tue, 6 Jan 2009 10:27:41 -0600
From: Dimitri Sivanich <sivanich@sgi.com>
To: linux-ia64@vger.kernel.org, Tony Luck <tony.luck@intel.com>,
Greg KH <greg@kroah.com>
Cc: linux-kernel@vger.kernel.org,
Peter Zijlstra <peterz@infradead.org>,
Gregory Haskins <ghaskins@novell.com>, Nick Piggin <npiggin@suse.de>,
Tony Luck <tony.luck@gmail.com>, Robin Holt <holt@sgi.com>
Subject: configure HAVE_UNSTABLE_SCHED_CLOCK for SGI_SN systems
Patch-mainline: not yet
Turn on CONFIG_HAVE_UNSTABLE_SCHED_CLOCK for SGI_SN.
SGI Altix has unsynchronized itc clocks. This results in rq->clock
occasionally being set to a time in the past by a remote cpu.
Note that it is possible that this problem may exist for other ia64
machines as well, based on the following comment for sched_clock() in
arch/ia64/kernel/head.S:
* Return a CPU-local timestamp in nano-seconds. This timestamp is
* NOT synchronized across CPUs its return value must never be
* compared against the values returned on another CPU. The usage in
* kernel/sched.c ensures that.
Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---
arch/ia64/Kconfig | 1 +
1 file changed, 1 insertion(+)
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -545,6 +545,7 @@ config IA64_MC_ERR_INJECT
config SGI_SN
def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
+ select HAVE_UNSTABLE_SCHED_CLOCK
config IA64_ESI
bool "ESI (Extensible SAL Interface) support"

@ -0,0 +1,53 @@
From: Jan Blunck <jblunck@suse.de>
Subject: ia64-kvm: fix sparse warnings
Patch-mainline: not yet
This patch fixes some sparse warnings about dubious one-bit signed bitfields.
Signed-off-by: Jan Blunck <jblunck@suse.de>
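For reference, a minimal user-space demonstration of why sparse complains (an illustration, not part of the patch): a one-bit signed bitfield can only represent 0 and -1, so a test against 1 silently fails:
#include <stdio.h>

struct vac_like {
	int s:1;		/* dubious: one-bit signed bitfield */
	unsigned int u:1;	/* what the patch converts the fields to */
};

int main(void)
{
	struct vac_like f;

	f.s = 1;	/* implementation-defined; typically stored as -1 */
	f.u = 1;
	printf("s = %d, u = %u\n", f.s, f.u);	/* usually prints "s = -1, u = 1" */
	printf("s == 1? %d\n", f.s == 1);	/* usually 0: the comparison never matches */
	return 0;
}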
---
arch/ia64/kvm/vti.h | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
--- a/arch/ia64/kvm/vti.h
+++ b/arch/ia64/kvm/vti.h
@@ -83,13 +83,13 @@
union vac {
unsigned long value;
struct {
- int a_int:1;
- int a_from_int_cr:1;
- int a_to_int_cr:1;
- int a_from_psr:1;
- int a_from_cpuid:1;
- int a_cover:1;
- int a_bsw:1;
+ unsigned int a_int:1;
+ unsigned int a_from_int_cr:1;
+ unsigned int a_to_int_cr:1;
+ unsigned int a_from_psr:1;
+ unsigned int a_from_cpuid:1;
+ unsigned int a_cover:1;
+ unsigned int a_bsw:1;
long reserved:57;
};
};
@@ -97,12 +97,12 @@ union vac {
union vdc {
unsigned long value;
struct {
- int d_vmsw:1;
- int d_extint:1;
- int d_ibr_dbr:1;
- int d_pmc:1;
- int d_to_pmd:1;
- int d_itm:1;
+ unsigned int d_vmsw:1;
+ unsigned int d_extint:1;
+ unsigned int d_ibr_dbr:1;
+ unsigned int d_pmc:1;
+ unsigned int d_to_pmd:1;
+ unsigned int d_itm:1;
long reserved:58;
};
};

@ -0,0 +1,27 @@
From: unknown@suse.de
Subject: some unknown ieee1394 patch
Patch-mainline: not yet
Make the long format the default because it's also the default in the
new firewire stack.
Maybe it simplifies migration for new 10.3 installs to 11.0 or later.
Maybe it is bad for existing 10.3 and earlier installs.
modprobe -v sbp2 sbp2_long_sysfs_ieee1394_id=0 to get the old short name
modprobe -v sbp2 sbp2_long_sysfs_ieee1394_id=1 to get the new long name
---
drivers/ieee1394/sbp2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -225,7 +225,7 @@ MODULE_PARM_DESC(workarounds, "Work arou
* independent of the implementation of the ieee1394 nodemgr, the longer format
* is recommended for future use.
*/
-static int sbp2_long_sysfs_ieee1394_id;
+static int sbp2_long_sysfs_ieee1394_id = 1;
module_param_named(long_ieee1394_id, sbp2_long_sysfs_ieee1394_id, bool, 0644);
MODULE_PARM_DESC(long_ieee1394_id, "8+3+2 bytes format of ieee1394_id in sysfs "
"(default = backwards-compatible = N, SAM-conforming = Y)");

@ -0,0 +1,30 @@
From: Jiri Kosina <jkosina@suse.cz>
Subject: Input: Add Acer Aspire 5710 to nomux blacklist
References: bnc#404881
Patch-mainline: submitted
The Acer Aspire 5710 needs to be added to the nomux blacklist, otherwise
the touchpad misbehaves.
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -360,6 +360,13 @@ static const struct dmi_system_id __init
},
},
{
+ /* Acer Aspire 5710 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710"),
+ },
+ },
+ {
/* Gericom Bellagio */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Gericom"),

@ -0,0 +1,28 @@
Subject: kbuild: fix generating of *.symtypes files
From: Michal Marek <mmarek@suse.cz>
Patch-mainline: submitted 2009-06-29
Commit 37a8d9f ("kbuild: simplify use of genksyms") broke the generation
of *.symtypes files during the build (with KBUILD_SYMTYPES set). This
patch fixes it.
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
scripts/Makefile.build | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -156,9 +156,9 @@ $(obj)/%.i: $(src)/%.c FORCE
cmd_gensymtypes = \
$(CPP) -D__GENKSYMS__ $(c_flags) $< | \
- $(GENKSYMS) -T $@ -a $(ARCH) \
+ $(GENKSYMS) $(if $(strip $(1)), -T $(@:.o=.symtypes)) -a $(ARCH) \
$(if $(KBUILD_PRESERVE),-p) \
- $(if $(1),-r $(firstword $(wildcard $(@:.symtypes=.symref) /dev/null)))
+ -r $(firstword $(wildcard $(basename $@).symref /dev/null))
quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
cmd_cc_symtypes_c = \

@ -0,0 +1,21 @@
From: agraf@suse.de
Subject: Ignore apic polarity
Patch-mainline: unknown
References: bnc#556564
---
virt/kvm/ioapic.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -200,7 +200,8 @@ int kvm_ioapic_set_irq(struct kvm_ioapic
spin_lock(&ioapic->lock);
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
- level ^= entry.fields.polarity;
+// polarity is always active high in qemu
+// level ^= entry.fields.polarity;
if (!level)
ioapic->irr &= ~mask;
else {

@ -0,0 +1,73 @@
From: agraf@suse.de
Subject: Implement some missing intercepts so osx doesn't blow up
Patch-mainline: unknown
References: bnc#556564
---
arch/x86/kvm/svm.c | 20 ++++++++++++++++++--
arch/x86/kvm/x86.c | 4 +++-
2 files changed, 21 insertions(+), 3 deletions(-)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1995,6 +1995,22 @@ static int skinit_interception(struct vc
return 1;
}
+static int monitor_interception(struct vcpu_svm *svm)
+{
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ return 1;
+}
+
+static int mwait_interception(struct vcpu_svm *svm)
+{
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ return kvm_emulate_halt(&svm->vcpu);
+}
+
static int invalid_op_interception(struct vcpu_svm *svm)
{
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
@@ -2376,8 +2392,8 @@ static int (*svm_exit_handlers[])(struct
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = skinit_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
- [SVM_EXIT_MONITOR] = invalid_op_interception,
- [SVM_EXIT_MWAIT] = invalid_op_interception,
+ [SVM_EXIT_MONITOR] = monitor_interception,
+ [SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_NPF] = pf_interception,
};
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1144,6 +1144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
break;
+ case 0xe2:
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
case MSR_IA32_APICBASE:
@@ -1400,6 +1401,7 @@ int kvm_get_msr_common(struct kvm_vcpu *
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
+ case 0xe2:
data = 0;
break;
case MSR_MTRRcap:
@@ -1848,7 +1850,7 @@ static void do_cpuid_ent(struct kvm_cpui
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
/* cpuid 1.ecx */
const u32 kvm_supported_word4_x86_features =
- F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
+ F(XMM3) | bit((4*32+ 3)) /* MONITOR */ | 0 /* Reserved, DTES64 */ |
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |

@ -0,0 +1,193 @@
From: Bernhard Walle <bwalle@suse.de>
Subject: Fix performance regression on large IA64 systems
References: bnc#469589
Patch-mainline: no (and never will)
This patch tries to address a performance regression discovered by SGI.
Patch b60c1f6ffd88850079ae419aa933ab0eddbd5535 removes the call
to note_interrupt() in __do_IRQ(). Patch d85a60d85ea5b7c597508c1510c88e657773d378
adds it again because it is needed for irqpoll.
This patch introduces a new parameter 'only_fixup' for note_interrupt().
The parameter distinguishes two cases:
TRUE => the function body is only executed when irqfixup is set;
both 'irqpoll' and 'irqfixup' set it.
FALSE => the behaviour note_interrupt() has always had.
The patch converts all calls of note_interrupt() to only_fixup=FALSE,
except the call that had been removed by b60c1f6ffd88850079ae419aa933ab0eddbd5535.
That call is now always made, but its body is only executed when either
'irqpoll' or 'irqfixup' is specified.
This patch is not meant for mainline inclusion in the first run!
Signed-off-by: Bernhard Walle <bwalle@suse.de>
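A toy user-space model of the gating described above (the names mirror the patch; the real irqfixup is a kernel boot-time setting, so this is only a sketch of the semantics):
#include <stdbool.h>
#include <stdio.h>

static int irqfixup;	/* 0 unless 'irqfixup'/'irqpoll' were given on the command line */

static void note_interrupt(int irq, bool only_fixup)
{
	if (only_fixup && irqfixup == 0)
		return;		/* the always-made __do_IRQ() call stays a cheap no-op */
	printf("irq %d: spurious-IRQ accounting runs\n", irq);
}

int main(void)
{
	note_interrupt(1, true);	/* silent: fast path not penalized */
	irqfixup = 1;			/* as if booted with irqfixup/irqpoll */
	note_interrupt(1, true);	/* now the accounting runs */
	note_interrupt(2, false);	/* ordinary call sites: unchanged behaviour */
	return 0;
}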
---
arch/arm/mach-ns9xxx/irq.c | 2 +-
arch/powerpc/platforms/cell/interrupt.c | 2 +-
drivers/mfd/ezx-pcap.c | 3 ++-
drivers/mfd/twl4030-irq.c | 2 +-
include/linux/irq.h | 2 +-
kernel/irq/chip.c | 12 ++++++------
kernel/irq/handle.c | 4 ++--
kernel/irq/spurious.c | 10 +++++++++-
8 files changed, 23 insertions(+), 14 deletions(-)
--- a/arch/arm/mach-ns9xxx/irq.c
+++ b/arch/arm/mach-ns9xxx/irq.c
@@ -85,7 +85,7 @@ static void handle_prio_irq(unsigned int
/* XXX: There is no direct way to access noirqdebug, so check
* unconditionally for spurious irqs...
* Maybe this function should go to kernel/irq/chip.c? */
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -268,7 +268,7 @@ static void handle_iic_irq(unsigned int
raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -203,7 +203,8 @@ static void pcap_isr_work(struct work_st
break;
if (desc->status & IRQ_DISABLED)
- note_interrupt(irq, desc, IRQ_NONE);
+ note_interrupt(irq, desc, IRQ_NONE,
+ false);
else
desc->handle_irq(irq, desc);
}
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -330,7 +330,7 @@ static int twl4030_irq_thread(void *data
*/
if (d->status & IRQ_DISABLED)
note_interrupt(module_irq, d,
- IRQ_NONE);
+ IRQ_NONE, false);
else
d->handle_irq(module_irq, d);
}
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -324,7 +324,7 @@ static inline void generic_handle_irq(un
/* Handling of unhandled and spurious interrupts: */
extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
- irqreturn_t action_ret);
+ irqreturn_t action_ret, bool only_fixup);
/* Resending of interrupts :*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq);
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -390,7 +390,7 @@ void handle_nested_irq(unsigned int irq)
action_ret = action->thread_fn(action->irq, action->dev_id);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock_irq(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -434,7 +434,7 @@ handle_simple_irq(unsigned int irq, stru
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -479,7 +479,7 @@ handle_level_irq(unsigned int irq, struc
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -535,7 +535,7 @@ handle_fasteoi_irq(unsigned int irq, str
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -613,7 +613,7 @@ handle_edge_irq(unsigned int irq, struct
raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
@@ -642,7 +642,7 @@ handle_percpu_irq(unsigned int irq, stru
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
if (desc->chip->eoi)
desc->chip->eoi(irq);
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -465,7 +465,7 @@ unsigned int __do_IRQ(unsigned int irq)
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, true);
}
desc->chip->end(irq);
return 1;
@@ -519,7 +519,7 @@ unsigned int __do_IRQ(unsigned int irq)
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
raw_spin_lock(&desc->lock);
if (likely(!(desc->status & IRQ_PENDING)))
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -213,9 +213,17 @@ try_misrouted_irq(unsigned int irq, stru
return action && (action->flags & IRQF_IRQPOLL);
}
+/*
+ * The parameter "only_fixup" means that the function should be only executed
+ * if this parameter is set either to false or to true simultaneously with
+ * irqfixup enabled.
+ */
void note_interrupt(unsigned int irq, struct irq_desc *desc,
- irqreturn_t action_ret)
+ irqreturn_t action_ret, bool only_fixup)
{
+ if (only_fixup && irqfixup == 0)
+ return;
+
if (unlikely(action_ret != IRQ_HANDLED)) {
/*
* If we are seeing only the odd spurious IRQ caused by

@ -0,0 +1,120 @@
From: Jeff Mahoney <jeffm@suse.com>
Subject: netfilter: Remove pointless CONFIG_NF_CT_ACCT warning
References: bnc#552033 (and others)
Patch-mainline: not yet
CONFIG_NF_CT_ACCT was scheduled at 2.6.27 release-time to be removed
in 2.6.29. That hasn't happened, and it's sort of pointless to remove the
option as it sets the default value for whether it's nf_conntrack_acct is
enabled at boot-time.
It still issues a really annoying warning though. This patch properly
documents the option as controlling the default and undeprecates it. It
also renames the option to a more subsystem-consistent NF_CONNTRACK_ACCT.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
Documentation/feature-removal-schedule.txt | 9 ---------
Documentation/kernel-parameters.txt | 3 +--
net/netfilter/Kconfig | 11 +++++------
net/netfilter/nf_conntrack_acct.c | 8 +-------
net/netfilter/nf_conntrack_netlink.c | 2 --
5 files changed, 7 insertions(+), 26 deletions(-)
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -313,15 +313,6 @@ Who: Johannes Berg <johannes@sipsolution
---------------------------
-What: CONFIG_NF_CT_ACCT
-When: 2.6.29
-Why: Accounting can now be enabled/disabled without kernel recompilation.
- Currently used only to set a default value for a feature that is also
- controlled by a kernel/module/sysfs/sysctl parameter.
-Who: Krzysztof Piotr Oledzki <ole@ans.pl>
-
----------------------------
-
What: sysfs ui for changing p4-clockmod parameters
When: September 2009
Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1567,8 +1567,7 @@ and is between 256 and 4096 characters.
[NETFILTER] Enable connection tracking flow accounting
0 to disable accounting
1 to enable accounting
- Default value depends on CONFIG_NF_CT_ACCT that is
- going to be removed in 2.6.29.
+ Default value depends on CONFIG_NF_CT_ACCT.
nfsaddrs= [NFS]
See Documentation/filesystems/nfs/nfsroot.txt.
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -40,12 +40,13 @@ config NF_CONNTRACK
if NF_CONNTRACK
-config NF_CT_ACCT
- bool "Connection tracking flow accounting"
+config NF_CONNTRACK_ACCT
+ bool "Enable connection tracking flow accounting by default"
depends on NETFILTER_ADVANCED
help
- If this option is enabled, the connection tracking code will
- keep per-flow packet and byte counters.
+
+ This option controls whether per-flow packet and byte counters
+ are enabled by default.
Those counters can be used for flow-based accounting or the
`connbytes' match.
@@ -57,8 +58,6 @@ config NF_CT_ACCT
You may also disable/enable it on a running system with:
sysctl net.netfilter.nf_conntrack_acct=0/1
- This option will be removed in 2.6.29.
-
If unsure, say `N'.
config NF_CONNTRACK_MARK
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -16,7 +16,7 @@
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
-#ifdef CONFIG_NF_CT_ACCT
+#ifdef CONFIG_NF_CONNTRACK_ACCT
#define NF_CT_ACCT_DEFAULT 1
#else
#define NF_CT_ACCT_DEFAULT 0
@@ -113,12 +113,6 @@ int nf_conntrack_acct_init(struct net *n
net->ct.sysctl_acct = nf_ct_acct;
if (net_eq(net, &init_net)) {
-#ifdef CONFIG_NF_CT_ACCT
- printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
- printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
- printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
-
ret = nf_ct_extend_register(&acct_extend);
if (ret < 0) {
printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -435,11 +435,9 @@ ctnetlink_nlmsg_size(const struct nf_con
+ 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+ nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
-#ifdef CONFIG_NF_CT_ACCT
+ 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+ 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+ 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
-#endif
+ nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ nla_total_size(0) /* CTA_PROTOINFO */
+ nla_total_size(0) /* CTA_HELP */

@ -0,0 +1,46 @@
From: Andreas Gruenbacher <agruen@suse.de>
Subject: "No acl" entry put in client-side acl cache instead of "not cached"
References: 171059
When the acl of a file is not cached and only the default acl of that
file is requested, a NULL "no acl" entry is put in the client-side acl
cache of nfs instead of ERR_PTR(-EAGAIN) "not cached".
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
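The fix relies on the convention that NULL means "no acl" while an ERR_PTR value means "nothing known, do not cache". A small user-space toy of that sentinel convention (ERR_PTR/IS_ERR mimicked locally; this is not the nfs code itself):
#include <stdio.h>

#define ERR_PTR(err)	((void *)(long)(err))
#define IS_ERR(p)	((unsigned long)(p) >= (unsigned long)-4095)

static void *cached_acl;	/* the cache slot */
static int slot_valid;		/* has the slot been filled? */

static void cache_acl(void *acl)
{
	if (IS_ERR(acl))
		return;		/* "not cached": leave the slot untouched */
	cached_acl = acl;	/* a real acl, or NULL meaning "no acl" */
	slot_valid = 1;
}

int main(void)
{
	cache_acl(ERR_PTR(-11));	/* -EAGAIN: nothing known, must not cache */
	printf("after EAGAIN: valid=%d\n", slot_valid);	/* 0 */
	cache_acl(NULL);		/* a definite "no acl" answer */
	printf("after NULL:   valid=%d (acl=%p)\n", slot_valid, cached_acl);	/* 1 */
	return 0;
}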
Index: linux-2.6.16/fs/nfs/nfs3acl.c
===================================================================
--- linux-2.6.16.orig/fs/nfs/nfs3acl.c
+++ linux-2.6.16/fs/nfs/nfs3acl.c
@@ -172,8 +172,10 @@ static void nfs3_cache_acls(struct inode
inode->i_ino, acl, dfacl);
spin_lock(&inode->i_lock);
__nfs3_forget_cached_acls(NFS_I(inode));
- nfsi->acl_access = posix_acl_dup(acl);
- nfsi->acl_default = posix_acl_dup(dfacl);
+ if (!IS_ERR(acl))
+ nfsi->acl_access = posix_acl_dup(acl);
+ if (!IS_ERR(dfacl))
+ nfsi->acl_default = posix_acl_dup(dfacl);
spin_unlock(&inode->i_lock);
}
@@ -250,7 +252,9 @@ struct posix_acl *nfs3_proc_getacl(struc
res.acl_access = NULL;
}
}
- nfs3_cache_acls(inode, res.acl_access, res.acl_default);
+ nfs3_cache_acls(inode,
+ (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL),
+ (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL));
switch(type) {
case ACL_TYPE_ACCESS:
@@ -321,6 +325,7 @@ static int nfs3_proc_setacls(struct inod
switch (status) {
case 0:
status = nfs_refresh_inode(inode, &fattr);
+ nfs3_cache_acls(inode, acl, dfacl);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:

@ -0,0 +1,31 @@
From: Michal Hocko <mhocko@novell.com>
Subject: Don't fail allocations for the slot table when mounting an NFS filesystem
Patch-mainline: no
References: bnc#519820
When the *_slot_table_entries exceeds 111, the slot_table_size
exceeds 32K and an order-4 allocation is forced. Order-4 allocations
are not retried nearly as hard as order-3 ones, so failure is more likely.
But mount, and autofs in particular, does not cope well with failure.
So force __GFP_REPEAT - the assumption is that people will only
set the slot_table_size sysctl large on a machine with plenty
of memory, so this should not block indefinitely.
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Neil Brown <neilb@suse.de>
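Back-of-envelope arithmetic behind the order-4 claim, as a runnable sketch (the struct size is assumed for illustration, not measured):
#include <stdio.h>

int main(void)
{
	unsigned int slot_size = 294;		/* assumed sizeof(struct rpc_rqst) */
	unsigned int fit = 32768 / slot_size;	/* slots that fit in an order-3 (32 KB) block */

	printf("order-3 holds %u slots; slot %u forces kcalloc into order-4 (64 KB)\n",
	       fit, fit + 1);
	return 0;
}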
---
net/sunrpc/xprtsock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2295,7 +2295,7 @@ static struct rpc_xprt *xs_setup_xprt(st
xprt = &new->xprt;
xprt->max_reqs = slot_table_size;
- xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
+ xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL | __GFP_REPEAT);
if (xprt->slot == NULL) {
kfree(xprt);
dprintk("RPC: xs_setup_xprt: couldn't allocate slot "

@ -0,0 +1,170 @@
From: ffilz@us.ibm.com
Subject: Revert "NFS: Allow redirtying of a completed unstable write."
Patch-mainline: REVERT patch from 2.6.27
References: 442267
mainline commit e468bae97d243fe0e1515abaa1f7d0edf1476ad0
introduces a BUG() that is apparently fairly easy to trigger.
As it is just making a minor performance enhancement, it is best to
revert the patch until the issue is better understood.
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Neil Brown <neilb@suse.de>
---
fs/nfs/write.c | 65 ++++++++++++++++++++++++++++-----------------------------
1 file changed, 33 insertions(+), 32 deletions(-)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -250,9 +250,12 @@ static int nfs_page_async_flush(struct n
return ret;
spin_lock(&inode->i_lock);
}
- if (test_bit(PG_CLEAN, &req->wb_flags)) {
+ if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+ /* This request is marked for commit */
spin_unlock(&inode->i_lock);
- BUG();
+ nfs_clear_page_tag_locked(req);
+ nfs_pageio_complete(pgio);
+ return 0;
}
if (nfs_set_page_writeback(page) != 0) {
spin_unlock(&inode->i_lock);
@@ -411,6 +414,19 @@ nfs_mark_request_dirty(struct nfs_page *
__set_page_dirty_nobuffers(req->wb_page);
}
+/*
+ * Check if a request is dirty
+ */
+static inline int
+nfs_dirty_request(struct nfs_page *req)
+{
+ struct page *page = req->wb_page;
+
+ if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags))
+ return 0;
+ return !PageWriteback(page);
+}
+
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
* Add a request to the inode's commit list.
@@ -422,7 +438,7 @@ nfs_mark_request_commit(struct nfs_page
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
- set_bit(PG_CLEAN, &(req)->wb_flags);
+ set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
radix_tree_tag_set(&nfsi->nfs_page_tree,
req->wb_index,
NFS_PAGE_TAG_COMMIT);
@@ -432,19 +448,6 @@ nfs_mark_request_commit(struct nfs_page
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
-static int
-nfs_clear_request_commit(struct nfs_page *req)
-{
- struct page *page = req->wb_page;
-
- if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
- dec_zone_page_state(page, NR_UNSTABLE_NFS);
- dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
- return 1;
- }
- return 0;
-}
-
static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
@@ -454,7 +457,7 @@ int nfs_write_need_commit(struct nfs_wri
static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
- if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+ if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
nfs_mark_request_commit(req);
return 1;
}
@@ -470,12 +473,6 @@ nfs_mark_request_commit(struct nfs_page
{
}
-static inline int
-nfs_clear_request_commit(struct nfs_page *req)
-{
- return 0;
-}
-
static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
@@ -533,8 +530,11 @@ static void nfs_cancel_commit_list(struc
while(!list_empty(head)) {
req = nfs_list_entry(head->next);
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
nfs_list_remove_request(req);
- nfs_clear_request_commit(req);
+ clear_bit(PG_NEED_COMMIT, &(req)->wb_flags);
nfs_inode_remove_request(req);
nfs_unlock_request(req);
}
@@ -614,7 +614,8 @@ static struct nfs_page *nfs_try_to_updat
* Note: nfs_flush_incompatible() will already
* have flushed out requests having wrong owners.
*/
- if (offset > rqend
+ if (!nfs_dirty_request(req)
+ || offset > rqend
|| end < req->wb_offset)
goto out_flushme;
@@ -630,10 +631,6 @@ static struct nfs_page *nfs_try_to_updat
spin_lock(&inode->i_lock);
}
- if (nfs_clear_request_commit(req))
- radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
- req->wb_index, NFS_PAGE_TAG_COMMIT);
-
/* Okay, the request matches. Update the region */
if (offset < req->wb_offset) {
req->wb_offset = offset;
@@ -715,7 +712,8 @@ int nfs_flush_incompatible(struct file *
req = nfs_page_find_request(page);
if (req == NULL)
return 0;
- do_flush = req->wb_page != page || req->wb_context != ctx;
+ do_flush = req->wb_page != page || req->wb_context != ctx
+ || !nfs_dirty_request(req);
nfs_release_request(req);
if (!do_flush)
return 0;
@@ -1341,7 +1339,10 @@ static void nfs_commit_release(void *cal
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- nfs_clear_request_commit(req);
+ clear_bit(PG_NEED_COMMIT, &(req)->wb_flags);
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
dprintk("NFS: commit (%s/%lld %d@%lld)",
req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -1516,7 +1517,7 @@ int nfs_wb_page_cancel(struct inode *ino
req = nfs_page_find_request(page);
if (req == NULL)
goto out;
- if (test_bit(PG_CLEAN, &req->wb_flags)) {
+ if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
nfs_release_request(req);
break;
}

@ -0,0 +1,155 @@
Patch-mainline: submitted 04aug2009
References: bnc#498708
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:06:38 +1000
Subject: [PATCH 07/12] sunrpc/cache: allow thread to block while waiting for cache update.
The current practice of waiting for cache updates by queueing the
whole request to be retried has (at least) two problems.
1/ With NFSv4, requests can be quite complex and re-trying a whole
request when a later part fails should only be a last resort, not
normal practice.
2/ Large requests, and in particular any 'write' request, will not be
queued by the current code and doing so would be undesirable.
In many cases only a very short wait is needed before the cache gets
valid data.
So, providing the underlying transport permits it by setting
->thread_wait,
arrange to wait briefly for an upcall to be completed (as reflected in
the clearing of CACHE_PENDING).
If the short wait was not long enough and CACHE_PENDING is still set,
fall back on the old approach.
The 'thread_wait' value is set to 5 seconds when there are spare
threads, and 1 second when there are no spare threads.
These values are probably much higher than needed, but will ensure
some forward progress.
[Fixed 18Jan2010 to return -ve from cache_defer_req when it waits for
the upcall to complete instead of deferring the request.
Thanks to Dong Yang Li <dyli@novell.com>]
Signed-off-by: NeilBrown <neilb@suse.de>
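The control flow can be modelled in user space with a condition variable standing in for the kernel wait queue; a sketch under that assumption (compile with -lpthread), not the sunrpc code itself:
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int cache_pending = 1;	/* models CACHE_PENDING */

static void *upcall(void *arg)
{
	sleep(1);		/* userspace answers after one second */
	pthread_mutex_lock(&lock);
	cache_pending = 0;
	pthread_cond_broadcast(&cond);	/* models the revisit wakeup */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	struct timespec deadline;

	pthread_create(&t, NULL, upcall, NULL);
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 5;	/* thread_wait: block at most 5 seconds */

	pthread_mutex_lock(&lock);
	while (cache_pending &&
	       pthread_cond_timedwait(&cond, &lock, &deadline) == 0)
		;		/* re-check after every wakeup */
	if (cache_pending)
		printf("timed out: fall back to old-style request deferral\n");
	else
		printf("cache filled while we waited: no deferral needed\n");
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}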
---
include/linux/sunrpc/cache.h | 3 ++
net/sunrpc/cache.c | 44 ++++++++++++++++++++++++++++++++++++++++++-
net/sunrpc/svc_xprt.c | 11 ++++++++++
3 files changed, 57 insertions(+), 1 deletion(-)
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -125,6 +125,9 @@ struct cache_detail {
*/
struct cache_req {
struct cache_deferred_req *(*defer)(struct cache_req *req);
+ int thread_wait; /* How long (jiffies) we can block the
+ * current thread to wait for updates.
+ */
};
/* this must be embedded in a deferred_request that is being
* delayed awaiting cache-fill
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -497,10 +497,22 @@ static LIST_HEAD(cache_defer_list);
static struct list_head cache_defer_hash[DFR_HASHSIZE];
static int cache_defer_cnt;
+struct thread_deferred_req {
+ struct cache_deferred_req handle;
+ wait_queue_head_t wait;
+};
+static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+{
+ struct thread_deferred_req *dr =
+ container_of(dreq, struct thread_deferred_req, handle);
+ wake_up(&dr->wait);
+}
+
static int cache_defer_req(struct cache_req *req, struct cache_head *item)
{
struct cache_deferred_req *dreq, *discard;
int hash = DFR_HASH(item);
+ struct thread_deferred_req sleeper;
if (cache_defer_cnt >= DFR_MAX) {
/* too much in the cache, randomly drop this one,
@@ -509,7 +521,14 @@ static int cache_defer_req(struct cache_
if (net_random()&1)
return -ENOMEM;
}
- dreq = req->defer(req);
+ if (req->thread_wait) {
+ dreq = &sleeper.handle;
+ init_waitqueue_head(&sleeper.wait);
+ dreq->revisit = cache_restart_thread;
+ } else
+ dreq = req->defer(req);
+
+ retry:
if (dreq == NULL)
return -ENOMEM;
@@ -543,6 +562,29 @@ static int cache_defer_req(struct cache_
cache_revisit_request(item);
return -EAGAIN;
}
+
+ if (dreq == &sleeper.handle) {
+ wait_event_interruptible_timeout(
+ sleeper.wait,
+ !test_bit(CACHE_PENDING, &item->flags)
+ || list_empty(&sleeper.handle.hash),
+ req->thread_wait);
+ spin_lock(&cache_defer_lock);
+ if (!list_empty(&sleeper.handle.hash)) {
+ list_del_init(&sleeper.handle.recent);
+ list_del_init(&sleeper.handle.hash);
+ cache_defer_cnt--;
+ }
+ spin_unlock(&cache_defer_lock);
+ if (test_bit(CACHE_PENDING, &item->flags)) {
+ /* item is still pending, try request
+ * deferral
+ */
+ dreq = req->defer(req);
+ goto retry;
+ }
+ return -EAGAIN;
+ }
return 0;
}
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -650,6 +650,11 @@ int svc_recv(struct svc_rqst *rqstp, lon
if (signalled() || kthread_should_stop())
return -EINTR;
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided.
+ */
+ rqstp->rq_chandle.thread_wait = 5*HZ;
+
spin_lock_bh(&pool->sp_lock);
xprt = svc_xprt_dequeue(pool);
if (xprt) {
@@ -657,6 +662,12 @@ int svc_recv(struct svc_rqst *rqstp, lon
svc_xprt_get(xprt);
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+
+ /* As there is a shortage of threads and this request
+ * had to be queue, don't allow the thread to wait so
+ * long for cache updates.
+ */
+ rqstp->rq_chandle.thread_wait = 1*HZ;
} else {
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);

@ -0,0 +1,127 @@
Patch-mainline: submitted 04aug2009
References: bnc#498708
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:06:38 +1000
Subject: [PATCH 08/12] sunrpc/cache: retry cache lookups that return -ETIMEDOUT
If cache_check returns -ETIMEDOUT, then the cache item is not
up-to-date, but there is no pending upcall.
This could mean the data is not available, or it could mean that the
good data has been stored in a new cache item.
So re-do the lookup and if that returns a new item, proceed using that
item.
Signed-off-by: NeilBrown <neilb@suse.de>
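The retry shape applied at each call site, modelled as a runnable user-space toy (cache_check() here fakes exactly one stale round; the real code operates on sunrpc cache items):
#include <errno.h>
#include <stdio.h>

struct item { int valid; };

static struct item stale = { 0 }, fresh = { 1 };
static int flushed;

static struct item *item_lookup(void)
{
	return flushed ? &fresh : &stale;	/* a flush replaces the item */
}

static int cache_check(struct item *it)
{
	if (it->valid)
		return 0;
	flushed = 1;		/* pretend good data landed in a new item meanwhile */
	return -ETIMEDOUT;	/* stale, and no upcall pending */
}

int main(void)
{
	struct item *it = item_lookup();

retry:
	if (cache_check(it) == -ETIMEDOUT) {
		struct item *prev = it;

		it = item_lookup();	/* re-do the lookup */
		if (it != prev)
			goto retry;	/* a new item appeared: validate it too */
		printf("same stale item: give up with -EAGAIN\n");
		return 0;
	}
	printf("item valid after %s\n", it == &fresh ? "retry" : "first try");
	return 0;
}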
---
fs/nfsd/export.c | 18 ++++++++++++++++++
net/sunrpc/svcauth_unix.c | 23 ++++++++++++++++++++---
2 files changed, 38 insertions(+), 3 deletions(-)
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -787,9 +787,18 @@ exp_find_key(svc_client *clp, int fsid_t
memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
ek = svc_expkey_lookup(&key);
+ again:
if (ek == NULL)
return ERR_PTR(-ENOMEM);
err = cache_check(&svc_expkey_cache, &ek->h, reqp);
+ if (err == -ETIMEDOUT) {
+ struct svc_expkey *prev_ek = ek;
+ ek = svc_expkey_lookup(&key);
+ if (ek != prev_ek)
+ goto again;
+ if (ek)
+ cache_put(&ek->h, &svc_expkey_cache);
+ }
if (err)
return ERR_PTR(err);
return ek;
@@ -859,9 +868,18 @@ static svc_export *exp_get_by_name(svc_c
key.ex_path = *path;
exp = svc_export_lookup(&key);
+ retry:
if (exp == NULL)
return ERR_PTR(-ENOMEM);
err = cache_check(&svc_export_cache, &exp->h, reqp);
+ if (err == -ETIMEDOUT) {
+ struct svc_export *prev_exp = exp;
+ exp = svc_export_lookup(&key);
+ if (exp != prev_exp)
+ goto retry;
+ if (exp)
+ cache_put(&exp->h, &svc_export_cache);
+ }
if (err)
return ERR_PTR(err);
return exp;
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -662,13 +662,14 @@ static struct unix_gid *unix_gid_lookup(
static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
{
- struct unix_gid *ug;
+ struct unix_gid *ug, *prevug;
struct group_info *gi;
int ret;
ug = unix_gid_lookup(uid);
if (!ug)
return ERR_PTR(-EAGAIN);
+retry:
ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle);
switch (ret) {
case -ENOENT:
@@ -677,6 +678,13 @@ static struct group_info *unix_gid_find(
gi = get_group_info(ug->gi);
cache_put(&ug->h, &unix_gid_cache);
return gi;
+ case -ETIMEDOUT:
+ prevug = ug;
+ ug = unix_gid_lookup(uid);
+ if (ug != prevug)
+ goto retry;
+ if (ug)
+ cache_put(&ug->h, &unix_gid_cache);
default:
return ERR_PTR(-EAGAIN);
}
@@ -687,7 +695,7 @@ svcauth_unix_set_client(struct svc_rqst
{
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6, sin6_storage;
- struct ip_map *ipm;
+ struct ip_map *ipm, *prev_ipm;
struct group_info *gi;
struct svc_cred *cred = &rqstp->rq_cred;
@@ -713,14 +721,23 @@ svcauth_unix_set_client(struct svc_rqst
ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
&sin6->sin6_addr);
+ retry:
if (ipm == NULL)
return SVC_DENIED;
switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
default:
BUG();
- case -EAGAIN:
case -ETIMEDOUT:
+ prev_ipm = ipm;
+ ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
+ &sin6->sin6_addr);
+ if (ipm != prev_ipm)
+ goto retry;
+ if (ipm)
+ cache_put(&ipm->h, &ip_map_cache);
+
+ case -EAGAIN:
return SVC_DROP;
case -ENOENT:
return SVC_DENIED;

@ -0,0 +1,141 @@
Patch-mainline: submitted 04aug2009
References: bnc#498708
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:06:39 +1000
Subject: [PATCH 09/12] nfsd/idmap: drop special request deferral in favour of improved default.
The idmap code manages request deferral by waiting for a reply from
userspace rather than putting the NFS request on a queue to be retried
from the start.
Now that the common deferral code does this, there is no need for the
special code in idmap.
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfsd/nfs4idmap.c | 105 +++++-----------------------------------------------
1 file changed, 11 insertions(+), 94 deletions(-)
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -481,109 +481,26 @@ nfsd_idmap_shutdown(void)
cache_unregister(&nametoid_cache);
}
-/*
- * Deferred request handling
- */
-
-struct idmap_defer_req {
- struct cache_req req;
- struct cache_deferred_req deferred_req;
- wait_queue_head_t waitq;
- atomic_t count;
-};
-
-static inline void
-put_mdr(struct idmap_defer_req *mdr)
-{
- if (atomic_dec_and_test(&mdr->count))
- kfree(mdr);
-}
-
-static inline void
-get_mdr(struct idmap_defer_req *mdr)
-{
- atomic_inc(&mdr->count);
-}
-
-static void
-idmap_revisit(struct cache_deferred_req *dreq, int toomany)
-{
- struct idmap_defer_req *mdr =
- container_of(dreq, struct idmap_defer_req, deferred_req);
-
- wake_up(&mdr->waitq);
- put_mdr(mdr);
-}
-
-static struct cache_deferred_req *
-idmap_defer(struct cache_req *req)
-{
- struct idmap_defer_req *mdr =
- container_of(req, struct idmap_defer_req, req);
-
- mdr->deferred_req.revisit = idmap_revisit;
- get_mdr(mdr);
- return (&mdr->deferred_req);
-}
-
-static inline int
-do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
- struct cache_detail *detail, struct ent **item,
- struct idmap_defer_req *mdr)
-{
- *item = lookup_fn(key);
- if (!*item)
- return -ENOMEM;
- return cache_check(detail, &(*item)->h, &mdr->req);
-}
-
-static inline int
-do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
- struct ent *key, struct cache_detail *detail,
- struct ent **item)
-{
- int ret = -ENOMEM;
-
- *item = lookup_fn(key);
- if (!*item)
- goto out_err;
- ret = -ETIMEDOUT;
- if (!test_bit(CACHE_VALID, &(*item)->h.flags)
- || (*item)->h.expiry_time < get_seconds()
- || detail->flush_time > (*item)->h.last_refresh)
- goto out_put;
- ret = -ENOENT;
- if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
- goto out_put;
- return 0;
-out_put:
- cache_put(&(*item)->h, detail);
-out_err:
- *item = NULL;
- return ret;
-}
-
static int
idmap_lookup(struct svc_rqst *rqstp,
struct ent *(*lookup_fn)(struct ent *), struct ent *key,
struct cache_detail *detail, struct ent **item)
{
- struct idmap_defer_req *mdr;
int ret;
- mdr = kzalloc(sizeof(*mdr), GFP_KERNEL);
- if (!mdr)
+ *item = lookup_fn(key);
+ if (!*item)
return -ENOMEM;
- atomic_set(&mdr->count, 1);
- init_waitqueue_head(&mdr->waitq);
- mdr->req.defer = idmap_defer;
- ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr);
- if (ret == -EAGAIN) {
- wait_event_interruptible_timeout(mdr->waitq,
- test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ);
- ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item);
+ retry:
+ ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
+
+ if (ret == -ETIMEDOUT) {
+ struct ent *prev_item = *item;
+ *item = lookup_fn(key);
+ if (*item != prev_item)
+ goto retry;
+ cache_put(&(*item)->h, detail);
}
- put_mdr(mdr);
return ret;
}
