From: jbeulich@novell.com
Subject: fold per-CPU VIRQs onto a single IRQ each
Patch-mainline: n/a

--- head-2011-02-17.orig/arch/x86/kernel/time-xen.c	2010-11-23 15:07:01.000000000 +0100
+++ head-2011-02-17/arch/x86/kernel/time-xen.c	2010-10-05 16:57:34.000000000 +0200
@@ -671,19 +671,17 @@ int xen_update_persistent_clock(void)
 }
 
 /* Dynamically-mapped IRQ. */
-DEFINE_PER_CPU(int, timer_irq);
+static int __read_mostly timer_irq = -1;
+static struct irqaction timer_action = {
+	.handler = timer_interrupt,
+	.flags = IRQF_DISABLED|IRQF_TIMER,
+	.name = "timer"
+};
 
 static void __init setup_cpu0_timer_irq(void)
 {
-	per_cpu(timer_irq, 0) =
-		bind_virq_to_irqhandler(
-			VIRQ_TIMER,
-			0,
-			timer_interrupt,
-			IRQF_DISABLED|IRQF_TIMER|IRQF_NOBALANCING,
-			"timer0",
-			NULL);
-	BUG_ON(per_cpu(timer_irq, 0) < 0);
+	timer_irq = bind_virq_to_irqaction(VIRQ_TIMER, 0, &timer_action);
+	BUG_ON(timer_irq < 0);
 }
 
 static void __init _late_time_init(void)
@@ -829,8 +827,6 @@ void xen_halt(void)
 }
 
 #ifdef CONFIG_SMP
-static char timer_name[NR_CPUS][15];
-
 int __cpuinit local_setup_timer(unsigned int cpu)
 {
 	int seq, irq;
@@ -856,16 +852,10 @@ int __cpuinit local_setup_timer(unsigned
 		init_missing_ticks_accounting(cpu);
 	} while (read_seqretry(&xtime_lock, seq));
 
-	sprintf(timer_name[cpu], "timer%u", cpu);
-	irq = bind_virq_to_irqhandler(VIRQ_TIMER,
-				      cpu,
-				      timer_interrupt,
-				      IRQF_DISABLED|IRQF_TIMER|IRQF_NOBALANCING,
-				      timer_name[cpu],
-				      NULL);
+	irq = bind_virq_to_irqaction(VIRQ_TIMER, cpu, &timer_action);
 	if (irq < 0)
 		return irq;
-	per_cpu(timer_irq, cpu) = irq;
+	BUG_ON(timer_irq != irq);
 
 	return 0;
 }
@@ -873,7 +863,7 @@ int __cpuinit local_setup_timer(unsigned
 void __cpuinit local_teardown_timer(unsigned int cpu)
 {
 	BUG_ON(cpu == 0);
-	unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
+	unbind_from_per_cpu_irq(timer_irq, cpu, &timer_action);
 }
 #endif
 
--- head-2011-02-17.orig/drivers/xen/core/evtchn.c	2011-02-15 17:52:39.000000000 +0100
+++ head-2011-02-17/drivers/xen/core/evtchn.c	2011-02-16 08:29:06.000000000 +0100
@@ -59,6 +59,23 @@ static DEFINE_SPINLOCK(irq_mapping_updat
 static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
 	[0 ... NR_EVENT_CHANNELS-1] = -1 };
 
+#if defined(CONFIG_SMP) && defined(CONFIG_X86)
+static struct percpu_irqaction {
+	struct irqaction action; /* must be first */
+	struct percpu_irqaction *next;
+	cpumask_var_t cpus;
+} *virq_actions[NR_VIRQS];
+/* IRQ <-> VIRQ mapping. */
+static DECLARE_BITMAP(virq_per_cpu, NR_VIRQS) __read_mostly;
+static DEFINE_PER_CPU_READ_MOSTLY(int[NR_VIRQS], virq_to_evtchn);
+#define BUG_IF_VIRQ_PER_CPU(irq_cfg) \
+	BUG_ON(type_from_irq_cfg(irq_cfg) == IRQT_VIRQ \
+	       && test_bit(index_from_irq_cfg(irq_cfg), virq_per_cpu))
+#else
+#define BUG_IF_VIRQ_PER_CPU(irq_cfg) ((void)0)
+#define PER_CPU_VIRQ_IRQ
+#endif
+
 /* IRQ <-> IPI mapping. */
 #if defined(CONFIG_SMP) && defined(CONFIG_X86)
 static int __read_mostly ipi_irq = -1;
@@ -160,21 +177,34 @@ static inline unsigned int type_from_irq
 	return cfg ? type_from_irq_cfg(cfg) : IRQT_UNBOUND;
 }
 
-#ifndef PER_CPU_IPI_IRQ
 static inline unsigned int evtchn_from_per_cpu_irq(const struct irq_cfg *cfg,
 						   unsigned int cpu)
 {
-	BUG_ON(type_from_irq_cfg(cfg) != IRQT_IPI);
-	return per_cpu(ipi_evtchn, cpu);
-}
+	switch (type_from_irq_cfg(cfg)) {
+#ifndef PER_CPU_VIRQ_IRQ
+	case IRQT_VIRQ:
+		return per_cpu(virq_to_evtchn, cpu)[index_from_irq_cfg(cfg)];
+#endif
+#ifndef PER_CPU_IPI_IRQ
+	case IRQT_IPI:
+		return per_cpu(ipi_evtchn, cpu);
 #endif
+	}
+	BUG();
+	return 0;
+}
 
 static inline unsigned int evtchn_from_irq_cfg(const struct irq_cfg *cfg)
 {
+	switch (type_from_irq_cfg(cfg)) {
+#ifndef PER_CPU_VIRQ_IRQ
+	case IRQT_VIRQ:
+#endif
 #ifndef PER_CPU_IPI_IRQ
-	if (type_from_irq_cfg(cfg) == IRQT_IPI)
-		return evtchn_from_per_cpu_irq(cfg, smp_processor_id());
+	case IRQT_IPI:
 #endif
+		return evtchn_from_per_cpu_irq(cfg, smp_processor_id());
+	}
 	return cfg->info & ((1U << _EVTCHN_BITS) - 1);
 }
 
@@ -357,13 +387,22 @@ asmlinkage void __irq_entry evtchn_do_up
 	 * hardirq handlers see an up-to-date system time even if we
 	 * have just woken from a long idle period.
 	 */
+#ifdef PER_CPU_VIRQ_IRQ
 	if ((irq = percpu_read(virq_to_irq[VIRQ_TIMER])) != -1) {
 		port = evtchn_from_irq(irq);
+#else
	port = percpu_read(virq_to_evtchn[VIRQ_TIMER]);
+	if (VALID_EVTCHN(port)) {
+#endif
 		l1i = port / BITS_PER_LONG;
 		l2i = port % BITS_PER_LONG;
 		if (active_evtchns(l1i) & (1ul<<l2i))
[...]
 		cfg->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
 		per_cpu(virq_to_irq, cpu)[virq] = irq;
 
@@ -646,7 +693,9 @@ static void unbind_from_irq(unsigned int
 	struct irq_cfg *cfg = irq_cfg(irq);
 	int evtchn = evtchn_from_irq_cfg(cfg);
 
+	BUG_IF_VIRQ_PER_CPU(cfg);
 	BUG_IF_IPI(cfg);
+
 	spin_lock(&irq_mapping_update_lock);
 
 	if (!--cfg->bindcount && VALID_EVTCHN(evtchn)) {
@@ -658,6 +707,11 @@ static void unbind_from_irq(unsigned int
 		case IRQT_VIRQ:
 			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
 				[index_from_irq_cfg(cfg)] = -1;
+#ifndef PER_CPU_VIRQ_IRQ
+			for_each_possible_cpu(cpu)
+				per_cpu(virq_to_evtchn, cpu)
+					[index_from_irq_cfg(cfg)] = 0;
+#endif
 			break;
 #if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ)
 		case IRQT_IPI:
@@ -691,13 +745,34 @@ static void unbind_from_irq(unsigned int
 	spin_unlock(&irq_mapping_update_lock);
 }
 
-#ifndef PER_CPU_IPI_IRQ
-void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu)
+#if !defined(PER_CPU_IPI_IRQ) || !defined(PER_CPU_VIRQ_IRQ)
+static inline struct percpu_irqaction *alloc_percpu_irqaction(gfp_t gfp)
+{
+	struct percpu_irqaction *new = kzalloc(sizeof(*new), GFP_ATOMIC);
+
+	if (new && !zalloc_cpumask_var(&new->cpus, gfp)) {
+		kfree(new);
+		new = NULL;
+	}
+	return new;
+}
+
+static inline void free_percpu_irqaction(struct percpu_irqaction *action)
+{
+	if (!action)
+		return;
+	free_cpumask_var(action->cpus);
+	kfree(action);
+}
+
+void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu,
+			     struct irqaction *action)
 {
 	struct evtchn_close close;
 	struct irq_data *data = irq_get_irq_data(irq);
 	struct irq_cfg *cfg = irq_data_cfg(data);
 	int evtchn = evtchn_from_per_cpu_irq(cfg, cpu);
+	struct percpu_irqaction *free_action = NULL;
 
 	spin_lock(&irq_mapping_update_lock);
 
@@ -706,6 +781,34 @@ void unbind_from_per_cpu_irq(unsigned in
 	BUG_ON(cfg->bindcount <= 1);
 	cfg->bindcount--;
+
+#ifndef PER_CPU_VIRQ_IRQ
+	if (type_from_irq_cfg(cfg) == IRQT_VIRQ) {
+		unsigned int virq = index_from_irq_cfg(cfg);
+		struct percpu_irqaction *cur, *prev = NULL;
+
+		cur = virq_actions[virq];
+		while (cur) {
+			if (cur->action.dev_id == action) {
+				cpumask_clear_cpu(cpu, cur->cpus);
+				if (cpumask_empty(cur->cpus)) {
+					WARN_ON(free_action);
+					if (prev)
+						prev->next = cur->next;
+					else
+						virq_actions[virq]
+							= cur->next;
+					free_action = cur;
+				}
+			} else if (cpumask_test_cpu(cpu, cur->cpus))
+				evtchn = 0;
+			cur = (prev = cur)->next;
+		}
+		if (!VALID_EVTCHN(evtchn))
+			goto done;
+	}
+#endif
+
 	cpumask_clear_cpu(cpu, data->affinity);
 
 	close.port = evtchn;
@@ -713,9 +816,17 @@ void unbind_from_per_cpu_irq(unsigned in
 		BUG();
 
 		switch (type_from_irq_cfg(cfg)) {
+#ifndef PER_CPU_VIRQ_IRQ
+		case IRQT_VIRQ:
+			per_cpu(virq_to_evtchn, cpu)
+				[index_from_irq_cfg(cfg)] = 0;
+			break;
+#endif
+#ifndef PER_CPU_IPI_IRQ
 		case IRQT_IPI:
 			per_cpu(ipi_evtchn, cpu) = 0;
 			break;
+#endif
 		default:
 			BUG();
 			break;
@@ -727,9 +838,18 @@ void unbind_from_per_cpu_irq(unsigned in
 		evtchn_to_irq[evtchn] = -1;
 	}
 
+#ifndef PER_CPU_VIRQ_IRQ
+done:
+#endif
 	spin_unlock(&irq_mapping_update_lock);
+
+	if (free_action) {
+		free_irq(irq, free_action->action.dev_id);
+		free_percpu_irqaction(free_action);
+	}
 }
-#endif /* !PER_CPU_IPI_IRQ */
+EXPORT_SYMBOL_GPL(unbind_from_per_cpu_irq);
+#endif /* !PER_CPU_IPI_IRQ || !PER_CPU_VIRQ_IRQ */
 
 int bind_caller_port_to_irqhandler(
 	unsigned int caller_port,
@@ -811,6 +931,10 @@ int bind_virq_to_irqhandler(
 {
 	int irq, retval;
 
+#ifndef PER_CPU_VIRQ_IRQ
+	BUG_ON(test_bit(virq, virq_per_cpu));
+#endif
+
 	irq = bind_virq_to_irq(virq, cpu);
 	if (irq < 0)
 		return irq;
@@ -826,6 +950,109 @@ int bind_virq_to_irqhandler(
 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
 
 #ifdef CONFIG_SMP
+#ifndef PER_CPU_VIRQ_IRQ
+int bind_virq_to_irqaction(
+	unsigned int virq,
+	unsigned int cpu,
+	struct irqaction *action)
+{
+	struct evtchn_bind_virq bind_virq;
+	struct irq_cfg *cfg;
+	int evtchn, irq, retval = 0;
+	struct percpu_irqaction *cur = NULL, *new;
+
+	BUG_ON(!test_bit(virq, virq_per_cpu));
+
+	if (action->dev_id)
+		return -EINVAL;
+
+	new = alloc_percpu_irqaction(GFP_ATOMIC);
+	if (new) {
+		new->action = *action;
+		new->action.dev_id = action;
+	}
+
+	spin_lock(&irq_mapping_update_lock);
+
+	for (cur = virq_actions[virq]; cur; cur = cur->next)
+		if (cur->action.dev_id == action)
+			break;
+	if (!cur) {
+		if (!new) {
+			spin_unlock(&irq_mapping_update_lock);
+			return -ENOMEM;
+		}
+		new->next = virq_actions[virq];
+		virq_actions[virq] = cur = new;
+		new = NULL;
+		retval = 1;
+	}
+	cpumask_set_cpu(cpu, cur->cpus);
+	action = &cur->action;
+
+	if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
+		unsigned int nr;
+
+		BUG_ON(!retval);
+
+		if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
+					    &dynirq_chip, true)) < 0) {
+			virq_actions[virq] = cur->next;
+			spin_unlock(&irq_mapping_update_lock);
+			free_percpu_irqaction(new);
+			return irq;
+		}
+
+		/* Extra reference so count will never drop to zero. */
+		cfg->bindcount++;
+
+		for_each_possible_cpu(nr)
+			per_cpu(virq_to_irq, nr)[virq] = irq;
+		cfg->info = mk_irq_info(IRQT_VIRQ, virq, 0);
+	} else
+		cfg = irq_cfg(irq);
+
+	evtchn = per_cpu(virq_to_evtchn, cpu)[virq];
+	if (!VALID_EVTCHN(evtchn)) {
+		bind_virq.virq = virq;
+		bind_virq.vcpu = cpu;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq) != 0)
+			BUG();
+		evtchn = bind_virq.port;
+		evtchn_to_irq[evtchn] = irq;
+		per_cpu(virq_to_evtchn, cpu)[virq] = evtchn;
+
+		bind_evtchn_to_cpu(evtchn, cpu);
+	}
+
+	cfg->bindcount++;
+
+	spin_unlock(&irq_mapping_update_lock);
+
+	free_percpu_irqaction(new);
+
+	if (retval == 0) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		unmask_evtchn(evtchn);
+		local_irq_restore(flags);
+	} else {
+		action->flags |= IRQF_PERCPU;
+		retval = setup_irq(irq, action);
+		if (retval) {
+			unbind_from_per_cpu_irq(irq, cpu, action);
+			BUG_ON(retval > 0);
+			irq = retval;
+		}
+	}
+
+	return irq;
+}
+EXPORT_SYMBOL_GPL(bind_virq_to_irqaction);
+#endif
+
 #ifdef PER_CPU_IPI_IRQ
 int bind_ipi_to_irqhandler(
 	unsigned int ipi,
@@ -905,7 +1132,7 @@ int __cpuinit bind_ipi_to_irqaction(
 	action->flags |= IRQF_PERCPU | IRQF_NO_SUSPEND;
 	retval = setup_irq(ipi_irq, action);
 	if (retval) {
-		unbind_from_per_cpu_irq(ipi_irq, cpu);
+		unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
 		BUG_ON(retval > 0);
 		ipi_irq = retval;
 	}
@@ -941,7 +1168,9 @@ static void rebind_irq_to_cpu(struct irq
 	const struct irq_cfg *cfg = irq_data_cfg(data);
 	int evtchn = evtchn_from_irq_cfg(cfg);
 
+	BUG_IF_VIRQ_PER_CPU(cfg);
 	BUG_IF_IPI(cfg);
+
 	if (VALID_EVTCHN(evtchn))
 		rebind_evtchn_to_cpu(evtchn, tcpu);
 }
@@ -1233,7 +1462,9 @@ void notify_remote_via_irq(int irq)
 	if (WARN_ON_ONCE(!cfg))
 		return;
+	BUG_ON(type_from_irq_cfg(cfg) == IRQT_VIRQ);
 	BUG_IF_IPI(cfg);
+
 	evtchn = evtchn_from_irq_cfg(cfg);
 	if (VALID_EVTCHN(evtchn))
 		notify_remote_via_evtchn(evtchn);
@@ -1246,6 +1477,7 @@ int irq_to_evtchn_port(int irq)
 	if (!cfg)
 		return 0;
+	BUG_IF_VIRQ_PER_CPU(cfg);
 	BUG_IF_IPI(cfg);
 	return evtchn_from_irq_cfg(cfg);
 }
@@ -1313,6 +1545,12 @@ static void restore_cpu_virqs(unsigned i
 		if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
 			continue;
 
+#ifndef PER_CPU_VIRQ_IRQ
+		if (test_bit(virq, virq_per_cpu)
+		    && !VALID_EVTCHN(per_cpu(virq_to_evtchn, cpu)[virq]))
+			continue;
+#endif
+
 		BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_VIRQ, virq, 0));
 
 		/* Get a new binding from Xen. */
@@ -1325,7 +1563,20 @@ static void restore_cpu_virqs(unsigned i
 
 		/* Record the new mapping. */
 		evtchn_to_irq[evtchn] = irq;
+#ifdef PER_CPU_VIRQ_IRQ
 		irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+#else
+		if (test_bit(virq, virq_per_cpu))
+			per_cpu(virq_to_evtchn, cpu)[virq] = evtchn;
+		else {
+			unsigned int cpu;
+
+			irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq,
+							 evtchn);
+			for_each_possible_cpu(cpu)
+				per_cpu(virq_to_evtchn, cpu)[virq] = evtchn;
+		}
+#endif
 		bind_evtchn_to_cpu(evtchn, cpu);
 
 		/* Ready for use. */
@@ -1389,7 +1640,11 @@ static int evtchn_resume(struct sys_devi
 
 	/* Avoid doing anything in the 'suspend cancelled' case. */
 	status.dom = DOMID_SELF;
+#ifdef PER_CPU_VIRQ_IRQ
 	status.port = evtchn_from_irq(percpu_read(virq_to_irq[VIRQ_TIMER]));
+#else
+	status.port = percpu_read(virq_to_evtchn[VIRQ_TIMER]);
+#endif
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_status, &status))
 		BUG();
 	if (status.status == EVTCHNSTAT_virq
@@ -1666,6 +1921,15 @@ void __init xen_init_IRQ(void)
 	unsigned int i;
 	struct physdev_pirq_eoi_gmfn eoi_gmfn;
 
+#ifndef PER_CPU_VIRQ_IRQ
+	__set_bit(VIRQ_TIMER, virq_per_cpu);
+	__set_bit(VIRQ_DEBUG, virq_per_cpu);
+	__set_bit(VIRQ_XENOPROF, virq_per_cpu);
+#ifdef CONFIG_IA64
+	__set_bit(VIRQ_ITC, virq_per_cpu);
+#endif
+#endif
+
 	init_evtchn_cpu_bindings();
 
 #ifdef CONFIG_SPARSE_IRQ
--- head-2011-02-17.orig/drivers/xen/core/smpboot.c	2011-03-03 16:14:20.000000000 +0100
+++ head-2011-02-17/drivers/xen/core/smpboot.c	2011-03-03 16:14:51.000000000 +0100
@@ -125,7 +125,7 @@ static int __cpuinit xen_smp_intr_init(u
  fail:
 	xen_spinlock_cleanup(cpu);
 unbind_ipi:
-	unbind_from_per_cpu_irq(ipi_irq, cpu);
+	unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
 	return rc;
 }
 
@@ -135,7 +135,7 @@ static void __cpuinit xen_smp_intr_exit(
 	if (cpu != 0)
 		local_teardown_timer(cpu);
 
-	unbind_from_per_cpu_irq(ipi_irq, cpu);
+	unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
 	xen_spinlock_cleanup(cpu);
 }
 #endif
--- head-2011-02-17.orig/drivers/xen/netback/netback.c	2011-03-01 11:52:43.000000000 +0100
+++ head-2011-02-17/drivers/xen/netback/netback.c	2011-03-01 11:53:15.000000000 +0100
@@ -1630,6 +1630,12 @@ static irqreturn_t netif_be_dbg(int irq,
 
 	return IRQ_HANDLED;
 }
+
+static struct irqaction netif_be_dbg_action = {
+	.handler = netif_be_dbg,
+	.flags = IRQF_SHARED,
+	.name = "net-be-dbg"
+};
 #endif
 
 static int __init netback_init(void)
@@ -1689,12 +1695,9 @@ static int __init netback_init(void)
 	netif_xenbus_init();
 
 #ifdef NETBE_DEBUG_INTERRUPT
-	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
-				      0,
-				      netif_be_dbg,
-				      IRQF_SHARED,
-				      "net-be-dbg",
-				      &netif_be_dbg);
+	(void)bind_virq_to_irqaction(VIRQ_DEBUG,
+				     0,
+				     &netif_be_dbg_action);
 #endif
 
 	return 0;
--- head-2011-02-17.orig/drivers/xen/xenoprof/xenoprofile.c	2011-02-01 14:42:26.000000000 +0100
+++ head-2011-02-17/drivers/xen/xenoprof/xenoprofile.c	2010-09-09 16:53:30.000000000 +0200
@@ -209,6 +209,11 @@ static irqreturn_t xenoprof_ovf_interrup
 	return IRQ_HANDLED;
 }
 
+static struct irqaction ovf_action = {
+	.handler = xenoprof_ovf_interrupt,
+	.flags = IRQF_DISABLED,
+	.name = "xenoprof"
+};
 
 static void unbind_virq(void)
 {
@@ -216,7 +221,7 @@ static void unbind_virq(void)
 
 	for_each_online_cpu(i) {
 		if (ovf_irq[i] >= 0) {
-			unbind_from_irqhandler(ovf_irq[i], NULL);
+			unbind_from_per_cpu_irq(ovf_irq[i], i, &ovf_action);
 			ovf_irq[i] = -1;
 		}
 	}
@@ -229,12 +234,7 @@ static int bind_virq(void)
 	int result;
 
 	for_each_online_cpu(i) {
-		result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
-						 i,
-						 xenoprof_ovf_interrupt,
-						 IRQF_DISABLED|IRQF_NOBALANCING,
-						 "xenoprof",
-						 NULL);
+		result = bind_virq_to_irqaction(VIRQ_XENOPROF, i, &ovf_action);
 
 		if (result < 0) {
 			unbind_virq();
--- head-2011-02-17.orig/include/xen/evtchn.h	2011-02-02 15:09:43.000000000 +0100
+++ head-2011-02-17/include/xen/evtchn.h	2010-11-23 16:18:23.000000000 +0100
@@ -94,6 +94,17 @@ int bind_virq_to_irqhandler(
 	unsigned long irqflags,
 	const char *devname,
 	void *dev_id);
+#if defined(CONFIG_SMP) && defined(CONFIG_XEN) && defined(CONFIG_X86)
+int bind_virq_to_irqaction(
+	unsigned int virq,
+	unsigned int cpu,
+	struct irqaction *action);
+#else
+#define bind_virq_to_irqaction(virq, cpu, action) \
+	bind_virq_to_irqhandler(virq, cpu, (action)->handler, \
+				(action)->flags | IRQF_NOBALANCING, \
+				(action)->name, action)
+#endif
 
 #if defined(CONFIG_SMP) && !defined(MODULE)
 #ifndef CONFIG_X86
 int bind_ipi_to_irqhandler(
@@ -118,9 +129,13 @@ DECLARE_PER_CPU(DECLARE_BITMAP(, NR_IPIS
  */
 void unbind_from_irqhandler(unsigned int irq, void *dev_id);
 
-#if defined(CONFIG_SMP) && !defined(MODULE) && defined(CONFIG_X86)
+#if defined(CONFIG_SMP) && defined(CONFIG_XEN) && defined(CONFIG_X86)
 /* Specialized unbind function for per-CPU IRQs. */
-void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu);
+void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu,
+			     struct irqaction *);
+#else
+#define unbind_from_per_cpu_irq(irq, cpu, action) \
+	unbind_from_irqhandler(irq, action)
 #endif
 
 #ifndef CONFIG_XEN
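
For reference, below is a minimal usage sketch of the interface this patch introduces, modeled on the xenoprofile conversion above. It is not part of the patch: my_handler, my_action, my_irq, my_bind and my_unbind are illustrative names, VIRQ_XENOPROF merely stands in for any VIRQ marked per-CPU in xen_init_IRQ() (VIRQ_TIMER, VIRQ_DEBUG, VIRQ_XENOPROF), and the error handling mirrors the bind_virq()/unbind_virq() pattern in xenoprofile.c.

/* Usage sketch only -- not part of the patch. */
#include <linux/interrupt.h>
#include <linux/cpumask.h>
#include <xen/evtchn.h>

static irqreturn_t my_handler(int irq, void *dev_id)
{
	/* dev_id is the caller's irqaction, i.e. &my_action below. */
	return IRQ_HANDLED;
}

/*
 * One irqaction shared by all CPUs; .dev_id must stay NULL, since
 * bind_virq_to_irqaction() rejects a non-NULL value and uses the
 * field internally to identify its private copy of the action.
 */
static struct irqaction my_action = {
	.handler = my_handler,
	.flags = IRQF_DISABLED,
	.name = "my-virq"
};

static int my_irq[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };

static void my_unbind(void)
{
	unsigned int cpu;

	for_each_online_cpu(cpu)
		if (my_irq[cpu] >= 0) {
			/*
			 * The unbind that clears the last CPU from the
			 * action's mask also closes the event channel
			 * and frees the internal action copy.
			 */
			unbind_from_per_cpu_irq(my_irq[cpu], cpu, &my_action);
			my_irq[cpu] = -1;
		}
}

static int __init my_bind(void)
{
	unsigned int cpu;
	int irq;

	for_each_online_cpu(cpu) {
		/*
		 * All CPUs fold onto a single IRQ: the first call
		 * allocates it and runs setup_irq(); later calls just
		 * bind that CPU's event channel and return the same
		 * IRQ number.
		 */
		irq = bind_virq_to_irqaction(VIRQ_XENOPROF, cpu, &my_action);
		if (irq < 0) {
			my_unbind();
			return irq;
		}
		my_irq[cpu] = irq;
	}
	return 0;
}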