diff --git a/patches.fixes/compat-make-compat_alloc_user_space-incorporate-the-access_ok b/patches.fixes/compat-make-compat_alloc_user_space-incorporate-the-access_ok new file mode 100644 index 0000000..5b311ac --- /dev/null +++ b/patches.fixes/compat-make-compat_alloc_user_space-incorporate-the-access_ok @@ -0,0 +1,175 @@ +From: H. Peter Anvin +Date: Tue, 7 Sep 2010 23:16:18 +0000 (-0700) +Subject: compat: Make compat_alloc_user_space() incorporate the access_ok() +Git-commit: c41d68a513c71e35a14f66d71782d27a79a81ea6 +References: CVE-2010-3081 bnc#639709 +Patch-mainline: 2.6.36 +Introduced-by: Prior to 2.6.5 + +compat: Make compat_alloc_user_space() incorporate the access_ok() + +compat_alloc_user_space() expects the caller to independently call +access_ok() to verify the returned area. A missing call could +introduce problems on some architectures. + +This patch incorporates the access_ok() check into +compat_alloc_user_space() and also adds a sanity check on the length. +The existing compat_alloc_user_space() implementations are renamed +arch_compat_alloc_user_space() and are used as part of the +implementation of the new global function. + +This patch assumes NULL will cause __get_user()/__put_user() to either +fail or access userspace on all architectures. This should be +followed by checking the return value of compat_access_user_space() +for NULL in the callers, at which time the access_ok() in the callers +can also be removed. + +Reported-by: Ben Hawkes +Signed-off-by: H. Peter Anvin +Acked-by: Benjamin Herrenschmidt +Acked-by: Chris Metcalf +Acked-by: David S. Miller +Acked-by: Ingo Molnar +Acked-by: Thomas Gleixner +Acked-by: Tony Luck +Cc: Andrew Morton +Cc: Arnd Bergmann +Cc: Fenghua Yu +Cc: H. 
Peter Anvin +Cc: Heiko Carstens +Cc: Helge Deller +Cc: James Bottomley +Cc: Kyle McMartin +Cc: Martin Schwidefsky +Cc: Paul Mackerras +Cc: Ralf Baechle +Cc: +Acked-by: Jeff Mahoney +--- + + arch/ia64/include/asm/compat.h | 2 +- + arch/mips/include/asm/compat.h | 2 +- + arch/parisc/include/asm/compat.h | 2 +- + arch/powerpc/include/asm/compat.h | 2 +- + arch/s390/include/asm/compat.h | 2 +- + arch/sparc/include/asm/compat.h | 2 +- + arch/x86/include/asm/compat.h | 2 +- + include/linux/compat.h | 3 +++ + kernel/compat.c | 21 +++++++++++++++++++++ + 9 files changed, 31 insertions(+), 7 deletions(-) + +--- a/arch/ia64/include/asm/compat.h ++++ b/arch/ia64/include/asm/compat.h +@@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr) + } + + static __inline__ void __user * +-compat_alloc_user_space (long len) ++arch_compat_alloc_user_space (long len) + { + struct pt_regs *regs = task_pt_regs(current); + return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len); +--- a/arch/mips/include/asm/compat.h ++++ b/arch/mips/include/asm/compat.h +@@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = (struct pt_regs *) + ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; +--- a/arch/parisc/include/asm/compat.h ++++ b/arch/parisc/include/asm/compat.h +@@ -147,7 +147,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static __inline__ void __user *compat_alloc_user_space(long len) ++static __inline__ void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = &current->thread.regs; + return (void __user *)regs->gr[30]; +--- a/arch/powerpc/include/asm/compat.h ++++ b/arch/powerpc/include/asm/compat.h +@@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static
inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = current->thread.regs; + unsigned long usp = regs->gpr[1]; +--- a/arch/s390/include/asm/compat.h ++++ b/arch/s390/include/asm/compat.h +@@ -181,7 +181,7 @@ static inline int is_compat_task(void) + + #endif + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + unsigned long stack; + +--- a/arch/sparc/include/asm/compat.h ++++ b/arch/sparc/include/asm/compat.h +@@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = current_thread_info()->kregs; + unsigned long usp = regs->u_regs[UREG_I6]; +--- a/arch/x86/include/asm/compat.h ++++ b/arch/x86/include/asm/compat.h +@@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compa + return (u32)(unsigned long)uptr; + } + +-static inline void __user *compat_alloc_user_space(long len) ++static inline void __user *arch_compat_alloc_user_space(long len) + { + struct pt_regs *regs = task_pt_regs(current); + return (void __user *)regs->sp - len; +--- a/include/linux/compat.h ++++ b/include/linux/compat.h +@@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvec + const struct compat_iovec __user *uvector, unsigned long nr_segs, + unsigned long fast_segs, struct iovec *fast_pointer, + struct iovec **ret_pointer); ++ ++extern void __user *compat_alloc_user_space(unsigned long len); ++ + #endif /* CONFIG_COMPAT */ + #endif /* _LINUX_COMPAT_H */ +--- a/kernel/compat.c ++++ b/kernel/compat.c +@@ -1137,3 +1137,24 @@ compat_sys_sysinfo(struct compat_sysinfo + + return 0; + } ++ ++/* ++ * Allocate user-space memory for the duration of a single system call, ++ * in order to marshall parameters inside a 
compat thunk. ++ */ ++void __user *compat_alloc_user_space(unsigned long len) ++{ ++ void __user *ptr; ++ ++ /* If len would occupy more than half of the entire compat space... */ ++ if (unlikely(len > (((compat_uptr_t)~0) >> 1))) ++ return NULL; ++ ++ ptr = arch_compat_alloc_user_space(len); ++ ++ if (unlikely(!access_ok(VERIFY_WRITE, ptr, len))) ++ return NULL; ++ ++ return ptr; ++} ++EXPORT_SYMBOL_GPL(compat_alloc_user_space); diff --git a/patches.fixes/execve-improve-interactivity-with-large-arguments b/patches.fixes/execve-improve-interactivity-with-large-arguments new file mode 100644 index 0000000..b44754c --- /dev/null +++ b/patches.fixes/execve-improve-interactivity-with-large-arguments @@ -0,0 +1,42 @@ +From: Roland McGrath +Date: Wed, 8 Sep 2010 02:36:28 +0000 (-0700) +Subject: execve: improve interactivity with large arguments +Git-commit: 7993bc1f4663c0db67bb8f0d98e6678145b387cd +Patch-mainline: 2.6.36-rc4 +References: bnc#635425 +Introduced-by: 2.6.23 + +execve: improve interactivity with large arguments + +This adds a preemption point during the copying of the argument and +environment strings for execve, in copy_strings(). There is already +a preemption point in the count() loop, so this doesn't add any new +points in the abstract sense. + +When the total argument+environment strings are very large, the time +spent copying them can be much more than a normal user time slice. +So this change improves the interactivity of the rest of the system +when one process is doing an execve with very large arguments. 
+ +Signed-off-by: Roland McGrath +Reviewed-by: KOSAKI Motohiro +Signed-off-by: Linus Torvalds +Acked-by: Jeff Mahoney +--- + + fs/exec.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/exec.c b/fs/exec.c +index 1b63237..6f2d777 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -419,6 +419,8 @@ static int copy_strings(int argc, const char __user *const __user *argv, + while (len > 0) { + int offset, bytes_to_copy; + ++ cond_resched(); ++ + offset = pos % PAGE_SIZE; + if (offset == 0) + offset = PAGE_SIZE; diff --git a/patches.fixes/execve-make-responsive-to-sigkill-with-large-arguments b/patches.fixes/execve-make-responsive-to-sigkill-with-large-arguments new file mode 100644 index 0000000..ee00a70 --- /dev/null +++ b/patches.fixes/execve-make-responsive-to-sigkill-with-large-arguments @@ -0,0 +1,57 @@ +From: Roland McGrath +Date: Wed, 8 Sep 2010 02:37:06 +0000 (-0700) +Subject: execve: make responsive to SIGKILL with large arguments +Git-commit: 9aea5a65aa7a1af9a4236dfaeb0088f1624f9919 +Patch-mainline: 2.6.36-rc4 +References: bnc#635425 +Introduced-by: 2.6.23 + +execve: make responsive to SIGKILL with large arguments + +An execve with a very large total of argument/environment strings +can take a really long time in the execve system call. It runs +uninterruptibly to count and copy all the strings. This change +makes it abort the exec quickly if sent a SIGKILL. + +Note that this is the conservative change, to interrupt only for +SIGKILL, by using fatal_signal_pending(). It would be perfectly +correct semantics to let any signal interrupt the string-copying in +execve, i.e. use signal_pending() instead of fatal_signal_pending(). +We'll save that change for later, since it could have user-visible +consequences, such as having a timer set too quickly make it so that +an execve can never complete, though it always happened to work before. 
+ +Signed-off-by: Roland McGrath +Reviewed-by: KOSAKI Motohiro +Signed-off-by: Linus Torvalds +Acked-by: Jeff Mahoney +--- + + fs/exec.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/fs/exec.c b/fs/exec.c +index 6f2d777..828dd24 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max) + argv++; + if (i++ >= max) + return -E2BIG; ++ ++ if (fatal_signal_pending(current)) ++ return -ERESTARTNOHAND; + cond_resched(); + } + } +@@ -419,6 +422,10 @@ static int copy_strings(int argc, const char __user *const __user *argv, + while (len > 0) { + int offset, bytes_to_copy; + ++ if (fatal_signal_pending(current)) { ++ ret = -ERESTARTNOHAND; ++ goto out; ++ } + cond_resched(); + + offset = pos % PAGE_SIZE; diff --git a/patches.fixes/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring b/patches.fixes/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring new file mode 100644 index 0000000..76104c7 --- /dev/null +++ b/patches.fixes/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring @@ -0,0 +1,56 @@ +From: David Howells +Date: Fri, 10 Sep 2010 08:59:51 +0000 (+0100) +Subject: KEYS: Fix bug in keyctl_session_to_parent() if parent has no session + keyring +Git-commit: 3d96406c7da1ed5811ea52a3b0905f4f0e295376 +Patch-mainline: 2.6.36-rc4 +References: CVE-2010-2960 bnc#634637 +Introduced-by: 2.6.32 + +KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring + +Fix a bug in keyctl_session_to_parent() whereby it tries to check the ownership +of the parent process's session keyring whether or not the parent has a session +keyring [CVE-2010-2960]. + +This results in the following oops: + + BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 + IP: [] keyctl_session_to_parent+0x251/0x443 + ... + Call Trace: + [] ? keyctl_session_to_parent+0x67/0x443 + [] ? 
__do_fault+0x24b/0x3d0 + [] sys_keyctl+0xb4/0xb8 + [] system_call_fastpath+0x16/0x1b + +if the parent process has no session keyring. + +If the system is using pam_keyinit then it is mostly protected against this as all +processes derived from a login will have inherited the session keyring created +by pam_keyinit during the log in procedure. + +To test this, pam_keyinit calls need to be commented out in /etc/pam.d/. + +Reported-by: Tavis Ormandy +Signed-off-by: David Howells +Acked-by: Tavis Ormandy +Signed-off-by: Linus Torvalds +Acked-by: Jeff Mahoney +--- + + security/keys/keyctl.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -1292,7 +1292,8 @@ long keyctl_session_to_parent(void) + goto not_permitted; + + /* the keyrings must have the same UID */ +- if (pcred ->tgcred->session_keyring->uid != mycred->euid || ++ if ((pcred->tgcred->session_keyring && ++ pcred->tgcred->session_keyring->uid != mycred->euid) || + mycred->tgcred->session_keyring->uid != mycred->euid) + goto not_permitted; + diff --git a/patches.fixes/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent b/patches.fixes/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent new file mode 100644 index 0000000..f354f8f --- /dev/null +++ b/patches.fixes/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent @@ -0,0 +1,69 @@ +From: David Howells +Date: Fri, 10 Sep 2010 08:59:46 +0000 (+0100) +Subject: KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() +Git-commit: 9d1ac65a9698513d00e5608d93fca0c53f536c14 +Patch-mainline: 2.6.36-rc4 +References: CVE-2010-2960 bnc#634637 +Introduced-by: 2.6.32 + +KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() + +There's an unprotected access to the parent process's credentials in the middle +of keyctl_session_to_parent(). This results in the following RCU warning: + + =================================================== + [ INFO: suspicious rcu_dereference_check() usage.
] + --------------------------------------------------- + security/keys/keyctl.c:1291 invoked rcu_dereference_check() without protection! + + other info that might help us debug this: + + rcu_scheduler_active = 1, debug_locks = 0 + 1 lock held by keyctl-session-/2137: + #0: (tasklist_lock){.+.+..}, at: [] keyctl_session_to_parent+0x60/0x236 + + stack backtrace: + Pid: 2137, comm: keyctl-session- Not tainted 2.6.36-rc2-cachefs+ #1 + Call Trace: + [] lockdep_rcu_dereference+0xaa/0xb3 + [] keyctl_session_to_parent+0xed/0x236 + [] sys_keyctl+0xb4/0xb6 + [] system_call_fastpath+0x16/0x1b + +The code should take the RCU read lock to make sure the parents credentials +don't go away, even though it's holding a spinlock and has IRQ disabled. + +Signed-off-by: David Howells +Signed-off-by: Linus Torvalds +Acked-by: Jeff Mahoney +--- + + security/keys/keyctl.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -1259,6 +1259,7 @@ long keyctl_session_to_parent(void) + keyring_r = NULL; + + me = current; ++ rcu_read_lock(); + write_lock_irq(&tasklist_lock); + + parent = me->real_parent; +@@ -1313,6 +1314,7 @@ long keyctl_session_to_parent(void) + set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME); + + write_unlock_irq(&tasklist_lock); ++ rcu_read_unlock(); + if (oldcred) + put_cred(oldcred); + return 0; +@@ -1321,6 +1323,7 @@ already_same: + ret = 0; + not_permitted: + write_unlock_irq(&tasklist_lock); ++ rcu_read_unlock(); + put_cred(cred); + return ret; + diff --git a/patches.fixes/net-sched-fix-some-kernel-memory-leaks b/patches.fixes/net-sched-fix-some-kernel-memory-leaks new file mode 100644 index 0000000..919cecb --- /dev/null +++ b/patches.fixes/net-sched-fix-some-kernel-memory-leaks @@ -0,0 +1,165 @@ +From: Eric Dumazet +Date: Mon, 16 Aug 2010 20:04:22 +0000 (+0000) +Subject: net sched: fix some kernel memory leaks +Git-commit: 1c40be12f7d8ca1d387510d39787b12e512a7ce8 +Patch-mainline: 2.6.36-rc3 
+References: CVE-2010-2942 bnc#632309 + +net sched: fix some kernel memory leaks + +We leak at least 32bits of kernel memory to user land in tc dump, +because we dont init all fields (capab ?) of the dumped structure. + +Use C99 initializers so that holes and non explicit fields are zeroed. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Acked-by: Jeff Mahoney +--- + + net/sched/act_gact.c | 21 ++++++++++++--------- + net/sched/act_mirred.c | 15 ++++++++------- + net/sched/act_police.c | 19 ++++++++----------- + net/sched/act_simple.c | 11 ++++++----- + net/sched/act_skbedit.c | 11 ++++++----- + 5 files changed, 40 insertions(+), 37 deletions(-) + +--- a/net/sched/act_gact.c ++++ b/net/sched/act_gact.c +@@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb, + static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) + { + unsigned char *b = skb_tail_pointer(skb); +- struct tc_gact opt; + struct tcf_gact *gact = a->priv; ++ struct tc_gact opt = { ++ .index = gact->tcf_index, ++ .refcnt = gact->tcf_refcnt - ref, ++ .bindcnt = gact->tcf_bindcnt - bind, ++ .action = gact->tcf_action, ++ }; + struct tcf_t t; + +- opt.index = gact->tcf_index; +- opt.refcnt = gact->tcf_refcnt - ref; +- opt.bindcnt = gact->tcf_bindcnt - bind; +- opt.action = gact->tcf_action; + NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); + #ifdef CONFIG_GACT_PROB + if (gact->tcfg_ptype) { +- struct tc_gact_p p_opt; +- p_opt.paction = gact->tcfg_paction; +- p_opt.pval = gact->tcfg_pval; +- p_opt.ptype = gact->tcfg_ptype; ++ struct tc_gact_p p_opt = { ++ .paction = gact->tcfg_paction, ++ .pval = gact->tcfg_pval, ++ .ptype = gact->tcfg_ptype, ++ }; ++ + NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); + } + #endif +--- a/net/sched/act_mirred.c ++++ b/net/sched/act_mirred.c +@@ -211,15 +211,16 @@ static int tcf_mirred_dump(struct sk_buf + { + unsigned char *b = skb_tail_pointer(skb); + struct tcf_mirred *m = a->priv; +- struct tc_mirred opt; ++ struct 
tc_mirred opt = { ++ .index = m->tcf_index, ++ .action = m->tcf_action, ++ .refcnt = m->tcf_refcnt - ref, ++ .bindcnt = m->tcf_bindcnt - bind, ++ .eaction = m->tcfm_eaction, ++ .ifindex = m->tcfm_ifindex, ++ }; + struct tcf_t t; + +- opt.index = m->tcf_index; +- opt.action = m->tcf_action; +- opt.refcnt = m->tcf_refcnt - ref; +- opt.bindcnt = m->tcf_bindcnt - bind; +- opt.eaction = m->tcfm_eaction; +- opt.ifindex = m->tcfm_ifindex; + NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); + t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); +--- a/net/sched/act_police.c ++++ b/net/sched/act_police.c +@@ -341,22 +341,19 @@ tcf_act_police_dump(struct sk_buff *skb, + { + unsigned char *b = skb_tail_pointer(skb); + struct tcf_police *police = a->priv; +- struct tc_police opt; ++ struct tc_police opt = { ++ .index = police->tcf_index, ++ .action = police->tcf_action, ++ .mtu = police->tcfp_mtu, ++ .burst = police->tcfp_burst, ++ .refcnt = police->tcf_refcnt - ref, ++ .bindcnt = police->tcf_bindcnt - bind, ++ }; + +- opt.index = police->tcf_index; +- opt.action = police->tcf_action; +- opt.mtu = police->tcfp_mtu; +- opt.burst = police->tcfp_burst; +- opt.refcnt = police->tcf_refcnt - ref; +- opt.bindcnt = police->tcf_bindcnt - bind; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; +- else +- memset(&opt.rate, 0, sizeof(opt.rate)); + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; +- else +- memset(&opt.peakrate, 0, sizeof(opt.peakrate)); + NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); + if (police->tcfp_result) + NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result); +--- a/net/sched/act_simple.c ++++ b/net/sched/act_simple.c +@@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct s + { + unsigned char *b = skb_tail_pointer(skb); + struct tcf_defact *d = a->priv; +- struct tc_defact opt; ++ struct tc_defact opt = { ++ .index = d->tcf_index, ++ .refcnt = 
d->tcf_refcnt - ref, ++ .bindcnt = d->tcf_bindcnt - bind, ++ .action = d->tcf_action, ++ }; + struct tcf_t t; + +- opt.index = d->tcf_index; +- opt.refcnt = d->tcf_refcnt - ref; +- opt.bindcnt = d->tcf_bindcnt - bind; +- opt.action = d->tcf_action; + NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); + NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); +--- a/net/sched/act_skbedit.c ++++ b/net/sched/act_skbedit.c +@@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struc + { + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbedit *d = a->priv; +- struct tc_skbedit opt; ++ struct tc_skbedit opt = { ++ .index = d->tcf_index, ++ .refcnt = d->tcf_refcnt - ref, ++ .bindcnt = d->tcf_bindcnt - bind, ++ .action = d->tcf_action, ++ }; + struct tcf_t t; + +- opt.index = d->tcf_index; +- opt.refcnt = d->tcf_refcnt - ref; +- opt.bindcnt = d->tcf_bindcnt - bind; +- opt.action = d->tcf_action; + NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); + if (d->flags & SKBEDIT_F_PRIORITY) + NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), diff --git a/patches.fixes/setup_arg_pages-diagnose-excessive-argument-size b/patches.fixes/setup_arg_pages-diagnose-excessive-argument-size new file mode 100644 index 0000000..1bc1308 --- /dev/null +++ b/patches.fixes/setup_arg_pages-diagnose-excessive-argument-size @@ -0,0 +1,46 @@ +From: Roland McGrath +Date: Wed, 8 Sep 2010 02:35:49 +0000 (-0700) +Subject: setup_arg_pages: diagnose excessive argument size +Git-commit: 1b528181b2ffa14721fb28ad1bd539fe1732c583 +Patch-mainline: 2.6.36-rc4 +References: bnc#635425 +Introduced-by: 2.6.23 + +setup_arg_pages: diagnose excessive argument size + +The CONFIG_STACK_GROWSDOWN variant of setup_arg_pages() does not +check the size of the argument/environment area on the stack. +When it is unworkably large, shift_arg_pages() hits its BUG_ON. 
+This is exploitable with a very large RLIMIT_STACK limit, to +create a crash pretty easily. + +Check that the initial stack is not too large to make it possible +to map in any executable. We're not checking that the actual +executable (or interpreter, for binfmt_elf) will fit. So those +mappings might clobber part of the initial stack mapping. But +that is just userland lossage that userland made happen, not a +kernel problem. + +Signed-off-by: Roland McGrath +Reviewed-by: KOSAKI Motohiro +Signed-off-by: Linus Torvalds +Acked-by: Jeff Mahoney +--- + + fs/exec.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -593,6 +593,11 @@ int setup_arg_pages(struct linux_binprm + #else + stack_top = arch_align_stack(stack_top); + stack_top = PAGE_ALIGN(stack_top); ++ ++ if (unlikely(stack_top < mmap_min_addr) || ++ unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) ++ return -ENOMEM; ++ + stack_shift = vma->vm_end - stack_top; + + bprm->p -= stack_shift; diff --git a/patches.fixes/use-rcu-lock-in-setpgid.patch b/patches.fixes/use-rcu-lock-in-setpgid.patch new file mode 100644 index 0000000..8883e8c --- /dev/null +++ b/patches.fixes/use-rcu-lock-in-setpgid.patch @@ -0,0 +1,62 @@ +From 950eaaca681c44aab87a46225c9e44f902c080aa Mon Sep 17 00:00:00 2001 +From: Paul E. McKenney +Date: Tue, 31 Aug 2010 17:00:18 -0700 +Subject: pid: make setpgid() system call use RCU read-side critical section +Git-commit: 950eaaca681c44aab87a46225c9e44f902c080aa +Patch-mainline: yes +References: bnc#639728 + +[ 23.584719] +[ 23.584720] =================================================== +[ 23.585059] [ INFO: suspicious rcu_dereference_check() usage. ] +[ 23.585176] --------------------------------------------------- +[ 23.585176] kernel/pid.c:419 invoked rcu_dereference_check() without protection!
+[ 23.585176] +[ 23.585176] other info that might help us debug this: +[ 23.585176] +[ 23.585176] +[ 23.585176] rcu_scheduler_active = 1, debug_locks = 1 +[ 23.585176] 1 lock held by rc.sysinit/728: +[ 23.585176] #0: (tasklist_lock){.+.+..}, at: [] sys_setpgid+0x5f/0x193 +[ 23.585176] +[ 23.585176] stack backtrace: +[ 23.585176] Pid: 728, comm: rc.sysinit Not tainted 2.6.36-rc2 #2 +[ 23.585176] Call Trace: +[ 23.585176] [] lockdep_rcu_dereference+0x99/0xa2 +[ 23.585176] [] find_task_by_pid_ns+0x50/0x6a +[ 23.585176] [] find_task_by_vpid+0x1d/0x1f +[ 23.585176] [] sys_setpgid+0x67/0x193 +[ 23.585176] [] system_call_fastpath+0x16/0x1b +[ 24.959669] type=1400 audit(1282938522.956:4): avc: denied { module_request } for pid=766 comm="hwclock" kmod="char-major-10-135" scontext=system_u:system_r:hwclock_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclas + +It turns out that the setpgid() system call fails to enter an RCU +read-side critical section before doing a PID-to-task_struct translation. +This commit therefore does rcu_read_lock() before the translation, and +also does rcu_read_unlock() after the last use of the returned pointer. + +Reported-by: Andrew Morton +Signed-off-by: Paul E. McKenney +Acked-by: David Howells +Signed-off-by: Jiri Slaby +--- + kernel/sys.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -962,6 +962,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid + pgid = pid; + if (pgid < 0) + return -EINVAL; ++ rcu_read_lock(); + + /* From this point forward we keep holding onto the tasklist lock + * so that our parent does not change from under us. -DaveM +@@ -1015,6 +1016,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid + out: + /* All paths lead to here, thus we are safe. 
-DaveM */ + write_unlock_irq(&tasklist_lock); ++ rcu_read_unlock(); + return err; + } + diff --git a/patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing b/patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing new file mode 100644 index 0000000..6dd270f --- /dev/null +++ b/patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing @@ -0,0 +1,53 @@ +From: Roland McGrath +Date: Tue, 14 Sep 2010 19:22:58 +0000 (-0700) +Subject: x86-64, compat: Retruncate rax after ia32 syscall entry tracing +Git-commit: eefdca043e8391dcd719711716492063030b55ac +References: CVE-2010-3301 bnc#639708 +Patch-mainline: 2.6.36 +Introduced-by: 2.6.27 + +x86-64, compat: Retruncate rax after ia32 syscall entry tracing + +In commit d4d6715, we reopened an old hole for a 64-bit ptracer touching a +32-bit tracee in system call entry. A %rax value set via ptrace at the +entry tracing stop gets used whole as a 32-bit syscall number, while we +only check the low 32 bits for validity. + +Fix it by truncating %rax back to 32 bits after syscall_trace_enter, +in addition to testing the full 64 bits as has already been added. + +Reported-by: Ben Hawkes +Signed-off-by: Roland McGrath +Signed-off-by: H. Peter Anvin +Acked-by: Jeff Mahoney +--- + + arch/x86/ia32/ia32entry.S | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S +index 84e3a4e..518bb99 100644 +--- a/arch/x86/ia32/ia32entry.S ++++ b/arch/x86/ia32/ia32entry.S +@@ -50,7 +50,12 @@ + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned +- * the value it wants us to use in the table lookup. ++ * the %rax value we should see. Instead, we just truncate that ++ * value to 32 bits again as we did on entry from user mode. 
++ * If it's a new value set by user_regset during entry tracing, ++ * this matches the normal truncation of the user-mode value. ++ * If it's -1 to make us punt the syscall, then (u32)-1 is still ++ * an appropriately invalid value. + */ + .macro LOAD_ARGS32 offset, _r9=0 + .if \_r9 +@@ -60,6 +65,7 @@ + movl \offset+48(%rsp),%edx + movl \offset+56(%rsp),%esi + movl \offset+64(%rsp),%edi ++ movl %eax,%eax /* zero extension */ + .endm + + .macro CFI_STARTPROC32 simple diff --git a/patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax b/patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax new file mode 100644 index 0000000..b89a16e --- /dev/null +++ b/patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax @@ -0,0 +1,99 @@ +From: H. Peter Anvin +Date: Tue, 14 Sep 2010 19:42:41 +0000 (-0700) +Subject: x86-64, compat: Test %rax for the syscall number, not %eax +Git-commit: 36d001c70d8a0144ac1d038f6876c484849a74de +References: CVE-2010-3301 bnc#639708 +Patch-mainline: 2.6.36 +Introduced-by: 2.6.27 + +x86-64, compat: Test %rax for the syscall number, not %eax + +On 64 bits, we always, by necessity, jump through the system call +table via %rax. For 32-bit system calls, in theory the system call +number is stored in %eax, and the code was testing %eax for a valid +system call number. At one point we loaded the stored value back from +the stack to enforce zero-extension, but that was removed in checkin +d4d67150165df8bf1cc05e532f6efca96f907cab. An actual 32-bit process +will not be able to introduce a non-zero-extended number, but it can +happen via ptrace. + +Instead of re-introducing the zero-extension, test what we are +actually going to use, i.e. %rax. This only adds a handful of REX +prefixes to the code. + +Reported-by: Ben Hawkes +Signed-off-by: H. 
Peter Anvin +Cc: +Cc: Roland McGrath +Cc: Andrew Morton +Acked-by: Jeff Mahoney +--- + + arch/x86/ia32/ia32entry.S | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/arch/x86/ia32/ia32entry.S ++++ b/arch/x86/ia32/ia32entry.S +@@ -159,7 +159,7 @@ ENTRY(ia32_sysenter_target) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + CFI_REMEMBER_STATE + jnz sysenter_tracesys +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + sysenter_do_call: + IA32_ARG_FIXUP +@@ -201,7 +201,7 @@ sysexit_from_sys_call: + movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ + call audit_syscall_entry + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + movl %ebx,%edi /* reload 1st syscall arg */ + movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ +@@ -254,7 +254,7 @@ sysenter_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + jmp sysenter_do_call + CFI_ENDPROC +@@ -320,7 +320,7 @@ ENTRY(ia32_cstar_target) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + CFI_REMEMBER_STATE + jnz cstar_tracesys +- cmpl $IA32_NR_syscalls-1,%eax ++ cmpq $IA32_NR_syscalls-1,%rax + ja ia32_badsys + cstar_do_call: + IA32_ARG_FIXUP 1 +@@ -373,7 +373,7 @@ cstar_tracesys: + LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ + RESTORE_REST + xchgl %ebp,%r9d +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + jmp cstar_do_call + END(ia32_cstar_target) +@@ -431,7 +431,7 @@ ENTRY(ia32_syscall) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz ia32_tracesys +- cmpl 
$(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + ia32_do_call: + IA32_ARG_FIXUP +@@ -450,7 +450,7 @@ ia32_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call + END(ia32_syscall) diff --git a/patches.xen/pcifront-claim.patch b/patches.xen/pcifront-claim.patch new file mode 100644 index 0000000..4a6aaae --- /dev/null +++ b/patches.xen/pcifront-claim.patch @@ -0,0 +1,55 @@ +http://git.kernel.org/?p=linux/kernel/git/konrad/xen.git;a=commit;h=621d869f36b215d63bb99e7ecd7a11f029821b85 +xen-pcifront: Claim PCI resources before going live. +author Konrad Rzeszutek Wilk + Fri, 18 Jun 2010 19:31:47 +0000 (15:31 -0400) +committer Konrad Rzeszutek Wilk + Fri, 18 Jun 2010 19:40:37 +0000 (15:40 -0400) +We were missing the important step of claiming (and setting the +parent of IO and MEM regions to 'PCI IO' and 'PCI mem' respectively) +of the BARs. This meant that during hot inserts we would get: + +igb 0000:01:00.1: device not available (can't reserve [mem 0xfb840000-0xfb85ffff]) + +even though the memory region had been reserved before.
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- linux-2.6.34.1/drivers/xen/pcifront/pci_op.c.orig2 2010-09-29 16:31:58.702675503 +0200 ++++ linux-2.6.34.1/drivers/xen/pcifront/pci_op.c 2010-09-29 16:38:47.260675349 +0200 +@@ -426,7 +426,7 @@ static int pcifront_claim_resource(struc + r = &dev->resource[i]; + + if (!r->parent && r->start && r->flags) { +- dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n", ++ dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", + pci_name(dev), i); + pci_claim_resource(dev, i); + } +@@ -516,14 +516,15 @@ int __devinit pcifront_scan_root(struct + + list_add(&bus_entry->list, &pdev->root_buses); + ++ /* pci_scan_bus_parented skips devices which do not have a have ++ * devfn==0. The pcifront_scan_bus enumerates all devfn. */ ++ err = pcifront_scan_bus(pdev, domain, bus, b); ++ + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + pci_bus_add_devices(b); + +- /* pci_scan_bus_parented skips devices which do not have a have +- * devfn==0. The pcifront_scan_bus enumerates all devfn. */ +- err = pcifront_scan_bus(pdev, domain, bus, b); + + return err; + +@@ -560,6 +561,9 @@ int __devinit pcifront_rescan_root(struc + + err = pcifront_scan_bus(pdev, domain, bus, b); + ++ /* Claim resources before going "live" with our devices */ ++ pci_walk_bus(b, pcifront_claim_resource, pdev); ++ + return err; + } + diff --git a/patches.xen/pcifront-dont-race-udev.patch b/patches.xen/pcifront-dont-race-udev.patch new file mode 100644 index 0000000..886a7c1 --- /dev/null +++ b/patches.xen/pcifront-dont-race-udev.patch @@ -0,0 +1,56 @@ +http://git.kernel.org/?p=linux/kernel/git/konrad/xen.git;a=commitdiff;h=4a65de894fc0af05397eedca180d0ea7d8c6caba#patch1 +git/pub/scm / linux/kernel/git/konrad/xen.git / commitdiff +? search: re +summary | shortlog | log | commit | commitdiff | tree +raw | patch (parent: 621d869) +xen-pcifront: Don't race with udev when discovering new devices. 
+author Konrad Rzeszutek Wilk + Fri, 23 Jul 2010 14:35:57 +0000 (10:35 -0400) +committer Konrad Rzeszutek Wilk + Fri, 23 Jul 2010 15:15:56 +0000 (11:15 -0400) +We would inadvertently call 'pci_bus_add_device' right after discovering +the device, but before claiming the BARs. This ended up firing off +a uevent and udev loading the module and the modules failing to +request_region as they were not claimed. We fix this by holding off +going live by calling 'pci_bus_add_devices' at the end. + +Signed-off-by: Konrad Rzeszutek Wilk +--- linux-2.6.34.1/drivers/xen/pcifront/pci_op.c.orig3 2010-09-29 16:32:08.324675371 +0200 ++++ linux-2.6.34.1/drivers/xen/pcifront/pci_op.c 2010-09-29 16:37:23.215674973 +0200 +@@ -456,17 +456,10 @@ int __devinit pcifront_scan_bus(struct p + } + + d = pci_scan_single_device(b, devfn); +- if (d) { ++ if (d) + dev_info(&pdev->xdev->dev, "New device on " + "%04x:%02x:%02x.%02x found.\n", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); +- err = pci_bus_add_device(d); +- if (err) { +- dev_err(&pdev->xdev->dev, "Failed to add " +- " device to bus.\n"); +- return err; +- } +- } + } + + return 0; +@@ -523,6 +516,7 @@ int __devinit pcifront_scan_root(struct + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + ++ /* Create SysFS and notify udev of the devices. 
Aka: "going live" */ ++ pci_bus_add_devices(b); ++ + return err; + } + diff --git a/patches.xen/pcifront-enforce-scan.patch b/patches.xen/pcifront-enforce-scan.patch new file mode 100644 index 0000000..5217491 --- /dev/null +++ b/patches.xen/pcifront-enforce-scan.patch @@ -0,0 +1,127 @@ +http://git.kernel.org/?p=linux/kernel/git/konrad/xen.git;a=commitdiff;h=978b7df39be386f9a875bb14fcd84145e8ad0ee2#patch1 +git/pub/scm / linux/kernel/git/konrad/xen.git / commitdiff +? search: re +summary | shortlog | log | commit | commitdiff | tree +raw | patch (parent: 28a4d3a) +xen-pcifront: Enforce scanning of device functions on initial execution. +author Konrad Rzeszutek Wilk + Tue, 8 Jun 2010 16:59:41 +0000 (12:59 -0400) +committer Konrad Rzeszutek Wilk + Fri, 18 Jun 2010 19:40:27 +0000 (15:40 -0400) +'pci_scan_slot' abandons scanning of functions above 0 if a device with +function 0 has not been detected. We need to be able to scan functions +above 0 in case the user has passed in devices without the function 0 +for the slot/bus. To that end we are reusing the code that existed in +the rescan code path and make use of it in the initial execution +path. + +Signed-off-by: Konrad Rzeszutek Wilk +--- linux-2.6.34.1/drivers/xen/pcifront/pci_op.c.orig 2010-09-29 16:31:32.330675478 +0200 ++++ linux-2.6.34.1/drivers/xen/pcifront/pci_op.c 2010-09-29 16:39:23.197674096 +0200 +@@ -435,6 +435,43 @@ static int pcifront_claim_resource(struc + return 0; + } + ++int __devinit pcifront_scan_bus(struct pcifront_device *pdev, ++ unsigned int domain, unsigned int bus, ++ struct pci_bus *b) ++{ ++ struct pci_dev *d; ++ unsigned int devfn; ++ int err; ++ ++ /* Scan the bus for functions and add. ++ * We omit handling of PCI bridge attachment because pciback prevents ++ * bridges from being exported. ++ */ ++ for (devfn = 0; devfn < 0x100; devfn++) { ++ d = pci_get_slot(b, devfn); ++ if (d) { ++ /* Device is already known. 
*/ ++ pci_dev_put(d); ++ continue; ++ } ++ ++ d = pci_scan_single_device(b, devfn); ++ if (d) { ++ dev_info(&pdev->xdev->dev, "New device on " ++ "%04x:%02x:%02x.%02x found.\n", domain, bus, ++ PCI_SLOT(devfn), PCI_FUNC(devfn)); ++ err = pci_bus_add_device(d); ++ if (err) { ++ dev_err(&pdev->xdev->dev, "Failed to add " ++ " device to bus.\n"); ++ return err; ++ } ++ } ++ } ++ ++ return 0; ++} ++ + int __devinit pcifront_scan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) + { +@@ -484,7 +521,11 @@ int __devinit pcifront_scan_root(struct + + pci_bus_add_devices(b); + +- return 0; ++ /* pci_scan_bus_parented skips devices which do not have a have ++ * devfn==0. The pcifront_scan_bus enumerates all devfn. */ ++ err = pcifront_scan_bus(pdev, domain, bus, b); ++ ++ return err; + + err_out: + kfree(bus_entry); +@@ -496,10 +537,9 @@ int __devinit pcifront_scan_root(struct + int __devinit pcifront_rescan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) + { ++ int err; + struct pci_bus *b; +- struct pci_dev *d; +- unsigned int devfn; +- ++ + #ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, +@@ -518,34 +558,9 @@ int __devinit pcifront_rescan_root(struc + /* If the bus is unknown, create it. */ + return pcifront_scan_root(pdev, domain, bus); + +- /* Rescan the bus for newly attached functions and add. +- * We omit handling of PCI bridge attachment because pciback prevents +- * bridges from being exported. +- */ +- for (devfn = 0; devfn < 0x100; devfn++) { +- d = pci_get_slot(b, devfn); +- if(d) { +- /* Device is already known. 
*/ +- pci_dev_put(d); +- continue; +- } +- +- d = pci_scan_single_device(b, devfn); +- if (d) { +- int err; +- +- dev_info(&pdev->xdev->dev, "New device on " +- "%04x:%02x:%02x.%02x found.\n", domain, bus, +- PCI_SLOT(devfn), PCI_FUNC(devfn)); +- err = pci_bus_add_device(d); +- if (err) +- dev_err(&pdev->xdev->dev, +- "error %d adding device, continuing.\n", +- err); +- } +- } ++ err = pcifront_scan_bus(pdev, domain, bus, b); + +- return 0; ++ return err; + } + + static void free_root_bus_devs(struct pci_bus *bus) diff --git a/patches.xen/pcifront-irq-not-evtchn.patch b/patches.xen/pcifront-irq-not-evtchn.patch new file mode 100644 index 0000000..3d44a0d --- /dev/null +++ b/patches.xen/pcifront-irq-not-evtchn.patch @@ -0,0 +1,14 @@ +unbind_from_irqhandler takes irq, not evtchn, as its first argument. + +Signed-off-by: Rafal Wojtczuk +--- linux-2.6.34.1/drivers/xen/pcifront/xenbus.c.orig 2010-09-29 16:47:39.961674359 +0200 ++++ linux-2.6.34.1/drivers/xen/pcifront/xenbus.c 2010-09-29 16:47:49.458675391 +0200 +@@ -61,7 +61,7 @@ static void free_pdev(struct pcifront_de + + /*For PCIE_AER error handling job*/ + flush_scheduled_work(); +- unbind_from_irqhandler(pdev->evtchn, pdev); ++ unbind_from_irqhandler(irq_from_evtchn(pdev->evtchn), pdev); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); diff --git a/patches.xen/xen3-x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing b/patches.xen/xen3-x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing new file mode 100644 index 0000000..57b7bf4 --- /dev/null +++ b/patches.xen/xen3-x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing @@ -0,0 +1,48 @@ +From: Roland McGrath +Date: Tue, 14 Sep 2010 19:22:58 +0000 (-0700) +Subject: x86-64, compat: Retruncate rax after ia32 syscall entry tracing +Git-commit: eefdca043e8391dcd719711716492063030b55ac +References: CVE-2010-3301 bnc#639708 +Patch-mainline: 2.6.36 +Introduced-by: 2.6.27 + +x86-64, compat: Retruncate rax 
after ia32 syscall entry tracing + +In commit d4d6715, we reopened an old hole for a 64-bit ptracer touching a +32-bit tracee in system call entry. A %rax value set via ptrace at the +entry tracing stop gets used whole as a 32-bit syscall number, while we +only check the low 32 bits for validity. + +Fix it by truncating %rax back to 32 bits after syscall_trace_enter, +in addition to testing the full 64 bits as has already been added. + +Reported-by: Ben Hawkes +Signed-off-by: Roland McGrath +Signed-off-by: H. Peter Anvin +Acked-by: Jeff Mahoney +Automatically created from "patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing" by xen-port-patches.py + +--- 11.3-2010-09-14.orig/arch/x86/ia32/ia32entry-xen.S 2010-05-12 09:08:52.000000000 +0200 ++++ 11.3-2010-09-14/arch/x86/ia32/ia32entry-xen.S 2010-09-17 11:18:22.000000000 +0200 +@@ -47,7 +47,12 @@ ia32_common: + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned +- * the value it wants us to use in the table lookup. ++ * the %rax value we should see. Instead, we just truncate that ++ * value to 32 bits again as we did on entry from user mode. ++ * If it's a new value set by user_regset during entry tracing, ++ * this matches the normal truncation of the user-mode value. ++ * If it's -1 to make us punt the syscall, then (u32)-1 is still ++ * an appropriately invalid value. 
+ */ + .macro LOAD_ARGS32 offset, _r9=0 + .if \_r9 +@@ -57,6 +62,7 @@ ia32_common: + movl \offset+48(%rsp),%edx + movl \offset+56(%rsp),%esi + movl \offset+64(%rsp),%edi ++ movl %eax,%eax /* zero extension */ + .endm + + .macro CFI_STARTPROC32 simple diff --git a/patches.xen/xen3-x86-64-compat-test-rax-for-the-syscall-number-not-eax b/patches.xen/xen3-x86-64-compat-test-rax-for-the-syscall-number-not-eax new file mode 100644 index 0000000..fb33935 --- /dev/null +++ b/patches.xen/xen3-x86-64-compat-test-rax-for-the-syscall-number-not-eax @@ -0,0 +1,96 @@ +From: H. Peter Anvin +Date: Tue, 14 Sep 2010 19:42:41 +0000 (-0700) +Subject: x86-64, compat: Test %rax for the syscall number, not %eax +Git-commit: 36d001c70d8a0144ac1d038f6876c484849a74de +References: CVE-2010-3301 bnc#639708 +Patch-mainline: 2.6.36 +Introduced-by: 2.6.27 + +x86-64, compat: Test %rax for the syscall number, not %eax + +On 64 bits, we always, by necessity, jump through the system call +table via %rax. For 32-bit system calls, in theory the system call +number is stored in %eax, and the code was testing %eax for a valid +system call number. At one point we loaded the stored value back from +the stack to enforce zero-extension, but that was removed in checkin +d4d67150165df8bf1cc05e532f6efca96f907cab. An actual 32-bit process +will not be able to introduce a non-zero-extended number, but it can +happen via ptrace. + +Instead of re-introducing the zero-extension, test what we are +actually going to use, i.e. %rax. This only adds a handful of REX +prefixes to the code. + +Reported-by: Ben Hawkes +Signed-off-by: H. 
Peter Anvin +Cc: +Cc: Roland McGrath +Cc: Andrew Morton +Acked-by: Jeff Mahoney +Automatically created from "patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax" by xen-port-patches.py + +--- 11.3-2010-09-14.orig/arch/x86/ia32/ia32entry-xen.S 2010-09-17 11:18:22.000000000 +0200 ++++ 11.3-2010-09-14/arch/x86/ia32/ia32entry-xen.S 2010-09-17 11:18:28.000000000 +0200 +@@ -145,7 +145,7 @@ ENTRY(ia32_sysenter_target) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz sysenter_tracesys +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + jmp ia32_do_call + +@@ -159,7 +159,7 @@ ENTRY(ia32_sysenter_target) + movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ + call audit_syscall_entry + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + movl %ebx,%edi /* reload 1st syscall arg */ + movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ +@@ -186,7 +186,7 @@ sysenter_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + jmp ia32_do_call + CFI_ENDPROC +@@ -240,7 +240,7 @@ ENTRY(ia32_cstar_target) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz cstar_tracesys +- cmpl $IA32_NR_syscalls-1,%eax ++ cmpq $IA32_NR_syscalls-1,%rax + ja ia32_badsys + cstar_do_call: + IA32_ARG_FIXUP 1 +@@ -267,7 +267,7 @@ cstar_tracesys: + LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ + RESTORE_REST + xchgl %ebp,%r9d +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + jmp cstar_do_call + END(ia32_cstar_target) +@@ -324,7 +324,7 @@ 
ENTRY(ia32_syscall) + orl $TS_COMPAT,TI_status(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + jnz ia32_tracesys +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + ia32_do_call: + IA32_ARG_FIXUP +@@ -343,7 +343,7 @@ ia32_tracesys: + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST +- cmpl $(IA32_NR_syscalls-1),%eax ++ cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call + END(ia32_syscall) diff --git a/series.conf b/series.conf index c8e5651..6794e82 100644 --- a/series.conf +++ b/series.conf @@ -103,6 +103,11 @@ patches.fixes/make-note_interrupt-fast.diff patches.fixes/twl6030-fix-note_interrupt-call + patches.fixes/use-rcu-lock-in-setpgid.patch + patches.fixes/compat-make-compat_alloc_user_space-incorporate-the-access_ok + patches.fixes/setup_arg_pages-diagnose-excessive-argument-size + patches.fixes/execve-improve-interactivity-with-large-arguments + patches.fixes/execve-make-responsive-to-sigkill-with-large-arguments ######################################################## # Architecture-specific patches. 
These used to be all @@ -147,6 +152,9 @@ patches.suse/x86-mark_rodata_rw.patch patches.fixes/dmar-fix-oops-with-no-dmar-table + patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing + patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax + ######################################################## # x86 MCE/MCA (Machine Check Error/Architecture) extensions @@ -321,6 +329,7 @@ # Networking, IPv6 ######################################################## patches.fixes/bridge-module-get-put.patch + patches.fixes/net-sched-fix-some-kernel-memory-leaks ######################################################## # NFS @@ -628,6 +637,8 @@ # Security stuff # ########################################################## + patches.fixes/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent + patches.fixes/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring ########################################################## # Audit @@ -786,6 +797,8 @@ patches.xen/xen3-seccomp-disable-tsc-option patches.xen/xen3-x86-mcp51-no-dac patches.xen/xen3-x86-mark_rodata_rw.patch + patches.xen/xen3-x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing + patches.xen/xen3-x86-64-compat-test-rax-for-the-syscall-number-not-eax patches.xen/xen3-acpi_processor_check_maxcpus.patch patches.xen/xen3-kdb-x86 patches.xen/xen3-stack-unwind @@ -837,4 +850,9 @@ patches.xen/xen-x86_64-dump-user-pgt patches.xen/xen-x86_64-note-init-p2m + patches.xen/pcifront-enforce-scan.patch + patches.xen/pcifront-claim.patch + patches.xen/pcifront-dont-race-udev.patch + patches.xen/pcifront-irq-not-evtchn.patch + patches.qubes/nuke_balloon_minimum_target.patch