Merge branch 'konrad' of git://qubes-os.org/rafal/kernel-dom0

This commit is contained in:
Joanna Rutkowska 2010-09-30 19:23:03 +02:00
commit 677867fd09
17 changed files with 1238 additions and 0 deletions

View File

@ -0,0 +1,175 @@
From: H. Peter Anvin <hpa@linux.intel.com>
Date: Tue, 7 Sep 2010 23:16:18 +0000 (-0700)
Subject: compat: Make compat_alloc_user_space() incorporate the access_ok()
Git-commit: c41d68a513c71e35a14f66d71782d27a79a81ea6
References: CVE-2010-3081 bnc#639709
Patch-mainline: 2.6.36
Introduced-by: Prior to 2.6.5
compat: Make compat_alloc_user_space() incorporate the access_ok()
compat_alloc_user_space() expects the caller to independently call
access_ok() to verify the returned area. A missing call could
introduce problems on some architectures.
This patch incorporates the access_ok() check into
compat_alloc_user_space() and also adds a sanity check on the length.
The existing compat_alloc_user_space() implementations are renamed
arch_compat_alloc_user_space() and are used as part of the
implementation of the new global function.
This patch assumes NULL will cause __get_user()/__put_user() to either
fail or access userspace on all architectures. This should be
followed by checking the return value of compat_access_user_space()
for NULL in the callers, at which time the access_ok() in the callers
can also be removed.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: James Bottomley <jejb@parisc-linux.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: <stable@kernel.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
arch/ia64/include/asm/compat.h | 2 +-
arch/mips/include/asm/compat.h | 2 +-
arch/parisc/include/asm/compat.h | 2 +-
arch/powerpc/include/asm/compat.h | 2 +-
arch/s390/include/asm/compat.h | 2 +-
arch/sparc/include/asm/compat.h | 2 +-
arch/x86/include/asm/compat.h | 2 +-
include/linux/compat.h | 3 +++
kernel/compat.c | 21 +++++++++++++++++++++
9 files changed, 31 insertions(+), 7 deletions(-)
--- a/arch/ia64/include/asm/compat.h
+++ b/arch/ia64/include/asm/compat.h
@@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr)
}
static __inline__ void __user *
-compat_alloc_user_space (long len)
+arch_compat_alloc_user_space (long len)
{
struct pt_regs *regs = task_pt_regs(current);
return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len);
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compa
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = (struct pt_regs *)
((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1;
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -147,7 +147,7 @@ static inline compat_uptr_t ptr_to_compa
return (u32)(unsigned long)uptr;
}
-static __inline__ void __user *compat_alloc_user_space(long len)
+static __inline__ void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = &current->thread.regs;
return (void __user *)regs->gr[30];
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compa
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = current->thread.regs;
unsigned long usp = regs->gpr[1];
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -181,7 +181,7 @@ static inline int is_compat_task(void)
#endif
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
unsigned long stack;
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compa
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = current_thread_info()->kregs;
unsigned long usp = regs->u_regs[UREG_I6];
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compa
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = task_pt_regs(current);
return (void __user *)regs->sp - len;
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvec
const struct compat_iovec __user *uvector, unsigned long nr_segs,
unsigned long fast_segs, struct iovec *fast_pointer,
struct iovec **ret_pointer);
+
+extern void __user *compat_alloc_user_space(unsigned long len);
+
#endif /* CONFIG_COMPAT */
#endif /* _LINUX_COMPAT_H */
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1137,3 +1137,24 @@ compat_sys_sysinfo(struct compat_sysinfo
return 0;
}
+
+/*
+ * Allocate user-space memory for the duration of a single system call,
+ * in order to marshall parameters inside a compat thunk.
+ */
+void __user *compat_alloc_user_space(unsigned long len)
+{
+ void __user *ptr;
+
+ /* If len would occupy more than half of the entire compat space... */
+ if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
+ return NULL;
+
+ ptr = arch_compat_alloc_user_space(len);
+
+ if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
+ return NULL;
+
+ return ptr;
+}
+EXPORT_SYMBOL_GPL(compat_alloc_user_space);

View File

@ -0,0 +1,42 @@
From: Roland McGrath <roland@redhat.com>
Date: Wed, 8 Sep 2010 02:36:28 +0000 (-0700)
Subject: execve: improve interactivity with large arguments
Git-commit: 7993bc1f4663c0db67bb8f0d98e6678145b387cd
Patch-mainline: 2.6.36-rc4
References: bnc#635425
Introduced-by: 2.6.23
execve: improve interactivity with large arguments
This adds a preemption point during the copying of the argument and
environment strings for execve, in copy_strings(). There is already
a preemption point in the count() loop, so this doesn't add any new
points in the abstract sense.
When the total argument+environment strings are very large, the time
spent copying them can be much more than a normal user time slice.
So this change improves the interactivity of the rest of the system
when one process is doing an execve with very large arguments.
Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
fs/exec.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/exec.c b/fs/exec.c
index 1b63237..6f2d777 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -419,6 +419,8 @@ static int copy_strings(int argc, const char __user *const __user *argv,
while (len > 0) {
int offset, bytes_to_copy;
+ cond_resched();
+
offset = pos % PAGE_SIZE;
if (offset == 0)
offset = PAGE_SIZE;

View File

@ -0,0 +1,57 @@
From: Roland McGrath <roland@redhat.com>
Date: Wed, 8 Sep 2010 02:37:06 +0000 (-0700)
Subject: execve: make responsive to SIGKILL with large arguments
Git-commit: 9aea5a65aa7a1af9a4236dfaeb0088f1624f9919
Patch-mainline: 2.6.36-rc4
References: bnc#635425
Introduced-by: 2.6.23
execve: make responsive to SIGKILL with large arguments
An execve with a very large total of argument/environment strings
can take a really long time in the execve system call. It runs
uninterruptibly to count and copy all the strings. This change
makes it abort the exec quickly if sent a SIGKILL.
Note that this is the conservative change, to interrupt only for
SIGKILL, by using fatal_signal_pending(). It would be perfectly
correct semantics to let any signal interrupt the string-copying in
execve, i.e. use signal_pending() instead of fatal_signal_pending().
We'll save that change for later, since it could have user-visible
consequences, such as having a timer set too quickly make it so that
an execve can never complete, though it always happened to work before.
Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
fs/exec.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/exec.c b/fs/exec.c
index 6f2d777..828dd24 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
argv++;
if (i++ >= max)
return -E2BIG;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
cond_resched();
}
}
@@ -419,6 +422,10 @@ static int copy_strings(int argc, const char __user *const __user *argv,
while (len > 0) {
int offset, bytes_to_copy;
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTNOHAND;
+ goto out;
+ }
cond_resched();
offset = pos % PAGE_SIZE;

View File

@ -0,0 +1,56 @@
From: David Howells <dhowells@redhat.com>
Date: Fri, 10 Sep 2010 08:59:51 +0000 (+0100)
Subject: KEYS: Fix bug in keyctl_session_to_parent() if parent has no session
keyring
Git-commit: 3d96406c7da1ed5811ea52a3b0905f4f0e295376
Patch-mainline: 2.6.36-rc4
References: CVE-2010-2960 bnc#634637
Introduced-by: 2.6.32
KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring
Fix a bug in keyctl_session_to_parent() whereby it tries to check the ownership
of the parent process's session keyring whether or not the parent has a session
keyring [CVE-2010-2960].
This results in the following oops:
BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
IP: [<ffffffff811ae4dd>] keyctl_session_to_parent+0x251/0x443
...
Call Trace:
[<ffffffff811ae2f3>] ? keyctl_session_to_parent+0x67/0x443
[<ffffffff8109d286>] ? __do_fault+0x24b/0x3d0
[<ffffffff811af98c>] sys_keyctl+0xb4/0xb8
[<ffffffff81001eab>] system_call_fastpath+0x16/0x1b
if the parent process has no session keyring.
If the system is using pam_keyinit then it mostly protected against this as all
processes derived from a login will have inherited the session keyring created
by pam_keyinit during the log in procedure.
To test this, pam_keyinit calls need to be commented out in /etc/pam.d/.
Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Tavis Ormandy <taviso@cmpxchg8b.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
security/keys/keyctl.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1292,7 +1292,8 @@ long keyctl_session_to_parent(void)
goto not_permitted;
/* the keyrings must have the same UID */
- if (pcred ->tgcred->session_keyring->uid != mycred->euid ||
+ if ((pcred->tgcred->session_keyring &&
+ pcred->tgcred->session_keyring->uid != mycred->euid) ||
mycred->tgcred->session_keyring->uid != mycred->euid)
goto not_permitted;

View File

@ -0,0 +1,69 @@
From: David Howells <dhowells@redhat.com>
Date: Fri, 10 Sep 2010 08:59:46 +0000 (+0100)
Subject: KEYS: Fix RCU no-lock warning in keyctl_session_to_parent()
Git-commit: 9d1ac65a9698513d00e5608d93fca0c53f536c14
Patch-mainline: 2.6.36-rc4
References: CVE-2010-2960 bnc#634637
Introduced-by: 2.6.32
KEYS: Fix RCU no-lock warning in keyctl_session_to_parent()
There's an protected access to the parent process's credentials in the middle
of keyctl_session_to_parent(). This results in the following RCU warning:
===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
security/keys/keyctl.c:1291 invoked rcu_dereference_check() without protection!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 0
1 lock held by keyctl-session-/2137:
#0: (tasklist_lock){.+.+..}, at: [<ffffffff811ae2ec>] keyctl_session_to_parent+0x60/0x236
stack backtrace:
Pid: 2137, comm: keyctl-session- Not tainted 2.6.36-rc2-cachefs+ #1
Call Trace:
[<ffffffff8105606a>] lockdep_rcu_dereference+0xaa/0xb3
[<ffffffff811ae379>] keyctl_session_to_parent+0xed/0x236
[<ffffffff811af77e>] sys_keyctl+0xb4/0xb6
[<ffffffff81001eab>] system_call_fastpath+0x16/0x1b
The code should take the RCU read lock to make sure the parents credentials
don't go away, even though it's holding a spinlock and has IRQ disabled.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
security/keys/keyctl.c | 3 +++
1 file changed, 3 insertions(+)
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1259,6 +1259,7 @@ long keyctl_session_to_parent(void)
keyring_r = NULL;
me = current;
+ rcu_read_lock();
write_lock_irq(&tasklist_lock);
parent = me->real_parent;
@@ -1313,6 +1314,7 @@ long keyctl_session_to_parent(void)
set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
write_unlock_irq(&tasklist_lock);
+ rcu_read_unlock();
if (oldcred)
put_cred(oldcred);
return 0;
@@ -1321,6 +1323,7 @@ already_same:
ret = 0;
not_permitted:
write_unlock_irq(&tasklist_lock);
+ rcu_read_unlock();
put_cred(cred);
return ret;

View File

@ -0,0 +1,165 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 16 Aug 2010 20:04:22 +0000 (+0000)
Subject: net sched: fix some kernel memory leaks
Git-commit: 1c40be12f7d8ca1d387510d39787b12e512a7ce8
Patch-mainline: 2.6.36-rc3
References: CVE-2010-2942 bnc#632309
net sched: fix some kernel memory leaks
We leak at least 32bits of kernel memory to user land in tc dump,
because we dont init all fields (capab ?) of the dumped structure.
Use C99 initializers so that holes and non explicit fields are zeroed.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
net/sched/act_gact.c | 21 ++++++++++++---------
net/sched/act_mirred.c | 15 ++++++++-------
net/sched/act_police.c | 19 ++++++++-----------
net/sched/act_simple.c | 11 ++++++-----
net/sched/act_skbedit.c | 11 ++++++-----
5 files changed, 40 insertions(+), 37 deletions(-)
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb,
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tc_gact opt;
struct tcf_gact *gact = a->priv;
+ struct tc_gact opt = {
+ .index = gact->tcf_index,
+ .refcnt = gact->tcf_refcnt - ref,
+ .bindcnt = gact->tcf_bindcnt - bind,
+ .action = gact->tcf_action,
+ };
struct tcf_t t;
- opt.index = gact->tcf_index;
- opt.refcnt = gact->tcf_refcnt - ref;
- opt.bindcnt = gact->tcf_bindcnt - bind;
- opt.action = gact->tcf_action;
NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
#ifdef CONFIG_GACT_PROB
if (gact->tcfg_ptype) {
- struct tc_gact_p p_opt;
- p_opt.paction = gact->tcfg_paction;
- p_opt.pval = gact->tcfg_pval;
- p_opt.ptype = gact->tcfg_ptype;
+ struct tc_gact_p p_opt = {
+ .paction = gact->tcfg_paction,
+ .pval = gact->tcfg_pval,
+ .ptype = gact->tcfg_ptype,
+ };
+
NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
}
#endif
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -211,15 +211,16 @@ static int tcf_mirred_dump(struct sk_buf
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = a->priv;
- struct tc_mirred opt;
+ struct tc_mirred opt = {
+ .index = m->tcf_index,
+ .action = m->tcf_action,
+ .refcnt = m->tcf_refcnt - ref,
+ .bindcnt = m->tcf_bindcnt - bind,
+ .eaction = m->tcfm_eaction,
+ .ifindex = m->tcfm_ifindex,
+ };
struct tcf_t t;
- opt.index = m->tcf_index;
- opt.action = m->tcf_action;
- opt.refcnt = m->tcf_refcnt - ref;
- opt.bindcnt = m->tcf_bindcnt - bind;
- opt.eaction = m->tcfm_eaction;
- opt.ifindex = m->tcfm_ifindex;
NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -341,22 +341,19 @@ tcf_act_police_dump(struct sk_buff *skb,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_police *police = a->priv;
- struct tc_police opt;
+ struct tc_police opt = {
+ .index = police->tcf_index,
+ .action = police->tcf_action,
+ .mtu = police->tcfp_mtu,
+ .burst = police->tcfp_burst,
+ .refcnt = police->tcf_refcnt - ref,
+ .bindcnt = police->tcf_bindcnt - bind,
+ };
- opt.index = police->tcf_index;
- opt.action = police->tcf_action;
- opt.mtu = police->tcfp_mtu;
- opt.burst = police->tcfp_burst;
- opt.refcnt = police->tcf_refcnt - ref;
- opt.bindcnt = police->tcf_bindcnt - bind;
if (police->tcfp_R_tab)
opt.rate = police->tcfp_R_tab->rate;
- else
- memset(&opt.rate, 0, sizeof(opt.rate));
if (police->tcfp_P_tab)
opt.peakrate = police->tcfp_P_tab->rate;
- else
- memset(&opt.peakrate, 0, sizeof(opt.peakrate));
NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
if (police->tcfp_result)
NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct s
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_defact *d = a->priv;
- struct tc_defact opt;
+ struct tc_defact opt = {
+ .index = d->tcf_index,
+ .refcnt = d->tcf_refcnt - ref,
+ .bindcnt = d->tcf_bindcnt - bind,
+ .action = d->tcf_action,
+ };
struct tcf_t t;
- opt.index = d->tcf_index;
- opt.refcnt = d->tcf_refcnt - ref;
- opt.bindcnt = d->tcf_bindcnt - bind;
- opt.action = d->tcf_action;
NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata);
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struc
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbedit *d = a->priv;
- struct tc_skbedit opt;
+ struct tc_skbedit opt = {
+ .index = d->tcf_index,
+ .refcnt = d->tcf_refcnt - ref,
+ .bindcnt = d->tcf_bindcnt - bind,
+ .action = d->tcf_action,
+ };
struct tcf_t t;
- opt.index = d->tcf_index;
- opt.refcnt = d->tcf_refcnt - ref;
- opt.bindcnt = d->tcf_bindcnt - bind;
- opt.action = d->tcf_action;
NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
if (d->flags & SKBEDIT_F_PRIORITY)
NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),

View File

@ -0,0 +1,46 @@
From: Roland McGrath <roland@redhat.com>
Date: Wed, 8 Sep 2010 02:35:49 +0000 (-0700)
Subject: setup_arg_pages: diagnose excessive argument size
Git-commit: 1b528181b2ffa14721fb28ad1bd539fe1732c583
Patch-mainline: 2.6.36-rc4
References: bnc#635425
Introduced-by: 2.6.23
setup_arg_pages: diagnose excessive argument size
The CONFIG_STACK_GROWSDOWN variant of setup_arg_pages() does not
check the size of the argument/environment area on the stack.
When it is unworkably large, shift_arg_pages() hits its BUG_ON.
This is exploitable with a very large RLIMIT_STACK limit, to
create a crash pretty easily.
Check that the initial stack is not too large to make it possible
to map in any executable. We're not checking that the actual
executable (or intepreter, for binfmt_elf) will fit. So those
mappings might clobber part of the initial stack mapping. But
that is just userland lossage that userland made happen, not a
kernel problem.
Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
fs/exec.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -593,6 +593,11 @@ int setup_arg_pages(struct linux_binprm
#else
stack_top = arch_align_stack(stack_top);
stack_top = PAGE_ALIGN(stack_top);
+
+ if (unlikely(stack_top < mmap_min_addr) ||
+ unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
+ return -ENOMEM;
+
stack_shift = vma->vm_end - stack_top;
bprm->p -= stack_shift;

View File

@ -0,0 +1,62 @@
From 950eaaca681c44aab87a46225c9e44f902c080aa Mon Sep 17 00:00:00 2001
From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Date: Tue, 31 Aug 2010 17:00:18 -0700
Subject: pid: make setpgid() system call use RCU read-side critical section
Git-commit: 950eaaca681c44aab87a46225c9e44f902c080aa
Patch-mainline: yes
References: bnc#639728
[ 23.584719]
[ 23.584720] ===================================================
[ 23.585059] [ INFO: suspicious rcu_dereference_check() usage. ]
[ 23.585176] ---------------------------------------------------
[ 23.585176] kernel/pid.c:419 invoked rcu_dereference_check() without protection!
[ 23.585176]
[ 23.585176] other info that might help us debug this:
[ 23.585176]
[ 23.585176]
[ 23.585176] rcu_scheduler_active = 1, debug_locks = 1
[ 23.585176] 1 lock held by rc.sysinit/728:
[ 23.585176] #0: (tasklist_lock){.+.+..}, at: [<ffffffff8104771f>] sys_setpgid+0x5f/0x193
[ 23.585176]
[ 23.585176] stack backtrace:
[ 23.585176] Pid: 728, comm: rc.sysinit Not tainted 2.6.36-rc2 #2
[ 23.585176] Call Trace:
[ 23.585176] [<ffffffff8105b436>] lockdep_rcu_dereference+0x99/0xa2
[ 23.585176] [<ffffffff8104c324>] find_task_by_pid_ns+0x50/0x6a
[ 23.585176] [<ffffffff8104c35b>] find_task_by_vpid+0x1d/0x1f
[ 23.585176] [<ffffffff81047727>] sys_setpgid+0x67/0x193
[ 23.585176] [<ffffffff810029eb>] system_call_fastpath+0x16/0x1b
[ 24.959669] type=1400 audit(1282938522.956:4): avc: denied { module_request } for pid=766 comm="hwclock" kmod="char-major-10-135" scontext=system_u:system_r:hwclock_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclas
It turns out that the setpgid() system call fails to enter an RCU
read-side critical section before doing a PID-to-task_struct translation.
This commit therefore does rcu_read_lock() before the translation, and
also does rcu_read_unlock() after the last use of the returned pointer.
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
kernel/sys.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -962,6 +962,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid
pgid = pid;
if (pgid < 0)
return -EINVAL;
+ rcu_read_lock();
/* From this point forward we keep holding onto the tasklist lock
* so that our parent does not change from under us. -DaveM
@@ -1015,6 +1016,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid
out:
/* All paths lead to here, thus we are safe. -DaveM */
write_unlock_irq(&tasklist_lock);
+ rcu_read_unlock();
return err;
}

View File

@ -0,0 +1,53 @@
From: Roland McGrath <roland@redhat.com>
Date: Tue, 14 Sep 2010 19:22:58 +0000 (-0700)
Subject: x86-64, compat: Retruncate rax after ia32 syscall entry tracing
Git-commit: eefdca043e8391dcd719711716492063030b55ac
References: CVE-2010-3301 bnc#639708
Patch-mainline: 2.6.36
Introduced-by: 2.6.27
x86-64, compat: Retruncate rax after ia32 syscall entry tracing
In commit d4d6715, we reopened an old hole for a 64-bit ptracer touching a
32-bit tracee in system call entry. A %rax value set via ptrace at the
entry tracing stop gets used whole as a 32-bit syscall number, while we
only check the low 32 bits for validity.
Fix it by truncating %rax back to 32 bits after syscall_trace_enter,
in addition to testing the full 64 bits as has already been added.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
arch/x86/ia32/ia32entry.S | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 84e3a4e..518bb99 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -50,7 +50,12 @@
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
- * the value it wants us to use in the table lookup.
+ * the %rax value we should see. Instead, we just truncate that
+ * value to 32 bits again as we did on entry from user mode.
+ * If it's a new value set by user_regset during entry tracing,
+ * this matches the normal truncation of the user-mode value.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
@@ -60,6 +65,7 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
+ movl %eax,%eax /* zero extension */
.endm
.macro CFI_STARTPROC32 simple

View File

@ -0,0 +1,99 @@
From: H. Peter Anvin <hpa@linux.intel.com>
Date: Tue, 14 Sep 2010 19:42:41 +0000 (-0700)
Subject: x86-64, compat: Test %rax for the syscall number, not %eax
Git-commit: 36d001c70d8a0144ac1d038f6876c484849a74de
References: CVE-2010-3301 bnc#639708
Patch-mainline: 2.6.36
Introduced-by: 2.6.27
x86-64, compat: Test %rax for the syscall number, not %eax
On 64 bits, we always, by necessity, jump through the system call
table via %rax. For 32-bit system calls, in theory the system call
number is stored in %eax, and the code was testing %eax for a valid
system call number. At one point we loaded the stored value back from
the stack to enforce zero-extension, but that was removed in checkin
d4d67150165df8bf1cc05e532f6efca96f907cab. An actual 32-bit process
will not be able to introduce a non-zero-extended number, but it can
happen via ptrace.
Instead of re-introducing the zero-extension, test what we are
actually going to use, i.e. %rax. This only adds a handful of REX
prefixes to the code.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@kernel.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
arch/x86/ia32/ia32entry.S | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -159,7 +159,7 @@ ENTRY(ia32_sysenter_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP
@@ -201,7 +201,7 @@ sysexit_from_sys_call:
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
@@ -254,7 +254,7 @@ sysenter_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
@@ -320,7 +320,7 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz cstar_tracesys
- cmpl $IA32_NR_syscalls-1,%eax
+ cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
@@ -373,7 +373,7 @@ cstar_tracesys:
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -431,7 +431,7 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
@@ -450,7 +450,7 @@ ia32_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)

View File

@ -0,0 +1,55 @@
http://git.kernel.org/?p=linux/kernel/git/konrad/xen.git;a=commit;h=621d869f36b215d63bb99e7ecd7a11f029821b85
xen-pcifront: Claim PCI resources before going live.
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fri, 18 Jun 2010 19:31:47 +0000 (15:31 -0400)
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fri, 18 Jun 2010 19:40:37 +0000 (15:40 -0400)
We were missing the important step of claiming (and setting the
parent of IO and MEM regions to 'PCI IO' and 'PCI mem' respectivly)
of the BARs. This meant that during hot inserts we would get:
igb 0000:01:00.1: device not available (can't reserve [mem 0xfb840000-0xfb85ffff])
even thought the memory region had been reserved before.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
--- linux-2.6.34.1/drivers/xen/pcifront/pci_op.c.orig2 2010-09-29 16:31:58.702675503 +0200
+++ linux-2.6.34.1/drivers/xen/pcifront/pci_op.c 2010-09-29 16:38:47.260675349 +0200
@@ -426,7 +426,7 @@ static int pcifront_claim_resource(struc
r = &dev->resource[i];
if (!r->parent && r->start && r->flags) {
- dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
+ dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
pci_name(dev), i);
pci_claim_resource(dev, i);
}
@@ -516,14 +516,15 @@ int __devinit pcifront_scan_root(struct
list_add(&bus_entry->list, &pdev->root_buses);
+ /* pci_scan_bus_parented skips devices which do not have a have
+ * devfn==0. The pcifront_scan_bus enumerates all devfn. */
+ err = pcifront_scan_bus(pdev, domain, bus, b);
+
/* Claim resources before going "live" with our devices */
pci_walk_bus(b, pcifront_claim_resource, pdev);
pci_bus_add_devices(b);
- /* pci_scan_bus_parented skips devices which do not have a have
- * devfn==0. The pcifront_scan_bus enumerates all devfn. */
- err = pcifront_scan_bus(pdev, domain, bus, b);
return err;
@@ -560,6 +561,9 @@ int __devinit pcifront_rescan_root(struc
err = pcifront_scan_bus(pdev, domain, bus, b);
+ /* Claim resources before going "live" with our devices */
+ pci_walk_bus(b, pcifront_claim_resource, pdev);
+
return err;
}

View File

@ -0,0 +1,56 @@
http://git.kernel.org/?p=linux/kernel/git/konrad/xen.git;a=commitdiff;h=4a65de894fc0af05397eedca180d0ea7d8c6caba#patch1
git/pub/scm / linux/kernel/git/konrad/xen.git / commitdiff
? search: re
summary | shortlog | log | commit | commitdiff | tree
raw | patch (parent: 621d869)
xen-pcifront: Don't race with udev when discovering new devices.
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fri, 23 Jul 2010 14:35:57 +0000 (10:35 -0400)
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fri, 23 Jul 2010 15:15:56 +0000 (11:15 -0400)
We inadvertly would call 'pci_bus_add_device' right after discovering
the device, but before claiming the BARs. This ended up firing off
a uevent and udev loading the module and the modules failing to
request_region as they were not claimed. We fix this by holding off
going live by calling 'pci_bus_add_devices' at the end.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
--- linux-2.6.34.1/drivers/xen/pcifront/pci_op.c.orig3 2010-09-29 16:32:08.324675371 +0200
+++ linux-2.6.34.1/drivers/xen/pcifront/pci_op.c 2010-09-29 16:37:23.215674973 +0200
@@ -456,17 +456,10 @@ int __devinit pcifront_scan_bus(struct p
}
d = pci_scan_single_device(b, devfn);
- if (d) {
+ if (d)
dev_info(&pdev->xdev->dev, "New device on "
"%04x:%02x:%02x.%02x found.\n", domain, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- err = pci_bus_add_device(d);
- if (err) {
- dev_err(&pdev->xdev->dev, "Failed to add "
- " device to bus.\n");
- return err;
- }
- }
}
return 0;
@@ -523,6 +516,7 @@ int __devinit pcifront_scan_root(struct
/* Claim resources before going "live" with our devices */
pci_walk_bus(b, pcifront_claim_resource, pdev);
+ /* Create SysFS and notify udev of the devices. Aka: "going live" */
pci_bus_add_devices(b);
@@ -564,6 +558,9 @@ int __devinit pcifront_rescan_root(struc
/* Claim resources before going "live" with our devices */
pci_walk_bus(b, pcifront_claim_resource, pdev);
+ /* Create SysFS and notify udev of the devices. Aka: "going live" */
+ pci_bus_add_devices(b);
+
return err;
}

View File

@ -0,0 +1,127 @@
http://git.kernel.org/?p=linux/kernel/git/konrad/xen.git;a=commitdiff;h=978b7df39be386f9a875bb14fcd84145e8ad0ee2#patch1
git/pub/scm / linux/kernel/git/konrad/xen.git / commitdiff
? search: re
summary | shortlog | log | commit | commitdiff | tree
raw | patch (parent: 28a4d3a)
xen-pcifront: Enforce scanning of device functions on initial execution.
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tue, 8 Jun 2010 16:59:41 +0000 (12:59 -0400)
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fri, 18 Jun 2010 19:40:27 +0000 (15:40 -0400)
'pci_scan_slot' abondons scanning of functions above 0 if a device with
function has not been detected. We need to be able to scan functions
above 0 in case the user has passed in devices without the function 0
for the slot/bus. To that end we are reusing the code that existed in
the rescan code path and make usage of it in the initial execution
path.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
--- linux-2.6.34.1/drivers/xen/pcifront/pci_op.c.orig 2010-09-29 16:31:32.330675478 +0200
+++ linux-2.6.34.1/drivers/xen/pcifront/pci_op.c 2010-09-29 16:39:23.197674096 +0200
@@ -435,6 +435,43 @@ static int pcifront_claim_resource(struc
return 0;
}
+int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
+ unsigned int domain, unsigned int bus,
+ struct pci_bus *b)
+{
+ struct pci_dev *d;
+ unsigned int devfn;
+ int err;
+
+ /* Scan the bus for functions and add.
+ * We omit handling of PCI bridge attachment because pciback prevents
+ * bridges from being exported.
+ */
+ for (devfn = 0; devfn < 0x100; devfn++) {
+ d = pci_get_slot(b, devfn);
+ if (d) {
+ /* Device is already known. */
+ pci_dev_put(d);
+ continue;
+ }
+
+ d = pci_scan_single_device(b, devfn);
+ if (d) {
+ dev_info(&pdev->xdev->dev, "New device on "
+ "%04x:%02x:%02x.%02x found.\n", domain, bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+ err = pci_bus_add_device(d);
+ if (err) {
+ dev_err(&pdev->xdev->dev, "Failed to add "
+ " device to bus.\n");
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
int __devinit pcifront_scan_root(struct pcifront_device *pdev,
unsigned int domain, unsigned int bus)
{
@@ -484,7 +521,11 @@ int __devinit pcifront_scan_root(struct
pci_bus_add_devices(b);
- return 0;
+ /* pci_scan_bus_parented skips devices which do not have a have
+ * devfn==0. The pcifront_scan_bus enumerates all devfn. */
+ err = pcifront_scan_bus(pdev, domain, bus, b);
+
+ return err;
err_out:
kfree(bus_entry);
@@ -496,10 +537,9 @@ int __devinit pcifront_scan_root(struct
int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
unsigned int domain, unsigned int bus)
{
+ int err;
struct pci_bus *b;
- struct pci_dev *d;
- unsigned int devfn;
-
+
#ifndef CONFIG_PCI_DOMAINS
if (domain != 0) {
dev_err(&pdev->xdev->dev,
@@ -518,34 +558,9 @@ int __devinit pcifront_rescan_root(struc
/* If the bus is unknown, create it. */
return pcifront_scan_root(pdev, domain, bus);
- /* Rescan the bus for newly attached functions and add.
- * We omit handling of PCI bridge attachment because pciback prevents
- * bridges from being exported.
- */
- for (devfn = 0; devfn < 0x100; devfn++) {
- d = pci_get_slot(b, devfn);
- if(d) {
- /* Device is already known. */
- pci_dev_put(d);
- continue;
- }
-
- d = pci_scan_single_device(b, devfn);
- if (d) {
- int err;
-
- dev_info(&pdev->xdev->dev, "New device on "
- "%04x:%02x:%02x.%02x found.\n", domain, bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
- err = pci_bus_add_device(d);
- if (err)
- dev_err(&pdev->xdev->dev,
- "error %d adding device, continuing.\n",
- err);
- }
- }
+ err = pcifront_scan_bus(pdev, domain, bus, b);
- return 0;
+ return err;
}
static void free_root_bus_devs(struct pci_bus *bus)

View File

@ -0,0 +1,14 @@
unbind_from_irqhandler takes irq, not evtchn, as its first argument.
Signed-off-by: Rafal Wojtczuk <rafal@invisiblethingslab.com>
--- linux-2.6.34.1/drivers/xen/pcifront/xenbus.c.orig 2010-09-29 16:47:39.961674359 +0200
+++ linux-2.6.34.1/drivers/xen/pcifront/xenbus.c 2010-09-29 16:47:49.458675391 +0200
@@ -61,7 +61,7 @@ static void free_pdev(struct pcifront_de
/*For PCIE_AER error handling job*/
flush_scheduled_work();
- unbind_from_irqhandler(pdev->evtchn, pdev);
+ unbind_from_irqhandler(irq_from_evtchn(pdev->evtchn), pdev);
if (pdev->evtchn != INVALID_EVTCHN)
xenbus_free_evtchn(pdev->xdev, pdev->evtchn);

View File

@ -0,0 +1,48 @@
From: Roland McGrath <roland@redhat.com>
Date: Tue, 14 Sep 2010 19:22:58 +0000 (-0700)
Subject: x86-64, compat: Retruncate rax after ia32 syscall entry tracing
Git-commit: eefdca043e8391dcd719711716492063030b55ac
References: CVE-2010-3301 bnc#639708
Patch-mainline: 2.6.36
Introduced-by: 2.6.27
x86-64, compat: Retruncate rax after ia32 syscall entry tracing
In commit d4d6715, we reopened an old hole for a 64-bit ptracer touching a
32-bit tracee in system call entry. A %rax value set via ptrace at the
entry tracing stop gets used whole as a 32-bit syscall number, while we
only check the low 32 bits for validity.
Fix it by truncating %rax back to 32 bits after syscall_trace_enter,
in addition to testing the full 64 bits as has already been added.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing" by xen-port-patches.py
--- 11.3-2010-09-14.orig/arch/x86/ia32/ia32entry-xen.S 2010-05-12 09:08:52.000000000 +0200
+++ 11.3-2010-09-14/arch/x86/ia32/ia32entry-xen.S 2010-09-17 11:18:22.000000000 +0200
@@ -47,7 +47,12 @@ ia32_common:
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
- * the value it wants us to use in the table lookup.
+ * the %rax value we should see. Instead, we just truncate that
+ * value to 32 bits again as we did on entry from user mode.
+ * If it's a new value set by user_regset during entry tracing,
+ * this matches the normal truncation of the user-mode value.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
@@ -57,6 +62,7 @@ ia32_common:
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
+ movl %eax,%eax /* zero extension */
.endm
.macro CFI_STARTPROC32 simple

View File

@ -0,0 +1,96 @@
From: H. Peter Anvin <hpa@linux.intel.com>
Date: Tue, 14 Sep 2010 19:42:41 +0000 (-0700)
Subject: x86-64, compat: Test %rax for the syscall number, not %eax
Git-commit: 36d001c70d8a0144ac1d038f6876c484849a74de
References: CVE-2010-3301 bnc#639708
Patch-mainline: 2.6.36
Introduced-by: 2.6.27
x86-64, compat: Test %rax for the syscall number, not %eax
On 64 bits, we always, by necessity, jump through the system call
table via %rax. For 32-bit system calls, in theory the system call
number is stored in %eax, and the code was testing %eax for a valid
system call number. At one point we loaded the stored value back from
the stack to enforce zero-extension, but that was removed in checkin
d4d67150165df8bf1cc05e532f6efca96f907cab. An actual 32-bit process
will not be able to introduce a non-zero-extended number, but it can
happen via ptrace.
Instead of re-introducing the zero-extension, test what we are
actually going to use, i.e. %rax. This only adds a handful of REX
prefixes to the code.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@kernel.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax" by xen-port-patches.py
--- 11.3-2010-09-14.orig/arch/x86/ia32/ia32entry-xen.S 2010-09-17 11:18:22.000000000 +0200
+++ 11.3-2010-09-14/arch/x86/ia32/ia32entry-xen.S 2010-09-17 11:18:28.000000000 +0200
@@ -145,7 +145,7 @@ ENTRY(ia32_sysenter_target)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz sysenter_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
jmp ia32_do_call
@@ -159,7 +159,7 @@ ENTRY(ia32_sysenter_target)
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
@@ -186,7 +186,7 @@ sysenter_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp ia32_do_call
CFI_ENDPROC
@@ -240,7 +240,7 @@ ENTRY(ia32_cstar_target)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz cstar_tracesys
- cmpl $IA32_NR_syscalls-1,%eax
+ cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
@@ -267,7 +267,7 @@ cstar_tracesys:
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -324,7 +324,7 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
@@ -343,7 +343,7 @@ ia32_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)

View File

@ -103,6 +103,11 @@
patches.fixes/make-note_interrupt-fast.diff
patches.fixes/twl6030-fix-note_interrupt-call
patches.fixes/use-rcu-lock-in-setpgid.patch
patches.fixes/compat-make-compat_alloc_user_space-incorporate-the-access_ok
patches.fixes/setup_arg_pages-diagnose-excessive-argument-size
patches.fixes/execve-improve-interactivity-with-large-arguments
patches.fixes/execve-make-responsive-to-sigkill-with-large-arguments
########################################################
# Architecture-specific patches. These used to be all
@ -147,6 +152,9 @@
patches.suse/x86-mark_rodata_rw.patch
patches.fixes/dmar-fix-oops-with-no-dmar-table
patches.fixes/x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing
patches.fixes/x86-64-compat-test-rax-for-the-syscall-number-not-eax
########################################################
# x86 MCE/MCA (Machine Check Error/Architecture) extensions
@ -321,6 +329,7 @@
# Networking, IPv6
########################################################
patches.fixes/bridge-module-get-put.patch
patches.fixes/net-sched-fix-some-kernel-memory-leaks
########################################################
# NFS
@ -628,6 +637,8 @@
# Security stuff
#
##########################################################
patches.fixes/keys-fix-rcu-no-lock-warning-in-keyctl_session_to_parent
patches.fixes/keys-fix-bug-in-keyctl_session_to_parent-if-parent-has-no-session-keyring
##########################################################
# Audit
@ -786,6 +797,8 @@
patches.xen/xen3-seccomp-disable-tsc-option
patches.xen/xen3-x86-mcp51-no-dac
patches.xen/xen3-x86-mark_rodata_rw.patch
patches.xen/xen3-x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing
patches.xen/xen3-x86-64-compat-test-rax-for-the-syscall-number-not-eax
patches.xen/xen3-acpi_processor_check_maxcpus.patch
patches.xen/xen3-kdb-x86
patches.xen/xen3-stack-unwind
@ -837,4 +850,9 @@
patches.xen/xen-x86_64-dump-user-pgt
patches.xen/xen-x86_64-note-init-p2m
patches.xen/pcifront-enforce-scan.patch
patches.xen/pcifront-claim.patch
patches.xen/pcifront-dont-race-udev.patch
patches.xen/pcifront-irq-not-evtchn.patch
patches.qubes/nuke_balloon_minimum_target.patch