Subject: DWARF2 EH-frame based stack unwinding
From: jbeulich@novell.com
Patch-mainline: no

This includes reverting f1883f86dea84fe47a71a39fc1afccc005915ed8.

Update Jan 17 2009 jeffm:
- Something in 2.6.29-rc1 tweaked the frame pointer code somehow, so I fixed that up.

Update Jul 02 2010 jbeulich:
- fix after upstream commit 9e565292270a2d55524be38835104c564ac8f795

---
 Makefile                          |    5 
 arch/x86/Kconfig                  |    2 
 arch/x86/Makefile                 |    2 
 arch/x86/include/asm/dwarf2.h     |    3 
 arch/x86/include/asm/stacktrace.h |    4 
 arch/x86/include/asm/system.h     |   10 
 arch/x86/include/asm/unwind.h     |  163 ++++
 arch/x86/kernel/dumpstack.c       |   89 ++
 arch/x86/kernel/dumpstack_32.c    |    5 
 arch/x86/kernel/dumpstack_64.c    |    8 
 arch/x86/kernel/entry_32.S        |   35 +
 arch/x86/kernel/entry_64.S        |   34 
 arch/x86/kernel/vmlinux.lds.S     |    2 
 include/asm-generic/vmlinux.lds.h |   22 
 include/linux/module.h            |    3 
 include/linux/unwind.h            |  135 +++
 init/main.c                       |    3 
 kernel/Makefile                   |    1 
 kernel/module.c                   |   32 
 kernel/unwind.c                   | 1303 ++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug                 |   18 
 21 files changed, 1874 insertions(+), 5 deletions(-)

--- a/Makefile
+++ b/Makefile
@@ -589,6 +589,11 @@ KBUILD_CFLAGS += -fomit-frame-pointer
 endif
 endif

+ifdef CONFIG_UNWIND_INFO
+KBUILD_CFLAGS += -fasynchronous-unwind-tables
+LDFLAGS_vmlinux += --eh-frame-hdr
+endif
+
 ifdef CONFIG_DEBUG_INFO
 KBUILD_CFLAGS += -g
 KBUILD_AFLAGS += -gdwarf-2
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -496,7 +496,7 @@ config X86_32_IRIS
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
-	depends on X86
+	depends on X86 && !STACK_UNWIND
 	---help---
 	  Calculate simpler /proc/<PID>/wchan values. If this option
 	  is disabled then wchan values will recurse back to the
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -110,7 +110,9 @@ KBUILD_CFLAGS += -pipe
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
 KBUILD_CFLAGS += -Wno-sign-compare
 #
+ifneq ($(CONFIG_UNWIND_INFO),y)
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+endif
 # prevent gcc from generating any FP code by mistake
 KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)

--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -34,7 +34,8 @@
 #define CFI_SIGNAL_FRAME
 #endif

-#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
+#if !defined(CONFIG_UNWIND_INFO) && defined(CONFIG_AS_CFI_SECTIONS) \
+    && defined(__ASSEMBLY__)
	/*
	 * Emit CFI data in .debug_frame sections, not .eh_frame sections.
* The latter we currently just discard since we don't do DWARF --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -92,6 +92,10 @@ extern void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, char *log_lvl); +int try_stack_unwind(struct task_struct *task, struct pt_regs *regs, + unsigned long **stack, unsigned long *bp, + const struct stacktrace_ops *ops, void *data); + extern unsigned int code_bytes; /* The form of the top of the frame on the stack */ --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -123,12 +123,22 @@ do { \ #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ +/* The stack unwind code needs this but it pollutes traces otherwise */ +#ifdef CONFIG_UNWIND_INFO +#define THREAD_RETURN_SYM \ + ".globl thread_return\n" \ + "thread_return:\n\t" +#else +#define THREAD_RETURN_SYM +#endif + /* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ + THREAD_RETURN_SYM \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ --- /dev/null +++ b/arch/x86/include/asm/unwind.h @@ -0,0 +1,163 @@ +#ifndef _ASM_X86_UNWIND_H +#define _ASM_X86_UNWIND_H + +/* + * Copyright (C) 2002-2009 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + */ + +#ifdef CONFIG_STACK_UNWIND + +#include +#include +#include + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define UNW_PC(frame) (frame)->regs.ip +#define UNW_SP(frame) (frame)->regs.sp +#ifdef CONFIG_FRAME_POINTER +#define UNW_FP(frame) (frame)->regs.bp +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.sp0) +#define TSK_STACK_TOP(tsk) ((tsk)->thread.sp0) +#else +#define UNW_FP(frame) ((void)(frame), 0UL) +#endif +/* On x86-64, might need to account for the special exception and interrupt + handling stacks here, since normally + EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER, + but the construct is needed only for getting across the stack switch to + the interrupt stack - thus considering the IRQ stack itself is unnecessary, + and the overhead of comparing against all exception handling stacks seems + not desirable. 
*/ +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#ifdef CONFIG_X86_64 + +#include + +#define FRAME_RETADDR_OFFSET 8 + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(ax), \ + PTREGS_INFO(dx), \ + PTREGS_INFO(cx), \ + PTREGS_INFO(bx), \ + PTREGS_INFO(si), \ + PTREGS_INFO(di), \ + PTREGS_INFO(bp), \ + PTREGS_INFO(sp), \ + PTREGS_INFO(r8), \ + PTREGS_INFO(r9), \ + PTREGS_INFO(r10), \ + PTREGS_INFO(r11), \ + PTREGS_INFO(r12), \ + PTREGS_INFO(r13), \ + PTREGS_INFO(r14), \ + PTREGS_INFO(r15), \ + PTREGS_INFO(ip) + +#else /* X86_32 */ + +#include + +#define FRAME_RETADDR_OFFSET 4 + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(ax), \ + PTREGS_INFO(cx), \ + PTREGS_INFO(dx), \ + PTREGS_INFO(bx), \ + PTREGS_INFO(sp), \ + PTREGS_INFO(bp), \ + PTREGS_INFO(si), \ + PTREGS_INFO(di), \ + PTREGS_INFO(ip) + +#endif + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + sizeof(void *))) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + info->regs = *regs; +#else + if (user_mode_vm(regs)) + info->regs = *regs; + else { + memcpy(&info->regs, regs, offsetof(struct pt_regs, sp)); + info->regs.sp = (unsigned long)®s->sp; + info->regs.ss = __KERNEL_DS; + } +#endif +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ +#ifdef CONFIG_X86_64 + extern const char thread_return[]; + + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.ip = (unsigned long)thread_return; + info->regs.cs = __KERNEL_CS; + probe_kernel_address(info->task->thread.sp, info->regs.bp); + info->regs.sp = info->task->thread.sp; + info->regs.ss = __KERNEL_DS; +#else + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.ip = info->task->thread.ip; + info->regs.cs = __KERNEL_CS; + probe_kernel_address(info->task->thread.sp, info->regs.bp); + info->regs.sp = info->task->thread.sp; + info->regs.ss = __KERNEL_DS; + info->regs.ds = __USER_DS; + info->regs.es = __USER_DS; +#endif +} + +extern asmlinkage int +arch_unwind_init_running(struct unwind_frame_info *, + unwind_callback_fn, + const struct stacktrace_ops *, void *data); + +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) +{ +#ifdef CONFIG_X86_64 + return user_mode(&info->regs) + || (long)info->regs.ip >= 0 + || (info->regs.ip >= VSYSCALL_START && info->regs.ip < VSYSCALL_END) + || (long)info->regs.sp >= 0; +#else + return user_mode_vm(&info->regs) + || info->regs.ip < PAGE_OFFSET + || (info->regs.ip >= __fix_to_virt(FIX_VDSO) + && info->regs.ip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + || info->regs.sp < PAGE_OFFSET; +#endif +} + +#else + +#define UNW_PC(frame) ((void)(frame), 0UL) +#define UNW_SP(frame) ((void)(frame), 0UL) +#define UNW_FP(frame) ((void)(frame), 0UL) + +static inline int arch_unw_user_mode(const void *info) +{ + return 0; +} + +#endif + +#endif /* _ASM_X86_UNWIND_H */ --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -17,12 +17,18 @@ #include #include +#include int panic_on_unrecovered_nmi; int panic_on_io_nmi; unsigned int code_bytes = 64; int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; +#ifdef CONFIG_STACK_UNWIND +static int call_trace = 1; +#else +#define call_trace (-1) +#endif static int die_counter; void printk_address(unsigned long address, int reliable) @@ -62,6 +68,71 @@ print_ftrace_graph_addr(unsigned long ad { } #endif +int asmlinkage dump_trace_unwind(struct unwind_frame_info *info, + const struct stacktrace_ops *ops, 
void *data) +{ + int n = 0; +#ifdef CONFIG_UNWIND_INFO + unsigned long sp = UNW_SP(info); + + if (arch_unw_user_mode(info)) + return -1; + while (unwind(info) == 0 && UNW_PC(info)) { + n++; + ops->address(data, UNW_PC(info), 1); + if (arch_unw_user_mode(info)) + break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); + } +#endif + return n; +} + +int try_stack_unwind(struct task_struct *task, struct pt_regs *regs, + unsigned long **stack, unsigned long *bp, + const struct stacktrace_ops *ops, void *data) +{ +#ifdef CONFIG_UNWIND_INFO + int unw_ret = 0; + struct unwind_frame_info info; + if (call_trace < 0) + return 0; + + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) + unw_ret = dump_trace_unwind(&info, ops, data); + } else if (task == current) + unw_ret = unwind_init_running(&info, dump_trace_unwind, ops, data); + else { + if (unwind_init_blocked(&info, task) == 0) + unw_ret = dump_trace_unwind(&info, ops, data); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + UNW_PC(&info)); + if ((long)UNW_SP(&info) < 0) { + ops->warning(data, "Leftover inexact backtrace:\n"); + *stack = (unsigned long *)UNW_SP(&info); + if (!stack) { + *bp = UNW_FP(&info); + return -1; + } + } else + ops->warning(data, "Full inexact backtrace again:\n"); + } else if (call_trace >= 1) { + return -1; + } else + ops->warning(data, "Full inexact backtrace again:\n"); + } else + ops->warning(data, "Inexact backtrace:\n"); +#endif + return 0; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -373,3 +444,21 @@ static int __init code_bytes_setup(char return 1; } __setup("code_bytes=", code_bytes_setup); + +#ifdef CONFIG_STACK_UNWIND +static int __init call_trace_setup(char *s) +{ + if (!s) + return -EINVAL; + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "newfallback") == 0) + call_trace = 1; + else if (strcmp(s, "new") == 0) + call_trace = 2; + return 0; +} +early_param("call_trace", call_trace_setup); +#endif --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -27,6 +27,10 @@ void dump_trace(struct task_struct *task if (!task) task = current; + bp = stack_frame(task, regs); + if (try_stack_unwind(task, regs, &stack, &bp, ops, data)) + return; + if (!stack) { unsigned long dummy; @@ -35,7 +39,6 @@ void dump_trace(struct task_struct *task stack = (unsigned long *)task->thread.sp; } - bp = stack_frame(task, regs); for (;;) { struct thread_info *context; --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -14,6 +14,7 @@ #include #include +#include #include @@ -155,13 +156,18 @@ void dump_trace(struct task_struct *task if (!task) task = current; + bp = stack_frame(task, regs); + if (try_stack_unwind(task, regs, &stack, &bp, ops, data)) { + put_cpu(); + return; + } + if (!stack) { stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; } - bp = stack_frame(task, regs); /* * Print function call entries in all stacks, starting at the * current stack address. 
If the stacks consist of nested --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1002,6 +1002,41 @@ END(spurious_interrupt_bug) */ .popsection +#ifdef CONFIG_STACK_UNWIND +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movl 4(%esp), %edx + movl (%esp), %ecx + leal 4(%esp), %eax + movl %ebx, PT_EBX(%edx) + xorl %ebx, %ebx + movl %ebx, PT_ECX(%edx) + movl %ebx, PT_EDX(%edx) + movl %esi, PT_ESI(%edx) + movl %edi, PT_EDI(%edx) + movl %ebp, PT_EBP(%edx) + movl %ebx, PT_EAX(%edx) + movl $__USER_DS, PT_DS(%edx) + movl $__USER_DS, PT_ES(%edx) + movl $__KERNEL_PERCPU, PT_FS(%edx) + movl $__KERNEL_STACK_CANARY, PT_GS(%edx) + movl %eax, PT_OLDESP(%edx) + movl 16(%esp), %eax + movl %ebx, PT_ORIG_EAX(%edx) + movl %ecx, PT_EIP(%edx) + movl 12(%esp), %ecx + movl $__KERNEL_CS, PT_CS(%edx) + movl %eax, 12(%esp) + movl 8(%esp), %eax + movl %ecx, 8(%esp) + movl %ebx, PT_EFLAGS(%edx) + movl PT_EBX(%edx), %ebx + movl $__KERNEL_DS, PT_OLDSS(%edx) + jmpl *%eax + CFI_ENDPROC +ENDPROC(arch_unwind_init_running) +#endif + ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder CFI_STARTPROC --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1212,6 +1212,40 @@ ENTRY(call_softirq) CFI_ENDPROC END(call_softirq) +#ifdef CONFIG_STACK_UNWIND +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movq %r15, R15(%rdi) + movq %r14, R14(%rdi) + xchgq %rsi, %rdx + movq %r13, R13(%rdi) + movq %r12, R12(%rdi) + xorl %eax, %eax + movq %rbp, RBP(%rdi) + movq %rbx, RBX(%rdi) + movq (%rsp), %r9 + xchgq %rdx, %rcx + movq %rax, R11(%rdi) + movq %rax, R10(%rdi) + movq %rax, R9(%rdi) + movq %rax, R8(%rdi) + movq %rax, RAX(%rdi) + movq %rax, RCX(%rdi) + movq %rax, RDX(%rdi) + movq %rax, RSI(%rdi) + movq %rax, RDI(%rdi) + movq %rax, ORIG_RAX(%rdi) + movq %r9, RIP(%rdi) + leaq 8(%rsp), %r9 + movq $__KERNEL_CS, CS(%rdi) + movq %rax, EFLAGS(%rdi) + movq %r9, RSP(%rdi) + movq $__KERNEL_DS, SS(%rdi) + jmpq *%rcx + CFI_ENDPROC +END(arch_unwind_init_running) +#endif + #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -358,7 +358,9 @@ SECTIONS /* Sections to be discarded */ DISCARDS +#ifndef CONFIG_UNWIND_INFO /DISCARD/ : { *(.eh_frame) } +#endif } --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -359,6 +359,8 @@ MEM_KEEP(exit.rodata) \ } \ \ + EH_FRAME \ + \ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___param) = .; \ @@ -798,3 +800,23 @@ BSS(bss_align) \ . = ALIGN(stop_align); \ VMLINUX_SYMBOL(__bss_stop) = .; + +#ifdef CONFIG_STACK_UNWIND +#define EH_FRAME \ + /* Unwind data binary search table */ \ + . = ALIGN(8); \ + .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ + *(.eh_frame_hdr) \ + VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } \ + /* Unwind data */ \ + . = ALIGN(8); \ + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind) = .; \ + *(.eh_frame) \ + VMLINUX_SYMBOL(__end_unwind) = .; \ + } +#else +#define EH_FRAME +#endif --- a/include/linux/module.h +++ b/include/linux/module.h @@ -338,6 +338,9 @@ struct module /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; + /* The handle returned from unwind_add_table. 
*/ + void *unwind_info; + /* Arch-specific module values */ struct mod_arch_specific arch; --- /dev/null +++ b/include/linux/unwind.h @@ -0,0 +1,135 @@ +#ifndef _LINUX_UNWIND_H +#define _LINUX_UNWIND_H + +/* + * Copyright (C) 2002-2009 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include + +struct module; +struct stacktrace_ops; +struct unwind_frame_info; + +typedef asmlinkage int (*unwind_callback_fn)(struct unwind_frame_info *, + const struct stacktrace_ops *, + void *); + +#ifdef CONFIG_STACK_UNWIND + +#include +#include + +#ifndef ARCH_UNWIND_SECTION_NAME +#define ARCH_UNWIND_SECTION_NAME ".eh_frame" +#endif + +/* + * Initialize unwind support. + */ +extern void unwind_init(void); +extern void unwind_setup(void); + +#ifdef CONFIG_MODULES + +extern void *unwind_add_table(struct module *, + const void *table_start, + unsigned long table_size); + +extern void unwind_remove_table(void *handle, int init_only); + +#endif + +extern int unwind_init_frame_info(struct unwind_frame_info *, + struct task_struct *, + /*const*/ struct pt_regs *); + +/* + * Prepare to unwind a blocked task. + */ +extern int unwind_init_blocked(struct unwind_frame_info *, + struct task_struct *); + +/* + * Prepare to unwind the currently running thread. + */ +extern int unwind_init_running(struct unwind_frame_info *, + unwind_callback_fn, + const struct stacktrace_ops *, + void *data); + +/* + * Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. + */ +extern int unwind(struct unwind_frame_info *); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. 
+ */ +extern int unwind_to_user(struct unwind_frame_info *); + +#else /* CONFIG_STACK_UNWIND */ + +struct unwind_frame_info {}; + +static inline void unwind_init(void) {} +static inline void unwind_setup(void) {} + +#ifdef CONFIG_MODULES + +static inline void *unwind_add_table(struct module *mod, + const void *table_start, + unsigned long table_size) +{ + return NULL; +} + +#endif + +static inline void unwind_remove_table(void *handle, int init_only) +{ +} + +static inline int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + const struct pt_regs *regs) +{ + return -ENOSYS; +} + +static inline int unwind_init_blocked(struct unwind_frame_info *info, + struct task_struct *tsk) +{ + return -ENOSYS; +} + +static inline int unwind_init_running(struct unwind_frame_info *info, + unwind_callback_fn cb, + const struct stacktrace_ops *ops, + void *data) +{ + return -ENOSYS; +} + +static inline int unwind(struct unwind_frame_info *info) +{ + return -ENOSYS; +} + +static inline int unwind_to_user(struct unwind_frame_info *info) +{ + return -ENOSYS; +} + +#endif /* CONFIG_STACK_UNWIND */ +#endif /* _LINUX_UNWIND_H */ --- a/init/main.c +++ b/init/main.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -552,6 +553,7 @@ asmlinkage void __init start_kernel(void * Need to run as early as possible, to initialize the * lockdep hash: */ + unwind_init(); lockdep_init(); debug_objects_early_init(); @@ -576,6 +578,7 @@ asmlinkage void __init start_kernel(void setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); + unwind_setup(); setup_nr_cpu_ids(); setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ --- a/kernel/Makefile +++ b/kernel/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock. obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o +obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_FREEZER) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o --- a/kernel/module.c +++ b/kernel/module.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -158,7 +159,7 @@ struct load_info { struct _ddebug *debug; unsigned int num_debug; struct { - unsigned int sym, str, mod, vers, info, pcpu; + unsigned int sym, str, mod, vers, info, pcpu, unwind; } index; }; @@ -532,6 +533,27 @@ bool is_module_percpu_address(unsigned l #endif /* CONFIG_SMP */ +static unsigned int find_unwind(struct load_info *info) +{ + int section = 0; +#ifdef ARCH_UNWIND_SECTION_NAME + section = find_sec(info, ARCH_UNWIND_SECTION_NAME); + if (section) + info->sechdrs[section].sh_flags |= SHF_ALLOC; +#endif + return section; +} + +static void add_unwind_table(struct module *mod, struct load_info *info) +{ + int index = info->index.unwind; + + /* Size of section 0 is 0, so this is ok if there is no unwind info. */ + mod->unwind_info = unwind_add_table(mod, + (void *)info->sechdrs[index].sh_addr, + info->sechdrs[index].sh_size); +} + #define MODINFO_ATTR(field) \ static void setup_modinfo_##field(struct module *mod, const char *s) \ { \ @@ -1759,6 +1781,8 @@ static void free_module(struct module *m /* Remove dynamic debug info */ ddebug_remove_module(mod->name); + unwind_remove_table(mod->unwind_info, 0); + /* Arch-specific cleanup. 
*/ module_arch_cleanup(mod); @@ -2464,6 +2488,8 @@ static struct module *setup_load_info(st info->index.pcpu = find_pcpusec(info); + info->index.unwind = find_unwind(info); + /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(info->sechdrs, info->index.vers, mod)) return ERR_PTR(-ENOEXEC); @@ -2885,6 +2911,9 @@ static struct module *load_module(void _ if (err < 0) goto unlink; + /* Initialize unwind table */ + add_unwind_table(mod, &info); + /* Get rid of temporary copy and strmap. */ kfree(info.strmap); free_copy(&info); @@ -2999,6 +3028,7 @@ SYSCALL_DEFINE3(init_module, void __user /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); + unwind_remove_table(mod->unwind_info, 1); #ifdef CONFIG_KALLSYMS mod->num_symtab = mod->core_num_syms; mod->symtab = mod->core_symtab; --- /dev/null +++ b/kernel/unwind.c @@ -0,0 +1,1305 @@ +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char __start_unwind[], __end_unwind[]; +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; + +#define MAX_STACK_DEPTH 8 + +#define EXTRA_INFO(f) { \ + BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ + % FIELD_SIZEOF(struct unwind_frame_info, f)) \ + + offsetof(struct unwind_frame_info, f) \ + / FIELD_SIZEOF(struct unwind_frame_info, f), \ + FIELD_SIZEOF(struct unwind_frame_info, f) \ + } +#define PTREGS_INFO(f) EXTRA_INFO(regs.f) + +static const struct { + unsigned offs:BITS_PER_LONG / 2; + unsigned width:BITS_PER_LONG / 2; +} reg_info[] = { + UNW_REGISTER_INFO +}; + +#undef PTREGS_INFO +#undef EXTRA_INFO + +#ifndef REG_INVALID +#define REG_INVALID(r) (reg_info[r].width == 0) +#endif + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + +#define DW_EH_PE_FORM 0x07 +#define DW_EH_PE_native 0x00 +#define DW_EH_PE_leb128 0x01 +#define DW_EH_PE_data2 0x02 +#define DW_EH_PE_data4 0x03 +#define DW_EH_PE_data8 0x04 +#define DW_EH_PE_signed 0x08 +#define DW_EH_PE_ADJUST 0x70 +#define DW_EH_PE_abs 0x00 +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 +#define DW_EH_PE_omit 
0xff + +typedef unsigned long uleb128_t; +typedef signed long sleb128_t; +#define sleb128abs __builtin_labs + +static struct unwind_table { + struct { + unsigned long pc; + unsigned long range; + } core, init; + const void *address; + unsigned long size; + const unsigned char *header; + unsigned long hdrsz; + struct unwind_table *link; + const char *name; +} root_table; + +struct unwind_item { + enum item_location { + Nowhere, + Memory, + Register, + Value + } where; + uleb128_t value; +}; + +struct unwind_state { + uleb128_t loc, org; + const u8 *cieStart, *cieEnd; + uleb128_t codeAlign; + sleb128_t dataAlign; + struct cfa { + uleb128_t reg, offs; + } cfa; + struct unwind_item regs[ARRAY_SIZE(reg_info)]; + unsigned stackDepth:8; + unsigned version:8; + const u8 *label; + const u8 *stack[MAX_STACK_DEPTH]; +}; + +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; + +static unsigned unwind_debug; +static int __init unwind_debug_setup(char *s) +{ + unwind_debug = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("unwind_debug=", unwind_debug_setup); +#define dprintk(lvl, fmt, args...) \ + ((void)(lvl > unwind_debug \ + || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) + +static struct unwind_table *find_table(unsigned long pc) +{ + struct unwind_table *table; + + for (table = &root_table; table; table = table->link) + if ((pc >= table->core.pc + && pc < table->core.pc + table->core.range) + || (pc >= table->init.pc + && pc < table->init.pc + table->init.range)) + break; + + return table; +} + +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType, + unsigned long text_base, + unsigned long data_base); + +static void init_unwind_table(struct unwind_table *table, + const char *name, + const void *core_start, + unsigned long core_size, + const void *init_start, + unsigned long init_size, + const void *table_start, + unsigned long table_size, + const u8 *header_start, + unsigned long header_size) +{ + const u8 *ptr = header_start + 4; + const u8 *end = header_start + header_size; + + table->core.pc = (unsigned long)core_start; + table->core.range = core_size; + table->init.pc = (unsigned long)init_start; + table->init.range = init_size; + table->address = table_start; + table->size = table_size; + /* See if the linker provided table looks valid. 
*/ + if (header_size <= 4 + || header_start[0] != 1 + || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) + != table_start + || !read_pointer(&ptr, end, header_start[2], 0, 0) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start)) + header_start = NULL; + table->hdrsz = header_size; + smp_wmb(); + table->header = header_start; + table->link = NULL; + table->name = name; +} + +void __init unwind_init(void) +{ + init_unwind_table(&root_table, "kernel", + _text, _end - _text, + NULL, 0, + __start_unwind, __end_unwind - __start_unwind, + __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); +} + +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static signed fde_pointer_type(const u32 *cie); + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + unsigned long v; + + v = e1->start; + e1->start = e2->start; + e2->start = v; + v = e1->fde; + e1->fde = e2->fde; + e2->fde = v; +} + +static void __init setup_unwind_table(struct unwind_table *table, + void *(*alloc)(unsigned long)) +{ + const u8 *ptr; + unsigned long tableSize = table->size, hdrSize; + unsigned n; + const u32 *fde; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__((__packed__)) *header; + + if (table->header) + return; + + if (table->hdrsz) + printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", + table->name); + + if (tableSize & (sizeof(*fde) - 1)) + return; + + for (fde = table->address, n = 0; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, table); + signed ptrType; + + if (cie == ¬_fde) + continue; + if (cie == NULL + || cie == &bad_cie + || (ptrType = fde_pointer_type(cie)) < 0) + return; + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0)) + return; + ++n; + } + + if (tableSize || !n) + return; + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + + 2 * n * sizeof(unsigned long); + dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); + header = alloc(hdrSize); + if (!header) + return; + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; + header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; + header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); + header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) + % __alignof(typeof(*header->table))); + for (fde = table->address, tableSize = table->size, n = 0; + tableSize; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); + + if 
(!fde[1]) + continue; /* this is a CIE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + fde_pointer_type(cie), 0, 0); + header->table[n].fde = (unsigned long)fde; + ++n; + } + WARN_ON(n != header->fde_count); + + sort(header->table, + n, + sizeof(*header->table), + cmp_eh_frame_hdr_table_entries, + swap_eh_frame_hdr_table_entries); + + table->hdrsz = hdrSize; + smp_wmb(); + table->header = (const void *)header; +} + +static void *__init balloc(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, + sizeof(unsigned int), + __pa(MAX_DMA_ADDRESS)); +} + +void __init unwind_setup(void) +{ + setup_unwind_table(&root_table, balloc); +} + +#ifdef CONFIG_MODULES + +static struct unwind_table *last_table; + +/* Must be called with module_mutex held. */ +void *unwind_add_table(struct module *module, + const void *table_start, + unsigned long table_size) +{ + struct unwind_table *table; + + if (table_size <= 0) + return NULL; + + table = kmalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return NULL; + + init_unwind_table(table, module->name, + module->module_core, module->core_size, + module->module_init, module->init_size, + table_start, table_size, + NULL, 0); + + if (last_table) + last_table->link = table; + else + root_table.link = table; + last_table = table; + + return table; +} + +struct unlink_table_info +{ + struct unwind_table *table; + int init_only; +}; + +static int unlink_table(void *arg) +{ + struct unlink_table_info *info = arg; + struct unwind_table *table = info->table, *prev; + + for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) + ; + + if (prev->link) { + if (info->init_only) { + table->init.pc = 0; + table->init.range = 0; + info->table = NULL; + } else { + prev->link = table->link; + if (!prev->link) + last_table = prev; + } + } else + info->table = NULL; + + return 0; +} + +/* Must be called with module_mutex held. 
*/ +void unwind_remove_table(void *handle, int init_only) +{ + struct unwind_table *table = handle; + struct unlink_table_info info; + + if (!table || table == &root_table) + return; + + if (init_only && table == last_table) { + table->init.pc = 0; + table->init.range = 0; + return; + } + + info.table = table; + info.init_only = init_only; + stop_machine(unlink_table, &info, NULL); + + if (info.table) + kfree(table); +} + +#endif /* CONFIG_MODULES */ + +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + uleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (uleb128_t)(*cur & 0x7f) << shift; + if (!(*cur++ & 0x80)) + break; + } + *pcur = cur; + + return value; +} + +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + sleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (sleb128_t)(*cur & 0x7f) << shift; + if (!(*cur & 0x80)) { + value |= -(*cur++ & 0x40) << shift; + break; + } + } + *pcur = cur; + + return value; +} + +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) +{ + const u32 *cie; + + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + if (!fde[1]) + return ¬_fde; /* this is a CIE */ + if ((fde[1] & (sizeof(*fde) - 1)) + || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) + return NULL; /* this is not a valid FDE */ + cie = fde + 1 - fde[1] / sizeof(*fde); + if (*cie <= sizeof(*cie) + 4 + || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || cie[1]) + return NULL; /* this is not a (valid) CIE */ + return cie; +} + +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType, + unsigned long text_base, + unsigned long data_base) +{ + unsigned long value = 0; + union { + const u8 *p8; + const u16 *p16u; + const s16 *p16s; + const u32 *p32u; + const s32 *p32s; + const unsigned long *pul; + } ptr; + + if (ptrType < 0 || ptrType == DW_EH_PE_omit) { + dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); + return 0; + } + ptr.p8 = *pLoc; + switch (ptrType & DW_EH_PE_FORM) { + case DW_EH_PE_data2: + if (end < (const void *)(ptr.p16u + 1)) { + dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); + return 0; + } + if (ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p16s++); + else + value = get_unaligned(ptr.p16u++); + break; + case DW_EH_PE_data4: +#ifdef CONFIG_64BIT + if (end < (const void *)(ptr.p32u + 1)) { + dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); + return 0; + } + if (ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p32s++); + else + value = get_unaligned(ptr.p32u++); + break; + case DW_EH_PE_data8: + BUILD_BUG_ON(sizeof(u64) != sizeof(value)); +#else + BUILD_BUG_ON(sizeof(u32) != sizeof(value)); +#endif + case DW_EH_PE_native: + if (end < (const void *)(ptr.pul + 1)) { + dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); + return 0; + } + value = get_unaligned(ptr.pul++); + break; + case DW_EH_PE_leb128: + BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); + value = ptrType & DW_EH_PE_signed + ? 
get_sleb128(&ptr.p8, end) + : get_uleb128(&ptr.p8, end); + if ((const void *)ptr.p8 > end) { + dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); + return 0; + } + break; + default: + dprintk(2, "Cannot decode pointer type %02X (%p,%p).", + ptrType, ptr.p8, end); + return 0; + } + switch (ptrType & DW_EH_PE_ADJUST) { + case DW_EH_PE_abs: + break; + case DW_EH_PE_pcrel: + value += (unsigned long)*pLoc; + break; + case DW_EH_PE_textrel: + if (likely(text_base)) { + value += text_base; + break; + } + dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", + ptrType, *pLoc, end); + return 0; + case DW_EH_PE_datarel: + if (likely(data_base)) { + value += data_base; + break; + } + dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", + ptrType, *pLoc, end); + return 0; + default: + dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", + ptrType, *pLoc, end); + return 0; + } + if ((ptrType & DW_EH_PE_indirect) + && probe_kernel_address(value, value)) { + dprintk(1, "Cannot read indirect value %lx (%p,%p).", + value, *pLoc, end); + return 0; + } + *pLoc = ptr.p8; + + return value; +} + +static signed fde_pointer_type(const u32 *cie) +{ + const u8 *ptr = (const u8 *)(cie + 2); + unsigned version = *ptr; + + if (version != 1) + return -1; /* unsupported */ + if (*++ptr) { + const char *aug; + const u8 *end = (const u8 *)(cie + 1) + *cie; + uleb128_t len; + + /* check if augmentation size is first (and thus present) */ + if (*ptr != 'z') + return -1; + /* check if augmentation string is nul-terminated */ + if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) + return -1; + ++ptr; /* skip terminator */ + get_uleb128(&ptr, end); /* skip code alignment */ + get_sleb128(&ptr, end); /* skip data alignment */ + /* skip return address column */ + version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); + len = get_uleb128(&ptr, end); /* augmentation length */ + if (ptr + len < ptr || ptr + len > end) + return -1; + end = ptr + len; + while (*++aug) { + if (ptr >= end) + return -1; + switch (*aug) { + case 'L': + ++ptr; + break; + case 'P': { + signed ptrType = *ptr++; + + if (!read_pointer(&ptr, end, ptrType, 0, 0) + || ptr > end) + return -1; + } + break; + case 'R': + return *ptr; + default: + return -1; + } + } + } + return DW_EH_PE_native|DW_EH_PE_abs; +} + +static int advance_loc(unsigned long delta, struct unwind_state *state) +{ + state->loc += delta * state->codeAlign; + + return delta > 0; +} + +static void set_rule(uleb128_t reg, + enum item_location where, + uleb128_t value, + struct unwind_state *state) +{ + if (reg < ARRAY_SIZE(state->regs)) { + state->regs[reg].where = where; + state->regs[reg].value = value; + } +} + +static int processCFI(const u8 *start, + const u8 *end, + unsigned long targetLoc, + signed ptrType, + struct unwind_state *state) +{ + union { + const u8 *p8; + const u16 *p16; + const u32 *p32; + } ptr; + int result = 1; + + if (start != state->cieStart) { + state->loc = state->org; + result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); + if (targetLoc == 0 && state->label == NULL) + return result; + } + for (ptr.p8 = start; result && ptr.p8 < end; ) { + switch (*ptr.p8 >> 6) { + uleb128_t value; + + case 0: + switch (*ptr.p8++) { + case DW_CFA_nop: + break; + case DW_CFA_set_loc: + state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); + if (state->loc == 0) + result = 0; + break; + case DW_CFA_advance_loc1: + result = ptr.p8 < end && advance_loc(*ptr.p8++, state); + break; + case DW_CFA_advance_loc2: + result = ptr.p8 <= end + 2 + && advance_loc(*ptr.p16++, state); + break; + case DW_CFA_advance_loc4: + result = ptr.p8 <= end + 4 + && advance_loc(*ptr.p32++, state); + break; + case DW_CFA_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_offset_extended_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); + break; + case DW_CFA_register: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Register, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_remember_state: + if (ptr.p8 == state->label) { + state->label = NULL; + return 1; + } + if (state->stackDepth >= MAX_STACK_DEPTH) { + dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); + return 0; + } + state->stack[state->stackDepth++] = ptr.p8; + break; + case DW_CFA_restore_state: + if (state->stackDepth) { + const uleb128_t loc = state->loc; + const u8 *label = state->label; + + state->label = state->stack[state->stackDepth - 1]; + memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); + memset(state->regs, 0, sizeof(state->regs)); + state->stackDepth = 0; + result = processCFI(start, end, 0, ptrType, state); + state->loc = loc; + state->label = label; + } else { + dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); + return 0; + } + break; + case DW_CFA_def_cfa: + 
state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset: + state->cfa.offs = get_uleb128(&ptr.p8, end); + break; + case DW_CFA_def_cfa_sf: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset_sf: + state->cfa.offs = get_sleb128(&ptr.p8, end) + * state->dataAlign; + break; + case DW_CFA_def_cfa_register: + state->cfa.reg = get_uleb128(&ptr.p8, end); + break; + /*todo case DW_CFA_def_cfa_expression: */ + /*todo case DW_CFA_expression: */ + /*todo case DW_CFA_val_expression: */ + case DW_CFA_GNU_args_size: + get_uleb128(&ptr.p8, end); + break; + case DW_CFA_GNU_negative_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Memory, + (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_GNU_window_save: + default: + dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); + result = 0; + break; + } + break; + case 1: + result = advance_loc(*ptr.p8++ & 0x3f, state); + break; + case 2: + value = *ptr.p8++ & 0x3f; + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + break; + case 3: + set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); + break; + } + if (ptr.p8 > end) { + dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); + result = 0; + } + if (result && targetLoc != 0 && targetLoc < state->loc) + return 1; + } + + if (result && ptr.p8 < end) + dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); + + return result + && ptr.p8 == end + && (targetLoc == 0 + || (/*todo While in theory this should apply, gcc in practice omits + everything past the function prolog, and hence the location + never reaches the end of the function. + targetLoc < state->loc &&*/ state->label == NULL)); +} + +/* Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. 
*/ +int unwind(struct unwind_frame_info *frame) +{ +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) + const u32 *fde = NULL, *cie = NULL; + const u8 *ptr = NULL, *end = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; + unsigned long startLoc = 0, endLoc = 0, cfa; + unsigned i; + signed ptrType = -1; + uleb128_t retAddrReg = 0; + const struct unwind_table *table; + struct unwind_state state; + + if (UNW_PC(frame) == 0) + return -EINVAL; + if ((table = find_table(pc)) != NULL + && !(table->size & (sizeof(*fde) - 1))) { + const u8 *hdr = table->header; + unsigned long tableSize; + + smp_rmb(); + if (hdr && hdr[0] == 1) { + switch (hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; + case DW_EH_PE_data2: tableSize = 2; break; + case DW_EH_PE_data4: tableSize = 4; break; + case DW_EH_PE_data8: tableSize = 8; break; + default: tableSize = 0; break; + } + ptr = hdr + 4; + end = hdr + table->hdrsz; + if (tableSize + && read_pointer(&ptr, end, hdr[1], 0, 0) + == (unsigned long)table->address + && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 + && i == (end - ptr) / (2 * tableSize) + && !((end - ptr) % (2 * tableSize))) { + do { + const u8 *cur = ptr + (i / 2) * (2 * tableSize); + + startLoc = read_pointer(&cur, + cur + tableSize, + hdr[3], 0, + (unsigned long)hdr); + if (pc < startLoc) + i /= 2; + else { + ptr = cur - tableSize; + i = (i + 1) / 2; + } + } while (startLoc && i > 1); + if (i == 1 + && (startLoc = read_pointer(&ptr, + ptr + tableSize, + hdr[3], 0, + (unsigned long)hdr)) != 0 + && pc >= startLoc) + fde = (void *)read_pointer(&ptr, + ptr + tableSize, + hdr[3], 0, + (unsigned long)hdr); + } + } + if (hdr && !fde) + dprintk(3, "Binary lookup for %lx failed.", pc); + + if (fde != NULL) { + cie = cie_for_fde(fde, table); + ptr = (const u8 *)(fde + 2); + if (cie != NULL + && cie != &bad_cie + && cie != ¬_fde + && (ptrType = fde_pointer_type(cie)) >= 0 + && read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0) == startLoc) { + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (pc >= endLoc) + fde = NULL; + } else + fde = NULL; + if (!fde) + dprintk(1, "Binary lookup result for %lx discarded.", pc); + } + if (fde == NULL) { + for (fde = table->address, tableSize = table->size; + cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, + fde += 1 + *fde / sizeof(*fde)) { + cie = cie_for_fde(fde, table); + if (cie == &bad_cie) { + cie = NULL; + break; + } + if (cie == NULL + || cie == ¬_fde + || (ptrType = fde_pointer_type(cie)) < 0) + continue; + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (!startLoc) + continue; + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (pc >= startLoc && pc < endLoc) + break; + } + if (!fde) + dprintk(3, "Linear lookup for %lx failed.", pc); + } + } + if (cie != NULL) { + memset(&state, 0, sizeof(state)); + state.cieEnd = ptr; /* keep here temporarily */ + ptr = (const u8 *)(cie + 2); + end = (const u8 *)(cie + 1) + *cie; + frame->call_frame = 1; + if ((state.version = *ptr) != 1) + cie = NULL; /* unsupported version */ + else if (*++ptr) { + /* check if augmentation size is first (and thus present) */ + if 
(*ptr == 'z') { + while (++ptr < end && *ptr) { + switch (*ptr) { + /* check for ignorable (or already handled) + * nul-terminated augmentation string */ + case 'L': + case 'P': + case 'R': + continue; + case 'S': + frame->call_frame = 0; + continue; + default: + break; + } + break; + } + } + if (ptr >= end || *ptr) + cie = NULL; + } + if (!cie) + dprintk(1, "CIE unusable (%p,%p).", ptr, end); + ++ptr; + } + if (cie != NULL) { + /* get code aligment factor */ + state.codeAlign = get_uleb128(&ptr, end); + /* get data aligment factor */ + state.dataAlign = get_sleb128(&ptr, end); + if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) + cie = NULL; + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EPERM; + } else { + retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + ptr += augSize; + } + if (ptr > end + || retAddrReg >= ARRAY_SIZE(reg_info) + || REG_INVALID(retAddrReg) + || reg_info[retAddrReg].width != sizeof(unsigned long)) + cie = NULL; + } + if (!cie) + dprintk(1, "CIE validation failed (%p,%p).", ptr, end); + } + if (cie != NULL) { + state.cieStart = ptr; + ptr = state.cieEnd; + state.cieEnd = end; + end = (const u8 *)(fde + 1) + *fde; + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + if ((ptr += augSize) > end) + fde = NULL; + } + if (!fde) + dprintk(1, "FDE validation failed (%p,%p).", ptr, end); + } + if (cie == NULL || fde == NULL) { +#ifdef CONFIG_FRAME_POINTER + unsigned long top = TSK_STACK_TOP(frame->task); + unsigned long bottom = STACK_BOTTOM(frame->task); + unsigned long fp = UNW_FP(frame); + unsigned long sp = UNW_SP(frame); + unsigned long link; + + if ((sp | fp) & (sizeof(unsigned long) - 1)) + return -EPERM; + +# if FRAME_RETADDR_OFFSET < 0 + if (!(sp < top && fp <= sp && bottom < fp)) +# else + if (!(sp > top && fp >= sp && bottom > fp)) +# endif + return -ENXIO; + + if (probe_kernel_address(fp + FRAME_LINK_OFFSET, link)) + return -ENXIO; + +# if FRAME_RETADDR_OFFSET < 0 + if (!(link > bottom && link < fp)) +# else + if (!(link < bottom && link > fp)) +# endif + return -ENXIO; + + if (link & (sizeof(link) - 1)) + return -ENXIO; + + fp += FRAME_RETADDR_OFFSET; + if (probe_kernel_address(fp, UNW_PC(frame))) + return -ENXIO; + + /* Ok, we can use it */ +# if FRAME_RETADDR_OFFSET < 0 + UNW_SP(frame) = fp - sizeof(UNW_PC(frame)); +# else + UNW_SP(frame) = fp + sizeof(UNW_PC(frame)); +# endif + UNW_FP(frame) = link; + return 0; +#else + return -ENXIO; +#endif + } + state.org = startLoc; + memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); + /* process instructions */ + if (!processCFI(ptr, end, pc, ptrType, &state) + || state.loc > endLoc + || state.regs[retAddrReg].where == Nowhere + || state.cfa.reg >= ARRAY_SIZE(reg_info) + || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) { + dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); + return -EIO; + } + /* update frame */ +#ifndef CONFIG_AS_CFI_SIGNAL_FRAME + if (frame->call_frame + && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) + frame->call_frame = 0; +#endif + cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; + startLoc = 
min((unsigned long)UNW_SP(frame), cfa); + endLoc = max((unsigned long)UNW_SP(frame), cfa); + if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { + startLoc = min(STACK_LIMIT(cfa), cfa); + endLoc = max(STACK_LIMIT(cfa), cfa); + } +#ifndef CONFIG_64BIT +# define CASES CASE(8); CASE(16); CASE(32) +#else +# define CASES CASE(8); CASE(16); CASE(32); CASE(64) +#endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) { + if (state.regs[i].where == Nowhere) + continue; + dprintk(1, "Cannot restore register %u (%d).", + i, state.regs[i].where); + return -EIO; + } + switch (state.regs[i].where) { + default: + break; + case Register: + if (state.regs[i].value >= ARRAY_SIZE(reg_info) + || REG_INVALID(state.regs[i].value) + || reg_info[i].width > reg_info[state.regs[i].value].width) { + dprintk(1, "Cannot restore register %u from register %lu.", + i, state.regs[i].value); + return -EIO; + } + switch (reg_info[state.regs[i].value].width) { +#define CASE(n) \ + case sizeof(u##n): \ + state.regs[i].value = FRAME_REG(state.regs[i].value, \ + const u##n); \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported register size %u (%lu).", + reg_info[state.regs[i].value].width, + state.regs[i].value); + return -EIO; + } + break; + } + } + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) + continue; + switch (state.regs[i].where) { + case Nowhere: + if (reg_info[i].width != sizeof(UNW_SP(frame)) + || &FRAME_REG(i, __typeof__(UNW_SP(frame))) + != &UNW_SP(frame)) + continue; + UNW_SP(frame) = cfa; + break; + case Register: + switch (reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + FRAME_REG(i, u##n) = state.regs[i].value; \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported register size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + break; + case Value: + if (reg_info[i].width != sizeof(unsigned long)) { + dprintk(1, "Unsupported value size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + FRAME_REG(i, unsigned long) = cfa + state.regs[i].value + * state.dataAlign; + break; + case Memory: { + unsigned long addr = cfa + state.regs[i].value + * state.dataAlign; + + if ((state.regs[i].value * state.dataAlign) + % sizeof(unsigned long) + || addr < startLoc + || addr + sizeof(unsigned long) < addr + || addr + sizeof(unsigned long) > endLoc) { + dprintk(1, "Bad memory location %lx (%lx).", + addr, state.regs[i].value); + return -EIO; + } + switch (reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + if (probe_kernel_address(addr, \ + FRAME_REG(i, u##n))) \ + return -EFAULT; \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported memory size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + } + break; + } + } + + if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EIO; + } + if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { + dprintk(1, "No progress (%lx,%lx).", pc, sp); + return -EIO; + } + + return 0; +#undef CASES +#undef FRAME_REG +} +EXPORT_SYMBOL_GPL(unwind); + +int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + /*const*/ struct pt_regs *regs) +{ + info->task = tsk; + info->call_frame = 0; + arch_unw_init_frame_info(info, regs); + + return 0; +} +EXPORT_SYMBOL_GPL(unwind_init_frame_info); + +/* + * Prepare to unwind a blocked task. 
+ */
+int unwind_init_blocked(struct unwind_frame_info *info,
+			struct task_struct *tsk)
+{
+	info->task = tsk;
+	info->call_frame = 0;
+	arch_unw_init_blocked(info);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_init_blocked);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+int unwind_init_running(struct unwind_frame_info *info,
+			asmlinkage unwind_callback_fn callback,
+			const struct stacktrace_ops *ops, void *data)
+{
+	info->task = current;
+	info->call_frame = 0;
+
+	return arch_unwind_init_running(info, callback, ops, data);
+}
+EXPORT_SYMBOL_GPL(unwind_init_running);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs). Returns 0 if successful, negative number in case of
+ * error.
+ */
+int unwind_to_user(struct unwind_frame_info *info)
+{
+	while (!arch_unw_user_mode(info)) {
+		int err = unwind(info);
+
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(unwind_to_user);
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -828,6 +828,24 @@ config FRAME_POINTER
	  larger and slower, but it gives very useful debugging information
	  in case of kernel bugs. (precise oopses/stacktraces/warnings)

+config UNWIND_INFO
+	bool "Compile the kernel with frame unwind information"
+	depends on !IA64 && !PARISC && !ARM
+	depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
+	help
+	  If you say Y here the resulting kernel image will be slightly larger
+	  but not slower, and it will give very useful debugging information.
+	  If you don't debug the kernel, you can say N, but we may not be able
+	  to solve problems without frame unwind information or frame pointers.
+
+config STACK_UNWIND
+	bool "Stack unwind support"
+	depends on UNWIND_INFO
+	depends on X86
+	help
+	  This enables more precise stack traces, omitting all unrelated
+	  occurrences of pointers into kernel code from the dump.
+
 config BOOT_PRINTK_DELAY
	bool "Delay each boot printk message by N milliseconds"
	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
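Usage sketch (illustrative only, not part of the patch): with CONFIG_STACK_UNWIND=y, kernel code can walk a sleeping task's stack through the interfaces declared above in include/linux/unwind.h and asm/unwind.h. The helper below is hypothetical and only shows the intended calling sequence (unwind_init_blocked(), unwind(), UNW_PC(), arch_unw_user_mode()); it mirrors the loop used by dump_trace_unwind() in arch/x86/kernel/dumpstack.c.

    #include <linux/kernel.h>
    #include <linux/sched.h>
    #include <linux/unwind.h>
    #include <asm/unwind.h>

    /* Hypothetical example: print the kernel return addresses of a blocked task. */
    static void example_show_blocked_stack(struct task_struct *task)
    {
            struct unwind_frame_info info;

            if (unwind_init_blocked(&info, task) != 0)
                    return;
            /*
             * Step one call frame at a time until an error occurs, the
             * return address becomes 0, or user space is reached.
             */
            while (unwind(&info) == 0 && UNW_PC(&info)) {
                    printk(KERN_DEBUG "  %pS\n", (void *)UNW_PC(&info));
                    if (arch_unw_user_mode(&info))
                            break;
            }
    }

At run time the dumper is selected with the call_trace= boot parameter handled in arch/x86/kernel/dumpstack.c: "old" disables the DWARF2 unwinder, "both" prints the exact trace followed by the old inexact one, "newfallback" (the default) uses the unwinder and falls back to the old method if it gets stuck, and "new" uses the unwinder exclusively. unwind_debug=<level> raises the verbosity of the dprintk() diagnostics in kernel/unwind.c.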