From: Nick Piggin
Subject: avoid silent stack overflow over the heap
Patch-mainline: no
References: bnc#44807 bnc#211997

This is a rewrite of Andrea Arcangeli's patch, which implements a stack
guard feature. That is, it prevents the stack from growing right next to
another vma, and prevents other vmas from being allocated right next to
the stack. This will cause a segfault rather than the stack silently
overwriting other memory areas (e.g. the heap) in the case that the app
has a stack overflow.

I have rewritten it so as not to require changes to the expand_stack()
prototype, and added support for growsup stacks and for the powerpc and
ia64 architectures.
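The check is the same in every unmapped-area searcher: when the vma above
a candidate range [addr, addr+len) is a grows-down stack, require an extra
hole of heap_stack_gap pages below it, clamped so that addr + len + guard
cannot run past the end of the address space. As a rough standalone sketch
(illustration only, not part of the patch; range_fits, next_vm_start and
EXAMPLE_PAGE_SHIFT are made-up stand-ins for the kernel's vma fields and
PAGE_SHIFT):

/* Illustration only, not part of the patch; assumes 4K pages. */
#define EXAMPLE_PAGE_SHIFT	12

static int range_fits(unsigned long addr, unsigned long len,
		      unsigned long next_vm_start, int next_is_growsdown,
		      unsigned long task_size, unsigned long gap_pages)
{
	unsigned long guard = 0;

	if (next_is_growsdown) {
		/*
		 * Gap in bytes, clamped (the min() in the patch) so that
		 * addr + len + guard cannot pass the end of the address
		 * space; the caller has already checked that addr + len
		 * fits below task_size.
		 */
		guard = gap_pages << EXAMPLE_PAGE_SHIFT;
		if (guard > task_size - (addr + len))
			guard = task_size - (addr + len);
	}
	return addr + len + guard <= next_vm_start;
}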
Signed-off-by: Nick Piggin
---
 arch/ia64/kernel/sys_ia64.c  |   11 +++++
 arch/powerpc/mm/slice.c      |   82 +++++++++++++++++++++++++-----------------
 arch/x86/kernel/sys_x86_64.c |   52 ++++++++++++++++++++------
 include/linux/mm.h           |    1 
 kernel/sysctl.c              |    7 +++
 mm/mmap.c                    |   83 ++++++++++++++++++++++++++++++++++++-------
 6 files changed, 177 insertions(+), 59 deletions(-)

--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -59,6 +59,8 @@ arch_get_unmapped_area (struct file *fil
 	start_addr = addr = (addr + align_mask) & ~align_mask;
 
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		unsigned long guard;
+
 		/* At this point:  (!vma || addr < vma->vm_end). */
 		if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
 			if (start_addr != TASK_UNMAPPED_BASE) {
@@ -68,7 +70,14 @@ arch_get_unmapped_area (struct file *fil
 			}
 			return -ENOMEM;
 		}
-		if (!vma || addr + len <= vma->vm_start) {
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(TASK_SIZE - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/* Remember the address where we stopped this search:  */
 			mm->free_area_cache = addr + len;
 			return addr;
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -94,11 +94,21 @@ static int slice_area_is_free(struct mm_
 			      unsigned long len)
 {
 	struct vm_area_struct *vma;
+	unsigned long guard;
 
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	if (!vma)
+		return 1;
+
+	guard = 0;
+	if (vma->vm_flags & VM_GROWSDOWN)
+		guard = min(mm->task_size - (addr + len),
+			(unsigned long)heap_stack_gap << PAGE_SHIFT);
+	if (addr + len + guard <= vma->vm_start)
+		return 1;
+	return 0;
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
@@ -242,8 +252,10 @@ static unsigned long slice_find_area_bot
 
 full_search:
 	for (;;) {
+		unsigned long guard;
+
 		addr = _ALIGN_UP(addr, 1ul << pshift);
-		if ((TASK_SIZE - len) < addr)
+		if ((mm->task_size - len) < addr)
 			break;
 		vma = find_vma(mm, addr);
 		BUG_ON(vma && (addr >= vma->vm_end));
@@ -256,7 +268,14 @@ full_search:
 			addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
 			continue;
 		}
-		if (!vma || addr + len <= vma->vm_start) {
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(mm->task_size - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/*
 			 * Remember the place where we stopped the search:
 			 */
@@ -264,8 +283,8 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
-		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
+		if (use_cache && (addr + guard + mm->cached_hole_size) < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - (addr + guard);
 		addr = vma->vm_end;
 	}
 
@@ -284,37 +303,23 @@ static unsigned long slice_find_area_top
 					     int psize, int use_cache)
 {
 	struct vm_area_struct *vma;
-	unsigned long addr;
+	unsigned long start_addr, addr;
 	struct slice_mask mask;
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 
-	/* check if free_area_cache is useful for us */
 	if (use_cache) {
 		if (len <= mm->cached_hole_size) {
+			start_addr = addr = mm->mmap_base;
 			mm->cached_hole_size = 0;
-			mm->free_area_cache = mm->mmap_base;
-		}
-
-		/* either no address requested or can't fit in requested
-		 * address hole
-		 */
-		addr = mm->free_area_cache;
-
-		/* make sure it can fit in the remaining address space */
-		if (addr > len) {
-			addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
-			mask = slice_range_to_mask(addr, len);
-			if (slice_check_fit(mask, available) &&
-			    slice_area_is_free(mm, addr, len))
-				/* remember the address as a hint for
-				 * next time
-				 */
-				return (mm->free_area_cache = addr);
-		}
-	}
+		} else
+			start_addr = addr = mm->free_area_cache;
+	} else
+		start_addr = addr = mm->mmap_base;
 
-	addr = mm->mmap_base;
+full_search:
 	while (addr > len) {
+		unsigned long guard;
+
 		/* Go down by chunk size */
 		addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
@@ -336,7 +341,15 @@ static unsigned long slice_find_area_top
 		 * return with success:
 		 */
 		vma = find_vma(mm, addr);
-		if (!vma || (addr + len) <= vma->vm_start) {
+
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(mm->task_size - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/* remember the address as a hint for next time */
 			if (use_cache)
 				mm->free_area_cache = addr;
@@ -344,11 +357,16 @@ static unsigned long slice_find_area_top
 		}
 
 		/* remember the largest hole we saw so far */
-		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
+		if (use_cache && (addr + guard + mm->cached_hole_size) < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - (addr + guard);
 
 		/* try just below the current vma->vm_start */
-		addr = vma->vm_start;
+		addr = vma->vm_start - guard;
+	}
+	if (start_addr != mm->mmap_base) {
+		start_addr = addr = mm->mmap_base;
+		mm->cached_hole_size = 0;
+		goto full_search;
 	}
 
 	/*
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -93,6 +93,8 @@ arch_get_unmapped_area(struct file *filp
 
 full_search:
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		unsigned long guard;
+
 		/* At this point:  (!vma || addr < vma->vm_end). */
 		if (end - len < addr) {
 			/*
@@ -106,15 +108,22 @@ full_search:
 			}
 			return -ENOMEM;
 		}
-		if (!vma || addr + len <= vma->vm_start) {
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(end - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/*
 			 * Remember the place where we stopped the search:
 			 */
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
+		if (addr + guard + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - (addr + guard);
 
 		addr = vma->vm_end;
 	}
@@ -161,34 +170,51 @@ arch_get_unmapped_area_topdown(struct fi
 
 	/* make sure it can fit in the remaining address space */
 	if (addr > len) {
-		vma = find_vma(mm, addr-len);
-		if (!vma || addr <= vma->vm_start)
-			/* remember the address as a hint for next time */
-			return mm->free_area_cache = addr-len;
+		unsigned long guard;
+
+		addr -= len;
+		vma = find_vma(mm, addr);
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(TASK_SIZE - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start)
+			goto got_it;
 	}
 
 	if (mm->mmap_base < len)
 		goto bottomup;
 
 	addr = mm->mmap_base-len;
-
 	do {
+		unsigned long guard;
 		/*
 		 * Lookup failure means no vma is above this address,
 		 * else if new region fits below vma->vm_start,
 		 * return with success:
 		 */
 		vma = find_vma(mm, addr);
-		if (!vma || addr+len <= vma->vm_start)
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(TASK_SIZE - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/* remember the address as a hint for next time */
-			return mm->free_area_cache = addr;
+			mm->free_area_cache = addr;
+			return addr;
+		}
 
 		/* remember the largest hole we saw so far */
-		if (addr + mm->cached_hole_size < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
+		if (addr + guard + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - (addr + guard);
 
 		/* try just below the current vma->vm_start */
-		addr = vma->vm_start-len;
+		addr = vma->vm_start - (len + guard);
 	} while (len < vma->vm_start);
 
 bottomup:
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1324,6 +1324,7 @@ unsigned long ra_submit(struct file_ra_s
 			struct file *filp);
 
 /* Do stack extension */
+extern int heap_stack_gap;
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 #ifdef CONFIG_IA64
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1267,6 +1267,13 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= scan_unevictable_handler,
 	},
+	{
+		.procname	= "heap-stack-gap",
+		.data		= &heap_stack_gap,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_MEMORY_FAILURE
 	{
 		.procname	= "memory_failure_early_kill",
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -86,6 +86,7 @@ int sysctl_overcommit_memory = OVERCOMMI
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
+int heap_stack_gap __read_mostly = 1;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -1360,6 +1361,8 @@ arch_get_unmapped_area(struct file *filp
 
 full_search:
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		unsigned long guard;
+
 		/* At this point:  (!vma || addr < vma->vm_end). */
 		if (TASK_SIZE - len < addr) {
 			/*
@@ -1374,15 +1377,23 @@ full_search:
 			}
 			return -ENOMEM;
 		}
-		if (!vma || addr + len <= vma->vm_start) {
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(TASK_SIZE - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/*
 			 * Remember the place where we stopped the search:
 			 */
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
+		if (addr + guard + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - (addr + guard);
+
 		addr = vma->vm_end;
 	}
 }
@@ -1440,34 +1451,51 @@ arch_get_unmapped_area_topdown(struct fi
 
 	/* make sure it can fit in the remaining address space */
 	if (addr > len) {
-		vma = find_vma(mm, addr-len);
-		if (!vma || addr <= vma->vm_start)
-			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr-len);
+		unsigned long guard;
+
+		addr -= len;
+		vma = find_vma(mm, addr);
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(TASK_SIZE - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start)
+			goto got_it;
 	}
 
 	if (mm->mmap_base < len)
 		goto bottomup;
 
 	addr = mm->mmap_base-len;
-
 	do {
+		unsigned long guard;
 		/*
 		 * Lookup failure means no vma is above this address,
 		 * else if new region fits below vma->vm_start,
 		 * return with success:
 		 */
		vma = find_vma(mm, addr);
-		if (!vma || addr+len <= vma->vm_start)
+		if (!vma)
+			goto got_it;
+		guard = 0;
+		if (vma->vm_flags & VM_GROWSDOWN)
+			guard = min(TASK_SIZE - (addr + len),
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		if (addr + len + guard <= vma->vm_start) {
+got_it:
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
+			mm->free_area_cache = addr;
+			return addr;
+		}
 
 		/* remember the largest hole we saw so far */
-		if (addr + mm->cached_hole_size < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
+		if (addr + guard + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - (addr + guard);
 
 		/* try just below the current vma->vm_start */
-		addr = vma->vm_start-len;
+		addr = vma->vm_start - (len + guard);
 	} while (len < vma->vm_start);
 
 bottomup:
@@ -1699,6 +1727,19 @@ int expand_upwards(struct vm_area_struct
 	/* Somebody else might have raced and expanded it already */
 	if (address > vma->vm_end) {
 		unsigned long size, grow;
+#ifdef CONFIG_STACK_GROWSUP
+		unsigned long guard;
+		struct vm_area_struct *vm_next;
+
+		error = -ENOMEM;
+		guard = min(TASK_SIZE - address,
+			(unsigned long)heap_stack_gap << PAGE_SHIFT);
+		vm_next = find_vma(vma->vm_mm, address + guard);
+		if (unlikely(vm_next && vm_next != vma)) {
+			/* stack collision with another vma */
+			goto out_unlock;
+		}
+#endif
 
 		size = address - vma->vm_start;
 		grow = (address - vma->vm_end) >> PAGE_SHIFT;
@@ -1707,6 +1748,7 @@ int expand_upwards(struct vm_area_struct
 		if (!error)
 			vma->vm_end = address;
 	}
+out_unlock: __maybe_unused
 	anon_vma_unlock(vma);
 	return error;
 }
@@ -1743,7 +1785,21 @@ static int expand_downwards(struct vm_ar
 
 	/* Somebody else might have raced and expanded it already */
 	if (address < vma->vm_start) {
 		unsigned long size, grow;
+		struct vm_area_struct *prev_vma;
+
+		find_vma_prev(vma->vm_mm, address, &prev_vma);
+		error = -ENOMEM;
+		if (prev_vma) {
+			unsigned long guard;
+
+			guard = min(TASK_SIZE - prev_vma->vm_end,
+				(unsigned long)heap_stack_gap << PAGE_SHIFT);
+			if (unlikely(prev_vma->vm_end + guard > address)) {
+				/* stack collision with another vma */
+				goto out_unlock;
+			}
+		}
 
 		size = vma->vm_end - address;
 		grow = (vma->vm_start - address) >> PAGE_SHIFT;
@@ -1753,6 +1809,7 @@ static int expand_downwards(struct vm_ar
 			vma->vm_pgoff -= grow;
 		}
 	}
+ out_unlock:
 	anon_vma_unlock(vma);
 	return error;
 }
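
As a usage note (illustration only, not part of the patch): the sysctl
above registers in the vm table, so the gap shows up as
/proc/sys/vm/heap-stack-gap, in pages, defaulting to 1; writing 0
effectively disables the guard. A trivial userspace sanity check:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/heap-stack-gap", "r");
	int pages;

	if (!f) {
		perror("heap-stack-gap");	/* kernel without this patch */
		return 1;
	}
	if (fscanf(f, "%d", &pages) == 1)
		printf("stack guard gap: %d page(s)\n", pages);
	fclose(f);
	return 0;
}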