| author | Michael Corcoran <Michael.Corcoran@Sun.COM> | 2009-02-23 16:16:02 -0800 |
|---|---|---|
| committer | Michael Corcoran <Michael.Corcoran@Sun.COM> | 2009-02-23 16:16:02 -0800 |
| commit | 2af6eb52b537fd75202964edd660905fb4fa601d (patch) | |
| tree | 9877e0e6e516005ccdc0f6c1196d00efcb6eb842 | |
| parent | c3a96863fc7054253767fc2de22e9e9f3a3b36fa (diff) | |
| download | illumos-joyent-2af6eb52b537fd75202964edd660905fb4fa601d.tar.gz | |
6657218 page_numtopp_nolock cpu_vm_data_destroy race condition causes panic
6696311 kmem_cache_alloc's use of CPU macro is not preemption friendly, causes panic
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/os/cpu.c | 4 |
| -rw-r--r-- | usr/src/uts/common/sys/kmem_impl.h | 31 |
| -rw-r--r-- | usr/src/uts/common/vm/vm_page.c | 34 |
3 files changed, 58 insertions, 11 deletions
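
For context on the kmem half of the fix (6696311): `cpu_cache_offset` is now measured from `cache_cpu[0]` rather than from the start of the `kmem_cache_t`, so the offset of 0 that a thread can observe after a DR cpu-remove bzero()s the old cpu_t degrades to cpu 0's per-cpu cache instead of pointing at the cache header. The user-land sketch below illustrates that arithmetic; the pared-down structures and field names are hypothetical stand-ins, not the kernel definitions.

```c
#include <stddef.h>
#include <stdio.h>

/* Pared-down, hypothetical stand-ins for the kernel structures. */
typedef struct kmem_cpu_cache {
	unsigned long long cc_alloc;
	unsigned long long cc_free;
} kmem_cpu_cache_t;

typedef struct kmem_cache {
	char cache_name[32];		/* header fields precede cache_cpu */
	kmem_cpu_cache_t cache_cpu[4];	/* per-cpu cache slots */
} kmem_cache_t;

/* Old scheme: offset measured from the start of the kmem_cache_t. */
#define	OLD_OFFSET(cpuid) \
	((size_t)(&((kmem_cache_t *)0)->cache_cpu[cpuid]))
#define	OLD_CPU_CACHE(cp, off) \
	((kmem_cpu_cache_t *)((char *)(cp) + (off)))

/* New scheme: offset measured from cache_cpu[0], as in the commit. */
#define	NEW_OFFSET(cpuid) \
	((size_t)(&((kmem_cache_t *)0)->cache_cpu[cpuid]) - \
	(size_t)(&((kmem_cache_t *)0)->cache_cpu))
#define	NEW_CPU_CACHE(cp, off) \
	((kmem_cpu_cache_t *)((char *)(&(cp)->cache_cpu) + (off)))

int
main(void)
{
	kmem_cache_t cache = { "demo" };

	/*
	 * A DR cpu-remove bzero()s the departing cpu_t, so a thread that
	 * was context-switched at the wrong moment can read
	 * cpu_cache_offset == 0.  Old scheme: offset 0 lands on the cache
	 * header, which is not a kmem_cpu_cache_t at all.  New scheme:
	 * offset 0 lands on cpu 0's cache, which is always valid.
	 */
	printf("old scheme, offset 0 -> %p (cache header)\n",
	    (void *)OLD_CPU_CACHE(&cache, 0));
	printf("new scheme, offset 0 -> %p (&cache_cpu[0] = %p)\n",
	    (void *)NEW_CPU_CACHE(&cache, 0),
	    (void *)&cache.cache_cpu[0]);
	return (0);
}
```

Under the old scheme the offset-0 pointer equals the cache address itself, which kmem_cache_alloc would then treat as a `kmem_cpu_cache_t`; that is consistent with the panic described in 6696311.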
```diff
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c
index 74dd46fa17..904e507caf 100644
--- a/usr/src/uts/common/os/cpu.c
+++ b/usr/src/uts/common/os/cpu.c
@@ -1696,7 +1696,7 @@ cpu_list_init(cpu_t *cp)
 	 */
 	cpu_seq = &cpu_list;
 
-	cp->cpu_cache_offset = KMEM_CACHE_SIZE(cp->cpu_seqid);
+	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
 	cp_default.cp_cpulist = cp;
 	cp_default.cp_ncpus = 1;
 	cp->cpu_next_part = cp;
@@ -1752,7 +1752,7 @@ cpu_add_unit(cpu_t *cp)
 	cp->cpu_seqid = seqid;
 	ASSERT(ncpus < max_ncpus);
 	ncpus++;
-	cp->cpu_cache_offset = KMEM_CACHE_SIZE(cp->cpu_seqid);
+	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
 	cpu[cp->cpu_id] = cp;
 	CPUSET_ADD(cpu_available, cp->cpu_id);
 	cpu_seq[cp->cpu_seqid] = cp;
diff --git a/usr/src/uts/common/sys/kmem_impl.h b/usr/src/uts/common/sys/kmem_impl.h
index 96b88261b7..3b780f843e 100644
--- a/usr/src/uts/common/sys/kmem_impl.h
+++ b/usr/src/uts/common/sys/kmem_impl.h
@@ -20,15 +20,13 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
 */
 
 #ifndef _SYS_KMEM_IMPL_H
 #define	_SYS_KMEM_IMPL_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/kmem.h>
 #include <sys/vmem.h>
 #include <sys/thread.h>
@@ -172,8 +170,26 @@ typedef struct kmem_buftag_lite {
 #define	KMEM_SLAB(cp, buf)	\
 	((kmem_slab_t *)P2END((uintptr_t)(buf), (cp)->cache_slabsize) - 1)
 
-#define	KMEM_CPU_CACHE(cp)	\
-	(kmem_cpu_cache_t *)((char *)cp + CPU->cpu_cache_offset)
+/*
+ * The "CPU" macro loads a cpu_t that refers to the cpu that the current
+ * thread is running on at the time the macro is executed. A context switch
+ * may occur immediately after loading this data structure, leaving this
+ * thread pointing at the cpu_t for the previous cpu. This is not a problem;
+ * we'd just end up checking the previous cpu's per-cpu cache, and then check
+ * the other layers of the kmem cache if need be.
+ *
+ * It's not even a problem if the old cpu gets DR'ed out during the context
+ * switch. The cpu-remove DR operation bzero()s the cpu_t, but doesn't free
+ * it. So the cpu_t's cpu_cache_offset would read as 0, causing us to use
+ * cpu 0's per-cpu cache.
+ *
+ * So, there is no need to disable kernel preemption while using the CPU macro
+ * below since if we have been context switched, there will not be any
+ * correctness problem, just a momentary use of a different per-cpu cache.
+ */
+
+#define	KMEM_CPU_CACHE(cp)	\
+	(kmem_cpu_cache_t *)((char *)(&cp->cache_cpu) + CPU->cpu_cache_offset)
 
 #define	KMEM_MAGAZINE_VALID(cp, mp)	\
 	(((kmem_slab_t *)P2END((uintptr_t)(mp), PAGESIZE) - 1)->slab_cache == \
@@ -235,6 +251,11 @@ typedef struct kmem_magtype {
 #define	KMEM_CACHE_SIZE(ncpus)	\
 	((size_t)(&((kmem_cache_t *)0)->cache_cpu[ncpus]))
 
+/* Offset from kmem_cache->cache_cpu for per cpu caches */
+#define	KMEM_CPU_CACHE_OFFSET(cpuid)	\
+	((size_t)(&((kmem_cache_t *)0)->cache_cpu[cpuid]) - \
+	(size_t)(&((kmem_cache_t *)0)->cache_cpu))
+
 typedef struct kmem_cpu_cache {
 	kmutex_t	cc_lock;	/* protects this cpu's local cache */
 	uint64_t	cc_alloc;	/* allocations from this cpu */
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index 37f59cf206..836881f6d2 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -5787,8 +5787,17 @@ page_numtopp_nolock(pfn_t pfnum)
 {
 	struct memseg *seg;
 	page_t *pp;
-	vm_cpu_data_t *vc = CPU->cpu_vm_data;
+	vm_cpu_data_t *vc;
 
+	/*
+	 * We need to disable kernel preemption while referencing the
+	 * cpu_vm_data field in order to prevent us from being switched to
+	 * another cpu and trying to reference it after it has been freed.
+	 * This will keep us on cpu and prevent it from being removed while
+	 * we are still on it.
+	 */
+	kpreempt_disable();
+	vc = CPU->cpu_vm_data;
 	ASSERT(vc != NULL);
 
 	MEMSEG_STAT_INCR(nsearch);
@@ -5798,8 +5807,10 @@ page_numtopp_nolock(pfn_t pfnum)
 	    (pfnum >= seg->pages_base) && (pfnum < seg->pages_end)) {
 		MEMSEG_STAT_INCR(nlastwon);
 		pp = seg->pages + (pfnum - seg->pages_base);
-		if (pp->p_pagenum == pfnum)
+		if (pp->p_pagenum == pfnum) {
+			kpreempt_enable();
 			return ((page_t *)pp);
+		}
 	}
 
 	/* Else Try hash */
@@ -5808,8 +5819,10 @@ page_numtopp_nolock(pfn_t pfnum)
 		MEMSEG_STAT_INCR(nhashwon);
 		vc->vc_pnum_memseg = seg;
 		pp = seg->pages + (pfnum - seg->pages_base);
-		if (pp->p_pagenum == pfnum)
+		if (pp->p_pagenum == pfnum) {
+			kpreempt_enable();
 			return ((page_t *)pp);
+		}
 	}
 
 	/* Else Brute force */
@@ -5817,10 +5830,12 @@ page_numtopp_nolock(pfn_t pfnum)
 		if (pfnum >= seg->pages_base && pfnum < seg->pages_end) {
 			vc->vc_pnum_memseg = seg;
 			pp = seg->pages + (pfnum - seg->pages_base);
+			kpreempt_enable();
 			return ((page_t *)pp);
 		}
 	}
 	vc->vc_pnum_memseg = NULL;
+	kpreempt_enable();
 	MEMSEG_STAT_INCR(nnotfound);
 
 	return ((page_t *)NULL);
@@ -5862,7 +5877,17 @@ page_nextn(page_t *pp, ulong_t n)
 {
 	struct memseg *seg;
 	page_t *ppn;
-	vm_cpu_data_t *vc = (vm_cpu_data_t *)CPU->cpu_vm_data;
+	vm_cpu_data_t *vc;
+
+	/*
+	 * We need to disable kernel preemption while referencing the
+	 * cpu_vm_data field in order to prevent us from being switched to
+	 * another cpu and trying to reference it after it has been freed.
+	 * This will keep us on cpu and prevent it from being removed while
+	 * we are still on it.
+	 */
+	kpreempt_disable();
+	vc = (vm_cpu_data_t *)CPU->cpu_vm_data;
 
 	ASSERT(vc != NULL);
 
@@ -5892,6 +5917,7 @@ page_nextn(page_t *pp, ulong_t n)
 		pp = seg->pages;
 	}
 	vc->vc_pnext_memseg = seg;
+	kpreempt_enable();
 	return (ppn);
 }
```
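
The vm_page.c half of the fix (6657218) brackets each use of `cpu_vm_data` with kpreempt_disable()/kpreempt_enable(), pinning the thread to its cpu so a DR cpu-remove cannot run cpu_vm_data_destroy() and free the per-cpu data mid-lookup. As a rough user-land analogue of that discipline (hypothetical names throughout; a rwlock stands in for preemption control, and free() for cpu_vm_data_destroy()):

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct vm_cpu_data {
	long vc_counter;		/* stand-in for vc_pnum_memseg etc. */
} vm_cpu_data_t;

static pthread_rwlock_t cpu_pin = PTHREAD_RWLOCK_INITIALIZER;
static vm_cpu_data_t *cpu_vm_data;	/* one "cpu" for simplicity */

static void *
reader(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		pthread_rwlock_rdlock(&cpu_pin);	/* kpreempt_disable() */
		if (cpu_vm_data != NULL)		/* may already be "DR'ed out" */
			cpu_vm_data->vc_counter++;	/* safe: cannot be freed here */
		pthread_rwlock_unlock(&cpu_pin);	/* kpreempt_enable() */
	}
	return (NULL);
}

static void *
dr_remove(void *arg)
{
	pthread_rwlock_wrlock(&cpu_pin);	/* waits until no reader is "pinned" */
	free(cpu_vm_data);			/* cpu_vm_data_destroy() */
	cpu_vm_data = NULL;
	pthread_rwlock_unlock(&cpu_pin);
	return (NULL);
}

int
main(void)
{
	pthread_t r, d;

	cpu_vm_data = calloc(1, sizeof (vm_cpu_data_t));
	pthread_create(&r, NULL, reader, NULL);
	pthread_create(&d, NULL, dr_remove, NULL);
	pthread_join(r, NULL);
	pthread_join(d, NULL);
	printf("done without touching freed memory\n");
	return (0);
}
```

Build with `cc -lpthread`. Remove the rdlock/unlock pair from reader() and the program can dereference freed memory, which is the user-land equivalent of the page_numtopp_nolock panic the commit fixes.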
