author | paulsan <none@none> | 2007-09-17 15:08:19 -0700
committer | paulsan <none@none> | 2007-09-17 15:08:19 -0700
commit | 7dacfc4494f6d14358974ef2830b5cd8c66a84de (patch)
tree | 36b58448c9c8410d22f977898985778d043e1bd4 /usr/src
parent | 538414560648ad8b2b92897b55592cda64706375 (diff)
download | illumos-gate-7dacfc4494f6d14358974ef2830b5cd8c66a84de.tar.gz
6597746 Don't create srd_cache, region_cache and scd_cache on systems that don't use shared context
6597751 SFMMU_HASH_UNLOCK() call is missing in hat_unlock_region()
6600745 shared context code needs to be cleaned up
6603269 assertion failed: sfhme->hme_tte.ll != 0, file: ../../sfmmu/vm/hat_sfmmu.c, line: 11613
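The first fix (6597746) is centered in hat_init(): the SRD, region and SCD kmem caches, and the SRD hash buckets, are now created only when shared context is actually in use, and a disable_shctx setting from /etc/system turns off a shctx_on that the cpu module enabled earlier in boot. Below is a condensed sketch of the new logic, taken from the hat_sfmmu.c hunk in the diff; the surrounding hat_init() setup and the loop counter declaration are elided.

```c
	/*
	 * If shared context support was disabled via /etc/system,
	 * override the value the cpu module set earlier in boot.
	 */
	if (shctx_on && disable_shctx) {
		shctx_on = 0;
	}

	if (shctx_on) {
		/* SRD hash buckets and caches exist only when shctx is used */
		srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
		    sizeof (srd_buckets[0]), KM_SLEEP);
		for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) {
			mutex_init(&srd_buckets[i].srdb_lock, NULL,
			    MUTEX_DEFAULT, NULL);
		}

		srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t),
		    0, sfmmu_srdcache_constructor, sfmmu_srdcache_destructor,
		    NULL, NULL, NULL, 0);
		region_cache = kmem_cache_create("region_cache",
		    sizeof (sf_region_t), 0, sfmmu_rgncache_constructor,
		    sfmmu_rgncache_destructor, NULL, NULL, NULL, 0);
		scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t),
		    0, sfmmu_scdcache_constructor, sfmmu_scdcache_destructor,
		    NULL, NULL, NULL, 0);
	}
```

With the caches made conditional, the rest of the change can key off shctx_on alone: the `disable_shctx || !shctx_on` tests in hat_supported(), hat_join_srd() and hat_kern_setup() collapse to `!shctx_on` in the hunks below.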
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.c | 8
-rw-r--r-- | usr/src/uts/i86pc/vm/hat_i86.c | 10
-rw-r--r-- | usr/src/uts/sfmmu/vm/hat_sfmmu.c | 204
-rw-r--r-- | usr/src/uts/sfmmu/vm/hat_sfmmu.h | 24
-rw-r--r-- | usr/src/uts/sun4/vm/sfmmu.c | 10
-rw-r--r-- | usr/src/uts/sun4u/vm/mach_kpm.c | 4
-rw-r--r-- | usr/src/uts/sun4v/os/fillsysinfo.c | 4
-rw-r--r-- | usr/src/uts/sun4v/sys/mmu.h | 4
-rw-r--r-- | usr/src/uts/sun4v/vm/mach_sfmmu.c | 26
9 files changed, 169 insertions, 125 deletions
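The 6597751 fix is a one-line lock-leak repair in the sfmmu hat_unlock_region(): when the hash search finds no hmeblk for the current TTE size, the loop moved on to the next size while still holding the hash bucket lock. The corrected path now drops the lock first. The sketch below shows the loop shape; SFMMU_HASH_LOCK/UNLOCK and the HME_HASH_SEARCH_PREV call are from the hunk in the diff, while the loop framing, the TTE8K lower bound and the exact placement of the lock acquisition sit outside the hunk and are inferred from the usual sfmmu hash-search pattern.

```c
	while (ttesz >= TTE8K) {
		/* ... set up hblktag and locate the hash bucket hmebp ... */
		SFMMU_HASH_LOCK(hmebp);
		HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa,
		    pr_hblk, prevpa, &list);
		if (hmeblkp == NULL) {
			/*
			 * 6597751: previously the bucket lock was leaked
			 * here before retrying with a smaller TTE size.
			 */
			SFMMU_HASH_UNLOCK(hmebp);
			ttesz--;
			continue;
		}
		/* ... unlock the TTEs this hmeblk maps ... */
		SFMMU_HASH_UNLOCK(hmebp);
		break;
	}
```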
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 6234d01bac..5f7689ab08 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -1628,8 +1628,8 @@ out: /* - * callback function used by segvn_unmap to invoke free_vp_pages() for only - * those pages actually processed by the HAT + * callback function to invoke free_vp_pages() for only those pages actually + * processed by the HAT when a shared region is destroyed. */ extern int free_pages; @@ -1657,6 +1657,10 @@ segvn_hat_rgn_unload_callback(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr, free_vp_pages(vp, off, len); } +/* + * callback function used by segvn_unmap to invoke free_vp_pages() for only + * those pages actually processed by the HAT + */ static void segvn_hat_unload_callback(hat_callback_t *cb) { diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c index 979778d8b5..53e42e74e4 100644 --- a/usr/src/uts/i86pc/vm/hat_i86.c +++ b/usr/src/uts/i86pc/vm/hat_i86.c @@ -1722,7 +1722,7 @@ hat_unlock(hat_t *hat, caddr_t addr, size_t len) /* ARGSUSED */ void -hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len, +hat_unlock_region(struct hat *hat, caddr_t addr, size_t len, hat_region_cookie_t rcookie) { panic("No shared region support on x86"); @@ -3877,13 +3877,13 @@ hati_update_pte(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new) /* ARGSUSED */ void -hat_join_srd(struct hat *sfmmup, vnode_t *evp) +hat_join_srd(struct hat *hat, vnode_t *evp) { } /* ARGSUSED */ hat_region_cookie_t -hat_join_region(struct hat *sfmmup, +hat_join_region(struct hat *hat, caddr_t r_saddr, size_t r_size, void *r_obj, @@ -3899,14 +3899,14 @@ hat_join_region(struct hat *sfmmup, /* ARGSUSED */ void -hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags) +hat_leave_region(struct hat *hat, hat_region_cookie_t rcookie, uint_t flags) { panic("No shared region support on x86"); } /* ARGSUSED */ void -hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie) +hat_dup_region(struct hat *hat, hat_region_cookie_t rcookie) { panic("No shared region support on x86"); } diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c index eeb44a0dd5..e4727578c0 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c @@ -823,10 +823,7 @@ sfmmu_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag) * highbit() - 1) to get the size code for the smallest TSB that can represent * all of physical memory, while erring on the side of too much. * - * If the computed size code is less than the current tsb_max_growsize, we set - * tsb_max_growsize to the computed size code. In the case where the computed - * size code is greater than tsb_max_growsize, we have these restrictions that - * apply to increasing tsb_max_growsize: + * Restrict tsb_max_growsize to make sure that: * 1) TSBs can't grow larger than the TSB slab size * 2) TSBs can't grow larger than UTSB_MAX_SZCODE. */ @@ -1381,25 +1378,33 @@ hat_init(void) mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX); - srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS * - sizeof (srd_buckets[0]), KM_SLEEP); - for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) { - mutex_init(&srd_buckets[i].srdb_lock, NULL, MUTEX_DEFAULT, - NULL); - } /* - * 64 byte alignment is required in order to isolate certain field - * into its own cacheline. 
+ * If Shared context support is disabled via /etc/system + * set shctx_on to 0 here if it was set to 1 earlier in boot + * sequence by cpu module initialization code. */ - srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t), 64, - sfmmu_srdcache_constructor, sfmmu_srdcache_destructor, - NULL, NULL, NULL, 0); - region_cache = kmem_cache_create("region_cache", - sizeof (sf_region_t), 0, sfmmu_rgncache_constructor, - sfmmu_rgncache_destructor, NULL, NULL, NULL, 0); - scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t), 0, - sfmmu_scdcache_constructor, sfmmu_scdcache_destructor, - NULL, NULL, NULL, 0); + if (shctx_on && disable_shctx) { + shctx_on = 0; + } + + if (shctx_on) { + srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS * + sizeof (srd_buckets[0]), KM_SLEEP); + for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) { + mutex_init(&srd_buckets[i].srdb_lock, NULL, + MUTEX_DEFAULT, NULL); + } + + srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t), + 0, sfmmu_srdcache_constructor, sfmmu_srdcache_destructor, + NULL, NULL, NULL, 0); + region_cache = kmem_cache_create("region_cache", + sizeof (sf_region_t), 0, sfmmu_rgncache_constructor, + sfmmu_rgncache_destructor, NULL, NULL, NULL, 0); + scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t), + 0, sfmmu_scdcache_constructor, sfmmu_scdcache_destructor, + NULL, NULL, NULL, 0); + } /* * Pre-allocate hrm_hashtab before enabling the collection of @@ -3716,7 +3721,8 @@ sfmmu_rgn_cb_noop(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr, } /* - * update *eaddrp only if hmeblk was unloaded. + * Searches for an hmeblk which maps addr, then unloads this mapping + * and updates *eaddrp, if the hmeblk is found. */ static void sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr, @@ -3760,13 +3766,6 @@ sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr, sfmmu_hblks_list_purge(&list); } -/* - * This routine can be optimized to eliminate scanning areas of smaller page - * size bitmaps when a corresponding bit is set in the bitmap for a bigger - * page size. For now assume the region will usually only have the primary - * size mappings so we'll scan only one bitmap anyway by checking rgn_hmeflags - * first. - */ static void sfmmu_unload_hmeregion(sf_srd_t *srdp, sf_region_t *rgnp) { @@ -3972,6 +3971,7 @@ hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len, HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa, pr_hblk, prevpa, &list); if (hmeblkp == NULL) { + SFMMU_HASH_UNLOCK(hmebp); ttesz--; continue; } @@ -7407,9 +7407,11 @@ retry: * from the list. */ tmphme = sfhme->hme_next; + if (IS_PAHME(sfhme)) + continue; /* * If we are looking for large mappings and this hme doesn't - * reach the range we are seeking, just ignore its. + * reach the range we are seeking, just ignore it. */ hmeblkp = sfmmu_hmetohblk(sfhme); if (hmeblkp->hblk_xhat_bit) @@ -7917,7 +7919,7 @@ hat_getkpfnum(caddr_t addr) } /* - * This routine will return both pfn and tte for the addr. + * This routine will return both pfn and tte for the vaddr. */ static pfn_t sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup, tte_t *ttep) @@ -8129,7 +8131,7 @@ hat_page_getshare(page_t *pp) } /* - * Return 1 the number of mappings exceeds sh_thresh. Return 0 + * Return 1 if the number of mappings exceeds sh_thresh. Return 0 * otherwise. Count shared hmeblks by region's refcnt. 
*/ int @@ -8156,10 +8158,23 @@ hat_page_checkshare(page_t *pp, ulong_t sh_thresh) again: for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) { tmphme = sfhme->hme_next; - if (hme_size(sfhme) != sz) { + if (IS_PAHME(sfhme)) { continue; } + hmeblkp = sfmmu_hmetohblk(sfhme); + if (hmeblkp->hblk_xhat_bit) { + cnt++; + if (cnt > sh_thresh) { + sfmmu_mlist_exit(pml); + return (1); + } + continue; + } + if (hme_size(sfhme) != sz) { + continue; + } + if (hmeblkp->hblk_shared) { sf_srd_t *srdp = hblktosrd(hmeblkp); uint_t rid = hmeblkp->hblk_tag.htag_rid; @@ -8238,6 +8253,7 @@ hat_page_demote(page_t *pp) ASSERT(PAGE_EXCL(pp)); ASSERT(!PP_ISFREE(pp)); + ASSERT(!PP_ISKAS(pp)); ASSERT(page_szc_lock_assert(pp)); pml = sfmmu_mlist_enter(pp); @@ -8264,6 +8280,7 @@ hat_page_demote(page_t *pp) rootpp = PP_GROUPLEADER(pp, sz); for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) { tmphme = sfhme->hme_next; + ASSERT(!IS_PAHME(sfhme)); hmeblkp = sfmmu_hmetohblk(sfhme); if (hme_size(sfhme) != sz) { continue; @@ -8649,10 +8666,7 @@ hat_share(struct hat *sfmmup, caddr_t addr, * sfmmu_check_page_sizes at the end of this routine. */ old_scdp = sfmmup->sfmmu_scdp; - /* - * Call hat_join_region without the hat lock, because it's - * used in hat_join_region. - */ + rcookie = hat_join_region(sfmmup, addr, len, (void *)ism_hatid, 0, PROT_ALL, ismszc, NULL, HAT_REGION_ISM); if (rcookie != HAT_INVALID_REGION_COOKIE) { @@ -8810,8 +8824,8 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) /* * After hat_leave_region, the sfmmup may leave SCD, - * in which case, we want to grow the private tsb size - * when call sfmmu_check_page_sizes at the end of the routine. + * in which case, we want to grow the private tsb size when + * calling sfmmu_check_page_sizes at the end of the routine. */ old_scdp = sfmmup->sfmmu_scdp; /* @@ -9195,6 +9209,8 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp) */ for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) { tmphme = sfhmep->hme_next; + if (IS_PAHME(sfhmep)) + continue; hmeblkp = sfmmu_hmetohblk(sfhmep); if (hmeblkp->hblk_xhat_bit) continue; @@ -9221,6 +9237,8 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp) for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) { tmphme = sfhmep->hme_next; + if (IS_PAHME(sfhmep)) + continue; hmeblkp = sfmmu_hmetohblk(sfhmep); if (hmeblkp->hblk_xhat_bit) continue; @@ -9368,6 +9386,8 @@ tst_tnc(page_t *pp, pgcnt_t npages) } for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) { + if (IS_PAHME(sfhme)) + continue; hmeblkp = sfmmu_hmetohblk(sfhme); if (hmeblkp->hblk_xhat_bit) continue; @@ -9514,6 +9534,8 @@ sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor) for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) { + if (IS_PAHME(sfhme)) + continue; hmeblkp = sfmmu_hmetohblk(sfhme); if (hmeblkp->hblk_xhat_bit) @@ -10064,7 +10086,10 @@ sfmmu_reprog_pgsz_arr(sfmmu_t *sfmmup, uint8_t *tmp_pgsz) sfmmu_hat_exit(hatlockp); } -/* Update scd_rttecnt for shme rgns in the SCD */ +/* + * The scd_rttecnt field in the SCD must be updated to take account of the + * regions which it contains. + */ static void sfmmu_set_scd_rttecnt(sf_srd_t *srdp, sf_scd_t *scdp) { @@ -11030,6 +11055,13 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr, size, flags); } } else if (SFMMU_IS_SHMERID_VALID(rid)) { + /* + * Shared hmes use per region bitmaps in rgn_hmeflag + * rather than shadow hmeblks to keep track of the + * mapping sizes which have been allocated for the region. 
+ * Here we cleanup old invalid hmeblks with this rid, + * which may be left around by pageunload(). + */ int ttesz; caddr_t va; caddr_t eva = vaddr + TTEBYTES(size); @@ -11317,11 +11349,6 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, hmeblkp->hblk_nextpa = hblkpa; hmeblkp->hblk_shw_bit = 0; - /* - * Clear ttebit map in the region this hmeblk belongs to. The region - * must exist as long as any of its hmeblks exist. This invariant - * holds because before region is freed all its hmeblks are removed. - */ if (hmeblkp->hblk_shared) { sf_srd_t *srdp; sf_region_t *rgnp; @@ -11334,7 +11361,6 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, ASSERT(rid < SFMMU_MAX_HME_REGIONS); rgnp = srdp->srd_hmergnp[rid]; ASSERT(rgnp != NULL); - vaddr = (caddr_t)get_hblk_base(hmeblkp); SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid); hmeblkp->hblk_shared = 0; } @@ -11572,11 +11598,6 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, */ hmeblkp->hblk_shw_bit = 0; - /* - * Clear ttebit map in the region this hmeblk belongs to. The region - * must exist as long as any of its hmeblks exist. This invariant - * holds because before region is freed all its hmeblks are removed. - */ if (hmeblkp->hblk_shared) { sf_srd_t *srdp; sf_region_t *rgnp; @@ -11589,7 +11610,6 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, ASSERT(rid < SFMMU_MAX_HME_REGIONS); rgnp = srdp->srd_hmergnp[rid]; ASSERT(rgnp != NULL); - vaddr = (caddr_t)get_hblk_base(hmeblkp); SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid); hmeblkp->hblk_shared = 0; } @@ -12087,6 +12107,12 @@ next: return (rcpuset); } +/* + * This routine takes an sfmmu pointer and the va for an adddress in an + * ISM region as input and returns the corresponding region id in ism_rid. + * The return value of 1 indicates that a region has been found and ism_rid + * is valid, otherwise 0 is returned. 
+ */ static int find_ism_rid(sfmmu_t *sfmmup, sfmmu_t *ism_sfmmup, caddr_t va, uint_t *ism_rid) { @@ -13321,6 +13347,8 @@ sfmmu_rm_large_mappings(page_t *pp, int ttesz) */ for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) { + if (IS_PAHME(sfhmep)) + continue; hmeblkp = sfmmu_hmetohblk(sfhmep); if (hmeblkp->hblk_xhat_bit) continue; @@ -13357,7 +13385,7 @@ hat_supported(enum hat_features feature, void *arg) case HAT_VMODSORT: return (1); case HAT_SHARED_REGIONS: - if (!disable_shctx && shctx_on) + if (shctx_on) return (1); else return (0); @@ -13679,7 +13707,7 @@ hat_join_srd(struct hat *sfmmup, vnode_t *evp) ASSERT(sfmmup != ksfmmup); ASSERT(sfmmup->sfmmu_srdp == NULL); - if (disable_shctx || !shctx_on) { + if (!shctx_on) { return; } @@ -13901,7 +13929,6 @@ hat_join_region(struct hat *sfmmup, sf_region_t **rarrp; uint16_t *busyrgnsp; ulong_t rttecnt; - int rkmalloc = 0; uchar_t tteflag; uchar_t r_type = flags & HAT_REGION_TYPE_MASK; int text = (r_type == HAT_REGION_TEXT); @@ -14088,27 +14115,22 @@ rfound: ASSERT(MUTEX_HELD(&srdp->srd_mutex)); if (*freelistp != NULL) { - new_rgnp = *freelistp; - *freelistp = new_rgnp->rgn_next; - ASSERT(new_rgnp->rgn_id < *nextidp); - ASSERT(new_rgnp->rgn_id < maxids); - ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE); - ASSERT((new_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) + rgnp = *freelistp; + *freelistp = rgnp->rgn_next; + ASSERT(rgnp->rgn_id < *nextidp); + ASSERT(rgnp->rgn_id < maxids); + ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE); + ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type); - ASSERT(rarrp[new_rgnp->rgn_id] == new_rgnp); - - ASSERT(new_rgnp->rgn_hmeflags == 0); - } - - if (new_rgnp == NULL) { + ASSERT(rarrp[rgnp->rgn_id] == rgnp); + ASSERT(rgnp->rgn_hmeflags == 0); + } else { /* * release local locks before memory allocation. */ mutex_exit(&srdp->srd_mutex); - if (new_rgnp == NULL) { - rkmalloc = 1; - new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP); - } + + new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP); mutex_enter(&srdp->srd_mutex); for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL; @@ -14123,34 +14145,19 @@ rfound: } } if (rgnp != NULL) { - if (!rkmalloc) { - ASSERT(new_rgnp->rgn_flags & - SFMMU_REGION_FREE); - new_rgnp->rgn_next = *freelistp; - *freelistp = new_rgnp; - new_rgnp = NULL; - } goto rfound; } - if (rkmalloc) { - if (*nextidp >= maxids) { - mutex_exit(&srdp->srd_mutex); - goto fail; - } - rgnp = new_rgnp; - new_rgnp = NULL; - rgnp->rgn_id = (*nextidp)++; - ASSERT(rgnp->rgn_id < maxids); - ASSERT(rarrp[rgnp->rgn_id] == NULL); - rarrp[rgnp->rgn_id] = rgnp; - } else { - rgnp = new_rgnp; - new_rgnp = NULL; + if (*nextidp >= maxids) { + mutex_exit(&srdp->srd_mutex); + goto fail; } - } else { rgnp = new_rgnp; new_rgnp = NULL; + rgnp->rgn_id = (*nextidp)++; + ASSERT(rgnp->rgn_id < maxids); + ASSERT(rarrp[rgnp->rgn_id] == NULL); + rarrp[rgnp->rgn_id] = rgnp; } ASSERT(rgnp->rgn_sfmmu_head == NULL); @@ -14177,14 +14184,7 @@ rfound: fail: ASSERT(new_rgnp != NULL); - if (rkmalloc) { - kmem_cache_free(region_cache, new_rgnp); - } else { - /* put it back on the free list. */ - ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE); - new_rgnp->rgn_next = *freelistp; - *freelistp = new_rgnp; - } + kmem_cache_free(region_cache, new_rgnp); return (HAT_INVALID_REGION_COOKIE); } @@ -14543,7 +14543,7 @@ check_scd_sfmmu_list(sfmmu_t **headp, sfmmu_t *sfmmup, int onlist) #endif /* DEBUG */ /* - * Removes an sfmmu from the start of the queue. + * Removes an sfmmu from the SCD sfmmu list. 
*/ static void sfmmu_from_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup) diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h index 2dc7183d85..16ea7bcfae 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h @@ -426,9 +426,9 @@ typedef struct sf_shared_region_domain { sf_region_t *srd_hmergnfree; /* pointer to the next free ism region */ sf_region_t *srd_ismrgnfree; - /* id of next ism rgn created */ + /* id of next ism region created */ uint16_t srd_next_ismrid; - /* pointer of next hme region created */ + /* id of next hme region created */ uint16_t srd_next_hmerid; uint16_t srd_ismbusyrgns; /* # of ism rgns in use */ uint16_t srd_hmebusyrgns; /* # of hme rgns in use */ @@ -468,6 +468,8 @@ typedef struct sf_srd_bucket { * This macro grabs hat lock and allocates level 2 hat chain * associated with a shme rgn. In the majority of cases, the macro * is called with alloc = 0, and lock = 0. + * A pointer to the level 2 sf_rgn_link_t structure is returned in the lnkp + * parameter. */ #define SFMMU_HMERID2RLINKP(sfmmup, rid, lnkp, alloc, lock) \ { \ @@ -619,8 +621,23 @@ typedef struct sfmmu_ctx { * tte counts should be protected by cas. * cpuset is protected by cas. * + * ttecnt accounting for mappings which do not use shared hme is carried out + * during pagefault handling. In the shared hme case, only the first process + * to access a mapping generates a pagefault, subsequent processes simply + * find the shared hme entry during trap handling and therefore there is no + * corresponding event to initiate ttecnt accounting. Currently, as shared + * hmes are only used for text segments, when joining a region we assume the + * worst case and add the the number of ttes required to map the entire region + * to the ttecnt corresponding to the region pagesize. However, if the region + * has a 4M pagesize, and memory is low, the allocation of 4M pages may fail + * then 8K pages will be allocated instead and the first TSB which stores 8K + * mappings will potentially be undersized. To compensate for the potential + * underaccounting in this case we always add 1/4 of the region size to the 8K + * ttecnt. + * * Note that sfmmu_xhat_provider MUST be the first element. 
*/ + struct hat { void *sfmmu_xhat_provider; /* NULL for CPU hat */ cpuset_t sfmmu_cpusran; /* cpu bit mask for efficient xcalls */ @@ -704,9 +721,6 @@ struct sf_scd { #define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map #define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map -#define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map -#define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map - extern int disable_shctx; extern int shctx_on; diff --git a/usr/src/uts/sun4/vm/sfmmu.c b/usr/src/uts/sun4/vm/sfmmu.c index 99d2428c4f..78247431a3 100644 --- a/usr/src/uts/sun4/vm/sfmmu.c +++ b/usr/src/uts/sun4/vm/sfmmu.c @@ -152,7 +152,7 @@ va_to_pa(void *vaddr) if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID) return ((uint64_t)-1); return (((uint64_t)pfn << MMU_PAGESHIFT) | - ((uint64_t)vaddr & MMU_PAGEOFFSET)); + ((uint64_t)vaddr & MMU_PAGEOFFSET)); } void @@ -195,7 +195,7 @@ hat_kern_setup(void) } } - if (!shctx_on || disable_shctx) { + if (!shctx_on) { sfmmu_patch_shctx(); } @@ -891,7 +891,7 @@ ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages) if (enable_bigktsb) { ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) * sizeof (struct hmehash_bucket) <= - TSB_BYTES(TSB_1M_SZCODE)); + TSB_BYTES(TSB_1M_SZCODE)); max_nucuhme_buckets *= 2; max_nuckhme_buckets *= 2; @@ -906,7 +906,7 @@ ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages) * physical memory only. */ hme_buckets = (npages * HMEHASH_FACTOR) / - (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT)); + (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT)); uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS); @@ -926,7 +926,7 @@ ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages) khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS); if ((khmehash_num > max_nuckhme_buckets) || - (uhmehash_num > max_nucuhme_buckets)) { + (uhmehash_num > max_nucuhme_buckets)) { khme_hash = NULL; uhme_hash = NULL; } else { diff --git a/usr/src/uts/sun4u/vm/mach_kpm.c b/usr/src/uts/sun4u/vm/mach_kpm.c index e39c3a26d2..85ae7d50ad 100644 --- a/usr/src/uts/sun4u/vm/mach_kpm.c +++ b/usr/src/uts/sun4u/vm/mach_kpm.c @@ -1618,6 +1618,8 @@ sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr) */ for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) { tmphme = sfhmep->hme_next; + if (IS_PAHME(sfhmep)) + continue; hmeblkp = sfmmu_hmetohblk(sfhmep); if (hmeblkp->hblk_xhat_bit) continue; @@ -1641,6 +1643,8 @@ sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr) for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) { tmphme = sfhmep->hme_next; + if (IS_PAHME(sfhmep)) + continue; hmeblkp = sfmmu_hmetohblk(sfhmep); if (hmeblkp->hblk_xhat_bit) continue; diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c index e1b12ce660..6d1ebfefdf 100644 --- a/usr/src/uts/sun4v/os/fillsysinfo.c +++ b/usr/src/uts/sun4v/os/fillsysinfo.c @@ -521,9 +521,7 @@ get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie) } /* - * Get the number of shared contexts from MD. This property more accurately - * describes the total number of contexts available, not just "shared contexts". - * If absent the default value is 1, + * Get the number of shared contexts from MD. If absent the default value is 0. 
* */ static uint64_t diff --git a/usr/src/uts/sun4v/sys/mmu.h b/usr/src/uts/sun4v/sys/mmu.h index 697f31d700..0f520831d5 100644 --- a/usr/src/uts/sun4v/sys/mmu.h +++ b/usr/src/uts/sun4v/sys/mmu.h @@ -152,8 +152,8 @@ extern "C" { #define MAX_NCTXS (1ull << MAX_NCTXS_BITS) /* - * MIN_NCONTEXTS and MIN_NTSBS are the minimum number of contexts and tsbs - * necessary for shared context support. + * MIN_NSHCONTEXTS and MIN_NTSBS are the minimum number of shared contexts + * and tsbs necessary for shared context support. */ #define MIN_NSHCONTEXTS 1 #define MIN_NTSBS 4 diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.c b/usr/src/uts/sun4v/vm/mach_sfmmu.c index 52e69bceec..62a8a59da2 100644 --- a/usr/src/uts/sun4v/vm/mach_sfmmu.c +++ b/usr/src/uts/sun4v/vm/mach_sfmmu.c @@ -322,7 +322,31 @@ sfmmu_clear_utsbinfo() } /* - * Set machine specific TSB information + * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu + * list to the order of tsbs in the tsb descriptor array passed to the hv, which + * is the search order used during Hardware Table Walk. + * So, the tsb with index i in the sfmmu list will have search order tsbord[i]. + * + * The order of tsbs in the sfmmu list will be as follows: + * + * 0 8K - 512K private TSB + * 1 4M - 256M private TSB + * 2 8K - 512K shared TSB + * 3 4M - 256M shared TSB + * + * Shared TSBs are only used if a process is part of an SCD. + * + * So, e.g. tsbord[3] = 1; + * corresponds to searching the shared 4M TSB second. + * + * The search order is selected so that the 8K-512K private TSB is always first. + * Currently shared context is not expected to map many 8K-512K pages that cause + * TLB misses so we order the shared TSB for 4M-256M pages in front of the + * shared TSB for 8K-512K pages. We also expect more TLB misses against private + * context mappings than shared context mappings and place private TSBs ahead of + * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can + * be used to change the default ordering of private and shared TSBs for + * 4M-256M pages. */ void sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) |
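One pattern recurs across hat_sfmmu.c and mach_kpm.c in this change: every walk of a page's p_mapping list now skips pa_hment entries before handing the entry to sfmmu_hmetohblk(). A pa_hment's embedded sf_hment does not carry a valid TTE, which is presumably what the 6603269 assertion (sfhme->hme_tte.ll != 0) tripped over. The loop shape added in sfmmu_vac_conflict(), tst_tnc(), sfmmu_page_cache(), sfmmu_rm_large_mappings() and the kpm VAC handler looks roughly like the schematic composite below (not a verbatim hunk; IS_PAHME, sfmmu_hmetohblk and hblk_xhat_bit are as they appear in the diff).

```c
	for (sfhme = pp->p_mapping; sfhme != NULL; sfhme = tmphme) {
		tmphme = sfhme->hme_next;	/* grab next first */
		if (IS_PAHME(sfhme))
			continue;		/* pa_hment: no real TTE to act on */
		hmeblkp = sfmmu_hmetohblk(sfhme);
		if (hmeblkp->hblk_xhat_bit)
			continue;		/* foreign (XHAT) mapping */
		/* ... act on the regular hme mapping ... */
	}
```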