author     paulsan <none@none>    2007-09-17 15:08:19 -0700
committer  paulsan <none@none>    2007-09-17 15:08:19 -0700
commit     7dacfc4494f6d14358974ef2830b5cd8c66a84de (patch)
tree       36b58448c9c8410d22f977898985778d043e1bd4 /usr/src
parent     538414560648ad8b2b92897b55592cda64706375 (diff)
download   illumos-gate-7dacfc4494f6d14358974ef2830b5cd8c66a84de.tar.gz
6597746 Don't create srd_cache, region_cache and scd_cache on systems that don't use shared context
6597751 SFMMU_HASH_UNLOCK() call is missing in hat_unlock_region()
6600745 shared context code needs to be cleaned up
6603269 assertion failed: sfhme->hme_tte.ll != 0, file: ../../sfmmu/vm/hat_sfmmu.c, line: 11613
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/vm/seg_vn.c      |   8
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c      |  10
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.c    | 204
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.h    |  24
-rw-r--r--  usr/src/uts/sun4/vm/sfmmu.c         |  10
-rw-r--r--  usr/src/uts/sun4u/vm/mach_kpm.c     |   4
-rw-r--r--  usr/src/uts/sun4v/os/fillsysinfo.c  |   4
-rw-r--r--  usr/src/uts/sun4v/sys/mmu.h         |   4
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu.c   |  26
9 files changed, 169 insertions, 125 deletions
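
The hat_unlock_region() change for 6597751 (in the hat_sfmmu.c hunks below) adds a missing SFMMU_HASH_UNLOCK() on the early "continue" path of a per-bucket hash search. A standalone pthreads sketch of that lock-per-bucket pattern, with hypothetical names and an ordinary mutex standing in for the kernel's hash-bucket lock:

#include <pthread.h>
#include <stdio.h>

struct bucket {
	pthread_mutex_t lock;
	void *head;
};

/*
 * Search a set of locked buckets for a non-empty one.  Every path that
 * continues to the next bucket must drop the current bucket's lock first;
 * forgetting the unlock on the "continue" path is the bug class fixed in
 * hat_unlock_region().
 */
static void *
search_buckets(struct bucket *buckets, int nbuckets)
{
	for (int i = 0; i < nbuckets; i++) {
		pthread_mutex_lock(&buckets[i].lock);
		if (buckets[i].head == NULL) {
			pthread_mutex_unlock(&buckets[i].lock);
			continue;
		}
		void *found = buckets[i].head;
		pthread_mutex_unlock(&buckets[i].lock);
		return (found);
	}
	return (NULL);
}

int
main(void)
{
	int val = 42;
	struct bucket b[2] = {
		{ PTHREAD_MUTEX_INITIALIZER, NULL },
		{ PTHREAD_MUTEX_INITIALIZER, &val }
	};
	printf("found %p\n", search_buckets(b, 2));
	return (0);
}
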
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 6234d01bac..5f7689ab08 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -1628,8 +1628,8 @@ out:
/*
- * callback function used by segvn_unmap to invoke free_vp_pages() for only
- * those pages actually processed by the HAT
+ * callback function to invoke free_vp_pages() for only those pages actually
+ * processed by the HAT when a shared region is destroyed.
*/
extern int free_pages;
@@ -1657,6 +1657,10 @@ segvn_hat_rgn_unload_callback(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr,
free_vp_pages(vp, off, len);
}
+/*
+ * callback function used by segvn_unmap to invoke free_vp_pages() for only
+ * those pages actually processed by the HAT
+ */
static void
segvn_hat_unload_callback(hat_callback_t *cb)
{
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 979778d8b5..53e42e74e4 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -1722,7 +1722,7 @@ hat_unlock(hat_t *hat, caddr_t addr, size_t len)
/* ARGSUSED */
void
-hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len,
+hat_unlock_region(struct hat *hat, caddr_t addr, size_t len,
hat_region_cookie_t rcookie)
{
panic("No shared region support on x86");
@@ -3877,13 +3877,13 @@ hati_update_pte(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new)
/* ARGSUSED */
void
-hat_join_srd(struct hat *sfmmup, vnode_t *evp)
+hat_join_srd(struct hat *hat, vnode_t *evp)
{
}
/* ARGSUSED */
hat_region_cookie_t
-hat_join_region(struct hat *sfmmup,
+hat_join_region(struct hat *hat,
caddr_t r_saddr,
size_t r_size,
void *r_obj,
@@ -3899,14 +3899,14 @@ hat_join_region(struct hat *sfmmup,
/* ARGSUSED */
void
-hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags)
+hat_leave_region(struct hat *hat, hat_region_cookie_t rcookie, uint_t flags)
{
panic("No shared region support on x86");
}
/* ARGSUSED */
void
-hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie)
+hat_dup_region(struct hat *hat, hat_region_cookie_t rcookie)
{
panic("No shared region support on x86");
}
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
index eeb44a0dd5..e4727578c0 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
@@ -823,10 +823,7 @@ sfmmu_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag)
* highbit() - 1) to get the size code for the smallest TSB that can represent
* all of physical memory, while erring on the side of too much.
*
- * If the computed size code is less than the current tsb_max_growsize, we set
- * tsb_max_growsize to the computed size code. In the case where the computed
- * size code is greater than tsb_max_growsize, we have these restrictions that
- * apply to increasing tsb_max_growsize:
+ * Restrict tsb_max_growsize to make sure that:
* 1) TSBs can't grow larger than the TSB slab size
* 2) TSBs can't grow larger than UTSB_MAX_SZCODE.
*/
@@ -1381,25 +1378,33 @@ hat_init(void)
mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
- srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
- sizeof (srd_buckets[0]), KM_SLEEP);
- for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) {
- mutex_init(&srd_buckets[i].srdb_lock, NULL, MUTEX_DEFAULT,
- NULL);
- }
/*
- * 64 byte alignment is required in order to isolate certain field
- * into its own cacheline.
+ * If Shared context support is disabled via /etc/system
+ * set shctx_on to 0 here if it was set to 1 earlier in boot
+ * sequence by cpu module initialization code.
*/
- srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t), 64,
- sfmmu_srdcache_constructor, sfmmu_srdcache_destructor,
- NULL, NULL, NULL, 0);
- region_cache = kmem_cache_create("region_cache",
- sizeof (sf_region_t), 0, sfmmu_rgncache_constructor,
- sfmmu_rgncache_destructor, NULL, NULL, NULL, 0);
- scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t), 0,
- sfmmu_scdcache_constructor, sfmmu_scdcache_destructor,
- NULL, NULL, NULL, 0);
+ if (shctx_on && disable_shctx) {
+ shctx_on = 0;
+ }
+
+ if (shctx_on) {
+ srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
+ sizeof (srd_buckets[0]), KM_SLEEP);
+ for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) {
+ mutex_init(&srd_buckets[i].srdb_lock, NULL,
+ MUTEX_DEFAULT, NULL);
+ }
+
+ srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t),
+ 0, sfmmu_srdcache_constructor, sfmmu_srdcache_destructor,
+ NULL, NULL, NULL, 0);
+ region_cache = kmem_cache_create("region_cache",
+ sizeof (sf_region_t), 0, sfmmu_rgncache_constructor,
+ sfmmu_rgncache_destructor, NULL, NULL, NULL, 0);
+ scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t),
+ 0, sfmmu_scdcache_constructor, sfmmu_scdcache_destructor,
+ NULL, NULL, NULL, 0);
+ }
/*
* Pre-allocate hrm_hashtab before enabling the collection of
@@ -3716,7 +3721,8 @@ sfmmu_rgn_cb_noop(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr,
}
/*
- * update *eaddrp only if hmeblk was unloaded.
+ * Searches for an hmeblk which maps addr, then unloads this mapping
+ * and updates *eaddrp, if the hmeblk is found.
*/
static void
sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr,
@@ -3760,13 +3766,6 @@ sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr,
sfmmu_hblks_list_purge(&list);
}
-/*
- * This routine can be optimized to eliminate scanning areas of smaller page
- * size bitmaps when a corresponding bit is set in the bitmap for a bigger
- * page size. For now assume the region will usually only have the primary
- * size mappings so we'll scan only one bitmap anyway by checking rgn_hmeflags
- * first.
- */
static void
sfmmu_unload_hmeregion(sf_srd_t *srdp, sf_region_t *rgnp)
{
@@ -3972,6 +3971,7 @@ hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len,
HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa,
pr_hblk, prevpa, &list);
if (hmeblkp == NULL) {
+ SFMMU_HASH_UNLOCK(hmebp);
ttesz--;
continue;
}
@@ -7407,9 +7407,11 @@ retry:
* from the list.
*/
tmphme = sfhme->hme_next;
+ if (IS_PAHME(sfhme))
+ continue;
/*
* If we are looking for large mappings and this hme doesn't
- * reach the range we are seeking, just ignore its.
+ * reach the range we are seeking, just ignore it.
*/
hmeblkp = sfmmu_hmetohblk(sfhme);
if (hmeblkp->hblk_xhat_bit)
@@ -7917,7 +7919,7 @@ hat_getkpfnum(caddr_t addr)
}
/*
- * This routine will return both pfn and tte for the addr.
+ * This routine will return both pfn and tte for the vaddr.
*/
static pfn_t
sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup, tte_t *ttep)
@@ -8129,7 +8131,7 @@ hat_page_getshare(page_t *pp)
}
/*
- * Return 1 the number of mappings exceeds sh_thresh. Return 0
+ * Return 1 if the number of mappings exceeds sh_thresh. Return 0
* otherwise. Count shared hmeblks by region's refcnt.
*/
int
@@ -8156,10 +8158,23 @@ hat_page_checkshare(page_t *pp, ulong_t sh_thresh)
again:
for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
tmphme = sfhme->hme_next;
- if (hme_size(sfhme) != sz) {
+ if (IS_PAHME(sfhme)) {
continue;
}
+
hmeblkp = sfmmu_hmetohblk(sfhme);
+ if (hmeblkp->hblk_xhat_bit) {
+ cnt++;
+ if (cnt > sh_thresh) {
+ sfmmu_mlist_exit(pml);
+ return (1);
+ }
+ continue;
+ }
+ if (hme_size(sfhme) != sz) {
+ continue;
+ }
+
if (hmeblkp->hblk_shared) {
sf_srd_t *srdp = hblktosrd(hmeblkp);
uint_t rid = hmeblkp->hblk_tag.htag_rid;
@@ -8238,6 +8253,7 @@ hat_page_demote(page_t *pp)
ASSERT(PAGE_EXCL(pp));
ASSERT(!PP_ISFREE(pp));
+ ASSERT(!PP_ISKAS(pp));
ASSERT(page_szc_lock_assert(pp));
pml = sfmmu_mlist_enter(pp);
@@ -8264,6 +8280,7 @@ hat_page_demote(page_t *pp)
rootpp = PP_GROUPLEADER(pp, sz);
for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) {
tmphme = sfhme->hme_next;
+ ASSERT(!IS_PAHME(sfhme));
hmeblkp = sfmmu_hmetohblk(sfhme);
if (hme_size(sfhme) != sz) {
continue;
@@ -8649,10 +8666,7 @@ hat_share(struct hat *sfmmup, caddr_t addr,
* sfmmu_check_page_sizes at the end of this routine.
*/
old_scdp = sfmmup->sfmmu_scdp;
- /*
- * Call hat_join_region without the hat lock, because it's
- * used in hat_join_region.
- */
+
rcookie = hat_join_region(sfmmup, addr, len, (void *)ism_hatid, 0,
PROT_ALL, ismszc, NULL, HAT_REGION_ISM);
if (rcookie != HAT_INVALID_REGION_COOKIE) {
@@ -8810,8 +8824,8 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
/*
* After hat_leave_region, the sfmmup may leave SCD,
- * in which case, we want to grow the private tsb size
- * when call sfmmu_check_page_sizes at the end of the routine.
+ * in which case, we want to grow the private tsb size when
+ * calling sfmmu_check_page_sizes at the end of the routine.
*/
old_scdp = sfmmup->sfmmu_scdp;
/*
@@ -9195,6 +9209,8 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp)
*/
for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
tmphme = sfhmep->hme_next;
+ if (IS_PAHME(sfhmep))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhmep);
if (hmeblkp->hblk_xhat_bit)
continue;
@@ -9221,6 +9237,8 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp)
for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
tmphme = sfhmep->hme_next;
+ if (IS_PAHME(sfhmep))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhmep);
if (hmeblkp->hblk_xhat_bit)
continue;
@@ -9368,6 +9386,8 @@ tst_tnc(page_t *pp, pgcnt_t npages)
}
for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
+ if (IS_PAHME(sfhme))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhme);
if (hmeblkp->hblk_xhat_bit)
continue;
@@ -9514,6 +9534,8 @@ sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
+ if (IS_PAHME(sfhme))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhme);
if (hmeblkp->hblk_xhat_bit)
@@ -10064,7 +10086,10 @@ sfmmu_reprog_pgsz_arr(sfmmu_t *sfmmup, uint8_t *tmp_pgsz)
sfmmu_hat_exit(hatlockp);
}
-/* Update scd_rttecnt for shme rgns in the SCD */
+/*
+ * The scd_rttecnt field in the SCD must be updated to take account of the
+ * regions which it contains.
+ */
static void
sfmmu_set_scd_rttecnt(sf_srd_t *srdp, sf_scd_t *scdp)
{
@@ -11030,6 +11055,13 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr,
size, flags);
}
} else if (SFMMU_IS_SHMERID_VALID(rid)) {
+ /*
+ * Shared hmes use per region bitmaps in rgn_hmeflag
+ * rather than shadow hmeblks to keep track of the
+ * mapping sizes which have been allocated for the region.
+ * Here we cleanup old invalid hmeblks with this rid,
+ * which may be left around by pageunload().
+ */
int ttesz;
caddr_t va;
caddr_t eva = vaddr + TTEBYTES(size);
@@ -11317,11 +11349,6 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
hmeblkp->hblk_nextpa = hblkpa;
hmeblkp->hblk_shw_bit = 0;
- /*
- * Clear ttebit map in the region this hmeblk belongs to. The region
- * must exist as long as any of its hmeblks exist. This invariant
- * holds because before region is freed all its hmeblks are removed.
- */
if (hmeblkp->hblk_shared) {
sf_srd_t *srdp;
sf_region_t *rgnp;
@@ -11334,7 +11361,6 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
ASSERT(rid < SFMMU_MAX_HME_REGIONS);
rgnp = srdp->srd_hmergnp[rid];
ASSERT(rgnp != NULL);
- vaddr = (caddr_t)get_hblk_base(hmeblkp);
SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
hmeblkp->hblk_shared = 0;
}
@@ -11572,11 +11598,6 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
*/
hmeblkp->hblk_shw_bit = 0;
- /*
- * Clear ttebit map in the region this hmeblk belongs to. The region
- * must exist as long as any of its hmeblks exist. This invariant
- * holds because before region is freed all its hmeblks are removed.
- */
if (hmeblkp->hblk_shared) {
sf_srd_t *srdp;
sf_region_t *rgnp;
@@ -11589,7 +11610,6 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
ASSERT(rid < SFMMU_MAX_HME_REGIONS);
rgnp = srdp->srd_hmergnp[rid];
ASSERT(rgnp != NULL);
- vaddr = (caddr_t)get_hblk_base(hmeblkp);
SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
hmeblkp->hblk_shared = 0;
}
@@ -12087,6 +12107,12 @@ next:
return (rcpuset);
}
+/*
+ * This routine takes an sfmmu pointer and the va for an address in an
+ * ISM region as input and returns the corresponding region id in ism_rid.
+ * The return value of 1 indicates that a region has been found and ism_rid
+ * is valid, otherwise 0 is returned.
+ */
static int
find_ism_rid(sfmmu_t *sfmmup, sfmmu_t *ism_sfmmup, caddr_t va, uint_t *ism_rid)
{
@@ -13321,6 +13347,8 @@ sfmmu_rm_large_mappings(page_t *pp, int ttesz)
*/
for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) {
+ if (IS_PAHME(sfhmep))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhmep);
if (hmeblkp->hblk_xhat_bit)
continue;
@@ -13357,7 +13385,7 @@ hat_supported(enum hat_features feature, void *arg)
case HAT_VMODSORT:
return (1);
case HAT_SHARED_REGIONS:
- if (!disable_shctx && shctx_on)
+ if (shctx_on)
return (1);
else
return (0);
@@ -13679,7 +13707,7 @@ hat_join_srd(struct hat *sfmmup, vnode_t *evp)
ASSERT(sfmmup != ksfmmup);
ASSERT(sfmmup->sfmmu_srdp == NULL);
- if (disable_shctx || !shctx_on) {
+ if (!shctx_on) {
return;
}
@@ -13901,7 +13929,6 @@ hat_join_region(struct hat *sfmmup,
sf_region_t **rarrp;
uint16_t *busyrgnsp;
ulong_t rttecnt;
- int rkmalloc = 0;
uchar_t tteflag;
uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
int text = (r_type == HAT_REGION_TEXT);
@@ -14088,27 +14115,22 @@ rfound:
ASSERT(MUTEX_HELD(&srdp->srd_mutex));
if (*freelistp != NULL) {
- new_rgnp = *freelistp;
- *freelistp = new_rgnp->rgn_next;
- ASSERT(new_rgnp->rgn_id < *nextidp);
- ASSERT(new_rgnp->rgn_id < maxids);
- ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE);
- ASSERT((new_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK)
+ rgnp = *freelistp;
+ *freelistp = rgnp->rgn_next;
+ ASSERT(rgnp->rgn_id < *nextidp);
+ ASSERT(rgnp->rgn_id < maxids);
+ ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE);
+ ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK)
== r_type);
- ASSERT(rarrp[new_rgnp->rgn_id] == new_rgnp);
-
- ASSERT(new_rgnp->rgn_hmeflags == 0);
- }
-
- if (new_rgnp == NULL) {
+ ASSERT(rarrp[rgnp->rgn_id] == rgnp);
+ ASSERT(rgnp->rgn_hmeflags == 0);
+ } else {
/*
* release local locks before memory allocation.
*/
mutex_exit(&srdp->srd_mutex);
- if (new_rgnp == NULL) {
- rkmalloc = 1;
- new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP);
- }
+
+ new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP);
mutex_enter(&srdp->srd_mutex);
for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
@@ -14123,34 +14145,19 @@ rfound:
}
}
if (rgnp != NULL) {
- if (!rkmalloc) {
- ASSERT(new_rgnp->rgn_flags &
- SFMMU_REGION_FREE);
- new_rgnp->rgn_next = *freelistp;
- *freelistp = new_rgnp;
- new_rgnp = NULL;
- }
goto rfound;
}
- if (rkmalloc) {
- if (*nextidp >= maxids) {
- mutex_exit(&srdp->srd_mutex);
- goto fail;
- }
- rgnp = new_rgnp;
- new_rgnp = NULL;
- rgnp->rgn_id = (*nextidp)++;
- ASSERT(rgnp->rgn_id < maxids);
- ASSERT(rarrp[rgnp->rgn_id] == NULL);
- rarrp[rgnp->rgn_id] = rgnp;
- } else {
- rgnp = new_rgnp;
- new_rgnp = NULL;
+ if (*nextidp >= maxids) {
+ mutex_exit(&srdp->srd_mutex);
+ goto fail;
}
- } else {
rgnp = new_rgnp;
new_rgnp = NULL;
+ rgnp->rgn_id = (*nextidp)++;
+ ASSERT(rgnp->rgn_id < maxids);
+ ASSERT(rarrp[rgnp->rgn_id] == NULL);
+ rarrp[rgnp->rgn_id] = rgnp;
}
ASSERT(rgnp->rgn_sfmmu_head == NULL);
@@ -14177,14 +14184,7 @@ rfound:
fail:
ASSERT(new_rgnp != NULL);
- if (rkmalloc) {
- kmem_cache_free(region_cache, new_rgnp);
- } else {
- /* put it back on the free list. */
- ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE);
- new_rgnp->rgn_next = *freelistp;
- *freelistp = new_rgnp;
- }
+ kmem_cache_free(region_cache, new_rgnp);
return (HAT_INVALID_REGION_COOKIE);
}
@@ -14543,7 +14543,7 @@ check_scd_sfmmu_list(sfmmu_t **headp, sfmmu_t *sfmmup, int onlist)
#endif /* DEBUG */
/*
- * Removes an sfmmu from the start of the queue.
+ * Removes an sfmmu from the SCD sfmmu list.
*/
static void
sfmmu_from_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup)
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
index 2dc7183d85..16ea7bcfae 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
@@ -426,9 +426,9 @@ typedef struct sf_shared_region_domain {
sf_region_t *srd_hmergnfree;
/* pointer to the next free ism region */
sf_region_t *srd_ismrgnfree;
- /* id of next ism rgn created */
+ /* id of next ism region created */
uint16_t srd_next_ismrid;
- /* pointer of next hme region created */
+ /* id of next hme region created */
uint16_t srd_next_hmerid;
uint16_t srd_ismbusyrgns; /* # of ism rgns in use */
uint16_t srd_hmebusyrgns; /* # of hme rgns in use */
@@ -468,6 +468,8 @@ typedef struct sf_srd_bucket {
* This macro grabs hat lock and allocates level 2 hat chain
* associated with a shme rgn. In the majority of cases, the macro
* is called with alloc = 0, and lock = 0.
+ * A pointer to the level 2 sf_rgn_link_t structure is returned in the lnkp
+ * parameter.
*/
#define SFMMU_HMERID2RLINKP(sfmmup, rid, lnkp, alloc, lock) \
{ \
@@ -619,8 +621,23 @@ typedef struct sfmmu_ctx {
* tte counts should be protected by cas.
* cpuset is protected by cas.
*
+ * ttecnt accounting for mappings which do not use shared hme is carried out
+ * during pagefault handling. In the shared hme case, only the first process
+ * to access a mapping generates a pagefault, subsequent processes simply
+ * find the shared hme entry during trap handling and therefore there is no
+ * corresponding event to initiate ttecnt accounting. Currently, as shared
+ * hmes are only used for text segments, when joining a region we assume the
+ * worst case and add the number of ttes required to map the entire region
+ * to the ttecnt corresponding to the region pagesize. However, if the region
+ * has a 4M pagesize and memory is low, the allocation of 4M pages may fail;
+ * 8K pages will then be allocated instead and the first TSB which stores 8K
+ * mappings will potentially be undersized. To compensate for the potential
+ * underaccounting in this case we always add 1/4 of the region size to the 8K
+ * ttecnt.
+ *
* Note that sfmmu_xhat_provider MUST be the first element.
*/
+
struct hat {
void *sfmmu_xhat_provider; /* NULL for CPU hat */
cpuset_t sfmmu_cpusran; /* cpu bit mask for efficient xcalls */
@@ -704,9 +721,6 @@ struct sf_scd {
#define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map
#define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map
-#define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map
-#define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map
-
extern int disable_shctx;
extern int shctx_on;
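
The ttecnt comment added above describes worst-case accounting when a process joins a shared text region. A standalone arithmetic sketch for a hypothetical 256MB region with a 4M primary pagesize (the sizes and variable names are illustrative, not taken from the source):

#include <stdio.h>

int
main(void)
{
	unsigned long r_size = 256UL << 20;	/* hypothetical 256MB text region */
	unsigned long pgsz4m = 4UL << 20;	/* region's primary pagesize: 4M */
	unsigned long pgsz8k = 8UL << 10;	/* base pagesize: 8K */

	/* charge the whole region at its primary pagesize (worst case) */
	unsigned long ttecnt_4m = r_size / pgsz4m;		/* 64 */

	/* add 1/4 of the region's 8K tte count in case 4M pages are unavailable */
	unsigned long ttecnt_8k = (r_size / pgsz8k) / 4;	/* 8192 */

	printf("4M ttecnt += %lu, 8K ttecnt += %lu\n", ttecnt_4m, ttecnt_8k);
	return (0);
}
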
diff --git a/usr/src/uts/sun4/vm/sfmmu.c b/usr/src/uts/sun4/vm/sfmmu.c
index 99d2428c4f..78247431a3 100644
--- a/usr/src/uts/sun4/vm/sfmmu.c
+++ b/usr/src/uts/sun4/vm/sfmmu.c
@@ -152,7 +152,7 @@ va_to_pa(void *vaddr)
if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
return ((uint64_t)-1);
return (((uint64_t)pfn << MMU_PAGESHIFT) |
- ((uint64_t)vaddr & MMU_PAGEOFFSET));
+ ((uint64_t)vaddr & MMU_PAGEOFFSET));
}
void
@@ -195,7 +195,7 @@ hat_kern_setup(void)
}
}
- if (!shctx_on || disable_shctx) {
+ if (!shctx_on) {
sfmmu_patch_shctx();
}
@@ -891,7 +891,7 @@ ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
if (enable_bigktsb) {
ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
sizeof (struct hmehash_bucket) <=
- TSB_BYTES(TSB_1M_SZCODE));
+ TSB_BYTES(TSB_1M_SZCODE));
max_nucuhme_buckets *= 2;
max_nuckhme_buckets *= 2;
@@ -906,7 +906,7 @@ ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
* physical memory only.
*/
hme_buckets = (npages * HMEHASH_FACTOR) /
- (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));
+ (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));
uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);
@@ -926,7 +926,7 @@ ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);
if ((khmehash_num > max_nuckhme_buckets) ||
- (uhmehash_num > max_nucuhme_buckets)) {
+ (uhmehash_num > max_nucuhme_buckets)) {
khme_hash = NULL;
uhme_hash = NULL;
} else {
diff --git a/usr/src/uts/sun4u/vm/mach_kpm.c b/usr/src/uts/sun4u/vm/mach_kpm.c
index e39c3a26d2..85ae7d50ad 100644
--- a/usr/src/uts/sun4u/vm/mach_kpm.c
+++ b/usr/src/uts/sun4u/vm/mach_kpm.c
@@ -1618,6 +1618,8 @@ sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
*/
for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
tmphme = sfhmep->hme_next;
+ if (IS_PAHME(sfhmep))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhmep);
if (hmeblkp->hblk_xhat_bit)
continue;
@@ -1641,6 +1643,8 @@ sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
tmphme = sfhmep->hme_next;
+ if (IS_PAHME(sfhmep))
+ continue;
hmeblkp = sfmmu_hmetohblk(sfhmep);
if (hmeblkp->hblk_xhat_bit)
continue;
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index e1b12ce660..6d1ebfefdf 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -521,9 +521,7 @@ get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie)
}
/*
- * Get the number of shared contexts from MD. This property more accurately
- * describes the total number of contexts available, not just "shared contexts".
- * If absent the default value is 1,
+ * Get the number of shared contexts from MD. If absent the default value is 0.
*
*/
static uint64_t
diff --git a/usr/src/uts/sun4v/sys/mmu.h b/usr/src/uts/sun4v/sys/mmu.h
index 697f31d700..0f520831d5 100644
--- a/usr/src/uts/sun4v/sys/mmu.h
+++ b/usr/src/uts/sun4v/sys/mmu.h
@@ -152,8 +152,8 @@ extern "C" {
#define MAX_NCTXS (1ull << MAX_NCTXS_BITS)
/*
- * MIN_NCONTEXTS and MIN_NTSBS are the minimum number of contexts and tsbs
- * necessary for shared context support.
+ * MIN_NSHCONTEXTS and MIN_NTSBS are the minimum number of shared contexts
+ * and tsbs necessary for shared context support.
*/
#define MIN_NSHCONTEXTS 1
#define MIN_NTSBS 4
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.c b/usr/src/uts/sun4v/vm/mach_sfmmu.c
index 52e69bceec..62a8a59da2 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu.c
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu.c
@@ -322,7 +322,31 @@ sfmmu_clear_utsbinfo()
}
/*
- * Set machine specific TSB information
+ * The tsbord[] array is set up to translate from the order of tsbs in the sfmmu
+ * list to the order of tsbs in the tsb descriptor array passed to the hv, which
+ * is the search order used during Hardware Table Walk.
+ * So, the tsb with index i in the sfmmu list will have search order tsbord[i].
+ *
+ * The order of tsbs in the sfmmu list will be as follows:
+ *
+ * 0 8K - 512K private TSB
+ * 1 4M - 256M private TSB
+ * 2 8K - 512K shared TSB
+ * 3 4M - 256M shared TSB
+ *
+ * Shared TSBs are only used if a process is part of an SCD.
+ *
+ * So, e.g. tsbord[3] = 1;
+ * corresponds to searching the shared 4M TSB second.
+ *
+ * The search order is selected so that the 8K-512K private TSB is always first.
+ * Currently shared context is not expected to map many 8K-512K pages that cause
+ * TLB misses so we order the shared TSB for 4M-256M pages in front of the
+ * shared TSB for 8K-512K pages. We also expect more TLB misses against private
+ * context mappings than shared context mappings and place private TSBs ahead of
+ * shared TSBs in descriptor order. The shtsb4m_first /etc/system tuneable can
+ * be used to change the default ordering of private and shared TSBs for
+ * 4M-256M pages.
*/
void
sfmmu_setup_tsbinfo(sfmmu_t *sfmmup)
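
The comment above defines tsbord[], but the diff is truncated before the array is populated; for reference, the search orders it describes can be written out as follows (values inferred from the comment, so treat them as illustrative rather than as the actual implementation):

/*
 * Index = tsb position in the sfmmu list (0 = private 8K-512K TSB,
 * 1 = private 4M-256M, 2 = shared 8K-512K, 3 = shared 4M-256M);
 * value = position in the descriptor array passed to the hypervisor.
 */
static const int tsbord_default[4] = { 0, 1, 3, 2 };
			/* search: priv 8K, priv 4M, shared 4M, shared 8K */
static const int tsbord_shtsb4m_first[4] = { 0, 2, 3, 1 };
			/* search: priv 8K, shared 4M, priv 4M, shared 8K */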