author    paulsan <none@none>    2007-06-24 05:26:50 -0700
committer paulsan <none@none>    2007-06-24 05:26:50 -0700
commit    05d3dc4b6755c54754109ffbe7e792f4e5b7c7c9 (patch)
tree      ae209198d4e61ebc1c922cacdc02a3614dca107b /usr
parent    8654d0253136055bd4cc2423d87378e8a37f2eb5 (diff)
download  illumos-joyent-05d3dc4b6755c54754109ffbe7e792f4e5b7c7c9.tar.gz
PSARC 2006/266 Shared Context Support
PSARC 2006/267 Shared Region HME Block support
6388600 do_virtual_coloring checks should be expunged from sfmmu
6449192 Integrate support for MMU Shared Contexts
6449195 Integrate support for Shared Region HME Blocks
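
The hat.h and seg_vn.c changes below add a shared-region interface to the HAT layer so that address spaces mapping the same read-only text can share translation structures (the HME blocks and MMU contexts named in the PSARC cases above). As orientation, here is a condensed C sketch assembled from the seg_vn.c hunks in this diff -- it is an illustration, not code that appears verbatim in the patch, and locking and error handling are omitted:

    /* segvn_create(): join (or create) a shared region for a
     * MAP_PRIVATE|MAP_TEXT vnode mapping, keyed by (vp, offset). */
    svd->rcookie = hat_join_region(seg->s_as->a_hat, seg->s_base,
        seg->s_size, (void *)svd->vp, svd->offset, svd->prot,
        (uchar_t)seg->s_szc, segvn_hat_rgn_unload_callback,
        HAT_REGION_TEXT);

    /* segvn_dup(): the forked child's hat picks up the same region. */
    newsvd->rcookie = svd->rcookie;
    hat_dup_region(newseg->s_as->a_hat, newsvd->rcookie);

    /* segvn_unmap(), segvn_setprot(), etc.: leave the region before
     * falling back to ordinary per-process translations. */
    if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
        hat_leave_region(seg->s_as->a_hat, svd->rcookie, HAT_REGION_TEXT);
        svd->rcookie = HAT_INVALID_REGION_COOKIE;
    }

On x86 the hat_i86.c stubs below report HAT_SHARED_REGIONS as unsupported, so segvn_init() clears segvn_use_regions and this path is never taken there.
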
Diffstat (limited to 'usr')
-rw-r--r--  usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs | 11
-rw-r--r--  usr/src/uts/common/os/exec.c | 2
-rw-r--r--  usr/src/uts/common/os/mem_cage.c | 4
-rw-r--r--  usr/src/uts/common/os/vm_pageout.c | 2
-rw-r--r--  usr/src/uts/common/sys/exec.h | 1
-rw-r--r--  usr/src/uts/common/vm/hat.h | 36
-rw-r--r--  usr/src/uts/common/vm/seg_spt.c | 6
-rw-r--r--  usr/src/uts/common/vm/seg_vn.c | 382
-rw-r--r--  usr/src/uts/common/vm/seg_vn.h | 1
-rw-r--r--  usr/src/uts/common/vm/vm_as.c | 1
-rw-r--r--  usr/src/uts/common/vm/vm_page.c | 2
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c | 75
-rw-r--r--  usr/src/uts/sfmmu/ml/sfmmu_asm.s | 691
-rw-r--r--  usr/src/uts/sfmmu/ml/sfmmu_kdi.s | 10
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.c | 4251
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.h | 531
-rw-r--r--  usr/src/uts/sun4/ml/offsets.in | 1
-rw-r--r--  usr/src/uts/sun4/ml/swtch.s | 27
-rw-r--r--  usr/src/uts/sun4/os/startup.c | 96
-rw-r--r--  usr/src/uts/sun4/vm/sfmmu.c | 9
-rw-r--r--  usr/src/uts/sun4/vm/vm_dep.c | 7
-rw-r--r--  usr/src/uts/sun4u/cpu/opl_olympus.c | 11
-rw-r--r--  usr/src/uts/sun4u/cpu/spitfire.c | 4
-rw-r--r--  usr/src/uts/sun4u/cpu/us3_common.c | 9
-rw-r--r--  usr/src/uts/sun4u/cpu/us3_common_mmu.c | 51
-rw-r--r--  usr/src/uts/sun4u/ml/mach_offsets.in | 25
-rw-r--r--  usr/src/uts/sun4u/ml/trap_table.s | 71
-rw-r--r--  usr/src/uts/sun4u/sys/machsystm.h | 2
-rw-r--r--  usr/src/uts/sun4u/vm/mach_sfmmu.h | 21
-rw-r--r--  usr/src/uts/sun4u/vm/mach_sfmmu_asm.s | 8
-rw-r--r--  usr/src/uts/sun4v/cpu/niagara.c | 2
-rw-r--r--  usr/src/uts/sun4v/ml/mach_interrupt.s | 60
-rw-r--r--  usr/src/uts/sun4v/ml/mach_locore.s | 14
-rw-r--r--  usr/src/uts/sun4v/ml/mach_offsets.in | 36
-rw-r--r--  usr/src/uts/sun4v/ml/trap_table.s | 30
-rw-r--r--  usr/src/uts/sun4v/os/fillsysinfo.c | 56
-rw-r--r--  usr/src/uts/sun4v/sys/machparam.h | 1
-rw-r--r--  usr/src/uts/sun4v/sys/machsystm.h | 2
-rw-r--r--  usr/src/uts/sun4v/sys/mmu.h | 12
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu.c | 145
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu.h | 173
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu_asm.s | 157
42 files changed, 5909 insertions, 1127 deletions
diff --git a/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs b/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs
index d4e71d41ff..d35a72689b 100644
--- a/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs
+++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -489,6 +489,10 @@ save_sfmmu_global_stat(HV *self, kstat_t *kp, int strip_str)
SAVE_INT32(self, sfmmugp, sf_tsb_alloc);
SAVE_INT32(self, sfmmugp, sf_tsb_allocfail);
SAVE_INT32(self, sfmmugp, sf_tsb_sectsb_create);
+ SAVE_INT32(self, sfmmugp, sf_scd_1sttsb_alloc);
+ SAVE_INT32(self, sfmmugp, sf_scd_2ndtsb_alloc);
+ SAVE_INT32(self, sfmmugp, sf_scd_1sttsb_allocfail);
+ SAVE_INT32(self, sfmmugp, sf_scd_2ndtsb_allocfail);
SAVE_INT32(self, sfmmugp, sf_tteload8k);
SAVE_INT32(self, sfmmugp, sf_tteload64k);
SAVE_INT32(self, sfmmugp, sf_tteload512k);
@@ -530,6 +534,11 @@ save_sfmmu_global_stat(HV *self, kstat_t *kp, int strip_str)
SAVE_INT32(self, sfmmugp, sf_user_vtop);
SAVE_INT32(self, sfmmugp, sf_ctx_inv);
SAVE_INT32(self, sfmmugp, sf_tlb_reprog_pgsz);
+ SAVE_INT32(self, sfmmugp, sf_region_remap_demap);
+ SAVE_INT32(self, sfmmugp, sf_create_scd);
+ SAVE_INT32(self, sfmmugp, sf_join_scd);
+ SAVE_INT32(self, sfmmugp, sf_leave_scd);
+ SAVE_INT32(self, sfmmugp, sf_destroy_scd);
}
#endif
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index a17678863b..652a01c34c 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -623,6 +623,7 @@ gexec(
args->stk_prot &= ~PROT_EXEC;
args->execswp = eswp; /* Save execsw pointer in uarg for exec_func */
+ args->ex_vp = vp;
/*
* Traditionally, the setid flags told the sub processes whether
@@ -1819,6 +1820,7 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
if (p->p_model == DATAMODEL_ILP32)
as->a_userlimit = (caddr_t)USERLIMIT32;
(void) hat_setup(as->a_hat, HAT_ALLOC);
+ hat_join_srd(as->a_hat, args->ex_vp);
/*
* Finally, write out the contents of the new stack.
diff --git a/usr/src/uts/common/os/mem_cage.c b/usr/src/uts/common/os/mem_cage.c
index 97f4cce08c..beb2fe3cbe 100644
--- a/usr/src/uts/common/os/mem_cage.c
+++ b/usr/src/uts/common/os/mem_cage.c
@@ -1676,7 +1676,7 @@ kcage_cageout()
int last_pass;
int pages_skipped;
int shared_skipped;
- uint_t shared_level = 8;
+ ulong_t shared_level = 8;
pgcnt_t nfreed;
#ifdef KCAGE_STATS
clock_t scan_start;
@@ -1807,7 +1807,7 @@ again:
}
KCAGE_STAT_SET_SCAN(kt_skiplevel, shared_level);
- if (hat_page_getshare(pp) > shared_level) {
+ if (hat_page_checkshare(pp, shared_level)) {
page_unlock(pp);
pages_skipped = 1;
shared_skipped = 1;
diff --git a/usr/src/uts/common/os/vm_pageout.c b/usr/src/uts/common/os/vm_pageout.c
index 6a514e0174..e5c80e9bfd 100644
--- a/usr/src/uts/common/os/vm_pageout.c
+++ b/usr/src/uts/common/os/vm_pageout.c
@@ -950,7 +950,7 @@ checkpage(struct page *pp, int whichhand)
*/
top:
if ((PP_ISKAS(pp)) || (PP_ISFREE(pp)) ||
- (hat_page_getshare(pp) > po_share) || PAGE_LOCKED(pp)) {
+ hat_page_checkshare(pp, po_share) || PAGE_LOCKED(pp)) {
return (-1);
}
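
The two hunks above (mem_cage.c and vm_pageout.c) replace direct comparisons of hat_page_getshare() against a threshold with the new hat_page_checkshare() predicate declared in hat.h further below, presumably so that a HAT that maps pages through shared regions can perform the threshold check itself. A minimal before/after illustration (not from the patch; po_share and shared_level are the thresholds used in the hunks above):

    /* before: the caller fetches the mapping count and compares */
    if (hat_page_getshare(pp) > po_share)
        return (-1);        /* too widely shared to page out */

    /* after: the HAT answers the threshold question directly */
    if (hat_page_checkshare(pp, po_share))
        return (-1);

The x86 implementation added in hat_i86.c below is simply hat_page_getshare(pp) > sh_thresh, so behavior there is unchanged.
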
diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h
index d1243a926f..a7ebf7dbb0 100644
--- a/usr/src/uts/common/sys/exec.h
+++ b/usr/src/uts/common/sys/exec.h
@@ -104,6 +104,7 @@ typedef struct uarg {
struct execsw *execswp;
uintptr_t entry;
uintptr_t thrptr;
+ vnode_t *ex_vp;
char *emulator;
char *brandname;
char *auxp_brand_phdr; /* addr of brand phdr auxv on user stack */
diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h
index 6192e1aacb..f19b3f59c6 100644
--- a/usr/src/uts/common/vm/hat.h
+++ b/usr/src/uts/common/vm/hat.h
@@ -80,6 +80,8 @@ typedef struct hat_callback {
void *hcb_data;
} hat_callback_t;
+typedef void *hat_region_cookie_t;
+
#ifdef _KERNEL
/*
@@ -205,9 +207,16 @@ void hat_thread_exit(kthread_t *);
void hat_memload(struct hat *, caddr_t, struct page *, uint_t, uint_t);
void hat_memload_array(struct hat *, caddr_t, size_t, struct page **,
uint_t, uint_t);
+void hat_memload_region(struct hat *, caddr_t, struct page *, uint_t,
+ uint_t, hat_region_cookie_t);
+void hat_memload_array_region(struct hat *, caddr_t, size_t, struct page **,
+ uint_t, uint_t, hat_region_cookie_t);
void hat_devload(struct hat *, caddr_t, size_t, pfn_t, uint_t, int);
+
void hat_unlock(struct hat *, caddr_t, size_t);
+void hat_unlock_region(struct hat *, caddr_t, size_t, hat_region_cookie_t);
+
void hat_unload(struct hat *, caddr_t, size_t, uint_t);
void hat_unload_callback(struct hat *, caddr_t, size_t, uint_t,
hat_callback_t *);
@@ -293,6 +302,7 @@ uint_t hat_page_getattr(struct page *, uint_t);
int hat_pageunload(struct page *, uint_t);
uint_t hat_pagesync(struct page *, uint_t);
ulong_t hat_page_getshare(struct page *);
+int hat_page_checkshare(struct page *, ulong_t);
faultcode_t hat_softlock(struct hat *, caddr_t, size_t *,
struct page **, uint_t);
void hat_page_demote(struct page *);
@@ -303,7 +313,8 @@ void hat_page_demote(struct page *);
enum hat_features {
HAT_SHARED_PT, /* Shared page tables */
HAT_DYNAMIC_ISM_UNMAP, /* hat_pageunload() handles ISM pages */
- HAT_VMODSORT /* support for VMODSORT flag of vnode */
+ HAT_VMODSORT, /* support for VMODSORT flag of vnode */
+ HAT_SHARED_REGIONS /* shared regions support */
};
int hat_supported(enum hat_features, void *);
@@ -445,6 +456,7 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t);
*/
#define HAT_DUP_ALL 1
#define HAT_DUP_COW 2
+#define HAT_DUP_SRD 3
/*
@@ -600,6 +612,28 @@ extern struct hrmstat **hrm_hashtab;
void hat_enter(struct hat *);
void hat_exit(struct hat *);
+typedef void (*hat_rgn_cb_func_t)(caddr_t, caddr_t, caddr_t,
+ size_t, void *, u_offset_t);
+
+void hat_join_srd(struct hat *, vnode_t *);
+
+hat_region_cookie_t hat_join_region(struct hat *, caddr_t, size_t, void *,
+ u_offset_t, uchar_t, uchar_t, hat_rgn_cb_func_t,
+ uint_t);
+void hat_leave_region(struct hat *, hat_region_cookie_t,
+ uint_t);
+void hat_dup_region(struct hat *, hat_region_cookie_t);
+
+#define HAT_INVALID_REGION_COOKIE ((hat_region_cookie_t)-1)
+#define HAT_IS_REGION_COOKIE_VALID(c) ((c) != HAT_INVALID_REGION_COOKIE)
+
+/* hat_join_region() flags */
+
+#define HAT_REGION_TEXT 0x1 /* passed by segvn */
+#define HAT_REGION_ISM 0x2 /* for hat_share()/hat_unshare() */
+
+#define HAT_REGION_TYPE_MASK (0x7)
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c
index d694d68d7d..1f8f0c9173 100644
--- a/usr/src/uts/common/vm/seg_spt.c
+++ b/usr/src/uts/common/vm/seg_spt.c
@@ -633,10 +633,10 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
npages = btop(len);
- hat_flags = HAT_UNLOAD_UNLOCK;
+ hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
(sptd->spt_flags & SHM_PAGEABLE)) {
- hat_flags = HAT_UNLOAD;
+ hat_flags = HAT_UNLOAD_UNMAP;
}
hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
@@ -679,7 +679,7 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
* permanent lock on it and invalidate the page.
*/
if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
- if (hat_flags == HAT_UNLOAD)
+ if (hat_flags == HAT_UNLOAD_UNMAP)
pp = page_lookup(vp, off, SE_EXCL);
else {
if ((pp = page_find(vp, off)) == NULL) {
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 4a63a73857..8240747290 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -195,6 +195,9 @@ static struct seg *segvn_split_seg(struct seg *, caddr_t);
static int segvn_claim_pages(struct seg *, struct vpage *, u_offset_t,
ulong_t, uint_t);
+static void segvn_hat_rgn_unload_callback(caddr_t, caddr_t, caddr_t,
+ size_t, void *, u_offset_t);
+
static int segvn_pp_lock_anonpages(page_t *, int);
static void segvn_pp_unlock_anonpages(page_t *, int);
@@ -298,6 +301,8 @@ ulong_t segvn_faultvnmpss_align_err4;
ulong_t segvn_faultvnmpss_align_err5;
ulong_t segvn_vmpss_pageio_deadlk_err;
+int segvn_use_regions = 1;
+
/*
* Segvn supports text replication optimization for NUMA platforms. Text
* replica's are represented by anon maps (amp). There's one amp per text file
@@ -407,6 +412,21 @@ segvn_init(void)
if (segvn_maxpgszc == 0 || segvn_maxpgszc > maxszc)
segvn_maxpgszc = maxszc;
+ if (segvn_use_regions && !hat_supported(HAT_SHARED_REGIONS, NULL))
+ segvn_use_regions = 0;
+
+ /*
+ * For now shared regions and text replication segvn support
+ * are mutually exclusive. This is acceptable because
+ * currently significant benefit from text replication was
+ * only observed on AMD64 NUMA platforms (due to relatively
+ * small L2$ size) and currently we don't support shared
+ * regions on x86.
+ */
+ if (segvn_use_regions && !segvn_disable_textrepl) {
+ segvn_disable_textrepl = 1;
+ }
+
if (lgrp_optimizations() && textrepl_size_thresh != (size_t)-1 &&
!segvn_disable_textrepl) {
ulong_t i;
@@ -476,9 +496,9 @@ segvn_create(struct seg *seg, void *argsp)
int error = 0;
size_t pgsz;
lgrp_mem_policy_t mpolicy = LGRP_MEM_POLICY_DEFAULT;
+ int use_rgn = 0;
int trok = 0;
-
ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
if (a->type != MAP_PRIVATE && a->type != MAP_SHARED) {
@@ -495,6 +515,12 @@ segvn_create(struct seg *seg, void *argsp)
/*NOTREACHED*/
}
+ if (a->type == MAP_PRIVATE && (a->flags & MAP_TEXT) &&
+ a->vp != NULL && a->prot == (PROT_USER | PROT_READ | PROT_EXEC) &&
+ segvn_use_regions) {
+ use_rgn = 1;
+ }
+
/* MAP_NORESERVE on a MAP_SHARED segment is meaningless. */
if (a->type == MAP_SHARED)
a->flags &= ~MAP_NORESERVE;
@@ -548,8 +574,13 @@ segvn_create(struct seg *seg, void *argsp)
/*
* Reserve any mapping structures that may be required.
+ *
+ * Don't do it for segments that may use regions. It's currently a
+ * noop in the hat implementations anyway.
*/
- hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
+ if (!use_rgn) {
+ hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
+ }
if (a->cred) {
cred = a->cred;
@@ -571,10 +602,15 @@ segvn_create(struct seg *seg, void *argsp)
seg, swresv, 0);
}
crfree(cred);
- hat_unload(seg->s_as->a_hat, seg->s_base,
- seg->s_size, HAT_UNLOAD_UNMAP);
+ if (!use_rgn) {
+ hat_unload(seg->s_as->a_hat, seg->s_base,
+ seg->s_size, HAT_UNLOAD_UNMAP);
+ }
return (error);
}
+ /*
+ * svntr_hashtab will be NULL if we support shared regions.
+ */
trok = ((a->flags & MAP_TEXT) &&
(seg->s_size > textrepl_size_thresh ||
(a->flags & _MAP_TEXTREPL)) &&
@@ -582,6 +618,8 @@ segvn_create(struct seg *seg, void *argsp)
a->type == MAP_PRIVATE && swresv == 0 &&
!(a->flags & MAP_NORESERVE) &&
seg->s_as != &kas && a->vp->v_type == VREG);
+
+ ASSERT(!trok || !use_rgn);
}
/*
@@ -590,7 +628,7 @@ segvn_create(struct seg *seg, void *argsp)
* explicit anon_map structure was supplied (e.g., SystemV shared
* memory) or if we'll use text replication for this segment.
*/
- if (a->amp == NULL && !trok) {
+ if (a->amp == NULL && !use_rgn && !trok) {
struct seg *pseg, *nseg;
struct segvn_data *psvd, *nsvd;
lgrp_mem_policy_t ppolicy, npolicy;
@@ -730,6 +768,8 @@ segvn_create(struct seg *seg, void *argsp)
svd->pageadvice = 0;
svd->flags = (ushort_t)a->flags;
svd->softlockcnt = 0;
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
+
if (a->szc != 0 && a->vp != NULL) {
segvn_setvnode_mpss(a->vp);
}
@@ -858,6 +898,7 @@ segvn_create(struct seg *seg, void *argsp)
ASSERT(seg->s_szc == 0);
ASSERT(!IS_VMODSORT(pp->p_vnode));
+ ASSERT(use_rgn == 0);
hat_memload(seg->s_as->a_hat, addr, pp,
svd->prot & ~PROT_WRITE, hat_flag);
@@ -882,6 +923,15 @@ segvn_create(struct seg *seg, void *argsp)
(void) lgrp_shm_policy_set(mpolicy, svd->amp, svd->anon_index,
svd->vp, svd->offset, seg->s_size);
+ if (use_rgn) {
+ ASSERT(!trok);
+ ASSERT(svd->amp == NULL);
+ svd->rcookie = hat_join_region(seg->s_as->a_hat, seg->s_base,
+ seg->s_size, (void *)svd->vp, svd->offset, svd->prot,
+ (uchar_t)seg->s_szc, segvn_hat_rgn_unload_callback,
+ HAT_REGION_TEXT);
+ }
+
ASSERT(!trok || !(svd->prot & PROT_WRITE));
svd->tr_state = trok ? SEGVN_TR_INIT : SEGVN_TR_OFF;
@@ -910,6 +960,11 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat)
ASSERT(AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock));
ASSERT(seg1->s_ops == seg2->s_ops);
+ if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie) ||
+ HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) {
+ return (-1);
+ }
+
/* both segments exist, try to merge them */
#define incompat(x) (svd1->x != svd2->x)
if (incompat(vp) || incompat(maxprot) ||
@@ -968,26 +1023,22 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat)
if ((nvpage = kmem_zalloc(nvpsize, KM_NOSLEEP)) == NULL) {
return (-2);
}
+
if (vpage1 != NULL) {
bcopy(vpage1, nvpage, vpgtob(npages1));
- }
- if (vpage2 != NULL) {
- bcopy(vpage2, nvpage + npages1, vpgtob(npages2));
- }
- for (vp = nvpage; vp < nvpage + npages1; vp++) {
- if (svd2->pageprot && !svd1->pageprot) {
+ } else {
+ for (vp = nvpage; vp < nvpage + npages1; vp++) {
VPP_SETPROT(vp, svd1->prot);
- }
- if (svd2->pageadvice && !svd1->pageadvice) {
VPP_SETADVICE(vp, svd1->advice);
}
}
- for (vp = nvpage + npages1;
- vp < nvpage + npages1 + npages2; vp++) {
- if (svd1->pageprot && !svd2->pageprot) {
+
+ if (vpage2 != NULL) {
+ bcopy(vpage2, nvpage + npages1, vpgtob(npages2));
+ } else {
+ for (vp = nvpage + npages1;
+ vp < nvpage + npages1 + npages2; vp++) {
VPP_SETPROT(vp, svd2->prot);
- }
- if (svd1->pageadvice && !svd2->pageadvice) {
VPP_SETADVICE(vp, svd2->advice);
}
}
@@ -1126,6 +1177,10 @@ segvn_extend_prev(seg1, seg2, a, swresv)
*/
ASSERT(seg1->s_as && AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock));
+ if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie)) {
+ return (-1);
+ }
+
/* second segment is new, try to extend first */
/* XXX - should also check cred */
if (svd1->vp != a->vp || svd1->maxprot != a->maxprot ||
@@ -1183,6 +1238,7 @@ segvn_extend_prev(seg1, seg2, a, swresv)
ANON_LOCK_EXIT(&amp1->a_rwlock);
}
if (svd1->vpage != NULL) {
+ struct vpage *vp, *evp;
new_vpage =
kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)),
KM_NOSLEEP);
@@ -1191,14 +1247,11 @@ segvn_extend_prev(seg1, seg2, a, swresv)
bcopy(svd1->vpage, new_vpage, vpgtob(seg_pages(seg1)));
kmem_free(svd1->vpage, vpgtob(seg_pages(seg1)));
svd1->vpage = new_vpage;
- if (svd1->pageprot) {
- struct vpage *vp, *evp;
- vp = new_vpage + seg_pages(seg1);
- evp = vp + seg_pages(seg2);
- for (; vp < evp; vp++)
- VPP_SETPROT(vp, a->prot);
- }
+ vp = new_vpage + seg_pages(seg1);
+ evp = vp + seg_pages(seg2);
+ for (; vp < evp; vp++)
+ VPP_SETPROT(vp, a->prot);
}
size = seg2->s_size;
seg_free(seg2);
@@ -1236,6 +1289,10 @@ segvn_extend_next(
*/
ASSERT(seg2->s_as && AS_WRITE_HELD(seg2->s_as, &seg2->s_as->a_lock));
+ if (HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) {
+ return (-1);
+ }
+
/* first segment is new, try to extend second */
/* XXX - should also check cred */
if (svd2->vp != a->vp || svd2->maxprot != a->maxprot ||
@@ -1288,6 +1345,7 @@ segvn_extend_next(
ANON_LOCK_EXIT(&amp2->a_rwlock);
}
if (svd2->vpage != NULL) {
+ struct vpage *vp, *evp;
new_vpage =
kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)),
KM_NOSLEEP);
@@ -1301,14 +1359,11 @@ segvn_extend_next(
vpgtob(seg_pages(seg2)));
kmem_free(svd2->vpage, vpgtob(seg_pages(seg2)));
svd2->vpage = new_vpage;
- if (svd2->pageprot) {
- struct vpage *vp, *evp;
- vp = new_vpage;
- evp = vp + seg_pages(seg1);
- for (; vp < evp; vp++)
- VPP_SETPROT(vp, a->prot);
- }
+ vp = new_vpage;
+ evp = vp + seg_pages(seg1);
+ for (; vp < evp; vp++)
+ VPP_SETPROT(vp, a->prot);
}
size = seg1->s_size;
seg_free(seg1);
@@ -1379,10 +1434,14 @@ segvn_dup(struct seg *seg, struct seg *newseg)
newsvd->flags = svd->flags;
newsvd->softlockcnt = 0;
newsvd->policy_info = svd->policy_info;
+ newsvd->rcookie = HAT_INVALID_REGION_COOKIE;
+
if ((amp = svd->amp) == NULL || svd->tr_state == SEGVN_TR_ON) {
/*
* Not attaching to a shared anon object.
*/
+ ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie) ||
+ svd->tr_state == SEGVN_TR_OFF);
if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(newsvd->vp != NULL && amp != NULL);
newsvd->tr_state = SEGVN_TR_INIT;
@@ -1392,6 +1451,8 @@ segvn_dup(struct seg *seg, struct seg *newseg)
newsvd->amp = NULL;
newsvd->anon_index = 0;
} else {
+ /* regions for now are only used on pure vnode segments */
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
ASSERT(svd->tr_state == SEGVN_TR_OFF);
newsvd->tr_state = SEGVN_TR_OFF;
if (svd->type == MAP_SHARED) {
@@ -1555,6 +1616,12 @@ retry:
newsvd->maxprot, newsvd->type, newsvd->cred);
}
out:
+ if (error == 0 && HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(newsvd->amp == NULL);
+ ASSERT(newsvd->tr_state == SEGVN_TR_OFF);
+ newsvd->rcookie = svd->rcookie;
+ hat_dup_region(newseg->s_as->a_hat, newsvd->rcookie);
+ }
return (error);
}
@@ -1566,6 +1633,30 @@ out:
extern int free_pages;
static void
+segvn_hat_rgn_unload_callback(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr,
+ size_t r_size, void *r_obj, u_offset_t r_objoff)
+{
+ u_offset_t off;
+ size_t len;
+ vnode_t *vp = (vnode_t *)r_obj;
+
+ ASSERT(eaddr > saddr);
+ ASSERT(saddr >= r_saddr);
+ ASSERT(saddr < r_saddr + r_size);
+ ASSERT(eaddr > r_saddr);
+ ASSERT(eaddr <= r_saddr + r_size);
+ ASSERT(vp != NULL);
+
+ if (!free_pages) {
+ return;
+ }
+
+ len = eaddr - saddr;
+ off = (saddr - r_saddr) + r_objoff;
+ free_vp_pages(vp, off, len);
+}
+
+static void
segvn_hat_unload_callback(hat_callback_t *cb)
{
struct seg *seg = cb->hcb_data;
@@ -1582,7 +1673,6 @@ segvn_hat_unload_callback(hat_callback_t *cb)
free_vp_pages(svd->vp, svd->offset + off, len);
}
-
static int
segvn_unmap(struct seg *seg, caddr_t addr, size_t len)
{
@@ -1599,7 +1689,6 @@ segvn_unmap(struct seg *seg, caddr_t addr, size_t len)
size_t nsize;
size_t oswresv;
int reclaim = 1;
- int unmap = 1;
/*
* We don't need any segment level locks for "segvn" data
@@ -1641,7 +1730,19 @@ retry:
int err;
if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) {
ASSERT(seg->s_base != addr || seg->s_size != len);
- if (svd->tr_state == SEGVN_TR_INIT) {
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ hat_leave_region(seg->s_as->a_hat,
+ svd->rcookie, HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
+ /*
+ * could pass a flag to segvn_demote_range()
+ * below to tell it not to do any unloads but
+ * this case is rare enough to not bother for
+ * now.
+ */
+ } else if (svd->tr_state == SEGVN_TR_INIT) {
svd->tr_state = SEGVN_TR_OFF;
} else if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(svd->amp != NULL);
@@ -1671,25 +1772,35 @@ retry:
return (error);
}
- if (svd->tr_state == SEGVN_TR_INIT) {
- svd->tr_state = SEGVN_TR_OFF;
+ /*
+ * Remove any page locks set through this mapping.
+ * If text replication is not off no page locks could have been
+ * established via this mapping.
+ */
+ if (svd->tr_state == SEGVN_TR_OFF) {
+ (void) segvn_lockop(seg, addr, len, 0, MC_UNLOCK, NULL, 0);
+ }
+
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ ASSERT(svd->type == MAP_PRIVATE);
+ hat_leave_region(seg->s_as->a_hat, svd->rcookie,
+ HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
} else if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(svd->amp != NULL);
ASSERT(svd->pageprot == 0 && !(svd->prot & PROT_WRITE));
segvn_textunrepl(seg, 1);
ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF);
- unmap = 0;
- }
-
- /*
- * Remove any page locks set through this mapping.
- */
- (void) segvn_lockop(seg, addr, len, 0, MC_UNLOCK, NULL, 0);
-
- if (unmap) {
+ } else {
+ if (svd->tr_state != SEGVN_TR_OFF) {
+ ASSERT(svd->tr_state == SEGVN_TR_INIT);
+ svd->tr_state = SEGVN_TR_OFF;
+ }
/*
* Unload any hardware translations in the range to be taken
- * out. Use a callback to invoke free_vp_pages() effectively.
+ * out. Use a callback to invoke free_vp_pages() effectively.
*/
if (svd->vp != NULL && free_pages != 0) {
callback.hcb_data = seg;
@@ -1892,6 +2003,7 @@ retry:
nsvd->offset = svd->offset + (uintptr_t)(nseg->s_base - seg->s_base);
nsvd->swresv = 0;
nsvd->softlockcnt = 0;
+ ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE);
if (svd->vp != NULL) {
VN_HOLD(nsvd->vp);
@@ -2033,6 +2145,8 @@ segvn_free(struct seg *seg)
ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
+
/*
* Be sure to unlock pages. XXX Why do things get free'ed instead
* of unmapped? XXX
@@ -2294,7 +2408,12 @@ segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
if ((amp = svd->amp) != NULL)
anon_index = svd->anon_index + seg_page(seg, addr);
- hat_unlock(seg->s_as->a_hat, addr, len);
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ hat_unlock_region(seg->s_as->a_hat, addr, len, svd->rcookie);
+ } else {
+ hat_unlock(seg->s_as->a_hat, addr, len);
+ }
for (adr = addr; adr < addr + len; adr += PAGESIZE) {
if (amp != NULL) {
ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
@@ -2453,6 +2572,7 @@ segvn_faultpage(
ASSERT(SEGVN_READ_HELD(seg->s_as, &svd->lock));
ASSERT(seg->s_szc == 0);
+ ASSERT(svd->tr_state != SEGVN_TR_INIT);
/*
* Initialize protection value for this page.
@@ -2616,6 +2736,7 @@ segvn_faultpage(
mutex_exit(&p->p_lock);
}
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
hat_memload(hat, addr, pp, prot, hat_flag);
if (!(hat_flag & HAT_LOAD_LOCK))
@@ -2740,7 +2861,12 @@ segvn_faultpage(
prot &= ~PROT_WRITE;
}
- hat_memload(hat, addr, opp, prot & vpprot, hat_flag);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE ||
+ (!svd->pageprot && svd->prot == (prot & vpprot)));
+ ASSERT(amp == NULL ||
+ svd->rcookie == HAT_INVALID_REGION_COOKIE);
+ hat_memload_region(hat, addr, opp, prot & vpprot, hat_flag,
+ svd->rcookie);
if (!(hat_flag & HAT_LOAD_LOCK))
page_unlock(opp);
@@ -2751,6 +2877,8 @@ segvn_faultpage(
return (0);
}
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
+
hat_setref(opp);
ASSERT(amp != NULL && anon_lock);
@@ -2784,6 +2912,7 @@ segvn_faultpage(
* after unloading our translation.
*/
if (hat_page_is_mapped(opp)) {
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
hat_unload(seg->s_as->a_hat, addr, PAGESIZE,
HAT_UNLOAD);
}
@@ -2872,6 +3001,7 @@ segvn_faultpage(
prot &= ~PROT_WRITE;
}
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
hat_memload(hat, addr, pp, prot, hat_flag);
if (!(hat_flag & HAT_LOAD_LOCK))
@@ -3642,6 +3772,7 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
ASSERT(seg->s_szc < NBBY * sizeof (int));
ASSERT(type != F_SOFTLOCK || lpgeaddr - a == maxpgsz);
+ ASSERT(svd->tr_state != SEGVN_TR_INIT);
VM_STAT_COND_ADD(type == F_SOFTLOCK, segvnvmstats.fltvnpages[0]);
VM_STAT_COND_ADD(type != F_SOFTLOCK, segvnvmstats.fltvnpages[1]);
@@ -3962,6 +4093,8 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
* p_szc can't be changed for locked
* swapfs pages.
*/
+ ASSERT(svd->rcookie ==
+ HAT_INVALID_REGION_COOKIE);
hat_memload_array(hat, a, pgsz, ppa, prot,
hat_flag);
@@ -3976,9 +4109,12 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
goto next;
}
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE ||
+ (!svd->pageprot && svd->prot == (prot & vpprot)));
+
pfn = page_pptonum(ppa[0]);
/*
- * hat_page_demote() needs an EXCl lock on one of
+ * hat_page_demote() needs an SE_EXCL lock on one of
* constituent page_t's and it decreases root's p_szc
* last. This means if root's p_szc is equal szc and
* all its constituent pages are locked
@@ -4036,14 +4172,16 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
SEGVN_UPDATE_MODBITS(ppa, pages, rw,
prot, vpprot);
if (!xhat) {
- hat_memload_array(hat, a, pgsz, ppa,
- prot & vpprot, hat_flag);
+ hat_memload_array_region(hat, a, pgsz,
+ ppa, prot & vpprot, hat_flag,
+ svd->rcookie);
} else {
/*
* avoid large xhat mappings to FS
* pages so that hat_page_demote()
* doesn't need to check for xhat
* large mappings.
+ * Don't use regions with xhats.
*/
for (i = 0; i < pages; i++) {
hat_memload(hat,
@@ -4149,14 +4287,15 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
prot, vpprot);
if (upgrdfail && segvn_anypgsz_vnode) {
/* SOFTLOCK case */
- hat_memload_array(hat, a, pgsz,
- ppa, prot & vpprot, hat_flag);
+ hat_memload_array_region(hat, a, pgsz,
+ ppa, prot & vpprot, hat_flag,
+ svd->rcookie);
} else {
for (i = 0; i < pages; i++) {
- hat_memload(hat,
+ hat_memload_region(hat,
a + (i << PAGESHIFT),
ppa[i], prot & vpprot,
- hat_flag);
+ hat_flag, svd->rcookie);
}
}
if (!(hat_flag & HAT_LOAD_LOCK)) {
@@ -4214,8 +4353,8 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
}
SEGVN_UPDATE_MODBITS(ppa, pages, rw,
prot, vpprot);
- hat_memload_array(hat, a, pgsz, ppa,
- prot & vpprot, hat_flag);
+ hat_memload_array_region(hat, a, pgsz, ppa,
+ prot & vpprot, hat_flag, svd->rcookie);
mutex_exit(szcmtx);
if (!(hat_flag & HAT_LOAD_LOCK)) {
for (i = 0; i < pages; i++) {
@@ -4267,13 +4406,15 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
ASSERT(type == F_SOFTLOCK);
for (i = 0; i < pages; i++) {
ASSERT(ppa[i]->p_szc < szc);
- hat_memload(hat, a + (i << PAGESHIFT),
- ppa[i], prot & vpprot, hat_flag);
+ hat_memload_region(hat,
+ a + (i << PAGESHIFT),
+ ppa[i], prot & vpprot, hat_flag,
+ svd->rcookie);
}
} else {
ASSERT(pplist != NULL || type == F_SOFTLOCK);
- hat_memload_array(hat, a, pgsz, ppa,
- prot & vpprot, hat_flag);
+ hat_memload_array_region(hat, a, pgsz, ppa,
+ prot & vpprot, hat_flag, svd->rcookie);
}
if (!(hat_flag & HAT_LOAD_LOCK)) {
for (i = 0; i < pages; i++) {
@@ -4452,6 +4593,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
ASSERT(type != F_SOFTUNLOCK);
ASSERT(IS_P2ALIGNED(a, maxpgsz));
ASSERT(!brkcow || svd->tr_state == SEGVN_TR_OFF);
+ ASSERT(svd->tr_state != SEGVN_TR_INIT);
ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
@@ -4559,6 +4701,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr,
if (lgrp_optimizations())
page_migrate(seg, a, ppa, pages);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
if (type == F_SOFTLOCK && svd->vp == NULL) {
/*
* All pages in ppa array belong to the same
@@ -4769,6 +4912,7 @@ segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+ ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
/*
* First handle the easy stuff
@@ -4788,6 +4932,8 @@ segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
return (0);
}
+ ASSERT(svd->tr_state == SEGVN_TR_OFF ||
+ !HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
if (brkcow == 0) {
if (svd->tr_state == SEGVN_TR_INIT) {
SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
@@ -4804,6 +4950,13 @@ segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
}
} else if (svd->tr_state != SEGVN_TR_OFF) {
SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
+
+ if (rw == S_WRITE && svd->tr_state != SEGVN_TR_OFF) {
+ ASSERT(!svd->pageprot && !(svd->prot & PROT_WRITE));
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (FC_PROT);
+ }
+
if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(svd->vp != NULL && svd->amp != NULL);
segvn_textunrepl(seg, 0);
@@ -4850,6 +5003,26 @@ top:
}
}
+ if (brkcow && HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ /* this must be SOFTLOCK S_READ fault */
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ /*
+ * this must be the first ever non S_READ_NOCOW
+ * softlock for this segment.
+ */
+ ASSERT(svd->softlockcnt == 0);
+ hat_leave_region(seg->s_as->a_hat, svd->rcookie,
+ HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
+ }
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ goto top;
+ }
+
/*
* We can't allow the long term use of softlocks for vmpss segments,
* because in some file truncation cases we should be able to demote
@@ -4917,6 +5090,7 @@ top:
* Check to see if we need to allocate an anon_map structure.
*/
if (svd->amp == NULL && (svd->vp == NULL || brkcow)) {
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
/*
* Drop the "read" lock on the segment and acquire
* the "write" version since we have to allocate the
@@ -4977,6 +5151,7 @@ top:
page = seg_page(seg, addr);
if (amp != NULL) {
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
anon_index = svd->anon_index + page;
if (type == F_PROT && rw == S_READ &&
@@ -5379,9 +5554,13 @@ slow:
* for migration, so they will get migrated
* properly on fault
*/
+ ASSERT(amp == NULL ||
+ svd->rcookie == HAT_INVALID_REGION_COOKIE);
if ((prot & PROT_READ) && !PP_ISMIGRATE(pp)) {
- hat_memload(hat, seg->s_base + diff,
- pp, prot, hat_flag);
+ hat_memload_region(hat,
+ seg->s_base + diff,
+ pp, prot, hat_flag,
+ svd->rcookie);
}
}
if (amp != NULL)
@@ -5466,6 +5645,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
size_t pgsz;
pgcnt_t pgcnt;
anon_sync_obj_t cookie;
+ int unload_done = 0;
ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
@@ -5500,12 +5680,20 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
}
}
- if (svd->tr_state == SEGVN_TR_INIT) {
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ hat_leave_region(seg->s_as->a_hat, svd->rcookie,
+ HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
+ unload_done = 1;
+ } else if (svd->tr_state == SEGVN_TR_INIT) {
svd->tr_state = SEGVN_TR_OFF;
} else if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(svd->amp != NULL);
segvn_textunrepl(seg, 0);
ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF);
+ unload_done = 1;
}
if ((prot & PROT_WRITE) && svd->type == MAP_SHARED &&
@@ -5513,7 +5701,6 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
ASSERT(vn_is_mapped(svd->vp, V_WRITE));
segvn_inval_trcache(svd->vp);
}
-
if (seg->s_szc != 0) {
int err;
pgsz = page_get_pagesize(seg->s_szc);
@@ -5590,7 +5777,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
}
}
- if (addr == seg->s_base && len == seg->s_size && svd->pageprot == 0) {
+ if (addr == seg->s_base && len == seg->s_size && svd->vpage == NULL) {
if (svd->prot == prot) {
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
return (0); /* all done */
@@ -5613,6 +5800,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
* the operation.
*/
segvn_vpage(seg);
+ svd->pageprot = 1;
if ((amp = svd->amp) != NULL) {
anon_idx = svd->anon_index + seg_page(seg, addr);
ASSERT(seg->s_szc == 0 ||
@@ -5699,6 +5887,10 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
* the translations to the things we've updated so far.
*/
if (svp != evp) {
+ if (unload_done) {
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (IE_NOMEM);
+ }
len = (svp - &svd->vpage[seg_page(seg, addr)]) *
PAGESIZE;
ASSERT(seg->s_szc == 0 || IS_P2ALIGNED(len, pgsz));
@@ -5710,12 +5902,18 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
}
} else {
segvn_vpage(seg);
+ svd->pageprot = 1;
evp = &svd->vpage[seg_page(seg, addr + len)];
for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) {
VPP_SETPROT(svp, prot);
}
}
+ if (unload_done) {
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (0);
+ }
+
if (((prot & PROT_WRITE) != 0 &&
(svd->vp != NULL || svd->type == MAP_PRIVATE)) ||
(prot & ~PROT_USER) == PROT_NONE) {
@@ -5848,7 +6046,13 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
}
}
- if (svd->tr_state == SEGVN_TR_INIT) {
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ hat_leave_region(seg->s_as->a_hat, svd->rcookie,
+ HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
+ } else if (svd->tr_state == SEGVN_TR_INIT) {
svd->tr_state = SEGVN_TR_OFF;
} else if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(svd->amp != NULL);
@@ -5924,6 +6128,7 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
if (err != 0) {
return (err);
}
+ ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE);
err = segvn_concat(seg, nseg, 1);
if (err == -1) {
return (EINVAL);
@@ -6028,27 +6233,34 @@ segvn_clrszc(struct seg *seg)
struct anon *ap, *oldap;
uint_t prot = svd->prot, vpprot;
int pageflag = 0;
- int unmap = 1;
ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) ||
SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
+ ASSERT(svd->softlockcnt == 0);
if (vp == NULL && amp == NULL) {
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
seg->s_szc = 0;
return (0);
}
- if (svd->tr_state == SEGVN_TR_INIT) {
- svd->tr_state = SEGVN_TR_OFF;
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ hat_leave_region(seg->s_as->a_hat, svd->rcookie,
+ HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
} else if (svd->tr_state == SEGVN_TR_ON) {
ASSERT(svd->amp != NULL);
segvn_textunrepl(seg, 1);
ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF);
amp = NULL;
- unmap = 0;
- }
+ } else {
+ if (svd->tr_state != SEGVN_TR_OFF) {
+ ASSERT(svd->tr_state == SEGVN_TR_INIT);
+ svd->tr_state = SEGVN_TR_OFF;
+ }
- if (unmap) {
/*
* do HAT_UNLOAD_UNMAP since we are changing the pagesize.
* unload argument is 0 when we are freeing the segment
@@ -6223,6 +6435,7 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
ASSERT(addr >= seg->s_base);
ASSERT(addr <= seg->s_base + seg->s_size);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
if (addr == seg->s_base || addr == seg->s_base + seg->s_size)
return (seg);
@@ -6236,6 +6449,7 @@ segvn_split_seg(struct seg *seg, caddr_t addr)
nseg->s_data = (void *)nsvd;
nseg->s_szc = seg->s_szc;
*nsvd = *svd;
+ ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE);
nsvd->seg = nseg;
rw_init(&nsvd->lock, NULL, RW_DEFAULT, NULL);
@@ -6369,6 +6583,7 @@ segvn_demote_range(
ASSERT(seg->s_base != addr || seg->s_size != len);
ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
ASSERT(svd->softlockcnt == 0);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
ASSERT(szcvec == 0 || (flag == SDR_END && svd->type == MAP_SHARED));
CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
@@ -7328,6 +7543,7 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
* by lazily testing for its existence.
*/
if (op == MC_LOCK && svd->amp == NULL && svd->vp == NULL) {
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
svd->amp = anonmap_alloc(seg->s_size, 0, ANON_SLEEP);
svd->amp->a_szc = seg->s_szc;
}
@@ -7681,7 +7897,8 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
* if don't need to do lgroup optimizations on this system
*/
- if ((behav == MADV_SEQUENTIAL && seg->s_szc != 0) ||
+ if ((behav == MADV_SEQUENTIAL &&
+ (seg->s_szc != 0 || HAT_IS_REGION_COOKIE_VALID(svd->rcookie))) ||
(!lgrp_optimizations() && (behav == MADV_ACCESS_DEFAULT ||
behav == MADV_ACCESS_LWP || behav == MADV_ACCESS_MANY))) {
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
@@ -7834,6 +8051,7 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
* detection in segvn_fault
*/
ASSERT(seg->s_szc == 0);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
hat_unload(seg->s_as->a_hat, addr, len,
HAT_UNLOAD);
/* FALLTHROUGH */
@@ -7932,6 +8150,15 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
if (already_set || svd->type == MAP_SHARED)
break;
+ if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
+ ASSERT(svd->amp == NULL);
+ ASSERT(svd->tr_state == SEGVN_TR_OFF);
+ ASSERT(svd->softlockcnt == 0);
+ hat_leave_region(seg->s_as->a_hat, svd->rcookie,
+ HAT_REGION_TEXT);
+ svd->rcookie = HAT_INVALID_REGION_COOKIE;
+ }
+
/*
* Split off new segment if advice only applies to a
* portion of existing segment starting in middle
@@ -8053,6 +8280,7 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
break;
case MADV_SEQUENTIAL:
ASSERT(seg->s_szc == 0);
+ ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
/* FALLTHROUGH */
case MADV_NORMAL:
@@ -8091,7 +8319,6 @@ segvn_vpage(struct seg *seg)
* and the advice from the segment itself to the individual pages.
*/
if (svd->vpage == NULL) {
- svd->pageprot = 1;
svd->pageadvice = 1;
svd->vpage = kmem_zalloc(seg_pages(seg) * sizeof (struct vpage),
KM_SLEEP);
@@ -8724,6 +8951,7 @@ segvn_textrepl(struct seg *seg)
ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
ASSERT(p != NULL);
ASSERT(svd->tr_state == SEGVN_TR_INIT);
+ ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
ASSERT(svd->flags & MAP_TEXT);
ASSERT(svd->type == MAP_PRIVATE);
ASSERT(vp != NULL && svd->amp == NULL);
@@ -8991,6 +9219,7 @@ segvn_textunrepl(struct seg *seg, int unload_unmap)
ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) ||
SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
ASSERT(svd->tr_state == SEGVN_TR_ON);
+ ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
ASSERT(svd->amp != NULL);
ASSERT(svd->amp->refcnt >= 1);
ASSERT(svd->anon_index == 0);
@@ -9185,6 +9414,7 @@ segvn_trupdate_seg(struct seg *seg,
ASSERT(seg->s_data == (void *)svd);
ASSERT(seg->s_szc == svntrp->tr_szc);
ASSERT(svd->tr_state == SEGVN_TR_ON);
+ ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
ASSERT(svd->amp != NULL);
ASSERT(svd->tr_policy_info.mem_policy == LGRP_MEM_POLICY_NEXT_SEG);
ASSERT(svd->tr_policy_info.mem_lgrpid != LGRP_NONE);
diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h
index d8c8be8ff4..26bd202636 100644
--- a/usr/src/uts/common/vm/seg_vn.h
+++ b/usr/src/uts/common/vm/seg_vn.h
@@ -103,6 +103,7 @@ typedef struct segvn_data {
ushort_t flags; /* flags - from sys/mman.h */
ssize_t softlockcnt; /* # of pages SOFTLOCKED in seg */
lgrp_mem_policy_info_t policy_info; /* memory allocation policy */
+ hat_region_cookie_t rcookie; /* region for hat calls */
lgrp_mem_policy_info_t tr_policy_info; /* memory allocation for TR */
struct seg *seg; /* pointer back to seg */
struct segvn_data *svn_trnext; /* textrepl list next link */
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index e28e2aaa4c..f5ff9d43cd 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -783,6 +783,7 @@ as_dup(struct as *as, struct as **outas)
AS_SETBUSY(newas);
mutex_exit(&newas->a_contents);
+ (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index ab7581fb36..33139517b3 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -6188,7 +6188,7 @@ page_share_cnt(page_t *pp)
int
page_isshared(page_t *pp)
{
- return (hat_page_getshare(pp) > 1);
+ return (hat_page_checkshare(pp, 1));
}
int
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index e13af4e1ef..007c73214c 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -1466,6 +1466,14 @@ hat_memload(
panic("unexpected hati_load_common() failure");
}
+/* ARGSUSED */
+void
+hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp,
+ uint_t attr, uint_t flags, hat_region_cookie_t rcookie)
+{
+ hat_memload(hat, addr, pp, attr, flags);
+}
+
/*
* Load the given array of page structs using large pages when possible
*/
@@ -1559,6 +1567,15 @@ hat_memload_array(
}
}
+/* ARGSUSED */
+void
+hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len,
+ struct page **pps, uint_t attr, uint_t flags,
+ hat_region_cookie_t rcookie)
+{
+ hat_memload_array(hat, addr, len, pps, attr, flags);
+}
+
/*
* void hat_devload(hat, addr, len, pf, attr, flags)
* load/lock the given page frame number
@@ -1713,6 +1730,14 @@ hat_unlock(hat_t *hat, caddr_t addr, size_t len)
htable_release(ht);
}
+/* ARGSUSED */
+void
+hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len,
+ hat_region_cookie_t rcookie)
+{
+ panic("No shared region support on x86");
+}
+
/*
* Cross call service routine to demap a virtual page on
* the current CPU or flush all mappings in TLB.
@@ -3512,6 +3537,16 @@ hat_page_getshare(page_t *pp)
}
/*
+ * Return 1 if the number of mappings exceeds sh_thresh. Return 0
+ * otherwise.
+ */
+int
+hat_page_checkshare(page_t *pp, ulong_t sh_thresh)
+{
+ return (hat_page_getshare(pp) > sh_thresh);
+}
+
+/*
* hat_softlock isn't supported anymore
*/
/*ARGSUSED*/
@@ -3546,6 +3581,9 @@ hat_supported(enum hat_features feature, void *arg)
case HAT_VMODSORT:
return (1);
+ case HAT_SHARED_REGIONS:
+ return (0);
+
default:
panic("hat_supported() - unknown feature");
}
@@ -3847,6 +3885,43 @@ hati_update_pte(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new)
return (0);
}
+/* ARGSUSED */
+void
+hat_join_srd(struct hat *sfmmup, vnode_t *evp)
+{
+}
+
+/* ARGSUSED */
+hat_region_cookie_t
+hat_join_region(struct hat *sfmmup,
+ caddr_t r_saddr,
+ size_t r_size,
+ void *r_obj,
+ u_offset_t r_objoff,
+ uchar_t r_perm,
+ uchar_t r_pgszc,
+ hat_rgn_cb_func_t r_cb_function,
+ uint_t flags)
+{
+ panic("No shared region support on x86");
+ return (HAT_INVALID_REGION_COOKIE);
+}
+
+/* ARGSUSED */
+void
+hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags)
+{
+ panic("No shared region support on x86");
+}
+
+/* ARGSUSED */
+void
+hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie)
+{
+ panic("No shared region support on x86");
+}
+
+
/*
* Kernel Physical Mapping (kpm) facility
*
diff --git a/usr/src/uts/sfmmu/ml/sfmmu_asm.s b/usr/src/uts/sfmmu/ml/sfmmu_asm.s
index b1e6348e6d..eff84e9e60 100644
--- a/usr/src/uts/sfmmu/ml/sfmmu_asm.s
+++ b/usr/src/uts/sfmmu/ml/sfmmu_asm.s
@@ -492,10 +492,10 @@ sfmmu_enable_intrs(uint_t pstate_save)
{}
/* ARGSUSED */
-void
-sfmmu_alloc_ctx(sfmmu_t *sfmmup, int allocflag, struct cpu *cp)
-{}
-
+int
+sfmmu_alloc_ctx(sfmmu_t *sfmmup, int allocflag, struct cpu *cp, int shflag)
+{ return(0); }
+
/*
* Use cas, if tte has changed underneath us then reread and try again.
* In the case of a retry, it will update sttep with the new original.
@@ -562,7 +562,11 @@ sfmmu_panic8:
.global sfmmu_panic9
sfmmu_panic9:
.asciz "sfmmu_asm: cnum is greater than MAX_SFMMU_CTX_VAL"
-
+
+ .global sfmmu_panic10
+sfmmu_panic10:
+ .asciz "sfmmu_asm: valid SCD with no 3rd scd TSB"
+
ENTRY(sfmmu_disable_intrs)
rdpr %pstate, %o0
#ifdef DEBUG
@@ -596,13 +600,17 @@ sfmmu_panic9:
* %o0 - sfmmup
* %o1 - allocflag
* %o2 - CPU
+ * %o3 - sfmmu private/shared flag
+ *
+ * ret - 0: no ctx is allocated
+ * 1: a ctx is allocated
*/
ENTRY_NP(sfmmu_alloc_ctx)
#ifdef DEBUG
- sethi %hi(ksfmmup), %o3
- ldx [%o3 + %lo(ksfmmup)], %o3
- cmp %o3, %o0
+ sethi %hi(ksfmmup), %g1
+ ldx [%g1 + %lo(ksfmmup)], %g1
+ cmp %g1, %o0
bne,pt %xcc, 0f
nop
@@ -618,12 +626,14 @@ sfmmu_panic9:
7:
retl
- nop
+ mov %g0, %o0 ! %o0 = ret = 0
0:
PANIC_IF_INTR_ENABLED_PSTR(sfmmu_ei_l1, %g1)
-#endif /* DEBUG */
-
+#endif /* DEBUG */
+
+ mov %o3, %g1 ! save sfmmu pri/sh flag in %g1
+
! load global mmu_ctxp info
ldx [%o2 + CPU_MMU_CTXP], %o3 ! %o3 = mmu_ctx_t ptr
lduw [%o2 + CPU_MMU_IDX], %g2 ! %g2 = mmu index
@@ -639,13 +649,16 @@ sfmmu_panic9:
sethi %hi(panicstr), %g1 ! test if panicstr is already set
ldx [%g1 + %lo(panicstr)], %g1
tst %g1
- bnz,pn %icc, 3f
+ bnz,pn %icc, 1f
nop
sethi %hi(sfmmu_panic8), %o0
call panic
or %o0, %lo(sfmmu_panic8), %o0
-3:
+1:
+ retl
+ mov %g0, %o0 ! %o0 = ret = 0
+3:
#endif
! load HAT sfmmu_ctxs[mmuid] gnum, cnum
@@ -668,6 +681,7 @@ sfmmu_panic9:
nop
! cnum == INVALID, check allocflag
+ mov %g0, %g4 ! %g4 = ret = 0
brz,pt %o1, 8f ! allocflag == 0, skip ctx allocation, bail
mov %g6, %o1
@@ -677,6 +691,7 @@ sfmmu_panic9:
1:
! valid HAT cnum, check gnum
cmp %g5, %o4
+ mov 1, %g4 !%g4 = ret = 1
be,a,pt %icc, 8f ! gnum unchanged, go to done
mov %g6, %o1
@@ -710,6 +725,7 @@ sfmmu_panic9:
nop
! cnum == INVALID, check allocflag
+ mov %g0, %g4 ! %g4 = ret = 0
brz,pt %o1, 2f ! allocflag == 0, called from resume, set hw
mov %g6, %o1
@@ -719,6 +735,7 @@ sfmmu_panic9:
1:
! valid HAT cnum, check gnum
cmp %g5, %o4
+ mov 1, %g4 ! %g4 = ret = 1
be,a,pt %icc, 2f ! gnum unchanged, go to done
mov %g6, %o1
@@ -757,18 +774,20 @@ sfmmu_panic9:
add %o1, 1, %o5 ! %o5 = mmu_ctxp->cnum + 1
/*
- * cnum reachs max, update HAT with INVALID
+ * cnum reaches max, bail, so wrap around can be performed later.
*/
set INVALID_CONTEXT, %o1
-
- /*
- * update hat cnum to INVALID, sun4v sfmmu_load_mmustate checks
- * hat cnum to determine if set the number of TSBs to 0.
+ /*
+ * When the routine is called by shared ctx, we want to set
+ * both private and shared ctx regs to INVALID. In order to
+ * do so, we set the sfmmu priv/shared flag to 'private' regardless,
+ * so that the private ctx reg will be set to invalid.
+ * Note that values written to private context register are
+ * automatically written to shared context register as well.
*/
- sllx %o4, SFMMU_MMU_GNUM_RSHIFT, %o4
- or %o4, %o1, %o4
- stx %o4, [%g2 + SFMMU_CTXS]
-
+ mov %g0, %g1 ! %g1 = sfmmu private/shared flag
+ mov %g0, %g4 ! %g4 = ret = 0
+
membar #LoadStore|#StoreStore
ba,pt %icc, 8f
clrb [%o0 + SFMMU_CTX_LOCK]
@@ -798,30 +817,28 @@ sfmmu_panic9:
membar #LoadStore|#StoreStore
clrb [%o0 + SFMMU_CTX_LOCK]
-
+
+ mov 1, %g4 ! %g4 = ret = 1
8:
/*
* program the secondary context register
*
* %o1 = cnum
+ * %g1 = sfmmu private/shared flag (0:private, 1:shared)
*/
+
#ifdef sun4u
ldub [%o0 + SFMMU_CEXT], %o2
sll %o2, CTXREG_EXT_SHIFT, %o2
or %o1, %o2, %o1
#endif
-
- mov MMU_SCONTEXT, %o4
- sethi %hi(FLUSH_ADDR), %o5
- stxa %o1, [%o4]ASI_MMU_CTX ! set 2nd context reg.
- flush %o5
-
+ SET_SECCTX(%o1, %g1, %o4, %o5)
+
retl
- nop
+ mov %g4, %o0 ! %o0 = ret
SET_SIZE(sfmmu_alloc_ctx)
-
ENTRY_NP(sfmmu_modifytte)
ldx [%o2], %g3 /* current */
ldx [%o0], %g1 /* original */
@@ -915,6 +932,11 @@ sfmmu_kpm_patch_tsbm(void)
{
}
+void
+sfmmu_patch_shctx(void)
+{
+}
+
/* ARGSUSED */
void
sfmmu_load_tsbe(struct tsbe *tsbep, uint64_t vaddr, tte_t *ttep, int phys)
@@ -1122,6 +1144,10 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
sethi %hi(iktsb), %o0 ! to search
call sfmmu_fixup_mmu_asi ! patch kitlb miss
or %o0, %lo(iktsb), %o0
+ mov 6, %o3 ! number of instructions
+ sethi %hi(iktsb4m), %o0 ! to search
+ call sfmmu_fixup_mmu_asi ! patch kitlb4m miss
+ or %o0, %lo(iktsb4m), %o0
mov %o4, %o7 ! retore return pc -- leaf
retl
nop
@@ -1155,6 +1181,10 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
sethi %hi(ktsb4m_szcode), %o1
ld [%o1 + %lo(ktsb4m_szcode)], %o1 /* %o1 = ktsb4m size code */
+ sethi %hi(iktsb4m), %o0
+ call sfmmu_fix_ktlb_traptable
+ or %o0, %lo(iktsb4m), %o0
+
sethi %hi(dktsb4m), %o0
call sfmmu_fix_ktlb_traptable
or %o0, %lo(dktsb4m), %o0
@@ -1194,6 +1224,10 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
call sfmmu_fixup_setx ! patch value of ktsb4m base addr
or %o0, %lo(dktsb4mbase), %o0
+ sethi %hi(iktsb4mbase), %o0
+ call sfmmu_fixup_setx ! patch value of ktsb4m base addr
+ or %o0, %lo(iktsb4mbase), %o0
+
sethi %hi(sfmmu_kprot_patch_ktsb4m_base), %o0
call sfmmu_fixup_setx ! patch value of ktsb4m base addr
or %o0, %lo(sfmmu_kprot_patch_ktsb4m_base), %o0
@@ -1301,7 +1335,7 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
*/
set tsb_slab_shift, %o1
set MMU_PAGESHIFT4M, %o4
- ldsw [%o1], %o3
+ lduw [%o1], %o3
subcc %o4, %o3, %o4
bz,pt %icc, 1f
/* delay slot safe */
@@ -1320,7 +1354,7 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
1:
/* patch TSBREG_VAMASK used to set up TSB base register */
set tsb_slab_mask, %o1
- lduw [%o1], %o4
+ ldx [%o1], %o4
sethi %hi(sfmmu_tsb_1st_tsbreg_vamask), %o0
call sfmmu_fixup_or
or %o0, %lo(sfmmu_tsb_1st_tsbreg_vamask), %o0
@@ -1333,6 +1367,38 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift)
#endif /* UTSB_PHYS */
SET_SIZE(sfmmu_patch_utsb)
+ ENTRY_NP(sfmmu_patch_shctx)
+#ifdef sun4u
+ retl
+ nop
+#else /* sun4u */
+ set sfmmu_shctx_cpu_mondo_patch, %o0
+ MAKE_JMP_INSTR(5, %o1, %o2) ! jmp %g5
+ st %o1, [%o0]
+ flush %o0
+ MAKE_NOP_INSTR(%o1)
+ add %o0, I_SIZE, %o0 ! next instr
+ st %o1, [%o0]
+ flush %o0
+
+ set sfmmu_shctx_user_rtt_patch, %o0
+ st %o1, [%o0] ! nop 1st instruction
+ flush %o0
+ add %o0, I_SIZE, %o0
+ st %o1, [%o0] ! nop 2nd instruction
+ flush %o0
+ add %o0, I_SIZE, %o0
+ st %o1, [%o0] ! nop 3rd instruction
+ flush %o0
+ add %o0, I_SIZE, %o0
+ st %o1, [%o0] ! nop 4th instruction
+ flush %o0
+ add %o0, I_SIZE, %o0
+ st %o1, [%o0] ! nop 5th instruction
+ retl
+ flush %o0
+#endif /* sun4u */
+ SET_SIZE(sfmmu_patch_shctx)
/*
* Routine that loads an entry into a tsb using virtual addresses.
@@ -2136,7 +2202,7 @@ label/**/1: ;\
label/**/2: ;\
brz,pt ismseg, label/**/3 /* no mapping */ ;\
add ismhat, IMAP_VB_SHIFT, tmp1 /* tmp1 = vb_shift addr */ ;\
- lduha [tmp1]ASI_MEM, tmp1 /* tmp1 = vb shift*/ ;\
+ lduba [tmp1]ASI_MEM, tmp1 /* tmp1 = vb shift*/ ;\
srlx ismseg, tmp1, tmp2 /* tmp2 = vbase */ ;\
srlx tagacc, tmp1, tmp1 /* tmp1 = va seg*/ ;\
sub tmp1, tmp2, tmp2 /* tmp2 = va - vbase */ ;\
@@ -2195,7 +2261,9 @@ label/**/2: ;\
#define MAKE_HASHTAG(vapg, hatid, hmeshift, hashno, hblktag) \
sllx vapg, hmeshift, vapg ;\
- or vapg, hashno, hblktag
+ mov hashno, hblktag ;\
+ sllx hblktag, HTAG_REHASH_SHIFT, hblktag ;\
+ or vapg, hblktag, hblktag
/*
* Function to traverse hmeblk hash link list and find corresponding match.
@@ -2238,6 +2306,53 @@ label/**/1: ;\
ldxa [hmebp]ASI_MEM, hmeblkpa /* hmeblk ptr pa */ ;\
label/**/2:
+/*
+ * Function to traverse hmeblk hash link list and find corresponding match.
+ * The search is done using physical pointers. It returns the physical address
+ * and virtual address pointers to the hmeblk that matches with the tag
+ * provided.
+ * Parameters:
+ * hmeblktag = register with hmeblk tag match (rid field is 0)
+ * hatid = register with hatid (pointer to SRD)
+ * hmeblkpa = register where physical ptr will be stored
+ * hmeblkva = register where virtual ptr will be stored
+ * tmp1 = tmp reg
+ * tmp2 = tmp reg
+ * label: temporary label
+ */
+
+#define HMEHASH_SEARCH_SHME(hmeblktag, hatid, hmeblkpa, hmeblkva, \
+ tsbarea, tmp1, tmp2, label) \
+label/**/1: ;\
+ brz,pn hmeblkva, label/**/4 ;\
+ HAT_HLINK_DBSTAT(hatid, tsbarea, tmp1, tmp2) ;\
+ add hmeblkpa, HMEBLK_TAG, tmp2 ;\
+ ldxa [tmp2]ASI_MEM, tmp1 /* read 1st part of tag */ ;\
+ add tmp2, CLONGSIZE, tmp2 ;\
+ ldxa [tmp2]ASI_MEM, tmp2 /* read 2nd part of tag */ ;\
+ xor tmp1, hmeblktag, tmp1 ;\
+ xor tmp2, hatid, tmp2 ;\
+ brz,pn tmp2, label/**/3 /* branch on hit */ ;\
+ add hmeblkpa, HMEBLK_NEXT, tmp2 ;\
+label/**/2: ;\
+ ldna [tmp2]ASI_MEM, hmeblkva /* hmeblk ptr va */ ;\
+ add hmeblkpa, HMEBLK_NEXTPA, tmp2 ;\
+ ba,pt %xcc, label/**/1 ;\
+ ldxa [tmp2]ASI_MEM, hmeblkpa /* hmeblk ptr pa */ ;\
+label/**/3: ;\
+ cmp tmp1, SFMMU_MAX_HME_REGIONS ;\
+ bgeu,pt %xcc, label/**/2 ;\
+ add hmeblkpa, HMEBLK_NEXT, tmp2 ;\
+ and tmp1, BT_ULMASK, tmp2 ;\
+ srlx tmp1, BT_ULSHIFT, tmp1 ;\
+ sllx tmp1, CLONGSHIFT, tmp1 ;\
+ add tsbarea, tmp1, tmp1 ;\
+ ldx [tmp1 + TSBMISS_SHMERMAP], tmp1 ;\
+ srlx tmp1, tmp2, tmp1 ;\
+ btst 0x1, tmp1 ;\
+ bz,pn %xcc, label/**/2 ;\
+ add hmeblkpa, HMEBLK_NEXT, tmp2 ;\
+label/**/4:
#if ((1 << SFHME_SHIFT) != SFHME_SIZE)
#error HMEBLK_TO_HMENT assumes sf_hment is power of 2 in size
@@ -2247,16 +2362,19 @@ label/**/2:
* HMEBLK_TO_HMENT is a macro that given an hmeblk and a vaddr returns
* he offset for the corresponding hment.
* Parameters:
- * vaddr = register with virtual address
- * hmeblkpa = physical pointer to hme_blk
- * hment = register where address of hment will be stored
- * hmentoff = register where hment offset will be stored
- * label1 = temporary label
+ * In:
+ * vaddr = register with virtual address
+ * hmeblkpa = physical pointer to hme_blk
+ * Out:
+ * hmentoff = register where hment offset will be stored
+ * hmemisc = hblk_misc
+ * Scratch:
+ * tmp1
*/
-#define HMEBLK_TO_HMENT(vaddr, hmeblkpa, hmentoff, tmp1, label1) \
+#define HMEBLK_TO_HMENT(vaddr, hmeblkpa, hmentoff, hmemisc, tmp1, label1)\
add hmeblkpa, HMEBLK_MISC, hmentoff ;\
- lda [hmentoff]ASI_MEM, tmp1 ;\
- andcc tmp1, HBLK_SZMASK, %g0 /* tmp1 = get_hblk_sz(%g5) */ ;\
+ lda [hmentoff]ASI_MEM, hmemisc ;\
+ andcc hmemisc, HBLK_SZMASK, %g0 ;\
bnz,a,pn %icc, label1 /* if sz != TTE8K branch */ ;\
or %g0, HMEBLK_HME1, hmentoff ;\
srl vaddr, MMU_PAGESHIFT, tmp1 ;\
@@ -2274,26 +2392,23 @@ label1:
* hmeblkpa = PA of hment if found, otherwise clobbered (out)
* hmeblkva = VA of hment if found, otherwise clobbered (out)
* tsbarea = pointer to the tsbmiss area for this cpu. (in)
- * hmentoff = temporarily stores hment offset (clobbered)
+ * hmemisc = hblk_misc if TTE is found (out), otherwise clobbered
* hmeshift = constant/register to shift VA to obtain the virtual pfn
* for this page size.
* hashno = constant/register hash number
* label = temporary label for branching within macro.
* foundlabel = label to jump to when tte is found.
* suspendlabel= label to jump to when tte is suspended.
- * exitlabel = label to jump to when tte is not found. The hmebp lock
- * is still held at this time.
+ * exitlabel = label to jump to when tte is not found.
*
- * The caller should set up the tsbmiss->scratch[2] field correctly before
- * calling this funciton (aka TSBMISS_SCRATCH + TSBMISS_HATID)
*/
-#define GET_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, hmentoff, \
+#define GET_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, hmemisc, \
hmeshift, hashno, label, foundlabel, suspendlabel, exitlabel) \
;\
stn tagacc, [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)] ;\
stn hatid, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)] ;\
HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, tte, \
- hmeblkpa, label/**/5, hmentoff, hmeblkva) ;\
+ hmeblkpa, label/**/5, hmemisc, hmeblkva) ;\
;\
/* ;\
* tagacc = tagacc ;\
@@ -2301,21 +2416,22 @@ label1:
* tsbarea = tsbarea ;\
* tte = hmebp (hme bucket pointer) ;\
* hmeblkpa = vapg (virtual page) ;\
- * hmentoff, hmeblkva = scratch ;\
+ * hmemisc, hmeblkva = scratch ;\
*/ ;\
- MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmentoff) ;\
+ MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmemisc) ;\
+ or hmemisc, SFMMU_INVALID_SHMERID, hmemisc ;\
;\
/* ;\
* tagacc = tagacc ;\
* hatid = hatid ;\
* tte = hmebp ;\
* hmeblkpa = CLOBBERED ;\
- * hmentoff = htag_bspage & hashno ;\
+ * hmemisc = htag_bspage+hashno+invalid_rid ;\
* hmeblkva = scratch ;\
*/ ;\
stn tte, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)] ;\
HMELOCK_ENTER(tte, hmeblkpa, hmeblkva, label/**/3, ASI_MEM) ;\
- HMEHASH_SEARCH(tte, hmentoff, hatid, hmeblkpa, hmeblkva, \
+ HMEHASH_SEARCH(tte, hmemisc, hatid, hmeblkpa, hmeblkva, \
tsbarea, tagacc, label/**/1) ;\
/* ;\
* tagacc = CLOBBERED ;\
@@ -2335,26 +2451,160 @@ label/**/4: ;\
* Now we calculate the corresponding tte. ;\
* ;\
* tagacc = tagacc ;\
- * hatid = clobbered ;\
+ * hatid = hatid ;\
+ * tte = clobbered ;\
+ * hmeblkpa = hmeblkpa ;\
+ * hmemisc = hblktag ;\
+ * hmeblkva = hmeblkva ;\
+ */ ;\
+ HMEBLK_TO_HMENT(tagacc, hmeblkpa, hatid, hmemisc, tte, \
+ label/**/2) ;\
+ ;\
+ /* ;\
+ * tagacc = tagacc ;\
+ * hatid = hmentoff ;\
+ * tte = clobbered ;\
+ * hmeblkpa = hmeblkpa ;\
+ * hmemisc = hblk_misc ;\
+ * hmeblkva = hmeblkva ;\
+ */ ;\
+ ;\
+ add hatid, SFHME_TTE, hatid ;\
+ add hmeblkpa, hatid, hmeblkpa ;\
+ ldxa [hmeblkpa]ASI_MEM, tte /* MMU_READTTE through pa */ ;\
+ add hmeblkva, hatid, hmeblkva ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\
+ HMELOCK_EXIT(hatid, hatid, ASI_MEM) /* drop lock */ ;\
+ set TTE_SUSPEND, hatid ;\
+ TTE_SUSPEND_INT_SHIFT(hatid) ;\
+ btst tte, hatid ;\
+ bz,pt %xcc, foundlabel ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid ;\
+ ;\
+ /* ;\
+ * Mapping is suspended, so goto suspend label. ;\
+ */ ;\
+ ba,pt %xcc, suspendlabel ;\
+ nop
+
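Editor's note: the three ways the GET_TTE tail above can leave map onto exitlabel, foundlabel and suspendlabel. A schematic C rendering, with every name below a placeholder for this sketch:

#include <stdint.h>

#define TTE_SUSPEND_BIT	(1ULL << 55)	/* assumed bit position */

enum gettte_outcome { GETTTE_EXIT, GETTTE_SUSPEND, GETTTE_FOUND };

/*
 * Once the hash chain walk is done the bucket lock is dropped, and the
 * TTE (read through the hmeblk's physical address) decides the exit.
 */
static enum gettte_outcome
gettte_outcome(int hmeblk_found, uint64_t tte)
{
	if (!hmeblk_found)
		return (GETTTE_EXIT);		/* exitlabel */
	if (tte & TTE_SUSPEND_BIT)
		return (GETTTE_SUSPEND);	/* suspendlabel */
	return (GETTTE_FOUND);			/* foundlabel */
}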
+/*
+ * GET_SHME_TTE is similar to GET_TTE() except it searches
+ * shared hmeblks via the HMEHASH_SEARCH_SHME() macro.
+ * If a valid tte is found, hmemisc is set to the shctx flag, i.e.,
+ * 0 if the shme region is not part of an scd, 1 if it is.
+ */
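Editor's note: one detail worth calling out before the definition: when the TTE read from a shared 8K hmeblk is invalid, the search is restarted further down the chain, because regions are 8K-aligned yet never share hmeblks, so the entry may belong to another region's hmeblk in the same bucket. A hedged C sketch of that decision (the valid-bit position is an assumption):

#include <stdbool.h>
#include <stdint.h>

#define TTE_VALID_BIT	(1ULL << 63)	/* assumed: valid bit is the sign bit */

enum shme_step { SHME_DONE, SHME_KEEP_SEARCHING };

static enum shme_step
shme_tte_step(uint64_t tte, bool hmeblk_is_8k)
{
	if (tte & TTE_VALID_BIT)
		return (SHME_DONE);	/* valid: go pick up the shctx flag */
	/* invalid 8K entry: it may belong to a different region, keep going */
	return (hmeblk_is_8k ? SHME_KEEP_SEARCHING : SHME_DONE);
}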
+#define GET_SHME_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, \
+ hmemisc, hmeshift, hashno, label, foundlabel, \
+ suspendlabel, exitlabel) \
+ ;\
+ stn tagacc, [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)] ;\
+ stn hatid, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)] ;\
+ HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, tte, \
+ hmeblkpa, label/**/5, hmemisc, hmeblkva) ;\
+ ;\
+ /* ;\
+ * tagacc = tagacc ;\
+ * hatid = hatid ;\
+ * tsbarea = tsbarea ;\
+ * tte = hmebp (hme bucket pointer) ;\
+ * hmeblkpa = vapg (virtual page) ;\
+ * hmemisc, hmeblkva = scratch ;\
+ */ ;\
+ MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmemisc) ;\
+ ;\
+ /* ;\
+ * tagacc = tagacc ;\
+ * hatid = hatid ;\
+ * tsbarea = tsbarea ;\
* tte = hmebp ;\
+ * hmemisc = htag_bspage + hashno + 0 (for rid) ;\
+ * hmeblkpa = CLOBBERED ;\
+ * hmeblkva = scratch ;\
+ */ ;\
+ stn tte, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)] ;\
+ HMELOCK_ENTER(tte, hmeblkpa, hmeblkva, label/**/3, ASI_MEM) ;\
+ ;\
+ add tte, HMEBUCK_NEXTPA, hmeblkpa ;\
+ ldxa [hmeblkpa]ASI_MEM, hmeblkpa ;\
+ add tte, HMEBUCK_HBLK, hmeblkva ;\
+ ldxa [hmeblkva]ASI_MEM, hmeblkva ;\
+ HAT_HSEARCH_DBSTAT(hatid, tsbarea, tagacc, tte) ;\
+ ;\
+label/**/8: ;\
+ HMEHASH_SEARCH_SHME(hmemisc, hatid, hmeblkpa, hmeblkva, \
+ tsbarea, tagacc, tte, label/**/1) ;\
+ /* ;\
+ * tagacc = CLOBBERED ;\
+ * tte = CLOBBERED ;\
+ * hmeblkpa = hmeblkpa ;\
+ * hmeblkva = hmeblkva ;\
+ */ ;\
+ brnz,pt hmeblkva, label/**/4 /* branch if hmeblk found */ ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)], tagacc ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hmeblkva ;\
+ HMELOCK_EXIT(hmeblkva, hmeblkva, ASI_MEM) /* drop lock */ ;\
+ ba,pt %xcc, exitlabel /* exit if hblk not found */ ;\
+ nop ;\
+label/**/4: ;\
+ /* ;\
+ * We have found the hmeblk containing the hment. ;\
+ * Now we calculate the corresponding tte. ;\
+ * ;\
+ * tagacc = tagacc ;\
+ * hatid = hatid ;\
+ * tte = clobbered ;\
* hmeblkpa = hmeblkpa ;\
- * hmentoff = hblktag ;\
+ * hmemisc = hblktag ;\
* hmeblkva = hmeblkva ;\
+ * tsbarea = tsbmiss area ;\
*/ ;\
- HMEBLK_TO_HMENT(tagacc, hmeblkpa, hmentoff, hatid, label/**/2) ;\
+ HMEBLK_TO_HMENT(tagacc, hmeblkpa, hatid, hmemisc, tte, \
+ label/**/2) ;\
;\
- add hmentoff, SFHME_TTE, hmentoff ;\
- add hmeblkpa, hmentoff, hmeblkpa ;\
+ /* ;\
+ * tagacc = tagacc ;\
+ * hatid = hmentoff ;\
+ * tte = clobbered ;\
+ * hmeblkpa = hmeblkpa ;\
+ * hmemisc = hblk_misc ;\
+ * hmeblkva = hmeblkva ;\
+ * tsbarea = tsbmiss area ;\
+ */ ;\
+ ;\
+ add hatid, SFHME_TTE, hatid ;\
+ add hmeblkpa, hatid, hmeblkpa ;\
ldxa [hmeblkpa]ASI_MEM, tte /* MMU_READTTE through pa */ ;\
- add hmeblkva, hmentoff, hmeblkva ;\
+ brlz,pt tte, label/**/6 ;\
+ add hmeblkva, hatid, hmeblkva ;\
+ btst HBLK_SZMASK, hmemisc ;\
+ bnz,a,pt %icc, label/**/7 ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\
+ ;\
+ /* ;\
+ * We found an invalid 8K tte in shme. ;\
+	 * It may not belong to this shme's region: region		;\
+	 * size/alignment granularity is 8K, but different		;\
+	 * regions don't share hmeblks. Continue the search.		;\
+ */ ;\
+ sub hmeblkpa, hatid, hmeblkpa ;\
ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid ;\
- ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hmentoff ;\
- HMELOCK_EXIT(hmentoff, hmentoff, ASI_MEM) /* drop lock */ ;\
- set TTE_SUSPEND, hmentoff ;\
- TTE_SUSPEND_INT_SHIFT(hmentoff) ;\
- btst tte, hmentoff ;\
+ srlx tagacc, hmeshift, tte ;\
+ add hmeblkpa, HMEBLK_NEXT, hmeblkva ;\
+ ldxa [hmeblkva]ASI_MEM, hmeblkva ;\
+ add hmeblkpa, HMEBLK_NEXTPA, hmeblkpa ;\
+ ldxa [hmeblkpa]ASI_MEM, hmeblkpa ;\
+ MAKE_HASHTAG(tte, hatid, hmeshift, hashno, hmemisc) ;\
+ ba,a,pt %xcc, label/**/8 ;\
+label/**/6: ;\
+ GET_SCDSHMERMAP(tsbarea, hmeblkpa, hatid, hmemisc) ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\
+label/**/7: ;\
+ HMELOCK_EXIT(hatid, hatid, ASI_MEM) /* drop lock */ ;\
+ set TTE_SUSPEND, hatid ;\
+ TTE_SUSPEND_INT_SHIFT(hatid) ;\
+ btst tte, hatid ;\
bz,pt %xcc, foundlabel ;\
- nop ;\
+ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid ;\
;\
/* ;\
* Mapping is suspended, so goto suspend label. ;\
@@ -2393,8 +2643,8 @@ sfmmu_kprot_patch_ktsb4m_szcode:
GET_TSBE_POINTER(MMU_PAGESHIFT4M, %g3, %g7, %g6, %g5)
! %g3 = 4M tsb entry pointer, as TSB miss handler expects
- CPU_TSBMISS_AREA(%g6, %g7)
- HAT_PERCPU_STAT16(%g6, TSBMISS_KPROTS, %g7)
+ CPU_TSBMISS_AREA(%g6, %g7)
+ HAT_PERCPU_STAT16(%g6, TSBMISS_KPROTS, %g7)
ba,pt %xcc, sfmmu_tsb_miss_tt
nop
@@ -2412,8 +2662,8 @@ sfmmu_kprot_patch_ktsb4m_szcode:
/* %g1 = first TSB entry ptr now, %g2 preserved */
GET_UTSBREG(SCRATCHPAD_UTSBREG2, %g3) /* get 2nd utsbreg */
- brlz,pt %g3, 9f /* check for 2nd TSB */
- mov %g0, %g3 /* clear second tsbe ptr */
+ brlz,pt %g3, 9f /* check for 2nd TSB */
+ nop
GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
/* %g3 = second TSB entry ptr now, %g2 preserved */
@@ -2422,14 +2672,14 @@ sfmmu_kprot_patch_ktsb4m_szcode:
#ifdef UTSB_PHYS
/* g1 = first TSB entry ptr */
GET_2ND_TSBREG(%g3)
- brlz,a,pt %g3, 9f /* check for 2nd TSB */
- mov %g0, %g3 /* clear second tsbe ptr */
+ brlz,pt %g3, 9f /* check for 2nd TSB */
+ nop
GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
/* %g3 = second TSB entry ptr now, %g2 preserved */
#else /* UTSB_PHYS */
brgez,pt %g1, 9f /* check for 2nd TSB */
- mov %g0, %g3 /* clear second tsbe ptr */
+ mov -1, %g3 /* set second tsbe ptr to -1 */
mov %g2, %g7
GET_2ND_TSBE_PTR(%g7, %g1, %g3, %g4, %g5, sfmmu_uprot)
@@ -2452,7 +2702,7 @@ sfmmu_kprot_patch_ktsb4m_szcode:
* %g1 = 8K TSB pointer register (not used, clobbered)
* %g2 = tag access register (used)
* %g3 = faulting context id (used)
- * %g7 = 4M virtual page number for tag matching (used)
+ * %g7 = TSB tag to match (used)
*/
.align 64
ALTENTRY(sfmmu_kitlb_miss)
@@ -2472,8 +2722,27 @@ iktsb: sllx %g2, 64-(TAGACC_SHIFT + TSB_START_SIZE + RUNTIME_PATCH), %g1
or %g4, %g1, %g1 ! form tsb ptr
ldda [%g1]RUNTIME_PATCH, %g4 ! %g4 = tag, %g5 = data
cmp %g4, %g7
+ bne,pn %xcc, iktsb4mbase ! check 4m ktsb
+ srlx %g2, MMU_PAGESHIFT4M, %g3 ! use 4m virt-page as TSB index
+
+ andcc %g5, TTE_EXECPRM_INT, %g0 ! check exec bit
+ bz,pn %icc, exec_fault
+ nop
+ TT_TRACE(trace_tsbhit) ! 2 instr traptrace
+ ITLB_STUFF(%g5, %g1, %g2, %g3, %g4)
+ retry
+
+iktsb4mbase:
+ RUNTIME_PATCH_SETX(%g4, %g6)
+ /* %g4 = contents of ktsb4m_base or ktsb4m_pbase */
+iktsb4m:
+ sllx %g3, 64-(TSB_START_SIZE + RUNTIME_PATCH), %g3
+ srlx %g3, 64-(TSB_START_SIZE + TSB_ENTRY_SHIFT + RUNTIME_PATCH), %g3
+ add %g4, %g3, %g3 ! %g3 = 4m tsbe ptr
+ ldda [%g3]RUNTIME_PATCH, %g4 ! %g4 = tag, %g5 = data
+ cmp %g4, %g7
bne,pn %xcc, sfmmu_tsb_miss_tt ! branch on miss
- andcc %g5, TTE_EXECPRM_INT, %g0 ! check exec bit
+ andcc %g5, TTE_EXECPRM_INT, %g0 ! check exec bit
bz,pn %icc, exec_fault
nop
TT_TRACE(trace_tsbhit) ! 2 instr traptrace
@@ -2629,7 +2898,7 @@ dktsb4m_kpmcheck:
PROBE_1ST_ITSB(%g1, %g7, uitlb_fast_8k_probefail)
/* g4 - g5 = clobbered by PROBE_1ST_ITSB */
ba,pn %xcc, sfmmu_tsb_miss_tt
- mov %g0, %g3
+ mov -1, %g3
/*
* User data miss w/ single TSB.
@@ -2648,7 +2917,7 @@ dktsb4m_kpmcheck:
PROBE_1ST_DTSB(%g1, %g7, udtlb_fast_8k_probefail)
/* g4 - g5 = clobbered by PROBE_1ST_DTSB */
ba,pn %xcc, sfmmu_tsb_miss_tt
- mov %g0, %g3
+ mov -1, %g3
/*
* User instruction miss w/ multiple TSBs (sun4v).
@@ -2834,7 +3103,7 @@ udtlb_miss_probesecond:
*
* g1 = First TSB entry pointer
* g2 = tag access register
- * g3 = 4M TSB entry pointer; NULL if no 2nd TSB
+ * g3 = 4M TSB entry pointer; -1 if no 2nd TSB
* g4 - g7 = scratch registers
*/
@@ -2878,6 +3147,12 @@ udtlb_miss_probesecond:
be,pn %icc, tsb_tl0_noctxt /* no ctx miss exception */
stn %g7, [%g6 + (TSBMISS_SCRATCH + TSBMISS_HATID)]
+#ifdef sun4v
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g7 /* clear ctx1 flag set from */
+ andn %g7, HAT_CHKCTX1_FLAG, %g7 /* the previous tsb miss */
+ stub %g7, [%g6 + TSBMISS_URTTEFLAGS]
+#endif
+
ISM_CHECK(%g2, %g6, %g3, %g4, %g5, %g7, %g1, tsb_l1, tsb_ism)
/*
* The miss wasn't in an ISM segment.
@@ -2902,10 +3177,9 @@ udtlb_miss_probesecond:
/* NOT REACHED */
tsb_512K:
- ldn [%g6 + (TSBMISS_SCRATCH + TSB_TAGACC)], %g3
- sllx %g3, TAGACC_CTX_LSHIFT, %g5
+ sllx %g2, TAGACC_CTX_LSHIFT, %g5
brz,pn %g5, 3f
- lduh [%g6 + TSBMISS_HATFLAGS], %g4
+ ldub [%g6 + TSBMISS_UTTEFLAGS], %g4
and %g4, HAT_512K_FLAG, %g5
/*
@@ -2932,10 +3206,9 @@ tsb_512K:
/* NOT REACHED */
tsb_4M:
- ldn [%g6 + (TSBMISS_SCRATCH + TSB_TAGACC)], %g3
- sllx %g3, TAGACC_CTX_LSHIFT, %g5
+ sllx %g2, TAGACC_CTX_LSHIFT, %g5
brz,pn %g5, 4f
- lduh [%g6 + TSBMISS_HATFLAGS], %g4
+ ldub [%g6 + TSBMISS_UTTEFLAGS], %g4
and %g4, HAT_4M_FLAG, %g5
brz,pn %g5, tsb_32M
nop
@@ -2950,25 +3223,13 @@ tsb_4M:
/* NOT REACHED */
tsb_32M:
-#ifndef sun4v
- GET_CPU_IMPL(%g5)
- cmp %g5, OLYMPUS_C_IMPL
- be,pn %xcc, 0f
- nop
- cmp %g5, PANTHER_IMPL
- bne,pt %xcc, tsb_pagefault
- nop
-#endif
-
-0:
- ldn [%g6 + (TSBMISS_SCRATCH + TSB_TAGACC)], %g3
- sllx %g3, TAGACC_CTX_LSHIFT, %g5
+ sllx %g2, TAGACC_CTX_LSHIFT, %g5
#ifdef sun4v
brz,pn %g5, 6f
#else
brz,pn %g5, tsb_pagefault
-#endif
- lduh [%g6 + TSBMISS_HATFLAGS], %g4
+#endif
+ ldub [%g6 + TSBMISS_UTTEFLAGS], %g4
and %g4, HAT_32M_FLAG, %g5
brz,pn %g5, tsb_256M
nop
@@ -2982,10 +3243,13 @@ tsb_32M:
sfmmu_suspend_tl, tsb_256M)
/* NOT REACHED */
+#ifdef sun4u
+#define tsb_shme tsb_pagefault
+#endif
tsb_256M:
- lduh [%g6 + TSBMISS_HATFLAGS], %g4
+ ldub [%g6 + TSBMISS_UTTEFLAGS], %g4
and %g4, HAT_256M_FLAG, %g5
- brz,pn %g5, tsb_pagefault
+ brz,pn %g5, tsb_shme
nop
6:
/*
@@ -2994,24 +3258,138 @@ tsb_256M:
GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
MMU_PAGESHIFT256M, TTE256M, tsb_l256M, tsb_checktte,
- sfmmu_suspend_tl, tsb_pagefault)
+ sfmmu_suspend_tl, tsb_shme)
/* NOT REACHED */
tsb_checktte:
/*
+ * g1 = hblk_misc
+ * g2 = tagacc
+ * g3 = tte
+ * g4 = tte pa
+ * g5 = tte va
+ * g6 = tsbmiss area
+ * g7 = hatid
+ */
+ brlz,a,pt %g3, tsb_validtte
+ rdpr %tt, %g7
+
+#ifdef sun4u
+#undef tsb_shme
+ ba tsb_pagefault
+ nop
+#else
+
+tsb_shme:
+ /*
+ * g2 = tagacc
+ * g6 = tsbmiss area
+ */
+ sllx %g2, TAGACC_CTX_LSHIFT, %g5
+ brz,pn %g5, tsb_pagefault
+ nop
+ ldx [%g6 + TSBMISS_SHARED_UHATID], %g7 /* g7 = srdp */
+ brz,pn %g7, tsb_pagefault
+ nop
+
+ GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
+ MMU_PAGESHIFT64K, TTE64K, tsb_shme_l8K, tsb_shme_checktte,
+ sfmmu_suspend_tl, tsb_shme_512K)
+ /* NOT REACHED */
+
+tsb_shme_512K:
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g4
+ and %g4, HAT_512K_FLAG, %g5
+ brz,pn %g5, tsb_shme_4M
+ nop
+
+ /*
+ * 512K hash
+ */
+
+ GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
+ MMU_PAGESHIFT512K, TTE512K, tsb_shme_l512K, tsb_shme_checktte,
+ sfmmu_suspend_tl, tsb_shme_4M)
+ /* NOT REACHED */
+
+tsb_shme_4M:
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g4
+ and %g4, HAT_4M_FLAG, %g5
+ brz,pn %g5, tsb_shme_32M
+ nop
+4:
+ /*
+ * 4M hash
+ */
+ GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
+ MMU_PAGESHIFT4M, TTE4M, tsb_shme_l4M, tsb_shme_checktte,
+ sfmmu_suspend_tl, tsb_shme_32M)
+ /* NOT REACHED */
+
+tsb_shme_32M:
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g4
+ and %g4, HAT_32M_FLAG, %g5
+ brz,pn %g5, tsb_shme_256M
+ nop
+
+ /*
+ * 32M hash
+ */
+
+ GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
+ MMU_PAGESHIFT32M, TTE32M, tsb_shme_l32M, tsb_shme_checktte,
+ sfmmu_suspend_tl, tsb_shme_256M)
+ /* NOT REACHED */
+
+tsb_shme_256M:
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g4
+ and %g4, HAT_256M_FLAG, %g5
+ brz,pn %g5, tsb_pagefault
+ nop
+
+ /*
+ * 256M hash
+ */
+
+ GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1,
+ MMU_PAGESHIFT256M, TTE256M, tsb_shme_l256M, tsb_shme_checktte,
+ sfmmu_suspend_tl, tsb_pagefault)
+ /* NOT REACHED */
+
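Editor's note: the tsb_shme_* cascade above re-probes the hme hash once per page size for which the process has shared-region mappings, as recorded in TSBMISS_URTTEFLAGS; the 64K hash is always tried first. A schematic C rendering (the flag values are placeholders):

#include <stdint.h>

enum { TTE64K, TTE512K, TTE4M, TTE32M, TTE256M, NTTESZ };

#define HAT_512K_FLAG	0x01	/* placeholder values for the sketch */
#define HAT_4M_FLAG	0x02
#define HAT_32M_FLAG	0x04
#define HAT_256M_FLAG	0x08

static const uint8_t shme_size_flag[NTTESZ] = {
	0, HAT_512K_FLAG, HAT_4M_FLAG, HAT_32M_FLAG, HAT_256M_FLAG
};

/* Returns the tte size that hit, or -1 to fall through to tsb_pagefault. */
static int
shme_rehash(uint8_t urtteflags, int (*probe)(int ttesz))
{
	for (int sz = TTE64K; sz < NTTESZ; sz++) {
		if (sz != TTE64K && (urtteflags & shme_size_flag[sz]) == 0)
			continue;	/* no shared mappings of this size */
		if (probe(sz))
			return (sz);
	}
	return (-1);
}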
+tsb_shme_checktte:
+
+ brgez,pn %g3, tsb_pagefault
+ rdpr %tt, %g7
+ /*
+ * g1 = ctx1 flag
* g3 = tte
* g4 = tte pa
* g5 = tte va
* g6 = tsbmiss area
+ * g7 = tt
*/
- brgez,pn %g3, tsb_pagefault /* if tte invalid branch */
+
+ brz,pt %g1, tsb_validtte
nop
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g1
+ or %g1, HAT_CHKCTX1_FLAG, %g1
+ stub %g1, [%g6 + TSBMISS_URTTEFLAGS]
+
+ SAVE_CTX1(%g7, %g2, %g1, tsb_shmel)
+#endif /* sun4u */
tsb_validtte:
/*
+ * g3 = tte
+ * g4 = tte pa
+ * g5 = tte va
+ * g6 = tsbmiss area
+ * g7 = tt
+ */
+
+ /*
* Set ref/mod bits if this is a prot trap. Usually, it isn't.
*/
- rdpr %tt, %g7
cmp %g7, FAST_PROT_TT
bne,pt %icc, 4f
nop
@@ -3021,6 +3399,10 @@ tsb_validtte:
rdpr %tt, %g5
GET_MMU_D_TTARGET(%g2, %g7) /* %g2 = ttarget */
+#ifdef sun4v
+ MMU_FAULT_STATUS_AREA(%g7)
+ ldx [%g7 + MMFSA_D_ADDR], %g5 /* save fault addr for later */
+#endif
ba,pt %xcc, tsb_update_tl1
nop
@@ -3065,6 +3447,7 @@ tsb_validtte:
ldx [%g2 + MMFSA_CTX_], %g7
sllx %g7, TTARGET_CTX_SHIFT, %g7
ldx [%g2 + MMFSA_ADDR_], %g2
+ mov %g2, %g5 ! save the fault addr for later use
srlx %g2, TTARGET_VA_SHIFT, %g2
or %g2, %g7, %g2
#else
@@ -3102,6 +3485,17 @@ tsb_user:
#endif /* sun4v */
tsb_user8k:
+#ifdef sun4v
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g7
+ and %g7, HAT_CHKCTX1_FLAG, %g1
+ brz,a,pn %g1, 1f
+ ldn [%g6 + TSBMISS_TSBPTR], %g1 ! g1 = first TSB ptr
+ GET_UTSBREG_SHCTX(%g6, TSBMISS_TSBSCDPTR, %g1)
+ brlz,a,pn %g1, ptl1_panic ! if no shared tsb
+ mov PTL1_NO_SCDTSB8K, %g1 ! panic
+ GET_3RD_TSBE_PTR(%g5, %g1, %g6, %g7)
+1:
+#else
ldn [%g6 + TSBMISS_TSBPTR], %g1 ! g1 = first TSB ptr
#ifndef UTSB_PHYS
@@ -3109,9 +3503,12 @@ tsb_user8k:
mov %g7, %asi
#endif /* UTSB_PHYS */
+#endif /* sun4v */
+
TSB_UPDATE_TL(%g1, %g3, %g2, %g4, %g7, %g6, 5)
#ifdef sun4v
+ rdpr %tt, %g5
cmp %g5, T_INSTR_MMU_MISS
be,a,pn %xcc, 9f
mov %g3, %g5
@@ -3129,9 +3526,20 @@ tsb_user8k:
retry
tsb_user4m:
- ldn [%g6 + TSBMISS_TSBPTR4M], %g1 /* g1 = tsbp */
+#ifdef sun4v
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g7
+ and %g7, HAT_CHKCTX1_FLAG, %g1
+ brz,a,pn %g1, 4f
+ ldn [%g6 + TSBMISS_TSBPTR4M], %g1 ! g1 = TSB ptr
+ GET_UTSBREG_SHCTX(%g6, TSBMISS_TSBSCDPTR4M, %g1)
+ brlz,a,pn %g1, 5f ! if no shared 2nd tsb
+ nop
+ GET_4TH_TSBE_PTR(%g5, %g1, %g6, %g7)
+#else
+ ldn [%g6 + TSBMISS_TSBPTR4M], %g1 ! g1 = TSB ptr
+#endif
4:
- brz,pn %g1, 5f /* Check to see if we have 2nd TSB programmed */
+ brlz,pn %g1, 5f /* Check to see if we have 2nd TSB programmed */
nop
#ifndef UTSB_PHYS
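Editor's note: on sun4v the tsb_user8k/tsb_user4m changes above pick which TSB to update: the private one by default, or the SCD's shared TSB when HAT_CHKCTX1_FLAG was set for this miss. A sketch under an assumed struct layout and flag value:

#include <stdint.h>

#define HAT_CHKCTX1_FLAG	0x80	/* assumed value */

struct tsbmiss_sketch {
	uint8_t		urtteflags;
	uintptr_t	tsbptr;		/* private 8K TSB */
	intptr_t	tsbscdptr;	/* shared (SCD) 8K TSB, -1 if none */
};

/* Returns the TSB to update, or 0 where the 8K handler would ptl1_panic. */
static uintptr_t
pick_user8k_tsb(const struct tsbmiss_sketch *tm)
{
	if ((tm->urtteflags & HAT_CHKCTX1_FLAG) == 0)
		return (tm->tsbptr);
	if (tm->tsbscdptr < 0)
		return (0);	/* PTL1_NO_SCDTSB8K in the real handler */
	return ((uintptr_t)tm->tsbscdptr);
}

The 4M path differs only in that a missing shared second TSB simply skips the update (the branch to 5f) instead of panicking.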
@@ -3143,6 +3551,7 @@ tsb_user4m:
5:
#ifdef sun4v
+ rdpr %tt, %g5
cmp %g5, T_INSTR_MMU_MISS
be,a,pn %xcc, 9f
mov %g3, %g5
@@ -3182,7 +3591,7 @@ tsb_user_pn_synth:
andcc %g3, TTE_EXECPRM_INT, %g0 /* is execprm bit set */
bz,pn %icc, 4b /* if not, been here before */
ldn [%g6 + TSBMISS_TSBPTR4M], %g1 /* g1 = tsbp */
- brz,a,pn %g1, 5f /* no 2nd tsb */
+ brlz,a,pn %g1, 5f /* no 2nd tsb */
mov %g3, %g5
mov MMU_TAG_ACCESS, %g7
@@ -3202,7 +3611,7 @@ tsb_user_itlb_synth:
mov MMU_TAG_ACCESS, %g7
ldxa [%g7]ASI_IMMU, %g6 /* get tag access va */
GET_4M_PFN_OFF(%g3, %g6, %g5, %g7, 2) /* make 4M pfn offset */
- brz,a,pn %g1, 7f /* Check to see if we have 2nd TSB programmed */
+ brlz,a,pn %g1, 7f /* Check to see if we have 2nd TSB programmed */
or %g5, %g3, %g5 /* add 4M bits to TTE */
mov ASI_N, %g7 /* user TSBs always accessed by VA */
@@ -3216,6 +3625,7 @@ tsb_user_itlb_synth:
tsb_kernel:
#ifdef sun4v
+ rdpr %tt, %g5
cmp %g7, TTE4M
bge,pn %icc, 5f
#else
@@ -3228,7 +3638,7 @@ tsb_kernel:
nop
5:
ldn [%g6 + TSBMISS_TSBPTR4M], %g1 ! g1 = 4m tsbptr
- brz,pn %g1, 3f /* skip programming if 4m TSB ptr is NULL */
+ brlz,pn %g1, 3f /* skip programming if 4m TSB ptr is -1 */
nop
6:
#ifndef sun4v
@@ -3270,26 +3680,38 @@ tsb_ism:
mov PTL1_BAD_ISM, %g1
/* g5 = pa of imap_vb_shift */
sub %g4, (IMAP_ISMHAT - IMAP_VB_SHIFT), %g5
- lduha [%g5]ASI_MEM, %g4 /* g4 = imap_vb_shift */
+ lduba [%g5]ASI_MEM, %g4 /* g4 = imap_vb_shift */
srlx %g3, %g4, %g3 /* clr size field */
- set TAGACC_CTX_MASK, %g1 /* mask off ctx number/type */
- sllx %g3, %g4, %g3 /* g3 = ism vbase */
- and %g2, %g1, %g4 /* g4 = ctx number */
- andn %g2, %g1, %g1 /* g1 = tlb miss vaddr */
- sub %g1, %g3, %g2 /* g2 = offset in ISM seg */
- or %g2, %g4, %g2 /* g2 = (pseudo-)tagacc */
-
+ set TAGACC_CTX_MASK, %g1 /* mask off ctx number */
+ sllx %g3, %g4, %g3 /* g3 = ism vbase */
+ and %g2, %g1, %g4 /* g4 = ctx number */
+ andn %g2, %g1, %g1 /* g1 = tlb miss vaddr */
+ sub %g1, %g3, %g2 /* g2 = offset in ISM seg */
+ or %g2, %g4, %g2 /* g2 = (pseudo-)tagacc */
+ sub %g5, (IMAP_VB_SHIFT - IMAP_HATFLAGS), %g5
+ lduha [%g5]ASI_MEM, %g4 /* g5 = pa of imap_hatflags */
+#ifdef sun4v
+ and %g4, HAT_CTX1_FLAG, %g5 /* g5 = imap_hatflags */
+ brz,pt %g5, tsb_chk4M_ism
+ nop
+ ldub [%g6 + TSBMISS_URTTEFLAGS], %g5
+ or %g5, HAT_CHKCTX1_FLAG, %g5
+ stub %g5, [%g6 + TSBMISS_URTTEFLAGS]
+ rdpr %tt, %g5
+ SAVE_CTX1(%g5, %g3, %g1, tsb_shctxl)
+#endif
/*
* ISM pages are always locked down.
* If we can't find the tte then pagefault
- * and let the spt segment driver resovle it.
+ * and let the spt segment driver resolve it.
*
- * g2 = ISM vaddr (offset in ISM seg)
+ * g2 = tagacc w/ISM vaddr (offset in ISM seg)
+ * g4 = imap_hatflags
* g6 = tsb miss area
* g7 = ISM hatid
*/
- sub %g5, (IMAP_VB_SHIFT - IMAP_HATFLAGS), %g5
- lduha [%g5]ASI_MEM, %g4 /* g5 = pa of imap_hatflags */
+
+tsb_chk4M_ism:
and %g4, HAT_4M_FLAG, %g5 /* g4 = imap_hatflags */
brnz,pt %g5, tsb_ism_4M /* branch if 4M pages */
nop
@@ -3309,8 +3731,8 @@ tsb_ism_32M:
/* NOT REACHED */
tsb_ism_32M_found:
- brlz,pt %g3, tsb_validtte
- nop
+ brlz,a,pt %g3, tsb_validtte
+ rdpr %tt, %g7
ba,pt %xcc, tsb_ism_4M
nop
@@ -3327,8 +3749,8 @@ tsb_ism_256M:
tsb_ism_4M)
tsb_ism_256M_found:
- brlz,pt %g3, tsb_validtte
- nop
+ brlz,a,pt %g3, tsb_validtte
+ rdpr %tt, %g7
tsb_ism_4M:
/*
@@ -3340,8 +3762,8 @@ tsb_ism_4M:
/* NOT REACHED */
tsb_ism_4M_found:
- brlz,pt %g3, tsb_validtte
- nop
+ brlz,a,pt %g3, tsb_validtte
+ rdpr %tt, %g7
tsb_ism_8K:
/*
@@ -3354,8 +3776,8 @@ tsb_ism_8K:
/* NOT REACHED */
tsb_ism_8K_found:
- brlz,pt %g3, tsb_validtte
- nop
+ brlz,a,pt %g3, tsb_validtte
+ rdpr %tt, %g7
tsb_pagefault:
rdpr %tt, %g7
@@ -3457,7 +3879,7 @@ tsb_protfault:
add %g1, %g2, %g1
lduh [%g1 + CPUC_DTRACE_FLAGS], %g2
andcc %g2, CPU_DTRACE_NOFAULT, %g0
- bz sfmmu_pagefault
+ bz sfmmu_mmu_trap
or %g2, CPU_DTRACE_BADADDR, %g2
stuh %g2, [%g1 + CPUC_DTRACE_FLAGS]
GET_MMU_D_ADDR(%g3, %g4)
@@ -3531,11 +3953,6 @@ tsb_protfault:
ba,pt %icc, sfmmu_window_trap
nop
SET_SIZE(sfmmu_tsb_miss)
-
-#if (1<< TSBMISS_SHIFT) != TSBMISS_SIZE
-#error - TSBMISS_SHIFT does not correspond to size of tsbmiss struct
-#endif
-
#endif /* lint */
#if defined (lint)
@@ -4442,8 +4859,8 @@ sfmmu_dslow_patch_ktsb4m_szcode:
/* %g1 = first TSB entry ptr now, %g2 preserved */
GET_UTSBREG(SCRATCHPAD_UTSBREG2, %g3) /* get 2nd utsbreg */
- brlz,a,pt %g3, sfmmu_tsb_miss_tt /* done if no 2nd TSB */
- mov %g0, %g3
+ brlz,pt %g3, sfmmu_tsb_miss_tt /* done if no 2nd TSB */
+ nop
GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5)
/* %g3 = second TSB entry ptr now, %g2 preserved */
diff --git a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
index 9b110396b5..53c6392859 100644
--- a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
+++ b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -104,7 +104,7 @@ hash_done:
* {
* uint_t hmeshift = HME_HASH_SHIFT(rehash);
* uint64_t bspage = HME_HASH_BSPAGE(va, hmeshift);
- * return (rehash | (bspage << HTAG_REHASHSZ));
+ * return (rehash | (bspage << HTAG_BSPAGE_SHIFT));
* }
*/
@@ -127,8 +127,10 @@ bspage: /* TTE_PAGE_SHIFT in %g5 */ \
sllx %g6, %g5, %g5; \
\
/* BSPAGE in %g5 */ \
- sllx %g5, HTAG_REHASHSZ, %g5; \
- or %g5, %g3, %g5
+ sllx %g5, HTAG_BSPAGE_SHIFT, %g5; \
+ sllx %g3, HTAG_REHASH_SHIFT, %g6; \
+ or %g6, SFMMU_INVALID_SHMERID, %g6; \
+ or %g5, %g6, %g5
/*
* uint64_t
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
index 1a66900b63..eeb44a0dd5 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
@@ -84,6 +84,67 @@
#include <vm/xhat_sfmmu.h>
#include <sys/fpu/fpusystm.h>
#include <vm/mach_kpm.h>
+#include <sys/callb.h>
+
+#ifdef DEBUG
+#define SFMMU_VALIDATE_HMERID(hat, rid, saddr, len) \
+ if (SFMMU_IS_SHMERID_VALID(rid)) { \
+ caddr_t _eaddr = (saddr) + (len); \
+ sf_srd_t *_srdp; \
+ sf_region_t *_rgnp; \
+ ASSERT((rid) < SFMMU_MAX_HME_REGIONS); \
+ ASSERT(SF_RGNMAP_TEST(hat->sfmmu_hmeregion_map, rid)); \
+ ASSERT((hat) != ksfmmup); \
+ _srdp = (hat)->sfmmu_srdp; \
+ ASSERT(_srdp != NULL); \
+ ASSERT(_srdp->srd_refcnt != 0); \
+ _rgnp = _srdp->srd_hmergnp[(rid)]; \
+ ASSERT(_rgnp != NULL && _rgnp->rgn_id == rid); \
+ ASSERT(_rgnp->rgn_refcnt != 0); \
+ ASSERT(!(_rgnp->rgn_flags & SFMMU_REGION_FREE)); \
+ ASSERT((_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == \
+ SFMMU_REGION_HME); \
+ ASSERT((saddr) >= _rgnp->rgn_saddr); \
+ ASSERT((saddr) < _rgnp->rgn_saddr + _rgnp->rgn_size); \
+ ASSERT(_eaddr > _rgnp->rgn_saddr); \
+ ASSERT(_eaddr <= _rgnp->rgn_saddr + _rgnp->rgn_size); \
+ }
+
+#define SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid) \
+{ \
+ caddr_t _hsva; \
+ caddr_t _heva; \
+ caddr_t _rsva; \
+ caddr_t _reva; \
+ int _ttesz = get_hblk_ttesz(hmeblkp); \
+ int _flagtte; \
+ ASSERT((srdp)->srd_refcnt != 0); \
+ ASSERT((rid) < SFMMU_MAX_HME_REGIONS); \
+ ASSERT((rgnp)->rgn_id == rid); \
+ ASSERT(!((rgnp)->rgn_flags & SFMMU_REGION_FREE)); \
+ ASSERT(((rgnp)->rgn_flags & SFMMU_REGION_TYPE_MASK) == \
+ SFMMU_REGION_HME); \
+ ASSERT(_ttesz <= (rgnp)->rgn_pgszc); \
+ _hsva = (caddr_t)get_hblk_base(hmeblkp); \
+ _heva = get_hblk_endaddr(hmeblkp); \
+ _rsva = (caddr_t)P2ALIGN( \
+ (uintptr_t)(rgnp)->rgn_saddr, HBLK_MIN_BYTES); \
+ _reva = (caddr_t)P2ROUNDUP( \
+ (uintptr_t)((rgnp)->rgn_saddr + (rgnp)->rgn_size), \
+ HBLK_MIN_BYTES); \
+ ASSERT(_hsva >= _rsva); \
+ ASSERT(_hsva < _reva); \
+ ASSERT(_heva > _rsva); \
+ ASSERT(_heva <= _reva); \
+ _flagtte = (_ttesz < HBLK_MIN_TTESZ) ? HBLK_MIN_TTESZ : \
+ _ttesz; \
+ ASSERT(rgnp->rgn_hmeflags & (0x1 << _flagtte)); \
+}
+
+#else /* DEBUG */
+#define SFMMU_VALIDATE_HMERID(hat, rid, addr, len)
+#define SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid)
+#endif /* DEBUG */
#if defined(SF_ERRATA_57)
extern caddr_t errata57_limit;
@@ -166,6 +227,7 @@ static struct kmem_cache *mmuctxdom_cache;
static struct kmem_cache *sfmmu_tsbinfo_cache;
static struct kmem_cache *sfmmu_tsb8k_cache;
static struct kmem_cache *sfmmu_tsb_cache[NLGRPS_MAX];
+static vmem_t *kmem_bigtsb_arena;
static vmem_t *kmem_tsb_arena;
/*
@@ -185,11 +247,63 @@ static struct kmem_cache *ism_ment_cache;
#define ISMID_STARTADDR NULL
/*
- * Whether to delay TLB flushes and use Cheetah's flush-all support
- * when removing contexts from the dirty list.
+ * Region management data structures and function declarations.
*/
-int delay_tlb_flush;
-int disable_delay_tlb_flush;
+
+static void sfmmu_leave_srd(sfmmu_t *);
+static int sfmmu_srdcache_constructor(void *, void *, int);
+static void sfmmu_srdcache_destructor(void *, void *);
+static int sfmmu_rgncache_constructor(void *, void *, int);
+static void sfmmu_rgncache_destructor(void *, void *);
+static int sfrgnmap_isnull(sf_region_map_t *);
+static int sfhmergnmap_isnull(sf_hmeregion_map_t *);
+static int sfmmu_scdcache_constructor(void *, void *, int);
+static void sfmmu_scdcache_destructor(void *, void *);
+static void sfmmu_rgn_cb_noop(caddr_t, caddr_t, caddr_t,
+ size_t, void *, u_offset_t);
+
+static uint_t srd_hashmask = SFMMU_MAX_SRD_BUCKETS - 1;
+static sf_srd_bucket_t *srd_buckets;
+static struct kmem_cache *srd_cache;
+static uint_t srd_rgn_hashmask = SFMMU_MAX_REGION_BUCKETS - 1;
+static struct kmem_cache *region_cache;
+static struct kmem_cache *scd_cache;
+
+#ifdef sun4v
+int use_bigtsb_arena = 1;
+#else
+int use_bigtsb_arena = 0;
+#endif
+
+/* External /etc/system tunable, for turning shctx support on and off */
+int disable_shctx = 0;
+/* Internal variable, set by MD if the HW supports shctx feature */
+int shctx_on = 0;
+
+#ifdef DEBUG
+static void check_scd_sfmmu_list(sfmmu_t **, sfmmu_t *, int);
+#endif
+static void sfmmu_to_scd_list(sfmmu_t **, sfmmu_t *);
+static void sfmmu_from_scd_list(sfmmu_t **, sfmmu_t *);
+
+static sf_scd_t *sfmmu_alloc_scd(sf_srd_t *, sf_region_map_t *);
+static void sfmmu_find_scd(sfmmu_t *);
+static void sfmmu_join_scd(sf_scd_t *, sfmmu_t *);
+static void sfmmu_finish_join_scd(sfmmu_t *);
+static void sfmmu_leave_scd(sfmmu_t *, uchar_t);
+static void sfmmu_destroy_scd(sf_srd_t *, sf_scd_t *, sf_region_map_t *);
+static int sfmmu_alloc_scd_tsbs(sf_srd_t *, sf_scd_t *);
+static void sfmmu_free_scd_tsbs(sfmmu_t *);
+static void sfmmu_tsb_inv_ctx(sfmmu_t *);
+static int find_ism_rid(sfmmu_t *, sfmmu_t *, caddr_t, uint_t *);
+static void sfmmu_ism_hatflags(sfmmu_t *, int);
+static int sfmmu_srd_lock_held(sf_srd_t *);
+static void sfmmu_remove_scd(sf_scd_t **, sf_scd_t *);
+static void sfmmu_add_scd(sf_scd_t **headp, sf_scd_t *);
+static void sfmmu_link_scd_to_regions(sf_srd_t *, sf_scd_t *);
+static void sfmmu_unlink_scd_from_regions(sf_srd_t *, sf_scd_t *);
+static void sfmmu_link_to_hmeregion(sfmmu_t *, sf_region_t *);
+static void sfmmu_unlink_from_hmeregion(sfmmu_t *, sf_region_t *);
/*
* ``hat_lock'' is a hashed mutex lock for protecting sfmmu TSB lists,
@@ -279,7 +393,8 @@ int hat_check_vtop = 0;
*/
static struct hme_blk *sfmmu_shadow_hcreate(sfmmu_t *, caddr_t, int, uint_t);
static struct hme_blk *sfmmu_hblk_alloc(sfmmu_t *, caddr_t,
- struct hmehash_bucket *, uint_t, hmeblk_tag, uint_t);
+ struct hmehash_bucket *, uint_t, hmeblk_tag, uint_t,
+ uint_t);
static caddr_t sfmmu_hblk_unload(struct hat *, struct hme_blk *, caddr_t,
caddr_t, demap_range_t *, uint_t);
static caddr_t sfmmu_hblk_sync(struct hat *, struct hme_blk *, caddr_t,
@@ -295,22 +410,27 @@ static int sfmmu_steal_this_hblk(struct hmehash_bucket *,
struct hme_blk *);
static caddr_t sfmmu_hblk_unlock(struct hme_blk *, caddr_t, caddr_t);
+static void hat_do_memload_array(struct hat *, caddr_t, size_t,
+ struct page **, uint_t, uint_t, uint_t);
+static void hat_do_memload(struct hat *, caddr_t, struct page *,
+ uint_t, uint_t, uint_t);
static void sfmmu_memload_batchsmall(struct hat *, caddr_t, page_t **,
- uint_t, uint_t, pgcnt_t);
+ uint_t, uint_t, pgcnt_t, uint_t);
void sfmmu_tteload(struct hat *, tte_t *, caddr_t, page_t *,
uint_t);
static int sfmmu_tteload_array(sfmmu_t *, tte_t *, caddr_t, page_t **,
- uint_t);
+ uint_t, uint_t);
static struct hmehash_bucket *sfmmu_tteload_acquire_hashbucket(sfmmu_t *,
- caddr_t, int);
+ caddr_t, int, uint_t);
static struct hme_blk *sfmmu_tteload_find_hmeblk(sfmmu_t *,
- struct hmehash_bucket *, caddr_t, uint_t, uint_t);
+ struct hmehash_bucket *, caddr_t, uint_t, uint_t,
+ uint_t);
static int sfmmu_tteload_addentry(sfmmu_t *, struct hme_blk *, tte_t *,
- caddr_t, page_t **, uint_t);
+ caddr_t, page_t **, uint_t, uint_t);
static void sfmmu_tteload_release_hashbucket(struct hmehash_bucket *);
static int sfmmu_pagearray_setup(caddr_t, page_t **, tte_t *, int);
-pfn_t sfmmu_uvatopfn(caddr_t, sfmmu_t *);
+static pfn_t sfmmu_uvatopfn(caddr_t, sfmmu_t *, tte_t *);
void sfmmu_memtte(tte_t *, pfn_t, uint_t, int);
#ifdef VAC
static void sfmmu_vac_conflict(struct hat *, caddr_t, page_t *);
@@ -322,7 +442,6 @@ void conv_tnc(page_t *pp, int);
static void sfmmu_get_ctx(sfmmu_t *);
static void sfmmu_free_sfmmu(sfmmu_t *);
-static void sfmmu_gettte(struct hat *, caddr_t, tte_t *);
static void sfmmu_ttesync(struct hat *, caddr_t, tte_t *, page_t *);
static void sfmmu_chgattr(struct hat *, caddr_t, size_t, uint_t, int);
@@ -334,6 +453,8 @@ void sfmmu_page_cache_array(page_t *, int, int, pgcnt_t);
static void sfmmu_page_cache(page_t *, int, int, int);
#endif
+cpuset_t sfmmu_rgntlb_demap(caddr_t, sf_region_t *,
+ struct hme_blk *, int);
static void sfmmu_tlbcache_demap(caddr_t, sfmmu_t *, struct hme_blk *,
pfn_t, int, int, int, int);
static void sfmmu_ismtlbcache_demap(caddr_t, sfmmu_t *, struct hme_blk *,
@@ -350,7 +471,7 @@ static void sfmmu_tsb_free(struct tsb_info *);
static void sfmmu_tsbinfo_free(struct tsb_info *);
static int sfmmu_init_tsbinfo(struct tsb_info *, int, int, uint_t,
sfmmu_t *);
-
+static void sfmmu_tsb_chk_reloc(sfmmu_t *, hatlock_t *);
static void sfmmu_tsb_swapin(sfmmu_t *, hatlock_t *);
static int sfmmu_select_tsb_szc(pgcnt_t);
static void sfmmu_mod_tsb(sfmmu_t *, caddr_t, tte_t *, int);
@@ -383,20 +504,24 @@ static void sfmmu_hblkcache_reclaim(void *);
static void sfmmu_shadow_hcleanup(sfmmu_t *, struct hme_blk *,
struct hmehash_bucket *);
static void sfmmu_free_hblks(sfmmu_t *, caddr_t, caddr_t, int);
+static void sfmmu_cleanup_rhblk(sf_srd_t *, caddr_t, uint_t, int);
+static void sfmmu_unload_hmeregion_va(sf_srd_t *, uint_t, caddr_t, caddr_t,
+ int, caddr_t *);
+static void sfmmu_unload_hmeregion(sf_srd_t *, sf_region_t *);
+
static void sfmmu_rm_large_mappings(page_t *, int);
static void hat_lock_init(void);
static void hat_kstat_init(void);
static int sfmmu_kstat_percpu_update(kstat_t *ksp, int rw);
+static void sfmmu_set_scd_rttecnt(sf_srd_t *, sf_scd_t *);
+static int sfmmu_is_rgnva(sf_srd_t *, caddr_t, ulong_t, ulong_t);
static void sfmmu_check_page_sizes(sfmmu_t *, int);
int fnd_mapping_sz(page_t *);
static void iment_add(struct ism_ment *, struct hat *);
static void iment_sub(struct ism_ment *, struct hat *);
static pgcnt_t ism_tsb_entries(sfmmu_t *, int szc);
extern void sfmmu_setup_tsbinfo(sfmmu_t *);
-#ifdef sun4v
-extern void sfmmu_invalidate_tsbinfo(sfmmu_t *);
-#endif /* sun4v */
extern void sfmmu_clear_utsbinfo(void);
static void sfmmu_ctx_wrap_around(mmu_ctx_t *);
@@ -466,6 +591,7 @@ caddr_t utsb4m_vabase; /* for trap handler TSB accesses */
#endif /* sun4v */
uint64_t tsb_alloc_bytes = 0; /* bytes allocated to TSBs */
vmem_t *kmem_tsb_default_arena[NLGRPS_MAX]; /* For dynamic TSBs */
+vmem_t *kmem_bigtsb_default_arena[NLGRPS_MAX]; /* dynamic 256M TSBs */
/*
* Size to use for TSB slabs. Future platforms that support page sizes
@@ -473,13 +599,24 @@ vmem_t *kmem_tsb_default_arena[NLGRPS_MAX]; /* For dynamic TSBs */
* assembly macros for building and decoding the TSB base register contents.
* Note disable_large_pages will override the value set here.
*/
-uint_t tsb_slab_ttesz = TTE4M;
-uint_t tsb_slab_size;
-uint_t tsb_slab_shift;
-uint_t tsb_slab_mask; /* PFN mask for TTE */
+static uint_t tsb_slab_ttesz = TTE4M;
+size_t tsb_slab_size = MMU_PAGESIZE4M;
+uint_t tsb_slab_shift = MMU_PAGESHIFT4M;
+/* PFN mask for TTE */
+size_t tsb_slab_mask = MMU_PAGEOFFSET4M >> MMU_PAGESHIFT;
+
+/*
+ * Size to use for TSB slabs. These are used only when 256M tsb arenas
+ * exist.
+ */
+static uint_t bigtsb_slab_ttesz = TTE256M;
+static size_t bigtsb_slab_size = MMU_PAGESIZE256M;
+static uint_t bigtsb_slab_shift = MMU_PAGESHIFT256M;
+/* 256M page alignment for 8K pfn */
+static size_t bigtsb_slab_mask = MMU_PAGEOFFSET256M >> MMU_PAGESHIFT;
/* largest TSB size to grow to, will be smaller on smaller memory systems */
-int tsb_max_growsize = UTSB_MAX_SZCODE;
+static int tsb_max_growsize = 0;
/*
* Tunable parameters dealing with TSB policies.
@@ -546,7 +683,12 @@ int tsb_remap_ttes = 1;
* assumed to have at least 8 available entries. Platforms with a
* larger fully-associative TLB could probably override the default.
*/
+
+#ifdef sun4v
+int tsb_sectsb_threshold = 0;
+#else
int tsb_sectsb_threshold = 8;
+#endif
/*
* kstat data
@@ -689,51 +831,44 @@ sfmmu_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag)
* 2) TSBs can't grow larger than UTSB_MAX_SZCODE.
*/
#define SFMMU_SET_TSB_MAX_GROWSIZE(pages) { \
- int i, szc; \
+ int _i, _szc, _slabszc, _tsbszc; \
\
- i = highbit(pages); \
- if ((1 << (i - 1)) == (pages)) \
- i--; /* 2^n case, round down */ \
- szc = i - TSB_START_SIZE; \
- if (szc < tsb_max_growsize) \
- tsb_max_growsize = szc; \
- else if ((szc > tsb_max_growsize) && \
- (szc <= tsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT))) \
- tsb_max_growsize = MIN(szc, UTSB_MAX_SZCODE); \
+ _i = highbit(pages); \
+ if ((1 << (_i - 1)) == (pages)) \
+ _i--; /* 2^n case, round down */ \
+ _szc = _i - TSB_START_SIZE; \
+ _slabszc = bigtsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT); \
+ _tsbszc = MIN(_szc, _slabszc); \
+ tsb_max_growsize = MIN(_tsbszc, UTSB_MAX_SZCODE); \
}
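Editor's note: the reworked macro above is easier to read as straight C; highbit() is rendered with a compiler builtin here, and the TSB_* / UTSB_MAX_SZCODE values are placeholders, so treat this as a sketch rather than the kernel's arithmetic verbatim.

#define TSB_START_SIZE		9	/* placeholder values for the sketch */
#define TSB_ENTRY_SHIFT		4
#define UTSB_MAX_SZCODE		10

static int
tsb_max_growsize_for(unsigned long pages, int bigtsb_slab_shift)
{
	int i = 64 - __builtin_clzl(pages);	/* highbit(pages), pages > 0 */
	if ((1UL << (i - 1)) == pages)
		i--;				/* 2^n case, round down */
	int szc = i - TSB_START_SIZE;
	int slabszc = bigtsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT);
	int tsbszc = (szc < slabszc) ? szc : slabszc;
	return ((tsbszc < UTSB_MAX_SZCODE) ? tsbszc : UTSB_MAX_SZCODE);
}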
/*
* Given a pointer to an sfmmu and a TTE size code, return a pointer to the
* tsb_info which handles that TTE size.
*/
-#define SFMMU_GET_TSBINFO(tsbinfop, sfmmup, tte_szc) \
+#define SFMMU_GET_TSBINFO(tsbinfop, sfmmup, tte_szc) { \
(tsbinfop) = (sfmmup)->sfmmu_tsb; \
- ASSERT(sfmmu_hat_lock_held(sfmmup)); \
- if ((tte_szc) >= TTE4M) \
- (tsbinfop) = (tsbinfop)->tsb_next;
-
-/*
- * Return the number of mappings present in the HAT
- * for a particular process and page size.
- */
-#define SFMMU_TTE_CNT(sfmmup, szc) \
- (sfmmup)->sfmmu_iblk? \
- (sfmmup)->sfmmu_ismttecnt[(szc)] + \
- (sfmmup)->sfmmu_ttecnt[(szc)] : \
- (sfmmup)->sfmmu_ttecnt[(szc)];
+ ASSERT(((tsbinfop)->tsb_flags & TSB_SHAREDCTX) || \
+ sfmmu_hat_lock_held(sfmmup)); \
+ if ((tte_szc) >= TTE4M) { \
+ ASSERT((tsbinfop) != NULL); \
+ (tsbinfop) = (tsbinfop)->tsb_next; \
+ } \
+}
/*
* Macro to use to unload entries from the TSB.
* It has knowledge of which page sizes get replicated in the TSB
* and will call the appropriate unload routine for the appropriate size.
*/
-#define SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp) \
+#define SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, ismhat) \
{ \
int ttesz = get_hblk_ttesz(hmeblkp); \
if (ttesz == TTE8K || ttesz == TTE4M) { \
sfmmu_unload_tsb(sfmmup, addr, ttesz); \
} else { \
- caddr_t sva = (caddr_t)get_hblk_base(hmeblkp); \
+ caddr_t sva = ismhat ? addr : \
+ (caddr_t)get_hblk_base(hmeblkp); \
caddr_t eva = sva + get_hblk_span(hmeblkp); \
ASSERT(addr >= sva && addr < eva); \
sfmmu_unload_tsb_range(sfmmup, sva, eva, ttesz); \
@@ -744,7 +879,7 @@ sfmmu_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag)
/* Update tsb_alloc_hiwater after memory is configured. */
/*ARGSUSED*/
static void
-sfmmu_update_tsb_post_add(void *arg, pgcnt_t delta_pages)
+sfmmu_update_post_add(void *arg, pgcnt_t delta_pages)
{
/* Assumes physmem has already been updated. */
SFMMU_SET_TSB_ALLOC_HIWATER(physmem);
@@ -758,7 +893,7 @@ sfmmu_update_tsb_post_add(void *arg, pgcnt_t delta_pages)
*/
/*ARGSUSED*/
static int
-sfmmu_update_tsb_pre_del(void *arg, pgcnt_t delta_pages)
+sfmmu_update_pre_del(void *arg, pgcnt_t delta_pages)
{
return (0);
}
@@ -766,7 +901,7 @@ sfmmu_update_tsb_pre_del(void *arg, pgcnt_t delta_pages)
/* Update tsb_alloc_hiwater after memory fails to be unconfigured. */
/*ARGSUSED*/
static void
-sfmmu_update_tsb_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
+sfmmu_update_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
{
/*
* Whether the delete was cancelled or not, just go ahead and update
@@ -776,11 +911,11 @@ sfmmu_update_tsb_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
SFMMU_SET_TSB_MAX_GROWSIZE(physmem);
}
-static kphysm_setup_vector_t sfmmu_update_tsb_vec = {
+static kphysm_setup_vector_t sfmmu_update_vec = {
KPHYSM_SETUP_VECTOR_VERSION, /* version */
- sfmmu_update_tsb_post_add, /* post_add */
- sfmmu_update_tsb_pre_del, /* pre_del */
- sfmmu_update_tsb_post_del /* post_del */
+ sfmmu_update_post_add, /* post_add */
+ sfmmu_update_pre_del, /* pre_del */
+ sfmmu_update_post_del /* post_del */
};
@@ -936,7 +1071,6 @@ hat_init(void)
{
int i;
uint_t sz;
- uint_t maxtsb;
size_t size;
hat_lock_init();
@@ -1048,7 +1182,25 @@ hat_init(void)
}
SFMMU_SET_TSB_ALLOC_HIWATER(physmem);
- /* Set tsb_max_growsize. */
+ for (sz = tsb_slab_ttesz; sz > 0; sz--) {
+ if (!(disable_large_pages & (1 << sz)))
+ break;
+ }
+
+ if (sz < tsb_slab_ttesz) {
+ tsb_slab_ttesz = sz;
+ tsb_slab_shift = MMU_PAGESHIFT + (sz << 1) + sz;
+ tsb_slab_size = 1 << tsb_slab_shift;
+ tsb_slab_mask = (1 << (tsb_slab_shift - MMU_PAGESHIFT)) - 1;
+ use_bigtsb_arena = 0;
+ } else if (use_bigtsb_arena &&
+ (disable_large_pages & (1 << bigtsb_slab_ttesz))) {
+ use_bigtsb_arena = 0;
+ }
+
+ if (!use_bigtsb_arena) {
+ bigtsb_slab_shift = tsb_slab_shift;
+ }
SFMMU_SET_TSB_MAX_GROWSIZE(physmem);
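Editor's note: the fallback slab parameters computed just above follow a simple relation: each TTE size code is 8x the previous, so the slab shift grows by three bits per step from the 8K base. A small self-contained check of that relation (MMU_PAGESHIFT assumed to be the 8K shift):

#include <assert.h>

#define MMU_PAGESHIFT	13	/* 8K base pages */

static unsigned long
tsb_slab_size_for(int ttesz)	/* 0=8K, 1=64K, 2=512K, 3=4M, ... */
{
	int shift = MMU_PAGESHIFT + (ttesz << 1) + ttesz;	/* +3 per step */
	return (1UL << shift);
}

int
main(void)
{
	assert(tsb_slab_size_for(3) == 4UL * 1024 * 1024);	/* TTE4M slab */
	assert(tsb_slab_size_for(2) == 512UL * 1024);		/* TTE512K slab */
	return (0);
}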
/*
@@ -1059,28 +1211,28 @@ hat_init(void)
* The trap handlers need to be patched with the final slab shift,
* since they need to be able to construct the TSB pointer at runtime.
*/
- if (tsb_max_growsize <= TSB_512K_SZCODE)
+ if ((tsb_max_growsize <= TSB_512K_SZCODE) &&
+ !(disable_large_pages & (1 << TTE512K))) {
tsb_slab_ttesz = TTE512K;
-
- for (sz = tsb_slab_ttesz; sz > 0; sz--) {
- if (!(disable_large_pages & (1 << sz)))
- break;
+ tsb_slab_shift = MMU_PAGESHIFT512K;
+ tsb_slab_size = MMU_PAGESIZE512K;
+ tsb_slab_mask = MMU_PAGEOFFSET512K >> MMU_PAGESHIFT;
+ use_bigtsb_arena = 0;
}
- tsb_slab_ttesz = sz;
- tsb_slab_shift = MMU_PAGESHIFT + (sz << 1) + sz;
- tsb_slab_size = 1 << tsb_slab_shift;
- tsb_slab_mask = (1 << (tsb_slab_shift - MMU_PAGESHIFT)) - 1;
+ if (!use_bigtsb_arena) {
+ bigtsb_slab_ttesz = tsb_slab_ttesz;
+ bigtsb_slab_shift = tsb_slab_shift;
+ bigtsb_slab_size = tsb_slab_size;
+ bigtsb_slab_mask = tsb_slab_mask;
+ }
- maxtsb = tsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT);
- if (tsb_max_growsize > maxtsb)
- tsb_max_growsize = maxtsb;
/*
* Set up memory callback to update tsb_alloc_hiwater and
* tsb_max_growsize.
*/
- i = kphysm_setup_func_register(&sfmmu_update_tsb_vec, (void *) 0);
+ i = kphysm_setup_func_register(&sfmmu_update_vec, (void *) 0);
ASSERT(i == 0);
/*
@@ -1099,30 +1251,56 @@ hat_init(void)
* because vmem_create doesn't allow us to specify alignment
* requirements. If this ever changes the code could be
* simplified to use only one level of arenas.
+ *
+ * If 256M page support exists on sun4v, 256MB kmem_bigtsb_arena
+ * will be provided in addition to the 4M kmem_tsb_arena.
*/
+ if (use_bigtsb_arena) {
+ kmem_bigtsb_arena = vmem_create("kmem_bigtsb", NULL, 0,
+ bigtsb_slab_size, sfmmu_vmem_xalloc_aligned_wrapper,
+ vmem_xfree, heap_arena, 0, VM_SLEEP);
+ }
+
kmem_tsb_arena = vmem_create("kmem_tsb", NULL, 0, tsb_slab_size,
- sfmmu_vmem_xalloc_aligned_wrapper, vmem_xfree, heap_arena,
- 0, VM_SLEEP);
+ sfmmu_vmem_xalloc_aligned_wrapper,
+ vmem_xfree, heap_arena, 0, VM_SLEEP);
if (tsb_lgrp_affinity) {
char s[50];
for (i = 0; i < NLGRPS_MAX; i++) {
+ if (use_bigtsb_arena) {
+ (void) sprintf(s, "kmem_bigtsb_lgrp%d", i);
+ kmem_bigtsb_default_arena[i] = vmem_create(s,
+ NULL, 0, 2 * tsb_slab_size,
+ sfmmu_tsb_segkmem_alloc,
+ sfmmu_tsb_segkmem_free, kmem_bigtsb_arena,
+ 0, VM_SLEEP | VM_BESTFIT);
+ }
+
(void) sprintf(s, "kmem_tsb_lgrp%d", i);
- kmem_tsb_default_arena[i] =
- vmem_create(s, NULL, 0, PAGESIZE,
- sfmmu_tsb_segkmem_alloc, sfmmu_tsb_segkmem_free,
- kmem_tsb_arena, 0, VM_SLEEP | VM_BESTFIT);
+ kmem_tsb_default_arena[i] = vmem_create(s,
+ NULL, 0, PAGESIZE, sfmmu_tsb_segkmem_alloc,
+ sfmmu_tsb_segkmem_free, kmem_tsb_arena, 0,
+ VM_SLEEP | VM_BESTFIT);
+
(void) sprintf(s, "sfmmu_tsb_lgrp%d_cache", i);
- sfmmu_tsb_cache[i] = kmem_cache_create(s, PAGESIZE,
- PAGESIZE, NULL, NULL, NULL, NULL,
+ sfmmu_tsb_cache[i] = kmem_cache_create(s,
+ PAGESIZE, PAGESIZE, NULL, NULL, NULL, NULL,
kmem_tsb_default_arena[i], 0);
}
} else {
+ if (use_bigtsb_arena) {
+ kmem_bigtsb_default_arena[0] =
+ vmem_create("kmem_bigtsb_default", NULL, 0,
+ 2 * tsb_slab_size, sfmmu_tsb_segkmem_alloc,
+ sfmmu_tsb_segkmem_free, kmem_bigtsb_arena, 0,
+ VM_SLEEP | VM_BESTFIT);
+ }
+
kmem_tsb_default_arena[0] = vmem_create("kmem_tsb_default",
NULL, 0, PAGESIZE, sfmmu_tsb_segkmem_alloc,
sfmmu_tsb_segkmem_free, kmem_tsb_arena, 0,
VM_SLEEP | VM_BESTFIT);
-
sfmmu_tsb_cache[0] = kmem_cache_create("sfmmu_tsb_cache",
PAGESIZE, PAGESIZE, NULL, NULL, NULL, NULL,
kmem_tsb_default_arena[0], 0);
@@ -1203,6 +1381,26 @@ hat_init(void)
mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
+ srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
+ sizeof (srd_buckets[0]), KM_SLEEP);
+ for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) {
+ mutex_init(&srd_buckets[i].srdb_lock, NULL, MUTEX_DEFAULT,
+ NULL);
+ }
+ /*
+	 * 64-byte alignment is required in order to isolate a certain field
+	 * into its own cacheline.
+ */
+ srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t), 64,
+ sfmmu_srdcache_constructor, sfmmu_srdcache_destructor,
+ NULL, NULL, NULL, 0);
+ region_cache = kmem_cache_create("region_cache",
+ sizeof (sf_region_t), 0, sfmmu_rgncache_constructor,
+ sfmmu_rgncache_destructor, NULL, NULL, NULL, 0);
+ scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t), 0,
+ sfmmu_scdcache_constructor, sfmmu_scdcache_destructor,
+ NULL, NULL, NULL, 0);
+
/*
* Pre-allocate hrm_hashtab before enabling the collection of
* refmod statistics. Allocating on the fly would mean us
@@ -1263,6 +1461,8 @@ hat_alloc(struct as *as)
sfmmup = kmem_cache_alloc(sfmmuid_cache, KM_SLEEP);
sfmmup->sfmmu_as = as;
sfmmup->sfmmu_flags = 0;
+ sfmmup->sfmmu_tteflags = 0;
+ sfmmup->sfmmu_rtteflags = 0;
LOCK_INIT_CLEAR(&sfmmup->sfmmu_ctx_lock);
if (as == &kas) {
@@ -1303,7 +1503,7 @@ hat_alloc(struct as *as)
(void) sfmmu_tsbinfo_alloc(&sfmmup->sfmmu_tsb,
default_tsb_size,
TSB8K|TSB64K|TSB512K, 0, sfmmup);
- sfmmup->sfmmu_flags = HAT_SWAPPED;
+ sfmmup->sfmmu_flags = HAT_SWAPPED | HAT_ALLCTX_INVALID;
ASSERT(sfmmup->sfmmu_tsb != NULL);
}
@@ -1313,15 +1513,17 @@ hat_alloc(struct as *as)
sfmmup->sfmmu_ctxs[i].gnum = 0;
}
- sfmmu_setup_tsbinfo(sfmmup);
for (i = 0; i < max_mmu_page_sizes; i++) {
sfmmup->sfmmu_ttecnt[i] = 0;
+ sfmmup->sfmmu_scdrttecnt[i] = 0;
sfmmup->sfmmu_ismttecnt[i] = 0;
+ sfmmup->sfmmu_scdismttecnt[i] = 0;
sfmmup->sfmmu_pgsz[i] = TTE8K;
}
-
+ sfmmup->sfmmu_tsb0_4minflcnt = 0;
sfmmup->sfmmu_iblk = NULL;
sfmmup->sfmmu_ismhat = 0;
+ sfmmup->sfmmu_scdhat = 0;
sfmmup->sfmmu_ismblkpa = (uint64_t)-1;
if (sfmmup == ksfmmup) {
CPUSET_ALL(sfmmup->sfmmu_cpusran);
@@ -1333,6 +1535,12 @@ hat_alloc(struct as *as)
sfmmup->sfmmu_clrbin = sfmmup->sfmmu_clrstart;
sfmmup->sfmmu_xhat_provider = NULL;
cv_init(&sfmmup->sfmmu_tsb_cv, NULL, CV_DEFAULT, NULL);
+ sfmmup->sfmmu_srdp = NULL;
+ SF_RGNMAP_ZERO(sfmmup->sfmmu_region_map);
+ bzero(sfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE);
+ sfmmup->sfmmu_scdp = NULL;
+ sfmmup->sfmmu_scd_link.next = NULL;
+ sfmmup->sfmmu_scd_link.prev = NULL;
return (sfmmup);
}
@@ -1531,11 +1739,11 @@ hat_setup(struct hat *sfmmup, int allocflag)
kpreempt_disable();
CPUSET_ADD(sfmmup->sfmmu_cpusran, CPU->cpu_id);
-
/*
* sfmmu_setctx_sec takes <pgsz|cnum> as a parameter,
* pagesize bits don't matter in this case since we are passing
* INVALID_CONTEXT to it.
+	 * Compatibility note: the hardware takes care of MMU_SCONTEXT1.
*/
sfmmu_setctx_sec(INVALID_CONTEXT);
sfmmu_clear_utsbinfo();
@@ -1557,6 +1765,11 @@ hat_free_start(struct hat *sfmmup)
ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
sfmmup->sfmmu_free = 1;
+ if (sfmmup->sfmmu_scdp != NULL) {
+ sfmmu_leave_scd(sfmmup, 0);
+ }
+
+ ASSERT(sfmmup->sfmmu_scdp == NULL);
}
void
@@ -1565,20 +1778,13 @@ hat_free_end(struct hat *sfmmup)
int i;
ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
- if (sfmmup->sfmmu_ismhat) {
- for (i = 0; i < mmu_page_sizes; i++) {
- sfmmup->sfmmu_ttecnt[i] = 0;
- sfmmup->sfmmu_ismttecnt[i] = 0;
- }
- } else {
- /* EMPTY */
- ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
- ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
- ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0);
- ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
- ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
- ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
- }
+ ASSERT(sfmmup->sfmmu_free == 1);
+ ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
+ ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
+ ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0);
+ ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
+ ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
+ ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
if (sfmmup->sfmmu_rmstat) {
hat_freestat(sfmmup->sfmmu_as, NULL);
@@ -1589,8 +1795,26 @@ hat_free_end(struct hat *sfmmup)
sfmmu_tsbinfo_free(sfmmup->sfmmu_tsb);
sfmmup->sfmmu_tsb = next;
}
+
+ if (sfmmup->sfmmu_srdp != NULL) {
+ sfmmu_leave_srd(sfmmup);
+ ASSERT(sfmmup->sfmmu_srdp == NULL);
+ for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
+ if (sfmmup->sfmmu_hmeregion_links[i] != NULL) {
+ kmem_free(sfmmup->sfmmu_hmeregion_links[i],
+ SFMMU_L2_HMERLINKS_SIZE);
+ sfmmup->sfmmu_hmeregion_links[i] = NULL;
+ }
+ }
+ }
sfmmu_free_sfmmu(sfmmup);
+#ifdef DEBUG
+ for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
+ ASSERT(sfmmup->sfmmu_hmeregion_links[i] == NULL);
+ }
+#endif
+
kmem_cache_free(sfmmuid_cache, sfmmup);
}
@@ -1658,6 +1882,7 @@ hat_swapout(struct hat *sfmmup)
if ((hmeblkp->hblk_tag.htag_id == sfmmup) &&
!hmeblkp->hblk_shw_bit && !hmeblkp->hblk_lckcnt) {
+ ASSERT(!hmeblkp->hblk_shared);
(void) sfmmu_hblk_unload(sfmmup, hmeblkp,
(caddr_t)get_hblk_base(hmeblkp),
get_hblk_endaddr(hmeblkp),
@@ -1739,11 +1964,6 @@ hat_swapout(struct hat *sfmmup)
tsbinfop->tsb_tte.ll = 0;
}
-#ifdef sun4v
- if (freelist)
- sfmmu_invalidate_tsbinfo(sfmmup);
-#endif /* sun4v */
-
/* Now we can drop the lock and free the TSB memory. */
sfmmu_hat_exit(hatlockp);
for (; freelist != NULL; freelist = next) {
@@ -1760,14 +1980,61 @@ int
hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
uint_t flag)
{
+ sf_srd_t *srdp;
+ sf_scd_t *scdp;
+ int i;
extern uint_t get_color_start(struct as *);
ASSERT(hat->sfmmu_xhat_provider == NULL);
- ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW));
+ ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
+ (flag == HAT_DUP_SRD));
+ ASSERT(hat != ksfmmup);
+ ASSERT(newhat != ksfmmup);
+ ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
if (flag == HAT_DUP_COW) {
panic("hat_dup: HAT_DUP_COW not supported");
}
+
+ if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
+ ASSERT(srdp->srd_evp != NULL);
+ VN_HOLD(srdp->srd_evp);
+ ASSERT(srdp->srd_refcnt > 0);
+ newhat->sfmmu_srdp = srdp;
+ atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1);
+ }
+
+ /*
+	 * The HAT_DUP_ALL flag is used after the as (address space)
+	 * duplication is done.
+ */
+ if (flag == HAT_DUP_ALL && ((srdp = newhat->sfmmu_srdp) != NULL)) {
+ ASSERT(newhat->sfmmu_srdp->srd_refcnt >= 2);
+ newhat->sfmmu_rtteflags = hat->sfmmu_rtteflags;
+ if (hat->sfmmu_flags & HAT_4MTEXT_FLAG) {
+ newhat->sfmmu_flags |= HAT_4MTEXT_FLAG;
+ }
+
+ /* check if need to join scd */
+ if ((scdp = hat->sfmmu_scdp) != NULL &&
+ newhat->sfmmu_scdp != scdp) {
+ int ret;
+ SF_RGNMAP_IS_SUBSET(&newhat->sfmmu_region_map,
+ &scdp->scd_region_map, ret);
+ ASSERT(ret);
+ sfmmu_join_scd(scdp, newhat);
+ ASSERT(newhat->sfmmu_scdp == scdp &&
+ scdp->scd_refcnt >= 2);
+ for (i = 0; i < max_mmu_page_sizes; i++) {
+ newhat->sfmmu_ismttecnt[i] =
+ hat->sfmmu_ismttecnt[i];
+ newhat->sfmmu_scdismttecnt[i] =
+ hat->sfmmu_scdismttecnt[i];
+ }
+ }
+
+ sfmmu_check_page_sizes(newhat, 1);
+ }
+
if (flag == HAT_DUP_ALL && consistent_coloring == 0 &&
update_proc_pgcolorbase_after_fork != 0) {
hat->sfmmu_clrbin = get_color_start(hat->sfmmu_as);
@@ -1775,14 +2042,38 @@ hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
return (0);
}
+void
+hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
+ uint_t attr, uint_t flags)
+{
+ hat_do_memload(hat, addr, pp, attr, flags,
+ SFMMU_INVALID_SHMERID);
+}
+
+void
+hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp,
+ uint_t attr, uint_t flags, hat_region_cookie_t rcookie)
+{
+ uint_t rid;
+ if (rcookie == HAT_INVALID_REGION_COOKIE ||
+ hat->sfmmu_xhat_provider != NULL) {
+ hat_do_memload(hat, addr, pp, attr, flags,
+ SFMMU_INVALID_SHMERID);
+ return;
+ }
+ rid = (uint_t)((uint64_t)rcookie);
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ hat_do_memload(hat, addr, pp, attr, flags, rid);
+}
+
/*
* Set up addr to map to page pp with protection prot.
* As an optimization we also load the TSB with the
* corresponding tte but it is no big deal if the tte gets kicked out.
*/
-void
-hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
- uint_t attr, uint_t flags)
+static void
+hat_do_memload(struct hat *hat, caddr_t addr, struct page *pp,
+ uint_t attr, uint_t flags, uint_t rid)
{
tte_t tte;
@@ -1792,6 +2083,7 @@ hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
+ SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);
if (PP_ISFREE(pp)) {
panic("hat_memload: loading a mapping to free page %p",
@@ -1799,6 +2091,8 @@ hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
}
if (hat->sfmmu_xhat_provider) {
+ /* no regions for xhats */
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
XHAT_MEMLOAD(hat, addr, pp, attr, flags);
return;
}
@@ -1824,7 +2118,7 @@ hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
#endif
sfmmu_memtte(&tte, pp->p_pagenum, attr, TTE8K);
- (void) sfmmu_tteload_array(hat, &tte, addr, &pp, flags);
+ (void) sfmmu_tteload_array(hat, &tte, addr, &pp, flags, rid);
/*
* Check TSB and TLB page sizes.
@@ -1931,7 +2225,7 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
if (!use_lgpg) {
sfmmu_memtte(&tte, pfn, attr, TTE8K);
(void) sfmmu_tteload_array(hat, &tte, addr, &pp,
- flags);
+ flags, SFMMU_INVALID_SHMERID);
len -= MMU_PAGESIZE;
addr += MMU_PAGESIZE;
pfn++;
@@ -1947,7 +2241,7 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
!(mmu_ptob(pfn) & MMU_PAGEOFFSET4M)) {
sfmmu_memtte(&tte, pfn, attr, TTE4M);
(void) sfmmu_tteload_array(hat, &tte, addr, &pp,
- flags);
+ flags, SFMMU_INVALID_SHMERID);
len -= MMU_PAGESIZE4M;
addr += MMU_PAGESIZE4M;
pfn += MMU_PAGESIZE4M / MMU_PAGESIZE;
@@ -1957,7 +2251,7 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
!(mmu_ptob(pfn) & MMU_PAGEOFFSET512K)) {
sfmmu_memtte(&tte, pfn, attr, TTE512K);
(void) sfmmu_tteload_array(hat, &tte, addr, &pp,
- flags);
+ flags, SFMMU_INVALID_SHMERID);
len -= MMU_PAGESIZE512K;
addr += MMU_PAGESIZE512K;
pfn += MMU_PAGESIZE512K / MMU_PAGESIZE;
@@ -1967,14 +2261,14 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
!(mmu_ptob(pfn) & MMU_PAGEOFFSET64K)) {
sfmmu_memtte(&tte, pfn, attr, TTE64K);
(void) sfmmu_tteload_array(hat, &tte, addr, &pp,
- flags);
+ flags, SFMMU_INVALID_SHMERID);
len -= MMU_PAGESIZE64K;
addr += MMU_PAGESIZE64K;
pfn += MMU_PAGESIZE64K / MMU_PAGESIZE;
} else {
sfmmu_memtte(&tte, pfn, attr, TTE8K);
(void) sfmmu_tteload_array(hat, &tte, addr, &pp,
- flags);
+ flags, SFMMU_INVALID_SHMERID);
len -= MMU_PAGESIZE;
addr += MMU_PAGESIZE;
pfn++;
@@ -1989,6 +2283,31 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
}
}
+void
+hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
+ struct page **pps, uint_t attr, uint_t flags)
+{
+ hat_do_memload_array(hat, addr, len, pps, attr, flags,
+ SFMMU_INVALID_SHMERID);
+}
+
+void
+hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len,
+ struct page **pps, uint_t attr, uint_t flags,
+ hat_region_cookie_t rcookie)
+{
+ uint_t rid;
+ if (rcookie == HAT_INVALID_REGION_COOKIE ||
+ hat->sfmmu_xhat_provider != NULL) {
+ hat_do_memload_array(hat, addr, len, pps, attr, flags,
+ SFMMU_INVALID_SHMERID);
+ return;
+ }
+ rid = (uint_t)((uint64_t)rcookie);
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ hat_do_memload_array(hat, addr, len, pps, attr, flags, rid);
+}
+
/*
 * Map the largest extent possible out of the page array. The array may NOT
* be in order. The largest possible mapping a page can have
@@ -2000,9 +2319,9 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
 * should consist of properly aligned contiguous pages that are
* part of a big page for a large mapping to be created.
*/
-void
-hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
- struct page **pps, uint_t attr, uint_t flags)
+static void
+hat_do_memload_array(struct hat *hat, caddr_t addr, size_t len,
+ struct page **pps, uint_t attr, uint_t flags, uint_t rid)
{
int ttesz;
size_t mapsz;
@@ -2012,8 +2331,10 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
uint_t large_pages_disable;
ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
+ SFMMU_VALIDATE_HMERID(hat, rid, addr, len);
if (hat->sfmmu_xhat_provider) {
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
XHAT_MEMLOAD_ARRAY(hat, addr, len, pps, attr, flags);
return;
}
@@ -2041,7 +2362,8 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
}
if (npgs < NHMENTS || large_pages_disable == LARGE_PAGES_OFF) {
- sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs);
+ sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs,
+ rid);
return;
}
@@ -2074,7 +2396,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
sfmmu_memtte(&tte, (*pps)->p_pagenum,
attr, ttesz);
if (!sfmmu_tteload_array(hat, &tte, addr,
- pps, flags)) {
+ pps, flags, rid)) {
break;
}
}
@@ -2090,7 +2412,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
ASSERT(numpg <= npgs);
mapsz = numpg * MMU_PAGESIZE;
sfmmu_memload_batchsmall(hat, addr, pps, attr, flags,
- numpg);
+ numpg, rid);
}
addr += mapsz;
npgs -= numpg;
@@ -2098,7 +2420,8 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
}
if (npgs) {
- sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs);
+ sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs,
+ rid);
}
/*
@@ -2114,7 +2437,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
*/
static void
sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps,
- uint_t attr, uint_t flags, pgcnt_t npgs)
+ uint_t attr, uint_t flags, pgcnt_t npgs, uint_t rid)
{
tte_t tte;
page_t *pp;
@@ -2126,14 +2449,15 @@ sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps,
/*
* Acquire the hash bucket.
*/
- hmebp = sfmmu_tteload_acquire_hashbucket(hat, vaddr, TTE8K);
+ hmebp = sfmmu_tteload_acquire_hashbucket(hat, vaddr, TTE8K,
+ rid);
ASSERT(hmebp);
/*
* Find the hment block.
*/
hmeblkp = sfmmu_tteload_find_hmeblk(hat, hmebp, vaddr,
- TTE8K, flags);
+ TTE8K, flags, rid);
ASSERT(hmeblkp);
do {
@@ -2147,7 +2471,7 @@ sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps,
* Add the translation.
*/
(void) sfmmu_tteload_addentry(hat, hmeblkp, &tte,
- vaddr, pps, flags);
+ vaddr, pps, flags, rid);
/*
* Goto next page.
@@ -2223,12 +2547,17 @@ sfmmu_memtte(tte_t *ttep, pfn_t pfn, uint_t attr, int tte_sz)
* If a page structure is specified then it will add the
* corresponding hment to the mapping list.
* It will also update the hmenum field for the tte.
+ *
+ * Currently this function is only used for kernel mappings,
+ * so an invalid region id is passed to sfmmu_tteload_array().
*/
void
sfmmu_tteload(struct hat *sfmmup, tte_t *ttep, caddr_t vaddr, page_t *pp,
uint_t flags)
{
- (void) sfmmu_tteload_array(sfmmup, ttep, vaddr, &pp, flags);
+ ASSERT(sfmmup == ksfmmup);
+ (void) sfmmu_tteload_array(sfmmup, ttep, vaddr, &pp, flags,
+ SFMMU_INVALID_SHMERID);
}
/*
@@ -2427,7 +2756,7 @@ sfmmu_select_tsb_szc(pgcnt_t pgcnt)
*/
static int
sfmmu_tteload_array(sfmmu_t *sfmmup, tte_t *ttep, caddr_t vaddr,
- page_t **pps, uint_t flags)
+ page_t **pps, uint_t flags, uint_t rid)
{
struct hmehash_bucket *hmebp;
struct hme_blk *hmeblkp;
@@ -2443,19 +2772,21 @@ sfmmu_tteload_array(sfmmu_t *sfmmup, tte_t *ttep, caddr_t vaddr,
/*
* Acquire the hash bucket.
*/
- hmebp = sfmmu_tteload_acquire_hashbucket(sfmmup, vaddr, size);
+ hmebp = sfmmu_tteload_acquire_hashbucket(sfmmup, vaddr, size, rid);
ASSERT(hmebp);
/*
* Find the hment block.
*/
- hmeblkp = sfmmu_tteload_find_hmeblk(sfmmup, hmebp, vaddr, size, flags);
+ hmeblkp = sfmmu_tteload_find_hmeblk(sfmmup, hmebp, vaddr, size, flags,
+ rid);
ASSERT(hmeblkp);
/*
* Add the translation.
*/
- ret = sfmmu_tteload_addentry(sfmmup, hmeblkp, ttep, vaddr, pps, flags);
+ ret = sfmmu_tteload_addentry(sfmmup, hmeblkp, ttep, vaddr, pps, flags,
+ rid);
/*
* Release the hash bucket.
@@ -2469,14 +2800,18 @@ sfmmu_tteload_array(sfmmu_t *sfmmup, tte_t *ttep, caddr_t vaddr,
* Function locks and returns a pointer to the hash bucket for vaddr and size.
*/
static struct hmehash_bucket *
-sfmmu_tteload_acquire_hashbucket(sfmmu_t *sfmmup, caddr_t vaddr, int size)
+sfmmu_tteload_acquire_hashbucket(sfmmu_t *sfmmup, caddr_t vaddr, int size,
+ uint_t rid)
{
struct hmehash_bucket *hmebp;
int hmeshift;
+ void *htagid = sfmmutohtagid(sfmmup, rid);
+
+ ASSERT(htagid != NULL);
hmeshift = HME_HASH_SHIFT(size);
- hmebp = HME_HASH_FUNCTION(sfmmup, vaddr, hmeshift);
+ hmebp = HME_HASH_FUNCTION(htagid, vaddr, hmeshift);
SFMMU_HASH_LOCK(hmebp);
@@ -2490,7 +2825,7 @@ sfmmu_tteload_acquire_hashbucket(sfmmu_t *sfmmup, caddr_t vaddr, int size)
*/
static struct hme_blk *
sfmmu_tteload_find_hmeblk(sfmmu_t *sfmmup, struct hmehash_bucket *hmebp,
- caddr_t vaddr, uint_t size, uint_t flags)
+ caddr_t vaddr, uint_t size, uint_t flags, uint_t rid)
{
hmeblk_tag hblktag;
int hmeshift;
@@ -2499,10 +2834,14 @@ sfmmu_tteload_find_hmeblk(sfmmu_t *sfmmup, struct hmehash_bucket *hmebp,
struct kmem_cache *sfmmu_cache;
uint_t forcefree;
- hblktag.htag_id = sfmmup;
+ SFMMU_VALIDATE_HMERID(sfmmup, rid, vaddr, TTEBYTES(size));
+
+ hblktag.htag_id = sfmmutohtagid(sfmmup, rid);
+ ASSERT(hblktag.htag_id != NULL);
hmeshift = HME_HASH_SHIFT(size);
hblktag.htag_bspage = HME_HASH_BSPAGE(vaddr, hmeshift);
hblktag.htag_rehash = HME_HASH_REHASH(size);
+ hblktag.htag_rid = rid;
ttearray_realloc:
@@ -2526,7 +2865,9 @@ ttearray_realloc:
if (hmeblkp == NULL) {
hmeblkp = sfmmu_hblk_alloc(sfmmup, vaddr, hmebp, size,
- hblktag, flags);
+ hblktag, flags, rid);
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || hmeblkp->hblk_shared);
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid) || !hmeblkp->hblk_shared);
} else {
/*
* It is possible for 8k and 64k hblks to collide since they
@@ -2546,6 +2887,7 @@ ttearray_realloc:
* if the hblk was previously used as a shadow hblk then
* we will change it to a normal hblk
*/
+ ASSERT(!hmeblkp->hblk_shared);
if (hmeblkp->hblk_shw_mask) {
sfmmu_shadow_hcleanup(sfmmup, hmeblkp, hmebp);
ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
@@ -2577,6 +2919,9 @@ ttearray_realloc:
ASSERT(get_hblk_ttesz(hmeblkp) == size);
ASSERT(!hmeblkp->hblk_shw_bit);
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || hmeblkp->hblk_shared);
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid) || !hmeblkp->hblk_shared);
+ ASSERT(hmeblkp->hblk_tag.htag_rid == rid);
return (hmeblkp);
}
@@ -2587,7 +2932,7 @@ ttearray_realloc:
*/
static int
sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
- caddr_t vaddr, page_t **pps, uint_t flags)
+ caddr_t vaddr, page_t **pps, uint_t flags, uint_t rid)
{
page_t *pp = *pps;
int hmenum, size, remap;
@@ -2598,6 +2943,7 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
struct sf_hment *sfhme;
kmutex_t *pml, *pmtx;
hatlock_t *hatlockp;
+ int myflt;
/*
* remove this panic when we decide to let user virtual address
@@ -2651,6 +2997,9 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
}
ASSERT(!((uintptr_t)vaddr & TTE_PAGE_OFFSET(size)));
+ SFMMU_VALIDATE_HMERID(sfmmup, rid, vaddr, TTEBYTES(size));
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || hmeblkp->hblk_shared);
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid) || !hmeblkp->hblk_shared);
HBLKTOHME_IDX(sfhme, hmeblkp, vaddr, hmenum);
@@ -2732,11 +3081,11 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
ASSERT((!remap) ? sfhme->hme_next == NULL : 1);
if (flags & HAT_LOAD_LOCK) {
- if (((int)hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) {
+ if ((hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) {
panic("too high lckcnt-hmeblk %p",
(void *)hmeblkp);
}
- atomic_add_16(&hmeblkp->hblk_lckcnt, 1);
+ atomic_add_32(&hmeblkp->hblk_lckcnt, 1);
HBLK_STACK_TRACE(hmeblkp, HBLK_LOCK);
}
@@ -2767,59 +3116,70 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
chk_tte(&orig_old, &tteold, ttep, hmeblkp);
#endif /* DEBUG */
}
+ ASSERT(TTE_IS_VALID(&sfhme->hme_tte));
if (!TTE_IS_VALID(&tteold)) {
atomic_add_16(&hmeblkp->hblk_vcnt, 1);
- atomic_add_long(&sfmmup->sfmmu_ttecnt[size], 1);
-
- /*
- * HAT_RELOAD_SHARE has been deprecated with lpg DISM.
- */
-
- if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 &&
- sfmmup != ksfmmup) {
+ if (rid == SFMMU_INVALID_SHMERID) {
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[size], 1);
+ } else {
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ sf_region_t *rgnp = srdp->srd_hmergnp[rid];
/*
- * If this is the first large mapping for the process
- * we must force any CPUs running this process to TL=0
- * where they will reload the HAT flags from the
- * tsbmiss area. This is necessary to make the large
- * mappings we are about to load visible to those CPUs;
- * otherwise they'll loop forever calling pagefault()
- * since we don't search large hash chains by default.
+ * We already accounted for region ttecnt's in sfmmu
+ * during hat_join_region() processing. Here we
+ * only update ttecnt's in the region structure.
*/
+ atomic_add_long(&rgnp->rgn_ttecnt[size], 1);
+ }
+ }
+
+ myflt = (astosfmmu(curthread->t_procp->p_as) == sfmmup);
+ if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 &&
+ sfmmup != ksfmmup) {
+ uchar_t tteflag = 1 << size;
+ if (rid == SFMMU_INVALID_SHMERID) {
+ if (!(sfmmup->sfmmu_tteflags & tteflag)) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ sfmmup->sfmmu_tteflags |= tteflag;
+ sfmmu_hat_exit(hatlockp);
+ }
+ } else if (!(sfmmup->sfmmu_rtteflags & tteflag)) {
hatlockp = sfmmu_hat_enter(sfmmup);
- if (size == TTE512K &&
- !SFMMU_FLAGS_ISSET(sfmmup, HAT_512K_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_512K_FLAG);
- sfmmu_sync_mmustate(sfmmup);
- } else if (size == TTE4M &&
- !SFMMU_FLAGS_ISSET(sfmmup, HAT_4M_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_4M_FLAG);
- sfmmu_sync_mmustate(sfmmup);
- } else if (size == TTE64K &&
- !SFMMU_FLAGS_ISSET(sfmmup, HAT_64K_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_64K_FLAG);
- /* no sync mmustate; 64K shares 8K hashes */
- } else if (mmu_page_sizes == max_mmu_page_sizes) {
- if (size == TTE32M &&
- !SFMMU_FLAGS_ISSET(sfmmup, HAT_32M_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_32M_FLAG);
- sfmmu_sync_mmustate(sfmmup);
- } else if (size == TTE256M &&
- !SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_256M_FLAG);
- sfmmu_sync_mmustate(sfmmup);
+ sfmmup->sfmmu_rtteflags |= tteflag;
+ sfmmu_hat_exit(hatlockp);
+ }
+ /*
+ * Update the current CPU tsbmiss area, so the current thread
+ * won't need to take the tsbmiss for the new pagesize.
+ * The other threads in the process will update their tsb
+ * miss area lazily in sfmmu_tsbmiss_exception() when they
+ * fail to find the translation for a newly added pagesize.
+ */
+ if (size > TTE64K && myflt) {
+ struct tsbmiss *tsbmp;
+ kpreempt_disable();
+ tsbmp = &tsbmiss_area[CPU->cpu_id];
+ if (rid == SFMMU_INVALID_SHMERID) {
+ if (!(tsbmp->uhat_tteflags & tteflag)) {
+ tsbmp->uhat_tteflags |= tteflag;
+ }
+ } else {
+ if (!(tsbmp->uhat_rtteflags & tteflag)) {
+ tsbmp->uhat_rtteflags |= tteflag;
}
}
- if (size >= TTE4M && (flags & HAT_LOAD_TEXT) &&
- !SFMMU_FLAGS_ISSET(sfmmup, HAT_4MTEXT_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_4MTEXT_FLAG);
- }
- sfmmu_hat_exit(hatlockp);
+ kpreempt_enable();
}
}
- ASSERT(TTE_IS_VALID(&sfhme->hme_tte));
+
+ if (size >= TTE4M && (flags & HAT_LOAD_TEXT) &&
+ !SFMMU_FLAGS_ISSET(sfmmup, HAT_4MTEXT_FLAG)) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ SFMMU_FLAGS_SET(sfmmup, HAT_4MTEXT_FLAG);
+ sfmmu_hat_exit(hatlockp);
+ }
flush_tte.tte_intlo = (tteold.tte_intlo ^ ttep->tte_intlo) &
hw_tte.tte_intlo;
@@ -2837,8 +3197,21 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
if (TTE_IS_MOD(&tteold)) {
sfmmu_ttesync(sfmmup, vaddr, &tteold, pp);
}
- sfmmu_tlb_demap(vaddr, sfmmup, hmeblkp, 0, 0);
- xt_sync(sfmmup->sfmmu_cpusran);
+ /*
+ * hwtte bits shouldn't change for SRD hmeblks as long as SRD
+ * hmes are only used for read only text. Adding this code for
+ * completeness and future use of shared hmeblks with writable
+ * mappings of VMODSORT vnodes.
+ */
+ if (hmeblkp->hblk_shared) {
+ cpuset_t cpuset = sfmmu_rgntlb_demap(vaddr,
+ sfmmup->sfmmu_srdp->srd_hmergnp[rid], hmeblkp, 1);
+ xt_sync(cpuset);
+ SFMMU_STAT_ADD(sf_region_remap_demap, 1);
+ } else {
+ sfmmu_tlb_demap(vaddr, sfmmup, hmeblkp, 0, 0);
+ xt_sync(sfmmup->sfmmu_cpusran);
+ }
}
if ((flags & SFMMU_NO_TSBLOAD) == 0) {
@@ -2848,8 +3221,18 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
* have a single, unique TSB entry. Ditto for 32M/256M.
*/
if (size == TTE8K || size == TTE4M) {
+ sf_scd_t *scdp;
hatlockp = sfmmu_hat_enter(sfmmup);
- sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte, size);
+ /*
+ * Don't preload private TSB if the mapping is used
+ * by the shctx in the SCD.
+ */
+ scdp = sfmmup->sfmmu_scdp;
+ if (rid == SFMMU_INVALID_SHMERID || scdp == NULL ||
+ !SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
+ sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte,
+ size);
+ }
sfmmu_hat_exit(hatlockp);
}
}
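
The rewritten block above replaces the old per-size HAT_*_FLAG logic with a simple bitmask: bit (1 << size) is set in sfmmu_tteflags or sfmmu_rtteflags, the current CPU's tsbmiss copy is updated eagerly, and other CPUs pick the bits up lazily. A minimal standalone sketch of the bitmask bookkeeping follows; every name in it (toy_hat, toy_note_pgsz, and so on) is invented for illustration.

/*
 * Toy model of the per-pagesize flag bitmask used above: bit (1 << size)
 * records that at least one mapping of that page size exists, either
 * privately or in a shared region.
 */
#include <stdio.h>

enum { TOY_TTE8K, TOY_TTE64K, TOY_TTE512K, TOY_TTE4M, TOY_NPGSZ };

struct toy_hat {
	unsigned char tteflags;		/* private page sizes in use */
	unsigned char rtteflags;	/* shared-region page sizes in use */
};

static void
toy_note_pgsz(struct toy_hat *hat, int size, int shared)
{
	unsigned char tteflag = 1 << size;

	if (shared)
		hat->rtteflags |= tteflag;
	else
		hat->tteflags |= tteflag;
}

static int
toy_uses_pgsz(const struct toy_hat *hat, int size)
{
	return (((hat->tteflags | hat->rtteflags) & (1 << size)) != 0);
}

int
main(void)
{
	struct toy_hat hat = { 0, 0 };

	toy_note_pgsz(&hat, TOY_TTE4M, 0);	/* first private 4M mapping */
	toy_note_pgsz(&hat, TOY_TTE64K, 1);	/* first shared 64K mapping */
	(void) printf("4M in use: %d, 512K in use: %d\n",
	    toy_uses_pgsz(&hat, TOY_TTE4M), toy_uses_pgsz(&hat, TOY_TTE512K));
	return (0);
}
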
@@ -3119,6 +3502,7 @@ sfmmu_shadow_hcreate(sfmmu_t *sfmmup, caddr_t vaddr, int ttesz, uint_t flags)
hmeshift = HME_HASH_SHIFT(size);
hblktag.htag_bspage = HME_HASH_BSPAGE(vaddr, hmeshift);
hblktag.htag_rehash = HME_HASH_REHASH(size);
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
hmebp = HME_HASH_FUNCTION(sfmmup, vaddr, hmeshift);
SFMMU_HASH_LOCK(hmebp);
@@ -3127,7 +3511,7 @@ sfmmu_shadow_hcreate(sfmmu_t *sfmmup, caddr_t vaddr, int ttesz, uint_t flags)
ASSERT(hmeblkp != (struct hme_blk *)hblk_reserve);
if (hmeblkp == NULL) {
hmeblkp = sfmmu_hblk_alloc(sfmmup, vaddr, hmebp, size,
- hblktag, flags);
+ hblktag, flags, SFMMU_INVALID_SHMERID);
}
ASSERT(hmeblkp);
if (!hmeblkp->hblk_shw_mask) {
@@ -3142,7 +3526,8 @@ sfmmu_shadow_hcreate(sfmmu_t *sfmmup, caddr_t vaddr, int ttesz, uint_t flags)
panic("sfmmu_shadow_hcreate: shw bit not set in hmeblkp 0x%p",
(void *)hmeblkp);
}
-
+ ASSERT(hmeblkp->hblk_shw_bit == 1);
+ ASSERT(!hmeblkp->hblk_shared);
vshift = vaddr_to_vshift(hblktag, vaddr, size);
ASSERT(vshift < 8);
/*
@@ -3177,6 +3562,7 @@ sfmmu_shadow_hcleanup(sfmmu_t *sfmmup, struct hme_blk *hmeblkp,
int hashno, size;
ASSERT(hmeblkp->hblk_shw_bit);
+ ASSERT(!hmeblkp->hblk_shared);
ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
@@ -3210,6 +3596,7 @@ sfmmu_free_hblks(sfmmu_t *sfmmup, caddr_t addr, caddr_t endaddr,
ASSERT(hashno > 0);
hblktag.htag_id = sfmmup;
hblktag.htag_rehash = hashno;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
hmeshift = HME_HASH_SHIFT(hashno);
@@ -3226,6 +3613,7 @@ sfmmu_free_hblks(sfmmu_t *sfmmup, caddr_t addr, caddr_t endaddr,
ASSERT(hblkpa == va_to_pa((caddr_t)hmeblkp));
if (HTAGS_EQ(hmeblkp->hblk_tag, hblktag)) {
/* found hme_blk */
+ ASSERT(!hmeblkp->hblk_shared);
if (hmeblkp->hblk_shw_bit) {
if (hmeblkp->hblk_shw_mask) {
shadow = 1;
@@ -3279,6 +3667,174 @@ sfmmu_free_hblks(sfmmu_t *sfmmup, caddr_t addr, caddr_t endaddr,
}
/*
+ * This routine's job is to delete stale, invalid shared hmeregion hmeblks
+ * that may still linger on after pageunload.
+ */
+static void
+sfmmu_cleanup_rhblk(sf_srd_t *srdp, caddr_t addr, uint_t rid, int ttesz)
+{
+ int hmeshift;
+ hmeblk_tag hblktag;
+ struct hmehash_bucket *hmebp;
+ struct hme_blk *hmeblkp;
+ struct hme_blk *pr_hblk;
+ struct hme_blk *list = NULL;
+ uint64_t hblkpa, prevpa;
+
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+
+ hmeshift = HME_HASH_SHIFT(ttesz);
+ hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
+ hblktag.htag_rehash = ttesz;
+ hblktag.htag_rid = rid;
+ hblktag.htag_id = srdp;
+ hmebp = HME_HASH_FUNCTION(srdp, addr, hmeshift);
+
+ SFMMU_HASH_LOCK(hmebp);
+ HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa, pr_hblk,
+ prevpa, &list);
+ if (hmeblkp != NULL) {
+ ASSERT(hmeblkp->hblk_shared);
+ ASSERT(!hmeblkp->hblk_shw_bit);
+ if (hmeblkp->hblk_vcnt || hmeblkp->hblk_hmecnt) {
+ panic("sfmmu_cleanup_rhblk: valid hmeblk");
+ }
+ ASSERT(!hmeblkp->hblk_lckcnt);
+ sfmmu_hblk_hash_rm(hmebp, hmeblkp, prevpa, pr_hblk);
+ sfmmu_hblk_free(hmebp, hmeblkp, hblkpa, &list);
+ }
+ SFMMU_HASH_UNLOCK(hmebp);
+ sfmmu_hblks_list_purge(&list);
+}
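
A recurring detail in this patch is that the region id becomes part of the hmeblk hash tag, and for shared blocks the tag id is the SRD rather than the per-process hat, as sfmmu_cleanup_rhblk() shows above. The standalone sketch below illustrates why the rid has to participate in tag equality; struct toy_tag and toy_tags_eq are invented stand-ins, not the kernel's hmeblk_tag or HTAGS_EQ.

/*
 * Toy model of the extended hash tag: two mappings at the same base page
 * but in different shared regions must compare unequal, so the region id
 * is part of the tag.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_tag {
	const void *id;		/* per-process hat, or SRD for shared blocks */
	uintptr_t   bspage;	/* base shifted page, like htag_bspage */
	unsigned    rehash;	/* hash level, like htag_rehash */
	unsigned    rid;	/* region id, like htag_rid */
};

static int
toy_tags_eq(const struct toy_tag *a, const struct toy_tag *b)
{
	return (a->id == b->id && a->bspage == b->bspage &&
	    a->rehash == b->rehash && a->rid == b->rid);
}

int
main(void)
{
	static const int srd;	/* stands in for a shared resource domain */
	struct toy_tag a = { &srd, 0x1000, 1, 3 };
	struct toy_tag b = { &srd, 0x1000, 1, 4 };	/* same page, other region */

	(void) printf("equal: %d\n", toy_tags_eq(&a, &b));	/* prints 0 */
	return (0);
}
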
+
+/* ARGSUSED */
+static void
+sfmmu_rgn_cb_noop(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr,
+ size_t r_size, void *r_obj, u_offset_t r_objoff)
+{
+}
+
+/*
+ * Update *eaddrp only if the hmeblk was unloaded.
+ */
+static void
+sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr,
+ caddr_t eaddr, int ttesz, caddr_t *eaddrp)
+{
+ int hmeshift;
+ hmeblk_tag hblktag;
+ struct hmehash_bucket *hmebp;
+ struct hme_blk *hmeblkp;
+ struct hme_blk *pr_hblk;
+ struct hme_blk *list = NULL;
+ uint64_t hblkpa, prevpa;
+
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(ttesz >= HBLK_MIN_TTESZ);
+
+ hmeshift = HME_HASH_SHIFT(ttesz);
+ hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
+ hblktag.htag_rehash = ttesz;
+ hblktag.htag_rid = rid;
+ hblktag.htag_id = srdp;
+ hmebp = HME_HASH_FUNCTION(srdp, addr, hmeshift);
+
+ SFMMU_HASH_LOCK(hmebp);
+ HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa, pr_hblk,
+ prevpa, &list);
+ if (hmeblkp != NULL) {
+ ASSERT(hmeblkp->hblk_shared);
+ ASSERT(!hmeblkp->hblk_lckcnt);
+ if (hmeblkp->hblk_vcnt || hmeblkp->hblk_hmecnt) {
+ *eaddrp = sfmmu_hblk_unload(NULL, hmeblkp, addr,
+ eaddr, NULL, HAT_UNLOAD);
+ ASSERT(*eaddrp > addr);
+ }
+ ASSERT(!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt);
+ sfmmu_hblk_hash_rm(hmebp, hmeblkp, prevpa, pr_hblk);
+ sfmmu_hblk_free(hmebp, hmeblkp, hblkpa, &list);
+ }
+ SFMMU_HASH_UNLOCK(hmebp);
+ sfmmu_hblks_list_purge(&list);
+}
+
+/*
+ * This routine can be optimized to eliminate scanning areas of smaller page
+ * size bitmaps when a corresponding bit is set in the bitmap for a bigger
+ * page size. For now, assume the region will usually have only the primary
+ * size mappings, so we'll scan just one bitmap anyway by checking rgn_hmeflags
+ * first.
+ */
+static void
+sfmmu_unload_hmeregion(sf_srd_t *srdp, sf_region_t *rgnp)
+{
+ int ttesz = rgnp->rgn_pgszc;
+ size_t rsz = rgnp->rgn_size;
+ caddr_t rsaddr = rgnp->rgn_saddr;
+ caddr_t readdr = rsaddr + rsz;
+ caddr_t rhsaddr;
+ caddr_t va;
+ uint_t rid = rgnp->rgn_id;
+ caddr_t cbsaddr;
+ caddr_t cbeaddr;
+ hat_rgn_cb_func_t rcbfunc;
+ ulong_t cnt;
+
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+
+ ASSERT(IS_P2ALIGNED(rsaddr, TTEBYTES(ttesz)));
+ ASSERT(IS_P2ALIGNED(rsz, TTEBYTES(ttesz)));
+ if (ttesz < HBLK_MIN_TTESZ) {
+ ttesz = HBLK_MIN_TTESZ;
+ rhsaddr = (caddr_t)P2ALIGN((uintptr_t)rsaddr, HBLK_MIN_BYTES);
+ } else {
+ rhsaddr = rsaddr;
+ }
+
+ if ((rcbfunc = rgnp->rgn_cb_function) == NULL) {
+ rcbfunc = sfmmu_rgn_cb_noop;
+ }
+
+ while (ttesz >= HBLK_MIN_TTESZ) {
+ cbsaddr = rsaddr;
+ cbeaddr = rsaddr;
+ if (!(rgnp->rgn_hmeflags & (1 << ttesz))) {
+ ttesz--;
+ continue;
+ }
+ cnt = 0;
+ va = rsaddr;
+ while (va < readdr) {
+ ASSERT(va >= rhsaddr);
+ if (va != cbeaddr) {
+ if (cbeaddr != cbsaddr) {
+ ASSERT(cbeaddr > cbsaddr);
+ (*rcbfunc)(cbsaddr, cbeaddr,
+ rsaddr, rsz, rgnp->rgn_obj,
+ rgnp->rgn_objoff);
+ }
+ cbsaddr = va;
+ cbeaddr = va;
+ }
+ sfmmu_unload_hmeregion_va(srdp, rid, va, readdr,
+ ttesz, &cbeaddr);
+ cnt++;
+ va = rhsaddr + (cnt << TTE_PAGE_SHIFT(ttesz));
+ }
+ if (cbeaddr != cbsaddr) {
+ ASSERT(cbeaddr > cbsaddr);
+ (*rcbfunc)(cbsaddr, cbeaddr, rsaddr,
+ rsz, rgnp->rgn_obj,
+ rgnp->rgn_objoff);
+ }
+ ttesz--;
+ }
+}
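
sfmmu_unload_hmeregion() above coalesces its region callbacks: cbeaddr advances only when an hmeblk is actually unloaded, and the callback fires when a gap appears or the scan ends, so each maximal contiguous unloaded range is reported once. Here is a minimal standalone sketch of that coalescing pattern over a plain array; toy_unload and toy_report are invented names.

/*
 * Toy model of the callback coalescing in sfmmu_unload_hmeregion():
 * walk a range, extend [cbs, cbe) while consecutive units are unloaded,
 * and invoke the callback once per maximal contiguous run.
 */
#include <stdio.h>

typedef void (*toy_cb_t)(int start, int end);

static void
toy_report(int start, int end)
{
	(void) printf("unloaded [%d, %d)\n", start, end);
}

static void
toy_unload(const int *present, int n, toy_cb_t cb)
{
	int cbs = 0, cbe = 0;	/* current coalesced run */
	int i;

	for (i = 0; i < n; i++) {
		if (i != cbe) {			/* gap since the last unload */
			if (cbe != cbs)
				cb(cbs, cbe);
			cbs = cbe = i;
		}
		if (present[i])
			cbe = i + 1;		/* this unit was unloaded */
	}
	if (cbe != cbs)
		cb(cbs, cbe);			/* flush the final run */
}

int
main(void)
{
	int present[] = { 1, 1, 0, 1, 1, 1, 0, 1 };

	toy_unload(present, 8, toy_report);	/* [0,2) [3,6) [7,8) */
	return (0);
}
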
+
+/*
* Release one hardware address translation lock on the given address range.
*/
void
@@ -3298,6 +3854,7 @@ hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
ASSERT((len & MMU_PAGEOFFSET) == 0);
endaddr = addr + len;
hblktag.htag_id = sfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
/*
* Spitfire supports 4 page sizes.
@@ -3316,6 +3873,7 @@ hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list);
if (hmeblkp != NULL) {
+ ASSERT(!hmeblkp->hblk_shared);
/*
* If we encounter a shadow hmeblk then
* we know there are no valid hmeblks mapping
@@ -3351,6 +3909,87 @@ hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
sfmmu_hblks_list_purge(&list);
}
+void
+hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len,
+ hat_region_cookie_t rcookie)
+{
+ sf_srd_t *srdp;
+ sf_region_t *rgnp;
+ int ttesz;
+ uint_t rid;
+ caddr_t eaddr;
+ caddr_t va;
+ int hmeshift;
+ hmeblk_tag hblktag;
+ struct hmehash_bucket *hmebp;
+ struct hme_blk *hmeblkp;
+ struct hme_blk *pr_hblk;
+ struct hme_blk *list;
+ uint64_t hblkpa, prevpa;
+
+ if (rcookie == HAT_INVALID_REGION_COOKIE) {
+ hat_unlock(sfmmup, addr, len);
+ return;
+ }
+
+ ASSERT(sfmmup != NULL);
+ ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
+ ASSERT(sfmmup != ksfmmup);
+
+ srdp = sfmmup->sfmmu_srdp;
+ rid = (uint_t)((uint64_t)rcookie);
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ eaddr = addr + len;
+ va = addr;
+ list = NULL;
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_HMERID(sfmmup, rid, addr, len);
+
+ ASSERT(IS_P2ALIGNED(addr, TTEBYTES(rgnp->rgn_pgszc)));
+ ASSERT(IS_P2ALIGNED(len, TTEBYTES(rgnp->rgn_pgszc)));
+ if (rgnp->rgn_pgszc < HBLK_MIN_TTESZ) {
+ ttesz = HBLK_MIN_TTESZ;
+ } else {
+ ttesz = rgnp->rgn_pgszc;
+ }
+ while (va < eaddr) {
+ while (ttesz < rgnp->rgn_pgszc &&
+ IS_P2ALIGNED(va, TTEBYTES(ttesz + 1))) {
+ ttesz++;
+ }
+ while (ttesz >= HBLK_MIN_TTESZ) {
+ if (!(rgnp->rgn_hmeflags & (1 << ttesz))) {
+ ttesz--;
+ continue;
+ }
+ hmeshift = HME_HASH_SHIFT(ttesz);
+ hblktag.htag_bspage = HME_HASH_BSPAGE(va, hmeshift);
+ hblktag.htag_rehash = ttesz;
+ hblktag.htag_rid = rid;
+ hblktag.htag_id = srdp;
+ hmebp = HME_HASH_FUNCTION(srdp, addr, hmeshift);
+ SFMMU_HASH_LOCK(hmebp);
+ HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa,
+ pr_hblk, prevpa, &list);
+ if (hmeblkp == NULL) {
+ ttesz--;
+ continue;
+ }
+ ASSERT(hmeblkp->hblk_shared);
+ va = sfmmu_hblk_unlock(hmeblkp, va, eaddr);
+ ASSERT(va >= eaddr ||
+ IS_P2ALIGNED((uintptr_t)va, TTEBYTES(ttesz)));
+ SFMMU_HASH_UNLOCK(hmebp);
+ break;
+ }
+ if (ttesz < HBLK_MIN_TTESZ) {
+ panic("hat_unlock_region: addr not found "
+ "addr %p hat %p", va, sfmmup);
+ }
+ }
+ sfmmu_hblks_list_purge(&list);
+}
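
hat_unlock_region() above does not know in advance which page size backs a given address, so it promotes the trial size while the address stays aligned to the next larger page size and then probes downward through the hash levels. The standalone sketch below shows just the alignment-driven size promotion; the sizes, TOY_P2ALIGNED and toy_pick_szc are invented for illustration.

/*
 * Toy model of the page-size probing in hat_unlock_region(): promote the
 * trial size while the address is aligned to the next larger page size;
 * the caller would then probe from that size downward.
 */
#include <stdio.h>
#include <stdint.h>

#define	TOY_NSZ		4
/* page sizes: 8K, 64K, 512K, 4M */
static const uintptr_t toy_pgsz[TOY_NSZ] = {
	0x2000, 0x10000, 0x80000, 0x400000
};

#define	TOY_P2ALIGNED(va, sz)	(((va) & ((sz) - 1)) == 0)

static int
toy_pick_szc(uintptr_t va, int min_szc, int max_szc)
{
	int szc = min_szc;

	while (szc < max_szc && TOY_P2ALIGNED(va, toy_pgsz[szc + 1]))
		szc++;
	return (szc);
}

int
main(void)
{
	/* 0x400000 is 4M-aligned, 0x410000 is only 64K-aligned */
	(void) printf("%d %d\n",
	    toy_pick_szc(0x400000, 0, TOY_NSZ - 1),	/* 3 (4M)  */
	    toy_pick_szc(0x410000, 0, TOY_NSZ - 1));	/* 1 (64K) */
	return (0);
}
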
+
/*
* Function to unlock a range of addresses in an hmeblk. It returns the
* next address that needs to be unlocked.
@@ -3391,7 +4030,7 @@ readtte:
panic("can't unlock large tte");
ASSERT(hmeblkp->hblk_lckcnt > 0);
- atomic_add_16(&hmeblkp->hblk_lckcnt, -1);
+ atomic_add_32(&hmeblkp->hblk_lckcnt, -1);
HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
} else {
panic("sfmmu_hblk_unlock: invalid tte");
@@ -3609,6 +4248,7 @@ rehash:
hashno++) {
hmeshift = HME_HASH_SHIFT(hashno);
hblktag.htag_id = ksfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
hblktag.htag_bspage = HME_HASH_BSPAGE(saddr, hmeshift);
hblktag.htag_rehash = hashno;
hmebp = HME_HASH_FUNCTION(ksfmmup, saddr, hmeshift);
@@ -3627,6 +4267,8 @@ rehash:
return (ENXIO);
}
+ ASSERT(!hmeblkp->hblk_shared);
+
HBLKTOHME(osfhmep, hmeblkp, saddr);
sfmmu_copytte(&osfhmep->hme_tte, &tte);
@@ -3814,6 +4456,7 @@ rehash:
hashno++) {
hmeshift = HME_HASH_SHIFT(hashno);
hblktag.htag_id = ksfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
hblktag.htag_bspage = HME_HASH_BSPAGE(saddr, hmeshift);
hblktag.htag_rehash = hashno;
hmebp = HME_HASH_FUNCTION(ksfmmup, saddr, hmeshift);
@@ -3829,6 +4472,8 @@ rehash:
if (hmeblkp == NULL)
return;
+ ASSERT(!hmeblkp->hblk_shared);
+
HBLKTOHME(osfhmep, hmeblkp, saddr);
sfmmu_copytte(&osfhmep->hme_tte, &tte);
@@ -4010,7 +4655,7 @@ hat_probe(struct hat *sfmmup, caddr_t addr)
sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
}
} else {
- pfn = sfmmu_uvatopfn(addr, sfmmup);
+ pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
}
if (pfn != PFN_INVALID)
@@ -4026,76 +4671,18 @@ hat_getpagesize(struct hat *sfmmup, caddr_t addr)
ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
- sfmmu_gettte(sfmmup, addr, &tte);
- if (TTE_IS_VALID(&tte)) {
- return (TTEBYTES(TTE_CSZ(&tte)));
- }
- return (-1);
-}
-
-static void
-sfmmu_gettte(struct hat *sfmmup, caddr_t addr, tte_t *ttep)
-{
- struct hmehash_bucket *hmebp;
- hmeblk_tag hblktag;
- int hmeshift, hashno = 1;
- struct hme_blk *hmeblkp, *list = NULL;
- struct sf_hment *sfhmep;
-
- /* support for ISM */
- ism_map_t *ism_map;
- ism_blk_t *ism_blkp;
- int i;
- sfmmu_t *ism_hatid = NULL;
- sfmmu_t *locked_hatid = NULL;
-
- ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
-
- ism_blkp = sfmmup->sfmmu_iblk;
- if (ism_blkp) {
- sfmmu_ismhat_enter(sfmmup, 0);
- locked_hatid = sfmmup;
- }
- while (ism_blkp && ism_hatid == NULL) {
- ism_map = ism_blkp->iblk_maps;
- for (i = 0; ism_map[i].imap_ismhat && i < ISM_MAP_SLOTS; i++) {
- if (addr >= ism_start(ism_map[i]) &&
- addr < ism_end(ism_map[i])) {
- sfmmup = ism_hatid = ism_map[i].imap_ismhat;
- addr = (caddr_t)(addr -
- ism_start(ism_map[i]));
- break;
- }
+ if (sfmmup == ksfmmup) {
+ if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
+ return (-1);
}
- ism_blkp = ism_blkp->iblk_next;
- }
- if (locked_hatid) {
- sfmmu_ismhat_exit(locked_hatid, 0);
- }
-
- hblktag.htag_id = sfmmup;
- ttep->ll = 0;
-
- do {
- hmeshift = HME_HASH_SHIFT(hashno);
- hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
- hblktag.htag_rehash = hashno;
- hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
-
- SFMMU_HASH_LOCK(hmebp);
-
- HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list);
- if (hmeblkp != NULL) {
- HBLKTOHME(sfhmep, hmeblkp, addr);
- sfmmu_copytte(&sfhmep->hme_tte, ttep);
- SFMMU_HASH_UNLOCK(hmebp);
- break;
+ } else {
+ if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
+ return (-1);
}
- SFMMU_HASH_UNLOCK(hmebp);
- hashno++;
- } while (HME_REHASH(sfmmup) && (hashno <= mmu_hashcnt));
+ }
- sfmmu_hblks_list_purge(&list);
+ ASSERT(TTE_IS_VALID(&tte));
+ return (TTEBYTES(TTE_CSZ(&tte)));
}
uint_t
@@ -4105,7 +4692,15 @@ hat_getattr(struct hat *sfmmup, caddr_t addr, uint_t *attr)
ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
- sfmmu_gettte(sfmmup, addr, &tte);
+ if (sfmmup == ksfmmup) {
+ if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
+ tte.ll = 0;
+ }
+ } else {
+ if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
+ tte.ll = 0;
+ }
+ }
if (TTE_IS_VALID(&tte)) {
*attr = sfmmu_ptov_attr(&tte);
return (0);
@@ -4214,6 +4809,7 @@ sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
endaddr = addr + len;
hblktag.htag_id = sfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
DEMAP_RANGE_INIT(sfmmup, &dmr);
while (addr < endaddr) {
@@ -4226,6 +4822,7 @@ sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list);
if (hmeblkp != NULL) {
+ ASSERT(!hmeblkp->hblk_shared);
/*
* We've encountered a shadow hmeblk so skip the range
* of the next smaller mapping size.
@@ -4299,6 +4896,7 @@ sfmmu_hblk_chgattr(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
ASSERT(in_hblk_range(hmeblkp, addr));
ASSERT(hmeblkp->hblk_shw_bit == 0);
+ ASSERT(!hmeblkp->hblk_shared);
endaddr = MIN(endaddr, get_hblk_endaddr(hmeblkp));
ttesz = get_hblk_ttesz(hmeblkp);
@@ -4552,6 +5150,7 @@ hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot)
}
endaddr = addr + len;
hblktag.htag_id = sfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
DEMAP_RANGE_INIT(sfmmup, &dmr);
while (addr < endaddr) {
@@ -4564,6 +5163,7 @@ hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot)
HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list);
if (hmeblkp != NULL) {
+ ASSERT(!hmeblkp->hblk_shared);
/*
* We've encountered a shadow hmeblk so skip the range
* of the next smaller mapping size.
@@ -4638,6 +5238,7 @@ sfmmu_hblk_chgprot(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
ASSERT(in_hblk_range(hmeblkp, addr));
ASSERT(hmeblkp->hblk_shw_bit == 0);
+ ASSERT(!hmeblkp->hblk_shared);
#ifdef DEBUG
if (get_hblk_ttesz(hmeblkp) != TTE8K &&
@@ -4868,6 +5469,7 @@ hat_unload_large_virtual(
goto next_block;
}
+ ASSERT(!hmeblkp->hblk_shared);
/*
* unload if there are any current valid mappings
*/
@@ -5032,6 +5634,7 @@ hat_unload_callback(
DEMAP_RANGE_INIT(sfmmup, dmrp);
endaddr = addr + len;
hblktag.htag_id = sfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
/*
* It is likely for the vm to call unload over a wide range of
@@ -5113,6 +5716,7 @@ hat_unload_callback(
}
}
ASSERT(hmeblkp);
+ ASSERT(!hmeblkp->hblk_shared);
if (!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt) {
/*
* If the valid count is zero we can skip the range
@@ -5320,6 +5924,10 @@ sfmmu_hblk_unload(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
ASSERT(in_hblk_range(hmeblkp, addr));
ASSERT(!hmeblkp->hblk_shw_bit);
+ ASSERT(sfmmup != NULL || hmeblkp->hblk_shared);
+ ASSERT(sfmmup == NULL || !hmeblkp->hblk_shared);
+ ASSERT(dmrp == NULL || !hmeblkp->hblk_shared);
+
#ifdef DEBUG
if (get_hblk_ttesz(hmeblkp) != TTE8K &&
(endaddr < get_hblk_endaddr(hmeblkp))) {
@@ -5330,8 +5938,9 @@ sfmmu_hblk_unload(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
endaddr = MIN(endaddr, get_hblk_endaddr(hmeblkp));
ttesz = get_hblk_ttesz(hmeblkp);
- use_demap_range = (do_virtual_coloring &&
- ((dmrp == NULL) || TTEBYTES(ttesz) == DEMAP_RANGE_PGSZ(dmrp)));
+ use_demap_range = ((dmrp == NULL) ||
+ (TTEBYTES(ttesz) == DEMAP_RANGE_PGSZ(dmrp)));
+
if (use_demap_range) {
DEMAP_RANGE_CONTINUE(dmrp, addr, endaddr);
} else {
@@ -5411,7 +6020,7 @@ again:
if (flags & HAT_UNLOAD_UNLOCK) {
ASSERT(hmeblkp->hblk_lckcnt > 0);
- atomic_add_16(&hmeblkp->hblk_lckcnt, -1);
+ atomic_add_32(&hmeblkp->hblk_lckcnt, -1);
HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK);
}
@@ -5425,12 +6034,12 @@ again:
* Given: va1 and va2 are two virtual address
* that alias and map the same physical
* address.
- * 1. mapping exists from va1 to pa and data
+ * 1. mapping exists from va1 to pa and data
* has been read into the cache.
- * 2. unload va1.
- * 3. load va2 and modify data using va2.
- * 4 unload va2.
- * 5. load va1 and reference data. Unless we
+ * 2. unload va1.
+ * 3. load va2 and modify data using va2.
+ * 4. unload va2.
+ * 5. load va1 and reference data. Unless we
* flush the data cache when we unload we will
* get stale data.
* Fortunately, page coloring eliminates the
@@ -5447,18 +6056,10 @@ again:
*/
DEMAP_RANGE_MARKPG(dmrp, addr);
} else {
- if (do_virtual_coloring) {
- sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
- sfmmup->sfmmu_free, 0);
- } else {
- pfn_t pfnum;
-
- pfnum = TTE_TO_PFN(addr, &tte);
- sfmmu_tlbcache_demap(addr, sfmmup,
- hmeblkp, pfnum, sfmmup->sfmmu_free,
- FLUSH_NECESSARY_CPUS,
- CACHE_FLUSH, 0);
- }
+ ASSERT(sfmmup != NULL);
+ ASSERT(!hmeblkp->hblk_shared);
+ sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
+ sfmmup->sfmmu_free, 0);
}
if (pp) {
@@ -5568,8 +6169,14 @@ tte_unloaded:
sfhmep++;
DEMAP_RANGE_NEXTPG(dmrp);
}
- if (ttecnt > 0)
+ /*
+ * For shared hmeblks this routine is only called when the region is
+ * freed and no longer referenced, so there is no need to decrement
+ * ttecnt in the region structure here.
+ */
+ if (ttecnt > 0 && sfmmup != NULL) {
atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -ttecnt);
+ }
return (addr);
}
@@ -5600,6 +6207,8 @@ hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
endaddr = addr + len;
hblktag.htag_id = sfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
+
/*
* Spitfire supports 4 page sizes.
* Most pages are expected to be of the smallest page
@@ -5618,6 +6227,7 @@ hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list);
if (hmeblkp != NULL) {
+ ASSERT(!hmeblkp->hblk_shared);
/*
* We've encountered a shadow hmeblk so skip the range
* of the next smaller mapping size.
@@ -5674,6 +6284,7 @@ sfmmu_hblk_sync(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
int ret;
ASSERT(hmeblkp->hblk_shw_bit == 0);
+ ASSERT(!hmeblkp->hblk_shared);
endaddr = MIN(endaddr, get_hblk_endaddr(hmeblkp));
@@ -5759,7 +6370,7 @@ sfmmu_ttesync(struct hat *sfmmup, caddr_t addr, tte_t *ttep, page_t *pp)
}
sz = TTE_CSZ(ttep);
- if (sfmmup->sfmmu_rmstat) {
+ if (sfmmup != NULL && sfmmup->sfmmu_rmstat) {
int i;
caddr_t vaddr = addr;
@@ -6025,6 +6636,7 @@ again:
sfmmup = hblktosfmmu(hmeblkp);
ASSERT(sfmmup == ksfmmup);
+ ASSERT(!hmeblkp->hblk_shared);
addr = tte_to_vaddr(hmeblkp, tte);
@@ -6033,7 +6645,7 @@ again:
* not being relocated since it is ksfmmup and thus it
* will never be relocated.
*/
- SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp);
+ SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0);
/*
* Update xcall stats
@@ -6580,55 +7192,60 @@ readtte:
addr = tte_to_vaddr(hmeblkp, tte);
- sfmmu_ttesync(sfmmup, addr, &tte, pp);
-
- atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -1);
-
- /*
- * We need to flush the page from the virtual cache
- * in order to prevent a virtual cache alias
- * inconsistency. The particular scenario we need
- * to worry about is:
- * Given: va1 and va2 are two virtual address that
- * alias and will map the same physical address.
- * 1. mapping exists from va1 to pa and data has
- * been read into the cache.
- * 2. unload va1.
- * 3. load va2 and modify data using va2.
- * 4 unload va2.
- * 5. load va1 and reference data. Unless we flush
- * the data cache when we unload we will get
- * stale data.
- * This scenario is taken care of by using virtual
- * page coloring.
- */
- if (sfmmup->sfmmu_ismhat) {
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = (sf_srd_t *)sfmmup;
+ uint_t rid = hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
+ cpuset = sfmmu_rgntlb_demap(addr, rgnp, hmeblkp, 1);
+ sfmmu_ttesync(NULL, addr, &tte, pp);
+ ASSERT(rgnp->rgn_ttecnt[ttesz] > 0);
+ atomic_add_long(&rgnp->rgn_ttecnt[ttesz], -1);
+ } else {
+ sfmmu_ttesync(sfmmup, addr, &tte, pp);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -1);
+
/*
- * Flush TSBs, TLBs and caches
- * of every process
- * sharing this ism segment.
+ * We need to flush the page from the virtual cache
+ * in order to prevent a virtual cache alias
+ * inconsistency. The particular scenario we need
+ * to worry about is:
+ * Given: va1 and va2 are two virtual addresses that
+ * alias and will map the same physical address.
+ * 1. mapping exists from va1 to pa and data has
+ * been read into the cache.
+ * 2. unload va1.
+ * 3. load va2 and modify data using va2.
+ * 4. unload va2.
+ * 5. load va1 and reference data. Unless we flush
+ * the data cache when we unload we will get
+ * stale data.
+ * This scenario is taken care of by using virtual
+ * page coloring.
*/
- sfmmu_hat_lock_all();
- mutex_enter(&ism_mlist_lock);
- kpreempt_disable();
- if (do_virtual_coloring)
+ if (sfmmup->sfmmu_ismhat) {
+ /*
+ * Flush TSBs, TLBs and caches
+ * of every process
+ * sharing this ism segment.
+ */
+ sfmmu_hat_lock_all();
+ mutex_enter(&ism_mlist_lock);
+ kpreempt_disable();
sfmmu_ismtlbcache_demap(addr, sfmmup, hmeblkp,
pp->p_pagenum, CACHE_NO_FLUSH);
- else
- sfmmu_ismtlbcache_demap(addr, sfmmup, hmeblkp,
- pp->p_pagenum, CACHE_FLUSH);
- kpreempt_enable();
- mutex_exit(&ism_mlist_lock);
- sfmmu_hat_unlock_all();
- cpuset = cpu_ready_set;
- } else if (do_virtual_coloring) {
- sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0);
- cpuset = sfmmup->sfmmu_cpusran;
- } else {
- sfmmu_tlbcache_demap(addr, sfmmup, hmeblkp,
- pp->p_pagenum, 0, FLUSH_NECESSARY_CPUS,
- CACHE_FLUSH, 0);
- cpuset = sfmmup->sfmmu_cpusran;
+ kpreempt_enable();
+ mutex_exit(&ism_mlist_lock);
+ sfmmu_hat_unlock_all();
+ cpuset = cpu_ready_set;
+ } else {
+ sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0);
+ cpuset = sfmmup->sfmmu_cpusran;
+ }
}
/*
@@ -6747,6 +7364,8 @@ hat_pagesync(struct page *pp, uint_t clearflag)
int index, cons;
extern ulong_t po_share;
page_t *save_pp = pp;
+ int stop_on_sh = 0;
+ uint_t shcnt;
CPUSET_ZERO(cpuset);
@@ -6767,11 +7386,15 @@ hat_pagesync(struct page *pp, uint_t clearflag)
if ((clearflag & HAT_SYNC_STOPON_SHARED) != 0 &&
(pp->p_share > po_share) &&
!(clearflag & HAT_SYNC_ZERORM)) {
- if (PP_ISRO(pp))
- hat_page_setattr(pp, P_REF);
+ hat_page_setattr(pp, P_REF);
return (PP_GENERIC_ATTR(pp));
}
+ if ((clearflag & HAT_SYNC_STOPON_SHARED) &&
+ !(clearflag & HAT_SYNC_ZERORM)) {
+ stop_on_sh = 1;
+ shcnt = 0;
+ }
clearflag &= ~HAT_SYNC_STOPON_SHARED;
pml = sfmmu_mlist_enter(pp);
index = PP_MAPINDEX(pp);
@@ -6794,16 +7417,43 @@ retry:
if (hme_size(sfhme) < cons)
continue;
+
+ if (stop_on_sh) {
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = hblktosrd(hmeblkp);
+ uint_t rid = hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
+ rgnp, rid);
+ shcnt += rgnp->rgn_refcnt;
+ } else {
+ shcnt++;
+ }
+ if (shcnt > po_share) {
+ /*
+ * tell the pager to spare the page this time
+ * around.
+ */
+ hat_page_setattr(save_pp, P_REF);
+ index = 0;
+ break;
+ }
+ }
tset = sfmmu_pagesync(pp, sfhme,
clearflag & ~HAT_SYNC_STOPON_RM);
CPUSET_OR(cpuset, tset);
+
/*
* If clearflag is HAT_SYNC_DONTZERO, break out as soon
- * as the "ref" or "mod" is set.
+ * as the "ref" or "mod" is set or share cnt exceeds po_share.
*/
if ((clearflag & ~HAT_SYNC_STOPON_RM) == HAT_SYNC_DONTZERO &&
- ((clearflag & HAT_SYNC_STOPON_MOD) && PP_ISMOD(save_pp)) ||
- ((clearflag & HAT_SYNC_STOPON_REF) && PP_ISREF(save_pp))) {
+ (((clearflag & HAT_SYNC_STOPON_MOD) && PP_ISMOD(save_pp)) ||
+ ((clearflag & HAT_SYNC_STOPON_REF) && PP_ISREF(save_pp)))) {
index = 0;
break;
}
@@ -6869,12 +7519,28 @@ sfmmu_pagesync_retry:
if (ret > 0) {
/* we win the cas */
- sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0);
- cpuset = sfmmup->sfmmu_cpusran;
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = (sf_srd_t *)sfmmup;
+ uint_t rid =
+ hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp,
+ srdp, rgnp, rid);
+ cpuset = sfmmu_rgntlb_demap(addr,
+ rgnp, hmeblkp, 1);
+ } else {
+ sfmmu_tlb_demap(addr, sfmmup, hmeblkp,
+ 0, 0);
+ cpuset = sfmmup->sfmmu_cpusran;
+ }
}
}
-
- sfmmu_ttesync(sfmmup, addr, &tte, pp);
+ sfmmu_ttesync(hmeblkp->hblk_shared ? NULL : sfmmup, addr,
+ &tte, pp);
}
return (cpuset);
}
@@ -6930,8 +7596,22 @@ retry:
/* we win the cas */
if (ret > 0) {
- sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0);
- cpuset = sfmmup->sfmmu_cpusran;
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = (sf_srd_t *)sfmmup;
+ uint_t rid = hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp,
+ srdp, rgnp, rid);
+ cpuset = sfmmu_rgntlb_demap(addr,
+ rgnp, hmeblkp, 1);
+ } else {
+ sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0);
+ cpuset = sfmmup->sfmmu_cpusran;
+ }
}
}
@@ -7181,7 +7861,7 @@ hat_getpfnum(struct hat *hat, caddr_t addr)
sfmmu_check_kpfn(pfn);
return (pfn);
} else {
- return (sfmmu_uvatopfn(addr, hat));
+ return (sfmmu_uvatopfn(addr, hat, NULL));
}
}
@@ -7236,16 +7916,19 @@ hat_getkpfnum(caddr_t addr)
return (pfn);
}
-pfn_t
-sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup)
+/*
+ * This routine returns both the pfn and the tte for the given addr.
+ */
+static pfn_t
+sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup, tte_t *ttep)
{
struct hmehash_bucket *hmebp;
hmeblk_tag hblktag;
int hmeshift, hashno = 1;
struct hme_blk *hmeblkp = NULL;
+ tte_t tte;
struct sf_hment *sfhmep;
- tte_t tte;
pfn_t pfn;
/* support for ISM */
@@ -7254,7 +7937,15 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup)
int i;
sfmmu_t *ism_hatid = NULL;
sfmmu_t *locked_hatid = NULL;
+ sfmmu_t *sv_sfmmup = sfmmup;
+ caddr_t sv_vaddr = vaddr;
+ sf_srd_t *srdp;
+ if (ttep == NULL) {
+ ttep = &tte;
+ } else {
+ ttep->ll = 0;
+ }
ASSERT(sfmmup != ksfmmup);
SFMMU_STAT(sf_user_vtop);
@@ -7262,11 +7953,11 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup)
* Set ism_hatid if vaddr falls in a ISM segment.
*/
ism_blkp = sfmmup->sfmmu_iblk;
- if (ism_blkp) {
+ if (ism_blkp != NULL) {
sfmmu_ismhat_enter(sfmmup, 0);
locked_hatid = sfmmup;
}
- while (ism_blkp && ism_hatid == NULL) {
+ while (ism_blkp != NULL && ism_hatid == NULL) {
ism_map = ism_blkp->iblk_maps;
for (i = 0; ism_map[i].imap_ismhat && i < ISM_MAP_SLOTS; i++) {
if (vaddr >= ism_start(ism_map[i]) &&
@@ -7284,6 +7975,7 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup)
}
hblktag.htag_id = sfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
do {
hmeshift = HME_HASH_SHIFT(hashno);
hblktag.htag_bspage = HME_HASH_BSPAGE(vaddr, hmeshift);
@@ -7294,19 +7986,85 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup)
HME_HASH_FAST_SEARCH(hmebp, hblktag, hmeblkp);
if (hmeblkp != NULL) {
+ ASSERT(!hmeblkp->hblk_shared);
HBLKTOHME(sfhmep, hmeblkp, vaddr);
- sfmmu_copytte(&sfhmep->hme_tte, &tte);
- if (TTE_IS_VALID(&tte)) {
- pfn = TTE_TO_PFN(vaddr, &tte);
- } else {
- pfn = PFN_INVALID;
- }
+ sfmmu_copytte(&sfhmep->hme_tte, ttep);
SFMMU_HASH_UNLOCK(hmebp);
- return (pfn);
+ if (TTE_IS_VALID(ttep)) {
+ pfn = TTE_TO_PFN(vaddr, ttep);
+ return (pfn);
+ }
+ break;
}
SFMMU_HASH_UNLOCK(hmebp);
hashno++;
} while (HME_REHASH(sfmmup) && (hashno <= mmu_hashcnt));
+
+ if (SF_HMERGNMAP_ISNULL(sv_sfmmup)) {
+ return (PFN_INVALID);
+ }
+ srdp = sv_sfmmup->sfmmu_srdp;
+ ASSERT(srdp != NULL);
+ ASSERT(srdp->srd_refcnt != 0);
+ hblktag.htag_id = srdp;
+ hashno = 1;
+ do {
+ hmeshift = HME_HASH_SHIFT(hashno);
+ hblktag.htag_bspage = HME_HASH_BSPAGE(sv_vaddr, hmeshift);
+ hblktag.htag_rehash = hashno;
+ hmebp = HME_HASH_FUNCTION(srdp, sv_vaddr, hmeshift);
+
+ SFMMU_HASH_LOCK(hmebp);
+ for (hmeblkp = hmebp->hmeblkp; hmeblkp != NULL;
+ hmeblkp = hmeblkp->hblk_next) {
+ uint_t rid;
+ sf_region_t *rgnp;
+ caddr_t rsaddr;
+ caddr_t readdr;
+
+ if (!HTAGS_EQ_SHME(hmeblkp->hblk_tag, hblktag,
+ sv_sfmmup->sfmmu_hmeregion_map)) {
+ continue;
+ }
+ ASSERT(hmeblkp->hblk_shared);
+ rid = hmeblkp->hblk_tag.htag_rid;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
+ HBLKTOHME(sfhmep, hmeblkp, sv_vaddr);
+ sfmmu_copytte(&sfhmep->hme_tte, ttep);
+ rsaddr = rgnp->rgn_saddr;
+ readdr = rsaddr + rgnp->rgn_size;
+#ifdef DEBUG
+ if (TTE_IS_VALID(ttep) ||
+ get_hblk_ttesz(hmeblkp) > TTE8K) {
+ caddr_t eva = tte_to_evaddr(hmeblkp, ttep);
+ ASSERT(eva > sv_vaddr);
+ ASSERT(sv_vaddr >= rsaddr);
+ ASSERT(sv_vaddr < readdr);
+ ASSERT(eva <= readdr);
+ }
+#endif /* DEBUG */
+ /*
+ * Continue the search if we found an invalid 8K tte
+ * outside of the area covered by this hmeblk's region.
+ */
+ if (TTE_IS_VALID(ttep)) {
+ SFMMU_HASH_UNLOCK(hmebp);
+ pfn = TTE_TO_PFN(sv_vaddr, ttep);
+ return (pfn);
+ } else if (get_hblk_ttesz(hmeblkp) > TTE8K ||
+ (sv_vaddr >= rsaddr && sv_vaddr < readdr)) {
+ SFMMU_HASH_UNLOCK(hmebp);
+ pfn = PFN_INVALID;
+ return (pfn);
+ }
+ }
+ SFMMU_HASH_UNLOCK(hmebp);
+ hashno++;
+ } while (hashno <= mmu_hashcnt);
return (PFN_INVALID);
}
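
The new tail of sfmmu_uvatopfn() adds a second lookup pass: when the private hash misses, the SRD-wide hash is searched and only blocks whose region id is set in the caller's hmeregion map are accepted. A minimal standalone sketch of that two-pass, bitmap-filtered lookup follows; the flat table and toy_lookup are invented, not the kernel's hash structures.

/*
 * Toy model of the two-pass lookup added to sfmmu_uvatopfn(): search the
 * private table first, then the shared table, accepting shared entries
 * only when the caller's region bitmap has the entry's rid set.
 */
#include <stdio.h>

struct toy_entry {
	unsigned long va;
	unsigned long pfn;
	int shared;
	unsigned rid;		/* valid only when shared */
};

static int
toy_lookup(const struct toy_entry *tab, int n, unsigned long va,
    unsigned rgnmap, unsigned long *pfnp)
{
	int i;

	/* pass 1: private entries */
	for (i = 0; i < n; i++) {
		if (!tab[i].shared && tab[i].va == va) {
			*pfnp = tab[i].pfn;
			return (1);
		}
	}
	/* pass 2: shared entries, filtered by the caller's region map */
	for (i = 0; i < n; i++) {
		if (tab[i].shared && tab[i].va == va &&
		    (rgnmap & (1u << tab[i].rid))) {
			*pfnp = tab[i].pfn;
			return (1);
		}
	}
	return (0);
}

int
main(void)
{
	struct toy_entry tab[] = {
		{ 0x10000, 100, 0, 0 },		/* private mapping */
		{ 0x20000, 200, 1, 3 },		/* shared, region 3 */
	};
	unsigned long pfn;

	if (toy_lookup(tab, 2, 0x20000, 1u << 3, &pfn))
		(void) printf("pfn %lu\n", pfn);	/* prints 200 */
	return (0);
}
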
@@ -7323,9 +8081,12 @@ hat_map(struct hat *hat, caddr_t addr, size_t len, uint_t flags)
}
/*
- * Return the number of mappings to a particular page.
- * This number is an approximation of the number of
- * number of people sharing the page.
+ * Return the number of mappings to a particular page. This number is an
+ * approximation of the number of people sharing the page.
+ *
+ * Shared hmeblks and ism hmeblks are each counted as a single mapping
+ * here. hat_page_checkshare() can be used instead to compare a threshold
+ * against a share count that reflects the number of region sharers,
+ * albeit at a higher cost.
*/
ulong_t
hat_page_getshare(page_t *pp)
@@ -7368,6 +8129,73 @@ hat_page_getshare(page_t *pp)
}
/*
+ * Return 1 if the number of mappings exceeds sh_thresh. Return 0
+ * otherwise. Count shared hmeblks by region's refcnt.
+ */
+int
+hat_page_checkshare(page_t *pp, ulong_t sh_thresh)
+{
+ kmutex_t *pml;
+ ulong_t cnt = 0;
+ int index, sz = TTE8K;
+ struct sf_hment *sfhme, *tmphme = NULL;
+ struct hme_blk *hmeblkp;
+
+ pml = sfmmu_mlist_enter(pp);
+
+ if (kpm_enable)
+ cnt = pp->p_kpmref;
+
+ if (pp->p_share + cnt > sh_thresh) {
+ sfmmu_mlist_exit(pml);
+ return (1);
+ }
+
+ index = PP_MAPINDEX(pp);
+
+again:
+ for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
+ tmphme = sfhme->hme_next;
+ if (hme_size(sfhme) != sz) {
+ continue;
+ }
+ hmeblkp = sfmmu_hmetohblk(sfhme);
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = hblktosrd(hmeblkp);
+ uint_t rid = hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
+ rgnp, rid);
+ cnt += rgnp->rgn_refcnt;
+ } else {
+ cnt++;
+ }
+ if (cnt > sh_thresh) {
+ sfmmu_mlist_exit(pml);
+ return (1);
+ }
+ }
+
+ index >>= 1;
+ sz++;
+ while (index) {
+ pp = PP_GROUPLEADER(pp, sz);
+ ASSERT(sfmmu_mlist_held(pp));
+ if (index & 0x1) {
+ goto again;
+ }
+ index >>= 1;
+ sz++;
+ }
+ sfmmu_mlist_exit(pml);
+ return (0);
+}
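
hat_page_checkshare() above stops scanning as soon as the running count crosses sh_thresh, and a shared hmeblk contributes its region's refcnt rather than 1. The standalone sketch below shows the same weighted, early-exit count; toy_checkshare and toy_mapping are invented names.

/*
 * Toy model of hat_page_checkshare(): private mappings count as 1,
 * shared mappings count as the sharing region's refcnt, and the scan
 * stops as soon as the threshold is exceeded.
 */
#include <stdio.h>

struct toy_mapping {
	int shared;
	unsigned long rgn_refcnt;	/* meaningful only when shared */
};

static int
toy_checkshare(const struct toy_mapping *m, int n, unsigned long thresh)
{
	unsigned long cnt = 0;
	int i;

	for (i = 0; i < n; i++) {
		cnt += m[i].shared ? m[i].rgn_refcnt : 1;
		if (cnt > thresh)
			return (1);	/* early exit, like the kernel code */
	}
	return (0);
}

int
main(void)
{
	struct toy_mapping m[] = {
		{ 0, 0 },	/* one private mapping           */
		{ 1, 40 },	/* one region shared by 40 hats  */
	};

	(void) printf("%d %d\n",
	    toy_checkshare(m, 2, 8),	/* 1: 41 > 8   */
	    toy_checkshare(m, 2, 64));	/* 0: 41 <= 64 */
	return (0);
}
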
+
+/*
* Unload all large mappings to the pp and reset the p_szc field of every
* constituent page according to the remaining mappings.
*
@@ -7516,15 +8344,35 @@ ism_tsb_entries(sfmmu_t *sfmmup, int szc)
ism_blk_t *ism_blkp = sfmmup->sfmmu_iblk;
ism_map_t *ism_map;
pgcnt_t npgs = 0;
+ pgcnt_t npgs_scd = 0;
int j;
+ sf_scd_t *scdp;
+ uchar_t rid;
ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
+ scdp = sfmmup->sfmmu_scdp;
+
for (; ism_blkp != NULL; ism_blkp = ism_blkp->iblk_next) {
ism_map = ism_blkp->iblk_maps;
- for (j = 0; ism_map[j].imap_ismhat && j < ISM_MAP_SLOTS; j++)
- npgs += ism_map[j].imap_ismhat->sfmmu_ttecnt[szc];
+ for (j = 0; ism_map[j].imap_ismhat && j < ISM_MAP_SLOTS; j++) {
+ rid = ism_map[j].imap_rid;
+ ASSERT(rid == SFMMU_INVALID_ISMRID ||
+ rid < sfmmup->sfmmu_srdp->srd_next_ismrid);
+
+ if (scdp != NULL && rid != SFMMU_INVALID_ISMRID &&
+ SF_RGNMAP_TEST(scdp->scd_ismregion_map, rid)) {
+ /* ISM is in sfmmup's SCD */
+ npgs_scd +=
+ ism_map[j].imap_ismhat->sfmmu_ttecnt[szc];
+ } else {
+ /* ISM is not in sfmmup's SCD */
+ npgs +=
+ ism_map[j].imap_ismhat->sfmmu_ttecnt[szc];
+ }
+ }
}
sfmmup->sfmmu_ismttecnt[szc] = npgs;
+ sfmmup->sfmmu_scdismttecnt[szc] = npgs_scd;
return (npgs);
}
@@ -7554,13 +8402,15 @@ hat_get_mapped_size(struct hat *hat)
ASSERT(hat->sfmmu_xhat_provider == NULL);
for (i = 0; i < mmu_page_sizes; i++)
- assize += (pgcnt_t)hat->sfmmu_ttecnt[i] * TTEBYTES(i);
+ assize += ((pgcnt_t)hat->sfmmu_ttecnt[i] +
+ (pgcnt_t)hat->sfmmu_scdrttecnt[i]) * TTEBYTES(i);
if (hat->sfmmu_iblk == NULL)
return (assize);
for (i = 0; i < mmu_page_sizes; i++)
- assize += (pgcnt_t)hat->sfmmu_ismttecnt[i] * TTEBYTES(i);
+ assize += ((pgcnt_t)hat->sfmmu_ismttecnt[i] +
+ (pgcnt_t)hat->sfmmu_scdismttecnt[i]) * TTEBYTES(i);
return (assize);
}
@@ -7592,7 +8442,8 @@ hat_stats_disable(struct hat *hat)
/*
* Routines for entering or removing ourselves from the
- * ism_hat's mapping list.
+ * ism_hat's mapping list. This is used for both private and
+ * SCD hats.
*/
static void
iment_add(struct ism_ment *iment, struct hat *ism_hat)
@@ -7663,6 +8514,8 @@ hat_share(struct hat *sfmmup, caddr_t addr,
uint_t ismmask = (uint_t)ismpgsz - 1;
size_t sh_size = ISM_SHIFT(ismshift, len);
ushort_t ismhatflag;
+ hat_region_cookie_t rcookie;
+ sf_scd_t *old_scdp;
#ifdef DEBUG
caddr_t eaddr = addr + len;
@@ -7717,7 +8570,7 @@ hat_share(struct hat *sfmmup, caddr_t addr,
* Make sure mapping does not already exist.
*/
ism_blkp = sfmmup->sfmmu_iblk;
- while (ism_blkp) {
+ while (ism_blkp != NULL) {
ism_map = ism_blkp->iblk_maps;
for (i = 0; i < ISM_MAP_SLOTS && ism_map[i].imap_ismhat; i++) {
if ((addr >= ism_start(ism_map[i]) &&
@@ -7750,7 +8603,8 @@ hat_share(struct hat *sfmmup, caddr_t addr,
if (ism_map[i].imap_ismhat == NULL) {
ism_map[i].imap_ismhat = ism_hatid;
- ism_map[i].imap_vb_shift = (ushort_t)ismshift;
+ ism_map[i].imap_vb_shift = (uchar_t)ismshift;
+ ism_map[i].imap_rid = SFMMU_INVALID_ISMRID;
ism_map[i].imap_hatflags = ismhatflag;
ism_map[i].imap_sz_mask = ismmask;
/*
@@ -7768,7 +8622,6 @@ hat_share(struct hat *sfmmup, caddr_t addr,
ism_ment->iment_hat = sfmmup;
ism_ment->iment_base_va = addr;
ism_hatid->sfmmu_ismhat = 1;
- ism_hatid->sfmmu_flags = 0;
mutex_enter(&ism_mlist_lock);
iment_add(ism_ment, ism_hatid);
mutex_exit(&ism_mlist_lock);
@@ -7790,6 +8643,22 @@ hat_share(struct hat *sfmmup, caddr_t addr,
}
/*
+ * After calling hat_join_region, sfmmup may join a new SCD or
+ * move from the old SCD to a new SCD, in which case we want to
+ * shrink sfmmup's private tsb size, i.e., pass shrink to
+ * sfmmu_check_page_sizes at the end of this routine.
+ */
+ old_scdp = sfmmup->sfmmu_scdp;
+ /*
+ * Call hat_join_region without holding the hat lock, because the
+ * hat lock is acquired inside hat_join_region.
+ */
+ rcookie = hat_join_region(sfmmup, addr, len, (void *)ism_hatid, 0,
+ PROT_ALL, ismszc, NULL, HAT_REGION_ISM);
+ if (rcookie != HAT_INVALID_REGION_COOKIE) {
+ ism_map[i].imap_rid = (uchar_t)((uint64_t)rcookie);
+ }
+ /*
* Update our counters for this sfmmup's ism mappings.
*/
for (i = 0; i <= ismszc; i++) {
@@ -7797,45 +8666,29 @@ hat_share(struct hat *sfmmup, caddr_t addr,
(void) ism_tsb_entries(sfmmup, i);
}
- hatlockp = sfmmu_hat_enter(sfmmup);
-
/*
- * For ISM and DISM we do not support 512K pages, so we only
- * only search the 4M and 8K/64K hashes for 4 pagesize cpus, and search
- * the 256M or 32M, and 4M and 8K/64K hashes for 6 pagesize cpus.
+ * For ISM and DISM we do not support 512K pages, so we only
+ * search the 4M and 8K/64K hashes for 4 pagesize cpus, and search the
+ * 256M or 32M, and 4M and 8K/64K hashes for 6 pagesize cpus.
+ *
+ * Need to set 32M/256M ISM flags to make sure
+ * sfmmu_check_page_sizes() enables them on Panther.
*/
ASSERT((disable_ism_large_pages & (1 << TTE512K)) != 0);
- if (ismszc > TTE4M && !SFMMU_FLAGS_ISSET(sfmmup, HAT_4M_FLAG))
- SFMMU_FLAGS_SET(sfmmup, HAT_4M_FLAG);
-
- if (!SFMMU_FLAGS_ISSET(sfmmup, HAT_64K_FLAG))
- SFMMU_FLAGS_SET(sfmmup, HAT_64K_FLAG);
-
- /*
- * If we updated the ismblkpa for this HAT or we need
- * to start searching the 256M or 32M or 4M hash, we must
- * make sure all CPUs running this process reload their
- * tsbmiss area. Otherwise they will fail to load the mappings
- * in the tsbmiss handler and will loop calling pagefault().
- */
switch (ismszc) {
case TTE256M:
- if (reload_mmu || !SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_256M_FLAG);
- sfmmu_sync_mmustate(sfmmup);
+ if (!SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_ISM)) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ SFMMU_FLAGS_SET(sfmmup, HAT_256M_ISM);
+ sfmmu_hat_exit(hatlockp);
}
break;
case TTE32M:
- if (reload_mmu || !SFMMU_FLAGS_ISSET(sfmmup, HAT_32M_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_32M_FLAG);
- sfmmu_sync_mmustate(sfmmup);
- }
- break;
- case TTE4M:
- if (reload_mmu || !SFMMU_FLAGS_ISSET(sfmmup, HAT_4M_FLAG)) {
- SFMMU_FLAGS_SET(sfmmup, HAT_4M_FLAG);
- sfmmu_sync_mmustate(sfmmup);
+ if (!SFMMU_FLAGS_ISSET(sfmmup, HAT_32M_ISM)) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ SFMMU_FLAGS_SET(sfmmup, HAT_32M_ISM);
+ sfmmu_hat_exit(hatlockp);
}
break;
default:
@@ -7843,10 +8696,18 @@ hat_share(struct hat *sfmmup, caddr_t addr,
}
/*
- * Now we can drop the locks.
+ * If we updated the ismblkpa for this HAT we must make
+ * sure all CPUs running this process reload their tsbmiss area.
+ * Otherwise they will fail to load the mappings in the tsbmiss
+ * handler and will loop calling pagefault().
*/
- sfmmu_ismhat_exit(sfmmup, 1);
- sfmmu_hat_exit(hatlockp);
+ if (reload_mmu) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ sfmmu_sync_mmustate(sfmmup);
+ sfmmu_hat_exit(hatlockp);
+ }
+
+ sfmmu_ismhat_exit(sfmmup, 0);
/*
* Free up ismblk if we didn't use it.
@@ -7857,8 +8718,11 @@ hat_share(struct hat *sfmmup, caddr_t addr,
/*
* Check TSB and TLB page sizes.
*/
- sfmmu_check_page_sizes(sfmmup, 1);
-
+ if (sfmmup->sfmmu_scdp != NULL && old_scdp != sfmmup->sfmmu_scdp) {
+ sfmmu_check_page_sizes(sfmmup, 0);
+ } else {
+ sfmmu_check_page_sizes(sfmmup, 1);
+ }
return (0);
}
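
hat_share() above stores the cookie returned by hat_join_region() as a small integer rid in the ism map, and hat_unshare() later rebuilds a cookie from that rid for hat_leave_region(). The standalone sketch below models that opaque-cookie round trip under the same assumption the casts in the diff make, namely that the cookie is a pointer-sized encoding of a small id; toy_cookie_t and its helpers are invented.

/*
 * Toy model of the cookie/rid round trip used by hat_share()/hat_unshare():
 * an opaque pointer-sized cookie carries a small region id.
 */
#include <stdio.h>
#include <stdint.h>

typedef void *toy_cookie_t;
#define	TOY_INVALID_COOKIE	((toy_cookie_t)-1)

static toy_cookie_t
toy_rid_to_cookie(unsigned char rid)
{
	return ((toy_cookie_t)(uintptr_t)rid);
}

static unsigned char
toy_cookie_to_rid(toy_cookie_t c)
{
	return ((unsigned char)(uintptr_t)c);
}

int
main(void)
{
	toy_cookie_t c = toy_rid_to_cookie(7);	/* as if returned by a join */

	if (c != TOY_INVALID_COOKIE)
		(void) printf("rid %u\n", toy_cookie_to_rid(c));	/* 7 */
	return (0);
}
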
@@ -7879,6 +8743,8 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
struct tsb_info *tsbinfo;
uint_t ismshift = page_get_shift(ismszc);
size_t sh_size = ISM_SHIFT(ismshift, len);
+ uchar_t ism_rid;
+ sf_scd_t *old_scdp;
ASSERT(ISM_ALIGNED(ismshift, addr));
ASSERT(ISM_ALIGNED(ismshift, len));
@@ -7923,7 +8789,7 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
*/
found = 0;
ism_blkp = sfmmup->sfmmu_iblk;
- while (!found && ism_blkp) {
+ while (!found && ism_blkp != NULL) {
ism_map = ism_blkp->iblk_maps;
for (i = 0; i < ISM_MAP_SLOTS; i++) {
if (addr == ism_start(ism_map[i]) &&
@@ -7938,35 +8804,48 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
if (found) {
ism_hatid = ism_map[i].imap_ismhat;
+ ism_rid = ism_map[i].imap_rid;
ASSERT(ism_hatid != NULL);
ASSERT(ism_hatid->sfmmu_ismhat == 1);
/*
- * First remove ourselves from the ism mapping list.
+ * After hat_leave_region, the sfmmup may leave the SCD, in which
+ * case we want to grow the private tsb size when we call
+ * sfmmu_check_page_sizes at the end of the routine.
*/
- mutex_enter(&ism_mlist_lock);
- iment_sub(ism_map[i].imap_ment, ism_hatid);
- mutex_exit(&ism_mlist_lock);
- free_ment = ism_map[i].imap_ment;
+ old_scdp = sfmmup->sfmmu_scdp;
+ /*
+ * Then remove ourselves from the region.
+ */
+ if (ism_rid != SFMMU_INVALID_ISMRID) {
+ hat_leave_region(sfmmup, (void *)((uint64_t)ism_rid),
+ HAT_REGION_ISM);
+ }
/*
- * Now gurantee that any other cpu
+ * And now guarantee that any other cpu
* that tries to process an ISM miss
* will go to tl=0.
*/
hatlockp = sfmmu_hat_enter(sfmmup);
-
sfmmu_invalidate_ctx(sfmmup);
-
sfmmu_hat_exit(hatlockp);
/*
+ * Remove ourselves from the ism mapping list.
+ */
+ mutex_enter(&ism_mlist_lock);
+ iment_sub(ism_map[i].imap_ment, ism_hatid);
+ mutex_exit(&ism_mlist_lock);
+ free_ment = ism_map[i].imap_ment;
+
+ /*
* We delete the ism map by copying
* the next map over the current one.
* We will take the next one in the maps
* array or from the next ism_blk.
*/
- while (ism_blkp) {
+ while (ism_blkp != NULL) {
ism_map = ism_blkp->iblk_maps;
while (i < (ISM_MAP_SLOTS - 1)) {
ism_map[i] = ism_map[i + 1];
@@ -7974,12 +8853,13 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
}
/* i == (ISM_MAP_SLOTS - 1) */
ism_blkp = ism_blkp->iblk_next;
- if (ism_blkp) {
+ if (ism_blkp != NULL) {
ism_map[i] = ism_blkp->iblk_maps[0];
i = 0;
} else {
ism_map[i].imap_seg = 0;
ism_map[i].imap_vb_shift = 0;
+ ism_map[i].imap_rid = SFMMU_INVALID_ISMRID;
ism_map[i].imap_hatflags = 0;
ism_map[i].imap_sz_mask = 0;
ism_map[i].imap_ismhat = NULL;
@@ -8001,6 +8881,12 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
tsbinfo = tsbinfo->tsb_next) {
if (tsbinfo->tsb_flags & TSB_SWAPPED)
continue;
+ if (tsbinfo->tsb_flags & TSB_RELOC_FLAG) {
+ tsbinfo->tsb_flags |=
+ TSB_FLUSH_NEEDED;
+ continue;
+ }
+
sfmmu_inv_tsb(tsbinfo->tsb_va,
TSB_BYTES(tsbinfo->tsb_szc));
}
@@ -8029,8 +8915,13 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
/*
* Check TSB and TLB page sizes if the process isn't exiting.
*/
- if (!sfmmup->sfmmu_free)
- sfmmu_check_page_sizes(sfmmup, 0);
+ if (!sfmmup->sfmmu_free) {
+ if (found && old_scdp != NULL && sfmmup->sfmmu_scdp == NULL) {
+ sfmmu_check_page_sizes(sfmmup, 1);
+ } else {
+ sfmmu_check_page_sizes(sfmmup, 0);
+ }
+ }
}
/* ARGSUSED */
@@ -8038,6 +8929,8 @@ static int
sfmmu_idcache_constructor(void *buf, void *cdrarg, int kmflags)
{
/* void *buf is sfmmu_t pointer */
+ bzero(buf, sizeof (sfmmu_t));
+
return (0);
}
@@ -8308,7 +9201,8 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp)
tmphat = hblktosfmmu(hmeblkp);
sfmmu_copytte(&sfhmep->hme_tte, &tte);
ASSERT(TTE_IS_VALID(&tte));
- if ((tmphat == hat) || hmeblkp->hblk_lckcnt) {
+ if (hmeblkp->hblk_shared || tmphat == hat ||
+ hmeblkp->hblk_lckcnt) {
/*
* We have an uncache conflict
*/
@@ -8330,6 +9224,7 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp)
hmeblkp = sfmmu_hmetohblk(sfhmep);
if (hmeblkp->hblk_xhat_bit)
continue;
+ ASSERT(!hmeblkp->hblk_shared);
(void) sfmmu_pageunload(pp, sfhmep, TTE8K);
}
@@ -8657,7 +9552,20 @@ sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
/*
* Flush TSBs, TLBs and caches
*/
- if (sfmmup->sfmmu_ismhat) {
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = (sf_srd_t *)sfmmup;
+ uint_t rid = hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp,
+ srdp, rgnp, rid);
+ (void) sfmmu_rgntlb_demap(vaddr, rgnp,
+ hmeblkp, 0);
+ sfmmu_cache_flush(pfn, addr_to_vcolor(vaddr));
+ } else if (sfmmup->sfmmu_ismhat) {
if (flags & HAT_CACHE) {
SFMMU_STAT(sf_ism_recache);
} else {
@@ -8676,11 +9584,22 @@ sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
*/
cache_flush_flag = CACHE_NO_FLUSH;
} else {
-
/*
* Flush only TSBs and TLBs.
*/
- if (sfmmup->sfmmu_ismhat) {
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp = (sf_srd_t *)sfmmup;
+ uint_t rid = hmeblkp->hblk_tag.htag_rid;
+ sf_region_t *rgnp;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ ASSERT(srdp != NULL);
+ rgnp = srdp->srd_hmergnp[rid];
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp,
+ srdp, rgnp, rid);
+ (void) sfmmu_rgntlb_demap(vaddr, rgnp,
+ hmeblkp, 0);
+ } else if (sfmmup->sfmmu_ismhat) {
if (flags & HAT_CACHE) {
SFMMU_STAT(sf_ism_recache);
} else {
@@ -8737,10 +9656,18 @@ sfmmu_get_ctx(sfmmu_t *sfmmup)
{
mmu_ctx_t *mmu_ctxp;
uint_t pstate_save;
+#ifdef sun4v
+ int ret;
+#endif
ASSERT(sfmmu_hat_lock_held(sfmmup));
ASSERT(sfmmup != ksfmmup);
+ if (SFMMU_FLAGS_ISSET(sfmmup, HAT_ALLCTX_INVALID)) {
+ sfmmu_setup_tsbinfo(sfmmup);
+ SFMMU_FLAGS_CLEAR(sfmmup, HAT_ALLCTX_INVALID);
+ }
+
kpreempt_disable();
mmu_ctxp = CPU_MMU_CTXP(CPU);
@@ -8772,7 +9699,19 @@ sfmmu_get_ctx(sfmmu_t *sfmmup)
*/
pstate_save = sfmmu_disable_intrs();
- sfmmu_alloc_ctx(sfmmup, 1, CPU);
+#ifdef sun4u
+ (void) sfmmu_alloc_ctx(sfmmup, 1, CPU, SFMMU_PRIVATE);
+#else
+ if (sfmmu_alloc_ctx(sfmmup, 1, CPU, SFMMU_PRIVATE) &&
+ sfmmup->sfmmu_scdp != NULL) {
+ sf_scd_t *scdp = sfmmup->sfmmu_scdp;
+ sfmmu_t *scsfmmup = scdp->scd_sfmmup;
+ ret = sfmmu_alloc_ctx(scsfmmup, 1, CPU, SFMMU_SHARED);
+ /* for debug purposes only */
+ ASSERT(!ret || scsfmmup->sfmmu_ctxs[CPU_MMU_IDX(CPU)].cnum
+ != INVALID_CONTEXT);
+ }
+#endif
sfmmu_load_mmustate(sfmmup);
sfmmu_enable_intrs(pstate_save);
@@ -8977,10 +9916,21 @@ sfmmu_replace_tsb(sfmmu_t *sfmmup, struct tsb_info *old_tsbinfo, uint_t szc,
/*
* All initialization is done inside of sfmmu_tsbinfo_alloc().
* If we fail to allocate a TSB, exit.
+ *
+ * If the tsb grows, with the new tsb size > 4M and the old tsb
+ * size < 4M, then retry with a 4M slab after the initial alloc fails.
+ *
+ * If the tsb is swapped in with a tsb size > 4M, then retry with
+ * 4M after the initial alloc fails.
*/
sfmmu_hat_exit(hatlockp);
- if (sfmmu_tsbinfo_alloc(&new_tsbinfo, szc, tte_sz_mask,
- flags, sfmmup)) {
+ if (sfmmu_tsbinfo_alloc(&new_tsbinfo, szc,
+ tte_sz_mask, flags, sfmmup) &&
+ (!(flags & (TSB_GROW | TSB_SWAPIN)) || (szc <= TSB_4M_SZCODE) ||
+ (!(flags & TSB_SWAPIN) &&
+ (old_tsbinfo->tsb_szc >= TSB_4M_SZCODE)) ||
+ sfmmu_tsbinfo_alloc(&new_tsbinfo, TSB_4M_SZCODE,
+ tte_sz_mask, flags, sfmmup))) {
(void) sfmmu_hat_enter(sfmmup);
if (!(flags & TSB_SWAPIN))
SFMMU_STAT(sf_tsb_resize_failures);
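
The reworked allocation above retries once with a 4M TSB when the preferred size cannot be obtained during a grow or swapin. The standalone sketch below expresses the same allocate-then-fall-back shape in isolation; toy_alloc, toy_alloc_with_fallback and TOY_4M_SZCODE are invented stand-ins.

/*
 * Toy model of the grow/swapin fallback above: try the preferred size
 * code first and, if that fails and the preferred size exceeds the 4M
 * size code, retry once at the 4M size.
 */
#include <stdio.h>

#define	TOY_4M_SZCODE	5	/* stand-in for TSB_4M_SZCODE */

/* pretend allocator: fails for any size code larger than 'limit' */
static int
toy_alloc(int szc, int limit)
{
	return (szc <= limit ? 0 : -1);
}

static int
toy_alloc_with_fallback(int szc, int limit, int *got)
{
	if (toy_alloc(szc, limit) == 0) {
		*got = szc;
		return (0);
	}
	if (szc > TOY_4M_SZCODE && toy_alloc(TOY_4M_SZCODE, limit) == 0) {
		*got = TOY_4M_SZCODE;	/* fell back to the 4M slab */
		return (0);
	}
	return (-1);
}

int
main(void)
{
	int got;

	if (toy_alloc_with_fallback(8, TOY_4M_SZCODE, &got) == 0)
		(void) printf("allocated szc %d\n", got);	/* 5 */
	return (0);
}
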
@@ -9062,7 +10012,6 @@ sfmmu_replace_tsb(sfmmu_t *sfmmup, struct tsb_info *old_tsbinfo, uint_t szc,
else
sfmmup->sfmmu_tsb = new_tsbinfo;
membar_enter(); /* make sure new TSB globally visible */
- sfmmu_setup_tsbinfo(sfmmup);
/*
* We need to migrate TSB entries from the old TSB to the new TSB
@@ -9115,6 +10064,55 @@ sfmmu_reprog_pgsz_arr(sfmmu_t *sfmmup, uint8_t *tmp_pgsz)
sfmmu_hat_exit(hatlockp);
}
+/* Update scd_rttecnt for shme rgns in the SCD */
+static void
+sfmmu_set_scd_rttecnt(sf_srd_t *srdp, sf_scd_t *scdp)
+{
+ uint_t rid;
+ uint_t i, j;
+ ulong_t w;
+ sf_region_t *rgnp;
+
+ ASSERT(srdp != NULL);
+
+ for (i = 0; i < SFMMU_HMERGNMAP_WORDS; i++) {
+ if ((w = scdp->scd_region_map.bitmap[i]) == 0) {
+ continue;
+ }
+
+ j = 0;
+ while (w) {
+ if (!(w & 0x1)) {
+ j++;
+ w >>= 1;
+ continue;
+ }
+ rid = (i << BT_ULSHIFT) | j;
+ j++;
+ w >>= 1;
+
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp->rgn_refcnt > 0);
+ ASSERT(rgnp->rgn_id == rid);
+
+ scdp->scd_rttecnt[rgnp->rgn_pgszc] +=
+ rgnp->rgn_size >> TTE_PAGE_SHIFT(rgnp->rgn_pgszc);
+
+ /*
+ * Maintain the tsb0 inflation cnt for the regions
+ * in the SCD.
+ */
+ if (rgnp->rgn_pgszc >= TTE4M) {
+ scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt +=
+ rgnp->rgn_size >>
+ (TTE_PAGE_SHIFT(TTE8K) + 2);
+ }
+ }
+ }
+}
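
The word-by-word bitmap walk above (take each nonzero word, shift bits out one at a time, and recover the region id as (word_index << BT_ULSHIFT) | bit_index) is the same idiom used by sfmmu_alloc_scd_tsbs() and sfmmu_tsbmiss_exception() later in this patch. As a standalone, user-level illustration, not part of the patch, it reduces to the sketch below; region_map_t, walk_region_map() and the word count are hypothetical, and BT_ULSHIFT of 6 assumes 64-bit words.

#include <stdio.h>

#define	RGNMAP_WORDS	4
#define	BT_ULSHIFT	6	/* log2(bits per word), 64-bit words assumed */

typedef struct {
	unsigned long bitmap[RGNMAP_WORDS];
} region_map_t;

/* Call visit() once for every region id whose bit is set in the map. */
static void
walk_region_map(region_map_t *map, void (*visit)(unsigned int))
{
	unsigned int i, j, rid;
	unsigned long w;

	for (i = 0; i < RGNMAP_WORDS; i++) {
		if ((w = map->bitmap[i]) == 0)
			continue;
		j = 0;
		while (w) {
			if (w & 0x1) {
				rid = (i << BT_ULSHIFT) | j;
				visit(rid);
			}
			j++;
			w >>= 1;
		}
	}
}

static void
print_rid(unsigned int rid)
{
	(void) printf("region %u\n", rid);
}

int
main(void)
{
	region_map_t map = { { 0x5, 0x0, 0x1, 0x0 } };

	walk_region_map(&map, print_rid);	/* prints regions 0, 2 and 128 */
	return (0);
}
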
+
/*
* This function assumes that there are either four or six supported page
* sizes and at most two programmable TLBs, so we need to decide which
@@ -9144,12 +10142,13 @@ sfmmu_check_page_sizes(sfmmu_t *sfmmup, int growing)
if (sfmmup == ksfmmup || sfmmup->sfmmu_ismhat != NULL)
return;
- if ((sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) == 0 &&
+ if (!SFMMU_LGPGS_INUSE(sfmmup) &&
sfmmup->sfmmu_ttecnt[TTE8K] <= tsb_rss_factor)
return;
for (i = 0; i < mmu_page_sizes; i++) {
- ttecnt[i] = SFMMU_TTE_CNT(sfmmup, i);
+ ttecnt[i] = sfmmup->sfmmu_ttecnt[i] +
+ sfmmup->sfmmu_ismttecnt[i];
}
/* Check pagesizes in use, and possibly reprogram DTLB. */
@@ -9172,6 +10171,11 @@ sfmmu_check_page_sizes(sfmmu_t *sfmmup, int growing)
}
/*
+ * Inflate tte8k_cnt to allow for region large page allocation failure.
+ */
+ tte8k_cnt += sfmmup->sfmmu_tsb0_4minflcnt;
+
+ /*
* Inflate TSB sizes by a factor of 2 if this process
* uses 4M text pages to minimize extra conflict misses
* in the first TSB since without counting text pages
@@ -9274,18 +10278,22 @@ sfmmu_size_tsb(sfmmu_t *sfmmup, int growing, uint64_t tte8k_cnt,
tsb_bits = (mmu_page_sizes == max_mmu_page_sizes)?
TSB4M|TSB32M|TSB256M:TSB4M;
if ((sfmmu_tsbinfo_alloc(&newtsb, tsb_szc, tsb_bits,
- allocflags, sfmmup) != 0) &&
- (sfmmu_tsbinfo_alloc(&newtsb, TSB_MIN_SZCODE,
- tsb_bits, allocflags, sfmmup) != 0)) {
+ allocflags, sfmmup)) &&
+ (tsb_szc <= TSB_4M_SZCODE ||
+ sfmmu_tsbinfo_alloc(&newtsb, TSB_4M_SZCODE,
+ tsb_bits, allocflags, sfmmup)) &&
+ sfmmu_tsbinfo_alloc(&newtsb, TSB_MIN_SZCODE,
+ tsb_bits, allocflags, sfmmup)) {
return;
}
hatlockp = sfmmu_hat_enter(sfmmup);
+ sfmmu_invalidate_ctx(sfmmup);
+
if (sfmmup->sfmmu_tsb->tsb_next == NULL) {
sfmmup->sfmmu_tsb->tsb_next = newtsb;
SFMMU_STAT(sf_tsb_sectsb_create);
- sfmmu_setup_tsbinfo(sfmmup);
sfmmu_hat_exit(hatlockp);
return;
} else {
@@ -9351,6 +10359,7 @@ sfmmu_free_sfmmu(sfmmu_t *sfmmup)
ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
+ ASSERT(SF_RGNMAP_ISNULL(sfmmup));
sfmmup->sfmmu_free = 0;
sfmmup->sfmmu_ismhat = 0;
@@ -9656,6 +10665,7 @@ sfmmu_hblk_swap(struct hme_blk *new)
struct hme_blk *found;
#endif
old = HBLK_RESERVE;
+ ASSERT(!old->hblk_shared);
/*
* save pa before bcopy clobbers it
@@ -9668,7 +10678,8 @@ sfmmu_hblk_swap(struct hme_blk *new)
/*
* acquire hash bucket lock.
*/
- hmebp = sfmmu_tteload_acquire_hashbucket(ksfmmup, base, TTE8K);
+ hmebp = sfmmu_tteload_acquire_hashbucket(ksfmmup, base, TTE8K,
+ SFMMU_INVALID_SHMERID);
/*
* copy contents from old to new
@@ -9742,6 +10753,7 @@ sfmmu_hblk_swap(struct hme_blk *new)
#ifdef DEBUG
hblktag.htag_id = ksfmmup;
+ hblktag.htag_rid = SFMMU_INVALID_SHMERID;
hblktag.htag_bspage = HME_HASH_BSPAGE(base, HME_HASH_SHIFT(TTE8K));
hblktag.htag_rehash = HME_HASH_REHASH(TTE8K);
HME_HASH_FAST_SEARCH(hmebp, hblktag, found);
@@ -9941,7 +10953,7 @@ sfmmu_ismhat_exit(sfmmu_t *sfmmup, int hatlock_held)
static struct hme_blk *
sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr,
struct hmehash_bucket *hmebp, uint_t size, hmeblk_tag hblktag,
- uint_t flags)
+ uint_t flags, uint_t rid)
{
struct hme_blk *hmeblkp = NULL;
struct hme_blk *newhblkp;
@@ -9952,8 +10964,14 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr,
uint_t owner; /* set to 1 if using hblk_reserve */
uint_t forcefree;
int sleep;
+ sf_srd_t *srdp;
+ sf_region_t *rgnp;
ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
+ ASSERT(hblktag.htag_rid == rid);
+ SFMMU_VALIDATE_HMERID(sfmmup, rid, vaddr, TTEBYTES(size));
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid) ||
+ IS_P2ALIGNED(vaddr, TTEBYTES(size)));
/*
* If segkmem is not created yet, allocate from static hmeblks
@@ -9963,6 +10981,8 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr,
*/
if (!hblk_alloc_dynamic) {
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
+
if (size == TTE8K) {
index = nucleus_hblk8.index;
if (index >= nucleus_hblk8.len) {
@@ -9999,7 +11019,7 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr,
SFMMU_HASH_UNLOCK(hmebp);
- if (sfmmup != KHATID) {
+ if (sfmmup != KHATID && !SFMMU_IS_SHMERID_VALID(rid)) {
if (mmu_page_sizes == max_mmu_page_sizes) {
if (size < TTE256M)
shw_hblkp = sfmmu_shadow_hcreate(sfmmup, vaddr,
@@ -10009,6 +11029,36 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr,
shw_hblkp = sfmmu_shadow_hcreate(sfmmup, vaddr,
size, flags);
}
+ } else if (SFMMU_IS_SHMERID_VALID(rid)) {
+ int ttesz;
+ caddr_t va;
+ caddr_t eva = vaddr + TTEBYTES(size);
+
+ ASSERT(sfmmup != KHATID);
+
+ srdp = sfmmup->sfmmu_srdp;
+ ASSERT(srdp != NULL && srdp->srd_refcnt != 0);
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp != NULL && rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt != 0);
+ ASSERT(size <= rgnp->rgn_pgszc);
+
+ ttesz = HBLK_MIN_TTESZ;
+ do {
+ if (!(rgnp->rgn_hmeflags & (0x1 << ttesz))) {
+ continue;
+ }
+
+ if (ttesz > size && ttesz != HBLK_MIN_TTESZ) {
+ sfmmu_cleanup_rhblk(srdp, vaddr, rid, ttesz);
+ } else if (ttesz < size) {
+ for (va = vaddr; va < eva;
+ va += TTEBYTES(ttesz)) {
+ sfmmu_cleanup_rhblk(srdp, va, rid,
+ ttesz);
+ }
+ }
+ } while (++ttesz <= rgnp->rgn_pgszc);
}
fill_hblk:
@@ -10016,6 +11066,7 @@ fill_hblk:
if (owner && size == TTE8K) {
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
/*
* We are really in a tight spot. We already own
* hblk_reserve and we need another hblk. In anticipation
@@ -10151,6 +11202,10 @@ re_verify:
* _only if_ we are the owner of hblk_reserve.
*/
if (newhblkp != HBLK_RESERVE || owner) {
+ ASSERT(!SFMMU_IS_SHMERID_VALID(rid) ||
+ newhblkp->hblk_shared);
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid) ||
+ !newhblkp->hblk_shared);
return (newhblkp);
} else {
/*
@@ -10177,6 +11232,17 @@ re_verify:
}
hblk_init:
+ if (SFMMU_IS_SHMERID_VALID(rid)) {
+ uint16_t tteflag = 0x1 <<
+ ((size < HBLK_MIN_TTESZ) ? HBLK_MIN_TTESZ : size);
+
+ if (!(rgnp->rgn_hmeflags & tteflag)) {
+ atomic_or_16(&rgnp->rgn_hmeflags, tteflag);
+ }
+ hmeblkp->hblk_shared = 1;
+ } else {
+ hmeblkp->hblk_shared = 0;
+ }
set_hblk_sz(hmeblkp, size);
ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
hmeblkp->hblk_next = (struct hme_blk *)NULL;
@@ -10207,7 +11273,7 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
int shw_size, vshift;
struct hme_blk *shw_hblkp;
uint_t shw_mask, newshw_mask;
- uintptr_t vaddr;
+ caddr_t vaddr;
int size;
uint_t critical;
@@ -10224,6 +11290,7 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
shw_hblkp = hmeblkp->hblk_shadow;
if (shw_hblkp) {
ASSERT(hblktosfmmu(hmeblkp) != KHATID);
+ ASSERT(!hmeblkp->hblk_shared);
if (mmu_page_sizes == max_mmu_page_sizes) {
ASSERT(size < TTE256M);
} else {
@@ -10231,7 +11298,7 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
}
shw_size = get_hblk_ttesz(shw_hblkp);
- vaddr = get_hblk_base(hmeblkp);
+ vaddr = (caddr_t)get_hblk_base(hmeblkp);
vshift = vaddr_to_vshift(shw_hblkp->hblk_tag, vaddr, shw_size);
ASSERT(vshift < 8);
/*
@@ -10250,6 +11317,28 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
hmeblkp->hblk_nextpa = hblkpa;
hmeblkp->hblk_shw_bit = 0;
+ /*
+ * Clear ttebit map in the region this hmeblk belongs to. The region
+ * must exist as long as any of its hmeblks exist. This invariant
+	 * holds because before a region is freed all its hmeblks are removed.
+ */
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp;
+ sf_region_t *rgnp;
+ uint_t rid;
+
+ srdp = hblktosrd(hmeblkp);
+ ASSERT(srdp != NULL && srdp->srd_refcnt != 0);
+ rid = hmeblkp->hblk_tag.htag_rid;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp != NULL);
+ vaddr = (caddr_t)get_hblk_base(hmeblkp);
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
+ hmeblkp->hblk_shared = 0;
+ }
+
if (hmeblkp->hblk_nuc_bit == 0) {
if (size == TTE8K && sfmmu_put_free_hblk(hmeblkp, critical))
@@ -10419,7 +11508,7 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
{
int shw_size, vshift;
struct hme_blk *shw_hblkp;
- uintptr_t vaddr;
+ caddr_t vaddr;
uint_t shw_mask, newshw_mask;
ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp));
@@ -10432,6 +11521,9 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
demap_range_t dmr;
sfmmup = hblktosfmmu(hmeblkp);
+ if (hmeblkp->hblk_shared || sfmmup->sfmmu_ismhat) {
+ return (0);
+ }
DEMAP_RANGE_INIT(sfmmup, &dmr);
(void) sfmmu_hblk_unload(sfmmup, hmeblkp,
(caddr_t)get_hblk_base(hmeblkp),
@@ -10455,8 +11547,9 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
shw_hblkp = hmeblkp->hblk_shadow;
if (shw_hblkp) {
+ ASSERT(!hmeblkp->hblk_shared);
shw_size = get_hblk_ttesz(shw_hblkp);
- vaddr = get_hblk_base(hmeblkp);
+ vaddr = (caddr_t)get_hblk_base(hmeblkp);
vshift = vaddr_to_vshift(shw_hblkp->hblk_tag, vaddr, shw_size);
ASSERT(vshift < 8);
/*
@@ -10479,6 +11572,28 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp,
*/
hmeblkp->hblk_shw_bit = 0;
+ /*
+ * Clear ttebit map in the region this hmeblk belongs to. The region
+ * must exist as long as any of its hmeblks exist. This invariant
+	 * holds because before a region is freed all its hmeblks are removed.
+ */
+ if (hmeblkp->hblk_shared) {
+ sf_srd_t *srdp;
+ sf_region_t *rgnp;
+ uint_t rid;
+
+ srdp = hblktosrd(hmeblkp);
+ ASSERT(srdp != NULL && srdp->srd_refcnt != 0);
+ rid = hmeblkp->hblk_tag.htag_rid;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp != NULL);
+ vaddr = (caddr_t)get_hblk_base(hmeblkp);
+ SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid);
+ hmeblkp->hblk_shared = 0;
+ }
+
sfmmu_hblk_steal_count++;
SFMMU_STAT(sf_steal_count);
@@ -10553,6 +11668,8 @@ sfmmu_tsb_swapin(sfmmu_t *sfmmup, hatlock_t *hatlockp)
SFMMU_FLAGS_CLEAR(sfmmup, HAT_SWAPPED|HAT_SWAPIN);
cv_broadcast(&sfmmup->sfmmu_tsb_cv);
return;
+ case TSB_LOSTRACE:
+ break;
case TSB_ALLOCFAIL:
break;
default:
@@ -10587,15 +11704,44 @@ sfmmu_tsb_swapin(sfmmu_t *sfmmup, hatlock_t *hatlockp)
rc = sfmmu_replace_tsb(sfmmup, tsbinfop, TSB_MIN_SZCODE,
hatlockp, TSB_SWAPIN | TSB_FORCEALLOC);
ASSERT(rc == TSB_SUCCESS);
- } else {
- /* update machine specific tsbinfo */
- sfmmu_setup_tsbinfo(sfmmup);
}
SFMMU_FLAGS_CLEAR(sfmmup, HAT_SWAPPED|HAT_SWAPIN);
cv_broadcast(&sfmmup->sfmmu_tsb_cv);
}
+static int
+sfmmu_is_rgnva(sf_srd_t *srdp, caddr_t addr, ulong_t w, ulong_t bmw)
+{
+ ulong_t bix = 0;
+ uint_t rid;
+ sf_region_t *rgnp;
+
+ ASSERT(srdp != NULL);
+ ASSERT(srdp->srd_refcnt != 0);
+
+ w <<= BT_ULSHIFT;
+ while (bmw) {
+ if (!(bmw & 0x1)) {
+ bix++;
+ bmw >>= 1;
+ continue;
+ }
+ rid = w | bix;
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp->rgn_refcnt > 0);
+ ASSERT(rgnp->rgn_id == rid);
+ if (addr < rgnp->rgn_saddr ||
+ addr >= (rgnp->rgn_saddr + rgnp->rgn_size)) {
+ bix++;
+ bmw >>= 1;
+ } else {
+ return (1);
+ }
+ }
+ return (0);
+}
+
/*
* Handle exceptions for low level tsb_handler.
*
@@ -10620,12 +11766,14 @@ sfmmu_tsb_swapin(sfmmu_t *sfmmup, hatlock_t *hatlockp)
void
sfmmu_tsbmiss_exception(struct regs *rp, uintptr_t tagaccess, uint_t traptype)
{
- sfmmu_t *sfmmup;
+ sfmmu_t *sfmmup, *shsfmmup;
uint_t ctxtype;
klwp_id_t lwp;
char lwp_save_state;
- hatlock_t *hatlockp;
+ hatlock_t *hatlockp, *shatlockp;
struct tsb_info *tsbinfop;
+ struct tsbmiss *tsbmp;
+ sf_scd_t *scdp;
SFMMU_STAT(sf_tsb_exceptions);
SFMMU_MMU_STAT(mmu_tsb_exceptions);
@@ -10638,24 +11786,79 @@ sfmmu_tsbmiss_exception(struct regs *rp, uintptr_t tagaccess, uint_t traptype)
ASSERT(sfmmup != ksfmmup && ctxtype != KCONTEXT);
ASSERT(sfmmup->sfmmu_ismhat == 0);
- /*
- * First, make sure we come out of here with a valid ctx,
- * since if we don't get one we'll simply loop on the
- * faulting instruction.
- *
- * If the ISM mappings are changing, the TSB is being relocated, or
- * the process is swapped out we serialize behind the controlling
- * thread with the sfmmu_flags and sfmmu_tsb_cv condition variable.
- * Otherwise we synchronize with the context stealer or the thread
- * that required us to change out our MMU registers (such
- * as a thread changing out our TSB while we were running) by
- * locking the HAT and grabbing the rwlock on the context as a
- * reader temporarily.
- */
ASSERT(!SFMMU_FLAGS_ISSET(sfmmup, HAT_SWAPPED) ||
ctxtype == INVALID_CONTEXT);
- if (ctxtype == INVALID_CONTEXT) {
+ if (ctxtype != INVALID_CONTEXT && traptype != T_DATA_PROT) {
+ /*
+ * We may land here because shme bitmap and pagesize
+		 * We may land here because the shme bitmap and pagesize
+		 * flags are updated lazily in the tsbmiss area on other
+		 * cpus. If we detect here that the tsbmiss area is out of
+		 * sync with the sfmmu, update it and retry the trapped
+		 * instruction. Otherwise call trap().
+ int ret = 0;
+ uchar_t tteflag_mask = (1 << TTE64K) | (1 << TTE8K);
+ caddr_t addr = (caddr_t)(tagaccess & TAGACC_VADDR_MASK);
+
+ /*
+ * Must set lwp state to LWP_SYS before
+ * trying to acquire any adaptive lock
+ */
+ lwp = ttolwp(curthread);
+ ASSERT(lwp);
+ lwp_save_state = lwp->lwp_state;
+ lwp->lwp_state = LWP_SYS;
+
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ kpreempt_disable();
+ tsbmp = &tsbmiss_area[CPU->cpu_id];
+ ASSERT(sfmmup == tsbmp->usfmmup);
+ if (((tsbmp->uhat_tteflags ^ sfmmup->sfmmu_tteflags) &
+ ~tteflag_mask) ||
+ ((tsbmp->uhat_rtteflags ^ sfmmup->sfmmu_rtteflags) &
+ ~tteflag_mask)) {
+ tsbmp->uhat_tteflags = sfmmup->sfmmu_tteflags;
+ tsbmp->uhat_rtteflags = sfmmup->sfmmu_rtteflags;
+ ret = 1;
+ }
+ if (sfmmup->sfmmu_srdp != NULL) {
+ ulong_t *sm = sfmmup->sfmmu_hmeregion_map.bitmap;
+ ulong_t *tm = tsbmp->shmermap;
+ ulong_t i;
+ for (i = 0; i < SFMMU_HMERGNMAP_WORDS; i++) {
+ ulong_t d = tm[i] ^ sm[i];
+ if (d) {
+ if (d & sm[i]) {
+ if (!ret && sfmmu_is_rgnva(
+ sfmmup->sfmmu_srdp,
+ addr, i, d & sm[i])) {
+ ret = 1;
+ }
+ }
+ tm[i] = sm[i];
+ }
+ }
+ }
+ kpreempt_enable();
+ sfmmu_hat_exit(hatlockp);
+ lwp->lwp_state = lwp_save_state;
+ if (ret) {
+ return;
+ }
+ } else if (ctxtype == INVALID_CONTEXT) {
+ /*
+ * First, make sure we come out of here with a valid ctx,
+ * since if we don't get one we'll simply loop on the
+ * faulting instruction.
+ *
+ * If the ISM mappings are changing, the TSB is relocated,
+		 * If the ISM mappings are changing, the TSB is being
+		 * relocated, the process is swapped out, or the process is
+		 * joining or leaving an SCD or shared regions, we serialize
+		 * behind the controlling thread with the hat lock,
+		 * sfmmu_flags and the sfmmu_tsb_cv condition variable.
+
/*
* Must set lwp state to LWP_SYS before
* trying to acquire any adaptive lock
@@ -10667,6 +11870,33 @@ sfmmu_tsbmiss_exception(struct regs *rp, uintptr_t tagaccess, uint_t traptype)
hatlockp = sfmmu_hat_enter(sfmmup);
retry:
+ if ((scdp = sfmmup->sfmmu_scdp) != NULL) {
+ shsfmmup = scdp->scd_sfmmup;
+ ASSERT(shsfmmup != NULL);
+
+ for (tsbinfop = shsfmmup->sfmmu_tsb; tsbinfop != NULL;
+ tsbinfop = tsbinfop->tsb_next) {
+ if (tsbinfop->tsb_flags & TSB_RELOC_FLAG) {
+ /* drop the private hat lock */
+ sfmmu_hat_exit(hatlockp);
+ /* acquire the shared hat lock */
+ shatlockp = sfmmu_hat_enter(shsfmmup);
+ /*
+ * recheck to see if anything changed
+ * after we drop the private hat lock.
+ */
+ if (sfmmup->sfmmu_scdp == scdp &&
+ shsfmmup == scdp->scd_sfmmup) {
+ sfmmu_tsb_chk_reloc(shsfmmup,
+ shatlockp);
+ }
+ sfmmu_hat_exit(shatlockp);
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ goto retry;
+ }
+ }
+ }
+
for (tsbinfop = sfmmup->sfmmu_tsb; tsbinfop != NULL;
tsbinfop = tsbinfop->tsb_next) {
if (tsbinfop->tsb_flags & TSB_RELOC_FLAG) {
@@ -10685,6 +11915,17 @@ retry:
goto retry;
}
+ /* Is this process joining an SCD? */
+ if (SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) {
+ /*
+ * Flush private TSB and setup shared TSB.
+ * sfmmu_finish_join_scd() does not drop the
+ * hat lock.
+ */
+ sfmmu_finish_join_scd(sfmmup);
+ SFMMU_FLAGS_CLEAR(sfmmup, HAT_JOIN_SCD);
+ }
+
/*
* If we're swapping in, get TSB(s). Note that we must do
* this before we get a ctx or load the MMU state. Once
@@ -10705,21 +11946,27 @@ retry:
* it anyway.
*/
lwp->lwp_state = lwp_save_state;
- if (sfmmup->sfmmu_ttecnt[TTE8K] != 0 ||
- sfmmup->sfmmu_ttecnt[TTE64K] != 0 ||
- sfmmup->sfmmu_ttecnt[TTE512K] != 0 ||
- sfmmup->sfmmu_ttecnt[TTE4M] != 0 ||
- sfmmup->sfmmu_ttecnt[TTE32M] != 0 ||
- sfmmup->sfmmu_ttecnt[TTE256M] != 0) {
- return;
- }
- if (traptype == T_DATA_PROT) {
- traptype = T_DATA_MMU_MISS;
- }
+ return;
}
trap(rp, (caddr_t)tagaccess, traptype, 0);
}
+static void
+sfmmu_tsb_chk_reloc(sfmmu_t *sfmmup, hatlock_t *hatlockp)
+{
+ struct tsb_info *tp;
+
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+
+ for (tp = sfmmup->sfmmu_tsb; tp != NULL; tp = tp->tsb_next) {
+ if (tp->tsb_flags & TSB_RELOC_FLAG) {
+ cv_wait(&sfmmup->sfmmu_tsb_cv,
+ HATLOCK_MUTEXP(hatlockp));
+ break;
+ }
+ }
+}
+
/*
* sfmmu_vatopfn_suspended is called from GET_TTE when TL=0 and
  * TTE_SUSPENDED bit set in tte we block on acquiring a page lock
@@ -10755,6 +12002,124 @@ sfmmu_tsbmiss_suspended(struct regs *rp, uintptr_t tagacc, uint_t traptype)
}
/*
+ * This routine could be optimized to reduce the number of xcalls by flushing
+ * the entire TLBs if region reference count is above some threshold but the
+ * tradeoff will depend on the size of the TLB. So for now flush the specific
+ * page a context at a time.
+ *
+ * If uselocks is 0 then it's called after all cpus were captured and all the
+ * hat locks were taken. In this case we don't take the region lock; instead
+ * we rely on the order of region list update operations in hat_join_region(),
+ * hat_leave_region() and hat_dup_region(). The ordering in those routines
+ * guarantees that the list is always forward walkable and reaches active
+ * sfmmus regardless of where xc_attention() captures a cpu.
+ */
+cpuset_t
+sfmmu_rgntlb_demap(caddr_t addr, sf_region_t *rgnp,
+ struct hme_blk *hmeblkp, int uselocks)
+{
+ sfmmu_t *sfmmup;
+ cpuset_t cpuset;
+ cpuset_t rcpuset;
+ hatlock_t *hatlockp;
+ uint_t rid = rgnp->rgn_id;
+ sf_rgn_link_t *rlink;
+ sf_scd_t *scdp;
+
+ ASSERT(hmeblkp->hblk_shared);
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+
+ CPUSET_ZERO(rcpuset);
+ if (uselocks) {
+ mutex_enter(&rgnp->rgn_mutex);
+ }
+ sfmmup = rgnp->rgn_sfmmu_head;
+ while (sfmmup != NULL) {
+ if (uselocks) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ }
+
+ /*
+		 * When an SCD is created the SCD hat is linked on the sfmmu
+		 * region lists for each hme region which is part of the
+		 * SCD. If, when walking these lists, we find an SCD hat,
+		 * we flush the shared TSBs; if we find a private hat that
+		 * is part of an SCD but whose region is not part of the
+		 * SCD, we flush the private TSBs.
+ */
+ if (!sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL &&
+ !SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) {
+ scdp = sfmmup->sfmmu_scdp;
+ if (SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
+ if (uselocks) {
+ sfmmu_hat_exit(hatlockp);
+ }
+ goto next;
+ }
+ }
+
+ SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0);
+
+ kpreempt_disable();
+ cpuset = sfmmup->sfmmu_cpusran;
+ CPUSET_AND(cpuset, cpu_ready_set);
+ CPUSET_DEL(cpuset, CPU->cpu_id);
+ SFMMU_XCALL_STATS(sfmmup);
+ xt_some(cpuset, vtag_flushpage_tl1,
+ (uint64_t)addr, (uint64_t)sfmmup);
+ vtag_flushpage(addr, (uint64_t)sfmmup);
+ if (uselocks) {
+ sfmmu_hat_exit(hatlockp);
+ }
+ kpreempt_enable();
+ CPUSET_OR(rcpuset, cpuset);
+
+next:
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 0, 0);
+ ASSERT(rlink != NULL);
+ sfmmup = rlink->next;
+ }
+ if (uselocks) {
+ mutex_exit(&rgnp->rgn_mutex);
+ }
+ return (rcpuset);
+}
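
The uselocks == 0 case above depends on the head-insertion discipline of sfmmu_link_to_hmeregion() and hat_dup_region(): a new element's next pointer is fully set up and made visible (membar_stst()) before the head pointer is updated, so a walker that starts from the head always reaches every element that was linked when it began. Below is a standalone sketch of that publish-then-walk pattern, not part of the patch, using C11 release/acquire atomics in place of the kernel's membar_stst(); node_t, publish() and walk() are hypothetical names, and writers are assumed to be serialized externally (as rgn_mutex does in the patch).

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef struct node {
	int		id;
	struct node	*next;		/* immutable once published */
} node_t;

static _Atomic(node_t *) list_head = NULL;

/* Writer (externally serialized): set next first, then publish the head. */
static void
publish(node_t *np)
{
	np->next = atomic_load_explicit(&list_head, memory_order_relaxed);
	/* release ordering stands in for membar_stst() before the store */
	atomic_store_explicit(&list_head, np, memory_order_release);
}

/* Reader: a lockless forward walk never sees a half-initialized node. */
static void
walk(void)
{
	node_t *np = atomic_load_explicit(&list_head, memory_order_acquire);

	for (; np != NULL; np = np->next)
		(void) printf("node %d\n", np->id);
}

int
main(void)
{
	static node_t a = { 1, NULL }, b = { 2, NULL };

	publish(&a);
	publish(&b);
	walk();				/* prints node 2, then node 1 */
	return (0);
}
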
+
+static int
+find_ism_rid(sfmmu_t *sfmmup, sfmmu_t *ism_sfmmup, caddr_t va, uint_t *ism_rid)
+{
+ ism_blk_t *ism_blkp;
+ int i;
+ ism_map_t *ism_map;
+#ifdef DEBUG
+ struct hat *ism_hatid;
+#endif
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+
+ ism_blkp = sfmmup->sfmmu_iblk;
+ while (ism_blkp != NULL) {
+ ism_map = ism_blkp->iblk_maps;
+ for (i = 0; i < ISM_MAP_SLOTS && ism_map[i].imap_ismhat; i++) {
+ if ((va >= ism_start(ism_map[i])) &&
+ (va < ism_end(ism_map[i]))) {
+
+ *ism_rid = ism_map[i].imap_rid;
+#ifdef DEBUG
+ ism_hatid = ism_map[i].imap_ismhat;
+ ASSERT(ism_hatid == ism_sfmmup);
+ ASSERT(ism_hatid->sfmmu_ismhat);
+#endif
+ return (1);
+ }
+ }
+ ism_blkp = ism_blkp->iblk_next;
+ }
+ return (0);
+}
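
find_ism_rid() is a plain containment search: walk every ism block's map slots until one whose [start, end) span covers the faulting va is found, and return its region id. Reduced to a flat array, outside the patch and with hypothetical names (ism_span_t, find_span_rid()), the lookup looks like the following sketch.

#include <stddef.h>

typedef struct ism_span {
	char		*start;		/* base virtual address of the span */
	size_t		len;		/* span length in bytes */
	unsigned int	rid;		/* region id attached to the span */
} ism_span_t;

/* Return 1 and set *ridp if va falls inside one of the spans, else 0. */
static int
find_span_rid(const ism_span_t *spans, int nspans, const char *va,
    unsigned int *ridp)
{
	int i;

	for (i = 0; i < nspans; i++) {
		if (va >= spans[i].start &&
		    va < spans[i].start + spans[i].len) {
			*ridp = spans[i].rid;
			return (1);
		}
	}
	return (0);
}

int
main(void)
{
	char buf[4096];
	ism_span_t spans[] = { { buf, sizeof (buf), 7 } };
	unsigned int rid;

	return (find_span_rid(spans, 1, buf + 100, &rid) ? 0 : 1);
}
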
+
+/*
* Special routine to flush out ism mappings- TSBs, TLBs and D-caches.
* This routine may be called with all cpu's captured. Therefore, the
* caller is responsible for holding all locks and disabling kernel
@@ -10772,8 +12137,11 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup,
#ifdef VAC
int vcolor;
#endif
- int ttesz;
+ sf_scd_t *scdp;
+ uint_t ism_rid;
+
+ ASSERT(!hmeblkp->hblk_shared);
/*
* Walk the ism_hat's mapping list and flush the page
* from every hat sharing this ism_hat. This routine
@@ -10787,6 +12155,7 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup,
ASSERT(ism_sfmmup->sfmmu_ismhat);
ASSERT(MUTEX_HELD(&ism_mlist_lock));
addr = addr - ISMID_STARTADDR;
+
for (ment = ism_sfmmup->sfmmu_iment; ment; ment = ment->iment_next) {
sfmmup = ment->iment_hat;
@@ -10795,27 +12164,38 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup,
va = (caddr_t)((uintptr_t)va + (uintptr_t)addr);
/*
- * Flush TSB of ISM mappings.
+ * When an SCD is created the SCD hat is linked on the ism
+ * mapping lists for each ISM segment which is part of the
+	 * SCD. If, when walking these lists, we find an SCD hat, we
+	 * flush the shared TSBs; if we find a private hat that is part
+	 * of an SCD but where the region corresponding to this va is
+	 * not part of the SCD, we flush the private TSBs.
*/
- ttesz = get_hblk_ttesz(hmeblkp);
- if (ttesz == TTE8K || ttesz == TTE4M) {
- sfmmu_unload_tsb(sfmmup, va, ttesz);
- } else {
- caddr_t sva = va;
- caddr_t eva;
- ASSERT(addr == (caddr_t)get_hblk_base(hmeblkp));
- eva = sva + get_hblk_span(hmeblkp);
- sfmmu_unload_tsb_range(sfmmup, sva, eva, ttesz);
+ if (!sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL &&
+ !SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD) &&
+ !SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)) {
+ if (!find_ism_rid(sfmmup, ism_sfmmup, va,
+ &ism_rid)) {
+ cmn_err(CE_PANIC,
+ "can't find matching ISM rid!");
+ }
+
+ scdp = sfmmup->sfmmu_scdp;
+ if (SFMMU_IS_ISMRID_VALID(ism_rid) &&
+ SF_RGNMAP_TEST(scdp->scd_ismregion_map,
+ ism_rid)) {
+ continue;
+ }
}
+ SFMMU_UNLOAD_TSB(va, sfmmup, hmeblkp, 1);
cpuset = sfmmup->sfmmu_cpusran;
CPUSET_AND(cpuset, cpu_ready_set);
CPUSET_DEL(cpuset, CPU->cpu_id);
-
SFMMU_XCALL_STATS(sfmmup);
xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)va,
(uint64_t)sfmmup);
-
vtag_flushpage(va, (uint64_t)sfmmup);
#ifdef VAC
@@ -10854,11 +12234,14 @@ sfmmu_tlbcache_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp,
cpuset_t cpuset;
hatlock_t *hatlockp;
+ ASSERT(!hmeblkp->hblk_shared);
+
#if defined(lint) && !defined(VAC)
pfnum = pfnum;
cpu_flag = cpu_flag;
cache_flush_flag = cache_flush_flag;
#endif
+
/*
* There is no longer a need to protect against ctx being
* stolen here since we don't store the ctx in the TSB anymore.
@@ -10884,7 +12267,7 @@ sfmmu_tlbcache_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp,
/*
* Flush the TSB and TLB.
*/
- SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp);
+ SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0);
cpuset = sfmmup->sfmmu_cpusran;
CPUSET_AND(cpuset, cpu_ready_set);
@@ -10936,6 +12319,8 @@ sfmmu_tlb_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp,
cpuset_t cpuset;
hatlock_t *hatlockp;
+ ASSERT(!hmeblkp->hblk_shared);
+
/*
* If the process is exiting we have nothing to do.
*/
@@ -10947,7 +12332,7 @@ sfmmu_tlb_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp,
*/
if (!hat_lock_held)
hatlockp = sfmmu_hat_enter(sfmmup);
- SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp);
+ SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0);
kpreempt_disable();
@@ -10973,6 +12358,9 @@ sfmmu_tlb_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp,
*/
static int sfmmu_xcall_save;
+/*
+ * This routine is never used for demapping addresses backed by SRD hmeblks.
+ */
static void
sfmmu_tlb_range_demap(demap_range_t *dmrp)
{
@@ -11154,10 +12542,13 @@ sfmmu_invalidate_ctx(sfmmu_t *sfmmup)
*/
if ((sfmmu_getctx_sec() == currcnum) &&
(currcnum != INVALID_CONTEXT)) {
+ /* sets shared context to INVALID too */
sfmmu_setctx_sec(INVALID_CONTEXT);
sfmmu_clear_utsbinfo();
}
+ SFMMU_FLAGS_SET(sfmmup, HAT_ALLCTX_INVALID);
+
kpreempt_enable();
/*
@@ -11219,18 +12610,60 @@ sfmmu_cache_flushcolor(int vcolor, pfn_t pfnum)
static int
sfmmu_tsb_pre_relocator(caddr_t va, uint_t tsbsz, uint_t flags, void *tsbinfo)
{
- hatlock_t *hatlockp;
struct tsb_info *tsbinfop = (struct tsb_info *)tsbinfo;
sfmmu_t *sfmmup = tsbinfop->tsb_sfmmu;
- extern uint32_t sendmondo_in_recover;
+ hatlock_t *hatlockp;
+ sf_scd_t *scdp;
if (flags != HAT_PRESUSPEND)
return (0);
+ /*
+	 * If the tsb is a shared TSB with TSB_SHAREDCTX set, sfmmup must
+	 * be a shared hat; in that case set the SCD's tsbinfo flag.
+	 * If the tsb is not shared, sfmmup is a private hat; set its
+	 * private tsbinfo flag.
+ */
hatlockp = sfmmu_hat_enter(sfmmup);
-
tsbinfop->tsb_flags |= TSB_RELOC_FLAG;
+ if (!(tsbinfop->tsb_flags & TSB_SHAREDCTX)) {
+ sfmmu_tsb_inv_ctx(sfmmup);
+ sfmmu_hat_exit(hatlockp);
+ } else {
+ /* release lock on the shared hat */
+ sfmmu_hat_exit(hatlockp);
+ /* sfmmup is a shared hat */
+ ASSERT(sfmmup->sfmmu_scdhat);
+ scdp = sfmmup->sfmmu_scdp;
+ ASSERT(scdp != NULL);
+ /* get private hat from the scd list */
+ mutex_enter(&scdp->scd_mutex);
+ sfmmup = scdp->scd_sf_list;
+ while (sfmmup != NULL) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ /*
+ * We do not call sfmmu_tsb_inv_ctx here because
+ * sendmondo_in_recover check is only needed for
+ * sun4u.
+ */
+ sfmmu_invalidate_ctx(sfmmup);
+ sfmmu_hat_exit(hatlockp);
+ sfmmup = sfmmup->sfmmu_scd_link.next;
+
+ }
+ mutex_exit(&scdp->scd_mutex);
+ }
+ return (0);
+}
+
+static void
+sfmmu_tsb_inv_ctx(sfmmu_t *sfmmup)
+{
+ extern uint32_t sendmondo_in_recover;
+
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+
/*
* For Cheetah+ Erratum 25:
* Wait for any active recovery to finish. We can't risk
@@ -11262,9 +12695,6 @@ sfmmu_tsb_pre_relocator(caddr_t va, uint_t tsbsz, uint_t flags, void *tsbinfo)
}
sfmmu_invalidate_ctx(sfmmup);
- sfmmu_hat_exit(hatlockp);
-
- return (0);
}
/* ARGSUSED */
@@ -11291,7 +12721,6 @@ sfmmu_tsb_post_relocator(caddr_t va, uint_t tsbsz, uint_t flags,
if ((tsbinfop->tsb_flags & TSB_SWAPPED) == 0) {
ASSERT(va == tsbinfop->tsb_va);
sfmmu_tsbinfo_setup_phys(tsbinfop, newpfn);
- sfmmu_setup_tsbinfo(sfmmup);
if (tsbinfop->tsb_flags & TSB_FLUSH_NEEDED) {
sfmmu_inv_tsb(tsbinfop->tsb_va,
@@ -11351,11 +12780,18 @@ sfmmu_tsb_free(struct tsb_info *tsbinfo)
* need to uninstall the callback handler.
*/
if (tsbinfo->tsb_cache != sfmmu_tsb8k_cache) {
- uintptr_t slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT;
- caddr_t slab_vaddr = (caddr_t)((uintptr_t)tsbva & slab_mask);
+ uintptr_t slab_mask;
+ caddr_t slab_vaddr;
page_t **ppl;
int ret;
+ ASSERT(tsb_size <= MMU_PAGESIZE4M || use_bigtsb_arena);
+ if (tsb_size > MMU_PAGESIZE4M)
+ slab_mask = ~((uintptr_t)bigtsb_slab_mask) << PAGESHIFT;
+ else
+ slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT;
+ slab_vaddr = (caddr_t)((uintptr_t)tsbva & slab_mask);
+
ret = as_pagelock(&kas, &ppl, slab_vaddr, PAGESIZE, S_WRITE);
ASSERT(ret == 0);
hat_delete_callback(tsbva, (uint_t)tsb_size, (void *)tsbinfo,
@@ -11436,7 +12872,7 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask,
{
caddr_t vaddr = NULL;
caddr_t slab_vaddr;
- uintptr_t slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT;
+ uintptr_t slab_mask;
int tsbbytes = TSB_BYTES(tsbcode);
int lowmem = 0;
struct kmem_cache *kmem_cachep = NULL;
@@ -11447,6 +12883,12 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask,
page_t **pplist;
int ret;
+ ASSERT(tsbbytes <= MMU_PAGESIZE4M || use_bigtsb_arena);
+ if (tsbbytes > MMU_PAGESIZE4M)
+ slab_mask = ~((uintptr_t)bigtsb_slab_mask) << PAGESHIFT;
+ else
+ slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT;
+
if (flags & (TSB_FORCEALLOC | TSB_SWAPIN | TSB_GROW | TSB_SHRINK))
flags |= TSB_ALLOC;
@@ -11524,9 +12966,15 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask,
lgrpid = 0; /* use lgrp of boot CPU */
if (tsbbytes > MMU_PAGESIZE) {
- vmp = kmem_tsb_default_arena[lgrpid];
- vaddr = (caddr_t)vmem_xalloc(vmp, tsbbytes, tsbbytes, 0, 0,
- NULL, NULL, VM_NOSLEEP);
+ if (tsbbytes > MMU_PAGESIZE4M) {
+ vmp = kmem_bigtsb_default_arena[lgrpid];
+ vaddr = (caddr_t)vmem_xalloc(vmp, tsbbytes, tsbbytes,
+ 0, 0, NULL, NULL, VM_NOSLEEP);
+ } else {
+ vmp = kmem_tsb_default_arena[lgrpid];
+ vaddr = (caddr_t)vmem_xalloc(vmp, tsbbytes, tsbbytes,
+ 0, 0, NULL, NULL, VM_NOSLEEP);
+ }
#ifdef DEBUG
} else if (lowmem || (flags & TSB_FORCEALLOC) || tsb_forceheap) {
#else /* !DEBUG */
@@ -11595,11 +13043,12 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask,
sfmmu_tsbinfo_setup_phys(tsbinfo, pfn);
+ sfmmu_inv_tsb(vaddr, tsbbytes);
+
if (kmem_cachep != sfmmu_tsb8k_cache) {
as_pageunlock(&kas, pplist, slab_vaddr, PAGESIZE, S_WRITE);
}
- sfmmu_inv_tsb(vaddr, tsbbytes);
return (0);
}
@@ -11907,6 +13356,11 @@ hat_supported(enum hat_features feature, void *arg)
case HAT_DYNAMIC_ISM_UNMAP:
case HAT_VMODSORT:
return (1);
+ case HAT_SHARED_REGIONS:
+ if (!disable_shctx && shctx_on)
+ return (1);
+ else
+ return (0);
default:
return (0);
}
@@ -11980,29 +13434,19 @@ sfmmu_kstat_percpu_update(kstat_t *ksp, int rw)
ASSERT(cpu_kstat);
if (rw == KSTAT_READ) {
for (i = 0; i < NCPU; cpu_kstat++, tsbm++, kpmtsbm++, i++) {
- cpu_kstat->sf_itlb_misses = tsbm->itlb_misses;
- cpu_kstat->sf_dtlb_misses = tsbm->dtlb_misses;
+ cpu_kstat->sf_itlb_misses = 0;
+ cpu_kstat->sf_dtlb_misses = 0;
cpu_kstat->sf_utsb_misses = tsbm->utsb_misses -
tsbm->uprot_traps;
cpu_kstat->sf_ktsb_misses = tsbm->ktsb_misses +
kpmtsbm->kpm_tsb_misses - tsbm->kprot_traps;
-
- if (tsbm->itlb_misses > 0 && tsbm->dtlb_misses > 0) {
- cpu_kstat->sf_tsb_hits =
- (tsbm->itlb_misses + tsbm->dtlb_misses) -
- (tsbm->utsb_misses + tsbm->ktsb_misses +
- kpmtsbm->kpm_tsb_misses);
- } else {
- cpu_kstat->sf_tsb_hits = 0;
- }
+ cpu_kstat->sf_tsb_hits = 0;
cpu_kstat->sf_umod_faults = tsbm->uprot_traps;
cpu_kstat->sf_kmod_faults = tsbm->kprot_traps;
}
} else {
/* KSTAT_WRITE is used to clear stats */
for (i = 0; i < NCPU; tsbm++, kpmtsbm++, i++) {
- tsbm->itlb_misses = 0;
- tsbm->dtlb_misses = 0;
tsbm->utsb_misses = 0;
tsbm->ktsb_misses = 0;
tsbm->uprot_traps = 0;
@@ -12189,7 +13633,7 @@ hat_dump(void)
void
hat_thread_exit(kthread_t *thd)
{
- uint64_t pgsz_cnum;
+ uint_t pgsz_cnum;
uint_t pstate_save;
ASSERT(thd->t_procp->p_as == &kas);
@@ -12198,6 +13642,7 @@ hat_thread_exit(kthread_t *thd)
#ifdef sun4u
pgsz_cnum |= (ksfmmup->sfmmu_cext << CTXREG_EXT_SHIFT);
#endif
+
/*
* Note that sfmmu_load_mmustate() is currently a no-op for
* kernel threads. We need to disable interrupts here,
@@ -12205,7 +13650,1817 @@ hat_thread_exit(kthread_t *thd)
* if the caller does not disable interrupts.
*/
pstate_save = sfmmu_disable_intrs();
+
+ /* Compatibility Note: hw takes care of MMU_SCONTEXT1 */
sfmmu_setctx_sec(pgsz_cnum);
sfmmu_load_mmustate(ksfmmup);
sfmmu_enable_intrs(pstate_save);
}
+
+
+/*
+ * SRD support
+ */
+#define SRD_HASH_FUNCTION(vp) (((((uintptr_t)(vp)) >> 4) ^ \
+ (((uintptr_t)(vp)) >> 11)) & \
+ srd_hashmask)
+
+/*
+ * Attach the process to the srd struct associated with the exec vnode
+ * from which the process is started.
+ */
+void
+hat_join_srd(struct hat *sfmmup, vnode_t *evp)
+{
+ uint_t hash = SRD_HASH_FUNCTION(evp);
+ sf_srd_t *srdp;
+ sf_srd_t *newsrdp;
+
+ ASSERT(sfmmup != ksfmmup);
+ ASSERT(sfmmup->sfmmu_srdp == NULL);
+
+ if (disable_shctx || !shctx_on) {
+ return;
+ }
+
+ VN_HOLD(evp);
+
+ if (srd_buckets[hash].srdb_srdp != NULL) {
+ mutex_enter(&srd_buckets[hash].srdb_lock);
+ for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
+ srdp = srdp->srd_hash) {
+ if (srdp->srd_evp == evp) {
+ ASSERT(srdp->srd_refcnt >= 0);
+ sfmmup->sfmmu_srdp = srdp;
+ atomic_add_32(
+ (volatile uint_t *)&srdp->srd_refcnt, 1);
+ mutex_exit(&srd_buckets[hash].srdb_lock);
+ return;
+ }
+ }
+ mutex_exit(&srd_buckets[hash].srdb_lock);
+ }
+ newsrdp = kmem_cache_alloc(srd_cache, KM_SLEEP);
+ ASSERT(newsrdp->srd_next_ismrid == 0 && newsrdp->srd_next_hmerid == 0);
+
+ newsrdp->srd_evp = evp;
+ newsrdp->srd_refcnt = 1;
+ newsrdp->srd_hmergnfree = NULL;
+ newsrdp->srd_ismrgnfree = NULL;
+
+ mutex_enter(&srd_buckets[hash].srdb_lock);
+ for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL;
+ srdp = srdp->srd_hash) {
+ if (srdp->srd_evp == evp) {
+ ASSERT(srdp->srd_refcnt >= 0);
+ sfmmup->sfmmu_srdp = srdp;
+ atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1);
+ mutex_exit(&srd_buckets[hash].srdb_lock);
+ kmem_cache_free(srd_cache, newsrdp);
+ return;
+ }
+ }
+ newsrdp->srd_hash = srd_buckets[hash].srdb_srdp;
+ srd_buckets[hash].srdb_srdp = newsrdp;
+ sfmmup->sfmmu_srdp = newsrdp;
+
+ mutex_exit(&srd_buckets[hash].srdb_lock);
+
+}
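
hat_join_srd() above follows a common lookup/allocate/re-check shape: search the bucket under its lock, and only on a miss drop the lock, allocate, re-take the lock and search again, so that an entry inserted by a racing joiner is reused and the preallocated one is discarded. Below is a standalone, user-level sketch of that shape, not part of the patch; bucket_t, entry_t, lookup_locked() and join() are hypothetical names, and the reference count is simplified to be manipulated under the bucket lock.

#include <pthread.h>
#include <stdlib.h>

typedef struct entry {
	const void	*key;
	unsigned int	refcnt;
	struct entry	*hash;		/* bucket chain link */
} entry_t;

typedef struct bucket {
	pthread_mutex_t	lock;
	entry_t		*head;
} bucket_t;

/* Caller holds bp->lock; take a reference on a matching entry, if any. */
static entry_t *
lookup_locked(bucket_t *bp, const void *key)
{
	entry_t *ep;

	for (ep = bp->head; ep != NULL; ep = ep->hash) {
		if (ep->key == key) {
			ep->refcnt++;
			return (ep);
		}
	}
	return (NULL);
}

/* Find or create the entry for key, mirroring hat_join_srd()'s structure. */
entry_t *
join(bucket_t *bp, const void *key)
{
	entry_t *ep, *newep;

	pthread_mutex_lock(&bp->lock);
	ep = lookup_locked(bp, key);
	pthread_mutex_unlock(&bp->lock);
	if (ep != NULL)
		return (ep);

	/* allocate outside the bucket lock, as the kernel code does */
	if ((newep = calloc(1, sizeof (*newep))) == NULL)
		return (NULL);
	newep->key = key;
	newep->refcnt = 1;

	pthread_mutex_lock(&bp->lock);
	if ((ep = lookup_locked(bp, key)) != NULL) {
		/* somebody raced us in; discard our copy and reuse theirs */
		pthread_mutex_unlock(&bp->lock);
		free(newep);
		return (ep);
	}
	newep->hash = bp->head;		/* insert at the bucket head */
	bp->head = newep;
	pthread_mutex_unlock(&bp->lock);
	return (newep);
}
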
+
+static void
+sfmmu_leave_srd(sfmmu_t *sfmmup)
+{
+ vnode_t *evp;
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ uint_t hash;
+ sf_srd_t **prev_srdpp;
+ sf_region_t *rgnp;
+ sf_region_t *nrgnp;
+#ifdef DEBUG
+ int rgns = 0;
+#endif
+ int i;
+
+ ASSERT(sfmmup != ksfmmup);
+ ASSERT(srdp != NULL);
+ ASSERT(srdp->srd_refcnt > 0);
+ ASSERT(sfmmup->sfmmu_scdp == NULL);
+ ASSERT(sfmmup->sfmmu_free == 1);
+
+ sfmmup->sfmmu_srdp = NULL;
+ evp = srdp->srd_evp;
+ ASSERT(evp != NULL);
+ if (atomic_add_32_nv(
+ (volatile uint_t *)&srdp->srd_refcnt, -1)) {
+ VN_RELE(evp);
+ return;
+ }
+
+ hash = SRD_HASH_FUNCTION(evp);
+ mutex_enter(&srd_buckets[hash].srdb_lock);
+ for (prev_srdpp = &srd_buckets[hash].srdb_srdp;
+ (srdp = *prev_srdpp) != NULL; prev_srdpp = &srdp->srd_hash) {
+ if (srdp->srd_evp == evp) {
+ break;
+ }
+ }
+ if (srdp == NULL || srdp->srd_refcnt) {
+ mutex_exit(&srd_buckets[hash].srdb_lock);
+ VN_RELE(evp);
+ return;
+ }
+ *prev_srdpp = srdp->srd_hash;
+ mutex_exit(&srd_buckets[hash].srdb_lock);
+
+ ASSERT(srdp->srd_refcnt == 0);
+ VN_RELE(evp);
+
+#ifdef DEBUG
+ for (i = 0; i < SFMMU_MAX_REGION_BUCKETS; i++) {
+ ASSERT(srdp->srd_rgnhash[i] == NULL);
+ }
+#endif /* DEBUG */
+
+ /* free each hme regions in the srd */
+ for (rgnp = srdp->srd_hmergnfree; rgnp != NULL; rgnp = nrgnp) {
+ nrgnp = rgnp->rgn_next;
+ ASSERT(rgnp->rgn_id < srdp->srd_next_hmerid);
+ ASSERT(rgnp->rgn_refcnt == 0);
+ ASSERT(rgnp->rgn_sfmmu_head == NULL);
+ ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE);
+ ASSERT(rgnp->rgn_hmeflags == 0);
+ ASSERT(srdp->srd_hmergnp[rgnp->rgn_id] == rgnp);
+#ifdef DEBUG
+ for (i = 0; i < MMU_PAGE_SIZES; i++) {
+ ASSERT(rgnp->rgn_ttecnt[i] == 0);
+ }
+ rgns++;
+#endif /* DEBUG */
+ kmem_cache_free(region_cache, rgnp);
+ }
+ ASSERT(rgns == srdp->srd_next_hmerid);
+
+#ifdef DEBUG
+ rgns = 0;
+#endif
+ /* free each ism rgns in the srd */
+ for (rgnp = srdp->srd_ismrgnfree; rgnp != NULL; rgnp = nrgnp) {
+ nrgnp = rgnp->rgn_next;
+ ASSERT(rgnp->rgn_id < srdp->srd_next_ismrid);
+ ASSERT(rgnp->rgn_refcnt == 0);
+ ASSERT(rgnp->rgn_sfmmu_head == NULL);
+ ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE);
+ ASSERT(srdp->srd_ismrgnp[rgnp->rgn_id] == rgnp);
+#ifdef DEBUG
+ for (i = 0; i < MMU_PAGE_SIZES; i++) {
+ ASSERT(rgnp->rgn_ttecnt[i] == 0);
+ }
+ rgns++;
+#endif /* DEBUG */
+ kmem_cache_free(region_cache, rgnp);
+ }
+ ASSERT(rgns == srdp->srd_next_ismrid);
+ ASSERT(srdp->srd_ismbusyrgns == 0);
+ ASSERT(srdp->srd_hmebusyrgns == 0);
+
+ srdp->srd_next_ismrid = 0;
+ srdp->srd_next_hmerid = 0;
+
+ bzero((void *)srdp->srd_ismrgnp,
+ sizeof (sf_region_t *) * SFMMU_MAX_ISM_REGIONS);
+ bzero((void *)srdp->srd_hmergnp,
+ sizeof (sf_region_t *) * SFMMU_MAX_HME_REGIONS);
+
+ ASSERT(srdp->srd_scdp == NULL);
+ kmem_cache_free(srd_cache, srdp);
+}
+
+/* ARGSUSED */
+static int
+sfmmu_srdcache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ sf_srd_t *srdp = (sf_srd_t *)buf;
+ bzero(buf, sizeof (*srdp));
+
+ mutex_init(&srdp->srd_mutex, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&srdp->srd_scd_mutex, NULL, MUTEX_DEFAULT, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+sfmmu_srdcache_destructor(void *buf, void *cdrarg)
+{
+ sf_srd_t *srdp = (sf_srd_t *)buf;
+
+ mutex_destroy(&srdp->srd_mutex);
+ mutex_destroy(&srdp->srd_scd_mutex);
+}
+
+/*
+ * The caller makes sure hat_join_region()/hat_leave_region() can't be called
+ * at the same time for the same process and address range. This is ensured by
+ * the fact that address space is locked as writer when a process joins the
+ * regions. Therefore there's no need to hold an srd lock during the entire
+ * execution of hat_join_region()/hat_leave_region().
+ */
+
+#define RGN_HASH_FUNCTION(obj) (((((uintptr_t)(obj)) >> 4) ^ \
+ (((uintptr_t)(obj)) >> 11)) & \
+ srd_rgn_hashmask)
+/*
+ * This routine implements the shared context functionality required when
+ * attaching a segment to an address space. It must be called from
+ * hat_share() for D(ISM) segments and from segvn_create() for segments
+ * with the MAP_PRIVATE and MAP_TEXT flags set. It returns a region_cookie
+ * which is saved in the private segment data for hme segments and
+ * the ism_map structure for ism segments.
+ */
+hat_region_cookie_t
+hat_join_region(struct hat *sfmmup,
+ caddr_t r_saddr,
+ size_t r_size,
+ void *r_obj,
+ u_offset_t r_objoff,
+ uchar_t r_perm,
+ uchar_t r_pgszc,
+ hat_rgn_cb_func_t r_cb_function,
+ uint_t flags)
+{
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ uint_t rhash;
+ uint_t rid;
+ hatlock_t *hatlockp;
+ sf_region_t *rgnp;
+ sf_region_t *new_rgnp = NULL;
+ int i;
+ uint16_t *nextidp;
+ sf_region_t **freelistp;
+ int maxids;
+ sf_region_t **rarrp;
+ uint16_t *busyrgnsp;
+ ulong_t rttecnt;
+ int rkmalloc = 0;
+ uchar_t tteflag;
+ uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
+ int text = (r_type == HAT_REGION_TEXT);
+
+ if (srdp == NULL || r_size == 0) {
+ return (HAT_INVALID_REGION_COOKIE);
+ }
+
+ ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
+ ASSERT(sfmmup != ksfmmup);
+ ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
+ ASSERT(srdp->srd_refcnt > 0);
+ ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
+ ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
+ ASSERT(r_pgszc < mmu_page_sizes);
+ if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
+ !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
+ panic("hat_join_region: region addr or size is not aligned\n");
+ }
+
+
+ r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
+ SFMMU_REGION_HME;
+ /*
+ * Currently only support shared hmes for the main text region.
+ */
+ if (r_type == SFMMU_REGION_HME && r_obj != srdp->srd_evp) {
+ return (HAT_INVALID_REGION_COOKIE);
+ }
+
+ rhash = RGN_HASH_FUNCTION(r_obj);
+
+ if (r_type == SFMMU_REGION_ISM) {
+ nextidp = &srdp->srd_next_ismrid;
+ freelistp = &srdp->srd_ismrgnfree;
+ maxids = SFMMU_MAX_ISM_REGIONS;
+ rarrp = srdp->srd_ismrgnp;
+ busyrgnsp = &srdp->srd_ismbusyrgns;
+ } else {
+ nextidp = &srdp->srd_next_hmerid;
+ freelistp = &srdp->srd_hmergnfree;
+ maxids = SFMMU_MAX_HME_REGIONS;
+ rarrp = srdp->srd_hmergnp;
+ busyrgnsp = &srdp->srd_hmebusyrgns;
+ }
+
+ mutex_enter(&srdp->srd_mutex);
+
+ for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
+ rgnp = rgnp->rgn_hash) {
+ if (rgnp->rgn_saddr == r_saddr && rgnp->rgn_size == r_size &&
+ rgnp->rgn_obj == r_obj && rgnp->rgn_objoff == r_objoff &&
+ rgnp->rgn_perm == r_perm && rgnp->rgn_pgszc == r_pgszc) {
+ break;
+ }
+ }
+
+rfound:
+ if (rgnp != NULL) {
+ ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
+ ASSERT(rgnp->rgn_cb_function == r_cb_function);
+ ASSERT(rgnp->rgn_refcnt >= 0);
+ rid = rgnp->rgn_id;
+ ASSERT(rid < maxids);
+ ASSERT(rarrp[rid] == rgnp);
+ ASSERT(rid < *nextidp);
+ atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1);
+ mutex_exit(&srdp->srd_mutex);
+ if (new_rgnp != NULL) {
+ kmem_cache_free(region_cache, new_rgnp);
+ }
+ if (r_type == SFMMU_REGION_HME) {
+ int myjoin =
+ (sfmmup == astosfmmu(curthread->t_procp->p_as));
+
+ sfmmu_link_to_hmeregion(sfmmup, rgnp);
+ /*
+ * bitmap should be updated after linking sfmmu on
+ * region list so that pageunload() doesn't skip
+ * TSB/TLB flush. As soon as bitmap is updated another
+ * thread in this process can already start accessing
+ * this region.
+ */
+ /*
+ * Normally ttecnt accounting is done as part of
+ * pagefault handling. But a process may not take any
+ * pagefaults on shared hmeblks created by some other
+ * process. To compensate for this assume that the
+ * entire region will end up faulted in using
+ * the region's pagesize.
+ *
+ */
+ if (r_pgszc > TTE8K) {
+ tteflag = 1 << r_pgszc;
+ if (disable_large_pages & tteflag) {
+ tteflag = 0;
+ }
+ } else {
+ tteflag = 0;
+ }
+ if (tteflag && !(sfmmup->sfmmu_rtteflags & tteflag)) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ sfmmup->sfmmu_rtteflags |= tteflag;
+ sfmmu_hat_exit(hatlockp);
+ }
+ hatlockp = sfmmu_hat_enter(sfmmup);
+
+ /*
+ * Preallocate 1/4 of ttecnt's in 8K TSB for >= 4M
+ * region to allow for large page allocation failure.
+ */
+ if (r_pgszc >= TTE4M) {
+ sfmmup->sfmmu_tsb0_4minflcnt +=
+ r_size >> (TTE_PAGE_SHIFT(TTE8K) + 2);
+ }
+
+ /* update sfmmu_ttecnt with the shme rgn ttecnt */
+ rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc],
+ rttecnt);
+
+ if (text && r_pgszc >= TTE4M &&
+ (tteflag || ((disable_large_pages >> TTE4M) &
+ ((1 << (r_pgszc - TTE4M + 1)) - 1))) &&
+ !SFMMU_FLAGS_ISSET(sfmmup, HAT_4MTEXT_FLAG)) {
+ SFMMU_FLAGS_SET(sfmmup, HAT_4MTEXT_FLAG);
+ }
+
+ sfmmu_hat_exit(hatlockp);
+ /*
+ * On Panther we need to make sure TLB is programmed
+ * to accept 32M/256M pages. Call
+ * sfmmu_check_page_sizes() now to make sure TLB is
+ * setup before making hmeregions visible to other
+ * threads.
+ */
+ sfmmu_check_page_sizes(sfmmup, 1);
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ SF_RGNMAP_ADD(sfmmup->sfmmu_hmeregion_map, rid);
+
+ /*
+ * if context is invalid tsb miss exception code will
+ * call sfmmu_check_page_sizes() and update tsbmiss
+ * area later.
+ */
+ kpreempt_disable();
+ if (myjoin &&
+ (sfmmup->sfmmu_ctxs[CPU_MMU_IDX(CPU)].cnum
+ != INVALID_CONTEXT)) {
+ struct tsbmiss *tsbmp;
+
+ tsbmp = &tsbmiss_area[CPU->cpu_id];
+ ASSERT(sfmmup == tsbmp->usfmmup);
+ BT_SET(tsbmp->shmermap, rid);
+ if (r_pgszc > TTE64K) {
+ tsbmp->uhat_rtteflags |= tteflag;
+ }
+
+ }
+ kpreempt_enable();
+
+ sfmmu_hat_exit(hatlockp);
+ ASSERT((hat_region_cookie_t)((uint64_t)rid) !=
+ HAT_INVALID_REGION_COOKIE);
+ } else {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ SF_RGNMAP_ADD(sfmmup->sfmmu_ismregion_map, rid);
+ sfmmu_hat_exit(hatlockp);
+ }
+ ASSERT(rid < maxids);
+
+ if (r_type == SFMMU_REGION_ISM) {
+ sfmmu_find_scd(sfmmup);
+ }
+ return ((hat_region_cookie_t)((uint64_t)rid));
+ }
+
+ ASSERT(new_rgnp == NULL);
+
+ if (*busyrgnsp >= maxids) {
+ mutex_exit(&srdp->srd_mutex);
+ return (HAT_INVALID_REGION_COOKIE);
+ }
+
+ ASSERT(MUTEX_HELD(&srdp->srd_mutex));
+ if (*freelistp != NULL) {
+ new_rgnp = *freelistp;
+ *freelistp = new_rgnp->rgn_next;
+ ASSERT(new_rgnp->rgn_id < *nextidp);
+ ASSERT(new_rgnp->rgn_id < maxids);
+ ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE);
+ ASSERT((new_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK)
+ == r_type);
+ ASSERT(rarrp[new_rgnp->rgn_id] == new_rgnp);
+
+ ASSERT(new_rgnp->rgn_hmeflags == 0);
+ }
+
+ if (new_rgnp == NULL) {
+ /*
+ * release local locks before memory allocation.
+ */
+ mutex_exit(&srdp->srd_mutex);
+ if (new_rgnp == NULL) {
+ rkmalloc = 1;
+ new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP);
+ }
+
+ mutex_enter(&srdp->srd_mutex);
+ for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL;
+ rgnp = rgnp->rgn_hash) {
+ if (rgnp->rgn_saddr == r_saddr &&
+ rgnp->rgn_size == r_size &&
+ rgnp->rgn_obj == r_obj &&
+ rgnp->rgn_objoff == r_objoff &&
+ rgnp->rgn_perm == r_perm &&
+ rgnp->rgn_pgszc == r_pgszc) {
+ break;
+ }
+ }
+ if (rgnp != NULL) {
+ if (!rkmalloc) {
+ ASSERT(new_rgnp->rgn_flags &
+ SFMMU_REGION_FREE);
+ new_rgnp->rgn_next = *freelistp;
+ *freelistp = new_rgnp;
+ new_rgnp = NULL;
+ }
+ goto rfound;
+ }
+
+ if (rkmalloc) {
+ if (*nextidp >= maxids) {
+ mutex_exit(&srdp->srd_mutex);
+ goto fail;
+ }
+ rgnp = new_rgnp;
+ new_rgnp = NULL;
+ rgnp->rgn_id = (*nextidp)++;
+ ASSERT(rgnp->rgn_id < maxids);
+ ASSERT(rarrp[rgnp->rgn_id] == NULL);
+ rarrp[rgnp->rgn_id] = rgnp;
+ } else {
+ rgnp = new_rgnp;
+ new_rgnp = NULL;
+ }
+ } else {
+ rgnp = new_rgnp;
+ new_rgnp = NULL;
+ }
+
+ ASSERT(rgnp->rgn_sfmmu_head == NULL);
+ ASSERT(rgnp->rgn_hmeflags == 0);
+#ifdef DEBUG
+ for (i = 0; i < MMU_PAGE_SIZES; i++) {
+ ASSERT(rgnp->rgn_ttecnt[i] == 0);
+ }
+#endif
+ rgnp->rgn_saddr = r_saddr;
+ rgnp->rgn_size = r_size;
+ rgnp->rgn_obj = r_obj;
+ rgnp->rgn_objoff = r_objoff;
+ rgnp->rgn_perm = r_perm;
+ rgnp->rgn_pgszc = r_pgszc;
+ rgnp->rgn_flags = r_type;
+ rgnp->rgn_refcnt = 0;
+ rgnp->rgn_cb_function = r_cb_function;
+ rgnp->rgn_hash = srdp->srd_rgnhash[rhash];
+ srdp->srd_rgnhash[rhash] = rgnp;
+ (*busyrgnsp)++;
+ ASSERT(*busyrgnsp <= maxids);
+ goto rfound;
+
+fail:
+ ASSERT(new_rgnp != NULL);
+ if (rkmalloc) {
+ kmem_cache_free(region_cache, new_rgnp);
+ } else {
+ /* put it back on the free list. */
+ ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE);
+ new_rgnp->rgn_next = *freelistp;
+ *freelistp = new_rgnp;
+ }
+ return (HAT_INVALID_REGION_COOKIE);
+}
+
+/*
+ * This function implements the shared context functionality required
+ * when detaching a segment from an address space. It must be called
+ * from hat_unshare() for all D(ISM) segments and from segvn_unmap(),
+ * for segments with a valid region_cookie.
+ * It will also be called from all seg_vn routines which change a
+ * segment's attributes such as segvn_setprot(), segvn_setpagesize(),
+ * segvn_clrszc() & segvn_advise(), as well as in the case of COW fault
+ * from segvn_fault().
+ */
+void
+hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags)
+{
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ sf_scd_t *scdp;
+ uint_t rhash;
+ uint_t rid = (uint_t)((uint64_t)rcookie);
+ hatlock_t *hatlockp = NULL;
+ sf_region_t *rgnp;
+ sf_region_t **prev_rgnpp;
+ sf_region_t *cur_rgnp;
+ void *r_obj;
+ int i;
+ caddr_t r_saddr;
+ caddr_t r_eaddr;
+ size_t r_size;
+ uchar_t r_pgszc;
+ uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
+
+ ASSERT(sfmmup != ksfmmup);
+ ASSERT(srdp != NULL);
+ ASSERT(srdp->srd_refcnt > 0);
+ ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
+ ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
+ ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
+
+ r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
+ SFMMU_REGION_HME;
+
+ if (r_type == SFMMU_REGION_ISM) {
+ ASSERT(SFMMU_IS_ISMRID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
+ rgnp = srdp->srd_ismrgnp[rid];
+ } else {
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+ rgnp = srdp->srd_hmergnp[rid];
+ }
+ ASSERT(rgnp != NULL);
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
+ ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
+ ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
+
+ ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
+ if (r_type == SFMMU_REGION_HME && sfmmup->sfmmu_as->a_xhat != NULL) {
+ xhat_unload_callback_all(sfmmup->sfmmu_as, rgnp->rgn_saddr,
+ rgnp->rgn_size, 0, NULL);
+ }
+
+ if (sfmmup->sfmmu_free) {
+ ulong_t rttecnt;
+ r_pgszc = rgnp->rgn_pgszc;
+ r_size = rgnp->rgn_size;
+
+ ASSERT(sfmmup->sfmmu_scdp == NULL);
+ if (r_type == SFMMU_REGION_ISM) {
+ SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
+ } else {
+ /* update shme rgns ttecnt in sfmmu_ttecnt */
+ rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
+ ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
+
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc],
+ -rttecnt);
+
+ SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid);
+ }
+ } else if (r_type == SFMMU_REGION_ISM) {
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ ASSERT(rid < srdp->srd_next_ismrid);
+ SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
+ scdp = sfmmup->sfmmu_scdp;
+ if (scdp != NULL &&
+ SF_RGNMAP_TEST(scdp->scd_ismregion_map, rid)) {
+ sfmmu_leave_scd(sfmmup, r_type);
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+ }
+ sfmmu_hat_exit(hatlockp);
+ } else {
+ ulong_t rttecnt;
+ r_pgszc = rgnp->rgn_pgszc;
+ r_saddr = rgnp->rgn_saddr;
+ r_size = rgnp->rgn_size;
+ r_eaddr = r_saddr + r_size;
+
+ ASSERT(r_type == SFMMU_REGION_HME);
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ ASSERT(rid < srdp->srd_next_hmerid);
+ SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid);
+
+ /*
+ * If region is part of an SCD call sfmmu_leave_scd().
+ * Otherwise if process is not exiting and has valid context
+ * just drop the context on the floor to lose stale TLB
+ * entries and force the update of tsb miss area to reflect
+ * the new region map. After that clean our TSB entries.
+ */
+ scdp = sfmmup->sfmmu_scdp;
+ if (scdp != NULL &&
+ SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
+ sfmmu_leave_scd(sfmmup, r_type);
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+ }
+ sfmmu_invalidate_ctx(sfmmup);
+
+ i = TTE8K;
+ while (i < mmu_page_sizes) {
+ if (rgnp->rgn_ttecnt[i] != 0) {
+ sfmmu_unload_tsb_range(sfmmup, r_saddr,
+ r_eaddr, i);
+ if (i < TTE4M) {
+ i = TTE4M;
+ continue;
+ } else {
+ break;
+ }
+ }
+ i++;
+ }
+ /* Remove the preallocated 1/4 8k ttecnt for 4M regions. */
+ if (r_pgszc >= TTE4M) {
+ rttecnt = r_size >> (TTE_PAGE_SHIFT(TTE8K) + 2);
+ ASSERT(sfmmup->sfmmu_tsb0_4minflcnt >=
+ rttecnt);
+ sfmmup->sfmmu_tsb0_4minflcnt -= rttecnt;
+ }
+
+ /* update shme rgns ttecnt in sfmmu_ttecnt */
+ rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
+ ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], -rttecnt);
+
+ sfmmu_hat_exit(hatlockp);
+ if (scdp != NULL && sfmmup->sfmmu_scdp == NULL) {
+ /* sfmmup left the scd, grow private tsb */
+ sfmmu_check_page_sizes(sfmmup, 1);
+ } else {
+ sfmmu_check_page_sizes(sfmmup, 0);
+ }
+ }
+
+ if (r_type == SFMMU_REGION_HME) {
+ sfmmu_unlink_from_hmeregion(sfmmup, rgnp);
+ }
+
+ r_obj = rgnp->rgn_obj;
+ if (atomic_add_32_nv((volatile uint_t *)&rgnp->rgn_refcnt, -1)) {
+ return;
+ }
+
+ /*
+ * looks like nobody uses this region anymore. Free it.
+ */
+ rhash = RGN_HASH_FUNCTION(r_obj);
+ mutex_enter(&srdp->srd_mutex);
+ for (prev_rgnpp = &srdp->srd_rgnhash[rhash];
+ (cur_rgnp = *prev_rgnpp) != NULL;
+ prev_rgnpp = &cur_rgnp->rgn_hash) {
+ if (cur_rgnp == rgnp && cur_rgnp->rgn_refcnt == 0) {
+ break;
+ }
+ }
+
+ if (cur_rgnp == NULL) {
+ mutex_exit(&srdp->srd_mutex);
+ return;
+ }
+
+ ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
+ *prev_rgnpp = rgnp->rgn_hash;
+ if (r_type == SFMMU_REGION_ISM) {
+ rgnp->rgn_flags |= SFMMU_REGION_FREE;
+ ASSERT(rid < srdp->srd_next_ismrid);
+ rgnp->rgn_next = srdp->srd_ismrgnfree;
+ srdp->srd_ismrgnfree = rgnp;
+ ASSERT(srdp->srd_ismbusyrgns > 0);
+ srdp->srd_ismbusyrgns--;
+ mutex_exit(&srdp->srd_mutex);
+ return;
+ }
+ mutex_exit(&srdp->srd_mutex);
+
+ /*
+ * Destroy region's hmeblks.
+ */
+ sfmmu_unload_hmeregion(srdp, rgnp);
+
+ rgnp->rgn_hmeflags = 0;
+
+ ASSERT(rgnp->rgn_sfmmu_head == NULL);
+ ASSERT(rgnp->rgn_id == rid);
+ for (i = 0; i < MMU_PAGE_SIZES; i++) {
+ rgnp->rgn_ttecnt[i] = 0;
+ }
+ rgnp->rgn_flags |= SFMMU_REGION_FREE;
+ mutex_enter(&srdp->srd_mutex);
+ ASSERT(rid < srdp->srd_next_hmerid);
+ rgnp->rgn_next = srdp->srd_hmergnfree;
+ srdp->srd_hmergnfree = rgnp;
+ ASSERT(srdp->srd_hmebusyrgns > 0);
+ srdp->srd_hmebusyrgns--;
+ mutex_exit(&srdp->srd_mutex);
+}
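
Both sfmmu_leave_srd() and the tail of hat_leave_region() above use the same teardown discipline: atomically drop the reference, and only the thread that takes it to zero acquires the bucket lock, re-checks that the entry is still unreferenced (a racing join may have resurrected it), and only then unlinks and recycles it. Below is a standalone sketch of that drop/recheck/unlink step, not part of the patch, with hypothetical names (entry_t, bucket_t, release()) and C11 atomics standing in for atomic_add_32_nv().

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

typedef struct entry {
	_Atomic unsigned int	refcnt;
	struct entry		*hash;	/* bucket chain link */
	struct entry		*next;	/* free-list link */
} entry_t;

typedef struct bucket {
	pthread_mutex_t	lock;
	entry_t		*head;
} bucket_t;

/*
 * Drop one reference.  If it was the last one, unlink the entry from its
 * bucket and push it on the caller's free list, unless another thread
 * re-referenced it between our decrement and taking the bucket lock.
 */
void
release(bucket_t *bp, entry_t *ep, entry_t **freelistp)
{
	entry_t **prevpp, *cur;

	if (atomic_fetch_sub(&ep->refcnt, 1) != 1)
		return;				/* still referenced */

	pthread_mutex_lock(&bp->lock);
	for (prevpp = &bp->head; (cur = *prevpp) != NULL;
	    prevpp = &cur->hash) {
		if (cur == ep && atomic_load(&cur->refcnt) == 0)
			break;
	}
	if (cur == NULL) {			/* resurrected; leave it be */
		pthread_mutex_unlock(&bp->lock);
		return;
	}
	*prevpp = ep->hash;			/* unlink from the bucket */
	ep->next = *freelistp;			/* recycle on the free list */
	*freelistp = ep;
	pthread_mutex_unlock(&bp->lock);
}
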
+
+/*
+ * For now only called for hmeblk regions and not for ISM regions.
+ */
+void
+hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie)
+{
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ uint_t rid = (uint_t)((uint64_t)rcookie);
+ sf_region_t *rgnp;
+ sf_rgn_link_t *rlink;
+ sf_rgn_link_t *hrlink;
+ ulong_t rttecnt;
+
+ ASSERT(sfmmup != ksfmmup);
+ ASSERT(srdp != NULL);
+ ASSERT(srdp->srd_refcnt > 0);
+
+ ASSERT(rid < srdp->srd_next_hmerid);
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+ ASSERT(rid < SFMMU_MAX_HME_REGIONS);
+
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp->rgn_refcnt > 0);
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == SFMMU_REGION_HME);
+ ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
+
+ atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1);
+
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 0);
+ ASSERT(rlink != NULL);
+ mutex_enter(&rgnp->rgn_mutex);
+ ASSERT(rgnp->rgn_sfmmu_head != NULL);
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(rgnp->rgn_sfmmu_head, rid, hrlink, 0, 0);
+ ASSERT(hrlink != NULL);
+ ASSERT(hrlink->prev == NULL);
+ rlink->next = rgnp->rgn_sfmmu_head;
+ rlink->prev = NULL;
+ hrlink->prev = sfmmup;
+ /*
+ * make sure rlink's next field is correct
+ * before making this link visible.
+ */
+ membar_stst();
+ rgnp->rgn_sfmmu_head = sfmmup;
+ mutex_exit(&rgnp->rgn_mutex);
+
+ /* update sfmmu_ttecnt with the shme rgn ttecnt */
+ rttecnt = rgnp->rgn_size >> TTE_PAGE_SHIFT(rgnp->rgn_pgszc);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[rgnp->rgn_pgszc], rttecnt);
+ /* update tsb0 inflation count */
+ if (rgnp->rgn_pgszc >= TTE4M) {
+ sfmmup->sfmmu_tsb0_4minflcnt +=
+ rgnp->rgn_size >> (TTE_PAGE_SHIFT(TTE8K) + 2);
+ }
+ /*
+ * Update regionid bitmask without hat lock since no other thread
+ * can update this region bitmask right now.
+ */
+ SF_RGNMAP_ADD(sfmmup->sfmmu_hmeregion_map, rid);
+}
+
+/* ARGSUSED */
+static int
+sfmmu_rgncache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ sf_region_t *rgnp = (sf_region_t *)buf;
+ bzero(buf, sizeof (*rgnp));
+
+ mutex_init(&rgnp->rgn_mutex, NULL, MUTEX_DEFAULT, NULL);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+sfmmu_rgncache_destructor(void *buf, void *cdrarg)
+{
+ sf_region_t *rgnp = (sf_region_t *)buf;
+ mutex_destroy(&rgnp->rgn_mutex);
+}
+
+static int
+sfrgnmap_isnull(sf_region_map_t *map)
+{
+ int i;
+
+ for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) {
+ if (map->bitmap[i] != 0) {
+ return (0);
+ }
+ }
+ return (1);
+}
+
+static int
+sfhmergnmap_isnull(sf_hmeregion_map_t *map)
+{
+ int i;
+
+ for (i = 0; i < SFMMU_HMERGNMAP_WORDS; i++) {
+ if (map->bitmap[i] != 0) {
+ return (0);
+ }
+ }
+ return (1);
+}
+
+#ifdef DEBUG
+static void
+check_scd_sfmmu_list(sfmmu_t **headp, sfmmu_t *sfmmup, int onlist)
+{
+ sfmmu_t *sp;
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+
+ for (sp = *headp; sp != NULL; sp = sp->sfmmu_scd_link.next) {
+ ASSERT(srdp == sp->sfmmu_srdp);
+ if (sp == sfmmup) {
+ if (onlist) {
+ return;
+ } else {
+ panic("shctx: sfmmu 0x%p found on scd"
+ "list 0x%p", sfmmup, *headp);
+ }
+ }
+ }
+ if (onlist) {
+ panic("shctx: sfmmu 0x%p not found on scd list 0x%p",
+ sfmmup, *headp);
+ } else {
+ return;
+ }
+}
+#else /* DEBUG */
+#define check_scd_sfmmu_list(headp, sfmmup, onlist)
+#endif /* DEBUG */
+
+/*
+ * Removes an sfmmu from the queue.
+ */
+static void
+sfmmu_from_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup)
+{
+ ASSERT(sfmmup->sfmmu_srdp != NULL);
+ check_scd_sfmmu_list(headp, sfmmup, 1);
+ if (sfmmup->sfmmu_scd_link.prev != NULL) {
+ ASSERT(*headp != sfmmup);
+ sfmmup->sfmmu_scd_link.prev->sfmmu_scd_link.next =
+ sfmmup->sfmmu_scd_link.next;
+ } else {
+ ASSERT(*headp == sfmmup);
+ *headp = sfmmup->sfmmu_scd_link.next;
+ }
+ if (sfmmup->sfmmu_scd_link.next != NULL) {
+ sfmmup->sfmmu_scd_link.next->sfmmu_scd_link.prev =
+ sfmmup->sfmmu_scd_link.prev;
+ }
+}
+
+
+/*
+ * Adds an sfmmu to the start of the queue.
+ */
+static void
+sfmmu_to_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup)
+{
+ check_scd_sfmmu_list(headp, sfmmup, 0);
+ sfmmup->sfmmu_scd_link.prev = NULL;
+ sfmmup->sfmmu_scd_link.next = *headp;
+ if (*headp != NULL)
+ (*headp)->sfmmu_scd_link.prev = sfmmup;
+ *headp = sfmmup;
+}
+
+/*
+ * Remove an scd from the queue.
+ */
+static void
+sfmmu_remove_scd(sf_scd_t **headp, sf_scd_t *scdp)
+{
+ if (scdp->scd_prev != NULL) {
+ ASSERT(*headp != scdp);
+ scdp->scd_prev->scd_next = scdp->scd_next;
+ } else {
+ ASSERT(*headp == scdp);
+ *headp = scdp->scd_next;
+ }
+
+ if (scdp->scd_next != NULL) {
+ scdp->scd_next->scd_prev = scdp->scd_prev;
+ }
+}
+
+/*
+ * Add an scd to the start of the queue.
+ */
+static void
+sfmmu_add_scd(sf_scd_t **headp, sf_scd_t *scdp)
+{
+ scdp->scd_prev = NULL;
+ scdp->scd_next = *headp;
+ if (*headp != NULL) {
+ (*headp)->scd_prev = scdp;
+ }
+ *headp = scdp;
+}
+
+static int
+sfmmu_alloc_scd_tsbs(sf_srd_t *srdp, sf_scd_t *scdp)
+{
+ uint_t rid;
+ uint_t i;
+ uint_t j;
+ ulong_t w;
+ sf_region_t *rgnp;
+ ulong_t tte8k_cnt = 0;
+ ulong_t tte4m_cnt = 0;
+ uint_t tsb_szc;
+ sfmmu_t *scsfmmup = scdp->scd_sfmmup;
+ sfmmu_t *ism_hatid;
+ struct tsb_info *newtsb;
+ int szc;
+
+ ASSERT(srdp != NULL);
+
+ for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) {
+ if ((w = scdp->scd_region_map.bitmap[i]) == 0) {
+ continue;
+ }
+ j = 0;
+ while (w) {
+ if (!(w & 0x1)) {
+ j++;
+ w >>= 1;
+ continue;
+ }
+ rid = (i << BT_ULSHIFT) | j;
+ j++;
+ w >>= 1;
+
+ if (rid < SFMMU_MAX_HME_REGIONS) {
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt > 0);
+
+ if (rgnp->rgn_pgszc < TTE4M) {
+ tte8k_cnt += rgnp->rgn_size >>
+ TTE_PAGE_SHIFT(TTE8K);
+ } else {
+ ASSERT(rgnp->rgn_pgszc >= TTE4M);
+ tte4m_cnt += rgnp->rgn_size >>
+ TTE_PAGE_SHIFT(TTE4M);
+ /*
+ * Inflate SCD tsb0 by preallocating
+ * 1/4 8k ttecnt for 4M regions to
+ * allow for lgpg alloc failure.
+ */
+ tte8k_cnt += rgnp->rgn_size >>
+ (TTE_PAGE_SHIFT(TTE8K) + 2);
+ }
+ } else {
+ rid -= SFMMU_MAX_HME_REGIONS;
+ rgnp = srdp->srd_ismrgnp[rid];
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt > 0);
+
+ ism_hatid = (sfmmu_t *)rgnp->rgn_obj;
+ ASSERT(ism_hatid->sfmmu_ismhat);
+
+ for (szc = 0; szc < TTE4M; szc++) {
+ tte8k_cnt +=
+ ism_hatid->sfmmu_ttecnt[szc] <<
+ TTE_BSZS_SHIFT(szc);
+ }
+
+ ASSERT(rgnp->rgn_pgszc >= TTE4M);
+ if (rgnp->rgn_pgszc >= TTE4M) {
+ tte4m_cnt += rgnp->rgn_size >>
+ TTE_PAGE_SHIFT(TTE4M);
+ }
+ }
+ }
+ }
+
+ tsb_szc = SELECT_TSB_SIZECODE(tte8k_cnt);
+
+ /* Allocate both the SCD TSBs here. */
+ if (sfmmu_tsbinfo_alloc(&scsfmmup->sfmmu_tsb,
+ tsb_szc, TSB8K|TSB64K|TSB512K, TSB_ALLOC, scsfmmup) &&
+ (tsb_szc <= TSB_4M_SZCODE ||
+ sfmmu_tsbinfo_alloc(&scsfmmup->sfmmu_tsb,
+ TSB_4M_SZCODE, TSB8K|TSB64K|TSB512K,
+ TSB_ALLOC, scsfmmup))) {
+
+ SFMMU_STAT(sf_scd_1sttsb_allocfail);
+ return (TSB_ALLOCFAIL);
+ } else {
+ scsfmmup->sfmmu_tsb->tsb_flags |= TSB_SHAREDCTX;
+
+ if (tte4m_cnt) {
+ tsb_szc = SELECT_TSB_SIZECODE(tte4m_cnt);
+ if (sfmmu_tsbinfo_alloc(&newtsb, tsb_szc,
+ TSB4M|TSB32M|TSB256M, TSB_ALLOC, scsfmmup) &&
+ (tsb_szc <= TSB_4M_SZCODE ||
+ sfmmu_tsbinfo_alloc(&newtsb, TSB_4M_SZCODE,
+ TSB4M|TSB32M|TSB256M,
+ TSB_ALLOC, scsfmmup))) {
+ /*
+ * If we fail to allocate the 2nd shared tsb,
+				 * just free the 1st tsb and return failure.
+ */
+ sfmmu_tsbinfo_free(scsfmmup->sfmmu_tsb);
+ SFMMU_STAT(sf_scd_2ndtsb_allocfail);
+ return (TSB_ALLOCFAIL);
+ } else {
+ ASSERT(scsfmmup->sfmmu_tsb->tsb_next == NULL);
+ newtsb->tsb_flags |= TSB_SHAREDCTX;
+ scsfmmup->sfmmu_tsb->tsb_next = newtsb;
+ SFMMU_STAT(sf_scd_2ndtsb_alloc);
+ }
+ }
+ SFMMU_STAT(sf_scd_1sttsb_alloc);
+ }
+ return (TSB_SUCCESS);
+}
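
The word-by-word bit walk above (repeated in sfmmu_link_scd_to_regions() and sfmmu_unlink_scd_from_regions() below) recovers region ids as rid = (i << BT_ULSHIFT) | j. A hedged standalone sketch of the same decode, assuming 64-bit map words so that BT_ULSHIFT is 6:

#include <stdio.h>

#define MY_BT_ULSHIFT	6	/* assumption: log2(bits per ulong_t word) */

static void
decode_rgnmap_word(unsigned long w, unsigned int i)
{
	unsigned int j = 0;

	while (w) {
		if (w & 0x1) {
			unsigned int rid = (i << MY_BT_ULSHIFT) | j;
			printf("region id %u is set\n", rid);
		}
		j++;
		w >>= 1;
	}
}

int
main(void)
{
	decode_rgnmap_word(0x05UL, 0);	/* word 0: rids 0 and 2 */
	decode_rgnmap_word(0x01UL, 1);	/* word 1: rid 64 */
	return (0);
}
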
+
+static void
+sfmmu_free_scd_tsbs(sfmmu_t *scd_sfmmu)
+{
+ while (scd_sfmmu->sfmmu_tsb != NULL) {
+ struct tsb_info *next = scd_sfmmu->sfmmu_tsb->tsb_next;
+ sfmmu_tsbinfo_free(scd_sfmmu->sfmmu_tsb);
+ scd_sfmmu->sfmmu_tsb = next;
+ }
+}
+
+/*
+ * Link the sfmmu onto the hme region list.
+ */
+void
+sfmmu_link_to_hmeregion(sfmmu_t *sfmmup, sf_region_t *rgnp)
+{
+ uint_t rid;
+ sf_rgn_link_t *rlink;
+ sfmmu_t *head;
+ sf_rgn_link_t *hrlink;
+
+ rid = rgnp->rgn_id;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 1);
+ ASSERT(rlink != NULL);
+ mutex_enter(&rgnp->rgn_mutex);
+ if ((head = rgnp->rgn_sfmmu_head) == NULL) {
+ rlink->next = NULL;
+ rlink->prev = NULL;
+ /*
+ * make sure rlink's next field is NULL
+ * before making this link visible.
+ */
+ membar_stst();
+ rgnp->rgn_sfmmu_head = sfmmup;
+ } else {
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(head, rid, hrlink, 0, 0);
+ ASSERT(hrlink != NULL);
+ ASSERT(hrlink->prev == NULL);
+ rlink->next = head;
+ rlink->prev = NULL;
+ hrlink->prev = sfmmup;
+ /*
+ * make sure rlink's next field is correct
+ * before making this link visible.
+ */
+ membar_stst();
+ rgnp->rgn_sfmmu_head = sfmmup;
+ }
+ mutex_exit(&rgnp->rgn_mutex);
+}
+
+/*
+ * Unlink the sfmmu from the hme region list.
+ */
+void
+sfmmu_unlink_from_hmeregion(sfmmu_t *sfmmup, sf_region_t *rgnp)
+{
+ uint_t rid;
+ sf_rgn_link_t *rlink;
+
+ rid = rgnp->rgn_id;
+ ASSERT(SFMMU_IS_SHMERID_VALID(rid));
+
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 0, 0);
+ ASSERT(rlink != NULL);
+ mutex_enter(&rgnp->rgn_mutex);
+ if (rgnp->rgn_sfmmu_head == sfmmup) {
+ sfmmu_t *next = rlink->next;
+ rgnp->rgn_sfmmu_head = next;
+ /*
+ * if we are stopped by xc_attention() after this
+ * point the forward link walking in
+ * sfmmu_rgntlb_demap() will work correctly since the
+ * head correctly points to the next element.
+ */
+ membar_stst();
+ rlink->next = NULL;
+ ASSERT(rlink->prev == NULL);
+ if (next != NULL) {
+ sf_rgn_link_t *nrlink;
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(next, rid, nrlink, 0, 0);
+ ASSERT(nrlink != NULL);
+ ASSERT(nrlink->prev == sfmmup);
+ nrlink->prev = NULL;
+ }
+ } else {
+ sfmmu_t *next = rlink->next;
+ sfmmu_t *prev = rlink->prev;
+ sf_rgn_link_t *prlink;
+
+ ASSERT(prev != NULL);
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(prev, rid, prlink, 0, 0);
+ ASSERT(prlink != NULL);
+ ASSERT(prlink->next == sfmmup);
+ prlink->next = next;
+ /*
+ * if we are stopped by xc_attention()
+ * after this point the forward link walking
+ * will work correctly since the prev element
+ * correctly points to the next element.
+ */
+ membar_stst();
+ rlink->next = NULL;
+ rlink->prev = NULL;
+ if (next != NULL) {
+ sf_rgn_link_t *nrlink;
+ /* LINTED: constant in conditional context */
+ SFMMU_HMERID2RLINKP(next, rid, nrlink, 0, 0);
+ ASSERT(nrlink != NULL);
+ ASSERT(nrlink->prev == sfmmup);
+ nrlink->prev = prev;
+ }
+ }
+ mutex_exit(&rgnp->rgn_mutex);
+}
+
+/*
+ * Link scd sfmmu onto ism or hme region list for each region in the
+ * scd region map.
+ */
+void
+sfmmu_link_scd_to_regions(sf_srd_t *srdp, sf_scd_t *scdp)
+{
+ uint_t rid;
+ uint_t i;
+ uint_t j;
+ ulong_t w;
+ sf_region_t *rgnp;
+ sfmmu_t *scsfmmup;
+
+ scsfmmup = scdp->scd_sfmmup;
+ ASSERT(scsfmmup->sfmmu_scdhat);
+ for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) {
+ if ((w = scdp->scd_region_map.bitmap[i]) == 0) {
+ continue;
+ }
+ j = 0;
+ while (w) {
+ if (!(w & 0x1)) {
+ j++;
+ w >>= 1;
+ continue;
+ }
+ rid = (i << BT_ULSHIFT) | j;
+ j++;
+ w >>= 1;
+
+ if (rid < SFMMU_MAX_HME_REGIONS) {
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt > 0);
+ sfmmu_link_to_hmeregion(scsfmmup, rgnp);
+ } else {
+ sfmmu_t *ism_hatid = NULL;
+ ism_ment_t *ism_ment;
+ rid -= SFMMU_MAX_HME_REGIONS;
+ rgnp = srdp->srd_ismrgnp[rid];
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt > 0);
+
+ ism_hatid = (sfmmu_t *)rgnp->rgn_obj;
+ ASSERT(ism_hatid->sfmmu_ismhat);
+ ism_ment = &scdp->scd_ism_links[rid];
+ ism_ment->iment_hat = scsfmmup;
+ ism_ment->iment_base_va = rgnp->rgn_saddr;
+ mutex_enter(&ism_mlist_lock);
+ iment_add(ism_ment, ism_hatid);
+ mutex_exit(&ism_mlist_lock);
+
+ }
+ }
+ }
+}
+
+/*
+ * Unlink scd sfmmu from ism or hme region list for each region in the
+ * scd region map.
+ */
+void
+sfmmu_unlink_scd_from_regions(sf_srd_t *srdp, sf_scd_t *scdp)
+{
+ uint_t rid;
+ uint_t i;
+ uint_t j;
+ ulong_t w;
+ sf_region_t *rgnp;
+ sfmmu_t *scsfmmup;
+
+ scsfmmup = scdp->scd_sfmmup;
+ for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) {
+ if ((w = scdp->scd_region_map.bitmap[i]) == 0) {
+ continue;
+ }
+ j = 0;
+ while (w) {
+ if (!(w & 0x1)) {
+ j++;
+ w >>= 1;
+ continue;
+ }
+ rid = (i << BT_ULSHIFT) | j;
+ j++;
+ w >>= 1;
+
+ if (rid < SFMMU_MAX_HME_REGIONS) {
+ rgnp = srdp->srd_hmergnp[rid];
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt > 0);
+ sfmmu_unlink_from_hmeregion(scsfmmup,
+ rgnp);
+
+ } else {
+ sfmmu_t *ism_hatid = NULL;
+ ism_ment_t *ism_ment;
+ rid -= SFMMU_MAX_HME_REGIONS;
+ rgnp = srdp->srd_ismrgnp[rid];
+ ASSERT(rgnp->rgn_id == rid);
+ ASSERT(rgnp->rgn_refcnt > 0);
+
+ ism_hatid = (sfmmu_t *)rgnp->rgn_obj;
+ ASSERT(ism_hatid->sfmmu_ismhat);
+ ism_ment = &scdp->scd_ism_links[rid];
+ ASSERT(ism_ment->iment_hat == scdp->scd_sfmmup);
+ ASSERT(ism_ment->iment_base_va ==
+ rgnp->rgn_saddr);
+ ism_ment->iment_hat = NULL;
+ ism_ment->iment_base_va = 0;
+ mutex_enter(&ism_mlist_lock);
+ iment_sub(ism_ment, ism_hatid);
+ mutex_exit(&ism_mlist_lock);
+
+ }
+ }
+ }
+}
+
+/*
+ * Allocates and initialises a new SCD structure. It is called with
+ * the srd_scd_mutex held and returns with the reference count
+ * initialised to 1.
+ */
+static sf_scd_t *
+sfmmu_alloc_scd(sf_srd_t *srdp, sf_region_map_t *new_map)
+{
+ sf_scd_t *new_scdp;
+ sfmmu_t *scsfmmup;
+ int i;
+
+ ASSERT(MUTEX_HELD(&srdp->srd_scd_mutex));
+ new_scdp = kmem_cache_alloc(scd_cache, KM_SLEEP);
+
+ scsfmmup = kmem_cache_alloc(sfmmuid_cache, KM_SLEEP);
+ new_scdp->scd_sfmmup = scsfmmup;
+ scsfmmup->sfmmu_srdp = srdp;
+ scsfmmup->sfmmu_scdp = new_scdp;
+ scsfmmup->sfmmu_tsb0_4minflcnt = 0;
+ scsfmmup->sfmmu_scdhat = 1;
+ CPUSET_ALL(scsfmmup->sfmmu_cpusran);
+ bzero(scsfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE);
+
+ ASSERT(max_mmu_ctxdoms > 0);
+ for (i = 0; i < max_mmu_ctxdoms; i++) {
+ scsfmmup->sfmmu_ctxs[i].cnum = INVALID_CONTEXT;
+ scsfmmup->sfmmu_ctxs[i].gnum = 0;
+ }
+
+ for (i = 0; i < MMU_PAGE_SIZES; i++) {
+ new_scdp->scd_rttecnt[i] = 0;
+ }
+
+ new_scdp->scd_region_map = *new_map;
+ new_scdp->scd_refcnt = 1;
+ if (sfmmu_alloc_scd_tsbs(srdp, new_scdp) != TSB_SUCCESS) {
+ kmem_cache_free(scd_cache, new_scdp);
+ kmem_cache_free(sfmmuid_cache, scsfmmup);
+ return (NULL);
+ }
+ return (new_scdp);
+}
+
+/*
+ * The first phase of a process joining an SCD. The hat structure is
+ * linked to the SCD queue, the HAT_JOIN_SCD sfmmu flag is set, and a
+ * cross-call with context invalidation is used to cause the remaining
+ * work to be carried out in the sfmmu_tsbmiss_exception() routine.
+ */
+static void
+sfmmu_join_scd(sf_scd_t *scdp, sfmmu_t *sfmmup)
+{
+ hatlock_t *hatlockp;
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ int i;
+ sf_scd_t *old_scdp;
+
+ ASSERT(srdp != NULL);
+ ASSERT(scdp != NULL);
+ ASSERT(scdp->scd_refcnt > 0);
+ ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
+
+ if ((old_scdp = sfmmup->sfmmu_scdp) != NULL) {
+ ASSERT(old_scdp != scdp);
+
+ mutex_enter(&old_scdp->scd_mutex);
+ sfmmu_from_scd_list(&old_scdp->scd_sf_list, sfmmup);
+ mutex_exit(&old_scdp->scd_mutex);
+ /*
+ * sfmmup leaves the old scd. Update sfmmu_ttecnt to
+ * include the shme rgn ttecnt for rgns that
+ * were in the old SCD
+ */
+ for (i = 0; i < mmu_page_sizes; i++) {
+ ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
+ old_scdp->scd_rttecnt[i]);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
+ sfmmup->sfmmu_scdrttecnt[i]);
+ }
+ }
+
+ /*
+ * Move sfmmu to the scd lists.
+ */
+ mutex_enter(&scdp->scd_mutex);
+ sfmmu_to_scd_list(&scdp->scd_sf_list, sfmmup);
+ mutex_exit(&scdp->scd_mutex);
+ SF_SCD_INCR_REF(scdp);
+
+ hatlockp = sfmmu_hat_enter(sfmmup);
+ /*
+ * For a multi-thread process, we must stop
+ * all the other threads before joining the scd.
+ */
+
+ SFMMU_FLAGS_SET(sfmmup, HAT_JOIN_SCD);
+
+ sfmmu_invalidate_ctx(sfmmup);
+ sfmmup->sfmmu_scdp = scdp;
+
+ /*
+ * Copy scd_rttecnt into sfmmup's sfmmu_scdrttecnt, and update
+ * sfmmu_ttecnt to not include the rgn ttecnt just joined in SCD.
+ */
+ for (i = 0; i < mmu_page_sizes; i++) {
+ sfmmup->sfmmu_scdrttecnt[i] = scdp->scd_rttecnt[i];
+ ASSERT(sfmmup->sfmmu_ttecnt[i] >= scdp->scd_rttecnt[i]);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
+ -sfmmup->sfmmu_scdrttecnt[i]);
+ }
+ /* update tsb0 inflation count */
+ if (old_scdp != NULL) {
+ sfmmup->sfmmu_tsb0_4minflcnt +=
+ old_scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt;
+ }
+ ASSERT(sfmmup->sfmmu_tsb0_4minflcnt >=
+ scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt);
+ sfmmup->sfmmu_tsb0_4minflcnt -= scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt;
+
+ sfmmu_hat_exit(hatlockp);
+
+ if (old_scdp != NULL) {
+ SF_SCD_DECR_REF(srdp, old_scdp);
+ }
+
+}
+
+/*
+ * This routine is called by a process to become part of an SCD. It is called
+ * from sfmmu_tsbmiss_exception() once most of the initial work has been
+ * done by sfmmu_join_scd(). This routine must not drop the hat lock.
+ */
+static void
+sfmmu_finish_join_scd(sfmmu_t *sfmmup)
+{
+ struct tsb_info *tsbinfop;
+
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+ ASSERT(sfmmup->sfmmu_scdp != NULL);
+ ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD));
+ ASSERT(!SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
+ ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ALLCTX_INVALID));
+
+ for (tsbinfop = sfmmup->sfmmu_tsb; tsbinfop != NULL;
+ tsbinfop = tsbinfop->tsb_next) {
+ if (tsbinfop->tsb_flags & TSB_SWAPPED) {
+ continue;
+ }
+ ASSERT(!(tsbinfop->tsb_flags & TSB_RELOC_FLAG));
+
+ sfmmu_inv_tsb(tsbinfop->tsb_va,
+ TSB_BYTES(tsbinfop->tsb_szc));
+ }
+
+ /* Set HAT_CTX1_FLAG for all SCD ISMs */
+ sfmmu_ism_hatflags(sfmmup, 1);
+
+ SFMMU_STAT(sf_join_scd);
+}
+
+/*
+ * This routine checks whether there is an SCD which matches the
+ * process's region map; if not, a new SCD may be created.
+ */
+static void
+sfmmu_find_scd(sfmmu_t *sfmmup)
+{
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ sf_scd_t *scdp, *new_scdp;
+ int ret;
+
+ ASSERT(srdp != NULL);
+ ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
+
+ mutex_enter(&srdp->srd_scd_mutex);
+ for (scdp = srdp->srd_scdp; scdp != NULL;
+ scdp = scdp->scd_next) {
+ SF_RGNMAP_EQUAL(&scdp->scd_region_map,
+ &sfmmup->sfmmu_region_map, ret);
+ if (ret == 1) {
+ SF_SCD_INCR_REF(scdp);
+ mutex_exit(&srdp->srd_scd_mutex);
+ sfmmu_join_scd(scdp, sfmmup);
+ ASSERT(scdp->scd_refcnt >= 2);
+ atomic_add_32((volatile uint32_t *)
+ &scdp->scd_refcnt, -1);
+ return;
+ } else {
+ /*
+ * If the sfmmu region map is a subset of the scd
+ * region map, then the assumption is that this process
+ * will continue attaching to ISM segments until the
+ * region maps are equal.
+ */
+ SF_RGNMAP_IS_SUBSET(&scdp->scd_region_map,
+ &sfmmup->sfmmu_region_map, ret);
+ if (ret == 1) {
+ mutex_exit(&srdp->srd_scd_mutex);
+ return;
+ }
+ }
+ }
+
+ ASSERT(scdp == NULL);
+ /*
+	 * No matching SCD has been found; create a new one.
+ */
+ if ((new_scdp = sfmmu_alloc_scd(srdp, &sfmmup->sfmmu_region_map)) ==
+ NULL) {
+ mutex_exit(&srdp->srd_scd_mutex);
+ return;
+ }
+
+ /*
+ * sfmmu_alloc_scd() returns with a ref count of 1 on the scd.
+ */
+
+ /* Set scd_rttecnt for shme rgns in SCD */
+ sfmmu_set_scd_rttecnt(srdp, new_scdp);
+
+ /*
+ * Link scd onto srd_scdp list and scd sfmmu onto region/iment lists.
+ */
+ sfmmu_link_scd_to_regions(srdp, new_scdp);
+ sfmmu_add_scd(&srdp->srd_scdp, new_scdp);
+ SFMMU_STAT_ADD(sf_create_scd, 1);
+
+ mutex_exit(&srdp->srd_scd_mutex);
+ sfmmu_join_scd(new_scdp, sfmmup);
+ ASSERT(new_scdp->scd_refcnt >= 2);
+ atomic_add_32((volatile uint32_t *)&new_scdp->scd_refcnt, -1);
+}
+
+/*
+ * This routine is called by a process to remove itself from an SCD. It is
+ * called either when the process has detached from a segment or from
+ * hat_free_start() as a result of calling exit.
+ */
+static void
+sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
+{
+ sf_scd_t *scdp = sfmmup->sfmmu_scdp;
+ sf_srd_t *srdp = sfmmup->sfmmu_srdp;
+ hatlock_t *hatlockp = TSB_HASH(sfmmup);
+ int i;
+
+ ASSERT(scdp != NULL);
+ ASSERT(srdp != NULL);
+
+ if (sfmmup->sfmmu_free) {
+ /*
+ * If the process is part of an SCD the sfmmu is unlinked
+ * from scd_sf_list.
+ */
+ mutex_enter(&scdp->scd_mutex);
+ sfmmu_from_scd_list(&scdp->scd_sf_list, sfmmup);
+ mutex_exit(&scdp->scd_mutex);
+ /*
+ * Update sfmmu_ttecnt to include the rgn ttecnt for rgns that
+ * are about to leave the SCD
+ */
+ for (i = 0; i < mmu_page_sizes; i++) {
+ ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
+ scdp->scd_rttecnt[i]);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
+ sfmmup->sfmmu_scdrttecnt[i]);
+ sfmmup->sfmmu_scdrttecnt[i] = 0;
+ }
+ sfmmup->sfmmu_scdp = NULL;
+
+ SF_SCD_DECR_REF(srdp, scdp);
+ return;
+ }
+
+ ASSERT(r_type != SFMMU_REGION_ISM ||
+ SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
+ ASSERT(scdp->scd_refcnt);
+ ASSERT(!sfmmup->sfmmu_free);
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+ ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
+
+ /*
+ * Wait for ISM maps to be updated.
+ */
+ if (r_type != SFMMU_REGION_ISM) {
+ while (SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY) &&
+ sfmmup->sfmmu_scdp != NULL) {
+ cv_wait(&sfmmup->sfmmu_tsb_cv,
+ HATLOCK_MUTEXP(hatlockp));
+ }
+
+ if (sfmmup->sfmmu_scdp == NULL) {
+ sfmmu_hat_exit(hatlockp);
+ return;
+ }
+ SFMMU_FLAGS_SET(sfmmup, HAT_ISMBUSY);
+ }
+
+ if (SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) {
+ SFMMU_FLAGS_CLEAR(sfmmup, HAT_JOIN_SCD);
+ } else {
+ /*
+ * For a multi-thread process, we must stop
+ * all the other threads before leaving the scd.
+ */
+
+ sfmmu_invalidate_ctx(sfmmup);
+
+		/* Clear all the rids for ISM, delete flags, etc. */
+ ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
+ sfmmu_ism_hatflags(sfmmup, 0);
+ }
+ /*
+ * Update sfmmu_ttecnt to include the rgn ttecnt for rgns that
+ * are in SCD before this sfmmup leaves the SCD.
+ */
+ for (i = 0; i < mmu_page_sizes; i++) {
+ ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
+ scdp->scd_rttecnt[i]);
+ atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
+ sfmmup->sfmmu_scdrttecnt[i]);
+ sfmmup->sfmmu_scdrttecnt[i] = 0;
+ /* update ismttecnt to include SCD ism before hat leaves SCD */
+ sfmmup->sfmmu_ismttecnt[i] += sfmmup->sfmmu_scdismttecnt[i];
+ sfmmup->sfmmu_scdismttecnt[i] = 0;
+ }
+ /* update tsb0 inflation count */
+ sfmmup->sfmmu_tsb0_4minflcnt += scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt;
+
+ if (r_type != SFMMU_REGION_ISM) {
+ SFMMU_FLAGS_CLEAR(sfmmup, HAT_ISMBUSY);
+ }
+ sfmmup->sfmmu_scdp = NULL;
+
+ sfmmu_hat_exit(hatlockp);
+
+ /*
+	 * Unlink sfmmu from scd_sf_list. This can be done without holding
+	 * the hat lock because we hold the sfmmu_as lock, which prevents
+	 * hat_join_region() from adding this thread to the scd again. Other
+	 * threads check whether sfmmu_scdp is NULL under the hat lock; if it
+	 * is NULL they won't get here, since sfmmu_leave_scd() clears
+	 * sfmmu_scdp while holding the hat lock.
+ */
+ mutex_enter(&scdp->scd_mutex);
+ sfmmu_from_scd_list(&scdp->scd_sf_list, sfmmup);
+ mutex_exit(&scdp->scd_mutex);
+ SFMMU_STAT(sf_leave_scd);
+
+ SF_SCD_DECR_REF(srdp, scdp);
+ hatlockp = sfmmu_hat_enter(sfmmup);
+
+}
+
+/*
+ * Unlink and free up an SCD structure with a reference count of 0.
+ */
+static void
+sfmmu_destroy_scd(sf_srd_t *srdp, sf_scd_t *scdp, sf_region_map_t *scd_rmap)
+{
+ sfmmu_t *scsfmmup;
+ sf_scd_t *sp;
+ hatlock_t *shatlockp;
+ int i, ret;
+
+ mutex_enter(&srdp->srd_scd_mutex);
+ for (sp = srdp->srd_scdp; sp != NULL; sp = sp->scd_next) {
+ if (sp == scdp)
+ break;
+ }
+ if (sp == NULL || sp->scd_refcnt) {
+ mutex_exit(&srdp->srd_scd_mutex);
+ return;
+ }
+
+ /*
+ * It is possible that the scd has been freed and reallocated with a
+ * different region map while we've been waiting for the srd_scd_mutex.
+ */
+ SF_RGNMAP_EQUAL(scd_rmap, &sp->scd_region_map, ret);
+ if (ret != 1) {
+ mutex_exit(&srdp->srd_scd_mutex);
+ return;
+ }
+
+ ASSERT(scdp->scd_sf_list == NULL);
+ /*
+ * Unlink scd from srd_scdp list.
+ */
+ sfmmu_remove_scd(&srdp->srd_scdp, scdp);
+ mutex_exit(&srdp->srd_scd_mutex);
+
+ sfmmu_unlink_scd_from_regions(srdp, scdp);
+
+ /* Clear shared context tsb and release ctx */
+ scsfmmup = scdp->scd_sfmmup;
+
+ /*
+	 * Create a barrier so that the scd will not be destroyed while
+	 * another thread still holds the same shared hat lock.
+ * E.g., sfmmu_tsbmiss_exception() needs to acquire the
+ * shared hat lock before checking the shared tsb reloc flag.
+ */
+ shatlockp = sfmmu_hat_enter(scsfmmup);
+ sfmmu_hat_exit(shatlockp);
+
+ sfmmu_free_scd_tsbs(scsfmmup);
+
+ for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
+ if (scsfmmup->sfmmu_hmeregion_links[i] != NULL) {
+ kmem_free(scsfmmup->sfmmu_hmeregion_links[i],
+ SFMMU_L2_HMERLINKS_SIZE);
+ scsfmmup->sfmmu_hmeregion_links[i] = NULL;
+ }
+ }
+ kmem_cache_free(sfmmuid_cache, scsfmmup);
+ kmem_cache_free(scd_cache, scdp);
+ SFMMU_STAT(sf_destroy_scd);
+}
+
+/*
+ * Modifies the HAT_CTX1_FLAG for each of the ISM segments whose bits are
+ * set in the SCD's ism region map. This flag indicates to the tsbmiss
+ * handler that mappings for these segments should be loaded using
+ * the shared context.
+ */
+static void
+sfmmu_ism_hatflags(sfmmu_t *sfmmup, int addflag)
+{
+ sf_scd_t *scdp = sfmmup->sfmmu_scdp;
+ ism_blk_t *ism_blkp;
+ ism_map_t *ism_map;
+ int i, rid;
+
+ ASSERT(sfmmup->sfmmu_iblk != NULL);
+ ASSERT(scdp != NULL);
+ /*
+ * Note that the caller either set HAT_ISMBUSY flag or checked
+ * under hat lock that HAT_ISMBUSY was not set by another thread.
+ */
+ ASSERT(sfmmu_hat_lock_held(sfmmup));
+
+ ism_blkp = sfmmup->sfmmu_iblk;
+ while (ism_blkp != NULL) {
+ ism_map = ism_blkp->iblk_maps;
+		for (i = 0; i < ISM_MAP_SLOTS && ism_map[i].imap_ismhat; i++) {
+ rid = ism_map[i].imap_rid;
+ if (rid == SFMMU_INVALID_ISMRID) {
+ continue;
+ }
+ ASSERT(rid >= 0 && rid < SFMMU_MAX_ISM_REGIONS);
+ if (SF_RGNMAP_TEST(scdp->scd_ismregion_map, rid)) {
+ if (addflag) {
+ ism_map[i].imap_hatflags |=
+ HAT_CTX1_FLAG;
+ } else {
+ ism_map[i].imap_hatflags &=
+ ~HAT_CTX1_FLAG;
+ }
+ }
+ }
+ ism_blkp = ism_blkp->iblk_next;
+ }
+}
+
+static int
+sfmmu_srd_lock_held(sf_srd_t *srdp)
+{
+ return (MUTEX_HELD(&srdp->srd_mutex));
+}
+
+/* ARGSUSED */
+static int
+sfmmu_scdcache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ sf_scd_t *scdp = (sf_scd_t *)buf;
+
+ bzero(buf, sizeof (sf_scd_t));
+ mutex_init(&scdp->scd_mutex, NULL, MUTEX_DEFAULT, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+sfmmu_scdcache_destructor(void *buf, void *cdrarg)
+{
+ sf_scd_t *scdp = (sf_scd_t *)buf;
+
+ mutex_destroy(&scdp->scd_mutex);
+}
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
index 1d11998521..2dc7183d85 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
@@ -72,6 +72,7 @@ extern "C" {
#include <sys/ksynch.h>
typedef struct hat sfmmu_t;
+typedef struct sf_scd sf_scd_t;
/*
* SFMMU attributes for hat_memload/hat_devload
@@ -186,7 +187,8 @@ typedef struct hat_lock {
*/
typedef struct ism_map {
uintptr_t imap_seg; /* base va + sz of ISM segment */
- ushort_t imap_vb_shift; /* mmu_pageshift for ism page size */
+ uchar_t imap_vb_shift; /* mmu_pageshift for ism page size */
+ uchar_t imap_rid; /* region id for ism */
ushort_t imap_hatflags; /* primary ism page size */
uint_t imap_sz_mask; /* mmu_pagemask for ism page size */
sfmmu_t *imap_ismhat; /* hat id of dummy ISM as */
@@ -263,6 +265,239 @@ struct tsb_info {
#define TSB_RELOC_FLAG 0x1
#define TSB_FLUSH_NEEDED 0x2
#define TSB_SWAPPED 0x4
+#define TSB_SHAREDCTX 0x8
+
+#endif /* !_ASM */
+
+/*
+ * Data structures for shared hmeblk support.
+ */
+
+/*
+ * Do not increase the maximum number of ism/hme regions without first
+ * checking the impact on ism_map_t, the TSB miss area, the hblk tag and
+ * the region id type in the sf_region structure.
+ * Initially, shared hmes will only be used for the main text segment;
+ * therefore this value is set to 64. It will be increased when shared
+ * libraries are included.
+ */
+
+#define SFMMU_MAX_HME_REGIONS (64)
+#define SFMMU_HMERGNMAP_WORDS BT_BITOUL(SFMMU_MAX_HME_REGIONS)
+
+#define SFMMU_PRIVATE 0
+#define SFMMU_SHARED 1
+
+#ifndef _ASM
+
+#define SFMMU_MAX_ISM_REGIONS (64)
+#define SFMMU_ISMRGNMAP_WORDS BT_BITOUL(SFMMU_MAX_ISM_REGIONS)
+
+#define SFMMU_RGNMAP_WORDS (SFMMU_HMERGNMAP_WORDS + SFMMU_ISMRGNMAP_WORDS)
+
+#define SFMMU_MAX_REGION_BUCKETS (128)
+#define SFMMU_MAX_SRD_BUCKETS (2048)
+
+typedef struct sf_hmeregion_map {
+ ulong_t bitmap[SFMMU_HMERGNMAP_WORDS];
+} sf_hmeregion_map_t;
+
+typedef struct sf_ismregion_map {
+ ulong_t bitmap[SFMMU_ISMRGNMAP_WORDS];
+} sf_ismregion_map_t;
+
+typedef union sf_region_map_u {
+ struct _h_rmap_s {
+ sf_hmeregion_map_t hmeregion_map;
+ sf_ismregion_map_t ismregion_map;
+ } h_rmap_s;
+ ulong_t bitmap[SFMMU_RGNMAP_WORDS];
+} sf_region_map_t;
+
+#define SF_RGNMAP_ZERO(map) { \
+ int _i; \
+ for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \
+ (map).bitmap[_i] = 0; \
+ } \
+}
+
+/*
+ * Sets rval to 1 if map1 and map2 are equal, and to 0 otherwise.
+ */
+#define SF_RGNMAP_EQUAL(map1, map2, rval) { \
+ int _i; \
+ for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \
+ if ((map1)->bitmap[_i] != (map2)->bitmap[_i]) \
+ break; \
+ } \
+ if (_i < SFMMU_RGNMAP_WORDS) \
+ rval = 0; \
+ else \
+ rval = 1; \
+}
+
+#define SF_RGNMAP_ADD(map, r) BT_SET((map).bitmap, r)
+#define SF_RGNMAP_DEL(map, r) BT_CLEAR((map).bitmap, r)
+#define SF_RGNMAP_TEST(map, r) BT_TEST((map).bitmap, r)
+
+/*
+ * Tests whether map2 is a subset of map1; sets rval to 1 if
+ * so, and to 0 otherwise.
+ */
+#define SF_RGNMAP_IS_SUBSET(map1, map2, rval) { \
+ int _i; \
+ for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \
+ if (((map1)->bitmap[_i] & (map2)->bitmap[_i]) \
+ != (map2)->bitmap[_i]) { \
+ break; \
+ } \
+ } \
+ if (_i < SFMMU_RGNMAP_WORDS) \
+ rval = 0; \
+ else \
+ rval = 1; \
+}
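
sfmmu_find_scd() uses SF_RGNMAP_EQUAL and SF_RGNMAP_IS_SUBSET to decide whether a process can join an existing SCD or is still growing toward one. A hedged, self-contained sketch of the same semantics, with plain uint64_t arrays standing in for sf_region_map_t and open-coded bit ops standing in for BT_SET/BT_TEST:

#include <assert.h>
#include <stdint.h>

#define WORDS	2		/* stand-in for SFMMU_RGNMAP_WORDS */
#define BPW	64		/* bits per map word */

static void
map_add(uint64_t *m, unsigned int r)
{
	m[r / BPW] |= (uint64_t)1 << (r % BPW);
}

static int
map_test(const uint64_t *m, unsigned int r)
{
	return ((m[r / BPW] >> (r % BPW)) & 1);
}

/* Is m2 a subset of m1?  Mirrors SF_RGNMAP_IS_SUBSET(m1, m2, rval). */
static int
map_is_subset(const uint64_t *m1, const uint64_t *m2)
{
	int i;

	for (i = 0; i < WORDS; i++) {
		if ((m1[i] & m2[i]) != m2[i])
			return (0);
	}
	return (1);
}

int
main(void)
{
	uint64_t scd[WORDS] = { 0 }, proc[WORDS] = { 0 };

	map_add(scd, 3);
	map_add(scd, 70);	/* an SCD sharing rids 3 and 70 */
	map_add(proc, 3);	/* a process that has attached rid 3 so far */

	assert(map_is_subset(scd, proc));	/* proc can still grow into scd */
	map_add(proc, 70);
	assert(map_test(proc, 70));
	assert(map_is_subset(scd, proc) && map_is_subset(proc, scd)); /* equal */
	return (0);
}
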
+
+#define SF_SCD_INCR_REF(scdp) { \
+ atomic_add_32((volatile uint32_t *)&(scdp)->scd_refcnt, 1); \
+}
+
+#define SF_SCD_DECR_REF(srdp, scdp) { \
+ sf_region_map_t _scd_rmap = (scdp)->scd_region_map; \
+ if (!atomic_add_32_nv( \
+ (volatile uint32_t *)&(scdp)->scd_refcnt, -1)) { \
+ sfmmu_destroy_scd((srdp), (scdp), &_scd_rmap); \
+ } \
+}
+
+/*
+ * An sfmmup link in the linked list of sfmmups that share the same region.
+ */
+typedef struct sf_rgn_link {
+ sfmmu_t *next;
+ sfmmu_t *prev;
+} sf_rgn_link_t;
+
+/*
+ * rgn_flags values.
+ */
+#define SFMMU_REGION_HME 0x1
+#define SFMMU_REGION_ISM 0x2
+#define SFMMU_REGION_FREE 0x8
+
+#define SFMMU_REGION_TYPE_MASK (0x3)
+
+/*
+ * sf_region defines a text or (D)ISM segment whose mappings share
+ * the same underlying physical object.
+ */
+typedef struct sf_region {
+ caddr_t rgn_saddr; /* base addr of attached seg */
+ size_t rgn_size; /* size of attached seg */
+ void *rgn_obj; /* the underlying object id */
+ u_offset_t rgn_objoff; /* offset in the object mapped */
+ uchar_t rgn_perm; /* PROT_READ/WRITE/EXEC */
+ uchar_t rgn_pgszc; /* page size of the region */
+ uchar_t rgn_flags; /* region type, free flag */
+ uchar_t rgn_id;
+ int rgn_refcnt; /* # of hats sharing the region */
+ /* callback function for hat_unload_callback */
+ hat_rgn_cb_func_t rgn_cb_function;
+ struct sf_region *rgn_hash; /* hash chain linking the rgns */
+ kmutex_t rgn_mutex; /* protect region sfmmu list */
+ /* A link list of processes attached to this region */
+ sfmmu_t *rgn_sfmmu_head;
+ ulong_t rgn_ttecnt[MMU_PAGE_SIZES];
+ uint16_t rgn_hmeflags; /* rgn tte size flags */
+} sf_region_t;
+
+#define rgn_next rgn_hash
+
+/* srd */
+typedef struct sf_shared_region_domain {
+ vnode_t *srd_evp; /* executable vnode */
+ /* hme region table */
+ sf_region_t *srd_hmergnp[SFMMU_MAX_HME_REGIONS];
+ /* ism region table */
+ sf_region_t *srd_ismrgnp[SFMMU_MAX_ISM_REGIONS];
+ /* hash chain linking srds */
+ struct sf_shared_region_domain *srd_hash;
+ /* pointer to the next free hme region */
+ sf_region_t *srd_hmergnfree;
+ /* pointer to the next free ism region */
+ sf_region_t *srd_ismrgnfree;
+ /* id of next ism rgn created */
+ uint16_t srd_next_ismrid;
+ /* pointer of next hme region created */
+ uint16_t srd_next_hmerid;
+ uint16_t srd_ismbusyrgns; /* # of ism rgns in use */
+ uint16_t srd_hmebusyrgns; /* # of hme rgns in use */
+ int srd_refcnt; /* # of procs in the srd */
+ kmutex_t srd_mutex; /* sync add/remove rgns */
+ kmutex_t srd_scd_mutex;
+ sf_scd_t *srd_scdp; /* list of scds in srd */
+ /* hash of regions associated with the same executable */
+ sf_region_t *srd_rgnhash[SFMMU_MAX_REGION_BUCKETS];
+} sf_srd_t;
+
+typedef struct sf_srd_bucket {
+ kmutex_t srdb_lock;
+ sf_srd_t *srdb_srdp;
+} sf_srd_bucket_t;
+
+/*
+ * The values of SFMMU_L1_HMERLINKS and SFMMU_L2_HMERLINKS will be increased
+ * to 16 when the use of shared hmes for shared libraries is enabled.
+ */
+
+#define SFMMU_L1_HMERLINKS (8)
+#define SFMMU_L2_HMERLINKS (8)
+#define SFMMU_L1_HMERLINKS_SHIFT (3)
+#define SFMMU_L1_HMERLINKS_MASK (SFMMU_L1_HMERLINKS - 1)
+#define SFMMU_L2_HMERLINKS_MASK (SFMMU_L2_HMERLINKS - 1)
+#define SFMMU_L1_HMERLINKS_SIZE \
+ (SFMMU_L1_HMERLINKS * sizeof (sf_rgn_link_t *))
+#define SFMMU_L2_HMERLINKS_SIZE \
+ (SFMMU_L2_HMERLINKS * sizeof (sf_rgn_link_t))
+
+#if (SFMMU_L1_HMERLINKS * SFMMU_L2_HMERLINKS < SFMMU_MAX_HME_REGIONS)
+#error Not Enough HMERLINKS
+#endif
+
+/*
+ * This macro looks up the level 2 region link for a shme rgn; when alloc
+ * is set it allocates the level 2 array if needed, grabbing the hat lock
+ * if lock is also set. In the majority of cases the macro is called with
+ * alloc = 0 and lock = 0.
+ */
+#define SFMMU_HMERID2RLINKP(sfmmup, rid, lnkp, alloc, lock) \
+{ \
+ int _l1ix = ((rid) >> SFMMU_L1_HMERLINKS_SHIFT) & \
+ SFMMU_L1_HMERLINKS_MASK; \
+ int _l2ix = ((rid) & SFMMU_L2_HMERLINKS_MASK); \
+ hatlock_t *_hatlockp; \
+ lnkp = (sfmmup)->sfmmu_hmeregion_links[_l1ix]; \
+ if (lnkp != NULL) { \
+ lnkp = &lnkp[_l2ix]; \
+ } else if (alloc && lock) { \
+ lnkp = kmem_zalloc(SFMMU_L2_HMERLINKS_SIZE, KM_SLEEP); \
+ _hatlockp = sfmmu_hat_enter(sfmmup); \
+ if ((sfmmup)->sfmmu_hmeregion_links[_l1ix] != NULL) { \
+ sfmmu_hat_exit(_hatlockp); \
+ kmem_free(lnkp, SFMMU_L2_HMERLINKS_SIZE); \
+ lnkp = (sfmmup)->sfmmu_hmeregion_links[_l1ix]; \
+ ASSERT(lnkp != NULL); \
+ } else { \
+ (sfmmup)->sfmmu_hmeregion_links[_l1ix] = lnkp; \
+ sfmmu_hat_exit(_hatlockp); \
+ } \
+ lnkp = &lnkp[_l2ix]; \
+ } else if (alloc) { \
+ lnkp = kmem_zalloc(SFMMU_L2_HMERLINKS_SIZE, KM_SLEEP); \
+ ASSERT((sfmmup)->sfmmu_hmeregion_links[_l1ix] == NULL); \
+ (sfmmup)->sfmmu_hmeregion_links[_l1ix] = lnkp; \
+ lnkp = &lnkp[_l2ix]; \
+ } \
+}
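
A hedged sketch of the two-level index arithmetic SFMMU_HMERID2RLINKP performs, using the constants above (8 x 8 link slots covering the 64 hme region ids); each rid maps to a unique (l1, l2) slot:

#include <assert.h>

#define L1_SHIFT	3	/* SFMMU_L1_HMERLINKS_SHIFT */
#define L1_MASK		7	/* SFMMU_L1_HMERLINKS_MASK */
#define L2_MASK		7	/* SFMMU_L2_HMERLINKS_MASK */

int
main(void)
{
	unsigned int rid;

	for (rid = 0; rid < 64; rid++) {
		unsigned int l1 = (rid >> L1_SHIFT) & L1_MASK;
		unsigned int l2 = rid & L2_MASK;

		assert(l1 * 8 + l2 == rid);	/* unique slot per rid */
	}
	return (0);
}
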
/*
* Per-MMU context domain kstats.
@@ -390,25 +625,40 @@ struct hat {
void *sfmmu_xhat_provider; /* NULL for CPU hat */
cpuset_t sfmmu_cpusran; /* cpu bit mask for efficient xcalls */
struct as *sfmmu_as; /* as this hat provides mapping for */
- ulong_t sfmmu_ttecnt[MMU_PAGE_SIZES]; /* per sz tte counts */
- ulong_t sfmmu_ismttecnt[MMU_PAGE_SIZES]; /* est. ism ttes */
+ /* per pgsz private ttecnt + shme rgns ttecnt for rgns not in SCD */
+ ulong_t sfmmu_ttecnt[MMU_PAGE_SIZES];
+ /* shme rgns ttecnt for rgns in SCD */
+ ulong_t sfmmu_scdrttecnt[MMU_PAGE_SIZES];
+ /* est. ism ttes that are NOT in a SCD */
+ ulong_t sfmmu_ismttecnt[MMU_PAGE_SIZES];
+ /* ttecnt for isms that are in a SCD */
+ ulong_t sfmmu_scdismttecnt[MMU_PAGE_SIZES];
+ /* inflate tsb0 to allow for large page alloc failure in region */
+ ulong_t sfmmu_tsb0_4minflcnt;
union _h_un {
ism_blk_t *sfmmu_iblkp; /* maps to ismhat(s) */
ism_ment_t *sfmmu_imentp; /* ism hat's mapping list */
} h_un;
uint_t sfmmu_free:1; /* hat to be freed - set on as_free */
uint_t sfmmu_ismhat:1; /* hat is dummy ism hatid */
- uint_t sfmmu_ctxflushed:1; /* ctx has been flushed */
+ uint_t sfmmu_scdhat:1; /* hat is dummy scd hatid */
uchar_t sfmmu_rmstat; /* refmod stats refcnt */
ushort_t sfmmu_clrstart; /* start color bin for page coloring */
ushort_t sfmmu_clrbin; /* per as phys page coloring bin */
ushort_t sfmmu_flags; /* flags */
+ uchar_t sfmmu_tteflags; /* pgsz flags */
+ uchar_t sfmmu_rtteflags; /* pgsz flags for SRD hmes */
struct tsb_info *sfmmu_tsb; /* list of per as tsbs */
uint64_t sfmmu_ismblkpa; /* pa of sfmmu_iblkp, or -1 */
lock_t sfmmu_ctx_lock; /* sync ctx alloc and invalidation */
kcondvar_t sfmmu_tsb_cv; /* signals TSB swapin or relocation */
uchar_t sfmmu_cext; /* context page size encoding */
uint8_t sfmmu_pgsz[MMU_PAGE_SIZES]; /* ranking for MMU */
+ sf_srd_t *sfmmu_srdp;
+ sf_scd_t *sfmmu_scdp; /* scd this address space belongs to */
+ sf_region_map_t sfmmu_region_map;
+ sf_rgn_link_t *sfmmu_hmeregion_links[SFMMU_L1_HMERLINKS];
+ sf_rgn_link_t sfmmu_scd_link; /* link to scd or pending queue */
#ifdef sun4v
struct hv_tsb_block sfmmu_hvblock;
#endif
@@ -427,6 +677,39 @@ struct hat {
#define sfmmu_iblk h_un.sfmmu_iblkp
#define sfmmu_iment h_un.sfmmu_imentp
+#define sfmmu_hmeregion_map sfmmu_region_map.h_rmap_s.hmeregion_map
+#define sfmmu_ismregion_map sfmmu_region_map.h_rmap_s.ismregion_map
+
+#define SF_RGNMAP_ISNULL(sfmmup) \
+ (sfrgnmap_isnull(&(sfmmup)->sfmmu_region_map))
+#define SF_HMERGNMAP_ISNULL(sfmmup) \
+ (sfhmergnmap_isnull(&(sfmmup)->sfmmu_hmeregion_map))
+
+struct sf_scd {
+ sfmmu_t *scd_sfmmup; /* shared context hat */
+ /* per pgsz ttecnt for shme rgns in SCD */
+ ulong_t scd_rttecnt[MMU_PAGE_SIZES];
+ uint_t scd_refcnt; /* address spaces attached to scd */
+ sf_region_map_t scd_region_map; /* bit mask of attached segments */
+ sf_scd_t *scd_next; /* link pointers for srd_scd list */
+ sf_scd_t *scd_prev;
+ sfmmu_t *scd_sf_list; /* list of doubly linked hat structs */
+ kmutex_t scd_mutex;
+ /*
+ * Link used to add an scd to the sfmmu_iment list.
+ */
+ ism_ment_t scd_ism_links[SFMMU_MAX_ISM_REGIONS];
+};
+
+#define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map
+#define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map
+
+extern int disable_shctx;
+extern int shctx_on;
+
/*
* bit mask for managing vac conflicts on large pages.
* bit 1 is for uncache flag.
@@ -510,63 +793,39 @@ struct ctx_trace {
(ASSERT(sfmmu_hat_lock_held((sfmmup))), \
(sfmmup)->sfmmu_flags |= (flags))
-/*
- * sfmmu HAT flags
- */
-#define HAT_64K_FLAG 0x01
-#define HAT_512K_FLAG 0x02
-#define HAT_4M_FLAG 0x04
-#define HAT_32M_FLAG 0x08
-#define HAT_256M_FLAG 0x10
-#define HAT_4MTEXT_FLAG 0x80
-#define HAT_SWAPPED 0x100 /* swapped out */
-#define HAT_SWAPIN 0x200 /* swapping in */
-#define HAT_BUSY 0x400 /* replacing TSB(s) */
-#define HAT_ISMBUSY 0x800 /* adding/removing/traversing ISM maps */
-
-#define HAT_LGPG_FLAGS \
- (HAT_64K_FLAG | HAT_512K_FLAG | HAT_4M_FLAG | \
- HAT_32M_FLAG | HAT_256M_FLAG)
+#define SFMMU_TTEFLAGS_ISSET(sfmmup, flags) \
+ ((((sfmmup)->sfmmu_tteflags | (sfmmup)->sfmmu_rtteflags) & (flags)) == \
+ (flags))
-#define HAT_FLAGS_MASK \
- (HAT_LGPG_FLAGS | HAT_4MTEXT_FLAG | HAT_SWAPPED | \
- HAT_SWAPIN | HAT_BUSY | HAT_ISMBUSY)
/*
- * Context flags
+ * sfmmu tte HAT flags, must fit in 8 bits
*/
-#define CTX_FREE_FLAG 0x1
-#define CTX_FLAGS_MASK 0x1
-
-#define CTX_SET_FLAGS(ctx, flag) \
-{ \
- uint32_t old, new; \
- \
- do { \
- new = old = (ctx)->ctx_flags; \
- new &= CTX_FLAGS_MASK; \
- new |= flag; \
- new = cas32(&(ctx)->ctx_flags, old, new); \
- } while (new != old); \
-}
-
-#define CTX_CLEAR_FLAGS(ctx, flag) \
-{ \
- uint32_t old, new; \
- \
- do { \
- new = old = (ctx)->ctx_flags; \
- new &= CTX_FLAGS_MASK & ~(flag); \
- new = cas32(&(ctx)->ctx_flags, old, new); \
- } while (new != old); \
-}
-
-#define ctxtoctxnum(ctx) ((ushort_t)((ctx) - ctxs))
+#define HAT_CHKCTX1_FLAG 0x1
+#define HAT_64K_FLAG (0x1 << TTE64K)
+#define HAT_512K_FLAG (0x1 << TTE512K)
+#define HAT_4M_FLAG (0x1 << TTE4M)
+#define HAT_32M_FLAG (0x1 << TTE32M)
+#define HAT_256M_FLAG (0x1 << TTE256M)
/*
- * Defines needed for ctx stealing.
+ * sfmmu HAT flags, 16 bits at the moment.
*/
-#define GET_CTX_RETRY_CNT 100
+#define HAT_4MTEXT_FLAG 0x01
+#define HAT_32M_ISM 0x02
+#define HAT_256M_ISM 0x04
+#define HAT_SWAPPED 0x08 /* swapped out */
+#define HAT_SWAPIN 0x10 /* swapping in */
+#define HAT_BUSY 0x20 /* replacing TSB(s) */
+#define HAT_ISMBUSY 0x40 /* adding/removing/traversing ISM maps */
+
+#define HAT_CTX1_FLAG 0x100 /* ISM imap hatflag for ctx1 */
+#define HAT_JOIN_SCD 0x200 /* region is joining scd */
+#define HAT_ALLCTX_INVALID 0x400 /* all per-MMU ctxs are invalidated */
+
+#define SFMMU_LGPGS_INUSE(sfmmup) \
+ (((sfmmup)->sfmmu_tteflags | (sfmmup)->sfmmu_rtteflags) || \
+ ((sfmmup)->sfmmu_iblk != NULL))
/*
* Starting with context 0, the first NUM_LOCKED_CTXS contexts
@@ -657,31 +916,71 @@ struct pa_hment {
* without checking those routines. See HTAG_SFMMUPSZ define.
*/
+/*
+ * In private hmeblks the hblk_rid field must be SFMMU_INVALID_SHMERID.
+ */
typedef union {
struct {
- uint64_t hblk_basepg: 51, /* hme_blk base pg # */
- hblk_rehash: 13; /* rehash number */
- sfmmu_t *sfmmup;
+ uint64_t hblk_basepg: 51, /* hme_blk base pg # */
+ hblk_rehash: 3, /* rehash number */
+ hblk_rid: 10; /* hme_blk region id */
+ void *hblk_id;
} hblk_tag_un;
uint64_t htag_tag[2];
} hmeblk_tag;
-#define htag_id hblk_tag_un.sfmmup
+#define htag_id hblk_tag_un.hblk_id
#define htag_bspage hblk_tag_un.hblk_basepg
#define htag_rehash hblk_tag_un.hblk_rehash
+#define htag_rid hblk_tag_un.hblk_rid
+
+#endif /* !_ASM */
+
+#define HTAG_REHASH_SHIFT 10
+#define HTAG_MAX_RID (((0x1 << HTAG_REHASH_SHIFT) - 1))
+#define HTAG_RID_MASK HTAG_MAX_RID
+
+/* used for tagging all per sfmmu (i.e. non SRD) private hmeblks */
+#define SFMMU_INVALID_SHMERID HTAG_MAX_RID
+
+#if SFMMU_INVALID_SHMERID < SFMMU_MAX_HME_REGIONS
+#error SFMMU_INVALID_SHMERID < SFMMU_MAX_HME_REGIONS
+#endif
+
+#define SFMMU_IS_SHMERID_VALID(rid) ((rid) != SFMMU_INVALID_SHMERID)
+
+/* ISM regions */
+#define SFMMU_INVALID_ISMRID 0xff
+
+#if SFMMU_INVALID_ISMRID < SFMMU_MAX_ISM_REGIONS
+#error SFMMU_INVALID_ISMRID < SFMMU_MAX_ISM_REGIONS
+#endif
+
+#define SFMMU_IS_ISMRID_VALID(rid) ((rid) != SFMMU_INVALID_ISMRID)
+
#define HTAGS_EQ(tag1, tag2) (((tag1.htag_tag[0] ^ tag2.htag_tag[0]) | \
(tag1.htag_tag[1] ^ tag2.htag_tag[1])) == 0)
+
+/*
+ * this macro must only be used for comparing tags in shared hmeblks.
+ */
+#define HTAGS_EQ_SHME(hmetag, tag, hrmap) \
+ (((hmetag).htag_rid != SFMMU_INVALID_SHMERID) && \
+ (((((hmetag).htag_tag[0] ^ (tag).htag_tag[0]) & \
+ ~HTAG_RID_MASK) | \
+ ((hmetag).htag_tag[1] ^ (tag).htag_tag[1])) == 0) && \
+ SF_RGNMAP_TEST(hrmap, hmetag.htag_rid))
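
The htag layout above (51-bit base page, 3-bit rehash, 10-bit region id, with HTAG_BSPAGE_SHIFT 13 and HTAG_REHASH_SHIFT 10) is mirrored by the OBP HME_HASH_TAG word in startup.c below. A hedged standalone sketch of the packing and of the rid-masked comparison that HTAGS_EQ_SHME performs (the MY_ names are illustrative, not the kernel's):

#include <assert.h>
#include <stdint.h>

#define MY_HTAG_BSPAGE_SHIFT	13
#define MY_HTAG_REHASH_SHIFT	10
#define MY_HTAG_RID_MASK	0x3ff
#define MY_INVALID_SHMERID	MY_HTAG_RID_MASK	/* private hmeblk */

static uint64_t
pack_tag0(uint64_t bspage, uint64_t rehash, uint64_t rid)
{
	return ((bspage << MY_HTAG_BSPAGE_SHIFT) |
	    (rehash << MY_HTAG_REHASH_SHIFT) | rid);
}

int
main(void)
{
	uint64_t t = pack_tag0(0x123456789abULL, 1, MY_INVALID_SHMERID);
	uint64_t s1, s2;

	assert((t & MY_HTAG_RID_MASK) == MY_INVALID_SHMERID);
	assert(((t >> MY_HTAG_REHASH_SHIFT) & 0x7) == 1);
	assert((t >> MY_HTAG_BSPAGE_SHIFT) == 0x123456789abULL);

	/* Comparing shared tags ignores the rid bits, as HTAGS_EQ_SHME does. */
	s1 = pack_tag0(0x1000, 1, 5);
	s2 = pack_tag0(0x1000, 1, 5);
	assert(((s1 ^ s2) & ~(uint64_t)MY_HTAG_RID_MASK) == 0);
	return (0);
}
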
+
#define HME_REHASH(sfmmup) \
((sfmmup)->sfmmu_ttecnt[TTE512K] != 0 || \
(sfmmup)->sfmmu_ttecnt[TTE4M] != 0 || \
(sfmmup)->sfmmu_ttecnt[TTE32M] != 0 || \
(sfmmup)->sfmmu_ttecnt[TTE256M] != 0)
-#endif /* !_ASM */
-
#define NHMENTS 8 /* # of hments in an 8k hme_blk */
/* needs to be multiple of 2 */
+
#ifndef _ASM
#ifdef HBLK_TRACE
@@ -730,8 +1029,8 @@ struct hblk_lockcnt_audit {
*/
struct hme_blk_misc {
- ushort_t locked_cnt; /* HAT_LOAD_LOCK ref cnt */
- uint_t notused:10;
+ uint_t notused:25;
+ uint_t shared_bit:1; /* set for SRD shared hmeblk */
uint_t xhat_bit:1; /* set for an xhat hme_blk */
uint_t shadow_bit:1; /* set for a shadow hme_blk */
uint_t nucleus_bit:1; /* set for a nucleus hme_blk */
@@ -760,6 +1059,8 @@ struct hme_blk {
uint_t hblk_shadow_mask;
} hblk_un;
+ uint_t hblk_lckcnt;
+
#ifdef HBLK_TRACE
kmutex_t hblk_audit_lock; /* lock to protect index */
uint_t hblk_audit_index; /* index into audit_cache */
@@ -769,7 +1070,7 @@ struct hme_blk {
struct sf_hment hblk_hme[1]; /* hment array */
};
-#define hblk_lckcnt hblk_misc.locked_cnt
+#define hblk_shared hblk_misc.shared_bit
#define hblk_xhat_bit hblk_misc.xhat_bit
#define hblk_shw_bit hblk_misc.shadow_bit
#define hblk_nuc_bit hblk_misc.nucleus_bit
@@ -778,7 +1079,7 @@ struct hme_blk {
#define hblk_vcnt hblk_un.hblk_counts.hblk_validcnt
#define hblk_shw_mask hblk_un.hblk_shadow_mask
-#define MAX_HBLK_LCKCNT 0xFFFF
+#define MAX_HBLK_LCKCNT 0xFFFFFFFF
#define HMEBLK_ALIGN 0x8 /* hmeblk has to be double aligned */
#ifdef HBLK_TRACE
@@ -864,7 +1165,6 @@ struct hmehash_bucket {
#endif /* !_ASM */
-/* Proc Count Project */
#define SFMMU_PGCNT_MASK 0x3f
#define SFMMU_PGCNT_SHIFT 6
#define INVALID_MMU_ID -1
@@ -881,7 +1181,7 @@ struct hmehash_bucket {
* bits.
*/
#define HTAG_SFMMUPSZ 0 /* Not really used for LP64 */
-#define HTAG_REHASHSZ 13
+#define HTAG_BSPAGE_SHIFT 13
/*
* Assembly routines need to be able to get to ttesz
@@ -918,6 +1218,9 @@ struct hmehash_bucket {
#define tte_to_vaddr(hmeblkp, tte) ((caddr_t)(get_hblk_base(hmeblkp) \
+ (TTEBYTES(TTE_CSZ(&tte)) * (tte).tte_hmenum)))
+#define tte_to_evaddr(hmeblkp, ttep) ((caddr_t)(get_hblk_base(hmeblkp) \
+ + (TTEBYTES(TTE_CSZ(ttep)) * ((ttep)->tte_hmenum + 1))))
+
#define vaddr_to_vshift(hblktag, vaddr, shwsz) \
((((uintptr_t)(vaddr) >> MMU_PAGESHIFT) - (hblktag.htag_bspage)) >>\
TTE_BSZS_SHIFT((shwsz) - 1))
@@ -980,6 +1283,9 @@ struct hmehash_bucket {
#define KHMEHASH_SZ khmehash_num
#define HMENT_HASHAVELEN 4
#define HBLK_RANGE_SHIFT MMU_PAGESHIFT64K /* shift for HBLK_BS_MASK */
+#define HBLK_MIN_TTESZ 1
+#define HBLK_MIN_BYTES MMU_PAGESIZE64K
+#define HBLK_MIN_SHIFT MMU_PAGESHIFT64K
#define MAX_HASHCNT 5
#define DEFAULT_MAX_HASHCNT 3
@@ -999,12 +1305,12 @@ struct hmehash_bucket {
#define HME_HASH_REHASH(ttesz) \
(((ttesz) < TTE512K)? 1 : (ttesz))
-#define HME_HASH_FUNCTION(hatid, vaddr, shift) \
- ((hatid != KHATID)? \
- (&uhme_hash[ (((uintptr_t)(hatid) ^ \
- ((uintptr_t)vaddr >> (shift))) & UHMEHASH_SZ) ]): \
- (&khme_hash[ (((uintptr_t)(hatid) ^ \
- ((uintptr_t)vaddr >> (shift))) & KHMEHASH_SZ) ]))
+#define HME_HASH_FUNCTION(hatid, vaddr, shift) \
+ ((((void *)hatid) != ((void *)KHATID)) ? \
+ (&uhme_hash[ (((uintptr_t)(hatid) ^ ((uintptr_t)vaddr >> (shift))) & \
+ UHMEHASH_SZ) ]): \
+ (&khme_hash[ (((uintptr_t)(hatid) ^ ((uintptr_t)vaddr >> (shift))) & \
+ KHMEHASH_SZ) ]))
/*
* This macro will traverse a hmeblk hash link list looking for an hme_blk
@@ -1067,7 +1373,6 @@ struct hmehash_bucket {
} \
}
-
#define SFMMU_HASH_LOCK(hmebp) \
(mutex_enter(&hmebp->hmehash_mutex))
@@ -1091,7 +1396,13 @@ struct hmehash_bucket {
#define astosfmmu(as) ((as)->a_hat)
#define hblktosfmmu(hmeblkp) ((sfmmu_t *)(hmeblkp)->hblk_tag.htag_id)
+#define hblktosrd(hmeblkp) ((sf_srd_t *)(hmeblkp)->hblk_tag.htag_id)
#define sfmmutoas(sfmmup) ((sfmmup)->sfmmu_as)
+
+#define sfmmutohtagid(sfmmup, rid) \
+ (((rid) == SFMMU_INVALID_SHMERID) ? (void *)(sfmmup) : \
+ (void *)((sfmmup)->sfmmu_srdp))
+
/*
* We use the sfmmu data structure to keep the per as page coloring info.
*/
@@ -1256,29 +1567,32 @@ struct tsbe {
struct tsbmiss {
sfmmu_t *ksfmmup; /* kernel hat id */
sfmmu_t *usfmmup; /* user hat id */
+ sf_srd_t *usrdp; /* user's SRD hat id */
struct tsbe *tsbptr; /* hardware computed ptr */
struct tsbe *tsbptr4m; /* hardware computed ptr */
+ struct tsbe *tsbscdptr; /* hardware computed ptr */
+ struct tsbe *tsbscdptr4m; /* hardware computed ptr */
uint64_t ismblkpa;
struct hmehash_bucket *khashstart;
struct hmehash_bucket *uhashstart;
uint_t khashsz;
uint_t uhashsz;
uint16_t dcache_line_mask; /* used to flush dcache */
- uint16_t hat_flags;
- uint32_t itlb_misses;
- uint32_t dtlb_misses;
+ uchar_t uhat_tteflags; /* private page sizes */
+ uchar_t uhat_rtteflags; /* SHME pagesizes */
uint32_t utsb_misses;
uint32_t ktsb_misses;
uint16_t uprot_traps;
uint16_t kprot_traps;
-
/*
* scratch[0] -> TSB_TAGACC
* scratch[1] -> TSBMISS_HMEBP
* scratch[2] -> TSBMISS_HATID
*/
uintptr_t scratch[3];
- uint8_t pad[0x10];
+ ulong_t shmermap[SFMMU_HMERGNMAP_WORDS]; /* 8 bytes */
+ ulong_t scd_shmermap[SFMMU_HMERGNMAP_WORDS]; /* 8 bytes */
+ uint8_t pad[48]; /* pad to 64 bytes */
};
/*
@@ -1311,10 +1625,9 @@ struct kpmtsbm {
uintptr_t pad[1];
};
-extern uint_t tsb_slab_size;
-extern uint_t tsb_slab_shift;
-extern uint_t tsb_slab_ttesz;
-extern uint_t tsb_slab_pamask;
+extern size_t tsb_slab_size;
+extern uint_t tsb_slab_shift;
+extern size_t tsb_slab_mask;
#endif /* !_ASM */
@@ -1336,7 +1649,12 @@ extern uint_t tsb_slab_pamask;
#define TSB_MIN_SZCODE TSB_8K_SZCODE /* min. supported TSB size */
#define TSB_MIN_OFFSET_MASK (TSB_OFFSET_MASK(TSB_MIN_SZCODE))
-#define UTSB_MAX_SZCODE TSB_1M_SZCODE /* max. supported TSB size */
+#ifdef sun4v
+#define UTSB_MAX_SZCODE TSB_256M_SZCODE /* max. supported TSB size */
+#else /* sun4u */
+#define UTSB_MAX_SZCODE TSB_1M_SZCODE /* max. supported TSB size */
+#endif /* sun4v */
+
#define UTSB_MAX_OFFSET_MASK (TSB_OFFSET_MASK(UTSB_MAX_SZCODE))
#define TSB_FREEMEM_MIN 0x1000 /* 32 mb */
@@ -1351,6 +1669,12 @@ extern uint_t tsb_slab_pamask;
#define TSB_1M_SZCODE 7 /* 64k entries */
#define TSB_2M_SZCODE 8 /* 128k entries */
#define TSB_4M_SZCODE 9 /* 256k entries */
+#define TSB_8M_SZCODE 10 /* 512k entries */
+#define TSB_16M_SZCODE 11 /* 1M entries */
+#define TSB_32M_SZCODE 12 /* 2M entries */
+#define TSB_64M_SZCODE 13 /* 4M entries */
+#define TSB_128M_SZCODE 14 /* 8M entries */
+#define TSB_256M_SZCODE 15 /* 16M entries */
#define TSB_ENTRY_SHIFT 4 /* each entry = 128 bits = 16 bytes */
#define TSB_ENTRY_SIZE (1 << 4)
#define TSB_START_SIZE 9
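
The size-code comments above imply entries = 512 << szcode (TSB_START_SIZE is 9) and bytes = entries * 16 (16-byte TSB entries), so size code 15 is a 256MB TSB with 16M entries. A hedged arithmetic check:

#include <assert.h>

int
main(void)
{
	int szc;

	for (szc = 0; szc <= 15; szc++) {
		unsigned long entries = 512UL << szc;
		unsigned long bytes = entries << 4;	/* 16-byte tsbe */

		assert(bytes == (8192UL << szc));
	}
	assert((512UL << 7) == 64 * 1024);		/* TSB_1M_SZCODE */
	assert((512UL << 15) == 16UL * 1024 * 1024);	/* TSB_256M_SZCODE */
	return (0);
}
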
@@ -1479,6 +1803,19 @@ extern uint_t tsb_slab_pamask;
sethi %hi(0x1000000), reg
/*
+ * This macro constructs a SPARC V9 "jmpl <source reg>, %g0"
+ * instruction, with the source register specified by the jump_reg_number.
+ * The jmp opcode [24:19] = 11 1000 and source register is bits [18:14].
+ * The instruction is returned in reg. The macro is used to patch in a jmpl
+ * instruction at runtime.
+ */
+#define MAKE_JMP_INSTR(jump_reg_number, reg, tmp) \
+ sethi %hi(0x81c00000), reg; \
+ mov jump_reg_number, tmp; \
+ sll tmp, 14, tmp; \
+ or reg, tmp, reg
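
A hedged C sketch of the word MAKE_JMP_INSTR assembles at patch time: 0x81c00000 is jmpl %g0 + %g0, %g0 (op3 111000, rd 0, rs1 0), and the source register number is OR'ed into bits <18:14>:

#include <assert.h>
#include <stdint.h>

static uint32_t
make_jmp_instr(uint32_t jump_reg_number)
{
	return (0x81c00000u | (jump_reg_number << 14));
}

int
main(void)
{
	/* jmpl %g1, %g0 (i.e. "jmp %g1") encodes rs1 = 1. */
	assert(make_jmp_instr(1) == 0x81c04000u);
	return (0);
}
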
+
+/*
* Macro to get hat per-MMU cnum on this CPU.
* sfmmu - In, pass in "sfmmup" from the caller.
* cnum - Out, return 'cnum' to the caller
@@ -1513,7 +1850,7 @@ extern uint_t tsb_slab_pamask;
#define CPU_TSBMISS_AREA(tsbmiss, tmp1) \
CPU_INDEX(tmp1, tsbmiss); /* tmp1 = cpu idx */ \
sethi %hi(tsbmiss_area), tsbmiss; /* tsbmiss base ptr */ \
- sllx tmp1, TSBMISS_SHIFT, tmp1; /* byte offset */ \
+ mulx tmp1, TSBMISS_SIZE, tmp1; /* byte offset */ \
or tsbmiss, %lo(tsbmiss_area), tsbmiss; \
add tsbmiss, tmp1, tsbmiss /* tsbmiss area of CPU */
@@ -1756,7 +2093,7 @@ extern void sfmmu_init_tsbs(void);
extern caddr_t sfmmu_ktsb_alloc(caddr_t);
extern int sfmmu_getctx_pri(void);
extern int sfmmu_getctx_sec(void);
-extern void sfmmu_setctx_sec(int);
+extern void sfmmu_setctx_sec(uint_t);
extern void sfmmu_inv_tsb(caddr_t, uint_t);
extern void sfmmu_init_ktsbinfo(void);
extern int sfmmu_setup_4lp(void);
@@ -1773,7 +2110,7 @@ extern int hat_page_relocate(page_t **, page_t **, spgcnt_t *);
extern int sfmmu_get_ppvcolor(struct page *);
extern int sfmmu_get_addrvcolor(caddr_t);
extern int sfmmu_hat_lock_held(sfmmu_t *);
-extern void sfmmu_alloc_ctx(sfmmu_t *, int, struct cpu *);
+extern int sfmmu_alloc_ctx(sfmmu_t *, int, struct cpu *, int);
/*
* Functions exported to xhat_sfmmu.c
@@ -1821,7 +2158,7 @@ extern uint_t mml_shift;
extern uint_t hblk_alloc_dynamic;
extern struct tsbmiss tsbmiss_area[NCPU];
extern struct kpmtsbm kpmtsbm_area[NCPU];
-extern int tsb_max_growsize;
+
#ifndef sun4v
extern int dtlb_resv_ttenum;
extern caddr_t utsb_vabase;
@@ -1839,6 +2176,7 @@ extern uint_t disable_auto_text_large_pages;
extern pfn_t sfmmu_kpm_vatopfn(caddr_t);
extern void sfmmu_kpm_patch_tlbm(void);
extern void sfmmu_kpm_patch_tsbm(void);
+extern void sfmmu_patch_shctx(void);
extern void sfmmu_kpm_load_tsb(caddr_t, tte_t *, int);
extern void sfmmu_kpm_unload_tsb(caddr_t, int);
extern void sfmmu_kpm_tsbmtl(short *, uint_t *, int);
@@ -1922,6 +2260,12 @@ struct sfmmu_global_stat {
int sf_tsb_allocfail; /* # times TSB alloc fail */
int sf_tsb_sectsb_create; /* # times second TSB added */
+ int sf_scd_1sttsb_alloc; /* # SCD 1st TSB allocations */
+ int sf_scd_2ndtsb_alloc; /* # SCD 2nd TSB allocations */
+ int sf_scd_1sttsb_allocfail; /* # SCD 1st TSB alloc fail */
+ int sf_scd_2ndtsb_allocfail; /* # SCD 2nd TSB alloc fail */
+
int sf_tteload8k; /* calls to sfmmu_tteload */
int sf_tteload64k; /* calls to sfmmu_tteload */
int sf_tteload512k; /* calls to sfmmu_tteload */
@@ -1973,6 +2317,13 @@ struct sfmmu_global_stat {
int sf_ctx_inv; /* #times invalidate MMU ctx */
int sf_tlb_reprog_pgsz; /* # times switch TLB pgsz */
+
+ int sf_region_remap_demap; /* # times shme remap demap */
+
+ int sf_create_scd; /* # times SCD is created */
+ int sf_join_scd; /* # process joined scd */
+ int sf_leave_scd; /* # process left scd */
+ int sf_destroy_scd; /* # times SCD is destroyed */
};
struct sfmmu_tsbsize_stat {
@@ -1986,6 +2337,12 @@ struct sfmmu_tsbsize_stat {
int sf_tsbsz_1m;
int sf_tsbsz_2m;
int sf_tsbsz_4m;
+ int sf_tsbsz_8m;
+ int sf_tsbsz_16m;
+ int sf_tsbsz_32m;
+ int sf_tsbsz_64m;
+ int sf_tsbsz_128m;
+ int sf_tsbsz_256m;
};
struct sfmmu_percpu_stat {
diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in
index eb8c2ca9b4..a157f706bf 100644
--- a/usr/src/uts/sun4/ml/offsets.in
+++ b/usr/src/uts/sun4/ml/offsets.in
@@ -209,6 +209,7 @@ ism_blk ISMBLK_SIZE
ism_map_t ISM_MAP_SZ
imap_seg IMAP_SEG
imap_vb_shift IMAP_VB_SHIFT
+ imap_rid IMAP_RID
imap_hatflags IMAP_HATFLAGS
imap_sz_mask IMAP_SZ_MASK
imap_ismhat IMAP_ISMHAT
diff --git a/usr/src/uts/sun4/ml/swtch.s b/usr/src/uts/sun4/ml/swtch.s
index 98ec1edd55..f8892bd0c7 100644
--- a/usr/src/uts/sun4/ml/swtch.s
+++ b/usr/src/uts/sun4/ml/swtch.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -251,23 +251,40 @@ resume(kthread_id_t t)
call sfmmu_setctx_sec ! switch to kernel context
or %o0, %o1, %o0
-
- ba,a,pt %icc, 4f
+ ba,a,pt %icc, 4f
+
!
! Switch to user address space.
!
3:
mov %i5, %o0 ! %o0 = sfmmup
mov %i1, %o2 ! %o2 = CPU
+ set SFMMU_PRIVATE, %o3 ! %o3 = sfmmu private flag
call sfmmu_alloc_ctx
mov %g0, %o1 ! %o1 = allocate flag = 0
+#ifdef sun4v
+ brz,a,pt %o0, 4f ! %o0 == 0, no private alloc'ed
+ nop
+
+ ldn [%i5 + SFMMU_SCDP], %o0 ! using shared contexts?
+ brz,a,pt %o0, 4f
+ nop
+
+ ldn [%o0 + SCD_SFMMUP], %o0 ! %o0 = scdp->scd_sfmmup
+ mov %i1, %o2 ! %o2 = CPU
+ set SFMMU_SHARED, %o3 ! %o3 = sfmmu shared flag
+ call sfmmu_alloc_ctx
+ mov 1, %o1 ! %o1 = allocate flag = 1
+
+#endif
+
4:
call sfmmu_load_mmustate ! program MMU registers
mov %i5, %o0
-
- wrpr %g0, %i4, %pstate ! enable interrupts
+ wrpr %g0, %i4, %pstate ! enable interrupts
+
5:
!
! spin until dispatched thread's mutex has
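
In C terms, the sun4v path added to resume() above behaves roughly as follows: if sfmmu_alloc_ctx() had to allocate a new private context and the hat belongs to an SCD, a shared context is also allocated for scdp->scd_sfmmup before sfmmu_load_mmustate() runs. The sketch below is self-contained and uses stub types and functions (my_hat, my_scd, alloc_ctx_stub) purely for illustration:

#include <stdio.h>

struct my_scd { int scd_dummy; };
struct my_hat { struct my_scd *sfmmu_scdp; };

enum { MY_SFMMU_PRIVATE = 0, MY_SFMMU_SHARED = 1 };

static int
alloc_ctx_stub(void *hat, int allocflag, int shflag)
{
	printf("alloc ctx: hat=%p allocflag=%d shflag=%d\n",
	    hat, allocflag, shflag);
	return (1);	/* pretend a new context was allocated */
}

static void
resume_ctx_sketch(struct my_hat *sfmmup)
{
	/* allocate flag 0: reuse the private context if it is still valid */
	if (alloc_ctx_stub(sfmmup, 0, MY_SFMMU_PRIVATE) != 0 &&
	    sfmmup->sfmmu_scdp != NULL) {
		/* new private ctx and the process is in an SCD: shared ctx too */
		(void) alloc_ctx_stub(sfmmup->sfmmu_scdp, 1, MY_SFMMU_SHARED);
	}
}

int
main(void)
{
	struct my_scd s;
	struct my_hat h = { &s };

	resume_ctx_sketch(&h);
	return (0);
}
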
diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c
index 7e4841f8c3..4dd6b92398 100644
--- a/usr/src/uts/sun4/os/startup.c
+++ b/usr/src/uts/sun4/os/startup.c
@@ -675,16 +675,16 @@ startup_init(void)
* behavior.)
*/
char sync_str[] =
- "warning @ warning off : sync "
- "%%tl-c %%tstate h# %p x! "
- "%%g1 h# %p x! %%g2 h# %p x! %%g3 h# %p x! "
- "%%g4 h# %p x! %%g5 h# %p x! %%g6 h# %p x! "
- "%%g7 h# %p x! %%o0 h# %p x! %%o1 h# %p x! "
- "%%o2 h# %p x! %%o3 h# %p x! %%o4 h# %p x! "
- "%%o5 h# %p x! %%o6 h# %p x! %%o7 h# %p x! "
- "%%tl-c %%tpc h# %p x! %%tl-c %%tnpc h# %p x! "
- "%%y h# %p l! %%tl-c %%tt h# %p x! "
- "sync ; warning !";
+ "warning @ warning off : sync "
+ "%%tl-c %%tstate h# %p x! "
+ "%%g1 h# %p x! %%g2 h# %p x! %%g3 h# %p x! "
+ "%%g4 h# %p x! %%g5 h# %p x! %%g6 h# %p x! "
+ "%%g7 h# %p x! %%o0 h# %p x! %%o1 h# %p x! "
+ "%%o2 h# %p x! %%o3 h# %p x! %%o4 h# %p x! "
+ "%%o5 h# %p x! %%o6 h# %p x! %%o7 h# %p x! "
+ "%%tl-c %%tpc h# %p x! %%tl-c %%tnpc h# %p x! "
+ "%%y h# %p l! %%tl-c %%tt h# %p x! "
+ "sync ; warning !";
/*
* 20 == num of %p substrings
@@ -716,16 +716,16 @@ startup_init(void)
* core file later.
*/
(void) sprintf((char *)bp, sync_str,
- (void *)&sync_reg_buf.r_tstate, (void *)&sync_reg_buf.r_g1,
- (void *)&sync_reg_buf.r_g2, (void *)&sync_reg_buf.r_g3,
- (void *)&sync_reg_buf.r_g4, (void *)&sync_reg_buf.r_g5,
- (void *)&sync_reg_buf.r_g6, (void *)&sync_reg_buf.r_g7,
- (void *)&sync_reg_buf.r_o0, (void *)&sync_reg_buf.r_o1,
- (void *)&sync_reg_buf.r_o2, (void *)&sync_reg_buf.r_o3,
- (void *)&sync_reg_buf.r_o4, (void *)&sync_reg_buf.r_o5,
- (void *)&sync_reg_buf.r_o6, (void *)&sync_reg_buf.r_o7,
- (void *)&sync_reg_buf.r_pc, (void *)&sync_reg_buf.r_npc,
- (void *)&sync_reg_buf.r_y, (void *)&sync_tt);
+ (void *)&sync_reg_buf.r_tstate, (void *)&sync_reg_buf.r_g1,
+ (void *)&sync_reg_buf.r_g2, (void *)&sync_reg_buf.r_g3,
+ (void *)&sync_reg_buf.r_g4, (void *)&sync_reg_buf.r_g5,
+ (void *)&sync_reg_buf.r_g6, (void *)&sync_reg_buf.r_g7,
+ (void *)&sync_reg_buf.r_o0, (void *)&sync_reg_buf.r_o1,
+ (void *)&sync_reg_buf.r_o2, (void *)&sync_reg_buf.r_o3,
+ (void *)&sync_reg_buf.r_o4, (void *)&sync_reg_buf.r_o5,
+ (void *)&sync_reg_buf.r_o6, (void *)&sync_reg_buf.r_o7,
+ (void *)&sync_reg_buf.r_pc, (void *)&sync_reg_buf.r_npc,
+ (void *)&sync_reg_buf.r_y, (void *)&sync_tt);
prom_interpret(bp, 0, 0, 0, 0, 0);
add_vx_handler("sync", 1, (void (*)(cell_t *))sync_handler);
}
@@ -1146,7 +1146,7 @@ startup_memlist(void)
for (mnode = 1; mnode < max_mem_nodes; mnode++) {
alloc_base = alloc_page_freelists(mnode, alloc_base,
- ecache_alignsize);
+ ecache_alignsize);
}
PRM_DEBUG(alloc_base);
}
@@ -1177,8 +1177,8 @@ startup_memlist(void)
* because there was not enough space within the nucleus.
*/
kpmptable_sz = (kpm_smallpages == 0) ?
- sizeof (kpm_hlk_t) * kpmp_table_sz :
- sizeof (kpm_shlk_t) * kpmp_stable_sz;
+ sizeof (kpm_hlk_t) * kpmp_table_sz :
+ sizeof (kpm_shlk_t) * kpmp_stable_sz;
alloc_sz = roundup(kpmptable_sz, alloc_alignsize);
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
@@ -2158,7 +2158,7 @@ startup_vm(void)
* kpm segment
*/
segmap_kpm = kpm_enable &&
- segmap_kpm && PAGESIZE == MAXBSIZE;
+ segmap_kpm && PAGESIZE == MAXBSIZE;
if (kpm_enable) {
rw_enter(&kas.a_lock, RW_WRITER);
@@ -2617,12 +2617,12 @@ memseg_list_add(struct memseg *memsegp)
if (kpm_enable) {
memsegp->nextpa = (memsegp->next) ?
- va_to_pa(memsegp->next) : MSEG_NULLPTR_PA;
+ va_to_pa(memsegp->next) : MSEG_NULLPTR_PA;
if (prev_memsegp != &memsegs) {
struct memseg *msp;
msp = (struct memseg *)((caddr_t)prev_memsegp -
- offsetof(struct memseg, next));
+ offsetof(struct memseg, next));
msp->nextpa = va_to_pa(memsegp);
} else {
memsegspa = va_to_pa(memsegs);
@@ -2722,14 +2722,14 @@ kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages,
msp->kpm_pages =
(kpm_page_t *)kpm_pp - 1;
kpm_pp = (uintptr_t)
- ((kpm_page_t *)kpm_pp
- + nelem - 1);
+ ((kpm_page_t *)kpm_pp
+ + nelem - 1);
} else {
msp->kpm_spages =
(kpm_spage_t *)kpm_pp - 1;
kpm_pp = (uintptr_t)
- ((kpm_spage_t *)kpm_pp
- + nelem - 1);
+ ((kpm_spage_t *)kpm_pp
+ + nelem - 1);
}
nelem_used += nelem - 1;
@@ -2738,14 +2738,14 @@ kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages,
msp->kpm_pages =
(kpm_page_t *)kpm_pp;
kpm_pp = (uintptr_t)
- ((kpm_page_t *)kpm_pp
- + nelem);
+ ((kpm_page_t *)kpm_pp
+ + nelem);
} else {
msp->kpm_spages =
(kpm_spage_t *)kpm_pp;
kpm_pp = (uintptr_t)
- ((kpm_spage_t *)
- kpm_pp + nelem);
+ ((kpm_spage_t *)
+ kpm_pp + nelem);
}
nelem_used += nelem;
}
@@ -2754,11 +2754,11 @@ kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages,
if (kpm_smallpages == 0) {
msp->kpm_pages = (kpm_page_t *)kpm_pp;
kpm_pp = (uintptr_t)
- ((kpm_page_t *)kpm_pp + nelem);
+ ((kpm_page_t *)kpm_pp + nelem);
} else {
msp->kpm_spages = (kpm_spage_t *)kpm_pp;
kpm_pp = (uintptr_t)
- ((kpm_spage_t *)kpm_pp + nelem);
+ ((kpm_spage_t *)kpm_pp + nelem);
}
nelem_used = nelem;
}
@@ -2823,9 +2823,9 @@ kvm_init(void)
(void) segkmem_create(&kvalloc);
if (kmem64_base) {
- (void) seg_attach(&kas, (caddr_t)kmem64_base,
- (size_t)(kmem64_end - kmem64_base), &kmem64);
- (void) segkmem_create(&kmem64);
+ (void) seg_attach(&kas, (caddr_t)kmem64_base,
+ (size_t)(kmem64_end - kmem64_base), &kmem64);
+ (void) segkmem_create(&kmem64);
}
/*
@@ -2874,7 +2874,9 @@ char obp_tte_str[] =
"h# %x constant HMEBP_HBLK "
"h# %x constant HMEBUCKET_SIZE "
"h# %x constant HTAG_SFMMUPSZ "
- "h# %x constant HTAG_REHASHSZ "
+ "h# %x constant HTAG_BSPAGE_SHIFT "
+ "h# %x constant HTAG_REHASH_SHIFT "
+ "h# %x constant SFMMU_INVALID_SHMERID "
"h# %x constant mmu_hashcnt "
"h# %p constant uhme_hash "
"h# %p constant khme_hash "
@@ -2949,8 +2951,10 @@ char obp_tte_str[] =
"; "
": HME_HASH_TAG ( sfmmup rehash addr -- hblktag ) "
- " over HME_HASH_SHIFT HME_HASH_BSPAGE ( sfmmup rehash bspage ) "
- " HTAG_REHASHSZ << or nip ( hblktag ) "
+ " over HME_HASH_SHIFT HME_HASH_BSPAGE ( sfmmup rehash bspage ) "
+ " HTAG_BSPAGE_SHIFT << ( sfmmup rehash htag-bspage )"
+ " swap HTAG_REHASH_SHIFT << or ( sfmmup htag-bspage-rehash )"
+ " SFMMU_INVALID_SHMERID or nip ( hblktag ) "
"; "
": HBLK_TO_TTEP ( hmeblkp addr -- ttep ) "
@@ -3023,7 +3027,9 @@ create_va_to_tte(void)
OFFSET(struct hmehash_bucket, hmeh_nextpa),
sizeof (struct hmehash_bucket),
HTAG_SFMMUPSZ,
- HTAG_REHASHSZ,
+ HTAG_BSPAGE_SHIFT,
+ HTAG_REHASH_SHIFT,
+ SFMMU_INVALID_SHMERID,
mmu_hashcnt,
(caddr_t)va_to_pa((caddr_t)uhme_hash),
(caddr_t)va_to_pa((caddr_t)khme_hash),
@@ -3136,8 +3142,8 @@ do_prom_version_check(void)
pnode_t node;
char buf[64];
static char drev[] = "Down-rev firmware detected%s\n"
- "\tPlease upgrade to the following minimum version:\n"
- "\t\t%s\n";
+ "\tPlease upgrade to the following minimum version:\n"
+ "\t\t%s\n";
i = prom_version_check(buf, sizeof (buf), &node);
diff --git a/usr/src/uts/sun4/vm/sfmmu.c b/usr/src/uts/sun4/vm/sfmmu.c
index 24ee075277..9a0b8208de 100644
--- a/usr/src/uts/sun4/vm/sfmmu.c
+++ b/usr/src/uts/sun4/vm/sfmmu.c
@@ -195,6 +195,10 @@ hat_kern_setup(void)
}
}
+ if (!shctx_on || disable_shctx) {
+ sfmmu_patch_shctx();
+ }
+
/*
* The 8K-indexed kernel TSB space is used to hold
* translations below...
@@ -1212,9 +1216,6 @@ sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
NULL, NULL, vmflag)) == NULL))
return (NULL);
- /* If we ever don't want TSB slab-sized pages, this will panic */
- ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);
-
if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
if (inaddr == NULL)
vmem_xfree(vmp, addr, size);
@@ -1288,8 +1289,6 @@ sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
pgcnt_t pgs_left = npages;
page_t *rootpp = NULL;
- ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);
-
hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
diff --git a/usr/src/uts/sun4/vm/vm_dep.c b/usr/src/uts/sun4/vm/vm_dep.c
index 4e249ece6d..763b3dc737 100644
--- a/usr/src/uts/sun4/vm/vm_dep.c
+++ b/usr/src/uts/sun4/vm/vm_dep.c
@@ -52,20 +52,17 @@
/*
* These variables are set by module specific config routines.
- * They are only set by modules which will use physical cache page coloring
- * and/or virtual cache page coloring.
+ * They are only set by modules which will use physical cache page coloring.
*/
int do_pg_coloring = 0;
-int do_virtual_coloring = 0;
/*
* These variables can be conveniently patched at kernel load time to
- * prevent do_pg_coloring or do_virtual_coloring from being enabled by
+ * prevent do_pg_coloring from being enabled by
* module specific config routines.
*/
int use_page_coloring = 1;
-int use_virtual_coloring = 1;
/*
* initialized by page_coloring_init()
diff --git a/usr/src/uts/sun4u/cpu/opl_olympus.c b/usr/src/uts/sun4u/cpu/opl_olympus.c
index c39cac38a9..1dc8c4fad4 100644
--- a/usr/src/uts/sun4u/cpu/opl_olympus.c
+++ b/usr/src/uts/sun4u/cpu/opl_olympus.c
@@ -493,7 +493,6 @@ void
cpu_setup(void)
{
extern int at_flags;
- extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int cpc_has_overflow_intr;
uint64_t cpu0_log;
extern uint64_t opl_cpu0_err_log;
@@ -536,8 +535,6 @@ cpu_setup(void)
if (use_page_coloring) {
do_pg_coloring = 1;
- if (use_virtual_coloring)
- do_virtual_coloring = 1;
}
isa_list =
@@ -579,12 +576,6 @@ cpu_setup(void)
cpc_has_overflow_intr = 1;
/*
- * Use SPARC64-VI flush-all support
- */
- if (!disable_delay_tlb_flush)
- delay_tlb_flush = 1;
-
- /*
* Declare that this architecture/cpu combination does not support
* fpRAS.
*/
@@ -920,7 +911,7 @@ mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
* the two most used page sizes changes and we're using
* large pages in this process.
*/
- if (sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) {
+ if (SFMMU_LGPGS_INUSE(sfmmup)) {
/* Sort page sizes. */
for (i = 0; i < mmu_page_sizes; i++) {
sortcnt[i] = ttecnt[i];
diff --git a/usr/src/uts/sun4u/cpu/spitfire.c b/usr/src/uts/sun4u/cpu/spitfire.c
index 786f942360..40fd06261d 100644
--- a/usr/src/uts/sun4u/cpu/spitfire.c
+++ b/usr/src/uts/sun4u/cpu/spitfire.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -451,8 +451,6 @@ cpu_setup(void)
if (use_page_coloring) {
do_pg_coloring = 1;
- if (use_virtual_coloring)
- do_virtual_coloring = 1;
}
/*
diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c
index dcb848f542..13999fd361 100644
--- a/usr/src/uts/sun4u/cpu/us3_common.c
+++ b/usr/src/uts/sun4u/cpu/us3_common.c
@@ -474,7 +474,6 @@ void
cpu_setup(void)
{
extern int at_flags;
- extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int cpc_has_overflow_intr;
/*
@@ -507,8 +506,6 @@ cpu_setup(void)
if (use_page_coloring) {
do_pg_coloring = 1;
- if (use_virtual_coloring)
- do_virtual_coloring = 1;
}
isa_list =
@@ -552,12 +549,6 @@ cpu_setup(void)
*/
cpc_has_overflow_intr = 1;
- /*
- * Use cheetah flush-all support
- */
- if (!disable_delay_tlb_flush)
- delay_tlb_flush = 1;
-
#if defined(CPU_IMP_DUAL_PAGESIZE)
/*
* Use Cheetah+ and later dual page size support.
diff --git a/usr/src/uts/sun4u/cpu/us3_common_mmu.c b/usr/src/uts/sun4u/cpu/us3_common_mmu.c
index 9fd37a9ed5..892be30295 100644
--- a/usr/src/uts/sun4u/cpu/us3_common_mmu.c
+++ b/usr/src/uts/sun4u/cpu/us3_common_mmu.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -151,9 +151,9 @@ mmu_init_large_pages(size_t ism_pagesize)
if (cpu_impl_dual_pgsz == 0) { /* disable_dual_pgsz flag */
pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
mmu_disable_ism_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
+ (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
+ (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
return;
}
@@ -161,29 +161,29 @@ mmu_init_large_pages(size_t ism_pagesize)
case MMU_PAGESIZE4M:
pan_disable_large_pages = (1 << TTE256M);
mmu_disable_ism_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
+ (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
+ (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
break;
case MMU_PAGESIZE32M:
pan_disable_large_pages = (1 << TTE256M);
mmu_disable_ism_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE256M));
+ (1 << TTE512K) | (1 << TTE256M));
mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
+ (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
adjust_data_maxlpsize(ism_pagesize);
break;
case MMU_PAGESIZE256M:
pan_disable_large_pages = (1 << TTE32M);
mmu_disable_ism_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE32M));
+ (1 << TTE512K) | (1 << TTE32M));
mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
+ (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
adjust_data_maxlpsize(ism_pagesize);
break;
default:
cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
- ism_pagesize);
+ ism_pagesize);
break;
}
}
@@ -262,24 +262,37 @@ mmu_fixup_large_pages(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
/*
* Don't program 2nd dtlb for kernel and ism hat
*/
- ASSERT(hat->sfmmu_ismhat == NULL);
+ ASSERT(hat->sfmmu_ismhat == 0);
ASSERT(hat != ksfmmup);
ASSERT(cpu_impl_dual_pgsz == 1);
- ASSERT((!SFMMU_FLAGS_ISSET(hat, HAT_32M_FLAG)) ||
- (!SFMMU_FLAGS_ISSET(hat, HAT_256M_FLAG)));
+ ASSERT(!SFMMU_TTEFLAGS_ISSET(hat, HAT_32M_FLAG) ||
+ !SFMMU_TTEFLAGS_ISSET(hat, HAT_256M_FLAG));
+ ASSERT(!SFMMU_TTEFLAGS_ISSET(hat, HAT_256M_FLAG) ||
+ !SFMMU_TTEFLAGS_ISSET(hat, HAT_32M_FLAG));
+ ASSERT(!SFMMU_FLAGS_ISSET(hat, HAT_32M_ISM) ||
+ !SFMMU_FLAGS_ISSET(hat, HAT_256M_ISM));
+ ASSERT(!SFMMU_FLAGS_ISSET(hat, HAT_256M_ISM) ||
+ !SFMMU_FLAGS_ISSET(hat, HAT_32M_ISM));
-	if ((SFMMU_FLAGS_ISSET(hat, HAT_32M_FLAG)) || (ttecnt[TTE32M] != 0)) {
+
+	if (SFMMU_TTEFLAGS_ISSET(hat, HAT_32M_FLAG) ||
+	    (ttecnt[TTE32M] != 0) ||
+	    SFMMU_FLAGS_ISSET(hat, HAT_32M_ISM)) {
spgsz = pgsz1;
pgsz1 = TTE32M;
if (pgsz0 == TTE32M)
pgsz0 = spgsz;
- } else if ((SFMMU_FLAGS_ISSET(hat, HAT_256M_FLAG)) ||
- (ttecnt[TTE256M] != 0)) {
+
+ } else if (SFMMU_TTEFLAGS_ISSET(hat, HAT_256M_FLAG) ||
+ (ttecnt[TTE256M] != 0) ||
+ SFMMU_FLAGS_ISSET(hat, HAT_256M_ISM)) {
+
spgsz = pgsz1;
pgsz1 = TTE256M;
if (pgsz0 == TTE256M)
pgsz0 = spgsz;
+
} else if ((pgsz1 == TTE512K) || (pgsz1 == TTE4M)) {
if ((pgsz0 != TTE512K) && (pgsz0 != TTE4M)) {
spgsz = pgsz0;
@@ -470,7 +483,7 @@ mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
* large pages in this process, except for Panther 32M/256M pages,
* which the Panther T16 does not support.
*/
- if (sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) {
+ if (SFMMU_LGPGS_INUSE(sfmmup)) {
/* Sort page sizes. */
for (i = 0; i < mmu_page_sizes; i++) {
sortcnt[i] = ttecnt[i];
@@ -569,7 +582,7 @@ mmu_init_kernel_pgsz(struct hat *hat)
hat->sfmmu_cext = new_cext_primary;
kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
- ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
+ ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
}
size_t
@@ -604,7 +617,7 @@ mmu_get_kernel_lpsize(size_t lpsize)
if (lpsize == TTEBYTES(p_lpgsz->tte) &&
(heaplp_use_dt512 == -1 ||
- heaplp_use_dt512 == p_lpgsz->use_dt512)) {
+ heaplp_use_dt512 == p_lpgsz->use_dt512)) {
tte = p_lpgsz->tte;
heaplp_use_dt512 = p_lpgsz->use_dt512;
diff --git a/usr/src/uts/sun4u/ml/mach_offsets.in b/usr/src/uts/sun4u/ml/mach_offsets.in
index 6f1f317f67..9ce9aea4fb 100644
--- a/usr/src/uts/sun4u/ml/mach_offsets.in
+++ b/usr/src/uts/sun4u/ml/mach_offsets.in
@@ -18,7 +18,7 @@
\
\ CDDL HEADER END
\
-\ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+\ Copyright 2007 Sun Microsystems, Inc. All rights reserved.
\ Use is subject to license terms.
\
\ offsets.in: input file to produce assym.h using the stabs program
@@ -103,7 +103,7 @@ hat HAT_SIZE
sfmmu_cpusran
sfmmu_tsb
sfmmu_ismblkpa
- sfmmu_flags
+ sfmmu_tteflags
sfmmu_cext
sfmmu_ctx_lock
sfmmu_ctxs
@@ -118,20 +118,19 @@ sfmmu_global_stat HATSTAT_SIZE
sf_hment SFHME_SIZE SFHME_SHIFT
hme_tte SFHME_TTE
-tsbmiss TSBMISS_SIZE TSBMISS_SHIFT
- tsbptr TSBMISS_TSBPTR
- tsbptr4m TSBMISS_TSBPTR4M
+tsbmiss TSBMISS_SIZE
ksfmmup TSBMISS_KHATID
usfmmup TSBMISS_UHATID
- khashsz TSBMISS_KHASHSZ
+ tsbptr TSBMISS_TSBPTR
+ tsbptr4m TSBMISS_TSBPTR4M
+ ismblkpa TSBMISS_ISMBLKPA
khashstart TSBMISS_KHASHSTART
- dcache_line_mask TSBMISS_DMASK
- uhashsz TSBMISS_UHASHSZ
uhashstart TSBMISS_UHASHSTART
- hat_flags TSBMISS_HATFLAGS
- ismblkpa TSBMISS_ISMBLKPA
- itlb_misses TSBMISS_ITLBMISS
- dtlb_misses TSBMISS_DTLBMISS
+ khashsz TSBMISS_KHASHSZ
+ uhashsz TSBMISS_UHASHSZ
+ dcache_line_mask TSBMISS_DMASK
+ uhat_tteflags TSBMISS_UTTEFLAGS
+ uhat_rtteflags TSBMISS_URTTEFLAGS
utsb_misses TSBMISS_UTSBMISS
ktsb_misses TSBMISS_KTSBMISS
uprot_traps TSBMISS_UPROTS
@@ -141,8 +140,6 @@ tsbmiss TSBMISS_SIZE TSBMISS_SHIFT
\#define TSB_TAGACC (0 * TSBMISS_SCRATCH_INCR)
\#define TSBMISS_HMEBP (1 * TSBMISS_SCRATCH_INCR)
\#define TSBMISS_HATID (2 * TSBMISS_SCRATCH_INCR)
-\#define TSBMISS_XMMURET (3 * TSBMISS_SCRATCH_INCR)
-\#define TSBMISS_XMMUPTR (4 * TSBMISS_SCRATCH_INCR)
kpmtsbm KPMTSBM_SIZE KPMTSBM_SHIFT
vbase KPMTSBM_VBASE
diff --git a/usr/src/uts/sun4u/ml/trap_table.s b/usr/src/uts/sun4u/ml/trap_table.s
index 42fd217205..160106d9d7 100644
--- a/usr/src/uts/sun4u/ml/trap_table.s
+++ b/usr/src/uts/sun4u/ml/trap_table.s
@@ -112,20 +112,6 @@
#endif
/*
- * This macro is used to update per cpu mmu stats in perf critical
- * paths. It is only enabled in debug kernels or if SFMMU_STAT_GATHER
- * is defined.
- */
-#if defined(DEBUG) || defined(SFMMU_STAT_GATHER)
-#define HAT_PERCPU_DBSTAT(stat) \
- mov stat, %g1 ;\
- ba stat_mmu ;\
- rd %pc, %g7
-#else
-#define HAT_PERCPU_DBSTAT(stat)
-#endif /* DEBUG || SFMMU_STAT_GATHER */
-
-/*
* This first set are funneled to trap() with %tt as the type.
* Trap will then either panic or send the user a signal.
*/
@@ -1081,7 +1067,6 @@ tt1_dtlbmiss:
#define DTLB_MISS(table_name) ;\
.global table_name/**/_dtlbmiss ;\
table_name/**/_dtlbmiss: ;\
- HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */ ;\
mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\
ldxa [%g0]ASI_DMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\
ldxa [%g6]ASI_DMMU, %g2 /* g2 = tag access */ ;\
@@ -1097,7 +1082,7 @@ table_name/**/_dtlbmiss: ;\
ldda [%g1]ASI_QUAD_LDD_PHYS, %g4 /* g4 = tag, %g5 data */;\
cmp %g4, %g7 ;\
bne,pn %xcc, sfmmu_tsb_miss_tt /* no 4M TSB, miss */ ;\
- mov %g0, %g3 /* clear 4M tsbe ptr */ ;\
+ mov -1, %g3 /* set 4M tsbe ptr to -1 */ ;\
TT_TRACE(trace_tsbhit) /* 2 instr ifdef TRAPTRACE */ ;\
stxa %g5, [%g0]ASI_DTLB_IN /* trapstat expects TTE */ ;\
retry /* in %g5 */ ;\
@@ -1110,12 +1095,14 @@ table_name/**/_dtlbmiss: ;\
unimp 0 ;\
unimp 0 ;\
unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
.align 128
#else /* UTSB_PHYS */
#define DTLB_MISS(table_name) ;\
.global table_name/**/_dtlbmiss ;\
table_name/**/_dtlbmiss: ;\
- HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */ ;\
mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\
ldxa [%g0]ASI_DMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\
ldxa [%g6]ASI_DMMU, %g2 /* g2 = tag access */ ;\
@@ -1129,7 +1116,7 @@ table_name/**/_dtlbmiss: ;\
ldda [%g1]ASI_NQUAD_LD, %g4 /* g4 = tag, %g5 data */ ;\
cmp %g4, %g7 ;\
bne,pn %xcc, sfmmu_tsb_miss_tt /* no 4M TSB, miss */ ;\
- mov %g0, %g3 /* clear 4M tsbe ptr */ ;\
+ mov -1, %g3 /* set 4M tsbe ptr to -1 */ ;\
TT_TRACE(trace_tsbhit) /* 2 instr ifdef TRAPTRACE */ ;\
stxa %g5, [%g0]ASI_DTLB_IN /* trapstat expects TTE */ ;\
retry /* in %g5 */ ;\
@@ -1138,12 +1125,15 @@ table_name/**/_dtlbmiss: ;\
unimp 0 ;\
unimp 0 ;\
unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
.align 128
#endif /* UTSB_PHYS */
@@ -1169,7 +1159,6 @@ tt1_itlbmiss:
#define ITLB_MISS(table_name) \
.global table_name/**/_itlbmiss ;\
table_name/**/_itlbmiss: ;\
- HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */ ;\
mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\
ldxa [%g0]ASI_IMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\
ldxa [%g6]ASI_IMMU, %g2 /* g2 = tag access */ ;\
@@ -1185,7 +1174,7 @@ table_name/**/_itlbmiss: ;\
ldda [%g1]ASI_QUAD_LDD_PHYS, %g4 /* g4 = tag, g5 = data */ ;\
cmp %g4, %g7 ;\
bne,pn %xcc, sfmmu_tsb_miss_tt /* br if 8k ptr miss */ ;\
- mov %g0, %g3 /* no 4M TSB */ ;\
+ mov -1, %g3 /* set 4M TSB ptr to -1 */ ;\
andcc %g5, TTE_EXECPRM_INT, %g0 /* check execute bit */ ;\
bz,pn %icc, exec_fault ;\
nop ;\
@@ -1198,12 +1187,14 @@ table_name/**/_itlbmiss: ;\
unimp 0 ;\
unimp 0 ;\
unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
.align 128
#else /* UTSB_PHYS */
#define ITLB_MISS(table_name) \
.global table_name/**/_itlbmiss ;\
table_name/**/_itlbmiss: ;\
- HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */ ;\
mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\
ldxa [%g0]ASI_IMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\
ldxa [%g6]ASI_IMMU, %g2 /* g2 = tag access */ ;\
@@ -1217,7 +1208,7 @@ table_name/**/_itlbmiss: ;\
ldda [%g1]ASI_NQUAD_LD, %g4 /* g4 = tag, g5 = data */ ;\
cmp %g4, %g7 ;\
bne,pn %xcc, sfmmu_tsb_miss_tt /* br if 8k ptr miss */ ;\
- mov %g0, %g3 /* no 4M TSB */ ;\
+ mov -1, %g3 /* set 4M TSB ptr to -1 */ ;\
andcc %g5, TTE_EXECPRM_INT, %g0 /* check execute bit */ ;\
bz,pn %icc, exec_fault ;\
nop ;\
@@ -1229,9 +1220,12 @@ table_name/**/_itlbmiss: ;\
unimp 0 ;\
unimp 0 ;\
unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
- unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
+ unimp 0 ;\
.align 128
#endif /* UTSB_PHYS */
@@ -2926,21 +2920,6 @@ trace_dataprot:
#endif /* TRAPTRACE */
/*
- * expects offset into tsbmiss area in %g1 and return pc in %g7
- */
-stat_mmu:
- CPU_INDEX(%g5, %g6)
- sethi %hi(tsbmiss_area), %g6
- sllx %g5, TSBMISS_SHIFT, %g5
- or %g6, %lo(tsbmiss_area), %g6
- add %g6, %g5, %g6 /* g6 = tsbmiss area */
- ld [%g6 + %g1], %g5
- add %g5, 1, %g5
- jmp %g7 + 4
- st %g5, [%g6 + %g1]
-
-
-/*
* fast_trap_done, fast_trap_done_chk_intr:
*
* Due to the design of UltraSPARC pipeline, pending interrupts are not
diff --git a/usr/src/uts/sun4u/sys/machsystm.h b/usr/src/uts/sun4u/sys/machsystm.h
index 317b9e78b9..86885bbef4 100644
--- a/usr/src/uts/sun4u/sys/machsystm.h
+++ b/usr/src/uts/sun4u/sys/machsystm.h
@@ -269,9 +269,7 @@ extern int cpu_setsize; /* Maximum ecache setsize of configured cpus */
* VM
*/
extern int do_pg_coloring;
-extern int do_virtual_coloring;
extern int use_page_coloring;
-extern int use_virtual_coloring;
extern uint_t vac_colors_mask;
extern int ndata_alloc_page_freelists(struct memlist *, int);
diff --git a/usr/src/uts/sun4u/vm/mach_sfmmu.h b/usr/src/uts/sun4u/vm/mach_sfmmu.h
index 66640afb9e..45f6480715 100644
--- a/usr/src/uts/sun4u/vm/mach_sfmmu.h
+++ b/usr/src/uts/sun4u/vm/mach_sfmmu.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -70,6 +70,19 @@ extern "C" {
#ifdef _ASM
/*
+ * This macro is used to set private secondary context register in
+ * sfmmu_alloc_ctx().
+ * Input:
+ * cnum : cnum
+ * arg2 : unused
+ */
+#define SET_SECCTX(cnum, arg2, tmp1, tmp2) \
+ mov MMU_SCONTEXT, tmp1; \
+ sethi %hi(FLUSH_ADDR), tmp2; \
+ stxa cnum, [tmp1]ASI_MMU_CTX; \
+ flush tmp2
+
+/*
* This macro is used in the MMU code to check if TL should be lowered from
* 2 to 1 to pop trapstat's state. See the block comment in trapstat.c
* for details.
@@ -738,6 +751,12 @@ label/**/1: \
label/**/1: \
/* END CSTYLED */
+/*
+ * Macro to get SCD shared hme map on sun4v platforms
+ * (not applicable to sun4u platforms)
+ */
+#define GET_SCDSHMERMAP(tsbarea, hmeblkpa, hatid, hmemisc)
+
#ifndef TRAPTRACE
/*
* Same as above, with the following additions:
diff --git a/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s b/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s
index 2bd7ee732f..18d2c9dc6f 100644
--- a/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s
+++ b/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -86,7 +86,7 @@ sfmmu_getctx_sec()
/* ARGSUSED */
void
-sfmmu_setctx_sec(int ctx)
+sfmmu_setctx_sec(uint_t ctx)
{}
/* ARGSUSED */
@@ -509,9 +509,9 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
6: ldx [%o0 + SFMMU_ISMBLKPA], %o1 ! copy members of sfmmu
CPU_TSBMISS_AREA(%o2, %o3) ! we need to access from
stx %o1, [%o2 + TSBMISS_ISMBLKPA] ! sfmmu_tsb_miss into the
- lduh [%o0 + SFMMU_FLAGS], %o3 ! per-CPU tsbmiss area.
+ ldub [%o0 + SFMMU_TTEFLAGS], %o3 ! per-CPU tsbmiss area.
stx %o0, [%o2 + TSBMISS_UHATID]
- stuh %o3, [%o2 + TSBMISS_HATFLAGS]
+ stub %o3, [%o2 + TSBMISS_UTTEFLAGS]
3: retl
nop
diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c
index c685046a0b..1abca2c394 100644
--- a/usr/src/uts/sun4v/cpu/niagara.c
+++ b/usr/src/uts/sun4v/cpu/niagara.c
@@ -144,6 +144,8 @@ cpu_setup(void)
* Niagara has a performance counter overflow interrupt
*/
cpc_has_overflow_intr = 1;
+
+ shctx_on = 0;
}
#define MB(n) ((n) * 1024 * 1024)
diff --git a/usr/src/uts/sun4v/ml/mach_interrupt.s b/usr/src/uts/sun4v/ml/mach_interrupt.s
index 4697b55420..b585ac6c68 100644
--- a/usr/src/uts/sun4v/ml/mach_interrupt.s
+++ b/usr/src/uts/sun4v/ml/mach_interrupt.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -41,6 +41,8 @@
#include <sys/machasi.h>
#include <sys/scb.h>
#include <sys/error.h>
+#include <sys/mmu.h>
+#include <vm/hat_sfmmu.h>
#define INTR_REPORT_SIZE 64
#ifdef TRAPTRACE
@@ -75,7 +77,7 @@ cpu_mondo(void)
mov CPU_MONDO_Q_TL, %g4
ldxa [%g4]ASI_QUEUE, %g7 ! %g7 = tail ptr
cmp %g6, %g7
- be,pn %xcc, 0f ! head == tail
+ be,pn %xcc, 3f ! head == tail
nop
CPU_ADDR(%g1,%g2)
@@ -133,18 +135,64 @@ cpu_mondo(void)
*/
set KERNELBASE, %g4
cmp %g5, %g4
- bl,a,pn %xcc, 1f ! branch if bad %pc
- nop
+ bl,pn %xcc, 2f ! branch if bad %pc
+ nop
+
+ /*
+ * If this platform supports shared contexts and we are jumping
+ * to OBP code, then we need to invalidate both contexts to prevent OBP
+ * from corrupting the shared context registers.
+ *
+ * If shared contexts are not supported then the next two instructions
+ * will be patched with:
+ *
+ * jmp %g5
+ * nop
+ *
+ */
+ .global sfmmu_shctx_cpu_mondo_patch
+sfmmu_shctx_cpu_mondo_patch:
+	set	OFW_START_ADDR, %g4	! Check if this is a call into OBP
+ cmp %g5, %g4
+ bl,pt %xcc, 1f
+ nop
+ set OFW_END_ADDR, %g4
+ cmp %g5, %g4
+ bg,pn %xcc, 1f
+ nop
+ mov MMU_PCONTEXT, %g3
+ ldxa [%g3]ASI_MMU_CTX, %g4
+ cmp %g4, INVALID_CONTEXT ! Check if we are in kernel mode
+ ble,pn %xcc, 1f ! or the primary context is invalid
+ nop
+	set	INVALID_CONTEXT, %g4	! Invalidate contexts - compatibility
+ stxa %g4, [%g3]ASI_MMU_CTX ! mode ensures shared contexts are also
+ mov MMU_SCONTEXT, %g3 ! invalidated.
+ stxa %g4, [%g3]ASI_MMU_CTX
+ membar #Sync
+ mov %o0, %g3 ! save output regs
+ mov %o1, %g4
+ mov %o5, %g6
+ clr %o0 ! Invalidate tsbs, set ntsb = 0
+ clr %o1 ! and HV_TSB_INFO_PA = 0
+ mov MMU_TSB_CTXNON0, %o5
+ ta FAST_TRAP ! set TSB info for user process
+ brnz,a,pn %o0, ptl1_panic
+ mov PTL1_BAD_HCALL, %g1
+ mov %g3, %o0 ! restore output regs
+ mov %g4, %o1
+ mov %g6, %o5
+1:
jmp %g5 ! jump to traphandler
nop
-1:
+2:
! invalid trap handler, discard it for now
set cpu_mondo_inval, %g4
ldx [%g4], %g5
inc %g5
stx %g5, [%g4]
-0:
+3:
retry
/* Never Reached */
SET_SIZE(cpu_mondo)
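Restated in C, the patched cpu_mondo sequence invalidates the context registers only when the handler address falls inside the OBP range and a valid user context is still installed. The sketch below simulates the context registers with plain variables and uses made-up values for the OBP range and INVALID_CONTEXT; the real handler also zeroes the non-zero-context TSB info through the MMU_TSB_CTXNON0 hypercall before jumping to %g5.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical OBP range and context values; the real ones come from OBP/mmu.h. */
#define	OFW_START_ADDR	0xf0000000UL
#define	OFW_END_ADDR	0xf0100000UL
#define	INVALID_CONTEXT	1ULL

/* Simulated context registers standing in for the ASI_MMU_CTX accesses. */
static uint64_t pcontext = 10;	/* pretend a user context is installed */
static uint64_t scontext = 10;

static void
mondo_prepare_jump(uintptr_t handler_pc)
{
	if (handler_pc < OFW_START_ADDR || handler_pc > OFW_END_ADDR)
		return;			/* not a call into OBP */
	if (pcontext <= INVALID_CONTEXT)
		return;			/* kernel mode or context already invalid */

	/*
	 * Compatibility mode means invalidating the context-0 registers also
	 * invalidates the shared (context-1) registers, so OBP cannot corrupt
	 * them.
	 */
	pcontext = INVALID_CONTEXT;
	scontext = INVALID_CONTEXT;
}

int
main(void)
{
	mondo_prepare_jump(0xf0001000UL);	/* pretend the handler lives in OBP */
	(void) printf("pctx=%llu sctx=%llu\n",
	    (unsigned long long)pcontext, (unsigned long long)scontext);
	return (0);
}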
diff --git a/usr/src/uts/sun4v/ml/mach_locore.s b/usr/src/uts/sun4v/ml/mach_locore.s
index 1c80729869..fdd6b28992 100644
--- a/usr/src/uts/sun4v/ml/mach_locore.s
+++ b/usr/src/uts/sun4v/ml/mach_locore.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -798,7 +798,17 @@ have_win:
sethi %hi(FLUSH_ADDR), %g3
stxa %g2, [%g1]ASI_MMU_CTX
flush %g3 ! flush required by immu
-
+ !
+ ! If shared context support is not enabled, then the next five
+ ! instructions will be patched with nop instructions.
+ !
+ .global sfmmu_shctx_user_rtt_patch
+sfmmu_shctx_user_rtt_patch:
+ mov MMU_SCONTEXT1, %g1
+ ldxa [%g1]ASI_MMU_CTX, %g2
+ mov MMU_PCONTEXT1, %g1
+ stxa %g2, [%g1]ASI_MMU_CTX
+ flush %g3
!
! setup trap regs
!
diff --git a/usr/src/uts/sun4v/ml/mach_offsets.in b/usr/src/uts/sun4v/ml/mach_offsets.in
index b58ed79949..362d419c82 100644
--- a/usr/src/uts/sun4v/ml/mach_offsets.in
+++ b/usr/src/uts/sun4v/ml/mach_offsets.in
@@ -18,7 +18,7 @@
\
\ CDDL HEADER END
\
-\ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+\ Copyright 2007 Sun Microsystems, Inc. All rights reserved.
\ Use is subject to license terms.
\
\ offsets.in: input file to produce assym.h using the stabs program
@@ -137,12 +137,20 @@ hat HAT_SIZE
sfmmu_cpusran
sfmmu_tsb
sfmmu_ismblkpa
- sfmmu_flags
+ sfmmu_tteflags
+ sfmmu_rtteflags
+ sfmmu_srdp
+ sfmmu_region_map.h_rmap_s.hmeregion_map SFMMU_HMERMAP
+ sfmmu_scdp
sfmmu_hvblock
sfmmu_cext
sfmmu_ctx_lock
sfmmu_ctxs
+sf_scd SCD_SIZE
+ scd_sfmmup
+ scd_region_map.h_rmap_s.hmeregion_map SCD_HMERMAP
+
sfmmu_global_stat HATSTAT_SIZE
sf_pagefaults HATSTAT_PAGEFAULT
sf_uhash_searches HATSTAT_UHASH_SEARCH
@@ -153,30 +161,32 @@ sfmmu_global_stat HATSTAT_SIZE
sf_hment SFHME_SIZE SFHME_SHIFT
hme_tte SFHME_TTE
-tsbmiss TSBMISS_SIZE TSBMISS_SHIFT
- tsbptr TSBMISS_TSBPTR
- tsbptr4m TSBMISS_TSBPTR4M
+tsbmiss TSBMISS_SIZE
ksfmmup TSBMISS_KHATID
usfmmup TSBMISS_UHATID
- khashsz TSBMISS_KHASHSZ
+ usrdp TSBMISS_SHARED_UHATID
+ tsbptr TSBMISS_TSBPTR
+ tsbptr4m TSBMISS_TSBPTR4M
+ tsbscdptr TSBMISS_TSBSCDPTR
+ tsbscdptr4m TSBMISS_TSBSCDPTR4M
+ ismblkpa TSBMISS_ISMBLKPA
khashstart TSBMISS_KHASHSTART
- uhashsz TSBMISS_UHASHSZ
uhashstart TSBMISS_UHASHSTART
- hat_flags TSBMISS_HATFLAGS
- ismblkpa TSBMISS_ISMBLKPA
- itlb_misses TSBMISS_ITLBMISS
- dtlb_misses TSBMISS_DTLBMISS
+ khashsz TSBMISS_KHASHSZ
+ uhashsz TSBMISS_UHASHSZ
+ uhat_tteflags TSBMISS_UTTEFLAGS
+ uhat_rtteflags TSBMISS_URTTEFLAGS
utsb_misses TSBMISS_UTSBMISS
ktsb_misses TSBMISS_KTSBMISS
uprot_traps TSBMISS_UPROTS
kprot_traps TSBMISS_KPROTS
scratch TSBMISS_SCRATCH
+ shmermap TSBMISS_SHMERMAP
+ scd_shmermap TSBMISS_SCDSHMERMAP
\#define TSB_TAGACC (0 * TSBMISS_SCRATCH_INCR)
\#define TSBMISS_HMEBP (1 * TSBMISS_SCRATCH_INCR)
\#define TSBMISS_HATID (2 * TSBMISS_SCRATCH_INCR)
-\#define TSBMISS_XMMURET (3 * TSBMISS_SCRATCH_INCR)
-\#define TSBMISS_XMMUPTR (4 * TSBMISS_SCRATCH_INCR)
kpmtsbm KPMTSBM_SIZE KPMTSBM_SHIFT
vbase KPMTSBM_VBASE
diff --git a/usr/src/uts/sun4v/ml/trap_table.s b/usr/src/uts/sun4v/ml/trap_table.s
index 58b38b995e..a1376210ff 100644
--- a/usr/src/uts/sun4v/ml/trap_table.s
+++ b/usr/src/uts/sun4v/ml/trap_table.s
@@ -113,20 +113,6 @@
#endif
/*
- * This macro is used to update per cpu mmu stats in perf critical
- * paths. It is only enabled in debug kernels or if SFMMU_STAT_GATHER
- * is defined.
- */
-#if defined(DEBUG) || defined(SFMMU_STAT_GATHER)
-#define HAT_PERCPU_DBSTAT(stat) \
- mov stat, %g1 ;\
- ba stat_mmu ;\
- rd %pc, %g7
-#else
-#define HAT_PERCPU_DBSTAT(stat)
-#endif /* DEBUG || SFMMU_STAT_GATHER */
-
-/*
* This first set are funneled to trap() with %tt as the type.
* Trap will then either panic or send the user a signal.
*/
@@ -953,7 +939,6 @@ tt1_dtlbmiss:
#define DTLB_MISS(table_name) ;\
.global table_name/**/_dtlbmiss ;\
table_name/**/_dtlbmiss: ;\
- HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */ ;\
GET_MMU_D_PTAGACC_CTXTYPE(%g2, %g3) /* 8 instr */ ;\
cmp %g3, INVALID_CONTEXT ;\
ble,pn %xcc, sfmmu_kdtlb_miss ;\
@@ -998,7 +983,6 @@ tt1_itlbmiss:
#define ITLB_MISS(table_name) \
.global table_name/**/_itlbmiss ;\
table_name/**/_itlbmiss: ;\
- HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */ ;\
GET_MMU_I_PTAGACC_CTXTYPE(%g2, %g3) /* 8 instr */ ;\
cmp %g3, INVALID_CONTEXT ;\
ble,pn %xcc, sfmmu_kitlb_miss ;\
@@ -2779,20 +2763,6 @@ trace_dataprot:
ba,pt %xcc, .mmu_exception_end
mov T_DATA_EXCEPTION, %g1
SET_SIZE(.dmmu_exception)
-/*
- * expects offset into tsbmiss area in %g1 and return pc in %g7
- */
-stat_mmu:
- CPU_INDEX(%g5, %g6)
- sethi %hi(tsbmiss_area), %g6
- sllx %g5, TSBMISS_SHIFT, %g5
- or %g6, %lo(tsbmiss_area), %g6
- add %g6, %g5, %g6 /* g6 = tsbmiss area */
- ld [%g6 + %g1], %g5
- add %g5, 1, %g5
- jmp %g7 + 4
- st %g5, [%g6 + %g1]
-
/*
* fast_trap_done, fast_trap_done_chk_intr:
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index eaabde53ae..832054a708 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -65,6 +65,8 @@ uint64_t ncpu_guest_max;
void fill_cpu(md_t *, mde_cookie_t);
static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t);
+static uint64_t get_mmu_tsbs(md_t *, mde_cookie_t);
+static uint64_t get_mmu_shcontexts(md_t *, mde_cookie_t);
static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t);
static char *construct_isalist(md_t *, mde_cookie_t, char **);
static void set_at_flags(char *, int, char **);
@@ -307,7 +309,6 @@ found:
void
cpu_setup_common(char **cpu_module_isa_set)
{
- extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int mmu_exported_pagesize_mask;
int nocpus, i;
size_t ra_limit;
@@ -328,12 +329,6 @@ cpu_setup_common(char **cpu_module_isa_set)
if (use_page_coloring) {
do_pg_coloring = 1;
- if (use_virtual_coloring) {
- /*
- * XXX Sun4v cpus don't have virtual caches
- */
- do_virtual_coloring = 1;
- }
}
/*
@@ -344,6 +339,14 @@ cpu_setup_common(char **cpu_module_isa_set)
*/
mmu_exported_pagesize_mask = (int)get_cpu_pagesizes(mdp, cpulist[0]);
+ /*
+	 * Enable shared context support if the MD reports enough shared
+	 * contexts and TSBs (see MIN_NSHCONTEXTS and MIN_NTSBS).
+ */
+ if (get_mmu_shcontexts(mdp, cpulist[0]) >= MIN_NSHCONTEXTS &&
+ get_mmu_tsbs(mdp, cpulist[0]) >= MIN_NTSBS) {
+ shctx_on = 1;
+ }
+
for (i = 0; i < nocpus; i++)
fill_cpu(mdp, cpulist[i]);
@@ -428,12 +431,6 @@ cpu_setup_common(char **cpu_module_isa_set)
* timestamping. The sun4v require use of %stick.
*/
traptrace_use_stick = 1;
-
- /*
- * sun4v provides demap_all
- */
- if (!disable_delay_tlb_flush)
- delay_tlb_flush = 1;
}
/*
@@ -456,6 +453,39 @@ get_mmu_ctx_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
}
/*
+ * Get the number of tsbs from MD. If absent the default value is 0.
+ */
+static uint64_t
+get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie)
+{
+ uint64_t number_tsbs;
+
+ if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-#tsbs",
+ &number_tsbs))
+ number_tsbs = 0;
+
+ return (number_tsbs);
+}
+
+/*
+ * Get the number of shared contexts from the MD. This property more
+ * accurately describes the total number of contexts available, not just
+ * "shared contexts". If the property is absent the default value is 0.
+ */
+static uint64_t
+get_mmu_shcontexts(md_t *mdp, mde_cookie_t cpu_node_cookie)
+{
+ uint64_t number_contexts;
+
+ if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#shared-contexts",
+ &number_contexts))
+ number_contexts = 0;
+
+ return (number_contexts);
+}
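cpu_setup_common() only sets shctx_on when the MD advertises at least MIN_NSHCONTEXTS shared contexts and MIN_NTSBS TSBs. A stand-alone C sketch of that gate, with the MD property lookups ("mmu-#shared-contexts", "mmu-max-#tsbs") replaced by plain parameters:

#include <stdio.h>
#include <stdint.h>

#define	MIN_NSHCONTEXTS	1
#define	MIN_NTSBS	4

/*
 * Decide whether shared context support can be enabled, given the values
 * read from the MD (0 is used when a property is absent).
 */
static int
shctx_supported(uint64_t nshcontexts, uint64_t ntsbs)
{
	return (nshcontexts >= MIN_NSHCONTEXTS && ntsbs >= MIN_NTSBS);
}

int
main(void)
{
	(void) printf("%d\n", shctx_supported(1, 4));	/* 1: enable shctx */
	(void) printf("%d\n", shctx_supported(0, 4));	/* 0: property absent */
	return (0);
}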
+
+/*
* Initalize supported page sizes information.
* Set to 0, if the page sizes mask information is absent in MD.
*/
diff --git a/usr/src/uts/sun4v/sys/machparam.h b/usr/src/uts/sun4v/sys/machparam.h
index 3fd034bdc5..3065d190e6 100644
--- a/usr/src/uts/sun4v/sys/machparam.h
+++ b/usr/src/uts/sun4v/sys/machparam.h
@@ -304,6 +304,7 @@ extern "C" {
#define PTL1_BAD_HCALL_UNMAP_PERM_EINVAL 18
#define PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP 19
#define PTL1_BAD_RAISE_TSBEXCP 20
+#define PTL1_NO_SCDTSB8K 21
/*
* Defines the max trap level allowed
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index c7f3b1dd27..8b3e584fef 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -270,9 +270,7 @@ extern int cpu_setsize; /* Maximum ecache setsize of configured cpus */
* VM
*/
extern int do_pg_coloring;
-extern int do_virtual_coloring;
extern int use_page_coloring;
-extern int use_virtual_coloring;
extern uint_t vac_colors_mask;
extern caddr_t get_mmfsa_scratchpad(void);
diff --git a/usr/src/uts/sun4v/sys/mmu.h b/usr/src/uts/sun4v/sys/mmu.h
index b4067b25df..697f31d700 100644
--- a/usr/src/uts/sun4v/sys/mmu.h
+++ b/usr/src/uts/sun4v/sys/mmu.h
@@ -58,6 +58,11 @@ extern "C" {
#define MMU_PCONTEXT 0x08 /* primary context number */
#define MMU_SCONTEXT 0x10 /* secondary context number */
+#define MMU_PCONTEXT0 MMU_PCONTEXT /* primary context# 0 */
+#define MMU_PCONTEXT1 0x108 /* primary context# 1 */
+#define MMU_SCONTEXT0 MMU_SCONTEXT /* secondary context# 0 */
+#define MMU_SCONTEXT1 0x110 /* secondary context# 1 */
+
/*
* Pseudo Synchronous Fault Status Register Layout
*
@@ -146,6 +151,13 @@ extern "C" {
#define MIN_NCTXS_BITS 2
#define MAX_NCTXS (1ull << MAX_NCTXS_BITS)
+/*
+ * MIN_NSHCONTEXTS and MIN_NTSBS are the minimum number of shared
+ * contexts and TSBs necessary for shared context support.
+ */
+#define MIN_NSHCONTEXTS 1
+#define MIN_NTSBS 4
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.c b/usr/src/uts/sun4v/vm/mach_sfmmu.c
index 3347332094..52e69bceec 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu.c
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -66,6 +66,7 @@
* External routines and data structures
*/
extern void sfmmu_cache_flushcolor(int, pfn_t);
+extern uint_t mmu_page_sizes;
/*
* Static routines
@@ -79,11 +80,11 @@ caddr_t textva, datava;
tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */
int enable_bigktsb = 1;
+int shtsb4m_first = 0;
tte_t bigktsb_ttes[MAX_BIGKTSB_TTES];
int bigktsb_nttes = 0;
-
/*
* Controls the logic which enables the use of the
* QUAD_LDD_PHYS ASI for TSB accesses.
@@ -321,27 +322,27 @@ sfmmu_clear_utsbinfo()
}
/*
- * Invalidate machine specific TSB information, indicates all TSB memory
- * is being freed by hat_swapout().
- */
-void
-sfmmu_invalidate_tsbinfo(sfmmu_t *sfmmup)
-{
- ASSERT(sfmmup->sfmmu_tsb != NULL &&
- sfmmup->sfmmu_tsb->tsb_flags & TSB_SWAPPED);
-
- sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1;
- sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0;
-}
-
-/*
* Set machine specific TSB information
*/
void
sfmmu_setup_tsbinfo(sfmmu_t *sfmmup)
{
- struct tsb_info *tsbinfop;
- hv_tsb_info_t *tdp;
+ struct tsb_info *tsbinfop;
+ hv_tsb_info_t *tdp;
+ int i;
+ int j;
+ int scd = 0;
+ int tsbord[NHV_TSB_INFO];
+
+#ifdef DEBUG
+ ASSERT(max_mmu_ctxdoms > 0);
+ if (sfmmup != ksfmmup) {
+ /* Process should have INVALID_CONTEXT on all MMUs. */
+ for (i = 0; i < max_mmu_ctxdoms; i++) {
+ ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
+ }
+ }
+#endif
tsbinfop = sfmmup->sfmmu_tsb;
if (tsbinfop == NULL) {
@@ -349,29 +350,91 @@ sfmmu_setup_tsbinfo(sfmmu_t *sfmmup)
sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0;
return;
}
- tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[0];
- sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp);
- sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 1;
- tdp->hvtsb_idxpgsz = TTE8K;
- tdp->hvtsb_assoc = 1;
- tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc);
- tdp->hvtsb_ctx_index = 0;
- tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask;
- tdp->hvtsb_rsvd = 0;
- tdp->hvtsb_pa = tsbinfop->tsb_pa;
- if ((tsbinfop = tsbinfop->tsb_next) == NULL)
- return;
- sfmmup->sfmmu_hvblock.hv_tsb_info_cnt++;
- tdp++;
- tdp->hvtsb_idxpgsz = TTE4M;
- tdp->hvtsb_assoc = 1;
- tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc);
- tdp->hvtsb_ctx_index = 0;
- tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask;
- tdp->hvtsb_rsvd = 0;
- tdp->hvtsb_pa = tsbinfop->tsb_pa;
- /* Only allow for 2 TSBs */
- ASSERT(tsbinfop->tsb_next == NULL);
+
+ ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL);
+ ASSERT(sfmmup->sfmmu_scdp == NULL ||
+ sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL);
+
+ tsbord[0] = 0;
+ if (sfmmup->sfmmu_scdp == NULL) {
+ tsbord[1] = 1;
+ } else {
+ struct tsb_info *scd8ktsbp =
+ sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb;
+ ulong_t shared_4mttecnt = 0;
+ ulong_t priv_4mttecnt = 0;
+ int scd4mtsb = (scd8ktsbp->tsb_next != NULL);
+
+ for (i = TTE4M; i < MMU_PAGE_SIZES; i++) {
+ if (scd4mtsb) {
+ shared_4mttecnt +=
+ sfmmup->sfmmu_scdismttecnt[i] +
+ sfmmup->sfmmu_scdrttecnt[i];
+ }
+ if (tsbinfop->tsb_next != NULL) {
+ priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] +
+ sfmmup->sfmmu_ismttecnt[i];
+ }
+ }
+ if (tsbinfop->tsb_next == NULL) {
+ if (shared_4mttecnt) {
+ tsbord[1] = 2;
+ tsbord[2] = 1;
+ } else {
+ tsbord[1] = 1;
+ tsbord[2] = 2;
+ }
+ } else if (priv_4mttecnt) {
+ if (shared_4mttecnt) {
+ tsbord[1] = shtsb4m_first ? 2 : 1;
+ tsbord[2] = 3;
+ tsbord[3] = shtsb4m_first ? 1 : 2;
+ } else {
+ tsbord[1] = 1;
+ tsbord[2] = 2;
+ tsbord[3] = 3;
+ }
+ } else if (shared_4mttecnt) {
+ tsbord[1] = 3;
+ tsbord[2] = 2;
+ tsbord[3] = 1;
+ } else {
+ tsbord[1] = 2;
+ tsbord[2] = 1;
+ tsbord[3] = 3;
+ }
+ }
+
+ ASSERT(tsbinfop != NULL);
+ for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) {
+ if (i == 0) {
+ tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i];
+ sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp);
+ }
+
+
+ j = tsbord[i];
+
+ tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j];
+
+ ASSERT(tsbinfop->tsb_ttesz_mask != 0);
+ tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1;
+ tdp->hvtsb_assoc = 1;
+ tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc);
+ tdp->hvtsb_ctx_index = scd;
+ tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask;
+ tdp->hvtsb_rsvd = 0;
+ tdp->hvtsb_pa = tsbinfop->tsb_pa;
+
+ tsbinfop = tsbinfop->tsb_next;
+ if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) {
+ tsbinfop =
+ sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb;
+ scd = 1;
+ }
+ }
+ sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i;
+ ASSERT(tsbinfop == NULL);
}
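The tsbord[] logic above chooses which hv_tsb_info slot each TSB lands in, preferring the private or shared 4M TSB depending on which one actually has 4M mappings (and on the shtsb4m_first tunable). The following user-space sketch mirrors just that ordering decision, taking the TTE counts as parameters instead of walking the sfmmu/SCD structures:

#include <stdio.h>

#define	NHV_TSB_INFO	4

/*
 * tsbord[i] gives the hv_tsb_info slot for the i-th TSB walked in
 * (private 8K, private 4M, shared 8K, shared 4M) order.
 */
static void
tsb_order(int have_scd, int have_priv4m, unsigned long priv_4mttecnt,
    unsigned long shared_4mttecnt, int shtsb4m_first, int tsbord[NHV_TSB_INFO])
{
	tsbord[0] = 0;			/* private 8K TSB is always first */
	if (!have_scd) {
		tsbord[1] = 1;
		return;
	}
	if (!have_priv4m) {
		/* three TSBs: private 8K, shared 8K, shared 4M */
		tsbord[1] = shared_4mttecnt ? 2 : 1;
		tsbord[2] = shared_4mttecnt ? 1 : 2;
	} else if (priv_4mttecnt) {
		if (shared_4mttecnt) {
			tsbord[1] = shtsb4m_first ? 2 : 1;
			tsbord[2] = 3;
			tsbord[3] = shtsb4m_first ? 1 : 2;
		} else {
			tsbord[1] = 1;
			tsbord[2] = 2;
			tsbord[3] = 3;
		}
	} else if (shared_4mttecnt) {
		tsbord[1] = 3;
		tsbord[2] = 2;
		tsbord[3] = 1;
	} else {
		tsbord[1] = 2;
		tsbord[2] = 1;
		tsbord[3] = 3;
	}
}

int
main(void)
{
	int ord[NHV_TSB_INFO];

	tsb_order(1, 1, 10, 5, 0, ord);
	(void) printf("%d %d %d %d\n", ord[0], ord[1], ord[2], ord[3]);
	return (0);
}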
/*
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.h b/usr/src/uts/sun4v/vm/mach_sfmmu.h
index e91c4cdd0f..06ae00fc32 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu.h
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu.h
@@ -52,7 +52,7 @@ extern "C" {
/*
* Hypervisor TSB info
*/
-#define NHV_TSB_INFO 2
+#define NHV_TSB_INFO 4
#ifndef _ASM
@@ -67,6 +67,20 @@ struct hv_tsb_block {
#ifdef _ASM
/*
+ * This macro is used to set private/shared secondary context register in
+ * sfmmu_alloc_ctx().
+ * Input:
+ * cnum = cnum
+ * is_shctx = sfmmu private/shared flag (0: private, 1: shared)
+ */
+#define SET_SECCTX(cnum, is_shctx, tmp1, tmp2) \
+ mov MMU_SCONTEXT, tmp1; \
+ movrnz is_shctx, MMU_SCONTEXT1, tmp1; \
+ sethi %hi(FLUSH_ADDR), tmp2; \
+ stxa cnum, [tmp1]ASI_MMU_CTX; /* set 2nd ctx reg. */ \
+ flush tmp2; \
+
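On sun4v, SET_SECCTX picks the target secondary context register with movrnz before the stxa: the shared flag selects MMU_SCONTEXT1, otherwise MMU_SCONTEXT. A trivial C rendering of that selection (register numbers as defined in the sun4v sys/mmu.h change in this patch):

#include <stdio.h>

#define	MMU_SCONTEXT	0x10	/* secondary context 0 (private) */
#define	MMU_SCONTEXT1	0x110	/* secondary context 1 (shared) */

/* Pick the secondary context register the way the movrnz in SET_SECCTX does. */
static unsigned int
secctx_register(int is_shctx)
{
	return (is_shctx ? MMU_SCONTEXT1 : MMU_SCONTEXT);
}

int
main(void)
{
	(void) printf("0x%x 0x%x\n", secctx_register(0), secctx_register(1));
	return (0);
}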
+/*
* This macro is used in the MMU code to check if TL should be lowered from
* 2 to 1 to pop trapstat's state. See the block comment in trapstat.c
* for details.
@@ -382,10 +396,13 @@ label/**/2: \
/*
- * Load TSB base register into a dedicated scratchpad register.
+ * Load TSB base register into a dedicated scratchpad register
+ * for private contexts.
+ * Load TSB base register to TSBMISS area for shared contexts.
* This register contains utsb_pabase in bits 63:13, and TSB size
* code in bits 2:0.
*
+ * For private context
* In:
* tsbreg = value to load (ro)
* regnum = constant or register
@@ -399,7 +416,24 @@ label/**/2: \
stxa tsbreg, [tmp1]ASI_SCRATCHPAD /* save tsbreg */
/*
- * Get TSB base register from the scratchpad
+ * Load TSB base register to TSBMISS area for shared contexts.
+ * This register contains utsb_pabase in bits 63:13, and TSB size
+ * code in bits 2:0.
+ *
+ * In:
+ * tsbmiss = pointer to tsbmiss area
+ * tsbmissoffset = offset to right tsb pointer
+ * tsbreg = value to load (ro)
+ * Out:
+ * Specified tsbmiss area updated
+ *
+ */
+#define SET_UTSBREG_SHCTX(tsbmiss, tsbmissoffset, tsbreg) \
+ stx tsbreg, [tsbmiss + tsbmissoffset] /* save tsbreg */
+
+/*
+ * Get TSB base register from the scratchpad for
+ * private contexts
*
* In:
* regnum = constant or register
@@ -411,6 +445,20 @@ label/**/2: \
mov regnum, tsbreg; \
ldxa [tsbreg]ASI_SCRATCHPAD, tsbreg
+/*
+ * Get TSB base register from the scratchpad for
+ * shared contexts
+ *
+ * In:
+ * tsbmiss = pointer to tsbmiss area
+ * tsbmissoffset = offset to right tsb pointer
+ * tsbreg = scratch
+ * Out:
+ * tsbreg = tsbreg from the specified scratchpad register
+ */
+#define GET_UTSBREG_SHCTX(tsbmiss, tsbmissoffset, tsbreg) \
+ ldx [tsbmiss + tsbmissoffset], tsbreg
+
/*
* Get the location of the TSB entry in the first TSB to probe
@@ -563,6 +611,125 @@ label/**/1: \
/* END CSTYLED */
+/*
+ * Get the location in the 3rd TSB of the tsbe for this fault.
+ * The 3rd TSB corresponds to the shared context, and is used
+ * for 8K - 512k pages.
+ *
+ * In:
+ * tagacc = tag access register (not clobbered)
+ * tsbe = TSB base register
+ * tmp1, tmp2 = scratch registers
+ * Out:
+ * tsbe = pointer to the tsbe in the 3rd TSB
+ */
+#define GET_3RD_TSBE_PTR(tagacc, tsbe, tmp1, tmp2) \
+ and tsbe, TSB_SOFTSZ_MASK, tmp2; /* tmp2=szc */ \
+ andn tsbe, TSB_SOFTSZ_MASK, tsbe; /* tsbbase */ \
+ mov TSB_ENTRIES(0), tmp1; /* nentries in TSB size 0 */ \
+ sllx tmp1, tmp2, tmp1; /* tmp1 = nentries in TSB */ \
+ sub tmp1, 1, tmp1; /* mask = nentries - 1 */ \
+ srlx tagacc, MMU_PAGESHIFT, tmp2; \
+ and tmp2, tmp1, tmp1; /* tsbent = virtpage & mask */ \
+ sllx tmp1, TSB_ENTRY_SHIFT, tmp1; /* entry num --> ptr */ \
+ add tsbe, tmp1, tsbe /* add entry offset to TSB base */
+
+
+/*
+ * Get the location in the 4th TSB of the tsbe for this fault.
+ * The 4th TSB is for the shared context. It is used for 4M - 256M pages.
+ *
+ * In:
+ * tagacc = tag access register (not clobbered)
+ * tsbe = TSB base register
+ * tmp1, tmp2 = scratch registers
+ * Out:
+ * tsbe = pointer to the tsbe in the 4th TSB
+ */
+#define GET_4TH_TSBE_PTR(tagacc, tsbe, tmp1, tmp2) \
+ and tsbe, TSB_SOFTSZ_MASK, tmp2; /* tmp2=szc */ \
+ andn tsbe, TSB_SOFTSZ_MASK, tsbe; /* tsbbase */ \
+ mov TSB_ENTRIES(0), tmp1; /* nentries in TSB size 0 */ \
+ sllx tmp1, tmp2, tmp1; /* tmp1 = nentries in TSB */ \
+ sub tmp1, 1, tmp1; /* mask = nentries - 1 */ \
+ srlx tagacc, MMU_PAGESHIFT4M, tmp2; \
+ and tmp2, tmp1, tmp1; /* tsbent = virtpage & mask */ \
+ sllx tmp1, TSB_ENTRY_SHIFT, tmp1; /* entry num --> ptr */ \
+ add tsbe, tmp1, tsbe /* add entry offset to TSB base */
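GET_3RD_TSBE_PTR and GET_4TH_TSBE_PTR differ only in the page shift used to form the virtual page number; both mask it with (nentries - 1) and scale by the TSB entry size. The same computation in C, with hypothetical values for the size-code mask, entry count and entry shift:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical constants; the real ones live in the TSB headers. */
#define	TSB_SOFTSZ_MASK		0x7ULL	/* size code in the low bits of tsbreg */
#define	TSB_ENTRIES_SZ0		512ULL	/* entries in a size-0 TSB */
#define	TSB_ENTRY_SHIFT		4	/* 16-byte TSB entries */
#define	MMU_PAGESHIFT		13	/* 8K pages (3rd TSB) */
#define	MMU_PAGESHIFT4M		22	/* 4M pages (4th TSB) */

/* Return the address of the TSB entry for vaddr in the TSB described by tsbreg. */
static uint64_t
tsb_entry_ptr(uint64_t tsbreg, uint64_t vaddr, int pageshift)
{
	uint64_t szc = tsbreg & TSB_SOFTSZ_MASK;	/* TSB size code */
	uint64_t base = tsbreg & ~TSB_SOFTSZ_MASK;	/* TSB base address */
	uint64_t nentries = TSB_ENTRIES_SZ0 << szc;
	uint64_t idx = (vaddr >> pageshift) & (nentries - 1);

	return (base + (idx << TSB_ENTRY_SHIFT));
}

int
main(void)
{
	uint64_t tsbreg = 0x10000000ULL | 2;	/* base plus size code 2 */

	(void) printf("0x%llx\n", (unsigned long long)
	    tsb_entry_ptr(tsbreg, 0x12345678ULL, MMU_PAGESHIFT));
	(void) printf("0x%llx\n", (unsigned long long)
	    tsb_entry_ptr(tsbreg, 0x12345678ULL, MMU_PAGESHIFT4M));
	return (0);
}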
+
+/*
+ * Copy the sfmmu_region_map or scd_region_map to the tsbmiss
+ * shmermap or scd_shmermap, from sfmmu_load_mmustate.
+ */
+#define SET_REGION_MAP(rgn_map, tsbmiss_map, cnt, tmp, label) \
+ /* BEGIN CSTYLED */ \
+label: ;\
+ ldx [rgn_map], tmp ;\
+ dec cnt ;\
+ add rgn_map, CLONGSIZE, rgn_map ;\
+ stx tmp, [tsbmiss_map] ;\
+ brnz,pt cnt, label ;\
+ add tsbmiss_map, CLONGSIZE, tsbmiss_map \
+ /* END CSTYLED */
+
+/*
+ * If there is no scd, then zero the tsbmiss scd_shmermap,
+ * from sfmmu_load_mmustate.
+ */
+#define ZERO_REGION_MAP(tsbmiss_map, cnt, label) \
+ /* BEGIN CSTYLED */ \
+label: ;\
+ dec cnt ;\
+ stx %g0, [tsbmiss_map] ;\
+ brnz,pt cnt, label ;\
+ add tsbmiss_map, CLONGSIZE, tsbmiss_map \
+ /* END CSTYLED */
+
+/*
+ * Set hmemisc to 1 if the shared hme is also part of an scd.
+ * In:
+ * tsbarea = tsbmiss area (not clobbered)
+ * hmeblkpa = hmeblkpa + hmentoff + SFHME_TTE (not clobbered)
+ * hmentoff = hmentoff + SFHME_TTE = tte offset (clobbered)
+ * Out:
+ * use_shctx = 1 if shme is in scd and 0 otherwise
+ */
+#define GET_SCDSHMERMAP(tsbarea, hmeblkpa, hmentoff, use_shctx) \
+ /* BEGIN CSTYLED */ \
+	sub	hmeblkpa, hmentoff, hmentoff	/* hmentoff = hmeblkpa */	 ;\
+ add hmentoff, HMEBLK_TAG, hmentoff ;\
+ ldxa [hmentoff]ASI_MEM, hmentoff /* read 1st part of tag */ ;\
+ and hmentoff, HTAG_RID_MASK, hmentoff /* mask off rid */ ;\
+ and hmentoff, BT_ULMASK, use_shctx /* mask bit index */ ;\
+ srlx hmentoff, BT_ULSHIFT, hmentoff /* extract word */ ;\
+ sllx hmentoff, CLONGSHIFT, hmentoff /* index */ ;\
+ add tsbarea, hmentoff, hmentoff /* add to tsbarea */ ;\
+ ldx [hmentoff + TSBMISS_SCDSHMERMAP], hmentoff /* scdrgn */ ;\
+ srlx hmentoff, use_shctx, use_shctx ;\
+ and use_shctx, 0x1, use_shctx \
+ /* END CSTYLED */
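GET_SCDSHMERMAP is a bitmap lookup: the region id taken from the hme block tag selects a word of the tsbmiss scd_shmermap (rid >> BT_ULSHIFT) and a bit within it (rid & BT_ULMASK). A small C sketch of the equivalent test, assuming 64-bit map words and an illustrative map size:

#include <stdio.h>
#include <stdint.h>

#define	BT_ULSHIFT		6	/* log2(bits per 64-bit map word) */
#define	BT_ULMASK		0x3f
#define	SFMMU_HMERGNMAP_WORDS	2	/* hypothetical map size */

/*
 * Return 1 if region id 'rid' is marked in the SCD shared-region bitmap,
 * i.e. the shared hme block belongs to the process's SCD.
 */
static int
scd_region_in_map(const uint64_t *scd_shmermap, unsigned int rid)
{
	uint64_t word = scd_shmermap[rid >> BT_ULSHIFT];

	return ((int)((word >> (rid & BT_ULMASK)) & 1));
}

int
main(void)
{
	uint64_t map[SFMMU_HMERGNMAP_WORDS] = { 0x5, 0x0 };	/* rids 0 and 2 set */

	(void) printf("%d %d\n", scd_region_in_map(map, 2),
	    scd_region_in_map(map, 3));
	return (0);
}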
+
+/*
+ * 1. Get ctx1. The traptype is supplied by caller.
+ * 2. If iTSB miss, store in MMFSA_I_CTX
+ * 3. if dTSB miss, store in MMFSA_D_CTX
+ * 4. Thus the [D|I]TLB_STUFF will work as expected.
+ */
+#define SAVE_CTX1(traptype, ctx1, tmp, label) \
+ /* BEGIN CSTYLED */ \
+ mov MMU_SCONTEXT1, tmp ;\
+ ldxa [tmp]ASI_MMU_CTX, ctx1 ;\
+ MMU_FAULT_STATUS_AREA(tmp) ;\
+ cmp traptype, FAST_IMMU_MISS_TT ;\
+ be,a,pn %icc, label ;\
+ stx ctx1, [tmp + MMFSA_I_CTX] ;\
+ cmp traptype, T_INSTR_MMU_MISS ;\
+ be,a,pn %icc, label ;\
+ stx ctx1, [tmp + MMFSA_I_CTX] ;\
+ stx ctx1, [tmp + MMFSA_D_CTX] ;\
+label:
+ /* END CSTYLED */
+
#endif /* _ASM */
#ifdef __cplusplus
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s b/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s
index bbec4ee675..0dc3dc5f44 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s
@@ -73,7 +73,7 @@ sfmmu_getctx_sec()
/* ARGSUSED */
void
-sfmmu_setctx_sec(int ctx)
+sfmmu_setctx_sec(uint_t ctx)
{}
/* ARGSUSED */
@@ -154,8 +154,8 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
ta FAST_TRAP
brz,pt %o0, 5f
nop
- ba panic_bad_hcall
- mov MMU_DEMAP_ALL, %o1
+ ba ptl1_panic /* bad HV call */
+ mov PTL1_BAD_RAISE_TSBEXCP, %g1
5:
mov %g3, %o0
mov %g4, %o1
@@ -245,17 +245,17 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
stxa %o0, [%o1]ASI_MMU_CTX /* set 2nd context reg. */
flush %o4
- /*
- * if the routine is entered with intr enabled, then enable intr now.
- * otherwise, keep intr disabled, return without enabing intr.
- * %g1 - old intr state
- */
- btst PSTATE_IE, %g1
- bnz,a,pt %icc, 2f
- wrpr %g0, %g1, %pstate /* enable interrupts */
-2: retl
- nop
- SET_SIZE(sfmmu_setctx_sec)
+ /*
+ * if the routine is entered with intr enabled, then enable intr now.
+ * otherwise, keep intr disabled, return without enabing intr.
+ * %g1 - old intr state
+ */
+ btst PSTATE_IE, %g1
+ bnz,a,pt %icc, 2f
+ wrpr %g0, %g1, %pstate /* enable interrupts */
+2: retl
+ nop
+ SET_SIZE(sfmmu_setctx_sec)
/*
* set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS.
@@ -285,9 +285,36 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
sethi %hi(ksfmmup), %o3
ldx [%o3 + %lo(ksfmmup)], %o3
cmp %o3, %o0
- be,pn %xcc, 3f ! if kernel as, do nothing
+ be,pn %xcc, 7f ! if kernel as, do nothing
+ nop
+
+ set MMU_SCONTEXT, %o3
+ ldxa [%o3]ASI_MMU_CTX, %o5
+
+ cmp %o5, INVALID_CONTEXT ! ctx is invalid?
+ bne,pt %icc, 1f
nop
+ CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area
+ stx %o0, [%o2 + TSBMISS_UHATID]
+ stx %g0, [%o2 + TSBMISS_SHARED_UHATID]
+#ifdef DEBUG
+ /* check if hypervisor/hardware should handle user TSB */
+ sethi %hi(hv_use_non0_tsb), %o2
+ ld [%o2 + %lo(hv_use_non0_tsb)], %o2
+ brz,pn %o2, 0f
+ nop
+#endif /* DEBUG */
+ clr %o0 ! ntsb = 0 for invalid ctx
+ clr %o1 ! HV_TSB_INFO_PA = 0 if inv ctx
+ mov MMU_TSB_CTXNON0, %o5
+ ta FAST_TRAP ! set TSB info for user process
+ brnz,a,pn %o0, panic_bad_hcall
+ mov MMU_TSB_CTXNON0, %o1
+0:
+ retl
+ nop
+1:
/*
* We need to set up the TSB base register, tsbmiss
* area, and pass the TSB information into the hypervisor
@@ -307,52 +334,106 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
2:
SET_UTSBREG(SCRATCHPAD_UTSBREG2, %o2, %o3)
+ /* make 3rd and 4th TSB */
+ CPU_TSBMISS_AREA(%o4, %o3) ! %o4 = tsbmiss area
+
+ ldx [%o0 + SFMMU_SCDP], %g2 ! %g2 = sfmmu_scd
+ brz,pt %g2, 3f
+ mov -1, %o2 ! use -1 if no third TSB
+
+ ldx [%g2 + SCD_SFMMUP], %g3 ! %g3 = scdp->scd_sfmmup
+ ldx [%g3 + SFMMU_TSB], %o1 ! %o1 = first scd tsbinfo
+ brz,pn %o1, 9f
+ nop ! panic if no third TSB
+
+ /* make 3rd UTSBREG */
+ MAKE_UTSBREG(%o1, %o2, %o3) ! %o2 = user tsbreg
+3:
+ SET_UTSBREG_SHCTX(%o4, TSBMISS_TSBSCDPTR, %o2)
+
+ brz,pt %g2, 4f
+ mov -1, %o2 ! use -1 if no 3rd or 4th TSB
+
+ brz,pt %o1, 4f
+ mov -1, %o2 ! use -1 if no 3rd or 4th TSB
+ ldx [%o1 + TSBINFO_NEXTPTR], %g2 ! %g2 = second scd tsbinfo
+ brz,pt %g2, 4f
+ mov -1, %o2 ! use -1 if no 4th TSB
+
+ /* make 4th UTSBREG */
+ MAKE_UTSBREG(%g2, %o2, %o3) ! %o2 = user tsbreg
+4:
+ SET_UTSBREG_SHCTX(%o4, TSBMISS_TSBSCDPTR4M, %o2)
+
#ifdef DEBUG
/* check if hypervisor/hardware should handle user TSB */
sethi %hi(hv_use_non0_tsb), %o2
ld [%o2 + %lo(hv_use_non0_tsb)], %o2
- brz,pn %o2, 5f
+ brz,pn %o2, 6f
nop
#endif /* DEBUG */
CPU_ADDR(%o2, %o4) ! load CPU struct addr to %o2 using %o4
ldub [%o2 + CPU_TSTAT_FLAGS], %o1 ! load cpu_tstat_flag to %o1
-
- /*
- * %o0 = sfmmup
- * %o2 = returned sfmmu cnum on this CPU
- * %o4 = scratch
- */
- SFMMU_CPU_CNUM(%o0, %o2, %o4)
- mov %o5, %o4 ! preserve %o5 for resume
mov %o0, %o3 ! preserve %o0
btst TSTAT_TLB_STATS, %o1
- bnz,a,pn %icc, 4f ! ntsb = 0 if TLB stats enabled
+ bnz,a,pn %icc, 5f ! ntsb = 0 if TLB stats enabled
clr %o0
- cmp %o2, INVALID_CONTEXT
- be,a,pn %icc, 4f
- clr %o0 ! ntsb = 0 for invalid ctx
+
ldx [%o3 + SFMMU_HVBLOCK + HV_TSB_INFO_CNT], %o0
-4:
- ldx [%o3 + SFMMU_HVBLOCK + HV_TSB_INFO_PA], %o1
+5:
+ ldx [%o3 + SFMMU_HVBLOCK + HV_TSB_INFO_PA], %o1
mov MMU_TSB_CTXNON0, %o5
ta FAST_TRAP ! set TSB info for user process
brnz,a,pn %o0, panic_bad_hcall
mov MMU_TSB_CTXNON0, %o1
mov %o3, %o0 ! restore %o0
- mov %o4, %o5 ! restore %o5
-5:
+6:
ldx [%o0 + SFMMU_ISMBLKPA], %o1 ! copy members of sfmmu
- CPU_TSBMISS_AREA(%o2, %o3) ! we need to access from
+ CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area
stx %o1, [%o2 + TSBMISS_ISMBLKPA] ! sfmmu_tsb_miss into the
- lduh [%o0 + SFMMU_FLAGS], %o3 ! per-CPU tsbmiss area.
+ ldub [%o0 + SFMMU_TTEFLAGS], %o3 ! per-CPU tsbmiss area.
+ ldub [%o0 + SFMMU_RTTEFLAGS], %o4
+ ldx [%o0 + SFMMU_SRDP], %o1
stx %o0, [%o2 + TSBMISS_UHATID]
- stuh %o3, [%o2 + TSBMISS_HATFLAGS]
-
-3: retl
+ stub %o3, [%o2 + TSBMISS_UTTEFLAGS]
+ stub %o4, [%o2 + TSBMISS_URTTEFLAGS]
+ stx %o1, [%o2 + TSBMISS_SHARED_UHATID]
+ brz,pn %o1, 7f ! check for sfmmu_srdp
+ add %o0, SFMMU_HMERMAP, %o1
+ add %o2, TSBMISS_SHMERMAP, %o2
+ mov SFMMU_HMERGNMAP_WORDS, %o3
+ ! set tsbmiss shmermap
+ SET_REGION_MAP(%o1, %o2, %o3, %o4, load_shme_mmustate)
+
+ ldx [%o0 + SFMMU_SCDP], %o4 ! %o4 = sfmmu_scd
+ CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area
+ mov SFMMU_HMERGNMAP_WORDS, %o3
+ brnz,pt %o4, 8f ! check for sfmmu_scdp else
+ add %o2, TSBMISS_SCDSHMERMAP, %o2 ! zero tsbmiss scd_shmermap
+ ZERO_REGION_MAP(%o2, %o3, zero_scd_mmustate)
+7:
+ retl
nop
- SET_SIZE(sfmmu_load_mmustate)
+8: ! set tsbmiss scd_shmermap
+ add %o4, SCD_HMERMAP, %o1
+ SET_REGION_MAP(%o1, %o2, %o3, %o4, load_scd_mmustate)
+ retl
+ nop
+9:
+ sethi %hi(panicstr), %g1 ! panic if no 3rd TSB
+ ldx [%g1 + %lo(panicstr)], %g1
+ tst %g1
+
+ bnz,pn %xcc, 7b
+ nop
+
+ sethi %hi(sfmmu_panic10), %o0
+ call panic
+ or %o0, %lo(sfmmu_panic10), %o0
+ SET_SIZE(sfmmu_load_mmustate)
+
#endif /* lint */
#if defined(lint)