| field | value |
|---|---|
| author | paulsan <none@none>, 2007-06-24 05:26:50 -0700 |
| committer | paulsan <none@none>, 2007-06-24 05:26:50 -0700 |
| commit | 05d3dc4b6755c54754109ffbe7e792f4e5b7c7c9 (patch) |
| tree | ae209198d4e61ebc1c922cacdc02a3614dca107b /usr |
| parent | 8654d0253136055bd4cc2423d87378e8a37f2eb5 (diff) |
| download | illumos-joyent-05d3dc4b6755c54754109ffbe7e792f4e5b7c7c9.tar.gz |
PSARC 2006/266 Shared Context Support
PSARC 2006/267 Shared Region HME Block support
6388600 do_virtual_coloring checks should be expunged from sfmmu
6449192 Integrate support for MMU Shared Contexts
6449195 Integrate support for Shared Region HME Blocks
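For orientation before the raw diff: the hunks to usr/src/uts/common/vm/hat.h and vm/seg_vn.c below add a region-cookie interface (hat_join_region(), hat_leave_region(), hat_dup_region(), HAT_REGION_TEXT) that lets segvn share HME blocks for read-only text mappings. The following is a minimal, non-buildable C sketch of the calling pattern segvn adopts; the hat_* names, signatures, and flags are taken verbatim from the diff, while the my_* types and functions are simplified, hypothetical stand-ins for the segvn driver.

```c
/*
 * Illustrative sketch only (kernel headers, not buildable standalone).
 * hat_* declarations and flags come from the vm/hat.h hunk in this commit;
 * my_data_t is a simplified stand-in for segvn_data's new rcookie field.
 */
#include <vm/hat.h>
#include <vm/seg.h>

typedef struct my_data {
	hat_region_cookie_t rcookie;	/* region cookie, as segvn_data now keeps */
} my_data_t;

/* matches hat_rgn_cb_func_t: invoked when region mappings are torn down */
static void
my_rgn_unload_cb(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr,
    size_t r_size, void *r_obj, u_offset_t r_objoff)
{
	/* e.g. free cached vnode pages for [saddr, eaddr), cf. segvn */
}

static void
my_create(struct seg *seg, my_data_t *sdp, vnode_t *vp, u_offset_t off,
    uchar_t prot)
{
	/* only HATs that advertise the new feature use regions */
	if (!hat_supported(HAT_SHARED_REGIONS, NULL)) {
		sdp->rcookie = HAT_INVALID_REGION_COOKIE;
		return;
	}

	/* join a text region keyed by (vp, off); keep the cookie for later calls */
	sdp->rcookie = hat_join_region(seg->s_as->a_hat, seg->s_base,
	    seg->s_size, (void *)vp, off, prot, (uchar_t)seg->s_szc,
	    my_rgn_unload_cb, HAT_REGION_TEXT);
}

static void
my_unmap(struct seg *seg, my_data_t *sdp)
{
	/* leaving the region replaces the per-segment hat_unload() path */
	if (HAT_IS_REGION_COOKIE_VALID(sdp->rcookie)) {
		hat_leave_region(seg->s_as->a_hat, sdp->rcookie,
		    HAT_REGION_TEXT);
		sdp->rcookie = HAT_INVALID_REGION_COOKIE;
	}
}
```

In the actual seg_vn.c changes, segvn_create() joins the region only for MAP_PRIVATE, read/execute vnode text mappings, and any operation that would diverge from the shared mappings (setprot, setpagesize, softlock, MADV_SEQUENTIAL, partial unmap) first calls hat_leave_region() and falls back to per-process mappings.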
Diffstat (limited to 'usr')
42 files changed, 5909 insertions, 1127 deletions
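Several hunks (mem_cage.c, vm_pageout.c, vm_page.c) also replace open-coded `hat_page_getshare(pp) > threshold` tests with the new hat_page_checkshare() entry point declared in vm/hat.h, so a HAT that maps pages through shared regions can fold those mappings into the "too shared" decision. A minimal sketch follows: the x86 body is copied from the hat_i86.c hunk, while skip_if_heavily_shared() is a hypothetical caller illustrating the pattern used by checkpage() and kcage_cageout().

```c
/* Sketch against the kernel headers; not buildable standalone. */
#include <vm/page.h>
#include <vm/hat.h>

/*
 * x86 implementation from this diff: with no shared-region support the
 * check degenerates to the old raw-mapcount comparison.
 */
int
hat_page_checkshare(page_t *pp, ulong_t sh_thresh)
{
	return (hat_page_getshare(pp) > sh_thresh);
}

/*
 * Hypothetical caller showing the new pattern: ask the HAT whether the
 * page exceeds the share threshold instead of comparing the count itself.
 */
static int
skip_if_heavily_shared(page_t *pp, ulong_t po_share)
{
	return (hat_page_checkshare(pp, po_share) ? -1 : 0);
}
```

On sfmmu, by contrast, a single shared-region mapping can stand in for many processes, so the threshold check has to be answered by the HAT rather than by callers reading the raw per-page count.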
diff --git a/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs b/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs index d4e71d41ff..d35a72689b 100644 --- a/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs +++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -489,6 +489,10 @@ save_sfmmu_global_stat(HV *self, kstat_t *kp, int strip_str) SAVE_INT32(self, sfmmugp, sf_tsb_alloc); SAVE_INT32(self, sfmmugp, sf_tsb_allocfail); SAVE_INT32(self, sfmmugp, sf_tsb_sectsb_create); + SAVE_INT32(self, sfmmugp, sf_scd_1sttsb_alloc); + SAVE_INT32(self, sfmmugp, sf_scd_2ndtsb_alloc); + SAVE_INT32(self, sfmmugp, sf_scd_1sttsb_allocfail); + SAVE_INT32(self, sfmmugp, sf_scd_2ndtsb_allocfail); SAVE_INT32(self, sfmmugp, sf_tteload8k); SAVE_INT32(self, sfmmugp, sf_tteload64k); SAVE_INT32(self, sfmmugp, sf_tteload512k); @@ -530,6 +534,11 @@ save_sfmmu_global_stat(HV *self, kstat_t *kp, int strip_str) SAVE_INT32(self, sfmmugp, sf_user_vtop); SAVE_INT32(self, sfmmugp, sf_ctx_inv); SAVE_INT32(self, sfmmugp, sf_tlb_reprog_pgsz); + SAVE_INT32(self, sfmmugp, sf_region_remap_demap); + SAVE_INT32(self, sfmmugp, sf_create_scd); + SAVE_INT32(self, sfmmugp, sf_join_scd); + SAVE_INT32(self, sfmmugp, sf_leave_scd); + SAVE_INT32(self, sfmmugp, sf_destroy_scd); } #endif diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index a17678863b..652a01c34c 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -623,6 +623,7 @@ gexec( args->stk_prot &= ~PROT_EXEC; args->execswp = eswp; /* Save execsw pointer in uarg for exec_func */ + args->ex_vp = vp; /* * Traditionally, the setid flags told the sub processes whether @@ -1819,6 +1820,7 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) if (p->p_model == DATAMODEL_ILP32) as->a_userlimit = (caddr_t)USERLIMIT32; (void) hat_setup(as->a_hat, HAT_ALLOC); + hat_join_srd(as->a_hat, args->ex_vp); /* * Finally, write out the contents of the new stack. 
diff --git a/usr/src/uts/common/os/mem_cage.c b/usr/src/uts/common/os/mem_cage.c index 97f4cce08c..beb2fe3cbe 100644 --- a/usr/src/uts/common/os/mem_cage.c +++ b/usr/src/uts/common/os/mem_cage.c @@ -1676,7 +1676,7 @@ kcage_cageout() int last_pass; int pages_skipped; int shared_skipped; - uint_t shared_level = 8; + ulong_t shared_level = 8; pgcnt_t nfreed; #ifdef KCAGE_STATS clock_t scan_start; @@ -1807,7 +1807,7 @@ again: } KCAGE_STAT_SET_SCAN(kt_skiplevel, shared_level); - if (hat_page_getshare(pp) > shared_level) { + if (hat_page_checkshare(pp, shared_level)) { page_unlock(pp); pages_skipped = 1; shared_skipped = 1; diff --git a/usr/src/uts/common/os/vm_pageout.c b/usr/src/uts/common/os/vm_pageout.c index 6a514e0174..e5c80e9bfd 100644 --- a/usr/src/uts/common/os/vm_pageout.c +++ b/usr/src/uts/common/os/vm_pageout.c @@ -950,7 +950,7 @@ checkpage(struct page *pp, int whichhand) */ top: if ((PP_ISKAS(pp)) || (PP_ISFREE(pp)) || - (hat_page_getshare(pp) > po_share) || PAGE_LOCKED(pp)) { + hat_page_checkshare(pp, po_share) || PAGE_LOCKED(pp)) { return (-1); } diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h index d1243a926f..a7ebf7dbb0 100644 --- a/usr/src/uts/common/sys/exec.h +++ b/usr/src/uts/common/sys/exec.h @@ -104,6 +104,7 @@ typedef struct uarg { struct execsw *execswp; uintptr_t entry; uintptr_t thrptr; + vnode_t *ex_vp; char *emulator; char *brandname; char *auxp_brand_phdr; /* addr of brand phdr auxv on user stack */ diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h index 6192e1aacb..f19b3f59c6 100644 --- a/usr/src/uts/common/vm/hat.h +++ b/usr/src/uts/common/vm/hat.h @@ -80,6 +80,8 @@ typedef struct hat_callback { void *hcb_data; } hat_callback_t; +typedef void *hat_region_cookie_t; + #ifdef _KERNEL /* @@ -205,9 +207,16 @@ void hat_thread_exit(kthread_t *); void hat_memload(struct hat *, caddr_t, struct page *, uint_t, uint_t); void hat_memload_array(struct hat *, caddr_t, size_t, struct page **, uint_t, uint_t); +void hat_memload_region(struct hat *, caddr_t, struct page *, uint_t, + uint_t, hat_region_cookie_t); +void hat_memload_array_region(struct hat *, caddr_t, size_t, struct page **, + uint_t, uint_t, hat_region_cookie_t); void hat_devload(struct hat *, caddr_t, size_t, pfn_t, uint_t, int); + void hat_unlock(struct hat *, caddr_t, size_t); +void hat_unlock_region(struct hat *, caddr_t, size_t, hat_region_cookie_t); + void hat_unload(struct hat *, caddr_t, size_t, uint_t); void hat_unload_callback(struct hat *, caddr_t, size_t, uint_t, hat_callback_t *); @@ -293,6 +302,7 @@ uint_t hat_page_getattr(struct page *, uint_t); int hat_pageunload(struct page *, uint_t); uint_t hat_pagesync(struct page *, uint_t); ulong_t hat_page_getshare(struct page *); +int hat_page_checkshare(struct page *, ulong_t); faultcode_t hat_softlock(struct hat *, caddr_t, size_t *, struct page **, uint_t); void hat_page_demote(struct page *); @@ -303,7 +313,8 @@ void hat_page_demote(struct page *); enum hat_features { HAT_SHARED_PT, /* Shared page tables */ HAT_DYNAMIC_ISM_UNMAP, /* hat_pageunload() handles ISM pages */ - HAT_VMODSORT /* support for VMODSORT flag of vnode */ + HAT_VMODSORT, /* support for VMODSORT flag of vnode */ + HAT_SHARED_REGIONS /* shared regions support */ }; int hat_supported(enum hat_features, void *); @@ -445,6 +456,7 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t); */ #define HAT_DUP_ALL 1 #define HAT_DUP_COW 2 +#define HAT_DUP_SRD 3 /* @@ -600,6 +612,28 @@ extern struct hrmstat **hrm_hashtab; void hat_enter(struct hat 
*); void hat_exit(struct hat *); +typedef void (*hat_rgn_cb_func_t)(caddr_t, caddr_t, caddr_t, + size_t, void *, u_offset_t); + +void hat_join_srd(struct hat *, vnode_t *); + +hat_region_cookie_t hat_join_region(struct hat *, caddr_t, size_t, void *, + u_offset_t, uchar_t, uchar_t, hat_rgn_cb_func_t, + uint_t); +void hat_leave_region(struct hat *, hat_region_cookie_t, + uint_t); +void hat_dup_region(struct hat *, hat_region_cookie_t); + +#define HAT_INVALID_REGION_COOKIE ((hat_region_cookie_t)-1) +#define HAT_IS_REGION_COOKIE_VALID(c) ((c) != HAT_INVALID_REGION_COOKIE) + +/* hat_join_region() flags */ + +#define HAT_REGION_TEXT 0x1 /* passed by segvn */ +#define HAT_REGION_ISM 0x2 /* for hat_share()/hat_unshare() */ + +#define HAT_REGION_TYPE_MASK (0x7) + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index d694d68d7d..1f8f0c9173 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -633,10 +633,10 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) npages = btop(len); - hat_flags = HAT_UNLOAD_UNLOCK; + hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP; if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) || (sptd->spt_flags & SHM_PAGEABLE)) { - hat_flags = HAT_UNLOAD; + hat_flags = HAT_UNLOAD_UNMAP; } hat_unload(seg->s_as->a_hat, addr, len, hat_flags); @@ -679,7 +679,7 @@ segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) * permanent lock on it and invalidate the page. */ if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { - if (hat_flags == HAT_UNLOAD) + if (hat_flags == HAT_UNLOAD_UNMAP) pp = page_lookup(vp, off, SE_EXCL); else { if ((pp = page_find(vp, off)) == NULL) { diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 4a63a73857..8240747290 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -195,6 +195,9 @@ static struct seg *segvn_split_seg(struct seg *, caddr_t); static int segvn_claim_pages(struct seg *, struct vpage *, u_offset_t, ulong_t, uint_t); +static void segvn_hat_rgn_unload_callback(caddr_t, caddr_t, caddr_t, + size_t, void *, u_offset_t); + static int segvn_pp_lock_anonpages(page_t *, int); static void segvn_pp_unlock_anonpages(page_t *, int); @@ -298,6 +301,8 @@ ulong_t segvn_faultvnmpss_align_err4; ulong_t segvn_faultvnmpss_align_err5; ulong_t segvn_vmpss_pageio_deadlk_err; +int segvn_use_regions = 1; + /* * Segvn supports text replication optimization for NUMA platforms. Text * replica's are represented by anon maps (amp). There's one amp per text file @@ -407,6 +412,21 @@ segvn_init(void) if (segvn_maxpgszc == 0 || segvn_maxpgszc > maxszc) segvn_maxpgszc = maxszc; + if (segvn_use_regions && !hat_supported(HAT_SHARED_REGIONS, NULL)) + segvn_use_regions = 0; + + /* + * For now shared regions and text replication segvn support + * are mutually exclusive. This is acceptable because + * currently significant benefit from text replication was + * only observed on AMD64 NUMA platforms (due to relatively + * small L2$ size) and currently we don't support shared + * regions on x86. 
+ */ + if (segvn_use_regions && !segvn_disable_textrepl) { + segvn_disable_textrepl = 1; + } + if (lgrp_optimizations() && textrepl_size_thresh != (size_t)-1 && !segvn_disable_textrepl) { ulong_t i; @@ -476,9 +496,9 @@ segvn_create(struct seg *seg, void *argsp) int error = 0; size_t pgsz; lgrp_mem_policy_t mpolicy = LGRP_MEM_POLICY_DEFAULT; + int use_rgn = 0; int trok = 0; - ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); if (a->type != MAP_PRIVATE && a->type != MAP_SHARED) { @@ -495,6 +515,12 @@ segvn_create(struct seg *seg, void *argsp) /*NOTREACHED*/ } + if (a->type == MAP_PRIVATE && (a->flags & MAP_TEXT) && + a->vp != NULL && a->prot == (PROT_USER | PROT_READ | PROT_EXEC) && + segvn_use_regions) { + use_rgn = 1; + } + /* MAP_NORESERVE on a MAP_SHARED segment is meaningless. */ if (a->type == MAP_SHARED) a->flags &= ~MAP_NORESERVE; @@ -548,8 +574,13 @@ segvn_create(struct seg *seg, void *argsp) /* * Reserve any mapping structures that may be required. + * + * Don't do it for segments that may use regions. It's currently a + * noop in the hat implementations anyway. */ - hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP); + if (!use_rgn) { + hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP); + } if (a->cred) { cred = a->cred; @@ -571,10 +602,15 @@ segvn_create(struct seg *seg, void *argsp) seg, swresv, 0); } crfree(cred); - hat_unload(seg->s_as->a_hat, seg->s_base, - seg->s_size, HAT_UNLOAD_UNMAP); + if (!use_rgn) { + hat_unload(seg->s_as->a_hat, seg->s_base, + seg->s_size, HAT_UNLOAD_UNMAP); + } return (error); } + /* + * svntr_hashtab will be NULL if we support shared regions. + */ trok = ((a->flags & MAP_TEXT) && (seg->s_size > textrepl_size_thresh || (a->flags & _MAP_TEXTREPL)) && @@ -582,6 +618,8 @@ segvn_create(struct seg *seg, void *argsp) a->type == MAP_PRIVATE && swresv == 0 && !(a->flags & MAP_NORESERVE) && seg->s_as != &kas && a->vp->v_type == VREG); + + ASSERT(!trok || !use_rgn); } /* @@ -590,7 +628,7 @@ segvn_create(struct seg *seg, void *argsp) * explicit anon_map structure was supplied (e.g., SystemV shared * memory) or if we'll use text replication for this segment. */ - if (a->amp == NULL && !trok) { + if (a->amp == NULL && !use_rgn && !trok) { struct seg *pseg, *nseg; struct segvn_data *psvd, *nsvd; lgrp_mem_policy_t ppolicy, npolicy; @@ -730,6 +768,8 @@ segvn_create(struct seg *seg, void *argsp) svd->pageadvice = 0; svd->flags = (ushort_t)a->flags; svd->softlockcnt = 0; + svd->rcookie = HAT_INVALID_REGION_COOKIE; + if (a->szc != 0 && a->vp != NULL) { segvn_setvnode_mpss(a->vp); } @@ -858,6 +898,7 @@ segvn_create(struct seg *seg, void *argsp) ASSERT(seg->s_szc == 0); ASSERT(!IS_VMODSORT(pp->p_vnode)); + ASSERT(use_rgn == 0); hat_memload(seg->s_as->a_hat, addr, pp, svd->prot & ~PROT_WRITE, hat_flag); @@ -882,6 +923,15 @@ segvn_create(struct seg *seg, void *argsp) (void) lgrp_shm_policy_set(mpolicy, svd->amp, svd->anon_index, svd->vp, svd->offset, seg->s_size); + if (use_rgn) { + ASSERT(!trok); + ASSERT(svd->amp == NULL); + svd->rcookie = hat_join_region(seg->s_as->a_hat, seg->s_base, + seg->s_size, (void *)svd->vp, svd->offset, svd->prot, + (uchar_t)seg->s_szc, segvn_hat_rgn_unload_callback, + HAT_REGION_TEXT); + } + ASSERT(!trok || !(svd->prot & PROT_WRITE)); svd->tr_state = trok ? 
SEGVN_TR_INIT : SEGVN_TR_OFF; @@ -910,6 +960,11 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat) ASSERT(AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock)); ASSERT(seg1->s_ops == seg2->s_ops); + if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie) || + HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) { + return (-1); + } + /* both segments exist, try to merge them */ #define incompat(x) (svd1->x != svd2->x) if (incompat(vp) || incompat(maxprot) || @@ -968,26 +1023,22 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat) if ((nvpage = kmem_zalloc(nvpsize, KM_NOSLEEP)) == NULL) { return (-2); } + if (vpage1 != NULL) { bcopy(vpage1, nvpage, vpgtob(npages1)); - } - if (vpage2 != NULL) { - bcopy(vpage2, nvpage + npages1, vpgtob(npages2)); - } - for (vp = nvpage; vp < nvpage + npages1; vp++) { - if (svd2->pageprot && !svd1->pageprot) { + } else { + for (vp = nvpage; vp < nvpage + npages1; vp++) { VPP_SETPROT(vp, svd1->prot); - } - if (svd2->pageadvice && !svd1->pageadvice) { VPP_SETADVICE(vp, svd1->advice); } } - for (vp = nvpage + npages1; - vp < nvpage + npages1 + npages2; vp++) { - if (svd1->pageprot && !svd2->pageprot) { + + if (vpage2 != NULL) { + bcopy(vpage2, nvpage + npages1, vpgtob(npages2)); + } else { + for (vp = nvpage + npages1; + vp < nvpage + npages1 + npages2; vp++) { VPP_SETPROT(vp, svd2->prot); - } - if (svd1->pageadvice && !svd2->pageadvice) { VPP_SETADVICE(vp, svd2->advice); } } @@ -1126,6 +1177,10 @@ segvn_extend_prev(seg1, seg2, a, swresv) */ ASSERT(seg1->s_as && AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock)); + if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie)) { + return (-1); + } + /* second segment is new, try to extend first */ /* XXX - should also check cred */ if (svd1->vp != a->vp || svd1->maxprot != a->maxprot || @@ -1183,6 +1238,7 @@ segvn_extend_prev(seg1, seg2, a, swresv) ANON_LOCK_EXIT(&1->a_rwlock); } if (svd1->vpage != NULL) { + struct vpage *vp, *evp; new_vpage = kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)), KM_NOSLEEP); @@ -1191,14 +1247,11 @@ segvn_extend_prev(seg1, seg2, a, swresv) bcopy(svd1->vpage, new_vpage, vpgtob(seg_pages(seg1))); kmem_free(svd1->vpage, vpgtob(seg_pages(seg1))); svd1->vpage = new_vpage; - if (svd1->pageprot) { - struct vpage *vp, *evp; - vp = new_vpage + seg_pages(seg1); - evp = vp + seg_pages(seg2); - for (; vp < evp; vp++) - VPP_SETPROT(vp, a->prot); - } + vp = new_vpage + seg_pages(seg1); + evp = vp + seg_pages(seg2); + for (; vp < evp; vp++) + VPP_SETPROT(vp, a->prot); } size = seg2->s_size; seg_free(seg2); @@ -1236,6 +1289,10 @@ segvn_extend_next( */ ASSERT(seg2->s_as && AS_WRITE_HELD(seg2->s_as, &seg2->s_as->a_lock)); + if (HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) { + return (-1); + } + /* first segment is new, try to extend second */ /* XXX - should also check cred */ if (svd2->vp != a->vp || svd2->maxprot != a->maxprot || @@ -1288,6 +1345,7 @@ segvn_extend_next( ANON_LOCK_EXIT(&2->a_rwlock); } if (svd2->vpage != NULL) { + struct vpage *vp, *evp; new_vpage = kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)), KM_NOSLEEP); @@ -1301,14 +1359,11 @@ segvn_extend_next( vpgtob(seg_pages(seg2))); kmem_free(svd2->vpage, vpgtob(seg_pages(seg2))); svd2->vpage = new_vpage; - if (svd2->pageprot) { - struct vpage *vp, *evp; - vp = new_vpage; - evp = vp + seg_pages(seg1); - for (; vp < evp; vp++) - VPP_SETPROT(vp, a->prot); - } + vp = new_vpage; + evp = vp + seg_pages(seg1); + for (; vp < evp; vp++) + VPP_SETPROT(vp, a->prot); } size = seg1->s_size; seg_free(seg1); @@ -1379,10 +1434,14 @@ segvn_dup(struct seg *seg, 
struct seg *newseg) newsvd->flags = svd->flags; newsvd->softlockcnt = 0; newsvd->policy_info = svd->policy_info; + newsvd->rcookie = HAT_INVALID_REGION_COOKIE; + if ((amp = svd->amp) == NULL || svd->tr_state == SEGVN_TR_ON) { /* * Not attaching to a shared anon object. */ + ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie) || + svd->tr_state == SEGVN_TR_OFF); if (svd->tr_state == SEGVN_TR_ON) { ASSERT(newsvd->vp != NULL && amp != NULL); newsvd->tr_state = SEGVN_TR_INIT; @@ -1392,6 +1451,8 @@ segvn_dup(struct seg *seg, struct seg *newseg) newsvd->amp = NULL; newsvd->anon_index = 0; } else { + /* regions for now are only used on pure vnode segments */ + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); ASSERT(svd->tr_state == SEGVN_TR_OFF); newsvd->tr_state = SEGVN_TR_OFF; if (svd->type == MAP_SHARED) { @@ -1555,6 +1616,12 @@ retry: newsvd->maxprot, newsvd->type, newsvd->cred); } out: + if (error == 0 && HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(newsvd->amp == NULL); + ASSERT(newsvd->tr_state == SEGVN_TR_OFF); + newsvd->rcookie = svd->rcookie; + hat_dup_region(newseg->s_as->a_hat, newsvd->rcookie); + } return (error); } @@ -1566,6 +1633,30 @@ out: extern int free_pages; static void +segvn_hat_rgn_unload_callback(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr, + size_t r_size, void *r_obj, u_offset_t r_objoff) +{ + u_offset_t off; + size_t len; + vnode_t *vp = (vnode_t *)r_obj; + + ASSERT(eaddr > saddr); + ASSERT(saddr >= r_saddr); + ASSERT(saddr < r_saddr + r_size); + ASSERT(eaddr > r_saddr); + ASSERT(eaddr <= r_saddr + r_size); + ASSERT(vp != NULL); + + if (!free_pages) { + return; + } + + len = eaddr - saddr; + off = (saddr - r_saddr) + r_objoff; + free_vp_pages(vp, off, len); +} + +static void segvn_hat_unload_callback(hat_callback_t *cb) { struct seg *seg = cb->hcb_data; @@ -1582,7 +1673,6 @@ segvn_hat_unload_callback(hat_callback_t *cb) free_vp_pages(svd->vp, svd->offset + off, len); } - static int segvn_unmap(struct seg *seg, caddr_t addr, size_t len) { @@ -1599,7 +1689,6 @@ segvn_unmap(struct seg *seg, caddr_t addr, size_t len) size_t nsize; size_t oswresv; int reclaim = 1; - int unmap = 1; /* * We don't need any segment level locks for "segvn" data @@ -1641,7 +1730,19 @@ retry: int err; if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { ASSERT(seg->s_base != addr || seg->s_size != len); - if (svd->tr_state == SEGVN_TR_INIT) { + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + hat_leave_region(seg->s_as->a_hat, + svd->rcookie, HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; + /* + * could pass a flag to segvn_demote_range() + * below to tell it not to do any unloads but + * this case is rare enough to not bother for + * now. + */ + } else if (svd->tr_state == SEGVN_TR_INIT) { svd->tr_state = SEGVN_TR_OFF; } else if (svd->tr_state == SEGVN_TR_ON) { ASSERT(svd->amp != NULL); @@ -1671,25 +1772,35 @@ retry: return (error); } - if (svd->tr_state == SEGVN_TR_INIT) { - svd->tr_state = SEGVN_TR_OFF; + /* + * Remove any page locks set through this mapping. + * If text replication is not off no page locks could have been + * established via this mapping. 
+ */ + if (svd->tr_state == SEGVN_TR_OFF) { + (void) segvn_lockop(seg, addr, len, 0, MC_UNLOCK, NULL, 0); + } + + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + ASSERT(svd->type == MAP_PRIVATE); + hat_leave_region(seg->s_as->a_hat, svd->rcookie, + HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; } else if (svd->tr_state == SEGVN_TR_ON) { ASSERT(svd->amp != NULL); ASSERT(svd->pageprot == 0 && !(svd->prot & PROT_WRITE)); segvn_textunrepl(seg, 1); ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF); - unmap = 0; - } - - /* - * Remove any page locks set through this mapping. - */ - (void) segvn_lockop(seg, addr, len, 0, MC_UNLOCK, NULL, 0); - - if (unmap) { + } else { + if (svd->tr_state != SEGVN_TR_OFF) { + ASSERT(svd->tr_state == SEGVN_TR_INIT); + svd->tr_state = SEGVN_TR_OFF; + } /* * Unload any hardware translations in the range to be taken - * out. Use a callback to invoke free_vp_pages() effectively. + * out. Use a callback to invoke free_vp_pages() effectively. */ if (svd->vp != NULL && free_pages != 0) { callback.hcb_data = seg; @@ -1892,6 +2003,7 @@ retry: nsvd->offset = svd->offset + (uintptr_t)(nseg->s_base - seg->s_base); nsvd->swresv = 0; nsvd->softlockcnt = 0; + ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE); if (svd->vp != NULL) { VN_HOLD(nsvd->vp); @@ -2033,6 +2145,8 @@ segvn_free(struct seg *seg) ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); ASSERT(svd->tr_state == SEGVN_TR_OFF); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); + /* * Be sure to unlock pages. XXX Why do things get free'ed instead * of unmapped? XXX @@ -2294,7 +2408,12 @@ segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) if ((amp = svd->amp) != NULL) anon_index = svd->anon_index + seg_page(seg, addr); - hat_unlock(seg->s_as->a_hat, addr, len); + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->tr_state == SEGVN_TR_OFF); + hat_unlock_region(seg->s_as->a_hat, addr, len, svd->rcookie); + } else { + hat_unlock(seg->s_as->a_hat, addr, len); + } for (adr = addr; adr < addr + len; adr += PAGESIZE) { if (amp != NULL) { ANON_LOCK_ENTER(&->a_rwlock, RW_READER); @@ -2453,6 +2572,7 @@ segvn_faultpage( ASSERT(SEGVN_READ_HELD(seg->s_as, &svd->lock)); ASSERT(seg->s_szc == 0); + ASSERT(svd->tr_state != SEGVN_TR_INIT); /* * Initialize protection value for this page. @@ -2616,6 +2736,7 @@ segvn_faultpage( mutex_exit(&p->p_lock); } + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); hat_memload(hat, addr, pp, prot, hat_flag); if (!(hat_flag & HAT_LOAD_LOCK)) @@ -2740,7 +2861,12 @@ segvn_faultpage( prot &= ~PROT_WRITE; } - hat_memload(hat, addr, opp, prot & vpprot, hat_flag); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE || + (!svd->pageprot && svd->prot == (prot & vpprot))); + ASSERT(amp == NULL || + svd->rcookie == HAT_INVALID_REGION_COOKIE); + hat_memload_region(hat, addr, opp, prot & vpprot, hat_flag, + svd->rcookie); if (!(hat_flag & HAT_LOAD_LOCK)) page_unlock(opp); @@ -2751,6 +2877,8 @@ segvn_faultpage( return (0); } + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); + hat_setref(opp); ASSERT(amp != NULL && anon_lock); @@ -2784,6 +2912,7 @@ segvn_faultpage( * after unloading our translation. 
*/ if (hat_page_is_mapped(opp)) { + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); hat_unload(seg->s_as->a_hat, addr, PAGESIZE, HAT_UNLOAD); } @@ -2872,6 +3001,7 @@ segvn_faultpage( prot &= ~PROT_WRITE; } + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); hat_memload(hat, addr, pp, prot, hat_flag); if (!(hat_flag & HAT_LOAD_LOCK)) @@ -3642,6 +3772,7 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); ASSERT(seg->s_szc < NBBY * sizeof (int)); ASSERT(type != F_SOFTLOCK || lpgeaddr - a == maxpgsz); + ASSERT(svd->tr_state != SEGVN_TR_INIT); VM_STAT_COND_ADD(type == F_SOFTLOCK, segvnvmstats.fltvnpages[0]); VM_STAT_COND_ADD(type != F_SOFTLOCK, segvnvmstats.fltvnpages[1]); @@ -3962,6 +4093,8 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, * p_szc can't be changed for locked * swapfs pages. */ + ASSERT(svd->rcookie == + HAT_INVALID_REGION_COOKIE); hat_memload_array(hat, a, pgsz, ppa, prot, hat_flag); @@ -3976,9 +4109,12 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, goto next; } + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE || + (!svd->pageprot && svd->prot == (prot & vpprot))); + pfn = page_pptonum(ppa[0]); /* - * hat_page_demote() needs an EXCl lock on one of + * hat_page_demote() needs an SE_EXCL lock on one of * constituent page_t's and it decreases root's p_szc * last. This means if root's p_szc is equal szc and * all its constituent pages are locked @@ -4036,14 +4172,16 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, SEGVN_UPDATE_MODBITS(ppa, pages, rw, prot, vpprot); if (!xhat) { - hat_memload_array(hat, a, pgsz, ppa, - prot & vpprot, hat_flag); + hat_memload_array_region(hat, a, pgsz, + ppa, prot & vpprot, hat_flag, + svd->rcookie); } else { /* * avoid large xhat mappings to FS * pages so that hat_page_demote() * doesn't need to check for xhat * large mappings. + * Don't use regions with xhats. 
*/ for (i = 0; i < pages; i++) { hat_memload(hat, @@ -4149,14 +4287,15 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, prot, vpprot); if (upgrdfail && segvn_anypgsz_vnode) { /* SOFTLOCK case */ - hat_memload_array(hat, a, pgsz, - ppa, prot & vpprot, hat_flag); + hat_memload_array_region(hat, a, pgsz, + ppa, prot & vpprot, hat_flag, + svd->rcookie); } else { for (i = 0; i < pages; i++) { - hat_memload(hat, + hat_memload_region(hat, a + (i << PAGESHIFT), ppa[i], prot & vpprot, - hat_flag); + hat_flag, svd->rcookie); } } if (!(hat_flag & HAT_LOAD_LOCK)) { @@ -4214,8 +4353,8 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, } SEGVN_UPDATE_MODBITS(ppa, pages, rw, prot, vpprot); - hat_memload_array(hat, a, pgsz, ppa, - prot & vpprot, hat_flag); + hat_memload_array_region(hat, a, pgsz, ppa, + prot & vpprot, hat_flag, svd->rcookie); mutex_exit(szcmtx); if (!(hat_flag & HAT_LOAD_LOCK)) { for (i = 0; i < pages; i++) { @@ -4267,13 +4406,15 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, ASSERT(type == F_SOFTLOCK); for (i = 0; i < pages; i++) { ASSERT(ppa[i]->p_szc < szc); - hat_memload(hat, a + (i << PAGESHIFT), - ppa[i], prot & vpprot, hat_flag); + hat_memload_region(hat, + a + (i << PAGESHIFT), + ppa[i], prot & vpprot, hat_flag, + svd->rcookie); } } else { ASSERT(pplist != NULL || type == F_SOFTLOCK); - hat_memload_array(hat, a, pgsz, ppa, - prot & vpprot, hat_flag); + hat_memload_array_region(hat, a, pgsz, ppa, + prot & vpprot, hat_flag, svd->rcookie); } if (!(hat_flag & HAT_LOAD_LOCK)) { for (i = 0; i < pages; i++) { @@ -4452,6 +4593,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, ASSERT(type != F_SOFTUNLOCK); ASSERT(IS_P2ALIGNED(a, maxpgsz)); ASSERT(!brkcow || svd->tr_state == SEGVN_TR_OFF); + ASSERT(svd->tr_state != SEGVN_TR_INIT); ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); @@ -4559,6 +4701,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, if (lgrp_optimizations()) page_migrate(seg, a, ppa, pages); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); if (type == F_SOFTLOCK && svd->vp == NULL) { /* * All pages in ppa array belong to the same @@ -4769,6 +4912,7 @@ segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, int brkcow = BREAK_COW_SHARE(rw, type, svd->type); ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE); /* * First handle the easy stuff @@ -4788,6 +4932,8 @@ segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, return (0); } + ASSERT(svd->tr_state == SEGVN_TR_OFF || + !HAT_IS_REGION_COOKIE_VALID(svd->rcookie)); if (brkcow == 0) { if (svd->tr_state == SEGVN_TR_INIT) { SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); @@ -4804,6 +4950,13 @@ segvn_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, } } else if (svd->tr_state != SEGVN_TR_OFF) { SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); + + if (rw == S_WRITE && svd->tr_state != SEGVN_TR_OFF) { + ASSERT(!svd->pageprot && !(svd->prot & PROT_WRITE)); + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + return (FC_PROT); + } + if (svd->tr_state == SEGVN_TR_ON) { ASSERT(svd->vp != NULL && svd->amp != NULL); segvn_textunrepl(seg, 0); @@ -4850,6 +5003,26 @@ top: } } + if (brkcow && HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + /* this must be SOFTLOCK S_READ fault */ + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + 
SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + /* + * this must be the first ever non S_READ_NOCOW + * softlock for this segment. + */ + ASSERT(svd->softlockcnt == 0); + hat_leave_region(seg->s_as->a_hat, svd->rcookie, + HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; + } + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + goto top; + } + /* * We can't allow the long term use of softlocks for vmpss segments, * because in some file truncation cases we should be able to demote @@ -4917,6 +5090,7 @@ top: * Check to see if we need to allocate an anon_map structure. */ if (svd->amp == NULL && (svd->vp == NULL || brkcow)) { + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); /* * Drop the "read" lock on the segment and acquire * the "write" version since we have to allocate the @@ -4977,6 +5151,7 @@ top: page = seg_page(seg, addr); if (amp != NULL) { + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); anon_index = svd->anon_index + page; if (type == F_PROT && rw == S_READ && @@ -5379,9 +5554,13 @@ slow: * for migration, so they will get migrated * properly on fault */ + ASSERT(amp == NULL || + svd->rcookie == HAT_INVALID_REGION_COOKIE); if ((prot & PROT_READ) && !PP_ISMIGRATE(pp)) { - hat_memload(hat, seg->s_base + diff, - pp, prot, hat_flag); + hat_memload_region(hat, + seg->s_base + diff, + pp, prot, hat_flag, + svd->rcookie); } } if (amp != NULL) @@ -5466,6 +5645,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) size_t pgsz; pgcnt_t pgcnt; anon_sync_obj_t cookie; + int unload_done = 0; ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); @@ -5500,12 +5680,20 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) } } - if (svd->tr_state == SEGVN_TR_INIT) { + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + hat_leave_region(seg->s_as->a_hat, svd->rcookie, + HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; + unload_done = 1; + } else if (svd->tr_state == SEGVN_TR_INIT) { svd->tr_state = SEGVN_TR_OFF; } else if (svd->tr_state == SEGVN_TR_ON) { ASSERT(svd->amp != NULL); segvn_textunrepl(seg, 0); ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF); + unload_done = 1; } if ((prot & PROT_WRITE) && svd->type == MAP_SHARED && @@ -5513,7 +5701,6 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) ASSERT(vn_is_mapped(svd->vp, V_WRITE)); segvn_inval_trcache(svd->vp); } - if (seg->s_szc != 0) { int err; pgsz = page_get_pagesize(seg->s_szc); @@ -5590,7 +5777,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) } } - if (addr == seg->s_base && len == seg->s_size && svd->pageprot == 0) { + if (addr == seg->s_base && len == seg->s_size && svd->vpage == NULL) { if (svd->prot == prot) { SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); return (0); /* all done */ @@ -5613,6 +5800,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) * the operation. */ segvn_vpage(seg); + svd->pageprot = 1; if ((amp = svd->amp) != NULL) { anon_idx = svd->anon_index + seg_page(seg, addr); ASSERT(seg->s_szc == 0 || @@ -5699,6 +5887,10 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) * the translations to the things we've updated so far. 
*/ if (svp != evp) { + if (unload_done) { + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + return (IE_NOMEM); + } len = (svp - &svd->vpage[seg_page(seg, addr)]) * PAGESIZE; ASSERT(seg->s_szc == 0 || IS_P2ALIGNED(len, pgsz)); @@ -5710,12 +5902,18 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) } } else { segvn_vpage(seg); + svd->pageprot = 1; evp = &svd->vpage[seg_page(seg, addr + len)]; for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) { VPP_SETPROT(svp, prot); } } + if (unload_done) { + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + return (0); + } + if (((prot & PROT_WRITE) != 0 && (svd->vp != NULL || svd->type == MAP_PRIVATE)) || (prot & ~PROT_USER) == PROT_NONE) { @@ -5848,7 +6046,13 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) } } - if (svd->tr_state == SEGVN_TR_INIT) { + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + hat_leave_region(seg->s_as->a_hat, svd->rcookie, + HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; + } else if (svd->tr_state == SEGVN_TR_INIT) { svd->tr_state = SEGVN_TR_OFF; } else if (svd->tr_state == SEGVN_TR_ON) { ASSERT(svd->amp != NULL); @@ -5924,6 +6128,7 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) if (err != 0) { return (err); } + ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE); err = segvn_concat(seg, nseg, 1); if (err == -1) { return (EINVAL); @@ -6028,27 +6233,34 @@ segvn_clrszc(struct seg *seg) struct anon *ap, *oldap; uint_t prot = svd->prot, vpprot; int pageflag = 0; - int unmap = 1; ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) || SEGVN_WRITE_HELD(seg->s_as, &svd->lock)); + ASSERT(svd->softlockcnt == 0); if (vp == NULL && amp == NULL) { + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); seg->s_szc = 0; return (0); } - if (svd->tr_state == SEGVN_TR_INIT) { - svd->tr_state = SEGVN_TR_OFF; + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + hat_leave_region(seg->s_as->a_hat, svd->rcookie, + HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; } else if (svd->tr_state == SEGVN_TR_ON) { ASSERT(svd->amp != NULL); segvn_textunrepl(seg, 1); ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF); amp = NULL; - unmap = 0; - } + } else { + if (svd->tr_state != SEGVN_TR_OFF) { + ASSERT(svd->tr_state == SEGVN_TR_INIT); + svd->tr_state = SEGVN_TR_OFF; + } - if (unmap) { /* * do HAT_UNLOAD_UNMAP since we are changing the pagesize. 
* unload argument is 0 when we are freeing the segment @@ -6223,6 +6435,7 @@ segvn_split_seg(struct seg *seg, caddr_t addr) ASSERT(addr >= seg->s_base); ASSERT(addr <= seg->s_base + seg->s_size); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); if (addr == seg->s_base || addr == seg->s_base + seg->s_size) return (seg); @@ -6236,6 +6449,7 @@ segvn_split_seg(struct seg *seg, caddr_t addr) nseg->s_data = (void *)nsvd; nseg->s_szc = seg->s_szc; *nsvd = *svd; + ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE); nsvd->seg = nseg; rw_init(&nsvd->lock, NULL, RW_DEFAULT, NULL); @@ -6369,6 +6583,7 @@ segvn_demote_range( ASSERT(seg->s_base != addr || seg->s_size != len); ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size); ASSERT(svd->softlockcnt == 0); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); ASSERT(szcvec == 0 || (flag == SDR_END && svd->type == MAP_SHARED)); CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr); @@ -7328,6 +7543,7 @@ segvn_lockop(struct seg *seg, caddr_t addr, size_t len, * by lazily testing for its existence. */ if (op == MC_LOCK && svd->amp == NULL && svd->vp == NULL) { + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); svd->amp = anonmap_alloc(seg->s_size, 0, ANON_SLEEP); svd->amp->a_szc = seg->s_szc; } @@ -7681,7 +7897,8 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) * if don't need to do lgroup optimizations on this system */ - if ((behav == MADV_SEQUENTIAL && seg->s_szc != 0) || + if ((behav == MADV_SEQUENTIAL && + (seg->s_szc != 0 || HAT_IS_REGION_COOKIE_VALID(svd->rcookie))) || (!lgrp_optimizations() && (behav == MADV_ACCESS_DEFAULT || behav == MADV_ACCESS_LWP || behav == MADV_ACCESS_MANY))) { SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); @@ -7834,6 +8051,7 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) * detection in segvn_fault */ ASSERT(seg->s_szc == 0); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD); /* FALLTHROUGH */ @@ -7932,6 +8150,15 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) if (already_set || svd->type == MAP_SHARED) break; + if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) { + ASSERT(svd->amp == NULL); + ASSERT(svd->tr_state == SEGVN_TR_OFF); + ASSERT(svd->softlockcnt == 0); + hat_leave_region(seg->s_as->a_hat, svd->rcookie, + HAT_REGION_TEXT); + svd->rcookie = HAT_INVALID_REGION_COOKIE; + } + /* * Split off new segment if advice only applies to a * portion of existing segment starting in middle @@ -8053,6 +8280,7 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) break; case MADV_SEQUENTIAL: ASSERT(seg->s_szc == 0); + ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE); hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD); /* FALLTHROUGH */ case MADV_NORMAL: @@ -8091,7 +8319,6 @@ segvn_vpage(struct seg *seg) * and the advice from the segment itself to the individual pages. 
*/ if (svd->vpage == NULL) { - svd->pageprot = 1; svd->pageadvice = 1; svd->vpage = kmem_zalloc(seg_pages(seg) * sizeof (struct vpage), KM_SLEEP); @@ -8724,6 +8951,7 @@ segvn_textrepl(struct seg *seg) ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock)); ASSERT(p != NULL); ASSERT(svd->tr_state == SEGVN_TR_INIT); + ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie)); ASSERT(svd->flags & MAP_TEXT); ASSERT(svd->type == MAP_PRIVATE); ASSERT(vp != NULL && svd->amp == NULL); @@ -8991,6 +9219,7 @@ segvn_textunrepl(struct seg *seg, int unload_unmap) ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) || SEGVN_WRITE_HELD(seg->s_as, &svd->lock)); ASSERT(svd->tr_state == SEGVN_TR_ON); + ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie)); ASSERT(svd->amp != NULL); ASSERT(svd->amp->refcnt >= 1); ASSERT(svd->anon_index == 0); @@ -9185,6 +9414,7 @@ segvn_trupdate_seg(struct seg *seg, ASSERT(seg->s_data == (void *)svd); ASSERT(seg->s_szc == svntrp->tr_szc); ASSERT(svd->tr_state == SEGVN_TR_ON); + ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie)); ASSERT(svd->amp != NULL); ASSERT(svd->tr_policy_info.mem_policy == LGRP_MEM_POLICY_NEXT_SEG); ASSERT(svd->tr_policy_info.mem_lgrpid != LGRP_NONE); diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h index d8c8be8ff4..26bd202636 100644 --- a/usr/src/uts/common/vm/seg_vn.h +++ b/usr/src/uts/common/vm/seg_vn.h @@ -103,6 +103,7 @@ typedef struct segvn_data { ushort_t flags; /* flags - from sys/mman.h */ ssize_t softlockcnt; /* # of pages SOFTLOCKED in seg */ lgrp_mem_policy_info_t policy_info; /* memory allocation policy */ + hat_region_cookie_t rcookie; /* region for hat calls */ lgrp_mem_policy_info_t tr_policy_info; /* memory allocation for TR */ struct seg *seg; /* pointer back to seg */ struct segvn_data *svn_trnext; /* textrepl list next link */ diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c index e28e2aaa4c..f5ff9d43cd 100644 --- a/usr/src/uts/common/vm/vm_as.c +++ b/usr/src/uts/common/vm/vm_as.c @@ -783,6 +783,7 @@ as_dup(struct as *as, struct as **outas) AS_SETBUSY(newas); mutex_exit(&newas->a_contents); + (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD); for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c index ab7581fb36..33139517b3 100644 --- a/usr/src/uts/common/vm/vm_page.c +++ b/usr/src/uts/common/vm/vm_page.c @@ -6188,7 +6188,7 @@ page_share_cnt(page_t *pp) int page_isshared(page_t *pp) { - return (hat_page_getshare(pp) > 1); + return (hat_page_checkshare(pp, 1)); } int diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c index e13af4e1ef..007c73214c 100644 --- a/usr/src/uts/i86pc/vm/hat_i86.c +++ b/usr/src/uts/i86pc/vm/hat_i86.c @@ -1466,6 +1466,14 @@ hat_memload( panic("unexpected hati_load_common() failure"); } +/* ARGSUSED */ +void +hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp, + uint_t attr, uint_t flags, hat_region_cookie_t rcookie) +{ + hat_memload(hat, addr, pp, attr, flags); +} + /* * Load the given array of page structs using large pages when possible */ @@ -1559,6 +1567,15 @@ hat_memload_array( } } +/* ARGSUSED */ +void +hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len, + struct page **pps, uint_t attr, uint_t flags, + hat_region_cookie_t rcookie) +{ + hat_memload_array(hat, addr, len, pps, attr, flags); +} + /* * void hat_devload(hat, addr, len, pf, attr, flags) * load/lock the given page frame number @@ -1713,6 +1730,14 @@ 
hat_unlock(hat_t *hat, caddr_t addr, size_t len) htable_release(ht); } +/* ARGSUSED */ +void +hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len, + hat_region_cookie_t rcookie) +{ + panic("No shared region support on x86"); +} + /* * Cross call service routine to demap a virtual page on * the current CPU or flush all mappings in TLB. @@ -3512,6 +3537,16 @@ hat_page_getshare(page_t *pp) } /* + * Return 1 the number of mappings exceeds sh_thresh. Return 0 + * otherwise. + */ +int +hat_page_checkshare(page_t *pp, ulong_t sh_thresh) +{ + return (hat_page_getshare(pp) > sh_thresh); +} + +/* * hat_softlock isn't supported anymore */ /*ARGSUSED*/ @@ -3546,6 +3581,9 @@ hat_supported(enum hat_features feature, void *arg) case HAT_VMODSORT: return (1); + case HAT_SHARED_REGIONS: + return (0); + default: panic("hat_supported() - unknown feature"); } @@ -3847,6 +3885,43 @@ hati_update_pte(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new) return (0); } +/* ARGSUSED */ +void +hat_join_srd(struct hat *sfmmup, vnode_t *evp) +{ +} + +/* ARGSUSED */ +hat_region_cookie_t +hat_join_region(struct hat *sfmmup, + caddr_t r_saddr, + size_t r_size, + void *r_obj, + u_offset_t r_objoff, + uchar_t r_perm, + uchar_t r_pgszc, + hat_rgn_cb_func_t r_cb_function, + uint_t flags) +{ + panic("No shared region support on x86"); + return (HAT_INVALID_REGION_COOKIE); +} + +/* ARGSUSED */ +void +hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags) +{ + panic("No shared region support on x86"); +} + +/* ARGSUSED */ +void +hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie) +{ + panic("No shared region support on x86"); +} + + /* * Kernel Physical Mapping (kpm) facility * diff --git a/usr/src/uts/sfmmu/ml/sfmmu_asm.s b/usr/src/uts/sfmmu/ml/sfmmu_asm.s index b1e6348e6d..eff84e9e60 100644 --- a/usr/src/uts/sfmmu/ml/sfmmu_asm.s +++ b/usr/src/uts/sfmmu/ml/sfmmu_asm.s @@ -492,10 +492,10 @@ sfmmu_enable_intrs(uint_t pstate_save) {} /* ARGSUSED */ -void -sfmmu_alloc_ctx(sfmmu_t *sfmmup, int allocflag, struct cpu *cp) -{} - +int +sfmmu_alloc_ctx(sfmmu_t *sfmmup, int allocflag, struct cpu *cp, int shflag) +{ return(0); } + /* * Use cas, if tte has changed underneath us then reread and try again. * In the case of a retry, it will update sttep with the new original. @@ -562,7 +562,11 @@ sfmmu_panic8: .global sfmmu_panic9 sfmmu_panic9: .asciz "sfmmu_asm: cnum is greater than MAX_SFMMU_CTX_VAL" - + + .global sfmmu_panic10 +sfmmu_panic10: + .asciz "sfmmu_asm: valid SCD with no 3rd scd TSB" + ENTRY(sfmmu_disable_intrs) rdpr %pstate, %o0 #ifdef DEBUG @@ -596,13 +600,17 @@ sfmmu_panic9: * %o0 - sfmmup * %o1 - allocflag * %o2 - CPU + * %o3 - sfmmu private/shared flag + * + * ret - 0: no ctx is allocated + * 1: a ctx is allocated */ ENTRY_NP(sfmmu_alloc_ctx) #ifdef DEBUG - sethi %hi(ksfmmup), %o3 - ldx [%o3 + %lo(ksfmmup)], %o3 - cmp %o3, %o0 + sethi %hi(ksfmmup), %g1 + ldx [%g1 + %lo(ksfmmup)], %g1 + cmp %g1, %o0 bne,pt %xcc, 0f nop @@ -618,12 +626,14 @@ sfmmu_panic9: 7: retl - nop + mov %g0, %o0 ! %o0 = ret = 0 0: PANIC_IF_INTR_ENABLED_PSTR(sfmmu_ei_l1, %g1) -#endif /* DEBUG */ - +#endif /* DEBUG */ + + mov %o3, %g1 ! save sfmmu pri/sh flag in %g1 + ! load global mmu_ctxp info ldx [%o2 + CPU_MMU_CTXP], %o3 ! %o3 = mmu_ctx_t ptr lduw [%o2 + CPU_MMU_IDX], %g2 ! %g2 = mmu index @@ -639,13 +649,16 @@ sfmmu_panic9: sethi %hi(panicstr), %g1 ! 
test if panicstr is already set ldx [%g1 + %lo(panicstr)], %g1 tst %g1 - bnz,pn %icc, 3f + bnz,pn %icc, 1f nop sethi %hi(sfmmu_panic8), %o0 call panic or %o0, %lo(sfmmu_panic8), %o0 -3: +1: + retl + mov %g0, %o0 ! %o0 = ret = 0 +3: #endif ! load HAT sfmmu_ctxs[mmuid] gnum, cnum @@ -668,6 +681,7 @@ sfmmu_panic9: nop ! cnum == INVALID, check allocflag + mov %g0, %g4 ! %g4 = ret = 0 brz,pt %o1, 8f ! allocflag == 0, skip ctx allocation, bail mov %g6, %o1 @@ -677,6 +691,7 @@ sfmmu_panic9: 1: ! valid HAT cnum, check gnum cmp %g5, %o4 + mov 1, %g4 !%g4 = ret = 1 be,a,pt %icc, 8f ! gnum unchanged, go to done mov %g6, %o1 @@ -710,6 +725,7 @@ sfmmu_panic9: nop ! cnum == INVALID, check allocflag + mov %g0, %g4 ! %g4 = ret = 0 brz,pt %o1, 2f ! allocflag == 0, called from resume, set hw mov %g6, %o1 @@ -719,6 +735,7 @@ sfmmu_panic9: 1: ! valid HAT cnum, check gnum cmp %g5, %o4 + mov 1, %g4 ! %g4 = ret = 1 be,a,pt %icc, 2f ! gnum unchanged, go to done mov %g6, %o1 @@ -757,18 +774,20 @@ sfmmu_panic9: add %o1, 1, %o5 ! %o5 = mmu_ctxp->cnum + 1 /* - * cnum reachs max, update HAT with INVALID + * cnum reachs max, bail, so wrap around can be performed later. */ set INVALID_CONTEXT, %o1 - - /* - * update hat cnum to INVALID, sun4v sfmmu_load_mmustate checks - * hat cnum to determine if set the number of TSBs to 0. + /* + * When the routine is called by shared ctx, we want to set + * both private and shared ctx regs to INVALID. In order to + * do so, we set the sfmmu priv/shared flag to 'private' regardless. + * so that private ctx reg will be set to invalid. + * Note that values written to private context register are + * automatically written to shared context register as well. */ - sllx %o4, SFMMU_MMU_GNUM_RSHIFT, %o4 - or %o4, %o1, %o4 - stx %o4, [%g2 + SFMMU_CTXS] - + mov %g0, %g1 ! %g1 = sfmmu private/shared flag + mov %g0, %g4 ! %g4 = ret = 0 + membar #LoadStore|#StoreStore ba,pt %icc, 8f clrb [%o0 + SFMMU_CTX_LOCK] @@ -798,30 +817,28 @@ sfmmu_panic9: membar #LoadStore|#StoreStore clrb [%o0 + SFMMU_CTX_LOCK] - + + mov 1, %g4 ! %g4 = ret = 1 8: /* * program the secondary context register * * %o1 = cnum + * %g1 = sfmmu private/shared flag (0:private, 1:shared) */ + #ifdef sun4u ldub [%o0 + SFMMU_CEXT], %o2 sll %o2, CTXREG_EXT_SHIFT, %o2 or %o1, %o2, %o1 #endif - - mov MMU_SCONTEXT, %o4 - sethi %hi(FLUSH_ADDR), %o5 - stxa %o1, [%o4]ASI_MMU_CTX ! set 2nd context reg. - flush %o5 - + SET_SECCTX(%o1, %g1, %o4, %o5) + retl - nop + mov %g4, %o0 ! %o0 = ret SET_SIZE(sfmmu_alloc_ctx) - ENTRY_NP(sfmmu_modifytte) ldx [%o2], %g3 /* current */ ldx [%o0], %g1 /* original */ @@ -915,6 +932,11 @@ sfmmu_kpm_patch_tsbm(void) { } +void +sfmmu_patch_shctx(void) +{ +} + /* ARGSUSED */ void sfmmu_load_tsbe(struct tsbe *tsbep, uint64_t vaddr, tte_t *ttep, int phys) @@ -1122,6 +1144,10 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift) sethi %hi(iktsb), %o0 ! to search call sfmmu_fixup_mmu_asi ! patch kitlb miss or %o0, %lo(iktsb), %o0 + mov 6, %o3 ! number of instructions + sethi %hi(iktsb4m), %o0 ! to search + call sfmmu_fixup_mmu_asi ! patch kitlb4m miss + or %o0, %lo(iktsb4m), %o0 mov %o4, %o7 ! 
retore return pc -- leaf retl nop @@ -1155,6 +1181,10 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift) sethi %hi(ktsb4m_szcode), %o1 ld [%o1 + %lo(ktsb4m_szcode)], %o1 /* %o1 = ktsb4m size code */ + sethi %hi(iktsb4m), %o0 + call sfmmu_fix_ktlb_traptable + or %o0, %lo(iktsb4m), %o0 + sethi %hi(dktsb4m), %o0 call sfmmu_fix_ktlb_traptable or %o0, %lo(dktsb4m), %o0 @@ -1194,6 +1224,10 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift) call sfmmu_fixup_setx ! patch value of ktsb4m base addr or %o0, %lo(dktsb4mbase), %o0 + sethi %hi(iktsb4mbase), %o0 + call sfmmu_fixup_setx ! patch value of ktsb4m base addr + or %o0, %lo(iktsb4mbase), %o0 + sethi %hi(sfmmu_kprot_patch_ktsb4m_base), %o0 call sfmmu_fixup_setx ! patch value of ktsb4m base addr or %o0, %lo(sfmmu_kprot_patch_ktsb4m_base), %o0 @@ -1301,7 +1335,7 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift) */ set tsb_slab_shift, %o1 set MMU_PAGESHIFT4M, %o4 - ldsw [%o1], %o3 + lduw [%o1], %o3 subcc %o4, %o3, %o4 bz,pt %icc, 1f /* delay slot safe */ @@ -1320,7 +1354,7 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift) 1: /* patch TSBREG_VAMASK used to set up TSB base register */ set tsb_slab_mask, %o1 - lduw [%o1], %o4 + ldx [%o1], %o4 sethi %hi(sfmmu_tsb_1st_tsbreg_vamask), %o0 call sfmmu_fixup_or or %o0, %lo(sfmmu_tsb_1st_tsbreg_vamask), %o0 @@ -1333,6 +1367,38 @@ sfmmu_kpm_unload_tsb(caddr_t addr, int vpshift) #endif /* UTSB_PHYS */ SET_SIZE(sfmmu_patch_utsb) + ENTRY_NP(sfmmu_patch_shctx) +#ifdef sun4u + retl + nop +#else /* sun4u */ + set sfmmu_shctx_cpu_mondo_patch, %o0 + MAKE_JMP_INSTR(5, %o1, %o2) ! jmp %g5 + st %o1, [%o0] + flush %o0 + MAKE_NOP_INSTR(%o1) + add %o0, I_SIZE, %o0 ! next instr + st %o1, [%o0] + flush %o0 + + set sfmmu_shctx_user_rtt_patch, %o0 + st %o1, [%o0] ! nop 1st instruction + flush %o0 + add %o0, I_SIZE, %o0 + st %o1, [%o0] ! nop 2nd instruction + flush %o0 + add %o0, I_SIZE, %o0 + st %o1, [%o0] ! nop 3rd instruction + flush %o0 + add %o0, I_SIZE, %o0 + st %o1, [%o0] ! nop 4th instruction + flush %o0 + add %o0, I_SIZE, %o0 + st %o1, [%o0] ! nop 5th instruction + retl + flush %o0 +#endif /* sun4u */ + SET_SIZE(sfmmu_patch_shctx) /* * Routine that loads an entry into a tsb using virtual addresses. @@ -2136,7 +2202,7 @@ label/**/1: ;\ label/**/2: ;\ brz,pt ismseg, label/**/3 /* no mapping */ ;\ add ismhat, IMAP_VB_SHIFT, tmp1 /* tmp1 = vb_shift addr */ ;\ - lduha [tmp1]ASI_MEM, tmp1 /* tmp1 = vb shift*/ ;\ + lduba [tmp1]ASI_MEM, tmp1 /* tmp1 = vb shift*/ ;\ srlx ismseg, tmp1, tmp2 /* tmp2 = vbase */ ;\ srlx tagacc, tmp1, tmp1 /* tmp1 = va seg*/ ;\ sub tmp1, tmp2, tmp2 /* tmp2 = va - vbase */ ;\ @@ -2195,7 +2261,9 @@ label/**/2: ;\ #define MAKE_HASHTAG(vapg, hatid, hmeshift, hashno, hblktag) \ sllx vapg, hmeshift, vapg ;\ - or vapg, hashno, hblktag + mov hashno, hblktag ;\ + sllx hblktag, HTAG_REHASH_SHIFT, hblktag ;\ + or vapg, hblktag, hblktag /* * Function to traverse hmeblk hash link list and find corresponding match. @@ -2238,6 +2306,53 @@ label/**/1: ;\ ldxa [hmebp]ASI_MEM, hmeblkpa /* hmeblk ptr pa */ ;\ label/**/2: +/* + * Function to traverse hmeblk hash link list and find corresponding match. + * The search is done using physical pointers. It returns the physical address + * and virtual address pointers to the hmeblk that matches with the tag + * provided. 
+ * Parameters: + * hmeblktag = register with hmeblk tag match (rid field is 0) + * hatid = register with hatid (pointer to SRD) + * hmeblkpa = register where physical ptr will be stored + * hmeblkva = register where virtual ptr will be stored + * tmp1 = tmp reg + * tmp2 = tmp reg + * label: temporary label + */ + +#define HMEHASH_SEARCH_SHME(hmeblktag, hatid, hmeblkpa, hmeblkva, \ + tsbarea, tmp1, tmp2, label) \ +label/**/1: ;\ + brz,pn hmeblkva, label/**/4 ;\ + HAT_HLINK_DBSTAT(hatid, tsbarea, tmp1, tmp2) ;\ + add hmeblkpa, HMEBLK_TAG, tmp2 ;\ + ldxa [tmp2]ASI_MEM, tmp1 /* read 1st part of tag */ ;\ + add tmp2, CLONGSIZE, tmp2 ;\ + ldxa [tmp2]ASI_MEM, tmp2 /* read 2nd part of tag */ ;\ + xor tmp1, hmeblktag, tmp1 ;\ + xor tmp2, hatid, tmp2 ;\ + brz,pn tmp2, label/**/3 /* branch on hit */ ;\ + add hmeblkpa, HMEBLK_NEXT, tmp2 ;\ +label/**/2: ;\ + ldna [tmp2]ASI_MEM, hmeblkva /* hmeblk ptr va */ ;\ + add hmeblkpa, HMEBLK_NEXTPA, tmp2 ;\ + ba,pt %xcc, label/**/1 ;\ + ldxa [tmp2]ASI_MEM, hmeblkpa /* hmeblk ptr pa */ ;\ +label/**/3: ;\ + cmp tmp1, SFMMU_MAX_HME_REGIONS ;\ + bgeu,pt %xcc, label/**/2 ;\ + add hmeblkpa, HMEBLK_NEXT, tmp2 ;\ + and tmp1, BT_ULMASK, tmp2 ;\ + srlx tmp1, BT_ULSHIFT, tmp1 ;\ + sllx tmp1, CLONGSHIFT, tmp1 ;\ + add tsbarea, tmp1, tmp1 ;\ + ldx [tmp1 + TSBMISS_SHMERMAP], tmp1 ;\ + srlx tmp1, tmp2, tmp1 ;\ + btst 0x1, tmp1 ;\ + bz,pn %xcc, label/**/2 ;\ + add hmeblkpa, HMEBLK_NEXT, tmp2 ;\ +label/**/4: #if ((1 << SFHME_SHIFT) != SFHME_SIZE) #error HMEBLK_TO_HMENT assumes sf_hment is power of 2 in size @@ -2247,16 +2362,19 @@ label/**/2: * HMEBLK_TO_HMENT is a macro that given an hmeblk and a vaddr returns * he offset for the corresponding hment. * Parameters: - * vaddr = register with virtual address - * hmeblkpa = physical pointer to hme_blk - * hment = register where address of hment will be stored - * hmentoff = register where hment offset will be stored - * label1 = temporary label + * In: + * vaddr = register with virtual address + * hmeblkpa = physical pointer to hme_blk + * Out: + * hmentoff = register where hment offset will be stored + * hmemisc = hblk_misc + * Scratch: + * tmp1 */ -#define HMEBLK_TO_HMENT(vaddr, hmeblkpa, hmentoff, tmp1, label1) \ +#define HMEBLK_TO_HMENT(vaddr, hmeblkpa, hmentoff, hmemisc, tmp1, label1)\ add hmeblkpa, HMEBLK_MISC, hmentoff ;\ - lda [hmentoff]ASI_MEM, tmp1 ;\ - andcc tmp1, HBLK_SZMASK, %g0 /* tmp1 = get_hblk_sz(%g5) */ ;\ + lda [hmentoff]ASI_MEM, hmemisc ;\ + andcc hmemisc, HBLK_SZMASK, %g0 ;\ bnz,a,pn %icc, label1 /* if sz != TTE8K branch */ ;\ or %g0, HMEBLK_HME1, hmentoff ;\ srl vaddr, MMU_PAGESHIFT, tmp1 ;\ @@ -2274,26 +2392,23 @@ label1: * hmeblkpa = PA of hment if found, otherwise clobbered (out) * hmeblkva = VA of hment if found, otherwise clobbered (out) * tsbarea = pointer to the tsbmiss area for this cpu. (in) - * hmentoff = temporarily stores hment offset (clobbered) + * hmemisc = hblk_misc if TTE is found (out), otherwise clobbered * hmeshift = constant/register to shift VA to obtain the virtual pfn * for this page size. * hashno = constant/register hash number * label = temporary label for branching within macro. * foundlabel = label to jump to when tte is found. * suspendlabel= label to jump to when tte is suspended. - * exitlabel = label to jump to when tte is not found. The hmebp lock - * is still held at this time. + * exitlabel = label to jump to when tte is not found. 
* - * The caller should set up the tsbmiss->scratch[2] field correctly before - * calling this funciton (aka TSBMISS_SCRATCH + TSBMISS_HATID) */ -#define GET_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, hmentoff, \ +#define GET_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, hmemisc, \ hmeshift, hashno, label, foundlabel, suspendlabel, exitlabel) \ ;\ stn tagacc, [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)] ;\ stn hatid, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)] ;\ HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, tte, \ - hmeblkpa, label/**/5, hmentoff, hmeblkva) ;\ + hmeblkpa, label/**/5, hmemisc, hmeblkva) ;\ ;\ /* ;\ * tagacc = tagacc ;\ @@ -2301,21 +2416,22 @@ label1: * tsbarea = tsbarea ;\ * tte = hmebp (hme bucket pointer) ;\ * hmeblkpa = vapg (virtual page) ;\ - * hmentoff, hmeblkva = scratch ;\ + * hmemisc, hmeblkva = scratch ;\ */ ;\ - MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmentoff) ;\ + MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmemisc) ;\ + or hmemisc, SFMMU_INVALID_SHMERID, hmemisc ;\ ;\ /* ;\ * tagacc = tagacc ;\ * hatid = hatid ;\ * tte = hmebp ;\ * hmeblkpa = CLOBBERED ;\ - * hmentoff = htag_bspage & hashno ;\ + * hmemisc = htag_bspage+hashno+invalid_rid ;\ * hmeblkva = scratch ;\ */ ;\ stn tte, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)] ;\ HMELOCK_ENTER(tte, hmeblkpa, hmeblkva, label/**/3, ASI_MEM) ;\ - HMEHASH_SEARCH(tte, hmentoff, hatid, hmeblkpa, hmeblkva, \ + HMEHASH_SEARCH(tte, hmemisc, hatid, hmeblkpa, hmeblkva, \ tsbarea, tagacc, label/**/1) ;\ /* ;\ * tagacc = CLOBBERED ;\ @@ -2335,26 +2451,160 @@ label/**/4: ;\ * Now we calculate the corresponding tte. ;\ * ;\ * tagacc = tagacc ;\ - * hatid = clobbered ;\ + * hatid = hatid ;\ + * tte = clobbered ;\ + * hmeblkpa = hmeblkpa ;\ + * hmemisc = hblktag ;\ + * hmeblkva = hmeblkva ;\ + */ ;\ + HMEBLK_TO_HMENT(tagacc, hmeblkpa, hatid, hmemisc, tte, \ + label/**/2) ;\ + ;\ + /* ;\ + * tagacc = tagacc ;\ + * hatid = hmentoff ;\ + * tte = clobbered ;\ + * hmeblkpa = hmeblkpa ;\ + * hmemisc = hblk_misc ;\ + * hmeblkva = hmeblkva ;\ + */ ;\ + ;\ + add hatid, SFHME_TTE, hatid ;\ + add hmeblkpa, hatid, hmeblkpa ;\ + ldxa [hmeblkpa]ASI_MEM, tte /* MMU_READTTE through pa */ ;\ + add hmeblkva, hatid, hmeblkva ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\ + HMELOCK_EXIT(hatid, hatid, ASI_MEM) /* drop lock */ ;\ + set TTE_SUSPEND, hatid ;\ + TTE_SUSPEND_INT_SHIFT(hatid) ;\ + btst tte, hatid ;\ + bz,pt %xcc, foundlabel ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid ;\ + ;\ + /* ;\ + * Mapping is suspended, so goto suspend label. ;\ + */ ;\ + ba,pt %xcc, suspendlabel ;\ + nop + +/* + * GET_SHME_TTE is similar to GET_TTE() except it searches + * shared hmeblks via HMEHASH_SEARCH_SHME() macro. + * If valid tte is found, hmemisc = shctx flag, i.e., shme is + * either 0 (not part of scd) or 1 (part of scd). 
+ */ +#define GET_SHME_TTE(tagacc, hatid, tte, hmeblkpa, hmeblkva, tsbarea, \ + hmemisc, hmeshift, hashno, label, foundlabel, \ + suspendlabel, exitlabel) \ + ;\ + stn tagacc, [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)] ;\ + stn hatid, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)] ;\ + HMEHASH_FUNC_ASM(tagacc, hatid, tsbarea, hmeshift, tte, \ + hmeblkpa, label/**/5, hmemisc, hmeblkva) ;\ + ;\ + /* ;\ + * tagacc = tagacc ;\ + * hatid = hatid ;\ + * tsbarea = tsbarea ;\ + * tte = hmebp (hme bucket pointer) ;\ + * hmeblkpa = vapg (virtual page) ;\ + * hmemisc, hmeblkva = scratch ;\ + */ ;\ + MAKE_HASHTAG(hmeblkpa, hatid, hmeshift, hashno, hmemisc) ;\ + ;\ + /* ;\ + * tagacc = tagacc ;\ + * hatid = hatid ;\ + * tsbarea = tsbarea ;\ * tte = hmebp ;\ + * hmemisc = htag_bspage + hashno + 0 (for rid) ;\ + * hmeblkpa = CLOBBERED ;\ + * hmeblkva = scratch ;\ + */ ;\ + stn tte, [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)] ;\ + HMELOCK_ENTER(tte, hmeblkpa, hmeblkva, label/**/3, ASI_MEM) ;\ + ;\ + add tte, HMEBUCK_NEXTPA, hmeblkpa ;\ + ldxa [hmeblkpa]ASI_MEM, hmeblkpa ;\ + add tte, HMEBUCK_HBLK, hmeblkva ;\ + ldxa [hmeblkva]ASI_MEM, hmeblkva ;\ + HAT_HSEARCH_DBSTAT(hatid, tsbarea, tagacc, tte) ;\ + ;\ +label/**/8: ;\ + HMEHASH_SEARCH_SHME(hmemisc, hatid, hmeblkpa, hmeblkva, \ + tsbarea, tagacc, tte, label/**/1) ;\ + /* ;\ + * tagacc = CLOBBERED ;\ + * tte = CLOBBERED ;\ + * hmeblkpa = hmeblkpa ;\ + * hmeblkva = hmeblkva ;\ + */ ;\ + brnz,pt hmeblkva, label/**/4 /* branch if hmeblk found */ ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSB_TAGACC)], tagacc ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hmeblkva ;\ + HMELOCK_EXIT(hmeblkva, hmeblkva, ASI_MEM) /* drop lock */ ;\ + ba,pt %xcc, exitlabel /* exit if hblk not found */ ;\ + nop ;\ +label/**/4: ;\ + /* ;\ + * We have found the hmeblk containing the hment. ;\ + * Now we calculate the corresponding tte. ;\ + * ;\ + * tagacc = tagacc ;\ + * hatid = hatid ;\ + * tte = clobbered ;\ * hmeblkpa = hmeblkpa ;\ - * hmentoff = hblktag ;\ + * hmemisc = hblktag ;\ * hmeblkva = hmeblkva ;\ + * tsbarea = tsbmiss area ;\ */ ;\ - HMEBLK_TO_HMENT(tagacc, hmeblkpa, hmentoff, hatid, label/**/2) ;\ + HMEBLK_TO_HMENT(tagacc, hmeblkpa, hatid, hmemisc, tte, \ + label/**/2) ;\ ;\ - add hmentoff, SFHME_TTE, hmentoff ;\ - add hmeblkpa, hmentoff, hmeblkpa ;\ + /* ;\ + * tagacc = tagacc ;\ + * hatid = hmentoff ;\ + * tte = clobbered ;\ + * hmeblkpa = hmeblkpa ;\ + * hmemisc = hblk_misc ;\ + * hmeblkva = hmeblkva ;\ + * tsbarea = tsbmiss area ;\ + */ ;\ + ;\ + add hatid, SFHME_TTE, hatid ;\ + add hmeblkpa, hatid, hmeblkpa ;\ ldxa [hmeblkpa]ASI_MEM, tte /* MMU_READTTE through pa */ ;\ - add hmeblkva, hmentoff, hmeblkva ;\ + brlz,pt tte, label/**/6 ;\ + add hmeblkva, hatid, hmeblkva ;\ + btst HBLK_SZMASK, hmemisc ;\ + bnz,a,pt %icc, label/**/7 ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\ + ;\ + /* ;\ + * We found an invalid 8K tte in shme. ;\ + * it may not belong to shme's region since ;\ + * region size/alignment granularity is 8K but different ;\ + * regions don't share hmeblks. Continue the search. 
;\ + */ ;\ + sub hmeblkpa, hatid, hmeblkpa ;\ ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid ;\ - ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hmentoff ;\ - HMELOCK_EXIT(hmentoff, hmentoff, ASI_MEM) /* drop lock */ ;\ - set TTE_SUSPEND, hmentoff ;\ - TTE_SUSPEND_INT_SHIFT(hmentoff) ;\ - btst tte, hmentoff ;\ + srlx tagacc, hmeshift, tte ;\ + add hmeblkpa, HMEBLK_NEXT, hmeblkva ;\ + ldxa [hmeblkva]ASI_MEM, hmeblkva ;\ + add hmeblkpa, HMEBLK_NEXTPA, hmeblkpa ;\ + ldxa [hmeblkpa]ASI_MEM, hmeblkpa ;\ + MAKE_HASHTAG(tte, hatid, hmeshift, hashno, hmemisc) ;\ + ba,a,pt %xcc, label/**/8 ;\ +label/**/6: ;\ + GET_SCDSHMERMAP(tsbarea, hmeblkpa, hatid, hmemisc) ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\ +label/**/7: ;\ + HMELOCK_EXIT(hatid, hatid, ASI_MEM) /* drop lock */ ;\ + set TTE_SUSPEND, hatid ;\ + TTE_SUSPEND_INT_SHIFT(hatid) ;\ + btst tte, hatid ;\ bz,pt %xcc, foundlabel ;\ - nop ;\ + ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HATID)], hatid ;\ ;\ /* ;\ * Mapping is suspended, so goto suspend label. ;\ @@ -2393,8 +2643,8 @@ sfmmu_kprot_patch_ktsb4m_szcode: GET_TSBE_POINTER(MMU_PAGESHIFT4M, %g3, %g7, %g6, %g5) ! %g3 = 4M tsb entry pointer, as TSB miss handler expects - CPU_TSBMISS_AREA(%g6, %g7) - HAT_PERCPU_STAT16(%g6, TSBMISS_KPROTS, %g7) + CPU_TSBMISS_AREA(%g6, %g7) + HAT_PERCPU_STAT16(%g6, TSBMISS_KPROTS, %g7) ba,pt %xcc, sfmmu_tsb_miss_tt nop @@ -2412,8 +2662,8 @@ sfmmu_kprot_patch_ktsb4m_szcode: /* %g1 = first TSB entry ptr now, %g2 preserved */ GET_UTSBREG(SCRATCHPAD_UTSBREG2, %g3) /* get 2nd utsbreg */ - brlz,pt %g3, 9f /* check for 2nd TSB */ - mov %g0, %g3 /* clear second tsbe ptr */ + brlz,pt %g3, 9f /* check for 2nd TSB */ + nop GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5) /* %g3 = second TSB entry ptr now, %g2 preserved */ @@ -2422,14 +2672,14 @@ sfmmu_kprot_patch_ktsb4m_szcode: #ifdef UTSB_PHYS /* g1 = first TSB entry ptr */ GET_2ND_TSBREG(%g3) - brlz,a,pt %g3, 9f /* check for 2nd TSB */ - mov %g0, %g3 /* clear second tsbe ptr */ + brlz,pt %g3, 9f /* check for 2nd TSB */ + nop GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5) /* %g3 = second TSB entry ptr now, %g2 preserved */ #else /* UTSB_PHYS */ brgez,pt %g1, 9f /* check for 2nd TSB */ - mov %g0, %g3 /* clear second tsbe ptr */ + mov -1, %g3 /* set second tsbe ptr to -1 */ mov %g2, %g7 GET_2ND_TSBE_PTR(%g7, %g1, %g3, %g4, %g5, sfmmu_uprot) @@ -2452,7 +2702,7 @@ sfmmu_kprot_patch_ktsb4m_szcode: * %g1 = 8K TSB pointer register (not used, clobbered) * %g2 = tag access register (used) * %g3 = faulting context id (used) - * %g7 = 4M virtual page number for tag matching (used) + * %g7 = TSB tag to match (used) */ .align 64 ALTENTRY(sfmmu_kitlb_miss) @@ -2472,8 +2722,27 @@ iktsb: sllx %g2, 64-(TAGACC_SHIFT + TSB_START_SIZE + RUNTIME_PATCH), %g1 or %g4, %g1, %g1 ! form tsb ptr ldda [%g1]RUNTIME_PATCH, %g4 ! %g4 = tag, %g5 = data cmp %g4, %g7 + bne,pn %xcc, iktsb4mbase ! check 4m ktsb + srlx %g2, MMU_PAGESHIFT4M, %g3 ! use 4m virt-page as TSB index + + andcc %g5, TTE_EXECPRM_INT, %g0 ! check exec bit + bz,pn %icc, exec_fault + nop + TT_TRACE(trace_tsbhit) ! 2 instr traptrace + ITLB_STUFF(%g5, %g1, %g2, %g3, %g4) + retry + +iktsb4mbase: + RUNTIME_PATCH_SETX(%g4, %g6) + /* %g4 = contents of ktsb4m_base or ktsb4m_pbase */ +iktsb4m: + sllx %g3, 64-(TSB_START_SIZE + RUNTIME_PATCH), %g3 + srlx %g3, 64-(TSB_START_SIZE + TSB_ENTRY_SHIFT + RUNTIME_PATCH), %g3 + add %g4, %g3, %g3 ! %g3 = 4m tsbe ptr + ldda [%g3]RUNTIME_PATCH, %g4 ! %g4 = tag, %g5 = data + cmp %g4, %g7 bne,pn %xcc, sfmmu_tsb_miss_tt ! 
branch on miss - andcc %g5, TTE_EXECPRM_INT, %g0 ! check exec bit + andcc %g5, TTE_EXECPRM_INT, %g0 ! check exec bit bz,pn %icc, exec_fault nop TT_TRACE(trace_tsbhit) ! 2 instr traptrace @@ -2629,7 +2898,7 @@ dktsb4m_kpmcheck: PROBE_1ST_ITSB(%g1, %g7, uitlb_fast_8k_probefail) /* g4 - g5 = clobbered by PROBE_1ST_ITSB */ ba,pn %xcc, sfmmu_tsb_miss_tt - mov %g0, %g3 + mov -1, %g3 /* * User data miss w/ single TSB. @@ -2648,7 +2917,7 @@ dktsb4m_kpmcheck: PROBE_1ST_DTSB(%g1, %g7, udtlb_fast_8k_probefail) /* g4 - g5 = clobbered by PROBE_1ST_DTSB */ ba,pn %xcc, sfmmu_tsb_miss_tt - mov %g0, %g3 + mov -1, %g3 /* * User instruction miss w/ multiple TSBs (sun4v). @@ -2834,7 +3103,7 @@ udtlb_miss_probesecond: * * g1 = First TSB entry pointer * g2 = tag access register - * g3 = 4M TSB entry pointer; NULL if no 2nd TSB + * g3 = 4M TSB entry pointer; -1 if no 2nd TSB * g4 - g7 = scratch registers */ @@ -2878,6 +3147,12 @@ udtlb_miss_probesecond: be,pn %icc, tsb_tl0_noctxt /* no ctx miss exception */ stn %g7, [%g6 + (TSBMISS_SCRATCH + TSBMISS_HATID)] +#ifdef sun4v + ldub [%g6 + TSBMISS_URTTEFLAGS], %g7 /* clear ctx1 flag set from */ + andn %g7, HAT_CHKCTX1_FLAG, %g7 /* the previous tsb miss */ + stub %g7, [%g6 + TSBMISS_URTTEFLAGS] +#endif + ISM_CHECK(%g2, %g6, %g3, %g4, %g5, %g7, %g1, tsb_l1, tsb_ism) /* * The miss wasn't in an ISM segment. @@ -2902,10 +3177,9 @@ udtlb_miss_probesecond: /* NOT REACHED */ tsb_512K: - ldn [%g6 + (TSBMISS_SCRATCH + TSB_TAGACC)], %g3 - sllx %g3, TAGACC_CTX_LSHIFT, %g5 + sllx %g2, TAGACC_CTX_LSHIFT, %g5 brz,pn %g5, 3f - lduh [%g6 + TSBMISS_HATFLAGS], %g4 + ldub [%g6 + TSBMISS_UTTEFLAGS], %g4 and %g4, HAT_512K_FLAG, %g5 /* @@ -2932,10 +3206,9 @@ tsb_512K: /* NOT REACHED */ tsb_4M: - ldn [%g6 + (TSBMISS_SCRATCH + TSB_TAGACC)], %g3 - sllx %g3, TAGACC_CTX_LSHIFT, %g5 + sllx %g2, TAGACC_CTX_LSHIFT, %g5 brz,pn %g5, 4f - lduh [%g6 + TSBMISS_HATFLAGS], %g4 + ldub [%g6 + TSBMISS_UTTEFLAGS], %g4 and %g4, HAT_4M_FLAG, %g5 brz,pn %g5, tsb_32M nop @@ -2950,25 +3223,13 @@ tsb_4M: /* NOT REACHED */ tsb_32M: -#ifndef sun4v - GET_CPU_IMPL(%g5) - cmp %g5, OLYMPUS_C_IMPL - be,pn %xcc, 0f - nop - cmp %g5, PANTHER_IMPL - bne,pt %xcc, tsb_pagefault - nop -#endif - -0: - ldn [%g6 + (TSBMISS_SCRATCH + TSB_TAGACC)], %g3 - sllx %g3, TAGACC_CTX_LSHIFT, %g5 + sllx %g2, TAGACC_CTX_LSHIFT, %g5 #ifdef sun4v brz,pn %g5, 6f #else brz,pn %g5, tsb_pagefault -#endif - lduh [%g6 + TSBMISS_HATFLAGS], %g4 +#endif + ldub [%g6 + TSBMISS_UTTEFLAGS], %g4 and %g4, HAT_32M_FLAG, %g5 brz,pn %g5, tsb_256M nop @@ -2982,10 +3243,13 @@ tsb_32M: sfmmu_suspend_tl, tsb_256M) /* NOT REACHED */ +#ifdef sun4u +#define tsb_shme tsb_pagefault +#endif tsb_256M: - lduh [%g6 + TSBMISS_HATFLAGS], %g4 + ldub [%g6 + TSBMISS_UTTEFLAGS], %g4 and %g4, HAT_256M_FLAG, %g5 - brz,pn %g5, tsb_pagefault + brz,pn %g5, tsb_shme nop 6: /* @@ -2994,24 +3258,138 @@ tsb_256M: GET_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, MMU_PAGESHIFT256M, TTE256M, tsb_l256M, tsb_checktte, - sfmmu_suspend_tl, tsb_pagefault) + sfmmu_suspend_tl, tsb_shme) /* NOT REACHED */ tsb_checktte: /* + * g1 = hblk_misc + * g2 = tagacc + * g3 = tte + * g4 = tte pa + * g5 = tte va + * g6 = tsbmiss area + * g7 = hatid + */ + brlz,a,pt %g3, tsb_validtte + rdpr %tt, %g7 + +#ifdef sun4u +#undef tsb_shme + ba tsb_pagefault + nop +#else + +tsb_shme: + /* + * g2 = tagacc + * g6 = tsbmiss area + */ + sllx %g2, TAGACC_CTX_LSHIFT, %g5 + brz,pn %g5, tsb_pagefault + nop + ldx [%g6 + TSBMISS_SHARED_UHATID], %g7 /* g7 = srdp */ + brz,pn %g7, tsb_pagefault + nop + + GET_SHME_TTE(%g2, %g7, %g3, %g4, 
%g5, %g6, %g1, + MMU_PAGESHIFT64K, TTE64K, tsb_shme_l8K, tsb_shme_checktte, + sfmmu_suspend_tl, tsb_shme_512K) + /* NOT REACHED */ + +tsb_shme_512K: + ldub [%g6 + TSBMISS_URTTEFLAGS], %g4 + and %g4, HAT_512K_FLAG, %g5 + brz,pn %g5, tsb_shme_4M + nop + + /* + * 512K hash + */ + + GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, + MMU_PAGESHIFT512K, TTE512K, tsb_shme_l512K, tsb_shme_checktte, + sfmmu_suspend_tl, tsb_shme_4M) + /* NOT REACHED */ + +tsb_shme_4M: + ldub [%g6 + TSBMISS_URTTEFLAGS], %g4 + and %g4, HAT_4M_FLAG, %g5 + brz,pn %g5, tsb_shme_32M + nop +4: + /* + * 4M hash + */ + GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, + MMU_PAGESHIFT4M, TTE4M, tsb_shme_l4M, tsb_shme_checktte, + sfmmu_suspend_tl, tsb_shme_32M) + /* NOT REACHED */ + +tsb_shme_32M: + ldub [%g6 + TSBMISS_URTTEFLAGS], %g4 + and %g4, HAT_32M_FLAG, %g5 + brz,pn %g5, tsb_shme_256M + nop + + /* + * 32M hash + */ + + GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, + MMU_PAGESHIFT32M, TTE32M, tsb_shme_l32M, tsb_shme_checktte, + sfmmu_suspend_tl, tsb_shme_256M) + /* NOT REACHED */ + +tsb_shme_256M: + ldub [%g6 + TSBMISS_URTTEFLAGS], %g4 + and %g4, HAT_256M_FLAG, %g5 + brz,pn %g5, tsb_pagefault + nop + + /* + * 256M hash + */ + + GET_SHME_TTE(%g2, %g7, %g3, %g4, %g5, %g6, %g1, + MMU_PAGESHIFT256M, TTE256M, tsb_shme_l256M, tsb_shme_checktte, + sfmmu_suspend_tl, tsb_pagefault) + /* NOT REACHED */ + +tsb_shme_checktte: + + brgez,pn %g3, tsb_pagefault + rdpr %tt, %g7 + /* + * g1 = ctx1 flag * g3 = tte * g4 = tte pa * g5 = tte va * g6 = tsbmiss area + * g7 = tt */ - brgez,pn %g3, tsb_pagefault /* if tte invalid branch */ + + brz,pt %g1, tsb_validtte nop + ldub [%g6 + TSBMISS_URTTEFLAGS], %g1 + or %g1, HAT_CHKCTX1_FLAG, %g1 + stub %g1, [%g6 + TSBMISS_URTTEFLAGS] + + SAVE_CTX1(%g7, %g2, %g1, tsb_shmel) +#endif /* sun4u */ tsb_validtte: /* + * g3 = tte + * g4 = tte pa + * g5 = tte va + * g6 = tsbmiss area + * g7 = tt + */ + + /* * Set ref/mod bits if this is a prot trap. Usually, it isn't. */ - rdpr %tt, %g7 cmp %g7, FAST_PROT_TT bne,pt %icc, 4f nop @@ -3021,6 +3399,10 @@ tsb_validtte: rdpr %tt, %g5 GET_MMU_D_TTARGET(%g2, %g7) /* %g2 = ttarget */ +#ifdef sun4v + MMU_FAULT_STATUS_AREA(%g7) + ldx [%g7 + MMFSA_D_ADDR], %g5 /* save fault addr for later */ +#endif ba,pt %xcc, tsb_update_tl1 nop @@ -3065,6 +3447,7 @@ tsb_validtte: ldx [%g2 + MMFSA_CTX_], %g7 sllx %g7, TTARGET_CTX_SHIFT, %g7 ldx [%g2 + MMFSA_ADDR_], %g2 + mov %g2, %g5 ! save the fault addr for later use srlx %g2, TTARGET_VA_SHIFT, %g2 or %g2, %g7, %g2 #else @@ -3102,6 +3485,17 @@ tsb_user: #endif /* sun4v */ tsb_user8k: +#ifdef sun4v + ldub [%g6 + TSBMISS_URTTEFLAGS], %g7 + and %g7, HAT_CHKCTX1_FLAG, %g1 + brz,a,pn %g1, 1f + ldn [%g6 + TSBMISS_TSBPTR], %g1 ! g1 = first TSB ptr + GET_UTSBREG_SHCTX(%g6, TSBMISS_TSBSCDPTR, %g1) + brlz,a,pn %g1, ptl1_panic ! if no shared tsb + mov PTL1_NO_SCDTSB8K, %g1 ! panic + GET_3RD_TSBE_PTR(%g5, %g1, %g6, %g7) +1: +#else ldn [%g6 + TSBMISS_TSBPTR], %g1 ! g1 = first TSB ptr #ifndef UTSB_PHYS @@ -3109,9 +3503,12 @@ tsb_user8k: mov %g7, %asi #endif /* UTSB_PHYS */ +#endif /* sun4v */ + TSB_UPDATE_TL(%g1, %g3, %g2, %g4, %g7, %g6, 5) #ifdef sun4v + rdpr %tt, %g5 cmp %g5, T_INSTR_MMU_MISS be,a,pn %xcc, 9f mov %g3, %g5 @@ -3129,9 +3526,20 @@ tsb_user8k: retry tsb_user4m: - ldn [%g6 + TSBMISS_TSBPTR4M], %g1 /* g1 = tsbp */ +#ifdef sun4v + ldub [%g6 + TSBMISS_URTTEFLAGS], %g7 + and %g7, HAT_CHKCTX1_FLAG, %g1 + brz,a,pn %g1, 4f + ldn [%g6 + TSBMISS_TSBPTR4M], %g1 ! 
g1 = TSB ptr + GET_UTSBREG_SHCTX(%g6, TSBMISS_TSBSCDPTR4M, %g1) + brlz,a,pn %g1, 5f ! if no shared 2nd tsb + nop + GET_4TH_TSBE_PTR(%g5, %g1, %g6, %g7) +#else + ldn [%g6 + TSBMISS_TSBPTR4M], %g1 ! g1 = TSB ptr +#endif 4: - brz,pn %g1, 5f /* Check to see if we have 2nd TSB programmed */ + brlz,pn %g1, 5f /* Check to see if we have 2nd TSB programmed */ nop #ifndef UTSB_PHYS @@ -3143,6 +3551,7 @@ tsb_user4m: 5: #ifdef sun4v + rdpr %tt, %g5 cmp %g5, T_INSTR_MMU_MISS be,a,pn %xcc, 9f mov %g3, %g5 @@ -3182,7 +3591,7 @@ tsb_user_pn_synth: andcc %g3, TTE_EXECPRM_INT, %g0 /* is execprm bit set */ bz,pn %icc, 4b /* if not, been here before */ ldn [%g6 + TSBMISS_TSBPTR4M], %g1 /* g1 = tsbp */ - brz,a,pn %g1, 5f /* no 2nd tsb */ + brlz,a,pn %g1, 5f /* no 2nd tsb */ mov %g3, %g5 mov MMU_TAG_ACCESS, %g7 @@ -3202,7 +3611,7 @@ tsb_user_itlb_synth: mov MMU_TAG_ACCESS, %g7 ldxa [%g7]ASI_IMMU, %g6 /* get tag access va */ GET_4M_PFN_OFF(%g3, %g6, %g5, %g7, 2) /* make 4M pfn offset */ - brz,a,pn %g1, 7f /* Check to see if we have 2nd TSB programmed */ + brlz,a,pn %g1, 7f /* Check to see if we have 2nd TSB programmed */ or %g5, %g3, %g5 /* add 4M bits to TTE */ mov ASI_N, %g7 /* user TSBs always accessed by VA */ @@ -3216,6 +3625,7 @@ tsb_user_itlb_synth: tsb_kernel: #ifdef sun4v + rdpr %tt, %g5 cmp %g7, TTE4M bge,pn %icc, 5f #else @@ -3228,7 +3638,7 @@ tsb_kernel: nop 5: ldn [%g6 + TSBMISS_TSBPTR4M], %g1 ! g1 = 4m tsbptr - brz,pn %g1, 3f /* skip programming if 4m TSB ptr is NULL */ + brlz,pn %g1, 3f /* skip programming if 4m TSB ptr is -1 */ nop 6: #ifndef sun4v @@ -3270,26 +3680,38 @@ tsb_ism: mov PTL1_BAD_ISM, %g1 /* g5 = pa of imap_vb_shift */ sub %g4, (IMAP_ISMHAT - IMAP_VB_SHIFT), %g5 - lduha [%g5]ASI_MEM, %g4 /* g4 = imap_vb_shift */ + lduba [%g5]ASI_MEM, %g4 /* g4 = imap_vb_shift */ srlx %g3, %g4, %g3 /* clr size field */ - set TAGACC_CTX_MASK, %g1 /* mask off ctx number/type */ - sllx %g3, %g4, %g3 /* g3 = ism vbase */ - and %g2, %g1, %g4 /* g4 = ctx number */ - andn %g2, %g1, %g1 /* g1 = tlb miss vaddr */ - sub %g1, %g3, %g2 /* g2 = offset in ISM seg */ - or %g2, %g4, %g2 /* g2 = (pseudo-)tagacc */ - + set TAGACC_CTX_MASK, %g1 /* mask off ctx number */ + sllx %g3, %g4, %g3 /* g3 = ism vbase */ + and %g2, %g1, %g4 /* g4 = ctx number */ + andn %g2, %g1, %g1 /* g1 = tlb miss vaddr */ + sub %g1, %g3, %g2 /* g2 = offset in ISM seg */ + or %g2, %g4, %g2 /* g2 = (pseudo-)tagacc */ + sub %g5, (IMAP_VB_SHIFT - IMAP_HATFLAGS), %g5 + lduha [%g5]ASI_MEM, %g4 /* g5 = pa of imap_hatflags */ +#ifdef sun4v + and %g4, HAT_CTX1_FLAG, %g5 /* g5 = imap_hatflags */ + brz,pt %g5, tsb_chk4M_ism + nop + ldub [%g6 + TSBMISS_URTTEFLAGS], %g5 + or %g5, HAT_CHKCTX1_FLAG, %g5 + stub %g5, [%g6 + TSBMISS_URTTEFLAGS] + rdpr %tt, %g5 + SAVE_CTX1(%g5, %g3, %g1, tsb_shctxl) +#endif /* * ISM pages are always locked down. * If we can't find the tte then pagefault - * and let the spt segment driver resovle it. + * and let the spt segment driver resolve it. 
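 *
 * The net effect of the fixup above, in hedged pseudo-C (names follow the
 * register comments; this is a sketch, not a literal interface):
 *
 *	vbase  = ISM segment base, recovered by clearing the low
 *		 imap_vb_shift bits (the size field) of the ism map entry;
 *	tagacc = (miss vaddr - vbase) | ctx number;
 *	if (imap_hatflags & HAT_CTX1_FLAG) {
 *		tsbmiss URTTEFLAGS |= HAT_CHKCTX1_FLAG;
 *		SAVE_CTX1();	(later TSB/TLB updates then use ctx1)
 *	}
 *	continue the hmeblk search with the ISM hatid and the new tagacc,
 *	starting with the 4M sizes when HAT_4M_FLAG is set in imap_hatflags.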
* - * g2 = ISM vaddr (offset in ISM seg) + * g2 = tagacc w/ISM vaddr (offset in ISM seg) + * g4 = imap_hatflags * g6 = tsb miss area * g7 = ISM hatid */ - sub %g5, (IMAP_VB_SHIFT - IMAP_HATFLAGS), %g5 - lduha [%g5]ASI_MEM, %g4 /* g5 = pa of imap_hatflags */ + +tsb_chk4M_ism: and %g4, HAT_4M_FLAG, %g5 /* g4 = imap_hatflags */ brnz,pt %g5, tsb_ism_4M /* branch if 4M pages */ nop @@ -3309,8 +3731,8 @@ tsb_ism_32M: /* NOT REACHED */ tsb_ism_32M_found: - brlz,pt %g3, tsb_validtte - nop + brlz,a,pt %g3, tsb_validtte + rdpr %tt, %g7 ba,pt %xcc, tsb_ism_4M nop @@ -3327,8 +3749,8 @@ tsb_ism_256M: tsb_ism_4M) tsb_ism_256M_found: - brlz,pt %g3, tsb_validtte - nop + brlz,a,pt %g3, tsb_validtte + rdpr %tt, %g7 tsb_ism_4M: /* @@ -3340,8 +3762,8 @@ tsb_ism_4M: /* NOT REACHED */ tsb_ism_4M_found: - brlz,pt %g3, tsb_validtte - nop + brlz,a,pt %g3, tsb_validtte + rdpr %tt, %g7 tsb_ism_8K: /* @@ -3354,8 +3776,8 @@ tsb_ism_8K: /* NOT REACHED */ tsb_ism_8K_found: - brlz,pt %g3, tsb_validtte - nop + brlz,a,pt %g3, tsb_validtte + rdpr %tt, %g7 tsb_pagefault: rdpr %tt, %g7 @@ -3457,7 +3879,7 @@ tsb_protfault: add %g1, %g2, %g1 lduh [%g1 + CPUC_DTRACE_FLAGS], %g2 andcc %g2, CPU_DTRACE_NOFAULT, %g0 - bz sfmmu_pagefault + bz sfmmu_mmu_trap or %g2, CPU_DTRACE_BADADDR, %g2 stuh %g2, [%g1 + CPUC_DTRACE_FLAGS] GET_MMU_D_ADDR(%g3, %g4) @@ -3531,11 +3953,6 @@ tsb_protfault: ba,pt %icc, sfmmu_window_trap nop SET_SIZE(sfmmu_tsb_miss) - -#if (1<< TSBMISS_SHIFT) != TSBMISS_SIZE -#error - TSBMISS_SHIFT does not correspond to size of tsbmiss struct -#endif - #endif /* lint */ #if defined (lint) @@ -4442,8 +4859,8 @@ sfmmu_dslow_patch_ktsb4m_szcode: /* %g1 = first TSB entry ptr now, %g2 preserved */ GET_UTSBREG(SCRATCHPAD_UTSBREG2, %g3) /* get 2nd utsbreg */ - brlz,a,pt %g3, sfmmu_tsb_miss_tt /* done if no 2nd TSB */ - mov %g0, %g3 + brlz,pt %g3, sfmmu_tsb_miss_tt /* done if no 2nd TSB */ + nop GET_2ND_TSBE_PTR(%g2, %g3, %g4, %g5) /* %g3 = second TSB entry ptr now, %g2 preserved */ diff --git a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s index 9b110396b5..53c6392859 100644 --- a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s +++ b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -104,7 +104,7 @@ hash_done: * { * uint_t hmeshift = HME_HASH_SHIFT(rehash); * uint64_t bspage = HME_HASH_BSPAGE(va, hmeshift); - * return (rehash | (bspage << HTAG_REHASHSZ)); + * return (rehash | (bspage << HTAG_BSPAGE_SHIFT)); * } */ @@ -127,8 +127,10 @@ bspage: /* TTE_PAGE_SHIFT in %g5 */ \ sllx %g6, %g5, %g5; \ \ /* BSPAGE in %g5 */ \ - sllx %g5, HTAG_REHASHSZ, %g5; \ - or %g5, %g3, %g5 + sllx %g5, HTAG_BSPAGE_SHIFT, %g5; \ + sllx %g3, HTAG_REHASH_SHIFT, %g6; \ + or %g6, SFMMU_INVALID_SHMERID, %g6; \ + or %g5, %g6, %g5 /* * uint64_t diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c index 1a66900b63..eeb44a0dd5 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c @@ -84,6 +84,67 @@ #include <vm/xhat_sfmmu.h> #include <sys/fpu/fpusystm.h> #include <vm/mach_kpm.h> +#include <sys/callb.h> + +#ifdef DEBUG +#define SFMMU_VALIDATE_HMERID(hat, rid, saddr, len) \ + if (SFMMU_IS_SHMERID_VALID(rid)) { \ + caddr_t _eaddr = (saddr) + (len); \ + sf_srd_t *_srdp; \ + sf_region_t *_rgnp; \ + ASSERT((rid) < SFMMU_MAX_HME_REGIONS); \ + ASSERT(SF_RGNMAP_TEST(hat->sfmmu_hmeregion_map, rid)); \ + ASSERT((hat) != ksfmmup); \ + _srdp = (hat)->sfmmu_srdp; \ + ASSERT(_srdp != NULL); \ + ASSERT(_srdp->srd_refcnt != 0); \ + _rgnp = _srdp->srd_hmergnp[(rid)]; \ + ASSERT(_rgnp != NULL && _rgnp->rgn_id == rid); \ + ASSERT(_rgnp->rgn_refcnt != 0); \ + ASSERT(!(_rgnp->rgn_flags & SFMMU_REGION_FREE)); \ + ASSERT((_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == \ + SFMMU_REGION_HME); \ + ASSERT((saddr) >= _rgnp->rgn_saddr); \ + ASSERT((saddr) < _rgnp->rgn_saddr + _rgnp->rgn_size); \ + ASSERT(_eaddr > _rgnp->rgn_saddr); \ + ASSERT(_eaddr <= _rgnp->rgn_saddr + _rgnp->rgn_size); \ + } + +#define SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid) \ +{ \ + caddr_t _hsva; \ + caddr_t _heva; \ + caddr_t _rsva; \ + caddr_t _reva; \ + int _ttesz = get_hblk_ttesz(hmeblkp); \ + int _flagtte; \ + ASSERT((srdp)->srd_refcnt != 0); \ + ASSERT((rid) < SFMMU_MAX_HME_REGIONS); \ + ASSERT((rgnp)->rgn_id == rid); \ + ASSERT(!((rgnp)->rgn_flags & SFMMU_REGION_FREE)); \ + ASSERT(((rgnp)->rgn_flags & SFMMU_REGION_TYPE_MASK) == \ + SFMMU_REGION_HME); \ + ASSERT(_ttesz <= (rgnp)->rgn_pgszc); \ + _hsva = (caddr_t)get_hblk_base(hmeblkp); \ + _heva = get_hblk_endaddr(hmeblkp); \ + _rsva = (caddr_t)P2ALIGN( \ + (uintptr_t)(rgnp)->rgn_saddr, HBLK_MIN_BYTES); \ + _reva = (caddr_t)P2ROUNDUP( \ + (uintptr_t)((rgnp)->rgn_saddr + (rgnp)->rgn_size), \ + HBLK_MIN_BYTES); \ + ASSERT(_hsva >= _rsva); \ + ASSERT(_hsva < _reva); \ + ASSERT(_heva > _rsva); \ + ASSERT(_heva <= _reva); \ + _flagtte = (_ttesz < HBLK_MIN_TTESZ) ? HBLK_MIN_TTESZ : \ + _ttesz; \ + ASSERT(rgnp->rgn_hmeflags & (0x1 << _flagtte)); \ +} + +#else /* DEBUG */ +#define SFMMU_VALIDATE_HMERID(hat, rid, addr, len) +#define SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid) +#endif /* DEBUG */ #if defined(SF_ERRATA_57) extern caddr_t errata57_limit; @@ -166,6 +227,7 @@ static struct kmem_cache *mmuctxdom_cache; static struct kmem_cache *sfmmu_tsbinfo_cache; static struct kmem_cache *sfmmu_tsb8k_cache; static struct kmem_cache *sfmmu_tsb_cache[NLGRPS_MAX]; +static vmem_t *kmem_bigtsb_arena; static vmem_t *kmem_tsb_arena; /* @@ -185,11 +247,63 @@ static struct kmem_cache *ism_ment_cache; #define ISMID_STARTADDR NULL /* - * Whether to delay TLB flushes and use Cheetah's flush-all support - * when removing contexts from the dirty list. + * Region management data structures and function declarations. 
*/ -int delay_tlb_flush; -int disable_delay_tlb_flush; + +static void sfmmu_leave_srd(sfmmu_t *); +static int sfmmu_srdcache_constructor(void *, void *, int); +static void sfmmu_srdcache_destructor(void *, void *); +static int sfmmu_rgncache_constructor(void *, void *, int); +static void sfmmu_rgncache_destructor(void *, void *); +static int sfrgnmap_isnull(sf_region_map_t *); +static int sfhmergnmap_isnull(sf_hmeregion_map_t *); +static int sfmmu_scdcache_constructor(void *, void *, int); +static void sfmmu_scdcache_destructor(void *, void *); +static void sfmmu_rgn_cb_noop(caddr_t, caddr_t, caddr_t, + size_t, void *, u_offset_t); + +static uint_t srd_hashmask = SFMMU_MAX_SRD_BUCKETS - 1; +static sf_srd_bucket_t *srd_buckets; +static struct kmem_cache *srd_cache; +static uint_t srd_rgn_hashmask = SFMMU_MAX_REGION_BUCKETS - 1; +static struct kmem_cache *region_cache; +static struct kmem_cache *scd_cache; + +#ifdef sun4v +int use_bigtsb_arena = 1; +#else +int use_bigtsb_arena = 0; +#endif + +/* External /etc/system tunable, for turning on&off the shctx support */ +int disable_shctx = 0; +/* Internal variable, set by MD if the HW supports shctx feature */ +int shctx_on = 0; + +#ifdef DEBUG +static void check_scd_sfmmu_list(sfmmu_t **, sfmmu_t *, int); +#endif +static void sfmmu_to_scd_list(sfmmu_t **, sfmmu_t *); +static void sfmmu_from_scd_list(sfmmu_t **, sfmmu_t *); + +static sf_scd_t *sfmmu_alloc_scd(sf_srd_t *, sf_region_map_t *); +static void sfmmu_find_scd(sfmmu_t *); +static void sfmmu_join_scd(sf_scd_t *, sfmmu_t *); +static void sfmmu_finish_join_scd(sfmmu_t *); +static void sfmmu_leave_scd(sfmmu_t *, uchar_t); +static void sfmmu_destroy_scd(sf_srd_t *, sf_scd_t *, sf_region_map_t *); +static int sfmmu_alloc_scd_tsbs(sf_srd_t *, sf_scd_t *); +static void sfmmu_free_scd_tsbs(sfmmu_t *); +static void sfmmu_tsb_inv_ctx(sfmmu_t *); +static int find_ism_rid(sfmmu_t *, sfmmu_t *, caddr_t, uint_t *); +static void sfmmu_ism_hatflags(sfmmu_t *, int); +static int sfmmu_srd_lock_held(sf_srd_t *); +static void sfmmu_remove_scd(sf_scd_t **, sf_scd_t *); +static void sfmmu_add_scd(sf_scd_t **headp, sf_scd_t *); +static void sfmmu_link_scd_to_regions(sf_srd_t *, sf_scd_t *); +static void sfmmu_unlink_scd_from_regions(sf_srd_t *, sf_scd_t *); +static void sfmmu_link_to_hmeregion(sfmmu_t *, sf_region_t *); +static void sfmmu_unlink_from_hmeregion(sfmmu_t *, sf_region_t *); /* * ``hat_lock'' is a hashed mutex lock for protecting sfmmu TSB lists, @@ -279,7 +393,8 @@ int hat_check_vtop = 0; */ static struct hme_blk *sfmmu_shadow_hcreate(sfmmu_t *, caddr_t, int, uint_t); static struct hme_blk *sfmmu_hblk_alloc(sfmmu_t *, caddr_t, - struct hmehash_bucket *, uint_t, hmeblk_tag, uint_t); + struct hmehash_bucket *, uint_t, hmeblk_tag, uint_t, + uint_t); static caddr_t sfmmu_hblk_unload(struct hat *, struct hme_blk *, caddr_t, caddr_t, demap_range_t *, uint_t); static caddr_t sfmmu_hblk_sync(struct hat *, struct hme_blk *, caddr_t, @@ -295,22 +410,27 @@ static int sfmmu_steal_this_hblk(struct hmehash_bucket *, struct hme_blk *); static caddr_t sfmmu_hblk_unlock(struct hme_blk *, caddr_t, caddr_t); +static void hat_do_memload_array(struct hat *, caddr_t, size_t, + struct page **, uint_t, uint_t, uint_t); +static void hat_do_memload(struct hat *, caddr_t, struct page *, + uint_t, uint_t, uint_t); static void sfmmu_memload_batchsmall(struct hat *, caddr_t, page_t **, - uint_t, uint_t, pgcnt_t); + uint_t, uint_t, pgcnt_t, uint_t); void sfmmu_tteload(struct hat *, tte_t *, caddr_t, page_t *, uint_t); static int 
sfmmu_tteload_array(sfmmu_t *, tte_t *, caddr_t, page_t **, - uint_t); + uint_t, uint_t); static struct hmehash_bucket *sfmmu_tteload_acquire_hashbucket(sfmmu_t *, - caddr_t, int); + caddr_t, int, uint_t); static struct hme_blk *sfmmu_tteload_find_hmeblk(sfmmu_t *, - struct hmehash_bucket *, caddr_t, uint_t, uint_t); + struct hmehash_bucket *, caddr_t, uint_t, uint_t, + uint_t); static int sfmmu_tteload_addentry(sfmmu_t *, struct hme_blk *, tte_t *, - caddr_t, page_t **, uint_t); + caddr_t, page_t **, uint_t, uint_t); static void sfmmu_tteload_release_hashbucket(struct hmehash_bucket *); static int sfmmu_pagearray_setup(caddr_t, page_t **, tte_t *, int); -pfn_t sfmmu_uvatopfn(caddr_t, sfmmu_t *); +static pfn_t sfmmu_uvatopfn(caddr_t, sfmmu_t *, tte_t *); void sfmmu_memtte(tte_t *, pfn_t, uint_t, int); #ifdef VAC static void sfmmu_vac_conflict(struct hat *, caddr_t, page_t *); @@ -322,7 +442,6 @@ void conv_tnc(page_t *pp, int); static void sfmmu_get_ctx(sfmmu_t *); static void sfmmu_free_sfmmu(sfmmu_t *); -static void sfmmu_gettte(struct hat *, caddr_t, tte_t *); static void sfmmu_ttesync(struct hat *, caddr_t, tte_t *, page_t *); static void sfmmu_chgattr(struct hat *, caddr_t, size_t, uint_t, int); @@ -334,6 +453,8 @@ void sfmmu_page_cache_array(page_t *, int, int, pgcnt_t); static void sfmmu_page_cache(page_t *, int, int, int); #endif +cpuset_t sfmmu_rgntlb_demap(caddr_t, sf_region_t *, + struct hme_blk *, int); static void sfmmu_tlbcache_demap(caddr_t, sfmmu_t *, struct hme_blk *, pfn_t, int, int, int, int); static void sfmmu_ismtlbcache_demap(caddr_t, sfmmu_t *, struct hme_blk *, @@ -350,7 +471,7 @@ static void sfmmu_tsb_free(struct tsb_info *); static void sfmmu_tsbinfo_free(struct tsb_info *); static int sfmmu_init_tsbinfo(struct tsb_info *, int, int, uint_t, sfmmu_t *); - +static void sfmmu_tsb_chk_reloc(sfmmu_t *, hatlock_t *); static void sfmmu_tsb_swapin(sfmmu_t *, hatlock_t *); static int sfmmu_select_tsb_szc(pgcnt_t); static void sfmmu_mod_tsb(sfmmu_t *, caddr_t, tte_t *, int); @@ -383,20 +504,24 @@ static void sfmmu_hblkcache_reclaim(void *); static void sfmmu_shadow_hcleanup(sfmmu_t *, struct hme_blk *, struct hmehash_bucket *); static void sfmmu_free_hblks(sfmmu_t *, caddr_t, caddr_t, int); +static void sfmmu_cleanup_rhblk(sf_srd_t *, caddr_t, uint_t, int); +static void sfmmu_unload_hmeregion_va(sf_srd_t *, uint_t, caddr_t, caddr_t, + int, caddr_t *); +static void sfmmu_unload_hmeregion(sf_srd_t *, sf_region_t *); + static void sfmmu_rm_large_mappings(page_t *, int); static void hat_lock_init(void); static void hat_kstat_init(void); static int sfmmu_kstat_percpu_update(kstat_t *ksp, int rw); +static void sfmmu_set_scd_rttecnt(sf_srd_t *, sf_scd_t *); +static int sfmmu_is_rgnva(sf_srd_t *, caddr_t, ulong_t, ulong_t); static void sfmmu_check_page_sizes(sfmmu_t *, int); int fnd_mapping_sz(page_t *); static void iment_add(struct ism_ment *, struct hat *); static void iment_sub(struct ism_ment *, struct hat *); static pgcnt_t ism_tsb_entries(sfmmu_t *, int szc); extern void sfmmu_setup_tsbinfo(sfmmu_t *); -#ifdef sun4v -extern void sfmmu_invalidate_tsbinfo(sfmmu_t *); -#endif /* sun4v */ extern void sfmmu_clear_utsbinfo(void); static void sfmmu_ctx_wrap_around(mmu_ctx_t *); @@ -466,6 +591,7 @@ caddr_t utsb4m_vabase; /* for trap handler TSB accesses */ #endif /* sun4v */ uint64_t tsb_alloc_bytes = 0; /* bytes allocated to TSBs */ vmem_t *kmem_tsb_default_arena[NLGRPS_MAX]; /* For dynamic TSBs */ +vmem_t *kmem_bigtsb_default_arena[NLGRPS_MAX]; /* dynamic 256M TSBs */ /* * Size to 
use for TSB slabs. Future platforms that support page sizes @@ -473,13 +599,24 @@ vmem_t *kmem_tsb_default_arena[NLGRPS_MAX]; /* For dynamic TSBs */ * assembly macros for building and decoding the TSB base register contents. * Note disable_large_pages will override the value set here. */ -uint_t tsb_slab_ttesz = TTE4M; -uint_t tsb_slab_size; -uint_t tsb_slab_shift; -uint_t tsb_slab_mask; /* PFN mask for TTE */ +static uint_t tsb_slab_ttesz = TTE4M; +size_t tsb_slab_size = MMU_PAGESIZE4M; +uint_t tsb_slab_shift = MMU_PAGESHIFT4M; +/* PFN mask for TTE */ +size_t tsb_slab_mask = MMU_PAGEOFFSET4M >> MMU_PAGESHIFT; + +/* + * Size to use for TSB slabs. These are used only when 256M tsb arenas + * exist. + */ +static uint_t bigtsb_slab_ttesz = TTE256M; +static size_t bigtsb_slab_size = MMU_PAGESIZE256M; +static uint_t bigtsb_slab_shift = MMU_PAGESHIFT256M; +/* 256M page alignment for 8K pfn */ +static size_t bigtsb_slab_mask = MMU_PAGEOFFSET256M >> MMU_PAGESHIFT; /* largest TSB size to grow to, will be smaller on smaller memory systems */ -int tsb_max_growsize = UTSB_MAX_SZCODE; +static int tsb_max_growsize = 0; /* * Tunable parameters dealing with TSB policies. @@ -546,7 +683,12 @@ int tsb_remap_ttes = 1; * assumed to have at least 8 available entries. Platforms with a * larger fully-associative TLB could probably override the default. */ + +#ifdef sun4v +int tsb_sectsb_threshold = 0; +#else int tsb_sectsb_threshold = 8; +#endif /* * kstat data @@ -689,51 +831,44 @@ sfmmu_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag) * 2) TSBs can't grow larger than UTSB_MAX_SZCODE. */ #define SFMMU_SET_TSB_MAX_GROWSIZE(pages) { \ - int i, szc; \ + int _i, _szc, _slabszc, _tsbszc; \ \ - i = highbit(pages); \ - if ((1 << (i - 1)) == (pages)) \ - i--; /* 2^n case, round down */ \ - szc = i - TSB_START_SIZE; \ - if (szc < tsb_max_growsize) \ - tsb_max_growsize = szc; \ - else if ((szc > tsb_max_growsize) && \ - (szc <= tsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT))) \ - tsb_max_growsize = MIN(szc, UTSB_MAX_SZCODE); \ + _i = highbit(pages); \ + if ((1 << (_i - 1)) == (pages)) \ + _i--; /* 2^n case, round down */ \ + _szc = _i - TSB_START_SIZE; \ + _slabszc = bigtsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT); \ + _tsbszc = MIN(_szc, _slabszc); \ + tsb_max_growsize = MIN(_tsbszc, UTSB_MAX_SZCODE); \ } /* * Given a pointer to an sfmmu and a TTE size code, return a pointer to the * tsb_info which handles that TTE size. */ -#define SFMMU_GET_TSBINFO(tsbinfop, sfmmup, tte_szc) \ +#define SFMMU_GET_TSBINFO(tsbinfop, sfmmup, tte_szc) { \ (tsbinfop) = (sfmmup)->sfmmu_tsb; \ - ASSERT(sfmmu_hat_lock_held(sfmmup)); \ - if ((tte_szc) >= TTE4M) \ - (tsbinfop) = (tsbinfop)->tsb_next; - -/* - * Return the number of mappings present in the HAT - * for a particular process and page size. - */ -#define SFMMU_TTE_CNT(sfmmup, szc) \ - (sfmmup)->sfmmu_iblk? \ - (sfmmup)->sfmmu_ismttecnt[(szc)] + \ - (sfmmup)->sfmmu_ttecnt[(szc)] : \ - (sfmmup)->sfmmu_ttecnt[(szc)]; + ASSERT(((tsbinfop)->tsb_flags & TSB_SHAREDCTX) || \ + sfmmu_hat_lock_held(sfmmup)); \ + if ((tte_szc) >= TTE4M) { \ + ASSERT((tsbinfop) != NULL); \ + (tsbinfop) = (tsbinfop)->tsb_next; \ + } \ +} /* * Macro to use to unload entries from the TSB. * It has knowledge of which page sizes get replicated in the TSB * and will call the appropriate unload routine for the appropriate size. 
*/ -#define SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp) \ +#define SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, ismhat) \ { \ int ttesz = get_hblk_ttesz(hmeblkp); \ if (ttesz == TTE8K || ttesz == TTE4M) { \ sfmmu_unload_tsb(sfmmup, addr, ttesz); \ } else { \ - caddr_t sva = (caddr_t)get_hblk_base(hmeblkp); \ + caddr_t sva = ismhat ? addr : \ + (caddr_t)get_hblk_base(hmeblkp); \ caddr_t eva = sva + get_hblk_span(hmeblkp); \ ASSERT(addr >= sva && addr < eva); \ sfmmu_unload_tsb_range(sfmmup, sva, eva, ttesz); \ @@ -744,7 +879,7 @@ sfmmu_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag) /* Update tsb_alloc_hiwater after memory is configured. */ /*ARGSUSED*/ static void -sfmmu_update_tsb_post_add(void *arg, pgcnt_t delta_pages) +sfmmu_update_post_add(void *arg, pgcnt_t delta_pages) { /* Assumes physmem has already been updated. */ SFMMU_SET_TSB_ALLOC_HIWATER(physmem); @@ -758,7 +893,7 @@ sfmmu_update_tsb_post_add(void *arg, pgcnt_t delta_pages) */ /*ARGSUSED*/ static int -sfmmu_update_tsb_pre_del(void *arg, pgcnt_t delta_pages) +sfmmu_update_pre_del(void *arg, pgcnt_t delta_pages) { return (0); } @@ -766,7 +901,7 @@ sfmmu_update_tsb_pre_del(void *arg, pgcnt_t delta_pages) /* Update tsb_alloc_hiwater after memory fails to be unconfigured. */ /*ARGSUSED*/ static void -sfmmu_update_tsb_post_del(void *arg, pgcnt_t delta_pages, int cancelled) +sfmmu_update_post_del(void *arg, pgcnt_t delta_pages, int cancelled) { /* * Whether the delete was cancelled or not, just go ahead and update @@ -776,11 +911,11 @@ sfmmu_update_tsb_post_del(void *arg, pgcnt_t delta_pages, int cancelled) SFMMU_SET_TSB_MAX_GROWSIZE(physmem); } -static kphysm_setup_vector_t sfmmu_update_tsb_vec = { +static kphysm_setup_vector_t sfmmu_update_vec = { KPHYSM_SETUP_VECTOR_VERSION, /* version */ - sfmmu_update_tsb_post_add, /* post_add */ - sfmmu_update_tsb_pre_del, /* pre_del */ - sfmmu_update_tsb_post_del /* post_del */ + sfmmu_update_post_add, /* post_add */ + sfmmu_update_pre_del, /* pre_del */ + sfmmu_update_post_del /* post_del */ }; @@ -936,7 +1071,6 @@ hat_init(void) { int i; uint_t sz; - uint_t maxtsb; size_t size; hat_lock_init(); @@ -1048,7 +1182,25 @@ hat_init(void) } SFMMU_SET_TSB_ALLOC_HIWATER(physmem); - /* Set tsb_max_growsize. */ + for (sz = tsb_slab_ttesz; sz > 0; sz--) { + if (!(disable_large_pages & (1 << sz))) + break; + } + + if (sz < tsb_slab_ttesz) { + tsb_slab_ttesz = sz; + tsb_slab_shift = MMU_PAGESHIFT + (sz << 1) + sz; + tsb_slab_size = 1 << tsb_slab_shift; + tsb_slab_mask = (1 << (tsb_slab_shift - MMU_PAGESHIFT)) - 1; + use_bigtsb_arena = 0; + } else if (use_bigtsb_arena && + (disable_large_pages & (1 << bigtsb_slab_ttesz))) { + use_bigtsb_arena = 0; + } + + if (!use_bigtsb_arena) { + bigtsb_slab_shift = tsb_slab_shift; + } SFMMU_SET_TSB_MAX_GROWSIZE(physmem); /* @@ -1059,28 +1211,28 @@ hat_init(void) * The trap handlers need to be patched with the final slab shift, * since they need to be able to construct the TSB pointer at runtime. 
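 *
 * As a rule of thumb, derived from SFMMU_SET_TSB_MAX_GROWSIZE() above: a
 * TSB of size code szc holds (1 << (TSB_START_SIZE + szc)) entries of
 * (1 << TSB_ENTRY_SHIFT) bytes, so it fits in a single slab only while
 *
 *	szc <= tsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT)
 *
 * (bigtsb_slab_shift when the 256M arena is in use); tsb_max_growsize is
 * clamped both to that bound and to UTSB_MAX_SZCODE.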
*/ - if (tsb_max_growsize <= TSB_512K_SZCODE) + if ((tsb_max_growsize <= TSB_512K_SZCODE) && + !(disable_large_pages & (1 << TTE512K))) { tsb_slab_ttesz = TTE512K; - - for (sz = tsb_slab_ttesz; sz > 0; sz--) { - if (!(disable_large_pages & (1 << sz))) - break; + tsb_slab_shift = MMU_PAGESHIFT512K; + tsb_slab_size = MMU_PAGESIZE512K; + tsb_slab_mask = MMU_PAGEOFFSET512K >> MMU_PAGESHIFT; + use_bigtsb_arena = 0; } - tsb_slab_ttesz = sz; - tsb_slab_shift = MMU_PAGESHIFT + (sz << 1) + sz; - tsb_slab_size = 1 << tsb_slab_shift; - tsb_slab_mask = (1 << (tsb_slab_shift - MMU_PAGESHIFT)) - 1; + if (!use_bigtsb_arena) { + bigtsb_slab_ttesz = tsb_slab_ttesz; + bigtsb_slab_shift = tsb_slab_shift; + bigtsb_slab_size = tsb_slab_size; + bigtsb_slab_mask = tsb_slab_mask; + } - maxtsb = tsb_slab_shift - (TSB_START_SIZE + TSB_ENTRY_SHIFT); - if (tsb_max_growsize > maxtsb) - tsb_max_growsize = maxtsb; /* * Set up memory callback to update tsb_alloc_hiwater and * tsb_max_growsize. */ - i = kphysm_setup_func_register(&sfmmu_update_tsb_vec, (void *) 0); + i = kphysm_setup_func_register(&sfmmu_update_vec, (void *) 0); ASSERT(i == 0); /* @@ -1099,30 +1251,56 @@ hat_init(void) * because vmem_create doesn't allow us to specify alignment * requirements. If this ever changes the code could be * simplified to use only one level of arenas. + * + * If 256M page support exists on sun4v, 256MB kmem_bigtsb_arena + * will be provided in addition to the 4M kmem_tsb_arena. */ + if (use_bigtsb_arena) { + kmem_bigtsb_arena = vmem_create("kmem_bigtsb", NULL, 0, + bigtsb_slab_size, sfmmu_vmem_xalloc_aligned_wrapper, + vmem_xfree, heap_arena, 0, VM_SLEEP); + } + kmem_tsb_arena = vmem_create("kmem_tsb", NULL, 0, tsb_slab_size, - sfmmu_vmem_xalloc_aligned_wrapper, vmem_xfree, heap_arena, - 0, VM_SLEEP); + sfmmu_vmem_xalloc_aligned_wrapper, + vmem_xfree, heap_arena, 0, VM_SLEEP); if (tsb_lgrp_affinity) { char s[50]; for (i = 0; i < NLGRPS_MAX; i++) { + if (use_bigtsb_arena) { + (void) sprintf(s, "kmem_bigtsb_lgrp%d", i); + kmem_bigtsb_default_arena[i] = vmem_create(s, + NULL, 0, 2 * tsb_slab_size, + sfmmu_tsb_segkmem_alloc, + sfmmu_tsb_segkmem_free, kmem_bigtsb_arena, + 0, VM_SLEEP | VM_BESTFIT); + } + (void) sprintf(s, "kmem_tsb_lgrp%d", i); - kmem_tsb_default_arena[i] = - vmem_create(s, NULL, 0, PAGESIZE, - sfmmu_tsb_segkmem_alloc, sfmmu_tsb_segkmem_free, - kmem_tsb_arena, 0, VM_SLEEP | VM_BESTFIT); + kmem_tsb_default_arena[i] = vmem_create(s, + NULL, 0, PAGESIZE, sfmmu_tsb_segkmem_alloc, + sfmmu_tsb_segkmem_free, kmem_tsb_arena, 0, + VM_SLEEP | VM_BESTFIT); + (void) sprintf(s, "sfmmu_tsb_lgrp%d_cache", i); - sfmmu_tsb_cache[i] = kmem_cache_create(s, PAGESIZE, - PAGESIZE, NULL, NULL, NULL, NULL, + sfmmu_tsb_cache[i] = kmem_cache_create(s, + PAGESIZE, PAGESIZE, NULL, NULL, NULL, NULL, kmem_tsb_default_arena[i], 0); } } else { + if (use_bigtsb_arena) { + kmem_bigtsb_default_arena[0] = + vmem_create("kmem_bigtsb_default", NULL, 0, + 2 * tsb_slab_size, sfmmu_tsb_segkmem_alloc, + sfmmu_tsb_segkmem_free, kmem_bigtsb_arena, 0, + VM_SLEEP | VM_BESTFIT); + } + kmem_tsb_default_arena[0] = vmem_create("kmem_tsb_default", NULL, 0, PAGESIZE, sfmmu_tsb_segkmem_alloc, sfmmu_tsb_segkmem_free, kmem_tsb_arena, 0, VM_SLEEP | VM_BESTFIT); - sfmmu_tsb_cache[0] = kmem_cache_create("sfmmu_tsb_cache", PAGESIZE, PAGESIZE, NULL, NULL, NULL, NULL, kmem_tsb_default_arena[0], 0); @@ -1203,6 +1381,26 @@ hat_init(void) mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX); + srd_buckets = 
kmem_zalloc(SFMMU_MAX_SRD_BUCKETS * + sizeof (srd_buckets[0]), KM_SLEEP); + for (i = 0; i < SFMMU_MAX_SRD_BUCKETS; i++) { + mutex_init(&srd_buckets[i].srdb_lock, NULL, MUTEX_DEFAULT, + NULL); + } + /* + * 64 byte alignment is required in order to isolate certain field + * into its own cacheline. + */ + srd_cache = kmem_cache_create("srd_cache", sizeof (sf_srd_t), 64, + sfmmu_srdcache_constructor, sfmmu_srdcache_destructor, + NULL, NULL, NULL, 0); + region_cache = kmem_cache_create("region_cache", + sizeof (sf_region_t), 0, sfmmu_rgncache_constructor, + sfmmu_rgncache_destructor, NULL, NULL, NULL, 0); + scd_cache = kmem_cache_create("scd_cache", sizeof (sf_scd_t), 0, + sfmmu_scdcache_constructor, sfmmu_scdcache_destructor, + NULL, NULL, NULL, 0); + /* * Pre-allocate hrm_hashtab before enabling the collection of * refmod statistics. Allocating on the fly would mean us @@ -1263,6 +1461,8 @@ hat_alloc(struct as *as) sfmmup = kmem_cache_alloc(sfmmuid_cache, KM_SLEEP); sfmmup->sfmmu_as = as; sfmmup->sfmmu_flags = 0; + sfmmup->sfmmu_tteflags = 0; + sfmmup->sfmmu_rtteflags = 0; LOCK_INIT_CLEAR(&sfmmup->sfmmu_ctx_lock); if (as == &kas) { @@ -1303,7 +1503,7 @@ hat_alloc(struct as *as) (void) sfmmu_tsbinfo_alloc(&sfmmup->sfmmu_tsb, default_tsb_size, TSB8K|TSB64K|TSB512K, 0, sfmmup); - sfmmup->sfmmu_flags = HAT_SWAPPED; + sfmmup->sfmmu_flags = HAT_SWAPPED | HAT_ALLCTX_INVALID; ASSERT(sfmmup->sfmmu_tsb != NULL); } @@ -1313,15 +1513,17 @@ hat_alloc(struct as *as) sfmmup->sfmmu_ctxs[i].gnum = 0; } - sfmmu_setup_tsbinfo(sfmmup); for (i = 0; i < max_mmu_page_sizes; i++) { sfmmup->sfmmu_ttecnt[i] = 0; + sfmmup->sfmmu_scdrttecnt[i] = 0; sfmmup->sfmmu_ismttecnt[i] = 0; + sfmmup->sfmmu_scdismttecnt[i] = 0; sfmmup->sfmmu_pgsz[i] = TTE8K; } - + sfmmup->sfmmu_tsb0_4minflcnt = 0; sfmmup->sfmmu_iblk = NULL; sfmmup->sfmmu_ismhat = 0; + sfmmup->sfmmu_scdhat = 0; sfmmup->sfmmu_ismblkpa = (uint64_t)-1; if (sfmmup == ksfmmup) { CPUSET_ALL(sfmmup->sfmmu_cpusran); @@ -1333,6 +1535,12 @@ hat_alloc(struct as *as) sfmmup->sfmmu_clrbin = sfmmup->sfmmu_clrstart; sfmmup->sfmmu_xhat_provider = NULL; cv_init(&sfmmup->sfmmu_tsb_cv, NULL, CV_DEFAULT, NULL); + sfmmup->sfmmu_srdp = NULL; + SF_RGNMAP_ZERO(sfmmup->sfmmu_region_map); + bzero(sfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE); + sfmmup->sfmmu_scdp = NULL; + sfmmup->sfmmu_scd_link.next = NULL; + sfmmup->sfmmu_scd_link.prev = NULL; return (sfmmup); } @@ -1531,11 +1739,11 @@ hat_setup(struct hat *sfmmup, int allocflag) kpreempt_disable(); CPUSET_ADD(sfmmup->sfmmu_cpusran, CPU->cpu_id); - /* * sfmmu_setctx_sec takes <pgsz|cnum> as a parameter, * pagesize bits don't matter in this case since we are passing * INVALID_CONTEXT to it. 
+ * Compatibility Note: hw takes care of MMU_SCONTEXT1 */ sfmmu_setctx_sec(INVALID_CONTEXT); sfmmu_clear_utsbinfo(); @@ -1557,6 +1765,11 @@ hat_free_start(struct hat *sfmmup) ASSERT(sfmmup->sfmmu_xhat_provider == NULL); sfmmup->sfmmu_free = 1; + if (sfmmup->sfmmu_scdp != NULL) { + sfmmu_leave_scd(sfmmup, 0); + } + + ASSERT(sfmmup->sfmmu_scdp == NULL); } void @@ -1565,20 +1778,13 @@ hat_free_end(struct hat *sfmmup) int i; ASSERT(sfmmup->sfmmu_xhat_provider == NULL); - if (sfmmup->sfmmu_ismhat) { - for (i = 0; i < mmu_page_sizes; i++) { - sfmmup->sfmmu_ttecnt[i] = 0; - sfmmup->sfmmu_ismttecnt[i] = 0; - } - } else { - /* EMPTY */ - ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0); - ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0); - ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0); - ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0); - ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0); - ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0); - } + ASSERT(sfmmup->sfmmu_free == 1); + ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0); + ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0); + ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0); + ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0); + ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0); + ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0); if (sfmmup->sfmmu_rmstat) { hat_freestat(sfmmup->sfmmu_as, NULL); @@ -1589,8 +1795,26 @@ hat_free_end(struct hat *sfmmup) sfmmu_tsbinfo_free(sfmmup->sfmmu_tsb); sfmmup->sfmmu_tsb = next; } + + if (sfmmup->sfmmu_srdp != NULL) { + sfmmu_leave_srd(sfmmup); + ASSERT(sfmmup->sfmmu_srdp == NULL); + for (i = 0; i < SFMMU_L1_HMERLINKS; i++) { + if (sfmmup->sfmmu_hmeregion_links[i] != NULL) { + kmem_free(sfmmup->sfmmu_hmeregion_links[i], + SFMMU_L2_HMERLINKS_SIZE); + sfmmup->sfmmu_hmeregion_links[i] = NULL; + } + } + } sfmmu_free_sfmmu(sfmmup); +#ifdef DEBUG + for (i = 0; i < SFMMU_L1_HMERLINKS; i++) { + ASSERT(sfmmup->sfmmu_hmeregion_links[i] == NULL); + } +#endif + kmem_cache_free(sfmmuid_cache, sfmmup); } @@ -1658,6 +1882,7 @@ hat_swapout(struct hat *sfmmup) if ((hmeblkp->hblk_tag.htag_id == sfmmup) && !hmeblkp->hblk_shw_bit && !hmeblkp->hblk_lckcnt) { + ASSERT(!hmeblkp->hblk_shared); (void) sfmmu_hblk_unload(sfmmup, hmeblkp, (caddr_t)get_hblk_base(hmeblkp), get_hblk_endaddr(hmeblkp), @@ -1739,11 +1964,6 @@ hat_swapout(struct hat *sfmmup) tsbinfop->tsb_tte.ll = 0; } -#ifdef sun4v - if (freelist) - sfmmu_invalidate_tsbinfo(sfmmup); -#endif /* sun4v */ - /* Now we can drop the lock and free the TSB memory. */ sfmmu_hat_exit(hatlockp); for (; freelist != NULL; freelist = next) { @@ -1760,14 +1980,61 @@ int hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len, uint_t flag) { + sf_srd_t *srdp; + sf_scd_t *scdp; + int i; extern uint_t get_color_start(struct as *); ASSERT(hat->sfmmu_xhat_provider == NULL); - ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW)); + ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) || + (flag == HAT_DUP_SRD)); + ASSERT(hat != ksfmmup); + ASSERT(newhat != ksfmmup); + ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp); if (flag == HAT_DUP_COW) { panic("hat_dup: HAT_DUP_COW not supported"); } + + if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) { + ASSERT(srdp->srd_evp != NULL); + VN_HOLD(srdp->srd_evp); + ASSERT(srdp->srd_refcnt > 0); + newhat->sfmmu_srdp = srdp; + atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1); + } + + /* + * HAT_DUP_ALL flag is used after as duplication is done. 
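 * ("as duplication" here means address space duplication.)  A sketch of
 * the expected fork()-time sequence, assuming the generic as duplication
 * code drives it; the ordering is inferred from the flag semantics rather
 * than quoted from the callers:
 *
 *	newhat = hat_alloc(newas);
 *	hat_dup(hat, newhat, (caddr_t)0, 0, HAT_DUP_SRD);
 *		(hold the parent's SRD so region joins can be repeated)
 *	... segment drivers duplicate segments and re-join HME regions ...
 *	hat_dup(hat, newhat, (caddr_t)0, 0, HAT_DUP_ALL);
 *		(copy the region tte flags, join the parent's SCD and
 *		 re-check the preferred page sizes)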
+ */ + if (flag == HAT_DUP_ALL && ((srdp = newhat->sfmmu_srdp) != NULL)) { + ASSERT(newhat->sfmmu_srdp->srd_refcnt >= 2); + newhat->sfmmu_rtteflags = hat->sfmmu_rtteflags; + if (hat->sfmmu_flags & HAT_4MTEXT_FLAG) { + newhat->sfmmu_flags |= HAT_4MTEXT_FLAG; + } + + /* check if need to join scd */ + if ((scdp = hat->sfmmu_scdp) != NULL && + newhat->sfmmu_scdp != scdp) { + int ret; + SF_RGNMAP_IS_SUBSET(&newhat->sfmmu_region_map, + &scdp->scd_region_map, ret); + ASSERT(ret); + sfmmu_join_scd(scdp, newhat); + ASSERT(newhat->sfmmu_scdp == scdp && + scdp->scd_refcnt >= 2); + for (i = 0; i < max_mmu_page_sizes; i++) { + newhat->sfmmu_ismttecnt[i] = + hat->sfmmu_ismttecnt[i]; + newhat->sfmmu_scdismttecnt[i] = + hat->sfmmu_scdismttecnt[i]; + } + } + + sfmmu_check_page_sizes(newhat, 1); + } + if (flag == HAT_DUP_ALL && consistent_coloring == 0 && update_proc_pgcolorbase_after_fork != 0) { hat->sfmmu_clrbin = get_color_start(hat->sfmmu_as); @@ -1775,14 +2042,38 @@ hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len, return (0); } +void +hat_memload(struct hat *hat, caddr_t addr, struct page *pp, + uint_t attr, uint_t flags) +{ + hat_do_memload(hat, addr, pp, attr, flags, + SFMMU_INVALID_SHMERID); +} + +void +hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp, + uint_t attr, uint_t flags, hat_region_cookie_t rcookie) +{ + uint_t rid; + if (rcookie == HAT_INVALID_REGION_COOKIE || + hat->sfmmu_xhat_provider != NULL) { + hat_do_memload(hat, addr, pp, attr, flags, + SFMMU_INVALID_SHMERID); + return; + } + rid = (uint_t)((uint64_t)rcookie); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + hat_do_memload(hat, addr, pp, attr, flags, rid); +} + /* * Set up addr to map to page pp with protection prot. * As an optimization we also load the TSB with the * corresponding tte but it is no big deal if the tte gets kicked out. */ -void -hat_memload(struct hat *hat, caddr_t addr, struct page *pp, - uint_t attr, uint_t flags) +static void +hat_do_memload(struct hat *hat, caddr_t addr, struct page *pp, + uint_t attr, uint_t flags, uint_t rid) { tte_t tte; @@ -1792,6 +2083,7 @@ hat_memload(struct hat *hat, caddr_t addr, struct page *pp, ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET)); ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG)); ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR)); + SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE); if (PP_ISFREE(pp)) { panic("hat_memload: loading a mapping to free page %p", @@ -1799,6 +2091,8 @@ hat_memload(struct hat *hat, caddr_t addr, struct page *pp, } if (hat->sfmmu_xhat_provider) { + /* no regions for xhats */ + ASSERT(!SFMMU_IS_SHMERID_VALID(rid)); XHAT_MEMLOAD(hat, addr, pp, attr, flags); return; } @@ -1824,7 +2118,7 @@ hat_memload(struct hat *hat, caddr_t addr, struct page *pp, #endif sfmmu_memtte(&tte, pp->p_pagenum, attr, TTE8K); - (void) sfmmu_tteload_array(hat, &tte, addr, &pp, flags); + (void) sfmmu_tteload_array(hat, &tte, addr, &pp, flags, rid); /* * Check TSB and TLB page sizes. 
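
A minimal usage sketch of the new memload interface from a caller's point
of view (addr, pp and rcookie are placeholders, the cookie is assumed to
have been obtained earlier from the region join interface, which is not
part of this hunk, and the attr/flags values are only examples):

	/* classic private mapping: per-process hmeblks and accounting */
	hat_memload(as->a_hat, addr, pp, PROT_READ | PROT_EXEC, HAT_LOAD);

	/*
	 * shared-region mapping: an invalid cookie (or an xhat provider)
	 * silently falls back to the private path above
	 */
	hat_memload_region(as->a_hat, addr, pp, PROT_READ | PROT_EXEC,
	    HAT_LOAD, rcookie);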
@@ -1931,7 +2225,7 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn, if (!use_lgpg) { sfmmu_memtte(&tte, pfn, attr, TTE8K); (void) sfmmu_tteload_array(hat, &tte, addr, &pp, - flags); + flags, SFMMU_INVALID_SHMERID); len -= MMU_PAGESIZE; addr += MMU_PAGESIZE; pfn++; @@ -1947,7 +2241,7 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn, !(mmu_ptob(pfn) & MMU_PAGEOFFSET4M)) { sfmmu_memtte(&tte, pfn, attr, TTE4M); (void) sfmmu_tteload_array(hat, &tte, addr, &pp, - flags); + flags, SFMMU_INVALID_SHMERID); len -= MMU_PAGESIZE4M; addr += MMU_PAGESIZE4M; pfn += MMU_PAGESIZE4M / MMU_PAGESIZE; @@ -1957,7 +2251,7 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn, !(mmu_ptob(pfn) & MMU_PAGEOFFSET512K)) { sfmmu_memtte(&tte, pfn, attr, TTE512K); (void) sfmmu_tteload_array(hat, &tte, addr, &pp, - flags); + flags, SFMMU_INVALID_SHMERID); len -= MMU_PAGESIZE512K; addr += MMU_PAGESIZE512K; pfn += MMU_PAGESIZE512K / MMU_PAGESIZE; @@ -1967,14 +2261,14 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn, !(mmu_ptob(pfn) & MMU_PAGEOFFSET64K)) { sfmmu_memtte(&tte, pfn, attr, TTE64K); (void) sfmmu_tteload_array(hat, &tte, addr, &pp, - flags); + flags, SFMMU_INVALID_SHMERID); len -= MMU_PAGESIZE64K; addr += MMU_PAGESIZE64K; pfn += MMU_PAGESIZE64K / MMU_PAGESIZE; } else { sfmmu_memtte(&tte, pfn, attr, TTE8K); (void) sfmmu_tteload_array(hat, &tte, addr, &pp, - flags); + flags, SFMMU_INVALID_SHMERID); len -= MMU_PAGESIZE; addr += MMU_PAGESIZE; pfn++; @@ -1989,6 +2283,31 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn, } } +void +hat_memload_array(struct hat *hat, caddr_t addr, size_t len, + struct page **pps, uint_t attr, uint_t flags) +{ + hat_do_memload_array(hat, addr, len, pps, attr, flags, + SFMMU_INVALID_SHMERID); +} + +void +hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len, + struct page **pps, uint_t attr, uint_t flags, + hat_region_cookie_t rcookie) +{ + uint_t rid; + if (rcookie == HAT_INVALID_REGION_COOKIE || + hat->sfmmu_xhat_provider != NULL) { + hat_do_memload_array(hat, addr, len, pps, attr, flags, + SFMMU_INVALID_SHMERID); + return; + } + rid = (uint_t)((uint64_t)rcookie); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + hat_do_memload_array(hat, addr, len, pps, attr, flags, rid); +} + /* * Map the largest extend possible out of the page array. The array may NOT * be in order. The largest possible mapping a page can have @@ -2000,9 +2319,9 @@ hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn, * should consist of properly aligned contigous pages that are * part of a big page for a large mapping to be created. 
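 *
 * The hat_devload() loop above boils down to picking the largest page
 * size whose length and alignment checks pass (pseudo-C, illustrative
 * only):
 *
 *	if (len >= MMU_PAGESIZE4M &&
 *	    !((uintptr_t)addr & MMU_PAGEOFFSET4M) &&
 *	    !(mmu_ptob(pfn) & MMU_PAGEOFFSET4M))
 *		use TTE4M;
 *	else if (the analogous 512K checks pass)
 *		use TTE512K;
 *	else if (the analogous 64K checks pass)
 *		use TTE64K;
 *	else
 *		use TTE8K;
 *
 * hat_memload_array() below applies the same idea, except the candidate
 * size comes from the large-page size of the pages themselves, hence the
 * "properly aligned contiguous pages" requirement above.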
*/ -void -hat_memload_array(struct hat *hat, caddr_t addr, size_t len, - struct page **pps, uint_t attr, uint_t flags) +static void +hat_do_memload_array(struct hat *hat, caddr_t addr, size_t len, + struct page **pps, uint_t attr, uint_t flags, uint_t rid) { int ttesz; size_t mapsz; @@ -2012,8 +2331,10 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, uint_t large_pages_disable; ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET)); + SFMMU_VALIDATE_HMERID(hat, rid, addr, len); if (hat->sfmmu_xhat_provider) { + ASSERT(!SFMMU_IS_SHMERID_VALID(rid)); XHAT_MEMLOAD_ARRAY(hat, addr, len, pps, attr, flags); return; } @@ -2041,7 +2362,8 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, } if (npgs < NHMENTS || large_pages_disable == LARGE_PAGES_OFF) { - sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs); + sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs, + rid); return; } @@ -2074,7 +2396,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, sfmmu_memtte(&tte, (*pps)->p_pagenum, attr, ttesz); if (!sfmmu_tteload_array(hat, &tte, addr, - pps, flags)) { + pps, flags, rid)) { break; } } @@ -2090,7 +2412,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, ASSERT(numpg <= npgs); mapsz = numpg * MMU_PAGESIZE; sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, - numpg); + numpg, rid); } addr += mapsz; npgs -= numpg; @@ -2098,7 +2420,8 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, } if (npgs) { - sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs); + sfmmu_memload_batchsmall(hat, addr, pps, attr, flags, npgs, + rid); } /* @@ -2114,7 +2437,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, */ static void sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps, - uint_t attr, uint_t flags, pgcnt_t npgs) + uint_t attr, uint_t flags, pgcnt_t npgs, uint_t rid) { tte_t tte; page_t *pp; @@ -2126,14 +2449,15 @@ sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps, /* * Acquire the hash bucket. */ - hmebp = sfmmu_tteload_acquire_hashbucket(hat, vaddr, TTE8K); + hmebp = sfmmu_tteload_acquire_hashbucket(hat, vaddr, TTE8K, + rid); ASSERT(hmebp); /* * Find the hment block. */ hmeblkp = sfmmu_tteload_find_hmeblk(hat, hmebp, vaddr, - TTE8K, flags); + TTE8K, flags, rid); ASSERT(hmeblkp); do { @@ -2147,7 +2471,7 @@ sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps, * Add the translation. */ (void) sfmmu_tteload_addentry(hat, hmeblkp, &tte, - vaddr, pps, flags); + vaddr, pps, flags, rid); /* * Goto next page. @@ -2223,12 +2547,17 @@ sfmmu_memtte(tte_t *ttep, pfn_t pfn, uint_t attr, int tte_sz) * If a page structure is specified then it will add the * corresponding hment to the mapping list. * It will also update the hmenum field for the tte. + * + * Currently this function is only used for kernel mappings. + * So pass invalid region to sfmmu_tteload_array(). 
*/ void sfmmu_tteload(struct hat *sfmmup, tte_t *ttep, caddr_t vaddr, page_t *pp, uint_t flags) { - (void) sfmmu_tteload_array(sfmmup, ttep, vaddr, &pp, flags); + ASSERT(sfmmup == ksfmmup); + (void) sfmmu_tteload_array(sfmmup, ttep, vaddr, &pp, flags, + SFMMU_INVALID_SHMERID); } /* @@ -2427,7 +2756,7 @@ sfmmu_select_tsb_szc(pgcnt_t pgcnt) */ static int sfmmu_tteload_array(sfmmu_t *sfmmup, tte_t *ttep, caddr_t vaddr, - page_t **pps, uint_t flags) + page_t **pps, uint_t flags, uint_t rid) { struct hmehash_bucket *hmebp; struct hme_blk *hmeblkp; @@ -2443,19 +2772,21 @@ sfmmu_tteload_array(sfmmu_t *sfmmup, tte_t *ttep, caddr_t vaddr, /* * Acquire the hash bucket. */ - hmebp = sfmmu_tteload_acquire_hashbucket(sfmmup, vaddr, size); + hmebp = sfmmu_tteload_acquire_hashbucket(sfmmup, vaddr, size, rid); ASSERT(hmebp); /* * Find the hment block. */ - hmeblkp = sfmmu_tteload_find_hmeblk(sfmmup, hmebp, vaddr, size, flags); + hmeblkp = sfmmu_tteload_find_hmeblk(sfmmup, hmebp, vaddr, size, flags, + rid); ASSERT(hmeblkp); /* * Add the translation. */ - ret = sfmmu_tteload_addentry(sfmmup, hmeblkp, ttep, vaddr, pps, flags); + ret = sfmmu_tteload_addentry(sfmmup, hmeblkp, ttep, vaddr, pps, flags, + rid); /* * Release the hash bucket. @@ -2469,14 +2800,18 @@ sfmmu_tteload_array(sfmmu_t *sfmmup, tte_t *ttep, caddr_t vaddr, * Function locks and returns a pointer to the hash bucket for vaddr and size. */ static struct hmehash_bucket * -sfmmu_tteload_acquire_hashbucket(sfmmu_t *sfmmup, caddr_t vaddr, int size) +sfmmu_tteload_acquire_hashbucket(sfmmu_t *sfmmup, caddr_t vaddr, int size, + uint_t rid) { struct hmehash_bucket *hmebp; int hmeshift; + void *htagid = sfmmutohtagid(sfmmup, rid); + + ASSERT(htagid != NULL); hmeshift = HME_HASH_SHIFT(size); - hmebp = HME_HASH_FUNCTION(sfmmup, vaddr, hmeshift); + hmebp = HME_HASH_FUNCTION(htagid, vaddr, hmeshift); SFMMU_HASH_LOCK(hmebp); @@ -2490,7 +2825,7 @@ sfmmu_tteload_acquire_hashbucket(sfmmu_t *sfmmup, caddr_t vaddr, int size) */ static struct hme_blk * sfmmu_tteload_find_hmeblk(sfmmu_t *sfmmup, struct hmehash_bucket *hmebp, - caddr_t vaddr, uint_t size, uint_t flags) + caddr_t vaddr, uint_t size, uint_t flags, uint_t rid) { hmeblk_tag hblktag; int hmeshift; @@ -2499,10 +2834,14 @@ sfmmu_tteload_find_hmeblk(sfmmu_t *sfmmup, struct hmehash_bucket *hmebp, struct kmem_cache *sfmmu_cache; uint_t forcefree; - hblktag.htag_id = sfmmup; + SFMMU_VALIDATE_HMERID(sfmmup, rid, vaddr, TTEBYTES(size)); + + hblktag.htag_id = sfmmutohtagid(sfmmup, rid); + ASSERT(hblktag.htag_id != NULL); hmeshift = HME_HASH_SHIFT(size); hblktag.htag_bspage = HME_HASH_BSPAGE(vaddr, hmeshift); hblktag.htag_rehash = HME_HASH_REHASH(size); + hblktag.htag_rid = rid; ttearray_realloc: @@ -2526,7 +2865,9 @@ ttearray_realloc: if (hmeblkp == NULL) { hmeblkp = sfmmu_hblk_alloc(sfmmup, vaddr, hmebp, size, - hblktag, flags); + hblktag, flags, rid); + ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || hmeblkp->hblk_shared); + ASSERT(SFMMU_IS_SHMERID_VALID(rid) || !hmeblkp->hblk_shared); } else { /* * It is possible for 8k and 64k hblks to collide since they @@ -2546,6 +2887,7 @@ ttearray_realloc: * if the hblk was previously used as a shadow hblk then * we will change it to a normal hblk */ + ASSERT(!hmeblkp->hblk_shared); if (hmeblkp->hblk_shw_mask) { sfmmu_shadow_hcleanup(sfmmup, hmeblkp, hmebp); ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp)); @@ -2577,6 +2919,9 @@ ttearray_realloc: ASSERT(get_hblk_ttesz(hmeblkp) == size); ASSERT(!hmeblkp->hblk_shw_bit); + ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || hmeblkp->hblk_shared); + 
ASSERT(SFMMU_IS_SHMERID_VALID(rid) || !hmeblkp->hblk_shared); + ASSERT(hmeblkp->hblk_tag.htag_rid == rid); return (hmeblkp); } @@ -2587,7 +2932,7 @@ ttearray_realloc: */ static int sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, - caddr_t vaddr, page_t **pps, uint_t flags) + caddr_t vaddr, page_t **pps, uint_t flags, uint_t rid) { page_t *pp = *pps; int hmenum, size, remap; @@ -2598,6 +2943,7 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, struct sf_hment *sfhme; kmutex_t *pml, *pmtx; hatlock_t *hatlockp; + int myflt; /* * remove this panic when we decide to let user virtual address @@ -2651,6 +2997,9 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, } ASSERT(!((uintptr_t)vaddr & TTE_PAGE_OFFSET(size))); + SFMMU_VALIDATE_HMERID(sfmmup, rid, vaddr, TTEBYTES(size)); + ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || hmeblkp->hblk_shared); + ASSERT(SFMMU_IS_SHMERID_VALID(rid) || !hmeblkp->hblk_shared); HBLKTOHME_IDX(sfhme, hmeblkp, vaddr, hmenum); @@ -2732,11 +3081,11 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, ASSERT((!remap) ? sfhme->hme_next == NULL : 1); if (flags & HAT_LOAD_LOCK) { - if (((int)hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) { + if ((hmeblkp->hblk_lckcnt + 1) >= MAX_HBLK_LCKCNT) { panic("too high lckcnt-hmeblk %p", (void *)hmeblkp); } - atomic_add_16(&hmeblkp->hblk_lckcnt, 1); + atomic_add_32(&hmeblkp->hblk_lckcnt, 1); HBLK_STACK_TRACE(hmeblkp, HBLK_LOCK); } @@ -2767,59 +3116,70 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, chk_tte(&orig_old, &tteold, ttep, hmeblkp); #endif /* DEBUG */ } + ASSERT(TTE_IS_VALID(&sfhme->hme_tte)); if (!TTE_IS_VALID(&tteold)) { atomic_add_16(&hmeblkp->hblk_vcnt, 1); - atomic_add_long(&sfmmup->sfmmu_ttecnt[size], 1); - - /* - * HAT_RELOAD_SHARE has been deprecated with lpg DISM. - */ - - if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 && - sfmmup != ksfmmup) { + if (rid == SFMMU_INVALID_SHMERID) { + atomic_add_long(&sfmmup->sfmmu_ttecnt[size], 1); + } else { + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + sf_region_t *rgnp = srdp->srd_hmergnp[rid]; /* - * If this is the first large mapping for the process - * we must force any CPUs running this process to TL=0 - * where they will reload the HAT flags from the - * tsbmiss area. This is necessary to make the large - * mappings we are about to load visible to those CPUs; - * otherwise they'll loop forever calling pagefault() - * since we don't search large hash chains by default. + * We already accounted for region ttecnt's in sfmmu + * during hat_join_region() processing. Here we + * only update ttecnt's in region structure.
*/ + atomic_add_long(&rgnp->rgn_ttecnt[size], 1); + } + } + + myflt = (astosfmmu(curthread->t_procp->p_as) == sfmmup); + if (size > TTE8K && (flags & HAT_LOAD_SHARE) == 0 && + sfmmup != ksfmmup) { + uchar_t tteflag = 1 << size; + if (rid == SFMMU_INVALID_SHMERID) { + if (!(sfmmup->sfmmu_tteflags & tteflag)) { + hatlockp = sfmmu_hat_enter(sfmmup); + sfmmup->sfmmu_tteflags |= tteflag; + sfmmu_hat_exit(hatlockp); + } + } else if (!(sfmmup->sfmmu_rtteflags & tteflag)) { hatlockp = sfmmu_hat_enter(sfmmup); - if (size == TTE512K && - !SFMMU_FLAGS_ISSET(sfmmup, HAT_512K_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_512K_FLAG); - sfmmu_sync_mmustate(sfmmup); - } else if (size == TTE4M && - !SFMMU_FLAGS_ISSET(sfmmup, HAT_4M_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_4M_FLAG); - sfmmu_sync_mmustate(sfmmup); - } else if (size == TTE64K && - !SFMMU_FLAGS_ISSET(sfmmup, HAT_64K_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_64K_FLAG); - /* no sync mmustate; 64K shares 8K hashes */ - } else if (mmu_page_sizes == max_mmu_page_sizes) { - if (size == TTE32M && - !SFMMU_FLAGS_ISSET(sfmmup, HAT_32M_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_32M_FLAG); - sfmmu_sync_mmustate(sfmmup); - } else if (size == TTE256M && - !SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_256M_FLAG); - sfmmu_sync_mmustate(sfmmup); + sfmmup->sfmmu_rtteflags |= tteflag; + sfmmu_hat_exit(hatlockp); + } + /* + * Update the current CPU tsbmiss area, so the current thread + * won't need to take the tsbmiss for the new pagesize. + * The other threads in the process will update their tsb + * miss area lazily in sfmmu_tsbmiss_exception() when they + * fail to find the translation for a newly added pagesize. + */ + if (size > TTE64K && myflt) { + struct tsbmiss *tsbmp; + kpreempt_disable(); + tsbmp = &tsbmiss_area[CPU->cpu_id]; + if (rid == SFMMU_INVALID_SHMERID) { + if (!(tsbmp->uhat_tteflags & tteflag)) { + tsbmp->uhat_tteflags |= tteflag; + } + } else { + if (!(tsbmp->uhat_rtteflags & tteflag)) { + tsbmp->uhat_rtteflags |= tteflag; } } - if (size >= TTE4M && (flags & HAT_LOAD_TEXT) && - !SFMMU_FLAGS_ISSET(sfmmup, HAT_4MTEXT_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_4MTEXT_FLAG); - } - sfmmu_hat_exit(hatlockp); + kpreempt_enable(); } } - ASSERT(TTE_IS_VALID(&sfhme->hme_tte)); + + if (size >= TTE4M && (flags & HAT_LOAD_TEXT) && + !SFMMU_FLAGS_ISSET(sfmmup, HAT_4MTEXT_FLAG)) { + hatlockp = sfmmu_hat_enter(sfmmup); + SFMMU_FLAGS_SET(sfmmup, HAT_4MTEXT_FLAG); + sfmmu_hat_exit(hatlockp); + } flush_tte.tte_intlo = (tteold.tte_intlo ^ ttep->tte_intlo) & hw_tte.tte_intlo; @@ -2837,8 +3197,21 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, if (TTE_IS_MOD(&tteold)) { sfmmu_ttesync(sfmmup, vaddr, &tteold, pp); } - sfmmu_tlb_demap(vaddr, sfmmup, hmeblkp, 0, 0); - xt_sync(sfmmup->sfmmu_cpusran); + /* + * hwtte bits shouldn't change for SRD hmeblks as long as SRD + * hmes are only used for read only text. Adding this code for + * completeness and future use of shared hmeblks with writable + * mappings of VMODSORT vnodes. + */ + if (hmeblkp->hblk_shared) { + cpuset_t cpuset = sfmmu_rgntlb_demap(vaddr, + sfmmup->sfmmu_srdp->srd_hmergnp[rid], hmeblkp, 1); + xt_sync(cpuset); + SFMMU_STAT_ADD(sf_region_remap_demap, 1); + } else { + sfmmu_tlb_demap(vaddr, sfmmup, hmeblkp, 0, 0); + xt_sync(sfmmup->sfmmu_cpusran); + } } if ((flags & SFMMU_NO_TSBLOAD) == 0) { @@ -2848,8 +3221,18 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep, * have a single, unique TSB entry. Ditto for 32M/256M. 
*/ if (size == TTE8K || size == TTE4M) { + sf_scd_t *scdp; hatlockp = sfmmu_hat_enter(sfmmup); - sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte, size); + /* + * Don't preload private TSB if the mapping is used + * by the shctx in the SCD. + */ + scdp = sfmmup->sfmmu_scdp; + if (rid == SFMMU_INVALID_SHMERID || scdp == NULL || + !SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) { + sfmmu_load_tsb(sfmmup, vaddr, &sfhme->hme_tte, + size); + } sfmmu_hat_exit(hatlockp); } } @@ -3119,6 +3502,7 @@ sfmmu_shadow_hcreate(sfmmu_t *sfmmup, caddr_t vaddr, int ttesz, uint_t flags) hmeshift = HME_HASH_SHIFT(size); hblktag.htag_bspage = HME_HASH_BSPAGE(vaddr, hmeshift); hblktag.htag_rehash = HME_HASH_REHASH(size); + hblktag.htag_rid = SFMMU_INVALID_SHMERID; hmebp = HME_HASH_FUNCTION(sfmmup, vaddr, hmeshift); SFMMU_HASH_LOCK(hmebp); @@ -3127,7 +3511,7 @@ sfmmu_shadow_hcreate(sfmmu_t *sfmmup, caddr_t vaddr, int ttesz, uint_t flags) ASSERT(hmeblkp != (struct hme_blk *)hblk_reserve); if (hmeblkp == NULL) { hmeblkp = sfmmu_hblk_alloc(sfmmup, vaddr, hmebp, size, - hblktag, flags); + hblktag, flags, SFMMU_INVALID_SHMERID); } ASSERT(hmeblkp); if (!hmeblkp->hblk_shw_mask) { @@ -3142,7 +3526,8 @@ sfmmu_shadow_hcreate(sfmmu_t *sfmmup, caddr_t vaddr, int ttesz, uint_t flags) panic("sfmmu_shadow_hcreate: shw bit not set in hmeblkp 0x%p", (void *)hmeblkp); } - + ASSERT(hmeblkp->hblk_shw_bit == 1); + ASSERT(!hmeblkp->hblk_shared); vshift = vaddr_to_vshift(hblktag, vaddr, size); ASSERT(vshift < 8); /* @@ -3177,6 +3562,7 @@ sfmmu_shadow_hcleanup(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, int hashno, size; ASSERT(hmeblkp->hblk_shw_bit); + ASSERT(!hmeblkp->hblk_shared); ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp)); @@ -3210,6 +3596,7 @@ sfmmu_free_hblks(sfmmu_t *sfmmup, caddr_t addr, caddr_t endaddr, ASSERT(hashno > 0); hblktag.htag_id = sfmmup; hblktag.htag_rehash = hashno; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; hmeshift = HME_HASH_SHIFT(hashno); @@ -3226,6 +3613,7 @@ sfmmu_free_hblks(sfmmu_t *sfmmup, caddr_t addr, caddr_t endaddr, ASSERT(hblkpa == va_to_pa((caddr_t)hmeblkp)); if (HTAGS_EQ(hmeblkp->hblk_tag, hblktag)) { /* found hme_blk */ + ASSERT(!hmeblkp->hblk_shared); if (hmeblkp->hblk_shw_bit) { if (hmeblkp->hblk_shw_mask) { shadow = 1; @@ -3279,6 +3667,174 @@ sfmmu_free_hblks(sfmmu_t *sfmmup, caddr_t addr, caddr_t endaddr, } /* + * This routine's job is to delete stale invalid shared hmeregions hmeblks that + * may still linger on after pageunload. 
+ */ +static void +sfmmu_cleanup_rhblk(sf_srd_t *srdp, caddr_t addr, uint_t rid, int ttesz) +{ + int hmeshift; + hmeblk_tag hblktag; + struct hmehash_bucket *hmebp; + struct hme_blk *hmeblkp; + struct hme_blk *pr_hblk; + struct hme_blk *list = NULL; + uint64_t hblkpa, prevpa; + + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + + hmeshift = HME_HASH_SHIFT(ttesz); + hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift); + hblktag.htag_rehash = ttesz; + hblktag.htag_rid = rid; + hblktag.htag_id = srdp; + hmebp = HME_HASH_FUNCTION(srdp, addr, hmeshift); + + SFMMU_HASH_LOCK(hmebp); + HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa, pr_hblk, + prevpa, &list); + if (hmeblkp != NULL) { + ASSERT(hmeblkp->hblk_shared); + ASSERT(!hmeblkp->hblk_shw_bit); + if (hmeblkp->hblk_vcnt || hmeblkp->hblk_hmecnt) { + panic("sfmmu_cleanup_rhblk: valid hmeblk"); + } + ASSERT(!hmeblkp->hblk_lckcnt); + sfmmu_hblk_hash_rm(hmebp, hmeblkp, prevpa, pr_hblk); + sfmmu_hblk_free(hmebp, hmeblkp, hblkpa, &list); + } + SFMMU_HASH_UNLOCK(hmebp); + sfmmu_hblks_list_purge(&list); +} + +/* ARGSUSED */ +static void +sfmmu_rgn_cb_noop(caddr_t saddr, caddr_t eaddr, caddr_t r_saddr, + size_t r_size, void *r_obj, u_offset_t r_objoff) +{ +} + +/* + * update *eaddrp only if hmeblk was unloaded. + */ +static void +sfmmu_unload_hmeregion_va(sf_srd_t *srdp, uint_t rid, caddr_t addr, + caddr_t eaddr, int ttesz, caddr_t *eaddrp) +{ + int hmeshift; + hmeblk_tag hblktag; + struct hmehash_bucket *hmebp; + struct hme_blk *hmeblkp; + struct hme_blk *pr_hblk; + struct hme_blk *list = NULL; + uint64_t hblkpa, prevpa; + + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(ttesz >= HBLK_MIN_TTESZ); + + hmeshift = HME_HASH_SHIFT(ttesz); + hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift); + hblktag.htag_rehash = ttesz; + hblktag.htag_rid = rid; + hblktag.htag_id = srdp; + hmebp = HME_HASH_FUNCTION(srdp, addr, hmeshift); + + SFMMU_HASH_LOCK(hmebp); + HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa, pr_hblk, + prevpa, &list); + if (hmeblkp != NULL) { + ASSERT(hmeblkp->hblk_shared); + ASSERT(!hmeblkp->hblk_lckcnt); + if (hmeblkp->hblk_vcnt || hmeblkp->hblk_hmecnt) { + *eaddrp = sfmmu_hblk_unload(NULL, hmeblkp, addr, + eaddr, NULL, HAT_UNLOAD); + ASSERT(*eaddrp > addr); + } + ASSERT(!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt); + sfmmu_hblk_hash_rm(hmebp, hmeblkp, prevpa, pr_hblk); + sfmmu_hblk_free(hmebp, hmeblkp, hblkpa, &list); + } + SFMMU_HASH_UNLOCK(hmebp); + sfmmu_hblks_list_purge(&list); +} + +/* + * This routine can be optimized to eliminate scanning areas of smaller page + * size bitmaps when a corresponding bit is set in the bitmap for a bigger + * page size. For now assume the region will usually only have the primary + * size mappings so we'll scan only one bitmap anyway by checking rgn_hmeflags + * first. 
+ */ +static void +sfmmu_unload_hmeregion(sf_srd_t *srdp, sf_region_t *rgnp) +{ + int ttesz = rgnp->rgn_pgszc; + size_t rsz = rgnp->rgn_size; + caddr_t rsaddr = rgnp->rgn_saddr; + caddr_t readdr = rsaddr + rsz; + caddr_t rhsaddr; + caddr_t va; + uint_t rid = rgnp->rgn_id; + caddr_t cbsaddr; + caddr_t cbeaddr; + hat_rgn_cb_func_t rcbfunc; + ulong_t cnt; + + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + + ASSERT(IS_P2ALIGNED(rsaddr, TTEBYTES(ttesz))); + ASSERT(IS_P2ALIGNED(rsz, TTEBYTES(ttesz))); + if (ttesz < HBLK_MIN_TTESZ) { + ttesz = HBLK_MIN_TTESZ; + rhsaddr = (caddr_t)P2ALIGN((uintptr_t)rsaddr, HBLK_MIN_BYTES); + } else { + rhsaddr = rsaddr; + } + + if ((rcbfunc = rgnp->rgn_cb_function) == NULL) { + rcbfunc = sfmmu_rgn_cb_noop; + } + + while (ttesz >= HBLK_MIN_TTESZ) { + cbsaddr = rsaddr; + cbeaddr = rsaddr; + if (!(rgnp->rgn_hmeflags & (1 << ttesz))) { + ttesz--; + continue; + } + cnt = 0; + va = rsaddr; + while (va < readdr) { + ASSERT(va >= rhsaddr); + if (va != cbeaddr) { + if (cbeaddr != cbsaddr) { + ASSERT(cbeaddr > cbsaddr); + (*rcbfunc)(cbsaddr, cbeaddr, + rsaddr, rsz, rgnp->rgn_obj, + rgnp->rgn_objoff); + } + cbsaddr = va; + cbeaddr = va; + } + sfmmu_unload_hmeregion_va(srdp, rid, va, readdr, + ttesz, &cbeaddr); + cnt++; + va = rhsaddr + (cnt << TTE_PAGE_SHIFT(ttesz)); + } + if (cbeaddr != cbsaddr) { + ASSERT(cbeaddr > cbsaddr); + (*rcbfunc)(cbsaddr, cbeaddr, rsaddr, + rsz, rgnp->rgn_obj, + rgnp->rgn_objoff); + } + ttesz--; + } +} + +/* * Release one hardware address translation lock on the given address range. */ void @@ -3298,6 +3854,7 @@ hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len) ASSERT((len & MMU_PAGEOFFSET) == 0); endaddr = addr + len; hblktag.htag_id = sfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; /* * Spitfire supports 4 page sizes. 
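A minimal user-level sketch (not part of the patch) of the batching idea used by sfmmu_unload_hmeregion() above: chunks that unload successfully are coalesced into one callback range, and a gap flushes the pending range, mirroring how cbsaddr/cbeaddr feed rgn_cb_function. The names below (walk_region, unload_chunk, rgn_cb_t, print_range) are made-up stand-ins, not kernel interfaces.

#include <stddef.h>
#include <stdio.h>

typedef void (*rgn_cb_t)(size_t start, size_t end);

/* stand-in: pretend the unload succeeds for every chunk except 3 and 7 */
static int
unload_chunk(size_t chunk)
{
	return (chunk % 4 != 3);
}

/*
 * Walk a region chunk by chunk and coalesce contiguous unloaded chunks
 * into one callback invocation instead of one call per chunk.
 */
static void
walk_region(size_t nchunks, rgn_cb_t cb)
{
	size_t c, cb_start = 0, cb_end = 0;

	for (c = 0; c < nchunks; c++) {
		if (!unload_chunk(c))
			continue;
		if (cb_end != c) {		/* gap: flush pending range */
			if (cb_end != cb_start)
				cb(cb_start, cb_end);
			cb_start = c;
		}
		cb_end = c + 1;			/* extend current range */
	}
	if (cb_end != cb_start)			/* flush the tail */
		cb(cb_start, cb_end);
}

static void
print_range(size_t s, size_t e)
{
	printf("callback over chunks [%zu, %zu)\n", s, e);
}

int
main(void)
{
	walk_region(8, print_range);	/* prints [0, 3) and [4, 7) */
	return (0);
}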
@@ -3316,6 +3873,7 @@ hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len) HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list); if (hmeblkp != NULL) { + ASSERT(!hmeblkp->hblk_shared); /* * If we encounter a shadow hmeblk then * we know there are no valid hmeblks mapping @@ -3351,6 +3909,87 @@ hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len) sfmmu_hblks_list_purge(&list); } +void +hat_unlock_region(struct hat *sfmmup, caddr_t addr, size_t len, + hat_region_cookie_t rcookie) +{ + sf_srd_t *srdp; + sf_region_t *rgnp; + int ttesz; + uint_t rid; + caddr_t eaddr; + caddr_t va; + int hmeshift; + hmeblk_tag hblktag; + struct hmehash_bucket *hmebp; + struct hme_blk *hmeblkp; + struct hme_blk *pr_hblk; + struct hme_blk *list; + uint64_t hblkpa, prevpa; + + if (rcookie == HAT_INVALID_REGION_COOKIE) { + hat_unlock(sfmmup, addr, len); + return; + } + + ASSERT(sfmmup != NULL); + ASSERT(sfmmup->sfmmu_xhat_provider == NULL); + ASSERT(sfmmup != ksfmmup); + + srdp = sfmmup->sfmmu_srdp; + rid = (uint_t)((uint64_t)rcookie); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + eaddr = addr + len; + va = addr; + list = NULL; + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_HMERID(sfmmup, rid, addr, len); + + ASSERT(IS_P2ALIGNED(addr, TTEBYTES(rgnp->rgn_pgszc))); + ASSERT(IS_P2ALIGNED(len, TTEBYTES(rgnp->rgn_pgszc))); + if (rgnp->rgn_pgszc < HBLK_MIN_TTESZ) { + ttesz = HBLK_MIN_TTESZ; + } else { + ttesz = rgnp->rgn_pgszc; + } + while (va < eaddr) { + while (ttesz < rgnp->rgn_pgszc && + IS_P2ALIGNED(va, TTEBYTES(ttesz + 1))) { + ttesz++; + } + while (ttesz >= HBLK_MIN_TTESZ) { + if (!(rgnp->rgn_hmeflags & (1 << ttesz))) { + ttesz--; + continue; + } + hmeshift = HME_HASH_SHIFT(ttesz); + hblktag.htag_bspage = HME_HASH_BSPAGE(va, hmeshift); + hblktag.htag_rehash = ttesz; + hblktag.htag_rid = rid; + hblktag.htag_id = srdp; + hmebp = HME_HASH_FUNCTION(srdp, addr, hmeshift); + SFMMU_HASH_LOCK(hmebp); + HME_HASH_SEARCH_PREV(hmebp, hblktag, hmeblkp, hblkpa, + pr_hblk, prevpa, &list); + if (hmeblkp == NULL) { + ttesz--; + continue; + } + ASSERT(hmeblkp->hblk_shared); + va = sfmmu_hblk_unlock(hmeblkp, va, eaddr); + ASSERT(va >= eaddr || + IS_P2ALIGNED((uintptr_t)va, TTEBYTES(ttesz))); + SFMMU_HASH_UNLOCK(hmebp); + break; + } + if (ttesz < HBLK_MIN_TTESZ) { + panic("hat_unlock_region: addr not found " + "addr %p hat %p", va, sfmmup); + } + } + sfmmu_hblks_list_purge(&list); +} + /* * Function to unlock a range of addresses in an hmeblk. It returns the * next address that needs to be unlocked. 
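A minimal sketch (not from the patch) of why the hme block tag now carries htag_rid: private hmeblks hash on the owning hat with an invalid rid, while shared hmeblks hash on the SRD plus the region id, so a lookup only matches when both the id and the rid agree. The types and names below (ex_tag_t, ex_tags_eq, EX_INVALID_RID) are simplified stand-ins for hmeblk_tag and SFMMU_INVALID_SHMERID, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

#define	EX_INVALID_RID	0xffffu		/* stand-in for an invalid region id */

/* simplified stand-in for hmeblk_tag: owner id, base page, region id */
typedef struct ex_tag {
	void		*tag_id;	/* hat for private, SRD for shared */
	uint64_t	tag_bspage;	/* base shifted page of the block */
	uint16_t	tag_rid;	/* region id, or EX_INVALID_RID */
} ex_tag_t;

/* a lookup hits only when owner, base page and region id all agree */
static int
ex_tags_eq(const ex_tag_t *a, const ex_tag_t *b)
{
	return (a->tag_id == b->tag_id &&
	    a->tag_bspage == b->tag_bspage &&
	    a->tag_rid == b->tag_rid);
}

int
main(void)
{
	int hat, srd;			/* dummy owner identities */
	ex_tag_t priv = { &hat, 0x100, EX_INVALID_RID };
	ex_tag_t shr7 = { &srd, 0x100, 7 };
	ex_tag_t shr9 = { &srd, 0x100, 9 };

	printf("private vs shared rid 7: %d\n", ex_tags_eq(&priv, &shr7));
	printf("shared rid 7 vs rid 9:   %d\n", ex_tags_eq(&shr7, &shr9));
	printf("shared rid 7 vs itself:  %d\n", ex_tags_eq(&shr7, &shr7));
	return (0);
}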
@@ -3391,7 +4030,7 @@ readtte: panic("can't unlock large tte"); ASSERT(hmeblkp->hblk_lckcnt > 0); - atomic_add_16(&hmeblkp->hblk_lckcnt, -1); + atomic_add_32(&hmeblkp->hblk_lckcnt, -1); HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK); } else { panic("sfmmu_hblk_unlock: invalid tte"); @@ -3609,6 +4248,7 @@ rehash: hashno++) { hmeshift = HME_HASH_SHIFT(hashno); hblktag.htag_id = ksfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; hblktag.htag_bspage = HME_HASH_BSPAGE(saddr, hmeshift); hblktag.htag_rehash = hashno; hmebp = HME_HASH_FUNCTION(ksfmmup, saddr, hmeshift); @@ -3627,6 +4267,8 @@ rehash: return (ENXIO); } + ASSERT(!hmeblkp->hblk_shared); + HBLKTOHME(osfhmep, hmeblkp, saddr); sfmmu_copytte(&osfhmep->hme_tte, &tte); @@ -3814,6 +4456,7 @@ rehash: hashno++) { hmeshift = HME_HASH_SHIFT(hashno); hblktag.htag_id = ksfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; hblktag.htag_bspage = HME_HASH_BSPAGE(saddr, hmeshift); hblktag.htag_rehash = hashno; hmebp = HME_HASH_FUNCTION(ksfmmup, saddr, hmeshift); @@ -3829,6 +4472,8 @@ rehash: if (hmeblkp == NULL) return; + ASSERT(!hmeblkp->hblk_shared); + HBLKTOHME(osfhmep, hmeblkp, saddr); sfmmu_copytte(&osfhmep->hme_tte, &tte); @@ -4010,7 +4655,7 @@ hat_probe(struct hat *sfmmup, caddr_t addr) sfmmu_vatopfn_suspended(addr, sfmmup, &tte); } } else { - pfn = sfmmu_uvatopfn(addr, sfmmup); + pfn = sfmmu_uvatopfn(addr, sfmmup, NULL); } if (pfn != PFN_INVALID) @@ -4026,76 +4671,18 @@ hat_getpagesize(struct hat *sfmmup, caddr_t addr) ASSERT(sfmmup->sfmmu_xhat_provider == NULL); - sfmmu_gettte(sfmmup, addr, &tte); - if (TTE_IS_VALID(&tte)) { - return (TTEBYTES(TTE_CSZ(&tte))); - } - return (-1); -} - -static void -sfmmu_gettte(struct hat *sfmmup, caddr_t addr, tte_t *ttep) -{ - struct hmehash_bucket *hmebp; - hmeblk_tag hblktag; - int hmeshift, hashno = 1; - struct hme_blk *hmeblkp, *list = NULL; - struct sf_hment *sfhmep; - - /* support for ISM */ - ism_map_t *ism_map; - ism_blk_t *ism_blkp; - int i; - sfmmu_t *ism_hatid = NULL; - sfmmu_t *locked_hatid = NULL; - - ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET)); - - ism_blkp = sfmmup->sfmmu_iblk; - if (ism_blkp) { - sfmmu_ismhat_enter(sfmmup, 0); - locked_hatid = sfmmup; - } - while (ism_blkp && ism_hatid == NULL) { - ism_map = ism_blkp->iblk_maps; - for (i = 0; ism_map[i].imap_ismhat && i < ISM_MAP_SLOTS; i++) { - if (addr >= ism_start(ism_map[i]) && - addr < ism_end(ism_map[i])) { - sfmmup = ism_hatid = ism_map[i].imap_ismhat; - addr = (caddr_t)(addr - - ism_start(ism_map[i])); - break; - } + if (sfmmup == ksfmmup) { + if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) { + return (-1); } - ism_blkp = ism_blkp->iblk_next; - } - if (locked_hatid) { - sfmmu_ismhat_exit(locked_hatid, 0); - } - - hblktag.htag_id = sfmmup; - ttep->ll = 0; - - do { - hmeshift = HME_HASH_SHIFT(hashno); - hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift); - hblktag.htag_rehash = hashno; - hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift); - - SFMMU_HASH_LOCK(hmebp); - - HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list); - if (hmeblkp != NULL) { - HBLKTOHME(sfhmep, hmeblkp, addr); - sfmmu_copytte(&sfhmep->hme_tte, ttep); - SFMMU_HASH_UNLOCK(hmebp); - break; + } else { + if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) { + return (-1); } - SFMMU_HASH_UNLOCK(hmebp); - hashno++; - } while (HME_REHASH(sfmmup) && (hashno <= mmu_hashcnt)); + } - sfmmu_hblks_list_purge(&list); + ASSERT(TTE_IS_VALID(&tte)); + return (TTEBYTES(TTE_CSZ(&tte))); } uint_t @@ -4105,7 +4692,15 @@ hat_getattr(struct hat *sfmmup, caddr_t addr, uint_t *attr) 
ASSERT(sfmmup->sfmmu_xhat_provider == NULL); - sfmmu_gettte(sfmmup, addr, &tte); + if (sfmmup == ksfmmup) { + if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) { + tte.ll = 0; + } + } else { + if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) { + tte.ll = 0; + } + } if (TTE_IS_VALID(&tte)) { *attr = sfmmu_ptov_attr(&tte); return (0); @@ -4214,6 +4809,7 @@ sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr, endaddr = addr + len; hblktag.htag_id = sfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; DEMAP_RANGE_INIT(sfmmup, &dmr); while (addr < endaddr) { @@ -4226,6 +4822,7 @@ sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr, HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list); if (hmeblkp != NULL) { + ASSERT(!hmeblkp->hblk_shared); /* * We've encountered a shadow hmeblk so skip the range * of the next smaller mapping size. @@ -4299,6 +4896,7 @@ sfmmu_hblk_chgattr(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr, ASSERT(in_hblk_range(hmeblkp, addr)); ASSERT(hmeblkp->hblk_shw_bit == 0); + ASSERT(!hmeblkp->hblk_shared); endaddr = MIN(endaddr, get_hblk_endaddr(hmeblkp)); ttesz = get_hblk_ttesz(hmeblkp); @@ -4552,6 +5150,7 @@ hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot) } endaddr = addr + len; hblktag.htag_id = sfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; DEMAP_RANGE_INIT(sfmmup, &dmr); while (addr < endaddr) { @@ -4564,6 +5163,7 @@ hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot) HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list); if (hmeblkp != NULL) { + ASSERT(!hmeblkp->hblk_shared); /* * We've encountered a shadow hmeblk so skip the range * of the next smaller mapping size. @@ -4638,6 +5238,7 @@ sfmmu_hblk_chgprot(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, caddr_t addr, ASSERT(in_hblk_range(hmeblkp, addr)); ASSERT(hmeblkp->hblk_shw_bit == 0); + ASSERT(!hmeblkp->hblk_shared); #ifdef DEBUG if (get_hblk_ttesz(hmeblkp) != TTE8K && @@ -4868,6 +5469,7 @@ hat_unload_large_virtual( goto next_block; } + ASSERT(!hmeblkp->hblk_shared); /* * unload if there are any current valid mappings */ @@ -5032,6 +5634,7 @@ hat_unload_callback( DEMAP_RANGE_INIT(sfmmup, dmrp); endaddr = addr + len; hblktag.htag_id = sfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; /* * It is likely for the vm to call unload over a wide range of @@ -5113,6 +5716,7 @@ hat_unload_callback( } } ASSERT(hmeblkp); + ASSERT(!hmeblkp->hblk_shared); if (!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt) { /* * If the valid count is zero we can skip the range @@ -5320,6 +5924,10 @@ sfmmu_hblk_unload(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr, ASSERT(in_hblk_range(hmeblkp, addr)); ASSERT(!hmeblkp->hblk_shw_bit); + ASSERT(sfmmup != NULL || hmeblkp->hblk_shared); + ASSERT(sfmmup == NULL || !hmeblkp->hblk_shared); + ASSERT(dmrp == NULL || !hmeblkp->hblk_shared); + #ifdef DEBUG if (get_hblk_ttesz(hmeblkp) != TTE8K && (endaddr < get_hblk_endaddr(hmeblkp))) { @@ -5330,8 +5938,9 @@ sfmmu_hblk_unload(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr, endaddr = MIN(endaddr, get_hblk_endaddr(hmeblkp)); ttesz = get_hblk_ttesz(hmeblkp); - use_demap_range = (do_virtual_coloring && - ((dmrp == NULL) || TTEBYTES(ttesz) == DEMAP_RANGE_PGSZ(dmrp))); + use_demap_range = ((dmrp == NULL) || + (TTEBYTES(ttesz) == DEMAP_RANGE_PGSZ(dmrp))); + if (use_demap_range) { DEMAP_RANGE_CONTINUE(dmrp, addr, endaddr); } else { @@ -5411,7 +6020,7 @@ again: if (flags & HAT_UNLOAD_UNLOCK) { ASSERT(hmeblkp->hblk_lckcnt > 0); - 
atomic_add_16(&hmeblkp->hblk_lckcnt, -1); + atomic_add_32(&hmeblkp->hblk_lckcnt, -1); HBLK_STACK_TRACE(hmeblkp, HBLK_UNLOCK); } @@ -5425,12 +6034,12 @@ again: * Given: va1 and va2 are two virtual address * that alias and map the same physical * address. - * 1. mapping exists from va1 to pa and data + * 1. mapping exists from va1 to pa and data * has been read into the cache. - * 2. unload va1. - * 3. load va2 and modify data using va2. - * 4 unload va2. - * 5. load va1 and reference data. Unless we + * 2. unload va1. + * 3. load va2 and modify data using va2. + * 4 unload va2. + * 5. load va1 and reference data. Unless we * flush the data cache when we unload we will * get stale data. * Fortunately, page coloring eliminates the @@ -5447,18 +6056,10 @@ again: */ DEMAP_RANGE_MARKPG(dmrp, addr); } else { - if (do_virtual_coloring) { - sfmmu_tlb_demap(addr, sfmmup, hmeblkp, - sfmmup->sfmmu_free, 0); - } else { - pfn_t pfnum; - - pfnum = TTE_TO_PFN(addr, &tte); - sfmmu_tlbcache_demap(addr, sfmmup, - hmeblkp, pfnum, sfmmup->sfmmu_free, - FLUSH_NECESSARY_CPUS, - CACHE_FLUSH, 0); - } + ASSERT(sfmmup != NULL); + ASSERT(!hmeblkp->hblk_shared); + sfmmu_tlb_demap(addr, sfmmup, hmeblkp, + sfmmup->sfmmu_free, 0); } if (pp) { @@ -5568,8 +6169,14 @@ tte_unloaded: sfhmep++; DEMAP_RANGE_NEXTPG(dmrp); } - if (ttecnt > 0) + /* + * For shared hmeblks this routine is only called when region is freed + * and no longer referenced. So no need to decrement ttecnt + * in the region structure here. + */ + if (ttecnt > 0 && sfmmup != NULL) { atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -ttecnt); + } return (addr); } @@ -5600,6 +6207,8 @@ hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag) endaddr = addr + len; hblktag.htag_id = sfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; + /* * Spitfire supports 4 page sizes. * Most pages are expected to be of the smallest page @@ -5618,6 +6227,7 @@ hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag) HME_HASH_SEARCH(hmebp, hblktag, hmeblkp, &list); if (hmeblkp != NULL) { + ASSERT(!hmeblkp->hblk_shared); /* * We've encountered a shadow hmeblk so skip the range * of the next smaller mapping size. @@ -5674,6 +6284,7 @@ sfmmu_hblk_sync(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr, int ret; ASSERT(hmeblkp->hblk_shw_bit == 0); + ASSERT(!hmeblkp->hblk_shared); endaddr = MIN(endaddr, get_hblk_endaddr(hmeblkp)); @@ -5759,7 +6370,7 @@ sfmmu_ttesync(struct hat *sfmmup, caddr_t addr, tte_t *ttep, page_t *pp) } sz = TTE_CSZ(ttep); - if (sfmmup->sfmmu_rmstat) { + if (sfmmup != NULL && sfmmup->sfmmu_rmstat) { int i; caddr_t vaddr = addr; @@ -6025,6 +6636,7 @@ again: sfmmup = hblktosfmmu(hmeblkp); ASSERT(sfmmup == ksfmmup); + ASSERT(!hmeblkp->hblk_shared); addr = tte_to_vaddr(hmeblkp, tte); @@ -6033,7 +6645,7 @@ again: * not being relocated since it is ksfmmup and thus it * will never be relocated. */ - SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp); + SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0); /* * Update xcall stats @@ -6580,55 +7192,60 @@ readtte: addr = tte_to_vaddr(hmeblkp, tte); - sfmmu_ttesync(sfmmup, addr, &tte, pp); - - atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -1); - - /* - * We need to flush the page from the virtual cache - * in order to prevent a virtual cache alias - * inconsistency. The particular scenario we need - * to worry about is: - * Given: va1 and va2 are two virtual address that - * alias and will map the same physical address. - * 1. mapping exists from va1 to pa and data has - * been read into the cache. - * 2. 
unload va1. - * 3. load va2 and modify data using va2. - * 4 unload va2. - * 5. load va1 and reference data. Unless we flush - * the data cache when we unload we will get - * stale data. - * This scenario is taken care of by using virtual - * page coloring. - */ - if (sfmmup->sfmmu_ismhat) { + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = (sf_srd_t *)sfmmup; + uint_t rid = hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid); + cpuset = sfmmu_rgntlb_demap(addr, rgnp, hmeblkp, 1); + sfmmu_ttesync(NULL, addr, &tte, pp); + ASSERT(rgnp->rgn_ttecnt[ttesz] > 0); + atomic_add_long(&rgnp->rgn_ttecnt[ttesz], -1); + } else { + sfmmu_ttesync(sfmmup, addr, &tte, pp); + atomic_add_long(&sfmmup->sfmmu_ttecnt[ttesz], -1); + /* - * Flush TSBs, TLBs and caches - * of every process - * sharing this ism segment. + * We need to flush the page from the virtual cache + * in order to prevent a virtual cache alias + * inconsistency. The particular scenario we need + * to worry about is: + * Given: va1 and va2 are two virtual address that + * alias and will map the same physical address. + * 1. mapping exists from va1 to pa and data has + * been read into the cache. + * 2. unload va1. + * 3. load va2 and modify data using va2. + * 4 unload va2. + * 5. load va1 and reference data. Unless we flush + * the data cache when we unload we will get + * stale data. + * This scenario is taken care of by using virtual + * page coloring. */ - sfmmu_hat_lock_all(); - mutex_enter(&ism_mlist_lock); - kpreempt_disable(); - if (do_virtual_coloring) + if (sfmmup->sfmmu_ismhat) { + /* + * Flush TSBs, TLBs and caches + * of every process + * sharing this ism segment. 
+ */ + sfmmu_hat_lock_all(); + mutex_enter(&ism_mlist_lock); + kpreempt_disable(); sfmmu_ismtlbcache_demap(addr, sfmmup, hmeblkp, pp->p_pagenum, CACHE_NO_FLUSH); - else - sfmmu_ismtlbcache_demap(addr, sfmmup, hmeblkp, - pp->p_pagenum, CACHE_FLUSH); - kpreempt_enable(); - mutex_exit(&ism_mlist_lock); - sfmmu_hat_unlock_all(); - cpuset = cpu_ready_set; - } else if (do_virtual_coloring) { - sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0); - cpuset = sfmmup->sfmmu_cpusran; - } else { - sfmmu_tlbcache_demap(addr, sfmmup, hmeblkp, - pp->p_pagenum, 0, FLUSH_NECESSARY_CPUS, - CACHE_FLUSH, 0); - cpuset = sfmmup->sfmmu_cpusran; + kpreempt_enable(); + mutex_exit(&ism_mlist_lock); + sfmmu_hat_unlock_all(); + cpuset = cpu_ready_set; + } else { + sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0); + cpuset = sfmmup->sfmmu_cpusran; + } } /* @@ -6747,6 +7364,8 @@ hat_pagesync(struct page *pp, uint_t clearflag) int index, cons; extern ulong_t po_share; page_t *save_pp = pp; + int stop_on_sh = 0; + uint_t shcnt; CPUSET_ZERO(cpuset); @@ -6767,11 +7386,15 @@ hat_pagesync(struct page *pp, uint_t clearflag) if ((clearflag & HAT_SYNC_STOPON_SHARED) != 0 && (pp->p_share > po_share) && !(clearflag & HAT_SYNC_ZERORM)) { - if (PP_ISRO(pp)) - hat_page_setattr(pp, P_REF); + hat_page_setattr(pp, P_REF); return (PP_GENERIC_ATTR(pp)); } + if ((clearflag & HAT_SYNC_STOPON_SHARED) && + !(clearflag & HAT_SYNC_ZERORM)) { + stop_on_sh = 1; + shcnt = 0; + } clearflag &= ~HAT_SYNC_STOPON_SHARED; pml = sfmmu_mlist_enter(pp); index = PP_MAPINDEX(pp); @@ -6794,16 +7417,43 @@ retry: if (hme_size(sfhme) < cons) continue; + + if (stop_on_sh) { + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = hblktosrd(hmeblkp); + uint_t rid = hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, + rgnp, rid); + shcnt += rgnp->rgn_refcnt; + } else { + shcnt++; + } + if (shcnt > po_share) { + /* + * tell the pager to spare the page this time + * around. + */ + hat_page_setattr(save_pp, P_REF); + index = 0; + break; + } + } tset = sfmmu_pagesync(pp, sfhme, clearflag & ~HAT_SYNC_STOPON_RM); CPUSET_OR(cpuset, tset); + /* * If clearflag is HAT_SYNC_DONTZERO, break out as soon - * as the "ref" or "mod" is set. + * as the "ref" or "mod" is set or share cnt exceeds po_share. */ if ((clearflag & ~HAT_SYNC_STOPON_RM) == HAT_SYNC_DONTZERO && - ((clearflag & HAT_SYNC_STOPON_MOD) && PP_ISMOD(save_pp)) || - ((clearflag & HAT_SYNC_STOPON_REF) && PP_ISREF(save_pp))) { + (((clearflag & HAT_SYNC_STOPON_MOD) && PP_ISMOD(save_pp)) || + ((clearflag & HAT_SYNC_STOPON_REF) && PP_ISREF(save_pp)))) { index = 0; break; } @@ -6869,12 +7519,28 @@ sfmmu_pagesync_retry: if (ret > 0) { /* we win the cas */ - sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0); - cpuset = sfmmup->sfmmu_cpusran; + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = (sf_srd_t *)sfmmup; + uint_t rid = + hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, + srdp, rgnp, rid); + cpuset = sfmmu_rgntlb_demap(addr, + rgnp, hmeblkp, 1); + } else { + sfmmu_tlb_demap(addr, sfmmup, hmeblkp, + 0, 0); + cpuset = sfmmup->sfmmu_cpusran; + } } } - - sfmmu_ttesync(sfmmup, addr, &tte, pp); + sfmmu_ttesync(hmeblkp->hblk_shared ? 
NULL : sfmmup, addr, + &tte, pp); } return (cpuset); } @@ -6930,8 +7596,22 @@ retry: /* we win the cas */ if (ret > 0) { - sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0); - cpuset = sfmmup->sfmmu_cpusran; + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = (sf_srd_t *)sfmmup; + uint_t rid = hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, + srdp, rgnp, rid); + cpuset = sfmmu_rgntlb_demap(addr, + rgnp, hmeblkp, 1); + } else { + sfmmu_tlb_demap(addr, sfmmup, hmeblkp, 0, 0); + cpuset = sfmmup->sfmmu_cpusran; + } } } @@ -7181,7 +7861,7 @@ hat_getpfnum(struct hat *hat, caddr_t addr) sfmmu_check_kpfn(pfn); return (pfn); } else { - return (sfmmu_uvatopfn(addr, hat)); + return (sfmmu_uvatopfn(addr, hat, NULL)); } } @@ -7236,16 +7916,19 @@ hat_getkpfnum(caddr_t addr) return (pfn); } -pfn_t -sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup) +/* + * This routine will return both pfn and tte for the addr. + */ +static pfn_t +sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup, tte_t *ttep) { struct hmehash_bucket *hmebp; hmeblk_tag hblktag; int hmeshift, hashno = 1; struct hme_blk *hmeblkp = NULL; + tte_t tte; struct sf_hment *sfhmep; - tte_t tte; pfn_t pfn; /* support for ISM */ @@ -7254,7 +7937,15 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup) int i; sfmmu_t *ism_hatid = NULL; sfmmu_t *locked_hatid = NULL; + sfmmu_t *sv_sfmmup = sfmmup; + caddr_t sv_vaddr = vaddr; + sf_srd_t *srdp; + if (ttep == NULL) { + ttep = &tte; + } else { + ttep->ll = 0; + } ASSERT(sfmmup != ksfmmup); SFMMU_STAT(sf_user_vtop); @@ -7262,11 +7953,11 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup) * Set ism_hatid if vaddr falls in a ISM segment. 
*/ ism_blkp = sfmmup->sfmmu_iblk; - if (ism_blkp) { + if (ism_blkp != NULL) { sfmmu_ismhat_enter(sfmmup, 0); locked_hatid = sfmmup; } - while (ism_blkp && ism_hatid == NULL) { + while (ism_blkp != NULL && ism_hatid == NULL) { ism_map = ism_blkp->iblk_maps; for (i = 0; ism_map[i].imap_ismhat && i < ISM_MAP_SLOTS; i++) { if (vaddr >= ism_start(ism_map[i]) && @@ -7284,6 +7975,7 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup) } hblktag.htag_id = sfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; do { hmeshift = HME_HASH_SHIFT(hashno); hblktag.htag_bspage = HME_HASH_BSPAGE(vaddr, hmeshift); @@ -7294,19 +7986,85 @@ sfmmu_uvatopfn(caddr_t vaddr, struct hat *sfmmup) HME_HASH_FAST_SEARCH(hmebp, hblktag, hmeblkp); if (hmeblkp != NULL) { + ASSERT(!hmeblkp->hblk_shared); HBLKTOHME(sfhmep, hmeblkp, vaddr); - sfmmu_copytte(&sfhmep->hme_tte, &tte); - if (TTE_IS_VALID(&tte)) { - pfn = TTE_TO_PFN(vaddr, &tte); - } else { - pfn = PFN_INVALID; - } + sfmmu_copytte(&sfhmep->hme_tte, ttep); SFMMU_HASH_UNLOCK(hmebp); - return (pfn); + if (TTE_IS_VALID(ttep)) { + pfn = TTE_TO_PFN(vaddr, ttep); + return (pfn); + } + break; } SFMMU_HASH_UNLOCK(hmebp); hashno++; } while (HME_REHASH(sfmmup) && (hashno <= mmu_hashcnt)); + + if (SF_HMERGNMAP_ISNULL(sv_sfmmup)) { + return (PFN_INVALID); + } + srdp = sv_sfmmup->sfmmu_srdp; + ASSERT(srdp != NULL); + ASSERT(srdp->srd_refcnt != 0); + hblktag.htag_id = srdp; + hashno = 1; + do { + hmeshift = HME_HASH_SHIFT(hashno); + hblktag.htag_bspage = HME_HASH_BSPAGE(sv_vaddr, hmeshift); + hblktag.htag_rehash = hashno; + hmebp = HME_HASH_FUNCTION(srdp, sv_vaddr, hmeshift); + + SFMMU_HASH_LOCK(hmebp); + for (hmeblkp = hmebp->hmeblkp; hmeblkp != NULL; + hmeblkp = hmeblkp->hblk_next) { + uint_t rid; + sf_region_t *rgnp; + caddr_t rsaddr; + caddr_t readdr; + + if (!HTAGS_EQ_SHME(hmeblkp->hblk_tag, hblktag, + sv_sfmmup->sfmmu_hmeregion_map)) { + continue; + } + ASSERT(hmeblkp->hblk_shared); + rid = hmeblkp->hblk_tag.htag_rid; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid); + HBLKTOHME(sfhmep, hmeblkp, sv_vaddr); + sfmmu_copytte(&sfhmep->hme_tte, ttep); + rsaddr = rgnp->rgn_saddr; + readdr = rsaddr + rgnp->rgn_size; +#ifdef DEBUG + if (TTE_IS_VALID(ttep) || + get_hblk_ttesz(hmeblkp) > TTE8K) { + caddr_t eva = tte_to_evaddr(hmeblkp, ttep); + ASSERT(eva > sv_vaddr); + ASSERT(sv_vaddr >= rsaddr); + ASSERT(sv_vaddr < readdr); + ASSERT(eva <= readdr); + } +#endif /* DEBUG */ + /* + * Continue the search if we + * found an invalid 8K tte outside of the area + * covered by this hmeblk's region. + */ + if (TTE_IS_VALID(ttep)) { + SFMMU_HASH_UNLOCK(hmebp); + pfn = TTE_TO_PFN(sv_vaddr, ttep); + return (pfn); + } else if (get_hblk_ttesz(hmeblkp) > TTE8K || + (sv_vaddr >= rsaddr && sv_vaddr < readdr)) { + SFMMU_HASH_UNLOCK(hmebp); + pfn = PFN_INVALID; + return (pfn); + } + } + SFMMU_HASH_UNLOCK(hmebp); + hashno++; + } while (hashno <= mmu_hashcnt); return (PFN_INVALID); } @@ -7323,9 +8081,12 @@ hat_map(struct hat *hat, caddr_t addr, size_t len, uint_t flags) } /* - * Return the number of mappings to a particular page. - * This number is an approximation of the number of - * number of people sharing the page. + * Return the number of mappings to a particular page. This number is an + * approximation of the number of people sharing the page. + * + * shared hmeblks or ism hmeblks are counted as 1 mapping here. 
+ * hat_page_checkshare() can be used to compare threshold to share + * count that reflects the number of region sharers albeit at higher cost. */ ulong_t hat_page_getshare(page_t *pp) @@ -7368,6 +8129,73 @@ hat_page_getshare(page_t *pp) } /* + * Return 1 if the number of mappings exceeds sh_thresh. Return 0 + * otherwise. Count shared hmeblks by region's refcnt. + */ +int +hat_page_checkshare(page_t *pp, ulong_t sh_thresh) +{ + kmutex_t *pml; + ulong_t cnt = 0; + int index, sz = TTE8K; + struct sf_hment *sfhme, *tmphme = NULL; + struct hme_blk *hmeblkp; + + pml = sfmmu_mlist_enter(pp); + + if (kpm_enable) + cnt = pp->p_kpmref; + + if (pp->p_share + cnt > sh_thresh) { + sfmmu_mlist_exit(pml); + return (1); + } + + index = PP_MAPINDEX(pp); + +again: + for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) { + tmphme = sfhme->hme_next; + if (hme_size(sfhme) != sz) { + continue; + } + hmeblkp = sfmmu_hmetohblk(sfhme); + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = hblktosrd(hmeblkp); + uint_t rid = hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, + rgnp, rid); + cnt += rgnp->rgn_refcnt; + } else { + cnt++; + } + if (cnt > sh_thresh) { + sfmmu_mlist_exit(pml); + return (1); + } + } + + index >>= 1; + sz++; + while (index) { + pp = PP_GROUPLEADER(pp, sz); + ASSERT(sfmmu_mlist_held(pp)); + if (index & 0x1) { + goto again; + } + index >>= 1; + sz++; + } + sfmmu_mlist_exit(pml); + return (0); +} + +/* * Unload all large mappings to the pp and reset the p_szc field of every * constituent page according to the remaining mappings. * @@ -7516,15 +8344,35 @@ ism_tsb_entries(sfmmu_t *sfmmup, int szc) ism_blk_t *ism_blkp = sfmmup->sfmmu_iblk; ism_map_t *ism_map; pgcnt_t npgs = 0; + pgcnt_t npgs_scd = 0; int j; + sf_scd_t *scdp; + uchar_t rid; ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)); + scdp = sfmmup->sfmmu_scdp; + for (; ism_blkp != NULL; ism_blkp = ism_blkp->iblk_next) { ism_map = ism_blkp->iblk_maps; - for (j = 0; ism_map[j].imap_ismhat && j < ISM_MAP_SLOTS; j++) - npgs += ism_map[j].imap_ismhat->sfmmu_ttecnt[szc]; + for (j = 0; ism_map[j].imap_ismhat && j < ISM_MAP_SLOTS; j++) { + rid = ism_map[j].imap_rid; + ASSERT(rid == SFMMU_INVALID_ISMRID || + rid < sfmmup->sfmmu_srdp->srd_next_ismrid); + + if (scdp != NULL && rid != SFMMU_INVALID_ISMRID && + SF_RGNMAP_TEST(scdp->scd_ismregion_map, rid)) { + /* ISM is in sfmmup's SCD */ + npgs_scd += + ism_map[j].imap_ismhat->sfmmu_ttecnt[szc]; + } else { + /* ISM is not in SCD */ + npgs += + ism_map[j].imap_ismhat->sfmmu_ttecnt[szc]; + } + } } sfmmup->sfmmu_ismttecnt[szc] = npgs; + sfmmup->sfmmu_scdismttecnt[szc] = npgs_scd; return (npgs); } @@ -7554,13 +8402,15 @@ hat_get_mapped_size(struct hat *hat) ASSERT(hat->sfmmu_xhat_provider == NULL); for (i = 0; i < mmu_page_sizes; i++) - assize += (pgcnt_t)hat->sfmmu_ttecnt[i] * TTEBYTES(i); + assize += ((pgcnt_t)hat->sfmmu_ttecnt[i] + + (pgcnt_t)hat->sfmmu_scdrttecnt[i]) * TTEBYTES(i); if (hat->sfmmu_iblk == NULL) return (assize); for (i = 0; i < mmu_page_sizes; i++) - assize += (pgcnt_t)hat->sfmmu_ismttecnt[i] * TTEBYTES(i); + assize += ((pgcnt_t)hat->sfmmu_ismttecnt[i] + + (pgcnt_t)hat->sfmmu_scdismttecnt[i]) * TTEBYTES(i); return (assize); } @@ -7592,7 +8442,8 @@ hat_stats_disable(struct hat *hat) /* * Routines for entering or removing ourselves from the - * ism_hat's mapping list.
This is used for both private and + * SCD hats. */ static void iment_add(struct ism_ment *iment, struct hat *ism_hat) @@ -7663,6 +8514,8 @@ hat_share(struct hat *sfmmup, caddr_t addr, uint_t ismmask = (uint_t)ismpgsz - 1; size_t sh_size = ISM_SHIFT(ismshift, len); ushort_t ismhatflag; + hat_region_cookie_t rcookie; + sf_scd_t *old_scdp; #ifdef DEBUG caddr_t eaddr = addr + len; @@ -7717,7 +8570,7 @@ hat_share(struct hat *sfmmup, caddr_t addr, * Make sure mapping does not already exist. */ ism_blkp = sfmmup->sfmmu_iblk; - while (ism_blkp) { + while (ism_blkp != NULL) { ism_map = ism_blkp->iblk_maps; for (i = 0; i < ISM_MAP_SLOTS && ism_map[i].imap_ismhat; i++) { if ((addr >= ism_start(ism_map[i]) && @@ -7750,7 +8603,8 @@ hat_share(struct hat *sfmmup, caddr_t addr, if (ism_map[i].imap_ismhat == NULL) { ism_map[i].imap_ismhat = ism_hatid; - ism_map[i].imap_vb_shift = (ushort_t)ismshift; + ism_map[i].imap_vb_shift = (uchar_t)ismshift; + ism_map[i].imap_rid = SFMMU_INVALID_ISMRID; ism_map[i].imap_hatflags = ismhatflag; ism_map[i].imap_sz_mask = ismmask; /* @@ -7768,7 +8622,6 @@ hat_share(struct hat *sfmmup, caddr_t addr, ism_ment->iment_hat = sfmmup; ism_ment->iment_base_va = addr; ism_hatid->sfmmu_ismhat = 1; - ism_hatid->sfmmu_flags = 0; mutex_enter(&ism_mlist_lock); iment_add(ism_ment, ism_hatid); mutex_exit(&ism_mlist_lock); @@ -7790,6 +8643,22 @@ hat_share(struct hat *sfmmup, caddr_t addr, } /* + * After calling hat_join_region, sfmmup may join a new SCD or + * move from the old scd to a new scd, in which case, we want to + * shrink the sfmmup's private tsb size, i.e., pass shrink to + * sfmmu_check_page_sizes at the end of this routine. + */ + old_scdp = sfmmup->sfmmu_scdp; + /* + * Call hat_join_region() without the hat lock, because the + * hat lock is taken inside hat_join_region(). + */ + rcookie = hat_join_region(sfmmup, addr, len, (void *)ism_hatid, 0, + PROT_ALL, ismszc, NULL, HAT_REGION_ISM); + if (rcookie != HAT_INVALID_REGION_COOKIE) { + ism_map[i].imap_rid = (uchar_t)((uint64_t)rcookie); + } + /* * Update our counters for this sfmmup's ism mappings. */ for (i = 0; i <= ismszc; i++) { @@ -7797,45 +8666,29 @@ hat_share(struct hat *sfmmup, caddr_t addr, (void) ism_tsb_entries(sfmmup, i); } - hatlockp = sfmmu_hat_enter(sfmmup); - /* - * For ISM and DISM we do not support 512K pages, so we only - * only search the 4M and 8K/64K hashes for 4 pagesize cpus, and search - * the 256M or 32M, and 4M and 8K/64K hashes for 6 pagesize cpus. + * For ISM and DISM we do not support 512K pages, so we only + * search the 4M and 8K/64K hashes for 4 pagesize cpus, and search the + * 256M or 32M, and 4M and 8K/64K hashes for 6 pagesize cpus. + * + * Need to set 32M/256M ISM flags to make sure + * sfmmu_check_page_sizes() enables them on Panther. */ ASSERT((disable_ism_large_pages & (1 << TTE512K)) != 0); - if (ismszc > TTE4M && !SFMMU_FLAGS_ISSET(sfmmup, HAT_4M_FLAG)) - SFMMU_FLAGS_SET(sfmmup, HAT_4M_FLAG); - - if (!SFMMU_FLAGS_ISSET(sfmmup, HAT_64K_FLAG)) - SFMMU_FLAGS_SET(sfmmup, HAT_64K_FLAG); - - /* - * If we updated the ismblkpa for this HAT or we need - * to start searching the 256M or 32M or 4M hash, we must - * make sure all CPUs running this process reload their - * tsbmiss area. Otherwise they will fail to load the mappings - * in the tsbmiss handler and will loop calling pagefault().
- */ switch (ismszc) { case TTE256M: - if (reload_mmu || !SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_256M_FLAG); - sfmmu_sync_mmustate(sfmmup); + if (!SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_ISM)) { + hatlockp = sfmmu_hat_enter(sfmmup); + SFMMU_FLAGS_SET(sfmmup, HAT_256M_ISM); + sfmmu_hat_exit(hatlockp); } break; case TTE32M: - if (reload_mmu || !SFMMU_FLAGS_ISSET(sfmmup, HAT_32M_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_32M_FLAG); - sfmmu_sync_mmustate(sfmmup); - } - break; - case TTE4M: - if (reload_mmu || !SFMMU_FLAGS_ISSET(sfmmup, HAT_4M_FLAG)) { - SFMMU_FLAGS_SET(sfmmup, HAT_4M_FLAG); - sfmmu_sync_mmustate(sfmmup); + if (!SFMMU_FLAGS_ISSET(sfmmup, HAT_32M_ISM)) { + hatlockp = sfmmu_hat_enter(sfmmup); + SFMMU_FLAGS_SET(sfmmup, HAT_32M_ISM); + sfmmu_hat_exit(hatlockp); } break; default: @@ -7843,10 +8696,18 @@ hat_share(struct hat *sfmmup, caddr_t addr, } /* - * Now we can drop the locks. + * If we updated the ismblkpa for this HAT we must make + * sure all CPUs running this process reload their tsbmiss area. + * Otherwise they will fail to load the mappings in the tsbmiss + * handler and will loop calling pagefault(). */ - sfmmu_ismhat_exit(sfmmup, 1); - sfmmu_hat_exit(hatlockp); + if (reload_mmu) { + hatlockp = sfmmu_hat_enter(sfmmup); + sfmmu_sync_mmustate(sfmmup); + sfmmu_hat_exit(hatlockp); + } + + sfmmu_ismhat_exit(sfmmup, 0); /* * Free up ismblk if we didn't use it. @@ -7857,8 +8718,11 @@ hat_share(struct hat *sfmmup, caddr_t addr, /* * Check TSB and TLB page sizes. */ - sfmmu_check_page_sizes(sfmmup, 1); - + if (sfmmup->sfmmu_scdp != NULL && old_scdp != sfmmup->sfmmu_scdp) { + sfmmu_check_page_sizes(sfmmup, 0); + } else { + sfmmu_check_page_sizes(sfmmup, 1); + } return (0); } @@ -7879,6 +8743,8 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) struct tsb_info *tsbinfo; uint_t ismshift = page_get_shift(ismszc); size_t sh_size = ISM_SHIFT(ismshift, len); + uchar_t ism_rid; + sf_scd_t *old_scdp; ASSERT(ISM_ALIGNED(ismshift, addr)); ASSERT(ISM_ALIGNED(ismshift, len)); @@ -7923,7 +8789,7 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) */ found = 0; ism_blkp = sfmmup->sfmmu_iblk; - while (!found && ism_blkp) { + while (!found && ism_blkp != NULL) { ism_map = ism_blkp->iblk_maps; for (i = 0; i < ISM_MAP_SLOTS; i++) { if (addr == ism_start(ism_map[i]) && @@ -7938,35 +8804,48 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) if (found) { ism_hatid = ism_map[i].imap_ismhat; + ism_rid = ism_map[i].imap_rid; ASSERT(ism_hatid != NULL); ASSERT(ism_hatid->sfmmu_ismhat == 1); /* - * First remove ourselves from the ism mapping list. + * After hat_leave_region, the sfmmup may leave SCD, + * in which case, we want to grow the private tsb size + * when call sfmmu_check_page_sizes at the end of the routine. */ - mutex_enter(&ism_mlist_lock); - iment_sub(ism_map[i].imap_ment, ism_hatid); - mutex_exit(&ism_mlist_lock); - free_ment = ism_map[i].imap_ment; + old_scdp = sfmmup->sfmmu_scdp; + /* + * Then remove ourselves from the region. + */ + if (ism_rid != SFMMU_INVALID_ISMRID) { + hat_leave_region(sfmmup, (void *)((uint64_t)ism_rid), + HAT_REGION_ISM); + } /* - * Now gurantee that any other cpu + * And now guarantee that any other cpu * that tries to process an ISM miss * will go to tl=0. */ hatlockp = sfmmu_hat_enter(sfmmup); - sfmmu_invalidate_ctx(sfmmup); - sfmmu_hat_exit(hatlockp); /* + * Remove ourselves from the ism mapping list. 
+ */ + mutex_enter(&ism_mlist_lock); + iment_sub(ism_map[i].imap_ment, ism_hatid); + mutex_exit(&ism_mlist_lock); + free_ment = ism_map[i].imap_ment; + + /* * We delete the ism map by copying * the next map over the current one. * We will take the next one in the maps * array or from the next ism_blk. */ - while (ism_blkp) { + while (ism_blkp != NULL) { ism_map = ism_blkp->iblk_maps; while (i < (ISM_MAP_SLOTS - 1)) { ism_map[i] = ism_map[i + 1]; @@ -7974,12 +8853,13 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) } /* i == (ISM_MAP_SLOTS - 1) */ ism_blkp = ism_blkp->iblk_next; - if (ism_blkp) { + if (ism_blkp != NULL) { ism_map[i] = ism_blkp->iblk_maps[0]; i = 0; } else { ism_map[i].imap_seg = 0; ism_map[i].imap_vb_shift = 0; + ism_map[i].imap_rid = SFMMU_INVALID_ISMRID; ism_map[i].imap_hatflags = 0; ism_map[i].imap_sz_mask = 0; ism_map[i].imap_ismhat = NULL; @@ -8001,6 +8881,12 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) tsbinfo = tsbinfo->tsb_next) { if (tsbinfo->tsb_flags & TSB_SWAPPED) continue; + if (tsbinfo->tsb_flags & TSB_RELOC_FLAG) { + tsbinfo->tsb_flags |= + TSB_FLUSH_NEEDED; + continue; + } + sfmmu_inv_tsb(tsbinfo->tsb_va, TSB_BYTES(tsbinfo->tsb_szc)); } @@ -8029,8 +8915,13 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc) /* * Check TSB and TLB page sizes if the process isn't exiting. */ - if (!sfmmup->sfmmu_free) - sfmmu_check_page_sizes(sfmmup, 0); + if (!sfmmup->sfmmu_free) { + if (found && old_scdp != NULL && sfmmup->sfmmu_scdp == NULL) { + sfmmu_check_page_sizes(sfmmup, 1); + } else { + sfmmu_check_page_sizes(sfmmup, 0); + } + } } /* ARGSUSED */ @@ -8038,6 +8929,8 @@ static int sfmmu_idcache_constructor(void *buf, void *cdrarg, int kmflags) { /* void *buf is sfmmu_t pointer */ + bzero(buf, sizeof (sfmmu_t)); + return (0); } @@ -8308,7 +9201,8 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp) tmphat = hblktosfmmu(hmeblkp); sfmmu_copytte(&sfhmep->hme_tte, &tte); ASSERT(TTE_IS_VALID(&tte)); - if ((tmphat == hat) || hmeblkp->hblk_lckcnt) { + if (hmeblkp->hblk_shared || tmphat == hat || + hmeblkp->hblk_lckcnt) { /* * We have an uncache conflict */ @@ -8330,6 +9224,7 @@ sfmmu_vac_conflict(struct hat *hat, caddr_t addr, page_t *pp) hmeblkp = sfmmu_hmetohblk(sfhmep); if (hmeblkp->hblk_xhat_bit) continue; + ASSERT(!hmeblkp->hblk_shared); (void) sfmmu_pageunload(pp, sfhmep, TTE8K); } @@ -8657,7 +9552,20 @@ sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor) /* * Flush TSBs, TLBs and caches */ - if (sfmmup->sfmmu_ismhat) { + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = (sf_srd_t *)sfmmup; + uint_t rid = hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, + srdp, rgnp, rid); + (void) sfmmu_rgntlb_demap(vaddr, rgnp, + hmeblkp, 0); + sfmmu_cache_flush(pfn, addr_to_vcolor(vaddr)); + } else if (sfmmup->sfmmu_ismhat) { if (flags & HAT_CACHE) { SFMMU_STAT(sf_ism_recache); } else { @@ -8676,11 +9584,22 @@ sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor) */ cache_flush_flag = CACHE_NO_FLUSH; } else { - /* * Flush only TSBs and TLBs. 
*/ - if (sfmmup->sfmmu_ismhat) { + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp = (sf_srd_t *)sfmmup; + uint_t rid = hmeblkp->hblk_tag.htag_rid; + sf_region_t *rgnp; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + ASSERT(srdp != NULL); + rgnp = srdp->srd_hmergnp[rid]; + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, + srdp, rgnp, rid); + (void) sfmmu_rgntlb_demap(vaddr, rgnp, + hmeblkp, 0); + } else if (sfmmup->sfmmu_ismhat) { if (flags & HAT_CACHE) { SFMMU_STAT(sf_ism_recache); } else { @@ -8737,10 +9656,18 @@ sfmmu_get_ctx(sfmmu_t *sfmmup) { mmu_ctx_t *mmu_ctxp; uint_t pstate_save; +#ifdef sun4v + int ret; +#endif ASSERT(sfmmu_hat_lock_held(sfmmup)); ASSERT(sfmmup != ksfmmup); + if (SFMMU_FLAGS_ISSET(sfmmup, HAT_ALLCTX_INVALID)) { + sfmmu_setup_tsbinfo(sfmmup); + SFMMU_FLAGS_CLEAR(sfmmup, HAT_ALLCTX_INVALID); + } + kpreempt_disable(); mmu_ctxp = CPU_MMU_CTXP(CPU); @@ -8772,7 +9699,19 @@ sfmmu_get_ctx(sfmmu_t *sfmmup) */ pstate_save = sfmmu_disable_intrs(); - sfmmu_alloc_ctx(sfmmup, 1, CPU); +#ifdef sun4u + (void) sfmmu_alloc_ctx(sfmmup, 1, CPU, SFMMU_PRIVATE); +#else + if (sfmmu_alloc_ctx(sfmmup, 1, CPU, SFMMU_PRIVATE) && + sfmmup->sfmmu_scdp != NULL) { + sf_scd_t *scdp = sfmmup->sfmmu_scdp; + sfmmu_t *scsfmmup = scdp->scd_sfmmup; + ret = sfmmu_alloc_ctx(scsfmmup, 1, CPU, SFMMU_SHARED); + /* debug purpose only */ + ASSERT(!ret || scsfmmup->sfmmu_ctxs[CPU_MMU_IDX(CPU)].cnum + != INVALID_CONTEXT); + } +#endif sfmmu_load_mmustate(sfmmup); sfmmu_enable_intrs(pstate_save); @@ -8977,10 +9916,21 @@ sfmmu_replace_tsb(sfmmu_t *sfmmup, struct tsb_info *old_tsbinfo, uint_t szc, /* * All initialization is done inside of sfmmu_tsbinfo_alloc(). * If we fail to allocate a TSB, exit. + * + * If tsb grows with new tsb size > 4M and old tsb size < 4M, + * then try 4M slab after the initial alloc fails. + * + * If tsb swapin with tsb size > 4M, then try 4M after the + * initial alloc fails. 
*/ sfmmu_hat_exit(hatlockp); - if (sfmmu_tsbinfo_alloc(&new_tsbinfo, szc, tte_sz_mask, - flags, sfmmup)) { + if (sfmmu_tsbinfo_alloc(&new_tsbinfo, szc, + tte_sz_mask, flags, sfmmup) && + (!(flags & (TSB_GROW | TSB_SWAPIN)) || (szc <= TSB_4M_SZCODE) || + (!(flags & TSB_SWAPIN) && + (old_tsbinfo->tsb_szc >= TSB_4M_SZCODE)) || + sfmmu_tsbinfo_alloc(&new_tsbinfo, TSB_4M_SZCODE, + tte_sz_mask, flags, sfmmup))) { (void) sfmmu_hat_enter(sfmmup); if (!(flags & TSB_SWAPIN)) SFMMU_STAT(sf_tsb_resize_failures); @@ -9062,7 +10012,6 @@ sfmmu_replace_tsb(sfmmu_t *sfmmup, struct tsb_info *old_tsbinfo, uint_t szc, else sfmmup->sfmmu_tsb = new_tsbinfo; membar_enter(); /* make sure new TSB globally visible */ - sfmmu_setup_tsbinfo(sfmmup); /* * We need to migrate TSB entries from the old TSB to the new TSB @@ -9115,6 +10064,55 @@ sfmmu_reprog_pgsz_arr(sfmmu_t *sfmmup, uint8_t *tmp_pgsz) sfmmu_hat_exit(hatlockp); } +/* Update scd_rttecnt for shme rgns in the SCD */ +static void +sfmmu_set_scd_rttecnt(sf_srd_t *srdp, sf_scd_t *scdp) +{ + uint_t rid; + uint_t i, j; + ulong_t w; + sf_region_t *rgnp; + + ASSERT(srdp != NULL); + + for (i = 0; i < SFMMU_HMERGNMAP_WORDS; i++) { + if ((w = scdp->scd_region_map.bitmap[i]) == 0) { + continue; + } + + j = 0; + while (w) { + if (!(w & 0x1)) { + j++; + w >>= 1; + continue; + } + rid = (i << BT_ULSHIFT) | j; + j++; + w >>= 1; + + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp->rgn_refcnt > 0); + ASSERT(rgnp->rgn_id == rid); + + scdp->scd_rttecnt[rgnp->rgn_pgszc] += + rgnp->rgn_size >> TTE_PAGE_SHIFT(rgnp->rgn_pgszc); + + /* + * Maintain the tsb0 inflation cnt for the regions + * in the SCD. + */ + if (rgnp->rgn_pgszc >= TTE4M) { + scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt += + rgnp->rgn_size >> + (TTE_PAGE_SHIFT(TTE8K) + 2); + } + } + } +} + /* * This function assumes that there are either four or six supported page * sizes and at most two programmable TLBs, so we need to decide which @@ -9144,12 +10142,13 @@ sfmmu_check_page_sizes(sfmmu_t *sfmmup, int growing) if (sfmmup == ksfmmup || sfmmup->sfmmu_ismhat != NULL) return; - if ((sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) == 0 && + if (!SFMMU_LGPGS_INUSE(sfmmup) && sfmmup->sfmmu_ttecnt[TTE8K] <= tsb_rss_factor) return; for (i = 0; i < mmu_page_sizes; i++) { - ttecnt[i] = SFMMU_TTE_CNT(sfmmup, i); + ttecnt[i] = sfmmup->sfmmu_ttecnt[i] + + sfmmup->sfmmu_ismttecnt[i]; } /* Check pagesizes in use, and possibly reprogram DTLB. */ @@ -9172,6 +10171,11 @@ sfmmu_check_page_sizes(sfmmu_t *sfmmup, int growing) } /* + * Inflate tte8k_cnt to allow for region large page allocation failure. + */ + tte8k_cnt += sfmmup->sfmmu_tsb0_4minflcnt; + + /* * Inflate TSB sizes by a factor of 2 if this process * uses 4M text pages to minimize extra conflict misses * in the first TSB since without counting text pages @@ -9274,18 +10278,22 @@ sfmmu_size_tsb(sfmmu_t *sfmmup, int growing, uint64_t tte8k_cnt, tsb_bits = (mmu_page_sizes == max_mmu_page_sizes)? 
TSB4M|TSB32M|TSB256M:TSB4M; if ((sfmmu_tsbinfo_alloc(&newtsb, tsb_szc, tsb_bits, - allocflags, sfmmup) != 0) && - (sfmmu_tsbinfo_alloc(&newtsb, TSB_MIN_SZCODE, - tsb_bits, allocflags, sfmmup) != 0)) { + allocflags, sfmmup)) && + (tsb_szc <= TSB_4M_SZCODE || + sfmmu_tsbinfo_alloc(&newtsb, TSB_4M_SZCODE, + tsb_bits, allocflags, sfmmup)) && + sfmmu_tsbinfo_alloc(&newtsb, TSB_MIN_SZCODE, + tsb_bits, allocflags, sfmmup)) { return; } hatlockp = sfmmu_hat_enter(sfmmup); + sfmmu_invalidate_ctx(sfmmup); + if (sfmmup->sfmmu_tsb->tsb_next == NULL) { sfmmup->sfmmu_tsb->tsb_next = newtsb; SFMMU_STAT(sf_tsb_sectsb_create); - sfmmu_setup_tsbinfo(sfmmup); sfmmu_hat_exit(hatlockp); return; } else { @@ -9351,6 +10359,7 @@ sfmmu_free_sfmmu(sfmmu_t *sfmmup) ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0); ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0); ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0); + ASSERT(SF_RGNMAP_ISNULL(sfmmup)); sfmmup->sfmmu_free = 0; sfmmup->sfmmu_ismhat = 0; @@ -9656,6 +10665,7 @@ sfmmu_hblk_swap(struct hme_blk *new) struct hme_blk *found; #endif old = HBLK_RESERVE; + ASSERT(!old->hblk_shared); /* * save pa before bcopy clobbers it @@ -9668,7 +10678,8 @@ sfmmu_hblk_swap(struct hme_blk *new) /* * acquire hash bucket lock. */ - hmebp = sfmmu_tteload_acquire_hashbucket(ksfmmup, base, TTE8K); + hmebp = sfmmu_tteload_acquire_hashbucket(ksfmmup, base, TTE8K, + SFMMU_INVALID_SHMERID); /* * copy contents from old to new @@ -9742,6 +10753,7 @@ sfmmu_hblk_swap(struct hme_blk *new) #ifdef DEBUG hblktag.htag_id = ksfmmup; + hblktag.htag_rid = SFMMU_INVALID_SHMERID; hblktag.htag_bspage = HME_HASH_BSPAGE(base, HME_HASH_SHIFT(TTE8K)); hblktag.htag_rehash = HME_HASH_REHASH(TTE8K); HME_HASH_FAST_SEARCH(hmebp, hblktag, found); @@ -9941,7 +10953,7 @@ sfmmu_ismhat_exit(sfmmu_t *sfmmup, int hatlock_held) static struct hme_blk * sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr, struct hmehash_bucket *hmebp, uint_t size, hmeblk_tag hblktag, - uint_t flags) + uint_t flags, uint_t rid) { struct hme_blk *hmeblkp = NULL; struct hme_blk *newhblkp; @@ -9952,8 +10964,14 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr, uint_t owner; /* set to 1 if using hblk_reserve */ uint_t forcefree; int sleep; + sf_srd_t *srdp; + sf_region_t *rgnp; ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp)); + ASSERT(hblktag.htag_rid == rid); + SFMMU_VALIDATE_HMERID(sfmmup, rid, vaddr, TTEBYTES(size)); + ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || + IS_P2ALIGNED(vaddr, TTEBYTES(size))); /* * If segkmem is not created yet, allocate from static hmeblks @@ -9963,6 +10981,8 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr, */ if (!hblk_alloc_dynamic) { + ASSERT(!SFMMU_IS_SHMERID_VALID(rid)); + if (size == TTE8K) { index = nucleus_hblk8.index; if (index >= nucleus_hblk8.len) { @@ -9999,7 +11019,7 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr, SFMMU_HASH_UNLOCK(hmebp); - if (sfmmup != KHATID) { + if (sfmmup != KHATID && !SFMMU_IS_SHMERID_VALID(rid)) { if (mmu_page_sizes == max_mmu_page_sizes) { if (size < TTE256M) shw_hblkp = sfmmu_shadow_hcreate(sfmmup, vaddr, @@ -10009,6 +11029,36 @@ sfmmu_hblk_alloc(sfmmu_t *sfmmup, caddr_t vaddr, shw_hblkp = sfmmu_shadow_hcreate(sfmmup, vaddr, size, flags); } + } else if (SFMMU_IS_SHMERID_VALID(rid)) { + int ttesz; + caddr_t va; + caddr_t eva = vaddr + TTEBYTES(size); + + ASSERT(sfmmup != KHATID); + + srdp = sfmmup->sfmmu_srdp; + ASSERT(srdp != NULL && srdp->srd_refcnt != 0); + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp != NULL && rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt != 0); + ASSERT(size <= rgnp->rgn_pgszc); + + 
ttesz = HBLK_MIN_TTESZ; + do { + if (!(rgnp->rgn_hmeflags & (0x1 << ttesz))) { + continue; + } + + if (ttesz > size && ttesz != HBLK_MIN_TTESZ) { + sfmmu_cleanup_rhblk(srdp, vaddr, rid, ttesz); + } else if (ttesz < size) { + for (va = vaddr; va < eva; + va += TTEBYTES(ttesz)) { + sfmmu_cleanup_rhblk(srdp, va, rid, + ttesz); + } + } + } while (++ttesz <= rgnp->rgn_pgszc); } fill_hblk: @@ -10016,6 +11066,7 @@ fill_hblk: if (owner && size == TTE8K) { + ASSERT(!SFMMU_IS_SHMERID_VALID(rid)); /* * We are really in a tight spot. We already own * hblk_reserve and we need another hblk. In anticipation @@ -10151,6 +11202,10 @@ re_verify: * _only if_ we are the owner of hblk_reserve. */ if (newhblkp != HBLK_RESERVE || owner) { + ASSERT(!SFMMU_IS_SHMERID_VALID(rid) || + newhblkp->hblk_shared); + ASSERT(SFMMU_IS_SHMERID_VALID(rid) || + !newhblkp->hblk_shared); return (newhblkp); } else { /* @@ -10177,6 +11232,17 @@ re_verify: } hblk_init: + if (SFMMU_IS_SHMERID_VALID(rid)) { + uint16_t tteflag = 0x1 << + ((size < HBLK_MIN_TTESZ) ? HBLK_MIN_TTESZ : size); + + if (!(rgnp->rgn_hmeflags & tteflag)) { + atomic_or_16(&rgnp->rgn_hmeflags, tteflag); + } + hmeblkp->hblk_shared = 1; + } else { + hmeblkp->hblk_shared = 0; + } set_hblk_sz(hmeblkp, size); ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp)); hmeblkp->hblk_next = (struct hme_blk *)NULL; @@ -10207,7 +11273,7 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, int shw_size, vshift; struct hme_blk *shw_hblkp; uint_t shw_mask, newshw_mask; - uintptr_t vaddr; + caddr_t vaddr; int size; uint_t critical; @@ -10224,6 +11290,7 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, shw_hblkp = hmeblkp->hblk_shadow; if (shw_hblkp) { ASSERT(hblktosfmmu(hmeblkp) != KHATID); + ASSERT(!hmeblkp->hblk_shared); if (mmu_page_sizes == max_mmu_page_sizes) { ASSERT(size < TTE256M); } else { @@ -10231,7 +11298,7 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, } shw_size = get_hblk_ttesz(shw_hblkp); - vaddr = get_hblk_base(hmeblkp); + vaddr = (caddr_t)get_hblk_base(hmeblkp); vshift = vaddr_to_vshift(shw_hblkp->hblk_tag, vaddr, shw_size); ASSERT(vshift < 8); /* @@ -10250,6 +11317,28 @@ sfmmu_hblk_free(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, hmeblkp->hblk_nextpa = hblkpa; hmeblkp->hblk_shw_bit = 0; + /* + * Clear ttebit map in the region this hmeblk belongs to. The region + * must exist as long as any of its hmeblks exist. This invariant + * holds because before region is freed all its hmeblks are removed. 
+ */ + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp; + sf_region_t *rgnp; + uint_t rid; + + srdp = hblktosrd(hmeblkp); + ASSERT(srdp != NULL && srdp->srd_refcnt != 0); + rid = hmeblkp->hblk_tag.htag_rid; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp != NULL); + vaddr = (caddr_t)get_hblk_base(hmeblkp); + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid); + hmeblkp->hblk_shared = 0; + } + if (hmeblkp->hblk_nuc_bit == 0) { if (size == TTE8K && sfmmu_put_free_hblk(hmeblkp, critical)) @@ -10419,7 +11508,7 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, { int shw_size, vshift; struct hme_blk *shw_hblkp; - uintptr_t vaddr; + caddr_t vaddr; uint_t shw_mask, newshw_mask; ASSERT(SFMMU_HASH_LOCK_ISHELD(hmebp)); @@ -10432,6 +11521,9 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, demap_range_t dmr; sfmmup = hblktosfmmu(hmeblkp); + if (hmeblkp->hblk_shared || sfmmup->sfmmu_ismhat) { + return (0); + } DEMAP_RANGE_INIT(sfmmup, &dmr); (void) sfmmu_hblk_unload(sfmmup, hmeblkp, (caddr_t)get_hblk_base(hmeblkp), @@ -10455,8 +11547,9 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, shw_hblkp = hmeblkp->hblk_shadow; if (shw_hblkp) { + ASSERT(!hmeblkp->hblk_shared); shw_size = get_hblk_ttesz(shw_hblkp); - vaddr = get_hblk_base(hmeblkp); + vaddr = (caddr_t)get_hblk_base(hmeblkp); vshift = vaddr_to_vshift(shw_hblkp->hblk_tag, vaddr, shw_size); ASSERT(vshift < 8); /* @@ -10479,6 +11572,28 @@ sfmmu_steal_this_hblk(struct hmehash_bucket *hmebp, struct hme_blk *hmeblkp, */ hmeblkp->hblk_shw_bit = 0; + /* + * Clear ttebit map in the region this hmeblk belongs to. The region + * must exist as long as any of its hmeblks exist. This invariant + * holds because before region is freed all its hmeblks are removed. 
+ */ + if (hmeblkp->hblk_shared) { + sf_srd_t *srdp; + sf_region_t *rgnp; + uint_t rid; + + srdp = hblktosrd(hmeblkp); + ASSERT(srdp != NULL && srdp->srd_refcnt != 0); + rid = hmeblkp->hblk_tag.htag_rid; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp != NULL); + vaddr = (caddr_t)get_hblk_base(hmeblkp); + SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp, rgnp, rid); + hmeblkp->hblk_shared = 0; + } + sfmmu_hblk_steal_count++; SFMMU_STAT(sf_steal_count); @@ -10553,6 +11668,8 @@ sfmmu_tsb_swapin(sfmmu_t *sfmmup, hatlock_t *hatlockp) SFMMU_FLAGS_CLEAR(sfmmup, HAT_SWAPPED|HAT_SWAPIN); cv_broadcast(&sfmmup->sfmmu_tsb_cv); return; + case TSB_LOSTRACE: + break; case TSB_ALLOCFAIL: break; default: @@ -10587,15 +11704,44 @@ sfmmu_tsb_swapin(sfmmu_t *sfmmup, hatlock_t *hatlockp) rc = sfmmu_replace_tsb(sfmmup, tsbinfop, TSB_MIN_SZCODE, hatlockp, TSB_SWAPIN | TSB_FORCEALLOC); ASSERT(rc == TSB_SUCCESS); - } else { - /* update machine specific tsbinfo */ - sfmmu_setup_tsbinfo(sfmmup); } SFMMU_FLAGS_CLEAR(sfmmup, HAT_SWAPPED|HAT_SWAPIN); cv_broadcast(&sfmmup->sfmmu_tsb_cv); } +static int +sfmmu_is_rgnva(sf_srd_t *srdp, caddr_t addr, ulong_t w, ulong_t bmw) +{ + ulong_t bix = 0; + uint_t rid; + sf_region_t *rgnp; + + ASSERT(srdp != NULL); + ASSERT(srdp->srd_refcnt != 0); + + w <<= BT_ULSHIFT; + while (bmw) { + if (!(bmw & 0x1)) { + bix++; + bmw >>= 1; + continue; + } + rid = w | bix; + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp->rgn_refcnt > 0); + ASSERT(rgnp->rgn_id == rid); + if (addr < rgnp->rgn_saddr || + addr >= (rgnp->rgn_saddr + rgnp->rgn_size)) { + bix++; + bmw >>= 1; + } else { + return (1); + } + } + return (0); +} + /* * Handle exceptions for low level tsb_handler. * @@ -10620,12 +11766,14 @@ sfmmu_tsb_swapin(sfmmu_t *sfmmup, hatlock_t *hatlockp) void sfmmu_tsbmiss_exception(struct regs *rp, uintptr_t tagaccess, uint_t traptype) { - sfmmu_t *sfmmup; + sfmmu_t *sfmmup, *shsfmmup; uint_t ctxtype; klwp_id_t lwp; char lwp_save_state; - hatlock_t *hatlockp; + hatlock_t *hatlockp, *shatlockp; struct tsb_info *tsbinfop; + struct tsbmiss *tsbmp; + sf_scd_t *scdp; SFMMU_STAT(sf_tsb_exceptions); SFMMU_MMU_STAT(mmu_tsb_exceptions); @@ -10638,24 +11786,79 @@ sfmmu_tsbmiss_exception(struct regs *rp, uintptr_t tagaccess, uint_t traptype) ASSERT(sfmmup != ksfmmup && ctxtype != KCONTEXT); ASSERT(sfmmup->sfmmu_ismhat == 0); - /* - * First, make sure we come out of here with a valid ctx, - * since if we don't get one we'll simply loop on the - * faulting instruction. - * - * If the ISM mappings are changing, the TSB is being relocated, or - * the process is swapped out we serialize behind the controlling - * thread with the sfmmu_flags and sfmmu_tsb_cv condition variable. - * Otherwise we synchronize with the context stealer or the thread - * that required us to change out our MMU registers (such - * as a thread changing out our TSB while we were running) by - * locking the HAT and grabbing the rwlock on the context as a - * reader temporarily. - */ ASSERT(!SFMMU_FLAGS_ISSET(sfmmup, HAT_SWAPPED) || ctxtype == INVALID_CONTEXT); - if (ctxtype == INVALID_CONTEXT) { + if (ctxtype != INVALID_CONTEXT && traptype != T_DATA_PROT) { + /* + * We may land here because shme bitmap and pagesize + * flags are updated lazily in tsbmiss area on other cpus. + * If we detect here that tsbmiss area is out of sync with + * sfmmu update it and retry the trapped instruction. + * Otherwise call trap(). 
+ */ + int ret = 0; + uchar_t tteflag_mask = (1 << TTE64K) | (1 << TTE8K); + caddr_t addr = (caddr_t)(tagaccess & TAGACC_VADDR_MASK); + + /* + * Must set lwp state to LWP_SYS before + * trying to acquire any adaptive lock + */ + lwp = ttolwp(curthread); + ASSERT(lwp); + lwp_save_state = lwp->lwp_state; + lwp->lwp_state = LWP_SYS; + + hatlockp = sfmmu_hat_enter(sfmmup); + kpreempt_disable(); + tsbmp = &tsbmiss_area[CPU->cpu_id]; + ASSERT(sfmmup == tsbmp->usfmmup); + if (((tsbmp->uhat_tteflags ^ sfmmup->sfmmu_tteflags) & + ~tteflag_mask) || + ((tsbmp->uhat_rtteflags ^ sfmmup->sfmmu_rtteflags) & + ~tteflag_mask)) { + tsbmp->uhat_tteflags = sfmmup->sfmmu_tteflags; + tsbmp->uhat_rtteflags = sfmmup->sfmmu_rtteflags; + ret = 1; + } + if (sfmmup->sfmmu_srdp != NULL) { + ulong_t *sm = sfmmup->sfmmu_hmeregion_map.bitmap; + ulong_t *tm = tsbmp->shmermap; + ulong_t i; + for (i = 0; i < SFMMU_HMERGNMAP_WORDS; i++) { + ulong_t d = tm[i] ^ sm[i]; + if (d) { + if (d & sm[i]) { + if (!ret && sfmmu_is_rgnva( + sfmmup->sfmmu_srdp, + addr, i, d & sm[i])) { + ret = 1; + } + } + tm[i] = sm[i]; + } + } + } + kpreempt_enable(); + sfmmu_hat_exit(hatlockp); + lwp->lwp_state = lwp_save_state; + if (ret) { + return; + } + } else if (ctxtype == INVALID_CONTEXT) { + /* + * First, make sure we come out of here with a valid ctx, + * since if we don't get one we'll simply loop on the + * faulting instruction. + * + * If the ISM mappings are changing, the TSB is relocated, + * the process is swapped, the process is joining SCD or + * leaving SCD or shared regions we serialize behind the + * controlling thread with hat lock, sfmmu_flags and + * sfmmu_tsb_cv condition variable. + */ + /* * Must set lwp state to LWP_SYS before * trying to acquire any adaptive lock @@ -10667,6 +11870,33 @@ sfmmu_tsbmiss_exception(struct regs *rp, uintptr_t tagaccess, uint_t traptype) hatlockp = sfmmu_hat_enter(sfmmup); retry: + if ((scdp = sfmmup->sfmmu_scdp) != NULL) { + shsfmmup = scdp->scd_sfmmup; + ASSERT(shsfmmup != NULL); + + for (tsbinfop = shsfmmup->sfmmu_tsb; tsbinfop != NULL; + tsbinfop = tsbinfop->tsb_next) { + if (tsbinfop->tsb_flags & TSB_RELOC_FLAG) { + /* drop the private hat lock */ + sfmmu_hat_exit(hatlockp); + /* acquire the shared hat lock */ + shatlockp = sfmmu_hat_enter(shsfmmup); + /* + * recheck to see if anything changed + * after we drop the private hat lock. + */ + if (sfmmup->sfmmu_scdp == scdp && + shsfmmup == scdp->scd_sfmmup) { + sfmmu_tsb_chk_reloc(shsfmmup, + shatlockp); + } + sfmmu_hat_exit(shatlockp); + hatlockp = sfmmu_hat_enter(sfmmup); + goto retry; + } + } + } + for (tsbinfop = sfmmup->sfmmu_tsb; tsbinfop != NULL; tsbinfop = tsbinfop->tsb_next) { if (tsbinfop->tsb_flags & TSB_RELOC_FLAG) { @@ -10685,6 +11915,17 @@ retry: goto retry; } + /* Is this process joining an SCD? */ + if (SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) { + /* + * Flush private TSB and setup shared TSB. + * sfmmu_finish_join_scd() does not drop the + * hat lock. + */ + sfmmu_finish_join_scd(sfmmup); + SFMMU_FLAGS_CLEAR(sfmmup, HAT_JOIN_SCD); + } + /* * If we're swapping in, get TSB(s). Note that we must do * this before we get a ctx or load the MMU state. Once @@ -10705,21 +11946,27 @@ retry: * it anyway. 
*/ lwp->lwp_state = lwp_save_state; - if (sfmmup->sfmmu_ttecnt[TTE8K] != 0 || - sfmmup->sfmmu_ttecnt[TTE64K] != 0 || - sfmmup->sfmmu_ttecnt[TTE512K] != 0 || - sfmmup->sfmmu_ttecnt[TTE4M] != 0 || - sfmmup->sfmmu_ttecnt[TTE32M] != 0 || - sfmmup->sfmmu_ttecnt[TTE256M] != 0) { - return; - } - if (traptype == T_DATA_PROT) { - traptype = T_DATA_MMU_MISS; - } + return; } trap(rp, (caddr_t)tagaccess, traptype, 0); } +static void +sfmmu_tsb_chk_reloc(sfmmu_t *sfmmup, hatlock_t *hatlockp) +{ + struct tsb_info *tp; + + ASSERT(sfmmu_hat_lock_held(sfmmup)); + + for (tp = sfmmup->sfmmu_tsb; tp != NULL; tp = tp->tsb_next) { + if (tp->tsb_flags & TSB_RELOC_FLAG) { + cv_wait(&sfmmup->sfmmu_tsb_cv, + HATLOCK_MUTEXP(hatlockp)); + break; + } + } +} + /* * sfmmu_vatopfn_suspended is called from GET_TTE when TL=0 and * TTE_SUSPENDED bit set in tte we block on aquiring a page lock @@ -10755,6 +12002,124 @@ sfmmu_tsbmiss_suspended(struct regs *rp, uintptr_t tagacc, uint_t traptype) } /* + * This routine could be optimized to reduce the number of xcalls by flushing + * the entire TLBs if region reference count is above some threshold but the + * tradeoff will depend on the size of the TLB. So for now flush the specific + * page a context at a time. + * + * If uselocks is 0 then it's called after all cpus were captured and all the + * hat locks were taken. In this case don't take the region lock by relying on + * the order of list region update operations in hat_join_region(), + * hat_leave_region() and hat_dup_region(). The ordering in those routines + * guarantees that list is always forward walkable and reaches active sfmmus + * regardless of where xc_attention() captures a cpu. + */ +cpuset_t +sfmmu_rgntlb_demap(caddr_t addr, sf_region_t *rgnp, + struct hme_blk *hmeblkp, int uselocks) +{ + sfmmu_t *sfmmup; + cpuset_t cpuset; + cpuset_t rcpuset; + hatlock_t *hatlockp; + uint_t rid = rgnp->rgn_id; + sf_rgn_link_t *rlink; + sf_scd_t *scdp; + + ASSERT(hmeblkp->hblk_shared); + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + + CPUSET_ZERO(rcpuset); + if (uselocks) { + mutex_enter(&rgnp->rgn_mutex); + } + sfmmup = rgnp->rgn_sfmmu_head; + while (sfmmup != NULL) { + if (uselocks) { + hatlockp = sfmmu_hat_enter(sfmmup); + } + + /* + * When an SCD is created the SCD hat is linked on the sfmmu + * region lists for each hme region which is part of the + * SCD. If we find an SCD hat, when walking these lists, + * then we flush the shared TSBs, if we find a private hat, + * which is part of an SCD, but where the region + * is not part of the SCD then we flush the private TSBs. 
+ */ + if (!sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL && + !SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) { + scdp = sfmmup->sfmmu_scdp; + if (SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) { + if (uselocks) { + sfmmu_hat_exit(hatlockp); + } + goto next; + } + } + + SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0); + + kpreempt_disable(); + cpuset = sfmmup->sfmmu_cpusran; + CPUSET_AND(cpuset, cpu_ready_set); + CPUSET_DEL(cpuset, CPU->cpu_id); + SFMMU_XCALL_STATS(sfmmup); + xt_some(cpuset, vtag_flushpage_tl1, + (uint64_t)addr, (uint64_t)sfmmup); + vtag_flushpage(addr, (uint64_t)sfmmup); + if (uselocks) { + sfmmu_hat_exit(hatlockp); + } + kpreempt_enable(); + CPUSET_OR(rcpuset, cpuset); + +next: + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 0, 0); + ASSERT(rlink != NULL); + sfmmup = rlink->next; + } + if (uselocks) { + mutex_exit(&rgnp->rgn_mutex); + } + return (rcpuset); +} + +static int +find_ism_rid(sfmmu_t *sfmmup, sfmmu_t *ism_sfmmup, caddr_t va, uint_t *ism_rid) +{ + ism_blk_t *ism_blkp; + int i; + ism_map_t *ism_map; +#ifdef DEBUG + struct hat *ism_hatid; +#endif + ASSERT(sfmmu_hat_lock_held(sfmmup)); + + ism_blkp = sfmmup->sfmmu_iblk; + while (ism_blkp != NULL) { + ism_map = ism_blkp->iblk_maps; + for (i = 0; i < ISM_MAP_SLOTS && ism_map[i].imap_ismhat; i++) { + if ((va >= ism_start(ism_map[i])) && + (va < ism_end(ism_map[i]))) { + + *ism_rid = ism_map[i].imap_rid; +#ifdef DEBUG + ism_hatid = ism_map[i].imap_ismhat; + ASSERT(ism_hatid == ism_sfmmup); + ASSERT(ism_hatid->sfmmu_ismhat); +#endif + return (1); + } + } + ism_blkp = ism_blkp->iblk_next; + } + return (0); +} + +/* * Special routine to flush out ism mappings- TSBs, TLBs and D-caches. * This routine may be called with all cpu's captured. Therefore, the * caller is responsible for holding all locks and disabling kernel @@ -10772,8 +12137,11 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup, #ifdef VAC int vcolor; #endif - int ttesz; + sf_scd_t *scdp; + uint_t ism_rid; + + ASSERT(!hmeblkp->hblk_shared); /* * Walk the ism_hat's mapping list and flush the page * from every hat sharing this ism_hat. This routine @@ -10787,6 +12155,7 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup, ASSERT(ism_sfmmup->sfmmu_ismhat); ASSERT(MUTEX_HELD(&ism_mlist_lock)); addr = addr - ISMID_STARTADDR; + for (ment = ism_sfmmup->sfmmu_iment; ment; ment = ment->iment_next) { sfmmup = ment->iment_hat; @@ -10795,27 +12164,38 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup, va = (caddr_t)((uintptr_t)va + (uintptr_t)addr); /* - * Flush TSB of ISM mappings. + * When an SCD is created the SCD hat is linked on the ism + * mapping lists for each ISM segment which is part of the + * SCD. If we find an SCD hat, when walking these lists, + * then we flush the shared TSBs, if we find a private hat, + * which is part of an SCD, but where the region + * corresponding to this va is not part of the SCD then we + * flush the private TSBs. 
*/ - ttesz = get_hblk_ttesz(hmeblkp); - if (ttesz == TTE8K || ttesz == TTE4M) { - sfmmu_unload_tsb(sfmmup, va, ttesz); - } else { - caddr_t sva = va; - caddr_t eva; - ASSERT(addr == (caddr_t)get_hblk_base(hmeblkp)); - eva = sva + get_hblk_span(hmeblkp); - sfmmu_unload_tsb_range(sfmmup, sva, eva, ttesz); + if (!sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL && + !SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD) && + !SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)) { + if (!find_ism_rid(sfmmup, ism_sfmmup, va, + &ism_rid)) { + cmn_err(CE_PANIC, + "can't find matching ISM rid!"); + } + + scdp = sfmmup->sfmmu_scdp; + if (SFMMU_IS_ISMRID_VALID(ism_rid) && + SF_RGNMAP_TEST(scdp->scd_ismregion_map, + ism_rid)) { + continue; + } } + SFMMU_UNLOAD_TSB(va, sfmmup, hmeblkp, 1); cpuset = sfmmup->sfmmu_cpusran; CPUSET_AND(cpuset, cpu_ready_set); CPUSET_DEL(cpuset, CPU->cpu_id); - SFMMU_XCALL_STATS(sfmmup); xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)va, (uint64_t)sfmmup); - vtag_flushpage(va, (uint64_t)sfmmup); #ifdef VAC @@ -10854,11 +12234,14 @@ sfmmu_tlbcache_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp, cpuset_t cpuset; hatlock_t *hatlockp; + ASSERT(!hmeblkp->hblk_shared); + #if defined(lint) && !defined(VAC) pfnum = pfnum; cpu_flag = cpu_flag; cache_flush_flag = cache_flush_flag; #endif + /* * There is no longer a need to protect against ctx being * stolen here since we don't store the ctx in the TSB anymore. @@ -10884,7 +12267,7 @@ sfmmu_tlbcache_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp, /* * Flush the TSB and TLB. */ - SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp); + SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0); cpuset = sfmmup->sfmmu_cpusran; CPUSET_AND(cpuset, cpu_ready_set); @@ -10936,6 +12319,8 @@ sfmmu_tlb_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp, cpuset_t cpuset; hatlock_t *hatlockp; + ASSERT(!hmeblkp->hblk_shared); + /* * If the process is exiting we have nothing to do. */ @@ -10947,7 +12332,7 @@ sfmmu_tlb_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp, */ if (!hat_lock_held) hatlockp = sfmmu_hat_enter(sfmmup); - SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp); + SFMMU_UNLOAD_TSB(addr, sfmmup, hmeblkp, 0); kpreempt_disable(); @@ -10973,6 +12358,9 @@ sfmmu_tlb_demap(caddr_t addr, sfmmu_t *sfmmup, struct hme_blk *hmeblkp, */ static int sfmmu_xcall_save; +/* + * this routine is never used for demaping addresses backed by SRD hmeblks. + */ static void sfmmu_tlb_range_demap(demap_range_t *dmrp) { @@ -11154,10 +12542,13 @@ sfmmu_invalidate_ctx(sfmmu_t *sfmmup) */ if ((sfmmu_getctx_sec() == currcnum) && (currcnum != INVALID_CONTEXT)) { + /* sets shared context to INVALID too */ sfmmu_setctx_sec(INVALID_CONTEXT); sfmmu_clear_utsbinfo(); } + SFMMU_FLAGS_SET(sfmmup, HAT_ALLCTX_INVALID); + kpreempt_enable(); /* @@ -11219,18 +12610,60 @@ sfmmu_cache_flushcolor(int vcolor, pfn_t pfnum) static int sfmmu_tsb_pre_relocator(caddr_t va, uint_t tsbsz, uint_t flags, void *tsbinfo) { - hatlock_t *hatlockp; struct tsb_info *tsbinfop = (struct tsb_info *)tsbinfo; sfmmu_t *sfmmup = tsbinfop->tsb_sfmmu; - extern uint32_t sendmondo_in_recover; + hatlock_t *hatlockp; + sf_scd_t *scdp; if (flags != HAT_PRESUSPEND) return (0); + /* + * If tsb is a shared TSB with TSB_SHAREDCTX set, sfmmup must + * be a shared hat, then set SCD's tsbinfo's flag. + * If tsb is not shared, sfmmup is a private hat, then set + * its private tsbinfo's flag. 
+ */ hatlockp = sfmmu_hat_enter(sfmmup); - tsbinfop->tsb_flags |= TSB_RELOC_FLAG; + if (!(tsbinfop->tsb_flags & TSB_SHAREDCTX)) { + sfmmu_tsb_inv_ctx(sfmmup); + sfmmu_hat_exit(hatlockp); + } else { + /* release lock on the shared hat */ + sfmmu_hat_exit(hatlockp); + /* sfmmup is a shared hat */ + ASSERT(sfmmup->sfmmu_scdhat); + scdp = sfmmup->sfmmu_scdp; + ASSERT(scdp != NULL); + /* get private hat from the scd list */ + mutex_enter(&scdp->scd_mutex); + sfmmup = scdp->scd_sf_list; + while (sfmmup != NULL) { + hatlockp = sfmmu_hat_enter(sfmmup); + /* + * We do not call sfmmu_tsb_inv_ctx here because + * sendmondo_in_recover check is only needed for + * sun4u. + */ + sfmmu_invalidate_ctx(sfmmup); + sfmmu_hat_exit(hatlockp); + sfmmup = sfmmup->sfmmu_scd_link.next; + + } + mutex_exit(&scdp->scd_mutex); + } + return (0); +} + +static void +sfmmu_tsb_inv_ctx(sfmmu_t *sfmmup) +{ + extern uint32_t sendmondo_in_recover; + + ASSERT(sfmmu_hat_lock_held(sfmmup)); + /* * For Cheetah+ Erratum 25: * Wait for any active recovery to finish. We can't risk @@ -11262,9 +12695,6 @@ sfmmu_tsb_pre_relocator(caddr_t va, uint_t tsbsz, uint_t flags, void *tsbinfo) } sfmmu_invalidate_ctx(sfmmup); - sfmmu_hat_exit(hatlockp); - - return (0); } /* ARGSUSED */ @@ -11291,7 +12721,6 @@ sfmmu_tsb_post_relocator(caddr_t va, uint_t tsbsz, uint_t flags, if ((tsbinfop->tsb_flags & TSB_SWAPPED) == 0) { ASSERT(va == tsbinfop->tsb_va); sfmmu_tsbinfo_setup_phys(tsbinfop, newpfn); - sfmmu_setup_tsbinfo(sfmmup); if (tsbinfop->tsb_flags & TSB_FLUSH_NEEDED) { sfmmu_inv_tsb(tsbinfop->tsb_va, @@ -11351,11 +12780,18 @@ sfmmu_tsb_free(struct tsb_info *tsbinfo) * need to uninstall the callback handler. */ if (tsbinfo->tsb_cache != sfmmu_tsb8k_cache) { - uintptr_t slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT; - caddr_t slab_vaddr = (caddr_t)((uintptr_t)tsbva & slab_mask); + uintptr_t slab_mask; + caddr_t slab_vaddr; page_t **ppl; int ret; + ASSERT(tsb_size <= MMU_PAGESIZE4M || use_bigtsb_arena); + if (tsb_size > MMU_PAGESIZE4M) + slab_mask = ~((uintptr_t)bigtsb_slab_mask) << PAGESHIFT; + else + slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT; + slab_vaddr = (caddr_t)((uintptr_t)tsbva & slab_mask); + ret = as_pagelock(&kas, &ppl, slab_vaddr, PAGESIZE, S_WRITE); ASSERT(ret == 0); hat_delete_callback(tsbva, (uint_t)tsb_size, (void *)tsbinfo, @@ -11436,7 +12872,7 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask, { caddr_t vaddr = NULL; caddr_t slab_vaddr; - uintptr_t slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT; + uintptr_t slab_mask; int tsbbytes = TSB_BYTES(tsbcode); int lowmem = 0; struct kmem_cache *kmem_cachep = NULL; @@ -11447,6 +12883,12 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask, page_t **pplist; int ret; + ASSERT(tsbbytes <= MMU_PAGESIZE4M || use_bigtsb_arena); + if (tsbbytes > MMU_PAGESIZE4M) + slab_mask = ~((uintptr_t)bigtsb_slab_mask) << PAGESHIFT; + else + slab_mask = ~((uintptr_t)tsb_slab_mask) << PAGESHIFT; + if (flags & (TSB_FORCEALLOC | TSB_SWAPIN | TSB_GROW | TSB_SHRINK)) flags |= TSB_ALLOC; @@ -11524,9 +12966,15 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask, lgrpid = 0; /* use lgrp of boot CPU */ if (tsbbytes > MMU_PAGESIZE) { - vmp = kmem_tsb_default_arena[lgrpid]; - vaddr = (caddr_t)vmem_xalloc(vmp, tsbbytes, tsbbytes, 0, 0, - NULL, NULL, VM_NOSLEEP); + if (tsbbytes > MMU_PAGESIZE4M) { + vmp = kmem_bigtsb_default_arena[lgrpid]; + vaddr = (caddr_t)vmem_xalloc(vmp, tsbbytes, tsbbytes, + 0, 0, NULL, NULL, VM_NOSLEEP); + } else { + vmp = 
kmem_tsb_default_arena[lgrpid]; + vaddr = (caddr_t)vmem_xalloc(vmp, tsbbytes, tsbbytes, + 0, 0, NULL, NULL, VM_NOSLEEP); + } #ifdef DEBUG } else if (lowmem || (flags & TSB_FORCEALLOC) || tsb_forceheap) { #else /* !DEBUG */ @@ -11595,11 +13043,12 @@ sfmmu_init_tsbinfo(struct tsb_info *tsbinfo, int tteszmask, sfmmu_tsbinfo_setup_phys(tsbinfo, pfn); + sfmmu_inv_tsb(vaddr, tsbbytes); + if (kmem_cachep != sfmmu_tsb8k_cache) { as_pageunlock(&kas, pplist, slab_vaddr, PAGESIZE, S_WRITE); } - sfmmu_inv_tsb(vaddr, tsbbytes); return (0); } @@ -11907,6 +13356,11 @@ hat_supported(enum hat_features feature, void *arg) case HAT_DYNAMIC_ISM_UNMAP: case HAT_VMODSORT: return (1); + case HAT_SHARED_REGIONS: + if (!disable_shctx && shctx_on) + return (1); + else + return (0); default: return (0); } @@ -11980,29 +13434,19 @@ sfmmu_kstat_percpu_update(kstat_t *ksp, int rw) ASSERT(cpu_kstat); if (rw == KSTAT_READ) { for (i = 0; i < NCPU; cpu_kstat++, tsbm++, kpmtsbm++, i++) { - cpu_kstat->sf_itlb_misses = tsbm->itlb_misses; - cpu_kstat->sf_dtlb_misses = tsbm->dtlb_misses; + cpu_kstat->sf_itlb_misses = 0; + cpu_kstat->sf_dtlb_misses = 0; cpu_kstat->sf_utsb_misses = tsbm->utsb_misses - tsbm->uprot_traps; cpu_kstat->sf_ktsb_misses = tsbm->ktsb_misses + kpmtsbm->kpm_tsb_misses - tsbm->kprot_traps; - - if (tsbm->itlb_misses > 0 && tsbm->dtlb_misses > 0) { - cpu_kstat->sf_tsb_hits = - (tsbm->itlb_misses + tsbm->dtlb_misses) - - (tsbm->utsb_misses + tsbm->ktsb_misses + - kpmtsbm->kpm_tsb_misses); - } else { - cpu_kstat->sf_tsb_hits = 0; - } + cpu_kstat->sf_tsb_hits = 0; cpu_kstat->sf_umod_faults = tsbm->uprot_traps; cpu_kstat->sf_kmod_faults = tsbm->kprot_traps; } } else { /* KSTAT_WRITE is used to clear stats */ for (i = 0; i < NCPU; tsbm++, kpmtsbm++, i++) { - tsbm->itlb_misses = 0; - tsbm->dtlb_misses = 0; tsbm->utsb_misses = 0; tsbm->ktsb_misses = 0; tsbm->uprot_traps = 0; @@ -12189,7 +13633,7 @@ hat_dump(void) void hat_thread_exit(kthread_t *thd) { - uint64_t pgsz_cnum; + uint_t pgsz_cnum; uint_t pstate_save; ASSERT(thd->t_procp->p_as == &kas); @@ -12198,6 +13642,7 @@ hat_thread_exit(kthread_t *thd) #ifdef sun4u pgsz_cnum |= (ksfmmup->sfmmu_cext << CTXREG_EXT_SHIFT); #endif + /* * Note that sfmmu_load_mmustate() is currently a no-op for * kernel threads. We need to disable interrupts here, @@ -12205,7 +13650,1817 @@ hat_thread_exit(kthread_t *thd) * if the caller does not disable interrupts. */ pstate_save = sfmmu_disable_intrs(); + + /* Compatibility Note: hw takes care of MMU_SCONTEXT1 */ sfmmu_setctx_sec(pgsz_cnum); sfmmu_load_mmustate(ksfmmup); sfmmu_enable_intrs(pstate_save); } + + +/* + * SRD support + */ +#define SRD_HASH_FUNCTION(vp) (((((uintptr_t)(vp)) >> 4) ^ \ + (((uintptr_t)(vp)) >> 11)) & \ + srd_hashmask) + +/* + * Attach the process to the srd struct associated with the exec vnode + * from which the process is started. 
+ */ +void +hat_join_srd(struct hat *sfmmup, vnode_t *evp) +{ + uint_t hash = SRD_HASH_FUNCTION(evp); + sf_srd_t *srdp; + sf_srd_t *newsrdp; + + ASSERT(sfmmup != ksfmmup); + ASSERT(sfmmup->sfmmu_srdp == NULL); + + if (disable_shctx || !shctx_on) { + return; + } + + VN_HOLD(evp); + + if (srd_buckets[hash].srdb_srdp != NULL) { + mutex_enter(&srd_buckets[hash].srdb_lock); + for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL; + srdp = srdp->srd_hash) { + if (srdp->srd_evp == evp) { + ASSERT(srdp->srd_refcnt >= 0); + sfmmup->sfmmu_srdp = srdp; + atomic_add_32( + (volatile uint_t *)&srdp->srd_refcnt, 1); + mutex_exit(&srd_buckets[hash].srdb_lock); + return; + } + } + mutex_exit(&srd_buckets[hash].srdb_lock); + } + newsrdp = kmem_cache_alloc(srd_cache, KM_SLEEP); + ASSERT(newsrdp->srd_next_ismrid == 0 && newsrdp->srd_next_hmerid == 0); + + newsrdp->srd_evp = evp; + newsrdp->srd_refcnt = 1; + newsrdp->srd_hmergnfree = NULL; + newsrdp->srd_ismrgnfree = NULL; + + mutex_enter(&srd_buckets[hash].srdb_lock); + for (srdp = srd_buckets[hash].srdb_srdp; srdp != NULL; + srdp = srdp->srd_hash) { + if (srdp->srd_evp == evp) { + ASSERT(srdp->srd_refcnt >= 0); + sfmmup->sfmmu_srdp = srdp; + atomic_add_32((volatile uint_t *)&srdp->srd_refcnt, 1); + mutex_exit(&srd_buckets[hash].srdb_lock); + kmem_cache_free(srd_cache, newsrdp); + return; + } + } + newsrdp->srd_hash = srd_buckets[hash].srdb_srdp; + srd_buckets[hash].srdb_srdp = newsrdp; + sfmmup->sfmmu_srdp = newsrdp; + + mutex_exit(&srd_buckets[hash].srdb_lock); + +} + +static void +sfmmu_leave_srd(sfmmu_t *sfmmup) +{ + vnode_t *evp; + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + uint_t hash; + sf_srd_t **prev_srdpp; + sf_region_t *rgnp; + sf_region_t *nrgnp; +#ifdef DEBUG + int rgns = 0; +#endif + int i; + + ASSERT(sfmmup != ksfmmup); + ASSERT(srdp != NULL); + ASSERT(srdp->srd_refcnt > 0); + ASSERT(sfmmup->sfmmu_scdp == NULL); + ASSERT(sfmmup->sfmmu_free == 1); + + sfmmup->sfmmu_srdp = NULL; + evp = srdp->srd_evp; + ASSERT(evp != NULL); + if (atomic_add_32_nv( + (volatile uint_t *)&srdp->srd_refcnt, -1)) { + VN_RELE(evp); + return; + } + + hash = SRD_HASH_FUNCTION(evp); + mutex_enter(&srd_buckets[hash].srdb_lock); + for (prev_srdpp = &srd_buckets[hash].srdb_srdp; + (srdp = *prev_srdpp) != NULL; prev_srdpp = &srdp->srd_hash) { + if (srdp->srd_evp == evp) { + break; + } + } + if (srdp == NULL || srdp->srd_refcnt) { + mutex_exit(&srd_buckets[hash].srdb_lock); + VN_RELE(evp); + return; + } + *prev_srdpp = srdp->srd_hash; + mutex_exit(&srd_buckets[hash].srdb_lock); + + ASSERT(srdp->srd_refcnt == 0); + VN_RELE(evp); + +#ifdef DEBUG + for (i = 0; i < SFMMU_MAX_REGION_BUCKETS; i++) { + ASSERT(srdp->srd_rgnhash[i] == NULL); + } +#endif /* DEBUG */ + + /* free each hme regions in the srd */ + for (rgnp = srdp->srd_hmergnfree; rgnp != NULL; rgnp = nrgnp) { + nrgnp = rgnp->rgn_next; + ASSERT(rgnp->rgn_id < srdp->srd_next_hmerid); + ASSERT(rgnp->rgn_refcnt == 0); + ASSERT(rgnp->rgn_sfmmu_head == NULL); + ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE); + ASSERT(rgnp->rgn_hmeflags == 0); + ASSERT(srdp->srd_hmergnp[rgnp->rgn_id] == rgnp); +#ifdef DEBUG + for (i = 0; i < MMU_PAGE_SIZES; i++) { + ASSERT(rgnp->rgn_ttecnt[i] == 0); + } + rgns++; +#endif /* DEBUG */ + kmem_cache_free(region_cache, rgnp); + } + ASSERT(rgns == srdp->srd_next_hmerid); + +#ifdef DEBUG + rgns = 0; +#endif + /* free each ism rgns in the srd */ + for (rgnp = srdp->srd_ismrgnfree; rgnp != NULL; rgnp = nrgnp) { + nrgnp = rgnp->rgn_next; + ASSERT(rgnp->rgn_id < srdp->srd_next_ismrid); + ASSERT(rgnp->rgn_refcnt == 
0); + ASSERT(rgnp->rgn_sfmmu_head == NULL); + ASSERT(rgnp->rgn_flags & SFMMU_REGION_FREE); + ASSERT(srdp->srd_ismrgnp[rgnp->rgn_id] == rgnp); +#ifdef DEBUG + for (i = 0; i < MMU_PAGE_SIZES; i++) { + ASSERT(rgnp->rgn_ttecnt[i] == 0); + } + rgns++; +#endif /* DEBUG */ + kmem_cache_free(region_cache, rgnp); + } + ASSERT(rgns == srdp->srd_next_ismrid); + ASSERT(srdp->srd_ismbusyrgns == 0); + ASSERT(srdp->srd_hmebusyrgns == 0); + + srdp->srd_next_ismrid = 0; + srdp->srd_next_hmerid = 0; + + bzero((void *)srdp->srd_ismrgnp, + sizeof (sf_region_t *) * SFMMU_MAX_ISM_REGIONS); + bzero((void *)srdp->srd_hmergnp, + sizeof (sf_region_t *) * SFMMU_MAX_HME_REGIONS); + + ASSERT(srdp->srd_scdp == NULL); + kmem_cache_free(srd_cache, srdp); +} + +/* ARGSUSED */ +static int +sfmmu_srdcache_constructor(void *buf, void *cdrarg, int kmflags) +{ + sf_srd_t *srdp = (sf_srd_t *)buf; + bzero(buf, sizeof (*srdp)); + + mutex_init(&srdp->srd_mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&srdp->srd_scd_mutex, NULL, MUTEX_DEFAULT, NULL); + return (0); +} + +/* ARGSUSED */ +static void +sfmmu_srdcache_destructor(void *buf, void *cdrarg) +{ + sf_srd_t *srdp = (sf_srd_t *)buf; + + mutex_destroy(&srdp->srd_mutex); + mutex_destroy(&srdp->srd_scd_mutex); +} + +/* + * The caller makes sure hat_join_region()/hat_leave_region() can't be called + * at the same time for the same process and address range. This is ensured by + * the fact that address space is locked as writer when a process joins the + * regions. Therefore there's no need to hold an srd lock during the entire + * execution of hat_join_region()/hat_leave_region(). + */ + +#define RGN_HASH_FUNCTION(obj) (((((uintptr_t)(obj)) >> 4) ^ \ + (((uintptr_t)(obj)) >> 11)) & \ + srd_rgn_hashmask) +/* + * This routine implements the shared context functionality required when + * attaching a segment to an address space. It must be called from + * hat_share() for D(ISM) segments and from segvn_create() for segments + * with the MAP_PRIVATE and MAP_TEXT flags set. It returns a region_cookie + * which is saved in the private segment data for hme segments and + * the ism_map structure for ism segments. + */ +hat_region_cookie_t +hat_join_region(struct hat *sfmmup, + caddr_t r_saddr, + size_t r_size, + void *r_obj, + u_offset_t r_objoff, + uchar_t r_perm, + uchar_t r_pgszc, + hat_rgn_cb_func_t r_cb_function, + uint_t flags) +{ + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + uint_t rhash; + uint_t rid; + hatlock_t *hatlockp; + sf_region_t *rgnp; + sf_region_t *new_rgnp = NULL; + int i; + uint16_t *nextidp; + sf_region_t **freelistp; + int maxids; + sf_region_t **rarrp; + uint16_t *busyrgnsp; + ulong_t rttecnt; + int rkmalloc = 0; + uchar_t tteflag; + uchar_t r_type = flags & HAT_REGION_TYPE_MASK; + int text = (r_type == HAT_REGION_TEXT); + + if (srdp == NULL || r_size == 0) { + return (HAT_INVALID_REGION_COOKIE); + } + + ASSERT(sfmmup->sfmmu_xhat_provider == NULL); + ASSERT(sfmmup != ksfmmup); + ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock)); + ASSERT(srdp->srd_refcnt > 0); + ASSERT(!(flags & ~HAT_REGION_TYPE_MASK)); + ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM); + ASSERT(r_pgszc < mmu_page_sizes); + if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) || + !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) { + panic("hat_join_region: region addr or size is not aligned\n"); + } + + + r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM : + SFMMU_REGION_HME; + /* + * Currently only support shared hmes for the main text region. 
+ */ + if (r_type == SFMMU_REGION_HME && r_obj != srdp->srd_evp) { + return (HAT_INVALID_REGION_COOKIE); + } + + rhash = RGN_HASH_FUNCTION(r_obj); + + if (r_type == SFMMU_REGION_ISM) { + nextidp = &srdp->srd_next_ismrid; + freelistp = &srdp->srd_ismrgnfree; + maxids = SFMMU_MAX_ISM_REGIONS; + rarrp = srdp->srd_ismrgnp; + busyrgnsp = &srdp->srd_ismbusyrgns; + } else { + nextidp = &srdp->srd_next_hmerid; + freelistp = &srdp->srd_hmergnfree; + maxids = SFMMU_MAX_HME_REGIONS; + rarrp = srdp->srd_hmergnp; + busyrgnsp = &srdp->srd_hmebusyrgns; + } + + mutex_enter(&srdp->srd_mutex); + + for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL; + rgnp = rgnp->rgn_hash) { + if (rgnp->rgn_saddr == r_saddr && rgnp->rgn_size == r_size && + rgnp->rgn_obj == r_obj && rgnp->rgn_objoff == r_objoff && + rgnp->rgn_perm == r_perm && rgnp->rgn_pgszc == r_pgszc) { + break; + } + } + +rfound: + if (rgnp != NULL) { + ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type); + ASSERT(rgnp->rgn_cb_function == r_cb_function); + ASSERT(rgnp->rgn_refcnt >= 0); + rid = rgnp->rgn_id; + ASSERT(rid < maxids); + ASSERT(rarrp[rid] == rgnp); + ASSERT(rid < *nextidp); + atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1); + mutex_exit(&srdp->srd_mutex); + if (new_rgnp != NULL) { + kmem_cache_free(region_cache, new_rgnp); + } + if (r_type == SFMMU_REGION_HME) { + int myjoin = + (sfmmup == astosfmmu(curthread->t_procp->p_as)); + + sfmmu_link_to_hmeregion(sfmmup, rgnp); + /* + * bitmap should be updated after linking sfmmu on + * region list so that pageunload() doesn't skip + * TSB/TLB flush. As soon as bitmap is updated another + * thread in this process can already start accessing + * this region. + */ + /* + * Normally ttecnt accounting is done as part of + * pagefault handling. But a process may not take any + * pagefaults on shared hmeblks created by some other + * process. To compensate for this assume that the + * entire region will end up faulted in using + * the region's pagesize. + * + */ + if (r_pgszc > TTE8K) { + tteflag = 1 << r_pgszc; + if (disable_large_pages & tteflag) { + tteflag = 0; + } + } else { + tteflag = 0; + } + if (tteflag && !(sfmmup->sfmmu_rtteflags & tteflag)) { + hatlockp = sfmmu_hat_enter(sfmmup); + sfmmup->sfmmu_rtteflags |= tteflag; + sfmmu_hat_exit(hatlockp); + } + hatlockp = sfmmu_hat_enter(sfmmup); + + /* + * Preallocate 1/4 of ttecnt's in 8K TSB for >= 4M + * region to allow for large page allocation failure. + */ + if (r_pgszc >= TTE4M) { + sfmmup->sfmmu_tsb0_4minflcnt += + r_size >> (TTE_PAGE_SHIFT(TTE8K) + 2); + } + + /* update sfmmu_ttecnt with the shme rgn ttecnt */ + rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc); + atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], + rttecnt); + + if (text && r_pgszc >= TTE4M && + (tteflag || ((disable_large_pages >> TTE4M) & + ((1 << (r_pgszc - TTE4M + 1)) - 1))) && + !SFMMU_FLAGS_ISSET(sfmmup, HAT_4MTEXT_FLAG)) { + SFMMU_FLAGS_SET(sfmmup, HAT_4MTEXT_FLAG); + } + + sfmmu_hat_exit(hatlockp); + /* + * On Panther we need to make sure TLB is programmed + * to accept 32M/256M pages. Call + * sfmmu_check_page_sizes() now to make sure TLB is + * setup before making hmeregions visible to other + * threads. + */ + sfmmu_check_page_sizes(sfmmup, 1); + hatlockp = sfmmu_hat_enter(sfmmup); + SF_RGNMAP_ADD(sfmmup->sfmmu_hmeregion_map, rid); + + /* + * if context is invalid tsb miss exception code will + * call sfmmu_check_page_sizes() and update tsbmiss + * area later. 
+ */ + kpreempt_disable(); + if (myjoin && + (sfmmup->sfmmu_ctxs[CPU_MMU_IDX(CPU)].cnum + != INVALID_CONTEXT)) { + struct tsbmiss *tsbmp; + + tsbmp = &tsbmiss_area[CPU->cpu_id]; + ASSERT(sfmmup == tsbmp->usfmmup); + BT_SET(tsbmp->shmermap, rid); + if (r_pgszc > TTE64K) { + tsbmp->uhat_rtteflags |= tteflag; + } + + } + kpreempt_enable(); + + sfmmu_hat_exit(hatlockp); + ASSERT((hat_region_cookie_t)((uint64_t)rid) != + HAT_INVALID_REGION_COOKIE); + } else { + hatlockp = sfmmu_hat_enter(sfmmup); + SF_RGNMAP_ADD(sfmmup->sfmmu_ismregion_map, rid); + sfmmu_hat_exit(hatlockp); + } + ASSERT(rid < maxids); + + if (r_type == SFMMU_REGION_ISM) { + sfmmu_find_scd(sfmmup); + } + return ((hat_region_cookie_t)((uint64_t)rid)); + } + + ASSERT(new_rgnp == NULL); + + if (*busyrgnsp >= maxids) { + mutex_exit(&srdp->srd_mutex); + return (HAT_INVALID_REGION_COOKIE); + } + + ASSERT(MUTEX_HELD(&srdp->srd_mutex)); + if (*freelistp != NULL) { + new_rgnp = *freelistp; + *freelistp = new_rgnp->rgn_next; + ASSERT(new_rgnp->rgn_id < *nextidp); + ASSERT(new_rgnp->rgn_id < maxids); + ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE); + ASSERT((new_rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) + == r_type); + ASSERT(rarrp[new_rgnp->rgn_id] == new_rgnp); + + ASSERT(new_rgnp->rgn_hmeflags == 0); + } + + if (new_rgnp == NULL) { + /* + * release local locks before memory allocation. + */ + mutex_exit(&srdp->srd_mutex); + if (new_rgnp == NULL) { + rkmalloc = 1; + new_rgnp = kmem_cache_alloc(region_cache, KM_SLEEP); + } + + mutex_enter(&srdp->srd_mutex); + for (rgnp = srdp->srd_rgnhash[rhash]; rgnp != NULL; + rgnp = rgnp->rgn_hash) { + if (rgnp->rgn_saddr == r_saddr && + rgnp->rgn_size == r_size && + rgnp->rgn_obj == r_obj && + rgnp->rgn_objoff == r_objoff && + rgnp->rgn_perm == r_perm && + rgnp->rgn_pgszc == r_pgszc) { + break; + } + } + if (rgnp != NULL) { + if (!rkmalloc) { + ASSERT(new_rgnp->rgn_flags & + SFMMU_REGION_FREE); + new_rgnp->rgn_next = *freelistp; + *freelistp = new_rgnp; + new_rgnp = NULL; + } + goto rfound; + } + + if (rkmalloc) { + if (*nextidp >= maxids) { + mutex_exit(&srdp->srd_mutex); + goto fail; + } + rgnp = new_rgnp; + new_rgnp = NULL; + rgnp->rgn_id = (*nextidp)++; + ASSERT(rgnp->rgn_id < maxids); + ASSERT(rarrp[rgnp->rgn_id] == NULL); + rarrp[rgnp->rgn_id] = rgnp; + } else { + rgnp = new_rgnp; + new_rgnp = NULL; + } + } else { + rgnp = new_rgnp; + new_rgnp = NULL; + } + + ASSERT(rgnp->rgn_sfmmu_head == NULL); + ASSERT(rgnp->rgn_hmeflags == 0); +#ifdef DEBUG + for (i = 0; i < MMU_PAGE_SIZES; i++) { + ASSERT(rgnp->rgn_ttecnt[i] == 0); + } +#endif + rgnp->rgn_saddr = r_saddr; + rgnp->rgn_size = r_size; + rgnp->rgn_obj = r_obj; + rgnp->rgn_objoff = r_objoff; + rgnp->rgn_perm = r_perm; + rgnp->rgn_pgszc = r_pgszc; + rgnp->rgn_flags = r_type; + rgnp->rgn_refcnt = 0; + rgnp->rgn_cb_function = r_cb_function; + rgnp->rgn_hash = srdp->srd_rgnhash[rhash]; + srdp->srd_rgnhash[rhash] = rgnp; + (*busyrgnsp)++; + ASSERT(*busyrgnsp <= maxids); + goto rfound; + +fail: + ASSERT(new_rgnp != NULL); + if (rkmalloc) { + kmem_cache_free(region_cache, new_rgnp); + } else { + /* put it back on the free list. */ + ASSERT(new_rgnp->rgn_flags & SFMMU_REGION_FREE); + new_rgnp->rgn_next = *freelistp; + *freelistp = new_rgnp; + } + return (HAT_INVALID_REGION_COOKIE); +} + +/* + * This function implements the shared context functionality required + * when detaching a segment from an address space. It must be called + * from hat_unshare() for all D(ISM) segments and from segvn_unmap(), + * for segments with a valid region_cookie. 
+ * It will also be called from all seg_vn routines which change a + * segment's attributes such as segvn_setprot(), segvn_setpagesize(), + * segvn_clrszc() & segvn_advise(), as well as in the case of COW fault + * from segvn_fault(). + */ +void +hat_leave_region(struct hat *sfmmup, hat_region_cookie_t rcookie, uint_t flags) +{ + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + sf_scd_t *scdp; + uint_t rhash; + uint_t rid = (uint_t)((uint64_t)rcookie); + hatlock_t *hatlockp = NULL; + sf_region_t *rgnp; + sf_region_t **prev_rgnpp; + sf_region_t *cur_rgnp; + void *r_obj; + int i; + caddr_t r_saddr; + caddr_t r_eaddr; + size_t r_size; + uchar_t r_pgszc; + uchar_t r_type = flags & HAT_REGION_TYPE_MASK; + + ASSERT(sfmmup != ksfmmup); + ASSERT(srdp != NULL); + ASSERT(srdp->srd_refcnt > 0); + ASSERT(!(flags & ~HAT_REGION_TYPE_MASK)); + ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM); + ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL); + + r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM : + SFMMU_REGION_HME; + + if (r_type == SFMMU_REGION_ISM) { + ASSERT(SFMMU_IS_ISMRID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_ISM_REGIONS); + rgnp = srdp->srd_ismrgnp[rid]; + } else { + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + rgnp = srdp->srd_hmergnp[rid]; + } + ASSERT(rgnp != NULL); + ASSERT(rgnp->rgn_id == rid); + ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type); + ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE)); + ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock)); + + ASSERT(sfmmup->sfmmu_xhat_provider == NULL); + if (r_type == SFMMU_REGION_HME && sfmmup->sfmmu_as->a_xhat != NULL) { + xhat_unload_callback_all(sfmmup->sfmmu_as, rgnp->rgn_saddr, + rgnp->rgn_size, 0, NULL); + } + + if (sfmmup->sfmmu_free) { + ulong_t rttecnt; + r_pgszc = rgnp->rgn_pgszc; + r_size = rgnp->rgn_size; + + ASSERT(sfmmup->sfmmu_scdp == NULL); + if (r_type == SFMMU_REGION_ISM) { + SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid); + } else { + /* update shme rgns ttecnt in sfmmu_ttecnt */ + rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc); + ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt); + + atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], + -rttecnt); + + SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid); + } + } else if (r_type == SFMMU_REGION_ISM) { + hatlockp = sfmmu_hat_enter(sfmmup); + ASSERT(rid < srdp->srd_next_ismrid); + SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid); + scdp = sfmmup->sfmmu_scdp; + if (scdp != NULL && + SF_RGNMAP_TEST(scdp->scd_ismregion_map, rid)) { + sfmmu_leave_scd(sfmmup, r_type); + ASSERT(sfmmu_hat_lock_held(sfmmup)); + } + sfmmu_hat_exit(hatlockp); + } else { + ulong_t rttecnt; + r_pgszc = rgnp->rgn_pgszc; + r_saddr = rgnp->rgn_saddr; + r_size = rgnp->rgn_size; + r_eaddr = r_saddr + r_size; + + ASSERT(r_type == SFMMU_REGION_HME); + hatlockp = sfmmu_hat_enter(sfmmup); + ASSERT(rid < srdp->srd_next_hmerid); + SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid); + + /* + * If region is part of an SCD call sfmmu_leave_scd(). + * Otherwise if process is not exiting and has valid context + * just drop the context on the floor to lose stale TLB + * entries and force the update of tsb miss area to reflect + * the new region map. After that clean our TSB entries. 
+ */ + scdp = sfmmup->sfmmu_scdp; + if (scdp != NULL && + SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) { + sfmmu_leave_scd(sfmmup, r_type); + ASSERT(sfmmu_hat_lock_held(sfmmup)); + } + sfmmu_invalidate_ctx(sfmmup); + + i = TTE8K; + while (i < mmu_page_sizes) { + if (rgnp->rgn_ttecnt[i] != 0) { + sfmmu_unload_tsb_range(sfmmup, r_saddr, + r_eaddr, i); + if (i < TTE4M) { + i = TTE4M; + continue; + } else { + break; + } + } + i++; + } + /* Remove the preallocated 1/4 8k ttecnt for 4M regions. */ + if (r_pgszc >= TTE4M) { + rttecnt = r_size >> (TTE_PAGE_SHIFT(TTE8K) + 2); + ASSERT(sfmmup->sfmmu_tsb0_4minflcnt >= + rttecnt); + sfmmup->sfmmu_tsb0_4minflcnt -= rttecnt; + } + + /* update shme rgns ttecnt in sfmmu_ttecnt */ + rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc); + ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt); + atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc], -rttecnt); + + sfmmu_hat_exit(hatlockp); + if (scdp != NULL && sfmmup->sfmmu_scdp == NULL) { + /* sfmmup left the scd, grow private tsb */ + sfmmu_check_page_sizes(sfmmup, 1); + } else { + sfmmu_check_page_sizes(sfmmup, 0); + } + } + + if (r_type == SFMMU_REGION_HME) { + sfmmu_unlink_from_hmeregion(sfmmup, rgnp); + } + + r_obj = rgnp->rgn_obj; + if (atomic_add_32_nv((volatile uint_t *)&rgnp->rgn_refcnt, -1)) { + return; + } + + /* + * looks like nobody uses this region anymore. Free it. + */ + rhash = RGN_HASH_FUNCTION(r_obj); + mutex_enter(&srdp->srd_mutex); + for (prev_rgnpp = &srdp->srd_rgnhash[rhash]; + (cur_rgnp = *prev_rgnpp) != NULL; + prev_rgnpp = &cur_rgnp->rgn_hash) { + if (cur_rgnp == rgnp && cur_rgnp->rgn_refcnt == 0) { + break; + } + } + + if (cur_rgnp == NULL) { + mutex_exit(&srdp->srd_mutex); + return; + } + + ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type); + *prev_rgnpp = rgnp->rgn_hash; + if (r_type == SFMMU_REGION_ISM) { + rgnp->rgn_flags |= SFMMU_REGION_FREE; + ASSERT(rid < srdp->srd_next_ismrid); + rgnp->rgn_next = srdp->srd_ismrgnfree; + srdp->srd_ismrgnfree = rgnp; + ASSERT(srdp->srd_ismbusyrgns > 0); + srdp->srd_ismbusyrgns--; + mutex_exit(&srdp->srd_mutex); + return; + } + mutex_exit(&srdp->srd_mutex); + + /* + * Destroy region's hmeblks. + */ + sfmmu_unload_hmeregion(srdp, rgnp); + + rgnp->rgn_hmeflags = 0; + + ASSERT(rgnp->rgn_sfmmu_head == NULL); + ASSERT(rgnp->rgn_id == rid); + for (i = 0; i < MMU_PAGE_SIZES; i++) { + rgnp->rgn_ttecnt[i] = 0; + } + rgnp->rgn_flags |= SFMMU_REGION_FREE; + mutex_enter(&srdp->srd_mutex); + ASSERT(rid < srdp->srd_next_hmerid); + rgnp->rgn_next = srdp->srd_hmergnfree; + srdp->srd_hmergnfree = rgnp; + ASSERT(srdp->srd_hmebusyrgns > 0); + srdp->srd_hmebusyrgns--; + mutex_exit(&srdp->srd_mutex); +} + +/* + * For now only called for hmeblk regions and not for ISM regions. 
+ */ +void +hat_dup_region(struct hat *sfmmup, hat_region_cookie_t rcookie) +{ + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + uint_t rid = (uint_t)((uint64_t)rcookie); + sf_region_t *rgnp; + sf_rgn_link_t *rlink; + sf_rgn_link_t *hrlink; + ulong_t rttecnt; + + ASSERT(sfmmup != ksfmmup); + ASSERT(srdp != NULL); + ASSERT(srdp->srd_refcnt > 0); + + ASSERT(rid < srdp->srd_next_hmerid); + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + ASSERT(rid < SFMMU_MAX_HME_REGIONS); + + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp->rgn_refcnt > 0); + ASSERT(rgnp->rgn_id == rid); + ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == SFMMU_REGION_HME); + ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE)); + + atomic_add_32((volatile uint_t *)&rgnp->rgn_refcnt, 1); + + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 0); + ASSERT(rlink != NULL); + mutex_enter(&rgnp->rgn_mutex); + ASSERT(rgnp->rgn_sfmmu_head != NULL); + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(rgnp->rgn_sfmmu_head, rid, hrlink, 0, 0); + ASSERT(hrlink != NULL); + ASSERT(hrlink->prev == NULL); + rlink->next = rgnp->rgn_sfmmu_head; + rlink->prev = NULL; + hrlink->prev = sfmmup; + /* + * make sure rlink's next field is correct + * before making this link visible. + */ + membar_stst(); + rgnp->rgn_sfmmu_head = sfmmup; + mutex_exit(&rgnp->rgn_mutex); + + /* update sfmmu_ttecnt with the shme rgn ttecnt */ + rttecnt = rgnp->rgn_size >> TTE_PAGE_SHIFT(rgnp->rgn_pgszc); + atomic_add_long(&sfmmup->sfmmu_ttecnt[rgnp->rgn_pgszc], rttecnt); + /* update tsb0 inflation count */ + if (rgnp->rgn_pgszc >= TTE4M) { + sfmmup->sfmmu_tsb0_4minflcnt += + rgnp->rgn_size >> (TTE_PAGE_SHIFT(TTE8K) + 2); + } + /* + * Update regionid bitmask without hat lock since no other thread + * can update this region bitmask right now. + */ + SF_RGNMAP_ADD(sfmmup->sfmmu_hmeregion_map, rid); +} + +/* ARGSUSED */ +static int +sfmmu_rgncache_constructor(void *buf, void *cdrarg, int kmflags) +{ + sf_region_t *rgnp = (sf_region_t *)buf; + bzero(buf, sizeof (*rgnp)); + + mutex_init(&rgnp->rgn_mutex, NULL, MUTEX_DEFAULT, NULL); + + return (0); +} + +/* ARGSUSED */ +static void +sfmmu_rgncache_destructor(void *buf, void *cdrarg) +{ + sf_region_t *rgnp = (sf_region_t *)buf; + mutex_destroy(&rgnp->rgn_mutex); +} + +static int +sfrgnmap_isnull(sf_region_map_t *map) +{ + int i; + + for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) { + if (map->bitmap[i] != 0) { + return (0); + } + } + return (1); +} + +static int +sfhmergnmap_isnull(sf_hmeregion_map_t *map) +{ + int i; + + for (i = 0; i < SFMMU_HMERGNMAP_WORDS; i++) { + if (map->bitmap[i] != 0) { + return (0); + } + } + return (1); +} + +#ifdef DEBUG +static void +check_scd_sfmmu_list(sfmmu_t **headp, sfmmu_t *sfmmup, int onlist) +{ + sfmmu_t *sp; + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + + for (sp = *headp; sp != NULL; sp = sp->sfmmu_scd_link.next) { + ASSERT(srdp == sp->sfmmu_srdp); + if (sp == sfmmup) { + if (onlist) { + return; + } else { + panic("shctx: sfmmu 0x%p found on scd" + "list 0x%p", sfmmup, *headp); + } + } + } + if (onlist) { + panic("shctx: sfmmu 0x%p not found on scd list 0x%p", + sfmmup, *headp); + } else { + return; + } +} +#else /* DEBUG */ +#define check_scd_sfmmu_list(headp, sfmmup, onlist) +#endif /* DEBUG */ + +/* + * Removes an sfmmu from the start of the queue. 
+ */ +static void +sfmmu_from_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup) +{ + ASSERT(sfmmup->sfmmu_srdp != NULL); + check_scd_sfmmu_list(headp, sfmmup, 1); + if (sfmmup->sfmmu_scd_link.prev != NULL) { + ASSERT(*headp != sfmmup); + sfmmup->sfmmu_scd_link.prev->sfmmu_scd_link.next = + sfmmup->sfmmu_scd_link.next; + } else { + ASSERT(*headp == sfmmup); + *headp = sfmmup->sfmmu_scd_link.next; + } + if (sfmmup->sfmmu_scd_link.next != NULL) { + sfmmup->sfmmu_scd_link.next->sfmmu_scd_link.prev = + sfmmup->sfmmu_scd_link.prev; + } +} + + +/* + * Adds an sfmmu to the start of the queue. + */ +static void +sfmmu_to_scd_list(sfmmu_t **headp, sfmmu_t *sfmmup) +{ + check_scd_sfmmu_list(headp, sfmmup, 0); + sfmmup->sfmmu_scd_link.prev = NULL; + sfmmup->sfmmu_scd_link.next = *headp; + if (*headp != NULL) + (*headp)->sfmmu_scd_link.prev = sfmmup; + *headp = sfmmup; +} + +/* + * Remove an scd from the start of the queue. + */ +static void +sfmmu_remove_scd(sf_scd_t **headp, sf_scd_t *scdp) +{ + if (scdp->scd_prev != NULL) { + ASSERT(*headp != scdp); + scdp->scd_prev->scd_next = scdp->scd_next; + } else { + ASSERT(*headp == scdp); + *headp = scdp->scd_next; + } + + if (scdp->scd_next != NULL) { + scdp->scd_next->scd_prev = scdp->scd_prev; + } +} + +/* + * Add an scd to the start of the queue. + */ +static void +sfmmu_add_scd(sf_scd_t **headp, sf_scd_t *scdp) +{ + scdp->scd_prev = NULL; + scdp->scd_next = *headp; + if (*headp != NULL) { + (*headp)->scd_prev = scdp; + } + *headp = scdp; +} + +static int +sfmmu_alloc_scd_tsbs(sf_srd_t *srdp, sf_scd_t *scdp) +{ + uint_t rid; + uint_t i; + uint_t j; + ulong_t w; + sf_region_t *rgnp; + ulong_t tte8k_cnt = 0; + ulong_t tte4m_cnt = 0; + uint_t tsb_szc; + sfmmu_t *scsfmmup = scdp->scd_sfmmup; + sfmmu_t *ism_hatid; + struct tsb_info *newtsb; + int szc; + + ASSERT(srdp != NULL); + + for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) { + if ((w = scdp->scd_region_map.bitmap[i]) == 0) { + continue; + } + j = 0; + while (w) { + if (!(w & 0x1)) { + j++; + w >>= 1; + continue; + } + rid = (i << BT_ULSHIFT) | j; + j++; + w >>= 1; + + if (rid < SFMMU_MAX_HME_REGIONS) { + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt > 0); + + if (rgnp->rgn_pgszc < TTE4M) { + tte8k_cnt += rgnp->rgn_size >> + TTE_PAGE_SHIFT(TTE8K); + } else { + ASSERT(rgnp->rgn_pgszc >= TTE4M); + tte4m_cnt += rgnp->rgn_size >> + TTE_PAGE_SHIFT(TTE4M); + /* + * Inflate SCD tsb0 by preallocating + * 1/4 8k ttecnt for 4M regions to + * allow for lgpg alloc failure. + */ + tte8k_cnt += rgnp->rgn_size >> + (TTE_PAGE_SHIFT(TTE8K) + 2); + } + } else { + rid -= SFMMU_MAX_HME_REGIONS; + rgnp = srdp->srd_ismrgnp[rid]; + ASSERT(rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt > 0); + + ism_hatid = (sfmmu_t *)rgnp->rgn_obj; + ASSERT(ism_hatid->sfmmu_ismhat); + + for (szc = 0; szc < TTE4M; szc++) { + tte8k_cnt += + ism_hatid->sfmmu_ttecnt[szc] << + TTE_BSZS_SHIFT(szc); + } + + ASSERT(rgnp->rgn_pgszc >= TTE4M); + if (rgnp->rgn_pgszc >= TTE4M) { + tte4m_cnt += rgnp->rgn_size >> + TTE_PAGE_SHIFT(TTE4M); + } + } + } + } + + tsb_szc = SELECT_TSB_SIZECODE(tte8k_cnt); + + /* Allocate both the SCD TSBs here. 
*/ + if (sfmmu_tsbinfo_alloc(&scsfmmup->sfmmu_tsb, + tsb_szc, TSB8K|TSB64K|TSB512K, TSB_ALLOC, scsfmmup) && + (tsb_szc <= TSB_4M_SZCODE || + sfmmu_tsbinfo_alloc(&scsfmmup->sfmmu_tsb, + TSB_4M_SZCODE, TSB8K|TSB64K|TSB512K, + TSB_ALLOC, scsfmmup))) { + + SFMMU_STAT(sf_scd_1sttsb_allocfail); + return (TSB_ALLOCFAIL); + } else { + scsfmmup->sfmmu_tsb->tsb_flags |= TSB_SHAREDCTX; + + if (tte4m_cnt) { + tsb_szc = SELECT_TSB_SIZECODE(tte4m_cnt); + if (sfmmu_tsbinfo_alloc(&newtsb, tsb_szc, + TSB4M|TSB32M|TSB256M, TSB_ALLOC, scsfmmup) && + (tsb_szc <= TSB_4M_SZCODE || + sfmmu_tsbinfo_alloc(&newtsb, TSB_4M_SZCODE, + TSB4M|TSB32M|TSB256M, + TSB_ALLOC, scsfmmup))) { + /* + * If we fail to allocate the 2nd shared tsb, + * just free the 1st tsb, return failure. + */ + sfmmu_tsbinfo_free(scsfmmup->sfmmu_tsb); + SFMMU_STAT(sf_scd_2ndtsb_allocfail); + return (TSB_ALLOCFAIL); + } else { + ASSERT(scsfmmup->sfmmu_tsb->tsb_next == NULL); + newtsb->tsb_flags |= TSB_SHAREDCTX; + scsfmmup->sfmmu_tsb->tsb_next = newtsb; + SFMMU_STAT(sf_scd_2ndtsb_alloc); + } + } + SFMMU_STAT(sf_scd_1sttsb_alloc); + } + return (TSB_SUCCESS); +} + +static void +sfmmu_free_scd_tsbs(sfmmu_t *scd_sfmmu) +{ + while (scd_sfmmu->sfmmu_tsb != NULL) { + struct tsb_info *next = scd_sfmmu->sfmmu_tsb->tsb_next; + sfmmu_tsbinfo_free(scd_sfmmu->sfmmu_tsb); + scd_sfmmu->sfmmu_tsb = next; + } +} + +/* + * Link the sfmmu onto the hme region list. + */ +void +sfmmu_link_to_hmeregion(sfmmu_t *sfmmup, sf_region_t *rgnp) +{ + uint_t rid; + sf_rgn_link_t *rlink; + sfmmu_t *head; + sf_rgn_link_t *hrlink; + + rid = rgnp->rgn_id; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 1, 1); + ASSERT(rlink != NULL); + mutex_enter(&rgnp->rgn_mutex); + if ((head = rgnp->rgn_sfmmu_head) == NULL) { + rlink->next = NULL; + rlink->prev = NULL; + /* + * make sure rlink's next field is NULL + * before making this link visible. + */ + membar_stst(); + rgnp->rgn_sfmmu_head = sfmmup; + } else { + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(head, rid, hrlink, 0, 0); + ASSERT(hrlink != NULL); + ASSERT(hrlink->prev == NULL); + rlink->next = head; + rlink->prev = NULL; + hrlink->prev = sfmmup; + /* + * make sure rlink's next field is correct + * before making this link visible. + */ + membar_stst(); + rgnp->rgn_sfmmu_head = sfmmup; + } + mutex_exit(&rgnp->rgn_mutex); +} + +/* + * Unlink the sfmmu from the hme region list. + */ +void +sfmmu_unlink_from_hmeregion(sfmmu_t *sfmmup, sf_region_t *rgnp) +{ + uint_t rid; + sf_rgn_link_t *rlink; + + rid = rgnp->rgn_id; + ASSERT(SFMMU_IS_SHMERID_VALID(rid)); + + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(sfmmup, rid, rlink, 0, 0); + ASSERT(rlink != NULL); + mutex_enter(&rgnp->rgn_mutex); + if (rgnp->rgn_sfmmu_head == sfmmup) { + sfmmu_t *next = rlink->next; + rgnp->rgn_sfmmu_head = next; + /* + * if we are stopped by xc_attention() after this + * point the forward link walking in + * sfmmu_rgntlb_demap() will work correctly since the + * head correctly points to the next element. 
+ */ + membar_stst(); + rlink->next = NULL; + ASSERT(rlink->prev == NULL); + if (next != NULL) { + sf_rgn_link_t *nrlink; + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(next, rid, nrlink, 0, 0); + ASSERT(nrlink != NULL); + ASSERT(nrlink->prev == sfmmup); + nrlink->prev = NULL; + } + } else { + sfmmu_t *next = rlink->next; + sfmmu_t *prev = rlink->prev; + sf_rgn_link_t *prlink; + + ASSERT(prev != NULL); + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(prev, rid, prlink, 0, 0); + ASSERT(prlink != NULL); + ASSERT(prlink->next == sfmmup); + prlink->next = next; + /* + * if we are stopped by xc_attention() + * after this point the forward link walking + * will work correctly since the prev element + * correctly points to the next element. + */ + membar_stst(); + rlink->next = NULL; + rlink->prev = NULL; + if (next != NULL) { + sf_rgn_link_t *nrlink; + /* LINTED: constant in conditional context */ + SFMMU_HMERID2RLINKP(next, rid, nrlink, 0, 0); + ASSERT(nrlink != NULL); + ASSERT(nrlink->prev == sfmmup); + nrlink->prev = prev; + } + } + mutex_exit(&rgnp->rgn_mutex); +} + +/* + * Link scd sfmmu onto ism or hme region list for each region in the + * scd region map. + */ +void +sfmmu_link_scd_to_regions(sf_srd_t *srdp, sf_scd_t *scdp) +{ + uint_t rid; + uint_t i; + uint_t j; + ulong_t w; + sf_region_t *rgnp; + sfmmu_t *scsfmmup; + + scsfmmup = scdp->scd_sfmmup; + ASSERT(scsfmmup->sfmmu_scdhat); + for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) { + if ((w = scdp->scd_region_map.bitmap[i]) == 0) { + continue; + } + j = 0; + while (w) { + if (!(w & 0x1)) { + j++; + w >>= 1; + continue; + } + rid = (i << BT_ULSHIFT) | j; + j++; + w >>= 1; + + if (rid < SFMMU_MAX_HME_REGIONS) { + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt > 0); + sfmmu_link_to_hmeregion(scsfmmup, rgnp); + } else { + sfmmu_t *ism_hatid = NULL; + ism_ment_t *ism_ment; + rid -= SFMMU_MAX_HME_REGIONS; + rgnp = srdp->srd_ismrgnp[rid]; + ASSERT(rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt > 0); + + ism_hatid = (sfmmu_t *)rgnp->rgn_obj; + ASSERT(ism_hatid->sfmmu_ismhat); + ism_ment = &scdp->scd_ism_links[rid]; + ism_ment->iment_hat = scsfmmup; + ism_ment->iment_base_va = rgnp->rgn_saddr; + mutex_enter(&ism_mlist_lock); + iment_add(ism_ment, ism_hatid); + mutex_exit(&ism_mlist_lock); + + } + } + } +} +/* + * Unlink scd sfmmu from ism or hme region list for each region in the + * scd region map. 
+ */ +void +sfmmu_unlink_scd_from_regions(sf_srd_t *srdp, sf_scd_t *scdp) +{ + uint_t rid; + uint_t i; + uint_t j; + ulong_t w; + sf_region_t *rgnp; + sfmmu_t *scsfmmup; + + scsfmmup = scdp->scd_sfmmup; + for (i = 0; i < SFMMU_RGNMAP_WORDS; i++) { + if ((w = scdp->scd_region_map.bitmap[i]) == 0) { + continue; + } + j = 0; + while (w) { + if (!(w & 0x1)) { + j++; + w >>= 1; + continue; + } + rid = (i << BT_ULSHIFT) | j; + j++; + w >>= 1; + + if (rid < SFMMU_MAX_HME_REGIONS) { + rgnp = srdp->srd_hmergnp[rid]; + ASSERT(rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt > 0); + sfmmu_unlink_from_hmeregion(scsfmmup, + rgnp); + + } else { + sfmmu_t *ism_hatid = NULL; + ism_ment_t *ism_ment; + rid -= SFMMU_MAX_HME_REGIONS; + rgnp = srdp->srd_ismrgnp[rid]; + ASSERT(rgnp->rgn_id == rid); + ASSERT(rgnp->rgn_refcnt > 0); + + ism_hatid = (sfmmu_t *)rgnp->rgn_obj; + ASSERT(ism_hatid->sfmmu_ismhat); + ism_ment = &scdp->scd_ism_links[rid]; + ASSERT(ism_ment->iment_hat == scdp->scd_sfmmup); + ASSERT(ism_ment->iment_base_va == + rgnp->rgn_saddr); + ism_ment->iment_hat = NULL; + ism_ment->iment_base_va = 0; + mutex_enter(&ism_mlist_lock); + iment_sub(ism_ment, ism_hatid); + mutex_exit(&ism_mlist_lock); + + } + } + } +} +/* + * Allocates and initialises a new SCD structure, this is called with + * the srd_scd_mutex held and returns with the reference count + * initialised to 1. + */ +static sf_scd_t * +sfmmu_alloc_scd(sf_srd_t *srdp, sf_region_map_t *new_map) +{ + sf_scd_t *new_scdp; + sfmmu_t *scsfmmup; + int i; + + ASSERT(MUTEX_HELD(&srdp->srd_scd_mutex)); + new_scdp = kmem_cache_alloc(scd_cache, KM_SLEEP); + + scsfmmup = kmem_cache_alloc(sfmmuid_cache, KM_SLEEP); + new_scdp->scd_sfmmup = scsfmmup; + scsfmmup->sfmmu_srdp = srdp; + scsfmmup->sfmmu_scdp = new_scdp; + scsfmmup->sfmmu_tsb0_4minflcnt = 0; + scsfmmup->sfmmu_scdhat = 1; + CPUSET_ALL(scsfmmup->sfmmu_cpusran); + bzero(scsfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE); + + ASSERT(max_mmu_ctxdoms > 0); + for (i = 0; i < max_mmu_ctxdoms; i++) { + scsfmmup->sfmmu_ctxs[i].cnum = INVALID_CONTEXT; + scsfmmup->sfmmu_ctxs[i].gnum = 0; + } + + for (i = 0; i < MMU_PAGE_SIZES; i++) { + new_scdp->scd_rttecnt[i] = 0; + } + + new_scdp->scd_region_map = *new_map; + new_scdp->scd_refcnt = 1; + if (sfmmu_alloc_scd_tsbs(srdp, new_scdp) != TSB_SUCCESS) { + kmem_cache_free(scd_cache, new_scdp); + kmem_cache_free(sfmmuid_cache, scsfmmup); + return (NULL); + } + return (new_scdp); +} + +/* + * The first phase of a process joining an SCD. The hat structure is + * linked to the SCD queue and then the HAT_JOIN_SCD sfmmu flag is set + * and a cross-call with context invalidation is used to cause the + * remaining work to be carried out in the sfmmu_tsbmiss_exception() + * routine. + */ +static void +sfmmu_join_scd(sf_scd_t *scdp, sfmmu_t *sfmmup) +{ + hatlock_t *hatlockp; + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + int i; + sf_scd_t *old_scdp; + + ASSERT(srdp != NULL); + ASSERT(scdp != NULL); + ASSERT(scdp->scd_refcnt > 0); + ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock)); + + if ((old_scdp = sfmmup->sfmmu_scdp) != NULL) { + ASSERT(old_scdp != scdp); + + mutex_enter(&old_scdp->scd_mutex); + sfmmu_from_scd_list(&old_scdp->scd_sf_list, sfmmup); + mutex_exit(&old_scdp->scd_mutex); + /* + * sfmmup leaves the old scd. 
Update sfmmu_ttecnt to + * include the shme rgn ttecnt for rgns that + * were in the old SCD + */ + for (i = 0; i < mmu_page_sizes; i++) { + ASSERT(sfmmup->sfmmu_scdrttecnt[i] == + old_scdp->scd_rttecnt[i]); + atomic_add_long(&sfmmup->sfmmu_ttecnt[i], + sfmmup->sfmmu_scdrttecnt[i]); + } + } + + /* + * Move sfmmu to the scd lists. + */ + mutex_enter(&scdp->scd_mutex); + sfmmu_to_scd_list(&scdp->scd_sf_list, sfmmup); + mutex_exit(&scdp->scd_mutex); + SF_SCD_INCR_REF(scdp); + + hatlockp = sfmmu_hat_enter(sfmmup); + /* + * For a multi-thread process, we must stop + * all the other threads before joining the scd. + */ + + SFMMU_FLAGS_SET(sfmmup, HAT_JOIN_SCD); + + sfmmu_invalidate_ctx(sfmmup); + sfmmup->sfmmu_scdp = scdp; + + /* + * Copy scd_rttecnt into sfmmup's sfmmu_scdrttecnt, and update + * sfmmu_ttecnt to not include the rgn ttecnt just joined in SCD. + */ + for (i = 0; i < mmu_page_sizes; i++) { + sfmmup->sfmmu_scdrttecnt[i] = scdp->scd_rttecnt[i]; + ASSERT(sfmmup->sfmmu_ttecnt[i] >= scdp->scd_rttecnt[i]); + atomic_add_long(&sfmmup->sfmmu_ttecnt[i], + -sfmmup->sfmmu_scdrttecnt[i]); + } + /* update tsb0 inflation count */ + if (old_scdp != NULL) { + sfmmup->sfmmu_tsb0_4minflcnt += + old_scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt; + } + ASSERT(sfmmup->sfmmu_tsb0_4minflcnt >= + scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt); + sfmmup->sfmmu_tsb0_4minflcnt -= scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt; + + sfmmu_hat_exit(hatlockp); + + if (old_scdp != NULL) { + SF_SCD_DECR_REF(srdp, old_scdp); + } + +} + +/* + * This routine is called by a process to become part of an SCD. It is called + * from sfmmu_tsbmiss_exception() once most of the initial work has been + * done by sfmmu_join_scd(). This routine must not drop the hat lock. + */ +static void +sfmmu_finish_join_scd(sfmmu_t *sfmmup) +{ + struct tsb_info *tsbinfop; + + ASSERT(sfmmu_hat_lock_held(sfmmup)); + ASSERT(sfmmup->sfmmu_scdp != NULL); + ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)); + ASSERT(!SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)); + ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ALLCTX_INVALID)); + + for (tsbinfop = sfmmup->sfmmu_tsb; tsbinfop != NULL; + tsbinfop = tsbinfop->tsb_next) { + if (tsbinfop->tsb_flags & TSB_SWAPPED) { + continue; + } + ASSERT(!(tsbinfop->tsb_flags & TSB_RELOC_FLAG)); + + sfmmu_inv_tsb(tsbinfop->tsb_va, + TSB_BYTES(tsbinfop->tsb_szc)); + } + + /* Set HAT_CTX1_FLAG for all SCD ISMs */ + sfmmu_ism_hatflags(sfmmup, 1); + + SFMMU_STAT(sf_join_scd); +} + +/* + * This routine is called in order to check if there is an SCD which matches + * the process's region map if not then a new SCD may be created. + */ +static void +sfmmu_find_scd(sfmmu_t *sfmmup) +{ + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + sf_scd_t *scdp, *new_scdp; + int ret; + + ASSERT(srdp != NULL); + ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock)); + + mutex_enter(&srdp->srd_scd_mutex); + for (scdp = srdp->srd_scdp; scdp != NULL; + scdp = scdp->scd_next) { + SF_RGNMAP_EQUAL(&scdp->scd_region_map, + &sfmmup->sfmmu_region_map, ret); + if (ret == 1) { + SF_SCD_INCR_REF(scdp); + mutex_exit(&srdp->srd_scd_mutex); + sfmmu_join_scd(scdp, sfmmup); + ASSERT(scdp->scd_refcnt >= 2); + atomic_add_32((volatile uint32_t *) + &scdp->scd_refcnt, -1); + return; + } else { + /* + * If the sfmmu region map is a subset of the scd + * region map, then the assumption is that this process + * will continue attaching to ISM segments until the + * region maps are equal. 
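 * In that case the hat is not added to any SCD yet: the code below simply
 * drops srd_scd_mutex and returns without joining, and a later region
 * attach can call sfmmu_find_scd() again once the two maps have had a
 * chance to converge.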
+ */ + SF_RGNMAP_IS_SUBSET(&scdp->scd_region_map, + &sfmmup->sfmmu_region_map, ret); + if (ret == 1) { + mutex_exit(&srdp->srd_scd_mutex); + return; + } + } + } + + ASSERT(scdp == NULL); + /* + * No matching SCD has been found, create a new one. + */ + if ((new_scdp = sfmmu_alloc_scd(srdp, &sfmmup->sfmmu_region_map)) == + NULL) { + mutex_exit(&srdp->srd_scd_mutex); + return; + } + + /* + * sfmmu_alloc_scd() returns with a ref count of 1 on the scd. + */ + + /* Set scd_rttecnt for shme rgns in SCD */ + sfmmu_set_scd_rttecnt(srdp, new_scdp); + + /* + * Link scd onto srd_scdp list and scd sfmmu onto region/iment lists. + */ + sfmmu_link_scd_to_regions(srdp, new_scdp); + sfmmu_add_scd(&srdp->srd_scdp, new_scdp); + SFMMU_STAT_ADD(sf_create_scd, 1); + + mutex_exit(&srdp->srd_scd_mutex); + sfmmu_join_scd(new_scdp, sfmmup); + ASSERT(new_scdp->scd_refcnt >= 2); + atomic_add_32((volatile uint32_t *)&new_scdp->scd_refcnt, -1); +} + +/* + * This routine is called by a process to remove itself from an SCD. It is + * either called when the processes has detached from a segment or from + * hat_free_start() as a result of calling exit. + */ +static void +sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type) +{ + sf_scd_t *scdp = sfmmup->sfmmu_scdp; + sf_srd_t *srdp = sfmmup->sfmmu_srdp; + hatlock_t *hatlockp = TSB_HASH(sfmmup); + int i; + + ASSERT(scdp != NULL); + ASSERT(srdp != NULL); + + if (sfmmup->sfmmu_free) { + /* + * If the process is part of an SCD the sfmmu is unlinked + * from scd_sf_list. + */ + mutex_enter(&scdp->scd_mutex); + sfmmu_from_scd_list(&scdp->scd_sf_list, sfmmup); + mutex_exit(&scdp->scd_mutex); + /* + * Update sfmmu_ttecnt to include the rgn ttecnt for rgns that + * are about to leave the SCD + */ + for (i = 0; i < mmu_page_sizes; i++) { + ASSERT(sfmmup->sfmmu_scdrttecnt[i] == + scdp->scd_rttecnt[i]); + atomic_add_long(&sfmmup->sfmmu_ttecnt[i], + sfmmup->sfmmu_scdrttecnt[i]); + sfmmup->sfmmu_scdrttecnt[i] = 0; + } + sfmmup->sfmmu_scdp = NULL; + + SF_SCD_DECR_REF(srdp, scdp); + return; + } + + ASSERT(r_type != SFMMU_REGION_ISM || + SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)); + ASSERT(scdp->scd_refcnt); + ASSERT(!sfmmup->sfmmu_free); + ASSERT(sfmmu_hat_lock_held(sfmmup)); + ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock)); + + /* + * Wait for ISM maps to be updated. + */ + if (r_type != SFMMU_REGION_ISM) { + while (SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY) && + sfmmup->sfmmu_scdp != NULL) { + cv_wait(&sfmmup->sfmmu_tsb_cv, + HATLOCK_MUTEXP(hatlockp)); + } + + if (sfmmup->sfmmu_scdp == NULL) { + sfmmu_hat_exit(hatlockp); + return; + } + SFMMU_FLAGS_SET(sfmmup, HAT_ISMBUSY); + } + + if (SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) { + SFMMU_FLAGS_CLEAR(sfmmup, HAT_JOIN_SCD); + } else { + /* + * For a multi-thread process, we must stop + * all the other threads before leaving the scd. + */ + + sfmmu_invalidate_ctx(sfmmup); + + /* Clear all the rid's for ISM, delete flags, etc */ + ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)); + sfmmu_ism_hatflags(sfmmup, 0); + } + /* + * Update sfmmu_ttecnt to include the rgn ttecnt for rgns that + * are in SCD before this sfmmup leaves the SCD. 
+ */ + for (i = 0; i < mmu_page_sizes; i++) { + ASSERT(sfmmup->sfmmu_scdrttecnt[i] == + scdp->scd_rttecnt[i]); + atomic_add_long(&sfmmup->sfmmu_ttecnt[i], + sfmmup->sfmmu_scdrttecnt[i]); + sfmmup->sfmmu_scdrttecnt[i] = 0; + /* update ismttecnt to include SCD ism before hat leaves SCD */ + sfmmup->sfmmu_ismttecnt[i] += sfmmup->sfmmu_scdismttecnt[i]; + sfmmup->sfmmu_scdismttecnt[i] = 0; + } + /* update tsb0 inflation count */ + sfmmup->sfmmu_tsb0_4minflcnt += scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt; + + if (r_type != SFMMU_REGION_ISM) { + SFMMU_FLAGS_CLEAR(sfmmup, HAT_ISMBUSY); + } + sfmmup->sfmmu_scdp = NULL; + + sfmmu_hat_exit(hatlockp); + + /* + * Unlink sfmmu from scd_sf_list this can be done without holding + * the hat lock as we hold the sfmmu_as lock which prevents + * hat_join_region from adding this thread to the scd again. Other + * threads check if sfmmu_scdp is NULL under hat lock and if it's NULL + * they won't get here, since sfmmu_leave_scd() clears sfmmu_scdp + * while holding the hat lock. + */ + mutex_enter(&scdp->scd_mutex); + sfmmu_from_scd_list(&scdp->scd_sf_list, sfmmup); + mutex_exit(&scdp->scd_mutex); + SFMMU_STAT(sf_leave_scd); + + SF_SCD_DECR_REF(srdp, scdp); + hatlockp = sfmmu_hat_enter(sfmmup); + +} + +/* + * Unlink and free up an SCD structure with a reference count of 0. + */ +static void +sfmmu_destroy_scd(sf_srd_t *srdp, sf_scd_t *scdp, sf_region_map_t *scd_rmap) +{ + sfmmu_t *scsfmmup; + sf_scd_t *sp; + hatlock_t *shatlockp; + int i, ret; + + mutex_enter(&srdp->srd_scd_mutex); + for (sp = srdp->srd_scdp; sp != NULL; sp = sp->scd_next) { + if (sp == scdp) + break; + } + if (sp == NULL || sp->scd_refcnt) { + mutex_exit(&srdp->srd_scd_mutex); + return; + } + + /* + * It is possible that the scd has been freed and reallocated with a + * different region map while we've been waiting for the srd_scd_mutex. + */ + SF_RGNMAP_EQUAL(scd_rmap, &sp->scd_region_map, ret); + if (ret != 1) { + mutex_exit(&srdp->srd_scd_mutex); + return; + } + + ASSERT(scdp->scd_sf_list == NULL); + /* + * Unlink scd from srd_scdp list. + */ + sfmmu_remove_scd(&srdp->srd_scdp, scdp); + mutex_exit(&srdp->srd_scd_mutex); + + sfmmu_unlink_scd_from_regions(srdp, scdp); + + /* Clear shared context tsb and release ctx */ + scsfmmup = scdp->scd_sfmmup; + + /* + * create a barrier so that scd will not be destroyed + * if other thread still holds the same shared hat lock. + * E.g., sfmmu_tsbmiss_exception() needs to acquire the + * shared hat lock before checking the shared tsb reloc flag. + */ + shatlockp = sfmmu_hat_enter(scsfmmup); + sfmmu_hat_exit(shatlockp); + + sfmmu_free_scd_tsbs(scsfmmup); + + for (i = 0; i < SFMMU_L1_HMERLINKS; i++) { + if (scsfmmup->sfmmu_hmeregion_links[i] != NULL) { + kmem_free(scsfmmup->sfmmu_hmeregion_links[i], + SFMMU_L2_HMERLINKS_SIZE); + scsfmmup->sfmmu_hmeregion_links[i] = NULL; + } + } + kmem_cache_free(sfmmuid_cache, scsfmmup); + kmem_cache_free(scd_cache, scdp); + SFMMU_STAT(sf_destroy_scd); +} + +/* + * Modifies the HAT_CTX1_FLAG for each of the ISM segments which correspond to + * bits which are set in the ism_region_map parameter. This flag indicates to + * the tsbmiss handler that mapping for these segments should be loaded using + * the shared context. 
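 * Within this file it is called with addflag set from
 * sfmmu_finish_join_scd() and with addflag clear from sfmmu_leave_scd(),
 * in both cases with the hat lock held and with HAT_ISMBUSY either set or
 * known to be clear.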
+ */ +static void +sfmmu_ism_hatflags(sfmmu_t *sfmmup, int addflag) +{ + sf_scd_t *scdp = sfmmup->sfmmu_scdp; + ism_blk_t *ism_blkp; + ism_map_t *ism_map; + int i, rid; + + ASSERT(sfmmup->sfmmu_iblk != NULL); + ASSERT(scdp != NULL); + /* + * Note that the caller either set HAT_ISMBUSY flag or checked + * under hat lock that HAT_ISMBUSY was not set by another thread. + */ + ASSERT(sfmmu_hat_lock_held(sfmmup)); + + ism_blkp = sfmmup->sfmmu_iblk; + while (ism_blkp != NULL) { + ism_map = ism_blkp->iblk_maps; + for (i = 0; ism_map[i].imap_ismhat && i < ISM_MAP_SLOTS; i++) { + rid = ism_map[i].imap_rid; + if (rid == SFMMU_INVALID_ISMRID) { + continue; + } + ASSERT(rid >= 0 && rid < SFMMU_MAX_ISM_REGIONS); + if (SF_RGNMAP_TEST(scdp->scd_ismregion_map, rid)) { + if (addflag) { + ism_map[i].imap_hatflags |= + HAT_CTX1_FLAG; + } else { + ism_map[i].imap_hatflags &= + ~HAT_CTX1_FLAG; + } + } + } + ism_blkp = ism_blkp->iblk_next; + } +} + +static int +sfmmu_srd_lock_held(sf_srd_t *srdp) +{ + return (MUTEX_HELD(&srdp->srd_mutex)); +} + +/* ARGSUSED */ +static int +sfmmu_scdcache_constructor(void *buf, void *cdrarg, int kmflags) +{ + sf_scd_t *scdp = (sf_scd_t *)buf; + + bzero(buf, sizeof (sf_scd_t)); + mutex_init(&scdp->scd_mutex, NULL, MUTEX_DEFAULT, NULL); + return (0); +} + +/* ARGSUSED */ +static void +sfmmu_scdcache_destructor(void *buf, void *cdrarg) +{ + sf_scd_t *scdp = (sf_scd_t *)buf; + + mutex_destroy(&scdp->scd_mutex); +} diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h index 1d11998521..2dc7183d85 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h @@ -72,6 +72,7 @@ extern "C" { #include <sys/ksynch.h> typedef struct hat sfmmu_t; +typedef struct sf_scd sf_scd_t; /* * SFMMU attributes for hat_memload/hat_devload @@ -186,7 +187,8 @@ typedef struct hat_lock { */ typedef struct ism_map { uintptr_t imap_seg; /* base va + sz of ISM segment */ - ushort_t imap_vb_shift; /* mmu_pageshift for ism page size */ + uchar_t imap_vb_shift; /* mmu_pageshift for ism page size */ + uchar_t imap_rid; /* region id for ism */ ushort_t imap_hatflags; /* primary ism page size */ uint_t imap_sz_mask; /* mmu_pagemask for ism page size */ sfmmu_t *imap_ismhat; /* hat id of dummy ISM as */ @@ -263,6 +265,239 @@ struct tsb_info { #define TSB_RELOC_FLAG 0x1 #define TSB_FLUSH_NEEDED 0x2 #define TSB_SWAPPED 0x4 +#define TSB_SHAREDCTX 0x8 + +#endif /* !_ASM */ + +/* + * Data structures for shared hmeblk support. + */ + +/* + * Do not increase the maximum number of ism/hme regions without checking first + * the impact on ism_map_t, TSB miss area, hblk tag and region id type in + * sf_region structure. + * Initially, shared hmes will only be used for the main text segment + * therefore this value will be set to 64, it will be increased when shared + * libraries are included. 
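 * A combined region id in an sf_region_map_t runs from 0 to 127: ids below
 * SFMMU_MAX_HME_REGIONS index srd_hmergnp[], while ids at or above it index
 * srd_ismrgnp[] after SFMMU_MAX_HME_REGIONS is subtracted (as in
 * sfmmu_link_scd_to_regions()).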
+ */ + +#define SFMMU_MAX_HME_REGIONS (64) +#define SFMMU_HMERGNMAP_WORDS BT_BITOUL(SFMMU_MAX_HME_REGIONS) + +#define SFMMU_PRIVATE 0 +#define SFMMU_SHARED 1 + +#ifndef _ASM + +#define SFMMU_MAX_ISM_REGIONS (64) +#define SFMMU_ISMRGNMAP_WORDS BT_BITOUL(SFMMU_MAX_ISM_REGIONS) + +#define SFMMU_RGNMAP_WORDS (SFMMU_HMERGNMAP_WORDS + SFMMU_ISMRGNMAP_WORDS) + +#define SFMMU_MAX_REGION_BUCKETS (128) +#define SFMMU_MAX_SRD_BUCKETS (2048) + +typedef struct sf_hmeregion_map { + ulong_t bitmap[SFMMU_HMERGNMAP_WORDS]; +} sf_hmeregion_map_t; + +typedef struct sf_ismregion_map { + ulong_t bitmap[SFMMU_ISMRGNMAP_WORDS]; +} sf_ismregion_map_t; + +typedef union sf_region_map_u { + struct _h_rmap_s { + sf_hmeregion_map_t hmeregion_map; + sf_ismregion_map_t ismregion_map; + } h_rmap_s; + ulong_t bitmap[SFMMU_RGNMAP_WORDS]; +} sf_region_map_t; + +#define SF_RGNMAP_ZERO(map) { \ + int _i; \ + for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \ + (map).bitmap[_i] = 0; \ + } \ +} + +/* + * Returns 1 if map1 and map2 are equal. + */ +#define SF_RGNMAP_EQUAL(map1, map2, rval) { \ + int _i; \ + for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \ + if ((map1)->bitmap[_i] != (map2)->bitmap[_i]) \ + break; \ + } \ + if (_i < SFMMU_RGNMAP_WORDS) \ + rval = 0; \ + else \ + rval = 1; \ +} + +#define SF_RGNMAP_ADD(map, r) BT_SET((map).bitmap, r) +#define SF_RGNMAP_DEL(map, r) BT_CLEAR((map).bitmap, r) +#define SF_RGNMAP_TEST(map, r) BT_TEST((map).bitmap, r) + +/* + * Tests whether map2 is a subset of map1, returns 1 if + * this assertion is true. + */ +#define SF_RGNMAP_IS_SUBSET(map1, map2, rval) { \ + int _i; \ + for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \ + if (((map1)->bitmap[_i] & (map2)->bitmap[_i]) \ + != (map2)->bitmap[_i]) { \ + break; \ + } \ + } \ + if (_i < SFMMU_RGNMAP_WORDS) \ + rval = 0; \ + else \ + rval = 1; \ +} + +#define SF_SCD_INCR_REF(scdp) { \ + atomic_add_32((volatile uint32_t *)&(scdp)->scd_refcnt, 1); \ +} + +#define SF_SCD_DECR_REF(srdp, scdp) { \ + sf_region_map_t _scd_rmap = (scdp)->scd_region_map; \ + if (!atomic_add_32_nv( \ + (volatile uint32_t *)&(scdp)->scd_refcnt, -1)) { \ + sfmmu_destroy_scd((srdp), (scdp), &_scd_rmap); \ + } \ +} + +/* + * A sfmmup link in the link list of sfmmups that share the same region. + */ +typedef struct sf_rgn_link { + sfmmu_t *next; + sfmmu_t *prev; +} sf_rgn_link_t; + +/* + * rgn_flags values. + */ +#define SFMMU_REGION_HME 0x1 +#define SFMMU_REGION_ISM 0x2 +#define SFMMU_REGION_FREE 0x8 + +#define SFMMU_REGION_TYPE_MASK (0x3) + +/* + * sf_region defines a text or (D)ISM segment which map + * the same underlying physical object. 
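 * A minimal usage sketch of the region map macros above, with hypothetical
 * region ids (3 for a hme region, 70 for an ism region); the macros are
 * statement macros that return their result through the trailing lvalue:
 *
 *	sf_region_map_t a, b;
 *	int eq, sub;
 *
 *	SF_RGNMAP_ZERO(a);
 *	SF_RGNMAP_ZERO(b);
 *	SF_RGNMAP_ADD(a, 3);
 *	SF_RGNMAP_ADD(b, 3);
 *	SF_RGNMAP_ADD(b, 70);
 *	SF_RGNMAP_EQUAL(&a, &b, eq);		eq is set to 0
 *	SF_RGNMAP_IS_SUBSET(&b, &a, sub);	sub is set to 1, a is in b
 *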
+ */ +typedef struct sf_region { + caddr_t rgn_saddr; /* base addr of attached seg */ + size_t rgn_size; /* size of attached seg */ + void *rgn_obj; /* the underlying object id */ + u_offset_t rgn_objoff; /* offset in the object mapped */ + uchar_t rgn_perm; /* PROT_READ/WRITE/EXEC */ + uchar_t rgn_pgszc; /* page size of the region */ + uchar_t rgn_flags; /* region type, free flag */ + uchar_t rgn_id; + int rgn_refcnt; /* # of hats sharing the region */ + /* callback function for hat_unload_callback */ + hat_rgn_cb_func_t rgn_cb_function; + struct sf_region *rgn_hash; /* hash chain linking the rgns */ + kmutex_t rgn_mutex; /* protect region sfmmu list */ + /* A link list of processes attached to this region */ + sfmmu_t *rgn_sfmmu_head; + ulong_t rgn_ttecnt[MMU_PAGE_SIZES]; + uint16_t rgn_hmeflags; /* rgn tte size flags */ +} sf_region_t; + +#define rgn_next rgn_hash + +/* srd */ +typedef struct sf_shared_region_domain { + vnode_t *srd_evp; /* executable vnode */ + /* hme region table */ + sf_region_t *srd_hmergnp[SFMMU_MAX_HME_REGIONS]; + /* ism region table */ + sf_region_t *srd_ismrgnp[SFMMU_MAX_ISM_REGIONS]; + /* hash chain linking srds */ + struct sf_shared_region_domain *srd_hash; + /* pointer to the next free hme region */ + sf_region_t *srd_hmergnfree; + /* pointer to the next free ism region */ + sf_region_t *srd_ismrgnfree; + /* id of next ism rgn created */ + uint16_t srd_next_ismrid; + /* pointer of next hme region created */ + uint16_t srd_next_hmerid; + uint16_t srd_ismbusyrgns; /* # of ism rgns in use */ + uint16_t srd_hmebusyrgns; /* # of hme rgns in use */ + int srd_refcnt; /* # of procs in the srd */ + kmutex_t srd_mutex; /* sync add/remove rgns */ + kmutex_t srd_scd_mutex; + sf_scd_t *srd_scdp; /* list of scds in srd */ + /* hash of regions associated with the same executable */ + sf_region_t *srd_rgnhash[SFMMU_MAX_REGION_BUCKETS]; +} sf_srd_t; + +typedef struct sf_srd_bucket { + kmutex_t srdb_lock; + sf_srd_t *srdb_srdp; +} sf_srd_bucket_t; + +/* + * The value of SFMMU_L1_HMERLINKS and SFMMU_L2_HMERLINKS will be increased + * to 16 when the use of shared hmes for shared libraries is enabled. + */ + +#define SFMMU_L1_HMERLINKS (8) +#define SFMMU_L2_HMERLINKS (8) +#define SFMMU_L1_HMERLINKS_SHIFT (3) +#define SFMMU_L1_HMERLINKS_MASK (SFMMU_L1_HMERLINKS - 1) +#define SFMMU_L2_HMERLINKS_MASK (SFMMU_L2_HMERLINKS - 1) +#define SFMMU_L1_HMERLINKS_SIZE \ + (SFMMU_L1_HMERLINKS * sizeof (sf_rgn_link_t *)) +#define SFMMU_L2_HMERLINKS_SIZE \ + (SFMMU_L2_HMERLINKS * sizeof (sf_rgn_link_t)) + +#if (SFMMU_L1_HMERLINKS * SFMMU_L2_HMERLINKS < SFMMU_MAX_HME_REGIONS) +#error Not Enough HMERLINKS +#endif + +/* + * This macro grabs hat lock and allocates level 2 hat chain + * associated with a shme rgn. In the majority of cases, the macro + * is called with alloc = 0, and lock = 0. 
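 * The rid is decomposed into a level-1 index,
 * (rid >> SFMMU_L1_HMERLINKS_SHIFT) & SFMMU_L1_HMERLINKS_MASK, which selects
 * one of the SFMMU_L1_HMERLINKS chunk pointers in sfmmu_hmeregion_links[],
 * and a level-2 index, rid & SFMMU_L2_HMERLINKS_MASK, into the lazily
 * allocated chunk of sf_rgn_link_t entries; with the current values of
 * 8 and 8, rid 27, for example, resolves to entry 3 of chunk 3.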
+ */ +#define SFMMU_HMERID2RLINKP(sfmmup, rid, lnkp, alloc, lock) \ +{ \ + int _l1ix = ((rid) >> SFMMU_L1_HMERLINKS_SHIFT) & \ + SFMMU_L1_HMERLINKS_MASK; \ + int _l2ix = ((rid) & SFMMU_L2_HMERLINKS_MASK); \ + hatlock_t *_hatlockp; \ + lnkp = (sfmmup)->sfmmu_hmeregion_links[_l1ix]; \ + if (lnkp != NULL) { \ + lnkp = &lnkp[_l2ix]; \ + } else if (alloc && lock) { \ + lnkp = kmem_zalloc(SFMMU_L2_HMERLINKS_SIZE, KM_SLEEP); \ + _hatlockp = sfmmu_hat_enter(sfmmup); \ + if ((sfmmup)->sfmmu_hmeregion_links[_l1ix] != NULL) { \ + sfmmu_hat_exit(_hatlockp); \ + kmem_free(lnkp, SFMMU_L2_HMERLINKS_SIZE); \ + lnkp = (sfmmup)->sfmmu_hmeregion_links[_l1ix]; \ + ASSERT(lnkp != NULL); \ + } else { \ + (sfmmup)->sfmmu_hmeregion_links[_l1ix] = lnkp; \ + sfmmu_hat_exit(_hatlockp); \ + } \ + lnkp = &lnkp[_l2ix]; \ + } else if (alloc) { \ + lnkp = kmem_zalloc(SFMMU_L2_HMERLINKS_SIZE, KM_SLEEP); \ + ASSERT((sfmmup)->sfmmu_hmeregion_links[_l1ix] == NULL); \ + (sfmmup)->sfmmu_hmeregion_links[_l1ix] = lnkp; \ + lnkp = &lnkp[_l2ix]; \ + } \ +} /* * Per-MMU context domain kstats. @@ -390,25 +625,40 @@ struct hat { void *sfmmu_xhat_provider; /* NULL for CPU hat */ cpuset_t sfmmu_cpusran; /* cpu bit mask for efficient xcalls */ struct as *sfmmu_as; /* as this hat provides mapping for */ - ulong_t sfmmu_ttecnt[MMU_PAGE_SIZES]; /* per sz tte counts */ - ulong_t sfmmu_ismttecnt[MMU_PAGE_SIZES]; /* est. ism ttes */ + /* per pgsz private ttecnt + shme rgns ttecnt for rgns not in SCD */ + ulong_t sfmmu_ttecnt[MMU_PAGE_SIZES]; + /* shme rgns ttecnt for rgns in SCD */ + ulong_t sfmmu_scdrttecnt[MMU_PAGE_SIZES]; + /* est. ism ttes that are NOT in a SCD */ + ulong_t sfmmu_ismttecnt[MMU_PAGE_SIZES]; + /* ttecnt for isms that are in a SCD */ + ulong_t sfmmu_scdismttecnt[MMU_PAGE_SIZES]; + /* inflate tsb0 to allow for large page alloc failure in region */ + ulong_t sfmmu_tsb0_4minflcnt; union _h_un { ism_blk_t *sfmmu_iblkp; /* maps to ismhat(s) */ ism_ment_t *sfmmu_imentp; /* ism hat's mapping list */ } h_un; uint_t sfmmu_free:1; /* hat to be freed - set on as_free */ uint_t sfmmu_ismhat:1; /* hat is dummy ism hatid */ - uint_t sfmmu_ctxflushed:1; /* ctx has been flushed */ + uint_t sfmmu_scdhat:1; /* hat is dummy scd hatid */ uchar_t sfmmu_rmstat; /* refmod stats refcnt */ ushort_t sfmmu_clrstart; /* start color bin for page coloring */ ushort_t sfmmu_clrbin; /* per as phys page coloring bin */ ushort_t sfmmu_flags; /* flags */ + uchar_t sfmmu_tteflags; /* pgsz flags */ + uchar_t sfmmu_rtteflags; /* pgsz flags for SRD hmes */ struct tsb_info *sfmmu_tsb; /* list of per as tsbs */ uint64_t sfmmu_ismblkpa; /* pa of sfmmu_iblkp, or -1 */ lock_t sfmmu_ctx_lock; /* sync ctx alloc and invalidation */ kcondvar_t sfmmu_tsb_cv; /* signals TSB swapin or relocation */ uchar_t sfmmu_cext; /* context page size encoding */ uint8_t sfmmu_pgsz[MMU_PAGE_SIZES]; /* ranking for MMU */ + sf_srd_t *sfmmu_srdp; + sf_scd_t *sfmmu_scdp; /* scd this address space belongs to */ + sf_region_map_t sfmmu_region_map; + sf_rgn_link_t *sfmmu_hmeregion_links[SFMMU_L1_HMERLINKS]; + sf_rgn_link_t sfmmu_scd_link; /* link to scd or pending queue */ #ifdef sun4v struct hv_tsb_block sfmmu_hvblock; #endif @@ -427,6 +677,39 @@ struct hat { #define sfmmu_iblk h_un.sfmmu_iblkp #define sfmmu_iment h_un.sfmmu_imentp +#define sfmmu_hmeregion_map sfmmu_region_map.h_rmap_s.hmeregion_map +#define sfmmu_ismregion_map sfmmu_region_map.h_rmap_s.ismregion_map + +#define SF_RGNMAP_ISNULL(sfmmup) \ + (sfrgnmap_isnull(&(sfmmup)->sfmmu_region_map)) +#define SF_HMERGNMAP_ISNULL(sfmmup) \ + 
(sfhmergnmap_isnull(&(sfmmup)->sfmmu_hmeregion_map)) + +struct sf_scd { + sfmmu_t *scd_sfmmup; /* shared context hat */ + /* per pgsz ttecnt for shme rgns in SCD */ + ulong_t scd_rttecnt[MMU_PAGE_SIZES]; + uint_t scd_refcnt; /* address spaces attached to scd */ + sf_region_map_t scd_region_map; /* bit mask of attached segments */ + sf_scd_t *scd_next; /* link pointers for srd_scd list */ + sf_scd_t *scd_prev; + sfmmu_t *scd_sf_list; /* list of doubly linked hat structs */ + kmutex_t scd_mutex; + /* + * Link used to add an scd to the sfmmu_iment list. + */ + ism_ment_t scd_ism_links[SFMMU_MAX_ISM_REGIONS]; +}; + +#define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map +#define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map + +#define scd_hmeregion_map scd_region_map.h_rmap_s.hmeregion_map +#define scd_ismregion_map scd_region_map.h_rmap_s.ismregion_map + +extern int disable_shctx; +extern int shctx_on; + /* * bit mask for managing vac conflicts on large pages. * bit 1 is for uncache flag. @@ -510,63 +793,39 @@ struct ctx_trace { (ASSERT(sfmmu_hat_lock_held((sfmmup))), \ (sfmmup)->sfmmu_flags |= (flags)) -/* - * sfmmu HAT flags - */ -#define HAT_64K_FLAG 0x01 -#define HAT_512K_FLAG 0x02 -#define HAT_4M_FLAG 0x04 -#define HAT_32M_FLAG 0x08 -#define HAT_256M_FLAG 0x10 -#define HAT_4MTEXT_FLAG 0x80 -#define HAT_SWAPPED 0x100 /* swapped out */ -#define HAT_SWAPIN 0x200 /* swapping in */ -#define HAT_BUSY 0x400 /* replacing TSB(s) */ -#define HAT_ISMBUSY 0x800 /* adding/removing/traversing ISM maps */ - -#define HAT_LGPG_FLAGS \ - (HAT_64K_FLAG | HAT_512K_FLAG | HAT_4M_FLAG | \ - HAT_32M_FLAG | HAT_256M_FLAG) +#define SFMMU_TTEFLAGS_ISSET(sfmmup, flags) \ + ((((sfmmup)->sfmmu_tteflags | (sfmmup)->sfmmu_rtteflags) & (flags)) == \ + (flags)) -#define HAT_FLAGS_MASK \ - (HAT_LGPG_FLAGS | HAT_4MTEXT_FLAG | HAT_SWAPPED | \ - HAT_SWAPIN | HAT_BUSY | HAT_ISMBUSY) /* - * Context flags + * sfmmu tte HAT flags, must fit in 8 bits */ -#define CTX_FREE_FLAG 0x1 -#define CTX_FLAGS_MASK 0x1 - -#define CTX_SET_FLAGS(ctx, flag) \ -{ \ - uint32_t old, new; \ - \ - do { \ - new = old = (ctx)->ctx_flags; \ - new &= CTX_FLAGS_MASK; \ - new |= flag; \ - new = cas32(&(ctx)->ctx_flags, old, new); \ - } while (new != old); \ -} - -#define CTX_CLEAR_FLAGS(ctx, flag) \ -{ \ - uint32_t old, new; \ - \ - do { \ - new = old = (ctx)->ctx_flags; \ - new &= CTX_FLAGS_MASK & ~(flag); \ - new = cas32(&(ctx)->ctx_flags, old, new); \ - } while (new != old); \ -} - -#define ctxtoctxnum(ctx) ((ushort_t)((ctx) - ctxs)) +#define HAT_CHKCTX1_FLAG 0x1 +#define HAT_64K_FLAG (0x1 << TTE64K) +#define HAT_512K_FLAG (0x1 << TTE512K) +#define HAT_4M_FLAG (0x1 << TTE4M) +#define HAT_32M_FLAG (0x1 << TTE32M) +#define HAT_256M_FLAG (0x1 << TTE256M) /* - * Defines needed for ctx stealing. + * sfmmu HAT flags, 16 bits at the moment. 
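 * These flags live in the 16-bit sfmmu_flags field and describe hat state,
 * unlike the per-page-size HAT_*_FLAG bits above, which are kept in the
 * 8-bit sfmmu_tteflags (private mappings) and sfmmu_rtteflags (shared
 * region mappings) fields and are tested together via
 * SFMMU_TTEFLAGS_ISSET().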
*/ -#define GET_CTX_RETRY_CNT 100 +#define HAT_4MTEXT_FLAG 0x01 +#define HAT_32M_ISM 0x02 +#define HAT_256M_ISM 0x04 +#define HAT_SWAPPED 0x08 /* swapped out */ +#define HAT_SWAPIN 0x10 /* swapping in */ +#define HAT_BUSY 0x20 /* replacing TSB(s) */ +#define HAT_ISMBUSY 0x40 /* adding/removing/traversing ISM maps */ + +#define HAT_CTX1_FLAG 0x100 /* ISM imap hatflag for ctx1 */ +#define HAT_JOIN_SCD 0x200 /* region is joining scd */ +#define HAT_ALLCTX_INVALID 0x400 /* all per-MMU ctxs are invalidated */ + +#define SFMMU_LGPGS_INUSE(sfmmup) \ + (((sfmmup)->sfmmu_tteflags | (sfmmup)->sfmmu_rtteflags) || \ + ((sfmmup)->sfmmu_iblk != NULL)) /* * Starting with context 0, the first NUM_LOCKED_CTXS contexts @@ -657,31 +916,71 @@ struct pa_hment { * without checking those routines. See HTAG_SFMMUPSZ define. */ +/* + * In private hmeblks hblk_rid field must be SFMMU_INVALID_RID. + */ typedef union { struct { - uint64_t hblk_basepg: 51, /* hme_blk base pg # */ - hblk_rehash: 13; /* rehash number */ - sfmmu_t *sfmmup; + uint64_t hblk_basepg: 51, /* hme_blk base pg # */ + hblk_rehash: 3, /* rehash number */ + hblk_rid: 10; /* hme_blk region id */ + void *hblk_id; } hblk_tag_un; uint64_t htag_tag[2]; } hmeblk_tag; -#define htag_id hblk_tag_un.sfmmup +#define htag_id hblk_tag_un.hblk_id #define htag_bspage hblk_tag_un.hblk_basepg #define htag_rehash hblk_tag_un.hblk_rehash +#define htag_rid hblk_tag_un.hblk_rid + +#endif /* !_ASM */ + +#define HTAG_REHASH_SHIFT 10 +#define HTAG_MAX_RID (((0x1 << HTAG_REHASH_SHIFT) - 1)) +#define HTAG_RID_MASK HTAG_MAX_RID + +/* used for tagging all per sfmmu (i.e. non SRD) private hmeblks */ +#define SFMMU_INVALID_SHMERID HTAG_MAX_RID + +#if SFMMU_INVALID_SHMERID < SFMMU_MAX_HME_REGIONS +#error SFMMU_INVALID_SHMERID < SFMMU_MAX_HME_REGIONS +#endif + +#define SFMMU_IS_SHMERID_VALID(rid) ((rid) != SFMMU_INVALID_SHMERID) + +/* ISM regions */ +#define SFMMU_INVALID_ISMRID 0xff + +#if SFMMU_INVALID_ISMRID < SFMMU_MAX_ISM_REGIONS +#error SFMMU_INVALID_ISMRID < SFMMU_MAX_ISM_REGIONS +#endif + +#define SFMMU_IS_ISMRID_VALID(rid) ((rid) != SFMMU_INVALID_ISMRID) + #define HTAGS_EQ(tag1, tag2) (((tag1.htag_tag[0] ^ tag2.htag_tag[0]) | \ (tag1.htag_tag[1] ^ tag2.htag_tag[1])) == 0) + +/* + * this macro must only be used for comparing tags in shared hmeblks. 
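 * For a shared hmeblk the low HTAG_RID_MASK bits of htag_tag[0] hold the
 * region id (see the hblk_tag layout above: 51 bits of base page, 3 of
 * rehash, 10 of rid), so the comparison first requires a valid rid, then
 * masks the rid out of the first tag word, requires both tag words to
 * otherwise match, and finally checks that the rid is set in the caller's
 * region bitmap (hrmap).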
+ */ +#define HTAGS_EQ_SHME(hmetag, tag, hrmap) \ + (((hmetag).htag_rid != SFMMU_INVALID_SHMERID) && \ + (((((hmetag).htag_tag[0] ^ (tag).htag_tag[0]) & \ + ~HTAG_RID_MASK) | \ + ((hmetag).htag_tag[1] ^ (tag).htag_tag[1])) == 0) && \ + SF_RGNMAP_TEST(hrmap, hmetag.htag_rid)) + #define HME_REHASH(sfmmup) \ ((sfmmup)->sfmmu_ttecnt[TTE512K] != 0 || \ (sfmmup)->sfmmu_ttecnt[TTE4M] != 0 || \ (sfmmup)->sfmmu_ttecnt[TTE32M] != 0 || \ (sfmmup)->sfmmu_ttecnt[TTE256M] != 0) -#endif /* !_ASM */ - #define NHMENTS 8 /* # of hments in an 8k hme_blk */ /* needs to be multiple of 2 */ + #ifndef _ASM #ifdef HBLK_TRACE @@ -730,8 +1029,8 @@ struct hblk_lockcnt_audit { */ struct hme_blk_misc { - ushort_t locked_cnt; /* HAT_LOAD_LOCK ref cnt */ - uint_t notused:10; + uint_t notused:25; + uint_t shared_bit:1; /* set for SRD shared hmeblk */ uint_t xhat_bit:1; /* set for an xhat hme_blk */ uint_t shadow_bit:1; /* set for a shadow hme_blk */ uint_t nucleus_bit:1; /* set for a nucleus hme_blk */ @@ -760,6 +1059,8 @@ struct hme_blk { uint_t hblk_shadow_mask; } hblk_un; + uint_t hblk_lckcnt; + #ifdef HBLK_TRACE kmutex_t hblk_audit_lock; /* lock to protect index */ uint_t hblk_audit_index; /* index into audit_cache */ @@ -769,7 +1070,7 @@ struct hme_blk { struct sf_hment hblk_hme[1]; /* hment array */ }; -#define hblk_lckcnt hblk_misc.locked_cnt +#define hblk_shared hblk_misc.shared_bit #define hblk_xhat_bit hblk_misc.xhat_bit #define hblk_shw_bit hblk_misc.shadow_bit #define hblk_nuc_bit hblk_misc.nucleus_bit @@ -778,7 +1079,7 @@ struct hme_blk { #define hblk_vcnt hblk_un.hblk_counts.hblk_validcnt #define hblk_shw_mask hblk_un.hblk_shadow_mask -#define MAX_HBLK_LCKCNT 0xFFFF +#define MAX_HBLK_LCKCNT 0xFFFFFFFF #define HMEBLK_ALIGN 0x8 /* hmeblk has to be double aligned */ #ifdef HBLK_TRACE @@ -864,7 +1165,6 @@ struct hmehash_bucket { #endif /* !_ASM */ -/* Proc Count Project */ #define SFMMU_PGCNT_MASK 0x3f #define SFMMU_PGCNT_SHIFT 6 #define INVALID_MMU_ID -1 @@ -881,7 +1181,7 @@ struct hmehash_bucket { * bits. */ #define HTAG_SFMMUPSZ 0 /* Not really used for LP64 */ -#define HTAG_REHASHSZ 13 +#define HTAG_BSPAGE_SHIFT 13 /* * Assembly routines need to be able to get to ttesz @@ -918,6 +1218,9 @@ struct hmehash_bucket { #define tte_to_vaddr(hmeblkp, tte) ((caddr_t)(get_hblk_base(hmeblkp) \ + (TTEBYTES(TTE_CSZ(&tte)) * (tte).tte_hmenum))) +#define tte_to_evaddr(hmeblkp, ttep) ((caddr_t)(get_hblk_base(hmeblkp) \ + + (TTEBYTES(TTE_CSZ(ttep)) * ((ttep)->tte_hmenum + 1)))) + #define vaddr_to_vshift(hblktag, vaddr, shwsz) \ ((((uintptr_t)(vaddr) >> MMU_PAGESHIFT) - (hblktag.htag_bspage)) >>\ TTE_BSZS_SHIFT((shwsz) - 1)) @@ -980,6 +1283,9 @@ struct hmehash_bucket { #define KHMEHASH_SZ khmehash_num #define HMENT_HASHAVELEN 4 #define HBLK_RANGE_SHIFT MMU_PAGESHIFT64K /* shift for HBLK_BS_MASK */ +#define HBLK_MIN_TTESZ 1 +#define HBLK_MIN_BYTES MMU_PAGESIZE64K +#define HBLK_MIN_SHIFT MMU_PAGESHIFT64K #define MAX_HASHCNT 5 #define DEFAULT_MAX_HASHCNT 3 @@ -999,12 +1305,12 @@ struct hmehash_bucket { #define HME_HASH_REHASH(ttesz) \ (((ttesz) < TTE512K)? 1 : (ttesz)) -#define HME_HASH_FUNCTION(hatid, vaddr, shift) \ - ((hatid != KHATID)? \ - (&uhme_hash[ (((uintptr_t)(hatid) ^ \ - ((uintptr_t)vaddr >> (shift))) & UHMEHASH_SZ) ]): \ - (&khme_hash[ (((uintptr_t)(hatid) ^ \ - ((uintptr_t)vaddr >> (shift))) & KHMEHASH_SZ) ])) +#define HME_HASH_FUNCTION(hatid, vaddr, shift) \ + ((((void *)hatid) != ((void *)KHATID)) ? 
\ + (&uhme_hash[ (((uintptr_t)(hatid) ^ ((uintptr_t)vaddr >> (shift))) & \ + UHMEHASH_SZ) ]): \ + (&khme_hash[ (((uintptr_t)(hatid) ^ ((uintptr_t)vaddr >> (shift))) & \ + KHMEHASH_SZ) ])) /* * This macro will traverse a hmeblk hash link list looking for an hme_blk @@ -1067,7 +1373,6 @@ struct hmehash_bucket { } \ } - #define SFMMU_HASH_LOCK(hmebp) \ (mutex_enter(&hmebp->hmehash_mutex)) @@ -1091,7 +1396,13 @@ struct hmehash_bucket { #define astosfmmu(as) ((as)->a_hat) #define hblktosfmmu(hmeblkp) ((sfmmu_t *)(hmeblkp)->hblk_tag.htag_id) +#define hblktosrd(hmeblkp) ((sf_srd_t *)(hmeblkp)->hblk_tag.htag_id) #define sfmmutoas(sfmmup) ((sfmmup)->sfmmu_as) + +#define sfmmutohtagid(sfmmup, rid) \ + (((rid) == SFMMU_INVALID_SHMERID) ? (void *)(sfmmup) : \ + (void *)((sfmmup)->sfmmu_srdp)) + /* * We use the sfmmu data structure to keep the per as page coloring info. */ @@ -1256,29 +1567,32 @@ struct tsbe { struct tsbmiss { sfmmu_t *ksfmmup; /* kernel hat id */ sfmmu_t *usfmmup; /* user hat id */ + sf_srd_t *usrdp; /* user's SRD hat id */ struct tsbe *tsbptr; /* hardware computed ptr */ struct tsbe *tsbptr4m; /* hardware computed ptr */ + struct tsbe *tsbscdptr; /* hardware computed ptr */ + struct tsbe *tsbscdptr4m; /* hardware computed ptr */ uint64_t ismblkpa; struct hmehash_bucket *khashstart; struct hmehash_bucket *uhashstart; uint_t khashsz; uint_t uhashsz; uint16_t dcache_line_mask; /* used to flush dcache */ - uint16_t hat_flags; - uint32_t itlb_misses; - uint32_t dtlb_misses; + uchar_t uhat_tteflags; /* private page sizes */ + uchar_t uhat_rtteflags; /* SHME pagesizes */ uint32_t utsb_misses; uint32_t ktsb_misses; uint16_t uprot_traps; uint16_t kprot_traps; - /* * scratch[0] -> TSB_TAGACC * scratch[1] -> TSBMISS_HMEBP * scratch[2] -> TSBMISS_HATID */ uintptr_t scratch[3]; - uint8_t pad[0x10]; + ulong_t shmermap[SFMMU_HMERGNMAP_WORDS]; /* 8 bytes */ + ulong_t scd_shmermap[SFMMU_HMERGNMAP_WORDS]; /* 8 bytes */ + uint8_t pad[48]; /* pad to 64 bytes */ }; /* @@ -1311,10 +1625,9 @@ struct kpmtsbm { uintptr_t pad[1]; }; -extern uint_t tsb_slab_size; -extern uint_t tsb_slab_shift; -extern uint_t tsb_slab_ttesz; -extern uint_t tsb_slab_pamask; +extern size_t tsb_slab_size; +extern uint_t tsb_slab_shift; +extern size_t tsb_slab_mask; #endif /* !_ASM */ @@ -1336,7 +1649,12 @@ extern uint_t tsb_slab_pamask; #define TSB_MIN_SZCODE TSB_8K_SZCODE /* min. supported TSB size */ #define TSB_MIN_OFFSET_MASK (TSB_OFFSET_MASK(TSB_MIN_SZCODE)) -#define UTSB_MAX_SZCODE TSB_1M_SZCODE /* max. supported TSB size */ +#ifdef sun4v +#define UTSB_MAX_SZCODE TSB_256M_SZCODE /* max. supported TSB size */ +#else /* sun4u */ +#define UTSB_MAX_SZCODE TSB_1M_SZCODE /* max. 
supported TSB size */ +#endif /* sun4v */ + #define UTSB_MAX_OFFSET_MASK (TSB_OFFSET_MASK(UTSB_MAX_SZCODE)) #define TSB_FREEMEM_MIN 0x1000 /* 32 mb */ @@ -1351,6 +1669,12 @@ extern uint_t tsb_slab_pamask; #define TSB_1M_SZCODE 7 /* 64k entries */ #define TSB_2M_SZCODE 8 /* 128k entries */ #define TSB_4M_SZCODE 9 /* 256k entries */ +#define TSB_8M_SZCODE 10 /* 512k entries */ +#define TSB_16M_SZCODE 11 /* 1M entries */ +#define TSB_32M_SZCODE 12 /* 2M entries */ +#define TSB_64M_SZCODE 13 /* 4M entries */ +#define TSB_128M_SZCODE 14 /* 8M entries */ +#define TSB_256M_SZCODE 15 /* 16M entries */ #define TSB_ENTRY_SHIFT 4 /* each entry = 128 bits = 16 bytes */ #define TSB_ENTRY_SIZE (1 << 4) #define TSB_START_SIZE 9 @@ -1479,6 +1803,19 @@ extern uint_t tsb_slab_pamask; sethi %hi(0x1000000), reg /* + * This macro constructs a SPARC V9 "jmpl <source reg>, %g0" + * instruction, with the source register specified by the jump_reg_number. + * The jmp opcode [24:19] = 11 1000 and source register is bits [18:14]. + * The instruction is returned in reg. The macro is used to patch in a jmpl + * instruction at runtime. + */ +#define MAKE_JMP_INSTR(jump_reg_number, reg, tmp) \ + sethi %hi(0x81c00000), reg; \ + mov jump_reg_number, tmp; \ + sll tmp, 14, tmp; \ + or reg, tmp, reg + +/* * Macro to get hat per-MMU cnum on this CPU. * sfmmu - In, pass in "sfmmup" from the caller. * cnum - Out, return 'cnum' to the caller @@ -1513,7 +1850,7 @@ extern uint_t tsb_slab_pamask; #define CPU_TSBMISS_AREA(tsbmiss, tmp1) \ CPU_INDEX(tmp1, tsbmiss); /* tmp1 = cpu idx */ \ sethi %hi(tsbmiss_area), tsbmiss; /* tsbmiss base ptr */ \ - sllx tmp1, TSBMISS_SHIFT, tmp1; /* byte offset */ \ + mulx tmp1, TSBMISS_SIZE, tmp1; /* byte offset */ \ or tsbmiss, %lo(tsbmiss_area), tsbmiss; \ add tsbmiss, tmp1, tsbmiss /* tsbmiss area of CPU */ @@ -1756,7 +2093,7 @@ extern void sfmmu_init_tsbs(void); extern caddr_t sfmmu_ktsb_alloc(caddr_t); extern int sfmmu_getctx_pri(void); extern int sfmmu_getctx_sec(void); -extern void sfmmu_setctx_sec(int); +extern void sfmmu_setctx_sec(uint_t); extern void sfmmu_inv_tsb(caddr_t, uint_t); extern void sfmmu_init_ktsbinfo(void); extern int sfmmu_setup_4lp(void); @@ -1773,7 +2110,7 @@ extern int hat_page_relocate(page_t **, page_t **, spgcnt_t *); extern int sfmmu_get_ppvcolor(struct page *); extern int sfmmu_get_addrvcolor(caddr_t); extern int sfmmu_hat_lock_held(sfmmu_t *); -extern void sfmmu_alloc_ctx(sfmmu_t *, int, struct cpu *); +extern int sfmmu_alloc_ctx(sfmmu_t *, int, struct cpu *, int); /* * Functions exported to xhat_sfmmu.c @@ -1821,7 +2158,7 @@ extern uint_t mml_shift; extern uint_t hblk_alloc_dynamic; extern struct tsbmiss tsbmiss_area[NCPU]; extern struct kpmtsbm kpmtsbm_area[NCPU]; -extern int tsb_max_growsize; + #ifndef sun4v extern int dtlb_resv_ttenum; extern caddr_t utsb_vabase; @@ -1839,6 +2176,7 @@ extern uint_t disable_auto_text_large_pages; extern pfn_t sfmmu_kpm_vatopfn(caddr_t); extern void sfmmu_kpm_patch_tlbm(void); extern void sfmmu_kpm_patch_tsbm(void); +extern void sfmmu_patch_shctx(void); extern void sfmmu_kpm_load_tsb(caddr_t, tte_t *, int); extern void sfmmu_kpm_unload_tsb(caddr_t, int); extern void sfmmu_kpm_tsbmtl(short *, uint_t *, int); @@ -1922,6 +2260,12 @@ struct sfmmu_global_stat { int sf_tsb_allocfail; /* # times TSB alloc fail */ int sf_tsb_sectsb_create; /* # times second TSB added */ + int sf_scd_1sttsb_alloc; /* # SCD 1st TSB allocations */ + int sf_scd_2ndtsb_alloc; /* # SCD 2nd TSB allocations */ + int sf_scd_1sttsb_allocfail; /* # SCD 1st TSB alloc 
fail */ + int sf_scd_2ndtsb_allocfail; /* # SCD 2nd TSB alloc fail */ + + int sf_tteload8k; /* calls to sfmmu_tteload */ int sf_tteload64k; /* calls to sfmmu_tteload */ int sf_tteload512k; /* calls to sfmmu_tteload */ @@ -1973,6 +2317,13 @@ struct sfmmu_global_stat { int sf_ctx_inv; /* #times invalidate MMU ctx */ int sf_tlb_reprog_pgsz; /* # times switch TLB pgsz */ + + int sf_region_remap_demap; /* # times shme remap demap */ + + int sf_create_scd; /* # times SCD is created */ + int sf_join_scd; /* # process joined scd */ + int sf_leave_scd; /* # process left scd */ + int sf_destroy_scd; /* # times SCD is destroyed */ }; struct sfmmu_tsbsize_stat { @@ -1986,6 +2337,12 @@ struct sfmmu_tsbsize_stat { int sf_tsbsz_1m; int sf_tsbsz_2m; int sf_tsbsz_4m; + int sf_tsbsz_8m; + int sf_tsbsz_16m; + int sf_tsbsz_32m; + int sf_tsbsz_64m; + int sf_tsbsz_128m; + int sf_tsbsz_256m; }; struct sfmmu_percpu_stat { diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in index eb8c2ca9b4..a157f706bf 100644 --- a/usr/src/uts/sun4/ml/offsets.in +++ b/usr/src/uts/sun4/ml/offsets.in @@ -209,6 +209,7 @@ ism_blk ISMBLK_SIZE ism_map_t ISM_MAP_SZ imap_seg IMAP_SEG imap_vb_shift IMAP_VB_SHIFT + imap_rid IMAP_RID imap_hatflags IMAP_HATFLAGS imap_sz_mask IMAP_SZ_MASK imap_ismhat IMAP_ISMHAT diff --git a/usr/src/uts/sun4/ml/swtch.s b/usr/src/uts/sun4/ml/swtch.s index 98ec1edd55..f8892bd0c7 100644 --- a/usr/src/uts/sun4/ml/swtch.s +++ b/usr/src/uts/sun4/ml/swtch.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -251,23 +251,40 @@ resume(kthread_id_t t) call sfmmu_setctx_sec ! switch to kernel context or %o0, %o1, %o0 - - ba,a,pt %icc, 4f + ba,a,pt %icc, 4f + ! ! Switch to user address space. ! 3: mov %i5, %o0 ! %o0 = sfmmup mov %i1, %o2 ! %o2 = CPU + set SFMMU_PRIVATE, %o3 ! %o3 = sfmmu private flag call sfmmu_alloc_ctx mov %g0, %o1 ! %o1 = allocate flag = 0 +#ifdef sun4v + brz,a,pt %o0, 4f ! %o0 == 0, no private alloc'ed + nop + + ldn [%i5 + SFMMU_SCDP], %o0 ! using shared contexts? + brz,a,pt %o0, 4f + nop + + ldn [%o0 + SCD_SFMMUP], %o0 ! %o0 = scdp->scd_sfmmup + mov %i1, %o2 ! %o2 = CPU + set SFMMU_SHARED, %o3 ! %o3 = sfmmu shared flag + call sfmmu_alloc_ctx + mov 1, %o1 ! %o1 = allocate flag = 1 + +#endif + 4: call sfmmu_load_mmustate ! program MMU registers mov %i5, %o0 - - wrpr %g0, %i4, %pstate ! enable interrupts + wrpr %g0, %i4, %pstate ! enable interrupts + 5: ! ! spin until dispatched thread's mutex has diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c index 7e4841f8c3..4dd6b92398 100644 --- a/usr/src/uts/sun4/os/startup.c +++ b/usr/src/uts/sun4/os/startup.c @@ -675,16 +675,16 @@ startup_init(void) * behavior.) */ char sync_str[] = - "warning @ warning off : sync " - "%%tl-c %%tstate h# %p x! " - "%%g1 h# %p x! %%g2 h# %p x! %%g3 h# %p x! " - "%%g4 h# %p x! %%g5 h# %p x! %%g6 h# %p x! " - "%%g7 h# %p x! %%o0 h# %p x! %%o1 h# %p x! " - "%%o2 h# %p x! %%o3 h# %p x! %%o4 h# %p x! " - "%%o5 h# %p x! %%o6 h# %p x! %%o7 h# %p x! " - "%%tl-c %%tpc h# %p x! %%tl-c %%tnpc h# %p x! " - "%%y h# %p l! %%tl-c %%tt h# %p x! " - "sync ; warning !"; + "warning @ warning off : sync " + "%%tl-c %%tstate h# %p x! " + "%%g1 h# %p x! %%g2 h# %p x! %%g3 h# %p x! " + "%%g4 h# %p x! %%g5 h# %p x! %%g6 h# %p x! " + "%%g7 h# %p x! %%o0 h# %p x! %%o1 h# %p x! " + "%%o2 h# %p x! %%o3 h# %p x! %%o4 h# %p x! " + "%%o5 h# %p x! 
%%o6 h# %p x! %%o7 h# %p x! " + "%%tl-c %%tpc h# %p x! %%tl-c %%tnpc h# %p x! " + "%%y h# %p l! %%tl-c %%tt h# %p x! " + "sync ; warning !"; /* * 20 == num of %p substrings @@ -716,16 +716,16 @@ startup_init(void) * core file later. */ (void) sprintf((char *)bp, sync_str, - (void *)&sync_reg_buf.r_tstate, (void *)&sync_reg_buf.r_g1, - (void *)&sync_reg_buf.r_g2, (void *)&sync_reg_buf.r_g3, - (void *)&sync_reg_buf.r_g4, (void *)&sync_reg_buf.r_g5, - (void *)&sync_reg_buf.r_g6, (void *)&sync_reg_buf.r_g7, - (void *)&sync_reg_buf.r_o0, (void *)&sync_reg_buf.r_o1, - (void *)&sync_reg_buf.r_o2, (void *)&sync_reg_buf.r_o3, - (void *)&sync_reg_buf.r_o4, (void *)&sync_reg_buf.r_o5, - (void *)&sync_reg_buf.r_o6, (void *)&sync_reg_buf.r_o7, - (void *)&sync_reg_buf.r_pc, (void *)&sync_reg_buf.r_npc, - (void *)&sync_reg_buf.r_y, (void *)&sync_tt); + (void *)&sync_reg_buf.r_tstate, (void *)&sync_reg_buf.r_g1, + (void *)&sync_reg_buf.r_g2, (void *)&sync_reg_buf.r_g3, + (void *)&sync_reg_buf.r_g4, (void *)&sync_reg_buf.r_g5, + (void *)&sync_reg_buf.r_g6, (void *)&sync_reg_buf.r_g7, + (void *)&sync_reg_buf.r_o0, (void *)&sync_reg_buf.r_o1, + (void *)&sync_reg_buf.r_o2, (void *)&sync_reg_buf.r_o3, + (void *)&sync_reg_buf.r_o4, (void *)&sync_reg_buf.r_o5, + (void *)&sync_reg_buf.r_o6, (void *)&sync_reg_buf.r_o7, + (void *)&sync_reg_buf.r_pc, (void *)&sync_reg_buf.r_npc, + (void *)&sync_reg_buf.r_y, (void *)&sync_tt); prom_interpret(bp, 0, 0, 0, 0, 0); add_vx_handler("sync", 1, (void (*)(cell_t *))sync_handler); } @@ -1146,7 +1146,7 @@ startup_memlist(void) for (mnode = 1; mnode < max_mem_nodes; mnode++) { alloc_base = alloc_page_freelists(mnode, alloc_base, - ecache_alignsize); + ecache_alignsize); } PRM_DEBUG(alloc_base); } @@ -1177,8 +1177,8 @@ startup_memlist(void) * because there was not enough space within the nucleus. */ kpmptable_sz = (kpm_smallpages == 0) ? - sizeof (kpm_hlk_t) * kpmp_table_sz : - sizeof (kpm_shlk_t) * kpmp_stable_sz; + sizeof (kpm_hlk_t) * kpmp_table_sz : + sizeof (kpm_shlk_t) * kpmp_stable_sz; alloc_sz = roundup(kpmptable_sz, alloc_alignsize); alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, @@ -2158,7 +2158,7 @@ startup_vm(void) * kpm segment */ segmap_kpm = kpm_enable && - segmap_kpm && PAGESIZE == MAXBSIZE; + segmap_kpm && PAGESIZE == MAXBSIZE; if (kpm_enable) { rw_enter(&kas.a_lock, RW_WRITER); @@ -2617,12 +2617,12 @@ memseg_list_add(struct memseg *memsegp) if (kpm_enable) { memsegp->nextpa = (memsegp->next) ? 
- va_to_pa(memsegp->next) : MSEG_NULLPTR_PA; + va_to_pa(memsegp->next) : MSEG_NULLPTR_PA; if (prev_memsegp != &memsegs) { struct memseg *msp; msp = (struct memseg *)((caddr_t)prev_memsegp - - offsetof(struct memseg, next)); + offsetof(struct memseg, next)); msp->nextpa = va_to_pa(memsegp); } else { memsegspa = va_to_pa(memsegs); @@ -2722,14 +2722,14 @@ kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages, msp->kpm_pages = (kpm_page_t *)kpm_pp - 1; kpm_pp = (uintptr_t) - ((kpm_page_t *)kpm_pp - + nelem - 1); + ((kpm_page_t *)kpm_pp + + nelem - 1); } else { msp->kpm_spages = (kpm_spage_t *)kpm_pp - 1; kpm_pp = (uintptr_t) - ((kpm_spage_t *)kpm_pp - + nelem - 1); + ((kpm_spage_t *)kpm_pp + + nelem - 1); } nelem_used += nelem - 1; @@ -2738,14 +2738,14 @@ kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages, msp->kpm_pages = (kpm_page_t *)kpm_pp; kpm_pp = (uintptr_t) - ((kpm_page_t *)kpm_pp - + nelem); + ((kpm_page_t *)kpm_pp + + nelem); } else { msp->kpm_spages = (kpm_spage_t *)kpm_pp; kpm_pp = (uintptr_t) - ((kpm_spage_t *) - kpm_pp + nelem); + ((kpm_spage_t *) + kpm_pp + nelem); } nelem_used += nelem; } @@ -2754,11 +2754,11 @@ kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages, if (kpm_smallpages == 0) { msp->kpm_pages = (kpm_page_t *)kpm_pp; kpm_pp = (uintptr_t) - ((kpm_page_t *)kpm_pp + nelem); + ((kpm_page_t *)kpm_pp + nelem); } else { msp->kpm_spages = (kpm_spage_t *)kpm_pp; kpm_pp = (uintptr_t) - ((kpm_spage_t *)kpm_pp + nelem); + ((kpm_spage_t *)kpm_pp + nelem); } nelem_used = nelem; } @@ -2823,9 +2823,9 @@ kvm_init(void) (void) segkmem_create(&kvalloc); if (kmem64_base) { - (void) seg_attach(&kas, (caddr_t)kmem64_base, - (size_t)(kmem64_end - kmem64_base), &kmem64); - (void) segkmem_create(&kmem64); + (void) seg_attach(&kas, (caddr_t)kmem64_base, + (size_t)(kmem64_end - kmem64_base), &kmem64); + (void) segkmem_create(&kmem64); } /* @@ -2874,7 +2874,9 @@ char obp_tte_str[] = "h# %x constant HMEBP_HBLK " "h# %x constant HMEBUCKET_SIZE " "h# %x constant HTAG_SFMMUPSZ " - "h# %x constant HTAG_REHASHSZ " + "h# %x constant HTAG_BSPAGE_SHIFT " + "h# %x constant HTAG_REHASH_SHIFT " + "h# %x constant SFMMU_INVALID_SHMERID " "h# %x constant mmu_hashcnt " "h# %p constant uhme_hash " "h# %p constant khme_hash " @@ -2949,8 +2951,10 @@ char obp_tte_str[] = "; " ": HME_HASH_TAG ( sfmmup rehash addr -- hblktag ) " - " over HME_HASH_SHIFT HME_HASH_BSPAGE ( sfmmup rehash bspage ) " - " HTAG_REHASHSZ << or nip ( hblktag ) " + " over HME_HASH_SHIFT HME_HASH_BSPAGE ( sfmmup rehash bspage ) " + " HTAG_BSPAGE_SHIFT << ( sfmmup rehash htag-bspage )" + " swap HTAG_REHASH_SHIFT << or ( sfmmup htag-bspage-rehash )" + " SFMMU_INVALID_SHMERID or nip ( hblktag ) " "; " ": HBLK_TO_TTEP ( hmeblkp addr -- ttep ) " @@ -3023,7 +3027,9 @@ create_va_to_tte(void) OFFSET(struct hmehash_bucket, hmeh_nextpa), sizeof (struct hmehash_bucket), HTAG_SFMMUPSZ, - HTAG_REHASHSZ, + HTAG_BSPAGE_SHIFT, + HTAG_REHASH_SHIFT, + SFMMU_INVALID_SHMERID, mmu_hashcnt, (caddr_t)va_to_pa((caddr_t)uhme_hash), (caddr_t)va_to_pa((caddr_t)khme_hash), @@ -3136,8 +3142,8 @@ do_prom_version_check(void) pnode_t node; char buf[64]; static char drev[] = "Down-rev firmware detected%s\n" - "\tPlease upgrade to the following minimum version:\n" - "\t\t%s\n"; + "\tPlease upgrade to the following minimum version:\n" + "\t\t%s\n"; i = prom_version_check(buf, sizeof (buf), &node); diff --git a/usr/src/uts/sun4/vm/sfmmu.c b/usr/src/uts/sun4/vm/sfmmu.c index 24ee075277..9a0b8208de 100644 --- a/usr/src/uts/sun4/vm/sfmmu.c +++ 
b/usr/src/uts/sun4/vm/sfmmu.c @@ -195,6 +195,10 @@ hat_kern_setup(void) } } + if (!shctx_on || disable_shctx) { + sfmmu_patch_shctx(); + } + /* * The 8K-indexed kernel TSB space is used to hold * translations below... @@ -1212,9 +1216,6 @@ sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, NULL, NULL, vmflag)) == NULL)) return (NULL); - /* If we ever don't want TSB slab-sized pages, this will panic */ - ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0); - if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) { if (inaddr == NULL) vmem_xfree(vmp, addr, size); @@ -1288,8 +1289,6 @@ sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size) pgcnt_t pgs_left = npages; page_t *rootpp = NULL; - ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0); - hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK); for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) { diff --git a/usr/src/uts/sun4/vm/vm_dep.c b/usr/src/uts/sun4/vm/vm_dep.c index 4e249ece6d..763b3dc737 100644 --- a/usr/src/uts/sun4/vm/vm_dep.c +++ b/usr/src/uts/sun4/vm/vm_dep.c @@ -52,20 +52,17 @@ /* * These variables are set by module specific config routines. - * They are only set by modules which will use physical cache page coloring - * and/or virtual cache page coloring. + * They are only set by modules which will use physical cache page coloring. */ int do_pg_coloring = 0; -int do_virtual_coloring = 0; /* * These variables can be conveniently patched at kernel load time to - * prevent do_pg_coloring or do_virtual_coloring from being enabled by + * prevent do_pg_coloring from being enabled by * module specific config routines. */ int use_page_coloring = 1; -int use_virtual_coloring = 1; /* * initialized by page_coloring_init() diff --git a/usr/src/uts/sun4u/cpu/opl_olympus.c b/usr/src/uts/sun4u/cpu/opl_olympus.c index c39cac38a9..1dc8c4fad4 100644 --- a/usr/src/uts/sun4u/cpu/opl_olympus.c +++ b/usr/src/uts/sun4u/cpu/opl_olympus.c @@ -493,7 +493,6 @@ void cpu_setup(void) { extern int at_flags; - extern int disable_delay_tlb_flush, delay_tlb_flush; extern int cpc_has_overflow_intr; uint64_t cpu0_log; extern uint64_t opl_cpu0_err_log; @@ -536,8 +535,6 @@ cpu_setup(void) if (use_page_coloring) { do_pg_coloring = 1; - if (use_virtual_coloring) - do_virtual_coloring = 1; } isa_list = @@ -579,12 +576,6 @@ cpu_setup(void) cpc_has_overflow_intr = 1; /* - * Use SPARC64-VI flush-all support - */ - if (!disable_delay_tlb_flush) - delay_tlb_flush = 1; - - /* * Declare that this architecture/cpu combination does not support * fpRAS. */ @@ -920,7 +911,7 @@ mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) * the two most used page sizes changes and we're using * large pages in this process. */ - if (sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) { + if (SFMMU_LGPGS_INUSE(sfmmup)) { /* Sort page sizes. */ for (i = 0; i < mmu_page_sizes; i++) { sortcnt[i] = ttecnt[i]; diff --git a/usr/src/uts/sun4u/cpu/spitfire.c b/usr/src/uts/sun4u/cpu/spitfire.c index 786f942360..40fd06261d 100644 --- a/usr/src/uts/sun4u/cpu/spitfire.c +++ b/usr/src/uts/sun4u/cpu/spitfire.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -451,8 +451,6 @@ cpu_setup(void) if (use_page_coloring) { do_pg_coloring = 1; - if (use_virtual_coloring) - do_virtual_coloring = 1; } /* diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c index dcb848f542..13999fd361 100644 --- a/usr/src/uts/sun4u/cpu/us3_common.c +++ b/usr/src/uts/sun4u/cpu/us3_common.c @@ -474,7 +474,6 @@ void cpu_setup(void) { extern int at_flags; - extern int disable_delay_tlb_flush, delay_tlb_flush; extern int cpc_has_overflow_intr; /* @@ -507,8 +506,6 @@ cpu_setup(void) if (use_page_coloring) { do_pg_coloring = 1; - if (use_virtual_coloring) - do_virtual_coloring = 1; } isa_list = @@ -552,12 +549,6 @@ cpu_setup(void) */ cpc_has_overflow_intr = 1; - /* - * Use cheetah flush-all support - */ - if (!disable_delay_tlb_flush) - delay_tlb_flush = 1; - #if defined(CPU_IMP_DUAL_PAGESIZE) /* * Use Cheetah+ and later dual page size support. diff --git a/usr/src/uts/sun4u/cpu/us3_common_mmu.c b/usr/src/uts/sun4u/cpu/us3_common_mmu.c index 9fd37a9ed5..892be30295 100644 --- a/usr/src/uts/sun4u/cpu/us3_common_mmu.c +++ b/usr/src/uts/sun4u/cpu/us3_common_mmu.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -151,9 +151,9 @@ mmu_init_large_pages(size_t ism_pagesize) if (cpu_impl_dual_pgsz == 0) { /* disable_dual_pgsz flag */ pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); mmu_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); mmu_disable_auto_data_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); return; } @@ -161,29 +161,29 @@ mmu_init_large_pages(size_t ism_pagesize) case MMU_PAGESIZE4M: pan_disable_large_pages = (1 << TTE256M); mmu_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); mmu_disable_auto_data_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); break; case MMU_PAGESIZE32M: pan_disable_large_pages = (1 << TTE256M); mmu_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE256M)); + (1 << TTE512K) | (1 << TTE256M)); mmu_disable_auto_data_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); + (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); adjust_data_maxlpsize(ism_pagesize); break; case MMU_PAGESIZE256M: pan_disable_large_pages = (1 << TTE32M); mmu_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M)); + (1 << TTE512K) | (1 << TTE32M)); mmu_disable_auto_data_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); + (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); adjust_data_maxlpsize(ism_pagesize); break; default: cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", - ism_pagesize); + ism_pagesize); break; } } @@ -262,24 +262,37 @@ mmu_fixup_large_pages(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz) /* * Don't program 2nd dtlb for kernel and ism hat */ - ASSERT(hat->sfmmu_ismhat == NULL); + ASSERT(hat->sfmmu_ismhat == 0); ASSERT(hat != ksfmmup); ASSERT(cpu_impl_dual_pgsz == 1); - ASSERT((!SFMMU_FLAGS_ISSET(hat, HAT_32M_FLAG)) || - (!SFMMU_FLAGS_ISSET(hat, HAT_256M_FLAG))); + 
ASSERT(!SFMMU_TTEFLAGS_ISSET(hat, HAT_32M_FLAG) || + !SFMMU_TTEFLAGS_ISSET(hat, HAT_256M_FLAG)); + ASSERT(!SFMMU_TTEFLAGS_ISSET(hat, HAT_256M_FLAG) || + !SFMMU_TTEFLAGS_ISSET(hat, HAT_32M_FLAG)); + ASSERT(!SFMMU_FLAGS_ISSET(hat, HAT_32M_ISM) || + !SFMMU_FLAGS_ISSET(hat, HAT_256M_ISM)); + ASSERT(!SFMMU_FLAGS_ISSET(hat, HAT_256M_ISM) || + !SFMMU_FLAGS_ISSET(hat, HAT_32M_ISM)); + + if (SFMMU_TTEFLAGS_ISSET(hat, HAT_32M_FLAG) || + (ttecnt[TTE32M] != 0) || + SFMMU_FLAGS_ISSET(hat, HAT_32M_ISM)) { - if ((SFMMU_FLAGS_ISSET(hat, HAT_32M_FLAG)) || (ttecnt[TTE32M] != 0)) { spgsz = pgsz1; pgsz1 = TTE32M; if (pgsz0 == TTE32M) pgsz0 = spgsz; - } else if ((SFMMU_FLAGS_ISSET(hat, HAT_256M_FLAG)) || - (ttecnt[TTE256M] != 0)) { + + } else if (SFMMU_TTEFLAGS_ISSET(hat, HAT_256M_FLAG) || + (ttecnt[TTE256M] != 0) || + SFMMU_FLAGS_ISSET(hat, HAT_256M_ISM)) { + spgsz = pgsz1; pgsz1 = TTE256M; if (pgsz0 == TTE256M) pgsz0 = spgsz; + } else if ((pgsz1 == TTE512K) || (pgsz1 == TTE4M)) { if ((pgsz0 != TTE512K) && (pgsz0 != TTE4M)) { spgsz = pgsz0; @@ -470,7 +483,7 @@ mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) * large pages in this process, except for Panther 32M/256M pages, * which the Panther T16 does not support. */ - if (sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) { + if (SFMMU_LGPGS_INUSE(sfmmup)) { /* Sort page sizes. */ for (i = 0; i < mmu_page_sizes; i++) { sortcnt[i] = ttecnt[i]; @@ -569,7 +582,7 @@ mmu_init_kernel_pgsz(struct hat *hat) hat->sfmmu_cext = new_cext_primary; kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) | - ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT); + ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT); } size_t @@ -604,7 +617,7 @@ mmu_get_kernel_lpsize(size_t lpsize) if (lpsize == TTEBYTES(p_lpgsz->tte) && (heaplp_use_dt512 == -1 || - heaplp_use_dt512 == p_lpgsz->use_dt512)) { + heaplp_use_dt512 == p_lpgsz->use_dt512)) { tte = p_lpgsz->tte; heaplp_use_dt512 = p_lpgsz->use_dt512; diff --git a/usr/src/uts/sun4u/ml/mach_offsets.in b/usr/src/uts/sun4u/ml/mach_offsets.in index 6f1f317f67..9ce9aea4fb 100644 --- a/usr/src/uts/sun4u/ml/mach_offsets.in +++ b/usr/src/uts/sun4u/ml/mach_offsets.in @@ -18,7 +18,7 @@ \ \ CDDL HEADER END \ -\ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +\ Copyright 2007 Sun Microsystems, Inc. All rights reserved. \ Use is subject to license terms. 
\ \ offsets.in: input file to produce assym.h using the stabs program @@ -103,7 +103,7 @@ hat HAT_SIZE sfmmu_cpusran sfmmu_tsb sfmmu_ismblkpa - sfmmu_flags + sfmmu_tteflags sfmmu_cext sfmmu_ctx_lock sfmmu_ctxs @@ -118,20 +118,19 @@ sfmmu_global_stat HATSTAT_SIZE sf_hment SFHME_SIZE SFHME_SHIFT hme_tte SFHME_TTE -tsbmiss TSBMISS_SIZE TSBMISS_SHIFT - tsbptr TSBMISS_TSBPTR - tsbptr4m TSBMISS_TSBPTR4M +tsbmiss TSBMISS_SIZE ksfmmup TSBMISS_KHATID usfmmup TSBMISS_UHATID - khashsz TSBMISS_KHASHSZ + tsbptr TSBMISS_TSBPTR + tsbptr4m TSBMISS_TSBPTR4M + ismblkpa TSBMISS_ISMBLKPA khashstart TSBMISS_KHASHSTART - dcache_line_mask TSBMISS_DMASK - uhashsz TSBMISS_UHASHSZ uhashstart TSBMISS_UHASHSTART - hat_flags TSBMISS_HATFLAGS - ismblkpa TSBMISS_ISMBLKPA - itlb_misses TSBMISS_ITLBMISS - dtlb_misses TSBMISS_DTLBMISS + khashsz TSBMISS_KHASHSZ + uhashsz TSBMISS_UHASHSZ + dcache_line_mask TSBMISS_DMASK + uhat_tteflags TSBMISS_UTTEFLAGS + uhat_rtteflags TSBMISS_URTTEFLAGS utsb_misses TSBMISS_UTSBMISS ktsb_misses TSBMISS_KTSBMISS uprot_traps TSBMISS_UPROTS @@ -141,8 +140,6 @@ tsbmiss TSBMISS_SIZE TSBMISS_SHIFT \#define TSB_TAGACC (0 * TSBMISS_SCRATCH_INCR) \#define TSBMISS_HMEBP (1 * TSBMISS_SCRATCH_INCR) \#define TSBMISS_HATID (2 * TSBMISS_SCRATCH_INCR) -\#define TSBMISS_XMMURET (3 * TSBMISS_SCRATCH_INCR) -\#define TSBMISS_XMMUPTR (4 * TSBMISS_SCRATCH_INCR) kpmtsbm KPMTSBM_SIZE KPMTSBM_SHIFT vbase KPMTSBM_VBASE diff --git a/usr/src/uts/sun4u/ml/trap_table.s b/usr/src/uts/sun4u/ml/trap_table.s index 42fd217205..160106d9d7 100644 --- a/usr/src/uts/sun4u/ml/trap_table.s +++ b/usr/src/uts/sun4u/ml/trap_table.s @@ -112,20 +112,6 @@ #endif /* - * This macro is used to update per cpu mmu stats in perf critical - * paths. It is only enabled in debug kernels or if SFMMU_STAT_GATHER - * is defined. - */ -#if defined(DEBUG) || defined(SFMMU_STAT_GATHER) -#define HAT_PERCPU_DBSTAT(stat) \ - mov stat, %g1 ;\ - ba stat_mmu ;\ - rd %pc, %g7 -#else -#define HAT_PERCPU_DBSTAT(stat) -#endif /* DEBUG || SFMMU_STAT_GATHER */ - -/* * This first set are funneled to trap() with %tt as the type. * Trap will then either panic or send the user a signal. 
*/ @@ -1081,7 +1067,6 @@ tt1_dtlbmiss: #define DTLB_MISS(table_name) ;\ .global table_name/**/_dtlbmiss ;\ table_name/**/_dtlbmiss: ;\ - HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */ ;\ mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\ ldxa [%g0]ASI_DMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\ ldxa [%g6]ASI_DMMU, %g2 /* g2 = tag access */ ;\ @@ -1097,7 +1082,7 @@ table_name/**/_dtlbmiss: ;\ ldda [%g1]ASI_QUAD_LDD_PHYS, %g4 /* g4 = tag, %g5 data */;\ cmp %g4, %g7 ;\ bne,pn %xcc, sfmmu_tsb_miss_tt /* no 4M TSB, miss */ ;\ - mov %g0, %g3 /* clear 4M tsbe ptr */ ;\ + mov -1, %g3 /* set 4M tsbe ptr to -1 */ ;\ TT_TRACE(trace_tsbhit) /* 2 instr ifdef TRAPTRACE */ ;\ stxa %g5, [%g0]ASI_DTLB_IN /* trapstat expects TTE */ ;\ retry /* in %g5 */ ;\ @@ -1110,12 +1095,14 @@ table_name/**/_dtlbmiss: ;\ unimp 0 ;\ unimp 0 ;\ unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ .align 128 #else /* UTSB_PHYS */ #define DTLB_MISS(table_name) ;\ .global table_name/**/_dtlbmiss ;\ table_name/**/_dtlbmiss: ;\ - HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */ ;\ mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\ ldxa [%g0]ASI_DMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\ ldxa [%g6]ASI_DMMU, %g2 /* g2 = tag access */ ;\ @@ -1129,7 +1116,7 @@ table_name/**/_dtlbmiss: ;\ ldda [%g1]ASI_NQUAD_LD, %g4 /* g4 = tag, %g5 data */ ;\ cmp %g4, %g7 ;\ bne,pn %xcc, sfmmu_tsb_miss_tt /* no 4M TSB, miss */ ;\ - mov %g0, %g3 /* clear 4M tsbe ptr */ ;\ + mov -1, %g3 /* set 4M tsbe ptr to -1 */ ;\ TT_TRACE(trace_tsbhit) /* 2 instr ifdef TRAPTRACE */ ;\ stxa %g5, [%g0]ASI_DTLB_IN /* trapstat expects TTE */ ;\ retry /* in %g5 */ ;\ @@ -1138,12 +1125,15 @@ table_name/**/_dtlbmiss: ;\ unimp 0 ;\ unimp 0 ;\ unimp 0 ;\ - unimp 0 ;\ - unimp 0 ;\ - unimp 0 ;\ - unimp 0 ;\ - unimp 0 ;\ - unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ .align 128 #endif /* UTSB_PHYS */ @@ -1169,7 +1159,6 @@ tt1_itlbmiss: #define ITLB_MISS(table_name) \ .global table_name/**/_itlbmiss ;\ table_name/**/_itlbmiss: ;\ - HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */ ;\ mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\ ldxa [%g0]ASI_IMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\ ldxa [%g6]ASI_IMMU, %g2 /* g2 = tag access */ ;\ @@ -1185,7 +1174,7 @@ table_name/**/_itlbmiss: ;\ ldda [%g1]ASI_QUAD_LDD_PHYS, %g4 /* g4 = tag, g5 = data */ ;\ cmp %g4, %g7 ;\ bne,pn %xcc, sfmmu_tsb_miss_tt /* br if 8k ptr miss */ ;\ - mov %g0, %g3 /* no 4M TSB */ ;\ + mov -1, %g3 /* set 4M TSB ptr to -1 */ ;\ andcc %g5, TTE_EXECPRM_INT, %g0 /* check execute bit */ ;\ bz,pn %icc, exec_fault ;\ nop ;\ @@ -1198,12 +1187,14 @@ table_name/**/_itlbmiss: ;\ unimp 0 ;\ unimp 0 ;\ unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ .align 128 #else /* UTSB_PHYS */ #define ITLB_MISS(table_name) \ .global table_name/**/_itlbmiss ;\ table_name/**/_itlbmiss: ;\ - HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */ ;\ mov MMU_TAG_ACCESS, %g6 /* select tag acc */ ;\ ldxa [%g0]ASI_IMMU_TSB_8K, %g1 /* g1 = tsbe ptr */ ;\ ldxa [%g6]ASI_IMMU, %g2 /* g2 = tag access */ ;\ @@ -1217,7 +1208,7 @@ table_name/**/_itlbmiss: ;\ ldda [%g1]ASI_NQUAD_LD, %g4 /* g4 = tag, g5 = data */ ;\ cmp %g4, %g7 ;\ bne,pn %xcc, sfmmu_tsb_miss_tt /* br if 8k ptr miss */ ;\ - mov %g0, %g3 /* no 4M TSB */ ;\ + mov -1, %g3 /* set 4M TSB ptr to -1 */ ;\ andcc %g5, TTE_EXECPRM_INT, %g0 /* check execute bit */ ;\ bz,pn %icc, exec_fault ;\ nop ;\ @@ -1229,9 +1220,12 @@ table_name/**/_itlbmiss: ;\ unimp 0 ;\ unimp 0 ;\ unimp 0 ;\ - 
unimp 0 ;\ - unimp 0 ;\ - unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ + unimp 0 ;\ .align 128 #endif /* UTSB_PHYS */ @@ -2926,21 +2920,6 @@ trace_dataprot: #endif /* TRAPTRACE */ /* - * expects offset into tsbmiss area in %g1 and return pc in %g7 - */ -stat_mmu: - CPU_INDEX(%g5, %g6) - sethi %hi(tsbmiss_area), %g6 - sllx %g5, TSBMISS_SHIFT, %g5 - or %g6, %lo(tsbmiss_area), %g6 - add %g6, %g5, %g6 /* g6 = tsbmiss area */ - ld [%g6 + %g1], %g5 - add %g5, 1, %g5 - jmp %g7 + 4 - st %g5, [%g6 + %g1] - - -/* * fast_trap_done, fast_trap_done_chk_intr: * * Due to the design of UltraSPARC pipeline, pending interrupts are not diff --git a/usr/src/uts/sun4u/sys/machsystm.h b/usr/src/uts/sun4u/sys/machsystm.h index 317b9e78b9..86885bbef4 100644 --- a/usr/src/uts/sun4u/sys/machsystm.h +++ b/usr/src/uts/sun4u/sys/machsystm.h @@ -269,9 +269,7 @@ extern int cpu_setsize; /* Maximum ecache setsize of configured cpus */ * VM */ extern int do_pg_coloring; -extern int do_virtual_coloring; extern int use_page_coloring; -extern int use_virtual_coloring; extern uint_t vac_colors_mask; extern int ndata_alloc_page_freelists(struct memlist *, int); diff --git a/usr/src/uts/sun4u/vm/mach_sfmmu.h b/usr/src/uts/sun4u/vm/mach_sfmmu.h index 66640afb9e..45f6480715 100644 --- a/usr/src/uts/sun4u/vm/mach_sfmmu.h +++ b/usr/src/uts/sun4u/vm/mach_sfmmu.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -70,6 +70,19 @@ extern "C" { #ifdef _ASM /* + * This macro is used to set private secondary context register in + * sfmmu_alloc_ctx(). + * Input: + * cnum : cnum + * arg2 : unused + */ +#define SET_SECCTX(cnum, arg2, tmp1, tmp2) \ + mov MMU_SCONTEXT, tmp1; \ + sethi %hi(FLUSH_ADDR), tmp2; \ + stxa cnum, [tmp1]ASI_MMU_CTX; \ + flush tmp2 + +/* * This macro is used in the MMU code to check if TL should be lowered from * 2 to 1 to pop trapstat's state. See the block comment in trapstat.c * for details. @@ -738,6 +751,12 @@ label/**/1: \ label/**/1: \ /* END CSTYLED */ +/* + * Macro to get SCD shared hme map on sun4v platforms + * (not applicable to sun4u platforms) + */ +#define GET_SCDSHMERMAP(tsbarea, hmeblkpa, hatid, hmemisc) + #ifndef TRAPTRACE /* * Same as above, with the following additions: diff --git a/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s b/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s index 2bd7ee732f..18d2c9dc6f 100644 --- a/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s +++ b/usr/src/uts/sun4u/vm/mach_sfmmu_asm.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -86,7 +86,7 @@ sfmmu_getctx_sec() /* ARGSUSED */ void -sfmmu_setctx_sec(int ctx) +sfmmu_setctx_sec(uint_t ctx) {} /* ARGSUSED */ @@ -509,9 +509,9 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup) 6: ldx [%o0 + SFMMU_ISMBLKPA], %o1 ! copy members of sfmmu CPU_TSBMISS_AREA(%o2, %o3) ! we need to access from stx %o1, [%o2 + TSBMISS_ISMBLKPA] ! sfmmu_tsb_miss into the - lduh [%o0 + SFMMU_FLAGS], %o3 ! per-CPU tsbmiss area. + ldub [%o0 + SFMMU_TTEFLAGS], %o3 ! per-CPU tsbmiss area. 
stx %o0, [%o2 + TSBMISS_UHATID] - stuh %o3, [%o2 + TSBMISS_HATFLAGS] + stub %o3, [%o2 + TSBMISS_UTTEFLAGS] 3: retl nop diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c index c685046a0b..1abca2c394 100644 --- a/usr/src/uts/sun4v/cpu/niagara.c +++ b/usr/src/uts/sun4v/cpu/niagara.c @@ -144,6 +144,8 @@ cpu_setup(void) * Niagara has a performance counter overflow interrupt */ cpc_has_overflow_intr = 1; + + shctx_on = 0; } #define MB(n) ((n) * 1024 * 1024) diff --git a/usr/src/uts/sun4v/ml/mach_interrupt.s b/usr/src/uts/sun4v/ml/mach_interrupt.s index 4697b55420..b585ac6c68 100644 --- a/usr/src/uts/sun4v/ml/mach_interrupt.s +++ b/usr/src/uts/sun4v/ml/mach_interrupt.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -41,6 +41,8 @@ #include <sys/machasi.h> #include <sys/scb.h> #include <sys/error.h> +#include <sys/mmu.h> +#include <vm/hat_sfmmu.h> #define INTR_REPORT_SIZE 64 #ifdef TRAPTRACE @@ -75,7 +77,7 @@ cpu_mondo(void) mov CPU_MONDO_Q_TL, %g4 ldxa [%g4]ASI_QUEUE, %g7 ! %g7 = tail ptr cmp %g6, %g7 - be,pn %xcc, 0f ! head == tail + be,pn %xcc, 3f ! head == tail nop CPU_ADDR(%g1,%g2) @@ -133,18 +135,64 @@ cpu_mondo(void) */ set KERNELBASE, %g4 cmp %g5, %g4 - bl,a,pn %xcc, 1f ! branch if bad %pc - nop + bl,pn %xcc, 2f ! branch if bad %pc + nop + + /* + * If this platform supports shared contexts and we are jumping + * to OBP code, then we need to invalidate both contexts to prevent OBP + * from corrupting the shared context registers. + * + * If shared contexts are not supported then the next two instructions + * will be patched with: + * + * jmp %g5 + * nop + * + */ + .global sfmmu_shctx_cpu_mondo_patch +sfmmu_shctx_cpu_mondo_patch: + set OFW_START_ADDR, %g4 ! Check if this a call into OBP? + cmp %g5, %g4 + bl,pt %xcc, 1f + nop + set OFW_END_ADDR, %g4 + cmp %g5, %g4 + bg,pn %xcc, 1f + nop + mov MMU_PCONTEXT, %g3 + ldxa [%g3]ASI_MMU_CTX, %g4 + cmp %g4, INVALID_CONTEXT ! Check if we are in kernel mode + ble,pn %xcc, 1f ! or the primary context is invalid + nop + set INVALID_CONTEXT, %g4 ! Invalidate contexts - compatability + stxa %g4, [%g3]ASI_MMU_CTX ! mode ensures shared contexts are also + mov MMU_SCONTEXT, %g3 ! invalidated. + stxa %g4, [%g3]ASI_MMU_CTX + membar #Sync + mov %o0, %g3 ! save output regs + mov %o1, %g4 + mov %o5, %g6 + clr %o0 ! Invalidate tsbs, set ntsb = 0 + clr %o1 ! and HV_TSB_INFO_PA = 0 + mov MMU_TSB_CTXNON0, %o5 + ta FAST_TRAP ! set TSB info for user process + brnz,a,pn %o0, ptl1_panic + mov PTL1_BAD_HCALL, %g1 + mov %g3, %o0 ! restore output regs + mov %g4, %o1 + mov %g6, %o5 +1: jmp %g5 ! jump to traphandler nop -1: +2: ! invalid trap handler, discard it for now set cpu_mondo_inval, %g4 ldx [%g4], %g5 inc %g5 stx %g5, [%g4] -0: +3: retry /* Never Reached */ SET_SIZE(cpu_mondo) diff --git a/usr/src/uts/sun4v/ml/mach_locore.s b/usr/src/uts/sun4v/ml/mach_locore.s index 1c80729869..fdd6b28992 100644 --- a/usr/src/uts/sun4v/ml/mach_locore.s +++ b/usr/src/uts/sun4v/ml/mach_locore.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -798,7 +798,17 @@ have_win: sethi %hi(FLUSH_ADDR), %g3 stxa %g2, [%g1]ASI_MMU_CTX flush %g3 ! flush required by immu - + ! + ! If shared context support is not enabled, then the next five + ! 
instructions will be patched with nop instructions. + ! + .global sfmmu_shctx_user_rtt_patch +sfmmu_shctx_user_rtt_patch: + mov MMU_SCONTEXT1, %g1 + ldxa [%g1]ASI_MMU_CTX, %g2 + mov MMU_PCONTEXT1, %g1 + stxa %g2, [%g1]ASI_MMU_CTX + flush %g3 ! ! setup trap regs ! diff --git a/usr/src/uts/sun4v/ml/mach_offsets.in b/usr/src/uts/sun4v/ml/mach_offsets.in index b58ed79949..362d419c82 100644 --- a/usr/src/uts/sun4v/ml/mach_offsets.in +++ b/usr/src/uts/sun4v/ml/mach_offsets.in @@ -18,7 +18,7 @@ \ \ CDDL HEADER END \ -\ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +\ Copyright 2007 Sun Microsystems, Inc. All rights reserved. \ Use is subject to license terms. \ \ offsets.in: input file to produce assym.h using the stabs program @@ -137,12 +137,20 @@ hat HAT_SIZE sfmmu_cpusran sfmmu_tsb sfmmu_ismblkpa - sfmmu_flags + sfmmu_tteflags + sfmmu_rtteflags + sfmmu_srdp + sfmmu_region_map.h_rmap_s.hmeregion_map SFMMU_HMERMAP + sfmmu_scdp sfmmu_hvblock sfmmu_cext sfmmu_ctx_lock sfmmu_ctxs +sf_scd SCD_SIZE + scd_sfmmup + scd_region_map.h_rmap_s.hmeregion_map SCD_HMERMAP + sfmmu_global_stat HATSTAT_SIZE sf_pagefaults HATSTAT_PAGEFAULT sf_uhash_searches HATSTAT_UHASH_SEARCH @@ -153,30 +161,32 @@ sfmmu_global_stat HATSTAT_SIZE sf_hment SFHME_SIZE SFHME_SHIFT hme_tte SFHME_TTE -tsbmiss TSBMISS_SIZE TSBMISS_SHIFT - tsbptr TSBMISS_TSBPTR - tsbptr4m TSBMISS_TSBPTR4M +tsbmiss TSBMISS_SIZE ksfmmup TSBMISS_KHATID usfmmup TSBMISS_UHATID - khashsz TSBMISS_KHASHSZ + usrdp TSBMISS_SHARED_UHATID + tsbptr TSBMISS_TSBPTR + tsbptr4m TSBMISS_TSBPTR4M + tsbscdptr TSBMISS_TSBSCDPTR + tsbscdptr4m TSBMISS_TSBSCDPTR4M + ismblkpa TSBMISS_ISMBLKPA khashstart TSBMISS_KHASHSTART - uhashsz TSBMISS_UHASHSZ uhashstart TSBMISS_UHASHSTART - hat_flags TSBMISS_HATFLAGS - ismblkpa TSBMISS_ISMBLKPA - itlb_misses TSBMISS_ITLBMISS - dtlb_misses TSBMISS_DTLBMISS + khashsz TSBMISS_KHASHSZ + uhashsz TSBMISS_UHASHSZ + uhat_tteflags TSBMISS_UTTEFLAGS + uhat_rtteflags TSBMISS_URTTEFLAGS utsb_misses TSBMISS_UTSBMISS ktsb_misses TSBMISS_KTSBMISS uprot_traps TSBMISS_UPROTS kprot_traps TSBMISS_KPROTS scratch TSBMISS_SCRATCH + shmermap TSBMISS_SHMERMAP + scd_shmermap TSBMISS_SCDSHMERMAP \#define TSB_TAGACC (0 * TSBMISS_SCRATCH_INCR) \#define TSBMISS_HMEBP (1 * TSBMISS_SCRATCH_INCR) \#define TSBMISS_HATID (2 * TSBMISS_SCRATCH_INCR) -\#define TSBMISS_XMMURET (3 * TSBMISS_SCRATCH_INCR) -\#define TSBMISS_XMMUPTR (4 * TSBMISS_SCRATCH_INCR) kpmtsbm KPMTSBM_SIZE KPMTSBM_SHIFT vbase KPMTSBM_VBASE diff --git a/usr/src/uts/sun4v/ml/trap_table.s b/usr/src/uts/sun4v/ml/trap_table.s index 58b38b995e..a1376210ff 100644 --- a/usr/src/uts/sun4v/ml/trap_table.s +++ b/usr/src/uts/sun4v/ml/trap_table.s @@ -113,20 +113,6 @@ #endif /* - * This macro is used to update per cpu mmu stats in perf critical - * paths. It is only enabled in debug kernels or if SFMMU_STAT_GATHER - * is defined. - */ -#if defined(DEBUG) || defined(SFMMU_STAT_GATHER) -#define HAT_PERCPU_DBSTAT(stat) \ - mov stat, %g1 ;\ - ba stat_mmu ;\ - rd %pc, %g7 -#else -#define HAT_PERCPU_DBSTAT(stat) -#endif /* DEBUG || SFMMU_STAT_GATHER */ - -/* * This first set are funneled to trap() with %tt as the type. * Trap will then either panic or send the user a signal. 
*/ @@ -953,7 +939,6 @@ tt1_dtlbmiss: #define DTLB_MISS(table_name) ;\ .global table_name/**/_dtlbmiss ;\ table_name/**/_dtlbmiss: ;\ - HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */ ;\ GET_MMU_D_PTAGACC_CTXTYPE(%g2, %g3) /* 8 instr */ ;\ cmp %g3, INVALID_CONTEXT ;\ ble,pn %xcc, sfmmu_kdtlb_miss ;\ @@ -998,7 +983,6 @@ tt1_itlbmiss: #define ITLB_MISS(table_name) \ .global table_name/**/_itlbmiss ;\ table_name/**/_itlbmiss: ;\ - HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */ ;\ GET_MMU_I_PTAGACC_CTXTYPE(%g2, %g3) /* 8 instr */ ;\ cmp %g3, INVALID_CONTEXT ;\ ble,pn %xcc, sfmmu_kitlb_miss ;\ @@ -2779,20 +2763,6 @@ trace_dataprot: ba,pt %xcc, .mmu_exception_end mov T_DATA_EXCEPTION, %g1 SET_SIZE(.dmmu_exception) -/* - * expects offset into tsbmiss area in %g1 and return pc in %g7 - */ -stat_mmu: - CPU_INDEX(%g5, %g6) - sethi %hi(tsbmiss_area), %g6 - sllx %g5, TSBMISS_SHIFT, %g5 - or %g6, %lo(tsbmiss_area), %g6 - add %g6, %g5, %g6 /* g6 = tsbmiss area */ - ld [%g6 + %g1], %g5 - add %g5, 1, %g5 - jmp %g7 + 4 - st %g5, [%g6 + %g1] - /* * fast_trap_done, fast_trap_done_chk_intr: diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c index eaabde53ae..832054a708 100644 --- a/usr/src/uts/sun4v/os/fillsysinfo.c +++ b/usr/src/uts/sun4v/os/fillsysinfo.c @@ -65,6 +65,8 @@ uint64_t ncpu_guest_max; void fill_cpu(md_t *, mde_cookie_t); static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t); +static uint64_t get_mmu_tsbs(md_t *, mde_cookie_t); +static uint64_t get_mmu_shcontexts(md_t *, mde_cookie_t); static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t); static char *construct_isalist(md_t *, mde_cookie_t, char **); static void set_at_flags(char *, int, char **); @@ -307,7 +309,6 @@ found: void cpu_setup_common(char **cpu_module_isa_set) { - extern int disable_delay_tlb_flush, delay_tlb_flush; extern int mmu_exported_pagesize_mask; int nocpus, i; size_t ra_limit; @@ -328,12 +329,6 @@ cpu_setup_common(char **cpu_module_isa_set) if (use_page_coloring) { do_pg_coloring = 1; - if (use_virtual_coloring) { - /* - * XXX Sun4v cpus don't have virtual caches - */ - do_virtual_coloring = 1; - } } /* @@ -344,6 +339,14 @@ cpu_setup_common(char **cpu_module_isa_set) */ mmu_exported_pagesize_mask = (int)get_cpu_pagesizes(mdp, cpulist[0]); + /* + * Get the number of contexts and tsbs supported. + */ + if (get_mmu_shcontexts(mdp, cpulist[0]) >= MIN_NSHCONTEXTS && + get_mmu_tsbs(mdp, cpulist[0]) >= MIN_NTSBS) { + shctx_on = 1; + } + for (i = 0; i < nocpus; i++) fill_cpu(mdp, cpulist[i]); @@ -428,12 +431,6 @@ cpu_setup_common(char **cpu_module_isa_set) * timestamping. The sun4v require use of %stick. */ traptrace_use_stick = 1; - - /* - * sun4v provides demap_all - */ - if (!disable_delay_tlb_flush) - delay_tlb_flush = 1; } /* @@ -456,6 +453,39 @@ get_mmu_ctx_bits(md_t *mdp, mde_cookie_t cpu_node_cookie) } /* + * Get the number of tsbs from MD. If absent the default value is 0. + */ +static uint64_t +get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie) +{ + uint64_t number_tsbs; + + if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-#tsbs", + &number_tsbs)) + number_tsbs = 0; + + return (number_tsbs); +} + +/* + * Get the number of shared contexts from MD. This property more accurately + * describes the total number of contexts available, not just "shared contexts". 
+ * If absent the default value is 1, + * + */ +static uint64_t +get_mmu_shcontexts(md_t *mdp, mde_cookie_t cpu_node_cookie) +{ + uint64_t number_contexts; + + if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#shared-contexts", + &number_contexts)) + number_contexts = 0; + + return (number_contexts); +} + +/* * Initalize supported page sizes information. * Set to 0, if the page sizes mask information is absent in MD. */ diff --git a/usr/src/uts/sun4v/sys/machparam.h b/usr/src/uts/sun4v/sys/machparam.h index 3fd034bdc5..3065d190e6 100644 --- a/usr/src/uts/sun4v/sys/machparam.h +++ b/usr/src/uts/sun4v/sys/machparam.h @@ -304,6 +304,7 @@ extern "C" { #define PTL1_BAD_HCALL_UNMAP_PERM_EINVAL 18 #define PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP 19 #define PTL1_BAD_RAISE_TSBEXCP 20 +#define PTL1_NO_SCDTSB8K 21 /* * Defines the max trap level allowed diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h index c7f3b1dd27..8b3e584fef 100644 --- a/usr/src/uts/sun4v/sys/machsystm.h +++ b/usr/src/uts/sun4v/sys/machsystm.h @@ -270,9 +270,7 @@ extern int cpu_setsize; /* Maximum ecache setsize of configured cpus */ * VM */ extern int do_pg_coloring; -extern int do_virtual_coloring; extern int use_page_coloring; -extern int use_virtual_coloring; extern uint_t vac_colors_mask; extern caddr_t get_mmfsa_scratchpad(void); diff --git a/usr/src/uts/sun4v/sys/mmu.h b/usr/src/uts/sun4v/sys/mmu.h index b4067b25df..697f31d700 100644 --- a/usr/src/uts/sun4v/sys/mmu.h +++ b/usr/src/uts/sun4v/sys/mmu.h @@ -58,6 +58,11 @@ extern "C" { #define MMU_PCONTEXT 0x08 /* primary context number */ #define MMU_SCONTEXT 0x10 /* secondary context number */ +#define MMU_PCONTEXT0 MMU_PCONTEXT /* primary context# 0 */ +#define MMU_PCONTEXT1 0x108 /* primary context# 1 */ +#define MMU_SCONTEXT0 MMU_SCONTEXT /* secondary context# 0 */ +#define MMU_SCONTEXT1 0x110 /* secondary context# 1 */ + /* * Pseudo Synchronous Fault Status Register Layout * @@ -146,6 +151,13 @@ extern "C" { #define MIN_NCTXS_BITS 2 #define MAX_NCTXS (1ull << MAX_NCTXS_BITS) +/* + * MIN_NCONTEXTS and MIN_NTSBS are the minimum number of contexts and tsbs + * necessary for shared context support. + */ +#define MIN_NSHCONTEXTS 1 +#define MIN_NTSBS 4 + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.c b/usr/src/uts/sun4v/vm/mach_sfmmu.c index 3347332094..52e69bceec 100644 --- a/usr/src/uts/sun4v/vm/mach_sfmmu.c +++ b/usr/src/uts/sun4v/vm/mach_sfmmu.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,6 +66,7 @@ * External routines and data structures */ extern void sfmmu_cache_flushcolor(int, pfn_t); +extern uint_t mmu_page_sizes; /* * Static routines @@ -79,11 +80,11 @@ caddr_t textva, datava; tte_t ktext_tte, kdata_tte; /* ttes for kernel text and data */ int enable_bigktsb = 1; +int shtsb4m_first = 0; tte_t bigktsb_ttes[MAX_BIGKTSB_TTES]; int bigktsb_nttes = 0; - /* * Controls the logic which enables the use of the * QUAD_LDD_PHYS ASI for TSB accesses. @@ -321,27 +322,27 @@ sfmmu_clear_utsbinfo() } /* - * Invalidate machine specific TSB information, indicates all TSB memory - * is being freed by hat_swapout(). 
- */ -void -sfmmu_invalidate_tsbinfo(sfmmu_t *sfmmup) -{ - ASSERT(sfmmup->sfmmu_tsb != NULL && - sfmmup->sfmmu_tsb->tsb_flags & TSB_SWAPPED); - - sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1; - sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0; -} - -/* * Set machine specific TSB information */ void sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) { - struct tsb_info *tsbinfop; - hv_tsb_info_t *tdp; + struct tsb_info *tsbinfop; + hv_tsb_info_t *tdp; + int i; + int j; + int scd = 0; + int tsbord[NHV_TSB_INFO]; + +#ifdef DEBUG + ASSERT(max_mmu_ctxdoms > 0); + if (sfmmup != ksfmmup) { + /* Process should have INVALID_CONTEXT on all MMUs. */ + for (i = 0; i < max_mmu_ctxdoms; i++) { + ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); + } + } +#endif tsbinfop = sfmmup->sfmmu_tsb; if (tsbinfop == NULL) { @@ -349,29 +350,91 @@ sfmmu_setup_tsbinfo(sfmmu_t *sfmmup) sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0; return; } - tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[0]; - sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp); - sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 1; - tdp->hvtsb_idxpgsz = TTE8K; - tdp->hvtsb_assoc = 1; - tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); - tdp->hvtsb_ctx_index = 0; - tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; - tdp->hvtsb_rsvd = 0; - tdp->hvtsb_pa = tsbinfop->tsb_pa; - if ((tsbinfop = tsbinfop->tsb_next) == NULL) - return; - sfmmup->sfmmu_hvblock.hv_tsb_info_cnt++; - tdp++; - tdp->hvtsb_idxpgsz = TTE4M; - tdp->hvtsb_assoc = 1; - tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); - tdp->hvtsb_ctx_index = 0; - tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; - tdp->hvtsb_rsvd = 0; - tdp->hvtsb_pa = tsbinfop->tsb_pa; - /* Only allow for 2 TSBs */ - ASSERT(tsbinfop->tsb_next == NULL); + + ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL); + ASSERT(sfmmup->sfmmu_scdp == NULL || + sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL); + + tsbord[0] = 0; + if (sfmmup->sfmmu_scdp == NULL) { + tsbord[1] = 1; + } else { + struct tsb_info *scd8ktsbp = + sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; + ulong_t shared_4mttecnt = 0; + ulong_t priv_4mttecnt = 0; + int scd4mtsb = (scd8ktsbp->tsb_next != NULL); + + for (i = TTE4M; i < MMU_PAGE_SIZES; i++) { + if (scd4mtsb) { + shared_4mttecnt += + sfmmup->sfmmu_scdismttecnt[i] + + sfmmup->sfmmu_scdrttecnt[i]; + } + if (tsbinfop->tsb_next != NULL) { + priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] + + sfmmup->sfmmu_ismttecnt[i]; + } + } + if (tsbinfop->tsb_next == NULL) { + if (shared_4mttecnt) { + tsbord[1] = 2; + tsbord[2] = 1; + } else { + tsbord[1] = 1; + tsbord[2] = 2; + } + } else if (priv_4mttecnt) { + if (shared_4mttecnt) { + tsbord[1] = shtsb4m_first ? 2 : 1; + tsbord[2] = 3; + tsbord[3] = shtsb4m_first ? 
1 : 2; + } else { + tsbord[1] = 1; + tsbord[2] = 2; + tsbord[3] = 3; + } + } else if (shared_4mttecnt) { + tsbord[1] = 3; + tsbord[2] = 2; + tsbord[3] = 1; + } else { + tsbord[1] = 2; + tsbord[2] = 1; + tsbord[3] = 3; + } + } + + ASSERT(tsbinfop != NULL); + for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) { + if (i == 0) { + tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i]; + sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp); + } + + + j = tsbord[i]; + + tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j]; + + ASSERT(tsbinfop->tsb_ttesz_mask != 0); + tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1; + tdp->hvtsb_assoc = 1; + tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc); + tdp->hvtsb_ctx_index = scd; + tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask; + tdp->hvtsb_rsvd = 0; + tdp->hvtsb_pa = tsbinfop->tsb_pa; + + tsbinfop = tsbinfop->tsb_next; + if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) { + tsbinfop = + sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb; + scd = 1; + } + } + sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i; + ASSERT(tsbinfop == NULL); } /* diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.h b/usr/src/uts/sun4v/vm/mach_sfmmu.h index e91c4cdd0f..06ae00fc32 100644 --- a/usr/src/uts/sun4v/vm/mach_sfmmu.h +++ b/usr/src/uts/sun4v/vm/mach_sfmmu.h @@ -52,7 +52,7 @@ extern "C" { /* * Hypervisor TSB info */ -#define NHV_TSB_INFO 2 +#define NHV_TSB_INFO 4 #ifndef _ASM @@ -67,6 +67,20 @@ struct hv_tsb_block { #ifdef _ASM /* + * This macro is used to set private/shared secondary context register in + * sfmmu_alloc_ctx(). + * Input: + * cnum = cnum + * is_shctx = sfmmu private/shared flag (0: private, 1: shared) + */ +#define SET_SECCTX(cnum, is_shctx, tmp1, tmp2) \ + mov MMU_SCONTEXT, tmp1; \ + movrnz is_shctx, MMU_SCONTEXT1, tmp1; \ + sethi %hi(FLUSH_ADDR), tmp2; \ + stxa cnum, [tmp1]ASI_MMU_CTX; /* set 2nd ctx reg. */ \ + flush tmp2; \ + +/* * This macro is used in the MMU code to check if TL should be lowered from * 2 to 1 to pop trapstat's state. See the block comment in trapstat.c * for details. @@ -382,10 +396,13 @@ label/**/2: \ /* - * Load TSB base register into a dedicated scratchpad register. + * Load TSB base register into a dedicated scratchpad register + * for private contexts. + * Load TSB base register to TSBMISS area for shared contexts. * This register contains utsb_pabase in bits 63:13, and TSB size * code in bits 2:0. * + * For private context * In: * tsbreg = value to load (ro) * regnum = constant or register @@ -399,7 +416,24 @@ label/**/2: \ stxa tsbreg, [tmp1]ASI_SCRATCHPAD /* save tsbreg */ /* - * Get TSB base register from the scratchpad + * Load TSB base register to TSBMISS area for shared contexts. + * This register contains utsb_pabase in bits 63:13, and TSB size + * code in bits 2:0. 
+ * + * In: + * tsbmiss = pointer to tsbmiss area + * tsbmissoffset = offset to right tsb pointer + * tsbreg = value to load (ro) + * Out: + * Specified tsbmiss area updated + * + */ +#define SET_UTSBREG_SHCTX(tsbmiss, tsbmissoffset, tsbreg) \ + stx tsbreg, [tsbmiss + tsbmissoffset] /* save tsbreg */ + +/* + * Get TSB base register from the scratchpad for + * private contexts * * In: * regnum = constant or register @@ -411,6 +445,20 @@ label/**/2: \ mov regnum, tsbreg; \ ldxa [tsbreg]ASI_SCRATCHPAD, tsbreg +/* + * Get TSB base register from the scratchpad for + * shared contexts + * + * In: + * tsbmiss = pointer to tsbmiss area + * tsbmissoffset = offset to right tsb pointer + * tsbreg = scratch + * Out: + * tsbreg = tsbreg from the specified scratchpad register + */ +#define GET_UTSBREG_SHCTX(tsbmiss, tsbmissoffset, tsbreg) \ + ldx [tsbmiss + tsbmissoffset], tsbreg + /* * Get the location of the TSB entry in the first TSB to probe @@ -563,6 +611,125 @@ label/**/1: \ /* END CSTYLED */ +/* + * Get the location in the 3rd TSB of the tsbe for this fault. + * The 3rd TSB corresponds to the shared context, and is used + * for 8K - 512k pages. + * + * In: + * tagacc = tag access register (not clobbered) + * tsbe = TSB base register + * tmp1, tmp2 = scratch registers + * Out: + * tsbe = pointer to the tsbe in the 3rd TSB + */ +#define GET_3RD_TSBE_PTR(tagacc, tsbe, tmp1, tmp2) \ + and tsbe, TSB_SOFTSZ_MASK, tmp2; /* tmp2=szc */ \ + andn tsbe, TSB_SOFTSZ_MASK, tsbe; /* tsbbase */ \ + mov TSB_ENTRIES(0), tmp1; /* nentries in TSB size 0 */ \ + sllx tmp1, tmp2, tmp1; /* tmp1 = nentries in TSB */ \ + sub tmp1, 1, tmp1; /* mask = nentries - 1 */ \ + srlx tagacc, MMU_PAGESHIFT, tmp2; \ + and tmp2, tmp1, tmp1; /* tsbent = virtpage & mask */ \ + sllx tmp1, TSB_ENTRY_SHIFT, tmp1; /* entry num --> ptr */ \ + add tsbe, tmp1, tsbe /* add entry offset to TSB base */ + + +/* + * Get the location in the 4th TSB of the tsbe for this fault. + * The 4th TSB is for the shared context. It is used for 4M - 256M pages. + * + * In: + * tagacc = tag access register (not clobbered) + * tsbe = TSB base register + * tmp1, tmp2 = scratch registers + * Out: + * tsbe = pointer to the tsbe in the 4th TSB + */ +#define GET_4TH_TSBE_PTR(tagacc, tsbe, tmp1, tmp2) \ + and tsbe, TSB_SOFTSZ_MASK, tmp2; /* tmp2=szc */ \ + andn tsbe, TSB_SOFTSZ_MASK, tsbe; /* tsbbase */ \ + mov TSB_ENTRIES(0), tmp1; /* nentries in TSB size 0 */ \ + sllx tmp1, tmp2, tmp1; /* tmp1 = nentries in TSB */ \ + sub tmp1, 1, tmp1; /* mask = nentries - 1 */ \ + srlx tagacc, MMU_PAGESHIFT4M, tmp2; \ + and tmp2, tmp1, tmp1; /* tsbent = virtpage & mask */ \ + sllx tmp1, TSB_ENTRY_SHIFT, tmp1; /* entry num --> ptr */ \ + add tsbe, tmp1, tsbe /* add entry offset to TSB base */ + +/* + * Copy the sfmmu_region_map or scd_region_map to the tsbmiss + * shmermap or scd_shmermap, from sfmmu_load_mmustate. + */ +#define SET_REGION_MAP(rgn_map, tsbmiss_map, cnt, tmp, label) \ + /* BEGIN CSTYLED */ \ +label: ;\ + ldx [rgn_map], tmp ;\ + dec cnt ;\ + add rgn_map, CLONGSIZE, rgn_map ;\ + stx tmp, [tsbmiss_map] ;\ + brnz,pt cnt, label ;\ + add tsbmiss_map, CLONGSIZE, tsbmiss_map \ + /* END CSTYLED */ + +/* + * If there is no scd, then zero the tsbmiss scd_shmermap, + * from sfmmu_load_mmustate. + */ +#define ZERO_REGION_MAP(tsbmiss_map, cnt, label) \ + /* BEGIN CSTYLED */ \ +label: ;\ + dec cnt ;\ + stx %g0, [tsbmiss_map] ;\ + brnz,pt cnt, label ;\ + add tsbmiss_map, CLONGSIZE, tsbmiss_map \ + /* END CSTYLED */ + +/* + * Set hmemisc to 1 if the shared hme is also part of an scd. 
+ * In: + * tsbarea = tsbmiss area (not clobbered) + * hmeblkpa = hmeblkpa + hmentoff + SFHME_TTE (not clobbered) + * hmentoff = hmentoff + SFHME_TTE = tte offset(clobbered) + * Out: + * use_shctx = 1 if shme is in scd and 0 otherwise + */ +#define GET_SCDSHMERMAP(tsbarea, hmeblkpa, hmentoff, use_shctx) \ + /* BEGIN CSTYLED */ \ + sub hmeblkpa, hmentoff, hmentoff /* hmentofff = hmeblkpa */ ;\ + add hmentoff, HMEBLK_TAG, hmentoff ;\ + ldxa [hmentoff]ASI_MEM, hmentoff /* read 1st part of tag */ ;\ + and hmentoff, HTAG_RID_MASK, hmentoff /* mask off rid */ ;\ + and hmentoff, BT_ULMASK, use_shctx /* mask bit index */ ;\ + srlx hmentoff, BT_ULSHIFT, hmentoff /* extract word */ ;\ + sllx hmentoff, CLONGSHIFT, hmentoff /* index */ ;\ + add tsbarea, hmentoff, hmentoff /* add to tsbarea */ ;\ + ldx [hmentoff + TSBMISS_SCDSHMERMAP], hmentoff /* scdrgn */ ;\ + srlx hmentoff, use_shctx, use_shctx ;\ + and use_shctx, 0x1, use_shctx \ + /* END CSTYLED */ + +/* + * 1. Get ctx1. The traptype is supplied by caller. + * 2. If iTSB miss, store in MMFSA_I_CTX + * 3. if dTSB miss, store in MMFSA_D_CTX + * 4. Thus the [D|I]TLB_STUFF will work as expected. + */ +#define SAVE_CTX1(traptype, ctx1, tmp, label) \ + /* BEGIN CSTYLED */ \ + mov MMU_SCONTEXT1, tmp ;\ + ldxa [tmp]ASI_MMU_CTX, ctx1 ;\ + MMU_FAULT_STATUS_AREA(tmp) ;\ + cmp traptype, FAST_IMMU_MISS_TT ;\ + be,a,pn %icc, label ;\ + stx ctx1, [tmp + MMFSA_I_CTX] ;\ + cmp traptype, T_INSTR_MMU_MISS ;\ + be,a,pn %icc, label ;\ + stx ctx1, [tmp + MMFSA_I_CTX] ;\ + stx ctx1, [tmp + MMFSA_D_CTX] ;\ +label: + /* END CSTYLED */ + #endif /* _ASM */ #ifdef __cplusplus diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s b/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s index bbec4ee675..0dc3dc5f44 100644 --- a/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s +++ b/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s @@ -73,7 +73,7 @@ sfmmu_getctx_sec() /* ARGSUSED */ void -sfmmu_setctx_sec(int ctx) +sfmmu_setctx_sec(uint_t ctx) {} /* ARGSUSED */ @@ -154,8 +154,8 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup) ta FAST_TRAP brz,pt %o0, 5f nop - ba panic_bad_hcall - mov MMU_DEMAP_ALL, %o1 + ba ptl1_panic /* bad HV call */ + mov PTL1_BAD_RAISE_TSBEXCP, %g1 5: mov %g3, %o0 mov %g4, %o1 @@ -245,17 +245,17 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup) stxa %o0, [%o1]ASI_MMU_CTX /* set 2nd context reg. */ flush %o4 - /* - * if the routine is entered with intr enabled, then enable intr now. - * otherwise, keep intr disabled, return without enabing intr. - * %g1 - old intr state - */ - btst PSTATE_IE, %g1 - bnz,a,pt %icc, 2f - wrpr %g0, %g1, %pstate /* enable interrupts */ -2: retl - nop - SET_SIZE(sfmmu_setctx_sec) + /* + * if the routine is entered with intr enabled, then enable intr now. + * otherwise, keep intr disabled, return without enabing intr. + * %g1 - old intr state + */ + btst PSTATE_IE, %g1 + bnz,a,pt %icc, 2f + wrpr %g0, %g1, %pstate /* enable interrupts */ +2: retl + nop + SET_SIZE(sfmmu_setctx_sec) /* * set ktsb_phys to 1 if the processor supports ASI_QUAD_LDD_PHYS. @@ -285,9 +285,36 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup) sethi %hi(ksfmmup), %o3 ldx [%o3 + %lo(ksfmmup)], %o3 cmp %o3, %o0 - be,pn %xcc, 3f ! if kernel as, do nothing + be,pn %xcc, 7f ! if kernel as, do nothing + nop + + set MMU_SCONTEXT, %o3 + ldxa [%o3]ASI_MMU_CTX, %o5 + + cmp %o5, INVALID_CONTEXT ! ctx is invalid? + bne,pt %icc, 1f nop + CPU_TSBMISS_AREA(%o2, %o3) ! 
%o2 = tsbmiss area + stx %o0, [%o2 + TSBMISS_UHATID] + stx %g0, [%o2 + TSBMISS_SHARED_UHATID] +#ifdef DEBUG + /* check if hypervisor/hardware should handle user TSB */ + sethi %hi(hv_use_non0_tsb), %o2 + ld [%o2 + %lo(hv_use_non0_tsb)], %o2 + brz,pn %o2, 0f + nop +#endif /* DEBUG */ + clr %o0 ! ntsb = 0 for invalid ctx + clr %o1 ! HV_TSB_INFO_PA = 0 if inv ctx + mov MMU_TSB_CTXNON0, %o5 + ta FAST_TRAP ! set TSB info for user process + brnz,a,pn %o0, panic_bad_hcall + mov MMU_TSB_CTXNON0, %o1 +0: + retl + nop +1: /* * We need to set up the TSB base register, tsbmiss * area, and pass the TSB information into the hypervisor @@ -307,52 +334,106 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup) 2: SET_UTSBREG(SCRATCHPAD_UTSBREG2, %o2, %o3) + /* make 3rd and 4th TSB */ + CPU_TSBMISS_AREA(%o4, %o3) ! %o4 = tsbmiss area + + ldx [%o0 + SFMMU_SCDP], %g2 ! %g2 = sfmmu_scd + brz,pt %g2, 3f + mov -1, %o2 ! use -1 if no third TSB + + ldx [%g2 + SCD_SFMMUP], %g3 ! %g3 = scdp->scd_sfmmup + ldx [%g3 + SFMMU_TSB], %o1 ! %o1 = first scd tsbinfo + brz,pn %o1, 9f + nop ! panic if no third TSB + + /* make 3rd UTSBREG */ + MAKE_UTSBREG(%o1, %o2, %o3) ! %o2 = user tsbreg +3: + SET_UTSBREG_SHCTX(%o4, TSBMISS_TSBSCDPTR, %o2) + + brz,pt %g2, 4f + mov -1, %o2 ! use -1 if no 3rd or 4th TSB + + brz,pt %o1, 4f + mov -1, %o2 ! use -1 if no 3rd or 4th TSB + ldx [%o1 + TSBINFO_NEXTPTR], %g2 ! %g2 = second scd tsbinfo + brz,pt %g2, 4f + mov -1, %o2 ! use -1 if no 4th TSB + + /* make 4th UTSBREG */ + MAKE_UTSBREG(%g2, %o2, %o3) ! %o2 = user tsbreg +4: + SET_UTSBREG_SHCTX(%o4, TSBMISS_TSBSCDPTR4M, %o2) + #ifdef DEBUG /* check if hypervisor/hardware should handle user TSB */ sethi %hi(hv_use_non0_tsb), %o2 ld [%o2 + %lo(hv_use_non0_tsb)], %o2 - brz,pn %o2, 5f + brz,pn %o2, 6f nop #endif /* DEBUG */ CPU_ADDR(%o2, %o4) ! load CPU struct addr to %o2 using %o4 ldub [%o2 + CPU_TSTAT_FLAGS], %o1 ! load cpu_tstat_flag to %o1 - - /* - * %o0 = sfmmup - * %o2 = returned sfmmu cnum on this CPU - * %o4 = scratch - */ - SFMMU_CPU_CNUM(%o0, %o2, %o4) - mov %o5, %o4 ! preserve %o5 for resume mov %o0, %o3 ! preserve %o0 btst TSTAT_TLB_STATS, %o1 - bnz,a,pn %icc, 4f ! ntsb = 0 if TLB stats enabled + bnz,a,pn %icc, 5f ! ntsb = 0 if TLB stats enabled clr %o0 - cmp %o2, INVALID_CONTEXT - be,a,pn %icc, 4f - clr %o0 ! ntsb = 0 for invalid ctx + ldx [%o3 + SFMMU_HVBLOCK + HV_TSB_INFO_CNT], %o0 -4: - ldx [%o3 + SFMMU_HVBLOCK + HV_TSB_INFO_PA], %o1 +5: + ldx [%o3 + SFMMU_HVBLOCK + HV_TSB_INFO_PA], %o1 mov MMU_TSB_CTXNON0, %o5 ta FAST_TRAP ! set TSB info for user process brnz,a,pn %o0, panic_bad_hcall mov MMU_TSB_CTXNON0, %o1 mov %o3, %o0 ! restore %o0 - mov %o4, %o5 ! restore %o5 -5: +6: ldx [%o0 + SFMMU_ISMBLKPA], %o1 ! copy members of sfmmu - CPU_TSBMISS_AREA(%o2, %o3) ! we need to access from + CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area stx %o1, [%o2 + TSBMISS_ISMBLKPA] ! sfmmu_tsb_miss into the - lduh [%o0 + SFMMU_FLAGS], %o3 ! per-CPU tsbmiss area. + ldub [%o0 + SFMMU_TTEFLAGS], %o3 ! per-CPU tsbmiss area. + ldub [%o0 + SFMMU_RTTEFLAGS], %o4 + ldx [%o0 + SFMMU_SRDP], %o1 stx %o0, [%o2 + TSBMISS_UHATID] - stuh %o3, [%o2 + TSBMISS_HATFLAGS] - -3: retl + stub %o3, [%o2 + TSBMISS_UTTEFLAGS] + stub %o4, [%o2 + TSBMISS_URTTEFLAGS] + stx %o1, [%o2 + TSBMISS_SHARED_UHATID] + brz,pn %o1, 7f ! check for sfmmu_srdp + add %o0, SFMMU_HMERMAP, %o1 + add %o2, TSBMISS_SHMERMAP, %o2 + mov SFMMU_HMERGNMAP_WORDS, %o3 + ! set tsbmiss shmermap + SET_REGION_MAP(%o1, %o2, %o3, %o4, load_shme_mmustate) + + ldx [%o0 + SFMMU_SCDP], %o4 ! 
%o4 = sfmmu_scd + CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area + mov SFMMU_HMERGNMAP_WORDS, %o3 + brnz,pt %o4, 8f ! check for sfmmu_scdp else + add %o2, TSBMISS_SCDSHMERMAP, %o2 ! zero tsbmiss scd_shmermap + ZERO_REGION_MAP(%o2, %o3, zero_scd_mmustate) +7: + retl nop - SET_SIZE(sfmmu_load_mmustate) +8: ! set tsbmiss scd_shmermap + add %o4, SCD_HMERMAP, %o1 + SET_REGION_MAP(%o1, %o2, %o3, %o4, load_scd_mmustate) + retl + nop +9: + sethi %hi(panicstr), %g1 ! panic if no 3rd TSB + ldx [%g1 + %lo(panicstr)], %g1 + tst %g1 + + bnz,pn %xcc, 7b + nop + + sethi %hi(sfmmu_panic10), %o0 + call panic + or %o0, %lo(sfmmu_panic10), %o0 + SET_SIZE(sfmmu_load_mmustate) + #endif /* lint */ #if defined(lint) |
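The GET_3RD_TSBE_PTR and GET_4TH_TSBE_PTR macros added to the sun4v mach_sfmmu.h index the shared-context TSBs exactly as their block comments describe: the low bits of the TSB register carry the size code, the remaining bits are the TSB base, and the virtual page number taken from the tag access register is masked down to an entry index. A minimal C sketch of that arithmetic follows; the constant values and the helper name tsb_entry_ptr are assumptions standing in for the kernel's TSB_SOFTSZ_MASK, TSB_ENTRY_SHIFT and TSB_ENTRIES(0) definitions, which do not appear in this diff.

#include <stdint.h>

/*
 * Sketch of the entry-pointer arithmetic in GET_3RD_TSBE_PTR and
 * GET_4TH_TSBE_PTR.  The three constants are illustrative placeholders,
 * not the kernel's definitions.
 */
#define SOFTSZ_MASK     0x7ULL  /* size code kept in bits 2:0 of the TSB reg */
#define ENTRY_SHIFT     4       /* 16-byte TSB entries */
#define ENTRIES_SZC0    512     /* entries in a size-code-0 TSB */

static uint64_t
tsb_entry_ptr(uint64_t tsbreg, uint64_t tagacc, unsigned int pageshift)
{
	uint64_t szc = tsbreg & SOFTSZ_MASK;    /* and  tsbe, TSB_SOFTSZ_MASK */
	uint64_t base = tsbreg & ~SOFTSZ_MASK;  /* andn tsbe, TSB_SOFTSZ_MASK */
	uint64_t nentries = (uint64_t)ENTRIES_SZC0 << szc;
	uint64_t idx = (tagacc >> pageshift) & (nentries - 1);

	return (base + (idx << ENTRY_SHIFT));   /* base plus entry offset */
}

Calling this with the 8K page shift mirrors the 3rd-TSB macro and calling it with the 4M page shift mirrors the 4th-TSB macro; the shift amount of the srlx is the only difference between the two macros.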

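sfmmu_load_mmustate now copies the process's hme-region bitmap, and the SCD's bitmap when one exists, into the per-CPU tsbmiss area (SET_REGION_MAP/ZERO_REGION_MAP), and GET_SCDSHMERMAP tests a single bit of that copy to decide whether a shared hmeblk's region is covered by the shared context. The fragment below is only an illustrative C rendering of that bit test; the function name rid_in_scd and its signature are hypothetical and not part of the diff.

#include <limits.h>

/*
 * Illustrative equivalent of the GET_SCDSHMERMAP bit test: given the
 * region id recovered from an hmeblk tag, check the SCD region bitmap
 * copied into the tsbmiss area.  Returns 1 if the region belongs to the
 * SCD (use the shared context), 0 otherwise.
 */
#define BITS_PER_WORD   (sizeof (unsigned long) * CHAR_BIT)

static int
rid_in_scd(const unsigned long *scd_shmermap, unsigned int rid)
{
	unsigned int word = rid / BITS_PER_WORD;  /* word index into bitmap */
	unsigned int bit = rid % BITS_PER_WORD;   /* bit index within word */

	return ((scd_shmermap[word] >> bit) & 1UL);
}

The assembly version performs the same extraction with BT_ULMASK and BT_ULSHIFT, loads the selected TSBMISS_SCDSHMERMAP word, and shifts it right by the bit index, which is why use_shctx comes out as 0 or 1.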