summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorPavel Tatashin <Pavel.Tatashin@Sun.COM>2010-02-19 10:18:21 -0800
committerPavel Tatashin <Pavel.Tatashin@Sun.COM>2010-02-19 10:18:21 -0800
commitd2365b013d4199b49b3a1438d57aea23423e02ad (patch)
tree378aab551ffbefc8c2bedeac6f3022460e6fe464 /usr/src
parent6e06433809d72b73b86972faff81c97bd893e960 (diff)
downloadillumos-joyent-d2365b013d4199b49b3a1438d57aea23423e02ad.tar.gz
6892591 per-MMU context id domains for sun4v
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/sfmmu/ml/sfmmu_asm.s16
-rw-r--r--usr/src/uts/sfmmu/vm/hat_sfmmu.c214
-rw-r--r--usr/src/uts/sfmmu/vm/hat_sfmmu.h26
-rw-r--r--usr/src/uts/sun4v/os/fillsysinfo.c243
-rw-r--r--usr/src/uts/sun4v/os/mach_descrip.c24
-rw-r--r--usr/src/uts/sun4v/os/mach_startup.c14
-rw-r--r--usr/src/uts/sun4v/os/suspend.c30
-rw-r--r--usr/src/uts/sun4v/sys/mach_descrip.h5
8 files changed, 509 insertions, 63 deletions
diff --git a/usr/src/uts/sfmmu/ml/sfmmu_asm.s b/usr/src/uts/sfmmu/ml/sfmmu_asm.s
index 78bc5d21b7..55e0083767 100644
--- a/usr/src/uts/sfmmu/ml/sfmmu_asm.s
+++ b/usr/src/uts/sfmmu/ml/sfmmu_asm.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -635,6 +635,13 @@ sfmmu_panic11:
! load global mmu_ctxp info
ldx [%o2 + CPU_MMU_CTXP], %o3 ! %o3 = mmu_ctx_t ptr
+
+#ifdef sun4v
+	/* During suspend on sun4v, context domains can be temporarily removed */
+ brz,a,pn %o3, 0f
+ nop
+#endif
+
lduw [%o2 + CPU_MMU_IDX], %g2 ! %g2 = mmu index
! load global mmu_ctxp gnum
@@ -687,6 +694,13 @@ sfmmu_panic11:
! (invalid HAT cnum) && (allocflag == 1)
ba,pt %icc, 2f
nop
+#ifdef sun4v
+0:
+ set INVALID_CONTEXT, %o1
+ membar #LoadStore|#StoreStore
+ ba,pt %icc, 8f
+ mov %g0, %g4 ! %g4 = ret = 0
+#endif
1:
! valid HAT cnum, check gnum
cmp %g5, %o4
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
index d413685e2d..6156017a5e 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -532,7 +532,7 @@ static pgcnt_t ism_tsb_entries(sfmmu_t *, int szc);
extern void sfmmu_setup_tsbinfo(sfmmu_t *);
extern void sfmmu_clear_utsbinfo(void);
-static void sfmmu_ctx_wrap_around(mmu_ctx_t *);
+static void sfmmu_ctx_wrap_around(mmu_ctx_t *, boolean_t);
extern int vpm_enable;
@@ -1112,19 +1112,11 @@ hat_init(void)
* a set_platform_defaults() or does not choose to modify
* max_mmu_ctxdoms, it gets one MMU context domain for every CPU.
*
- * For sun4v, there will be one global context domain, this is to
- * avoid the ldom cpu substitution problem.
- *
* For all platforms that have CPUs sharing MMUs, this
* value must be defined.
*/
- if (max_mmu_ctxdoms == 0) {
-#ifndef sun4v
+ if (max_mmu_ctxdoms == 0)
max_mmu_ctxdoms = max_ncpus;
-#else /* sun4v */
- max_mmu_ctxdoms = 1;
-#endif /* sun4v */
- }
size = max_mmu_ctxdoms * sizeof (mmu_ctx_t *);
mmu_ctxs_tbl = kmem_zalloc(size, KM_SLEEP);
@@ -1611,26 +1603,16 @@ sfmmu_mmu_kstat_create(mmu_ctx_t *mmu_ctxp)
* specify that interface, then the function below is used instead to return
* default information. The defaults are as follows:
*
- * - For sun4u systems there's one MMU context domain per CPU.
- * This default is used by all sun4u systems except OPL. OPL systems
- * provide platform specific interface to map CPU ids to MMU ids
- * because on OPL more than 1 CPU shares a single MMU.
- * Note that on sun4v, there is one global context domain for
- * the entire system. This is to avoid running into potential problem
- * with ldom physical cpu substitution feature.
* - The number of MMU context IDs supported on any CPU in the
* system is 8K.
+ * - There is one MMU context domain per CPU.
*/
/*ARGSUSED*/
static void
sfmmu_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *infop)
{
infop->mmu_nctxs = nctxs;
-#ifndef sun4v
infop->mmu_idx = cpu[cpuid]->cpu_seqid;
-#else /* sun4v */
- infop->mmu_idx = 0;
-#endif /* sun4v */
}
/*
@@ -1676,6 +1658,7 @@ sfmmu_cpu_init(cpu_t *cp)
mmu_ctxs_tbl[info.mmu_idx] = mmu_ctxp;
} else {
ASSERT(mmu_ctxp->mmu_idx == info.mmu_idx);
+ ASSERT(mmu_ctxp->mmu_nctxs <= info.mmu_nctxs);
}
/*
@@ -1693,6 +1676,24 @@ sfmmu_cpu_init(cpu_t *cp)
mutex_exit(&mmu_ctxp->mmu_lock);
}
+static void
+sfmmu_ctxdom_free(mmu_ctx_t *mmu_ctxp)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(!MUTEX_HELD(&mmu_ctxp->mmu_lock));
+
+ mutex_destroy(&mmu_ctxp->mmu_lock);
+
+ if (mmu_ctxp->mmu_kstat)
+ kstat_delete(mmu_ctxp->mmu_kstat);
+
+ /* mmu_saved_gnum is protected by the cpu_lock. */
+ if (mmu_saved_gnum < mmu_ctxp->mmu_gnum)
+ mmu_saved_gnum = mmu_ctxp->mmu_gnum;
+
+ kmem_cache_free(mmuctxdom_cache, mmu_ctxp);
+}
+
/*
* Called to perform MMU context-related cleanup for a CPU.
*/
@@ -1718,23 +1719,165 @@ sfmmu_cpu_cleanup(cpu_t *cp)
if (--mmu_ctxp->mmu_ncpus == 0) {
mmu_ctxs_tbl[mmu_ctxp->mmu_idx] = NULL;
mutex_exit(&mmu_ctxp->mmu_lock);
- mutex_destroy(&mmu_ctxp->mmu_lock);
+ sfmmu_ctxdom_free(mmu_ctxp);
+ return;
+ }
- if (mmu_ctxp->mmu_kstat)
- kstat_delete(mmu_ctxp->mmu_kstat);
+ mutex_exit(&mmu_ctxp->mmu_lock);
+}
- /* mmu_saved_gnum is protected by the cpu_lock. */
- if (mmu_saved_gnum < mmu_ctxp->mmu_gnum)
- mmu_saved_gnum = mmu_ctxp->mmu_gnum;
+uint_t
+sfmmu_ctxdom_nctxs(int idx)
+{
+ return (mmu_ctxs_tbl[idx]->mmu_nctxs);
+}
+
+#ifdef sun4v
+/*
+ * sfmmu_ctxdoms_* is an interface provided to help keep context domains
+ * consistent after suspend/resume on a system that can resume on different
+ * hardware than that on which it was suspended.
+ *
+ * sfmmu_ctxdom_lock(void) locks all context domains and prevents new contexts
+ * from being allocated. It acquires all hat_locks, which blocks most access to
+ * context data, except for a few cases that are handled separately or are
+ * harmless. It wraps each domain to increment gnum and invalidate on-CPU
+ * contexts, and forces cnum to its max. As a result of this call all user
+ * threads that are running on CPUs trap and try to perform wrap around but
+ * can't because hat_locks are taken. Threads that were not on CPUs but started
+ * by the scheduler go to sfmmu_alloc_ctx() to acquire context without checking
+ * hat_lock, but fail, because cnum == nctxs, and therefore also trap and block
+ * on hat_lock trying to wrap. sfmmu_ctxdom_lock() must be called before CPUs
+ * are paused, else it could deadlock acquiring locks held by paused CPUs.
+ *
+ * sfmmu_ctxdoms_remove() removes context domains from every CPUs and records
+ * the CPUs that had them. It must be called after CPUs have been paused. This
+ * ensures that no threads are in sfmmu_alloc_ctx() accessing domain data,
+ * because pause_cpus sends a mondo interrupt to every CPU, and sfmmu_alloc_ctx
+ * runs with interrupts disabled. When CPUs are later resumed, they may enter
+ * sfmmu_alloc_ctx, but it will check for CPU_MMU_CTXP = NULL and immediately
+ * return failure. Or, they will be blocked trying to acquire hat_lock. Thus
+ * after sfmmu_ctxdoms_remove returns, we are guaranteed that no one is
+ * accessing the old context domains.
+ *
+ * sfmmu_ctxdoms_update(void) frees space used by old context domains and
+ * allocates new context domains based on hardware layout. It initializes
+ * every CPU that had context domain before migration to have one again.
+ * sfmmu_ctxdoms_update must be called after CPUs are resumed, else it
+ * could deadlock acquiring locks held by paused CPUs.
+ *
+ * sfmmu_ctxdoms_unlock(void) releases all hat_locks after which user threads
+ * acquire new context ids and continue execution.
+ *
+ * Therefore functions should be called in the following order:
+ * suspend_routine()
+ * sfmmu_ctxdom_lock()
+ * pause_cpus()
+ * suspend()
+ * if (suspend failed)
+ * sfmmu_ctxdom_unlock()
+ * ...
+ * sfmmu_ctxdom_remove()
+ * resume_cpus()
+ * sfmmu_ctxdom_update()
+ * sfmmu_ctxdom_unlock()
+ */
+static cpuset_t sfmmu_ctxdoms_pset;
- kmem_cache_free(mmuctxdom_cache, mmu_ctxp);
+void
+sfmmu_ctxdoms_remove()
+{
+ processorid_t id;
+ cpu_t *cp;
- return;
+ /*
+ * Record the CPUs that have domains in sfmmu_ctxdoms_pset, so they can
+ * be restored post-migration. A CPU may be powered off and not have a
+ * domain, for example.
+ */
+ CPUSET_ZERO(sfmmu_ctxdoms_pset);
+
+ for (id = 0; id < NCPU; id++) {
+ if ((cp = cpu[id]) != NULL && CPU_MMU_CTXP(cp) != NULL) {
+ CPUSET_ADD(sfmmu_ctxdoms_pset, id);
+ CPU_MMU_CTXP(cp) = NULL;
+ }
}
+}
- mutex_exit(&mmu_ctxp->mmu_lock);
+void
+sfmmu_ctxdoms_lock(void)
+{
+ int idx;
+ mmu_ctx_t *mmu_ctxp;
+
+ sfmmu_hat_lock_all();
+
+ /*
+ * At this point, no thread can be in sfmmu_ctx_wrap_around, because
+ * hat_lock is always taken before calling it.
+ *
+ * For each domain, set mmu_cnum to max so no more contexts can be
+ * allocated, and wrap to flush on-CPU contexts and force threads to
+ * acquire a new context when we later drop hat_lock after migration.
+ * Setting mmu_cnum may race with sfmmu_alloc_ctx which also sets cnum,
+ * but the latter uses CAS and will miscompare and not overwrite it.
+ */
+ kpreempt_disable(); /* required by sfmmu_ctx_wrap_around */
+ for (idx = 0; idx < max_mmu_ctxdoms; idx++) {
+ if ((mmu_ctxp = mmu_ctxs_tbl[idx]) != NULL) {
+ mutex_enter(&mmu_ctxp->mmu_lock);
+ mmu_ctxp->mmu_cnum = mmu_ctxp->mmu_nctxs;
+ /* make sure updated cnum visible */
+ membar_enter();
+ mutex_exit(&mmu_ctxp->mmu_lock);
+ sfmmu_ctx_wrap_around(mmu_ctxp, B_FALSE);
+ }
+ }
+ kpreempt_enable();
}
+void
+sfmmu_ctxdoms_unlock(void)
+{
+ sfmmu_hat_unlock_all();
+}
+
+void
+sfmmu_ctxdoms_update(void)
+{
+ processorid_t id;
+ cpu_t *cp;
+ uint_t idx;
+ mmu_ctx_t *mmu_ctxp;
+
+ /*
+ * Free all context domains. As side effect, this increases
+ * mmu_saved_gnum to the maximum gnum over all domains, which is used to
+ * init gnum in the new domains, which therefore will be larger than the
+ * sfmmu gnum for any process, guaranteeing that every process will see
+ * a new generation and allocate a new context regardless of what new
+ * domain it runs in.
+ */
+ mutex_enter(&cpu_lock);
+
+ for (idx = 0; idx < max_mmu_ctxdoms; idx++) {
+ if (mmu_ctxs_tbl[idx] != NULL) {
+ mmu_ctxp = mmu_ctxs_tbl[idx];
+ mmu_ctxs_tbl[idx] = NULL;
+ sfmmu_ctxdom_free(mmu_ctxp);
+ }
+ }
+
+ for (id = 0; id < NCPU; id++) {
+ if (CPU_IN_SET(sfmmu_ctxdoms_pset, id) &&
+ (cp = cpu[id]) != NULL)
+ sfmmu_cpu_init(cp);
+ }
+ mutex_exit(&cpu_lock);
+}
+#endif
+
/*
* Hat_setup, makes an address space context the current active one.
* In sfmmu this translates to setting the secondary context with the
@@ -9745,7 +9888,7 @@ sfmmu_get_ctx(sfmmu_t *sfmmup)
* Do a wrap-around if cnum reaches the max # cnum supported by a MMU.
*/
if (mmu_ctxp->mmu_cnum == mmu_ctxp->mmu_nctxs)
- sfmmu_ctx_wrap_around(mmu_ctxp);
+ sfmmu_ctx_wrap_around(mmu_ctxp, B_TRUE);
/*
* Let the MMU set up the page sizes to use for
@@ -9786,7 +9929,7 @@ sfmmu_get_ctx(sfmmu_t *sfmmup)
* next generation and start from 2.
*/
static void
-sfmmu_ctx_wrap_around(mmu_ctx_t *mmu_ctxp)
+sfmmu_ctx_wrap_around(mmu_ctx_t *mmu_ctxp, boolean_t reset_cnum)
{
/* caller must have disabled the preemption */
@@ -9820,7 +9963,7 @@ sfmmu_ctx_wrap_around(mmu_ctx_t *mmu_ctxp)
/* xcall to others on the same MMU to invalidate ctx */
cpuset = mmu_ctxp->mmu_cpuset;
- ASSERT(CPU_IN_SET(cpuset, CPU->cpu_id));
+ ASSERT(CPU_IN_SET(cpuset, CPU->cpu_id) || !reset_cnum);
CPUSET_DEL(cpuset, CPU->cpu_id);
CPUSET_AND(cpuset, cpu_ready_set);
@@ -9857,7 +10000,8 @@ sfmmu_ctx_wrap_around(mmu_ctx_t *mmu_ctxp)
}
/* reset mmu cnum, skips cnum 0 and 1 */
- mmu_ctxp->mmu_cnum = NUM_LOCKED_CTXS;
+ if (reset_cnum == B_TRUE)
+ mmu_ctxp->mmu_cnum = NUM_LOCKED_CTXS;
done:
mutex_exit(&mmu_ctxp->mmu_lock);
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
index f075093c2f..af9da9e4eb 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -554,10 +554,10 @@ typedef enum mmu_ctx_stat_types {
* is protected via CAS.
* mmu_nctxs
* The max number of context IDs supported on every CPU in this
- * MMU context domain. It is 8K except for Rock where it is 64K.
- * This is needed here in case the system supports mixed type of
- * processors/MMUs. It also helps to make ctx switch code access
- * fewer cache lines i.e. no need to retrieve it from some global nctxs.
+ * MMU context domain. This is needed here in case the system supports
+ * mixed type of processors/MMUs. It also helps to make ctx switch code
+ * access fewer cache lines i.e. no need to retrieve it from some global
+ * nctxs.
* mmu_lock
* The mutex spin lock used to serialize context ID wrap around
* mmu_idx
@@ -599,6 +599,15 @@ extern mmu_ctx_t **mmu_ctxs_tbl;
extern void sfmmu_cpu_init(cpu_t *);
extern void sfmmu_cpu_cleanup(cpu_t *);
+extern uint_t sfmmu_ctxdom_nctxs(int);
+
+#ifdef sun4v
+extern void sfmmu_ctxdoms_remove(void);
+extern void sfmmu_ctxdoms_lock(void);
+extern void sfmmu_ctxdoms_unlock(void);
+extern void sfmmu_ctxdoms_update(void);
+#endif
+
/*
* The following structure is used to get MMU context domain information for
* a CPU from the platform.
@@ -607,7 +616,6 @@ extern void sfmmu_cpu_cleanup(cpu_t *);
* The MMU context domain index within the global array mmu_ctxs
* mmu_nctxs
* The number of context IDs supported in the MMU context domain
- * (64K for Rock)
*/
typedef struct mmu_ctx_info {
uint_t mmu_idx;
@@ -2575,7 +2583,11 @@ struct sfmmu_percpu_stat {
#define SFMMU_STAT_ADD(stat, amount) sfmmu_global_stat.stat += (amount)
#define SFMMU_STAT_SET(stat, count) sfmmu_global_stat.stat = (count)
-#define SFMMU_MMU_STAT(stat) CPU->cpu_m.cpu_mmu_ctxp->stat++
+#define SFMMU_MMU_STAT(stat) { \
+ mmu_ctx_t *ctx = CPU->cpu_m.cpu_mmu_ctxp; \
+ if (ctx) \
+ ctx->stat++; \
+}
#endif /* !_ASM */
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index 54d73f4234..3157345818 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1050,3 +1050,244 @@ init_md_broken(md_t *mdp, mde_cookie_t *cpulist)
md_free_scan_dag(mdp, &platlist);
}
+
+/*
+ * Number of bits forming a valid context for use in a sun4v TTE and the MMU
+ * context registers. Sun4v defines the minimum default value to be 13 if this
+ * property is not specified in a cpu node in machine descriptor graph.
+ */
+#define MMU_INFO_CTXBITS_MIN 13
+
+/* Convert context bits to number of contexts */
+#define MMU_INFO_BNCTXS(nbits) ((uint_t)(1u<<(nbits)))
+
+/*
+ * Read machine descriptor and load TLB to CPU mappings.
+ * Returned values: cpuid2pset[NCPU], nctxs[NCPU], md_gen
+ * - cpuid2pset is initialized so it can convert cpuids to processor set of CPUs
+ * that are shared between TLBs.
+ * - nctxs is initialized to number of contexts for each CPU
+ * - md_gen is set to generation number of machine descriptor from which this
+ *   data was read.
+ * Return: zero on success.
+ */
+static int
+load_tlb_cpu_mappings(cpuset_t **cpuid2pset, uint_t *nctxs, uint64_t *md_gen)
+{
+ mde_str_cookie_t cpu_sc, bck_sc;
+ int tlbs_idx, cp_idx;
+ mde_cookie_t root;
+ md_t *mdp = NULL;
+ mde_cookie_t *tlbs = NULL;
+ mde_cookie_t *cp = NULL;
+ uint64_t *cpids = NULL;
+ uint64_t nbit;
+ int ntlbs;
+ int ncp;
+ int retval = 1;
+ cpuset_t *ppset;
+
+ /* get MD handle, and string cookies for cpu and back nodes */
+ if ((mdp = md_get_handle()) == NULL ||
+ (cpu_sc = md_find_name(mdp, "cpu")) == MDE_INVAL_STR_COOKIE ||
+ (bck_sc = md_find_name(mdp, "back")) == MDE_INVAL_STR_COOKIE)
+ goto cleanup;
+
+ /* set generation number of current MD handle */
+ *md_gen = md_get_gen(mdp);
+
+ /* Find root element, and search for all TLBs in MD */
+ if ((root = md_root_node(mdp)) == MDE_INVAL_ELEM_COOKIE ||
+ (ntlbs = md_alloc_scan_dag(mdp, root, "tlb", "fwd", &tlbs)) <= 0)
+ goto cleanup;
+
+ cp = kmem_alloc(sizeof (mde_cookie_t) * NCPU, KM_SLEEP);
+ cpids = kmem_alloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
+
+ /*
+ * Build processor sets, one per possible context domain. For each tlb,
+ * search for connected CPUs. If any CPU is already in a set, then add
+ * all the TLB's CPUs to that set. Otherwise, create and populate a new
+ * pset. Thus, a single pset is built to represent multiple TLBs if
+ * they have CPUs in common.
+ */
+ for (tlbs_idx = 0; tlbs_idx < ntlbs; tlbs_idx++) {
+ ncp = md_scan_dag(mdp, tlbs[tlbs_idx], cpu_sc, bck_sc, cp);
+ if (ncp < 0)
+ goto cleanup;
+ else if (ncp == 0)
+ continue;
+
+ /* Get the id and number of contexts for each cpu */
+ for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
+ mde_cookie_t c = cp[cp_idx];
+
+ if (md_get_prop_val(mdp, c, "id", &cpids[cp_idx]))
+ goto cleanup;
+ if (md_get_prop_val(mdp, c, "mmu-#context-bits", &nbit))
+ nbit = MMU_INFO_CTXBITS_MIN;
+ nctxs[cpids[cp_idx]] = MMU_INFO_BNCTXS(nbit);
+ }
+
+ /*
+ * If a CPU is already in a set as shown by cpuid2pset[], then
+ * use that set.
+ */
+ for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
+ ASSERT(cpids[cp_idx] < NCPU);
+ ppset = cpuid2pset[cpids[cp_idx]];
+ if (ppset != NULL)
+ break;
+ }
+
+ /* No CPU has a set. Create a new one. */
+ if (ppset == NULL) {
+ ppset = kmem_alloc(sizeof (cpuset_t), KM_SLEEP);
+ CPUSET_ZERO(*ppset);
+ }
+
+ /* Add every CPU to the set, and record the set assignment. */
+ for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
+ cpuid2pset[cpids[cp_idx]] = ppset;
+ CPUSET_ADD(*ppset, cpids[cp_idx]);
+ }
+ }
+
+ retval = 0;
+
+cleanup:
+ if (tlbs != NULL)
+ md_free_scan_dag(mdp, &tlbs);
+ if (cp != NULL)
+ kmem_free(cp, sizeof (mde_cookie_t) * NCPU);
+ if (cpids != NULL)
+ kmem_free(cpids, sizeof (uint64_t) * NCPU);
+ if (mdp != NULL)
+ (void) md_fini_handle(mdp);
+
+ return (retval);
+}
+
+/*
+ * Return MMU info based on cpuid.
+ *
+ * Algorithm:
+ * Read machine descriptor and find all CPUs that share the same TLB with CPU
+ * specified by cpuid. Go through found CPUs and see if any one of them already
+ * has MMU index, if so, set index based on that value. If CPU does not share
+ * TLB with any other CPU or if none of those CPUs has mmu_ctx pointer, find the
+ * smallest available MMU index and give it to current CPU. If no available
+ * domain, perform a round robin, and start assigning from the beginning.
+ *
+ * For optimization reasons, this function uses a cache to store all TLB to CPU
+ * mappings, and updates them only when machine descriptor graph is changed.
+ * Because of this, and because we search MMU table for smallest index id, this
+ * function needs to be serialized which is protected by cpu_lock.
+ */
+void
+plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
+{
+ static cpuset_t **cpuid2pset = NULL;
+ static uint_t *nctxs;
+ static uint_t next_domain = 0;
+ static uint64_t md_gen = MDESC_INVAL_GEN;
+ uint64_t current_gen;
+ int idx;
+ cpuset_t cpuid_pset;
+ processorid_t id;
+ cpu_t *cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ current_gen = md_get_current_gen();
+
+ /*
+	 * Load TLB CPU mappings only if the MD generation has changed. FW that
+	 * does not provide a generation number always returns MDESC_INVAL_GEN,
+	 * and as a result the MD is read here only once on such machines: when
+	 * cpuid2pset is NULL
+ */
+ if (current_gen != md_gen || cpuid2pset == NULL) {
+ if (cpuid2pset == NULL) {
+ cpuid2pset = kmem_zalloc(sizeof (cpuset_t *) * NCPU,
+ KM_SLEEP);
+ nctxs = kmem_alloc(sizeof (uint_t) * NCPU, KM_SLEEP);
+ } else {
+ /* clean cpuid2pset[NCPU], before loading new values */
+ for (idx = 0; idx < NCPU; idx++) {
+ cpuset_t *pset = cpuid2pset[idx];
+
+ if (pset != NULL) {
+ for (;;) {
+ CPUSET_FIND(*pset, id);
+ if (id == CPUSET_NOTINSET)
+ break;
+ CPUSET_DEL(*pset, id);
+ ASSERT(id < NCPU);
+ cpuid2pset[id] = NULL;
+ }
+ ASSERT(cpuid2pset[idx] == NULL);
+ kmem_free(pset, sizeof (cpuset_t));
+ }
+ }
+ }
+
+ if (load_tlb_cpu_mappings(cpuid2pset, nctxs, &md_gen))
+ goto error_panic;
+ }
+
+ info->mmu_nctxs = nctxs[cpuid];
+
+ if (cpuid2pset[cpuid] == NULL)
+ goto error_panic;
+
+ cpuid_pset = *cpuid2pset[cpuid];
+ CPUSET_DEL(cpuid_pset, cpuid);
+
+ /* Search for a processor in the same TLB pset with MMU context */
+ for (;;) {
+ CPUSET_FIND(cpuid_pset, id);
+
+ if (id == CPUSET_NOTINSET)
+ break;
+
+ ASSERT(id < NCPU);
+ cp = cpu[id];
+ if (cp != NULL && CPU_MMU_CTXP(cp) != NULL) {
+ info->mmu_idx = CPU_MMU_IDX(cp);
+
+ return;
+ }
+ CPUSET_DEL(cpuid_pset, id);
+ }
+
+ /*
+ * No CPU in the TLB pset has a context domain yet.
+ * Use next_domain if available, or search for an unused domain, or
+ * overload next_domain, in that order. Overloading is necessary when
+ * the number of TLB psets is greater than max_mmu_ctxdoms.
+ */
+ idx = next_domain;
+
+ if (mmu_ctxs_tbl[idx] != NULL) {
+ for (idx = 0; idx < max_mmu_ctxdoms; idx++)
+ if (mmu_ctxs_tbl[idx] == NULL)
+ break;
+ if (idx == max_mmu_ctxdoms) {
+ /* overload next_domain */
+ idx = next_domain;
+
+ if (info->mmu_nctxs < sfmmu_ctxdom_nctxs(idx))
+ cmn_err(CE_PANIC, "max_mmu_ctxdoms is too small"
+ " to support CPUs with different nctxs");
+ }
+ }
+
+ info->mmu_idx = idx;
+ next_domain = (idx + 1) % max_mmu_ctxdoms;
+
+ return;
+
+error_panic:
+ cmn_err(CE_PANIC, "!cpu%d: failed to get MMU CTX domain index", cpuid);
+}
diff --git a/usr/src/uts/sun4v/os/mach_descrip.c b/usr/src/uts/sun4v/os/mach_descrip.c
index d69c6a6480..07f2292c5e 100644
--- a/usr/src/uts/sun4v/os/mach_descrip.c
+++ b/usr/src/uts/sun4v/os/mach_descrip.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Kernel Machine Description (MD)
*
@@ -861,3 +859,23 @@ md_free_scan_dag(md_t *ptr,
mdp->freep(*list, sizeof (mde_cookie_t) * mdp->node_count);
}
+
+/*
+ * Return generation number of current machine descriptor. Can be used for
+ * performance purposes to avoid requesting new md handle just to see if graph
+ * was updated.
+ */
+uint64_t
+md_get_current_gen(void)
+{
+ uint64_t gen = MDESC_INVAL_GEN;
+
+ mutex_enter(&curr_mach_descrip_lock);
+
+ if (curr_mach_descrip != NULL)
+ gen = (curr_mach_descrip->gen);
+
+ mutex_exit(&curr_mach_descrip_lock);
+
+ return (gen);
+}
diff --git a/usr/src/uts/sun4v/os/mach_startup.c b/usr/src/uts/sun4v/os/mach_startup.c
index 1dd229f317..f8e3b03de7 100644
--- a/usr/src/uts/sun4v/os/mach_startup.c
+++ b/usr/src/uts/sun4v/os/mach_startup.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -527,3 +527,15 @@ load_mach_drivers(void)
/* virtual console concentrator */
(void) i_ddi_attach_hw_nodes("vcc");
}
+
+void
+set_platform_defaults(void)
+{
+ /*
+ * Allow at most one context domain per 8 CPUs, which is ample for
+ * good performance. Do not make this too large, because it
+ * increases the space consumed in the per-process sfmmu structure.
+ */
+ if (max_mmu_ctxdoms == 0)
+ max_mmu_ctxdoms = (NCPU + 7) / 8;
+}
diff --git a/usr/src/uts/sun4v/os/suspend.c b/usr/src/uts/sun4v/os/suspend.c
index 42b288fe52..6a8302115a 100644
--- a/usr/src/uts/sun4v/os/suspend.c
+++ b/usr/src/uts/sun4v/os/suspend.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -44,6 +44,7 @@
#include <sys/sunddi.h>
#include <sys/cpupart.h>
#include <sys/hsvc.h>
+#include <vm/hat_sfmmu.h>
/*
* Sun4v OS Suspend
@@ -125,10 +126,9 @@ static int enable_user_tick_stick_emulation = 1;
boolean_t tick_stick_emulation_active = B_FALSE;
/*
- * Controls whether or not MD information is refreshed after a
- * successful suspend and resume. When non-zero, after a successful
- * suspend and resume, the MD will be downloaded, cpunodes updated,
- * and processor grouping information recalculated.
+ * When non-zero, after a successful suspend and resume, cpunodes, CPU HW
+ * sharing data structures, and processor groups will be updated using
+ * information from the updated MD.
*/
static int suspend_update_cpu_mappings = 1;
@@ -243,15 +243,8 @@ update_cpu_mappings(void)
md_t *mdp;
processorid_t id;
cpu_t *cp;
- int rv;
cpu_pg_t *pgps[NCPU];
- /* Download the latest MD */
- if ((rv = mach_descrip_update()) != 0) {
- DBG("suspend: mach_descrip_update error: %d", rv);
- return;
- }
-
if ((mdp = md_get_handle()) == NULL) {
DBG("suspend: md_get_handle failed");
return;
@@ -491,6 +484,8 @@ suspend_start(char *error_reason, size_t max_reason_len)
ASSERT(suspend_supported());
DBG("suspend: %s", __func__);
+ sfmmu_ctxdoms_lock();
+
mutex_enter(&cpu_lock);
/* Suspend the watchdog */
@@ -535,6 +530,7 @@ suspend_start(char *error_reason, size_t max_reason_len)
start_cpus();
watchdog_resume();
mutex_exit(&cpu_lock);
+ sfmmu_ctxdoms_unlock();
DBG("suspend: failed, rv: %ld\n", rv);
return (rv);
}
@@ -561,6 +557,8 @@ suspend_start(char *error_reason, size_t max_reason_len)
tick_stick_emulation_active = B_TRUE;
}
+ sfmmu_ctxdoms_remove();
+
/* Resume cyclics, unpause CPUs */
cyclic_resume();
start_cpus();
@@ -575,6 +573,14 @@ suspend_start(char *error_reason, size_t max_reason_len)
mutex_exit(&cpu_lock);
+ /* Download the latest MD */
+ if ((rv = mach_descrip_update()) != 0)
+ cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
+ rv);
+
+ sfmmu_ctxdoms_update();
+ sfmmu_ctxdoms_unlock();
+
/* Get new MD, update CPU mappings/relationships */
if (suspend_update_cpu_mappings)
update_cpu_mappings();
diff --git a/usr/src/uts/sun4v/sys/mach_descrip.h b/usr/src/uts/sun4v/sys/mach_descrip.h
index a003a9b23b..7df7acc6ad 100644
--- a/usr/src/uts/sun4v/sys/mach_descrip.h
+++ b/usr/src/uts/sun4v/sys/mach_descrip.h
@@ -20,15 +20,13 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _MACH_DESCRIP_H
#define _MACH_DESCRIP_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -85,6 +83,7 @@ extern caddr_t md_get_md_raw(md_t *);
extern int md_alloc_scan_dag(md_t *, mde_cookie_t, char *, char *,
mde_cookie_t **);
extern void md_free_scan_dag(md_t *, mde_cookie_t **);
+extern uint64_t md_get_current_gen(void);
#ifdef __cplusplus
}