Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--   usr/src/uts/common/disp/cpupart.c  |  14
-rw-r--r--   usr/src/uts/common/disp/disp.c     | 204
-rw-r--r--   usr/src/uts/common/disp/thread.c   |   7
-rw-r--r--   usr/src/uts/common/fs/zfs/zvol.c   |  27
-rw-r--r--   usr/src/uts/common/io/vnd/vnd.c    |   9
-rw-r--r--   usr/src/uts/common/os/cpu.c        |  21
-rw-r--r--   usr/src/uts/common/os/lgrp.c       |   4
-rw-r--r--   usr/src/uts/common/sys/cpuvar.h    |  14
-rw-r--r--   usr/src/uts/common/sys/disp.h      |  13
-rw-r--r--   usr/src/uts/common/sys/thread.h    |   3
10 files changed, 195 insertions, 121 deletions
diff --git a/usr/src/uts/common/disp/cpupart.c b/usr/src/uts/common/disp/cpupart.c index c260329c61..4ddc568187 100644 --- a/usr/src/uts/common/disp/cpupart.c +++ b/usr/src/uts/common/disp/cpupart.c @@ -20,6 +20,8 @@ */ /* * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright 2018 Joyent, Inc. */ #include <sys/types.h> @@ -324,7 +326,7 @@ cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced) kthread_t *t; int move_threads = 1; lgrp_id_t lgrpid; - proc_t *p; + proc_t *p; int lgrp_diff_lpl; lpl_t *cpu_lpl; int ret; @@ -569,8 +571,8 @@ again: /* Update CPU last ran on if it was this CPU */ if (t->t_cpu == cp && t->t_cpupart == oldpp && t->t_bound_cpu != cp) { - t->t_cpu = disp_lowpri_cpu(ncp, - t->t_lpl, t->t_pri, NULL); + t->t_cpu = disp_lowpri_cpu(ncp, t, + t->t_pri); } t = t->t_forw; } while (t != p->p_tlist); @@ -622,8 +624,8 @@ again: /* Update CPU last ran on if it was this CPU */ if (t->t_cpu == cp && t->t_cpupart == oldpp && t->t_bound_cpu != cp) { - t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl, - t->t_pri, NULL); + t->t_cpu = disp_lowpri_cpu(ncp, t, + t->t_pri); } t = t->t_next; @@ -883,7 +885,7 @@ cpupart_create(psetid_t *psid) static int cpupart_unbind_threads(cpupart_t *pp, boolean_t unbind_all) { - void *projbuf, *zonebuf; + void *projbuf, *zonebuf; kthread_t *t; proc_t *p; int err = 0; diff --git a/usr/src/uts/common/disp/disp.c b/usr/src/uts/common/disp/disp.c index 5f9c2c68a2..4898a18bf2 100644 --- a/usr/src/uts/common/disp/disp.c +++ b/usr/src/uts/common/disp/disp.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -60,6 +60,7 @@ #include <sys/dtrace.h> #include <sys/sdt.h> #include <sys/archsystm.h> +#include <sys/ht.h> #include <vm/as.h> @@ -1135,15 +1136,13 @@ swtch_to(kthread_t *next) */ } -#define CPU_IDLING(pri) ((pri) == -1) - static void cpu_resched(cpu_t *cp, pri_t tpri) { int call_poke_cpu = 0; pri_t cpupri = cp->cpu_dispatch_pri; - if (!CPU_IDLING(cpupri) && (cpupri < tpri)) { + if (cpupri != CPU_IDLE_PRI && cpupri < tpri) { TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED, "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri); if (tpri >= upreemptpri && cp->cpu_runrun == 0) { @@ -1239,17 +1238,17 @@ setbackdq(kthread_t *tp) /* * We'll generally let this thread continue to run where * it last ran...but will consider migration if: - * - We thread probably doesn't have much cache warmth. + * - The thread probably doesn't have much cache warmth. + * - HT exclusion would prefer us to run elsewhere * - The CPU where it last ran is the target of an offline * request. - * - The thread last ran outside it's home lgroup. + * - The thread last ran outside its home lgroup. */ if ((!THREAD_HAS_CACHE_WARMTH(tp)) || - (tp->t_cpu == cpu_inmotion)) { - cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL); - } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) { - cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, - self ? 
tp->t_cpu : NULL); + !ht_should_run(tp, tp->t_cpu) || + (tp->t_cpu == cpu_inmotion) || + !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) { + cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); } else { cp = tp->t_cpu; } @@ -1278,7 +1277,8 @@ setbackdq(kthread_t *tp) newcp = cp->cpu_next_part; } - if (RUNQ_LEN(newcp, tpri) < qlen) { + if (ht_should_run(tp, newcp) && + RUNQ_LEN(newcp, tpri) < qlen) { DTRACE_PROBE3(runq__balance, kthread_t *, tp, cpu_t *, cp, cpu_t *, newcp); @@ -1289,8 +1289,8 @@ setbackdq(kthread_t *tp) /* * Migrate to a cpu in the new partition. */ - cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, - tp->t_lpl, tp->t_pri, NULL); + cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp, + tp->t_pri); } ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); } else { @@ -1427,7 +1427,7 @@ setfrontdq(kthread_t *tp) /* * We'll generally let this thread continue to run * where it last ran, but will consider migration if: - * - The thread last ran outside it's home lgroup. + * - The thread last ran outside its home lgroup. * - The CPU where it last ran is the target of an * offline request (a thread_nomigrate() on the in * motion CPU relies on this when forcing a preempt). @@ -1435,21 +1435,18 @@ setfrontdq(kthread_t *tp) * it last ran, and it is considered not likely to * have significant cache warmth. */ - if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) || - (cp == cpu_inmotion)) { - cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, - (tp == curthread) ? cp : NULL); - } else if ((tpri < cp->cpu_disp->disp_maxrunpri) && - (!THREAD_HAS_CACHE_WARMTH(tp))) { - cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, - NULL); + if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) || + cp == cpu_inmotion || + (tpri < cp->cpu_disp->disp_maxrunpri && + !THREAD_HAS_CACHE_WARMTH(tp))) { + cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); } } else { /* * Migrate to a cpu in the new partition. */ cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, - tp->t_lpl, tp->t_pri, NULL); + tp, tp->t_pri); } ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); } else { @@ -1600,7 +1597,7 @@ setkpdq(kthread_t *tp, int borf) /* migrate to a cpu in the new partition */ cp = tp->t_cpupart->cp_cpulist; } - cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL); + cp = disp_lowpri_cpu(cp, tp, tp->t_pri); disp_lock_enter_high(&cp->cpu_disp->disp_lock); ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); @@ -2573,80 +2570,85 @@ disp_cpu_inactive(cpu_t *cp) } /* - * disp_lowpri_cpu - find CPU running the lowest priority thread. - * The hint passed in is used as a starting point so we don't favor - * CPU 0 or any other CPU. The caller should pass in the most recently - * used CPU for the thread. + * Return a score rating this CPU for running this thread: lower is better. * - * The lgroup and priority are used to determine the best CPU to run on - * in a NUMA machine. The lgroup specifies which CPUs are closest while - * the thread priority will indicate whether the thread will actually run - * there. To pick the best CPU, the CPUs inside and outside of the given - * lgroup which are running the lowest priority threads are found. The - * remote CPU is chosen only if the thread will not run locally on a CPU - * within the lgroup, but will run on the remote CPU. If the thread - * cannot immediately run on any CPU, the best local CPU will be chosen. + * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for + * curcpu (as that's our own priority). 
* - * The lpl specified also identifies the cpu partition from which - * disp_lowpri_cpu should select a CPU. + * If a cpu is the target of an offline request, then try to avoid it. * - * curcpu is used to indicate that disp_lowpri_cpu is being called on - * behalf of the current thread. (curthread is looking for a new cpu) - * In this case, cpu_dispatch_pri for this thread's cpu should be - * ignored. + * Otherwise we'll use double the effective dispatcher priority for the CPU. * - * If a cpu is the target of an offline request then try to avoid it. + * We do this so ht_adjust_cpu_score() can increment the score if needed, + * without ending up over-riding a dispatcher priority. + */ +static pri_t +cpu_score(cpu_t *cp, kthread_t *tp) +{ + pri_t score; + + if (tp == curthread && cp == curthread->t_cpu) + score = 2 * CPU_IDLE_PRI; + else if (cp == cpu_inmotion) + score = SHRT_MAX; + else + score = 2 * cp->cpu_dispatch_pri; + + if (2 * cp->cpu_disp->disp_maxrunpri > score) + score = 2 * cp->cpu_disp->disp_maxrunpri; + if (2 * cp->cpu_chosen_level > score) + score = 2 * cp->cpu_chosen_level; + + return (ht_adjust_cpu_score(tp, cp, score)); +} + +/* + * disp_lowpri_cpu - find a suitable CPU to run the given thread. + * + * We are looking for a CPU with an effective dispatch priority lower than the + * thread's, so that the thread will run immediately rather than be enqueued. + * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group. + * If we don't find an available CPU there, we will expand our search to include + * wider locality levels. (Note these groups are already divided by CPU + * partition.) + * + * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves on + * the best home CPU we found. * - * This function must be called at either high SPL, or with preemption - * disabled, so that the "hint" CPU cannot be removed from the online - * CPU list while we are traversing it. + * The hint passed in is used as a starting point so we don't favor CPU 0 or any + * other CPU. The caller should pass in the most recently used CPU for the + * thread; it's of course possible that this CPU isn't in the home lgroup. + * + * This function must be called at either high SPL, or with preemption disabled, + * so that the "hint" CPU cannot be removed from the online CPU list while we + * are traversing it. */ cpu_t * -disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu) +disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri) { cpu_t *bestcpu; cpu_t *besthomecpu; cpu_t *cp, *cpstart; - pri_t bestpri; - pri_t cpupri; - klgrpset_t done; - klgrpset_t cur_set; lpl_t *lpl_iter, *lpl_leaf; - int i; - /* - * Scan for a CPU currently running the lowest priority thread. - * Cannot get cpu_lock here because it is adaptive. - * We do not require lock on CPU list. - */ ASSERT(hint != NULL); - ASSERT(lpl != NULL); - ASSERT(lpl->lpl_ncpu > 0); + ASSERT(tp->t_lpl->lpl_ncpu > 0); - /* - * First examine local CPUs. Note that it's possible the hint CPU - * passed in in remote to the specified home lgroup. If our priority - * isn't sufficient enough such that we can run immediately at home, - * then examine CPUs remote to our home lgroup. - * We would like to give preference to CPUs closest to "home". - * If we can't find a CPU where we'll run at a given level - * of locality, we expand our search to include the next level. 
- */ bestcpu = besthomecpu = NULL; klgrpset_clear(done); - /* start with lpl we were passed */ - lpl_iter = lpl; + lpl_iter = tp->t_lpl; do { + pri_t best = SHRT_MAX; + klgrpset_t cur_set; - bestpri = SHRT_MAX; klgrpset_clear(cur_set); - for (i = 0; i < lpl_iter->lpl_nrset; i++) { + for (int i = 0; i < lpl_iter->lpl_nrset; i++) { lpl_leaf = lpl_iter->lpl_rset[i]; if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid)) continue; @@ -2659,34 +2661,25 @@ disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu) cp = cpstart = lpl_leaf->lpl_cpus; do { - if (cp == curcpu) - cpupri = -1; - else if (cp == cpu_inmotion) - cpupri = SHRT_MAX; - else - cpupri = cp->cpu_dispatch_pri; - if (cp->cpu_disp->disp_maxrunpri > cpupri) - cpupri = cp->cpu_disp->disp_maxrunpri; - if (cp->cpu_chosen_level > cpupri) - cpupri = cp->cpu_chosen_level; - if (cpupri < bestpri) { - if (CPU_IDLING(cpupri)) { - ASSERT((cp->cpu_flags & - CPU_QUIESCED) == 0); - return (cp); - } + pri_t score = cpu_score(cp, tp); + + if (score < best) { + best = score; bestcpu = cp; - bestpri = cpupri; + + /* An idle CPU: we're done. */ + if (score / 2 == CPU_IDLE_PRI) + goto out; } } while ((cp = cp->cpu_next_lpl) != cpstart); } - if (bestcpu && (tpri > bestpri)) { - ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0); - return (bestcpu); - } + if (bestcpu != NULL && tpri > (best / 2)) + goto out; + if (besthomecpu == NULL) besthomecpu = bestcpu; + /* * Add the lgrps we just considered to the "done" set */ @@ -2698,8 +2691,11 @@ disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu) * The specified priority isn't high enough to run immediately * anywhere, so just return the best CPU from the home lgroup. */ - ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0); - return (besthomecpu); + bestcpu = besthomecpu; + +out: + ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0); + return (bestcpu); } /* @@ -2719,3 +2715,19 @@ static void generic_enq_thread(cpu_t *cpu, int bound) { } + +cpu_t * +disp_choose_best_cpu(void) +{ + kthread_t *t = curthread; + cpu_t *curcpu = CPU; + + ASSERT(t->t_preempt > 0); + ASSERT(t->t_state == TS_ONPROC); + ASSERT(t->t_schedflag & TS_VCPU); + + if (ht_should_run(t, curcpu)) + return (curcpu); + + return (disp_lowpri_cpu(curcpu, t, t->t_pri)); +} diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index af000bf4f1..c923ba5d1a 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -74,6 +74,7 @@ #include <sys/waitq.h> #include <sys/cpucaps.h> #include <sys/kiconv.h> +#include <sys/ht.h> #ifndef STACK_GROWTH_DOWN #error Stacks do not grow downward; 3b2 zombie attack detected! @@ -507,8 +508,8 @@ thread_create( if (CPU->cpu_part == &cp_default) t->t_cpu = CPU; else - t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl, - t->t_pri, NULL); + t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t, + t->t_pri); t->t_disp_queue = t->t_cpu->cpu_disp; kpreempt_enable(); @@ -1422,6 +1423,8 @@ thread_unpin() itp = t->t_intr; /* interrupted thread */ t->t_intr = NULL; /* clear interrupt ptr */ + ht_end_intr(); + /* * Get state from interrupt thread for the one * it interrupted. diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 03d711838c..2127de2bf0 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -25,8 +25,8 @@ * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. 
All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright 2018 Joyent, Inc. */ /* @@ -90,6 +90,7 @@ #include <sys/zfeature.h> #include <sys/zio_checksum.h> #include <sys/zil_impl.h> +#include <sys/ht.h> #include "zfs_namecheck.h" @@ -1281,6 +1282,8 @@ zvol_strategy(buf_t *bp) (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) && !doread && !is_dumpified; + ht_begin_unsafe(); + /* * There must be no buffer changes when doing a dmu_sync() because * we can't change the data whilst calculating the checksum. @@ -1328,6 +1331,8 @@ zvol_strategy(buf_t *bp) zil_commit(zv->zv_zilog, ZVOL_OBJ); biodone(bp); + ht_end_unsafe(); + return (0); } @@ -1409,6 +1414,8 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr) return (error); } + ht_begin_unsafe(); + DTRACE_PROBE3(zvol__uio__start, dev_t, dev, uio_t *, uio, int, 0); mutex_enter(&zonep->zone_vfs_lock); @@ -1469,6 +1476,8 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr) DTRACE_PROBE4(zvol__uio__done, dev_t, dev, uio_t *, uio, int, 0, int, error); + ht_end_unsafe(); + return (error); } @@ -1501,6 +1510,8 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr) return (error); } + ht_begin_unsafe(); + DTRACE_PROBE3(zvol__uio__start, dev_t, dev, uio_t *, uio, int, 1); /* @@ -1549,6 +1560,8 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr) DTRACE_PROBE4(zvol__uio__done, dev_t, dev, uio_t *, uio, int, 1, int, error); + ht_end_unsafe(); + mutex_enter(&zonep->zone_vfs_lock); zonep->zone_vfs_rwstats.writes++; zonep->zone_vfs_rwstats.nwritten += tot_bytes; @@ -1818,11 +1831,17 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) case DKIOCFLUSHWRITECACHE: dkc = (struct dk_callback *)arg; mutex_exit(&zfsdev_state_lock); + + ht_begin_unsafe(); + zil_commit(zv->zv_zilog, ZVOL_OBJ); if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { (*dkc->dkc_callback)(dkc->dkc_cookie, error); error = 0; } + + ht_end_unsafe(); + return (error); case DKIOCGETWCE: @@ -1847,7 +1866,9 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) } else { zv->zv_flags &= ~ZVOL_WCE; mutex_exit(&zfsdev_state_lock); + ht_begin_unsafe(); zil_commit(zv->zv_zilog, ZVOL_OBJ); + ht_end_unsafe(); } return (0); } @@ -1900,6 +1921,8 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) mutex_exit(&zfsdev_state_lock); + ht_begin_unsafe(); + rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, RL_WRITER); tx = dmu_tx_create(zv->zv_objset); @@ -1932,6 +1955,8 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) zil_commit(zv->zv_zilog, ZVOL_OBJ); } + ht_end_unsafe(); + return (error); } diff --git a/usr/src/uts/common/io/vnd/vnd.c b/usr/src/uts/common/io/vnd/vnd.c index 5a25ed22d5..d03c7ce4ec 100644 --- a/usr/src/uts/common/io/vnd/vnd.c +++ b/usr/src/uts/common/io/vnd/vnd.c @@ -830,6 +830,7 @@ #include <sys/disp.h> #include <sys/random.h> #include <sys/gsqueue.h> +#include <sys/ht.h> #include <inet/ip.h> #include <inet/ip6.h> @@ -3716,6 +3717,12 @@ vnd_squeue_tx_drain(void *arg, mblk_t *drain_mp, gsqueue_t *gsp, void *dummy) bsize = vsp->vns_bsize; mutex_exit(&vsp->vns_lock); + /* + * We're potentially going deep into the networking layer; make sure the + * guest can't run concurrently. 
+ */ + ht_begin_unsafe(); + nmps = 0; mptot = 0; blocked = B_FALSE; @@ -3736,6 +3743,8 @@ vnd_squeue_tx_drain(void *arg, mblk_t *drain_mp, gsqueue_t *gsp, void *dummy) } } + ht_end_unsafe(); + empty = vnd_dq_is_empty(&vsp->vns_dq_write); /* diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 2efb68889c..4648dae9dd 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -400,6 +400,9 @@ force_thread_migrate(kthread_id_t tp) * CPUs prior to a successful return, it should take extra precautions (such as * their own call to kpreempt_disable) to ensure that safety. * + * CPU_BEST can be used to pick a "best" CPU to migrate to, including + * potentially the current CPU. + * * A CPU affinity reference count is maintained by thread_affinity_set and * thread_affinity_clear (incrementing and decrementing it, respectively), * maintaining CPU affinity while the count is non-zero, and allowing regions @@ -416,6 +419,10 @@ thread_affinity_set(kthread_id_t t, int cpu_id) VERIFY3P(t, ==, curthread); kpreempt_disable(); cp = CPU; + } else if (cpu_id == CPU_BEST) { + VERIFY3P(t, ==, curthread); + kpreempt_disable(); + cp = disp_choose_best_cpu(); } else { /* * We should be asserting that cpu_lock is held here, but @@ -453,9 +460,8 @@ thread_affinity_set(kthread_id_t t, int cpu_id) thread_unlock(t); } - if (cpu_id == CPU_CURRENT) { + if (cpu_id == CPU_CURRENT || cpu_id == CPU_BEST) kpreempt_enable(); - } } /* @@ -1490,8 +1496,8 @@ again: for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) { * Update CPU last ran on if it was this CPU */ if (t->t_cpu == cp && t->t_bound_cpu != cp) - t->t_cpu = disp_lowpri_cpu(ncp, - t->t_lpl, t->t_pri, NULL); + t->t_cpu = disp_lowpri_cpu(ncp, t, + t->t_pri); ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || t->t_weakbound_cpu == cp); @@ -1533,10 +1539,9 @@ again: for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) { * Update CPU last ran on if it was this CPU */ - if (t->t_cpu == cp && t->t_bound_cpu != cp) { - t->t_cpu = disp_lowpri_cpu(ncp, - t->t_lpl, t->t_pri, NULL); - } + if (t->t_cpu == cp && t->t_bound_cpu != cp) + t->t_cpu = disp_lowpri_cpu(ncp, t, t->t_pri); + ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || t->t_weakbound_cpu == cp); t = t->t_next; diff --git a/usr/src/uts/common/os/lgrp.c b/usr/src/uts/common/os/lgrp.c index 6288f47bed..6f6aced619 100644 --- a/usr/src/uts/common/os/lgrp.c +++ b/usr/src/uts/common/os/lgrp.c @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ /* @@ -90,6 +91,7 @@ #include <sys/pg.h> #include <sys/promif.h> #include <sys/sdt.h> +#include <sys/ht.h> lgrp_gen_t lgrp_gen = 0; /* generation of lgroup hierarchy */ lgrp_t *lgrp_table[NLGRPS_MAX]; /* table of all initialized lgrp_t structs */ @@ -520,6 +522,8 @@ lgrp_main_mp_init(void) { klgrpset_t changed; + ht_init(); + /* * Update lgroup topology (if necessary) */ diff --git a/usr/src/uts/common/sys/cpuvar.h b/usr/src/uts/common/sys/cpuvar.h index 3ee4e70eec..2cfe5116d9 100644 --- a/usr/src/uts/common/sys/cpuvar.h +++ b/usr/src/uts/common/sys/cpuvar.h @@ -23,7 +23,7 @@ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright 2017 RackTop Systems. 
*/ @@ -540,13 +540,19 @@ extern struct cpu *curcpup(void); #endif /* - * CPU_CURRENT indicates to thread_affinity_set to use CPU->cpu_id - * as the target and to grab cpu_lock instead of requiring the caller - * to grab it. + * CPU_CURRENT indicates to thread_affinity_set() to use whatever curthread's + * current CPU is; holding cpu_lock is not required. */ #define CPU_CURRENT -3 /* + * CPU_BEST can be used by thread_affinity_set() callers to set affinity to a + * good CPU (in particular, an ht_acquire()-friendly choice); holding cpu_lock + * is not required. + */ +#define CPU_BEST -4 + +/* * Per-CPU statistics * * cpu_stats_t contains numerous system and VM-related statistics, in the form diff --git a/usr/src/uts/common/sys/disp.h b/usr/src/uts/common/sys/disp.h index b324f4d323..cb3711edcd 100644 --- a/usr/src/uts/common/sys/disp.h +++ b/usr/src/uts/common/sys/disp.h @@ -23,6 +23,8 @@ * Use is subject to license terms. * * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -63,11 +65,11 @@ typedef struct _disp { /* * Priorities: * disp_maxrunpri is the maximum run priority of runnable threads - * on this queue. It is -1 if nothing is runnable. + * on this queue. It is -1 if nothing is runnable. * * disp_max_unbound_pri is the maximum run priority of threads on * this dispatch queue but runnable by any CPU. This may be left - * artificially high, then corrected when some CPU tries to take + * artificially high, then corrected when some CPU tries to take * an unbound thread. It is -1 if nothing is runnable. */ pri_t disp_maxrunpri; /* maximum run priority */ @@ -151,8 +153,7 @@ extern void dq_srundec(kthread_t *); extern void cpu_rechoose(kthread_t *); extern void cpu_surrender(kthread_t *); extern void kpreempt(int); -extern struct cpu *disp_lowpri_cpu(struct cpu *, struct lgrp_ld *, pri_t, - struct cpu *); +extern struct cpu *disp_lowpri_cpu(struct cpu *, kthread_t *, pri_t); extern int disp_bound_threads(struct cpu *, int); extern int disp_bound_anythreads(struct cpu *, int); extern int disp_bound_partition(struct cpu *, int); @@ -167,6 +168,8 @@ extern void resume_from_zombie(kthread_t *) extern void disp_swapped_enq(kthread_t *); extern int disp_anywork(void); +extern struct cpu *disp_choose_best_cpu(void); + #define KPREEMPT_SYNC (-1) #define kpreempt_disable() \ { \ @@ -183,6 +186,8 @@ extern int disp_anywork(void); #endif /* _KERNEL */ +#define CPU_IDLE_PRI (-1) + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index af9fcb75cf..678d356564 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -354,6 +354,8 @@ typedef struct _kthread { kmutex_t t_wait_mutex; /* used in CV wait functions */ char *t_name; /* thread name */ + + uint64_t t_unsafe; /* unsafe to run with HT VCPU thread */ } kthread_t; /* @@ -417,6 +419,7 @@ typedef struct _kthread { #define TS_SIGNALLED 0x0010 /* thread was awakened by cv_signal() */ #define TS_PROJWAITQ 0x0020 /* thread is on its project's waitq */ #define TS_ZONEWAITQ 0x0040 /* thread is on its zone's waitq */ +#define TS_VCPU 0x0080 /* thread will enter guest context */ #define TS_CSTART 0x0100 /* setrun() by continuelwps() */ #define TS_UNPAUSE 0x0200 /* setrun() by unpauselwps() */ #define TS_XSTART 0x0400 /* setrun() by SIGCONT */ |
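For context, the pattern the zvol and vnd changes above follow is to bracket any code path that may expose guest-visible data with ht_begin_unsafe()/ht_end_unsafe(), and to let a thread that will enter guest context mark itself TS_VCPU and rebind to an ht_acquire()-friendly CPU via thread_affinity_set(curthread, CPU_BEST) (which ends up in disp_choose_best_cpu()). The sketch below is illustrative only: it is not part of this commit, the my_driver_* names are hypothetical, and the TS_VCPU handling is schematic (the real VCPU code may use a dedicated helper rather than setting the flag directly).

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>		/* CPU_BEST */
#include <sys/disp.h>
#include <sys/ht.h>		/* ht_begin_unsafe(), ht_end_unsafe() */

/*
 * Hypothetical consumer of the HT-exclusion interfaces. my_driver_io()
 * stands in for a path such as zvol_strategy() that may touch
 * guest-visible data while it runs.
 */
static int
my_driver_io(void *arg)
{
	int error;

	/*
	 * Everything between ht_begin_unsafe() and ht_end_unsafe() is
	 * treated as unsafe to run alongside a sibling hyperthread that
	 * is currently in guest (VCPU) context.
	 */
	ht_begin_unsafe();
	error = my_driver_do_io(arg);	/* hypothetical helper */
	ht_end_unsafe();

	return (error);
}

/*
 * Hypothetical VCPU thread setup: mark the thread as one that will enter
 * guest context, then move it to whatever CPU the dispatcher considers a
 * good (HT-friendly) choice, per disp_choose_best_cpu().
 */
static void
my_vcpu_thread_init(void)
{
	thread_lock(curthread);
	curthread->t_schedflag |= TS_VCPU;	/* schematic; see note above */
	thread_unlock(curthread);

	thread_affinity_set(curthread, CPU_BEST);
	/* ... enter and run the guest ... */
	thread_affinity_clear(curthread);
}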