diff options
author | Rafael Vanoni <rafael.vanoni@sun.com> | 2009-11-13 01:32:32 -0800 |
---|---|---|
committer | Rafael Vanoni <rafael.vanoni@sun.com> | 2009-11-13 01:32:32 -0800 |
commit | d3d50737e566cade9a08d73d2af95105ac7cd960 (patch) | |
tree | 399b76a3f6bf107e2ff506d8f9c3333654b29fc7 /usr/src/uts/common/fs/zfs | |
parent | 1eff5f7761619411b3c31280fcd96cefc32968b7 (diff) | |
download | illumos-joyent-d3d50737e566cade9a08d73d2af95105ac7cd960.tar.gz |
PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
6860030 tickless clock requires a clock() decoupled lbolt / lbolt64
Portions contributed by Chad Mynhier <cmynhier@gmail.com>
Diffstat (limited to 'usr/src/uts/common/fs/zfs')
-rw-r--r-- | usr/src/uts/common/fs/zfs/arc.c | 40 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_zfetch.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dsl_scrub.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/metaslab.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/txg.c | 13 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_cache.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_queue.c | 5 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zil.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zio_inject.c | 5 |
9 files changed, 46 insertions, 37 deletions
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 8cc845ffeb..9c4fb291ca 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -1580,7 +1580,8 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, if (HDR_IO_IN_PROGRESS(ab) || (spa && ab->b_spa != spa) || (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) && - lbolt - ab->b_arc_access < arc_min_prefetch_lifespan)) { + ddi_get_lbolt() - ab->b_arc_access < + arc_min_prefetch_lifespan)) { skipped++; continue; } @@ -2051,12 +2052,12 @@ arc_reclaim_thread(void) } /* reset the growth delay for every reclaim */ - growtime = lbolt + (arc_grow_retry * hz); + growtime = ddi_get_lbolt() + (arc_grow_retry * hz); arc_kmem_reap_now(last_reclaim); arc_warm = B_TRUE; - } else if (arc_no_grow && lbolt >= growtime) { + } else if (arc_no_grow && ddi_get_lbolt() >= growtime) { arc_no_grow = FALSE; } @@ -2070,7 +2071,7 @@ arc_reclaim_thread(void) /* block until needed, or one second, whichever is shorter */ CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait(&arc_reclaim_thr_cv, - &arc_reclaim_thr_lock, (lbolt + hz)); + &arc_reclaim_thr_lock, (ddi_get_lbolt() + hz)); CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock); } @@ -2285,6 +2286,8 @@ out: static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) { + clock_t now; + ASSERT(MUTEX_HELD(hash_lock)); if (buf->b_state == arc_anon) { @@ -2295,11 +2298,13 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) */ ASSERT(buf->b_arc_access == 0); - buf->b_arc_access = lbolt; + buf->b_arc_access = ddi_get_lbolt(); DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); arc_change_state(arc_mru, buf, hash_lock); } else if (buf->b_state == arc_mru) { + now = ddi_get_lbolt(); + /* * If this buffer is here because of a prefetch, then either: * - clear the flag if this is a "referencing" read @@ -2315,7 +2320,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) buf->b_flags &= ~ARC_PREFETCH; 
ARCSTAT_BUMP(arcstat_mru_hits); } - buf->b_arc_access = lbolt; + buf->b_arc_access = now; return; } @@ -2324,13 +2329,13 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) * but it is still in the cache. Move it to the MFU * state. */ - if (lbolt > buf->b_arc_access + ARC_MINTIME) { + if (now > buf->b_arc_access + ARC_MINTIME) { /* * More than 125ms have passed since we * instantiated this buffer. Move it to the * most frequently used state. */ - buf->b_arc_access = lbolt; + buf->b_arc_access = now; DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); arc_change_state(arc_mfu, buf, hash_lock); } @@ -2353,7 +2358,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); } - buf->b_arc_access = lbolt; + buf->b_arc_access = ddi_get_lbolt(); arc_change_state(new_state, buf, hash_lock); ARCSTAT_BUMP(arcstat_mru_ghost_hits); @@ -2372,7 +2377,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) ASSERT(list_link_active(&buf->b_arc_node)); } ARCSTAT_BUMP(arcstat_mfu_hits); - buf->b_arc_access = lbolt; + buf->b_arc_access = ddi_get_lbolt(); } else if (buf->b_state == arc_mfu_ghost) { arc_state_t *new_state = arc_mfu; /* @@ -2390,7 +2395,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) new_state = arc_mru; } - buf->b_arc_access = lbolt; + buf->b_arc_access = ddi_get_lbolt(); DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); arc_change_state(new_state, buf, hash_lock); @@ -2400,7 +2405,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) * This buffer is on the 2nd Level ARC. 
*/ - buf->b_arc_access = lbolt; + buf->b_arc_access = ddi_get_lbolt(); DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); arc_change_state(arc_mfu, buf, hash_lock); } else { @@ -3741,7 +3746,7 @@ l2arc_write_size(l2arc_dev_t *dev) static clock_t l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) { - clock_t interval, next; + clock_t interval, next, now; /* * If the ARC lists are busy, increase our write rate; if the @@ -3754,7 +3759,8 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) else interval = hz * l2arc_feed_secs; - next = MAX(lbolt, MIN(lbolt + interval, began + interval)); + now = ddi_get_lbolt(); + next = MAX(now, MIN(now + interval, began + interval)); return (next); } @@ -4365,7 +4371,7 @@ l2arc_feed_thread(void) l2arc_dev_t *dev; spa_t *spa; uint64_t size, wrote; - clock_t begin, next = lbolt; + clock_t begin, next = ddi_get_lbolt(); CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG); @@ -4376,7 +4382,7 @@ l2arc_feed_thread(void) (void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, next); CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); - next = lbolt + hz; + next = ddi_get_lbolt() + hz; /* * Quick check for L2ARC devices. 
@@ -4387,7 +4393,7 @@ l2arc_feed_thread(void) continue; } mutex_exit(&l2arc_dev_mtx); - begin = lbolt; + begin = ddi_get_lbolt(); /* * This selects the next l2arc device to write to, and in diff --git a/usr/src/uts/common/fs/zfs/dmu_zfetch.c b/usr/src/uts/common/fs/zfs/dmu_zfetch.c index c51ba2a0b6..37037c30f6 100644 --- a/usr/src/uts/common/fs/zfs/dmu_zfetch.c +++ b/usr/src/uts/common/fs/zfs/dmu_zfetch.c @@ -226,7 +226,7 @@ dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs) break; } zs->zst_ph_offset = prefetch_tail; - zs->zst_last = lbolt; + zs->zst_last = ddi_get_lbolt(); } void @@ -577,7 +577,7 @@ dmu_zfetch_stream_reclaim(zfetch_t *zf) for (zs = list_head(&zf->zf_stream); zs; zs = list_next(&zf->zf_stream, zs)) { - if (((lbolt - zs->zst_last) / hz) > zfetch_min_sec_reap) + if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap) break; } @@ -708,7 +708,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) newstream->zst_ph_offset = zst.zst_len + zst.zst_offset; newstream->zst_cap = zst.zst_len; newstream->zst_direction = ZFETCH_FORWARD; - newstream->zst_last = lbolt; + newstream->zst_last = ddi_get_lbolt(); mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/usr/src/uts/common/fs/zfs/dsl_scrub.c b/usr/src/uts/common/fs/zfs/dsl_scrub.c index d1fb3d2e52..d511bb841a 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scrub.c +++ b/usr/src/uts/common/fs/zfs/dsl_scrub.c @@ -313,7 +313,7 @@ scrub_pause(dsl_pool_t *dp, const zbookmark_t *zb) mintime = dp->dp_scrub_isresilver ? 
zfs_resilver_min_time : zfs_scrub_min_time; - elapsed_ticks = lbolt64 - dp->dp_scrub_start_time; + elapsed_ticks = ddi_get_lbolt64() - dp->dp_scrub_start_time; if (elapsed_ticks > hz * zfs_txg_timeout || (elapsed_ticks > hz * mintime && txg_sync_waiting(dp))) { dprintf("pausing at %llx/%llx/%llx/%llx\n", @@ -836,7 +836,7 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) } dp->dp_scrub_pausing = B_FALSE; - dp->dp_scrub_start_time = lbolt64; + dp->dp_scrub_start_time = ddi_get_lbolt64(); dp->dp_scrub_isresilver = (dp->dp_scrub_min_txg != 0); spa->spa_scrub_active = B_TRUE; diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c index cdbed0144c..27dc2e4fd5 100644 --- a/usr/src/uts/common/fs/zfs/metaslab.c +++ b/usr/src/uts/common/fs/zfs/metaslab.c @@ -1040,7 +1040,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, /* * For testing, make some blocks above a certain size be gang blocks. */ - if (psize >= metaslab_gang_bang && (lbolt & 3) == 0) + if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) return (ENOSPC); /* diff --git a/usr/src/uts/common/fs/zfs/txg.c b/usr/src/uts/common/fs/zfs/txg.c index 344dcb7722..ceed1200ca 100644 --- a/usr/src/uts/common/fs/zfs/txg.c +++ b/usr/src/uts/common/fs/zfs/txg.c @@ -166,7 +166,8 @@ txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time) CALLB_CPR_SAFE_BEGIN(cpr); if (time) - (void) cv_timedwait(cv, &tx->tx_sync_lock, lbolt + time); + (void) cv_timedwait(cv, &tx->tx_sync_lock, + ddi_get_lbolt() + time); else cv_wait(cv, &tx->tx_sync_lock); @@ -377,7 +378,7 @@ txg_sync_thread(dsl_pool_t *dp) dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n", tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer); - delta = lbolt - start; + delta = ddi_get_lbolt() - start; timer = (delta > timeout ? 
0 : timeout - delta); } @@ -409,9 +410,9 @@ txg_sync_thread(dsl_pool_t *dp) txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); mutex_exit(&tx->tx_sync_lock); - start = lbolt; + start = ddi_get_lbolt(); spa_sync(dp->dp_spa, txg); - delta = lbolt - start; + delta = ddi_get_lbolt() - start; mutex_enter(&tx->tx_sync_lock); tx->tx_synced_txg = txg; @@ -478,7 +479,7 @@ void txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) { tx_state_t *tx = &dp->dp_tx; - int timeout = lbolt + ticks; + int timeout = ddi_get_lbolt() + ticks; /* don't delay if this txg could transition to quiesing immediately */ if (tx->tx_open_txg > txg || @@ -491,7 +492,7 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) return; } - while (lbolt < timeout && + while (ddi_get_lbolt() < timeout && tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, timeout); diff --git a/usr/src/uts/common/fs/zfs/vdev_cache.c b/usr/src/uts/common/fs/zfs/vdev_cache.c index 9b3a9f5a26..688d541344 100644 --- a/usr/src/uts/common/fs/zfs/vdev_cache.c +++ b/usr/src/uts/common/fs/zfs/vdev_cache.c @@ -172,7 +172,7 @@ vdev_cache_allocate(zio_t *zio) ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP); ve->ve_offset = offset; - ve->ve_lastused = lbolt; + ve->ve_lastused = ddi_get_lbolt(); ve->ve_data = zio_buf_alloc(VCBS); avl_add(&vc->vc_offset_tree, ve); @@ -189,9 +189,9 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) ASSERT(MUTEX_HELD(&vc->vc_lock)); ASSERT(ve->ve_fill_io == NULL); - if (ve->ve_lastused != lbolt) { + if (ve->ve_lastused != ddi_get_lbolt()) { avl_remove(&vc->vc_lastused_tree, ve); - ve->ve_lastused = lbolt; + ve->ve_lastused = ddi_get_lbolt(); avl_add(&vc->vc_lastused_tree, ve); } diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c index d98278ddef..21e60ce843 100644 --- a/usr/src/uts/common/fs/zfs/vdev_queue.c +++ b/usr/src/uts/common/fs/zfs/vdev_queue.c @@ -40,7 +40,7 @@ 
int zfs_vdev_max_pending = 10; int zfs_vdev_min_pending = 4; -/* deadline = pri + (lbolt >> time_shift) */ +/* deadline = pri + (ddi_get_lbolt64() >> time_shift) */ int zfs_vdev_time_shift = 6; /* exponential I/O issue ramp-up rate */ @@ -359,7 +359,8 @@ vdev_queue_io(zio_t *zio) mutex_enter(&vq->vq_lock); - zio->io_deadline = (lbolt64 >> zfs_vdev_time_shift) + zio->io_priority; + zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) + + zio->io_priority; vdev_queue_io_add(vq, zio); diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index dba690bbaf..40ba1a2d68 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -1647,7 +1647,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) txg_wait_synced(zilog->zl_dmu_pool, 0); zilog->zl_replay = B_TRUE; - zilog->zl_replay_time = lbolt; + zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, zh->zh_claim_txg); diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c index 5c4a6c3df6..e8f8f7b723 100644 --- a/usr/src/uts/common/fs/zfs/zio_inject.c +++ b/usr/src/uts/common/fs/zfs/zio_inject.c @@ -318,7 +318,7 @@ zio_handle_ignored_writes(zio_t *zio) */ if (handler->zi_record.zi_timer == 0) { if (handler->zi_record.zi_duration > 0) - handler->zi_record.zi_timer = lbolt64; + handler->zi_record.zi_timer = ddi_get_lbolt64(); else handler->zi_record.zi_timer = zio->io_txg; } @@ -355,7 +355,8 @@ spa_handle_ignored_writes(spa_t *spa) if (handler->zi_record.zi_duration > 0) { VERIFY(handler->zi_record.zi_timer == 0 || handler->zi_record.zi_timer + - handler->zi_record.zi_duration * hz > lbolt64); + handler->zi_record.zi_duration * hz > + ddi_get_lbolt64()); } else { /* duration is negative so the subtraction here adds */ VERIFY(handler->zi_record.zi_timer == 0 || |