Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--  usr/src/uts/common/Makefile.files | 1
-rw-r--r--  usr/src/uts/common/Makefile.rules | 12
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_brand.c | 13
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_lockd.c | 13
-rw-r--r--  usr/src/uts/common/brand/lx/sys/lx_brand.h | 1
-rw-r--r--  usr/src/uts/common/fs/zfs/dsl_scan.c | 131
-rw-r--r--  usr/src/uts/common/fs/zfs/spa.c | 14
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dsl_scan.h | 6
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/spa.h | 3
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/vdev.h | 3
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h | 5
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev.c | 110
-rw-r--r--  usr/src/uts/common/fs/zfs/zio_inject.c | 36
-rw-r--r--  usr/src/uts/common/inet/ipf/ip_fil_solaris.c | 73
-rw-r--r--  usr/src/uts/common/io/audio/impl/audio_grc3.h | 2
-rw-r--r--  usr/src/uts/common/io/bge/bge_main2.c | 67
-rw-r--r--  usr/src/uts/common/io/bnx/bnx.h | 6
-rw-r--r--  usr/src/uts/common/io/bnx/bnxsnd.c | 2
-rw-r--r--  usr/src/uts/common/io/chxge/pe.c | 12
-rw-r--r--  usr/src/uts/common/io/cmlb.c | 4
-rw-r--r--  usr/src/uts/common/io/cxgbe/common/common.h | 17
-rw-r--r--  usr/src/uts/common/io/cxgbe/common/t4_hw.c | 85
-rw-r--r--  usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h | 20
-rw-r--r--  usr/src/uts/common/io/cxgbe/shared/shared.c | 6
-rw-r--r--  usr/src/uts/common/io/cxgbe/shared/shared.h | 2
-rw-r--r--  usr/src/uts/common/io/cxgbe/t4nex/cudbg.h | 2
-rw-r--r--  usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c | 2
-rw-r--r--  usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c | 112
-rw-r--r--  usr/src/uts/common/io/e1000g/e1000g_alloc.c | 2
-rw-r--r--  usr/src/uts/common/io/mac/mac.c | 29
-rw-r--r--  usr/src/uts/common/io/mac/mac_client.c | 27
-rw-r--r--  usr/src/uts/common/io/mac/mac_provider.c | 17
-rw-r--r--  usr/src/uts/common/io/mac/mac_sched.c | 6
-rw-r--r--  usr/src/uts/common/io/mac/mac_util.c | 2
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.c | 50
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.h | 16
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_cmd.c | 101
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_gld.c | 162
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_intr.c | 1
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_reg.h | 55
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_ring.c | 105
-rw-r--r--  usr/src/uts/common/io/stream.c | 2
-rw-r--r--  usr/src/uts/common/klm/nlm_impl.h | 8
-rw-r--r--  usr/src/uts/common/klm/nlm_rpc_handle.c | 21
-rw-r--r--  usr/src/uts/common/klm/nlm_rpc_svc.c | 80
-rw-r--r--  usr/src/uts/common/klm/nlm_service.c | 120
-rw-r--r--  usr/src/uts/common/os/logsubr.c | 16
-rw-r--r--  usr/src/uts/common/os/strsubr.c | 1
-rw-r--r--  usr/src/uts/common/sys/Makefile | 1
-rw-r--r--  usr/src/uts/common/sys/bootbanner.h | 33
-rw-r--r--  usr/src/uts/common/sys/fibre-channel/fca/emlxs/emlxs_hw.h | 2
-rw-r--r--  usr/src/uts/common/sys/fs/sdev_impl.h | 1
-rw-r--r--  usr/src/uts/common/sys/mac.h | 10
-rw-r--r--  usr/src/uts/common/sys/mac_impl.h | 48
-rw-r--r--  usr/src/uts/common/sys/mac_provider.h | 3
-rw-r--r--  usr/src/uts/common/sys/smbios.h | 11
-rw-r--r--  usr/src/uts/common/sys/sysconfig.h | 3
-rw-r--r--  usr/src/uts/common/sys/systeminfo.h | 12
-rw-r--r--  usr/src/uts/common/sys/unistd.h | 25
-rw-r--r--  usr/src/uts/common/syscall/sysconfig.c | 4
-rw-r--r--  usr/src/uts/common/syscall/systeminfo.c | 8
-rw-r--r--  usr/src/uts/common/vm/seg_spt.c | 4
62 files changed, 1364 insertions, 382 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index ce7b7a3e6a..720701371d 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -110,6 +110,7 @@ GENUNIX_OBJS += \
bio.o \
bitmap.o \
blabel.o \
+ bootbanner.o \
brandsys.o \
bz2blocksort.o \
bz2compress.o \
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 8a906a2e25..bb80ca63c4 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -26,6 +26,7 @@
# Copyright 2020 Joyent, Inc.
# Copyright 2018 Nexenta Systems, Inc.
# Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright 2020 Oxide Computer Company
#
#
@@ -1563,6 +1564,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/krtld/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(COMMONBASE)/bootbanner/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(COMMONBASE)/list/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1591,6 +1596,13 @@ $(OBJS_DIR)/%.o: $(COMMONBASE)/refhash/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/bootbanner.o := CPPFLAGS += \
+ -DBOOTBANNER1='"$(BOOTBANNER1)"' \
+ -DBOOTBANNER2='"$(BOOTBANNER2)"' \
+ -DBOOTBANNER3='"$(BOOTBANNER3)"' \
+ -DBOOTBANNER4='"$(BOOTBANNER4)"' \
+ -DBOOTBANNER5='"$(BOOTBANNER5)"'
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/os/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
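
The CPPFLAGS override above injects the five BOOTBANNER make variables into
bootbanner.o as C string-literal macros. A minimal sketch of a consumer,
assuming a simple array-and-callback shape (the names below are illustrative,
not the actual bootbanner.c):

/*
 * Sketch only: each BOOTBANNERn macro expands to a string literal
 * supplied via -D above; an unset make variable yields "".
 */
static const char *bootbanner_src[] = {
	BOOTBANNER1, BOOTBANNER2, BOOTBANNER3, BOOTBANNER4, BOOTBANNER5,
};

static void
bootbanner_print_sketch(void (*emit)(const char *))
{
	for (uint_t i = 0; i < 5; i++) {
		if (bootbanner_src[i][0] != '\0')
			emit(bootbanner_src[i]);
	}
}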
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
index fed6be37cf..c7e5351778 100644
--- a/usr/src/uts/common/brand/lx/os/lx_brand.c
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -1402,8 +1402,15 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
if (p->p_brand == NULL)
return (ENOSYS);
- VERIFY(p->p_brand == &lx_brand);
- VERIFY(p->p_brand_data != NULL);
+ /*
+ * Certain native applications may wish to start the lx_lockd process.
+ * Every other process that's not branded should be denied.
+ */
+ if (p->p_brand != &lx_brand && cmd != B_START_NFS_LOCKD)
+ return (ENOSYS);
+
+ if (cmd != B_START_NFS_LOCKD)
+ VERIFY(p->p_brand_data != NULL);
switch (cmd) {
case B_REGISTER:
diff --git a/usr/src/uts/common/brand/lx/os/lx_lockd.c b/usr/src/uts/common/brand/lx/os/lx_lockd.c
index d6d965398a..37b744b0e8 100644
--- a/usr/src/uts/common/brand/lx/os/lx_lockd.c
+++ b/usr/src/uts/common/brand/lx/os/lx_lockd.c
@@ -297,6 +297,18 @@ lx_upcall_statd(int op, struct nlm_globals *g, struct nlm_host *host)
* as we pass to monitor, so that is also handled here by this same
* brand hook.
*/
+
+ /*
+	 * If the NLM was set up to be "v4 only" (i.e. no RPC call handlers
+	 * to localhost at configure time), the semaphore is uninitialized,
+	 * and waiting on it would hang indefinitely. FURTHERMORE, if just
+	 * the semaphore was initialized, we'd still panic on a NULL
+	 * nsm->ns_handle.
+ */
+ if (g->nlm_v4_only) {
+ stat = RPC_SYSTEMERROR;
+ goto bail;
+ }
+
nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
nsm = &g->nlm_nsm;
@@ -327,6 +339,7 @@ lx_upcall_statd(int op, struct nlm_globals *g, struct nlm_host *host)
}
sema_v(&nsm->ns_sem);
+bail:
if (stat != RPC_SUCCESS) {
NLM_WARN("Failed to contact local statd, stat=%d", stat);
if (op == SM_MON) {
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index 90d87d78a8..85aa5e34bd 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -94,6 +94,7 @@ extern "C" {
#define B_LPID_TO_SPAIR 128
#define B_GET_CURRENT_CONTEXT 129
#define B_EMULATION_DONE 130
+/* Some native programs use B_START_NFS_LOCKD, so don't change this. */
#define B_START_NFS_LOCKD 131
#define B_BLOCK_ALL_SIGS 132
#define B_UNBLOCK_ALL_SIGS 133
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index b619719ba9..fa7b9fb2fc 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -24,7 +24,7 @@
* Copyright 2016 Gary Mills
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2019 Joyent, Inc.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/
#include <sys/dsl_scan.h>
@@ -549,6 +549,22 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
zfs_dbgmsg("new-style scrub was modified "
"by old software; restarting in txg %llu",
(longlong_t)scn->scn_restart_txg);
+ } else if (dsl_scan_resilvering(dp)) {
+ /*
+ * If a resilver is in progress and there are already
+ * errors, restart it instead of finishing this scan and
+ * then restarting it. If there haven't been any errors
+ * then remember that the incore DTL is valid.
+ */
+ if (scn->scn_phys.scn_errors > 0) {
+ scn->scn_restart_txg = txg;
+ zfs_dbgmsg("resilver can't excise DTL_MISSING "
+ "when finished; restarting in txg %llu",
+ (u_longlong_t)scn->scn_restart_txg);
+ } else {
+ /* it's safe to excise DTL when finished */
+ spa->spa_scrub_started = B_TRUE;
+ }
}
}
@@ -599,6 +615,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx)
}
boolean_t
+dsl_scan_resilver_scheduled(dsl_pool_t *dp)
+{
+ return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) ||
+ (spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER));
+}
+
+boolean_t
dsl_scan_scrubbing(const dsl_pool_t *dp)
{
dsl_scan_phys_t *scn_phys = &dp->dp_scan->scn_phys;
@@ -794,7 +817,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
(void) spa_vdev_state_exit(spa, NULL, 0);
if (func == POOL_SCAN_RESILVER) {
- dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ dsl_scan_restart_resilver(spa->spa_dsl_pool, 0);
return (0);
}
@@ -813,41 +836,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
-/*
- * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
- * B_TRUE if we have devices that need to be resilvered and are available to
- * accept resilver I/Os.
- */
-static boolean_t
-dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
-{
- boolean_t resilver_needed = B_FALSE;
- spa_t *spa = vd->vdev_spa;
-
- for (int c = 0; c < vd->vdev_children; c++) {
- resilver_needed |=
- dsl_scan_clear_deferred(vd->vdev_child[c], tx);
- }
-
- if (vd == spa->spa_root_vdev &&
- spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
- spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
- vdev_config_dirty(vd);
- spa->spa_resilver_deferred = B_FALSE;
- return (resilver_needed);
- }
-
- if (!vdev_is_concrete(vd) || vd->vdev_aux ||
- !vd->vdev_ops->vdev_op_leaf)
- return (resilver_needed);
-
- if (vd->vdev_resilver_deferred)
- vd->vdev_resilver_deferred = B_FALSE;
-
- return (!vdev_is_dead(vd) && !vd->vdev_offline &&
- vdev_resilver_needed(vd, NULL, NULL));
-}
-
/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@@ -915,7 +903,6 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
"errors=%llu", spa_get_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
- spa->spa_scrub_started = B_FALSE;
spa->spa_scrub_active = B_FALSE;
/*
@@ -943,30 +930,33 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_errlog_rotate(spa);
/*
+ * Don't clear flag until after vdev_dtl_reassess to ensure that
+ * DTL_MISSING will get updated when possible.
+ */
+ spa->spa_scrub_started = B_FALSE;
+
+ /*
* We may have finished replacing a device.
* Let the async thread assess this and handle the detach.
*/
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
/*
- * Clear any deferred_resilver flags in the config.
+ * Clear any resilver_deferred flags in the config.
* If there are drives that need resilvering, kick
* off an asynchronous request to start resilver.
- * dsl_scan_clear_deferred() may update the config
+ * vdev_clear_resilver_deferred() may update the config
* before the resilver can restart. In the event of
* a crash during this period, the spa loading code
* will find the drives that need to be resilvered
- * when the machine reboots and start the resilver then.
+ * and start the resilver then.
*/
- if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
- boolean_t resilver_needed =
- dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
- if (resilver_needed) {
- spa_history_log_internal(spa,
- "starting deferred resilver", tx,
- "errors=%llu", spa_get_errlog_size(spa));
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) &&
+ vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) {
+ spa_history_log_internal(spa,
+ "starting deferred resilver", tx, "errors=%llu",
+ (u_longlong_t)spa_get_errlog_size(spa));
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
}
}
@@ -1073,7 +1063,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
/* start a new scan, or restart an existing one. */
void
-dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
+dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg)
{
if (txg == 0) {
dmu_tx_t *tx;
@@ -1221,10 +1211,13 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx)
static boolean_t
dsl_scan_should_clear(dsl_scan_t *scn)
{
+ spa_t *spa = scn->scn_dp->dp_spa;
vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev;
- uint64_t mlim_hard, mlim_soft, mused;
- uint64_t alloc = metaslab_class_get_alloc(spa_normal_class(
- scn->scn_dp->dp_spa));
+ uint64_t alloc, mlim_hard, mlim_soft, mused;
+
+ alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ alloc += metaslab_class_get_alloc(spa_special_class(spa));
+ alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE,
zfs_scan_mem_lim_min);
@@ -4208,3 +4201,33 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp)
for (int i = 0; i < BP_GET_NDVAS(bp); i++)
dsl_scan_freed_dva(spa, bp, i);
}
+
+/*
+ * Check if a vdev needs resilvering (i.e. has a non-empty DTL). If so,
+ * and a resilver has not already started, start it. Otherwise, restart
+ * only if the max txg in the DTL range is greater than the max txg in
+ * the current scan. If the DTL max is less than the scan max, the vdev
+ * has not missed any new data since the resilver started, so a restart
+ * is not needed.
+ */
+void
+dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
+{
+ uint64_t min, max;
+
+ if (!vdev_resilver_needed(vd, &min, &max))
+ return;
+
+ if (!dsl_scan_resilvering(dp)) {
+ spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
+ return;
+ }
+
+ if (max <= dp->dp_scan->scn_phys.scn_max_txg)
+ return;
+
+ /* restart is needed, check if it can be deferred */
+ if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
+ vdev_defer_resilver(vd);
+ else
+ spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
+}
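
The restart decision in dsl_scan_assess_vdev() reduces to a small amount of
state. A standalone model of that logic, for illustration only (the enum and
function below are not part of the change):

typedef enum { RESILVER_NONE, RESILVER_START, RESILVER_RESTART }
    resilver_action_t;

/* needs: vdev_resilver_needed(); resilvering: dsl_scan_resilvering() */
static resilver_action_t
resilver_assess_model(boolean_t needs, boolean_t resilvering,
    uint64_t dtl_max, uint64_t scn_max_txg)
{
	if (!needs)
		return (RESILVER_NONE);
	if (!resilvering)
		return (RESILVER_START);	/* SPA_ASYNC_RESILVER */
	if (dtl_max <= scn_max_txg)
		return (RESILVER_NONE);		/* no new data missed */
	/* deferred via vdev_defer_resilver() if RESILVER_DEFER is enabled */
	return (RESILVER_RESTART);
}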
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 547fa1e2bb..fc08eebbc0 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -27,9 +27,9 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
- * Copyright (c) 2017 Datto Inc.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
*/
@@ -6397,9 +6397,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, newvd);
+ vdev_defer_resilver(newvd);
else
- dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+ dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -7637,7 +7637,7 @@ spa_async_thread(void *arg)
if (tasks & SPA_ASYNC_RESILVER &&
(!dsl_scan_resilvering(dp) ||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
- dsl_resilver_restart(dp, 0);
+ dsl_scan_restart_resilver(dp, 0);
if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
mutex_enter(&spa_namespace_lock);
@@ -7753,6 +7753,12 @@ spa_async_request(spa_t *spa, int task)
mutex_exit(&spa->spa_async_lock);
}
+int
+spa_async_tasks(spa_t *spa)
+{
+ return (spa->spa_async_tasks);
+}
+
/*
* ==========================================================================
* SPA syncing routines
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
index 1b600405ae..4693293290 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
-void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
+void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
+boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 31faac4f77..33cdfbeb4b 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -26,7 +26,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2019 Joyent, Inc.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
*/
@@ -775,6 +775,7 @@ extern void spa_async_request(spa_t *spa, int flag);
extern void spa_async_unrequest(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
+extern int spa_async_tasks(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index a6de7e6f2c..b8c2ee5c9e 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#ifndef _SYS_VDEV_H
@@ -153,6 +154,8 @@ extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
+extern void vdev_defer_resilver(vdev_t *vd);
+extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx);
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 9947bedf54..60d4d6805f 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -24,6 +24,7 @@
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -389,6 +390,10 @@ typedef struct zinject_record {
#define ZI_NO_DVA (-1)
+/* scaled frequency ranges */
+#define ZI_PERCENTAGE_MIN 4294UL
+#define ZI_PERCENTAGE_MAX UINT32_MAX
+
typedef enum zinject_type {
ZINJECT_UNINITIALIZED,
ZINJECT_DATA_FAULT,
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 01e892f4c4..9773ec7960 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -27,6 +27,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -98,6 +99,12 @@ boolean_t vdev_validate_skip = B_FALSE;
int zfs_vdev_dtl_sm_blksz = (1 << 12);
/*
+ * Ignore errors during scrub/resilver. This allows one to work around
+ * a resilver that keeps restarting upon import when there are pool
+ * errors.
+ */
+int zfs_scan_ignore_errors = 0;
+
+/*
* vdev-wide space maps that have lots of entries written to them at
* the end of each transaction can benefit from a higher I/O bandwidth
* (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
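
Assuming the standard illumos mechanism for ZFS module tunables applies here
(an operator-usage note, not something this change documents), the new knob
would be flipped via /etc/system:

	set zfs:zfs_scan_ignore_errors = 1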
@@ -772,7 +779,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_resilver_txg);
if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
+ vdev_defer_resilver(vd);
/*
* When importing a pool, we want to ignore the persistent fault
@@ -1764,18 +1771,12 @@ vdev_open(vdev_t *vd)
}
/*
- * If a leaf vdev has a DTL, and seems healthy, then kick off a
- * resilver. But don't do this if we are doing a reopen for a scrub,
- * since this would just restart the scrub we are already doing.
+ * If this is a leaf vdev, assess whether a resilver is needed.
+ * But don't do this if we are doing a reopen for a scrub, since
+ * this would just restart the scrub we are already doing.
*/
- if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
- vdev_resilver_needed(vd, NULL, NULL)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen)
+ dsl_scan_assess_vdev(spa->spa_dsl_pool, vd);
return (0);
}
@@ -2470,7 +2471,6 @@ vdev_dtl_should_excise(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
- ASSERT0(scn->scn_phys.scn_errors);
ASSERT0(vd->vdev_children);
if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2520,10 +2520,29 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (vd->vdev_ops->vdev_op_leaf) {
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+ boolean_t wasempty = B_TRUE;
mutex_enter(&vd->vdev_dtl_lock);
/*
+ * If requested, pretend the scan completed cleanly.
+ */
+ if (zfs_scan_ignore_errors && scn)
+ scn->scn_phys.scn_errors = 0;
+
+ if (scrub_txg != 0 &&
+ !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ wasempty = B_FALSE;
+ zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d "
+ "dtl:%llu/%llu errors:%llu",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg,
+ (u_longlong_t)scrub_txg, spa->spa_scrub_started,
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd),
+ (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0));
+ }
+
+ /*
* If we've completed a scan cleanly then determine
* if this vdev should remove any DTLs. We only want to
* excise regions on vdevs that were available during
@@ -2559,6 +2578,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
space_reftree_generate_map(&reftree,
vd->vdev_dtl[DTL_MISSING], 1);
space_reftree_destroy(&reftree);
+
+ if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ zfs_dbgmsg("update DTL_MISSING:%llu/%llu",
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd));
+ } else if (!wasempty) {
+ zfs_dbgmsg("DTL_MISSING is now empty");
+ }
}
range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
@@ -3543,14 +3570,11 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);
- if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa,
- SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ /* If a resilver isn't required, check if vdevs can be culled */
+ if (vd->vdev_aux == NULL && !vdev_is_dead(vd) &&
+ !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ !dsl_scan_resilver_scheduled(spa->spa_dsl_pool))
+ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
@@ -4559,18 +4583,46 @@ vdev_deadman(vdev_t *vd)
}
void
-vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+vdev_defer_resilver(vdev_t *vd)
{
- for (uint64_t i = 0; i < vd->vdev_children; i++)
- vdev_set_deferred_resilver(spa, vd->vdev_child[i]);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
- if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) ||
- range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
- return;
+ vd->vdev_resilver_deferred = B_TRUE;
+ vd->vdev_spa->spa_resilver_deferred = B_TRUE;
+}
+
+/*
+ * Clears the resilver deferred flag on all leaf devs under vd. Returns
+ * B_TRUE if we have devices that need to be resilvered and are available to
+ * accept resilver I/Os.
+ */
+boolean_t
+vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+ boolean_t resilver_needed = B_FALSE;
+ spa_t *spa = vd->vdev_spa;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ resilver_needed |= vdev_clear_resilver_deferred(cvd, tx);
}
- vd->vdev_resilver_deferred = B_TRUE;
- spa->spa_resilver_deferred = B_TRUE;
+ if (vd == spa->spa_root_vdev &&
+ spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+ vdev_config_dirty(vd);
+ spa->spa_resilver_deferred = B_FALSE;
+ return (resilver_needed);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (resilver_needed);
+
+ vd->vdev_resilver_deferred = B_FALSE;
+
+ return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+ vdev_resilver_needed(vd, NULL, NULL));
}
/*
diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c
index a65721d175..e332da9672 100644
--- a/usr/src/uts/common/fs/zfs/zio_inject.c
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Intel Corporation.
*/
/*
@@ -100,6 +101,26 @@ static kmutex_t inject_delay_mtx;
static int inject_next_id = 1;
/*
+ * Test if the requested frequency was triggered
+ */
+static boolean_t
+freq_triggered(uint32_t frequency)
+{
+ /*
+ * zero implies always (100%)
+ */
+ if (frequency == 0)
+ return (B_TRUE);
+
+ /*
+	 * Note: we still handle legacy (unscaled) frequency values
+ */
+ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
+
+ return (spa_get_random(maximum) < frequency);
+}
+
+/*
* Returns true if the given record matches the I/O in progress.
*/
static boolean_t
@@ -114,8 +135,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type, int dva,
record->zi_object == DMU_META_DNODE_OBJECT) {
if (record->zi_type == DMU_OT_NONE ||
type == record->zi_type)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
else
return (B_FALSE);
}
@@ -130,8 +150,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type, int dva,
zb->zb_blkid <= record->zi_end &&
(record->zi_dvas == 0 || (record->zi_dvas & (1ULL << dva))) &&
error == record->zi_error) {
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
}
return (B_FALSE);
@@ -360,6 +379,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
if (handler->zi_record.zi_error == error) {
/*
+ * limit error injection if requested
+ */
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
+ /*
* For a failed open, pretend like the device
* has gone away.
*/
@@ -527,6 +552,9 @@ zio_handle_io_delay(zio_t *zio)
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
continue;
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
if (vd->vdev_guid != handler->zi_record.zi_guid)
continue;
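
freq_triggered() above compares spa_get_random(maximum) against zi_freq, so a
legacy value f in (0, 100] fires f% of the time, while a scaled value fires
with probability f / 2^32; ZI_PERCENTAGE_MIN (4294) therefore corresponds to
roughly 0.0001%. A hypothetical userland helper for the scaled encoding (not
part of this change):

#include <stdint.h>

/* Convert a percentage in (0, 100] to a scaled zi_freq value. */
static uint32_t
percent_to_zi_freq(double pct)
{
	return ((uint32_t)(pct / 100.0 * (double)UINT32_MAX));
}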
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index 9aeba33d30..b16fc9bf5f 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -731,6 +731,7 @@ ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
hook_hint_t hint;
boolean_t out;
int ret = 0;
+
const boolean_t gz = ifs->ifs_gz_controlled;
/* We currently only care about viona hooks notifications */
@@ -2438,42 +2439,6 @@ int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
return ipf_hook6(info, 1, FI_NOCKSUM, arg);
}
-/* ------------------------------------------------------------------------ */
-/* Function: ipf_hookvndl3_in */
-/* Returns: int - 0 == packet ok, else problem, free packet if not done */
-/* Parameters: event(I) - pointer to event */
-/* info(I) - pointer to hook information for firewalling */
-/* */
-/* The vnd hooks are private hooks to ON. They represents a layer 2 */
-/* datapath generally used to implement virtual machines. The driver sends */
-/* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
-/* them is in the upper 16 bits while the remaining bits are the */
-/* traditional packet hook flags. */
-/* */
-/* They end up calling the appropriate traditional ip hooks. */
-/* ------------------------------------------------------------------------ */
-/*ARGSUSED*/
-int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
-{
- return ipf_hook4_in(token, info, arg);
-}
-
-int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
-{
- return ipf_hook6_in(token, info, arg);
-}
-
-/*ARGSUSED*/
-int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
-{
- return ipf_hook4_out(token, info, arg);
-}
-
-int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
-{
- return ipf_hook6_out(token, info, arg);
-}
-
/* Static constants used by ipf_hook_ether */
static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
@@ -2569,6 +2534,42 @@ int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
}
/* ------------------------------------------------------------------------ */
+/* Function: ipf_hookvndl3_in */
+/* Returns: int - 0 == packet ok, else problem, free packet if not done */
+/* Parameters: event(I) - pointer to event */
+/* info(I) - pointer to hook information for firewalling */
+/* */
+/* The vnd hooks are private hooks to ON. They represent a layer 2 */
+/* datapath generally used to implement virtual machines. The driver sends */
+/* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
+/* them is in the upper 16 bits while the remaining bits are the */
+/* traditional packet hook flags. */
+/* */
+/* They end up calling the appropriate traditional ip hooks. */
+/* ------------------------------------------------------------------------ */
+/*ARGSUSED*/
+int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook4_in(token, info, arg);
+}
+
+int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook6_in(token, info, arg);
+}
+
+/*ARGSUSED*/
+int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook4_out(token, info, arg);
+}
+
+int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook6_out(token, info, arg);
+}
+
+/* ------------------------------------------------------------------------ */
/* Function: ipf_hookviona_{in,out} */
/* Returns: int - 0 == packet ok, else problem, free packet if not done */
/* Parameters: event(I) - pointer to event */
diff --git a/usr/src/uts/common/io/audio/impl/audio_grc3.h b/usr/src/uts/common/io/audio/impl/audio_grc3.h
index 0003dc1574..4472307edf 100644
--- a/usr/src/uts/common/io/audio/impl/audio_grc3.h
+++ b/usr/src/uts/common/io/audio/impl/audio_grc3.h
@@ -53,7 +53,7 @@ typedef struct grc3state {
int32_t *historyptr;
int32_t dummy_pad1;
- int32_t history[GRC3_MAXHISTORY * 2];
+ int32_t history[GRC3_MAXHISTORY * 2 + 1];
uint32_t outsz;
} grc3state_t;
diff --git a/usr/src/uts/common/io/bge/bge_main2.c b/usr/src/uts/common/io/bge/bge_main2.c
index ab511c068d..81b6528c7c 100644
--- a/usr/src/uts/common/io/bge/bge_main2.c
+++ b/usr/src/uts/common/io/bge/bge_main2.c
@@ -1437,8 +1437,49 @@ bge_unicst_find(bge_t *bgep, const uint8_t *mac_addr)
}
/*
- * Programs the classifier to start steering packets matching 'mac_addr' to the
- * specified ring 'arg'.
+ * The job of bge_addmac() is to set up everything in hardware for the mac
+ * address indicated to map to the specified group.
+ *
+ * For this to make sense, we need to first understand how most of the bge chips
+ * work. A given packet reaches a ring in two distinct logical steps:
+ *
+ * 1) The device must accept the packet.
+ * 2) The device must steer an accepted packet to a specific ring.
+ *
+ * For step 1, the device has four global MAC address filtering registers. We
+ * must either add the address here or put the device in promiscuous mode.
+ * Because there are only four of these and up to four groups, each group is
+ * only allowed to program a single entry. Note, this is not explicitly done in
+ * the driver. Rather, it is implicitly done by how we implement step 2. These
+ * registers start at 0x410 and are referred to as the 'EMAC MAC Addresses' in
+ * the manuals.
+ *
+ * For step 2, the device has eight sets of rule registers that are used to
+ * control how a packet in step 1 is mapped to a specific ring. Each set is
+ * comprised of a control register and a mask register. These start at 0x480 and
+ * are referred to as the 'Receive Rules Control Registers' and 'Receive Rules
+ * Value/Mask Registers'. These can be used to check for a 16-bit or 32-bit
+ * value at an offset in the packet. In addition, two sets can be combined to
+ * create a single conditional rule.
+ *
+ * For our purposes, we need to use this mechanism to steer a mac address to a
+ * specific ring. This requires that we use two of the sets of registers per MAC
+ * address that comes in here. The data about this is stored in 'mac_addr_rule'
+ * member of the 'recv_ring_t'.
+ *
+ * A reasonable question to ask is why are we storing this on the ring, when it
+ * relates to the group. The answer is that the current implementation of the
+ * driver assumes that each group is comprised of a single ring. While some
+ * parts may support additional rings, the driver doesn't take advantage of
+ * that.
+ *
+ * A result of all this is that the driver will support up to 4 groups today.
+ * Each group has a single ring. We want to make sure that each group can have a
+ * single MAC address programmed into it. This results in the check for a rule
+ * being assigned in the 'mac_addr_rule' member of the recv_ring_t below. If a
+ * future part were to support more global MAC address filters in step 1 and
+ * more rule registers for step 2, then we could relax this constraint
+ * and allow a group to have more than one MAC address assigned to it.
*/
static int
bge_addmac(void *arg, const uint8_t * mac_addr)
@@ -1461,7 +1502,10 @@ bge_addmac(void *arg, const uint8_t * mac_addr)
}
/*
- * First add the unicast address to a available slot.
+ * The driver only supports a MAC address being programmed to be
+ * received by one ring in step 2. We check the global table of MAC
+ * addresses to see if this address has already been claimed by another
+ * group as a way to determine that.
*/
slot = bge_unicst_find(bgep, mac_addr);
if (slot != -1) {
@@ -1469,6 +1513,17 @@ bge_addmac(void *arg, const uint8_t * mac_addr)
return (EEXIST);
}
+ /*
+ * Check to see if this group has already used its hardware resources
+ * for step 2. If so, we have to return ENOSPC to MAC to indicate that
+ * this group cannot handle an additional MAC address and that MAC will
+ * need to use software classification on the default group.
+ */
+ if (rrp->mac_addr_rule != NULL) {
+ mutex_exit(bgep->genlock);
+ return (ENOSPC);
+ }
+
for (slot = 0; slot < bgep->unicst_addr_total; slot++) {
if (!bgep->curr_addr[slot].set) {
bgep->curr_addr[slot].set = B_TRUE;
@@ -1483,12 +1538,6 @@ bge_addmac(void *arg, const uint8_t * mac_addr)
if ((err = bge_unicst_set(bgep, mac_addr, slot)) != 0)
goto fail;
- /* A rule is already here. Deny this. */
- if (rrp->mac_addr_rule != NULL) {
- err = ether_cmp(mac_addr, rrp->mac_addr_val) ? EEXIST : EBUSY;
- goto fail;
- }
-
/*
* Allocate a bge_rule_info_t to keep track of which rule slots
* are being used.
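
To make the two-register scheme in the bge_addmac() comment concrete, here is
a purely illustrative sketch of splitting one MAC address across a 32-bit rule
and a 16-bit rule that are combined into a single conditional match; the types
and names are hypothetical, not the driver's register interface:

typedef struct {
	uint32_t rule_value;	/* value to match in the packet */
	uint32_t rule_mask;	/* which bits participate */
} fake_rule_t;

static void
steer_mac_sketch(const uint8_t *mac, fake_rule_t *r32, fake_rule_t *r16)
{
	/* First rule set: 32-bit compare against MAC bytes 0-3. */
	r32->rule_value = (mac[0] << 24) | (mac[1] << 16) |
	    (mac[2] << 8) | mac[3];
	r32->rule_mask = UINT32_MAX;
	/* Second set, combined with the first: 16-bit compare on bytes 4-5. */
	r16->rule_value = (mac[4] << 8) | mac[5];
	r16->rule_mask = 0xffff;
}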
diff --git a/usr/src/uts/common/io/bnx/bnx.h b/usr/src/uts/common/io/bnx/bnx.h
index e1d53fa9d7..9ef282678e 100644
--- a/usr/src/uts/common/io/bnx/bnx.h
+++ b/usr/src/uts/common/io/bnx/bnx.h
@@ -55,12 +55,6 @@ extern "C" {
-/*
- */
-#pragma weak hcksum_retrieve
-#pragma weak hcksum_assoc
-
-
#include "listq.h"
#include "lm5706.h"
#include "54xx_reg.h"
diff --git a/usr/src/uts/common/io/bnx/bnxsnd.c b/usr/src/uts/common/io/bnx/bnxsnd.c
index 16f1b03c10..f6e154c056 100644
--- a/usr/src/uts/common/io/bnx/bnxsnd.c
+++ b/usr/src/uts/common/io/bnx/bnxsnd.c
@@ -611,7 +611,7 @@ bnx_xmit_ring_xmit_mblk(um_device_t * const umdevice,
umpacket->frag_list.cnt = 0;
umpacket->mp = mp;
- hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
+ mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
bnx_xmit_pkt_cpy(umdevice, umpacket);
diff --git a/usr/src/uts/common/io/chxge/pe.c b/usr/src/uts/common/io/chxge/pe.c
index 652edba984..48a796470a 100644
--- a/usr/src/uts/common/io/chxge/pe.c
+++ b/usr/src/uts/common/io/chxge/pe.c
@@ -414,12 +414,12 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
lseg = ch_bind_dvma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[nseg], mseg - nseg);
- if (lseg == NULL) {
+ if (lseg == 0) {
sa->sge->intr_cnt.tx_no_dvma1++;
if ((lseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[nseg],
- mseg - nseg)) == NULL) {
+ mseg - nseg)) == 0) {
sa->sge->intr_cnt.tx_no_dma1++;
/*
@@ -444,7 +444,7 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
lseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr, &hmp[nseg],
mseg - nseg);
- if (lseg == NULL) {
+ if (lseg == 0) {
sa->sge->intr_cnt.tx_no_dma1++;
/*
@@ -512,12 +512,12 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
nseg = ch_bind_dvma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[0], 16);
- if (nseg == NULL) {
+ if (nseg == 0) {
sa->sge->intr_cnt.tx_no_dvma2++;
nseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr,
&hmp[0], 16);
- if (nseg == NULL) {
+ if (nseg == 0) {
sa->sge->intr_cnt.tx_no_dma2++;
/*
@@ -530,7 +530,7 @@ pe_start(ch_t *sa, mblk_t *mp, uint32_t flg)
} else {
nseg = ch_bind_dma_handle(sa, len,
(void *)mp->b_rptr, &hmp[0], 16);
- if (nseg == NULL) {
+ if (nseg == 0) {
sa->sge->intr_cnt.tx_no_dma2++;
/*
diff --git a/usr/src/uts/common/io/cmlb.c b/usr/src/uts/common/io/cmlb.c
index 6275948465..f4ae9f3ed5 100644
--- a/usr/src/uts/common/io/cmlb.c
+++ b/usr/src/uts/common/io/cmlb.c
@@ -1514,7 +1514,7 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl)
if (cl->cl_alter_behavior & CMLB_CREATE_P0_MINOR_NODE) {
if (cmlb_create_minor(CMLB_DEVINFO(cl), "q", S_IFBLK,
(instance << CMLBUNIT_FORCE_P0_SHIFT) | P0_RAW_DISK,
- cl->cl_node_type, NULL, internal) == DDI_FAILURE) {
+ cl->cl_node_type, 0, internal) == DDI_FAILURE) {
ddi_remove_minor_node(CMLB_DEVINFO(cl), NULL);
return (ENXIO);
}
@@ -1522,7 +1522,7 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl)
if (cmlb_create_minor(CMLB_DEVINFO(cl), "q,raw",
S_IFCHR,
(instance << CMLBUNIT_FORCE_P0_SHIFT) | P0_RAW_DISK,
- cl->cl_node_type, NULL, internal) == DDI_FAILURE) {
+ cl->cl_node_type, 0, internal) == DDI_FAILURE) {
ddi_remove_minor_node(CMLB_DEVINFO(cl), NULL);
return (ENXIO);
}
diff --git a/usr/src/uts/common/io/cxgbe/common/common.h b/usr/src/uts/common/io/cxgbe/common/common.h
index c7de2c4ebf..b8d77ebda3 100644
--- a/usr/src/uts/common/io/cxgbe/common/common.h
+++ b/usr/src/uts/common/io/cxgbe/common/common.h
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#ifndef __CHELSIO_COMMON_H
#define __CHELSIO_COMMON_H
@@ -103,9 +107,16 @@ enum {
typedef unsigned char cc_pause_t;
enum {
- FEC_AUTO = 1 << 0, /* IEEE 802.3 "automatic" */
- FEC_RS = 1 << 1, /* Reed-Solomon */
- FEC_BASER_RS = 1 << 2, /* BaseR/Reed-Solomon */
+ FEC_RS = 1 << 0, /* Reed-Solomon */
+ FEC_BASER_RS = 1 << 1, /* Base-R, aka Firecode */
+ FEC_NONE = 1 << 2, /* no FEC */
+
+ /*
+ * Pseudo FECs that translate to real FECs. The firmware knows nothing
+ * about these and they start at M_FW_PORT_CAP32_FEC + 1. AUTO should
+ * be set all by itself.
+ */
+ FEC_AUTO = 1 << 5,
};
typedef unsigned char cc_fec_t;
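
As a sketch of the convention stated in the comment above (assuming only this
header is in scope), a validity check for a cc_fec_t value would be:

static inline boolean_t
cc_fec_valid(cc_fec_t f)
{
	if (f == FEC_AUTO)
		return (B_TRUE);	/* AUTO, all by itself */
	/* otherwise any non-empty mix of the real FEC bits */
	return (f != 0 && (f & FEC_AUTO) == 0);
}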
diff --git a/usr/src/uts/common/io/cxgbe/common/t4_hw.c b/usr/src/uts/common/io/cxgbe/common/t4_hw.c
index ae88f36f15..4bb48f1b3a 100644
--- a/usr/src/uts/common/io/cxgbe/common/t4_hw.c
+++ b/usr/src/uts/common/io/cxgbe/common/t4_hw.c
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#include "common.h"
#include "t4_regs.h"
#include "t4_regs_values.h"
@@ -4645,20 +4649,57 @@ static inline cc_fec_t fwcap_to_cc_fec(fw_port_cap32_t fw_fec)
if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
cc_fec |= FEC_BASER_RS;
- return cc_fec;
+ if (cc_fec == 0)
+ cc_fec = FEC_NONE;
+
+ return (cc_fec);
}
/* Translate Common Code Forward Error Correction specification to Firmware */
-static inline fw_port_cap32_t cc_to_fwcap_fec(cc_fec_t cc_fec)
+static inline boolean_t
+cc_to_fwcap_fec(fw_port_cap32_t *fw_fecp, cc_fec_t cc_fec,
+ struct link_config *lc)
{
fw_port_cap32_t fw_fec = 0;
- if (cc_fec & FEC_RS)
+ if ((cc_fec & FEC_AUTO) != 0) {
+ if ((lc->pcaps & FW_PORT_CAP32_SPEED_100G) == 0)
+ fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
+
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0)
+ fw_fec |= FW_PORT_CAP32_FEC_NO_FEC;
+
+ fw_fec |= FW_PORT_CAP32_FEC_RS;
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
+ }
+
+ if ((cc_fec & FEC_RS) != 0)
fw_fec |= FW_PORT_CAP32_FEC_RS;
- if (cc_fec & FEC_BASER_RS)
+
+ if ((cc_fec & FEC_BASER_RS) != 0 &&
+ (lc->pcaps & FW_PORT_CAP32_SPEED_100G) == 0)
fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
- return fw_fec;
+ if ((cc_fec & FEC_NONE) != 0) {
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0) {
+ fw_fec |= FW_PORT_CAP32_FORCE_FEC;
+ fw_fec |= FW_PORT_CAP32_FEC_NO_FEC;
+ }
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
+ }
+
+ if (fw_fec == 0)
+ return (B_FALSE);
+
+ if ((lc->pcaps & FW_PORT_CAP32_FORCE_FEC) != 0)
+ fw_fec |= FW_PORT_CAP32_FORCE_FEC;
+
+ *fw_fecp = fw_fec;
+ return (B_TRUE);
}
/**
@@ -4692,11 +4733,18 @@ fw_port_cap32_t t4_link_acaps(struct adapter *adapter, unsigned int port,
* the Transceiver Module EPROM FEC parameters. Otherwise we
* use whatever is in the current Requested FEC settings.
*/
- if (lc->requested_fec & FEC_AUTO)
- cc_fec = fwcap_to_cc_fec(lc->def_acaps);
- else
- cc_fec = lc->requested_fec;
- fw_fec = cc_to_fwcap_fec(cc_fec);
+ if (fec_supported(lc->pcaps)) {
+ if (lc->requested_fec & FEC_AUTO)
+ cc_fec = fwcap_to_cc_fec(lc->def_acaps);
+ else
+ cc_fec = lc->requested_fec;
+
+ if (!cc_to_fwcap_fec(&fw_fec, cc_fec, lc))
+ return (0);
+ } else {
+ fw_fec = 0;
+ cc_fec = FEC_NONE;
+ }
/* Figure out what our Requested Port Capabilities are going to be.
* Note parallel structure in t4_handle_get_port_info() and
@@ -9641,12 +9689,17 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
lc->speed = 0;
lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
- /*
- * For Forward Error Control, we default to whatever the Firmware
- * tells us the Link is currently advertising.
- */
- lc->requested_fec = FEC_AUTO;
- lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ if (fec_supported(pcaps)) {
+ /*
+ * For Forward Error Control, we default to whatever the Firmware
+ * tells us the Link is currently advertising.
+ */
+ lc->requested_fec = FEC_AUTO;
+ lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ } else {
+ lc->requested_fec = FEC_NONE;
+ lc->fec = FEC_NONE;
+ }
/* If the Port is capable of Auto-Negtotiation, initialize it as
* "enabled" and copy over all of the Physical Port Capabilities
diff --git a/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h b/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
index d705c73891..b998e85bae 100644
--- a/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
+++ b/usr/src/uts/common/io/cxgbe/firmware/t4fw_interface.h
@@ -11,6 +11,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#ifndef _T4FW_INTERFACE_H_
#define _T4FW_INTERFACE_H_
@@ -7204,11 +7208,12 @@ enum fw_port_mdi {
#define FW_PORT_CAP32_MDISTRAIGHT 0x00400000UL
#define FW_PORT_CAP32_FEC_RS 0x00800000UL
#define FW_PORT_CAP32_FEC_BASER_RS 0x01000000UL
-#define FW_PORT_CAP32_FEC_RESERVED1 0x02000000UL
+#define FW_PORT_CAP32_FEC_NO_FEC 0x02000000UL
#define FW_PORT_CAP32_FEC_RESERVED2 0x04000000UL
#define FW_PORT_CAP32_FEC_RESERVED3 0x08000000UL
#define FW_PORT_CAP32_FORCE_PAUSE 0x10000000UL
-#define FW_PORT_CAP32_RESERVED2 0xe0000000UL
+#define FW_PORT_CAP32_FORCE_FEC 0x20000000UL
+#define FW_PORT_CAP32_RESERVED2 0xc0000000UL
#define S_FW_PORT_CAP32_SPEED 0
#define M_FW_PORT_CAP32_SPEED 0xfff
@@ -7254,7 +7259,7 @@ enum fw_port_mdi32 {
(((x) >> S_FW_PORT_CAP32_MDI) & M_FW_PORT_CAP32_MDI)
#define S_FW_PORT_CAP32_FEC 23
-#define M_FW_PORT_CAP32_FEC 0x1f
+#define M_FW_PORT_CAP32_FEC 0x5f
#define V_FW_PORT_CAP32_FEC(x) ((x) << S_FW_PORT_CAP32_FEC)
#define G_FW_PORT_CAP32_FEC(x) \
(((x) >> S_FW_PORT_CAP32_FEC) & M_FW_PORT_CAP32_FEC)
@@ -7269,6 +7274,15 @@ enum fw_port_mdi32 {
#define CAP32_FC(__cap32) \
(V_FW_PORT_CAP32_FC(M_FW_PORT_CAP32_FC) & __cap32)
+#ifdef _KERNEL
+static inline boolean_t
+fec_supported(uint32_t caps)
+{
+ return ((caps & (FW_PORT_CAP32_SPEED_25G | FW_PORT_CAP32_SPEED_50G |
+ FW_PORT_CAP32_SPEED_100G)) != 0);
+}
+#endif
+
enum fw_port_action {
FW_PORT_ACTION_L1_CFG = 0x0001,
FW_PORT_ACTION_L2_CFG = 0x0002,
diff --git a/usr/src/uts/common/io/cxgbe/shared/shared.c b/usr/src/uts/common/io/cxgbe/shared/shared.c
index 07dd78f189..e86272134a 100644
--- a/usr/src/uts/common/io/cxgbe/shared/shared.c
+++ b/usr/src/uts/common/io/cxgbe/shared/shared.c
@@ -32,17 +32,19 @@
static int rxbuf_ctor(void *, void *, int);
static void rxbuf_dtor(void *, void *);
-void
+int
cxgb_printf(dev_info_t *dip, int level, char *f, ...)
{
va_list list;
char fmt[128];
+ int rv;
- (void) snprintf(fmt, sizeof (fmt), "%s%d: %s", ddi_driver_name(dip),
+ rv = snprintf(fmt, sizeof (fmt), "%s%d: %s", ddi_driver_name(dip),
ddi_get_instance(dip), f);
va_start(list, f);
vcmn_err(level, fmt, list);
va_end(list);
+ return (rv);
}
kmem_cache_t *
diff --git a/usr/src/uts/common/io/cxgbe/shared/shared.h b/usr/src/uts/common/io/cxgbe/shared/shared.h
index 5838416838..d3171c224b 100644
--- a/usr/src/uts/common/io/cxgbe/shared/shared.h
+++ b/usr/src/uts/common/io/cxgbe/shared/shared.h
@@ -66,7 +66,7 @@ struct rxbuf_cache_params {
size_t buf_size;
};
-void cxgb_printf(dev_info_t *dip, int level, char *f, ...);
+int cxgb_printf(dev_info_t *dip, int level, char *f, ...);
kmem_cache_t *rxbuf_cache_create(struct rxbuf_cache_params *p);
void rxbuf_cache_destroy(kmem_cache_t *cache);
struct rxbuf *rxbuf_alloc(kmem_cache_t *cache, int kmflags, uint_t ref_cnt);
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h b/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h
index cb21451e5c..e86de21085 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h
+++ b/usr/src/uts/common/io/cxgbe/t4nex/cudbg.h
@@ -318,7 +318,7 @@ static struct el ATTRIBUTE_UNUSED entity_list[] = {
};
#ifdef _KERNEL
-typedef int (*cudbg_print_cb) (dev_info_t *dip, ...);
+typedef int (*cudbg_print_cb) (dev_info_t *dip, int, char *, ...);
#else
typedef int (*cudbg_print_cb) (char *, ...);
#endif
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c
index ee28c8a2ba..85d79e6201 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c
+++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_ioctl.c
@@ -1706,7 +1706,7 @@ get_cudbg(struct adapter *sc, void *data, int flags)
cudbg = cudbg_get_init(handle);
cudbg->adap = sc;
- cudbg->print = (cudbg_print_cb)(uintptr_t)cxgb_printf;
+ cudbg->print = cxgb_printf;
memcpy(cudbg->dbg_bitmap, dump.bitmap, sizeof(cudbg->dbg_bitmap));
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
index 59c0ddde8d..9b4ffd8325 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
+++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_mac.c
@@ -20,6 +20,10 @@
* release for licensing terms and conditions.
*/
+/*
+ * Copyright 2020 RackTop Systems, Inc.
+ */
+
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
@@ -930,6 +934,62 @@ t4_mc_getcapab(void *arg, mac_capab_t cap, void *data)
return (status);
}
+static link_fec_t
+fec_to_link_fec(cc_fec_t cc_fec)
+{
+ link_fec_t link_fec = 0;
+
+ if ((cc_fec & (FEC_RS | FEC_BASER_RS)) == (FEC_RS | FEC_BASER_RS))
+ return (LINK_FEC_AUTO);
+
+ if ((cc_fec & FEC_NONE) != 0)
+ link_fec |= LINK_FEC_NONE;
+
+ if ((cc_fec & FEC_AUTO) != 0)
+ link_fec |= LINK_FEC_AUTO;
+
+ if ((cc_fec & FEC_RS) != 0)
+ link_fec |= LINK_FEC_RS;
+
+ if ((cc_fec & FEC_BASER_RS) != 0)
+ link_fec |= LINK_FEC_BASE_R;
+
+ return (link_fec);
+}
+
+static int
+link_fec_to_fec(int v)
+{
+ int fec = 0;
+
+ if ((v & LINK_FEC_AUTO) != 0) {
+ fec = FEC_AUTO;
+ v &= ~LINK_FEC_AUTO;
+ } else {
+ if ((v & LINK_FEC_NONE) != 0) {
+ fec = FEC_NONE;
+ v &= ~LINK_FEC_NONE;
+ }
+
+ if ((v & LINK_FEC_RS) != 0) {
+ fec |= FEC_RS;
+ v &= ~LINK_FEC_RS;
+ }
+
+ if ((v & LINK_FEC_BASE_R) != 0) {
+ fec |= FEC_BASER_RS;
+ v &= ~LINK_FEC_BASE_R;
+ }
+ }
+
+ if (v != 0)
+ return (-1);
+
+ ASSERT3S(fec, !=, 0);
+
+ return (fec);
+}
+
/* ARGSUSED */
static int
t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
@@ -941,7 +1001,9 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
uint8_t v8 = *(uint8_t *)val;
uint32_t v32 = *(uint32_t *)val;
int old, new = 0, relink = 0, rx_mode = 0, rc = 0;
+ boolean_t down_link = B_TRUE;
link_flowctrl_t fc;
+ link_fec_t fec;
/*
* Save a copy of link_config. This can be used to restore link_config
@@ -1009,6 +1071,30 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
}
break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (!fec_supported(lc->pcaps)) {
+ rc = ENOTSUP;
+ break;
+ }
+
+ fec = *(link_fec_t *)val;
+ new = link_fec_to_fec(fec);
+ if (new < 0) {
+ rc = EINVAL;
+ } else if (new != lc->requested_fec) {
+ lc->requested_fec = new;
+ relink = 1;
+ /*
+ * For fec, do not preemptively force the link
+ * down. If changing fec causes the link state
+ * to transition, then appropriate asynchronous
+ * events are generated which correctly reflect
+ * the link state.
+ */
+ down_link = B_FALSE;
+ }
+ break;
+
case MAC_PROP_EN_10GFDX_CAP:
if (lc->pcaps & FW_PORT_CAP32_ANEG && is_10G_port(pi)) {
old = lc->acaps & FW_PORT_CAP32_SPEED_10G;
@@ -1062,7 +1148,8 @@ t4_mc_setprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
if (isset(&sc->open_device_map, pi->port_id) != 0) {
if (relink != 0) {
- t4_os_link_changed(pi->adapter, pi->port_id, 0);
+ if (down_link)
+ t4_os_link_changed(pi->adapter, pi->port_id, 0);
rc = begin_synchronized_op(pi, 1, 1);
if (rc != 0)
return (rc);
@@ -1143,6 +1230,20 @@ t4_mc_getprop(void *arg, const char *name, mac_prop_id_t id, uint_t size,
*(link_flowctrl_t *)val = LINK_FLOWCTRL_NONE;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (!fec_supported(lc->pcaps))
+ return (ENOTSUP);
+
+ *(link_fec_t *)val = fec_to_link_fec(lc->fec);
+ break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ if (!fec_supported(lc->pcaps))
+ return (ENOTSUP);
+
+ *(link_fec_t *)val = fec_to_link_fec(lc->requested_fec);
+ break;
+
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
*u = !!(lc->acaps & FW_PORT_CAP32_SPEED_100G);
@@ -1212,6 +1313,15 @@ t4_mc_propinfo(void *arg, const char *name, mac_prop_id_t id,
mac_prop_info_set_default_link_flowctrl(ph, LINK_FLOWCTRL_BI);
break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_default_fec(ph, LINK_FEC_AUTO);
+ break;
+
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(ph, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(ph, LINK_FEC_AUTO);
+ break;
+
case MAC_PROP_EN_10GFDX_CAP:
if (lc->pcaps & FW_PORT_CAP32_ANEG &&
lc->pcaps & FW_PORT_CAP32_SPEED_10G)
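
Worked examples for the two translation helpers added above, derived by
tracing the code (illustration only):

/*
 * link_fec_to_fec(LINK_FEC_AUTO)                 == FEC_AUTO
 * link_fec_to_fec(LINK_FEC_RS | LINK_FEC_BASE_R) == FEC_RS | FEC_BASER_RS
 * link_fec_to_fec(LINK_FEC_AUTO | LINK_FEC_RS)   == -1 (AUTO must be alone)
 * fec_to_link_fec(FEC_RS | FEC_BASER_RS)         == LINK_FEC_AUTO
 */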
diff --git a/usr/src/uts/common/io/e1000g/e1000g_alloc.c b/usr/src/uts/common/io/e1000g/e1000g_alloc.c
index c7496cd164..8a460fd45a 100644
--- a/usr/src/uts/common/io/e1000g/e1000g_alloc.c
+++ b/usr/src/uts/common/io/e1000g/e1000g_alloc.c
@@ -830,7 +830,7 @@ e1000g_free_dvma_buffer(dma_buffer_t *buf)
return;
}
- buf->dma_address = NULL;
+ buf->dma_address = 0;
if (buf->address != NULL) {
kmem_free(buf->address, buf->size);
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index d698862d81..4ce359f87b 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Joyent, Inc.
* Copyright 2015 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -3341,6 +3342,10 @@ mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range)
case MAC_PROP_FLOWCTRL:
minsize = sizeof (link_flowctrl_t);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ case MAC_PROP_EN_FEC_CAP:
+ minsize = sizeof (link_fec_t);
+ break;
case MAC_PROP_ADV_5000FDX_CAP:
case MAC_PROP_EN_5000FDX_CAP:
case MAC_PROP_ADV_2500FDX_CAP:
@@ -3529,6 +3534,28 @@ mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val,
break;
}
+ case MAC_PROP_ADV_FEC_CAP:
+ case MAC_PROP_EN_FEC_CAP: {
+ link_fec_t fec;
+
+ ASSERT(valsize >= sizeof (link_fec_t));
+
+ /*
+ * fec cannot be zero, and auto must be set exclusively.
+ */
+ bcopy(val, &fec, sizeof (link_fec_t));
+ if (fec == 0)
+ return (EINVAL);
+ if ((fec & LINK_FEC_AUTO) != 0 && (fec & ~LINK_FEC_AUTO) != 0)
+ return (EINVAL);
+
+ if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
+ err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
+ name, id, valsize, val);
+ }
+ break;
+ }
+
default:
/* For other driver properties, call driver's callback */
if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
@@ -4741,7 +4768,7 @@ mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
* The bridge may place this mblk on a provider's Tx
* path, a mac's Rx path, or both. Since we don't have
* enough information at this point, we can't be sure
- * that the desination(s) are capable of handling the
+ * that the destination(s) are capable of handling the
* hardware offloads requested by the mblk. We emulate
* them here as it is the safest choice. In the
* future, if bridge performance becomes a priority,
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index dcfb4803d6..b166e7987a 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -4243,7 +4243,7 @@ mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
mpip->mpi_type == MAC_CLIENT_PROMISC_ALL ||
is_mcast) {
mac_promisc_dispatch_one(mpip, mp, is_sender,
- local);
+ local);
}
}
}
@@ -4274,7 +4274,7 @@ mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain)
if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED &&
!is_mcast) {
mac_promisc_dispatch_one(mpip, mp, B_FALSE,
- B_FALSE);
+ B_FALSE);
}
}
}
@@ -4352,12 +4352,27 @@ i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
{
mac_impl_t *mip = (mac_impl_t *)mh;
- if (mip->mi_bridge_link != NULL && cap == MAC_CAPAB_NO_ZCOPY)
+ if (mip->mi_bridge_link != NULL && cap == MAC_CAPAB_NO_ZCOPY) {
return (B_TRUE);
- else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
- return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
- else
+ } else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB) {
+ boolean_t res;
+
+ res = mip->mi_getcapab(mip->mi_driver, cap, cap_data);
+ /*
+		 * Until we have support for TSOv6 emulation in the MAC
+ * loopback path, do not allow the TSOv6 capability to be
+ * advertised to consumers.
+ */
+ if (res && cap == MAC_CAPAB_LSO) {
+ mac_capab_lso_t *cap_lso = cap_data;
+
+ cap_lso->lso_flags &= ~LSO_TX_BASIC_TCP_IPV6;
+ cap_lso->lso_basic_tcp_ipv6.lso_max = 0;
+ }
+ return (res);
+ } else {
return (B_FALSE);
+ }
}
/*
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 7f193f68eb..bcca602589 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -23,6 +23,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 RackTop Systems, Inc.
*/
#include <sys/types.h>
@@ -1530,6 +1531,22 @@ mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
}
void
+mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val)
+{
+ mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
+
+ /* nothing to do if the caller doesn't want the default value */
+ if (pr->pr_default == NULL)
+ return;
+
+ ASSERT(pr->pr_default_size >= sizeof (link_fec_t));
+
+ bcopy(&val, pr->pr_default, sizeof (val));
+
+ pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
+}
+
+void
mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
uint32_t max)
{
diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c
index 94ec8add16..8f983e50e4 100644
--- a/usr/src/uts/common/io/mac/mac_sched.c
+++ b/usr/src/uts/common/io/mac/mac_sched.c
@@ -4443,9 +4443,9 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain,
mac_hw_emul(&mp, NULL, NULL, MAC_ALL_EMULS);
if (mp != NULL) {
(dst_flow_ent->fe_cb_fn)(
- dst_flow_ent->fe_cb_arg1,
- dst_flow_ent->fe_cb_arg2,
- mp, do_switch);
+ dst_flow_ent->fe_cb_arg1,
+ dst_flow_ent->fe_cb_arg2,
+ mp, do_switch);
}
}
diff --git a/usr/src/uts/common/io/mac/mac_util.c b/usr/src/uts/common/io/mac/mac_util.c
index 6e33fb7f56..03da3a3504 100644
--- a/usr/src/uts/common/io/mac/mac_util.c
+++ b/usr/src/uts/common/io/mac/mac_util.c
@@ -258,7 +258,7 @@ bail:
static boolean_t
mac_sw_cksum_ipv6(mblk_t *mp, uint32_t ip_hdr_offset, const char **err)
{
- ip6_t* ip6h = (ip6_t *)(mp->b_rptr + ip_hdr_offset);
+ ip6_t *ip6h = (ip6_t *)(mp->b_rptr + ip_hdr_offset);
const uint8_t proto = ip6h->ip6_nxt;
const uint16_t *iphs = (uint16_t *)ip6h;
/* ULP offset from start of L2. */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..2aefac33db 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
* before making a WQE for it.
*
* After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buffer_shard_t.
*
* Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buffer_shard_t. When the shard is in its normal state the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
*
* At detach/teardown time, buffers are only ever destroyed from the free list.
*
@@ -289,18 +294,18 @@
* v
* +----+----+
* | created |
- * +----+----+
- * |
- * |
- * | mlxcx_buf_return
- * |
- * v
- * mlxcx_buf_destroy +----+----+
- * +---------| free |<---------------+
- * | +----+----+ |
+ * +----+----+ +------+
+ * | | dead |
+ * | +------+
+ * | mlxcx_buf_return ^
+ * | |
+ * v | mlxcx_buf_destroy
+ * mlxcx_buf_destroy +----+----+ +-----------+ |
+ * +---------| free |<------no-| draining? |-yes-+
+ * | +----+----+ +-----------+
+ * | | ^
* | | |
- * | | | mlxcx_buf_return
- * v | mlxcx_buf_take |
+ * v | mlxcx_buf_take | mlxcx_buf_return
* +---+--+ v |
* | dead | +---+---+ |
* +------+ | on WQ |- - - - - - - - >O
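The key transition in this diagram is the return path from the loaned state:
recycle when the shard is ready, destroy when it is draining. A compilable toy
sketch of just that decision (the types and list handling are stand-ins for
mlxcx_buffer_shard_t / mlxcx_buffer_t, and the locking is elided):

#include <stdio.h>
#include <stdlib.h>

typedef enum { SHARD_READY, SHARD_DRAINING } shard_state_t;

typedef struct buf {
	struct buf *next;
} buf_t;

typedef struct {
	shard_state_t state;
	buf_t *free;		/* toy free list */
} shard_t;

static void
buf_return_sketch(shard_t *s, buf_t *b)
{
	/* the real code holds mlbs_mtx and removes b from mlbs_loaned */
	if (s->state == SHARD_DRAINING) {
		/* no recycling during a ring stop; destroy immediately */
		free(b);
	} else {
		b->next = s->free;	/* recycle onto the free list */
		s->free = b;
	}
	/* the real code then cv_broadcast()s for any waiting teardown */
}

int
main(void)
{
	shard_t s = { SHARD_READY, NULL };

	buf_return_sketch(&s, calloc(1, sizeof (buf_t)));	/* recycled */
	s.state = SHARD_DRAINING;
	buf_return_sketch(&s, calloc(1, sizeof (buf_t)));	/* destroyed */
	printf("free list is %sempty\n", s.free == NULL ? "" : "non-");
	return (0);
}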
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
mlxcx_buffer_t *buf;
mutex_enter(&s->mlbs_mtx);
+
while (!list_is_empty(&s->mlbs_busy))
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
- while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+ while (!list_is_empty(&s->mlbs_loaned))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+ while ((buf = list_head(&s->mlbs_free)) != NULL)
mlxcx_buf_destroy(mlxp, buf);
- }
+
list_destroy(&s->mlbs_free);
list_destroy(&s->mlbs_busy);
+ list_destroy(&s->mlbs_loaned);
mutex_exit(&s->mlbs_mtx);
cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
offsetof(mlxcx_buffer_t, mlb_entry));
list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
list_insert_tail(&mlxp->mlx_buf_shards, s);
@@ -1743,6 +1756,11 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
mutex_exit(&p->mlp_mtx);
goto err;
}
+ if (!mlxcx_cmd_query_port_fec(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_fec_requested = LINK_FEC_AUTO;
mutex_exit(&p->mlp_mtx);
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index da048b4ac3..06277d033c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -346,6 +346,8 @@ typedef struct mlxcx_port {
mlxcx_eth_proto_t mlp_max_proto;
mlxcx_eth_proto_t mlp_admin_proto;
mlxcx_eth_proto_t mlp_oper_proto;
+ mlxcx_pplm_fec_active_t mlp_fec_active;
+ link_fec_t mlp_fec_requested;
mlxcx_eth_inline_mode_t mlp_wqe_min_inline;
@@ -424,11 +426,18 @@ typedef enum {
MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;
+typedef enum {
+ MLXCX_SHARD_READY,
+ MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
typedef struct mlxcx_buf_shard {
+ mlxcx_shard_state_t mlbs_state;
list_node_t mlbs_entry;
kmutex_t mlbs_mtx;
list_t mlbs_busy;
list_t mlbs_free;
+ list_t mlbs_loaned;
kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;
@@ -1171,6 +1180,8 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
mblk_t *, size_t, mlxcx_buffer_t **);
@@ -1311,7 +1322,12 @@ extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
mlxcx_register_id_t, mlxcx_register_data_t *);
extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_port_status_t);
extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_pplm_fec_caps_t);
extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index 30fb7ca8ef..f059b856a6 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -12,6 +12,7 @@
/*
* Copyright 2020, The University of Queensland
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -1594,6 +1595,8 @@ mlxcx_reg_name(mlxcx_register_id_t rid)
return ("MCIA");
case MLXCX_REG_PPCNT:
return ("PPCNT");
+ case MLXCX_REG_PPLM:
+ return ("PPLM");
default:
return ("???");
}
@@ -1640,6 +1643,9 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
case MLXCX_REG_PPCNT:
dsize = sizeof (mlxcx_reg_ppcnt_t);
break;
+ case MLXCX_REG_PPLM:
+ dsize = sizeof (mlxcx_reg_pplm_t);
+ break;
default:
dsize = 0;
VERIFY(0);
@@ -1776,6 +1782,25 @@ mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_modify_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_port_status_t status)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1;
+ data.mlrd_paos.mlrd_paos_admin_status = status;
+ set_bit32(&data.mlrd_paos.mlrd_paos_flags, MLXCX_PAOS_ADMIN_ST_EN);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PAOS, &data);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
{
mlxcx_register_data_t data;
@@ -1809,6 +1834,82 @@ mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
}
boolean_t
+mlxcx_cmd_query_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_pplm.mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data);
+
+ if (ret) {
+ mlp->mlp_fec_active =
+ from_be24(data.mlrd_pplm.mlrd_pplm_fec_mode_active);
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_modify_port_fec(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_pplm_fec_caps_t fec)
+{
+ mlxcx_register_data_t data_in, data_out;
+ mlxcx_pplm_fec_caps_t caps;
+ mlxcx_reg_pplm_t *pplm_in, *pplm_out;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data_in, sizeof (data_in));
+ pplm_in = &data_in.mlrd_pplm;
+ pplm_in->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPLM, &data_in);
+
+ if (!ret)
+ return (B_FALSE);
+
+ bzero(&data_out, sizeof (data_out));
+ pplm_out = &data_out.mlrd_pplm;
+ pplm_out->mlrd_pplm_local_port = mlp->mlp_num + 1;
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_56G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_56G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_100G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_100G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_50G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_50G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_25G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_25G, fec & caps);
+
+ caps = get_bits32(pplm_in->mlrd_pplm_fec_override_cap,
+ MLXCX_PPLM_CAP_10_40G);
+ set_bits32(&pplm_out->mlrd_pplm_fec_override_admin,
+ MLXCX_PPLM_CAP_10_40G, fec & caps);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PPLM, &data_out);
+
+ return (ret);
+}
+
+boolean_t
mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp,
mlxcx_modify_nic_vport_ctx_fields_t fields)
{
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a08cec3980..2521641a00 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -80,6 +80,53 @@ mlxcx_speed_to_bits(mlxcx_eth_proto_t v)
}
}
+static link_fec_t
+mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
+{
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
+ return (LINK_FEC_NONE);
+
+ if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
+ return (LINK_FEC_BASE_R);
+
+ if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
+ MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
+ MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
+ return (LINK_FEC_RS);
+
+ return (LINK_FEC_NONE);
+}
+
+static boolean_t
+mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
+{
+ mlxcx_pplm_fec_caps_t pplm_fec = 0;
+
+ if ((fec & LINK_FEC_AUTO) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
+ fec &= ~LINK_FEC_AUTO;
+ } else if ((fec & LINK_FEC_NONE) != 0) {
+ pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
+ fec &= ~LINK_FEC_NONE;
+ } else if ((fec & LINK_FEC_RS) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
+ fec &= ~LINK_FEC_RS;
+ } else if ((fec & LINK_FEC_BASE_R) != 0) {
+ pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
+ fec &= ~LINK_FEC_BASE_R;
+ }
+
+ /*
+	 * Only one FEC option is allowed.
+ */
+ if (fec != 0)
+ return (B_FALSE);
+
+ *pfecp = pplm_fec;
+
+ return (B_TRUE);
+}
+
static int
mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
uint64_t *val)
@@ -451,7 +498,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
return (NULL);
}
- if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+ MLXCX_WQ_STARTED) {
mutex_exit(&sq->mlwq_mtx);
mlxcx_buf_return_chain(mlxp, b, B_FALSE);
return (NULL);
@@ -725,8 +773,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mlxcx_buf_shard_t *s;
mlxcx_buffer_t *buf;
+ /*
+ * To prevent deadlocks and sleeping whilst holding either the
+ * CQ mutex or WQ mutex, we split the stop processing into two
+ * parts.
+ *
+	 * With the CQ and WQ mutexes held, the appropriate WQ is stopped.
+	 * The Q in the HCA is set to Reset state and flagged as no
+	 * longer started. Atomically with changing this WQ state, the
+	 * buffer shards are flagged as draining.
+	 *
+	 * From then on, any requests for buffers and attempts to submit
+	 * messages will fail, and once we're in this state it is safe to
+	 * relinquish the CQ and WQ mutexes. This allows us to complete
+	 * the ring stop by waiting for the buffer lists, with the
+	 * exception of the loaned list, to drain. Buffers on the loaned
+	 * list are not under our control; we will get them back when the
+	 * mblk tied to the buffer is freed.
+ */
+
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
+
if (wq->mlwq_state & MLXCX_WQ_STARTED) {
if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
!mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -743,7 +811,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
}
ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
+ mlxcx_shard_draining(wq->mlwq_bufs);
+ if (wq->mlwq_foreign_bufs != NULL)
+ mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
/* Return any outstanding buffers to the free pool. */
while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -775,12 +851,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mutex_exit(&s->mlbs_mtx);
}
+ mutex_enter(&wq->mlwq_mtx);
wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+ mutex_exit(&wq->mlwq_mtx);
+ } else {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
}
- ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
- mutex_exit(&wq->mlwq_mtx);
- mutex_exit(&cq->mlcq_mtx);
}
static int
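The shape of the two-phase stop described in the comment above, as a
compilable skeleton (the pthread mutexes and field names are toy stand-ins
for the driver's CQ/WQ locks and state flags, not its real interfaces):

#include <pthread.h>

typedef struct {
	pthread_mutex_t mtx;
	int started;
	int draining;
} q_sketch_t;

static void
ring_stop_sketch(q_sketch_t *cq, q_sketch_t *wq)
{
	/* phase one: state changes, atomic under both mutexes */
	pthread_mutex_lock(&cq->mtx);
	pthread_mutex_lock(&wq->mtx);
	wq->started = 0;	/* HCA queue goes to Reset here */
	wq->draining = 1;	/* buffer takers now fail fast */
	pthread_mutex_unlock(&wq->mtx);
	pthread_mutex_unlock(&cq->mtx);

	/*
	 * phase two: no locks held, so it is safe to sleep while the
	 * busy lists empty; loaned buffers return via freemsg() later.
	 */
}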
@@ -1061,6 +1138,14 @@ mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
mac_prop_info_set_default_uint8(prh, 1);
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
+ break;
case MAC_PROP_ADV_100GFDX_CAP:
case MAC_PROP_EN_100GFDX_CAP:
mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
@@ -1120,6 +1205,9 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint32_t new_mtu, new_hw_mtu, old_mtu;
mlxcx_buf_shard_t *sh;
boolean_t allocd = B_FALSE;
+ boolean_t relink = B_FALSE;
+ link_fec_t fec;
+ mlxcx_pplm_fec_caps_t cap_fec;
mutex_enter(&port->mlp_mtx);
@@ -1137,7 +1225,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
mutex_enter(&sh->mlbs_mtx);
if (!list_is_empty(&sh->mlbs_free) ||
- !list_is_empty(&sh->mlbs_busy)) {
+ !list_is_empty(&sh->mlbs_busy) ||
+ !list_is_empty(&sh->mlbs_loaned)) {
allocd = B_TRUE;
mutex_exit(&sh->mlbs_mtx);
break;
@@ -1167,11 +1256,57 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
break;
}
break;
+
+ case MAC_PROP_EN_FEC_CAP:
+ bcopy(pr_val, &fec, sizeof (fec));
+ if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ /*
+ * Don't change the FEC if it is already at the requested
+ * setting AND the port is up.
+ * When the port is down, always set the FEC and attempt
+ * to retrain the link.
+ */
+ if (fec == port->mlp_fec_requested &&
+ fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
+ port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
+ break;
+
+ /*
+		 * The most likely cause of this failing is an invalid
+		 * or unsupported FEC option.
+ */
+ if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
+ ret = EINVAL;
+ break;
+ }
+
+ port->mlp_fec_requested = fec;
+
+ /*
+ * For FEC to become effective, the link needs to go back
+		 * to training and negotiation state. This happens when the
+		 * link transitions from down to up, so force a relink.
+ */
+ relink = B_TRUE;
+ break;
+
default:
ret = ENOTSUP;
break;
}
+ if (relink) {
+ if (!mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_DOWN) ||
+ !mlxcx_cmd_modify_port_status(mlxp, port,
+ MLXCX_PORT_STATUS_UP)) {
+ ret = EIO;
+ }
+ }
mutex_exit(&port->mlp_mtx);
return (ret);
@@ -1229,6 +1364,21 @@ mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
}
*(uint8_t *)pr_val = port->mlp_autoneg;
break;
+ case MAC_PROP_ADV_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val =
+ mlxcx_fec_to_link_fec(port->mlp_fec_active);
+ break;
+ case MAC_PROP_EN_FEC_CAP:
+ if (pr_valsize < sizeof (link_fec_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(link_fec_t *)pr_val = port->mlp_fec_requested;
+ break;
case MAC_PROP_MTU:
if (pr_valsize < sizeof (uint32_t)) {
ret = EOVERFLOW;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index 4dc4291b08..aed691897b 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -355,6 +355,7 @@ mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
mutex_enter(&port->mlp_mtx);
(void) mlxcx_cmd_query_port_status(mlxp, port);
(void) mlxcx_cmd_query_port_speed(mlxp, port);
+ (void) mlxcx_cmd_query_port_fec(mlxp, port);
switch (port->mlp_oper_status) {
case MLXCX_PORT_STATUS_UP:
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index 6d09abea5c..abd717842d 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -2464,6 +2464,59 @@ typedef struct {
} mlxcx_reg_ppcnt_t;
typedef enum {
+ MLXCX_PPLM_FEC_CAP_AUTO = 0,
+ MLXCX_PPLM_FEC_CAP_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_CAP_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_CAP_RS = (1 << 2),
+} mlxcx_pplm_fec_caps_t;
+
+typedef enum {
+ MLXCX_PPLM_FEC_ACTIVE_NONE = (1 << 0),
+ MLXCX_PPLM_FEC_ACTIVE_FIRECODE = (1 << 1),
+ MLXCX_PPLM_FEC_ACTIVE_RS528 = (1 << 2),
+ MLXCX_PPLM_FEC_ACTIVE_RS271 = (1 << 3),
+ MLXCX_PPLM_FEC_ACTIVE_RS544 = (1 << 7),
+ MLXCX_PPLM_FEC_ACTIVE_RS272 = (1 << 9),
+} mlxcx_pplm_fec_active_t;
+
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_56G (bitdef_t){ 16, 0x000f0000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_100G (bitdef_t){ 12, 0x0000f000 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_50G (bitdef_t){ 8, 0x00000f00 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_25G (bitdef_t){ 4, 0x000000f0 }
+/* CSTYLED */
+#define MLXCX_PPLM_CAP_10_40G (bitdef_t){ 0, 0x0000000f }
+
+typedef struct {
+ uint8_t mlrd_pplm_rsvd;
+ uint8_t mlrd_pplm_local_port;
+ uint8_t mlrd_pplm_rsvd1[11];
+ uint24be_t mlrd_pplm_fec_mode_active;
+ bits32_t mlrd_pplm_fec_override_cap;
+ bits32_t mlrd_pplm_fec_override_admin;
+ uint16be_t mlrd_pplm_fec_override_cap_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_cap_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_cap_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_cap_50g_1x;
+ uint16be_t mlrd_pplm_fec_override_admin_400g_8x;
+ uint16be_t mlrd_pplm_fec_override_admin_200g_4x;
+ uint16be_t mlrd_pplm_fec_override_admin_100g_2x;
+ uint16be_t mlrd_pplm_fec_override_admin_50g_1x;
+ uint8_t mlrd_pplm_rsvd2[8];
+ uint16be_t mlrd_pplm_fec_override_cap_hdr;
+ uint16be_t mlrd_pplm_fec_override_cap_edr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr;
+ uint16be_t mlrd_pplm_fec_override_cap_fdr10;
+ uint16be_t mlrd_pplm_fec_override_admin_hdr;
+ uint16be_t mlrd_pplm_fec_override_admin_edr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr;
+ uint16be_t mlrd_pplm_fec_override_admin_fdr10;
+} mlxcx_reg_pplm_t;
+
+typedef enum {
MLXCX_REG_PMTU = 0x5003,
MLXCX_REG_PTYS = 0x5004,
MLXCX_REG_PAOS = 0x5006,
@@ -2472,6 +2525,7 @@ typedef enum {
MLXCX_REG_MLCR = 0x902B,
MLXCX_REG_MCIA = 0x9014,
MLXCX_REG_PPCNT = 0x5008,
+ MLXCX_REG_PPLM = 0x5023,
} mlxcx_register_id_t;
typedef union {
@@ -2482,6 +2536,7 @@ typedef union {
mlxcx_reg_pmaos_t mlrd_pmaos;
mlxcx_reg_mcia_t mlrd_mcia;
mlxcx_reg_ppcnt_t mlrd_ppcnt;
+ mlxcx_reg_pplm_t mlrd_pplm;
} mlxcx_register_data_t;
typedef enum {
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 492f8fd8a5..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -1213,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
rq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(rq->mlwq_bufs);
+
for (j = 0; j < rq->mlwq_nents; ++j) {
if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
break;
@@ -1409,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
}
sq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(sq->mlwq_bufs);
+ mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
if (!mlxcx_cmd_start_sq(mlxp, sq)) {
mutex_exit(&sq->mlwq_mtx);
mutex_exit(&cq->mlcq_mtx);
@@ -1799,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
mlxcx_completion_queue_t *cq = wq->mlwq_cq;
mlxcx_t *mlxp = wq->mlwq_mlx;
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
- boolean_t refill;
+ boolean_t refill, draining;
do {
/*
- * Wait until there are some free buffers.
+ * Wait here until one of 3 conditions:
+ * 1. The shard is draining, or
+ * 2. There are buffers on the free list, or
+ * 3. The WQ is being shut down.
*/
mutex_enter(&s->mlbs_mtx);
- while (list_is_empty(&s->mlbs_free) &&
- (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+ while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+ list_is_empty(&s->mlbs_free) &&
+ (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ }
+
+ draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
mutex_exit(&s->mlbs_mtx);
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
- if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+ if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
refill = B_FALSE;
wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
} else {
@@ -1851,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
cq = mlwq->mlwq_cq;
- if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+ return;
+
+ if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
return;
current = cq->mlcq_bufcnt;
@@ -1883,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
return;
}
- if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
for (i = 0; i < n; ++i)
mlxcx_buf_return(mlxp, b[i]);
return;
@@ -2058,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
wqe_index = buf->mlb_wqe_index;
if (!mlxcx_buf_loan(mlxp, buf)) {
- mlxcx_warn(mlxp, "!loan failed, dropping packet");
mlxcx_buf_return(mlxp, buf);
return (NULL);
}
@@ -2101,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
mlxcx_t *mlxp = b->mlb_mlx;
- if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
- b->mlb_mp = NULL;
- return;
- }
- /*
- * The mblk for this buffer_t (in its mlb_mp field) has been used now,
- * so NULL it out.
- */
+ /* The mblk has been used now, so NULL it out. */
b->mlb_mp = NULL;
- mlxcx_buf_return(mlxp, b);
+
+ if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+ mlxcx_buf_return(mlxp, b);
}
boolean_t
@@ -2177,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
ASSERT(b->mlb_foreign);
@@ -2345,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2366,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (0);
+ }
+
while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2379,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
boolean_t
mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
ASSERT3P(b->mlb_mlx, ==, mlxp);
@@ -2391,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
b->mlb_state = MLXCX_BUFFER_ON_LOAN;
b->mlb_wqe_index = 0;
+
+ mutex_enter(&s->mlbs_mtx);
+ list_remove(&s->mlbs_busy, b);
+ list_insert_tail(&s->mlbs_loaned, b);
+ mutex_exit(&s->mlbs_mtx);
+
return (B_TRUE);
}
@@ -2453,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
break;
case MLXCX_BUFFER_ON_LOAN:
ASSERT(!b->mlb_foreign);
- list_remove(&s->mlbs_busy, b);
+ list_remove(&s->mlbs_loaned, b);
+ if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+ /*
+			 * When we're draining, e.g. during mac_stop(),
+ * we destroy the buffer immediately rather than
+ * recycling it. Otherwise we risk leaving it
+ * on the free list and leaking it.
+ */
+ list_insert_tail(&s->mlbs_free, b);
+ mlxcx_buf_destroy(mlxp, b);
+ /*
+			 * Teardown might be waiting for the loaned list to empty.
+ */
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+ return;
+ }
break;
case MLXCX_BUFFER_FREE:
VERIFY(0);
@@ -2466,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
}
list_insert_tail(&s->mlbs_free, b);
- cv_signal(&s->mlbs_free_nonempty);
+ cv_broadcast(&s->mlbs_free_nonempty);
mutex_exit(&s->mlbs_mtx);
@@ -2484,9 +2532,11 @@ void
mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
b->mlb_state == MLXCX_BUFFER_INIT);
ASSERT(mutex_owned(&s->mlbs_mtx));
+
if (b->mlb_state == MLXCX_BUFFER_FREE)
list_remove(&s->mlbs_free, b);
@@ -2506,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
kmem_cache_free(mlxp->mlx_bufs_cache, b);
}
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_READY;
+ mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_DRAINING;
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+}
diff --git a/usr/src/uts/common/io/stream.c b/usr/src/uts/common/io/stream.c
index 55fd87db45..288f77ae47 100644
--- a/usr/src/uts/common/io/stream.c
+++ b/usr/src/uts/common/io/stream.c
@@ -839,7 +839,7 @@ frnop_func(void *arg)
*/
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
- void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
+ void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
dblk_t *dbp;
mblk_t *mp;
diff --git a/usr/src/uts/common/klm/nlm_impl.h b/usr/src/uts/common/klm/nlm_impl.h
index 68604309a2..9caae1a8c7 100644
--- a/usr/src/uts/common/klm/nlm_impl.h
+++ b/usr/src/uts/common/klm/nlm_impl.h
@@ -28,7 +28,7 @@
*/
/*
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2016 Joyent, Inc.
*/
@@ -112,7 +112,7 @@ struct _kthread;
* We pass three callback functions to nlm_do_lock:
* nlm_reply_cb: send a normal RPC reply
* nlm_res_cb: do a _res (message style) RPC (call)
- * nlm_testargs_cb: do a "granted" RPC call (after blocking)
+ * nlm_granted_cb: do a "granted" RPC call (after blocking)
* Only one of the 1st or 2nd is used.
* The 3rd is used only for blocking
*
@@ -123,7 +123,7 @@ struct _kthread;
*/
typedef bool_t (*nlm_reply_cb)(SVCXPRT *, nlm4_res *);
typedef enum clnt_stat (*nlm_res_cb)(nlm4_res *, void *, CLIENT *);
-typedef enum clnt_stat (*nlm_testargs_cb)(nlm4_testargs *, void *, CLIENT *);
+typedef enum clnt_stat (*nlm_granted_cb)(nlm4_testargs *, nlm4_res *, CLIENT *);
typedef enum clnt_stat (*nlm_testres_cb)(nlm4_testres *, void *, CLIENT *);
/*
@@ -624,7 +624,7 @@ void nlm_do_notify2(nlm_sm_status *, void *, struct svc_req *);
void nlm_do_test(nlm4_testargs *, nlm4_testres *,
struct svc_req *, nlm_testres_cb);
void nlm_do_lock(nlm4_lockargs *, nlm4_res *, struct svc_req *,
- nlm_reply_cb, nlm_res_cb, nlm_testargs_cb);
+ nlm_reply_cb, nlm_res_cb, nlm_granted_cb);
void nlm_do_cancel(nlm4_cancargs *, nlm4_res *,
struct svc_req *, nlm_res_cb);
void nlm_do_unlock(nlm4_unlockargs *, nlm4_res *,
diff --git a/usr/src/uts/common/klm/nlm_rpc_handle.c b/usr/src/uts/common/klm/nlm_rpc_handle.c
index 9ddf56856c..b022acc380 100644
--- a/usr/src/uts/common/klm/nlm_rpc_handle.c
+++ b/usr/src/uts/common/klm/nlm_rpc_handle.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -130,6 +130,7 @@ update_host_rpcbinding(struct nlm_host *hostp, int vers)
static int
refresh_nlm_rpc(struct nlm_host *hostp, nlm_rpc_t *rpcp)
{
+ uint32_t zero = 0;
int ret;
if (rpcp->nr_handle == NULL) {
@@ -175,6 +176,12 @@ refresh_nlm_rpc(struct nlm_host *hostp, nlm_rpc_t *rpcp)
if (NLM_STALE_CLNT(stat)) {
ret = ESTALE;
}
+ /*
+ * Need to reset the XID after the null call above,
+ * otherwise we'll reuse the XID from that call.
+ */
+ (void) CLNT_CONTROL(rpcp->nr_handle, CLSET_XID,
+ (char *)&zero);
}
}
@@ -209,7 +216,8 @@ again:
rc = cv_wait_sig(&hostp->nh_rpcb_cv, &hostp->nh_lock);
if (rc == 0) {
mutex_exit(&hostp->nh_lock);
- return (EINTR);
+ rc = EINTR;
+ goto errout;
}
}
@@ -229,7 +237,8 @@ again:
*/
if (hostp->nh_rpcb_ustat != RPC_SUCCESS) {
mutex_exit(&hostp->nh_lock);
- return (ENOENT);
+ rc = ENOENT;
+ goto errout;
}
}
@@ -263,7 +272,7 @@ again:
}
destroy_rpch(rpcp);
- return (rc);
+ goto errout;
}
DTRACE_PROBE2(end, struct nlm_host *, hostp,
@@ -271,6 +280,10 @@ again:
*rpcpp = rpcp;
return (0);
+
+errout:
+ NLM_ERR("Can't get RPC client handle for: %s", hostp->nh_name);
+ return (rc);
}
void
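The XID fix above follows a general TI-RPC pattern: after probing a fresh
client handle with the NULL procedure, reset the XID so the next real call
does not reuse it. A userland rendering of the same pattern, assuming an
already-created CLIENT handle (ping_then_reset_xid is a hypothetical name):

#include <rpc/rpc.h>

static void
ping_then_reset_xid(CLIENT *clnt)
{
	struct timeval tv = { 5, 0 };
	uint32_t zero = 0;

	/* NULL-procedure ping to check the binding */
	(void) clnt_call(clnt, NULLPROC, (xdrproc_t)xdr_void, NULL,
	    (xdrproc_t)xdr_void, NULL, tv);

	/* don't reuse the ping's XID for subsequent calls */
	(void) CLNT_CONTROL(clnt, CLSET_XID, (char *)&zero);
}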
diff --git a/usr/src/uts/common/klm/nlm_rpc_svc.c b/usr/src/uts/common/klm/nlm_rpc_svc.c
index 2911b31877..1f04e3f036 100644
--- a/usr/src/uts/common/klm/nlm_rpc_svc.c
+++ b/usr/src/uts/common/klm/nlm_rpc_svc.c
@@ -26,7 +26,7 @@
*/
/*
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -63,7 +63,7 @@
* 32-bit lock ranges.
*/
static void
-nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src)
+nlm_convert_to_nlm_lock(struct nlm_lock *dst, const struct nlm4_lock *src)
{
dst->caller_name = src->caller_name;
dst->fh = src->fh;
@@ -76,12 +76,22 @@ nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src)
}
/*
+ * Up-convert for v1 granted response
+ */
+static void
+nlm_convert_to_nlm4_res(struct nlm4_res *dst, const struct nlm_res *src)
+{
+ dst->cookie = src->cookie;
+ dst->stat.stat = (nlm4_stats) src->stat.stat;
+}
+
+/*
* Up-convert for v1 svc functions with a 32-bit lock range arg.
* Note that lock range checks (like overflow) are done later,
* in nlm_init_flock().
*/
static void
-nlm_convert_to_nlm4_lock(struct nlm4_lock *dst, struct nlm_lock *src)
+nlm_convert_to_nlm4_lock(struct nlm4_lock *dst, const struct nlm_lock *src)
{
dst->caller_name = src->caller_name;
@@ -93,7 +103,7 @@ nlm_convert_to_nlm4_lock(struct nlm4_lock *dst, struct nlm_lock *src)
}
static void
-nlm_convert_to_nlm4_share(struct nlm4_share *dst, struct nlm_share *src)
+nlm_convert_to_nlm4_share(struct nlm4_share *dst, const struct nlm_share *src)
{
dst->caller_name = src->caller_name;
@@ -113,7 +123,7 @@ nlm_convert_to_nlm4_share(struct nlm4_share *dst, struct nlm_share *src)
* valid 32-bit lock range.
*/
static void
-nlm_convert_to_nlm_holder(struct nlm_holder *dst, struct nlm4_holder *src)
+nlm_convert_to_nlm_holder(struct nlm_holder *dst, const struct nlm4_holder *src)
{
dst->exclusive = src->exclusive;
dst->svid = src->svid;
@@ -133,7 +143,7 @@ nlm_convert_to_nlm_stats(enum nlm4_stats src)
}
static void
-nlm_convert_to_nlm_res(struct nlm_res *dst, struct nlm4_res *src)
+nlm_convert_to_nlm_res(struct nlm_res *dst, const struct nlm4_res *src)
{
dst->cookie = src->cookie;
dst->stat.stat = nlm_convert_to_nlm_stats(src->stat.stat);
@@ -175,7 +185,7 @@ nlm_test_1_svc(struct nlm_testargs *argp, nlm_testres *resp,
* Callback functions for nlm_lock_1_svc
*/
static bool_t nlm_lock_1_reply(SVCXPRT *, nlm4_res *);
-static enum clnt_stat nlm_granted_1_cb(nlm4_testargs *, void *, CLIENT *);
+static enum clnt_stat nlm_granted_1_cb(nlm4_testargs *, nlm4_res *, CLIENT *);
bool_t
nlm_lock_1_svc(nlm_lockargs *argp, nlm_res *resp,
@@ -215,7 +225,7 @@ nlm_lock_1_reply(SVCXPRT *transp, nlm4_res *resp)
}
static enum clnt_stat
-nlm_granted_1_cb(nlm4_testargs *argp, void *resp, CLIENT *clnt)
+nlm_granted_1_cb(nlm4_testargs *argp, nlm4_res *resp, CLIENT *clnt)
{
nlm_testargs args1;
nlm_res res1;
@@ -229,9 +239,7 @@ nlm_granted_1_cb(nlm4_testargs *argp, void *resp, CLIENT *clnt)
rv = nlm_granted_1(&args1, &res1, clnt);
- /* NB: We have a result our caller will not free. */
- xdr_free((xdrproc_t)xdr_nlm_res, (void *)&res1);
- (void) resp;
+ nlm_convert_to_nlm4_res(resp, &res1);
return (rv);
}
@@ -355,7 +363,8 @@ nlm_test_res_1_cb(nlm4_testres *res4, void *null, CLIENT *clnt)
* Callback functions for nlm_lock_msg_1_svc
*/
static enum clnt_stat nlm_lock_res_1_cb(nlm4_res *, void *, CLIENT *);
-static enum clnt_stat nlm_granted_msg_1_cb(nlm4_testargs *, void *, CLIENT *);
+static enum clnt_stat nlm_granted_msg_1_cb(nlm4_testargs *, nlm4_res *,
+ CLIENT *);
bool_t
nlm_lock_msg_1_svc(nlm_lockargs *argp, void *resp,
@@ -396,16 +405,22 @@ nlm_lock_res_1_cb(nlm4_res *resp, void *null, CLIENT *clnt)
}
static enum clnt_stat
-nlm_granted_msg_1_cb(nlm4_testargs *argp, void *null, CLIENT *clnt)
+nlm_granted_msg_1_cb(nlm4_testargs *argp, nlm4_res *resp, CLIENT *clnt)
{
nlm_testargs args1;
+ int rv;
args1.cookie = argp->cookie;
args1.exclusive = argp->exclusive;
nlm_convert_to_nlm_lock(&args1.alock, &argp->alock);
- return (nlm_granted_msg_1(&args1, null, clnt));
+ rv = nlm_granted_msg_1(&args1, NULL, clnt);
+
+ /* MSG call doesn't fill in *resp, so do it here. */
+ if (rv != RPC_SUCCESS)
+ resp->stat.stat = nlm4_failed;
+ return (rv);
}
@@ -693,7 +708,6 @@ nlm4_test_4_svc(nlm4_testargs *argp, nlm4_testres *resp, struct svc_req *sr)
* Callback functions for nlm4_lock_4_svc
*/
static bool_t nlm4_lock_4_reply(SVCXPRT *, nlm4_res *);
-static enum clnt_stat nlm4_granted_4_cb(nlm4_testargs *, void *, CLIENT *);
bool_t
nlm4_lock_4_svc(nlm4_lockargs *argp, nlm4_res *resp,
@@ -703,7 +717,7 @@ nlm4_lock_4_svc(nlm4_lockargs *argp, nlm4_res *resp,
/* NLM4_LOCK */
nlm_do_lock(argp, resp, sr,
nlm4_lock_4_reply, NULL,
- nlm4_granted_4_cb);
+ nlm4_granted_4);
/* above does its own reply */
return (FALSE);
@@ -715,22 +729,6 @@ nlm4_lock_4_reply(SVCXPRT *transp, nlm4_res *resp)
return (svc_sendreply(transp, xdr_nlm4_res, (char *)resp));
}
-static enum clnt_stat
-nlm4_granted_4_cb(nlm4_testargs *argp, void *resp, CLIENT *clnt)
-{
- nlm4_res res4;
- int rv;
-
- bzero(&res4, sizeof (res4));
- rv = nlm4_granted_4(argp, &res4, clnt);
-
- /* NB: We have a result our caller will not free. */
- xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4);
- (void) resp;
-
- return (rv);
-}
-
bool_t
nlm4_cancel_4_svc(nlm4_cancargs *argp, nlm4_res *resp, struct svc_req *sr)
{
@@ -773,6 +771,8 @@ nlm4_test_msg_4_svc(nlm4_testargs *argp, void *resp, struct svc_req *sr)
* Callback functions for nlm4_lock_msg_4_svc
* (using the RPC client stubs directly)
*/
+static enum clnt_stat nlm4_granted_msg_4_cb(nlm4_testargs *, nlm4_res *,
+ CLIENT *);
bool_t
nlm4_lock_msg_4_svc(nlm4_lockargs *argp, void *resp,
@@ -784,7 +784,7 @@ nlm4_lock_msg_4_svc(nlm4_lockargs *argp, void *resp,
bzero(&res4, sizeof (res4));
nlm_do_lock(argp, &res4, sr,
NULL, nlm4_lock_res_4,
- nlm4_granted_msg_4);
+ nlm4_granted_msg_4_cb);
/* NB: We have a result our caller will not free. */
xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4);
@@ -794,6 +794,20 @@ nlm4_lock_msg_4_svc(nlm4_lockargs *argp, void *resp,
return (FALSE);
}
+static enum clnt_stat
+nlm4_granted_msg_4_cb(nlm4_testargs *argp, nlm4_res *resp, CLIENT *clnt)
+{
+ int rv;
+
+ rv = nlm4_granted_msg_4(argp, NULL, clnt);
+
+ /* MSG call doesn't fill in *resp, so do it here. */
+ if (rv != RPC_SUCCESS)
+ resp->stat.stat = nlm4_failed;
+
+ return (rv);
+}
+
bool_t
nlm4_cancel_msg_4_svc(nlm4_cancargs *argp, void *resp, struct svc_req *sr)
{
diff --git a/usr/src/uts/common/klm/nlm_service.c b/usr/src/uts/common/klm/nlm_service.c
index dceabaf53f..f4f733443e 100644
--- a/usr/src/uts/common/klm/nlm_service.c
+++ b/usr/src/uts/common/klm/nlm_service.c
@@ -27,7 +27,7 @@
/*
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
* Copyright 2014 Joyent, Inc. All rights reserved.
*/
@@ -81,6 +81,7 @@ struct nlm_block_cb_data {
struct nlm_host *hostp;
struct nlm_vhold *nvp;
struct flock64 *flp;
+ bool_t registered;
};
/*
@@ -107,9 +108,9 @@ static void nlm_block(
nlm4_lockargs *lockargs,
struct nlm_host *host,
struct nlm_vhold *nvp,
- nlm_rpc_t *rpcp,
struct flock64 *fl,
- nlm_testargs_cb grant_cb);
+ nlm_granted_cb grant_cb,
+ rpcvers_t);
static vnode_t *nlm_fh_to_vp(struct netobj *);
static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *);
@@ -314,6 +315,11 @@ nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr)
* NLM_TEST, NLM_TEST_MSG,
* NLM4_TEST, NLM4_TEST_MSG,
* Client inquiry about locks, non-blocking.
+ *
+ * The cb argument is NULL for NLM_TEST and NLM4_TEST, and
+ * non-NULL for NLM_TEST_MSG and NLM4_TEST_MSG.
+ * The MSG forms use the cb to send the reply,
+ * and don't return a reply for this call.
*/
void
nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp,
@@ -455,10 +461,19 @@ out:
* We also have to keep a list of locks (pending + granted)
* both to handle retransmitted requests, and to keep the
* vnodes for those locks active.
+ *
+ * Callback arguments:
+ * reply_cb Used to send a normal RPC reply just as if
+ * we had filled in a response for our caller.
+ * Needed because we do work after the reply.
+ * res_cb Used for the MSG calls, where there's no
+ * regular RPC response.
+ * grant_cb Used to CALL the client informing them of a
+ * granted lock after a "blocked" reply.
*/
void
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
- nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb)
+ nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_granted_cb grant_cb)
{
struct nlm_globals *g;
struct flock64 fl;
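For orientation, the two call styles wire these callbacks differently;
condensed here from the v4 stubs in the nlm_rpc_svc.c hunks elsewhere in
this patch:

/* record-oriented NLM4_LOCK: reply via svc_sendreply() */
nlm_do_lock(argp, resp, sr,
    nlm4_lock_4_reply, NULL, nlm4_granted_4);

/* message-oriented NLM4_LOCK_MSG: response and "granted" are RPC calls */
nlm_do_lock(argp, &res4, sr,
    NULL, nlm4_lock_res_4, nlm4_granted_msg_4_cb);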
@@ -492,20 +507,18 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
struct nlm_host *, host, nlm4_lockargs *, argp);
/*
- * If we may need to do _msg_ call needing an RPC
- * callback, get the RPC client handle now,
- * so we know if we can bind to the NLM service on
- * this client.
- *
- * Note: host object carries transport type.
- * One client using multiple transports gets
- * separate sysids for each of its transports.
+ * If this is a MSG call (NLM_LOCK_MSG, NLM4_LOCK_MSG)
+ * we'll have res_cb != NULL, and we know we'll need an
+ * RPC client handle _now_ so we can send the response.
+ * If we can't get an rpc handle (rpcp) then we have
+ * no way to respond, and the client will time out.
*/
- if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) {
+ if (res_cb != NULL) {
error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp);
if (error != 0) {
+ ASSERT(rpcp == NULL);
status = nlm4_denied_nolocks;
- goto doreply;
+ goto out;
}
}
@@ -584,6 +597,8 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr,
/*
* OK, can detach this thread, so this call
* will block below (after we reply).
+ * The "blocked" reply tells the client to
+ * expect a "granted" call-back later.
*/
status = nlm4_blocked;
do_blocking = TRUE;
@@ -655,11 +670,12 @@ doreply:
* "detach" it from the RPC SVC pool, allowing it
* to block indefinitely if needed.
*/
- ASSERT(rpcp != NULL);
+ ASSERT(grant_cb != NULL);
(void) svc_detach_thread(sr->rq_xprt);
- nlm_block(argp, host, nvp, rpcp, &fl, grant_cb);
+ nlm_block(argp, host, nvp, &fl, grant_cb, sr->rq_vers);
}
+out:
DTRACE_PROBE3(lock__end, struct nlm_globals *, g,
struct nlm_host *, host, nlm4_res *, resp);
@@ -679,25 +695,26 @@ static void
nlm_block(nlm4_lockargs *lockargs,
struct nlm_host *host,
struct nlm_vhold *nvp,
- nlm_rpc_t *rpcp,
struct flock64 *flp,
- nlm_testargs_cb grant_cb)
+ nlm_granted_cb grant_cb,
+ rpcvers_t vers)
{
nlm4_testargs args;
+ nlm4_res res;
int error;
flk_callback_t flk_cb;
struct nlm_block_cb_data cb_data;
+ nlm_rpc_t *rpcp = NULL;
+ enum clnt_stat status;
/*
* Keep a list of blocked locks on nh_pending, and use it
* to cancel these threads in nlm_destroy_client_pending.
*
- * Check to see if this lock is already in the list
- * and if not, add an entry for it. Allocate first,
- * then if we don't insert, free the new one.
- * Caller already has vp held.
+ * Check to see if this lock is already in the list. If so,
+ * some earlier call is already blocked getting this lock,
+ * so there's nothing more this call needs to do.
*/
-
error = nlm_slreq_register(host, nvp, flp);
if (error != 0) {
/*
@@ -710,9 +727,22 @@ nlm_block(nlm4_lockargs *lockargs,
return;
}
+ /*
+ * Make sure we can get an RPC client handle we can use to
+ * deliver the "granted" callback if/when we get the lock.
+ * If we can't, there's no point blocking to get the lock
+ * for them because they'll never find out about it.
+ */
+ error = nlm_host_get_rpc(host, vers, &rpcp);
+ if (error != 0) {
+ (void) nlm_slreq_unregister(host, nvp, flp);
+ return;
+ }
+
cb_data.hostp = host;
cb_data.nvp = nvp;
cb_data.flp = flp;
+ cb_data.registered = TRUE;
flk_init_callback(&flk_cb, nlm_block_callback, &cb_data);
/* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */
@@ -720,23 +750,60 @@ nlm_block(nlm4_lockargs *lockargs,
F_REMOTELOCK | FREAD | FWRITE,
(u_offset_t)0, &flk_cb, CRED(), NULL);
+ /*
+ * If the nlm_block_callback didn't already do it...
+ */
+ if (cb_data.registered)
+ (void) nlm_slreq_unregister(host, nvp, flp);
+
if (error != 0) {
/*
* We failed getting the lock, but have no way to
* tell the client about that. Let 'em time out.
*/
- (void) nlm_slreq_unregister(host, nvp, flp);
return;
}
-
/*
+ * ... else we got the lock on behalf of this client.
+ *
+ * We MUST either tell the client about this lock
+ * (via the "granted" callback RPC) or unlock.
+ *
* Do the "granted" call-back to the client.
*/
+ bzero(&args, sizeof (args));
args.cookie = lockargs->cookie;
args.exclusive = lockargs->exclusive;
args.alock = lockargs->alock;
+ bzero(&res, sizeof (res));
+
+ /*
+ * Not using the NLM_INVOKE_CALLBACK() macro because
+ * we need to take actions on errors.
+ */
+ status = (*grant_cb)(&args, &res, (rpcp)->nr_handle);
+ if (status != RPC_SUCCESS) {
+ struct rpc_err err;
+
+ CLNT_GETERR((rpcp)->nr_handle, &err);
+ NLM_ERR("NLM: %s callback failed: "
+ "stat %d, err %d\n", "grant", status,
+ err.re_errno);
+ res.stat.stat = nlm4_failed;
+ }
+ if (res.stat.stat != nlm4_granted) {
+ /*
+ * Failed to deliver the granted callback, so
+ * the client doesn't know about this lock.
+ * Unlock the lock. The client will time out.
+ */
+ (void) nlm_vop_frlock(nvp->nv_vp, F_UNLCK, flp,
+ F_REMOTELOCK | FREAD | FWRITE,
+ (u_offset_t)0, NULL, CRED(), NULL);
+ }
+ xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res);
- NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb);
+ nlm_host_rele_rpc(host, rpcp);
}
/*
@@ -756,6 +823,7 @@ nlm_block_callback(flk_cb_when_t when, void *data)
if (when == FLK_AFTER_SLEEP) {
(void) nlm_slreq_unregister(cb_data->hostp,
cb_data->nvp, cb_data->flp);
+ cb_data->registered = FALSE;
}
return (0);
diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c
index 9e58a7bb56..2543bdf17e 100644
--- a/usr/src/uts/common/os/logsubr.c
+++ b/usr/src/uts/common/os/logsubr.c
@@ -20,6 +20,7 @@
*/
/*
+ * Copyright 2020 Oxide Computer Company
* Copyright (c) 2013 Gary Mills
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Joyent, Inc.
@@ -43,6 +44,7 @@
#include <sys/utsname.h>
#include <sys/id_space.h>
#include <sys/zone.h>
+#include <sys/bootbanner.h>
log_zone_t log_global;
queue_t *log_consq;
@@ -182,6 +184,14 @@ log_zonefree(zoneid_t zoneid, void *arg)
kmem_free(lzp, sizeof (log_zone_t));
}
+static void
+log_bootbanner_print(const char *line, uint_t num)
+{
+ const char *pfx = (num == 0) ? "\r" : "";
+
+ printf("%s%s\n", pfx, line);
+}
+
void
log_init(void)
{
@@ -246,11 +256,15 @@ log_init(void)
log_update(&log_backlog, log_backlogq, SL_CONSOLE, log_console);
/*
- * Now that logging is enabled, emit the SunOS banner.
+ * Now that logging is enabled, emit the boot banner.
*/
+#ifdef LEGACY_BANNER
printf("\rSunOS Release %s Version %s %u-bit\n",
utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *));
printf("Copyright 2010-2020 Joyent, Inc.\n");
+#else
+ bootbanner_print(log_bootbanner_print, KM_SLEEP);
+#endif
#ifdef DEBUG
printf("DEBUG enabled\n");
#endif
diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c
index ac1ee2d1ce..1e18a0ce9e 100644
--- a/usr/src/uts/common/os/strsubr.c
+++ b/usr/src/uts/common/os/strsubr.c
@@ -28,6 +28,7 @@
* Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright 2018 Joyent, Inc.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2018 Joyent, Inc.
*/
#include <sys/types.h>
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 3664f0096b..24fdd94c11 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -96,6 +96,7 @@ CHKHDRS= \
bofi.h \
bofi_impl.h \
bpp_io.h \
+ bootbanner.h \
bootstat.h \
brand.h \
buf.h \
diff --git a/usr/src/uts/common/sys/bootbanner.h b/usr/src/uts/common/sys/bootbanner.h
new file mode 100644
index 0000000000..93ba1b9e79
--- /dev/null
+++ b/usr/src/uts/common/sys/bootbanner.h
@@ -0,0 +1,33 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Oxide Computer Company
+ */
+
+#ifndef _SYS_BOOTBANNER_H
+#define _SYS_BOOTBANNER_H
+
+/*
+ * Rendering of the boot banner, used on the system and zone consoles.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void bootbanner_print(void (*)(const char *, uint_t), int kmflag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BOOTBANNER_H */
diff --git a/usr/src/uts/common/sys/fibre-channel/fca/emlxs/emlxs_hw.h b/usr/src/uts/common/sys/fibre-channel/fca/emlxs/emlxs_hw.h
index ab4b4b4e6b..406c90303b 100644
--- a/usr/src/uts/common/sys/fibre-channel/fca/emlxs/emlxs_hw.h
+++ b/usr/src/uts/common/sys/fibre-channel/fca/emlxs/emlxs_hw.h
@@ -2140,7 +2140,7 @@ typedef struct
#define SLI_FW_TYPE_101 SLI_FW_TYPE_SHIFT(0xb) /* LP101 */
-enum emlxs_prog_type
+typedef enum emlxs_prog_type
{
TEST_PROGRAM, /* 0 */
UTIL_PROGRAM, /* 1 */
diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h
index d1c5f674f1..676193fcfa 100644
--- a/usr/src/uts/common/sys/fs/sdev_impl.h
+++ b/usr/src/uts/common/sys/fs/sdev_impl.h
@@ -39,6 +39,7 @@ extern "C" {
#include <sys/nvpair.h>
#include <sys/fs/sdev_plugin.h>
#include <sys/sunddi.h>
+#include <sys/fs/sdev_plugin.h>
/*
* sdev_nodes are the file-system specific part of the
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index 1d7ddf9648..a5974f6d7d 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2018 Joyent, Inc.
* Copyright (c) 2015 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _SYS_MAC_H
@@ -88,6 +89,13 @@ typedef enum {
} link_flowctrl_t;
typedef enum {
+ LINK_FEC_NONE = 1 << 0,
+ LINK_FEC_AUTO = 1 << 1,
+ LINK_FEC_RS = 1 << 2,
+ LINK_FEC_BASE_R = 1 << 3
+} link_fec_t;
+
+typedef enum {
LINK_TAGMODE_VLANONLY = 0,
LINK_TAGMODE_NORMAL
} link_tagmode_t;
@@ -239,6 +247,8 @@ typedef enum {
MAC_PROP_EN_25GFDX_CAP,
MAC_PROP_ADV_50GFDX_CAP,
MAC_PROP_EN_50GFDX_CAP,
+ MAC_PROP_EN_FEC_CAP,
+ MAC_PROP_ADV_FEC_CAP,
MAC_PROP_PRIVATE = -1
} mac_prop_id_t;
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index 3c103c073a..21f2c10a8e 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -290,54 +290,6 @@ struct mac_group_s {
#define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable
#define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable
-#define MAC_RING_TX(mhp, rh, mp, rest) { \
- mac_ring_handle_t mrh = rh; \
- mac_impl_t *mimpl = (mac_impl_t *)mhp; \
- /* \
- * Send packets through a selected tx ring, or through the \
- * default handler if there is no selected ring. \
- */ \
- if (mrh == NULL) \
- mrh = mimpl->mi_default_tx_ring; \
- if (mrh == NULL) { \
- rest = mimpl->mi_tx(mimpl->mi_driver, mp); \
- } else { \
- rest = mac_hwring_tx(mrh, mp); \
- } \
-}
-
-/*
- * This is the final stop before reaching the underlying driver
- * or aggregation, so this is where the bridging hook is implemented.
- * Packets that are bridged will return through mac_bridge_tx(), with
- * rh nulled out if the bridge chooses to send output on a different
- * link due to forwarding.
- */
-#define MAC_TX(mip, rh, mp, src_mcip) { \
- mac_ring_handle_t rhandle = (rh); \
- /* \
- * If there is a bound Hybrid I/O share, send packets through \
- * the default tx ring. (When there's a bound Hybrid I/O share, \
- * the tx rings of this client are mapped in the guest domain \
- * and not accessible from here.) \
- */ \
- _NOTE(CONSTANTCONDITION) \
- if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \
- rhandle = (mip)->mi_default_tx_ring; \
- if (mip->mi_promisc_list != NULL) \
- mac_promisc_dispatch(mip, mp, src_mcip); \
- /* \
- * Grab the proper transmit pointer and handle. Special \
- * optimization: we can test mi_bridge_link itself atomically, \
- * and if that indicates no bridge send packets through tx ring.\
- */ \
- if (mip->mi_bridge_link == NULL) { \
- MAC_RING_TX(mip, rhandle, mp, mp); \
- } else { \
- mp = mac_bridge_tx(mip, rhandle, mp); \
- } \
-}
-
/* mci_tx_flag */
#define MCI_TX_QUIESCE 0x1
diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h
index 04c20d6aac..fc0866f2d1 100644
--- a/usr/src/uts/common/sys/mac_provider.h
+++ b/usr/src/uts/common/sys/mac_provider.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _SYS_MAC_PROVIDER_H
@@ -631,6 +632,8 @@ extern void mac_prop_info_set_default_uint32(
mac_prop_info_handle_t, uint32_t);
extern void mac_prop_info_set_default_link_flowctrl(
mac_prop_info_handle_t, link_flowctrl_t);
+extern void mac_prop_info_set_default_fec(
+ mac_prop_info_handle_t, link_fec_t);
extern void mac_prop_info_set_range_uint32(
mac_prop_info_handle_t,
uint32_t, uint32_t);
diff --git a/usr/src/uts/common/sys/smbios.h b/usr/src/uts/common/sys/smbios.h
index 34281898e0..55048d549d 100644
--- a/usr/src/uts/common/sys/smbios.h
+++ b/usr/src/uts/common/sys/smbios.h
@@ -1315,11 +1315,12 @@ typedef struct smbios_memdevice {
#define SMB_MTECH_NVDIMM_P 0x06 /* NVDIMM-P */
#define SMB_MTECH_INTCPM 0x07 /* Intel Optane DC Persistent Memory */
-#define SMB_MOMC_OTHER 0x01 /* other */
-#define SMB_MOMC_UNKNOWN 0x02 /* unknown */
-#define SMB_MOMC_VOLATILE 0x04 /* Volatile memory */
-#define SMB_MOMC_BYTE_PM 0x08 /* Byte-accessible persistent memory */
-#define SMB_MOMC_BLOCK_PM 0x10 /* Block-accessible persistent memory */
+#define SMB_MOMC_RESERVED 0x01 /* reserved */
+#define SMB_MOMC_OTHER 0x02 /* other */
+#define SMB_MOMC_UNKNOWN 0x04 /* unknown */
+#define SMB_MOMC_VOLATILE 0x08 /* Volatile memory */
+#define SMB_MOMC_BYTE_PM 0x10 /* Byte-accessible persistent memory */
+#define SMB_MOMC_BLOCK_PM 0x20 /* Block-accessible persistent memory */
/*
* SMBIOS Memory Array Mapped Address. See DSP0134 Section 7.20 for more
diff --git a/usr/src/uts/common/sys/sysconfig.h b/usr/src/uts/common/sys/sysconfig.h
index d5b65ef78c..7e87d7a983 100644
--- a/usr/src/uts/common/sys/sysconfig.h
+++ b/usr/src/uts/common/sys/sysconfig.h
@@ -101,8 +101,9 @@ extern int mach_sysconfig(int);
#define _CONFIG_SYMLOOP_MAX 46 /* maximum # of symlinks in pathname */
#define _CONFIG_EPHID_MAX 47 /* maximum ephemeral uid */
+#define _CONFIG_UADDR_MAX 48 /* maximum user address */
-#define _CONFIG_NPROC_NCPU 48 /* NCPU (sometimes > NPROC_MAX) */
+#define _CONFIG_NPROC_NCPU 49 /* NCPU (sometimes > NPROC_MAX) */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/systeminfo.h b/usr/src/uts/common/sys/systeminfo.h
index a664a19b9e..951d799a70 100644
--- a/usr/src/uts/common/sys/systeminfo.h
+++ b/usr/src/uts/common/sys/systeminfo.h
@@ -21,6 +21,7 @@
/*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
* Copyright 2017 RackTop Systems.
+ * Copyright 2020 Oxide Computer Company
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -56,8 +57,8 @@ extern char platform[];
* 257 - 512 Unix International assigned numbers for `set' style commands
* where the value is selected to be the value for the
* corresponding `get' command plus 256.
- * 513 - 768 Solaris specific `get' style commands.
- * 769 - 1024 Solaris specific `set' style commands where the value is
+ * 513 - 768 illumos-defined `get' style commands.
+ * 769 - 1024 illumos-defined `set' style commands where the value is
* selected to be the value for the corresponding `get' command
* plus 256.
*
@@ -69,7 +70,7 @@ extern char platform[];
/* UI defined `get' commands (1-256) */
#define SI_SYSNAME 1 /* return name of operating system */
#define SI_HOSTNAME 2 /* return name of node */
-#define SI_RELEASE 3 /* return release of operating system */
+#define SI_RELEASE 3 /* return release of operating system */
#define SI_VERSION 4 /* return version field of utsname */
#define SI_MACHINE 5 /* return kind of machine */
#define SI_ARCHITECTURE 6 /* return instruction set arch */
@@ -81,7 +82,7 @@ extern char platform[];
#define SI_SET_HOSTNAME 258 /* set name of node */
#define SI_SET_SRPC_DOMAIN 265 /* set secure RPC domain */
-/* Solaris defined `get' commands (513-768) */
+/* illumos-defined `get' commands (513-768) */
#define SI_PLATFORM 513 /* return platform identifier */
#define SI_ISALIST 514 /* return supported isa list */
#define SI_DHCP_CACHE 515 /* return kernel-cached DHCPACK */
@@ -89,8 +90,9 @@ extern char platform[];
#define SI_ARCHITECTURE_64 517 /* basic 64-bit SI_ARCHITECTURE */
#define SI_ARCHITECTURE_K 518 /* kernel SI_ARCHITECTURE equivalent */
#define SI_ARCHITECTURE_NATIVE 519 /* SI_ARCHITECTURE of the caller */
+#define SI_ADDRESS_WIDTH 520 /* number of bits in native address */
-/* Solaris defined `set' commands (769-1024) (none currently assigned) */
+/* illumos-defined `set' commands (769-1024) (none currently assigned) */
#define HW_INVALID_HOSTID 0xFFFFFFFF /* an invalid hostid */
diff --git a/usr/src/uts/common/sys/unistd.h b/usr/src/uts/common/sys/unistd.h
index f366e99f6a..591a3426f5 100644
--- a/usr/src/uts/common/sys/unistd.h
+++ b/usr/src/uts/common/sys/unistd.h
@@ -106,9 +106,9 @@ extern "C" {
#define _SC_ARG_MAX 1
#define _SC_CHILD_MAX 2
#define _SC_CLK_TCK 3
-#define _SC_NGROUPS_MAX 4
+#define _SC_NGROUPS_MAX 4
#define _SC_OPEN_MAX 5
-#define _SC_JOB_CONTROL 6
+#define _SC_JOB_CONTROL 6
#define _SC_SAVED_IDS 7
#define _SC_VERSION 8
/* SVR4 names */
@@ -151,21 +151,21 @@ extern "C" {
#define _SC_TIMER_MAX 44
/* XPG4 names */
#define _SC_2_C_BIND 45
-#define _SC_2_C_DEV 46
+#define _SC_2_C_DEV 46
#define _SC_2_C_VERSION 47
-#define _SC_2_FORT_DEV 48
-#define _SC_2_FORT_RUN 49
+#define _SC_2_FORT_DEV 48
+#define _SC_2_FORT_RUN 49
#define _SC_2_LOCALEDEF 50
-#define _SC_2_SW_DEV 51
+#define _SC_2_SW_DEV 51
#define _SC_2_UPE 52
#define _SC_2_VERSION 53
#define _SC_BC_BASE_MAX 54
-#define _SC_BC_DIM_MAX 55
+#define _SC_BC_DIM_MAX 55
#define _SC_BC_SCALE_MAX 56
#define _SC_BC_STRING_MAX 57
#define _SC_COLL_WEIGHTS_MAX 58
#define _SC_EXPR_NEST_MAX 59
-#define _SC_LINE_MAX 60
+#define _SC_LINE_MAX 60
#define _SC_RE_DUP_MAX 61
#define _SC_XOPEN_CRYPT 62
#define _SC_XOPEN_ENH_I18N 63
@@ -210,6 +210,7 @@ extern "C" {
#define _SC_NPROCESSORS_MAX 516 /* maximum # of processors */
#define _SC_CPUID_MAX 517 /* maximum CPU id */
#define _SC_EPHID_MAX 518 /* maximum ephemeral id */
+#define _SC_UADDR_MAX 519 /* maximum user address */
/*
* POSIX.1c (pthreads) names. These values are defined above
@@ -351,7 +352,7 @@ extern "C" {
#ifdef _XPG6
#define _POSIX_VERSION 200112L /* Supports IEEE Std 1003.1-2001 */
#else
-#define _POSIX_VERSION 199506L /* Supports POSIX-1c DIS */
+#define _POSIX_VERSION 199506L /* Supports POSIX-1c DIS */
#endif
#endif /* _POSIX_VERSION */
@@ -359,7 +360,7 @@ extern "C" {
#ifdef _XPG6
#define _POSIX2_VERSION 200112L /* Supports IEEE Std 1003.1-2001 */
#else
-#define _POSIX2_VERSION 199209L /* Supports ISO POSIX-2 DIS */
+#define _POSIX2_VERSION 199209L /* Supports ISO POSIX-2 DIS */
#endif
#endif /* _POSIX2_VERSION */
@@ -395,14 +396,14 @@ extern "C" {
#define _POSIX2_FORT_RUN 200112L /* Supports FORTRAN runtime */
#define _POSIX2_LOCALEDEF 200112L /* Supports creation of locales */
#define _POSIX2_SW_DEV 200112L /* Supports S/W Development Utility */
-#define _POSIX2_UPE 200112L /* Supports User Portability Utility */
+#define _POSIX2_UPE 200112L /* Supports User Portability Utility */
#else
#define _POSIX2_C_BIND 1 /* Supports C Language Bindings */
#define _POSIX2_C_DEV 1 /* Supports C language dev utility */
#define _POSIX2_FORT_RUN 1 /* Supports FORTRAN runtime */
#define _POSIX2_LOCALEDEF 1 /* Supports creation of locales */
#define _POSIX2_SW_DEV 1 /* Supports S/W Development Utility */
-#define _POSIX2_UPE 1 /* Supports User Portability Utility */
+#define _POSIX2_UPE 1 /* Supports User Portability Utility */
#endif /* _XPG6 */
/* UNIX 03 names */
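The corresponding sysconf(3C) query for the new _SC_UADDR_MAX name, as a sketch (printing in hex is a presentation choice here; the value is the process's user address limit, per the kernel change below):

#include <unistd.h>
#include <stdio.h>

int
main(void)
{
        long uaddr_max = sysconf(_SC_UADDR_MAX);

        if (uaddr_max == -1) {
                perror("sysconf");
                return (1);
        }
        (void) printf("user address limit: 0x%lx\n",
            (unsigned long)uaddr_max);
        return (0);
}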
diff --git a/usr/src/uts/common/syscall/sysconfig.c b/usr/src/uts/common/syscall/sysconfig.c
index e09f4e85a2..96535fdd08 100644
--- a/usr/src/uts/common/syscall/sysconfig.c
+++ b/usr/src/uts/common/syscall/sysconfig.c
@@ -47,6 +47,7 @@
#include <sys/timer.h>
#include <sys/zone.h>
#include <sys/vm_usage.h>
+#include <vm/as.h>
extern rctl_hndl_t rc_process_sigqueue;
@@ -208,6 +209,9 @@ sysconfig(int which)
case _CONFIG_EPHID_MAX:
return (MAXEPHUID);
+ case _CONFIG_UADDR_MAX:
+ return ((long)(uintptr_t)curproc->p_as->a_userlimit);
+
case _CONFIG_SYMLOOP_MAX:
return (MAXSYMLINKS);
}
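_CONFIG_UADDR_MAX returns p_as->a_userlimit, the upper bound of the process's user address space; the (long)(uintptr_t) double cast keeps the pointer-to-integer conversion explicit. One way a tracer or debugger might consume the value (illustrative, and it assumes the exclusive-bound reading of a_userlimit):

#include <unistd.h>
#include <stdint.h>

/* Illustrative only: classify an address against the new limit. */
int
is_user_address(const void *addr)
{
        long limit = sysconf(_SC_UADDR_MAX);

        return (limit != -1 && (uintptr_t)addr < (uintptr_t)limit);
}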
diff --git a/usr/src/uts/common/syscall/systeminfo.c b/usr/src/uts/common/syscall/systeminfo.c
index 21b5ac08ba..00d11e5aba 100644
--- a/usr/src/uts/common/syscall/systeminfo.c
+++ b/usr/src/uts/common/syscall/systeminfo.c
@@ -19,6 +19,7 @@
* CDDL HEADER END
*/
/*
+ * Copyright 2020 Oxide Computer Company
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -26,6 +27,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All rights reserved. */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
@@ -81,6 +83,9 @@ systeminfo(int command, char *buf, long count)
kstr = utsname.machine;
break;
#ifdef _LP64
+ case SI_ADDRESS_WIDTH:
+ kstr = "64";
+ break;
case SI_ARCHITECTURE_64:
case SI_ARCHITECTURE_K:
kstr = architecture;
@@ -94,6 +99,9 @@ systeminfo(int command, char *buf, long count)
architecture : architecture_32;
break;
#else
+ case SI_ADDRESS_WIDTH:
+ kstr = "32";
+ break;
case SI_ARCHITECTURE_K:
case SI_ARCHITECTURE_32:
case SI_ARCHITECTURE:
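Because the case is selected under #ifdef _LP64 at kernel build time, SI_ADDRESS_WIDTH reports the kernel's native width rather than the caller's data model; a 32-bit process on a 64-bit kernel still reads "64". A sketch that makes the distinction visible (illustrative):

#include <sys/systeminfo.h>
#include <stdio.h>

int
main(void)
{
        char kernel_width[8];

        if (sysinfo(SI_ADDRESS_WIDTH, kernel_width,
            sizeof (kernel_width)) == -1)
                return (1);
        (void) printf("kernel: %s bits, this process: %u bits\n",
            kernel_width, (unsigned int)(sizeof (void *) * 8));
        return (0);
}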
diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c
index 1a9ef5223f..1308935159 100644
--- a/usr/src/uts/common/vm/seg_spt.c
+++ b/usr/src/uts/common/vm/seg_spt.c
@@ -63,7 +63,7 @@ size_t spt_used;
* See spt_setminfree().
*/
pgcnt_t segspt_minfree = 0;
-size_t segspt_minfree_clamp = (1UL << 30); /* 1Gb in bytes */
+size_t segspt_minfree_clamp = (1UL << 30); /* 1GB in bytes */
static int segspt_create(struct seg **segpp, void *argsp);
static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
@@ -317,7 +317,7 @@ static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
*
* The traditional default value of 5% of total memory is used, except on
* systems where that quickly gets ridiculous: in that case we clamp at a rather
- * arbitrary value of 1Gb.
+ * arbitrary value of 1GB.
*
* Note that since this is called lazily on the first sptcreate(), in theory,
* this could represent a very small value if the system is heavily loaded
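The comment describes the sizing policy: 5% of physical memory, clamped at segspt_minfree_clamp. A minimal sketch of that computation (the helper name is hypothetical; spt_setminfree()'s actual body is not part of this hunk):

#include <sys/types.h>
#include <sys/param.h>          /* btop() */
#include <sys/sysmacros.h>      /* MIN() */

extern pgcnt_t physmem;         /* pages of physical memory */
extern pgcnt_t segspt_minfree;
extern size_t segspt_minfree_clamp;

/* Illustrative only: 5% of physical memory, clamped at 1GB. */
void
spt_setminfree_sketch(void)
{
        pgcnt_t dflt = physmem / 20;    /* 5% of memory, in pages */

        segspt_minfree = MIN(dflt, btop(segspt_minfree_clamp));
}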