author     Mike Zeller <mike@mikezeller.net>  2020-03-11 16:42:06 -0400
committer  Mike Zeller <mike@mikezeller.net>  2020-03-11 16:42:06 -0400
commit     2803e163cd303fbc63f832f544bc59c4ee562252 (patch)
tree       c901ef8fa7580dcebfb11316f6bb49d498bd40a8 /usr/src/uts
parent     dd05dd221f0e26bb86692b7b69c8dbeab8f4c0e5 (diff)
parent     1de02da27664d38cedeccf227bd4ae92d32619d9 (diff)
Merge remote-tracking branch 'origin/master' into bhyve-sync
Diffstat (limited to 'usr/src/uts')
-rw-r--r--  usr/src/uts/common/Makefile.files | 7
-rw-r--r--  usr/src/uts/common/Makefile.rules | 4
-rw-r--r--  usr/src/uts/common/fs/namefs/namevfs.c | 27
-rw-r--r--  usr/src/uts/common/fs/proc/prsubr.c | 70
-rw-r--r--  usr/src/uts/common/fs/zfs/dsl_crypt.c | 9
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dsl_crypt.h | 7
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/zcp_change_key.h | 41
-rw-r--r--  usr/src/uts/common/fs/zfs/zcp_change_key.c | 144
-rw-r--r--  usr/src/uts/common/fs/zfs/zcp_synctask.c | 71
-rw-r--r--  usr/src/uts/common/fs/zfs/zfs_ioctl.c | 11
-rw-r--r--  usr/src/uts/common/fs/zfs/zfs_vnops.c | 42
-rw-r--r--  usr/src/uts/common/inet/ilb/ilb.c | 2
-rw-r--r--  usr/src/uts/common/inet/ilb/ilb_conn.c | 9
-rw-r--r--  usr/src/uts/common/inet/ip.h | 2
-rw-r--r--  usr/src/uts/common/inet/ip/conn_opt.c | 2
-rw-r--r--  usr/src/uts/common/inet/ip/icmp.c | 6
-rw-r--r--  usr/src/uts/common/inet/ip/igmp.c | 31
-rw-r--r--  usr/src/uts/common/inet/ip/ip.c | 20
-rw-r--r--  usr/src/uts/common/inet/ip/ip6.c | 2
-rw-r--r--  usr/src/uts/common/inet/ip/ip6_ire.c | 2
-rw-r--r--  usr/src/uts/common/inet/ip/ip6_output.c | 2
-rw-r--r--  usr/src/uts/common/inet/ip/ip_ftable.c | 6
-rw-r--r--  usr/src/uts/common/inet/ip/ip_if.c | 20
-rw-r--r--  usr/src/uts/common/inet/ip/ip_ndp.c | 4
-rw-r--r--  usr/src/uts/common/inet/ip/ip_output.c | 4
-rw-r--r--  usr/src/uts/common/inet/ip/ip_rts.c | 17
-rw-r--r--  usr/src/uts/common/inet/ip/ipclassifier.c | 1
-rw-r--r--  usr/src/uts/common/inet/ip/ipmp.c | 1
-rw-r--r--  usr/src/uts/common/inet/ip/ipsecah.c | 3
-rw-r--r--  usr/src/uts/common/inet/ip/ipsecesp.c | 3
-rw-r--r--  usr/src/uts/common/inet/ip/sadb.c | 38
-rw-r--r--  usr/src/uts/common/inet/ip/spd.c | 6
-rw-r--r--  usr/src/uts/common/inet/ip/tnet.c | 5
-rw-r--r--  usr/src/uts/common/inet/sctp/sctp_asconf.c | 13
-rw-r--r--  usr/src/uts/common/inet/sctp/sctp_common.c | 2
-rw-r--r--  usr/src/uts/common/inet/sctp/sctp_cookie.c | 12
-rw-r--r--  usr/src/uts/common/inet/sctp/sctp_input.c | 3
-rw-r--r--  usr/src/uts/common/inet/sctp/sctp_opt_data.c | 3
-rw-r--r--  usr/src/uts/common/inet/sctp/sctp_output.c | 10
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_bind.c | 5
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_fusion.c | 2
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_input.c | 1
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_misc.c | 1
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_output.c | 2
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_tpi.c | 4
-rw-r--r--  usr/src/uts/common/inet/udp/udp.c | 6
-rw-r--r--  usr/src/uts/common/inet/udp/udp_stats.c | 7
-rw-r--r--  usr/src/uts/common/io/aggr/aggr_grp.c | 5
-rw-r--r--  usr/src/uts/common/io/aggr/aggr_port.c | 2
-rw-r--r--  usr/src/uts/common/io/ixgbe/ixgbe_main.c | 10
-rw-r--r--  usr/src/uts/common/io/ixgbe/ixgbe_sw.h | 1
-rw-r--r--  usr/src/uts/common/io/ldterm.c | 5
-rw-r--r--  usr/src/uts/common/io/mac/mac.c | 28
-rw-r--r--  usr/src/uts/common/io/mac/mac_client.c | 5
-rw-r--r--  usr/src/uts/common/io/mac/mac_datapath_setup.c | 2
-rw-r--r--  usr/src/uts/common/io/mac/mac_provider.c | 2
-rw-r--r--  usr/src/uts/common/io/mac/mac_stat.c | 16
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.c | 2765
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.conf | 101
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.h | 1298
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_cmd.c | 3542
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_dma.c | 460
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_endint.h | 305
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_gld.c | 1254
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_intr.c | 1010
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_reg.h | 2481
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_ring.c | 2264
-rw-r--r--  usr/src/uts/common/io/ptm.c | 11
-rw-r--r--  usr/src/uts/common/io/pts.c | 13
-rw-r--r--  usr/src/uts/common/io/sata/adapters/ahci/ahci.c | 4
-rw-r--r--  usr/src/uts/common/io/vnic/vnic_dev.c | 7
-rw-r--r--  usr/src/uts/common/os/streamio.c | 54
-rw-r--r--  usr/src/uts/common/sys/fm/io/ddi.h | 11
-rw-r--r--  usr/src/uts/common/sys/fs/namenode.h | 8
-rw-r--r--  usr/src/uts/common/sys/mac_client_impl.h | 2
-rw-r--r--  usr/src/uts/common/sys/mac_impl.h | 48
-rw-r--r--  usr/src/uts/common/sys/mac_provider.h | 12
-rw-r--r--  usr/src/uts/common/sys/strsubr.h | 4
-rw-r--r--  usr/src/uts/i86pc/io/mp_platform_common.c | 102
-rw-r--r--  usr/src/uts/intel/Makefile.intel | 1
-rw-r--r--  usr/src/uts/intel/ipsecah/Makefile | 23
-rw-r--r--  usr/src/uts/intel/ipsecesp/Makefile | 23
-rw-r--r--  usr/src/uts/intel/mlxcx/Makefile | 44
-rw-r--r--  usr/src/uts/intel/os/driver_aliases | 13
-rw-r--r--  usr/src/uts/intel/procfs/Makefile | 3
-rw-r--r--  usr/src/uts/sparc/ipsecah/Makefile | 25
-rw-r--r--  usr/src/uts/sparc/ipsecesp/Makefile | 24
-rw-r--r--  usr/src/uts/sparc/procfs/Makefile | 20
-rw-r--r--  usr/src/uts/sun4v/io/vnet.c | 10
89 files changed, 16390 insertions(+), 318 deletions(-)
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 79f7ca8b47..ce7b7a3e6a 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1440,6 +1440,7 @@ ZFS_COMMON_OBJS += \
zap_leaf.o \
zap_micro.o \
zcp.o \
+ zcp_change_key.o \
zcp_get.o \
zcp_set.o \
zcp_global.o \
@@ -2317,3 +2318,9 @@ BNX_OBJS += \
bnx_lm_main.o \
bnx_lm_recv.o \
bnx_lm_send.o
+
+#
+# mlxcx(7D)
+#
+MLXCX_OBJS += mlxcx.o mlxcx_dma.o mlxcx_cmd.o mlxcx_intr.o mlxcx_gld.o \
+ mlxcx_ring.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 1d052bdcc2..8a906a2e25 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -956,6 +956,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mii/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mlxcx/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mr_sas/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
diff --git a/usr/src/uts/common/fs/namefs/namevfs.c b/usr/src/uts/common/fs/namefs/namevfs.c
index 9952f0a742..63e618de11 100644
--- a/usr/src/uts/common/fs/namefs/namevfs.c
+++ b/usr/src/uts/common/fs/namefs/namevfs.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017 by Delphix. All rights reserved.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -182,6 +183,31 @@ namefind(vnode_t *vp, vnode_t *mnt)
}
/*
+ * For each namenode that has nm_filevp == vp, call the provided function
+ * with the namenode as an argument. This finds all of the namefs entries
+ * which are mounted on vp; note that there can be more than one.
+ */
+int
+nm_walk_mounts(const vnode_t *vp, nm_walk_mounts_f *func, cred_t *cr, void *arg)
+{
+ struct namenode *np;
+ int ret = 0;
+
+ mutex_enter(&ntable_lock);
+
+ for (np = *NM_FILEVP_HASH(vp); np != NULL; np = np->nm_nextp) {
+ if (np->nm_filevp == vp) {
+ if ((ret = func(np, cr, arg)) != 0)
+ break;
+ }
+ }
+
+ mutex_exit(&ntable_lock);
+
+ return (ret);
+}
+
+/*
 * Force the unmounting of a file descriptor from ALL of the nodes
* that it was mounted to.
* At the present time, the only usage for this routine is in the
@@ -480,6 +506,7 @@ nm_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *crp)
newvp->v_rdev = filevp->v_rdev;
newvp->v_data = (caddr_t)nodep;
VFS_HOLD(vfsp);
+ vn_copypath(mvp, newvp);
vn_exists(newvp);
/*
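
A note on the nm_walk_mounts() addition above: it takes a callback of type
nm_walk_mounts_f, declared in sys/fs/namenode.h (that header's change appears
only in the diffstat). Judging from the call site func(np, cr, arg) and the
prfdinfo_nm_path() callback in the next file, the declaration presumably looks
like this minimal sketch; the exact form is an assumption here:

        /*
         * Hypothetical reconstruction of the callback type consumed by
         * nm_walk_mounts(); it matches how func(np, cr, arg) is invoked above.
         */
        typedef int nm_walk_mounts_f(const struct namenode *, cred_t *, void *);

        /* Returns 0, or the first nonzero value returned by a callback. */
        extern int nm_walk_mounts(const vnode_t *, nm_walk_mounts_f *, cred_t *,
            void *);
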
diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c
index 0a9589a373..08aee63610 100644
--- a/usr/src/uts/common/fs/proc/prsubr.c
+++ b/usr/src/uts/common/fs/proc/prsubr.c
@@ -75,6 +75,7 @@
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
+#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
@@ -2552,7 +2553,11 @@ prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
size_t pathlen;
size_t sz = 0;
- pathlen = MAXPATHLEN + 1;
+ /*
+ * The global zone's path to a file in a non-global zone can exceed
+ * MAXPATHLEN.
+ */
+ pathlen = MAXPATHLEN * 2 + 1;
pathname = kmem_alloc(pathlen, KM_SLEEP);
if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
@@ -2561,6 +2566,7 @@ prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
}
kmem_free(pathname, pathlen);
+
return (sz);
}
@@ -2789,6 +2795,22 @@ prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
return (sz);
}
+typedef struct prfdinfo_nm_path_cbdata {
+ proc_t *nmp_p;
+ u_offset_t nmp_sz;
+ list_t *nmp_data;
+} prfdinfo_nm_path_cbdata_t;
+
+static int
+prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
+{
+ prfdinfo_nm_path_cbdata_t *cb = arg;
+
+ cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
+
+ return (0);
+}
+
u_offset_t
prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
{
@@ -2801,8 +2823,23 @@ prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
/* Pathname */
- if (vp->v_type != VSOCK && vp->v_type != VDOOR)
+ switch (vp->v_type) {
+ case VDOOR: {
+ prfdinfo_nm_path_cbdata_t cb = {
+ .nmp_p = p,
+ .nmp_data = NULL,
+ .nmp_sz = 0
+ };
+
+ (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
+ sz += cb.nmp_sz;
+ break;
+ }
+ case VSOCK:
+ break;
+ default:
sz += prfdinfopath(p, vp, NULL, cred);
+ }
/* Socket options */
if (vp->v_type == VSOCK)
@@ -2946,14 +2983,31 @@ prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
}
}
- /*
- * Don't attempt to determine the vnode path for a socket or a door
- * as it will cause a linear scan of the dnlc table given there is no
- * v_path associated with the vnode.
- */
- if (vp->v_type != VSOCK && vp->v_type != VDOOR)
+ /* pathname */
+
+ switch (vp->v_type) {
+ case VDOOR: {
+ prfdinfo_nm_path_cbdata_t cb = {
+ .nmp_p = p,
+ .nmp_data = data,
+ .nmp_sz = 0
+ };
+
+ (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
+ break;
+ }
+ case VSOCK:
+ /*
+ * Don't attempt to determine the path for a socket as the
+ * vnode has no associated v_path. It will cause a linear scan
+ * of the dnlc table and result in no path being found.
+ */
+ break;
+ default:
(void) prfdinfopath(p, vp, data, cred);
+ }
+ /* socket options */
if (vp->v_type == VSOCK)
(void) prfdinfosockopt(vp, data, cred);
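
Background for the VDOOR cases above: a door vnode has no v_path of its own;
it only becomes reachable by name when a process attaches it over a file with
fattach(3C), which creates a namefs mount, and one door can be attached at
several paths. That is why the sizing and formatting code walks every namefs
mount instead of calling vnodetopath(). A minimal userland sketch (the path is
hypothetical, error handling elided):

        #include <door.h>
        #include <stropts.h>
        #include <fcntl.h>
        #include <unistd.h>

        static void
        server(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
            uint_t ndesc)
        {
                (void) door_return(NULL, 0, NULL, 0);
        }

        int
        main(void)
        {
                int dfd = door_create(server, NULL, 0);

                /* Create an attach point and mount the door over it (namefs). */
                (void) close(open("/tmp/mydoor", O_CREAT | O_RDWR, 0644));
                (void) fattach(dfd, "/tmp/mydoor");

                /*
                 * /proc fdinfo consumers can now report /tmp/mydoor for dfd
                 * by walking the namefs mounts over the door vnode.
                 */
                for (;;)
                        (void) pause();
        }
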
diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c
index c9d02e1c57..a092326a9c 100644
--- a/usr/src/uts/common/fs/zfs/dsl_crypt.c
+++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c
@@ -1220,12 +1220,7 @@ dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx)
tx);
}
-typedef struct spa_keystore_change_key_args {
- const char *skcka_dsname;
- dsl_crypto_params_t *skcka_cp;
-} spa_keystore_change_key_args_t;
-
-static int
+int
spa_keystore_change_key_check(void *arg, dmu_tx_t *tx)
{
int ret;
@@ -1469,7 +1464,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
dsl_dir_rele(dd, FTAG);
}
-static void
+void
spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds;
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h
index cf19665aae..5b7c1a9510 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h
@@ -164,6 +164,11 @@ typedef struct spa_keystore {
avl_tree_t sk_wkeys;
} spa_keystore_t;
+typedef struct spa_keystore_change_key_args {
+ const char *skcka_dsname;
+ dsl_crypto_params_t *skcka_cp;
+} spa_keystore_change_key_args_t;
+
int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
@@ -199,6 +204,8 @@ int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj,
dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key);
int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
+int spa_keystore_change_key_check(void *arg, dmu_tx_t *tx);
+void spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx);
int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
diff --git a/usr/src/uts/common/fs/zfs/sys/zcp_change_key.h b/usr/src/uts/common/fs/zfs/sys/zcp_change_key.h
new file mode 100644
index 0000000000..fea520455f
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/zcp_change_key.h
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef _SYS_ZCP_CHANGE_KEY_H
+#define _SYS_ZCP_CHANGE_KEY_H
+
+#include <sys/types.h>
+#include <sys/dmu.h>
+#include <sys/dsl_crypt.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void zcp_synctask_change_key_cleanup(void *arg);
+int zcp_synctask_change_key_check(void *arg, dmu_tx_t *tx);
+void zcp_synctask_change_key_sync(void *arg, dmu_tx_t *tx);
+int zcp_synctask_change_key_create_params(const char *key, size_t keylen,
+ zfs_keyformat_t keyformat, dsl_crypto_params_t **dcpp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZCP_CHANGE_KEY_H */
diff --git a/usr/src/uts/common/fs/zfs/zcp_change_key.c b/usr/src/uts/common/fs/zfs/zcp_change_key.c
new file mode 100644
index 0000000000..be16a8d5c6
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/zcp_change_key.c
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#include <sys/ctype.h>
+#include <sys/zcp.h>
+#include <sys/zcp_change_key.h>
+
+static uint8_t
+hexval(char c)
+{
+ if (c >= '0' && c <= '9')
+ return (c - '0');
+ else if (c >= 'a' && c <= 'f')
+ return (c - 'a' + 10);
+ else if (c >= 'A' && c <= 'F')
+ return (c - 'A' + 10);
+
+ panic("invalid hex value");
+}
+
+static int
+hex_to_raw(const char *key, uint8_t *buf, size_t buflen)
+{
+ uint8_t *p;
+ size_t srclen = strlen(key);
+ size_t i;
+
+ if (buflen * 2 != srclen)
+ return (SET_ERROR(EINVAL));
+
+ for (i = 0, p = buf; i < srclen; i += 2, p++) {
+ if (!isxdigit(key[i]) || !isxdigit(key[i + 1]))
+ return (SET_ERROR(EINVAL));
+
+ *p = hexval(key[i]) << 4 | hexval(key[i + 1]);
+ }
+
+ return (0);
+}
+
+int
+zcp_synctask_change_key_create_params(const char *key, size_t keylen,
+ zfs_keyformat_t keyformat, dsl_crypto_params_t **dcpp)
+{
+ nvlist_t *args = fnvlist_alloc();
+ nvlist_t *hidden_args = fnvlist_alloc();
+ uint8_t rawkey[WRAPPING_KEY_LEN];
+ uint_t rawlen = 0;
+ int err = 0;
+
+ /*
+ * Currently, only raw and hex keys are supported in channel
+ * programs (there is no pbkdf2 support in the kernel to convert
+ * a passphrase).
+ */
+ switch (keyformat) {
+ case ZFS_KEYFORMAT_RAW:
+ /*
+ * dsl_crypto_params_create_nvlist() also verifies the
+ * raw key is WRAPPING_KEY_LEN bytes, so this is
+ * _almost_ redundant -- however we still want to
+ * guarantee we won't overflow rawkey when copying
+ * the contents over.
+ */
+ if (keylen != WRAPPING_KEY_LEN) {
+ err = SET_ERROR(EINVAL);
+ goto done;
+ }
+
+ bcopy(key, rawkey, keylen);
+ rawlen = keylen;
+ break;
+ case ZFS_KEYFORMAT_HEX:
+ /*
+ * hex_to_raw() will reject any input that doesn't exactly
+ * fit into rawkey
+ */
+ err = hex_to_raw(key, rawkey, sizeof (rawkey));
+ if (err != 0)
+ goto done;
+ rawlen = sizeof (rawkey);
+ break;
+ default:
+ err = SET_ERROR(EINVAL);
+ goto done;
+ }
+
+ fnvlist_add_uint64(args, zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
+ (uint64_t)keyformat);
+ fnvlist_add_uint8_array(hidden_args, "wkeydata", rawkey, rawlen);
+
+ err = dsl_crypto_params_create_nvlist(DCP_CMD_NEW_KEY, args,
+ hidden_args, dcpp);
+
+done:
+ fnvlist_free(args);
+ fnvlist_free(hidden_args);
+ bzero(rawkey, sizeof (rawkey));
+
+ return (err);
+}
+
+void
+zcp_synctask_change_key_cleanup(void *arg)
+{
+ spa_keystore_change_key_args_t *skcka = arg;
+
+ dsl_crypto_params_free(skcka->skcka_cp, B_TRUE);
+}
+
+int
+zcp_synctask_change_key_check(void *arg, dmu_tx_t *tx)
+{
+ /*
+ * zcp_synctask_change_key_create_params() already validates that
+ * the new key is in an acceptable format and size for a channel
+ * program. Any future channel program specific checks would go here.
+ * For now, we just perform all the same checks done for
+ * 'zfs change-key' by calling spa_keystore_change_key_check().
+ */
+ return (spa_keystore_change_key_check(arg, tx));
+}
+
+void
+zcp_synctask_change_key_sync(void *arg, dmu_tx_t *tx)
+{
+ spa_keystore_change_key_sync(arg, tx);
+}
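
Two details of the new file above are easy to miss: hexval() may panic only
because every caller has already vetted the character with isxdigit(), and
hex_to_raw() insists that the hex string fill the output buffer exactly, so a
short or long key fails with EINVAL before any key material is consumed. A
standalone userland sketch of the same validation (plain errno values instead
of SET_ERROR):

        #include <errno.h>
        #include <stdint.h>
        #include <string.h>

        static int
        hexval(int c)
        {
                if (c >= '0' && c <= '9')
                        return (c - '0');
                if (c >= 'a' && c <= 'f')
                        return (c - 'a' + 10);
                if (c >= 'A' && c <= 'F')
                        return (c - 'A' + 10);
                return (-1);            /* not a hex digit */
        }

        /* Convert a hex string to raw bytes; it must fill buf exactly. */
        static int
        hex_to_raw_user(const char *key, uint8_t *buf, size_t buflen)
        {
                size_t srclen = strlen(key);
                size_t i;
                int hi, lo;

                if (buflen * 2 != srclen)
                        return (EINVAL);
                for (i = 0; i < srclen; i += 2) {
                        if ((hi = hexval(key[i])) < 0 ||
                            (lo = hexval(key[i + 1])) < 0)
                                return (EINVAL);
                        buf[i / 2] = (uint8_t)(hi << 4 | lo);
                }
                return (0);
        }
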
diff --git a/usr/src/uts/common/fs/zfs/zcp_synctask.c b/usr/src/uts/common/fs/zfs/zcp_synctask.c
index 09af25c1c9..9a1dceb044 100644
--- a/usr/src/uts/common/fs/zfs/zcp_synctask.c
+++ b/usr/src/uts/common/fs/zfs/zcp_synctask.c
@@ -23,6 +23,8 @@
#include <sys/zcp.h>
#include <sys/zcp_set.h>
+#include <sys/zcp_change_key.h>
+#include <sys/dsl_crypt.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_prop.h>
@@ -399,6 +401,74 @@ zcp_synctask_set_prop(lua_State *state, boolean_t sync, nvlist_t *err_details)
return (err);
}
+static int zcp_synctask_change_key(lua_State *, boolean_t, nvlist_t *);
+static zcp_synctask_info_t zcp_synctask_change_key_info = {
+ .name = "change_key",
+ .func = zcp_synctask_change_key,
+ .pargs = {
+ { .za_name = "dataset", .za_lua_type = LUA_TSTRING },
+ { .za_name = "key", .za_lua_type = LUA_TSTRING },
+ { .za_name = "format", .za_lua_type = LUA_TSTRING },
+ { NULL, 0 },
+ },
+ .kwargs = {
+ { NULL, 0 }
+ },
+ .space_check = ZFS_SPACE_CHECK_RESERVED,
+ /*
+ * This is the same value that is used when zfs change-key is run.
+ * See spa_keystore_change_key() in dsl_crypt.c
+ */
+ .blocks_modified = 15
+};
+
+static int
+zcp_synctask_change_key(lua_State *state, boolean_t sync, nvlist_t *err_details)
+{
+ int err;
+ spa_keystore_change_key_args_t skcka = { 0 };
+ dsl_crypto_params_t *dcp = NULL;
+ const char *dsname;
+ const char *key;
+ const char *format;
+ size_t keylen;
+ uint64_t keyformat;
+
+ dsname = lua_tostring(state, 1);
+
+ /*
+ * The key may be raw key, which could contain NUL within it.
+ * Use lua_tolstring() instead of lua_tostring() to obtain the length.
+ */
+ key = lua_tolstring(state, 2, &keylen);
+
+ format = lua_tostring(state, 3);
+
+ if (zfs_prop_string_to_index(ZFS_PROP_KEYFORMAT, format,
+ &keyformat) != 0)
+ return (SET_ERROR(EINVAL));
+
+ err = zcp_synctask_change_key_create_params(key, keylen, keyformat,
+ &dcp);
+ if (err != 0)
+ goto done;
+
+ skcka.skcka_dsname = dsname;
+ skcka.skcka_cp = dcp;
+
+ zcp_cleanup_handler_t *zch = zcp_register_cleanup(state,
+ (zcp_cleanup_t *)&zcp_synctask_change_key_cleanup, &skcka);
+
+ err = zcp_sync_task(state, zcp_synctask_change_key_check,
+ zcp_synctask_change_key_sync, &skcka, sync, dsname);
+
+ zcp_deregister_cleanup(state, zch);
+
+done:
+ dsl_crypto_params_free(dcp, (err != 0 || !sync) ? B_TRUE : B_FALSE);
+ return (err);
+}
+
static int
zcp_synctask_wrapper(lua_State *state)
{
@@ -468,6 +538,7 @@ zcp_load_synctask_lib(lua_State *state, boolean_t sync)
&zcp_synctask_snapshot_info,
&zcp_synctask_inherit_prop_info,
&zcp_synctask_set_prop_info,
+ &zcp_synctask_change_key_info,
NULL
};
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 2b4c1d55e7..153dcf1502 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -3767,6 +3767,7 @@ zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
uint64_t instrlimit, memlimit;
boolean_t sync_flag;
nvpair_t *nvarg = NULL;
+ nvlist_t *hidden_args = NULL;
if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
return (EINVAL);
@@ -3784,6 +3785,16 @@ zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
return (EINVAL);
}
+ /* hidden args are optional */
+ if (nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args) == 0) {
+ nvlist_t *argnvl = fnvpair_value_nvlist(nvarg);
+ int ret;
+
+ ret = nvlist_add_nvlist(argnvl, ZPOOL_HIDDEN_ARGS, hidden_args);
+ if (ret != 0)
+ return (ret);
+ }
+
if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
return (EINVAL);
if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
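
The zfs_ioc_channel_program() change above splices the optional
ZPOOL_HIDDEN_ARGS nvlist into the channel program's argument nvlist, which is
how the new change_key synctask can receive wrapping-key material without it
appearing in ordinary argument logging. A hedged sketch of how a userland
caller might assemble that input with libnvpair ("hidden_args" mirrors the
ZPOOL_HIDDEN_ARGS key used above; the surrounding ioctl plumbing is omitted):

        #include <libnvpair.h>

        /*
         * Build an nvlist fragment carrying key material for a channel
         * program; the kernel merges "hidden_args" into the program's
         * argument nvlist as shown in zfs_ioctl.c above.
         */
        static nvlist_t *
        build_hidden_args(const uint8_t *key, uint_t keylen)
        {
                nvlist_t *innvl = fnvlist_alloc();
                nvlist_t *hidden = fnvlist_alloc();

                fnvlist_add_uint8_array(hidden, "wkeydata",
                    (uint8_t *)key, keylen);
                fnvlist_add_nvlist(innvl, "hidden_args", hidden);
                fnvlist_free(hidden);   /* innvl holds its own copy */
                return (innvl);
        }
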
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index f33a1abe4f..99011b83b4 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -23,7 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2017 Nexenta Systems, Inc.
*/
@@ -377,6 +377,46 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
return (0);
}
+ case _FIODIRECTIO:
+ {
+ /*
+ * ZFS inherently provides the basic semantics for directio.
+ * This is the summary from the ZFS on Linux support for
+ * O_DIRECT, which is the common form of directio, and required
+ * no changes to ZFS.
+ *
+ * 1. Minimize cache effects of the I/O.
+ *
+ * By design the ARC is already scan-resistant, which helps
+ * mitigate the need for special O_DIRECT handling.
+ *
+ * 2. O_DIRECT _MAY_ impose restrictions on IO alignment and
+ * length.
+ *
+ * No additional alignment or length restrictions are
+ * imposed by ZFS.
+ *
+ * 3. O_DIRECT _MAY_ perform unbuffered IO operations directly
+ * between user memory and block device.
+ *
+ * No unbuffered IO operations are currently supported. In
+ * order to support features such as compression, encryption,
+ * and checksumming a copy must be made to transform the
+ * data.
+ *
+ * 4. O_DIRECT _MAY_ imply O_DSYNC (XFS).
+ *
+ * O_DIRECT does not imply O_DSYNC for ZFS.
+ *
+ * 5. O_DIRECT _MAY_ disable file locking that serializes IO
+ * operations.
+ *
+ * All I/O in ZFS is locked for correctness and this locking
+ * is not disabled by O_DIRECT.
+ */
+ return (0);
+ }
+
case _FIO_SEEK_DATA:
case _FIO_SEEK_HOLE:
{
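
The _FIODIRECTIO case above is what backs directio(3C) for ZFS: the advice is
now accepted as a no-op because, as the block comment explains, ZFS already
provides the cache, alignment, and locking semantics that O_DIRECT-style
consumers expect. A minimal userland sketch using directio(3C), which issues
this ioctl under the hood (the file path is illustrative):

        #include <sys/types.h>
        #include <sys/fcntl.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd = open("/tank/fs/datafile", O_RDWR);

                if (fd == -1)
                        return (1);
                /* Previously failed on ZFS; now succeeds as a no-op. */
                if (directio(fd, DIRECTIO_ON) != 0)
                        perror("directio");
                (void) close(fd);
                return (0);
        }
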
diff --git a/usr/src/uts/common/inet/ilb/ilb.c b/usr/src/uts/common/inet/ilb/ilb.c
index 8ab2a90116..91cd671b12 100644
--- a/usr/src/uts/common/inet/ilb/ilb.c
+++ b/usr/src/uts/common/inet/ilb/ilb.c
@@ -1679,6 +1679,8 @@ ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src,
uint16_t nat_src_idx;
boolean_t busy;
+ ret = 0;
+
/*
 * We don't really need to switch here since both protocols'
* ports are at the same offset. Just prepare for future protocol
diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.c b/usr/src/uts/common/inet/ilb/ilb_conn.c
index 7f79d41dd6..24b0138fbf 100644
--- a/usr/src/uts/common/inet/ilb/ilb_conn.c
+++ b/usr/src/uts/common/inet/ilb/ilb_conn.c
@@ -132,6 +132,9 @@ ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s)
ilb_conn_t **next, **prev;
ilb_conn_t **next_prev, **prev_next;
+ next_prev = NULL;
+ prev_next = NULL;
+
if (c2s) {
hash = connp->conn_c2s_hash;
ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
@@ -698,6 +701,7 @@ update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len,
uint32_t ack, seq;
int32_t seg_len;
+ ack = 0;
if (tcpha->tha_flags & TH_RST)
return (B_FALSE);
@@ -903,6 +907,11 @@ ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph,
uint32_t adj_ip_sum;
boolean_t full_nat;
+ in_iph4 = NULL;
+ in_iph6 = NULL;
+ icmph4 = NULL;
+ icmph6 = NULL;
+
if (l3 == IPPROTO_IP) {
in6_addr_t in_src, in_dst;
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index e9a3fcdeeb..89574da71f 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -1730,8 +1730,6 @@ typedef struct ill_s {
* Capabilities related fields.
*/
uint_t ill_dlpi_capab_state; /* State of capability query, IDCS_* */
- kcondvar_t ill_dlpi_capab_cv; /* CV for broadcasting state changes */
- kmutex_t ill_dlpi_capab_lock; /* Lock for accessing above Cond Var */
uint_t ill_capab_pending_cnt;
uint64_t ill_capabilities; /* Enabled capabilities, ILL_CAPAB_* */
ill_hcksum_capab_t *ill_hcksum_capab; /* H/W cksumming capabilities */
diff --git a/usr/src/uts/common/inet/ip/conn_opt.c b/usr/src/uts/common/inet/ip/conn_opt.c
index b4bff4d7b4..8a05a25b08 100644
--- a/usr/src/uts/common/inet/ip/conn_opt.c
+++ b/usr/src/uts/common/inet/ip/conn_opt.c
@@ -1209,6 +1209,7 @@ conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
return (EINVAL);
}
+ ifindex = UINT_MAX;
switch (name) {
case IP_TTL:
/* Don't allow zero */
@@ -1529,6 +1530,7 @@ conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
if (connp->conn_family != AF_INET6)
return (EINVAL);
+ ifindex = UINT_MAX;
switch (name) {
case IPV6_MULTICAST_IF:
/*
diff --git a/usr/src/uts/common/inet/ip/icmp.c b/usr/src/uts/common/inet/ip/icmp.c
index b1a77ae0cc..46c791298a 100644
--- a/usr/src/uts/common/inet/ip/icmp.c
+++ b/usr/src/uts/common/inet/ip/icmp.c
@@ -739,6 +739,11 @@ rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
}
ASSERT(sa != NULL && len != 0);
+ sin = NULL;
+ sin6 = NULL;
+ dstport = 0;
+ flowinfo = 0;
+ v4dst = INADDR_ANY;
/*
* Determine packet type based on type of address passed in
@@ -3592,6 +3597,7 @@ icmp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
}
} else {
/* Connected case */
+ dstport = connp->conn_fport;
v6dst = connp->conn_faddr_v6;
flowinfo = connp->conn_flowinfo;
}
diff --git a/usr/src/uts/common/inet/ip/igmp.c b/usr/src/uts/common/inet/ip/igmp.c
index 423bb2a816..de6a91877a 100644
--- a/usr/src/uts/common/inet/ip/igmp.c
+++ b/usr/src/uts/common/inet/ip/igmp.c
@@ -310,15 +310,15 @@ mld_start_timers(unsigned next, ip_stack_t *ipst)
mblk_t *
igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
- igmpa_t *igmpa;
+ igmpa_t *igmpa;
ipha_t *ipha = (ipha_t *)(mp->b_rptr);
int iphlen, igmplen, mblklen;
- ilm_t *ilm;
+ ilm_t *ilm;
uint32_t src, dst;
- uint32_t group;
+ uint32_t group;
in6_addr_t v6group;
uint_t next;
- ipif_t *ipif;
+ ipif_t *ipif;
ill_t *ill = ira->ira_ill;
ip_stack_t *ipst = ill->ill_ipst;
@@ -778,7 +778,7 @@ igmp_joingroup(ilm_t *ilm)
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
- ilm->ilm_rtx.rtx_timer = INFINITY;
+ ilm->ilm_rtx.rtx_timer = timer = INFINITY;
ilm->ilm_state = IGMP_OTHERMEMBER;
} else {
ip1dbg(("Querier mode %d, sending report, group %x\n",
@@ -857,11 +857,10 @@ mld_joingroup(ilm_t *ilm)
ill = ilm->ilm_ill;
ASSERT(ill->ill_isv6);
-
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
- ilm->ilm_rtx.rtx_timer = INFINITY;
+ ilm->ilm_rtx.rtx_timer = timer = INFINITY;
ilm->ilm_state = IGMP_OTHERMEMBER;
} else {
if (ill->ill_mcast_type == MLD_V1_ROUTER) {
@@ -1435,7 +1434,7 @@ igmp_timeout_handler(void *arg)
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
- ilm_t *ilm;
+ ilm_t *ilm;
uint_t next = INFINITY, current;
mrec_t *rp, *rtxrp;
rtx_state_t *rtxp;
@@ -1832,7 +1831,7 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
ipha_t *ipha;
int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
size_t size = hdrlen + sizeof (igmpa_t);
- ill_t *ill = ilm->ilm_ill;
+ ill_t *ill = ilm->ilm_ill;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
@@ -1859,15 +1858,15 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
| (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
- ipha->ipha_type_of_service = 0;
+ ipha->ipha_type_of_service = 0;
ipha->ipha_length = htons(size);
ipha->ipha_ident = 0;
ipha->ipha_fragment_offset_and_flags = 0;
- ipha->ipha_ttl = IGMP_TTL;
- ipha->ipha_protocol = IPPROTO_IGMP;
- ipha->ipha_hdr_checksum = 0;
- ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
- ipha->ipha_src = INADDR_ANY;
+ ipha->ipha_ttl = IGMP_TTL;
+ ipha->ipha_protocol = IPPROTO_IGMP;
+ ipha->ipha_hdr_checksum = 0;
+ ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
+ ipha->ipha_src = INADDR_ANY;
ill_mcast_queue(ill, mp);
@@ -2448,7 +2447,7 @@ mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
mblk_t *mp;
mld_hdr_t *mldh;
- ip6_t *ip6h;
+ ip6_t *ip6h;
ip6_hbh_t *ip6hbh;
struct ip6_opt_router *ip6router;
size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index 760454861b..925d06c62b 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -2404,6 +2404,7 @@ ipoptp_next(ipoptp_t *optp)
* its there, and make sure it points to either something
* inside this option, or the end of the option.
*/
+ pointer = IPOPT_EOL;
switch (opt) {
case IPOPT_RR:
case IPOPT_TS:
@@ -4124,8 +4125,6 @@ ip_modclose(ill_t *ill)
rw_destroy(&ill->ill_mcast_lock);
mutex_destroy(&ill->ill_mcast_serializer);
list_destroy(&ill->ill_nce);
- cv_destroy(&ill->ill_dlpi_capab_cv);
- mutex_destroy(&ill->ill_dlpi_capab_lock);
/*
* Now we are done with the module close pieces that
@@ -6340,6 +6339,9 @@ ip_opt_set_multicast_group(conn_t *connp, t_scalar_t name,
optfn = ip_opt_delete_group;
break;
default:
+ /* Should not be reached. */
+ fmode = MODE_IS_INCLUDE;
+ optfn = NULL;
ASSERT(0);
}
@@ -6469,6 +6471,9 @@ ip_opt_set_multicast_sources(conn_t *connp, t_scalar_t name,
optfn = ip_opt_delete_group;
break;
default:
+ /* Should not be reached. */
+ optfn = NULL;
+ fmode = 0;
ASSERT(0);
}
@@ -8937,6 +8942,8 @@ ip_forward_options(mblk_t *mp, ipha_t *ipha, ill_t *dst_ill,
ip2dbg(("ip_forward_options\n"));
dst = ipha->ipha_dst;
+ opt = NULL;
+
for (optval = ipoptp_first(&opts, ipha);
optval != IPOPT_EOL;
optval = ipoptp_next(&opts)) {
@@ -9023,6 +9030,7 @@ ip_forward_options(mblk_t *mp, ipha_t *ipha, ill_t *dst_ill,
opt[IPOPT_OFFSET] += IP_ADDR_LEN;
break;
case IPOPT_TS:
+ off = 0;
/* Insert timestamp if there is room */
switch (opt[IPOPT_POS_OV_FLG] & 0x0F) {
case IPOPT_TS_TSONLY:
@@ -9187,6 +9195,7 @@ ip_input_local_options(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
ip_stack_t *ipst = ill->ill_ipst;
ip2dbg(("ip_input_local_options\n"));
+ opt = NULL;
for (optval = ipoptp_first(&opts, ipha);
optval != IPOPT_EOL;
@@ -9249,6 +9258,7 @@ ip_input_local_options(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
opt[IPOPT_OFFSET] += IP_ADDR_LEN;
break;
case IPOPT_TS:
+ off = 0;
			/* Insert timestamp if there is room */
switch (opt[IPOPT_POS_OV_FLG] & 0x0F) {
case IPOPT_TS_TSONLY:
@@ -9342,6 +9352,7 @@ ip_input_options(ipha_t *ipha, ipaddr_t dst, mblk_t *mp,
ire_t *ire;
ip2dbg(("ip_input_options\n"));
+ opt = NULL;
*errorp = 0;
for (optval = ipoptp_first(&opts, ipha);
optval != IPOPT_EOL;
@@ -11890,6 +11901,7 @@ ip_output_local_options(ipha_t *ipha, ip_stack_t *ipst)
ipaddr_t dst;
uint32_t ts;
timestruc_t now;
+ uint32_t off = 0;
for (optval = ipoptp_first(&opts, ipha);
optval != IPOPT_EOL;
@@ -11898,7 +11910,6 @@ ip_output_local_options(ipha_t *ipha, ip_stack_t *ipst)
optlen = opts.ipoptp_len;
ASSERT((opts.ipoptp_flags & IPOPTP_ERROR) == 0);
switch (optval) {
- uint32_t off;
case IPOPT_SSRR:
case IPOPT_LSRR:
off = opt[IPOPT_OFFSET];
@@ -12546,6 +12557,7 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
}
ci.ci_ipif = NULL;
+ extract_funcp = NULL;
switch (ipip->ipi_cmd_type) {
case MISC_CMD:
case MSFILT_CMD:
@@ -12727,6 +12739,7 @@ ip_wput_nondata(queue_t *q, mblk_t *mp)
else
connp = NULL;
+ iocp = NULL;
switch (DB_TYPE(mp)) {
case M_IOCTL:
/*
@@ -12937,6 +12950,7 @@ ip_output_options(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa, ill_t *ill)
ip2dbg(("ip_output_options\n"));
+ opt = NULL;
dst = ipha->ipha_dst;
for (optval = ipoptp_first(&opts, ipha);
optval != IPOPT_EOL;
diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c
index afaf01024f..26e7be2fe8 100644
--- a/usr/src/uts/common/inet/ip/ip6.c
+++ b/usr/src/uts/common/inet/ip/ip6.c
@@ -2766,7 +2766,7 @@ ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
{
uint8_t opt_type;
- uint_t optused;
+ uint_t optused = 0;
int ret = 0;
const char *errtype;
ill_t *ill = ira->ira_ill;
diff --git a/usr/src/uts/common/inet/ip/ip6_ire.c b/usr/src/uts/common/inet/ip/ip6_ire.c
index ad738bc3b7..1145025588 100644
--- a/usr/src/uts/common/inet/ip/ip6_ire.c
+++ b/usr/src/uts/common/inet/ip/ip6_ire.c
@@ -687,7 +687,7 @@ ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
const in6_addr_t *gateway, int type, const ill_t *ill, zoneid_t zoneid,
const ts_label_t *tsl, int match_flags)
{
- in6_addr_t gw_addr_v6;
+ in6_addr_t gw_addr_v6 = { 0 };
ill_t *ire_ill = NULL, *dst_ill;
ip_stack_t *ipst = ire->ire_ipst;
diff --git a/usr/src/uts/common/inet/ip/ip6_output.c b/usr/src/uts/common/inet/ip/ip6_output.c
index dc074454e3..143077ed32 100644
--- a/usr/src/uts/common/inet/ip/ip6_output.c
+++ b/usr/src/uts/common/inet/ip/ip6_output.c
@@ -1023,7 +1023,7 @@ ire_send_wire_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
*/
if (pktlen > ixa->ixa_fragsize ||
(ixaflags & (IXAF_IPSEC_SECURE|IXAF_IPV6_ADD_FRAGHDR))) {
- uint32_t ident;
+ uint32_t ident = 0;
if (ixaflags & IXAF_IPSEC_SECURE)
pktlen += ipsec_out_extra_length(ixa);
diff --git a/usr/src/uts/common/inet/ip/ip_ftable.c b/usr/src/uts/common/inet/ip/ip_ftable.c
index 980436b578..408b9d0ea1 100644
--- a/usr/src/uts/common/inet/ip/ip_ftable.c
+++ b/usr/src/uts/common/inet/ip/ip_ftable.c
@@ -76,7 +76,7 @@
(((ire)->ire_type & IRE_DEFAULT) || \
(((ire)->ire_type & IRE_INTERFACE) && ((ire)->ire_addr == 0)))
-#define IP_SRC_MULTIHOMING(isv6, ipst) \
+#define IP_SRC_MULTIHOMING(isv6, ipst) \
(isv6 ? ipst->ips_ipv6_strict_src_multihoming : \
ipst->ips_ip_strict_src_multihoming)
@@ -470,7 +470,7 @@ ire_get_bucket(ire_t *ire)
* routes to this destination, this routine will utilise the
* first route it finds to IP address
* Return values:
- * 0 - FAILURE
+ * 0 - FAILURE
* nonzero - ifindex
*/
uint_t
@@ -807,7 +807,7 @@ ire_round_robin(irb_t *irb_ptr, ire_ftable_args_t *margs, uint_t hash,
ire_t *orig_ire, ip_stack_t *ipst)
{
ire_t *ire, *maybe_ire = NULL;
- uint_t maybe_badcnt;
+ uint_t maybe_badcnt = 0;
uint_t maxwalk;
/* Fold in more bits from the hint/hash */
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index 24e4c200d4..a2ddcb3547 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -1394,10 +1394,11 @@ ill_capability_wait(ill_t *ill)
while (ill->ill_capab_pending_cnt != 0 &&
(ill->ill_state_flags & ILL_CONDEMNED) == 0) {
- mutex_enter(&ill->ill_dlpi_capab_lock);
+ /* This may enable blocked callers of ill_capability_done(). */
ipsq_exit(ill->ill_phyint->phyint_ipsq);
- cv_wait(&ill->ill_dlpi_capab_cv, &ill->ill_dlpi_capab_lock);
- mutex_exit(&ill->ill_dlpi_capab_lock);
+ /* Pause a bit (1msec) before we re-enter the squeue. */
+ delay(drv_usectohz(1000000));
+
/*
* If ipsq_enter() fails, someone set ILL_CONDEMNED
* while we dropped the squeue. Indicate such to the caller.
@@ -1508,9 +1509,9 @@ ill_capability_id_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *outers)
id_ic = (dl_capab_id_t *)(outers + 1);
+ inners = &id_ic->id_subcap;
if (outers->dl_length < sizeof (*id_ic) ||
- (inners = &id_ic->id_subcap,
- inners->dl_length > (outers->dl_length - sizeof (*inners)))) {
+ inners->dl_length > (outers->dl_length - sizeof (*inners))) {
cmn_err(CE_WARN, "ill_capability_id_ack: malformed "
"encapsulated capab type %d too long for mblk",
inners->dl_cap);
@@ -3513,9 +3514,6 @@ ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback,
ill->ill_max_buf = ND_MAX_Q;
ill->ill_refcnt = 0;
- cv_init(&ill->ill_dlpi_capab_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&ill->ill_dlpi_capab_lock, NULL, MUTEX_DEFAULT, NULL);
-
return (0);
}
@@ -4027,6 +4025,7 @@ ill_get_next_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst)
phyint_t *phyi_initial;
uint_t ifindex;
+ phyi_initial = NULL;
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
if (index == 0) {
@@ -12935,6 +12934,7 @@ void
ill_capability_done(ill_t *ill)
{
ASSERT(ill->ill_capab_pending_cnt != 0);
+ ASSERT(IAM_WRITER_ILL(ill));
ill_dlpi_done(ill, DL_CAPABILITY_REQ);
@@ -12942,10 +12942,6 @@ ill_capability_done(ill_t *ill)
if (ill->ill_capab_pending_cnt == 0 &&
ill->ill_dlpi_capab_state == IDCS_OK)
ill_capability_reset_alloc(ill);
-
- mutex_enter(&ill->ill_dlpi_capab_lock);
- cv_broadcast(&ill->ill_dlpi_capab_cv);
- mutex_exit(&ill->ill_dlpi_capab_lock);
}
/*
diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c
index 69506f77d4..2cee123d4a 100644
--- a/usr/src/uts/common/inet/ip/ip_ndp.c
+++ b/usr/src/uts/common/inet/ip/ip_ndp.c
@@ -2943,6 +2943,8 @@ nce_update(ncec_t *ncec, uint16_t new_state, uchar_t *new_ll_addr)
ASSERT(ncec->ncec_lladdr != NULL || new_state == ND_INITIAL ||
new_state == ND_INCOMPLETE);
}
+
+ tid = 0;
if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) {
tid = ncec->ncec_timeout_id;
ncec->ncec_timeout_id = 0;
@@ -4433,6 +4435,7 @@ nce_resolve_src(ncec_t *ncec, in6_addr_t *src)
ASSERT(src != NULL);
ASSERT(IN6_IS_ADDR_UNSPECIFIED(src));
+ src4 = 0;
src6 = *src;
if (is_myaddr) {
src6 = ncec->ncec_addr;
@@ -4641,6 +4644,7 @@ nce_add_common(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
ndp = ill->ill_ipst->ips_ndp4;
*retnce = NULL;
+ state = 0;
ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
diff --git a/usr/src/uts/common/inet/ip/ip_output.c b/usr/src/uts/common/inet/ip/ip_output.c
index 169859707e..a6ca2aabd5 100644
--- a/usr/src/uts/common/inet/ip/ip_output.c
+++ b/usr/src/uts/common/inet/ip/ip_output.c
@@ -1100,7 +1100,7 @@ ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
int, 1);
if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) {
- int error;
+ int error = 0;
DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL,
ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
@@ -1156,7 +1156,7 @@ ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
}
if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst)) {
- int error;
+ int error = 0;
DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill,
ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp);
diff --git a/usr/src/uts/common/inet/ip/ip_rts.c b/usr/src/uts/common/inet/ip/ip_rts.c
index dece7be29d..5df5ad6447 100644
--- a/usr/src/uts/common/inet/ip/ip_rts.c
+++ b/usr/src/uts/common/inet/ip/ip_rts.c
@@ -114,7 +114,7 @@ rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
ip_stack_t *ipst)
{
mblk_t *mp1;
- conn_t *connp, *next_connp;
+ conn_t *connp, *next_connp;
/*
* Since we don't have an ill_t here, RTSQ_DEFAULT must already be
@@ -190,7 +190,7 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
mblk_t *mp;
rt_msghdr_t *rtm;
int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
- sa_family_t af;
+ sa_family_t af = { 0 };
in6_addr_t gw_addr_v6;
if (ire == NULL)
@@ -199,6 +199,7 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
ire->ire_ipversion == IPV6_VERSION);
ASSERT(!(ire->ire_type & IRE_IF_CLONE));
+ mp = NULL;
if (ire->ire_flags & RTF_SETSRC)
rtm_addrs |= RTA_SRC;
@@ -306,10 +307,14 @@ ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
ts_label_t *tsl = NULL;
zoneid_t zoneid;
ip_stack_t *ipst;
- ill_t *ill = NULL;
+ ill_t *ill = NULL;
zoneid = connp->conn_zoneid;
ipst = connp->conn_netstack->netstack_ip;
+ net_mask = 0;
+ src_addr = 0;
+ dst_addr = 0;
+ gw_addr = 0;
if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
freemsg(mp);
@@ -1239,6 +1244,9 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc,
ipaddr_t v4setsrc;
rtm = (rt_msghdr_t *)mp->b_rptr;
+ ifaddr = 0;
+ brdaddr = 0;
+ rtm_flags = 0;
/*
* Find the ill used to send packets. This will be NULL in case
@@ -1406,7 +1414,7 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
ill_t *ill;
ifrt_t *ifrt;
mblk_t *mp;
- in6_addr_t gw_addr_v6;
+ in6_addr_t gw_addr_v6 = { 0 };
/* Need to add back some metrics to the IRE? */
/*
@@ -1422,6 +1430,7 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
* <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
* microseconds.
*/
+ rtt = 0;
if (which & RTV_RTT)
rtt = metrics->rmx_rtt / 1000;
if (which & RTV_RTTVAR)
diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c
index 77d9d8df7e..4f3ec2d817 100644
--- a/usr/src/uts/common/inet/ip/ipclassifier.c
+++ b/usr/src/uts/common/inet/ip/ipclassifier.c
@@ -613,6 +613,7 @@ ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
break;
default:
+ conn_cache = NULL;
connp = NULL;
ASSERT(0);
}
diff --git a/usr/src/uts/common/inet/ip/ipmp.c b/usr/src/uts/common/inet/ip/ipmp.c
index 912b489c40..3106b6e2de 100644
--- a/usr/src/uts/common/inet/ip/ipmp.c
+++ b/usr/src/uts/common/inet/ip/ipmp.c
@@ -1909,6 +1909,7 @@ ipmp_phyint_join_grp(phyint_t *phyi, ipmp_grp_t *grp)
ASSERT(IAM_WRITER_IPSQ(ipsq));
ASSERT(phyi->phyint_illv4 != NULL || phyi->phyint_illv6 != NULL);
+ ill = NULL;
/*
* Send routing socket messages indicating that the phyint's ills
diff --git a/usr/src/uts/common/inet/ip/ipsecah.c b/usr/src/uts/common/inet/ip/ipsecah.c
index fc19d7f877..ced3696948 100644
--- a/usr/src/uts/common/inet/ip/ipsecah.c
+++ b/usr/src/uts/common/inet/ip/ipsecah.c
@@ -215,7 +215,7 @@ static int
ah_kstat_update(kstat_t *kp, int rw)
{
ah_kstats_t *ekp;
- netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private;
+ netstackid_t stackid;
netstack_t *ns;
ipsec_stack_t *ipss;
@@ -225,6 +225,7 @@ ah_kstat_update(kstat_t *kp, int rw)
if (rw == KSTAT_WRITE)
return (EACCES);
+ stackid = (netstackid_t)(uintptr_t)kp->ks_private;
ns = netstack_find_by_stackid(stackid);
if (ns == NULL)
return (-1);
diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c
index b3dc7d350a..e0efbbf3ce 100644
--- a/usr/src/uts/common/inet/ip/ipsecesp.c
+++ b/usr/src/uts/common/inet/ip/ipsecesp.c
@@ -208,7 +208,7 @@ static int
esp_kstat_update(kstat_t *kp, int rw)
{
esp_kstats_t *ekp;
- netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private;
+ netstackid_t stackid;
netstack_t *ns;
ipsec_stack_t *ipss;
@@ -218,6 +218,7 @@ esp_kstat_update(kstat_t *kp, int rw)
if (rw == KSTAT_WRITE)
return (EACCES);
+ stackid = (zoneid_t)(uintptr_t)kp->ks_private;
ns = netstack_find_by_stackid(stackid);
if (ns == NULL)
return (-1);
diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c
index 44ebb21db3..288c0e3e18 100644
--- a/usr/src/uts/common/inet/ip/sadb.c
+++ b/usr/src/uts/common/inet/ip/sadb.c
@@ -113,8 +113,8 @@ extern uint64_t ipsacq_maxpackets;
if (((sa)->ipsa_ ## exp) == 0) \
(sa)->ipsa_ ## exp = tmp; \
else \
- (sa)->ipsa_ ## exp = \
- MIN((sa)->ipsa_ ## exp, tmp); \
+ (sa)->ipsa_ ## exp = \
+ MIN((sa)->ipsa_ ## exp, tmp); \
} \
}
@@ -154,8 +154,6 @@ sadb_sa_refrele(void *target)
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
- time_t sum;
-
/*
* Clip delta to the maximum possible time_t value to
* prevent "overwrapping" back into a shorter-than-desired
@@ -163,18 +161,12 @@ sadb_add_time(time_t base, uint64_t delta)
*/
if (delta > TIME_MAX)
delta = TIME_MAX;
- /*
- * This sum may still overflow.
- */
- sum = base + delta;
- /*
- * .. so if the result is less than the base, we overflowed.
- */
- if (sum < base)
- sum = TIME_MAX;
-
- return (sum);
+ if (base > 0) {
+ if (TIME_MAX - base < delta)
+ return (TIME_MAX); /* Overflow */
+ }
+ return (base + delta);
}
/*
@@ -1695,8 +1687,7 @@ sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
mp->b_cont = mp1;
break;
default:
- if (mp != NULL)
- freemsg(mp);
+ freemsg(mp);
return;
}
@@ -2941,7 +2932,7 @@ sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
boolean_t isupdate = (newbie != NULL);
uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
ipsec_stack_t *ipss = ns->netstack_ipsec;
- ip_stack_t *ipst = ns->netstack_ip;
+ ip_stack_t *ipst = ns->netstack_ip;
ipsec_alginfo_t *alg;
int rcode;
boolean_t async = B_FALSE;
@@ -4386,8 +4377,8 @@ sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
if (assoc->ipsa_idletime != 0) {
assoc->ipsa_idletime = min(assoc->ipsa_idletime,
assoc->ipsa_idleuselt);
- assoc->ipsa_idleexpiretime =
- current + assoc->ipsa_idletime;
+ assoc->ipsa_idleexpiretime =
+ current + assoc->ipsa_idletime;
} else {
assoc->ipsa_idleexpiretime =
current + assoc->ipsa_idleuselt;
@@ -5450,7 +5441,7 @@ sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
uint32_t seq;
uint64_t unique_id = 0;
boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
- ts_label_t *tsl;
+ ts_label_t *tsl;
netstack_t *ns = ixa->ixa_ipst->ips_netstack;
ipsec_stack_t *ipss = ns->netstack_ipsec;
ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
@@ -6102,7 +6093,8 @@ sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
return (NULL);
bsllow(&sl);
- LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level);
+ LCLASS_SET((_bslabel_impl_t *)&sl,
+ (uint16_t)sens->sadb_sens_sens_level);
bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
bitmap_len);
@@ -6629,7 +6621,7 @@ ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
static void
ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
{
- connf_t *connfp;
+ connf_t *connfp;
conn_t *connp;
uint32_t ports;
uint16_t *pptr = (uint16_t *)&ports;
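
The sadb_add_time() rewrite earlier in this file deserves a note: the old code
computed base + delta and then tested sum < base, but signed integer overflow
is undefined behavior in C, so a compiler may legally delete that test; the
new version checks the available headroom before adding. A standalone sketch
of the pattern (TIME_MAX standing in for the maximum time_t value, as in
sadb.c):

        #include <sys/types.h>

        /*
         * Saturating time_t addition: clip at TIME_MAX instead of relying
         * on (undefined) signed-overflow behavior to detect wraparound.
         */
        static time_t
        clip_add_time(time_t base, uint64_t delta)
        {
                if (delta > TIME_MAX)
                        delta = TIME_MAX;
                if (base > 0 && (uint64_t)(TIME_MAX - base) < delta)
                        return (TIME_MAX);      /* would overflow; saturate */
                return (base + (time_t)delta);
        }
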
diff --git a/usr/src/uts/common/inet/ip/spd.c b/usr/src/uts/common/inet/ip/spd.c
index d703170c9f..85f06f3d02 100644
--- a/usr/src/uts/common/inet/ip/spd.c
+++ b/usr/src/uts/common/inet/ip/spd.c
@@ -163,7 +163,7 @@ int ipsec_weird_null_inbound_policy = 0;
* Inbound traffic should have matching identities for both SA's.
*/
-#define SA_IDS_MATCH(sa1, sa2) \
+#define SA_IDS_MATCH(sa1, sa2) \
(((sa1) == NULL) || ((sa2) == NULL) || \
(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) && \
(((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))
@@ -3178,6 +3178,7 @@ ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns)
* TODO: should canonicalize a[] (i.e., zeroize any padding)
* so we can use a non-trivial policy_hash function.
*/
+ ap = NULL;
for (i = n-1; i >= 0; i--) {
hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]);
@@ -6282,6 +6283,9 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *iramp, mblk_t *mp,
#ifdef FRAGCACHE_DEBUG
cmn_err(CE_WARN, "Fragcache: %s\n", inbound ? "INBOUND" : "OUTBOUND");
#endif
+ v6_proto = 0;
+ fraghdr = NULL;
+
/*
* You're on the slow path, so insure that every packet in the
* cache is a single-mblk one.
diff --git a/usr/src/uts/common/inet/ip/tnet.c b/usr/src/uts/common/inet/ip/tnet.c
index e8c7b0c6e2..37a7402d52 100644
--- a/usr/src/uts/common/inet/ip/tnet.c
+++ b/usr/src/uts/common/inet/ip/tnet.c
@@ -692,7 +692,7 @@ tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira)
const void *src;
const ip6_t *ip6h;
cred_t *credp;
- int proto;
+ int proto;
ASSERT(DB_TYPE(mp) == M_DATA);
@@ -1477,6 +1477,9 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira)
*/
af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
+ ipha = NULL;
+ ip6h = NULL;
+ gw_rhtp = NULL;
if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
ASSERT(ire->ire_ipversion == IPV4_VERSION);
diff --git a/usr/src/uts/common/inet/sctp/sctp_asconf.c b/usr/src/uts/common/inet/sctp/sctp_asconf.c
index f5edd1994f..db770df30e 100644
--- a/usr/src/uts/common/inet/sctp/sctp_asconf.c
+++ b/usr/src/uts/common/inet/sctp/sctp_asconf.c
@@ -47,7 +47,7 @@
typedef struct sctp_asconf_s {
mblk_t *head;
- uint32_t cid;
+ uint32_t cid;
} sctp_asconf_t;
/*
@@ -636,6 +636,12 @@ sctp_input_asconf_ack(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp)
ASSERT(ch->sch_id == CHUNK_ASCONF_ACK);
+ ainfo = NULL;
+ alist = NULL;
+ dlist = NULL;
+ aptr = NULL;
+ dptr = NULL;
+
snp = (uint32_t *)(ch + 1);
rlen = ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*snp);
if (rlen < 0) {
@@ -915,9 +921,9 @@ sctp_wput_asconf(sctp_t *sctp, sctp_faddr_t *fp)
{
#define SCTP_SET_SENT_FLAG(mp) ((mp)->b_flag = SCTP_CHUNK_FLAG_SENT)
- mblk_t *mp;
+ mblk_t *mp;
mblk_t *ipmp;
- uint32_t *snp;
+ uint32_t *snp;
sctp_parm_hdr_t *ph;
boolean_t isv4;
sctp_stack_t *sctps = sctp->sctp_sctps;
@@ -1467,6 +1473,7 @@ sctp_add_ip(sctp_t *sctp, const void *addrs, uint32_t cnt)
* If deleting:
* o Must be part of the association
*/
+ sin6 = NULL;
for (i = 0; i < cnt; i++) {
switch (connp->conn_family) {
case AF_INET:
diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c
index ef60f6d26a..a640ead3d1 100644
--- a/usr/src/uts/common/inet/sctp/sctp_common.c
+++ b/usr/src/uts/common/inet/sctp/sctp_common.c
@@ -804,6 +804,8 @@ sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
{
sctp_faddr_t *fpp;
+ fpp = NULL;
+
if (!sctp->sctp_faddrs) {
return;
}
diff --git a/usr/src/uts/common/inet/sctp/sctp_cookie.c b/usr/src/uts/common/inet/sctp/sctp_cookie.c
index 53c35183dc..da86faa252 100644
--- a/usr/src/uts/common/inet/sctp/sctp_cookie.c
+++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c
@@ -427,10 +427,10 @@ sctp_initialize_params(sctp_t *sctp, sctp_init_chunk_t *init,
/*
* Copy the peer's original source address into addr. This relies on the
* following format (see sctp_send_initack() below):
- * relative timestamp for the cookie (int64_t) +
- * cookie lifetime (uint32_t) +
- * local tie-tag (uint32_t) + peer tie-tag (uint32_t) +
- * Peer's original src ...
+ * relative timestamp for the cookie (int64_t) +
+ * cookie lifetime (uint32_t) +
+ * local tie-tag (uint32_t) + peer tie-tag (uint32_t) +
+ * Peer's original src ...
*/
int
cl_sctp_cookie_paddr(sctp_chunk_hdr_t *ch, in6_addr_t *addr)
@@ -454,7 +454,7 @@ cl_sctp_cookie_paddr(sctp_chunk_hdr_t *ch, in6_addr_t *addr)
sizeof (int64_t) + /* timestamp */ \
sizeof (uint32_t) + /* cookie lifetime */ \
sizeof (sctp_init_chunk_t) + /* INIT ACK */ \
- sizeof (in6_addr_t) + /* peer's original source */ \
+ sizeof (in6_addr_t) + /* peer's original source */ \
ntohs((initcp)->sch_len) + /* peer's INIT */ \
sizeof (uint32_t) + /* local tie-tag */ \
sizeof (uint32_t) + /* peer tie-tag */ \
@@ -946,6 +946,8 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp,
uint16_t old_num_str;
sctp_stack_t *sctps = sctp->sctp_sctps;
+ sdc = NULL;
+ seglen = 0;
iack = (sctp_init_chunk_t *)(iackch + 1);
cph = NULL;
diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c
index 1b6449cfab..7d856fab28 100644
--- a/usr/src/uts/common/inet/sctp/sctp_input.c
+++ b/usr/src/uts/common/inet/sctp/sctp_input.c
@@ -831,7 +831,7 @@ sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
* there is a break in the sequence. We want
* to chop the reassembly list as follows (the
* numbers are TSNs):
- * 10 -> 11 -> (end of chunks)
+ * 10 -> 11 -> (end of chunks)
* 10 -> 11 -> | 13 (break in sequence)
*/
prev = mp;
@@ -943,6 +943,7 @@ sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
uint32_t tsn;
uint16_t fraglen = 0;
+ reassq_curr = NULL;
*error = 0;
/*
diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c
index 23abeccf96..476a6d921e 100644
--- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c
+++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c
@@ -1057,7 +1057,10 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp,
/* In all cases, the size of the option must be bigger than int */
if (inlen >= sizeof (int32_t)) {
onoff = ONOFF(*i1);
+ } else {
+ return (EINVAL);
}
+
retval = 0;
RUN_SCTP(sctp);
diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c
index eced6eccba..0564f5a416 100644
--- a/usr/src/uts/common/inet/sctp/sctp_output.c
+++ b/usr/src/uts/common/inet/sctp/sctp_output.c
@@ -990,8 +990,8 @@ sctp_output(sctp_t *sctp, uint_t num_pkt)
mblk_t *head;
mblk_t *meta = sctp->sctp_xmit_tail;
mblk_t *fill = NULL;
- uint16_t chunklen;
- uint32_t cansend;
+ uint16_t chunklen;
+ uint32_t cansend;
int32_t seglen;
int32_t xtralen;
int32_t sacklen;
@@ -1007,6 +1007,8 @@ sctp_output(sctp_t *sctp, uint_t num_pkt)
sctp_stack_t *sctps = sctp->sctp_sctps;
uint32_t tsn;
+ lfp = NULL;
+
if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
sacklen = 0;
} else {
@@ -1651,7 +1653,7 @@ sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
* - the chunk is unsent, i.e. new data.
*/
#define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \
- (!SCTP_CHUNK_ABANDONED((mp)) && \
+ (!SCTP_CHUNK_ABANDONED((mp)) && \
((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \
!SCTP_CHUNK_ISACKED(mp))) || \
(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
@@ -1694,7 +1696,7 @@ sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
*
* if the advanced peer ack point includes the next
	 * chunk to be retransmitted - possibly the Forward
- * TSN was lost.
+ * TSN was lost.
*
* if we are PRSCTP aware and the next chunk to be
* retransmitted is now abandoned
diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c
index ec2a5d4e29..876e7d48e6 100644
--- a/usr/src/uts/common/inet/tcp/tcp_bind.c
+++ b/usr/src/uts/common/inet/tcp/tcp_bind.c
@@ -324,7 +324,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
boolean_t bind_to_req_port_only, cred_t *cr)
{
in_port_t mlp_port;
- mlp_type_t addrtype, mlptype;
+ mlp_type_t addrtype, mlptype;
boolean_t user_specified;
in_port_t allocated_port;
in_port_t requested_port = *requested_port_ptr;
@@ -333,6 +333,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
tcp_stack_t *tcps = tcp->tcp_tcps;
in6_addr_t v6addr = connp->conn_laddr_v6;
+ zone = NULL;
/*
* XXX It's up to the caller to specify bind_to_req_port_only or not.
*/
@@ -697,7 +698,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
if (connp->conn_anon_priv_bind) {
/*
* loopmax =
- * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
+ * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
*/
loopmax = IPPORT_RESERVED -
tcps->tcps_min_anonpriv_port;
diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c
index e73c34de34..f2cb8f6dbd 100644
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c
@@ -160,7 +160,7 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcpha_t *tcpha)
if (!tcp->tcp_unfusable && !peer_tcp->tcp_unfusable &&
tcp->tcp_xmit_head == NULL && peer_tcp->tcp_xmit_head == NULL) {
- mblk_t *mp;
+ mblk_t *mp = NULL;
queue_t *peer_rq = peer_connp->conn_rq;
ASSERT(!TCP_IS_DETACHED(peer_tcp));
diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c
index ece2abbc04..0aaad871ba 100644
--- a/usr/src/uts/common/inet/tcp/tcp_input.c
+++ b/usr/src/uts/common/inet/tcp/tcp_input.c
@@ -2469,6 +2469,7 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
tcp_unfuse(tcp);
}
+ mss = 0;
iphdr = mp->b_rptr;
rptr = mp->b_rptr;
ASSERT(OK_32PTR(rptr));
diff --git a/usr/src/uts/common/inet/tcp/tcp_misc.c b/usr/src/uts/common/inet/tcp/tcp_misc.c
index 4f6399c433..0896dd7611 100644
--- a/usr/src/uts/common/inet/tcp/tcp_misc.c
+++ b/usr/src/uts/common/inet/tcp/tcp_misc.c
@@ -291,6 +291,7 @@ tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index, int *count,
startover:
nmatch = 0;
+ last = NULL;
mutex_enter(&connfp->connf_lock);
for (tconnp = connfp->connf_head; tconnp != NULL;
diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c
index ae9efe863d..7a0472f3dd 100644
--- a/usr/src/uts/common/inet/tcp/tcp_output.c
+++ b/usr/src/uts/common/inet/tcp/tcp_output.c
@@ -1787,7 +1787,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
uint32_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time)
{
int num_lso_seg = 1;
- uint_t lso_usable;
+ uint_t lso_usable = 0;
boolean_t do_lso_send = B_FALSE;
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
diff --git a/usr/src/uts/common/inet/tcp/tcp_tpi.c b/usr/src/uts/common/inet/tcp/tcp_tpi.c
index dbdc5b8dc7..6b32a0ad27 100644
--- a/usr/src/uts/common/inet/tcp/tcp_tpi.c
+++ b/usr/src/uts/common/inet/tcp/tcp_tpi.c
@@ -154,6 +154,10 @@ tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp,
opt_offset = tcresp->OPT_offset;
opt_lenp = (t_scalar_t *)&tcresp->OPT_length;
break;
+ default:
+ opt_lenp = 0;
+ opt_offset = 0;
+ break;
}
*t_errorp = 0;
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c
index 165adcb852..b2183405eb 100644
--- a/usr/src/uts/common/inet/udp/udp.c
+++ b/usr/src/uts/common/inet/udp/udp.c
@@ -4984,6 +4984,8 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
mlp_type_t addrtype, mlptype;
udp_stack_t *us = udp->udp_us;
+ sin = NULL;
+ sin6 = NULL;
switch (len) {
case sizeof (sin_t): /* Complete IPv4 address */
sin = (sin_t *)sa;
@@ -5697,6 +5699,10 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
udp = connp->conn_udp;
us = udp->udp_us;
+ sin = NULL;
+ sin6 = NULL;
+ v4dst = INADDR_ANY;
+ flowinfo = 0;
/*
* Address has been verified by the caller
diff --git a/usr/src/uts/common/inet/udp/udp_stats.c b/usr/src/uts/common/inet/udp/udp_stats.c
index 2f5202f693..4ed1ab9773 100644
--- a/usr/src/uts/common/inet/udp/udp_stats.c
+++ b/usr/src/uts/common/inet/udp/udp_stats.c
@@ -93,7 +93,12 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req)
*/
mp2ctl = copymsg(mpctl);
- mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
+ mp6_info_ctl = NULL;
+ mp6_attr_ctl = NULL;
+ mp6_conn_ctl = NULL;
+ mp_info_ctl = NULL;
+ mp_attr_ctl = NULL;
+ mp_conn_ctl = NULL;
if (mpctl == NULL ||
(mpdata = mpctl->b_cont) == NULL ||
(mp_conn_ctl = copymsg(mpctl)) == NULL ||
diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c
index 9097e059b5..82f3989f42 100644
--- a/usr/src/uts/common/io/aggr/aggr_grp.c
+++ b/usr/src/uts/common/io/aggr/aggr_grp.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -1442,8 +1442,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
grp->lg_rx_group_count = 1;
- for (i = 0, port = grp->lg_ports; port != NULL;
- i++, port = port->lp_next) {
+ for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
uint_t num_rgroups;
mac_perim_enter_by_mh(port->lp_mh, &mph);
diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c
index c8dbe00336..e764dd104e 100644
--- a/usr/src/uts/common/io/aggr/aggr_port.c
+++ b/usr/src/uts/common/io/aggr/aggr_port.c
@@ -22,7 +22,7 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/*
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
index d6200a93b4..b52483d3de 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
@@ -25,7 +25,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 OSN Online Service Nuernberg GmbH. All rights reserved.
@@ -6546,8 +6546,10 @@ ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid)
}
vlp = ixgbe_find_vlan(rx_group, vid);
- if (vlp == NULL)
+ if (vlp == NULL) {
+ mutex_exit(&ixgbe->gen_lock);
return (ENOENT);
+ }
/*
* See the comment in ixgbe_addvlan() about is_def_grp and
@@ -6601,8 +6603,10 @@ ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid)
/* This shouldn't fail, but if it does return EIO. */
ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE,
B_TRUE);
- if (ret != IXGBE_SUCCESS)
+ if (ret != IXGBE_SUCCESS) {
+ mutex_exit(&ixgbe->gen_lock);
return (EIO);
+ }
}
}
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h
index 0dbb3288c3..cfd987787a 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h
@@ -92,6 +92,7 @@ extern "C" {
#define MAX_NUM_UNICAST_ADDRESSES 0x80
#define MAX_NUM_MULTICAST_ADDRESSES 0x1000
#define MAX_NUM_VLAN_FILTERS 0x40
+
#define IXGBE_INTR_NONE 0
#define IXGBE_INTR_MSIX 1
#define IXGBE_INTR_MSI 2
diff --git a/usr/src/uts/common/io/ldterm.c b/usr/src/uts/common/io/ldterm.c
index 97a9c1a478..46669ace0c 100644
--- a/usr/src/uts/common/io/ldterm.c
+++ b/usr/src/uts/common/io/ldterm.c
@@ -22,7 +22,7 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2018, Joyent, Inc.
- * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -4087,7 +4087,8 @@ ldterm_dosig(queue_t *q, int sig, uchar_t c, int mtype, int mode)
if (c != '\0') {
if ((tp->t_echomp = allocb(4, BPRI_HI)) != NULL) {
- if (ldterm_echo(c, WR(q), 4, tp) > 0)
+ if (ldterm_echo(c, WR(q), 4, tp) > 0 ||
+ (tp->t_state & TS_ISPTSTTY))
putnext(WR(q), tp->t_echomp);
else
freemsg(tp->t_echomp);
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index f4074a2b91..d698862d81 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2015 Garrett D'Amore <garrett@damore.org>
*/
@@ -1648,7 +1648,8 @@ mac_hwrings_idx_get(mac_handle_t mh, uint_t idx, mac_group_handle_t *hwgh,
if (rtype == MAC_RING_TYPE_RX) {
grp = mip->mi_rx_groups;
- } else if (rtype == MAC_RING_TYPE_TX) {
+ } else {
+ ASSERT(rtype == MAC_RING_TYPE_TX);
grp = mip->mi_tx_groups;
}
@@ -5536,6 +5537,11 @@ mac_add_macaddr_vlan(mac_impl_t *mip, mac_group_t *group, uint8_t *addr,
return (0);
}
+ /*
+ * We failed to set promisc mode and we are about to free 'map'.
+ */
+ map->ma_nusers = 0;
+
bail:
if (hw_vlan) {
int err2 = mac_group_remvlan(group, vid);
@@ -5591,6 +5597,8 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid)
if (map->ma_nusers > 0)
return (0);
+ VERIFY3S(map->ma_nusers, ==, 0);
+
/*
* The MAC address is no longer used by any MAC client, so
* remove it from its associated group. Turn off promiscuous
@@ -5615,7 +5623,16 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid)
* If we fail to remove the MAC address HW
* filter but then also fail to re-add the
* VLAN HW filter then we are in a busted
- * state and should just crash.
+ * state. We do our best by logging a warning
+ * and returning the original 'err' that got
+ * us here. At this point, traffic for this
+ * address + VLAN combination will be dropped
+ * until the user reboots the system. In the
+ * future, it would be nice to have a system
+ * that can compare the state of expected
+ * classification according to mac to the
+ * actual state of the provider, and report
+ * and fix any inconsistencies.
*/
if (MAC_GROUP_HW_VLAN(group)) {
int err2;
@@ -5629,6 +5646,7 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid)
}
}
+ map->ma_nusers = 1;
return (err);
}
@@ -5642,8 +5660,10 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid)
map->ma_type, __FILE__, __LINE__);
}
- if (err != 0)
+ if (err != 0) {
+ map->ma_nusers = 1;
return (err);
+ }
/*
* We created MAC address for the primary one at registration, so we
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index e26a028243..dcfb4803d6 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -23,6 +23,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright 2017 RackTop Systems.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -1287,7 +1288,7 @@ mac_addr_random(mac_client_handle_t mch, uint_t prefix_len,
prefix_len, addr_len - prefix_len);
}
- *diag = 0;
+ *diag = MAC_DIAG_NONE;
return (0);
}
@@ -2551,6 +2552,8 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
*/
ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE)));
+ *diag = MAC_DIAG_NONE;
+
/*
* Can't unicast add if the client asked only for minimal datapath
* setup.
diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c
index 656c598e53..e1dbf9a953 100644
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c
+++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c
@@ -2892,8 +2892,8 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
mac_group_t *default_rgroup;
mac_group_t *default_tgroup;
int err;
- uint8_t *mac_addr;
uint16_t vid;
+ uint8_t *mac_addr;
mac_group_state_t next_state;
mac_client_impl_t *group_only_mcip;
mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 2c4ac0a1af..0f917cd8ca 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -735,7 +735,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
mac_impl_t *mip = (mac_impl_t *)mh;
mac_ring_t *mr = (mac_ring_t *)mrh;
- mac_soft_ring_set_t *mac_srs;
+ mac_soft_ring_set_t *mac_srs;
mblk_t *bp = mp_chain;
/*
diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c
index 2244218f20..e1151565a6 100644
--- a/usr/src/uts/common/io/mac/mac_stat.c
+++ b/usr/src/uts/common/io/mac/mac_stat.c
@@ -262,7 +262,7 @@ static stat_info_t rx_srs_stats_list[] = {
{RX_SRS_STAT_OFF(mrs_chaincntover50)},
{RX_SRS_STAT_OFF(mrs_ierrors)}
};
-#define RX_SRS_STAT_SIZE \
+#define RX_SRS_STAT_SIZE \
(sizeof (rx_srs_stats_list) / sizeof (stat_info_t))
#define TX_SOFTRING_STAT_OFF(f) (offsetof(mac_tx_stats_t, f))
@@ -274,14 +274,14 @@ static stat_info_t tx_softring_stats_list[] = {
{TX_SOFTRING_STAT_OFF(mts_unblockcnt)},
{TX_SOFTRING_STAT_OFF(mts_sdrops)},
};
-#define TX_SOFTRING_STAT_SIZE \
+#define TX_SOFTRING_STAT_SIZE \
(sizeof (tx_softring_stats_list) / sizeof (stat_info_t))
static void
i_mac_add_stats(void *sum, void *op1, void *op2,
stat_info_t stats_list[], uint_t size)
{
- int i;
+ int i;
for (i = 0; i < size; i++) {
uint64_t *op1_val = (uint64_t *)
@@ -679,8 +679,8 @@ i_mac_rx_hwlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname,
static uint64_t
i_mac_misc_stat_get(void *handle, uint_t stat)
{
- flow_entry_t *flent = handle;
- mac_client_impl_t *mcip = flent->fe_mcip;
+ flow_entry_t *flent = handle;
+ mac_client_impl_t *mcip = flent->fe_mcip;
mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat;
mac_rx_stats_t *mac_rx_stat;
mac_tx_stats_t *mac_tx_stat;
@@ -871,9 +871,9 @@ i_mac_tx_hwlane_stat_create(mac_soft_ring_t *ringp, const char *modname,
static uint64_t
i_mac_rx_fanout_stat_get(void *handle, uint_t stat)
{
- mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle;
+ mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle;
mac_soft_ring_t *udp_ringp = NULL, *oth_ringp = NULL;
- mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set;
+ mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set;
int index;
uint64_t val;
@@ -1037,7 +1037,7 @@ void
mac_srs_stat_create(mac_soft_ring_set_t *mac_srs)
{
flow_entry_t *flent = mac_srs->srs_flent;
- char statname[MAXNAMELEN];
+ char statname[MAXNAMELEN];
boolean_t is_tx_srs;
/* No hardware/software lanes for user defined flows */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
new file mode 100644
index 0000000000..12a8d52b3f
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -0,0 +1,2765 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Mellanox Connect-X 4/5/6 driver.
+ */
+
+/*
+ * The PRM for this family of parts is freely available, and can be found at:
+ * https://www.mellanox.com/related-docs/user_manuals/ \
+ * Ethernet_Adapters_Programming_Manual.pdf
+ */
+/*
+ * ConnectX glossary
+ * -----------------
+ *
+ * WR Work Request: something we've asked the hardware to do by
+ * creating a Work Queue Entry (WQE), e.g. send or recv a packet
+ *
+ * WQE Work Queue Entry: a descriptor on a work queue descriptor ring
+ *
+ * WQ Work Queue: a descriptor ring that we can place WQEs on, usually
+ * either a Send Queue (SQ) or Receive Queue (RQ). Different WQ
+ * types have different WQE structures, different commands for
+ * creating and destroying them, etc, but share a common context
+ * structure, counter setup and state graph.
+ * SQ Send Queue, a specific type of WQ that sends packets
+ * RQ Receive Queue, a specific type of WQ that receives packets
+ *
+ * CQ Completion Queue: completion of WRs from a WQ are reported to
+ * one of these, as a CQE on its entry ring.
+ * CQE Completion Queue Entry: an entry in a CQ ring. Contains error
+ * info, as well as packet size, the ID of the WQ, and the index
+ * of the WQE which completed. Does not contain any packet data.
+ *
+ * EQ Event Queue: a ring of event structs from the hardware informing
+ * us when particular events happen. Many events can point at a
+ * particular CQ which we should then go look at.
+ * EQE Event Queue Entry: an entry on the EQ ring
+ *
+ * UAR User Access Region, a page of the device's PCI BAR which is
+ * tied to particular EQ/CQ/WQ sets and contains doorbells to
+ * ring to arm them for interrupts or wake them up for new work
+ *
+ * RQT RQ Table, a collection of indexed RQs used to refer to the group
+ * as a single unit (e.g. for hashing/RSS).
+ *
+ * TIR Transport Interface Receive, a bucket of resources for the
+ * reception of packets. TIRs have to point at either a single RQ
+ * or a table of RQs (RQT). They then serve as a target for flow
+ * table entries (FEs). TIRs that point at an RQT also contain the
+ * settings for hashing for RSS.
+ *
+ * TIS Transport Interface Send, a bucket of resources associated with
+ * the transmission of packets. In particular, the temporary
+ * resources used for LSO internally in the card are accounted to
+ * a TIS.
+ *
+ * FT Flow Table, a collection of FEs and FGs that can be referred to
+ * as a single entity (e.g. used as a target from another flow
+ * entry or set as the "root" table to handle incoming or outgoing
+ * packets). Packets arriving at a FT are matched against the
+ * FEs in the table until either one matches with a terminating
+ * action or all FEs are exhausted (it's first-match-wins but with
+ * some actions that are non-terminal, like counting actions).
+ *
+ * FG Flow Group, a group of FEs which share a common "mask" (i.e.
+ * they match on the same attributes of packets coming into the
+ * flow).
+ *
+ * FE Flow Entry, an individual set of values to match against
+ * packets entering the flow table, combined with an action to
+ * take upon a successful match. The action we use most is
+ * "forward", which sends the packets to a TIR or another flow
+ * table and then stops further processing within the FE's FT.
+ *
+ * lkey/mkey A reference to something similar to a page table but in the
+ * device's internal onboard MMU. Since Connect-X parts double as
+ * IB cards (lots of RDMA) they have extensive onboard memory mgmt
+ * features which we try very hard not to use. For our WQEs we use
+ * the "reserved" lkey, which is a special value which indicates
+ * that addresses we give are linear addresses and should not be
+ * translated.
+ *
+ * PD Protection Domain, an IB concept. We have to allocate one to
+ * provide as a parameter for new WQs, but we don't do anything
+ * with it.
+ *
+ * TDOM/TD Transport Domain, an IB concept. We allocate one in order to
+ * provide it as a parameter to TIR/TIS creation, but we don't do
+ * anything with it.
+ */
+/*
+ *
+ * Data flow overview
+ * ------------------
+ *
+ * This driver is a MAC ring-enabled driver which maps rings to send and recv
+ * queues in hardware on the device.
+ *
+ * Each SQ and RQ is set up to report to its own individual CQ, to ensure
+ * sufficient space, and simplify the logic needed to work out which buffer
+ * was completed.
+ *
+ * The CQs are then round-robin allocated onto EQs, of which we set up one per
+ * interrupt that the system gives us for the device. Normally this means we
+ * have 8 EQs.
+ *
+ * When we have >= 8 EQs available, we try to allocate only RX or only TX
+ * CQs on each one. The EQs are chosen for RX and TX in an alternating fashion.
+ *
+ * EQ #0 is reserved for all event types other than completion events, and has
+ * no CQs associated with it at any time. EQs #1 and upwards are only used for
+ * handling CQ completion events.
+ *
+ * +------+ +------+ +------+ +---------+
+ * | SQ 0 |---->| CQ 0 |-----+ | EQ 0 |------> | MSI-X 0 | mlxcx_intr_0
+ * +------+ +------+ | +------+ +---------+
+ * |
+ * +------+ +------+ |
+ * | SQ 1 |---->| CQ 1 |---+ | +------+
+ * +------+ +------+ | +---> | |
+ * | | |
+ * +------+ +------+ | | EQ 1 | +---------+
+ * | SQ 2 |---->| CQ 2 |---------> | |------> | MSI-X 1 | mlxcx_intr_n
+ * +------+ +------+ | +---> | | +---------+
+ * | | +------+
+ * | |
+ * ... | |
+ * | | +------+
+ * +------+ +------+ +-----> | |
+ * | RQ 0 |---->| CQ 3 |---------> | | +---------+
+ * +------+ +------+ | | EQ 2 |------> | MSI-X 2 | mlxcx_intr_n
+ * | | | +---------+
+ * +------+ +------+ | +-> | |
+ * | RQ 1 |---->| CQ 4 |-----+ | +------+
+ * +------+ +------+ |
+ * | ....
+ * +------+ +------+ |
+ * | RQ 2 |---->| CQ 5 |-------+
+ * +------+ +------+
+ *
+ * ... (note this diagram does not show RX-only or TX-only EQs)
+ *
+ * For TX, we advertise all of the SQs we create as plain rings to MAC with
+ * no TX groups. This puts MAC in "virtual group" mode where it will allocate
+ * and use the rings as it sees fit.
+ *
+ * For RX, we advertise actual groups in order to make use of hardware
+ * classification.
+ *
+ * The hardware classification we use is based around Flow Tables, and we
+ * currently ignore all of the eswitch features of the card. The NIC VPORT
+ * is always set to promisc mode so that the eswitch sends us all of the
+ * traffic that arrives on the NIC, and we use flow entries to manage
+ * everything.
+ *
+ * We use 2 layers of flow tables for classification: traffic arrives at the
+ * root RX flow table which contains MAC address filters. Those then send
+ * matched traffic to the per-group L1 VLAN filter tables which contain VLAN
+ * presence and VID filters.
+ *
+ * Since these parts only support doing RSS hashing on a single protocol at a
+ * time, we have to use a third layer of flow tables as well to break traffic
+ * down by L4 and L3 protocol (TCPv6, TCPv4, UDPv6, UDPv4, IPv6, IPv4 etc)
+ * so that it can be sent to the appropriate TIR for hashing.
+ *
+ * Incoming packets
+ * + +---------+ +---------+
+ * | +->| group 0 | | group 0 |
+ * | | | vlan ft | +-->| hash ft |
+ * v | | L1 | | | L2 |
+ * +----+----+ | +---------+ | +---------+ +-----+ +-----+------+
+ * | eswitch | | | | | | TCPv6 |--->| TIR |--->| | RQ0 |
+ * +----+----+ | | | | +---------+ +-----+ | +------+
+ * | | | | | | UDPv6 |--->| TIR |--->| | RQ1 |
+ * | | | | | +---------+ +-----+ | +------+
+ * | | | | | | TCPv4 |--->| TIR |--->| | RQ2 |
+ * v | | | | +---------+ +-----+ | RQT +------+
+ * +----+----+ | +---------+ | | UDPv4 |--->| TIR |--->| | ... |
+ * | root rx | | | default |--+ +---------+ +-----+ | | |
+ * | flow tb | | +---------+ | | IPv6 |--->| TIR |--->| | |
+ * | L0 | | | promisc |--+ +---------+ +-----+ | | |
+ * +---------+ | +---------+ ^ | IPv4 |--->| TIR |--->| | |
+ * | bcast |---|---------------+ +---------+ +-----+ +-----+------+
+ * +---------+ | ^ | other |-+
+ * | MAC 0 |---+ | +---------+ | +-----+ +-----+
+ * +---------+ | +->| TIR |--->| RQ0 |
+ * | MAC 1 |-+ | +-----+ +-----+
+ * +---------+ | +---------------+
+ * | MAC 2 |-+ | ^
+ * +---------+ | | |
+ * | MAC 3 |-+ | +---------+ | +---------+
+ * +---------+ | | | group 1 | | | group 1 |
+ * | ..... | +--->| vlan ft | | +>| hash ft |
+ * | | | | L1 | | | | L2 |
+ * +---------+ | +---------+ | | +---------+ +-----+ +-----+------+
+ * | promisc |---+ | VLAN 0 |----+ | TCPv6 |--->| TIR |--->| | RQ3 |
+ * +---------+ +---------+ | +---------+ +-----+ | +------+
+ * | ..... | | | UDPv6 |--->| TIR |--->| | RQ4 |
+ * | | | +---------+ +-----+ | +------+
+ * | | | | TCPv4 |--->| TIR |--->| | RQ5 |
+ * | | | +---------+ +-----+ | RQT +------+
+ * +---------+ | | UDPv4 |--->| TIR |--->| | ... |
+ * | | | +---------+ +-----+ | | |
+ * +---------+ | | IPv6 |--->| TIR |--->| | |
+ * | promisc |--+ +---------+ +-----+ | | |
+ * +---------+ | IPv4 |--->| TIR |--->| | |
+ * +---------+ +-----+ +-----+------+
+ * | other |-+
+ * +---------+ |
+ * ....... | +-----+ +-----+
+ * +->| TIR |--->| RQ3 |
+ * +-----+ +-----+
+ *
+ * Note that the "promisc" flow entries are only set/enabled when promisc
+ * mode is enabled for the NIC. All promisc flow entries point directly at
+ * group 0's hashing flowtable (so all promisc-only traffic lands on group 0,
+ * the "default group" in MAC).
+ *
+ * The "default" entry in the L1 VLAN filter flow tables is used when there
+ * are no VLANs set for the group, to accept any traffic regardless of tag. It
+ * is deleted as soon as a VLAN filter is added (and re-instated if the
+ * last VLAN filter is removed).
+ *
+ * The actual descriptor ring structures for RX on Connect-X4 don't contain any
+ * space for packet data (they're a collection of scatter pointers only). TX
+ * descriptors contain some space for "inline headers" (and the card requires
+ * us to put at least the L2 Ethernet headers there for the eswitch to look at)
+ * but all the rest of the data comes from the gather pointers.
+ *
+ * When we get completions back they simply contain the ring index number of
+ * the WR (work request) which completed. So, we manage the buffers for actual
+ * packet data completely independently of the descriptors in this driver. When
+ * a WR is enqueued in a WQE (work queue entry), we stamp the packet data buffer
+ * with the WQE index that we put it at, and therefore don't have to look at
+ * the original descriptor at all when handling completions.
+ *
+ * For RX, we create sufficient packet data buffers to fill 150% of the
+ * available descriptors for each ring. These all are pre-set-up for DMA and
+ * have an mblk_t associated with them (with desballoc()).
+ *
+ * For TX we either borrow the mblk's memory and DMA bind it (if the packet is
+ * large enough), or we copy it into a pre-allocated buffer set up in the
+ * same way as for RX.
+ */
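+
+/*
+ * Concretely, the copy-vs-bind decision for TX is controlled by the
+ * tx_bind_threshold property (see mlxcx_load_props()). A simplified sketch,
+ * with hypothetical helper names:
+ *
+ *	if (MBLKL(mp) >= mlxp->mlx_props.mldp_tx_bind_threshold)
+ *		b = mlxcx_buf_bind_foreign(...);	(DMA bind, zero-copy)
+ *	else
+ *		b = mlxcx_buf_copy_in(...);		(bcopy into our buffer)
+ */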
+
+/*
+ * Buffer lifecycle: RX
+ * --------------------
+ *
+ * The lifecycle of an mlxcx_buffer_t (packet buffer) used for RX is pretty
+ * straightforward.
+ *
+ * It is created (and has all its memory allocated) at the time of starting up
+ * the RX ring it belongs to. Then it is placed on the "free" list in the
+ * mlxcx_buffer_shard_t associated with its RQ. When mlxcx_rq_refill() wants
+ * more buffers to add to the RQ, it takes one off and marks it as "on WQ"
+ * before making a WQE for it.
+ *
+ * After a completion event occurs, the packet is either discarded (and the
+ * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ *
+ * Once MAC and the rest of the system have finished with the packet, they call
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
+ * buffer_t to the free list.
+ *
+ * At detach/teardown time, buffers are only ever destroyed from the free list.
+ *
+ *
+ * +
+ * |
+ * | mlxcx_buf_create
+ * |
+ * v
+ * +----+----+
+ * | created |
+ * +----+----+
+ * |
+ * |
+ * | mlxcx_buf_return
+ * |
+ * v
+ * mlxcx_buf_destroy +----+----+
+ * +---------| free |<---------------+
+ * | +----+----+ |
+ * | | |
+ * | | | mlxcx_buf_return
+ * v | mlxcx_buf_take |
+ * +---+--+ v |
+ * | dead | +---+---+ |
+ * +------+ | on WQ |- - - - - - - - >O
+ * +---+---+ ^
+ * | |
+ * | |
+ * | mlxcx_buf_loan | mlxcx_buf_mp_return
+ * v |
+ * +-------+--------+ |
+ * | on loan to MAC |----------->O
+ * +----------------+ freemsg()
+ *
+ */
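+
+/*
+ * In code terms, the refill step above reduces to something like the
+ * following simplified sketch (the argument list is illustrative; the real
+ * mlxcx_rq_refill() batches many buffers behind a single doorbell ring):
+ *
+ *	mlxcx_buffer_t *b;
+ *	if ((b = mlxcx_buf_take(mlxp, wq)) != NULL) {
+ *		... build a WQE whose scatter pointer is b's DMA address ...
+ *		... then ring the RQ doorbell ...
+ *	}
+ */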
+
+/*
+ * Buffer lifecycle: TX
+ * --------------------
+ *
+ * mlxcx_buffer_ts used for TX are divided into two kinds: regular buffers, and
+ * "foreign" buffers.
+ *
+ * The former have their memory allocated and DMA bound by this driver, while
+ * the latter (the "foreign" buffers) are on loan from MAC. Their memory is
+ * not owned by us, though we do DMA bind it (and take responsibility for
+ * un-binding it when we're done with them).
+ *
+ * We use separate mlxcx_buf_shard_ts for foreign and local buffers on each
+ * SQ. Thus, there is a separate free list and mutex for each kind.
+ *
+ * Since a TX packet might consist of multiple mblks, we translate each mblk
+ * into exactly one buffer_t. The buffer_ts are chained together in the same
+ * order as the mblks, using the mlb_tx_chain/mlb_tx_chain_entry list_t.
+ *
+ * Each chain of TX buffers may consist of foreign or driver buffers, in any
+ * mixture.
+ *
+ * The head of a TX buffer chain has mlb_tx_head == itself, which distinguishes
+ * it from the rest of the chain buffers.
+ *
+ * TX buffer chains are always returned to the free list by
+ * mlxcx_buf_return_chain(), which takes care of walking the mlb_tx_chain and
+ * freeing all of the members.
+ *
+ * We only call freemsg() once, on the head of the TX buffer chain's original
+ * mblk. This is true whether we copied it or bound it in a foreign buffer.
+ */
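+
+/*
+ * To make the chain structure concrete: returning a chain amounts to popping
+ * every entry off the head buffer's mlb_tx_chain and returning each one, as
+ * in this simplified sketch of what mlxcx_buf_return_chain() does:
+ *
+ *	mlxcx_buffer_t *b;
+ *	while ((b = list_remove_head(&head->mlb_tx_chain)) != NULL)
+ *		mlxcx_buf_return(mlxp, b);
+ *	mlxcx_buf_return(mlxp, head);
+ */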
+
+/*
+ * Startup and command interface
+ * -----------------------------
+ *
+ * The command interface is the primary way in which we give control orders to
+ * the hardware (e.g. actions like "create this queue" or "delete this flow
+ * entry"). The command interface is never used to transmit or receive packets
+ * -- that takes place only on the queues that are set up through it.
+ *
+ * In mlxcx_cmd.c we implement our use of the command interface on top of a
+ * simple taskq. Since it's not performance critical, we busy-wait on command
+ * completions and only process a single command at a time.
+ *
+ * If this becomes a problem later we can wire command completions up to EQ 0
+ * once we have interrupts running.
+ *
+ * The startup/attach process for this card involves a bunch of different steps
+ * which are summarised pretty well in the PRM. We have to send a number of
+ * commands which do different things to start the card up, give it some pages
+ * of our own memory for it to use, then start creating all the entities that
+ * we need to use like EQs, CQs, WQs, as well as their dependencies like PDs
+ * and TDoms.
+ */
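+
+/*
+ * In outline, the busy-wait looks like the following simplified sketch
+ * (names hypothetical; the real implementation is in mlxcx_cmd.c):
+ *
+ *	while (!mlxcx_cmd_done(cmd)) {
+ *		if (gethrtime() > deadline)
+ *			return (B_FALSE);	(command timed out)
+ *		delay(drv_usectohz(mlxcx_cmd_delay));
+ *	}
+ */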
+
+/*
+ * UARs
+ * ----
+ *
+ * The pages of the PCI BAR other than the first few are reserved for use as
+ * "UAR" sections in this device. Each UAR section can be used as a set of
+ * doorbells for our queues.
+ *
+ * Currently we just make a single UAR for all of our queues. It doesn't
+ * seem to be a major limitation yet.
+ *
+ * When we're sending packets through an SQ, the PRM is not awfully clear
+ * about
+ * exactly how we're meant to use the first 16 bytes of the Blueflame buffers
+ * (it's clear on the pattern of alternation you're expected to use between
+ * even and odd for Blueflame sends, but not for regular doorbells).
+ *
+ * Currently we don't do the even-odd alternating pattern for ordinary
+ * doorbells, and we don't use Blueflame at all. This seems to work fine, at
+ * least on Connect-X4 Lx.
+ */
+
+/*
+ * Lock ordering
+ * -------------
+ *
+ * Interrupt side:
+ *
+ * - mleq_mtx
+ * - mlcq_mtx
+ * - mlcq_bufbmtx
+ * - mlwq_mtx
+ * - mlbs_mtx
+ * - mlp_mtx
+ *
+ * GLD side:
+ *
+ * - mlp_mtx
+ * - mlg_mtx
+ * - mlg_*.mlft_mtx
+ * - mlp_*.mlft_mtx
+ * - mlwq_mtx
+ * - mlbs_mtx
+ * - mlcq_bufbmtx
+ * - mleq_mtx
+ * - mlcq_mtx
+ *
+ */
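+
+/*
+ * As an illustration, a GLD-side path that needs both the port and group
+ * locks must take them in the order given above and release them in the
+ * reverse order:
+ *
+ *	mutex_enter(&mlp->mlp_mtx);
+ *	mutex_enter(&g->mlg_mtx);
+ *	...
+ *	mutex_exit(&g->mlg_mtx);
+ *	mutex_exit(&mlp->mlp_mtx);
+ */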
+
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+
+#include <sys/mac_provider.h>
+
+#include <mlxcx.h>
+
+CTASSERT((1 << MLXCX_RX_HASH_FT_SIZE_SHIFT) >= MLXCX_TIRS_PER_GROUP);
+
+#define MLXCX_MODULE_NAME "mlxcx"
+/*
+ * We give this to the firmware, so it has to be in a fixed format that it
+ * understands.
+ */
+#define MLXCX_DRIVER_VERSION "illumos,mlxcx,1.0.0,1,000,000000"
+
+/*
+ * Firmware may take a while to reclaim pages. Try a set number of times.
+ */
+clock_t mlxcx_reclaim_delay = 1000 * 50; /* 50 ms in us */
+uint_t mlxcx_reclaim_tries = 100; /* Wait at most 5000ms */
+
+static void *mlxcx_softstate;
+
+/*
+ * Fault detection thresholds.
+ */
+uint_t mlxcx_doorbell_tries = MLXCX_DOORBELL_TRIES_DFLT;
+uint_t mlxcx_stuck_intr_count = MLXCX_STUCK_INTR_COUNT_DFLT;
+
+static void
+mlxcx_load_props(mlxcx_t *mlxp)
+{
+ mlxcx_drv_props_t *p = &mlxp->mlx_props;
+
+ p->mldp_eq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "eq_size_shift",
+ MLXCX_EQ_SIZE_SHIFT_DFLT);
+ p->mldp_cq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cq_size_shift",
+ MLXCX_CQ_SIZE_SHIFT_DFLT);
+ p->mldp_sq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "sq_size_shift",
+ MLXCX_SQ_SIZE_SHIFT_DFLT);
+ p->mldp_rq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rq_size_shift",
+ MLXCX_RQ_SIZE_SHIFT_DFLT);
+
+ p->mldp_cqemod_period_usec = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cqemod_period_usec",
+ MLXCX_CQEMOD_PERIOD_USEC_DFLT);
+ p->mldp_cqemod_count = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cqemod_count",
+ MLXCX_CQEMOD_COUNT_DFLT);
+ p->mldp_intrmod_period_usec = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "intrmod_period_usec",
+ MLXCX_INTRMOD_PERIOD_USEC_DFLT);
+
+ p->mldp_tx_ngroups = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_ngroups",
+ MLXCX_TX_NGROUPS_DFLT);
+ p->mldp_tx_nrings_per_group = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_nrings_per_group",
+ MLXCX_TX_NRINGS_PER_GROUP_DFLT);
+
+ p->mldp_rx_ngroups_large = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_ngroups_large",
+ MLXCX_RX_NGROUPS_LARGE_DFLT);
+ p->mldp_rx_ngroups_small = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_ngroups_small",
+ MLXCX_RX_NGROUPS_SMALL_DFLT);
+ p->mldp_rx_nrings_per_large_group = ddi_getprop(DDI_DEV_T_ANY,
+ mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
+ "rx_nrings_per_large_group", MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT);
+ p->mldp_rx_nrings_per_small_group = ddi_getprop(DDI_DEV_T_ANY,
+ mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
+ "rx_nrings_per_small_group", MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT);
+
+ p->mldp_ftbl_root_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "ftbl_root_size_shift",
+ MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT);
+
+ p->mldp_tx_bind_threshold = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_bind_threshold",
+ MLXCX_TX_BIND_THRESHOLD_DFLT);
+
+ p->mldp_ftbl_vlan_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "ftbl_vlan_size_shift",
+ MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT);
+
+ p->mldp_eq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY,
+ mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
+ "eq_check_interval_sec", MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT);
+ p->mldp_cq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY,
+ mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
+ "cq_check_interval_sec", MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT);
+ p->mldp_wq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY,
+ mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
+ "wq_check_interval_sec", MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT);
+}
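+
+/*
+ * All of the tunables read above can be overridden from mlxcx.conf using the
+ * same property names passed to ddi_getprop(). For example (the value here
+ * is purely illustrative):
+ *
+ *	rq_size_shift = 9;
+ */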
+
+void
+mlxcx_note(mlxcx_t *mlxp, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (mlxp != NULL && mlxp->mlx_dip != NULL) {
+ vdev_err(mlxp->mlx_dip, CE_NOTE, fmt, ap);
+ } else {
+ vcmn_err(CE_NOTE, fmt, ap);
+ }
+ va_end(ap);
+}
+
+void
+mlxcx_warn(mlxcx_t *mlxp, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (mlxp != NULL && mlxp->mlx_dip != NULL) {
+ vdev_err(mlxp->mlx_dip, CE_WARN, fmt, ap);
+ } else {
+ vcmn_err(CE_WARN, fmt, ap);
+ }
+ va_end(ap);
+}
+
+void
+mlxcx_panic(mlxcx_t *mlxp, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (mlxp != NULL && mlxp->mlx_dip != NULL) {
+ vdev_err(mlxp->mlx_dip, CE_PANIC, fmt, ap);
+ } else {
+ vcmn_err(CE_PANIC, fmt, ap);
+ }
+ va_end(ap);
+}
+
+uint16_t
+mlxcx_get16(mlxcx_t *mlxp, uintptr_t off)
+{
+ uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
+ return (ddi_get16(mlxp->mlx_regs_handle, (void *)addr));
+}
+
+uint32_t
+mlxcx_get32(mlxcx_t *mlxp, uintptr_t off)
+{
+ uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
+ return (ddi_get32(mlxp->mlx_regs_handle, (void *)addr));
+}
+
+uint64_t
+mlxcx_get64(mlxcx_t *mlxp, uintptr_t off)
+{
+ uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
+ return (ddi_get64(mlxp->mlx_regs_handle, (void *)addr));
+}
+
+void
+mlxcx_put32(mlxcx_t *mlxp, uintptr_t off, uint32_t val)
+{
+ uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
+ ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
+}
+
+void
+mlxcx_put64(mlxcx_t *mlxp, uintptr_t off, uint64_t val)
+{
+ uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
+ ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
+}
+
+void
+mlxcx_uar_put32(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint32_t val)
+{
+ /*
+ * The UAR is always inside the first BAR, which we mapped as
+ * mlx_regs.
+ */
+ uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
+ (uintptr_t)mlxp->mlx_regs_base;
+ ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
+}
+
+void
+mlxcx_uar_put64(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint64_t val)
+{
+ uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
+ (uintptr_t)mlxp->mlx_regs_base;
+ ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
+}
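+
+/*
+ * Doorbell writes are then expressed in terms of these helpers, e.g.
+ * (the offset macro here is hypothetical):
+ *
+ *	mlxcx_uar_put32(mlxp, &mlxp->mlx_uar, MLXCX_UAR_ARM_OFFSET, val);
+ */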
+
+static void
+mlxcx_fm_fini(mlxcx_t *mlxp)
+{
+ if (mlxp->mlx_fm_caps == 0)
+ return;
+
+ if (DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps))
+ ddi_fm_handler_unregister(mlxp->mlx_dip);
+
+ if (DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps) ||
+ DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps))
+ pci_ereport_teardown(mlxp->mlx_dip);
+
+ ddi_fm_fini(mlxp->mlx_dip);
+
+ mlxp->mlx_fm_caps = 0;
+}
+
+void
+mlxcx_fm_ereport(mlxcx_t *mlxp, const char *detail)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+
+ if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
+ return;
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+ ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
+ NULL);
+}
+
+static int
+mlxcx_fm_errcb(dev_info_t *dip, ddi_fm_error_t *err, const void *arg)
+{
+ /*
+ * As the driver can always deal with an error in any DMA or
+ * access handle, we can just return the fme_status value.
+ */
+ pci_ereport_post(dip, err, NULL);
+ return (err->fme_status);
+}
+
+static void
+mlxcx_fm_init(mlxcx_t *mlxp)
+{
+ ddi_iblock_cookie_t iblk;
+ int def = DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
+ DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE;
+
+ mlxp->mlx_fm_caps = ddi_prop_get_int(DDI_DEV_T_ANY, mlxp->mlx_dip,
+ DDI_PROP_DONTPASS, "fm_capable", def);
+
+ if (mlxp->mlx_fm_caps < 0) {
+ mlxp->mlx_fm_caps = 0;
+ }
+ mlxp->mlx_fm_caps &= def;
+
+ if (mlxp->mlx_fm_caps == 0)
+ return;
+
+ ddi_fm_init(mlxp->mlx_dip, &mlxp->mlx_fm_caps, &iblk);
+ if (DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps) ||
+ DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) {
+ pci_ereport_setup(mlxp->mlx_dip);
+ }
+ if (DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) {
+ ddi_fm_handler_register(mlxp->mlx_dip, mlxcx_fm_errcb,
+ (void *)mlxp);
+ }
+}
+
+static void
+mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
+{
+ mlxcx_buffer_t *buf;
+
+ mutex_enter(&s->mlbs_mtx);
+ while (!list_is_empty(&s->mlbs_busy))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ while ((buf = list_head(&s->mlbs_free)) != NULL) {
+ mlxcx_buf_destroy(mlxp, buf);
+ }
+ list_destroy(&s->mlbs_free);
+ list_destroy(&s->mlbs_busy);
+ mutex_exit(&s->mlbs_mtx);
+
+ cv_destroy(&s->mlbs_free_nonempty);
+ mutex_destroy(&s->mlbs_mtx);
+}
+
+static void
+mlxcx_teardown_bufs(mlxcx_t *mlxp)
+{
+ mlxcx_buf_shard_t *s;
+
+ while ((s = list_remove_head(&mlxp->mlx_buf_shards)) != NULL) {
+ mlxcx_mlbs_teardown(mlxp, s);
+ kmem_free(s, sizeof (mlxcx_buf_shard_t));
+ }
+ list_destroy(&mlxp->mlx_buf_shards);
+
+ kmem_cache_destroy(mlxp->mlx_bufs_cache);
+}
+
+static void
+mlxcx_teardown_pages(mlxcx_t *mlxp)
+{
+ uint_t nzeros = 0;
+
+ mutex_enter(&mlxp->mlx_pagemtx);
+
+ while (mlxp->mlx_npages > 0) {
+ int32_t req, ret;
+ uint64_t pas[MLXCX_MANAGE_PAGES_MAX_PAGES];
+
+ ASSERT0(avl_is_empty(&mlxp->mlx_pages));
+ req = MIN(mlxp->mlx_npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
+
+ if (!mlxcx_cmd_return_pages(mlxp, req, pas, &ret)) {
+ mlxcx_warn(mlxp, "hardware refused to return pages, "
+ "leaking %u remaining pages", mlxp->mlx_npages);
+ goto out;
+ }
+
+ for (int32_t i = 0; i < ret; i++) {
+ mlxcx_dev_page_t *mdp, probe;
+ bzero(&probe, sizeof (probe));
+ probe.mxdp_pa = pas[i];
+
+ mdp = avl_find(&mlxp->mlx_pages, &probe, NULL);
+
+ if (mdp != NULL) {
+ avl_remove(&mlxp->mlx_pages, mdp);
+ mlxp->mlx_npages--;
+ mlxcx_dma_free(&mdp->mxdp_dma);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ } else {
+ mlxcx_panic(mlxp, "hardware returned a page "
+ "with PA 0x%" PRIx64 " but we have no "
+ "record of giving out such a page", pas[i]);
+ }
+ }
+
+ /*
+ * If no pages were returned, note that fact.
+ */
+ if (ret == 0) {
+ nzeros++;
+ if (nzeros > mlxcx_reclaim_tries) {
+ mlxcx_warn(mlxp, "hardware refused to return "
+ "pages, leaking %u remaining pages",
+ mlxp->mlx_npages);
+ goto out;
+ }
+ delay(drv_usectohz(mlxcx_reclaim_delay));
+ }
+ }
+
+ avl_destroy(&mlxp->mlx_pages);
+
+out:
+ mutex_exit(&mlxp->mlx_pagemtx);
+ mutex_destroy(&mlxp->mlx_pagemtx);
+}
+
+static boolean_t
+mlxcx_eq_alloc_dma(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
+{
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+ boolean_t ret;
+ size_t sz, i;
+
+ VERIFY0(mleq->mleq_state & MLXCX_EQ_ALLOC);
+
+ mleq->mleq_entshift = mlxp->mlx_props.mldp_eq_size_shift;
+ mleq->mleq_nents = (1 << mleq->mleq_entshift);
+ sz = mleq->mleq_nents * sizeof (mlxcx_eventq_ent_t);
+ ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0);
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_queue_attr(mlxp, &attr);
+
+ ret = mlxcx_dma_alloc(mlxp, &mleq->mleq_dma, &attr, &acc,
+ B_TRUE, sz, B_TRUE);
+ if (!ret) {
+ mlxcx_warn(mlxp, "failed to allocate EQ memory");
+ return (B_FALSE);
+ }
+
+ mleq->mleq_ent = (mlxcx_eventq_ent_t *)mleq->mleq_dma.mxdb_va;
+
+ for (i = 0; i < mleq->mleq_nents; ++i)
+ mleq->mleq_ent[i].mleqe_owner = MLXCX_EQ_OWNER_INIT;
+
+ mleq->mleq_state |= MLXCX_EQ_ALLOC;
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_eq_rele_dma(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
+{
+ VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC);
+ if (mleq->mleq_state & MLXCX_EQ_CREATED)
+ VERIFY(mleq->mleq_state & MLXCX_EQ_DESTROYED);
+
+ mlxcx_dma_free(&mleq->mleq_dma);
+ mleq->mleq_ent = NULL;
+
+ mleq->mleq_state &= ~MLXCX_EQ_ALLOC;
+}
+
+void
+mlxcx_teardown_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *ft)
+{
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_entry_t *fe;
+ int i;
+
+ ASSERT(mutex_owned(&ft->mlft_mtx));
+
+ for (i = ft->mlft_nents - 1; i >= 0; --i) {
+ fe = &ft->mlft_ent[i];
+ if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
+ mlxcx_panic(mlxp, "failed to delete flow "
+ "entry %u on table %u", i,
+ ft->mlft_num);
+ }
+ }
+ }
+
+ while ((fg = list_remove_head(&ft->mlft_groups)) != NULL) {
+ if (fg->mlfg_state & MLXCX_FLOW_GROUP_CREATED &&
+ !(fg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_flow_group(mlxp, fg)) {
+ mlxcx_panic(mlxp, "failed to destroy flow "
+ "group %u", fg->mlfg_num);
+ }
+ }
+ kmem_free(fg, sizeof (mlxcx_flow_group_t));
+ }
+ list_destroy(&ft->mlft_groups);
+ if (ft->mlft_state & MLXCX_FLOW_TABLE_CREATED &&
+ !(ft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_flow_table(mlxp, ft)) {
+ mlxcx_panic(mlxp, "failed to destroy flow table %u",
+ ft->mlft_num);
+ }
+ }
+ kmem_free(ft->mlft_ent, ft->mlft_entsize);
+ ft->mlft_ent = NULL;
+ mutex_exit(&ft->mlft_mtx);
+ mutex_destroy(&ft->mlft_mtx);
+ kmem_free(ft, sizeof (mlxcx_flow_table_t));
+}
+
+static void
+mlxcx_teardown_ports(mlxcx_t *mlxp)
+{
+ uint_t i;
+ mlxcx_port_t *p;
+ mlxcx_flow_table_t *ft;
+
+ for (i = 0; i < mlxp->mlx_nports; ++i) {
+ p = &mlxp->mlx_ports[i];
+ if (!(p->mlp_init & MLXCX_PORT_INIT))
+ continue;
+ mutex_enter(&p->mlp_mtx);
+ if ((ft = p->mlp_rx_flow) != NULL) {
+ mutex_enter(&ft->mlft_mtx);
+ /*
+ * teardown_flow_table() will destroy the mutex, so
+ * we don't release it here.
+ */
+ mlxcx_teardown_flow_table(mlxp, ft);
+ }
+ mutex_exit(&p->mlp_mtx);
+ mutex_destroy(&p->mlp_mtx);
+ p->mlp_init &= ~MLXCX_PORT_INIT;
+ }
+
+ kmem_free(mlxp->mlx_ports, mlxp->mlx_ports_size);
+ mlxp->mlx_ports = NULL;
+}
+
+static void
+mlxcx_teardown_wqs(mlxcx_t *mlxp)
+{
+ mlxcx_work_queue_t *mlwq;
+
+ while ((mlwq = list_head(&mlxp->mlx_wqs)) != NULL) {
+ mlxcx_wq_teardown(mlxp, mlwq);
+ }
+ list_destroy(&mlxp->mlx_wqs);
+}
+
+static void
+mlxcx_teardown_cqs(mlxcx_t *mlxp)
+{
+ mlxcx_completion_queue_t *mlcq;
+
+ while ((mlcq = list_head(&mlxp->mlx_cqs)) != NULL) {
+ mlxcx_cq_teardown(mlxp, mlcq);
+ }
+ list_destroy(&mlxp->mlx_cqs);
+}
+
+static void
+mlxcx_teardown_eqs(mlxcx_t *mlxp)
+{
+ mlxcx_event_queue_t *mleq;
+ uint_t i;
+
+ for (i = 0; i < mlxp->mlx_intr_count; ++i) {
+ mleq = &mlxp->mlx_eqs[i];
+ mutex_enter(&mleq->mleq_mtx);
+ if ((mleq->mleq_state & MLXCX_EQ_CREATED) &&
+ !(mleq->mleq_state & MLXCX_EQ_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_eq(mlxp, mleq)) {
+ mlxcx_warn(mlxp, "failed to destroy "
+ "event queue idx %u eqn %u",
+ i, mleq->mleq_num);
+ }
+ }
+ if (mleq->mleq_state & MLXCX_EQ_ALLOC) {
+ mlxcx_eq_rele_dma(mlxp, mleq);
+ }
+ mutex_exit(&mleq->mleq_mtx);
+ }
+}
+
+static void
+mlxcx_teardown_checktimers(mlxcx_t *mlxp)
+{
+ if (mlxp->mlx_props.mldp_eq_check_interval_sec > 0)
+ ddi_periodic_delete(mlxp->mlx_eq_checktimer);
+ if (mlxp->mlx_props.mldp_cq_check_interval_sec > 0)
+ ddi_periodic_delete(mlxp->mlx_cq_checktimer);
+ if (mlxp->mlx_props.mldp_wq_check_interval_sec > 0)
+ ddi_periodic_delete(mlxp->mlx_wq_checktimer);
+}
+
+static void
+mlxcx_teardown(mlxcx_t *mlxp)
+{
+ uint_t i;
+ dev_info_t *dip = mlxp->mlx_dip;
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_GROUPS) {
+ mlxcx_teardown_groups(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_GROUPS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_CHKTIMERS) {
+ mlxcx_teardown_checktimers(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_CHKTIMERS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_WQS) {
+ mlxcx_teardown_wqs(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_WQS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_CQS) {
+ mlxcx_teardown_cqs(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_CQS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_BUFS) {
+ mlxcx_teardown_bufs(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_BUFS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_PORTS) {
+ mlxcx_teardown_ports(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_PORTS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_INTRS) {
+ mlxcx_teardown_eqs(mlxp);
+ mlxcx_intr_teardown(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_INTRS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_UAR_PD_TD) {
+ if (mlxp->mlx_uar.mlu_allocated) {
+ if (!mlxcx_cmd_dealloc_uar(mlxp, &mlxp->mlx_uar)) {
+ mlxcx_warn(mlxp, "failed to release UAR");
+ }
+ for (i = 0; i < MLXCX_BF_PER_UAR; ++i)
+ mutex_destroy(&mlxp->mlx_uar.mlu_bf[i].mbf_mtx);
+ }
+ if (mlxp->mlx_pd.mlpd_allocated &&
+ !mlxcx_cmd_dealloc_pd(mlxp, &mlxp->mlx_pd)) {
+ mlxcx_warn(mlxp, "failed to release PD");
+ }
+ if (mlxp->mlx_tdom.mltd_allocated &&
+ !mlxcx_cmd_dealloc_tdom(mlxp, &mlxp->mlx_tdom)) {
+ mlxcx_warn(mlxp, "failed to release TDOM");
+ }
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_UAR_PD_TD;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_INIT_HCA) {
+ if (!mlxcx_cmd_teardown_hca(mlxp)) {
+ mlxcx_warn(mlxp, "failed to send teardown HCA "
+ "command during device detach");
+ }
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_INIT_HCA;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_PAGE_LIST) {
+ mlxcx_teardown_pages(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_PAGE_LIST;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_ENABLE_HCA) {
+ if (!mlxcx_cmd_disable_hca(mlxp)) {
+ mlxcx_warn(mlxp, "failed to send DISABLE HCA command "
+ "during device detach");
+ }
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_ENABLE_HCA;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_CMD) {
+ mlxcx_cmd_queue_fini(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_CMD;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_CAPS) {
+ kmem_free(mlxp->mlx_caps, sizeof (mlxcx_caps_t));
+ mlxp->mlx_caps = NULL;
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_CAPS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_REGS) {
+ ddi_regs_map_free(&mlxp->mlx_regs_handle);
+ mlxp->mlx_regs_handle = NULL;
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_REGS;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_PCI_CONFIG) {
+ pci_config_teardown(&mlxp->mlx_cfg_handle);
+ mlxp->mlx_cfg_handle = NULL;
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_PCI_CONFIG;
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_FM) {
+ mlxcx_fm_fini(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_FM;
+ }
+
+ VERIFY3S(mlxp->mlx_attach, ==, 0);
+ ddi_soft_state_free(mlxcx_softstate, mlxp->mlx_inst);
+ ddi_set_driver_private(dip, NULL);
+}
+
+static boolean_t
+mlxcx_regs_map(mlxcx_t *mlxp)
+{
+ off_t memsize;
+ int ret;
+ ddi_device_acc_attr_t da;
+
+ if (ddi_dev_regsize(mlxp->mlx_dip, MLXCX_REG_NUMBER, &memsize) !=
+ DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "failed to get register set size");
+ return (B_FALSE);
+ }
+
+ /*
+ * All data in the main BAR is kept in big-endian even though it's a PCI
+ * device.
+ */
+ bzero(&da, sizeof (ddi_device_acc_attr_t));
+ da.devacc_attr_version = DDI_DEVICE_ATTR_V0;
+ da.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
+ da.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
+ if (DDI_FM_ACC_ERR_CAP(mlxp->mlx_fm_caps)) {
+ da.devacc_attr_access = DDI_FLAGERR_ACC;
+ } else {
+ da.devacc_attr_access = DDI_DEFAULT_ACC;
+ }
+
+ ret = ddi_regs_map_setup(mlxp->mlx_dip, MLXCX_REG_NUMBER,
+ &mlxp->mlx_regs_base, 0, memsize, &da, &mlxp->mlx_regs_handle);
+
+ if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "failed to map device registers: %d", ret);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static boolean_t
+mlxcx_check_issi(mlxcx_t *mlxp)
+{
+ uint32_t issi;
+
+ if (!mlxcx_cmd_query_issi(mlxp, &issi)) {
+ mlxcx_warn(mlxp, "failed to get ISSI");
+ return (B_FALSE);
+ }
+
+ if ((issi & (1 << MLXCX_CURRENT_ISSI)) == 0) {
+ mlxcx_warn(mlxp, "hardware does not support software ISSI, "
+ "hw vector 0x%x, sw version %u", issi, MLXCX_CURRENT_ISSI);
+ return (B_FALSE);
+ }
+
+ if (!mlxcx_cmd_set_issi(mlxp, MLXCX_CURRENT_ISSI)) {
+ mlxcx_warn(mlxp, "failed to set ISSI to %u",
+ MLXCX_CURRENT_ISSI);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_give_pages(mlxcx_t *mlxp, int32_t npages)
+{
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+ int32_t i;
+ list_t plist;
+ mlxcx_dev_page_t *mdp;
+ const ddi_dma_cookie_t *ck;
+
+ /*
+ * If there are no pages required, then we're done here.
+ */
+ if (npages <= 0) {
+ return (B_TRUE);
+ }
+
+ list_create(&plist, sizeof (mlxcx_dev_page_t),
+ offsetof(mlxcx_dev_page_t, mxdp_list));
+
+ for (i = 0; i < npages; i++) {
+ mdp = kmem_zalloc(sizeof (mlxcx_dev_page_t), KM_SLEEP);
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_page_attr(mlxp, &attr);
+ if (!mlxcx_dma_alloc(mlxp, &mdp->mxdp_dma, &attr, &acc,
+ B_TRUE, MLXCX_HW_PAGE_SIZE, B_TRUE)) {
+ mlxcx_warn(mlxp, "failed to allocate 4k page %u/%u", i,
+ npages);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ goto cleanup_npages;
+ }
+ ck = mlxcx_dma_cookie_one(&mdp->mxdp_dma);
+ mdp->mxdp_pa = ck->dmac_laddress;
+
+ list_insert_tail(&plist, mdp);
+ }
+
+ /*
+ * Now that all of the pages have been allocated, give them to hardware
+ * in chunks.
+ */
+ while (npages > 0) {
+ mlxcx_dev_page_t *pages[MLXCX_MANAGE_PAGES_MAX_PAGES];
+ int32_t togive = MIN(MLXCX_MANAGE_PAGES_MAX_PAGES, npages);
+
+ for (i = 0; i < togive; i++) {
+ pages[i] = list_remove_head(&plist);
+ }
+
+ if (!mlxcx_cmd_give_pages(mlxp,
+ MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES, togive, pages)) {
+ mlxcx_warn(mlxp, "!hardware refused our gift of %u "
+ "pages!", togive);
+ for (i = 0; i < togive; i++) {
+ list_insert_tail(&plist, pages[i]);
+ }
+ goto cleanup_npages;
+ }
+
+ mutex_enter(&mlxp->mlx_pagemtx);
+ for (i = 0; i < togive; i++) {
+ avl_add(&mlxp->mlx_pages, pages[i]);
+ }
+ mlxp->mlx_npages += togive;
+ mutex_exit(&mlxp->mlx_pagemtx);
+ npages -= togive;
+ }
+
+ list_destroy(&plist);
+
+ return (B_TRUE);
+
+cleanup_npages:
+ while ((mdp = list_remove_head(&plist)) != NULL) {
+ mlxcx_dma_free(&mdp->mxdp_dma);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ }
+ list_destroy(&plist);
+ return (B_FALSE);
+}
+
+static boolean_t
+mlxcx_init_pages(mlxcx_t *mlxp, uint_t type)
+{
+ int32_t npages;
+
+ if (!mlxcx_cmd_query_pages(mlxp, type, &npages)) {
+ mlxcx_warn(mlxp, "failed to determine boot pages");
+ return (B_FALSE);
+ }
+
+ return (mlxcx_give_pages(mlxp, npages));
+}
+
+static int
+mlxcx_bufs_cache_constr(void *arg, void *cookie, int kmflags)
+{
+ mlxcx_t *mlxp = cookie;
+ mlxcx_buffer_t *b = arg;
+
+ bzero(b, sizeof (mlxcx_buffer_t));
+ b->mlb_mlx = mlxp;
+ b->mlb_state = MLXCX_BUFFER_INIT;
+ list_create(&b->mlb_tx_chain, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_tx_chain_entry));
+
+ return (0);
+}
+
+static void
+mlxcx_bufs_cache_destr(void *arg, void *cookie)
+{
+ mlxcx_t *mlxp = cookie;
+ mlxcx_buffer_t *b = arg;
+ VERIFY3P(b->mlb_mlx, ==, mlxp);
+ VERIFY(b->mlb_state == MLXCX_BUFFER_INIT);
+ list_destroy(&b->mlb_tx_chain);
+}
+
+mlxcx_buf_shard_t *
+mlxcx_mlbs_create(mlxcx_t *mlxp)
+{
+ mlxcx_buf_shard_t *s;
+
+ s = kmem_zalloc(sizeof (mlxcx_buf_shard_t), KM_SLEEP);
+
+ mutex_init(&s->mlbs_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ list_create(&s->mlbs_busy, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
+ cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
+
+ list_insert_tail(&mlxp->mlx_buf_shards, s);
+
+ return (s);
+}
+
+static boolean_t
+mlxcx_setup_bufs(mlxcx_t *mlxp)
+{
+ char namebuf[KSTAT_STRLEN];
+
+ (void) snprintf(namebuf, KSTAT_STRLEN, "mlxcx%d_bufs_cache",
+ ddi_get_instance(mlxp->mlx_dip));
+ mlxp->mlx_bufs_cache = kmem_cache_create(namebuf,
+ sizeof (mlxcx_buffer_t), sizeof (uint64_t),
+ mlxcx_bufs_cache_constr, mlxcx_bufs_cache_destr,
+ NULL, mlxp, NULL, 0);
+
+ list_create(&mlxp->mlx_buf_shards, sizeof (mlxcx_buf_shard_t),
+ offsetof(mlxcx_buf_shard_t, mlbs_entry));
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_fm_qstate_ereport(mlxcx_t *mlxp, const char *qtype, uint32_t qnum,
+ const char *state, uint8_t statenum)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+
+ if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
+ return;
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
+ MLXCX_FM_SERVICE_MLXCX, "qstate.err");
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+
+ ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
+ "state", DATA_TYPE_STRING, state,
+ "state_num", DATA_TYPE_UINT8, statenum,
+ "qtype", DATA_TYPE_STRING, qtype,
+ "qnum", DATA_TYPE_UINT32, qnum,
+ NULL);
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_DEGRADED);
+}
+
+static void
+mlxcx_eq_check(void *arg)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_event_queue_t *eq;
+ mlxcx_eventq_ctx_t ctx;
+ const char *str;
+
+ uint_t i;
+
+ for (i = 0; i < mlxp->mlx_intr_count; ++i) {
+ eq = &mlxp->mlx_eqs[i];
+ if (!(eq->mleq_state & MLXCX_EQ_CREATED) ||
+ (eq->mleq_state & MLXCX_EQ_DESTROYED))
+ continue;
+ mutex_enter(&eq->mleq_mtx);
+ if (!mlxcx_cmd_query_eq(mlxp, eq, &ctx)) {
+ mutex_exit(&eq->mleq_mtx);
+ continue;
+ }
+
+ str = "???";
+ switch (ctx.mleqc_status) {
+ case MLXCX_EQ_STATUS_OK:
+ break;
+ case MLXCX_EQ_STATUS_WRITE_FAILURE:
+ str = "WRITE_FAILURE";
+ break;
+ }
+ if (ctx.mleqc_status != MLXCX_EQ_STATUS_OK) {
+ mlxcx_fm_qstate_ereport(mlxp, "event",
+ eq->mleq_num, str, ctx.mleqc_status);
+ mlxcx_warn(mlxp, "EQ %u is in bad status: %x (%s)",
+ eq->mleq_intr_index, ctx.mleqc_status, str);
+ }
+
+ if (ctx.mleqc_state != MLXCX_EQ_ST_ARMED &&
+ (eq->mleq_state & MLXCX_EQ_ARMED)) {
+ if (eq->mleq_cc == eq->mleq_check_disarm_cc &&
+ ++eq->mleq_check_disarm_cnt >= 3) {
+ mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_STALL);
+ mlxcx_warn(mlxp, "EQ %u isn't armed",
+ eq->mleq_intr_index);
+ }
+ eq->mleq_check_disarm_cc = eq->mleq_cc;
+ } else {
+ eq->mleq_check_disarm_cc = 0;
+ eq->mleq_check_disarm_cnt = 0;
+ }
+
+ mutex_exit(&eq->mleq_mtx);
+ }
+}
+
+static void
+mlxcx_cq_check(void *arg)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_completionq_ctx_t ctx;
+ const char *str, *type;
+ uint_t v;
+
+ for (cq = list_head(&mlxp->mlx_cqs); cq != NULL;
+ cq = list_next(&mlxp->mlx_cqs, cq)) {
+ mutex_enter(&cq->mlcq_mtx);
+ if (!(cq->mlcq_state & MLXCX_CQ_CREATED) ||
+ (cq->mlcq_state & MLXCX_CQ_DESTROYED) ||
+ (cq->mlcq_state & MLXCX_CQ_TEARDOWN)) {
+ mutex_exit(&cq->mlcq_mtx);
+ continue;
+ }
+ if (cq->mlcq_fm_repd_qstate) {
+ mutex_exit(&cq->mlcq_mtx);
+ continue;
+ }
+ if (!mlxcx_cmd_query_cq(mlxp, cq, &ctx)) {
+ mutex_exit(&cq->mlcq_mtx);
+ continue;
+ }
+ if (cq->mlcq_wq != NULL) {
+ mlxcx_work_queue_t *wq = cq->mlcq_wq;
+ if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ)
+ type = "rx ";
+ else if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ)
+ type = "tx ";
+ else
+ type = "";
+ } else {
+ type = "";
+ }
+
+ str = "???";
+ v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATUS);
+ switch (v) {
+ case MLXCX_CQC_STATUS_OK:
+ break;
+ case MLXCX_CQC_STATUS_OVERFLOW:
+ str = "OVERFLOW";
+ break;
+ case MLXCX_CQC_STATUS_WRITE_FAIL:
+ str = "WRITE_FAIL";
+ break;
+ case MLXCX_CQC_STATUS_INVALID:
+ str = "INVALID";
+ break;
+ }
+ if (v != MLXCX_CQC_STATUS_OK) {
+ mlxcx_fm_qstate_ereport(mlxp, "completion",
+ cq->mlcq_num, str, v);
+ mlxcx_warn(mlxp, "%sCQ 0x%x is in bad status: %x (%s)",
+ type, cq->mlcq_num, v, str);
+ cq->mlcq_fm_repd_qstate = B_TRUE;
+ }
+
+ v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATE);
+ if (v != MLXCX_CQC_STATE_ARMED &&
+ (cq->mlcq_state & MLXCX_CQ_ARMED) &&
+ !(cq->mlcq_state & MLXCX_CQ_POLLING)) {
+ if (cq->mlcq_cc == cq->mlcq_check_disarm_cc &&
+ ++cq->mlcq_check_disarm_cnt >= 3) {
+ mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_STALL);
+ mlxcx_warn(mlxp, "%sCQ 0x%x (%p) isn't armed",
+ type, cq->mlcq_num, cq);
+ }
+ cq->mlcq_check_disarm_cc = cq->mlcq_cc;
+ } else {
+ cq->mlcq_check_disarm_cnt = 0;
+ cq->mlcq_check_disarm_cc = 0;
+ }
+ mutex_exit(&cq->mlcq_mtx);
+ }
+}
+
+void
+mlxcx_check_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *sq)
+{
+ mlxcx_sq_ctx_t ctx;
+ mlxcx_sq_state_t state;
+
+ ASSERT(mutex_owned(&sq->mlwq_mtx));
+
+ if (!mlxcx_cmd_query_sq(mlxp, sq, &ctx))
+ return;
+
+ ASSERT3U(from_be24(ctx.mlsqc_cqn), ==, sq->mlwq_cq->mlcq_num);
+ state = get_bits32(ctx.mlsqc_flags, MLXCX_SQ_STATE);
+ switch (state) {
+ case MLXCX_SQ_STATE_RST:
+ if (sq->mlwq_state & MLXCX_WQ_STARTED) {
+ mlxcx_fm_qstate_ereport(mlxp, "send",
+ sq->mlwq_num, "RST", state);
+ sq->mlwq_fm_repd_qstate = B_TRUE;
+ }
+ break;
+ case MLXCX_SQ_STATE_RDY:
+ if (!(sq->mlwq_state & MLXCX_WQ_STARTED)) {
+ mlxcx_fm_qstate_ereport(mlxp, "send",
+ sq->mlwq_num, "RDY", state);
+ sq->mlwq_fm_repd_qstate = B_TRUE;
+ }
+ break;
+ case MLXCX_SQ_STATE_ERR:
+ mlxcx_fm_qstate_ereport(mlxp, "send",
+ sq->mlwq_num, "ERR", state);
+ sq->mlwq_fm_repd_qstate = B_TRUE;
+ break;
+ default:
+ mlxcx_fm_qstate_ereport(mlxp, "send",
+ sq->mlwq_num, "???", state);
+ sq->mlwq_fm_repd_qstate = B_TRUE;
+ break;
+ }
+}
+
+void
+mlxcx_check_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *rq)
+{
+ mlxcx_rq_ctx_t ctx;
+ mlxcx_rq_state_t state;
+
+ ASSERT(mutex_owned(&rq->mlwq_mtx));
+
+ if (!mlxcx_cmd_query_rq(mlxp, rq, &ctx))
+ return;
+
+ ASSERT3U(from_be24(ctx.mlrqc_cqn), ==, rq->mlwq_cq->mlcq_num);
+ state = get_bits32(ctx.mlrqc_flags, MLXCX_RQ_STATE);
+ switch (state) {
+ case MLXCX_RQ_STATE_RST:
+ if (rq->mlwq_state & MLXCX_WQ_STARTED) {
+ mlxcx_fm_qstate_ereport(mlxp, "receive",
+ rq->mlwq_num, "RST", state);
+ rq->mlwq_fm_repd_qstate = B_TRUE;
+ }
+ break;
+ case MLXCX_RQ_STATE_RDY:
+ if (!(rq->mlwq_state & MLXCX_WQ_STARTED)) {
+ mlxcx_fm_qstate_ereport(mlxp, "receive",
+ rq->mlwq_num, "RDY", state);
+ rq->mlwq_fm_repd_qstate = B_TRUE;
+ }
+ break;
+ case MLXCX_RQ_STATE_ERR:
+ mlxcx_fm_qstate_ereport(mlxp, "receive",
+ rq->mlwq_num, "ERR", state);
+ rq->mlwq_fm_repd_qstate = B_TRUE;
+ break;
+ default:
+ mlxcx_fm_qstate_ereport(mlxp, "receive",
+ rq->mlwq_num, "???", state);
+ rq->mlwq_fm_repd_qstate = B_TRUE;
+ break;
+ }
+}
+
+static void
+mlxcx_wq_check(void *arg)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_work_queue_t *wq;
+
+ for (wq = list_head(&mlxp->mlx_wqs); wq != NULL;
+ wq = list_next(&mlxp->mlx_wqs, wq)) {
+ mutex_enter(&wq->mlwq_mtx);
+ if (!(wq->mlwq_state & MLXCX_WQ_CREATED) ||
+ (wq->mlwq_state & MLXCX_WQ_DESTROYED) ||
+ (wq->mlwq_state & MLXCX_WQ_TEARDOWN)) {
+ mutex_exit(&wq->mlwq_mtx);
+ continue;
+ }
+ if (wq->mlwq_fm_repd_qstate) {
+ mutex_exit(&wq->mlwq_mtx);
+ continue;
+ }
+ switch (wq->mlwq_type) {
+ case MLXCX_WQ_TYPE_SENDQ:
+ mlxcx_check_sq(mlxp, wq);
+ break;
+ case MLXCX_WQ_TYPE_RECVQ:
+ mlxcx_check_rq(mlxp, wq);
+ break;
+ }
+ mutex_exit(&wq->mlwq_mtx);
+ }
+}
+
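+/*
+ * Start the periodic queue state check timers. A check interval of zero in
+ * mlxcx.conf disables the corresponding timer entirely; its ddi_periodic
+ * handle is simply never allocated.
+ */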
+static boolean_t
+mlxcx_setup_checktimers(mlxcx_t *mlxp)
+{
+ if (mlxp->mlx_props.mldp_eq_check_interval_sec > 0) {
+ mlxp->mlx_eq_checktimer = ddi_periodic_add(mlxcx_eq_check, mlxp,
+ mlxp->mlx_props.mldp_eq_check_interval_sec * NANOSEC,
+ DDI_IPL_0);
+ }
+ if (mlxp->mlx_props.mldp_cq_check_interval_sec > 0) {
+ mlxp->mlx_cq_checktimer = ddi_periodic_add(mlxcx_cq_check, mlxp,
+ mlxp->mlx_props.mldp_cq_check_interval_sec * NANOSEC,
+ DDI_IPL_0);
+ }
+ if (mlxp->mlx_props.mldp_wq_check_interval_sec > 0) {
+ mlxp->mlx_wq_checktimer = ddi_periodic_add(mlxcx_wq_check, mlxp,
+ mlxp->mlx_props.mldp_wq_check_interval_sec * NANOSEC,
+ DDI_IPL_0);
+ }
+ return (B_TRUE);
+}
+
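+/*
+ * AVL comparators for the dmac flow entry, ring group MAC and device page
+ * trees. As avl_create() requires, each returns only -1, 0 or 1.
+ */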
+int
+mlxcx_dmac_fe_compare(const void *arg0, const void *arg1)
+{
+ const mlxcx_flow_entry_t *left = arg0;
+ const mlxcx_flow_entry_t *right = arg1;
+ int bcmpr;
+
+ bcmpr = memcmp(left->mlfe_dmac, right->mlfe_dmac,
+ sizeof (left->mlfe_dmac));
+ if (bcmpr < 0)
+ return (-1);
+ if (bcmpr > 0)
+ return (1);
+ if (left->mlfe_vid < right->mlfe_vid)
+ return (-1);
+ if (left->mlfe_vid > right->mlfe_vid)
+ return (1);
+ return (0);
+}
+
+int
+mlxcx_grmac_compare(const void *arg0, const void *arg1)
+{
+ const mlxcx_group_mac_t *left = arg0;
+ const mlxcx_group_mac_t *right = arg1;
+ int bcmpr;
+
+ bcmpr = memcmp(left->mlgm_mac, right->mlgm_mac,
+ sizeof (left->mlgm_mac));
+ if (bcmpr < 0)
+ return (-1);
+ if (bcmpr > 0)
+ return (1);
+ return (0);
+}
+
+int
+mlxcx_page_compare(const void *arg0, const void *arg1)
+{
+ const mlxcx_dev_page_t *p0 = arg0;
+ const mlxcx_dev_page_t *p1 = arg1;
+
+ if (p0->mxdp_pa < p1->mxdp_pa)
+ return (-1);
+ if (p0->mxdp_pa > p1->mxdp_pa)
+ return (1);
+ return (0);
+}
+
+static boolean_t
+mlxcx_setup_ports(mlxcx_t *mlxp)
+{
+ uint_t i, j;
+ mlxcx_port_t *p;
+ mlxcx_flow_table_t *ft;
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_entry_t *fe;
+
+ VERIFY3U(mlxp->mlx_nports, >, 0);
+ mlxp->mlx_ports_size = mlxp->mlx_nports * sizeof (mlxcx_port_t);
+ mlxp->mlx_ports = kmem_zalloc(mlxp->mlx_ports_size, KM_SLEEP);
+
+ for (i = 0; i < mlxp->mlx_nports; ++i) {
+ p = &mlxp->mlx_ports[i];
+ p->mlp_num = i;
+ p->mlp_init |= MLXCX_PORT_INIT;
+ mutex_init(&p->mlp_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ mutex_enter(&p->mlp_mtx);
+ if (!mlxcx_cmd_query_nic_vport_ctx(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ if (!mlxcx_cmd_query_port_mtu(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ if (!mlxcx_cmd_query_port_status(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ if (!mlxcx_cmd_query_port_speed(mlxp, p)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, p,
+ MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC)) {
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+
+ mutex_exit(&p->mlp_mtx);
+ }
+
+ for (i = 0; i < mlxp->mlx_nports; ++i) {
+ p = &mlxp->mlx_ports[i];
+ mutex_enter(&p->mlp_mtx);
+ p->mlp_rx_flow = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t),
+ KM_SLEEP));
+ mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX;
+ ft->mlft_port = p;
+ ft->mlft_entshift = mlxp->mlx_props.mldp_ftbl_root_size_shift;
+ if (ft->mlft_entshift > mlxp->mlx_caps->mlc_max_rx_ft_shift)
+ ft->mlft_entshift = mlxp->mlx_caps->mlc_max_rx_ft_shift;
+ ft->mlft_nents = (1 << ft->mlft_entshift);
+ ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t);
+ ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP);
+ list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t),
+ offsetof(mlxcx_flow_group_t, mlfg_entry));
+
+ for (j = 0; j < ft->mlft_nents; ++j) {
+ ft->mlft_ent[j].mlfe_table = ft;
+ ft->mlft_ent[j].mlfe_index = j;
+ }
+
+ if (!mlxcx_cmd_create_flow_table(mlxp, ft)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+
+ if (!mlxcx_cmd_set_flow_table_root(mlxp, ft)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+
+ /*
+ * We match broadcast at the top of the root flow table, then
+	 * all multicast/unicast MACs, with the promisc entry at the
+	 * very bottom.
+ *
+ * This way when promisc is on, that entry simply catches any
+ * remaining traffic that earlier flows haven't matched.
+ */
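+		/*
+		 * Resulting layout of the n-entry root table:
+		 *
+		 *   [0]          broadcast (ff:ff:ff:ff:ff:ff)
+		 *   [1 .. n-2]   unicast/multicast MAC entries
+		 *   [n-1]        promisc catch-all (no match mask)
+		 */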
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_DMAC;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_bcast = fg;
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ (void) memset(fe->mlfe_dmac, 0xff, sizeof (fe->mlfe_dmac));
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = ft->mlft_nents - 2;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_DMAC;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_umcast = fg;
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&p->mlp_mtx);
+ goto err;
+ }
+ p->mlp_promisc = fg;
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
+
+ avl_create(&p->mlp_dmac_fe, mlxcx_dmac_fe_compare,
+ sizeof (mlxcx_flow_entry_t), offsetof(mlxcx_flow_entry_t,
+ mlfe_dmac_entry));
+
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&p->mlp_mtx);
+ }
+
+ return (B_TRUE);
+
+err:
+ mlxcx_teardown_ports(mlxp);
+ return (B_FALSE);
+}
+
+void
+mlxcx_remove_all_vlan_entries(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
+{
+ mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft;
+ mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg;
+ mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg;
+ mlxcx_flow_entry_t *fe;
+ mlxcx_group_vlan_t *v;
+
+ ASSERT(mutex_owned(&g->mlg_mtx));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ if (!list_is_empty(&g->mlg_rx_vlans)) {
+ fe = list_head(&dfg->mlfg_entries);
+ (void) mlxcx_cmd_set_flow_table_entry(mlxp, fe);
+ }
+
+ while ((v = list_remove_head(&g->mlg_rx_vlans)) != NULL) {
+ fe = v->mlgv_fe;
+ ASSERT3P(fe->mlfe_table, ==, ft);
+ ASSERT3P(fe->mlfe_group, ==, fg);
+ kmem_free(v, sizeof (mlxcx_group_vlan_t));
+
+ (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
+ }
+
+ mutex_exit(&ft->mlft_mtx);
+}
+
+boolean_t
+mlxcx_remove_vlan_entry(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
+ boolean_t tagged, uint16_t vid)
+{
+ mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft;
+ mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg;
+ mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg;
+ mlxcx_flow_entry_t *fe;
+ mlxcx_group_vlan_t *v;
+ boolean_t found = B_FALSE;
+
+ ASSERT(mutex_owned(&g->mlg_mtx));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ for (v = list_head(&g->mlg_rx_vlans); v != NULL;
+ v = list_next(&g->mlg_rx_vlans, v)) {
+ if (v->mlgv_tagged == tagged && v->mlgv_vid == vid) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+
+ list_remove(&g->mlg_rx_vlans, v);
+
+ /*
+ * If this is the last VLAN entry, we have to go back to accepting
+ * any VLAN (which means re-enabling the default entry).
+ *
+ * Do this before we remove the flow entry for the last specific
+ * VLAN so that we don't lose any traffic in the transition.
+ */
+ if (list_is_empty(&g->mlg_rx_vlans)) {
+ fe = list_head(&dfg->mlfg_entries);
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ list_insert_tail(&g->mlg_rx_vlans, v);
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+ }
+
+ fe = v->mlgv_fe;
+ ASSERT(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED);
+ ASSERT(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED);
+ ASSERT3P(fe->mlfe_table, ==, ft);
+ ASSERT3P(fe->mlfe_group, ==, fg);
+
+ if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
+ list_insert_tail(&g->mlg_rx_vlans, v);
+ fe = list_head(&dfg->mlfg_entries);
+ if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
+ }
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
+
+ kmem_free(v, sizeof (mlxcx_group_vlan_t));
+
+ mutex_exit(&ft->mlft_mtx);
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_add_vlan_entry(mlxcx_t *mlxp, mlxcx_ring_group_t *g, boolean_t tagged,
+ uint16_t vid)
+{
+ mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft;
+ mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg;
+ mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg;
+ mlxcx_flow_entry_t *fe;
+ mlxcx_group_vlan_t *v;
+ boolean_t found = B_FALSE;
+ boolean_t first = B_FALSE;
+
+ ASSERT(mutex_owned(&g->mlg_mtx));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ for (v = list_head(&g->mlg_rx_vlans); v != NULL;
+ v = list_next(&g->mlg_rx_vlans, v)) {
+ if (v->mlgv_tagged == tagged && v->mlgv_vid == vid) {
+ mutex_exit(&ft->mlft_mtx);
+ return (B_TRUE);
+ }
+ }
+ if (list_is_empty(&g->mlg_rx_vlans))
+ first = B_TRUE;
+
+ for (fe = list_head(&fg->mlfg_entries); fe != NULL;
+ fe = list_next(&fg->mlfg_entries, fe)) {
+ if (!(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED)) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+
+ v = kmem_zalloc(sizeof (mlxcx_group_vlan_t), KM_SLEEP);
+ v->mlgv_fe = fe;
+ v->mlgv_tagged = tagged;
+ v->mlgv_vid = vid;
+
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_RESERVED;
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
+ fe->mlfe_vid = vid;
+ if (tagged) {
+ fe->mlfe_vlan_type = MLXCX_VLAN_TYPE_CVLAN;
+ } else {
+ fe->mlfe_vlan_type = MLXCX_VLAN_TYPE_NONE;
+ }
+
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY;
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
+ kmem_free(v, sizeof (mlxcx_group_vlan_t));
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+
+ list_insert_tail(&g->mlg_rx_vlans, v);
+
+ /*
+ * If the vlan list was empty for this group before adding this one,
+ * then we no longer want the "default" entry to allow all VLANs
+ * through.
+ */
+ if (first) {
+ fe = list_head(&dfg->mlfg_entries);
+ (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
+ }
+
+ mutex_exit(&ft->mlft_mtx);
+ return (B_TRUE);
+}
+
+void
+mlxcx_remove_all_umcast_entries(mlxcx_t *mlxp, mlxcx_port_t *port,
+ mlxcx_ring_group_t *group)
+{
+ mlxcx_flow_entry_t *fe;
+ mlxcx_flow_table_t *ft = port->mlp_rx_flow;
+ mlxcx_group_mac_t *gm, *ngm;
+
+ ASSERT(mutex_owned(&port->mlp_mtx));
+ ASSERT(mutex_owned(&group->mlg_mtx));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ gm = avl_first(&group->mlg_rx_macs);
+ for (; gm != NULL; gm = ngm) {
+ ngm = AVL_NEXT(&group->mlg_rx_macs, gm);
+
+ ASSERT3P(gm->mlgm_group, ==, group);
+ fe = gm->mlgm_fe;
+ ASSERT3P(fe->mlfe_table, ==, ft);
+
+ avl_remove(&group->mlg_rx_macs, gm);
+ list_remove(&fe->mlfe_ring_groups, gm);
+ kmem_free(gm, sizeof (mlxcx_group_mac_t));
+
+ fe->mlfe_ndest = 0;
+ for (gm = list_head(&fe->mlfe_ring_groups); gm != NULL;
+ gm = list_next(&fe->mlfe_ring_groups, gm)) {
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow =
+ gm->mlgm_group->mlg_rx_vlan_ft;
+ }
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
+
+ if (fe->mlfe_ndest > 0) {
+ (void) mlxcx_cmd_set_flow_table_entry(mlxp, fe);
+ continue;
+ }
+
+ /*
+ * There are no more ring groups left for this MAC (it wasn't
+ * attached to any other groups since ndest == 0), so clean up
+ * its flow entry.
+ */
+ avl_remove(&port->mlp_dmac_fe, fe);
+ (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
+ list_destroy(&fe->mlfe_ring_groups);
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
+ }
+
+ mutex_exit(&ft->mlft_mtx);
+}
+
+boolean_t
+mlxcx_remove_umcast_entry(mlxcx_t *mlxp, mlxcx_port_t *port,
+ mlxcx_ring_group_t *group, const uint8_t *macaddr)
+{
+ mlxcx_flow_entry_t *fe;
+ mlxcx_flow_table_t *ft = port->mlp_rx_flow;
+ mlxcx_group_mac_t *gm, probe;
+
+ ASSERT(mutex_owned(&port->mlp_mtx));
+ ASSERT(mutex_owned(&group->mlg_mtx));
+
+ bzero(&probe, sizeof (probe));
+ bcopy(macaddr, probe.mlgm_mac, sizeof (probe.mlgm_mac));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ gm = avl_find(&group->mlg_rx_macs, &probe, NULL);
+ if (gm == NULL) {
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+ ASSERT3P(gm->mlgm_group, ==, group);
+ ASSERT0(bcmp(macaddr, gm->mlgm_mac, sizeof (gm->mlgm_mac)));
+
+ fe = gm->mlgm_fe;
+ ASSERT3P(fe->mlfe_table, ==, ft);
+ ASSERT0(bcmp(macaddr, fe->mlfe_dmac, sizeof (fe->mlfe_dmac)));
+
+ list_remove(&fe->mlfe_ring_groups, gm);
+ avl_remove(&group->mlg_rx_macs, gm);
+ kmem_free(gm, sizeof (mlxcx_group_mac_t));
+
+ fe->mlfe_ndest = 0;
+ for (gm = list_head(&fe->mlfe_ring_groups); gm != NULL;
+ gm = list_next(&fe->mlfe_ring_groups, gm)) {
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow =
+ gm->mlgm_group->mlg_rx_vlan_ft;
+ }
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
+
+ if (fe->mlfe_ndest > 0) {
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+ mutex_exit(&ft->mlft_mtx);
+ return (B_TRUE);
+ }
+
+ /*
+ * There are no more ring groups left for this MAC (it wasn't attached
+ * to any other groups since ndest == 0), so clean up its flow entry.
+ */
+ avl_remove(&port->mlp_dmac_fe, fe);
+ (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
+ list_destroy(&fe->mlfe_ring_groups);
+
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
+
+ mutex_exit(&ft->mlft_mtx);
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_add_umcast_entry(mlxcx_t *mlxp, mlxcx_port_t *port,
+ mlxcx_ring_group_t *group, const uint8_t *macaddr)
+{
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_entry_t *fe, probe;
+ mlxcx_flow_table_t *ft = port->mlp_rx_flow;
+ mlxcx_group_mac_t *gm;
+ boolean_t found = B_FALSE;
+
+ ASSERT(mutex_owned(&port->mlp_mtx));
+ ASSERT(mutex_owned(&group->mlg_mtx));
+
+ bzero(&probe, sizeof (probe));
+ bcopy(macaddr, probe.mlfe_dmac, sizeof (probe.mlfe_dmac));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ fe = avl_find(&port->mlp_dmac_fe, &probe, NULL);
+
+ if (fe == NULL) {
+ fg = port->mlp_umcast;
+ for (fe = list_head(&fg->mlfg_entries); fe != NULL;
+ fe = list_next(&fg->mlfg_entries, fe)) {
+ if (!(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED)) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+ list_create(&fe->mlfe_ring_groups, sizeof (mlxcx_group_mac_t),
+ offsetof(mlxcx_group_mac_t, mlgm_fe_entry));
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_RESERVED;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ bcopy(macaddr, fe->mlfe_dmac, sizeof (fe->mlfe_dmac));
+
+ avl_add(&port->mlp_dmac_fe, fe);
+ }
+
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = group->mlg_rx_vlan_ft;
+ fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
+
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY;
+ if (--fe->mlfe_ndest == 0) {
+ fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
+ }
+ mutex_exit(&ft->mlft_mtx);
+ return (B_FALSE);
+ }
+
+ gm = kmem_zalloc(sizeof (mlxcx_group_mac_t), KM_SLEEP);
+ gm->mlgm_group = group;
+ gm->mlgm_fe = fe;
+ bcopy(macaddr, gm->mlgm_mac, sizeof (gm->mlgm_mac));
+ avl_add(&group->mlg_rx_macs, gm);
+ list_insert_tail(&fe->mlfe_ring_groups, gm);
+
+ mutex_exit(&ft->mlft_mtx);
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_setup_flow_group(mlxcx_t *mlxp, mlxcx_flow_table_t *ft,
+ mlxcx_flow_group_t *fg)
+{
+ mlxcx_flow_entry_t *fe;
+ uint_t i, idx;
+
+ ASSERT(mutex_owned(&ft->mlft_mtx));
+ ASSERT(ft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ ASSERT3P(fg->mlfg_table, ==, ft);
+
+ if (ft->mlft_next_ent + fg->mlfg_size > ft->mlft_nents)
+ return (B_FALSE);
+ fg->mlfg_start_idx = ft->mlft_next_ent;
+
+ if (!mlxcx_cmd_create_flow_group(mlxp, fg)) {
+ return (B_FALSE);
+ }
+
+ list_create(&fg->mlfg_entries, sizeof (mlxcx_flow_entry_t),
+ offsetof(mlxcx_flow_entry_t, mlfe_group_entry));
+ for (i = 0; i < fg->mlfg_size; ++i) {
+ idx = fg->mlfg_start_idx + i;
+ fe = &ft->mlft_ent[idx];
+ fe->mlfe_group = fg;
+ list_insert_tail(&fg->mlfg_entries, fe);
+ }
+ fg->mlfg_avail = fg->mlfg_size;
+ ft->mlft_next_ent += fg->mlfg_size;
+
+ return (B_TRUE);
+}
+
+static boolean_t
+mlxcx_setup_eq0(mlxcx_t *mlxp)
+{
+ mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[0];
+
+ mutex_enter(&mleq->mleq_mtx);
+ if (!mlxcx_eq_alloc_dma(mlxp, mleq)) {
+ /* mlxcx_teardown_eqs() will clean this up */
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ mleq->mleq_mlx = mlxp;
+ mleq->mleq_uar = &mlxp->mlx_uar;
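+	/*
+	 * EQ 0 is subscribed to all of the "control" events below (one bit
+	 * per event type number); completion events from CQs are instead
+	 * routed to the other EQs created in mlxcx_setup_eqs().
+	 */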
+ mleq->mleq_events =
+ (1ULL << MLXCX_EVENT_PAGE_REQUEST) |
+ (1ULL << MLXCX_EVENT_PORT_STATE) |
+ (1ULL << MLXCX_EVENT_INTERNAL_ERROR) |
+ (1ULL << MLXCX_EVENT_PORT_MODULE) |
+ (1ULL << MLXCX_EVENT_SENDQ_DRAIN) |
+ (1ULL << MLXCX_EVENT_LAST_WQE) |
+ (1ULL << MLXCX_EVENT_CQ_ERROR) |
+ (1ULL << MLXCX_EVENT_WQ_CATASTROPHE) |
+ (1ULL << MLXCX_EVENT_PAGE_FAULT) |
+ (1ULL << MLXCX_EVENT_WQ_INVALID_REQ) |
+ (1ULL << MLXCX_EVENT_WQ_ACCESS_VIOL) |
+ (1ULL << MLXCX_EVENT_NIC_VPORT) |
+ (1ULL << MLXCX_EVENT_DOORBELL_CONGEST);
+ if (!mlxcx_cmd_create_eq(mlxp, mleq)) {
+ /* mlxcx_teardown_eqs() will clean this up */
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ if (ddi_intr_enable(mlxp->mlx_intr_handles[0]) != DDI_SUCCESS) {
+ /*
+ * mlxcx_teardown_eqs() will handle calling cmd_destroy_eq and
+ * eq_rele_dma
+ */
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ mlxcx_arm_eq(mlxp, mleq);
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_TRUE);
+}
+
+int
+mlxcx_cq_compare(const void *arg0, const void *arg1)
+{
+ const mlxcx_completion_queue_t *left = arg0;
+ const mlxcx_completion_queue_t *right = arg1;
+
+ if (left->mlcq_num < right->mlcq_num) {
+ return (-1);
+ }
+ if (left->mlcq_num > right->mlcq_num) {
+ return (1);
+ }
+ return (0);
+}
+
+static boolean_t
+mlxcx_setup_eqs(mlxcx_t *mlxp)
+{
+ uint_t i;
+ mlxcx_event_queue_t *mleq;
+
+ ASSERT3S(mlxp->mlx_intr_count, >, 0);
+
+ for (i = 1; i < mlxp->mlx_intr_count; ++i) {
+ mleq = &mlxp->mlx_eqs[i];
+ mutex_enter(&mleq->mleq_mtx);
+ if (!mlxcx_eq_alloc_dma(mlxp, mleq)) {
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ mleq->mleq_uar = &mlxp->mlx_uar;
+ if (!mlxcx_cmd_create_eq(mlxp, mleq)) {
+ /* mlxcx_teardown() will handle calling eq_rele_dma */
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ if (mlxp->mlx_props.mldp_intrmod_period_usec != 0 &&
+ !mlxcx_cmd_set_int_mod(mlxp, i,
+ mlxp->mlx_props.mldp_intrmod_period_usec)) {
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ if (ddi_intr_enable(mlxp->mlx_intr_handles[i]) != DDI_SUCCESS) {
+ mutex_exit(&mleq->mleq_mtx);
+ return (B_FALSE);
+ }
+ mlxcx_arm_eq(mlxp, mleq);
+ mutex_exit(&mleq->mleq_mtx);
+ }
+
+ mlxp->mlx_next_eq = 1;
+
+ return (B_TRUE);
+}
+
+/*
+ * Snapshot all of the hardware capabilities that we care about and then modify
+ * the HCA capabilities to get things moving.
+ */
+static boolean_t
+mlxcx_init_caps(mlxcx_t *mlxp)
+{
+ mlxcx_caps_t *c;
+
+ mlxp->mlx_caps = c = kmem_zalloc(sizeof (mlxcx_caps_t), KM_SLEEP);
+
+ if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_GENERAL,
+ MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_hca_cur)) {
+ mlxcx_warn(mlxp, "failed to obtain current HCA general caps");
+ }
+
+ if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_GENERAL,
+ MLXCX_HCA_CAP_MODE_MAX, &c->mlc_hca_max)) {
+ mlxcx_warn(mlxp, "failed to obtain maximum HCA general caps");
+ }
+
+ if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_ETHERNET,
+ MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_ether_cur)) {
+ mlxcx_warn(mlxp, "failed to obtain current HCA eth caps");
+ }
+
+ if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_ETHERNET,
+ MLXCX_HCA_CAP_MODE_MAX, &c->mlc_ether_max)) {
+ mlxcx_warn(mlxp, "failed to obtain maximum HCA eth caps");
+ }
+
+ if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_NIC_FLOW,
+ MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_nic_flow_cur)) {
+ mlxcx_warn(mlxp, "failed to obtain current HCA flow caps");
+ }
+
+ if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_NIC_FLOW,
+ MLXCX_HCA_CAP_MODE_MAX, &c->mlc_nic_flow_max)) {
+ mlxcx_warn(mlxp, "failed to obtain maximum HCA flow caps");
+ }
+
+ /*
+ * Check the caps meet our requirements.
+ */
+ const mlxcx_hca_cap_general_caps_t *gen = &c->mlc_hca_cur.mhc_general;
+
+ if (gen->mlcap_general_log_pg_sz != 12) {
+ mlxcx_warn(mlxp, "!hardware has page size != 4k "
+ "(log_pg_sz = %u)", (uint_t)gen->mlcap_general_log_pg_sz);
+ goto err;
+ }
+ if (gen->mlcap_general_cqe_version != 1) {
+ mlxcx_warn(mlxp, "!hardware does not support CQE v1 "
+ "(cqe_ver = %u)", (uint_t)gen->mlcap_general_cqe_version);
+ goto err;
+ }
+ if (gen->mlcap_general_port_type !=
+ MLXCX_CAP_GENERAL_PORT_TYPE_ETHERNET) {
+ mlxcx_warn(mlxp, "!hardware has non-ethernet ports");
+ goto err;
+ }
+ mlxp->mlx_nports = gen->mlcap_general_num_ports;
+ mlxp->mlx_max_sdu = (1 << (gen->mlcap_general_log_max_msg & 0x1F));
+
+ c->mlc_max_tir = (1 << gen->mlcap_general_log_max_tir);
+
+ c->mlc_checksum = get_bit32(c->mlc_ether_cur.mhc_eth.mlcap_eth_flags,
+ MLXCX_ETH_CAP_CSUM_CAP);
+ c->mlc_vxlan = get_bit32(c->mlc_ether_cur.mhc_eth.mlcap_eth_flags,
+ MLXCX_ETH_CAP_TUNNEL_STATELESS_VXLAN);
+
+ c->mlc_max_lso_size = (1 << get_bits32(c->mlc_ether_cur.mhc_eth.
+ mlcap_eth_flags, MLXCX_ETH_CAP_MAX_LSO_CAP));
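+	/*
+	 * A reported cap of zero (and so 1 << 0 == 1 here) means the device
+	 * does not support LSO at all.
+	 */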
+ if (c->mlc_max_lso_size == 1) {
+ c->mlc_max_lso_size = 0;
+ c->mlc_lso = B_FALSE;
+ } else {
+ c->mlc_lso = B_TRUE;
+ }
+
+ c->mlc_max_rqt_size = (1 << get_bits32(c->mlc_ether_cur.mhc_eth.
+ mlcap_eth_flags, MLXCX_ETH_CAP_RSS_IND_TBL_CAP));
+
+ if (!get_bit32(c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx.
+ mlcap_flow_prop_flags, MLXCX_FLOW_CAP_PROPS_SUPPORT)) {
+ mlxcx_warn(mlxp, "!hardware does not support rx flow tables");
+ goto err;
+ }
+ if (!get_bit32(c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx.
+ mlcap_flow_prop_flags, MLXCX_FLOW_CAP_PROPS_MODIFY)) {
+ mlxcx_warn(mlxp, "!hardware does not support modifying rx "
+ "flow table entries");
+ goto err;
+ }
+
+ c->mlc_max_rx_ft_shift = c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx.
+ mlcap_flow_prop_log_max_ft_size;
+ c->mlc_max_rx_flows = (1 << c->mlc_nic_flow_cur.mhc_flow.
+ mlcap_flow_nic_rx.mlcap_flow_prop_log_max_flow);
+ c->mlc_max_rx_fe_dest = (1 << c->mlc_nic_flow_cur.mhc_flow.
+ mlcap_flow_nic_rx.mlcap_flow_prop_log_max_destination);
+
+ return (B_TRUE);
+
+err:
+ kmem_free(mlxp->mlx_caps, sizeof (mlxcx_caps_t));
+ return (B_FALSE);
+}
+
+static int
+mlxcx_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ mlxcx_t *mlxp;
+
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ mlxp = ddi_get_driver_private(dip);
+ if (mlxp == NULL) {
+ mlxcx_warn(NULL, "asked to detach, but missing instance "
+ "private data");
+ return (DDI_FAILURE);
+ }
+
+ if (mlxp->mlx_attach & MLXCX_ATTACH_MAC_HDL) {
+ if (mac_unregister(mlxp->mlx_mac_hdl) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_MAC_HDL;
+ }
+
+ mlxcx_teardown(mlxp);
+ return (DDI_SUCCESS);
+}
+
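+/*
+ * Work out how many rx groups we can actually advertise to MAC: the count
+ * requested in mlxcx.conf is clamped against the hardware limits we
+ * captured in mlxcx_init_caps(). Each group consumes MLXCX_TIRS_PER_GROUP
+ * TIRs and a slot in the root rx flow table (whose broadcast and promisc
+ * entries account for the "- 2" terms), and the final loop backs off one
+ * group at a time until a budget of 16 flow entries per group fits within
+ * mlc_max_rx_flows.
+ */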
+static size_t
+mlxcx_calc_rx_ngroups(mlxcx_t *mlxp)
+{
+ size_t ngroups = mlxp->mlx_props.mldp_rx_ngroups_large +
+ mlxp->mlx_props.mldp_rx_ngroups_small;
+ size_t tirlim, flowlim, gflowlim;
+
+ tirlim = mlxp->mlx_caps->mlc_max_tir / MLXCX_TIRS_PER_GROUP;
+ if (tirlim < ngroups) {
+		mlxcx_note(mlxp, "limiting number of rx groups to %lu based "
+		    "on number of TIRs available", tirlim);
+ ngroups = tirlim;
+ }
+
+ flowlim = (1 << mlxp->mlx_caps->mlc_max_rx_ft_shift) - 2;
+ if (flowlim < ngroups) {
+		mlxcx_note(mlxp, "limiting number of rx groups to %lu based "
+		    "on max size of RX flow tables", flowlim);
+ ngroups = flowlim;
+ }
+
+ do {
+ gflowlim = mlxp->mlx_caps->mlc_max_rx_flows - 16 * ngroups - 2;
+ if (gflowlim < ngroups) {
+			mlxcx_note(mlxp, "limiting number of rx groups to %lu "
+			    "based on max total RX flows", gflowlim);
+ --ngroups;
+ }
+ } while (gflowlim < ngroups);
+
+ return (ngroups);
+}
+
+static int
+mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ mlxcx_t *mlxp;
+ uint_t i;
+ int inst, ret;
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ inst = ddi_get_instance(dip);
+ ret = ddi_soft_state_zalloc(mlxcx_softstate, inst);
+ if (ret != 0)
+ return (ret);
+
+ mlxp = ddi_get_soft_state(mlxcx_softstate, inst);
+ if (mlxp == NULL)
+ return (DDI_FAILURE);
+ mlxp->mlx_dip = dip;
+ mlxp->mlx_inst = inst;
+ ddi_set_driver_private(dip, mlxp);
+
+ mlxcx_load_props(mlxp);
+
+ mlxcx_fm_init(mlxp);
+ mlxp->mlx_attach |= MLXCX_ATTACH_FM;
+
+ if (pci_config_setup(mlxp->mlx_dip, &mlxp->mlx_cfg_handle) !=
+ DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "failed to initialize PCI config space");
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_PCI_CONFIG;
+
+ if (!mlxcx_regs_map(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_REGS;
+
+ if (!mlxcx_cmd_queue_init(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_CMD;
+
+ if (!mlxcx_cmd_enable_hca(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_ENABLE_HCA;
+
+ if (!mlxcx_check_issi(mlxp)) {
+ goto err;
+ }
+
+ /*
+ * We have to get our interrupts now so we know what priority to
+ * create pagemtx with.
+ */
+ if (!mlxcx_intr_setup(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_INTRS;
+
+ mutex_init(&mlxp->mlx_pagemtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ avl_create(&mlxp->mlx_pages, mlxcx_page_compare,
+ sizeof (mlxcx_dev_page_t), offsetof(mlxcx_dev_page_t, mxdp_tree));
+ mlxp->mlx_attach |= MLXCX_ATTACH_PAGE_LIST;
+
+ if (!mlxcx_init_pages(mlxp, MLXCX_QUERY_PAGES_OPMOD_BOOT)) {
+ goto err;
+ }
+
+ if (!mlxcx_init_caps(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_CAPS;
+
+ if (!mlxcx_init_pages(mlxp, MLXCX_QUERY_PAGES_OPMOD_INIT)) {
+ goto err;
+ }
+
+ if (!mlxcx_cmd_init_hca(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_INIT_HCA;
+
+ if (!mlxcx_cmd_set_driver_version(mlxp, MLXCX_DRIVER_VERSION)) {
+ goto err;
+ }
+
+ /*
+ * The User Access Region (UAR) is needed so we can ring EQ and CQ
+ * doorbells.
+ */
+ if (!mlxcx_cmd_alloc_uar(mlxp, &mlxp->mlx_uar)) {
+ goto err;
+ }
+ for (i = 0; i < MLXCX_BF_PER_UAR; ++i) {
+ mutex_init(&mlxp->mlx_uar.mlu_bf[i].mbf_mtx, NULL,
+ MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_UAR_PD_TD;
+
+ /*
+ * Set up event queue #0 -- it's special and only handles control
+ * type events, like PAGE_REQUEST (which we will probably get during
+ * the commands below).
+ *
+ * This will enable and arm the interrupt on EQ 0, too.
+ */
+ if (!mlxcx_setup_eq0(mlxp)) {
+ goto err;
+ }
+
+ /*
+ * Allocate a protection and transport domain. These don't really do
+ * anything for us (they're IB concepts), but we need to give their
+ * ID numbers in other commands.
+ */
+ if (!mlxcx_cmd_alloc_pd(mlxp, &mlxp->mlx_pd)) {
+ goto err;
+ }
+ if (!mlxcx_cmd_alloc_tdom(mlxp, &mlxp->mlx_tdom)) {
+ goto err;
+ }
+ /*
+ * Fetch the "reserved" lkey that lets us give linear addresses in
+ * work queue entries, rather than having to mess with the NIC's
+ * internal MMU.
+ */
+ if (!mlxcx_cmd_query_special_ctxs(mlxp)) {
+ goto err;
+ }
+
+ /*
+ * Query our port information and current state, populate the
+ * mlxcx_port_t structs.
+ *
+ * This also sets up the root flow tables and flow groups.
+ */
+ if (!mlxcx_setup_ports(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_PORTS;
+
+ /*
+ * Set up, enable and arm the rest of the interrupt EQs which will
+ * service events from CQs.
+ *
+ * The MLXCX_ATTACH_INTRS flag covers checking if these need to be
+ * cleaned up.
+ */
+ if (!mlxcx_setup_eqs(mlxp)) {
+ goto err;
+ }
+
+ /* Completion queues */
+ list_create(&mlxp->mlx_cqs, sizeof (mlxcx_completion_queue_t),
+ offsetof(mlxcx_completion_queue_t, mlcq_entry));
+ mlxp->mlx_attach |= MLXCX_ATTACH_CQS;
+
+ /* Work queues (send queues, receive queues) */
+ list_create(&mlxp->mlx_wqs, sizeof (mlxcx_work_queue_t),
+ offsetof(mlxcx_work_queue_t, mlwq_entry));
+ mlxp->mlx_attach |= MLXCX_ATTACH_WQS;
+
+ /* Set up periodic fault check timers which check the queue states */
+ if (!mlxcx_setup_checktimers(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_CHKTIMERS;
+
+ /*
+ * Construct our arrays of mlxcx_ring_group_ts, which represent the
+ * "groups" we advertise to MAC.
+ */
+ mlxp->mlx_rx_ngroups = mlxcx_calc_rx_ngroups(mlxp);
+ mlxp->mlx_rx_groups_size = mlxp->mlx_rx_ngroups *
+ sizeof (mlxcx_ring_group_t);
+ mlxp->mlx_rx_groups = kmem_zalloc(mlxp->mlx_rx_groups_size, KM_SLEEP);
+
+ mlxp->mlx_tx_ngroups = mlxp->mlx_props.mldp_tx_ngroups;
+ mlxp->mlx_tx_groups_size = mlxp->mlx_tx_ngroups *
+ sizeof (mlxcx_ring_group_t);
+ mlxp->mlx_tx_groups = kmem_zalloc(mlxp->mlx_tx_groups_size, KM_SLEEP);
+
+ mlxp->mlx_attach |= MLXCX_ATTACH_GROUPS;
+
+ /*
+ * Sets up the free/busy buffers list for keeping track of packet
+ * buffers.
+ */
+ if (!mlxcx_setup_bufs(mlxp))
+ goto err;
+ mlxp->mlx_attach |= MLXCX_ATTACH_BUFS;
+
+ /*
+ * Before we tell MAC about our rings/groups, we need to do enough
+ * setup on them to be sure about the numbers and configuration that
+ * we have. This will do basically everything short of allocating
+ * packet buffers and starting the rings up.
+ */
+ for (i = 0; i < mlxp->mlx_tx_ngroups; ++i) {
+ if (!mlxcx_tx_group_setup(mlxp, &mlxp->mlx_tx_groups[i]))
+ goto err;
+ }
+ for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) {
+ if (!mlxcx_rx_group_setup(mlxp, &mlxp->mlx_rx_groups[i]))
+ goto err;
+ }
+
+ /*
+ * Finally, tell MAC that we exist!
+ */
+ if (!mlxcx_register_mac(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_MAC_HDL;
+
+ return (DDI_SUCCESS);
+
+err:
+ mlxcx_teardown(mlxp);
+ return (DDI_FAILURE);
+}
+
+static struct cb_ops mlxcx_cb_ops = {
+ .cb_open = nulldev,
+ .cb_close = nulldev,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = nodev,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_flag = D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
+static struct dev_ops mlxcx_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+ .devo_getinfo = NULL,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_attach = mlxcx_attach,
+ .devo_detach = mlxcx_detach,
+ .devo_reset = nodev,
+ .devo_power = ddi_power,
+ .devo_quiesce = ddi_quiesce_not_supported,
+ .devo_cb_ops = &mlxcx_cb_ops
+};
+
+static struct modldrv mlxcx_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "Mellanox Connect-X 4/5/6",
+ .drv_dev_ops = &mlxcx_dev_ops
+};
+
+static struct modlinkage mlxcx_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &mlxcx_modldrv, NULL }
+};
+
+int
+_init(void)
+{
+ int ret;
+
+ ret = ddi_soft_state_init(&mlxcx_softstate, sizeof (mlxcx_t), 0);
+ if (ret != 0) {
+ return (ret);
+ }
+
+ mac_init_ops(&mlxcx_dev_ops, MLXCX_MODULE_NAME);
+
+ if ((ret = mod_install(&mlxcx_modlinkage)) != DDI_SUCCESS) {
+ mac_fini_ops(&mlxcx_dev_ops);
+ ddi_soft_state_fini(&mlxcx_softstate);
+ return (ret);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&mlxcx_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ if ((ret = mod_remove(&mlxcx_modlinkage)) != DDI_SUCCESS) {
+ return (ret);
+ }
+
+ mac_fini_ops(&mlxcx_dev_ops);
+
+ ddi_soft_state_fini(&mlxcx_softstate);
+
+ return (DDI_SUCCESS);
+}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.conf b/usr/src/uts/common/io/mlxcx/mlxcx.conf
new file mode 100644
index 0000000000..3569c4e5f5
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.conf
@@ -0,0 +1,101 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018, Joyent, Inc.
+# Copyright 2020, The University of Queensland
+#
+
+#
+# Driver.conf file for Mellanox Connect-X 4/5/6.
+# See mlxcx(7D) for valid options.
+#
+
+#
+# Sizing of event and completion queues.
+#
+# The number of entries on each queue will be (1 << *_size_shift) -- so
+# a value of 9 would mean 512 entries.
+#
+#eq_size_shift = 9;
+#cq_size_shift = 10;
+
+#
+# Sizing of send and receive queues.
+#
+# Note that this determines the size of the RX and TX rings that mlxcx will
+# advertise to MAC. It also determines how many packet buffers we will allocate
+# when starting the interface.
+#
+#sq_size_shift = 11;
+#rq_size_shift = 10;
+
+#
+# Number and configuration of TX groups and rings.
+#
+#tx_ngroups = 1;
+#tx_nrings_per_group = 64;
+
+#
+# Number and configuration of RX groups and rings.
+#
+#rx_ngroups_large = 2;
+#rx_nrings_per_large_group = 16;
+#rx_ngroups_small = 256;
+#rx_nrings_per_small_group = 4;
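+#
+# With the defaults above, that is 2*16 + 256*4 = 1056 rx rings in total.
+# The driver reduces the group count automatically if the hardware cannot
+# support this many.
+#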
+
+#
+# Number of flow table entries allocated to root flow tables.
+#
+# This places a ceiling on how many MAC addresses can be filtered into
+# groups across the whole NIC. If you have a lot of VNICs you might want to
+# raise this (and probably also rx_ngroups_small).
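+#
+# The default shift of 12 gives 4096 entries; the broadcast and promisc
+# entries always occupy two of them, leaving 4094 for MAC filtering.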
+#
+#ftbl_root_size_shift = 12;
+
+#
+# Number of flow table entries allocated to each L1 VLAN filtering table.
+#
+# This places a limit on the number of VLANs that one MAC address can be
+# associated with before falling back to software classification. Two entries
+# are always reserved for the non-VLAN catch-all and promisc entries.
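+#
+# With the default shift of 4, that is 16 entries: the two reserved ones
+# plus at most 14 distinct VLAN filters per MAC address.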
+#
+# Note: illumos MAC today only supports giving a single VLAN per MAC address
+# to hardware drivers anyway, so setting this higher is pointless until that
+# changes.
+#
+#ftbl_vlan_size_shift = 4;
+
+#
+# Interrupt and completion moderation.
+#
+#cqemod_period_usec = 50;
+#cqemod_count = <80% of cq_size>;
+#intrmod_period_usec = 10;
+
+#
+# Minimum packet size before we use a ddi_dma_bind_addr() rather than bcopy()
+# of the packet data. DMA binds are expensive and involve taking locks in the
+# PCI nexus driver, so it's seldom worth doing them for small packets.
+#
+#tx_bind_threshold = 2048;
+
+#
+# Interval between periodic double-checks of queue status against hardware
+# state. This is used to detect hardware stalls or errors, as well as guard
+# against driver bugs.
+#
+# If set to too high a frequency, checks may impact NIC performance. Can be
+# set to zero to disable periodic checking entirely.
+#
+#eq_check_interval_sec = 30;
+#cq_check_interval_sec = 300;
+#wq_check_interval_sec = 300;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
new file mode 100644
index 0000000000..3b58989961
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -0,0 +1,1298 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Mellanox Connect-X 4/5/6 driver.
+ *
+ * More details in mlxcx.c
+ */
+
+#ifndef _MLXCX_H
+#define _MLXCX_H
+
+/*
+ * mlxcx(7D) definitions
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/id_space.h>
+#include <sys/list.h>
+#include <sys/stddef.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_ether.h>
+#include <sys/cpuvar.h>
+#include <sys/ethernet.h>
+
+#include <inet/ip.h>
+#include <inet/ip6.h>
+
+#include <sys/ddifm.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/util.h>
+#include <sys/fm/io/ddi.h>
+
+#include <mlxcx_reg.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Get access to the first PCI BAR.
+ */
+#define MLXCX_REG_NUMBER 1
+
+/*
+ * The command queue is supposed to be a page, which is 4k.
+ */
+#define MLXCX_CMD_DMA_PAGE_SIZE 4096
+
+/*
+ * Queues can allocate in units of this much memory.
+ */
+#define MLXCX_QUEUE_DMA_PAGE_SIZE 4096
+
+/*
+ * We advertise two sizes of groups to MAC -- a certain number of "large"
+ * groups (including the default group, which is sized to at least ncpus)
+ * followed by a certain number of "small" groups.
+ *
+ * This allows us to have a larger amount of classification resources available
+ * for zones/VMs without resorting to software classification.
+ */
+#define MLXCX_RX_NGROUPS_LARGE_DFLT 2
+#define MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT 16
+#define MLXCX_RX_NGROUPS_SMALL_DFLT 256
+#define MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT 4
+
+#define MLXCX_TX_NGROUPS_DFLT 1
+#define MLXCX_TX_NRINGS_PER_GROUP_DFLT 64
+
+/*
+ * Queues will be sized to (1 << *Q_SIZE_SHIFT) entries long.
+ */
+#define MLXCX_EQ_SIZE_SHIFT_DFLT 9
+#define MLXCX_CQ_SIZE_SHIFT_DFLT 10
+
+/*
+ * Default to making SQs bigger than RQs for 9k MTU, since most packets will
+ * spill over into more than one slot. RQ WQEs are always 1 slot.
+ */
+#define MLXCX_SQ_SIZE_SHIFT_DFLT 11
+#define MLXCX_RQ_SIZE_SHIFT_DFLT 10
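+/*
+ * With the default shifts above: 512-entry EQs, 1024-entry CQs and RQs,
+ * and 2048-entry SQs.
+ */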
+
+#define MLXCX_CQ_HWM_GAP 16
+#define MLXCX_CQ_LWM_GAP 24
+
+#define MLXCX_RQ_REFILL_STEP 64
+
+/*
+ * CQ event moderation
+ */
+#define MLXCX_CQEMOD_PERIOD_USEC_DFLT 50
+#define MLXCX_CQEMOD_COUNT_DFLT \
+ (8 * ((1 << MLXCX_CQ_SIZE_SHIFT_DFLT) / 10))
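+/*
+ * i.e. 8 * (1024 / 10) = 816 CQEs with the default CQ size -- roughly the
+ * "80% of cq_size" described in mlxcx.conf.
+ */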
+
+/*
+ * EQ interrupt moderation
+ */
+#define MLXCX_INTRMOD_PERIOD_USEC_DFLT 10
+
+/* Size of root flow tables */
+#define MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT 12
+
+/* Size of 2nd level flow tables for VLAN filtering */
+#define MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT 4
+
+/*
+ * How big does an mblk have to be before we dma_bind() it instead of
+ * bcopying?
+ */
+#define MLXCX_TX_BIND_THRESHOLD_DFLT 2048
+
+/*
+ * How often to check the status of event, completion and work queues for
+ * overflow and other problems.
+ */
+#define MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT 300
+#define MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT 300
+#define MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT 30
+
+#define MLXCX_DOORBELL_TRIES_DFLT 3
+extern uint_t mlxcx_doorbell_tries;
+
+#define MLXCX_STUCK_INTR_COUNT_DFLT 128
+extern uint_t mlxcx_stuck_intr_count;
+
+#define MLXCX_BUF_BIND_MAX_ATTEMTPS 50
+
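+/*
+ * The L2 overhead (a VLAN-tagged Ethernet header plus FCS) between the
+ * hardware port MTU and the SDU we advertise to MAC.
+ */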
+#define MLXCX_MTU_OFFSET \
+ (sizeof (struct ether_vlan_header) + ETHERFCSL)
+
+/*
+ * This is the current version of the command structure that the driver expects
+ * to be found in the ISS.
+ */
+#define MLXCX_CMD_REVISION 5
+
+#ifdef DEBUG
+#define MLXCX_DMA_SYNC(dma, flag) VERIFY0(ddi_dma_sync( \
+ (dma).mxdb_dma_handle, 0, 0, \
+ (flag)))
+#else
+#define MLXCX_DMA_SYNC(dma, flag) (void) ddi_dma_sync( \
+ (dma).mxdb_dma_handle, 0, 0, \
+ (flag))
+#endif
+
+#define MLXCX_FM_SERVICE_MLXCX "mlxcx"
+
+/*
+ * This macro defines the expected value of the 'Interface Step Sequence ID'
+ * (issi), which represents the version of the start-up and tear-down
+ * We must check that hardware supports this and tell it which version we're
+ * using as well.
+ */
+#define MLXCX_CURRENT_ISSI 1
+
+/*
+ * This is the size of a page that the hardware expects from us when
+ * manipulating pages.
+ */
+#define MLXCX_HW_PAGE_SIZE 4096
+
+/*
+ * This is a special lkey value used to terminate a list of scatter pointers.
+ */
+#define MLXCX_NULL_LKEY 0x100
+
+/*
+ * Forwards
+ */
+struct mlxcx;
+typedef struct mlxcx mlxcx_t;
+
+typedef enum {
+ MLXCX_DMABUF_HDL_ALLOC = 1 << 0,
+ MLXCX_DMABUF_MEM_ALLOC = 1 << 1,
+ MLXCX_DMABUF_BOUND = 1 << 2,
+ MLXCX_DMABUF_FOREIGN = 1 << 3,
+} mlxcx_dma_buffer_flags_t;
+
+typedef struct mlxcx_dma_buffer {
+ mlxcx_dma_buffer_flags_t mxdb_flags;
+ caddr_t mxdb_va; /* Buffer VA */
+ size_t mxdb_len; /* Buffer logical len */
+ ddi_acc_handle_t mxdb_acc_handle;
+ ddi_dma_handle_t mxdb_dma_handle;
+ uint_t mxdb_ncookies;
+} mlxcx_dma_buffer_t;
+
+typedef struct mlxcx_dev_page {
+ list_node_t mxdp_list;
+ avl_node_t mxdp_tree;
+ uintptr_t mxdp_pa;
+ mlxcx_dma_buffer_t mxdp_dma;
+} mlxcx_dev_page_t;
+
+/*
+ * Data structure to keep track of all information related to the command queue.
+ */
+typedef enum {
+ MLXCX_CMD_QUEUE_S_IDLE = 1,
+ MLXCX_CMD_QUEUE_S_BUSY,
+ MLXCX_CMD_QUEUE_S_BROKEN
+} mlxcx_cmd_queue_status_t;
+
+typedef struct mlxcx_cmd_queue {
+ kmutex_t mcmd_lock;
+ kcondvar_t mcmd_cv;
+ mlxcx_dma_buffer_t mcmd_dma;
+ mlxcx_cmd_ent_t *mcmd_ent;
+
+ uint8_t mcmd_size_l2;
+ uint8_t mcmd_stride_l2;
+
+ mlxcx_cmd_queue_status_t mcmd_status;
+
+ ddi_taskq_t *mcmd_taskq;
+ id_space_t *mcmd_tokens;
+} mlxcx_cmd_queue_t;
+
+typedef struct mlxcd_cmd_mbox {
+ list_node_t mlbox_node;
+ mlxcx_dma_buffer_t mlbox_dma;
+ mlxcx_cmd_mailbox_t *mlbox_data;
+} mlxcx_cmd_mbox_t;
+
+typedef enum {
+ MLXCX_EQ_ALLOC = 1 << 0, /* dma mem alloc'd, size set */
+ MLXCX_EQ_CREATED = 1 << 1, /* CREATE_EQ sent to hw */
+ MLXCX_EQ_DESTROYED = 1 << 2, /* DESTROY_EQ sent to hw */
+ MLXCX_EQ_ARMED = 1 << 3, /* Armed through the UAR */
+ MLXCX_EQ_POLLING = 1 << 4, /* Currently being polled */
+} mlxcx_eventq_state_t;
+
+typedef struct mlxcx_bf {
+ kmutex_t mbf_mtx;
+ uint_t mbf_cnt;
+ uint_t mbf_even;
+ uint_t mbf_odd;
+} mlxcx_bf_t;
+
+typedef struct mlxcx_uar {
+ boolean_t mlu_allocated;
+ uint_t mlu_num;
+ uint_t mlu_base;
+
+ volatile uint_t mlu_bfcnt;
+ mlxcx_bf_t mlu_bf[MLXCX_BF_PER_UAR];
+} mlxcx_uar_t;
+
+typedef struct mlxcx_pd {
+ boolean_t mlpd_allocated;
+ uint32_t mlpd_num;
+} mlxcx_pd_t;
+
+typedef struct mlxcx_tdom {
+ boolean_t mltd_allocated;
+ uint32_t mltd_num;
+} mlxcx_tdom_t;
+
+typedef enum {
+ MLXCX_PORT_VPORT_PROMISC = 1 << 0,
+} mlxcx_port_flags_t;
+
+typedef struct mlxcx_flow_table mlxcx_flow_table_t;
+typedef struct mlxcx_flow_group mlxcx_flow_group_t;
+
+typedef struct {
+ uint64_t mlps_rx_drops;
+} mlxcx_port_stats_t;
+
+typedef enum {
+ MLXCX_PORT_INIT = 1 << 0
+} mlxcx_port_init_t;
+
+typedef struct mlxcx_port {
+ kmutex_t mlp_mtx;
+ mlxcx_port_init_t mlp_init;
+ mlxcx_t *mlp_mlx;
+ /*
+ * The mlp_num we have here starts at zero (it's an index), but the
+ * numbering we have to use for register access starts at 1. We
+ * currently write mlp_num into the other_vport fields in mlxcx_cmd.c
+ * (where 0 is a magic number meaning "my vport") so if we ever add
+ * support for virtualisation features and deal with more than one
+ * vport, we will probably have to change this.
+ */
+ uint_t mlp_num;
+ mlxcx_port_flags_t mlp_flags;
+ uint64_t mlp_guid;
+ uint8_t mlp_mac_address[ETHERADDRL];
+
+ uint_t mlp_mtu;
+ uint_t mlp_max_mtu;
+
+ mlxcx_port_status_t mlp_admin_status;
+ mlxcx_port_status_t mlp_oper_status;
+
+ boolean_t mlp_autoneg;
+ mlxcx_eth_proto_t mlp_max_proto;
+ mlxcx_eth_proto_t mlp_admin_proto;
+ mlxcx_eth_proto_t mlp_oper_proto;
+
+ mlxcx_eth_inline_mode_t mlp_wqe_min_inline;
+
+ /* Root flow tables */
+ mlxcx_flow_table_t *mlp_rx_flow;
+ mlxcx_flow_table_t *mlp_tx_flow;
+
+ mlxcx_flow_group_t *mlp_promisc;
+ mlxcx_flow_group_t *mlp_bcast;
+ mlxcx_flow_group_t *mlp_umcast;
+
+ avl_tree_t mlp_dmac_fe;
+
+ mlxcx_port_stats_t mlp_stats;
+
+ mlxcx_module_status_t mlp_last_modstate;
+ mlxcx_module_error_type_t mlp_last_moderr;
+} mlxcx_port_t;
+
+typedef enum {
+ MLXCX_EQ_TYPE_ANY,
+ MLXCX_EQ_TYPE_RX,
+ MLXCX_EQ_TYPE_TX
+} mlxcx_eventq_type_t;
+
+typedef struct mlxcx_event_queue {
+ kmutex_t mleq_mtx;
+ mlxcx_t *mleq_mlx;
+ mlxcx_eventq_state_t mleq_state;
+ mlxcx_eventq_type_t mleq_type;
+
+ mlxcx_dma_buffer_t mleq_dma;
+
+ size_t mleq_entshift;
+ size_t mleq_nents;
+ mlxcx_eventq_ent_t *mleq_ent;
+ uint32_t mleq_cc; /* consumer counter */
+ uint32_t mleq_cc_armed;
+
+ uint32_t mleq_events;
+
+ uint32_t mleq_badintrs;
+
+ /* Hardware eq number */
+ uint_t mleq_num;
+ /* Index into the mlxcx_t's interrupts array */
+ uint_t mleq_intr_index;
+
+ /* UAR region that has this EQ's doorbell in it */
+ mlxcx_uar_t *mleq_uar;
+
+ /* Tree of CQn => mlxcx_completion_queue_t */
+ avl_tree_t mleq_cqs;
+
+ uint32_t mleq_check_disarm_cc;
+ uint_t mleq_check_disarm_cnt;
+} mlxcx_event_queue_t;
+
+typedef enum {
+ MLXCX_TIS_CREATED = 1 << 0,
+ MLXCX_TIS_DESTROYED = 1 << 1,
+} mlxcx_tis_state_t;
+
+typedef struct mlxcx_tis {
+ mlxcx_tis_state_t mltis_state;
+ list_node_t mltis_entry;
+ uint_t mltis_num;
+ mlxcx_tdom_t *mltis_tdom;
+} mlxcx_tis_t;
+
+typedef enum {
+ MLXCX_BUFFER_INIT,
+ MLXCX_BUFFER_FREE,
+ MLXCX_BUFFER_ON_WQ,
+ MLXCX_BUFFER_ON_LOAN,
+ MLXCX_BUFFER_ON_CHAIN,
+} mlxcx_buffer_state_t;
+
+typedef struct mlxcx_buf_shard {
+ list_node_t mlbs_entry;
+ kmutex_t mlbs_mtx;
+ list_t mlbs_busy;
+ list_t mlbs_free;
+ kcondvar_t mlbs_free_nonempty;
+} mlxcx_buf_shard_t;
+
+typedef struct mlxcx_buffer {
+ mlxcx_buf_shard_t *mlb_shard;
+ list_node_t mlb_entry;
+ list_node_t mlb_cq_entry;
+
+ struct mlxcx_buffer *mlb_tx_head; /* head of tx chain */
+ list_t mlb_tx_chain;
+ list_node_t mlb_tx_chain_entry;
+
+ boolean_t mlb_foreign;
+ size_t mlb_used;
+ mblk_t *mlb_tx_mp;
+
+ mlxcx_t *mlb_mlx;
+ mlxcx_buffer_state_t mlb_state;
+ uint_t mlb_wqe_index;
+ mlxcx_dma_buffer_t mlb_dma;
+ mblk_t *mlb_mp;
+ frtn_t mlb_frtn;
+} mlxcx_buffer_t;
+
+typedef enum {
+ MLXCX_CQ_ALLOC = 1 << 0,
+ MLXCX_CQ_CREATED = 1 << 1,
+ MLXCX_CQ_DESTROYED = 1 << 2,
+ MLXCX_CQ_EQAVL = 1 << 3,
+ MLXCX_CQ_BLOCKED_MAC = 1 << 4,
+ MLXCX_CQ_TEARDOWN = 1 << 5,
+ MLXCX_CQ_POLLING = 1 << 6,
+ MLXCX_CQ_ARMED = 1 << 7,
+} mlxcx_completionq_state_t;
+
+typedef struct mlxcx_work_queue mlxcx_work_queue_t;
+
+typedef struct mlxcx_completion_queue {
+ kmutex_t mlcq_mtx;
+ mlxcx_t *mlcq_mlx;
+ mlxcx_completionq_state_t mlcq_state;
+
+ mlxcx_port_stats_t *mlcq_stats;
+
+ list_node_t mlcq_entry;
+ avl_node_t mlcq_eq_entry;
+
+ uint_t mlcq_num;
+
+ mlxcx_work_queue_t *mlcq_wq;
+ mlxcx_event_queue_t *mlcq_eq;
+
+ /* UAR region that has this CQ's UAR doorbell in it */
+ mlxcx_uar_t *mlcq_uar;
+
+ mlxcx_dma_buffer_t mlcq_dma;
+
+ size_t mlcq_entshift;
+ size_t mlcq_nents;
+ mlxcx_completionq_ent_t *mlcq_ent;
+ uint32_t mlcq_cc; /* consumer counter */
+ uint32_t mlcq_cc_armed; /* cc at last arm */
+ uint32_t mlcq_ec; /* event counter */
+ uint32_t mlcq_ec_armed; /* ec at last arm */
+
+ mlxcx_dma_buffer_t mlcq_doorbell_dma;
+ mlxcx_completionq_doorbell_t *mlcq_doorbell;
+
+ uint64_t mlcq_bufcnt;
+ size_t mlcq_bufhwm;
+ size_t mlcq_buflwm;
+ list_t mlcq_buffers;
+ kmutex_t mlcq_bufbmtx;
+ list_t mlcq_buffers_b;
+
+ uint_t mlcq_check_disarm_cnt;
+ uint64_t mlcq_check_disarm_cc;
+
+ uint_t mlcq_cqemod_period_usec;
+ uint_t mlcq_cqemod_count;
+
+ mac_ring_handle_t mlcq_mac_hdl;
+ uint64_t mlcq_mac_gen;
+
+ boolean_t mlcq_fm_repd_qstate;
+} mlxcx_completion_queue_t;
+
+typedef enum {
+ MLXCX_WQ_ALLOC = 1 << 0,
+ MLXCX_WQ_CREATED = 1 << 1,
+ MLXCX_WQ_STARTED = 1 << 2,
+ MLXCX_WQ_DESTROYED = 1 << 3,
+ MLXCX_WQ_TEARDOWN = 1 << 4,
+ MLXCX_WQ_BUFFERS = 1 << 5,
+} mlxcx_workq_state_t;
+
+typedef enum {
+ MLXCX_WQ_TYPE_SENDQ = 1,
+ MLXCX_WQ_TYPE_RECVQ
+} mlxcx_workq_type_t;
+
+typedef struct mlxcx_ring_group mlxcx_ring_group_t;
+
+struct mlxcx_work_queue {
+ kmutex_t mlwq_mtx;
+ mlxcx_t *mlwq_mlx;
+ mlxcx_workq_type_t mlwq_type;
+ mlxcx_workq_state_t mlwq_state;
+
+ list_node_t mlwq_entry;
+ list_node_t mlwq_group_entry;
+
+ mlxcx_ring_group_t *mlwq_group;
+
+ uint_t mlwq_num;
+
+ mlxcx_completion_queue_t *mlwq_cq;
+ mlxcx_pd_t *mlwq_pd;
+
+ /* Required for send queues */
+ mlxcx_tis_t *mlwq_tis;
+
+ /* UAR region that has this WQ's blueflame buffers in it */
+ mlxcx_uar_t *mlwq_uar;
+
+ mlxcx_dma_buffer_t mlwq_dma;
+
+ mlxcx_eth_inline_mode_t mlwq_inline_mode;
+ size_t mlwq_entshift;
+ size_t mlwq_nents;
+ /* Discriminate based on mwq_type */
+ union {
+ mlxcx_sendq_ent_t *mlwq_send_ent;
+ mlxcx_sendq_extra_ent_t *mlwq_send_extra_ent;
+ mlxcx_recvq_ent_t *mlwq_recv_ent;
+ mlxcx_sendq_bf_t *mlwq_bf_ent;
+ };
+ uint64_t mlwq_pc; /* producer counter */
+
+ mlxcx_dma_buffer_t mlwq_doorbell_dma;
+ mlxcx_workq_doorbell_t *mlwq_doorbell;
+
+ mlxcx_buf_shard_t *mlwq_bufs;
+ mlxcx_buf_shard_t *mlwq_foreign_bufs;
+
+ boolean_t mlwq_fm_repd_qstate;
+};
+
+#define MLXCX_RQT_MAX_SIZE 64
+
+typedef enum {
+ MLXCX_RQT_CREATED = 1 << 0,
+ MLXCX_RQT_DESTROYED = 1 << 1,
+ MLXCX_RQT_DIRTY = 1 << 2,
+} mlxcx_rqtable_state_t;
+
+typedef struct mlxcx_rqtable {
+ mlxcx_rqtable_state_t mlrqt_state;
+ list_node_t mlrqt_entry;
+ uint_t mlrqt_num;
+
+ size_t mlrqt_max;
+ size_t mlrqt_used;
+
+ size_t mlrqt_rq_size;
+ mlxcx_work_queue_t **mlrqt_rq;
+} mlxcx_rqtable_t;
+
+typedef enum {
+ MLXCX_TIR_CREATED = 1 << 0,
+ MLXCX_TIR_DESTROYED = 1 << 1,
+} mlxcx_tir_state_t;
+
+typedef struct mlxcx_tir {
+ mlxcx_tir_state_t mltir_state;
+ list_node_t mltir_entry;
+ uint_t mltir_num;
+ mlxcx_tdom_t *mltir_tdom;
+ mlxcx_tir_type_t mltir_type;
+ union {
+ mlxcx_rqtable_t *mltir_rqtable;
+ mlxcx_work_queue_t *mltir_rq;
+ };
+ mlxcx_tir_hash_fn_t mltir_hash_fn;
+ uint8_t mltir_toeplitz_key[40];
+ mlxcx_tir_rx_hash_l3_type_t mltir_l3_type;
+ mlxcx_tir_rx_hash_l4_type_t mltir_l4_type;
+ mlxcx_tir_rx_hash_fields_t mltir_hash_fields;
+} mlxcx_tir_t;
+
+typedef enum {
+ MLXCX_FLOW_GROUP_CREATED = 1 << 0,
+ MLXCX_FLOW_GROUP_BUSY = 1 << 1,
+ MLXCX_FLOW_GROUP_DESTROYED = 1 << 2,
+} mlxcx_flow_group_state_t;
+
+typedef enum {
+ MLXCX_FLOW_MATCH_SMAC = 1 << 0,
+ MLXCX_FLOW_MATCH_DMAC = 1 << 1,
+ MLXCX_FLOW_MATCH_VLAN = 1 << 2,
+ MLXCX_FLOW_MATCH_VID = 1 << 3,
+ MLXCX_FLOW_MATCH_IP_VER = 1 << 4,
+ MLXCX_FLOW_MATCH_SRCIP = 1 << 5,
+ MLXCX_FLOW_MATCH_DSTIP = 1 << 6,
+ MLXCX_FLOW_MATCH_IP_PROTO = 1 << 7,
+ MLXCX_FLOW_MATCH_SQN = 1 << 8,
+ MLXCX_FLOW_MATCH_VXLAN = 1 << 9,
+} mlxcx_flow_mask_t;
+
+struct mlxcx_flow_group {
+ list_node_t mlfg_entry;
+ list_node_t mlfg_role_entry;
+ mlxcx_flow_group_state_t mlfg_state;
+ mlxcx_flow_table_t *mlfg_table;
+ uint_t mlfg_num;
+ size_t mlfg_start_idx;
+ size_t mlfg_size;
+ size_t mlfg_avail;
+ list_t mlfg_entries;
+ mlxcx_flow_mask_t mlfg_mask;
+};
+
+typedef enum {
+ MLXCX_FLOW_ENTRY_RESERVED = 1 << 0,
+ MLXCX_FLOW_ENTRY_CREATED = 1 << 1,
+ MLXCX_FLOW_ENTRY_DELETED = 1 << 2,
+ MLXCX_FLOW_ENTRY_DIRTY = 1 << 3,
+} mlxcx_flow_entry_state_t;
+
+typedef struct {
+ mlxcx_tir_t *mlfed_tir;
+ mlxcx_flow_table_t *mlfed_flow;
+} mlxcx_flow_entry_dest_t;
+
+typedef struct mlxcx_flow_entry {
+ list_node_t mlfe_group_entry;
+ avl_node_t mlfe_dmac_entry;
+ mlxcx_flow_entry_state_t mlfe_state;
+ mlxcx_flow_table_t *mlfe_table;
+ mlxcx_flow_group_t *mlfe_group;
+ uint_t mlfe_index;
+
+ mlxcx_flow_action_t mlfe_action;
+
+ /* Criteria for match */
+ uint8_t mlfe_smac[ETHERADDRL];
+ uint8_t mlfe_dmac[ETHERADDRL];
+
+ mlxcx_vlan_type_t mlfe_vlan_type;
+ uint16_t mlfe_vid;
+
+ uint_t mlfe_ip_version;
+ uint8_t mlfe_srcip[IPV6_ADDR_LEN];
+ uint8_t mlfe_dstip[IPV6_ADDR_LEN];
+
+ uint_t mlfe_ip_proto;
+ uint16_t mlfe_sport;
+ uint16_t mlfe_dport;
+
+ uint32_t mlfe_sqn;
+ uint32_t mlfe_vxlan_vni;
+
+ /* Destinations */
+ size_t mlfe_ndest;
+ mlxcx_flow_entry_dest_t mlfe_dest[MLXCX_FLOW_MAX_DESTINATIONS];
+
+ /*
+	 * mlxcx_group_mac_ts joining this entry to N ring groups;
+	 * only used by FEs on the root rx flow table.
+ */
+ list_t mlfe_ring_groups;
+} mlxcx_flow_entry_t;
+
+typedef enum {
+ MLXCX_FLOW_TABLE_CREATED = 1 << 0,
+ MLXCX_FLOW_TABLE_DESTROYED = 1 << 1,
+ MLXCX_FLOW_TABLE_ROOT = 1 << 2
+} mlxcx_flow_table_state_t;
+
+struct mlxcx_flow_table {
+ kmutex_t mlft_mtx;
+ mlxcx_flow_table_state_t mlft_state;
+ uint_t mlft_level;
+ uint_t mlft_num;
+ mlxcx_flow_table_type_t mlft_type;
+
+ mlxcx_port_t *mlft_port;
+
+ size_t mlft_entshift;
+ size_t mlft_nents;
+
+ size_t mlft_entsize;
+ mlxcx_flow_entry_t *mlft_ent;
+
+ /* First entry not yet claimed by a group */
+ size_t mlft_next_ent;
+
+ list_t mlft_groups;
+};
+
+typedef enum {
+ MLXCX_GROUP_RX,
+ MLXCX_GROUP_TX
+} mlxcx_group_type_t;
+
+typedef enum {
+ MLXCX_GROUP_INIT = 1 << 0,
+ MLXCX_GROUP_WQS = 1 << 1,
+ MLXCX_GROUP_TIRTIS = 1 << 2,
+ MLXCX_GROUP_FLOWS = 1 << 3,
+ MLXCX_GROUP_RUNNING = 1 << 4,
+ MLXCX_GROUP_RQT = 1 << 5,
+} mlxcx_group_state_t;
+
+#define MLXCX_RX_HASH_FT_SIZE_SHIFT 4
+
+typedef enum {
+ MLXCX_TIR_ROLE_IPv4 = 0,
+ MLXCX_TIR_ROLE_IPv6,
+ MLXCX_TIR_ROLE_TCPv4,
+ MLXCX_TIR_ROLE_TCPv6,
+ MLXCX_TIR_ROLE_UDPv4,
+ MLXCX_TIR_ROLE_UDPv6,
+ MLXCX_TIR_ROLE_OTHER,
+
+ MLXCX_TIRS_PER_GROUP
+} mlxcx_tir_role_t;
+
+typedef struct {
+ avl_node_t mlgm_group_entry;
+ list_node_t mlgm_fe_entry;
+ mlxcx_ring_group_t *mlgm_group;
+ uint8_t mlgm_mac[6];
+ mlxcx_flow_entry_t *mlgm_fe;
+} mlxcx_group_mac_t;
+
+typedef struct {
+ list_node_t mlgv_entry;
+ boolean_t mlgv_tagged;
+ uint16_t mlgv_vid;
+ mlxcx_flow_entry_t *mlgv_fe;
+} mlxcx_group_vlan_t;
+
+struct mlxcx_ring_group {
+ kmutex_t mlg_mtx;
+ mlxcx_t *mlg_mlx;
+ mlxcx_group_state_t mlg_state;
+ mlxcx_group_type_t mlg_type;
+
+ mac_group_handle_t mlg_mac_hdl;
+
+ union {
+ mlxcx_tis_t mlg_tis;
+ mlxcx_tir_t mlg_tir[MLXCX_TIRS_PER_GROUP];
+ };
+ mlxcx_port_t *mlg_port;
+
+ size_t mlg_nwqs;
+ size_t mlg_wqs_size;
+ mlxcx_work_queue_t *mlg_wqs;
+
+ mlxcx_rqtable_t *mlg_rqt;
+
+ /*
+ * Flow table for matching VLAN IDs
+ */
+ mlxcx_flow_table_t *mlg_rx_vlan_ft;
+ mlxcx_flow_group_t *mlg_rx_vlan_fg;
+ mlxcx_flow_group_t *mlg_rx_vlan_def_fg;
+ mlxcx_flow_group_t *mlg_rx_vlan_promisc_fg;
+ list_t mlg_rx_vlans;
+
+ /*
+ * Flow table for separating out by protocol before hashing
+ */
+ mlxcx_flow_table_t *mlg_rx_hash_ft;
+
+ /*
+ * Links to flow entries on the root flow table which are pointing to
+ * our rx_vlan_ft.
+ */
+ avl_tree_t mlg_rx_macs;
+};
+
+typedef enum mlxcx_cmd_state {
+ MLXCX_CMD_S_DONE = 1 << 0,
+ MLXCX_CMD_S_ERROR = 1 << 1
+} mlxcx_cmd_state_t;
+
+typedef struct mlxcx_cmd {
+ struct mlxcx *mlcmd_mlxp;
+ kmutex_t mlcmd_lock;
+ kcondvar_t mlcmd_cv;
+
+ uint8_t mlcmd_token;
+ mlxcx_cmd_op_t mlcmd_op;
+
+ /*
+ * Command data and extended mailboxes for responses.
+ */
+ const void *mlcmd_in;
+ uint32_t mlcmd_inlen;
+ void *mlcmd_out;
+ uint32_t mlcmd_outlen;
+ list_t mlcmd_mbox_in;
+ uint8_t mlcmd_nboxes_in;
+ list_t mlcmd_mbox_out;
+ uint8_t mlcmd_nboxes_out;
+ /*
+ * Status information.
+ */
+ mlxcx_cmd_state_t mlcmd_state;
+ uint8_t mlcmd_status;
+} mlxcx_cmd_t;
+
+/*
+ * Our view of capabilities.
+ */
+typedef struct mlxcx_hca_cap {
+ mlxcx_hca_cap_mode_t mhc_mode;
+ mlxcx_hca_cap_type_t mhc_type;
+ union {
+ uint8_t mhc_bulk[MLXCX_HCA_CAP_SIZE];
+ mlxcx_hca_cap_general_caps_t mhc_general;
+ mlxcx_hca_cap_eth_caps_t mhc_eth;
+ mlxcx_hca_cap_flow_caps_t mhc_flow;
+ };
+} mlxcx_hca_cap_t;
+
+typedef struct {
+ /* Cooked values */
+ boolean_t mlc_checksum;
+ boolean_t mlc_lso;
+ boolean_t mlc_vxlan;
+ size_t mlc_max_lso_size;
+ size_t mlc_max_rqt_size;
+
+ size_t mlc_max_rx_ft_shift;
+ size_t mlc_max_rx_fe_dest;
+ size_t mlc_max_rx_flows;
+
+ size_t mlc_max_tir;
+
+ /* Raw caps data */
+ mlxcx_hca_cap_t mlc_hca_cur;
+ mlxcx_hca_cap_t mlc_hca_max;
+ mlxcx_hca_cap_t mlc_ether_cur;
+ mlxcx_hca_cap_t mlc_ether_max;
+ mlxcx_hca_cap_t mlc_nic_flow_cur;
+ mlxcx_hca_cap_t mlc_nic_flow_max;
+} mlxcx_caps_t;
+
+typedef struct {
+ uint_t mldp_eq_size_shift;
+ uint_t mldp_cq_size_shift;
+ uint_t mldp_rq_size_shift;
+ uint_t mldp_sq_size_shift;
+ uint_t mldp_cqemod_period_usec;
+ uint_t mldp_cqemod_count;
+ uint_t mldp_intrmod_period_usec;
+ uint_t mldp_rx_ngroups_large;
+ uint_t mldp_rx_ngroups_small;
+ uint_t mldp_rx_nrings_per_large_group;
+ uint_t mldp_rx_nrings_per_small_group;
+ uint_t mldp_tx_ngroups;
+ uint_t mldp_tx_nrings_per_group;
+ uint_t mldp_ftbl_root_size_shift;
+ size_t mldp_tx_bind_threshold;
+ uint_t mldp_ftbl_vlan_size_shift;
+ uint64_t mldp_eq_check_interval_sec;
+ uint64_t mldp_cq_check_interval_sec;
+ uint64_t mldp_wq_check_interval_sec;
+} mlxcx_drv_props_t;
+
+typedef enum {
+ MLXCX_ATTACH_FM = 1 << 0,
+ MLXCX_ATTACH_PCI_CONFIG = 1 << 1,
+ MLXCX_ATTACH_REGS = 1 << 2,
+ MLXCX_ATTACH_CMD = 1 << 3,
+ MLXCX_ATTACH_ENABLE_HCA = 1 << 4,
+ MLXCX_ATTACH_PAGE_LIST = 1 << 5,
+ MLXCX_ATTACH_INIT_HCA = 1 << 6,
+ MLXCX_ATTACH_UAR_PD_TD = 1 << 7,
+ MLXCX_ATTACH_INTRS = 1 << 8,
+ MLXCX_ATTACH_PORTS = 1 << 9,
+ MLXCX_ATTACH_MAC_HDL = 1 << 10,
+ MLXCX_ATTACH_CQS = 1 << 11,
+ MLXCX_ATTACH_WQS = 1 << 12,
+ MLXCX_ATTACH_GROUPS = 1 << 13,
+ MLXCX_ATTACH_BUFS = 1 << 14,
+ MLXCX_ATTACH_CAPS = 1 << 15,
+ MLXCX_ATTACH_CHKTIMERS = 1 << 16,
+} mlxcx_attach_progress_t;
+
+struct mlxcx {
+ /* entry on the mlxcx_glist */
+ list_node_t mlx_gentry;
+
+ dev_info_t *mlx_dip;
+ int mlx_inst;
+ mlxcx_attach_progress_t mlx_attach;
+
+ mlxcx_drv_props_t mlx_props;
+
+ /*
+ * Misc. data
+ */
+ uint16_t mlx_fw_maj;
+ uint16_t mlx_fw_min;
+ uint16_t mlx_fw_rev;
+ uint16_t mlx_cmd_rev;
+
+ /*
+ * Various capabilities of hardware.
+ */
+ mlxcx_caps_t *mlx_caps;
+
+ uint_t mlx_max_sdu;
+ uint_t mlx_sdu;
+
+ /*
+ * FM State
+ */
+ int mlx_fm_caps;
+
+ /*
+ * PCI Data
+ */
+ ddi_acc_handle_t mlx_cfg_handle;
+ ddi_acc_handle_t mlx_regs_handle;
+ caddr_t mlx_regs_base;
+
+ /*
+ * MAC handle
+ */
+ mac_handle_t mlx_mac_hdl;
+
+ /*
+ * Main command queue for issuing general FW control commands.
+ */
+ mlxcx_cmd_queue_t mlx_cmd;
+
+ /*
+ * Interrupts
+ */
+ uint_t mlx_intr_pri;
+ uint_t mlx_intr_type; /* always MSI-X */
+ int mlx_intr_count;
+ size_t mlx_intr_size; /* allocation size */
+ ddi_intr_handle_t *mlx_intr_handles;
+
+ /*
+ * Basic firmware resources which we use for a variety of things.
+ * The UAR is a reference to a page where CQ and EQ doorbells are
+ * located. It also holds all the BlueFlame stuff (which we don't
+ * use).
+ */
+ mlxcx_uar_t mlx_uar;
+ /*
+ * The PD (Protection Domain) and TDOM (Transport Domain) are opaque
+ * entities to us (they're Infiniband constructs we don't actually care
+ * about) -- we just allocate them and shove their ID numbers in
+ * whenever we're asked for one.
+ *
+ * The "reserved" LKEY is what we should put in queue entries that
+ * have references to memory to indicate that they're using linear
+ * addresses (comes from the QUERY_SPECIAL_CONTEXTS cmd).
+ */
+ mlxcx_pd_t mlx_pd;
+ mlxcx_tdom_t mlx_tdom;
+ uint_t mlx_rsvd_lkey;
+
+ /*
+ * Our event queues. These are 1:1 with interrupts.
+ */
+ size_t mlx_eqs_size; /* allocation size */
+ mlxcx_event_queue_t *mlx_eqs;
+
+ /*
+ * Page list. These represent the set of 4k pages we've given to
+ * hardware.
+ *
+ * We can add to this list at the request of hardware from interrupt
+ * context (the PAGE_REQUEST event), so it's protected by pagemtx.
+ */
+ kmutex_t mlx_pagemtx;
+ uint_t mlx_npages;
+ avl_tree_t mlx_pages;
+
+ /*
+ * Port state
+ */
+ uint_t mlx_nports;
+ size_t mlx_ports_size;
+ mlxcx_port_t *mlx_ports;
+
+ /*
+ * Completion queues (CQs). These are also indexed off the
+ * event_queue_ts that they each report to.
+ */
+ list_t mlx_cqs;
+
+ uint_t mlx_next_eq;
+
+ /*
+ * Work queues (WQs).
+ */
+ list_t mlx_wqs;
+
+ /*
+ * Ring groups
+ */
+ size_t mlx_rx_ngroups;
+ size_t mlx_rx_groups_size;
+ mlxcx_ring_group_t *mlx_rx_groups;
+
+ size_t mlx_tx_ngroups;
+ size_t mlx_tx_groups_size;
+ mlxcx_ring_group_t *mlx_tx_groups;
+
+ kmem_cache_t *mlx_bufs_cache;
+ list_t mlx_buf_shards;
+
+ ddi_periodic_t mlx_eq_checktimer;
+ ddi_periodic_t mlx_cq_checktimer;
+ ddi_periodic_t mlx_wq_checktimer;
+};
+
+/*
+ * Register access
+ */
+extern uint16_t mlxcx_get16(mlxcx_t *, uintptr_t);
+extern uint32_t mlxcx_get32(mlxcx_t *, uintptr_t);
+extern uint64_t mlxcx_get64(mlxcx_t *, uintptr_t);
+
+extern void mlxcx_put32(mlxcx_t *, uintptr_t, uint32_t);
+extern void mlxcx_put64(mlxcx_t *, uintptr_t, uint64_t);
+
+extern void mlxcx_uar_put32(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint32_t);
+extern void mlxcx_uar_put64(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint64_t);
+
+/*
+ * Logging functions.
+ */
+extern void mlxcx_warn(mlxcx_t *, const char *, ...);
+extern void mlxcx_note(mlxcx_t *, const char *, ...);
+extern void mlxcx_panic(mlxcx_t *, const char *, ...);
+
+extern void mlxcx_fm_ereport(mlxcx_t *, const char *);
+
+extern void mlxcx_check_sq(mlxcx_t *, mlxcx_work_queue_t *);
+extern void mlxcx_check_rq(mlxcx_t *, mlxcx_work_queue_t *);
+
+/*
+ * DMA Functions
+ */
+extern void mlxcx_dma_free(mlxcx_dma_buffer_t *);
+extern boolean_t mlxcx_dma_alloc(mlxcx_t *, mlxcx_dma_buffer_t *,
+ ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t, size_t, boolean_t);
+extern boolean_t mlxcx_dma_init(mlxcx_t *, mlxcx_dma_buffer_t *,
+ ddi_dma_attr_t *, boolean_t);
+extern boolean_t mlxcx_dma_bind_mblk(mlxcx_t *, mlxcx_dma_buffer_t *,
+ const mblk_t *, size_t, boolean_t);
+extern boolean_t mlxcx_dma_alloc_offset(mlxcx_t *, mlxcx_dma_buffer_t *,
+ ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t,
+ size_t, size_t, boolean_t);
+extern void mlxcx_dma_unbind(mlxcx_t *, mlxcx_dma_buffer_t *);
+extern void mlxcx_dma_acc_attr(mlxcx_t *, ddi_device_acc_attr_t *);
+extern void mlxcx_dma_page_attr(mlxcx_t *, ddi_dma_attr_t *);
+extern void mlxcx_dma_queue_attr(mlxcx_t *, ddi_dma_attr_t *);
+extern void mlxcx_dma_qdbell_attr(mlxcx_t *, ddi_dma_attr_t *);
+extern void mlxcx_dma_buf_attr(mlxcx_t *, ddi_dma_attr_t *);
+
+extern boolean_t mlxcx_give_pages(mlxcx_t *, int32_t);
+
+static inline const ddi_dma_cookie_t *
+mlxcx_dma_cookie_iter(const mlxcx_dma_buffer_t *db,
+ const ddi_dma_cookie_t *prev)
+{
+ ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
+ return (ddi_dma_cookie_iter(db->mxdb_dma_handle, prev));
+}
+
+static inline const ddi_dma_cookie_t *
+mlxcx_dma_cookie_one(const mlxcx_dma_buffer_t *db)
+{
+ ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
+ return (ddi_dma_cookie_one(db->mxdb_dma_handle));
+}
+
+/*
+ * From mlxcx_intr.c
+ */
+extern boolean_t mlxcx_intr_setup(mlxcx_t *);
+extern void mlxcx_intr_teardown(mlxcx_t *);
+extern void mlxcx_arm_eq(mlxcx_t *, mlxcx_event_queue_t *);
+extern void mlxcx_arm_cq(mlxcx_t *, mlxcx_completion_queue_t *);
+
+extern mblk_t *mlxcx_rx_poll(mlxcx_t *, mlxcx_completion_queue_t *, size_t);
+
+/*
+ * From mlxcx_gld.c
+ */
+extern boolean_t mlxcx_register_mac(mlxcx_t *);
+
+/*
+ * From mlxcx_ring.c
+ */
+extern boolean_t mlxcx_cq_alloc_dma(mlxcx_t *, mlxcx_completion_queue_t *);
+extern void mlxcx_cq_rele_dma(mlxcx_t *, mlxcx_completion_queue_t *);
+extern boolean_t mlxcx_wq_alloc_dma(mlxcx_t *, mlxcx_work_queue_t *);
+extern void mlxcx_wq_rele_dma(mlxcx_t *, mlxcx_work_queue_t *);
+
+extern boolean_t mlxcx_buf_create(mlxcx_t *, mlxcx_buf_shard_t *,
+ mlxcx_buffer_t **);
+extern boolean_t mlxcx_buf_create_foreign(mlxcx_t *, mlxcx_buf_shard_t *,
+ mlxcx_buffer_t **);
+extern void mlxcx_buf_take(mlxcx_t *, mlxcx_work_queue_t *, mlxcx_buffer_t **);
+extern size_t mlxcx_buf_take_n(mlxcx_t *, mlxcx_work_queue_t *,
+ mlxcx_buffer_t **, size_t);
+extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
+extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+
+extern boolean_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
+ mblk_t *, size_t, mlxcx_buffer_t **);
+
+extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
+extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
+
+extern boolean_t mlxcx_rx_group_start(mlxcx_t *, mlxcx_ring_group_t *);
+extern boolean_t mlxcx_tx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
+ mlxcx_work_queue_t *);
+extern boolean_t mlxcx_rx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
+ mlxcx_work_queue_t *);
+
+extern boolean_t mlxcx_rq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
+ mlxcx_buffer_t *);
+extern boolean_t mlxcx_rq_add_buffers(mlxcx_t *, mlxcx_work_queue_t *,
+ mlxcx_buffer_t **, size_t);
+extern boolean_t mlxcx_sq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
+ uint8_t *, size_t, uint32_t, mlxcx_buffer_t *);
+extern boolean_t mlxcx_sq_add_nop(mlxcx_t *, mlxcx_work_queue_t *);
+extern void mlxcx_rq_refill(mlxcx_t *, mlxcx_work_queue_t *);
+
+extern void mlxcx_teardown_groups(mlxcx_t *);
+extern void mlxcx_wq_teardown(mlxcx_t *, mlxcx_work_queue_t *);
+extern void mlxcx_cq_teardown(mlxcx_t *, mlxcx_completion_queue_t *);
+extern void mlxcx_teardown_rx_group(mlxcx_t *, mlxcx_ring_group_t *);
+extern void mlxcx_teardown_tx_group(mlxcx_t *, mlxcx_ring_group_t *);
+
+extern void mlxcx_tx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
+ mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
+extern mblk_t *mlxcx_rx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
+ mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
+
+extern mlxcx_buf_shard_t *mlxcx_mlbs_create(mlxcx_t *);
+
+/*
+ * Flow mgmt
+ */
+extern boolean_t mlxcx_add_umcast_entry(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_ring_group_t *, const uint8_t *);
+extern boolean_t mlxcx_remove_umcast_entry(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_ring_group_t *, const uint8_t *);
+extern void mlxcx_remove_all_umcast_entries(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_ring_group_t *);
+extern boolean_t mlxcx_setup_flow_group(mlxcx_t *, mlxcx_flow_table_t *,
+ mlxcx_flow_group_t *);
+extern void mlxcx_teardown_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
+
+extern void mlxcx_remove_all_vlan_entries(mlxcx_t *, mlxcx_ring_group_t *);
+extern boolean_t mlxcx_remove_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
+ boolean_t, uint16_t);
+extern boolean_t mlxcx_add_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
+ boolean_t, uint16_t);
+
+/*
+ * Command functions
+ */
+extern boolean_t mlxcx_cmd_queue_init(mlxcx_t *);
+extern void mlxcx_cmd_queue_fini(mlxcx_t *);
+
+extern boolean_t mlxcx_cmd_enable_hca(mlxcx_t *);
+extern boolean_t mlxcx_cmd_disable_hca(mlxcx_t *);
+
+extern boolean_t mlxcx_cmd_query_issi(mlxcx_t *, uint_t *);
+extern boolean_t mlxcx_cmd_set_issi(mlxcx_t *, uint16_t);
+
+extern boolean_t mlxcx_cmd_query_pages(mlxcx_t *, uint_t, int32_t *);
+extern boolean_t mlxcx_cmd_give_pages(mlxcx_t *, uint_t, int32_t,
+ mlxcx_dev_page_t **);
+extern boolean_t mlxcx_cmd_return_pages(mlxcx_t *, int32_t, uint64_t *,
+ int32_t *);
+
+extern boolean_t mlxcx_cmd_query_hca_cap(mlxcx_t *, mlxcx_hca_cap_type_t,
+ mlxcx_hca_cap_mode_t, mlxcx_hca_cap_t *);
+
+extern boolean_t mlxcx_cmd_set_driver_version(mlxcx_t *, const char *);
+
+extern boolean_t mlxcx_cmd_init_hca(mlxcx_t *);
+extern boolean_t mlxcx_cmd_teardown_hca(mlxcx_t *);
+
+extern boolean_t mlxcx_cmd_alloc_uar(mlxcx_t *, mlxcx_uar_t *);
+extern boolean_t mlxcx_cmd_dealloc_uar(mlxcx_t *, mlxcx_uar_t *);
+
+extern boolean_t mlxcx_cmd_alloc_pd(mlxcx_t *, mlxcx_pd_t *);
+extern boolean_t mlxcx_cmd_dealloc_pd(mlxcx_t *, mlxcx_pd_t *);
+
+extern boolean_t mlxcx_cmd_alloc_tdom(mlxcx_t *, mlxcx_tdom_t *);
+extern boolean_t mlxcx_cmd_dealloc_tdom(mlxcx_t *, mlxcx_tdom_t *);
+
+extern boolean_t mlxcx_cmd_create_eq(mlxcx_t *, mlxcx_event_queue_t *);
+extern boolean_t mlxcx_cmd_destroy_eq(mlxcx_t *, mlxcx_event_queue_t *);
+extern boolean_t mlxcx_cmd_query_eq(mlxcx_t *, mlxcx_event_queue_t *,
+ mlxcx_eventq_ctx_t *);
+
+extern boolean_t mlxcx_cmd_create_cq(mlxcx_t *, mlxcx_completion_queue_t *);
+extern boolean_t mlxcx_cmd_destroy_cq(mlxcx_t *, mlxcx_completion_queue_t *);
+extern boolean_t mlxcx_cmd_query_cq(mlxcx_t *, mlxcx_completion_queue_t *,
+ mlxcx_completionq_ctx_t *);
+
+extern boolean_t mlxcx_cmd_create_rq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_start_rq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_stop_rq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_destroy_rq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_query_rq(mlxcx_t *, mlxcx_work_queue_t *,
+ mlxcx_rq_ctx_t *);
+
+extern boolean_t mlxcx_cmd_create_tir(mlxcx_t *, mlxcx_tir_t *);
+extern boolean_t mlxcx_cmd_destroy_tir(mlxcx_t *, mlxcx_tir_t *);
+
+extern boolean_t mlxcx_cmd_create_sq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_start_sq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_stop_sq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_destroy_sq(mlxcx_t *, mlxcx_work_queue_t *);
+extern boolean_t mlxcx_cmd_query_sq(mlxcx_t *, mlxcx_work_queue_t *,
+ mlxcx_sq_ctx_t *);
+
+extern boolean_t mlxcx_cmd_create_tis(mlxcx_t *, mlxcx_tis_t *);
+extern boolean_t mlxcx_cmd_destroy_tis(mlxcx_t *, mlxcx_tis_t *);
+
+extern boolean_t mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_special_ctxs(mlxcx_t *);
+
+extern boolean_t mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *,
+ mlxcx_modify_nic_vport_ctx_fields_t);
+
+extern boolean_t mlxcx_cmd_create_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
+extern boolean_t mlxcx_cmd_destroy_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
+extern boolean_t mlxcx_cmd_set_flow_table_root(mlxcx_t *, mlxcx_flow_table_t *);
+
+extern boolean_t mlxcx_cmd_create_flow_group(mlxcx_t *, mlxcx_flow_group_t *);
+extern boolean_t mlxcx_cmd_set_flow_table_entry(mlxcx_t *,
+ mlxcx_flow_entry_t *);
+extern boolean_t mlxcx_cmd_delete_flow_table_entry(mlxcx_t *,
+ mlxcx_flow_entry_t *);
+extern boolean_t mlxcx_cmd_destroy_flow_group(mlxcx_t *, mlxcx_flow_group_t *);
+
+extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
+ mlxcx_register_id_t, mlxcx_register_data_t *);
+extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
+extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
+
+extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
+
+extern boolean_t mlxcx_cmd_create_rqt(mlxcx_t *, mlxcx_rqtable_t *);
+extern boolean_t mlxcx_cmd_destroy_rqt(mlxcx_t *, mlxcx_rqtable_t *);
+
+extern boolean_t mlxcx_cmd_set_int_mod(mlxcx_t *, uint_t, uint_t);
+
+extern boolean_t mlxcx_cmd_query_module_status(mlxcx_t *, uint_t,
+ mlxcx_module_status_t *, mlxcx_module_error_type_t *);
+extern boolean_t mlxcx_cmd_set_port_led(mlxcx_t *, mlxcx_port_t *, uint16_t);
+
+/* Comparator for avl_ts */
+extern int mlxcx_cq_compare(const void *, const void *);
+extern int mlxcx_dmac_fe_compare(const void *, const void *);
+extern int mlxcx_grmac_compare(const void *, const void *);
+extern int mlxcx_page_compare(const void *, const void *);
+
+extern void mlxcx_update_link_state(mlxcx_t *, mlxcx_port_t *);
+
+extern void mlxcx_eth_proto_to_string(mlxcx_eth_proto_t, char *, size_t);
+extern const char *mlxcx_port_status_string(mlxcx_port_status_t);
+
+extern const char *mlxcx_event_name(mlxcx_event_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MLXCX_H */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
new file mode 100644
index 0000000000..30fb7ca8ef
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -0,0 +1,3542 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Controls the management of commands that are issued to and from the HCA
+ * command queue.
+ */
+
+#include <mlxcx.h>
+
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+
+/*
+ * When we start up the command queue, it will undergo some internal
+ * initialization after we set the command queue address. These values allow us
+ * to control how much time we should wait for that to occur.
+ */
+clock_t mlxcx_cmd_init_delay = 1000 * 10; /* 10 ms in us */
+uint_t mlxcx_cmd_init_trys = 100; /* Wait at most 1s */
+
+clock_t mlxcx_cmd_delay = 1000 * 1; /* 1 ms in us */
+uint_t mlxcx_cmd_tries = 5000; /* Wait at most 5s (5000 * 1 ms) */
+
+/*
+ * Several commands take a "function" field. We only ever talk to our own
+ * function, so this is the value we always use for that field.
+ */
+#define MLXCX_FUNCTION_SELF (to_be16(0))
+
+static const char *
+mlxcx_cmd_response_string(mlxcx_cmd_ret_t ret)
+{
+ switch (ret) {
+ case MLXCX_CMD_R_OK:
+ return ("MLXCX_CMD_R_OK");
+ case MLXCX_CMD_R_INTERNAL_ERR:
+ return ("MLXCX_CMD_R_INTERNAL_ERR");
+ case MLXCX_CMD_R_BAD_OP:
+ return ("MLXCX_CMD_R_BAD_OP");
+ case MLXCX_CMD_R_BAD_PARAM:
+ return ("MLXCX_CMD_R_BAD_PARAM");
+ case MLXCX_CMD_R_BAD_SYS_STATE:
+ return ("MLXCX_CMD_R_BAD_SYS_STATE");
+ case MLXCX_CMD_R_BAD_RESOURCE:
+ return ("MLXCX_CMD_R_BAD_RESOURCE");
+ case MLXCX_CMD_R_RESOURCE_BUSY:
+ return ("MLXCX_CMD_R_RESOURCE_BUSY");
+ case MLXCX_CMD_R_EXCEED_LIM:
+ return ("MLXCX_CMD_R_EXCEED_LIM");
+ case MLXCX_CMD_R_BAD_RES_STATE:
+ return ("MLXCX_CMD_R_BAD_RES_STATE");
+ case MLXCX_CMD_R_BAD_INDEX:
+ return ("MLXCX_CMD_R_BAD_INDEX");
+ case MLXCX_CMD_R_NO_RESOURCES:
+ return ("MLXCX_CMD_R_NO_RESOURCES");
+ case MLXCX_CMD_R_BAD_INPUT_LEN:
+ return ("MLXCX_CMD_R_BAD_INPUT_LEN");
+ case MLXCX_CMD_R_BAD_OUTPUT_LEN:
+ return ("MLXCX_CMD_R_BAD_OUTPUT_LEN");
+ case MLXCX_CMD_R_BAD_RESOURCE_STATE:
+ return ("MLXCX_CMD_R_BAD_RESOURCE_STATE");
+ case MLXCX_CMD_R_BAD_PKT:
+ return ("MLXCX_CMD_R_BAD_PKT");
+ case MLXCX_CMD_R_BAD_SIZE:
+ return ("MLXCX_CMD_R_BAD_SIZE");
+ default:
+		return ("Unknown status");
+ }
+}
+
+static const char *
+mlxcx_cmd_opcode_string(mlxcx_cmd_op_t op)
+{
+ switch (op) {
+ case MLXCX_OP_QUERY_HCA_CAP:
+ return ("MLXCX_OP_QUERY_HCA_CAP");
+ case MLXCX_OP_QUERY_ADAPTER:
+ return ("MLXCX_OP_QUERY_ADAPTER");
+ case MLXCX_OP_INIT_HCA:
+ return ("MLXCX_OP_INIT_HCA");
+ case MLXCX_OP_TEARDOWN_HCA:
+ return ("MLXCX_OP_TEARDOWN_HCA");
+ case MLXCX_OP_ENABLE_HCA:
+ return ("MLXCX_OP_ENABLE_HCA");
+ case MLXCX_OP_DISABLE_HCA:
+ return ("MLXCX_OP_DISABLE_HCA");
+ case MLXCX_OP_QUERY_PAGES:
+ return ("MLXCX_OP_QUERY_PAGES");
+ case MLXCX_OP_MANAGE_PAGES:
+ return ("MLXCX_OP_MANAGE_PAGES");
+ case MLXCX_OP_SET_HCA_CAP:
+ return ("MLXCX_OP_SET_HCA_CAP");
+ case MLXCX_OP_QUERY_ISSI:
+ return ("MLXCX_OP_QUERY_ISSI");
+ case MLXCX_OP_SET_ISSI:
+ return ("MLXCX_OP_SET_ISSI");
+ case MLXCX_OP_SET_DRIVER_VERSION:
+ return ("MLXCX_OP_SET_DRIVER_VERSION");
+ case MLXCX_OP_QUERY_OTHER_HCA_CAP:
+ return ("MLXCX_OP_QUERY_OTHER_HCA_CAP");
+ case MLXCX_OP_MODIFY_OTHER_HCA_CAP:
+ return ("MLXCX_OP_MODIFY_OTHER_HCA_CAP");
+ case MLXCX_OP_SET_TUNNELED_OPERATIONS:
+ return ("MLXCX_OP_SET_TUNNELED_OPERATIONS");
+ case MLXCX_OP_CREATE_MKEY:
+ return ("MLXCX_OP_CREATE_MKEY");
+ case MLXCX_OP_QUERY_MKEY:
+ return ("MLXCX_OP_QUERY_MKEY");
+ case MLXCX_OP_DESTROY_MKEY:
+ return ("MLXCX_OP_DESTROY_MKEY");
+ case MLXCX_OP_QUERY_SPECIAL_CONTEXTS:
+ return ("MLXCX_OP_QUERY_SPECIAL_CONTEXTS");
+ case MLXCX_OP_PAGE_FAULT_RESUME:
+ return ("MLXCX_OP_PAGE_FAULT_RESUME");
+ case MLXCX_OP_CREATE_EQ:
+ return ("MLXCX_OP_CREATE_EQ");
+ case MLXCX_OP_DESTROY_EQ:
+ return ("MLXCX_OP_DESTROY_EQ");
+ case MLXCX_OP_QUERY_EQ:
+ return ("MLXCX_OP_QUERY_EQ");
+ case MLXCX_OP_GEN_EQE:
+ return ("MLXCX_OP_GEN_EQE");
+ case MLXCX_OP_CREATE_CQ:
+ return ("MLXCX_OP_CREATE_CQ");
+ case MLXCX_OP_DESTROY_CQ:
+ return ("MLXCX_OP_DESTROY_CQ");
+ case MLXCX_OP_QUERY_CQ:
+ return ("MLXCX_OP_QUERY_CQ");
+ case MLXCX_OP_MODIFY_CQ:
+ return ("MLXCX_OP_MODIFY_CQ");
+ case MLXCX_OP_CREATE_QP:
+ return ("MLXCX_OP_CREATE_QP");
+ case MLXCX_OP_DESTROY_QP:
+ return ("MLXCX_OP_DESTROY_QP");
+ case MLXCX_OP_RST2INIT_QP:
+ return ("MLXCX_OP_RST2INIT_QP");
+ case MLXCX_OP_INIT2RTR_QP:
+ return ("MLXCX_OP_INIT2RTR_QP");
+ case MLXCX_OP_RTR2RTS_QP:
+ return ("MLXCX_OP_RTR2RTS_QP");
+ case MLXCX_OP_RTS2RTS_QP:
+ return ("MLXCX_OP_RTS2RTS_QP");
+ case MLXCX_OP_SQERR2RTS_QP:
+ return ("MLXCX_OP_SQERR2RTS_QP");
+ case MLXCX_OP__2ERR_QP:
+ return ("MLXCX_OP__2ERR_QP");
+ case MLXCX_OP__2RST_QP:
+ return ("MLXCX_OP__2RST_QP");
+ case MLXCX_OP_QUERY_QP:
+ return ("MLXCX_OP_QUERY_QP");
+ case MLXCX_OP_SQD_RTS_QP:
+ return ("MLXCX_OP_SQD_RTS_QP");
+ case MLXCX_OP_INIT2INIT_QP:
+ return ("MLXCX_OP_INIT2INIT_QP");
+ case MLXCX_OP_CREATE_PSV:
+ return ("MLXCX_OP_CREATE_PSV");
+ case MLXCX_OP_DESTROY_PSV:
+ return ("MLXCX_OP_DESTROY_PSV");
+ case MLXCX_OP_CREATE_SRQ:
+ return ("MLXCX_OP_CREATE_SRQ");
+ case MLXCX_OP_DESTROY_SRQ:
+ return ("MLXCX_OP_DESTROY_SRQ");
+ case MLXCX_OP_QUERY_SRQ:
+ return ("MLXCX_OP_QUERY_SRQ");
+ case MLXCX_OP_ARM_RQ:
+ return ("MLXCX_OP_ARM_RQ");
+ case MLXCX_OP_CREATE_XRC_SRQ:
+ return ("MLXCX_OP_CREATE_XRC_SRQ");
+ case MLXCX_OP_DESTROY_XRC_SRQ:
+ return ("MLXCX_OP_DESTROY_XRC_SRQ");
+ case MLXCX_OP_QUERY_XRC_SRQ:
+ return ("MLXCX_OP_QUERY_XRC_SRQ");
+ case MLXCX_OP_ARM_XRC_SRQ:
+ return ("MLXCX_OP_ARM_XRC_SRQ");
+ case MLXCX_OP_CREATE_DCT:
+ return ("MLXCX_OP_CREATE_DCT");
+ case MLXCX_OP_DESTROY_DCT:
+ return ("MLXCX_OP_DESTROY_DCT");
+ case MLXCX_OP_DRAIN_DCT:
+ return ("MLXCX_OP_DRAIN_DCT");
+ case MLXCX_OP_QUERY_DCT:
+ return ("MLXCX_OP_QUERY_DCT");
+ case MLXCX_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ return ("MLXCX_OP_ARM_DCT_FOR_KEY_VIOLATION");
+ case MLXCX_OP_CREATE_XRQ:
+ return ("MLXCX_OP_CREATE_XRQ");
+ case MLXCX_OP_DESTROY_XRQ:
+ return ("MLXCX_OP_DESTROY_XRQ");
+ case MLXCX_OP_QUERY_XRQ:
+ return ("MLXCX_OP_QUERY_XRQ");
+ case MLXCX_OP_CREATE_NVMF_BACKEND_CONTROLLER:
+ return ("MLXCX_OP_CREATE_NVMF_BACKEND_CONTROLLER");
+ case MLXCX_OP_DESTROY_NVMF_BACKEND_CONTROLLER:
+ return ("MLXCX_OP_DESTROY_NVMF_BACKEND_CONTROLLER");
+ case MLXCX_OP_QUERY_NVMF_BACKEND_CONTROLLER:
+ return ("MLXCX_OP_QUERY_NVMF_BACKEND_CONTROLLER");
+ case MLXCX_OP_ATTACH_NVMF_NAMESPACE:
+ return ("MLXCX_OP_ATTACH_NVMF_NAMESPACE");
+ case MLXCX_OP_DETACH_NVMF_NAMESPACE:
+ return ("MLXCX_OP_DETACH_NVMF_NAMESPACE");
+ case MLXCX_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ return ("MLXCX_OP_QUERY_XRQ_DC_PARAMS_ENTRY");
+ case MLXCX_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ return ("MLXCX_OP_SET_XRQ_DC_PARAMS_ENTRY");
+ case MLXCX_OP_QUERY_XRQ_ERROR_PARAMS:
+ return ("MLXCX_OP_QUERY_XRQ_ERROR_PARAMS");
+ case MLXCX_OP_QUERY_VPORT_STATE:
+ return ("MLXCX_OP_QUERY_VPORT_STATE");
+ case MLXCX_OP_MODIFY_VPORT_STATE:
+ return ("MLXCX_OP_MODIFY_VPORT_STATE");
+ case MLXCX_OP_QUERY_ESW_VPORT_CONTEXT:
+ return ("MLXCX_OP_QUERY_ESW_VPORT_CONTEXT");
+ case MLXCX_OP_MODIFY_ESW_VPORT_CONTEXT:
+ return ("MLXCX_OP_MODIFY_ESW_VPORT_CONTEXT");
+ case MLXCX_OP_QUERY_NIC_VPORT_CONTEXT:
+ return ("MLXCX_OP_QUERY_NIC_VPORT_CONTEXT");
+ case MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT:
+ return ("MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT");
+ case MLXCX_OP_QUERY_ROCE_ADDRESS:
+ return ("MLXCX_OP_QUERY_ROCE_ADDRESS");
+ case MLXCX_OP_SET_ROCE_ADDRESS:
+ return ("MLXCX_OP_SET_ROCE_ADDRESS");
+ case MLXCX_OP_QUERY_HCA_VPORT_CONTEXT:
+ return ("MLXCX_OP_QUERY_HCA_VPORT_CONTEXT");
+ case MLXCX_OP_MODIFY_HCA_VPORT_CONTEXT:
+ return ("MLXCX_OP_MODIFY_HCA_VPORT_CONTEXT");
+ case MLXCX_OP_QUERY_HCA_VPORT_GID:
+ return ("MLXCX_OP_QUERY_HCA_VPORT_GID");
+ case MLXCX_OP_QUERY_HCA_VPORT_PKEY:
+ return ("MLXCX_OP_QUERY_HCA_VPORT_PKEY");
+ case MLXCX_OP_QUERY_VPORT_COUNTER:
+ return ("MLXCX_OP_QUERY_VPORT_COUNTER");
+ case MLXCX_OP_ALLOC_Q_COUNTER:
+ return ("MLXCX_OP_ALLOC_Q_COUNTER");
+ case MLXCX_OP_DEALLOC_Q_COUNTER:
+ return ("MLXCX_OP_DEALLOC_Q_COUNTER");
+ case MLXCX_OP_QUERY_Q_COUNTER:
+ return ("MLXCX_OP_QUERY_Q_COUNTER");
+ case MLXCX_OP_SET_PP_RATE_LIMIT:
+ return ("MLXCX_OP_SET_PP_RATE_LIMIT");
+ case MLXCX_OP_QUERY_PP_RATE_LIMIT:
+ return ("MLXCX_OP_QUERY_PP_RATE_LIMIT");
+ case MLXCX_OP_ALLOC_PD:
+ return ("MLXCX_OP_ALLOC_PD");
+ case MLXCX_OP_DEALLOC_PD:
+ return ("MLXCX_OP_DEALLOC_PD");
+ case MLXCX_OP_ALLOC_UAR:
+ return ("MLXCX_OP_ALLOC_UAR");
+ case MLXCX_OP_DEALLOC_UAR:
+ return ("MLXCX_OP_DEALLOC_UAR");
+ case MLXCX_OP_CONFIG_INT_MODERATION:
+ return ("MLXCX_OP_CONFIG_INT_MODERATION");
+ case MLXCX_OP_ACCESS_REG:
+ return ("MLXCX_OP_ACCESS_REG");
+ case MLXCX_OP_ATTACH_TO_MCG:
+ return ("MLXCX_OP_ATTACH_TO_MCG");
+ case MLXCX_OP_DETACH_FROM_MCG:
+ return ("MLXCX_OP_DETACH_FROM_MCG");
+ case MLXCX_OP_MAD_IFC:
+ return ("MLXCX_OP_MAD_IFC");
+ case MLXCX_OP_QUERY_MAD_DEMUX:
+ return ("MLXCX_OP_QUERY_MAD_DEMUX");
+ case MLXCX_OP_SET_MAD_DEMUX:
+ return ("MLXCX_OP_SET_MAD_DEMUX");
+ case MLXCX_OP_NOP:
+ return ("MLXCX_OP_NOP");
+ case MLXCX_OP_ALLOC_XRCD:
+ return ("MLXCX_OP_ALLOC_XRCD");
+ case MLXCX_OP_DEALLOC_XRCD:
+ return ("MLXCX_OP_DEALLOC_XRCD");
+ case MLXCX_OP_ALLOC_TRANSPORT_DOMAIN:
+ return ("MLXCX_OP_ALLOC_TRANSPORT_DOMAIN");
+ case MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN:
+ return ("MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN");
+ case MLXCX_OP_QUERY_CONG_STATUS:
+ return ("MLXCX_OP_QUERY_CONG_STATUS");
+ case MLXCX_OP_MODIFY_CONG_STATUS:
+ return ("MLXCX_OP_MODIFY_CONG_STATUS");
+ case MLXCX_OP_QUERY_CONG_PARAMS:
+ return ("MLXCX_OP_QUERY_CONG_PARAMS");
+ case MLXCX_OP_MODIFY_CONG_PARAMS:
+ return ("MLXCX_OP_MODIFY_CONG_PARAMS");
+ case MLXCX_OP_QUERY_CONG_STATISTICS:
+ return ("MLXCX_OP_QUERY_CONG_STATISTICS");
+ case MLXCX_OP_ADD_VXLAN_UDP_DPORT:
+ return ("MLXCX_OP_ADD_VXLAN_UDP_DPORT");
+ case MLXCX_OP_DELETE_VXLAN_UDP_DPORT:
+ return ("MLXCX_OP_DELETE_VXLAN_UDP_DPORT");
+ case MLXCX_OP_SET_L2_TABLE_ENTRY:
+ return ("MLXCX_OP_SET_L2_TABLE_ENTRY");
+ case MLXCX_OP_QUERY_L2_TABLE_ENTRY:
+ return ("MLXCX_OP_QUERY_L2_TABLE_ENTRY");
+ case MLXCX_OP_DELETE_L2_TABLE_ENTRY:
+ return ("MLXCX_OP_DELETE_L2_TABLE_ENTRY");
+ case MLXCX_OP_SET_WOL_ROL:
+ return ("MLXCX_OP_SET_WOL_ROL");
+ case MLXCX_OP_QUERY_WOL_ROL:
+ return ("MLXCX_OP_QUERY_WOL_ROL");
+ case MLXCX_OP_CREATE_TIR:
+ return ("MLXCX_OP_CREATE_TIR");
+ case MLXCX_OP_MODIFY_TIR:
+ return ("MLXCX_OP_MODIFY_TIR");
+ case MLXCX_OP_DESTROY_TIR:
+ return ("MLXCX_OP_DESTROY_TIR");
+ case MLXCX_OP_QUERY_TIR:
+ return ("MLXCX_OP_QUERY_TIR");
+ case MLXCX_OP_CREATE_SQ:
+ return ("MLXCX_OP_CREATE_SQ");
+ case MLXCX_OP_MODIFY_SQ:
+ return ("MLXCX_OP_MODIFY_SQ");
+ case MLXCX_OP_DESTROY_SQ:
+ return ("MLXCX_OP_DESTROY_SQ");
+ case MLXCX_OP_QUERY_SQ:
+ return ("MLXCX_OP_QUERY_SQ");
+ case MLXCX_OP_CREATE_RQ:
+ return ("MLXCX_OP_CREATE_RQ");
+ case MLXCX_OP_MODIFY_RQ:
+ return ("MLXCX_OP_MODIFY_RQ");
+ case MLXCX_OP_DESTROY_RQ:
+ return ("MLXCX_OP_DESTROY_RQ");
+ case MLXCX_OP_QUERY_RQ:
+ return ("MLXCX_OP_QUERY_RQ");
+ case MLXCX_OP_CREATE_RMP:
+ return ("MLXCX_OP_CREATE_RMP");
+ case MLXCX_OP_MODIFY_RMP:
+ return ("MLXCX_OP_MODIFY_RMP");
+ case MLXCX_OP_DESTROY_RMP:
+ return ("MLXCX_OP_DESTROY_RMP");
+ case MLXCX_OP_QUERY_RMP:
+ return ("MLXCX_OP_QUERY_RMP");
+ case MLXCX_OP_CREATE_TIS:
+ return ("MLXCX_OP_CREATE_TIS");
+ case MLXCX_OP_MODIFY_TIS:
+ return ("MLXCX_OP_MODIFY_TIS");
+ case MLXCX_OP_DESTROY_TIS:
+ return ("MLXCX_OP_DESTROY_TIS");
+ case MLXCX_OP_QUERY_TIS:
+ return ("MLXCX_OP_QUERY_TIS");
+ case MLXCX_OP_CREATE_RQT:
+ return ("MLXCX_OP_CREATE_RQT");
+ case MLXCX_OP_MODIFY_RQT:
+ return ("MLXCX_OP_MODIFY_RQT");
+ case MLXCX_OP_DESTROY_RQT:
+ return ("MLXCX_OP_DESTROY_RQT");
+ case MLXCX_OP_QUERY_RQT:
+ return ("MLXCX_OP_QUERY_RQT");
+ case MLXCX_OP_SET_FLOW_TABLE_ROOT:
+ return ("MLXCX_OP_SET_FLOW_TABLE_ROOT");
+ case MLXCX_OP_CREATE_FLOW_TABLE:
+ return ("MLXCX_OP_CREATE_FLOW_TABLE");
+ case MLXCX_OP_DESTROY_FLOW_TABLE:
+ return ("MLXCX_OP_DESTROY_FLOW_TABLE");
+ case MLXCX_OP_QUERY_FLOW_TABLE:
+ return ("MLXCX_OP_QUERY_FLOW_TABLE");
+ case MLXCX_OP_CREATE_FLOW_GROUP:
+ return ("MLXCX_OP_CREATE_FLOW_GROUP");
+ case MLXCX_OP_DESTROY_FLOW_GROUP:
+ return ("MLXCX_OP_DESTROY_FLOW_GROUP");
+ case MLXCX_OP_QUERY_FLOW_GROUP:
+ return ("MLXCX_OP_QUERY_FLOW_GROUP");
+ case MLXCX_OP_SET_FLOW_TABLE_ENTRY:
+ return ("MLXCX_OP_SET_FLOW_TABLE_ENTRY");
+ case MLXCX_OP_QUERY_FLOW_TABLE_ENTRY:
+ return ("MLXCX_OP_QUERY_FLOW_TABLE_ENTRY");
+ case MLXCX_OP_DELETE_FLOW_TABLE_ENTRY:
+ return ("MLXCX_OP_DELETE_FLOW_TABLE_ENTRY");
+ case MLXCX_OP_ALLOC_FLOW_COUNTER:
+ return ("MLXCX_OP_ALLOC_FLOW_COUNTER");
+ case MLXCX_OP_DEALLOC_FLOW_COUNTER:
+ return ("MLXCX_OP_DEALLOC_FLOW_COUNTER");
+ case MLXCX_OP_QUERY_FLOW_COUNTER:
+ return ("MLXCX_OP_QUERY_FLOW_COUNTER");
+ case MLXCX_OP_MODIFY_FLOW_TABLE:
+ return ("MLXCX_OP_MODIFY_FLOW_TABLE");
+ case MLXCX_OP_ALLOC_ENCAP_HEADER:
+ return ("MLXCX_OP_ALLOC_ENCAP_HEADER");
+ case MLXCX_OP_DEALLOC_ENCAP_HEADER:
+ return ("MLXCX_OP_DEALLOC_ENCAP_HEADER");
+ case MLXCX_OP_QUERY_ENCAP_HEADER:
+ return ("MLXCX_OP_QUERY_ENCAP_HEADER");
+ default:
+ return ("Unknown Opcode");
+ }
+}
+
+const char *
+mlxcx_port_status_string(mlxcx_port_status_t st)
+{
+ switch (st) {
+ case MLXCX_PORT_STATUS_UP:
+ return ("UP");
+ case MLXCX_PORT_STATUS_DOWN:
+ return ("DOWN");
+ case MLXCX_PORT_STATUS_UP_ONCE:
+ return ("UP_ONCE");
+ case MLXCX_PORT_STATUS_DISABLED:
+ return ("DISABLED");
+ default:
+ return ("UNKNOWN");
+ }
+}
+
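+/*
+ * Render an ethernet protocol bitmask as a human-readable string. Note
+ * that the string is built with strlcat(), so the buffer passed in must
+ * already be NUL-terminated. A caller might do something like this
+ * ("proto" standing in for whatever mlxcx_eth_proto_t value is on hand):
+ *
+ *	char buf[128] = "";
+ *	mlxcx_eth_proto_to_string(proto, buf, sizeof (buf));
+ *	mlxcx_note(mlxp, "!supported: %s", buf);
+ */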
+void
+mlxcx_eth_proto_to_string(mlxcx_eth_proto_t p, char *buf, size_t size)
+{
+ if (p & MLXCX_PROTO_SGMII)
+ (void) strlcat(buf, "SGMII|", size);
+ if (p & MLXCX_PROTO_1000BASE_KX)
+ (void) strlcat(buf, "1000BASE_KX|", size);
+ if (p & MLXCX_PROTO_10GBASE_CX4)
+ (void) strlcat(buf, "10GBASE_CX4|", size);
+ if (p & MLXCX_PROTO_10GBASE_KX4)
+ (void) strlcat(buf, "10GBASE_KX4|", size);
+ if (p & MLXCX_PROTO_10GBASE_KR)
+ (void) strlcat(buf, "10GBASE_KR|", size);
+ if (p & MLXCX_PROTO_40GBASE_CR4)
+ (void) strlcat(buf, "40GBASE_CR4|", size);
+ if (p & MLXCX_PROTO_40GBASE_KR4)
+ (void) strlcat(buf, "40GBASE_KR4|", size);
+ if (p & MLXCX_PROTO_SGMII_100BASE)
+ (void) strlcat(buf, "SGMII_100BASE|", size);
+ if (p & MLXCX_PROTO_10GBASE_CR)
+ (void) strlcat(buf, "10GBASE_CR|", size);
+ if (p & MLXCX_PROTO_10GBASE_SR)
+ (void) strlcat(buf, "10GBASE_SR|", size);
+ if (p & MLXCX_PROTO_10GBASE_ER_LR)
+ (void) strlcat(buf, "10GBASE_ER_LR|", size);
+ if (p & MLXCX_PROTO_40GBASE_SR4)
+ (void) strlcat(buf, "40GBASE_SR4|", size);
+ if (p & MLXCX_PROTO_40GBASE_LR4_ER4)
+ (void) strlcat(buf, "40GBASE_LR4_ER4|", size);
+ if (p & MLXCX_PROTO_50GBASE_SR2)
+ (void) strlcat(buf, "50GBASE_SR2|", size);
+ if (p & MLXCX_PROTO_100GBASE_CR4)
+ (void) strlcat(buf, "100GBASE_CR4|", size);
+ if (p & MLXCX_PROTO_100GBASE_SR4)
+ (void) strlcat(buf, "100GBASE_SR4|", size);
+ if (p & MLXCX_PROTO_100GBASE_KR4)
+ (void) strlcat(buf, "100GBASE_KR4|", size);
+ if (p & MLXCX_PROTO_25GBASE_CR)
+ (void) strlcat(buf, "25GBASE_CR|", size);
+ if (p & MLXCX_PROTO_25GBASE_KR)
+ (void) strlcat(buf, "25GBASE_KR|", size);
+ if (p & MLXCX_PROTO_25GBASE_SR)
+ (void) strlcat(buf, "25GBASE_SR|", size);
+ if (p & MLXCX_PROTO_50GBASE_CR2)
+ (void) strlcat(buf, "50GBASE_CR2|", size);
+ /* Chop off the trailing '|' */
+ if (strlen(buf) > 0)
+ buf[strlen(buf) - 1] = '\0';
+}
+
+void
+mlxcx_cmd_queue_fini(mlxcx_t *mlxp)
+{
+ mlxcx_cmd_queue_t *cmd = &mlxp->mlx_cmd;
+
+ mutex_enter(&cmd->mcmd_lock);
+ VERIFY3S(cmd->mcmd_status, ==, MLXCX_CMD_QUEUE_S_IDLE);
+ mutex_exit(&cmd->mcmd_lock);
+
+ if (cmd->mcmd_tokens != NULL) {
+ id_space_destroy(cmd->mcmd_tokens);
+ cmd->mcmd_tokens = NULL;
+ }
+
+ if (cmd->mcmd_taskq != NULL) {
+ ddi_taskq_destroy(cmd->mcmd_taskq);
+ cmd->mcmd_taskq = NULL;
+ }
+
+ cv_destroy(&cmd->mcmd_cv);
+ mutex_destroy(&cmd->mcmd_lock);
+
+ cmd->mcmd_ent = NULL;
+ mlxcx_dma_free(&cmd->mcmd_dma);
+}
+
+boolean_t
+mlxcx_cmd_queue_init(mlxcx_t *mlxp)
+{
+ uint32_t tmp, cmd_low, cmd_high, i;
+ mlxcx_cmd_queue_t *cmd = &mlxp->mlx_cmd;
+ char buf[64];
+ const ddi_dma_cookie_t *ck;
+
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+
+ tmp = mlxcx_get32(mlxp, MLXCX_ISS_FIRMWARE);
+ mlxp->mlx_fw_maj = MLXCX_ISS_FW_MAJOR(tmp);
+ mlxp->mlx_fw_min = MLXCX_ISS_FW_MINOR(tmp);
+
+ tmp = mlxcx_get32(mlxp, MLXCX_ISS_FW_CMD);
+ mlxp->mlx_fw_rev = MLXCX_ISS_FW_REV(tmp);
+ mlxp->mlx_cmd_rev = MLXCX_ISS_CMD_REV(tmp);
+
+ if (mlxp->mlx_cmd_rev != MLXCX_CMD_REVISION) {
+ mlxcx_warn(mlxp, "found unsupported command revision: %u, "
+ "expected %u", mlxp->mlx_cmd_rev, MLXCX_CMD_REVISION);
+ return (B_FALSE);
+ }
+
+ cmd_low = mlxcx_get32(mlxp, MLXCX_ISS_CMD_LOW);
+ cmd->mcmd_size_l2 = MLXCX_ISS_CMDQ_SIZE(cmd_low);
+ cmd->mcmd_stride_l2 = MLXCX_ISS_CMDQ_STRIDE(cmd_low);
+
+ mutex_init(&cmd->mcmd_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&cmd->mcmd_cv, NULL, CV_DRIVER, NULL);
+ cmd->mcmd_status = MLXCX_CMD_QUEUE_S_IDLE;
+
+ (void) snprintf(buf, sizeof (buf), "mlxcx_tokens_%d", mlxp->mlx_inst);
+ if ((cmd->mcmd_tokens = id_space_create(buf, 1, UINT8_MAX)) == NULL) {
+ mlxcx_warn(mlxp, "failed to allocate token id space");
+ mlxcx_cmd_queue_fini(mlxp);
+ return (B_FALSE);
+ }
+
+ (void) snprintf(buf, sizeof (buf), "mlxcx_cmdq_%d", mlxp->mlx_inst);
+ if ((cmd->mcmd_taskq = ddi_taskq_create(mlxp->mlx_dip, buf, 1,
+ TASKQ_DEFAULTPRI, 0)) == NULL) {
+ mlxcx_warn(mlxp, "failed to create command queue task queue");
+ mlxcx_cmd_queue_fini(mlxp);
+ return (B_FALSE);
+ }
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_page_attr(mlxp, &attr);
+
+ if (!mlxcx_dma_alloc(mlxp, &cmd->mcmd_dma, &attr, &acc, B_TRUE,
+ MLXCX_CMD_DMA_PAGE_SIZE, B_TRUE)) {
+ mlxcx_warn(mlxp, "failed to allocate command dma buffer");
+ mlxcx_cmd_queue_fini(mlxp);
+ return (B_FALSE);
+ }
+
+ ck = mlxcx_dma_cookie_one(&cmd->mcmd_dma);
+ cmd_high = (uint32_t)(ck->dmac_laddress >> 32);
+ cmd_low = (uint32_t)(ck->dmac_laddress & UINT32_MAX);
+
+ mlxcx_put32(mlxp, MLXCX_ISS_CMD_HIGH, cmd_high);
+ mlxcx_put32(mlxp, MLXCX_ISS_CMD_LOW, cmd_low);
+
+ /*
+	 * Before the command queue can be used, hardware must clear the
+	 * initializing bit.
+ */
+ for (i = 0; i < mlxcx_cmd_init_trys; i++) {
+ uint32_t init = mlxcx_get32(mlxp, MLXCX_ISS_INIT);
+
+ if (MLXCX_ISS_INITIALIZING(init) == 0)
+ break;
+ delay(drv_usectohz(mlxcx_cmd_init_delay));
+ }
+ if (i == mlxcx_cmd_init_trys) {
+ mlxcx_warn(mlxp, "timed out initializing command queue");
+ mlxcx_cmd_queue_fini(mlxp);
+ return (B_FALSE);
+ }
+
+ cmd->mcmd_ent = (void *)cmd->mcmd_dma.mxdb_va;
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_cmd_in_header_init(mlxcx_cmd_t *cmd, mlxcx_cmd_in_t *in,
+ mlxcx_cmd_op_t op, uint16_t mod)
+{
+ ASSERT3U(op, <=, UINT16_MAX);
+ in->mci_opcode = to_be16(op);
+ in->mci_op_mod = to_be16(mod);
+ cmd->mlcmd_op = op;
+}
+
+static boolean_t
+mlxcx_cmd_mbox_alloc(mlxcx_t *mlxp, list_t *listp, uint8_t nblocks)
+{
+ uint8_t i;
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_page_attr(mlxp, &attr);
+
+ for (i = 0; i < nblocks; i++) {
+ mlxcx_cmd_mbox_t *mbox;
+
+ mbox = kmem_zalloc(sizeof (*mbox), KM_SLEEP);
+ if (!mlxcx_dma_alloc(mlxp, &mbox->mlbox_dma, &attr, &acc,
+ B_TRUE, sizeof (mlxcx_cmd_mailbox_t), B_TRUE)) {
+ mlxcx_warn(mlxp, "failed to allocate mailbox dma "
+ "buffer");
+ kmem_free(mbox, sizeof (*mbox));
+ /*
+ * mlxcx_cmd_fini will clean up any mboxes that we
+ * already placed onto listp.
+ */
+ return (B_FALSE);
+ }
+ mbox->mlbox_data = (void *)mbox->mlbox_dma.mxdb_va;
+ list_insert_tail(listp, mbox);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_cmd_mbox_free(mlxcx_cmd_mbox_t *mbox)
+{
+ mlxcx_dma_free(&mbox->mlbox_dma);
+ kmem_free(mbox, sizeof (mlxcx_cmd_mbox_t));
+}
+
+static void
+mlxcx_cmd_fini(mlxcx_t *mlxp, mlxcx_cmd_t *cmd)
+{
+ mlxcx_cmd_mbox_t *mbox;
+
+ while ((mbox = list_remove_head(&cmd->mlcmd_mbox_out)) != NULL) {
+ mlxcx_cmd_mbox_free(mbox);
+ }
+ list_destroy(&cmd->mlcmd_mbox_out);
+ while ((mbox = list_remove_head(&cmd->mlcmd_mbox_in)) != NULL) {
+ mlxcx_cmd_mbox_free(mbox);
+ }
+ list_destroy(&cmd->mlcmd_mbox_in);
+ id_free(mlxp->mlx_cmd.mcmd_tokens, cmd->mlcmd_token);
+ cv_destroy(&cmd->mlcmd_cv);
+ mutex_destroy(&cmd->mlcmd_lock);
+}
+
+static void
+mlxcx_cmd_init(mlxcx_t *mlxp, mlxcx_cmd_t *cmd)
+{
+ bzero(cmd, sizeof (*cmd));
+ mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&cmd->mlcmd_cv, NULL, CV_DRIVER, NULL);
+ cmd->mlcmd_token = id_alloc(mlxp->mlx_cmd.mcmd_tokens);
+ list_create(&cmd->mlcmd_mbox_in, sizeof (mlxcx_cmd_mbox_t),
+ offsetof(mlxcx_cmd_mbox_t, mlbox_node));
+ list_create(&cmd->mlcmd_mbox_out, sizeof (mlxcx_cmd_mbox_t),
+ offsetof(mlxcx_cmd_mbox_t, mlbox_node));
+}
+
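+/*
+ * Copy a command's input into the queue entry. The first
+ * MLXCX_CMD_INLINE_INPUT_LEN bytes are stored inline in the entry itself;
+ * any remainder is spread across the command's extended input mailboxes,
+ * which are chained together by physical address (mlxb_nextp), numbered
+ * sequentially and stamped with the command's token.
+ */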
+static void
+mlxcx_cmd_prep_input(mlxcx_cmd_ent_t *ent, mlxcx_cmd_t *cmd)
+{
+ uint32_t rem = cmd->mlcmd_inlen;
+ uint8_t i;
+ const void *in = cmd->mlcmd_in;
+ uint32_t copy;
+ mlxcx_cmd_mbox_t *mbox;
+ const ddi_dma_cookie_t *ck;
+
+ copy = MIN(MLXCX_CMD_INLINE_INPUT_LEN, rem);
+ bcopy(in, ent->mce_input, copy);
+
+ rem -= copy;
+ in += copy;
+
+ if (rem == 0) {
+ ent->mce_in_mbox = to_be64(0);
+ VERIFY3U(cmd->mlcmd_nboxes_in, ==, 0);
+ return;
+ }
+
+ mbox = list_head(&cmd->mlcmd_mbox_in);
+ ck = mlxcx_dma_cookie_one(&mbox->mlbox_dma);
+ ent->mce_in_mbox = to_be64(ck->dmac_laddress);
+ for (i = 0; mbox != NULL;
+ mbox = list_next(&cmd->mlcmd_mbox_in, mbox), i++) {
+ mlxcx_cmd_mbox_t *next;
+ mlxcx_cmd_mailbox_t *mp = mbox->mlbox_data;
+
+ copy = MIN(MLXCX_CMD_MAILBOX_LEN, rem);
+ bcopy(in, mp->mlxb_data, copy);
+ rem -= copy;
+ in += copy;
+
+ mp->mlxb_token = cmd->mlcmd_token;
+ mp->mlxb_blockno = to_be32(i);
+
+ next = list_next(&cmd->mlcmd_mbox_in, mbox);
+ if (next == NULL) {
+ mp->mlxb_nextp = to_be64(0);
+ } else {
+ ck = mlxcx_dma_cookie_one(&next->mlbox_dma);
+ mp->mlxb_nextp = to_be64(ck->dmac_laddress);
+ }
+ MLXCX_DMA_SYNC(mbox->mlbox_dma, DDI_DMA_SYNC_FORDEV);
+ }
+ VERIFY3U(i, ==, cmd->mlcmd_nboxes_in);
+ VERIFY0(rem);
+}
+
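+/*
+ * Point the queue entry at the command's extended output mailboxes (if
+ * any), chaining them by physical address just as on the input side.
+ * Output data is copied back out by mlxcx_cmd_copy_output() once the
+ * command has completed.
+ */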
+static void
+mlxcx_cmd_prep_output(mlxcx_cmd_ent_t *ent, mlxcx_cmd_t *cmd)
+{
+ uint8_t i;
+ mlxcx_cmd_mbox_t *mbox;
+ const ddi_dma_cookie_t *ck;
+
+ if (cmd->mlcmd_nboxes_out == 0) {
+ ent->mce_out_mbox = to_be64(0);
+ return;
+ }
+
+ mbox = list_head(&cmd->mlcmd_mbox_out);
+ ck = mlxcx_dma_cookie_one(&mbox->mlbox_dma);
+ ent->mce_out_mbox = to_be64(ck->dmac_laddress);
+ for (i = 0, mbox = list_head(&cmd->mlcmd_mbox_out); mbox != NULL;
+ mbox = list_next(&cmd->mlcmd_mbox_out, mbox), i++) {
+ mlxcx_cmd_mbox_t *next;
+ mlxcx_cmd_mailbox_t *mp = mbox->mlbox_data;
+
+ mp->mlxb_token = cmd->mlcmd_token;
+ mp->mlxb_blockno = to_be32(i);
+
+ next = list_next(&cmd->mlcmd_mbox_out, mbox);
+ if (next == NULL) {
+ mp->mlxb_nextp = to_be64(0);
+ } else {
+ ck = mlxcx_dma_cookie_one(&next->mlbox_dma);
+ mp->mlxb_nextp = to_be64(ck->dmac_laddress);
+ }
+ MLXCX_DMA_SYNC(mbox->mlbox_dma, DDI_DMA_SYNC_FORDEV);
+ }
+ VERIFY3U(i, ==, cmd->mlcmd_nboxes_out);
+}
+
+static void
+mlxcx_cmd_copy_output(mlxcx_cmd_ent_t *ent, mlxcx_cmd_t *cmd)
+{
+ void *out = cmd->mlcmd_out;
+ uint32_t rem = cmd->mlcmd_outlen;
+ uint32_t copy;
+ mlxcx_cmd_mbox_t *mbox;
+
+ copy = MIN(rem, MLXCX_CMD_INLINE_OUTPUT_LEN);
+ bcopy(ent->mce_output, out, copy);
+ out += copy;
+ rem -= copy;
+
+ if (rem == 0) {
+ VERIFY0(cmd->mlcmd_nboxes_out);
+ return;
+ }
+
+ for (mbox = list_head(&cmd->mlcmd_mbox_out); mbox != NULL;
+ mbox = list_next(&cmd->mlcmd_mbox_out, mbox)) {
+ MLXCX_DMA_SYNC(mbox->mlbox_dma, DDI_DMA_SYNC_FORKERNEL);
+ copy = MIN(MLXCX_CMD_MAILBOX_LEN, rem);
+ bcopy(mbox->mlbox_data->mlxb_data, out, copy);
+ out += copy;
+ rem -= copy;
+ }
+ VERIFY0(rem);
+}
+
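+/*
+ * Taskq worker that executes a single command: it waits for exclusive
+ * ownership of the (single-entry) command queue, fills in the hardware
+ * entry, rings the doorbell and polls for completion, then wakes the
+ * submitter and releases the queue.
+ */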
+static void
+mlxcx_cmd_taskq(void *arg)
+{
+ mlxcx_cmd_t *cmd = arg;
+ mlxcx_t *mlxp = cmd->mlcmd_mlxp;
+ mlxcx_cmd_queue_t *cmdq = &mlxp->mlx_cmd;
+ mlxcx_cmd_ent_t *ent;
+ uint_t poll;
+
+ ASSERT3S(cmd->mlcmd_op, !=, 0);
+
+ mutex_enter(&cmdq->mcmd_lock);
+ while (cmdq->mcmd_status == MLXCX_CMD_QUEUE_S_BUSY) {
+ cv_wait(&cmdq->mcmd_cv, &cmdq->mcmd_lock);
+ }
+
+ if (cmdq->mcmd_status != MLXCX_CMD_QUEUE_S_IDLE) {
+ mutex_exit(&cmdq->mcmd_lock);
+
+ mutex_enter(&cmd->mlcmd_lock);
+ cmd->mlcmd_state = MLXCX_CMD_S_ERROR;
+ cv_broadcast(&cmd->mlcmd_cv);
+ mutex_exit(&cmd->mlcmd_lock);
+ return;
+ }
+
+ cmdq->mcmd_status = MLXCX_CMD_QUEUE_S_BUSY;
+ ent = cmdq->mcmd_ent;
+ mutex_exit(&cmdq->mcmd_lock);
+
+ /*
+	 * The command queue is now exclusively ours, since we set its state
+	 * to busy above.
+ */
+ bzero(ent, sizeof (*ent));
+ ent->mce_type = MLXCX_CMD_TRANSPORT_PCI;
+ ent->mce_in_length = to_be32(cmd->mlcmd_inlen);
+ ent->mce_out_length = to_be32(cmd->mlcmd_outlen);
+ ent->mce_token = cmd->mlcmd_token;
+ ent->mce_sig = 0;
+ ent->mce_status = MLXCX_CMD_HW_OWNED;
+ mlxcx_cmd_prep_input(ent, cmd);
+ mlxcx_cmd_prep_output(ent, cmd);
+ MLXCX_DMA_SYNC(cmdq->mcmd_dma, DDI_DMA_SYNC_FORDEV);
+
+	/* This assumes we only ever use the first command queue entry. */
+ mlxcx_put32(mlxp, MLXCX_ISS_CMD_DOORBELL, 1);
+
+ for (poll = 0; poll < mlxcx_cmd_tries; poll++) {
+ delay(drv_usectohz(mlxcx_cmd_delay));
+ MLXCX_DMA_SYNC(cmdq->mcmd_dma, DDI_DMA_SYNC_FORKERNEL);
+ if ((ent->mce_status & MLXCX_CMD_HW_OWNED) == 0)
+ break;
+ }
+
+ /*
+ * Command is done (or timed out). Save relevant data. Once we broadcast
+ * on the CV and drop the lock, we must not touch the cmd again.
+ */
+ mutex_enter(&cmd->mlcmd_lock);
+
+ if (poll == mlxcx_cmd_tries) {
+ cmd->mlcmd_status = MLXCX_CMD_R_TIMEOUT;
+ cmd->mlcmd_state = MLXCX_CMD_S_ERROR;
+ mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_NO_RESPONSE);
+ } else {
+ cmd->mlcmd_status = MLXCX_CMD_STATUS(ent->mce_status);
+ cmd->mlcmd_state = MLXCX_CMD_S_DONE;
+ if (cmd->mlcmd_status == 0) {
+ mlxcx_cmd_copy_output(ent, cmd);
+ }
+ }
+ cv_broadcast(&cmd->mlcmd_cv);
+ mutex_exit(&cmd->mlcmd_lock);
+
+ mutex_enter(&cmdq->mcmd_lock);
+ cmdq->mcmd_status = MLXCX_CMD_QUEUE_S_IDLE;
+ cv_broadcast(&cmdq->mcmd_cv);
+ mutex_exit(&cmdq->mcmd_lock);
+}
+
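+/*
+ * Prepare and dispatch a command: allocate extended input and output
+ * mailboxes where a payload exceeds the inline area of the queue entry,
+ * then hand the command off to the taskq. Callers follow up with
+ * mlxcx_cmd_wait() and mlxcx_cmd_evaluate().
+ *
+ * The mailbox chains are sized as need / MLXCX_CMD_MAILBOX_LEN + 1
+ * blocks. Assuming, purely for illustration, a 512-byte mailbox:
+ * need = 1000 yields 2 blocks (room for up to 1024 bytes), while an
+ * exact multiple such as need = 1024 yields 3; the division rounds up,
+ * at the cost of one spare block at exact multiples.
+ */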
+static boolean_t
+mlxcx_cmd_send(mlxcx_t *mlxp, mlxcx_cmd_t *cmd, const void *in, uint32_t inlen,
+ void *out, uint32_t outlen)
+{
+ if (inlen > MLXCX_CMD_INLINE_INPUT_LEN) {
+ uint32_t need = inlen - MLXCX_CMD_INLINE_INPUT_LEN;
+ uint8_t nblocks;
+
+ if (need / MLXCX_CMD_MAILBOX_LEN + 1 > UINT8_MAX) {
+ mlxcx_warn(mlxp, "requested too many input blocks for "
+ "%u byte input len", inlen);
+ return (B_FALSE);
+ }
+
+ nblocks = need / MLXCX_CMD_MAILBOX_LEN + 1;
+ if (!mlxcx_cmd_mbox_alloc(mlxp, &cmd->mlcmd_mbox_in, nblocks)) {
+ mlxcx_warn(mlxp, "failed to allocate %u blocks of "
+ "input mailbox", nblocks);
+ return (B_FALSE);
+ }
+ cmd->mlcmd_nboxes_in = nblocks;
+ }
+
+ if (outlen > MLXCX_CMD_INLINE_OUTPUT_LEN) {
+ uint32_t need = outlen - MLXCX_CMD_INLINE_OUTPUT_LEN;
+ uint8_t nblocks;
+
+ if (need / MLXCX_CMD_MAILBOX_LEN + 1 > UINT8_MAX) {
+ mlxcx_warn(mlxp, "requested too many output blocks for "
+ "%u byte output len", outlen);
+ return (B_FALSE);
+ }
+
+ nblocks = need / MLXCX_CMD_MAILBOX_LEN + 1;
+ if (!mlxcx_cmd_mbox_alloc(mlxp, &cmd->mlcmd_mbox_out,
+ nblocks)) {
+ mlxcx_warn(mlxp, "failed to allocate %u blocks of "
+ "output mailbox", nblocks);
+ return (B_FALSE);
+ }
+ cmd->mlcmd_nboxes_out = nblocks;
+ }
+
+ cmd->mlcmd_in = in;
+ cmd->mlcmd_inlen = inlen;
+ cmd->mlcmd_out = out;
+ cmd->mlcmd_outlen = outlen;
+ cmd->mlcmd_mlxp = mlxp;
+
+ /*
+	 * Now that all allocations have been done, all that remains is to
+	 * dispatch the request to the taskq for processing.
+ */
+ if (ddi_taskq_dispatch(mlxp->mlx_cmd.mcmd_taskq, mlxcx_cmd_taskq, cmd,
+ DDI_SLEEP) != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "failed to submit command to taskq");
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_cmd_wait(mlxcx_cmd_t *cmd)
+{
+ mutex_enter(&cmd->mlcmd_lock);
+ while (cmd->mlcmd_state == 0) {
+ cv_wait(&cmd->mlcmd_cv, &cmd->mlcmd_lock);
+ }
+ mutex_exit(&cmd->mlcmd_lock);
+}
+
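+/*
+ * Check a completed command for success at each layer: internal driver
+ * errors, command queue errors and, finally, the status code carried in
+ * the command's own output header.
+ */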
+static boolean_t
+mlxcx_cmd_evaluate(mlxcx_t *mlxp, mlxcx_cmd_t *cmd)
+{
+ mlxcx_cmd_out_t *out;
+
+ if ((cmd->mlcmd_state & MLXCX_CMD_S_ERROR) != 0) {
+ mlxcx_warn(mlxp, "command %s (0x%x) failed due to an internal "
+ "driver error",
+ mlxcx_cmd_opcode_string(cmd->mlcmd_op),
+ cmd->mlcmd_op);
+ return (B_FALSE);
+ }
+
+ if (cmd->mlcmd_status != 0) {
+ mlxcx_warn(mlxp, "command %s (0x%x) failed with command queue "
+ "error 0x%x",
+ mlxcx_cmd_opcode_string(cmd->mlcmd_op),
+ cmd->mlcmd_op, cmd->mlcmd_status);
+ return (B_FALSE);
+ }
+
+ out = cmd->mlcmd_out;
+ if (out->mco_status != MLXCX_CMD_R_OK) {
+ mlxcx_warn(mlxp, "command %s 0x%x failed with status code %s "
+ "(0x%x)", mlxcx_cmd_opcode_string(cmd->mlcmd_op),
+ cmd->mlcmd_op, mlxcx_cmd_response_string(out->mco_status),
+ out->mco_status);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
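+
+/*
+ * Each of the command wrappers below follows the same pattern: zero the
+ * in/out structures, mlxcx_cmd_init(), fill in the input header and any
+ * command-specific fields, mlxcx_cmd_send(), mlxcx_cmd_wait(),
+ * mlxcx_cmd_evaluate() and finally mlxcx_cmd_fini(). As a minimal
+ * sketch, using the real MLXCX_OP_NOP opcode but hypothetical
+ * mlxcx_cmd_nop_in_t/mlxcx_cmd_nop_out_t structures (and a hypothetical
+ * mlxi_nop_head field):
+ *
+ *	boolean_t
+ *	mlxcx_cmd_nop(mlxcx_t *mlxp)
+ *	{
+ *		mlxcx_cmd_t cmd;
+ *		mlxcx_cmd_nop_in_t in;
+ *		mlxcx_cmd_nop_out_t out;
+ *		boolean_t ret;
+ *
+ *		bzero(&in, sizeof (in));
+ *		bzero(&out, sizeof (out));
+ *		mlxcx_cmd_init(mlxp, &cmd);
+ *		mlxcx_cmd_in_header_init(&cmd, &in.mlxi_nop_head,
+ *		    MLXCX_OP_NOP, 0);
+ *		if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out,
+ *		    sizeof (out))) {
+ *			mlxcx_cmd_fini(mlxp, &cmd);
+ *			return (B_FALSE);
+ *		}
+ *		mlxcx_cmd_wait(&cmd);
+ *		ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ *		mlxcx_cmd_fini(mlxp, &cmd);
+ *		return (ret);
+ *	}
+ */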
+
+boolean_t
+mlxcx_cmd_disable_hca(mlxcx_t *mlxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_disable_hca_in_t in;
+ mlxcx_cmd_disable_hca_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_disable_hca_head,
+ MLXCX_OP_DISABLE_HCA, 0);
+ in.mlxi_disable_hca_func = MLXCX_FUNCTION_SELF;
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_enable_hca(mlxcx_t *mlxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_enable_hca_in_t in;
+ mlxcx_cmd_enable_hca_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_enable_hca_head,
+ MLXCX_OP_ENABLE_HCA, 0);
+ in.mlxi_enable_hca_func = MLXCX_FUNCTION_SELF;
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_issi(mlxcx_t *mlxp, uint32_t *issip)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_issi_in_t in;
+ mlxcx_cmd_query_issi_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_issi_head,
+ MLXCX_OP_QUERY_ISSI, 0);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ *issip = out.mlxo_supported_issi;
+ } else if (cmd.mlcmd_status == 0 &&
+ out.mlxo_query_issi_head.mco_status == MLXCX_CMD_R_BAD_OP) {
+ /*
+		 * The PRM says that if we get a bad operation, this command
+		 * isn't supported, so the device only supports version 1 of
+		 * the ISSI, indicated by bit zero being set.
+ */
+ ret = B_TRUE;
+ *issip = 1;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_issi(mlxcx_t *mlxp, uint16_t issi)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_set_issi_in_t in;
+ mlxcx_cmd_set_issi_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_issi_head,
+ MLXCX_OP_SET_ISSI, 0);
+ in.mlxi_set_issi_current = to_be16(issi);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_pages(mlxcx_t *mlxp, uint_t type, int32_t *npages)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_pages_in_t in;
+ mlxcx_cmd_query_pages_out_t out;
+ boolean_t ret;
+
+ switch (type) {
+ case MLXCX_QUERY_PAGES_OPMOD_BOOT:
+ case MLXCX_QUERY_PAGES_OPMOD_INIT:
+ case MLXCX_QUERY_PAGES_OPMOD_REGULAR:
+ break;
+ default:
+ mlxcx_warn(mlxp, "!passed invalid type to query pages: %u",
+ type);
+ return (B_FALSE);
+ }
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_pages_head,
+ MLXCX_OP_QUERY_PAGES, type);
+ in.mlxi_query_pages_func = MLXCX_FUNCTION_SELF;
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ *npages = from_be32(out.mlxo_query_pages_npages);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_give_pages(mlxcx_t *mlxp, uint_t type, int32_t npages,
+ mlxcx_dev_page_t **pages)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_manage_pages_in_t in;
+ mlxcx_cmd_manage_pages_out_t out;
+ size_t insize, outsize;
+ boolean_t ret;
+ uint32_t i;
+ uint64_t pa;
+ const ddi_dma_cookie_t *ck;
+
+ switch (type) {
+ case MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL:
+ if (npages != 0) {
+ mlxcx_warn(mlxp, "passed non-zero number of pages (%d) "
+ "but asked to fail page allocation", npages);
+ return (B_FALSE);
+ }
+ break;
+ case MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES:
+ if (npages <= 0 || npages > MLXCX_MANAGE_PAGES_MAX_PAGES) {
+ mlxcx_warn(mlxp, "passed invalid number of pages (%d) "
+ "to give pages", npages);
+ return (B_FALSE);
+ }
+ break;
+ default:
+ mlxcx_warn(mlxp, "!passed invalid type to give pages: %u",
+ type);
+ return (B_FALSE);
+ }
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+ insize = offsetof(mlxcx_cmd_manage_pages_in_t, mlxi_manage_pages_pas) +
+ npages * sizeof (uint64_t);
+ outsize = offsetof(mlxcx_cmd_manage_pages_out_t, mlxo_manage_pages_pas);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_manage_pages_head,
+ MLXCX_OP_MANAGE_PAGES, type);
+ in.mlxi_manage_pages_func = MLXCX_FUNCTION_SELF;
+ in.mlxi_manage_pages_npages = to_be32(npages);
+ for (i = 0; i < npages; i++) {
+ ck = mlxcx_dma_cookie_one(&pages[i]->mxdp_dma);
+ pa = ck->dmac_laddress;
+ ASSERT3U(pa & 0xfff, ==, 0);
+ ASSERT3U(ck->dmac_size, ==, MLXCX_HW_PAGE_SIZE);
+ in.mlxi_manage_pages_pas[i] = to_be64(pa);
+ }
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, outsize)) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_return_pages(mlxcx_t *mlxp, int32_t nreq, uint64_t *pas,
+ int32_t *nret)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_manage_pages_in_t in;
+ mlxcx_cmd_manage_pages_out_t out;
+ size_t insize, outsize;
+ boolean_t ret;
+ uint32_t i;
+
+ if (nreq <= 0) {
+ mlxcx_warn(mlxp, "passed invalid number of pages (%d) "
+ "to return pages", nreq);
+ return (B_FALSE);
+ }
+ VERIFY3S(nreq, <=, MLXCX_MANAGE_PAGES_MAX_PAGES);
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+ insize = offsetof(mlxcx_cmd_manage_pages_in_t, mlxi_manage_pages_pas);
+ outsize = offsetof(mlxcx_cmd_manage_pages_out_t,
+ mlxo_manage_pages_pas) + nreq * sizeof (uint64_t);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_manage_pages_head,
+ MLXCX_OP_MANAGE_PAGES, MLXCX_MANAGE_PAGES_OPMOD_RETURN_PAGES);
+ in.mlxi_manage_pages_func = MLXCX_FUNCTION_SELF;
+ in.mlxi_manage_pages_npages = to_be32(nreq);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, outsize)) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ *nret = from_be32(out.mlxo_manage_pages_npages);
+ for (i = 0; i < *nret; i++) {
+ pas[i] = from_be64(out.mlxo_manage_pages_pas[i]);
+ }
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_hca_cap(mlxcx_t *mlxp, mlxcx_hca_cap_type_t type,
+ mlxcx_hca_cap_mode_t mode, mlxcx_hca_cap_t *capp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_hca_cap_in_t in;
+ mlxcx_cmd_query_hca_cap_out_t *out;
+ boolean_t ret;
+ uint16_t opmode;
+
+ bzero(&in, sizeof (in));
+ out = kmem_zalloc(sizeof (mlxcx_cmd_query_hca_cap_out_t), KM_SLEEP);
+ mlxcx_cmd_init(mlxp, &cmd);
+
+ opmode = type << 1 | mode;
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_hca_cap_head,
+ MLXCX_OP_QUERY_HCA_CAP, opmode);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), out, sizeof (*out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ kmem_free(out, sizeof (mlxcx_cmd_query_hca_cap_out_t));
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ capp->mhc_mode = mode;
+ capp->mhc_type = type;
+ ASSERT3U(sizeof (out->mlxo_query_hca_cap_data), ==,
+ sizeof (capp->mhc_bulk));
+ bcopy(out->mlxo_query_hca_cap_data, capp->mhc_bulk,
+ sizeof (capp->mhc_bulk));
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+
+ kmem_free(out, sizeof (mlxcx_cmd_query_hca_cap_out_t));
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_init_hca(mlxcx_t *mlxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_init_hca_in_t in;
+ mlxcx_cmd_init_hca_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_init_hca_head,
+ MLXCX_OP_INIT_HCA, 0);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_driver_version(mlxcx_t *mlxp, const char *version)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_set_driver_version_in_t in;
+ mlxcx_cmd_set_driver_version_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_driver_version_head,
+ MLXCX_OP_SET_DRIVER_VERSION, 0);
+ VERIFY3U(strlcpy(in.mlxi_set_driver_version_version, version,
+ sizeof (in.mlxi_set_driver_version_version)), <=,
+ sizeof (in.mlxi_set_driver_version_version));
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_alloc_uar(mlxcx_t *mlxp, mlxcx_uar_t *mlup)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_alloc_uar_in_t in;
+ mlxcx_cmd_alloc_uar_out_t out;
+ boolean_t ret;
+ size_t i;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_alloc_uar_head,
+ MLXCX_OP_ALLOC_UAR, 0);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlup->mlu_allocated = B_TRUE;
+ mlup->mlu_num = from_be24(out.mlxo_alloc_uar_uar);
+ VERIFY3U(mlup->mlu_num, >, 0);
+ mlup->mlu_base = mlup->mlu_num * MLXCX_HW_PAGE_SIZE;
+
+ for (i = 0; i < MLXCX_BF_PER_UAR; ++i) {
+ mlup->mlu_bf[i].mbf_even = mlup->mlu_base +
+ MLXCX_BF_BASE + MLXCX_BF_SIZE * 2 * i;
+ mlup->mlu_bf[i].mbf_odd = mlup->mlu_bf[i].mbf_even +
+ MLXCX_BF_SIZE;
+ }
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_dealloc_uar(mlxcx_t *mlxp, mlxcx_uar_t *mlup)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_dealloc_uar_in_t in;
+ mlxcx_cmd_dealloc_uar_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_dealloc_uar_head,
+ MLXCX_OP_DEALLOC_UAR, 0);
+ VERIFY(mlup->mlu_allocated);
+ in.mlxi_dealloc_uar_uar = to_be24(mlup->mlu_num);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlup->mlu_allocated = B_FALSE;
+ mlup->mlu_num = 0;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_alloc_pd(mlxcx_t *mlxp, mlxcx_pd_t *mlpd)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_alloc_pd_in_t in;
+ mlxcx_cmd_alloc_pd_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_alloc_pd_head,
+ MLXCX_OP_ALLOC_PD, 0);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlpd->mlpd_allocated = B_TRUE;
+ mlpd->mlpd_num = from_be24(out.mlxo_alloc_pd_pdn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_dealloc_pd(mlxcx_t *mlxp, mlxcx_pd_t *mlpd)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_dealloc_pd_in_t in;
+ mlxcx_cmd_dealloc_pd_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_dealloc_pd_head,
+ MLXCX_OP_DEALLOC_PD, 0);
+ VERIFY(mlpd->mlpd_allocated);
+ in.mlxi_dealloc_pd_pdn = to_be24(mlpd->mlpd_num);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlpd->mlpd_allocated = B_FALSE;
+ mlpd->mlpd_num = 0;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_alloc_tdom(mlxcx_t *mlxp, mlxcx_tdom_t *mltd)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_alloc_tdom_in_t in;
+ mlxcx_cmd_alloc_tdom_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_alloc_tdom_head,
+ MLXCX_OP_ALLOC_TRANSPORT_DOMAIN, 0);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mltd->mltd_allocated = B_TRUE;
+ mltd->mltd_num = from_be24(out.mlxo_alloc_tdom_tdomn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_dealloc_tdom(mlxcx_t *mlxp, mlxcx_tdom_t *mltd)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_dealloc_tdom_in_t in;
+ mlxcx_cmd_dealloc_tdom_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_dealloc_tdom_head,
+ MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN, 0);
+ VERIFY(mltd->mltd_allocated);
+ in.mlxi_dealloc_tdom_tdomn = to_be24(mltd->mltd_num);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mltd->mltd_allocated = B_FALSE;
+ mltd->mltd_num = 0;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_teardown_hca(mlxcx_t *mlxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_teardown_hca_in_t in;
+ mlxcx_cmd_teardown_hca_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_teardown_hca_head,
+ MLXCX_OP_TEARDOWN_HCA, 0);
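+	/* We only ever request a graceful teardown of the HCA. */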
+ in.mlxi_teardown_hca_profile = to_be16(MLXCX_TEARDOWN_HCA_GRACEFUL);
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_nic_vport_ctx_in_t in;
+ mlxcx_cmd_query_nic_vport_ctx_out_t out;
+ boolean_t ret;
+ const mlxcx_nic_vport_ctx_t *ctx;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_nic_vport_ctx_head,
+ MLXCX_OP_QUERY_NIC_VPORT_CONTEXT, MLXCX_VPORT_TYPE_VNIC);
+
+ in.mlxi_query_nic_vport_ctx_vport_number = to_be16(mlp->mlp_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ ctx = &out.mlxo_query_nic_vport_ctx_context;
+ mlp->mlp_guid = from_be64(ctx->mlnvc_port_guid);
+ mlp->mlp_mtu = from_be16(ctx->mlnvc_mtu);
+ bcopy(ctx->mlnvc_permanent_address, mlp->mlp_mac_address,
+ sizeof (mlp->mlp_mac_address));
+ mlp->mlp_wqe_min_inline = get_bits64(ctx->mlnvc_flags,
+ MLXCX_VPORT_CTX_MIN_WQE_INLINE);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+static const char *
+mlxcx_reg_name(mlxcx_register_id_t rid)
+{
+ switch (rid) {
+ case MLXCX_REG_PMTU:
+ return ("PMTU");
+ case MLXCX_REG_PAOS:
+ return ("PAOS");
+ case MLXCX_REG_PTYS:
+ return ("PTYS");
+ case MLXCX_REG_MSGI:
+ return ("MSGI");
+ case MLXCX_REG_PMAOS:
+ return ("PMAOS");
+ case MLXCX_REG_MLCR:
+ return ("MLCR");
+ case MLXCX_REG_MCIA:
+ return ("MCIA");
+ case MLXCX_REG_PPCNT:
+ return ("PPCNT");
+ default:
+ return ("???");
+ }
+}
+
+boolean_t
+mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
+ mlxcx_register_id_t rid, mlxcx_register_data_t *data)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_access_register_in_t in;
+ mlxcx_cmd_access_register_out_t out;
+ boolean_t ret;
+ size_t dsize, insize, outsize;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_access_register_head,
+ MLXCX_OP_ACCESS_REG, opmod);
+
+ in.mlxi_access_register_register_id = to_be16(rid);
+
+ switch (rid) {
+ case MLXCX_REG_PMTU:
+ dsize = sizeof (mlxcx_reg_pmtu_t);
+ break;
+ case MLXCX_REG_PAOS:
+ dsize = sizeof (mlxcx_reg_paos_t);
+ break;
+ case MLXCX_REG_PTYS:
+ dsize = sizeof (mlxcx_reg_ptys_t);
+ break;
+ case MLXCX_REG_MLCR:
+ dsize = sizeof (mlxcx_reg_mlcr_t);
+ break;
+ case MLXCX_REG_PMAOS:
+ dsize = sizeof (mlxcx_reg_pmaos_t);
+ break;
+ case MLXCX_REG_MCIA:
+ dsize = sizeof (mlxcx_reg_mcia_t);
+ break;
+ case MLXCX_REG_PPCNT:
+ dsize = sizeof (mlxcx_reg_ppcnt_t);
+ break;
+ default:
+ dsize = 0;
+ VERIFY(0);
+ return (B_FALSE);
+ }
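+	/*
+	 * Size the command payloads to the register being accessed: the
+	 * fixed-size header, followed by dsize bytes of register data.
+	 */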
+ insize = dsize + offsetof(mlxcx_cmd_access_register_in_t,
+ mlxi_access_register_data);
+ outsize = dsize + offsetof(mlxcx_cmd_access_register_out_t,
+ mlxo_access_register_data);
+
+ bcopy(data, &in.mlxi_access_register_data, dsize);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, outsize)) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ bcopy(&out.mlxo_access_register_data, data, dsize);
+ } else {
+ mlxcx_warn(mlxp, "failed OP_ACCESS_REG was for register "
+ "%04x (%s)", rid, mlxcx_reg_name(rid));
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_port_mtu(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ /*
+	 * Since we modify the port here, we require that the caller is
+	 * holding the port mutex.
+ */
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_pmtu.mlrd_pmtu_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PMTU, &data);
+
+ if (ret) {
+ mlp->mlp_mtu = from_be16(data.mlrd_pmtu.mlrd_pmtu_admin_mtu);
+ mlp->mlp_max_mtu = from_be16(data.mlrd_pmtu.mlrd_pmtu_max_mtu);
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_module_status(mlxcx_t *mlxp, uint_t id,
+ mlxcx_module_status_t *pstatus, mlxcx_module_error_type_t *perr)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ bzero(&data, sizeof (data));
+ ASSERT3U(id, <, 0xff);
+ data.mlrd_pmaos.mlrd_pmaos_module = (uint8_t)id;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PMAOS, &data);
+
+ if (ret) {
+ if (pstatus != NULL)
+ *pstatus = data.mlrd_pmaos.mlrd_pmaos_oper_status;
+ if (perr != NULL)
+ *perr = data.mlrd_pmaos.mlrd_pmaos_error_type;
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_port_mtu(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_pmtu.mlrd_pmtu_local_port = mlp->mlp_num + 1;
+ data.mlrd_pmtu.mlrd_pmtu_admin_mtu = to_be16(mlp->mlp_mtu);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_PMTU, &data);
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_port_led(mlxcx_t *mlxp, mlxcx_port_t *mlp, uint16_t sec)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_mlcr.mlrd_mlcr_local_port = mlp->mlp_num + 1;
+ set_bits8(&data.mlrd_mlcr.mlrd_mlcr_flags, MLXCX_MLCR_LED_TYPE,
+ MLXCX_LED_TYPE_PORT);
+ data.mlrd_mlcr.mlrd_mlcr_beacon_duration = to_be16(sec);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE,
+ MLXCX_REG_MLCR, &data);
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1;
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PAOS, &data);
+
+ if (ret) {
+ mlp->mlp_admin_status = data.mlrd_paos.mlrd_paos_admin_status;
+ mlp->mlp_oper_status = data.mlrd_paos.mlrd_paos_oper_status;
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ret;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&data, sizeof (data));
+ data.mlrd_ptys.mlrd_ptys_local_port = mlp->mlp_num + 1;
+ set_bit8(&data.mlrd_ptys.mlrd_ptys_proto_mask,
+ MLXCX_PTYS_PROTO_MASK_ETH);
+
+ ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PTYS, &data);
+
+ if (ret) {
+ if (get_bit8(data.mlrd_ptys.mlrd_ptys_autoneg_flags,
+ MLXCX_AUTONEG_DISABLE)) {
+ mlp->mlp_autoneg = B_FALSE;
+ } else {
+ mlp->mlp_autoneg = B_TRUE;
+ }
+ mlp->mlp_max_proto =
+ from_bits32(data.mlrd_ptys.mlrd_ptys_proto_cap);
+ mlp->mlp_admin_proto =
+ from_bits32(data.mlrd_ptys.mlrd_ptys_proto_admin);
+ mlp->mlp_oper_proto =
+ from_bits32(data.mlrd_ptys.mlrd_ptys_proto_oper);
+ }
+
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp,
+ mlxcx_modify_nic_vport_ctx_fields_t fields)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_modify_nic_vport_ctx_in_t in;
+ mlxcx_cmd_modify_nic_vport_ctx_out_t out;
+ boolean_t ret;
+ mlxcx_nic_vport_ctx_t *ctx;
+
+ ASSERT(mutex_owned(&mlp->mlp_mtx));
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_nic_vport_ctx_head,
+ MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT, MLXCX_VPORT_TYPE_VNIC);
+
+ in.mlxi_modify_nic_vport_ctx_vport_number = to_be16(mlp->mlp_num);
+ in.mlxi_modify_nic_vport_ctx_field_select = to_be32(fields);
+
+ ctx = &in.mlxi_modify_nic_vport_ctx_context;
+ if (fields & MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC) {
+ set_bit16(&ctx->mlnvc_promisc_list_type,
+ MLXCX_VPORT_PROMISC_ALL);
+ }
+ if (fields & MLXCX_MODIFY_NIC_VPORT_CTX_MTU) {
+ ctx->mlnvc_mtu = to_be16(mlp->mlp_mtu);
+ }
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ if (fields & MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC) {
+ mlp->mlp_flags |= MLXCX_PORT_VPORT_PROMISC;
+ }
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_eq_in_t in;
+ mlxcx_cmd_create_eq_out_t out;
+ boolean_t ret;
+ mlxcx_eventq_ctx_t *ctx;
+ size_t rem, insize;
+ const ddi_dma_cookie_t *c;
+ uint64_t pa, npages;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mleq->mleq_mtx));
+ VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC);
+ VERIFY0(mleq->mleq_state & MLXCX_EQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_eq_head,
+ MLXCX_OP_CREATE_EQ, 0);
+
+ ctx = &in.mlxi_create_eq_context;
+ ctx->mleqc_uar_page = to_be24(mleq->mleq_uar->mlu_num);
+ ctx->mleqc_log_eq_size = mleq->mleq_entshift;
+ ctx->mleqc_intr = mleq->mleq_intr_index;
+
+ in.mlxi_create_eq_event_bitmask = to_be64(mleq->mleq_events);
+
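+	/*
+	 * Walk the queue memory's DMA cookies, splitting each one into
+	 * hardware page-sized chunks and recording every physical
+	 * address in the command's page address (PAS) list.
+	 */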
+ npages = 0;
+ c = NULL;
+ while ((c = mlxcx_dma_cookie_iter(&mleq->mleq_dma, c)) != NULL) {
+ pa = c->dmac_laddress;
+ rem = c->dmac_size;
+ while (rem > 0) {
+ ASSERT3U(pa & 0xfff, ==, 0);
+ ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE);
+ in.mlxi_create_eq_pas[npages++] = to_be64(pa);
+ rem -= MLXCX_HW_PAGE_SIZE;
+ pa += MLXCX_HW_PAGE_SIZE;
+ }
+ }
+ ASSERT3U(npages, <=, MLXCX_CREATE_QUEUE_MAX_PAGES);
+
+ insize = offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_pas) +
+ sizeof (uint64_t) * npages;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mleq->mleq_state |= MLXCX_EQ_CREATED;
+ mleq->mleq_num = out.mlxo_create_eq_eqn;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq,
+ mlxcx_eventq_ctx_t *ctxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_eq_in_t in;
+ mlxcx_cmd_query_eq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mleq->mleq_mtx));
+ VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC);
+ VERIFY(mleq->mleq_state & MLXCX_EQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_eq_head,
+ MLXCX_OP_QUERY_EQ, 0);
+
+ in.mlxi_query_eq_eqn = mleq->mleq_num;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ bcopy(&out.mlxo_query_eq_context, ctxp,
+ sizeof (mlxcx_eventq_ctx_t));
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_eq_in_t in;
+ mlxcx_cmd_destroy_eq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mleq->mleq_mtx));
+ VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC);
+ VERIFY(mleq->mleq_state & MLXCX_EQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_eq_head,
+ MLXCX_OP_DESTROY_EQ, 0);
+
+ in.mlxi_destroy_eq_eqn = mleq->mleq_num;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mleq->mleq_state |= MLXCX_EQ_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_special_ctxs(mlxcx_t *mlxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_special_ctxs_in_t in;
+ mlxcx_cmd_query_special_ctxs_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_special_ctxs_head,
+ MLXCX_OP_QUERY_SPECIAL_CONTEXTS, 0);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlxp->mlx_rsvd_lkey = from_be32(
+ out.mlxo_query_special_ctxs_resd_lkey);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_cq_in_t in;
+ mlxcx_cmd_create_cq_out_t out;
+ boolean_t ret;
+ mlxcx_completionq_ctx_t *ctx;
+ size_t rem, insize;
+ const ddi_dma_cookie_t *c;
+ uint64_t pa, npages;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+ VERIFY0(mlcq->mlcq_state & MLXCX_CQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_cq_head,
+ MLXCX_OP_CREATE_CQ, 0);
+
+ ctx = &in.mlxi_create_cq_context;
+ ctx->mlcqc_uar_page = to_be24(mlcq->mlcq_uar->mlu_num);
+ ctx->mlcqc_log_cq_size = mlcq->mlcq_entshift;
+ ctx->mlcqc_eqn = mlcq->mlcq_eq->mleq_num;
+ ctx->mlcqc_cq_period = to_be16(mlcq->mlcq_cqemod_period_usec);
+ ctx->mlcqc_cq_max_count = to_be16(mlcq->mlcq_cqemod_count);
+
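+	/*
+	 * The doorbell record has its own DMA allocation, which must
+	 * consist of a single cookie; hand its physical address to the
+	 * device.
+	 */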
+ c = mlxcx_dma_cookie_one(&mlcq->mlcq_doorbell_dma);
+ ctx->mlcqc_dbr_addr = to_be64(c->dmac_laddress);
+ ASSERT3U(c->dmac_size, >=, sizeof (mlxcx_completionq_doorbell_t));
+
+ npages = 0;
+ c = NULL;
+ while ((c = mlxcx_dma_cookie_iter(&mlcq->mlcq_dma, c)) != NULL) {
+ pa = c->dmac_laddress;
+ rem = c->dmac_size;
+ while (rem > 0) {
+ ASSERT3U(pa & 0xfff, ==, 0);
+ ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE);
+ in.mlxi_create_cq_pas[npages++] = to_be64(pa);
+ rem -= MLXCX_HW_PAGE_SIZE;
+ pa += MLXCX_HW_PAGE_SIZE;
+ }
+ }
+ ASSERT3U(npages, <=, MLXCX_CREATE_QUEUE_MAX_PAGES);
+
+ insize = offsetof(mlxcx_cmd_create_cq_in_t, mlxi_create_cq_pas) +
+ sizeof (uint64_t) * npages;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlcq->mlcq_state |= MLXCX_CQ_CREATED;
+ mlcq->mlcq_num = from_be24(out.mlxo_create_cq_cqn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
+ mlxcx_rq_ctx_t *ctxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_rq_in_t in;
+ mlxcx_cmd_query_rq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ ASSERT3S(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_rq_head,
+ MLXCX_OP_QUERY_RQ, 0);
+
+ in.mlxi_query_rq_rqn = to_be24(mlwq->mlwq_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ bcopy(&out.mlxo_query_rq_context, ctxp,
+ sizeof (mlxcx_rq_ctx_t));
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
+ mlxcx_sq_ctx_t *ctxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_sq_in_t in;
+ mlxcx_cmd_query_sq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ ASSERT3S(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_sq_head,
+ MLXCX_OP_QUERY_SQ, 0);
+
+ in.mlxi_query_sq_sqn = to_be24(mlwq->mlwq_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ bcopy(&out.mlxo_query_sq_context, ctxp,
+ sizeof (mlxcx_sq_ctx_t));
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_query_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
+ mlxcx_completionq_ctx_t *ctxp)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_query_cq_in_t in;
+ mlxcx_cmd_query_cq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_cq_head,
+ MLXCX_OP_QUERY_CQ, 0);
+
+ in.mlxi_query_cq_cqn = to_be24(mlcq->mlcq_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ bcopy(&out.mlxo_query_cq_context, ctxp,
+ sizeof (mlxcx_completionq_ctx_t));
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_cq_in_t in;
+ mlxcx_cmd_destroy_cq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_cq_head,
+ MLXCX_OP_DESTROY_CQ, 0);
+
+ in.mlxi_destroy_cq_cqn = to_be24(mlcq->mlcq_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlcq->mlcq_state |= MLXCX_CQ_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_rq_in_t in;
+ mlxcx_cmd_create_rq_out_t out;
+ boolean_t ret;
+ mlxcx_rq_ctx_t *ctx;
+ size_t rem, insize;
+ const ddi_dma_cookie_t *c;
+ uint64_t pa, npages;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_rq_head,
+ MLXCX_OP_CREATE_RQ, 0);
+
+ ctx = &in.mlxi_create_rq_context;
+
+ set_bit32(&ctx->mlrqc_flags, MLXCX_RQ_FLAGS_RLKEY);
+ set_bit32(&ctx->mlrqc_flags, MLXCX_RQ_FLAGS_FLUSH_IN_ERROR);
+ set_bit32(&ctx->mlrqc_flags, MLXCX_RQ_FLAGS_VLAN_STRIP_DISABLE);
+ ctx->mlrqc_cqn = to_be24(mlwq->mlwq_cq->mlcq_num);
+
+ set_bits32(&ctx->mlrqc_wq.mlwqc_flags, MLXCX_WORKQ_CTX_TYPE,
+ MLXCX_WORKQ_TYPE_CYCLIC);
+ ctx->mlrqc_wq.mlwqc_pd = to_be24(mlwq->mlwq_pd->mlpd_num);
+ ctx->mlrqc_wq.mlwqc_log_wq_sz = mlwq->mlwq_entshift;
+ ctx->mlrqc_wq.mlwqc_log_wq_stride = MLXCX_RECVQ_STRIDE_SHIFT;
+
+ c = mlxcx_dma_cookie_one(&mlwq->mlwq_doorbell_dma);
+ ctx->mlrqc_wq.mlwqc_dbr_addr = to_be64(c->dmac_laddress);
+ ASSERT3U(c->dmac_size, >=, sizeof (mlxcx_workq_doorbell_t));
+
+ npages = 0;
+ c = NULL;
+ while ((c = mlxcx_dma_cookie_iter(&mlwq->mlwq_dma, c)) != NULL) {
+ pa = c->dmac_laddress;
+ rem = c->dmac_size;
+ while (rem > 0) {
+ ASSERT3U(pa & 0xfff, ==, 0);
+ ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE);
+ ctx->mlrqc_wq.mlwqc_pas[npages++] = to_be64(pa);
+ rem -= MLXCX_HW_PAGE_SIZE;
+ pa += MLXCX_HW_PAGE_SIZE;
+ }
+ }
+ ASSERT3U(npages, <=, MLXCX_WORKQ_CTX_MAX_ADDRESSES);
+
+ insize = offsetof(mlxcx_cmd_create_rq_in_t, mlxi_create_rq_context) +
+ offsetof(mlxcx_rq_ctx_t, mlrqc_wq) +
+ offsetof(mlxcx_workq_ctx_t, mlwqc_pas) +
+ sizeof (uint64_t) * npages;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state |= MLXCX_WQ_CREATED;
+ mlwq->mlwq_num = from_be24(out.mlxo_create_rq_rqn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_start_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_modify_rq_in_t in;
+ mlxcx_cmd_modify_rq_out_t out;
+ boolean_t ret;
+ ddi_fm_error_t err;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED);
+
+ /*
+ * Before starting the queue, we have to be sure that it is
+ * empty and the doorbell and counters are set to 0.
+ */
+ ASSERT(mutex_owned(&mlwq->mlwq_cq->mlcq_mtx));
+ ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers));
+ ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers_b));
+
+ mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(0);
+ MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK)
+ return (B_FALSE);
+ mlwq->mlwq_pc = 0;
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_rq_head,
+ MLXCX_OP_MODIFY_RQ, 0);
+
+ in.mlxi_modify_rq_rqn = to_be24(mlwq->mlwq_num);
+
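+	/*
+	 * MODIFY_RQ moves the queue between hardware states: starting
+	 * it means taking it from reset (RST) to ready (RDY).
+	 */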
+ /* From state */
+ set_bits8(&in.mlxi_modify_rq_state, MLXCX_CMD_MODIFY_RQ_STATE,
+ MLXCX_RQ_STATE_RST);
+ /* To state */
+ set_bits32(&in.mlxi_modify_rq_context.mlrqc_flags, MLXCX_RQ_STATE,
+ MLXCX_RQ_STATE_RDY);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state |= MLXCX_WQ_STARTED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_stop_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_modify_rq_in_t in;
+ mlxcx_cmd_modify_rq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_STARTED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_rq_head,
+ MLXCX_OP_MODIFY_RQ, 0);
+
+ in.mlxi_modify_rq_rqn = to_be24(mlwq->mlwq_num);
+
+ /* From state */
+ set_bits8(&in.mlxi_modify_rq_state, MLXCX_CMD_MODIFY_RQ_STATE,
+ MLXCX_RQ_STATE_RDY);
+ /* To state */
+ set_bits32(&in.mlxi_modify_rq_context.mlrqc_flags, MLXCX_RQ_STATE,
+ MLXCX_RQ_STATE_RST);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state &= ~MLXCX_WQ_STARTED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_rq_in_t in;
+ mlxcx_cmd_destroy_rq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_rq_head,
+ MLXCX_OP_DESTROY_RQ, 0);
+
+ in.mlxi_destroy_rq_rqn = to_be24(mlwq->mlwq_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state |= MLXCX_WQ_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_tir(mlxcx_t *mlxp, mlxcx_tir_t *mltir)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_tir_in_t in;
+ mlxcx_cmd_create_tir_out_t out;
+ mlxcx_tir_ctx_t *ctx;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ VERIFY0(mltir->mltir_state & MLXCX_TIR_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_tir_head,
+ MLXCX_OP_CREATE_TIR, 0);
+
+ ctx = &in.mlxi_create_tir_context;
+ ctx->mltirc_transport_domain = to_be24(mltir->mltir_tdom->mltd_num);
+ set_bits8(&ctx->mltirc_disp_type, MLXCX_TIR_CTX_DISP_TYPE,
+ mltir->mltir_type);
+ switch (mltir->mltir_type) {
+ case MLXCX_TIR_INDIRECT:
+ VERIFY(mltir->mltir_rqtable != NULL);
+ VERIFY(mltir->mltir_rqtable->mlrqt_state & MLXCX_RQT_CREATED);
+ ctx->mltirc_indirect_table =
+ to_be24(mltir->mltir_rqtable->mlrqt_num);
+ set_bits8(&ctx->mltirc_hash_lb, MLXCX_TIR_RX_HASH_FN,
+ mltir->mltir_hash_fn);
+ bcopy(mltir->mltir_toeplitz_key,
+ ctx->mltirc_rx_hash_toeplitz_key,
+ sizeof (ctx->mltirc_rx_hash_toeplitz_key));
+ set_bits32(&ctx->mltirc_rx_hash_fields_outer,
+ MLXCX_RX_HASH_L3_TYPE, mltir->mltir_l3_type);
+ set_bits32(&ctx->mltirc_rx_hash_fields_outer,
+ MLXCX_RX_HASH_L4_TYPE, mltir->mltir_l4_type);
+ set_bits32(&ctx->mltirc_rx_hash_fields_outer,
+ MLXCX_RX_HASH_FIELDS, mltir->mltir_hash_fields);
+ break;
+ case MLXCX_TIR_DIRECT:
+ VERIFY(mltir->mltir_rq != NULL);
+ VERIFY(mltir->mltir_rq->mlwq_state & MLXCX_WQ_CREATED);
+ ctx->mltirc_inline_rqn = to_be24(mltir->mltir_rq->mlwq_num);
+ break;
+ default:
+ VERIFY(0);
+ }
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mltir->mltir_state |= MLXCX_TIR_CREATED;
+ mltir->mltir_num = from_be24(out.mlxo_create_tir_tirn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_tir(mlxcx_t *mlxp, mlxcx_tir_t *mltir)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_tir_in_t in;
+ mlxcx_cmd_destroy_tir_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ VERIFY(mltir->mltir_state & MLXCX_TIR_CREATED);
+ VERIFY0(mltir->mltir_state & MLXCX_TIR_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_tir_head,
+ MLXCX_OP_DESTROY_TIR, 0);
+
+ in.mlxi_destroy_tir_tirn = to_be24(mltir->mltir_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mltir->mltir_state |= MLXCX_TIR_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_tis(mlxcx_t *mlxp, mlxcx_tis_t *mltis)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_tis_in_t in;
+ mlxcx_cmd_create_tis_out_t out;
+ mlxcx_tis_ctx_t *ctx;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ VERIFY0(mltis->mltis_state & MLXCX_TIS_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_tis_head,
+ MLXCX_OP_CREATE_TIS, 0);
+
+ ctx = &in.mlxi_create_tis_context;
+ ctx->mltisc_transport_domain = to_be24(mltis->mltis_tdom->mltd_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mltis->mltis_state |= MLXCX_TIS_CREATED;
+ mltis->mltis_num = from_be24(out.mlxo_create_tis_tisn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_tis(mlxcx_t *mlxp, mlxcx_tis_t *mltis)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_tis_in_t in;
+ mlxcx_cmd_destroy_tis_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+	VERIFY(mltis->mltis_state & MLXCX_TIS_CREATED);
+	VERIFY0(mltis->mltis_state & MLXCX_TIS_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_tis_head,
+ MLXCX_OP_DESTROY_TIS, 0);
+
+ in.mlxi_destroy_tis_tisn = to_be24(mltis->mltis_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mltis->mltis_state |= MLXCX_TIS_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *mlft)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_flow_table_in_t in;
+ mlxcx_cmd_create_flow_table_out_t out;
+ mlxcx_flow_table_ctx_t *ctx;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_flow_table_head,
+ MLXCX_OP_CREATE_FLOW_TABLE, 0);
+
+ in.mlxi_create_flow_table_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_create_flow_table_table_type = mlft->mlft_type;
+ ctx = &in.mlxi_create_flow_table_context;
+ ctx->mlftc_log_size = mlft->mlft_entshift;
+ ctx->mlftc_level = mlft->mlft_level;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlft->mlft_num = from_be24(out.mlxo_create_flow_table_table_id);
+ mlft->mlft_state |= MLXCX_FLOW_TABLE_CREATED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *mlft)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_flow_table_in_t in;
+ mlxcx_cmd_destroy_flow_table_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_flow_table_head,
+ MLXCX_OP_DESTROY_FLOW_TABLE, 0);
+
+ in.mlxi_destroy_flow_table_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_destroy_flow_table_table_type = mlft->mlft_type;
+ in.mlxi_destroy_flow_table_table_id = to_be24(mlft->mlft_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlft->mlft_state |= MLXCX_FLOW_TABLE_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_flow_table_root(mlxcx_t *mlxp, mlxcx_flow_table_t *mlft)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_set_flow_table_root_in_t in;
+ mlxcx_cmd_set_flow_table_root_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_flow_table_root_head,
+ MLXCX_OP_SET_FLOW_TABLE_ROOT, 0);
+
+ in.mlxi_set_flow_table_root_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_set_flow_table_root_table_type = mlft->mlft_type;
+ in.mlxi_set_flow_table_root_table_id = to_be24(mlft->mlft_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlft->mlft_state |= MLXCX_FLOW_TABLE_ROOT;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_flow_group(mlxcx_t *mlxp, mlxcx_flow_group_t *mlfg)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_flow_group_in_t in;
+ mlxcx_cmd_create_flow_group_out_t out;
+ boolean_t ret;
+ const mlxcx_flow_table_t *mlft;
+ mlxcx_flow_header_match_t *hdrs;
+ mlxcx_flow_params_match_t *params;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlft = mlfg->mlfg_table;
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+ VERIFY0(mlfg->mlfg_state & MLXCX_FLOW_GROUP_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_flow_group_head,
+ MLXCX_OP_CREATE_FLOW_GROUP, 0);
+
+ in.mlxi_create_flow_group_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_create_flow_group_table_type = mlft->mlft_type;
+ in.mlxi_create_flow_group_table_id = to_be24(mlft->mlft_num);
+ in.mlxi_create_flow_group_start_flow_index =
+ to_be32(mlfg->mlfg_start_idx);
+ in.mlxi_create_flow_group_end_flow_index =
+ to_be32(mlfg->mlfg_start_idx + (mlfg->mlfg_size - 1));
+
+ hdrs = &in.mlxi_create_flow_group_match_criteria.mlfm_outer_headers;
+ params = &in.mlxi_create_flow_group_match_criteria.mlfm_misc_parameters;
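+	/*
+	 * For each field this group will match on, enable the criteria
+	 * class containing it and write an all-ones mask over the field
+	 * itself.
+	 */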
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SMAC) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+ (void) memset(&hdrs->mlfh_smac, 0xff, sizeof (hdrs->mlfh_smac));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DMAC) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+ (void) memset(&hdrs->mlfh_dmac, 0xff, sizeof (hdrs->mlfh_dmac));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+ set_bit24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_CVLAN_TAG);
+ set_bit24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_SVLAN_TAG);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VID) {
+ ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN);
+ set_bits16(&hdrs->mlfh_first_vid_flags,
+ MLXCX_FLOW_HDR_FIRST_VID, UINT16_MAX);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+ set_bits24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_IP_VERSION,
+ UINT32_MAX);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SRCIP) {
+ ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+ (void) memset(&hdrs->mlfh_src_ip, 0xff,
+ sizeof (hdrs->mlfh_src_ip));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DSTIP) {
+ ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+		(void) memset(&hdrs->mlfh_dst_ip, 0xff,
+		    sizeof (hdrs->mlfh_dst_ip));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_PROTO) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+ hdrs->mlfh_ip_protocol = UINT8_MAX;
+ }
+
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SQN) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_MISC_PARAMS;
+ params->mlfp_source_sqn = to_be24(UINT32_MAX);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VXLAN) {
+ in.mlxi_create_flow_group_match_criteria_en |=
+ MLXCX_FLOW_GROUP_MATCH_MISC_PARAMS;
+ params->mlfp_vxlan_vni = to_be24(UINT32_MAX);
+ }
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlfg->mlfg_state |= MLXCX_FLOW_GROUP_CREATED;
+ mlfg->mlfg_num = from_be24(out.mlxo_create_flow_group_group_id);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_flow_group(mlxcx_t *mlxp, mlxcx_flow_group_t *mlfg)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_flow_group_in_t in;
+ mlxcx_cmd_destroy_flow_group_out_t out;
+ boolean_t ret;
+ const mlxcx_flow_table_t *mlft;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlft = mlfg->mlfg_table;
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+ VERIFY(mlfg->mlfg_state & MLXCX_FLOW_GROUP_CREATED);
+ VERIFY0(mlfg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_flow_group_head,
+ MLXCX_OP_DESTROY_FLOW_GROUP, 0);
+
+ in.mlxi_destroy_flow_group_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_destroy_flow_group_table_type = mlft->mlft_type;
+ in.mlxi_destroy_flow_group_table_id = to_be24(mlft->mlft_num);
+ in.mlxi_destroy_flow_group_group_id = to_be32(mlfg->mlfg_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlfg->mlfg_state |= MLXCX_FLOW_GROUP_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_flow_table_entry(mlxcx_t *mlxp, mlxcx_flow_entry_t *mlfe)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_set_flow_table_entry_in_t in;
+ mlxcx_cmd_set_flow_table_entry_out_t out;
+ boolean_t ret;
+ size_t insize;
+ mlxcx_flow_entry_ctx_t *ctx;
+ const mlxcx_flow_table_t *mlft;
+ mlxcx_flow_group_t *mlfg;
+ mlxcx_flow_dest_t *d;
+ uint_t i;
+ mlxcx_flow_header_match_t *hdrs;
+ mlxcx_flow_params_match_t *params;
+ mlxcx_cmd_set_flow_table_entry_opmod_t opmod;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlft = mlfe->mlfe_table;
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+ mlfg = mlfe->mlfe_group;
+ VERIFY(mlfg->mlfg_state & MLXCX_FLOW_GROUP_CREATED);
+ VERIFY0(mlfg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED);
+
+ opmod = MLXCX_CMD_FLOW_ENTRY_SET_NEW;
+ if (mlfe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ ASSERT(mlfe->mlfe_state & MLXCX_FLOW_ENTRY_DIRTY);
+ opmod = MLXCX_CMD_FLOW_ENTRY_MODIFY;
+ }
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_flow_table_entry_head,
+ MLXCX_OP_SET_FLOW_TABLE_ENTRY, opmod);
+
+ in.mlxi_set_flow_table_entry_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_set_flow_table_entry_table_type = mlft->mlft_type;
+ in.mlxi_set_flow_table_entry_table_id = to_be24(mlft->mlft_num);
+ in.mlxi_set_flow_table_entry_flow_index = to_be32(mlfe->mlfe_index);
+
+ if (mlfe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ set_bit8(&in.mlxi_set_flow_table_entry_modify_bitmask,
+ MLXCX_CMD_FLOW_ENTRY_SET_ACTION);
+ set_bit8(&in.mlxi_set_flow_table_entry_modify_bitmask,
+ MLXCX_CMD_FLOW_ENTRY_SET_DESTINATION);
+ }
+
+ ctx = &in.mlxi_set_flow_table_entry_context;
+ ctx->mlfec_group_id = to_be32(mlfg->mlfg_num);
+
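+	/*
+	 * The base command length runs up to the start of the
+	 * destination list; each destination a FORWARD action names
+	 * extends insize below.
+	 */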
+ insize = offsetof(mlxcx_cmd_set_flow_table_entry_in_t,
+ mlxi_set_flow_table_entry_context) +
+ offsetof(mlxcx_flow_entry_ctx_t, mlfec_destination);
+
+ ctx->mlfec_action = to_be16(mlfe->mlfe_action);
+
+ switch (mlfe->mlfe_action) {
+ case MLXCX_FLOW_ACTION_ALLOW:
+ case MLXCX_FLOW_ACTION_DROP:
+ break;
+ case MLXCX_FLOW_ACTION_FORWARD:
+ ASSERT3U(mlfe->mlfe_ndest, <=, MLXCX_FLOW_MAX_DESTINATIONS);
+ ASSERT3U(mlfe->mlfe_ndest, <=,
+ mlxp->mlx_caps->mlc_max_rx_fe_dest);
+ ctx->mlfec_destination_list_size = to_be24(mlfe->mlfe_ndest);
+ for (i = 0; i < mlfe->mlfe_ndest; ++i) {
+ insize += sizeof (mlxcx_flow_dest_t);
+ d = &ctx->mlfec_destination[i];
+ if (mlfe->mlfe_dest[i].mlfed_tir != NULL) {
+ d->mlfd_destination_type = MLXCX_FLOW_DEST_TIR;
+ d->mlfd_destination_id = to_be24(
+ mlfe->mlfe_dest[i].mlfed_tir->mltir_num);
+ } else if (mlfe->mlfe_dest[i].mlfed_flow != NULL) {
+ d->mlfd_destination_type =
+ MLXCX_FLOW_DEST_FLOW_TABLE;
+ d->mlfd_destination_id = to_be24(
+ mlfe->mlfe_dest[i].mlfed_flow->mlft_num);
+ } else {
+ /* Invalid flow entry destination */
+ VERIFY(0);
+ }
+ }
+ break;
+ case MLXCX_FLOW_ACTION_COUNT:
+ /* We don't support count actions yet. */
+ VERIFY(0);
+ break;
+ case MLXCX_FLOW_ACTION_ENCAP:
+ case MLXCX_FLOW_ACTION_DECAP:
+ /* We don't support encap/decap actions yet. */
+ VERIFY(0);
+ break;
+ }
+
+ hdrs = &ctx->mlfec_match_value.mlfm_outer_headers;
+ params = &ctx->mlfec_match_value.mlfm_misc_parameters;
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SMAC) {
+ bcopy(mlfe->mlfe_smac, hdrs->mlfh_smac,
+ sizeof (hdrs->mlfh_smac));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DMAC) {
+ bcopy(mlfe->mlfe_dmac, hdrs->mlfh_dmac,
+ sizeof (hdrs->mlfh_dmac));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN) {
+ switch (mlfe->mlfe_vlan_type) {
+ case MLXCX_VLAN_TYPE_CVLAN:
+ set_bit24(&hdrs->mlfh_tcp_ip_flags,
+ MLXCX_FLOW_HDR_CVLAN_TAG);
+ break;
+ case MLXCX_VLAN_TYPE_SVLAN:
+ set_bit24(&hdrs->mlfh_tcp_ip_flags,
+ MLXCX_FLOW_HDR_SVLAN_TAG);
+ break;
+ default:
+ break;
+ }
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VID) {
+ ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN);
+ set_bits16(&hdrs->mlfh_first_vid_flags,
+ MLXCX_FLOW_HDR_FIRST_VID, mlfe->mlfe_vid);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER) {
+ set_bits24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_IP_VERSION,
+ mlfe->mlfe_ip_version);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SRCIP) {
+ ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+ bcopy(mlfe->mlfe_srcip, hdrs->mlfh_src_ip,
+ sizeof (hdrs->mlfh_src_ip));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DSTIP) {
+ ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+		bcopy(mlfe->mlfe_dstip, hdrs->mlfh_dst_ip,
+ sizeof (hdrs->mlfh_dst_ip));
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_PROTO) {
+ hdrs->mlfh_ip_protocol = mlfe->mlfe_ip_proto;
+ }
+
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SQN) {
+ params->mlfp_source_sqn = to_be24(mlfe->mlfe_sqn);
+ }
+ if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VXLAN) {
+ params->mlfp_vxlan_vni = to_be24(mlfe->mlfe_vxlan_vni);
+ }
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlfe->mlfe_state |= MLXCX_FLOW_ENTRY_CREATED;
+ mlfe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY;
+ mlfg->mlfg_state |= MLXCX_FLOW_GROUP_BUSY;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_delete_flow_table_entry(mlxcx_t *mlxp, mlxcx_flow_entry_t *mlfe)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_delete_flow_table_entry_in_t in;
+ mlxcx_cmd_delete_flow_table_entry_out_t out;
+ boolean_t ret;
+ const mlxcx_flow_table_t *mlft;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlft = mlfe->mlfe_table;
+ ASSERT(mutex_owned(&mlft->mlft_mtx));
+ VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+ VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_delete_flow_table_entry_head,
+ MLXCX_OP_DELETE_FLOW_TABLE_ENTRY, 0);
+
+ in.mlxi_delete_flow_table_entry_vport_number =
+ to_be16(mlft->mlft_port->mlp_num);
+ in.mlxi_delete_flow_table_entry_table_type = mlft->mlft_type;
+ in.mlxi_delete_flow_table_entry_table_id = to_be24(mlft->mlft_num);
+ in.mlxi_delete_flow_table_entry_flow_index = to_be32(mlfe->mlfe_index);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ /*
+ * Note that flow entries have a different lifecycle to most
+ * other things we create -- we have to be able to re-use them
+ * after they have been deleted, since they exist at a fixed
+ * position in their flow table.
+ *
+		 * So we clear the CREATED bit here, which lets us call
+		 * set_flow_table_entry() on the same entry again later.
+ */
+ mlfe->mlfe_state &= ~MLXCX_FLOW_ENTRY_CREATED;
+ mlfe->mlfe_state |= MLXCX_FLOW_ENTRY_DELETED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_sq_in_t in;
+ mlxcx_cmd_create_sq_out_t out;
+ boolean_t ret;
+ mlxcx_sq_ctx_t *ctx;
+ size_t rem, insize;
+ const ddi_dma_cookie_t *c;
+ uint64_t pa, npages;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_sq_head,
+ MLXCX_OP_CREATE_SQ, 0);
+
+ ctx = &in.mlxi_create_sq_context;
+
+ set_bit32(&ctx->mlsqc_flags, MLXCX_SQ_FLAGS_RLKEY);
+ set_bit32(&ctx->mlsqc_flags, MLXCX_SQ_FLAGS_FLUSH_IN_ERROR);
+ set_bits32(&ctx->mlsqc_flags, MLXCX_SQ_MIN_WQE_INLINE,
+ mlwq->mlwq_inline_mode);
+ ctx->mlsqc_cqn = to_be24(mlwq->mlwq_cq->mlcq_num);
+
+ VERIFY(mlwq->mlwq_tis != NULL);
+ ctx->mlsqc_tis_lst_sz = to_be16(1);
+ ctx->mlsqc_tis_num = to_be24(mlwq->mlwq_tis->mltis_num);
+
+ set_bits32(&ctx->mlsqc_wq.mlwqc_flags, MLXCX_WORKQ_CTX_TYPE,
+ MLXCX_WORKQ_TYPE_CYCLIC);
+ ctx->mlsqc_wq.mlwqc_pd = to_be24(mlwq->mlwq_pd->mlpd_num);
+ ctx->mlsqc_wq.mlwqc_uar_page = to_be24(mlwq->mlwq_uar->mlu_num);
+ ctx->mlsqc_wq.mlwqc_log_wq_sz = mlwq->mlwq_entshift;
+ ctx->mlsqc_wq.mlwqc_log_wq_stride = MLXCX_SENDQ_STRIDE_SHIFT;
+
+ c = mlxcx_dma_cookie_one(&mlwq->mlwq_doorbell_dma);
+ ctx->mlsqc_wq.mlwqc_dbr_addr = to_be64(c->dmac_laddress);
+ ASSERT3U(c->dmac_size, >=, sizeof (mlxcx_workq_doorbell_t));
+
+ npages = 0;
+ c = NULL;
+ while ((c = mlxcx_dma_cookie_iter(&mlwq->mlwq_dma, c)) != NULL) {
+ pa = c->dmac_laddress;
+ rem = c->dmac_size;
+ while (rem > 0) {
+ ASSERT3U(pa & 0xfff, ==, 0);
+ ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE);
+ ctx->mlsqc_wq.mlwqc_pas[npages++] = to_be64(pa);
+ rem -= MLXCX_HW_PAGE_SIZE;
+ pa += MLXCX_HW_PAGE_SIZE;
+ }
+ }
+ ASSERT3U(npages, <=, MLXCX_WORKQ_CTX_MAX_ADDRESSES);
+
+ insize = offsetof(mlxcx_cmd_create_sq_in_t, mlxi_create_sq_context) +
+ offsetof(mlxcx_sq_ctx_t, mlsqc_wq) +
+ offsetof(mlxcx_workq_ctx_t, mlwqc_pas) +
+ sizeof (uint64_t) * npages;
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state |= MLXCX_WQ_CREATED;
+ mlwq->mlwq_num = from_be24(out.mlxo_create_sq_sqn);
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_start_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_modify_sq_in_t in;
+ mlxcx_cmd_modify_sq_out_t out;
+ boolean_t ret;
+ ddi_fm_error_t err;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ ASSERT(mlwq->mlwq_cq != NULL);
+
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED);
+
+ /*
+ * Before starting the queue, we have to be sure that it is
+ * empty and the doorbell and counters are set to 0.
+ */
+ ASSERT(mutex_owned(&mlwq->mlwq_cq->mlcq_mtx));
+ ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers));
+ ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers_b));
+
+ mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(0);
+ MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK)
+ return (B_FALSE);
+ mlwq->mlwq_pc = 0;
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_sq_head,
+ MLXCX_OP_MODIFY_SQ, 0);
+
+ in.mlxi_modify_sq_sqn = to_be24(mlwq->mlwq_num);
+
+ /* From state */
+ set_bits8(&in.mlxi_modify_sq_state, MLXCX_CMD_MODIFY_SQ_STATE,
+ MLXCX_SQ_STATE_RST);
+ /* To state */
+ set_bits32(&in.mlxi_modify_sq_context.mlsqc_flags, MLXCX_SQ_STATE,
+ MLXCX_SQ_STATE_RDY);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state |= MLXCX_WQ_STARTED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_stop_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_modify_sq_in_t in;
+ mlxcx_cmd_modify_sq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_STARTED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_sq_head,
+ MLXCX_OP_MODIFY_SQ, 0);
+
+ in.mlxi_modify_sq_sqn = to_be24(mlwq->mlwq_num);
+
+ /* From state */
+ set_bits8(&in.mlxi_modify_sq_state, MLXCX_CMD_MODIFY_SQ_STATE,
+ MLXCX_SQ_STATE_RDY);
+ /* To state */
+ set_bits32(&in.mlxi_modify_sq_context.mlsqc_flags, MLXCX_SQ_STATE,
+ MLXCX_SQ_STATE_RST);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state &= ~MLXCX_WQ_STARTED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_sq_in_t in;
+ mlxcx_cmd_destroy_sq_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_sq_head,
+ MLXCX_OP_DESTROY_SQ, 0);
+
+ in.mlxi_destroy_sq_sqn = to_be24(mlwq->mlwq_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlwq->mlwq_state |= MLXCX_WQ_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_rqt(mlxcx_t *mlxp, mlxcx_rqtable_t *mlrqt)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_create_rqt_in_t in;
+ mlxcx_cmd_create_rqt_out_t out;
+ mlxcx_rqtable_ctx_t *ctx;
+ boolean_t ret;
+ uint_t i;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ VERIFY0(mlrqt->mlrqt_state & MLXCX_RQT_CREATED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_rqt_head,
+ MLXCX_OP_CREATE_RQT, 0);
+
+ ctx = &in.mlxi_create_rqt_context;
+ ASSERT3U(mlrqt->mlrqt_max, <=, MLXCX_RQT_MAX_RQ_REFS);
+ ASSERT3U(mlrqt->mlrqt_max, <=, mlxp->mlx_caps->mlc_max_rqt_size);
+ ctx->mlrqtc_max_size = to_be16(mlrqt->mlrqt_max);
+ ctx->mlrqtc_actual_size = to_be16(mlrqt->mlrqt_used);
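+	/*
+	 * The RQ table is an indirection list of receive queue numbers,
+	 * which an indirect TIR hashes inbound traffic across.
+	 */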
+ for (i = 0; i < mlrqt->mlrqt_used; ++i) {
+ ctx->mlrqtc_rqref[i].mlrqtr_rqn = to_be24(
+ mlrqt->mlrqt_rq[i]->mlwq_num);
+ }
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlrqt->mlrqt_num = from_be24(out.mlxo_create_rqt_rqtn);
+ mlrqt->mlrqt_state |= MLXCX_RQT_CREATED;
+ mlrqt->mlrqt_state &= ~MLXCX_RQT_DIRTY;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_rqt(mlxcx_t *mlxp, mlxcx_rqtable_t *mlrqt)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_destroy_rqt_in_t in;
+ mlxcx_cmd_destroy_rqt_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ VERIFY(mlrqt->mlrqt_state & MLXCX_RQT_CREATED);
+ VERIFY0(mlrqt->mlrqt_state & MLXCX_RQT_DESTROYED);
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_rqt_head,
+ MLXCX_OP_DESTROY_RQT, 0);
+
+ in.mlxi_destroy_rqt_rqtn = to_be24(mlrqt->mlrqt_num);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ if (ret) {
+ mlrqt->mlrqt_state |= MLXCX_RQT_DESTROYED;
+ }
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_int_mod(mlxcx_t *mlxp, uint_t intr, uint_t min_delay)
+{
+ mlxcx_cmd_t cmd;
+ mlxcx_cmd_config_int_mod_in_t in;
+ mlxcx_cmd_config_int_mod_out_t out;
+ boolean_t ret;
+
+ bzero(&in, sizeof (in));
+ bzero(&out, sizeof (out));
+
+ mlxcx_cmd_init(mlxp, &cmd);
+ mlxcx_cmd_in_header_init(&cmd, &in.mlxi_config_int_mod_head,
+ MLXCX_OP_CONFIG_INT_MODERATION, MLXCX_CMD_CONFIG_INT_MOD_WRITE);
+
+ in.mlxi_config_int_mod_int_vector = to_be16(intr);
+ in.mlxi_config_int_mod_min_delay = to_be16(min_delay);
+
+ if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (B_FALSE);
+ }
+ mlxcx_cmd_wait(&cmd);
+
+ ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+ mlxcx_cmd_fini(mlxp, &cmd);
+ return (ret);
+}
+
+/*
+ * The CTASSERTs here check that the structs in mlxcx_reg.h place their
+ * fields at the offsets given in the PRM.
+ *
+ * They're kept out of the header file so that they are only evaluated
+ * in this one .c file.
+ */
+
+CTASSERT(offsetof(mlxcx_eventq_ent_t, mleqe_unknown_data) == 0x20);
+CTASSERT(offsetof(mlxcx_eventq_ent_t, mleqe_signature) == 0x3c + 2);
+CTASSERT(sizeof (mlxcx_eventq_ent_t) == 64);
+
+CTASSERT(offsetof(mlxcx_completionq_error_ent_t, mlcqee_byte_cnt) == 0x2C);
+CTASSERT(offsetof(mlxcx_completionq_error_ent_t, mlcqee_wqe_opcode) == 0x38);
+
+CTASSERT(sizeof (mlxcx_completionq_error_ent_t) ==
+ sizeof (mlxcx_completionq_ent_t));
+CTASSERT(sizeof (mlxcx_wqe_control_seg_t) == (1 << 4));
+
+CTASSERT(offsetof(mlxcx_wqe_eth_seg_t, mles_inline_headers) == 0x0e);
+CTASSERT(sizeof (mlxcx_wqe_eth_seg_t) == (1 << 5));
+
+CTASSERT(sizeof (mlxcx_wqe_data_seg_t) == (1 << 4));
+
+CTASSERT(sizeof (mlxcx_sendq_ent_t) == (1 << MLXCX_SENDQ_STRIDE_SHIFT));
+
+CTASSERT(sizeof (mlxcx_sendq_bf_t) == (1 << MLXCX_SENDQ_STRIDE_SHIFT));
+
+CTASSERT(sizeof (mlxcx_sendq_extra_ent_t) == (1 << MLXCX_SENDQ_STRIDE_SHIFT));
+
+CTASSERT(sizeof (mlxcx_recvq_ent_t) == (1 << MLXCX_RECVQ_STRIDE_SHIFT));
+
+CTASSERT(offsetof(mlxcx_workq_ctx_t, mlwqc_dbr_addr) == 0x10);
+CTASSERT(offsetof(mlxcx_workq_ctx_t, mlwqc_pas) == 0xc0);
+
+CTASSERT(offsetof(mlxcx_rq_ctx_t, mlrqc_cqn) == 0x09);
+CTASSERT(offsetof(mlxcx_rq_ctx_t, mlrqc_wq) == 0x30);
+
+CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_cqn) == 0x09);
+CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_tis_lst_sz) == 0x20);
+CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_tis_num) == 0x2d);
+CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_wq) == 0x30);
+
+CTASSERT(sizeof (mlxcx_tis_ctx_t) == 0xa0);
+CTASSERT(offsetof(mlxcx_tis_ctx_t, mltisc_transport_domain) == 0x25);
+
+CTASSERT(offsetof(mlxcx_rqtable_ctx_t, mlrqtc_max_size) == 0x16);
+CTASSERT(offsetof(mlxcx_rqtable_ctx_t, mlrqtc_rqref) == 0xF0);
+
+CTASSERT(offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_event_bitmask) ==
+ 0x58);
+CTASSERT(offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_pas) == 0x110);
+CTASSERT(offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_context) == 0x10);
+
+CTASSERT(offsetof(mlxcx_cmd_create_tir_in_t, mlxi_create_tir_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_cmd_create_tis_in_t, mlxi_create_tis_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_cmd_query_special_ctxs_out_t,
+ mlxo_query_special_ctxs_resd_lkey) == 0x0c);
+
+CTASSERT(offsetof(mlxcx_cmd_query_cq_out_t, mlxo_query_cq_context) == 0x10);
+CTASSERT(offsetof(mlxcx_cmd_query_cq_out_t, mlxo_query_cq_pas) == 0x110);
+
+CTASSERT(offsetof(mlxcx_cmd_query_rq_out_t, mlxo_query_rq_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_cmd_create_sq_in_t, mlxi_create_sq_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_cmd_modify_sq_in_t, mlxi_modify_sq_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_cmd_query_sq_out_t, mlxo_query_sq_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_cmd_create_rqt_in_t, mlxi_create_rqt_context) == 0x20);
+
+CTASSERT(offsetof(mlxcx_reg_pmtu_t, mlrd_pmtu_oper_mtu) == 0x0C);
+
+CTASSERT(sizeof (mlxcx_reg_ptys_t) == 64);
+CTASSERT(offsetof(mlxcx_reg_ptys_t, mlrd_ptys_proto_cap) == 0x0c);
+CTASSERT(offsetof(mlxcx_reg_ptys_t, mlrd_ptys_proto_admin) == 0x18);
+CTASSERT(offsetof(mlxcx_reg_ptys_t, mlrd_ptys_proto_partner_advert) == 0x30);
+
+CTASSERT(offsetof(mlxcx_reg_mcia_t, mlrd_mcia_data) == 0x10);
+
+CTASSERT(offsetof(mlxcx_ppcnt_ieee_802_3_t,
+ mlppc_ieee_802_3_in_range_len_err) == 0x50);
+CTASSERT(offsetof(mlxcx_ppcnt_ieee_802_3_t,
+ mlppc_ieee_802_3_pause_tx) == 0x90);
+
+CTASSERT(sizeof (mlxcx_reg_ppcnt_t) == 256);
+CTASSERT(offsetof(mlxcx_reg_ppcnt_t, mlrd_ppcnt_data) == 0x08);
+
+CTASSERT(offsetof(mlxcx_cmd_access_register_in_t,
+ mlxi_access_register_argument) == 0x0C);
+CTASSERT(offsetof(mlxcx_cmd_access_register_in_t,
+ mlxi_access_register_data) == 0x10);
+
+CTASSERT(offsetof(mlxcx_cmd_access_register_out_t,
+ mlxo_access_register_data) == 0x10);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_dma.c b/usr/src/uts/common/io/mlxcx/mlxcx_dma.c
new file mode 100644
index 0000000000..79b9bb3746
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_dma.c
@@ -0,0 +1,460 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * DMA allocation and tear down routines.
+ */
+
+#include <mlxcx.h>
+
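+/*
+ * Device access attributes used for our DMA allocations. The device is
+ * big-endian, but we do all byte-swapping ourselves (see mlxcx_endint.h),
+ * so we ask for DDI_NEVERSWAP_ACC here.
+ */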
+void
+mlxcx_dma_acc_attr(mlxcx_t *mlxp, ddi_device_acc_attr_t *accp)
+{
+ bzero(accp, sizeof (*accp));
+ accp->devacc_attr_version = DDI_DEVICE_ATTR_V0;
+ accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
+ accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC;
+
+ if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+ accp->devacc_attr_access = DDI_FLAGERR_ACC;
+ } else {
+ accp->devacc_attr_access = DDI_DEFAULT_ACC;
+ }
+}
+
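+/*
+ * DMA attributes for command pages: single, page-sized, page-aligned
+ * allocations, each of which must fit in one cookie.
+ */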
+void
+mlxcx_dma_page_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+ bzero(attrp, sizeof (*attrp));
+ attrp->dma_attr_version = DMA_ATTR_V0;
+
+ /*
+ * This is a 64-bit PCIe device. We can use the entire address space.
+ */
+ attrp->dma_attr_addr_lo = 0x0;
+ attrp->dma_attr_addr_hi = UINT64_MAX;
+
+ /*
+ * The count max indicates the total amount that can fit into one
+ * cookie. Because we're allocating a single page for tracking purposes,
+ * this can be a page in size. The alignment and segment follow from that
+ * same requirement: the allocation must be page-aligned, and the segment
+ * is the boundary that a cookie can't cross, i.e. a 4k page.
+ */
+ attrp->dma_attr_count_max = MLXCX_CMD_DMA_PAGE_SIZE - 1;
+ attrp->dma_attr_align = MLXCX_CMD_DMA_PAGE_SIZE;
+ attrp->dma_attr_seg = MLXCX_CMD_DMA_PAGE_SIZE - 1;
+
+ attrp->dma_attr_burstsizes = 0xfff;
+
+ /*
+ * The minimum and maximum sizes that we can send. We cap the maximum at
+ * a single page, matching how this memory is used.
+ */
+ attrp->dma_attr_minxfer = 0x1;
+ attrp->dma_attr_maxxfer = MLXCX_CMD_DMA_PAGE_SIZE;
+
+ /*
+ * This is supposed to be used for static data structures, therefore we
+ * keep this just to a page.
+ */
+ attrp->dma_attr_sgllen = 1;
+
+ /*
+ * The granularity describes the addressing granularity: that is, the
+ * hardware can ask for chunks in units of this many bytes.
+ */
+ attrp->dma_attr_granular = MLXCX_CMD_DMA_PAGE_SIZE;
+
+ if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+ attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+ } else {
+ attrp->dma_attr_flags = 0;
+ }
+}
+
+/*
+ * DMA attributes for queue memory (EQ, CQ, WQ etc)
+ *
+ * These have to allocate in units of whole pages, but can be multiple
+ * pages and don't have to be physically contiguous.
+ */
+void
+mlxcx_dma_queue_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+ bzero(attrp, sizeof (*attrp));
+ attrp->dma_attr_version = DMA_ATTR_V0;
+
+ /*
+ * This is a 64-bit PCIe device. We can use the entire address space.
+ */
+ attrp->dma_attr_addr_lo = 0x0;
+ attrp->dma_attr_addr_hi = UINT64_MAX;
+
+ attrp->dma_attr_count_max = MLXCX_QUEUE_DMA_PAGE_SIZE - 1;
+
+ attrp->dma_attr_align = MLXCX_QUEUE_DMA_PAGE_SIZE;
+
+ attrp->dma_attr_burstsizes = 0xfff;
+
+ /*
+ * The minimum and maximum sizes that we can send. Queue memory is
+ * allocated and mapped in whole pages, so the minimum transfer is one
+ * page, while an allocation may span many pages.
+ */
+ attrp->dma_attr_minxfer = MLXCX_QUEUE_DMA_PAGE_SIZE;
+ attrp->dma_attr_maxxfer = UINT32_MAX;
+
+ attrp->dma_attr_seg = UINT64_MAX;
+
+ attrp->dma_attr_granular = MLXCX_QUEUE_DMA_PAGE_SIZE;
+
+ /* But we can have more than one. */
+ attrp->dma_attr_sgllen = MLXCX_CREATE_QUEUE_MAX_PAGES;
+
+ if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+ attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+ } else {
+ attrp->dma_attr_flags = 0;
+ }
+}
+
+/*
+ * DMA attributes for packet buffers
+ */
+void
+mlxcx_dma_buf_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+ bzero(attrp, sizeof (*attrp));
+ attrp->dma_attr_version = DMA_ATTR_V0;
+
+ /*
+ * This is a 64-bit PCIe device. We can use the entire address space.
+ */
+ attrp->dma_attr_addr_lo = 0x0;
+ attrp->dma_attr_addr_hi = UINT64_MAX;
+
+ /*
+ * Each scatter pointer has a 32-bit length field.
+ */
+ attrp->dma_attr_count_max = UINT32_MAX;
+
+ /*
+ * The PRM gives us no alignment requirements for scatter pointers,
+ * but it implies that units < 16 bytes are a bad idea.
+ */
+ attrp->dma_attr_align = 16;
+ attrp->dma_attr_granular = 1;
+
+ attrp->dma_attr_burstsizes = 0xfff;
+
+ attrp->dma_attr_minxfer = 1;
+ attrp->dma_attr_maxxfer = UINT64_MAX;
+
+ attrp->dma_attr_seg = UINT64_MAX;
+
+ /*
+ * We choose how many scatter pointers we're allowed per packet when
+ * we set the recv queue stride. This macro is from mlxcx_reg.h where
+ * we fix that for all of our receive queues.
+ */
+ attrp->dma_attr_sgllen = MLXCX_RECVQ_MAX_PTRS;
+
+ if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+ attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+ } else {
+ attrp->dma_attr_flags = 0;
+ }
+}
+
+/*
+ * DMA attributes for queue doorbells
+ */
+void
+mlxcx_dma_qdbell_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+ bzero(attrp, sizeof (*attrp));
+ attrp->dma_attr_version = DMA_ATTR_V0;
+
+ /*
+ * This is a 64-bit PCIe device. We can use the entire address space.
+ */
+ attrp->dma_attr_addr_lo = 0x0;
+ attrp->dma_attr_addr_hi = UINT64_MAX;
+
+ /*
+ * Queue doorbells are always exactly 16 bytes in length, but
+ * the ddi_dma functions don't like such small values of count_max.
+ *
+ * We tell some lies here.
+ */
+ attrp->dma_attr_count_max = MLXCX_QUEUE_DMA_PAGE_SIZE - 1;
+ attrp->dma_attr_align = 8;
+ attrp->dma_attr_burstsizes = 0x8;
+ attrp->dma_attr_minxfer = 1;
+ attrp->dma_attr_maxxfer = UINT16_MAX;
+ attrp->dma_attr_seg = MLXCX_QUEUE_DMA_PAGE_SIZE - 1;
+ attrp->dma_attr_granular = 1;
+ attrp->dma_attr_sgllen = 1;
+
+ if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+ attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+ } else {
+ attrp->dma_attr_flags = 0;
+ }
+}
+
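+/*
+ * Tear down a DMA buffer in the reverse order of setup: unbind the
+ * handle, free the allocated memory (or forget a foreign mblk mapping),
+ * then free the handle itself. Each step is guarded by the flag that was
+ * set when that resource was acquired.
+ */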
+void
+mlxcx_dma_free(mlxcx_dma_buffer_t *mxdb)
+{
+ int ret;
+
+ if (mxdb->mxdb_flags & MLXCX_DMABUF_BOUND) {
+ VERIFY(mxdb->mxdb_dma_handle != NULL);
+ ret = ddi_dma_unbind_handle(mxdb->mxdb_dma_handle);
+ VERIFY3S(ret, ==, DDI_SUCCESS);
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_BOUND;
+ mxdb->mxdb_ncookies = 0;
+ }
+
+ if (mxdb->mxdb_flags & MLXCX_DMABUF_MEM_ALLOC) {
+ ddi_dma_mem_free(&mxdb->mxdb_acc_handle);
+ mxdb->mxdb_acc_handle = NULL;
+ mxdb->mxdb_va = NULL;
+ mxdb->mxdb_len = 0;
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_MEM_ALLOC;
+ }
+
+ if (mxdb->mxdb_flags & MLXCX_DMABUF_FOREIGN) {
+ /* The mblk will be freed separately */
+ mxdb->mxdb_va = NULL;
+ mxdb->mxdb_len = 0;
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_FOREIGN;
+ }
+
+ if (mxdb->mxdb_flags & MLXCX_DMABUF_HDL_ALLOC) {
+ ddi_dma_free_handle(&mxdb->mxdb_dma_handle);
+ mxdb->mxdb_dma_handle = NULL;
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_HDL_ALLOC;
+ }
+
+ ASSERT3U(mxdb->mxdb_flags, ==, 0);
+ ASSERT3P(mxdb->mxdb_dma_handle, ==, NULL);
+ ASSERT3P(mxdb->mxdb_va, ==, NULL);
+ ASSERT3U(mxdb->mxdb_len, ==, 0);
+ ASSERT3U(mxdb->mxdb_ncookies, ==, 0);
+}
+
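+/*
+ * Unbind a buffer from the device without freeing its DMA handle, so the
+ * handle can be re-used for a later binding.
+ */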
+void
+mlxcx_dma_unbind(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb)
+{
+ int ret;
+
+ ASSERT(mxdb->mxdb_flags & MLXCX_DMABUF_HDL_ALLOC);
+ ASSERT(mxdb->mxdb_flags & MLXCX_DMABUF_BOUND);
+
+ if (mxdb->mxdb_flags & MLXCX_DMABUF_FOREIGN) {
+ /* The mblk will be freed separately */
+ mxdb->mxdb_va = NULL;
+ mxdb->mxdb_len = 0;
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_FOREIGN;
+ }
+
+ ret = ddi_dma_unbind_handle(mxdb->mxdb_dma_handle);
+ VERIFY3S(ret, ==, DDI_SUCCESS);
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_BOUND;
+ mxdb->mxdb_ncookies = 0;
+}
+
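+/*
+ * Allocate just the DMA handle for a buffer, leaving it to be bound
+ * later (e.g. by mlxcx_dma_bind_mblk()).
+ */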
+boolean_t
+mlxcx_dma_init(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb,
+ ddi_dma_attr_t *attrp, boolean_t wait)
+{
+ int ret;
+ int (*memcb)(caddr_t);
+
+ if (wait == B_TRUE) {
+ memcb = DDI_DMA_SLEEP;
+ } else {
+ memcb = DDI_DMA_DONTWAIT;
+ }
+
+ ASSERT3S(mxdb->mxdb_flags, ==, 0);
+
+ ret = ddi_dma_alloc_handle(mlxp->mlx_dip, attrp, memcb, NULL,
+ &mxdb->mxdb_dma_handle);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "!failed to allocate DMA handle: %d", ret);
+ mxdb->mxdb_dma_handle = NULL;
+ return (B_FALSE);
+ }
+ mxdb->mxdb_flags |= MLXCX_DMABUF_HDL_ALLOC;
+
+ return (B_TRUE);
+}
+
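+/*
+ * Bind an existing mblk's data for DMA without copying it, starting at
+ * the given offset into the message block.
+ */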
+boolean_t
+mlxcx_dma_bind_mblk(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb,
+ const mblk_t *mp, size_t off, boolean_t wait)
+{
+ int ret;
+ uint_t flags = DDI_DMA_STREAMING;
+ int (*memcb)(caddr_t);
+
+ if (wait == B_TRUE) {
+ memcb = DDI_DMA_SLEEP;
+ } else {
+ memcb = DDI_DMA_DONTWAIT;
+ }
+
+ ASSERT(mxdb->mxdb_flags & MLXCX_DMABUF_HDL_ALLOC);
+ ASSERT0(mxdb->mxdb_flags &
+ (MLXCX_DMABUF_FOREIGN | MLXCX_DMABUF_MEM_ALLOC));
+ ASSERT0(mxdb->mxdb_flags & MLXCX_DMABUF_BOUND);
+
+ ASSERT3U(off, <=, MBLKL(mp));
+ mxdb->mxdb_va = (caddr_t)(mp->b_rptr + off);
+ mxdb->mxdb_len = MBLKL(mp) - off;
+ mxdb->mxdb_flags |= MLXCX_DMABUF_FOREIGN;
+
+ ret = ddi_dma_addr_bind_handle(mxdb->mxdb_dma_handle, NULL,
+ mxdb->mxdb_va, mxdb->mxdb_len, DDI_DMA_WRITE | flags, memcb, NULL,
+ NULL, NULL);
+ if (ret != DDI_DMA_MAPPED) {
+ mxdb->mxdb_va = NULL;
+ mxdb->mxdb_len = 0;
+ mxdb->mxdb_flags &= ~MLXCX_DMABUF_FOREIGN;
+ return (B_FALSE);
+ }
+ mxdb->mxdb_flags |= MLXCX_DMABUF_BOUND;
+ mxdb->mxdb_ncookies = ddi_dma_ncookies(mxdb->mxdb_dma_handle);
+
+ return (B_TRUE);
+}
+
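+/*
+ * Allocate and bind fresh DMA memory: handle, memory and binding in one
+ * call, with optional zeroing.
+ */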
+boolean_t
+mlxcx_dma_alloc(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb,
+ ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero,
+ size_t size, boolean_t wait)
+{
+ int ret;
+ uint_t flags = DDI_DMA_CONSISTENT;
+ size_t len;
+ int (*memcb)(caddr_t);
+
+ if (wait == B_TRUE) {
+ memcb = DDI_DMA_SLEEP;
+ } else {
+ memcb = DDI_DMA_DONTWAIT;
+ }
+
+ ASSERT3U(mxdb->mxdb_flags, ==, 0);
+
+ ret = ddi_dma_alloc_handle(mlxp->mlx_dip, attrp, memcb, NULL,
+ &mxdb->mxdb_dma_handle);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "!failed to allocate DMA handle: %d", ret);
+ mxdb->mxdb_dma_handle = NULL;
+ return (B_FALSE);
+ }
+ mxdb->mxdb_flags |= MLXCX_DMABUF_HDL_ALLOC;
+
+ ret = ddi_dma_mem_alloc(mxdb->mxdb_dma_handle, size, accp, flags, memcb,
+ NULL, &mxdb->mxdb_va, &len, &mxdb->mxdb_acc_handle);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "!failed to allocate DMA memory: %d", ret);
+ mxdb->mxdb_va = NULL;
+ mxdb->mxdb_acc_handle = NULL;
+ mlxcx_dma_free(mxdb);
+ return (B_FALSE);
+ }
+ mxdb->mxdb_len = size;
+ mxdb->mxdb_flags |= MLXCX_DMABUF_MEM_ALLOC;
+
+ if (zero == B_TRUE)
+ bzero(mxdb->mxdb_va, len);
+
+ ret = ddi_dma_addr_bind_handle(mxdb->mxdb_dma_handle, NULL,
+ mxdb->mxdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, NULL,
+ NULL);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "!failed to bind DMA memory: %d", ret);
+ mlxcx_dma_free(mxdb);
+ return (B_FALSE);
+ }
+ mxdb->mxdb_flags |= MLXCX_DMABUF_BOUND;
+ mxdb->mxdb_ncookies = ddi_dma_ncookies(mxdb->mxdb_dma_handle);
+
+ return (B_TRUE);
+}
+
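+/*
+ * As mlxcx_dma_alloc(), but the mapping presented to the caller (and the
+ * device) begins "offset" bytes into the allocation.
+ */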
+boolean_t
+mlxcx_dma_alloc_offset(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb,
+ ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero,
+ size_t size, size_t offset, boolean_t wait)
+{
+ int ret;
+ uint_t flags = DDI_DMA_STREAMING;
+ size_t len;
+ int (*memcb)(caddr_t);
+
+ if (wait == B_TRUE) {
+ memcb = DDI_DMA_SLEEP;
+ } else {
+ memcb = DDI_DMA_DONTWAIT;
+ }
+
+ ASSERT3U(mxdb->mxdb_flags, ==, 0);
+
+ ret = ddi_dma_alloc_handle(mlxp->mlx_dip, attrp, memcb, NULL,
+ &mxdb->mxdb_dma_handle);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "!failed to allocate DMA handle: %d", ret);
+ mxdb->mxdb_dma_handle = NULL;
+ return (B_FALSE);
+ }
+ mxdb->mxdb_flags |= MLXCX_DMABUF_HDL_ALLOC;
+
+ ret = ddi_dma_mem_alloc(mxdb->mxdb_dma_handle, size + offset, accp,
+ flags, memcb, NULL, &mxdb->mxdb_va, &len, &mxdb->mxdb_acc_handle);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "!failed to allocate DMA memory: %d", ret);
+ mxdb->mxdb_va = NULL;
+ mxdb->mxdb_acc_handle = NULL;
+ mlxcx_dma_free(mxdb);
+ return (B_FALSE);
+ }
+
+ if (zero == B_TRUE)
+ bzero(mxdb->mxdb_va, len);
+
+ mxdb->mxdb_va += offset;
+ len -= offset;
+ mxdb->mxdb_len = len;
+ mxdb->mxdb_flags |= MLXCX_DMABUF_MEM_ALLOC;
+
+ ret = ddi_dma_addr_bind_handle(mxdb->mxdb_dma_handle, NULL,
+ mxdb->mxdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, NULL,
+ NULL);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "!failed to bind DMA memory: %d", ret);
+ mlxcx_dma_free(mxdb);
+ return (B_FALSE);
+ }
+ mxdb->mxdb_flags |= MLXCX_DMABUF_BOUND;
+ mxdb->mxdb_ncookies = ddi_dma_ncookies(mxdb->mxdb_dma_handle);
+
+ return (B_TRUE);
+}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_endint.h b/usr/src/uts/common/io/mlxcx/mlxcx_endint.h
new file mode 100644
index 0000000000..4ad69173c0
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_endint.h
@@ -0,0 +1,305 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ */
+
+#ifndef _MLXCX_ENDINT_H
+#define _MLXCX_ENDINT_H
+
+#include <sys/types.h>
+#include <sys/byteorder.h>
+
+/*
+ * The inlines and structs in this file are used by mlxcx to ensure endian
+ * safety when dealing with memory-mapped structures from the device, and
+ * also simpler use of 24-bit integers (which Mellanox loves).
+ *
+ * By declaring all of these values in the memory-mapped structures as structs
+ * (e.g. uint32be_t) rather than bare integers (uint32_t) we ensure that the
+ * compiler will not allow them to be silently converted to integers and used
+ * without doing the necessary byte-swapping work.
+ *
+ * The uintXbe_t structs are designed to be used inside a #pragma pack(1)
+ * context only and we don't try to fix up their alignment.
+ *
+ * Also present in here are a number of bitsX_t types which can be used to
+ * gain a little bit of type safety when dealing with endian-swapped bitfields.
+ */
+
+#pragma pack(1)
+typedef struct { uint16_t be_val; } uint16be_t;
+typedef struct { uint8_t be_val[3]; } uint24be_t;
+typedef struct { uint32_t be_val; } uint32be_t;
+typedef struct { uint64_t be_val; } uint64be_t;
+#pragma pack()
+
+static inline uint16_t
+from_be16(uint16be_t v)
+{
+ return (BE_16(v.be_val));
+}
+
+static inline uint32_t
+from_be24(uint24be_t v)
+{
+ return (((uint32_t)v.be_val[0] << 16) |
+ ((uint32_t)v.be_val[1] << 8) |
+ ((uint32_t)v.be_val[2]));
+}
+
+static inline uint32_t
+from_be32(uint32be_t v)
+{
+ return (BE_32(v.be_val));
+}
+
+static inline uint64_t
+from_be64(uint64be_t v)
+{
+ return (BE_64(v.be_val));
+}
+
+static inline uint16be_t
+to_be16(uint16_t v)
+{
+ /* CSTYLED */
+ return ((uint16be_t){ .be_val = BE_16(v) });
+}
+
+static inline uint24be_t
+to_be24(uint32_t v)
+{
+ /* CSTYLED */
+ return ((uint24be_t){ .be_val = {
+ (v & 0xFF0000) >> 16,
+ (v & 0x00FF00) >> 8,
+ (v & 0x0000FF)
+ }});
+}
+
+static inline uint32be_t
+to_be32(uint32_t v)
+{
+ /* CSTYLED */
+ return ((uint32be_t){ .be_val = BE_32(v) });
+}
+
+static inline uint64be_t
+to_be64(uint64_t v)
+{
+ /* CSTYLED */
+ return ((uint64be_t){ .be_val = BE_64(v) });
+}
+
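+/*
+ * For example (the field names here are hypothetical), a 24-bit queue
+ * number in a command structure is written with to_be24() and read back
+ * with from_be24():
+ *
+ *     in.mlxi_example_qn = to_be24(qnum);
+ *     qnum = from_be24(out.mlxo_example_qn);
+ */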
+#pragma pack(1)
+typedef struct { uint8_t bit_val; } bits8_t;
+typedef struct { uint16_t bit_val; } bits16_t;
+typedef struct { uint32_t bit_val; } bits32_t;
+typedef struct { uint24be_t bit_val; } bits24_t;
+typedef struct { uint64_t bit_val; } bits64_t;
+typedef struct { uint64_t bit_shift; uint64_t bit_mask; } bitdef_t;
+#pragma pack()
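+/*
+ * A bitdef_t describes one field within a register: bit_mask selects the
+ * field's bits in the host-order value, and bit_shift is the position of
+ * its least significant bit. For example, { .bit_shift = 24, .bit_mask =
+ * 0xff000000 } describes the most significant byte of a 32-bit word.
+ */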
+
+static inline uint8_t
+get_bits8(bits8_t v, bitdef_t d)
+{
+ return ((v.bit_val & d.bit_mask) >> d.bit_shift);
+}
+static inline void
+set_bits8(bits8_t *v, bitdef_t d, uint8_t val)
+{
+ v->bit_val &= ~d.bit_mask;
+ v->bit_val |= (val << d.bit_shift) & d.bit_mask;
+}
+static inline uint8_t
+get_bit8(bits8_t v, uint8_t mask)
+{
+ return ((v.bit_val & mask) != 0);
+}
+static inline void
+set_bit8(bits8_t *v, uint8_t mask)
+{
+ v->bit_val |= mask;
+}
+static inline void
+clear_bit8(bits8_t *v, uint8_t mask)
+{
+ v->bit_val &= ~mask;
+}
+static inline bits8_t
+new_bits8(void)
+{
+ /* CSTYLED */
+ return ((bits8_t){ .bit_val = 0 });
+}
+static inline uint8_t
+from_bits8(bits8_t v)
+{
+ return (v.bit_val);
+}
+
+static inline uint16_t
+get_bits16(bits16_t v, bitdef_t d)
+{
+ return ((BE_16(v.bit_val) & d.bit_mask) >> d.bit_shift);
+}
+static inline void
+set_bits16(bits16_t *v, bitdef_t d, uint16_t val)
+{
+ v->bit_val &= BE_16(~d.bit_mask);
+ v->bit_val |= BE_16((val << d.bit_shift) & d.bit_mask);
+}
+static inline uint16_t
+get_bit16(bits16_t v, uint16_t mask)
+{
+ return ((BE_16(v.bit_val) & mask) != 0);
+}
+static inline void
+set_bit16(bits16_t *v, uint16_t mask)
+{
+ v->bit_val |= BE_16(mask);
+}
+static inline void
+clear_bit16(bits16_t *v, uint16_t mask)
+{
+ v->bit_val &= BE_16(~mask);
+}
+static inline bits16_t
+new_bits16(void)
+{
+ /* CSTYLED */
+ return ((bits16_t){ .bit_val = 0 });
+}
+static inline uint16_t
+from_bits16(bits16_t v)
+{
+ return (BE_16(v.bit_val));
+}
+
+static inline uint32_t
+get_bits32(bits32_t v, bitdef_t d)
+{
+ return ((BE_32(v.bit_val) & d.bit_mask) >> d.bit_shift);
+}
+static inline void
+set_bits32(bits32_t *v, bitdef_t d, uint32_t val)
+{
+ v->bit_val &= BE_32(~d.bit_mask);
+ v->bit_val |= BE_32((val << d.bit_shift) & d.bit_mask);
+}
+static inline uint32_t
+get_bit32(bits32_t v, uint32_t mask)
+{
+ return ((BE_32(v.bit_val) & mask) != 0);
+}
+static inline void
+set_bit32(bits32_t *v, uint32_t mask)
+{
+ v->bit_val |= BE_32(mask);
+}
+static inline void
+clear_bit32(bits32_t *v, uint32_t mask)
+{
+ v->bit_val &= BE_32(~mask);
+}
+static inline bits32_t
+new_bits32(void)
+{
+ /* CSTYLED */
+ return ((bits32_t){ .bit_val = 0 });
+}
+static inline uint32_t
+from_bits32(bits32_t v)
+{
+ return (BE_32(v.bit_val));
+}
+
+static inline uint32_t
+get_bits24(bits24_t v, bitdef_t d)
+{
+ return ((from_be24(v.bit_val) & d.bit_mask) >> d.bit_shift);
+}
+static inline void
+set_bits24(bits24_t *v, bitdef_t d, uint32_t val)
+{
+ uint32_t vv = from_be24(v->bit_val);
+ vv &= ~d.bit_mask;
+ vv |= (val << d.bit_shift) & d.bit_mask;
+ v->bit_val = to_be24(vv);
+}
+static inline uint32_t
+get_bit24(bits24_t v, uint32_t mask)
+{
+ return ((from_be24(v.bit_val) & mask) != 0);
+}
+static inline void
+set_bit24(bits24_t *v, uint32_t mask)
+{
+ v->bit_val = to_be24(from_be24(v->bit_val) | mask);
+}
+static inline void
+clear_bit24(bits24_t *v, uint32_t mask)
+{
+ v->bit_val = to_be24(from_be24(v->bit_val) & ~mask);
+}
+static inline bits24_t
+new_bits24(void)
+{
+ /* CSTYLED */
+ return ((bits24_t){ .bit_val = to_be24(0) });
+}
+static inline uint32_t
+from_bits24(bits24_t v)
+{
+ return (from_be24(v.bit_val));
+}
+
+static inline uint64_t
+get_bits64(bits64_t v, bitdef_t d)
+{
+ return ((BE_64(v.bit_val) & d.bit_mask) >> d.bit_shift);
+}
+static inline void
+set_bits64(bits64_t *v, bitdef_t d, uint64_t val)
+{
+ v->bit_val &= BE_64(~d.bit_mask);
+ v->bit_val |= BE_64((val << d.bit_shift) & d.bit_mask);
+}
+static inline uint64_t
+get_bit64(bits64_t v, uint64_t mask)
+{
+ return ((BE_64(v.bit_val) & mask) != 0);
+}
+static inline void
+set_bit64(bits64_t *v, uint64_t mask)
+{
+ v->bit_val |= BE_64(mask);
+}
+static inline void
+clear_bit64(bits64_t *v, uint64_t mask)
+{
+ v->bit_val &= BE_64(~mask);
+}
+static inline bits64_t
+new_bits64(void)
+{
+ /* CSTYLED */
+ return ((bits64_t){ .bit_val = 0 });
+}
+static inline uint64_t
+from_bits64(bits64_t v)
+{
+ return (BE_64(v.bit_val));
+}
+
+#endif /* _MLXCX_ENDINT_H */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
new file mode 100644
index 0000000000..871c4f30b3
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -0,0 +1,1254 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2020, the University of Queensland
+ */
+
+/*
+ * Mellanox Connect-X 4/5/6 driver.
+ */
+
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/sysmacros.h>
+#include <sys/vlan.h>
+
+#include <sys/pattr.h>
+#include <sys/dlpi.h>
+
+#include <sys/mac_provider.h>
+
+/* Need these for mac_vlan_header_info() */
+#include <sys/mac_client.h>
+#include <sys/mac_client_priv.h>
+
+#include <mlxcx.h>
+
+static char *mlxcx_priv_props[] = {
+ NULL
+};
+
+#define MBITS 1000000ULL
+#define GBITS (1000ULL * MBITS)
+
+static uint64_t
+mlxcx_speed_to_bits(mlxcx_eth_proto_t v)
+{
+ switch (v) {
+ case MLXCX_PROTO_SGMII_100BASE:
+ return (100ULL * MBITS);
+ case MLXCX_PROTO_SGMII:
+ case MLXCX_PROTO_1000BASE_KX:
+ return (1000ULL * MBITS);
+ case MLXCX_PROTO_10GBASE_CX4:
+ case MLXCX_PROTO_10GBASE_KX4:
+ case MLXCX_PROTO_10GBASE_KR:
+ case MLXCX_PROTO_10GBASE_CR:
+ case MLXCX_PROTO_10GBASE_SR:
+ case MLXCX_PROTO_10GBASE_ER_LR:
+ return (10ULL * GBITS);
+ case MLXCX_PROTO_40GBASE_CR4:
+ case MLXCX_PROTO_40GBASE_KR4:
+ case MLXCX_PROTO_40GBASE_SR4:
+ case MLXCX_PROTO_40GBASE_LR4_ER4:
+ return (40ULL * GBITS);
+ case MLXCX_PROTO_25GBASE_CR:
+ case MLXCX_PROTO_25GBASE_KR:
+ case MLXCX_PROTO_25GBASE_SR:
+ return (25ULL * GBITS);
+ case MLXCX_PROTO_50GBASE_SR2:
+ case MLXCX_PROTO_50GBASE_CR2:
+ case MLXCX_PROTO_50GBASE_KR2:
+ return (50ULL * GBITS);
+ case MLXCX_PROTO_100GBASE_CR4:
+ case MLXCX_PROTO_100GBASE_SR4:
+ case MLXCX_PROTO_100GBASE_KR4:
+ return (100ULL * GBITS);
+ default:
+ return (0);
+ }
+}
+
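+/*
+ * Fetch one of the RFC 2863 counters from the port's PPCNT register.
+ * Each call reads the whole counter group from the device, without
+ * clearing it.
+ */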
+static int
+mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
+ uint64_t *val)
+{
+ int ret = 0;
+ boolean_t ok;
+ mlxcx_register_data_t data;
+ mlxcx_ppcnt_rfc_2863_t *st;
+
+ ASSERT(mutex_owned(&port->mlp_mtx));
+
+ bzero(&data, sizeof (data));
+ data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1;
+ data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_RFC_2863;
+ data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR;
+
+ ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPCNT, &data);
+ if (!ok)
+ return (EIO);
+ st = &data.mlrd_ppcnt.mlrd_ppcnt_rfc_2863;
+
+ switch (stat) {
+ case MAC_STAT_RBYTES:
+ *val = from_be64(st->mlppc_rfc_2863_in_octets);
+ break;
+ case MAC_STAT_MULTIRCV:
+ *val = from_be64(st->mlppc_rfc_2863_in_mcast_pkts);
+ break;
+ case MAC_STAT_BRDCSTRCV:
+ *val = from_be64(st->mlppc_rfc_2863_in_bcast_pkts);
+ break;
+ case MAC_STAT_MULTIXMT:
+ *val = from_be64(st->mlppc_rfc_2863_out_mcast_pkts);
+ break;
+ case MAC_STAT_BRDCSTXMT:
+ *val = from_be64(st->mlppc_rfc_2863_out_bcast_pkts);
+ break;
+ case MAC_STAT_IERRORS:
+ *val = from_be64(st->mlppc_rfc_2863_in_errors);
+ break;
+ case MAC_STAT_UNKNOWNS:
+ *val = from_be64(st->mlppc_rfc_2863_in_unknown_protos);
+ break;
+ case MAC_STAT_OERRORS:
+ *val = from_be64(st->mlppc_rfc_2863_out_errors);
+ break;
+ case MAC_STAT_OBYTES:
+ *val = from_be64(st->mlppc_rfc_2863_out_octets);
+ break;
+ default:
+ ret = ENOTSUP;
+ }
+
+ return (ret);
+}
+
+static int
+mlxcx_mac_stat_ieee_802_3(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
+ uint64_t *val)
+{
+ int ret = 0;
+ boolean_t ok;
+ mlxcx_register_data_t data;
+ mlxcx_ppcnt_ieee_802_3_t *st;
+
+ ASSERT(mutex_owned(&port->mlp_mtx));
+
+ bzero(&data, sizeof (data));
+ data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1;
+ data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_IEEE_802_3;
+ data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR;
+
+ ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_PPCNT, &data);
+ if (!ok)
+ return (EIO);
+ st = &data.mlrd_ppcnt.mlrd_ppcnt_ieee_802_3;
+
+ switch (stat) {
+ case MAC_STAT_IPACKETS:
+ *val = from_be64(st->mlppc_ieee_802_3_frames_rx);
+ break;
+ case MAC_STAT_OPACKETS:
+ *val = from_be64(st->mlppc_ieee_802_3_frames_tx);
+ break;
+ case ETHER_STAT_ALIGN_ERRORS:
+ *val = from_be64(st->mlppc_ieee_802_3_align_err);
+ break;
+ case ETHER_STAT_FCS_ERRORS:
+ *val = from_be64(st->mlppc_ieee_802_3_fcs_err);
+ break;
+ case ETHER_STAT_TOOLONG_ERRORS:
+ *val = from_be64(st->mlppc_ieee_802_3_frame_too_long_err);
+ break;
+ default:
+ ret = ENOTSUP;
+ }
+
+ return (ret);
+}
+
+static int
+mlxcx_mac_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+ int ret = 0;
+
+ mutex_enter(&port->mlp_mtx);
+
+ switch (stat) {
+ case MAC_STAT_IFSPEED:
+ *val = mlxcx_speed_to_bits(port->mlp_oper_proto);
+ break;
+ case ETHER_STAT_LINK_DUPLEX:
+ *val = LINK_DUPLEX_FULL;
+ break;
+ case MAC_STAT_RBYTES:
+ case MAC_STAT_MULTIRCV:
+ case MAC_STAT_BRDCSTRCV:
+ case MAC_STAT_MULTIXMT:
+ case MAC_STAT_BRDCSTXMT:
+ case MAC_STAT_IERRORS:
+ case MAC_STAT_UNKNOWNS:
+ case MAC_STAT_OERRORS:
+ case MAC_STAT_OBYTES:
+ ret = mlxcx_mac_stat_rfc_2863(mlxp, port, stat, val);
+ break;
+ case MAC_STAT_IPACKETS:
+ case MAC_STAT_OPACKETS:
+ case ETHER_STAT_ALIGN_ERRORS:
+ case ETHER_STAT_FCS_ERRORS:
+ case ETHER_STAT_TOOLONG_ERRORS:
+ ret = mlxcx_mac_stat_ieee_802_3(mlxp, port, stat, val);
+ break;
+ case MAC_STAT_NORCVBUF:
+ *val = port->mlp_stats.mlps_rx_drops;
+ break;
+ default:
+ ret = ENOTSUP;
+ }
+
+ mutex_exit(&port->mlp_mtx);
+
+ return (ret);
+}
+
+static int
+mlxcx_mac_led_set(void *arg, mac_led_mode_t mode, uint_t flags)
+{
+ mlxcx_t *mlxp = arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+ int ret = 0;
+
+ if (flags != 0) {
+ return (EINVAL);
+ }
+
+ mutex_enter(&port->mlp_mtx);
+
+ switch (mode) {
+ case MAC_LED_DEFAULT:
+ case MAC_LED_OFF:
+ if (!mlxcx_cmd_set_port_led(mlxp, port, 0)) {
+ ret = EIO;
+ break;
+ }
+ break;
+ case MAC_LED_IDENT:
+ if (!mlxcx_cmd_set_port_led(mlxp, port, UINT16_MAX)) {
+ ret = EIO;
+ break;
+ }
+ break;
+ default:
+ ret = ENOTSUP;
+ }
+
+ mutex_exit(&port->mlp_mtx);
+
+ return (ret);
+}
+
+static int
+mlxcx_mac_txr_info(void *arg, uint_t id, mac_transceiver_info_t *infop)
+{
+ mlxcx_t *mlxp = arg;
+ mlxcx_module_status_t st;
+
+ if (!mlxcx_cmd_query_module_status(mlxp, id, &st, NULL))
+ return (EIO);
+
+ if (st != MLXCX_MODULE_UNPLUGGED)
+ mac_transceiver_info_set_present(infop, B_TRUE);
+
+ if (st == MLXCX_MODULE_PLUGGED)
+ mac_transceiver_info_set_usable(infop, B_TRUE);
+
+ return (0);
+}
+
+static int
+mlxcx_mac_txr_read(void *arg, uint_t id, uint_t page, void *vbuf,
+ size_t nbytes, off_t offset, size_t *nread)
+{
+ mlxcx_t *mlxp = arg;
+ mlxcx_register_data_t data;
+ uint8_t *buf = vbuf;
+ boolean_t ok;
+ size_t take, done = 0;
+ uint8_t i2c_addr;
+
+ if (id != 0 || vbuf == NULL || nbytes == 0 || nread == NULL)
+ return (EINVAL);
+
+ if (nbytes > 256 || offset >= 256 || (offset + nbytes > 256))
+ return (EINVAL);
+
+ /*
+ * The PRM is really not very clear about any of this, but it seems
+ * that the i2c_device_addr field in MCIA is the SFP+ spec "page"
+ * number shifted right by 1 bit. They're written in the SFF spec
+ * like "1010000X" so Mellanox just dropped the X.
+ *
+ * This means that if we want page 0xA0, we put 0x50 in the
+ * i2c_device_addr field.
+ *
+ * The "page_number" field in MCIA means something else. Don't ask me
+ * what. FreeBSD leaves it as zero, so we will too!
+ */
+ i2c_addr = page >> 1;
+
+ while (done < nbytes) {
+ take = nbytes - done;
+ if (take > sizeof (data.mlrd_mcia.mlrd_mcia_data))
+ take = sizeof (data.mlrd_mcia.mlrd_mcia_data);
+
+ bzero(&data, sizeof (data));
+ ASSERT3U(id, <=, 0xff);
+ data.mlrd_mcia.mlrd_mcia_module = (uint8_t)id;
+ data.mlrd_mcia.mlrd_mcia_i2c_device_addr = i2c_addr;
+ data.mlrd_mcia.mlrd_mcia_device_addr = to_be16(offset);
+ data.mlrd_mcia.mlrd_mcia_size = to_be16(take);
+
+ ok = mlxcx_cmd_access_register(mlxp,
+ MLXCX_CMD_ACCESS_REGISTER_READ, MLXCX_REG_MCIA, &data);
+ if (!ok) {
+ *nread = 0;
+ return (EIO);
+ }
+
+ if (data.mlrd_mcia.mlrd_mcia_status != MLXCX_MCIA_STATUS_OK) {
+ *nread = 0;
+ return (EIO);
+ }
+
+ bcopy(data.mlrd_mcia.mlrd_mcia_data, &buf[done], take);
+
+ done += take;
+ offset += take;
+ }
+ *nread = done;
+ return (0);
+}
+
+static int
+mlxcx_mac_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
+{
+ mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
+ (void) wq;
+
+ /*
+ * We should add support for using hw flow counters and such to
+ * get per-ring statistics. Not done yet though!
+ */
+
+ switch (stat) {
+ default:
+ *val = 0;
+ return (ENOTSUP);
+ }
+
+ return (0);
+}
+
+static int
+mlxcx_mac_start(void *arg)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ (void) mlxp;
+ return (0);
+}
+
+static void
+mlxcx_mac_stop(void *arg)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ (void) mlxp;
+}
+
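+/*
+ * Transmit entry point for a single ring. We copy the L2 headers aside
+ * for inlining into the send queue entry, bind or copy the rest of the
+ * packet for DMA, and enqueue it. Returning NULL tells MAC the packet was
+ * consumed (sent or dropped); returning mp asks MAC to hold onto it and
+ * retry later, once the blocked ring has drained.
+ */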
+static mblk_t *
+mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
+{
+ mlxcx_work_queue_t *sq = (mlxcx_work_queue_t *)arg;
+ mlxcx_t *mlxp = sq->mlwq_mlx;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_buffer_t *b;
+ mac_header_info_t mhi;
+ mblk_t *kmp, *nmp;
+ uint8_t inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN];
+ size_t inline_hdrlen, rem, off;
+ uint32_t chkflags = 0;
+ boolean_t ok;
+ size_t take = 0;
+
+ VERIFY(mp->b_next == NULL);
+
+ mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &chkflags);
+
+ if (mac_vlan_header_info(mlxp->mlx_mac_hdl, mp, &mhi) != 0) {
+ /*
+ * We got given a frame without a valid L2 header on it. We
+ * can't really transmit that (mlx parts don't like it), so
+ * we will just drop it on the floor.
+ */
+ freemsg(mp);
+ return (NULL);
+ }
+
+ inline_hdrlen = rem = mhi.mhi_hdrsize;
+
+ kmp = mp;
+ off = 0;
+ while (rem > 0) {
+ const ptrdiff_t sz = MBLKL(kmp);
+ ASSERT3S(sz, >=, 0);
+ ASSERT3U(sz, <=, SIZE_MAX);
+ take = sz;
+ if (take > rem)
+ take = rem;
+ bcopy(kmp->b_rptr, inline_hdrs + off, take);
+ rem -= take;
+ off += take;
+ if (take == sz) {
+ take = 0;
+ kmp = kmp->b_cont;
+ }
+ }
+
+ if (!mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b)) {
+ /*
+ * Something went really wrong, and we probably will never be
+ * able to TX again (all our buffers are broken and DMA is
+ * failing). Drop the packet on the floor -- FMA should be
+ * reporting this error elsewhere.
+ */
+ freemsg(mp);
+ return (NULL);
+ }
+
+ mutex_enter(&sq->mlwq_mtx);
+ VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2);
+ cq = sq->mlwq_cq;
+
+ /*
+ * state is a single int, so read-only access without the CQ lock
+ * should be fine.
+ */
+ if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) {
+ mutex_exit(&sq->mlwq_mtx);
+ mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+ return (NULL);
+ }
+
+ if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ mutex_exit(&sq->mlwq_mtx);
+ mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+ return (NULL);
+ }
+
+ /*
+ * Similar logic here: bufcnt is only manipulated atomically, and
+ * bufhwm is set at startup.
+ */
+ if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm) {
+ atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
+ mutex_exit(&sq->mlwq_mtx);
+ mlxcx_buf_return_chain(mlxp, b, B_TRUE);
+ return (mp);
+ }
+
+ ok = mlxcx_sq_add_buffer(mlxp, sq, inline_hdrs, inline_hdrlen,
+ chkflags, b);
+ if (!ok) {
+ atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
+ mutex_exit(&sq->mlwq_mtx);
+ mlxcx_buf_return_chain(mlxp, b, B_TRUE);
+ return (mp);
+ }
+
+ /*
+ * Now that we've successfully enqueued the rest of the packet,
+ * free any mblks that we cut off while inlining headers.
+ */
+ for (; mp != kmp; mp = nmp) {
+ nmp = mp->b_cont;
+ freeb(mp);
+ }
+
+ mutex_exit(&sq->mlwq_mtx);
+
+ return (NULL);
+}
+
+static int
+mlxcx_mac_setpromisc(void *arg, boolean_t on)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_entry_t *fe;
+ mlxcx_flow_table_t *ft;
+ mlxcx_ring_group_t *g;
+ int ret = 0;
+ uint_t idx;
+
+ mutex_enter(&port->mlp_mtx);
+
+ /*
+ * First, do the top-level flow entry on the root flow table for
+ * the port. This catches all traffic that doesn't match any MAC
+ * filters.
+ */
+ ft = port->mlp_rx_flow;
+ mutex_enter(&ft->mlft_mtx);
+ fg = port->mlp_promisc;
+ fe = list_head(&fg->mlfg_entries);
+ if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ ret = EIO;
+ }
+ } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
+ if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
+ ret = EIO;
+ }
+ }
+ mutex_exit(&ft->mlft_mtx);
+
+ /*
+ * If we failed to change the top-level entry, don't bother trying
+ * the per-group ones.
+ */
+ if (ret != 0) {
+ mutex_exit(&port->mlp_mtx);
+ return (ret);
+ }
+
+ /*
+ * Then, do the per-rx-group flow entries which catch traffic that
+ * matched a MAC filter but failed to match a VLAN filter.
+ */
+ for (idx = 0; idx < mlxp->mlx_rx_ngroups; ++idx) {
+ g = &mlxp->mlx_rx_groups[idx];
+
+ mutex_enter(&g->mlg_mtx);
+
+ ft = g->mlg_rx_vlan_ft;
+ mutex_enter(&ft->mlft_mtx);
+
+ fg = g->mlg_rx_vlan_promisc_fg;
+ fe = list_head(&fg->mlfg_entries);
+ if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ ret = EIO;
+ }
+ } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
+ if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
+ ret = EIO;
+ }
+ }
+
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ }
+
+ mutex_exit(&port->mlp_mtx);
+ return (ret);
+}
+
+static int
+mlxcx_mac_multicast(void *arg, boolean_t add, const uint8_t *addr)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+ mlxcx_ring_group_t *g = &mlxp->mlx_rx_groups[0];
+ int ret = 0;
+
+ mutex_enter(&port->mlp_mtx);
+ mutex_enter(&g->mlg_mtx);
+ if (add) {
+ if (!mlxcx_add_umcast_entry(mlxp, port, g, addr)) {
+ ret = EIO;
+ }
+ } else {
+ if (!mlxcx_remove_umcast_entry(mlxp, port, g, addr)) {
+ ret = EIO;
+ }
+ }
+ mutex_exit(&g->mlg_mtx);
+ mutex_exit(&port->mlp_mtx);
+ return (ret);
+}
+
+static int
+mlxcx_group_add_mac(void *arg, const uint8_t *mac_addr)
+{
+ mlxcx_ring_group_t *g = arg;
+ mlxcx_t *mlxp = g->mlg_mlx;
+ mlxcx_port_t *port = g->mlg_port;
+ int ret = 0;
+
+ mutex_enter(&port->mlp_mtx);
+ mutex_enter(&g->mlg_mtx);
+ if (!mlxcx_add_umcast_entry(mlxp, port, g, mac_addr)) {
+ ret = EIO;
+ }
+ mutex_exit(&g->mlg_mtx);
+ mutex_exit(&port->mlp_mtx);
+
+ return (ret);
+}
+
+/*
+ * Support for VLAN steering into groups is not yet available in upstream
+ * illumos.
+ */
+#if defined(MAC_VLAN_UNTAGGED)
+
+static int
+mlxcx_group_add_vlan(mac_group_driver_t gh, uint16_t vid)
+{
+ mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
+ mlxcx_t *mlxp = g->mlg_mlx;
+ int ret = 0;
+ boolean_t tagged = B_TRUE;
+
+ if (vid == MAC_VLAN_UNTAGGED) {
+ vid = 0;
+ tagged = B_FALSE;
+ }
+
+ mutex_enter(&g->mlg_mtx);
+ if (!mlxcx_add_vlan_entry(mlxp, g, tagged, vid)) {
+ ret = EIO;
+ }
+ mutex_exit(&g->mlg_mtx);
+
+ return (ret);
+}
+
+static int
+mlxcx_group_remove_vlan(mac_group_driver_t gh, uint16_t vid)
+{
+ mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
+ mlxcx_t *mlxp = g->mlg_mlx;
+ int ret = 0;
+ boolean_t tagged = B_TRUE;
+
+ if (vid == MAC_VLAN_UNTAGGED) {
+ vid = 0;
+ tagged = B_FALSE;
+ }
+
+ mutex_enter(&g->mlg_mtx);
+ if (!mlxcx_remove_vlan_entry(mlxp, g, tagged, vid)) {
+ ret = EIO;
+ }
+ mutex_exit(&g->mlg_mtx);
+
+ return (ret);
+}
+
+#endif /* MAC_VLAN_UNTAGGED */
+
+static int
+mlxcx_group_remove_mac(void *arg, const uint8_t *mac_addr)
+{
+ mlxcx_ring_group_t *g = arg;
+ mlxcx_t *mlxp = g->mlg_mlx;
+ mlxcx_port_t *port = g->mlg_port;
+ int ret = 0;
+
+ mutex_enter(&port->mlp_mtx);
+ mutex_enter(&g->mlg_mtx);
+ if (!mlxcx_remove_umcast_entry(mlxp, port, g, mac_addr)) {
+ ret = EIO;
+ }
+ mutex_exit(&g->mlg_mtx);
+ mutex_exit(&port->mlp_mtx);
+
+ return (ret);
+}
+
+static int
+mlxcx_mac_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
+{
+ mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
+ mlxcx_completion_queue_t *cq = wq->mlwq_cq;
+ mlxcx_ring_group_t *g = wq->mlwq_group;
+ mlxcx_t *mlxp = wq->mlwq_mlx;
+
+ ASSERT(cq != NULL);
+ ASSERT(g != NULL);
+
+ ASSERT(wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ ||
+ wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ);
+ if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
+ !mlxcx_tx_ring_start(mlxp, g, wq))
+ return (EIO);
+ if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
+ !mlxcx_rx_ring_start(mlxp, g, wq))
+ return (EIO);
+
+ mutex_enter(&cq->mlcq_mtx);
+ cq->mlcq_mac_gen = gen_num;
+ mutex_exit(&cq->mlcq_mtx);
+
+ return (0);
+}
+
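+/*
+ * Stop a ring: ask the device to halt the underlying RQ or SQ, then
+ * return all outstanding buffers and wait for busy ones to drain before
+ * destroying them.
+ */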
+static void
+mlxcx_mac_ring_stop(mac_ring_driver_t rh)
+{
+ mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
+ mlxcx_completion_queue_t *cq = wq->mlwq_cq;
+ mlxcx_t *mlxp = wq->mlwq_mlx;
+ mlxcx_buf_shard_t *s;
+ mlxcx_buffer_t *buf;
+
+ mutex_enter(&cq->mlcq_mtx);
+ mutex_enter(&wq->mlwq_mtx);
+ if (wq->mlwq_state & MLXCX_WQ_STARTED) {
+ if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
+ !mlxcx_cmd_stop_rq(mlxp, wq)) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ return;
+ }
+ if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
+ !mlxcx_cmd_stop_sq(mlxp, wq)) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ return;
+ }
+ }
+ ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
+
+ if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ /* Return any outstanding buffers to the free pool. */
+ while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+ }
+ mutex_enter(&cq->mlcq_bufbmtx);
+ while ((buf = list_remove_head(&cq->mlcq_buffers_b)) != NULL) {
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+ }
+ mutex_exit(&cq->mlcq_bufbmtx);
+ cq->mlcq_bufcnt = 0;
+
+ s = wq->mlwq_bufs;
+ mutex_enter(&s->mlbs_mtx);
+ while (!list_is_empty(&s->mlbs_busy))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ while ((buf = list_head(&s->mlbs_free)) != NULL) {
+ mlxcx_buf_destroy(mlxp, buf);
+ }
+ mutex_exit(&s->mlbs_mtx);
+
+ s = wq->mlwq_foreign_bufs;
+ if (s != NULL) {
+ mutex_enter(&s->mlbs_mtx);
+ while (!list_is_empty(&s->mlbs_busy))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ while ((buf = list_head(&s->mlbs_free)) != NULL) {
+ mlxcx_buf_destroy(mlxp, buf);
+ }
+ mutex_exit(&s->mlbs_mtx);
+ }
+
+ wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+ }
+ ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
+
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+}
+
+static int
+mlxcx_mac_group_start(mac_group_driver_t gh)
+{
+ mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
+ mlxcx_t *mlxp = g->mlg_mlx;
+
+ VERIFY3S(g->mlg_type, ==, MLXCX_GROUP_RX);
+ ASSERT(mlxp != NULL);
+
+ if (g->mlg_state & MLXCX_GROUP_RUNNING)
+ return (0);
+
+ if (!mlxcx_rx_group_start(mlxp, g))
+ return (EIO);
+
+ return (0);
+}
+
+static void
+mlxcx_mac_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
+ const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_ring_group_t *g;
+ mlxcx_work_queue_t *wq;
+ mac_intr_t *mintr = &infop->mri_intr;
+
+ if (rtype != MAC_RING_TYPE_TX)
+ return;
+ ASSERT3S(group_index, ==, -1);
+
+ g = &mlxp->mlx_tx_groups[0];
+ ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
+ mutex_enter(&g->mlg_mtx);
+
+ ASSERT3S(ring_index, >=, 0);
+ ASSERT3S(ring_index, <, g->mlg_nwqs);
+
+ wq = &g->mlg_wqs[ring_index];
+
+ wq->mlwq_cq->mlcq_mac_hdl = rh;
+
+ infop->mri_driver = (mac_ring_driver_t)wq;
+ infop->mri_start = mlxcx_mac_ring_start;
+ infop->mri_stop = mlxcx_mac_ring_stop;
+ infop->mri_tx = mlxcx_mac_ring_tx;
+ infop->mri_stat = mlxcx_mac_ring_stat;
+
+ mintr->mi_ddi_handle = mlxp->mlx_intr_handles[
+ wq->mlwq_cq->mlcq_eq->mleq_intr_index];
+
+ mutex_exit(&g->mlg_mtx);
+}
+
+static int
+mlxcx_mac_ring_intr_enable(mac_intr_handle_t intrh)
+{
+ mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh;
+ mlxcx_event_queue_t *eq = cq->mlcq_eq;
+ mlxcx_t *mlxp = cq->mlcq_mlx;
+
+ /*
+ * We are going to call mlxcx_arm_cq() here, so we take the EQ lock
+ * as well as the CQ one to make sure we don't race against
+ * mlxcx_intr_n().
+ */
+ mutex_enter(&eq->mleq_mtx);
+ mutex_enter(&cq->mlcq_mtx);
+ if (cq->mlcq_state & MLXCX_CQ_POLLING) {
+ cq->mlcq_state &= ~MLXCX_CQ_POLLING;
+ if (!(cq->mlcq_state & MLXCX_CQ_ARMED))
+ mlxcx_arm_cq(mlxp, cq);
+ }
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&eq->mleq_mtx);
+
+ return (0);
+}
+
+static int
+mlxcx_mac_ring_intr_disable(mac_intr_handle_t intrh)
+{
+ mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh;
+
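+ /*
+ * Set the polling flag atomically, then take and drop the CQ mutex:
+ * this waits out any interrupt handler that was already mid-processing
+ * before we return.
+ */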
+ atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_POLLING);
+ mutex_enter(&cq->mlcq_mtx);
+ VERIFY(cq->mlcq_state & MLXCX_CQ_POLLING);
+ mutex_exit(&cq->mlcq_mtx);
+
+ return (0);
+}
+
+static mblk_t *
+mlxcx_mac_ring_rx_poll(void *arg, int poll_bytes)
+{
+ mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)arg;
+ mlxcx_completion_queue_t *cq = wq->mlwq_cq;
+ mlxcx_t *mlxp = wq->mlwq_mlx;
+ mblk_t *mp;
+
+ ASSERT(cq != NULL);
+ ASSERT3S(poll_bytes, >, 0);
+ if (poll_bytes == 0)
+ return (NULL);
+
+ mutex_enter(&cq->mlcq_mtx);
+ mp = mlxcx_rx_poll(mlxp, cq, poll_bytes);
+ mutex_exit(&cq->mlcq_mtx);
+
+ return (mp);
+}
+
+static void
+mlxcx_mac_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
+ const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_ring_group_t *g;
+ mlxcx_work_queue_t *wq;
+ mac_intr_t *mintr = &infop->mri_intr;
+
+ if (rtype != MAC_RING_TYPE_RX)
+ return;
+ ASSERT3S(group_index, >=, 0);
+ ASSERT3S(group_index, <, mlxp->mlx_rx_ngroups);
+
+ g = &mlxp->mlx_rx_groups[group_index];
+ ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
+ mutex_enter(&g->mlg_mtx);
+
+ ASSERT3S(ring_index, >=, 0);
+ ASSERT3S(ring_index, <, g->mlg_nwqs);
+
+ ASSERT(g->mlg_state & MLXCX_GROUP_WQS);
+ wq = &g->mlg_wqs[ring_index];
+
+ wq->mlwq_cq->mlcq_mac_hdl = rh;
+
+ infop->mri_driver = (mac_ring_driver_t)wq;
+ infop->mri_start = mlxcx_mac_ring_start;
+ infop->mri_stop = mlxcx_mac_ring_stop;
+ infop->mri_poll = mlxcx_mac_ring_rx_poll;
+ infop->mri_stat = mlxcx_mac_ring_stat;
+
+ mintr->mi_handle = (mac_intr_handle_t)wq->mlwq_cq;
+ mintr->mi_enable = mlxcx_mac_ring_intr_enable;
+ mintr->mi_disable = mlxcx_mac_ring_intr_disable;
+
+ mintr->mi_ddi_handle = mlxp->mlx_intr_handles[
+ wq->mlwq_cq->mlcq_eq->mleq_intr_index];
+
+ mutex_exit(&g->mlg_mtx);
+}
+
+static void
+mlxcx_mac_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
+ mac_group_info_t *infop, mac_group_handle_t gh)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_ring_group_t *g;
+
+ if (rtype != MAC_RING_TYPE_RX)
+ return;
+
+ ASSERT3S(index, >=, 0);
+ ASSERT3S(index, <, mlxp->mlx_rx_ngroups);
+ g = &mlxp->mlx_rx_groups[index];
+ ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
+
+ g->mlg_mac_hdl = gh;
+
+ infop->mgi_driver = (mac_group_driver_t)g;
+ infop->mgi_start = mlxcx_mac_group_start;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = mlxcx_group_add_mac;
+ infop->mgi_remmac = mlxcx_group_remove_mac;
+#if defined(MAC_VLAN_UNTAGGED)
+ infop->mgi_addvlan = mlxcx_group_add_vlan;
+ infop->mgi_remvlan = mlxcx_group_remove_vlan;
+#endif /* MAC_VLAN_UNTAGGED */
+
+ infop->mgi_count = g->mlg_nwqs;
+}
+
+static boolean_t
+mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mac_capab_rings_t *cap_rings;
+ mac_capab_led_t *cap_leds;
+ mac_capab_transceiver_t *cap_txr;
+ uint_t i, n = 0;
+
+ switch (cap) {
+
+ case MAC_CAPAB_RINGS:
+ cap_rings = cap_data;
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+ switch (cap_rings->mr_type) {
+ case MAC_RING_TYPE_TX:
+ cap_rings->mr_gnum = 0;
+ cap_rings->mr_rnum = mlxp->mlx_tx_groups[0].mlg_nwqs;
+ cap_rings->mr_rget = mlxcx_mac_fill_tx_ring;
+ cap_rings->mr_gget = NULL;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ break;
+ case MAC_RING_TYPE_RX:
+ cap_rings->mr_gnum = mlxp->mlx_rx_ngroups;
+ for (i = 0; i < mlxp->mlx_rx_ngroups; ++i)
+ n += mlxp->mlx_rx_groups[i].mlg_nwqs;
+ cap_rings->mr_rnum = n;
+ cap_rings->mr_rget = mlxcx_mac_fill_rx_ring;
+ cap_rings->mr_gget = mlxcx_mac_fill_rx_group;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ break;
+ default:
+ return (B_FALSE);
+ }
+ break;
+
+ case MAC_CAPAB_HCKSUM:
+ if (mlxp->mlx_caps->mlc_checksum) {
+ *(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 |
+ HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM;
+ }
+ break;
+
+ case MAC_CAPAB_LED:
+ cap_leds = cap_data;
+
+ cap_leds->mcl_flags = 0;
+ cap_leds->mcl_modes = MAC_LED_DEFAULT | MAC_LED_OFF |
+ MAC_LED_IDENT;
+ cap_leds->mcl_set = mlxcx_mac_led_set;
+ break;
+
+ case MAC_CAPAB_TRANSCEIVER:
+ cap_txr = cap_data;
+
+ cap_txr->mct_flags = 0;
+ cap_txr->mct_ntransceivers = 1;
+ cap_txr->mct_info = mlxcx_mac_txr_info;
+ cap_txr->mct_read = mlxcx_mac_txr_read;
+ break;
+
+ default:
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+
+ mutex_enter(&port->mlp_mtx);
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ case MAC_PROP_SPEED:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ break;
+ case MAC_PROP_MTU:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ mac_prop_info_set_range_uint32(prh, MLXCX_MTU_OFFSET,
+ port->mlp_max_mtu);
+ mac_prop_info_set_default_uint32(prh,
+ port->mlp_mtu - MLXCX_MTU_OFFSET);
+ break;
+ case MAC_PROP_AUTONEG:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh, 1);
+ break;
+ default:
+ break;
+ }
+
+ mutex_exit(&port->mlp_mtx);
+}
+
+static int
+mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+ int ret = 0;
+ uint32_t new_mtu, new_hw_mtu, old_mtu;
+ mlxcx_buf_shard_t *sh;
+ boolean_t allocd = B_FALSE;
+
+ mutex_enter(&port->mlp_mtx);
+
+ switch (pr_num) {
+ case MAC_PROP_MTU:
+ bcopy(pr_val, &new_mtu, sizeof (new_mtu));
+ new_hw_mtu = new_mtu + MLXCX_MTU_OFFSET;
+ if (new_hw_mtu == port->mlp_mtu)
+ break;
+ if (new_hw_mtu > port->mlp_max_mtu) {
+ ret = EINVAL;
+ break;
+ }
+ sh = list_head(&mlxp->mlx_buf_shards);
+ for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
+ mutex_enter(&sh->mlbs_mtx);
+ if (!list_is_empty(&sh->mlbs_free) ||
+ !list_is_empty(&sh->mlbs_busy)) {
+ allocd = B_TRUE;
+ mutex_exit(&sh->mlbs_mtx);
+ break;
+ }
+ mutex_exit(&sh->mlbs_mtx);
+ }
+ if (allocd) {
+ ret = EBUSY;
+ break;
+ }
+ old_mtu = port->mlp_mtu;
+ ret = mac_maxsdu_update(mlxp->mlx_mac_hdl, new_mtu);
+ if (ret != 0)
+ break;
+ port->mlp_mtu = new_hw_mtu;
+ if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, port,
+ MLXCX_MODIFY_NIC_VPORT_CTX_MTU)) {
+ port->mlp_mtu = old_mtu;
+ (void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu);
+ ret = EIO;
+ break;
+ }
+ if (!mlxcx_cmd_set_port_mtu(mlxp, port)) {
+ port->mlp_mtu = old_mtu;
+ (void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu);
+ ret = EIO;
+ break;
+ }
+ break;
+ default:
+ ret = ENOTSUP;
+ break;
+ }
+
+ mutex_exit(&port->mlp_mtx);
+
+ return (ret);
+}
+
+static int
+mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_port_t *port = &mlxp->mlx_ports[0];
+ uint64_t speed;
+ int ret = 0;
+
+ mutex_enter(&port->mlp_mtx);
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ if (pr_valsize < sizeof (link_duplex_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ /* connectx parts only support full duplex */
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_FULL;
+ break;
+ case MAC_PROP_SPEED:
+ if (pr_valsize < sizeof (uint64_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ speed = mlxcx_speed_to_bits(port->mlp_oper_proto);
+ bcopy(&speed, pr_val, sizeof (speed));
+ break;
+ case MAC_PROP_STATUS:
+ if (pr_valsize < sizeof (link_state_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ switch (port->mlp_oper_status) {
+ case MLXCX_PORT_STATUS_UP:
+ case MLXCX_PORT_STATUS_UP_ONCE:
+ *(link_state_t *)pr_val = LINK_STATE_UP;
+ break;
+ case MLXCX_PORT_STATUS_DOWN:
+ *(link_state_t *)pr_val = LINK_STATE_DOWN;
+ break;
+ default:
+ *(link_state_t *)pr_val = LINK_STATE_UNKNOWN;
+ }
+ break;
+ case MAC_PROP_AUTONEG:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(uint8_t *)pr_val = port->mlp_autoneg;
+ break;
+ case MAC_PROP_MTU:
+ if (pr_valsize < sizeof (uint32_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ *(uint32_t *)pr_val = port->mlp_mtu - MLXCX_MTU_OFFSET;
+ break;
+ default:
+ ret = ENOTSUP;
+ break;
+ }
+
+ mutex_exit(&port->mlp_mtx);
+
+ return (ret);
+}
+
+#define MLXCX_MAC_CALLBACK_FLAGS \
+ (MC_GETCAPAB | MC_GETPROP | MC_PROPINFO | MC_SETPROP)
+
+static mac_callbacks_t mlxcx_mac_callbacks = {
+ .mc_callbacks = MLXCX_MAC_CALLBACK_FLAGS,
+ .mc_getstat = mlxcx_mac_stat,
+ .mc_start = mlxcx_mac_start,
+ .mc_stop = mlxcx_mac_stop,
+ .mc_setpromisc = mlxcx_mac_setpromisc,
+ .mc_multicst = mlxcx_mac_multicast,
+ .mc_ioctl = NULL,
+ .mc_getcapab = mlxcx_mac_getcapab,
+ .mc_setprop = mlxcx_mac_setprop,
+ .mc_getprop = mlxcx_mac_getprop,
+ .mc_propinfo = mlxcx_mac_propinfo,
+ .mc_tx = NULL,
+ .mc_unicst = NULL,
+};
+
+boolean_t
+mlxcx_register_mac(mlxcx_t *mlxp)
+{
+ mac_register_t *mac = mac_alloc(MAC_VERSION);
+ mlxcx_port_t *port;
+ int ret;
+
+ if (mac == NULL)
+ return (B_FALSE);
+
+ VERIFY3U(mlxp->mlx_nports, ==, 1);
+ port = &mlxp->mlx_ports[0];
+
+ mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ mac->m_driver = mlxp;
+ mac->m_dip = mlxp->mlx_dip;
+ mac->m_src_addr = port->mlp_mac_address;
+ mac->m_callbacks = &mlxcx_mac_callbacks;
+ mac->m_min_sdu = MLXCX_MTU_OFFSET;
+ mac->m_max_sdu = port->mlp_mtu - MLXCX_MTU_OFFSET;
+ mac->m_margin = VLAN_TAGSZ;
+ mac->m_priv_props = mlxcx_priv_props;
+ mac->m_v12n = MAC_VIRT_LEVEL1;
+
+ ret = mac_register(mac, &mlxp->mlx_mac_hdl);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "mac_register() returned %d", ret);
+ }
+ mac_free(mac);
+
+ mlxcx_update_link_state(mlxp, port);
+
+ return (ret == 0);
+}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
new file mode 100644
index 0000000000..0516f86d6b
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -0,0 +1,1010 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2020, the University of Queensland
+ */
+
+/*
+ * Mellanox Connect-X 4/5/6 driver.
+ */
+
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/sysmacros.h>
+
+#include <sys/mac_provider.h>
+
+#include <mlxcx.h>
+
+void
+mlxcx_intr_teardown(mlxcx_t *mlxp)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < mlxp->mlx_intr_count; ++i) {
+ mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[i];
+ mutex_enter(&mleq->mleq_mtx);
+ VERIFY0(mleq->mleq_state & MLXCX_EQ_ALLOC);
+ if (mleq->mleq_state & MLXCX_EQ_CREATED)
+ VERIFY(mleq->mleq_state & MLXCX_EQ_DESTROYED);
+ if (i != 0) {
+ VERIFY(avl_is_empty(&mleq->mleq_cqs));
+ avl_destroy(&mleq->mleq_cqs);
+ }
+ mutex_exit(&mleq->mleq_mtx);
+ (void) ddi_intr_disable(mlxp->mlx_intr_handles[i]);
+ (void) ddi_intr_remove_handler(mlxp->mlx_intr_handles[i]);
+ ret = ddi_intr_free(mlxp->mlx_intr_handles[i]);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "failed to free interrupt %d: %d",
+ i, ret);
+ }
+ mutex_destroy(&mleq->mleq_mtx);
+ }
+ kmem_free(mlxp->mlx_intr_handles, mlxp->mlx_intr_size);
+ kmem_free(mlxp->mlx_eqs, mlxp->mlx_eqs_size);
+ mlxp->mlx_intr_handles = NULL;
+ mlxp->mlx_eqs = NULL;
+}
+
+/*
+ * Get the next SW-owned entry on the event queue, or NULL if we reach the end.
+ */
+static mlxcx_eventq_ent_t *
+mlxcx_eq_next(mlxcx_event_queue_t *mleq)
+{
+ mlxcx_eventq_ent_t *ent;
+ ddi_fm_error_t err;
+ uint_t ci;
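+ /*
+ * The owner bit alternates on each pass through the ring: an entry is
+ * SW-owned when its owner bit matches the parity of how many times our
+ * consumer counter has wrapped.
+ */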
+ const uint_t swowner = ((mleq->mleq_cc >> mleq->mleq_entshift) & 1);
+
+ ASSERT(mutex_owned(&mleq->mleq_mtx));
+ ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED);
+ ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED);
+
+ /* mleq_nents is always a power of 2 */
+ ci = mleq->mleq_cc & (mleq->mleq_nents - 1);
+
+ ent = &mleq->mleq_ent[ci];
+ VERIFY0(ddi_dma_sync(mleq->mleq_dma.mxdb_dma_handle,
+ (uintptr_t)ent - (uintptr_t)mleq->mleq_ent,
+ sizeof (mlxcx_eventq_ent_t), DDI_DMA_SYNC_FORCPU));
+ ddi_fm_dma_err_get(mleq->mleq_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status == DDI_FM_OK && (ent->mleqe_owner & 1) == swowner) {
+ /* The PRM says we have to membar here, so we're doing it */
+ membar_consumer();
+ ++mleq->mleq_cc;
+ return (ent);
+ }
+ /*
+ * In the case of a DMA error, we should re-arm this EQ and then come
+ * back and try again when the device wakes us back up.
+ *
+ * Hopefully the fault will be gone by then.
+ */
+ ddi_fm_dma_err_clear(mleq->mleq_dma.mxdb_dma_handle, DDI_FME_VERSION);
+
+ return (NULL);
+}
+
+void
+mlxcx_arm_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
+{
+ uint_t try = 0;
+ ddi_fm_error_t err;
+ bits32_t v = new_bits32();
+
+ ASSERT(mutex_owned(&mleq->mleq_mtx));
+ ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED);
+ ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED);
+ ASSERT0(mleq->mleq_state & MLXCX_EQ_ARMED);
+ ASSERT0(mleq->mleq_state & MLXCX_EQ_POLLING);
+
+ mleq->mleq_state |= MLXCX_EQ_ARMED;
+ mleq->mleq_cc_armed = mleq->mleq_cc;
+
+ set_bits32(&v, MLXCX_EQ_ARM_EQN, mleq->mleq_num);
+ set_bits32(&v, MLXCX_EQ_ARM_CI, mleq->mleq_cc);
+
+retry:
+ mlxcx_uar_put32(mlxp, mleq->mleq_uar, MLXCX_UAR_EQ_ARM,
+ from_bits32(v));
+ ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status == DDI_FM_OK)
+ return;
+ if (try++ < mlxcx_doorbell_tries) {
+ ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
+ goto retry;
+ }
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+}
+
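+/*
+ * Update the device's view of our EQ consumer counter without re-arming
+ * the EQ (the NOARM doorbell), so that it knows which entries we have
+ * consumed.
+ */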
+static void
+mlxcx_update_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
+{
+ bits32_t v = new_bits32();
+ ddi_fm_error_t err;
+
+ ASSERT(mutex_owned(&mleq->mleq_mtx));
+ ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED);
+ ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED);
+ ASSERT0(mleq->mleq_state & MLXCX_EQ_ARMED);
+
+ set_bits32(&v, MLXCX_EQ_ARM_EQN, mleq->mleq_num);
+ set_bits32(&v, MLXCX_EQ_ARM_CI, mleq->mleq_cc);
+
+ mlxcx_uar_put32(mlxp, mleq->mleq_uar, MLXCX_UAR_EQ_NOARM,
+ from_bits32(v));
+ ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
+ DDI_FME_VERSION);
+ ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
+ /*
+ * Ignore the error, if it's still happening when we try to re-arm the
+ * EQ, we will note the impact then.
+ */
+}
+
+static mlxcx_completionq_ent_t *
+mlxcx_cq_next(mlxcx_completion_queue_t *mlcq)
+{
+ mlxcx_completionq_ent_t *ent;
+ ddi_fm_error_t err;
+ uint_t ci;
+ const uint_t swowner = ((mlcq->mlcq_cc >> mlcq->mlcq_entshift) & 1);
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+ ASSERT(mlcq->mlcq_state & MLXCX_CQ_CREATED);
+ ASSERT0(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);
+
+ /* mlcq_nents is always a power of 2 */
+ ci = mlcq->mlcq_cc & (mlcq->mlcq_nents - 1);
+
+ ent = &mlcq->mlcq_ent[ci];
+ VERIFY0(ddi_dma_sync(mlcq->mlcq_dma.mxdb_dma_handle,
+ (uintptr_t)ent - (uintptr_t)mlcq->mlcq_ent,
+ sizeof (mlxcx_completionq_ent_t), DDI_DMA_SYNC_FORCPU));
+ ddi_fm_dma_err_get(mlcq->mlcq_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status == DDI_FM_OK && (ent->mlcqe_owner & 1) == swowner) {
+ /* The PRM says we have to membar here, so we're doing it */
+ membar_consumer();
+ ++mlcq->mlcq_cc;
+ return (ent);
+ }
+ ddi_fm_dma_err_clear(mlcq->mlcq_dma.mxdb_dma_handle, DDI_FME_VERSION);
+
+ return (NULL);
+}
+
+void
+mlxcx_arm_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+ bits32_t dbval = new_bits32();
+ uint64_t udbval;
+ ddi_fm_error_t err;
+ uint_t try = 0;
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+ ASSERT(mlcq->mlcq_state & MLXCX_CQ_CREATED);
+ ASSERT0(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);
+
+ if (mlcq->mlcq_state & MLXCX_CQ_ARMED)
+ ASSERT3U(mlcq->mlcq_ec, >, mlcq->mlcq_ec_armed);
+
+ if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ return;
+
+ mlcq->mlcq_state |= MLXCX_CQ_ARMED;
+ mlcq->mlcq_cc_armed = mlcq->mlcq_cc;
+ mlcq->mlcq_ec_armed = mlcq->mlcq_ec;
+
+ set_bits32(&dbval, MLXCX_CQ_ARM_SEQ, mlcq->mlcq_ec);
+ set_bits32(&dbval, MLXCX_CQ_ARM_CI, mlcq->mlcq_cc);
+
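+ /*
+ * The 64-bit arm doorbell value we build here is the arm_ci word
+ * (arm sequence number and consumer counter) in the high 32 bits,
+ * with the CQ number in the low 24 bits.
+ */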
+ udbval = (uint64_t)from_bits32(dbval) << 32;
+ udbval |= mlcq->mlcq_num & 0xffffff;
+
+ mlcq->mlcq_doorbell->mlcqd_update_ci = to_be24(mlcq->mlcq_cc);
+ mlcq->mlcq_doorbell->mlcqd_arm_ci = dbval;
+
+retry:
+ MLXCX_DMA_SYNC(mlcq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(mlcq->mlcq_doorbell_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ if (try++ < mlxcx_doorbell_tries) {
+ ddi_fm_dma_err_clear(
+ mlcq->mlcq_doorbell_dma.mxdb_dma_handle,
+ DDI_FME_VERSION);
+ goto retry;
+ } else {
+ goto err;
+ }
+ }
+
+ mlxcx_uar_put64(mlxp, mlcq->mlcq_uar, MLXCX_UAR_CQ_ARM, udbval);
+ ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status == DDI_FM_OK)
+ return;
+ if (try++ < mlxcx_doorbell_tries) {
+ ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
+ goto retry;
+ }
+
+err:
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+}
+
+const char *
+mlxcx_event_name(mlxcx_event_t evt)
+{
+ switch (evt) {
+ case MLXCX_EVENT_COMPLETION:
+ return ("COMPLETION");
+ case MLXCX_EVENT_PATH_MIGRATED:
+ return ("PATH_MIGRATED");
+ case MLXCX_EVENT_COMM_ESTABLISH:
+ return ("COMM_ESTABLISH");
+ case MLXCX_EVENT_SENDQ_DRAIN:
+ return ("SENDQ_DRAIN");
+ case MLXCX_EVENT_LAST_WQE:
+ return ("LAST_WQE");
+ case MLXCX_EVENT_SRQ_LIMIT:
+ return ("SRQ_LIMIT");
+ case MLXCX_EVENT_DCT_ALL_CLOSED:
+ return ("DCT_ALL_CLOSED");
+ case MLXCX_EVENT_DCT_ACCKEY_VIOL:
+ return ("DCT_ACCKEY_VIOL");
+ case MLXCX_EVENT_CQ_ERROR:
+ return ("CQ_ERROR");
+ case MLXCX_EVENT_WQ_CATASTROPHE:
+ return ("WQ_CATASTROPHE");
+ case MLXCX_EVENT_PATH_MIGRATE_FAIL:
+ return ("PATH_MIGRATE_FAIL");
+ case MLXCX_EVENT_PAGE_FAULT:
+ return ("PAGE_FAULT");
+ case MLXCX_EVENT_WQ_INVALID_REQ:
+ return ("WQ_INVALID_REQ");
+ case MLXCX_EVENT_WQ_ACCESS_VIOL:
+ return ("WQ_ACCESS_VIOL");
+ case MLXCX_EVENT_SRQ_CATASTROPHE:
+ return ("SRQ_CATASTROPHE");
+ case MLXCX_EVENT_INTERNAL_ERROR:
+ return ("INTERNAL_ERROR");
+ case MLXCX_EVENT_PORT_STATE:
+ return ("PORT_STATE");
+ case MLXCX_EVENT_GPIO:
+ return ("GPIO");
+ case MLXCX_EVENT_PORT_MODULE:
+ return ("PORT_MODULE");
+ case MLXCX_EVENT_TEMP_WARNING:
+ return ("TEMP_WARNING");
+ case MLXCX_EVENT_REMOTE_CONFIG:
+ return ("REMOTE_CONFIG");
+ case MLXCX_EVENT_DCBX_CHANGE:
+ return ("DCBX_CHANGE");
+ case MLXCX_EVENT_DOORBELL_CONGEST:
+ return ("DOORBELL_CONGEST");
+ case MLXCX_EVENT_STALL_VL:
+ return ("STALL_VL");
+ case MLXCX_EVENT_CMD_COMPLETION:
+ return ("CMD_COMPLETION");
+ case MLXCX_EVENT_PAGE_REQUEST:
+ return ("PAGE_REQUEST");
+ case MLXCX_EVENT_NIC_VPORT:
+ return ("NIC_VPORT");
+ case MLXCX_EVENT_EC_PARAMS_CHANGE:
+ return ("EC_PARAMS_CHANGE");
+ case MLXCX_EVENT_XRQ_ERROR:
+ return ("XRQ_ERROR");
+ }
+ return ("UNKNOWN");
+}
+
+/* Should be called only when link state has changed. */
+void
+mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
+{
+ link_state_t ls;
+
+ mutex_enter(&port->mlp_mtx);
+ (void) mlxcx_cmd_query_port_status(mlxp, port);
+ (void) mlxcx_cmd_query_port_speed(mlxp, port);
+
+ switch (port->mlp_oper_status) {
+ case MLXCX_PORT_STATUS_UP:
+ case MLXCX_PORT_STATUS_UP_ONCE:
+ ls = LINK_STATE_UP;
+ break;
+ case MLXCX_PORT_STATUS_DOWN:
+ ls = LINK_STATE_DOWN;
+ break;
+ default:
+ ls = LINK_STATE_UNKNOWN;
+ }
+ mac_link_update(mlxp->mlx_mac_hdl, ls);
+
+ mutex_exit(&port->mlp_mtx);
+}
+
+static void
+mlxcx_give_pages_once(mlxcx_t *mlxp, size_t npages)
+{
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+ mlxcx_dev_page_t *mdp;
+ int32_t togive;
+ mlxcx_dev_page_t *pages[MLXCX_MANAGE_PAGES_MAX_PAGES];
+ uint_t i;
+ const ddi_dma_cookie_t *ck;
+
+ togive = MIN(npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
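+ /*
+ * Note: we hand over at most MLXCX_MANAGE_PAGES_MAX_PAGES per
+ * command; the device is expected to raise further PAGE_REQUEST
+ * events if it still wants more.
+ */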
+
+ for (i = 0; i < togive; i++) {
+ mdp = kmem_zalloc(sizeof (mlxcx_dev_page_t), KM_SLEEP);
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_page_attr(mlxp, &attr);
+ if (!mlxcx_dma_alloc(mlxp, &mdp->mxdp_dma, &attr, &acc,
+ B_TRUE, MLXCX_HW_PAGE_SIZE, B_TRUE)) {
+ mlxcx_warn(mlxp, "failed to allocate 4k page %u/%u", i,
+ togive);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ /* Free only the pages that were successfully allocated. */
+ togive = i;
+ goto cleanup_npages;
+ }
+ ck = mlxcx_dma_cookie_one(&mdp->mxdp_dma);
+ mdp->mxdp_pa = ck->dmac_laddress;
+ pages[i] = mdp;
+ }
+
+ mutex_enter(&mlxp->mlx_pagemtx);
+
+ if (!mlxcx_cmd_give_pages(mlxp,
+ MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES, togive, pages)) {
+ mlxcx_warn(mlxp, "!hardware refused our gift of %u "
+ "pages!", togive);
+ mutex_exit(&mlxp->mlx_pagemtx);
+ goto cleanup_npages;
+ }
+
+ for (i = 0; i < togive; i++) {
+ avl_add(&mlxp->mlx_pages, pages[i]);
+ }
+ mlxp->mlx_npages += togive;
+ mutex_exit(&mlxp->mlx_pagemtx);
+
+ return;
+
+cleanup_npages:
+ /* The page mutex is not held on any path that reaches here. */
+ for (i = 0; i < togive; i++) {
+ mdp = pages[i];
+ mlxcx_dma_free(&mdp->mxdp_dma);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ }
+ /* Tell the hardware we had an allocation failure. */
+ (void) mlxcx_cmd_give_pages(mlxp, MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL,
+ 0, NULL);
+}
+
+static void
+mlxcx_take_pages_once(mlxcx_t *mlxp, size_t npages)
+{
+ uint_t i;
+ int32_t req, ret;
+ uint64_t pas[MLXCX_MANAGE_PAGES_MAX_PAGES];
+ mlxcx_dev_page_t *mdp, probe;
+
+ mutex_enter(&mlxp->mlx_pagemtx);
+
+ ASSERT0(avl_is_empty(&mlxp->mlx_pages));
+ req = MIN(npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
+
+ if (!mlxcx_cmd_return_pages(mlxp, req, pas, &ret)) {
+ mutex_exit(&mlxp->mlx_pagemtx);
+ return;
+ }
+
+ for (i = 0; i < ret; i++) {
+ bzero(&probe, sizeof (probe));
+ probe.mxdp_pa = pas[i];
+
+ mdp = avl_find(&mlxp->mlx_pages, &probe, NULL);
+
+ if (mdp != NULL) {
+ avl_remove(&mlxp->mlx_pages, mdp);
+ mlxp->mlx_npages--;
+ mlxcx_dma_free(&mdp->mxdp_dma);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ } else {
+ mlxcx_warn(mlxp, "hardware returned a page "
+ "with PA 0x%" PRIx64 " but we have no "
+ "record of giving out such a page", pas[i]);
+ }
+ }
+
+ mutex_exit(&mlxp->mlx_pagemtx);
+}
+
+static const char *
+mlxcx_module_error_string(mlxcx_module_error_type_t err)
+{
+ switch (err) {
+ case MLXCX_MODULE_ERR_POWER_BUDGET:
+ return ("POWER_BUDGET");
+ case MLXCX_MODULE_ERR_LONG_RANGE:
+ return ("LONG_RANGE");
+ case MLXCX_MODULE_ERR_BUS_STUCK:
+ return ("BUS_STUCK");
+ case MLXCX_MODULE_ERR_NO_EEPROM:
+ return ("NO_EEPROM");
+ case MLXCX_MODULE_ERR_ENFORCEMENT:
+ return ("ENFORCEMENT");
+ case MLXCX_MODULE_ERR_UNKNOWN_IDENT:
+ return ("UNKNOWN_IDENT");
+ case MLXCX_MODULE_ERR_HIGH_TEMP:
+ return ("HIGH_TEMP");
+ case MLXCX_MODULE_ERR_CABLE_SHORTED:
+ return ("CABLE_SHORTED");
+ default:
+ return ("UNKNOWN");
+ }
+}
+
+static void
+mlxcx_report_module_error(mlxcx_t *mlxp, mlxcx_evdata_port_mod_t *evd)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+ const char *lename;
+ const char *ename;
+ const char *stname;
+ uint_t eno = 0;
+ mlxcx_module_status_t state = evd->mled_port_mod_module_status;
+
+ switch (state) {
+ case MLXCX_MODULE_ERROR:
+ stname = "error";
+ eno = evd->mled_port_mod_error_type;
+ lename = mlxcx_module_error_string(eno);
+ switch (eno) {
+ case MLXCX_MODULE_ERR_ENFORCEMENT:
+ ename = DDI_FM_TXR_ERROR_WHITELIST;
+ break;
+ case MLXCX_MODULE_ERR_UNKNOWN_IDENT:
+ case MLXCX_MODULE_ERR_NO_EEPROM:
+ ename = DDI_FM_TXR_ERROR_NOTSUPP;
+ break;
+ case MLXCX_MODULE_ERR_HIGH_TEMP:
+ ename = DDI_FM_TXR_ERROR_OVERTEMP;
+ break;
+ case MLXCX_MODULE_ERR_POWER_BUDGET:
+ case MLXCX_MODULE_ERR_LONG_RANGE:
+ case MLXCX_MODULE_ERR_CABLE_SHORTED:
+ ename = DDI_FM_TXR_ERROR_HWFAIL;
+ break;
+ case MLXCX_MODULE_ERR_BUS_STUCK:
+ default:
+ ename = DDI_FM_TXR_ERROR_UNKNOWN;
+ }
+ break;
+ default:
+ return;
+ }
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
+ DDI_FM_NIC, DDI_FM_TXR_ERROR);
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+ if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
+ return;
+
+ ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
+ /* compulsory FM props */
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
+ /* generic NIC txr error event props */
+ "error", DATA_TYPE_STRING, ename,
+ "port_index", DATA_TYPE_UINT8, 0,
+ "txr_index", DATA_TYPE_UINT8, evd->mled_port_mod_module,
+ /* local props */
+ "mlxcx_state", DATA_TYPE_STRING, stname,
+ "mlxcx_error", DATA_TYPE_STRING, lename,
+ "mlxcx_error_num", DATA_TYPE_UINT8, eno,
+ NULL);
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+}
+
+static uint_t
+mlxcx_intr_0(caddr_t arg, caddr_t arg2)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_event_queue_t *mleq = (mlxcx_event_queue_t *)arg2;
+ mlxcx_eventq_ent_t *ent;
+ mlxcx_port_t *port;
+ uint_t portn;
+ int32_t npages = 0;
+
+ mutex_enter(&mleq->mleq_mtx);
+
+ if (!(mleq->mleq_state & MLXCX_EQ_ALLOC) ||
+ !(mleq->mleq_state & MLXCX_EQ_CREATED) ||
+ (mleq->mleq_state & MLXCX_EQ_DESTROYED)) {
+ mlxcx_warn(mlxp, "int0 on bad eq state");
+ mutex_exit(&mleq->mleq_mtx);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ ent = mlxcx_eq_next(mleq);
+ if (ent == NULL) {
+ mlxcx_warn(mlxp, "spurious int 0?");
+ mutex_exit(&mleq->mleq_mtx);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ ASSERT(mleq->mleq_state & MLXCX_EQ_ARMED);
+ mleq->mleq_state &= ~MLXCX_EQ_ARMED;
+
+ for (; ent != NULL; ent = mlxcx_eq_next(mleq)) {
+ switch (ent->mleqe_event_type) {
+ case MLXCX_EVENT_PAGE_REQUEST:
+ VERIFY3U(from_be16(ent->mleqe_page_request.
+ mled_page_request_function_id), ==, 0);
+ npages += (int32_t)from_be32(ent->mleqe_page_request.
+ mled_page_request_num_pages);
+ break;
+ case MLXCX_EVENT_PORT_STATE:
+ portn = get_bits8(
+ ent->mleqe_port_state.mled_port_state_port_num,
+ MLXCX_EVENT_PORT_NUM) - 1;
+ if (portn >= mlxp->mlx_nports)
+ break;
+ port = &mlxp->mlx_ports[portn];
+ mlxcx_update_link_state(mlxp, port);
+ break;
+ case MLXCX_EVENT_PORT_MODULE:
+ mlxcx_report_module_error(mlxp, &ent->mleqe_port_mod);
+ break;
+ default:
+ mlxcx_warn(mlxp, "unhandled event 0x%x on int0",
+ ent->mleqe_event_type);
+ }
+ }
+
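+ /*
+ * A positive page balance means the device asked us for more
+ * pages; a negative one means it asked us to reclaim some.
+ */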
+ if (npages > 0) {
+ mlxcx_give_pages_once(mlxp, npages);
+ } else if (npages < 0) {
+ mlxcx_take_pages_once(mlxp, -1 * npages);
+ }
+
+ mlxcx_arm_eq(mlxp, mleq);
+ mutex_exit(&mleq->mleq_mtx);
+
+ return (DDI_INTR_CLAIMED);
+}
+
+mblk_t *
+mlxcx_rx_poll(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, size_t bytelim)
+{
+ mlxcx_buffer_t *buf;
+ mblk_t *mp, *cmp, *nmp;
+ mlxcx_completionq_ent_t *cent;
+ size_t bytes = 0;
+ boolean_t found, added;
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+
+ ASSERT(mlcq->mlcq_wq != NULL);
+ ASSERT3U(mlcq->mlcq_wq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ);
+
+ if (!(mlcq->mlcq_state & MLXCX_CQ_ALLOC) ||
+ !(mlcq->mlcq_state & MLXCX_CQ_CREATED) ||
+ (mlcq->mlcq_state & MLXCX_CQ_DESTROYED) ||
+ (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)) {
+ return (NULL);
+ }
+
+ ASSERT(mlcq->mlcq_state & MLXCX_CQ_POLLING);
+
+ nmp = cmp = mp = NULL;
+
+ cent = mlxcx_cq_next(mlcq);
+ for (; cent != NULL; cent = mlxcx_cq_next(mlcq)) {
+ /*
+ * Teardown and ring stop can atomic_or this flag
+ * into our state if they want us to stop early.
+ */
+ if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ break;
+
+ if (cent->mlcqe_opcode == MLXCX_CQE_OP_REQ &&
+ cent->mlcqe_send_wqe_opcode == MLXCX_WQE_OP_NOP) {
+ /* NOP */
+ goto nextcq;
+ }
+
+lookagain:
+ buf = list_head(&mlcq->mlcq_buffers);
+ found = B_FALSE;
+ while (buf != NULL) {
+ if ((buf->mlb_wqe_index & UINT16_MAX) ==
+ from_be16(cent->mlcqe_wqe_counter)) {
+ found = B_TRUE;
+ break;
+ }
+ buf = list_next(&mlcq->mlcq_buffers, buf);
+ }
+ if (!found) {
+ /*
+ * Merge in any buffers the wq enqueue routines pushed
+ * onto buffers_b since our last look, and search again
+ * (mirroring the logic in mlxcx_intr_n() below).
+ */
+ added = B_FALSE;
+ mutex_enter(&mlcq->mlcq_bufbmtx);
+ if (!list_is_empty(&mlcq->mlcq_buffers_b)) {
+ list_move_tail(&mlcq->mlcq_buffers,
+ &mlcq->mlcq_buffers_b);
+ added = B_TRUE;
+ }
+ mutex_exit(&mlcq->mlcq_bufbmtx);
+ if (added)
+ goto lookagain;
+ }
+ if (!found) {
+ buf = list_head(&mlcq->mlcq_buffers);
+ mlxcx_warn(mlxp, "got completion on CQ %x but "
+ "no buffer matching wqe found: %x (first "
+ "buffer counter = %x)", mlcq->mlcq_num,
+ from_be16(cent->mlcqe_wqe_counter),
+ buf == NULL ? UINT32_MAX : buf->mlb_wqe_index);
+ mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_INVAL_STATE);
+ goto nextcq;
+ }
+ list_remove(&mlcq->mlcq_buffers, buf);
+ atomic_dec_64(&mlcq->mlcq_bufcnt);
+
+ nmp = mlxcx_rx_completion(mlxp, mlcq, cent, buf);
+ if (nmp != NULL) {
+ bytes += from_be32(cent->mlcqe_byte_cnt);
+ if (cmp != NULL) {
+ cmp->b_next = nmp;
+ cmp = nmp;
+ } else {
+ mp = cmp = nmp;
+ }
+ }
+nextcq:
+ mlcq->mlcq_doorbell->mlcqd_update_ci = to_be24(mlcq->mlcq_cc);
+
+ if (bytelim != 0 && bytes > bytelim)
+ break;
+ }
+
+ return (mp);
+}
+
+static uint_t
+mlxcx_intr_n(caddr_t arg, caddr_t arg2)
+{
+ mlxcx_t *mlxp = (mlxcx_t *)arg;
+ mlxcx_event_queue_t *mleq = (mlxcx_event_queue_t *)arg2;
+ mlxcx_eventq_ent_t *ent;
+ mlxcx_completionq_ent_t *cent;
+ mlxcx_completion_queue_t *mlcq, probe;
+ mlxcx_buffer_t *buf;
+ mblk_t *mp, *cmp, *nmp;
+ boolean_t found, tellmac = B_FALSE, added;
+
+ mutex_enter(&mleq->mleq_mtx);
+
+ if (!(mleq->mleq_state & MLXCX_EQ_ALLOC) ||
+ !(mleq->mleq_state & MLXCX_EQ_CREATED) ||
+ (mleq->mleq_state & MLXCX_EQ_DESTROYED)) {
+ mutex_exit(&mleq->mleq_mtx);
+ return (DDI_INTR_CLAIMED);
+ }
+
+ ent = mlxcx_eq_next(mleq);
+ if (ent == NULL) {
+ if (++mleq->mleq_badintrs > mlxcx_stuck_intr_count) {
+ mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_BADINT_LIMIT);
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+ (void) ddi_intr_disable(mlxp->mlx_intr_handles[
+ mleq->mleq_intr_index]);
+ }
+ mutex_exit(&mleq->mleq_mtx);
+ return (DDI_INTR_CLAIMED);
+ }
+ mleq->mleq_badintrs = 0;
+
+ ASSERT(mleq->mleq_state & MLXCX_EQ_ARMED);
+ mleq->mleq_state &= ~MLXCX_EQ_ARMED;
+
+ for (; ent != NULL; ent = mlxcx_eq_next(mleq)) {
+ if (ent->mleqe_event_type != MLXCX_EVENT_COMPLETION) {
+ mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_INVAL_STATE);
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+ (void) ddi_intr_disable(mlxp->mlx_intr_handles[
+ mleq->mleq_intr_index]);
+ mutex_exit(&mleq->mleq_mtx);
+ return (DDI_INTR_CLAIMED);
+ }
+ ASSERT3U(ent->mleqe_event_type, ==, MLXCX_EVENT_COMPLETION);
+
+ probe.mlcq_num =
+ from_be24(ent->mleqe_completion.mled_completion_cqn);
+ mlcq = avl_find(&mleq->mleq_cqs, &probe, NULL);
+
+ if (mlcq == NULL)
+ continue;
+
+ /*
+ * The polling function might have the mutex and stop us from
+ * getting the lock here, so we increment the event counter
+ * atomically from outside.
+ *
+ * This way at the end of polling when we go back to interrupts
+ * from this CQ, the event counter is still correct.
+ *
+ * Note that mlxcx_mac_ring_intr_enable() takes the EQ lock so
+ * as to avoid any possibility of racing against us here, so we
+ * only have to consider mlxcx_rx_poll().
+ */
+ atomic_inc_32(&mlcq->mlcq_ec);
+ atomic_and_uint(&mlcq->mlcq_state, ~MLXCX_CQ_ARMED);
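+ /*
+ * (mlxcx_arm_cq() asserts mlcq_ec > mlcq_ec_armed when re-arming,
+ * i.e. that an event really did fire between arms; this counter
+ * is what makes that check possible.)
+ */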
+
+ if (mutex_tryenter(&mlcq->mlcq_mtx) == 0) {
+ /*
+ * If we failed to take the mutex because the polling
+ * function has it, just move on. We don't want to
+ * block other CQs behind this one.
+ */
+ if (mlcq->mlcq_state & MLXCX_CQ_POLLING)
+ continue;
+ /* Otherwise we will wait. */
+ mutex_enter(&mlcq->mlcq_mtx);
+ }
+
+ if (!(mlcq->mlcq_state & MLXCX_CQ_ALLOC) ||
+ !(mlcq->mlcq_state & MLXCX_CQ_CREATED) ||
+ (mlcq->mlcq_state & MLXCX_CQ_DESTROYED) ||
+ (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN) ||
+ (mlcq->mlcq_state & MLXCX_CQ_POLLING)) {
+ mutex_exit(&mlcq->mlcq_mtx);
+ continue;
+ }
+
+ nmp = cmp = mp = NULL;
+ tellmac = B_FALSE;
+
+ cent = mlxcx_cq_next(mlcq);
+ for (; cent != NULL; cent = mlxcx_cq_next(mlcq)) {
+ /*
+ * Teardown and ring stop can atomic_or this flag
+ * into our state if they want us to stop early.
+ */
+ if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ break;
+ if (mlcq->mlcq_state & MLXCX_CQ_POLLING)
+ break;
+
+ if (cent->mlcqe_opcode == MLXCX_CQE_OP_REQ &&
+ cent->mlcqe_send_wqe_opcode == MLXCX_WQE_OP_NOP) {
+ /* NOP */
+ goto nextcq;
+ }
+
+lookagain:
+ /*
+ * Generally the buffer we're looking for will be
+ * at the front of the list, so this loop won't
+ * need to look far.
+ */
+ buf = list_head(&mlcq->mlcq_buffers);
+ found = B_FALSE;
+ while (buf != NULL) {
+ if ((buf->mlb_wqe_index & UINT16_MAX) ==
+ from_be16(cent->mlcqe_wqe_counter)) {
+ found = B_TRUE;
+ break;
+ }
+ buf = list_next(&mlcq->mlcq_buffers, buf);
+ }
+ if (!found) {
+ /*
+ * If there are any buffers waiting on the
+ * buffers_b list, then merge those into
+ * the main list and have another look.
+ *
+ * The wq enqueue routines push new buffers
+ * into buffers_b so that they can avoid
+ * taking the mlcq_mtx and blocking us for
+ * every single packet.
+ */
+ added = B_FALSE;
+ mutex_enter(&mlcq->mlcq_bufbmtx);
+ if (!list_is_empty(&mlcq->mlcq_buffers_b)) {
+ list_move_tail(&mlcq->mlcq_buffers,
+ &mlcq->mlcq_buffers_b);
+ added = B_TRUE;
+ }
+ mutex_exit(&mlcq->mlcq_bufbmtx);
+ if (added)
+ goto lookagain;
+ }
+ if (!found) {
+ buf = list_head(&mlcq->mlcq_buffers);
+ mlxcx_warn(mlxp, "got completion on CQ %x but "
+ "no buffer matching wqe found: %x (first "
+ "buffer counter = %x)", mlcq->mlcq_num,
+ from_be16(cent->mlcqe_wqe_counter),
+ buf == NULL ? UINT32_MAX :
+ buf->mlb_wqe_index);
+ mlxcx_fm_ereport(mlxp,
+ DDI_FM_DEVICE_INVAL_STATE);
+ goto nextcq;
+ }
+ list_remove(&mlcq->mlcq_buffers, buf);
+ atomic_dec_64(&mlcq->mlcq_bufcnt);
+
+ switch (mlcq->mlcq_wq->mlwq_type) {
+ case MLXCX_WQ_TYPE_SENDQ:
+ mlxcx_tx_completion(mlxp, mlcq, cent, buf);
+ break;
+ case MLXCX_WQ_TYPE_RECVQ:
+ nmp = mlxcx_rx_completion(mlxp, mlcq, cent,
+ buf);
+ if (nmp != NULL) {
+ if (cmp != NULL) {
+ cmp->b_next = nmp;
+ cmp = nmp;
+ } else {
+ mp = cmp = nmp;
+ }
+ }
+ break;
+ }
+
+nextcq:
+ /*
+ * Update the "doorbell" consumer counter for the queue
+ * every time. Unlike a UAR write, this is relatively
+ * cheap and doesn't require us to go out on the bus
+ * straight away (since it's our memory).
+ */
+ mlcq->mlcq_doorbell->mlcqd_update_ci =
+ to_be24(mlcq->mlcq_cc);
+
+ if ((mlcq->mlcq_state & MLXCX_CQ_BLOCKED_MAC) &&
+ mlcq->mlcq_bufcnt < mlcq->mlcq_buflwm) {
+ mlcq->mlcq_state &= ~MLXCX_CQ_BLOCKED_MAC;
+ tellmac = B_TRUE;
+ }
+ }
+
+ mlxcx_arm_cq(mlxp, mlcq);
+ mutex_exit(&mlcq->mlcq_mtx);
+
+ if (tellmac) {
+ mac_tx_ring_update(mlxp->mlx_mac_hdl,
+ mlcq->mlcq_mac_hdl);
+ }
+ if (mp != NULL) {
+ mac_rx_ring(mlxp->mlx_mac_hdl, mlcq->mlcq_mac_hdl,
+ mp, mlcq->mlcq_mac_gen);
+ }
+
+ /*
+ * Updating the consumer counter for an EQ requires a write
+ * to the UAR, which is possibly expensive.
+ *
+ * Try to do it only often enough to stop us wrapping around.
+ */
+ if ((mleq->mleq_cc & 0x7) == 0)
+ mlxcx_update_eq(mlxp, mleq);
+ }
+
+ mlxcx_arm_eq(mlxp, mleq);
+ mutex_exit(&mleq->mleq_mtx);
+
+ return (DDI_INTR_CLAIMED);
+}
+
+boolean_t
+mlxcx_intr_setup(mlxcx_t *mlxp)
+{
+ dev_info_t *dip = mlxp->mlx_dip;
+ int ret;
+ int nintrs = 0;
+ int navail = 0;
+ int types, i;
+ mlxcx_eventq_type_t eqt = MLXCX_EQ_TYPE_ANY;
+
+ ret = ddi_intr_get_supported_types(dip, &types);
+ if (ret != DDI_SUCCESS) {
+ return (B_FALSE);
+ }
+
+ if (!(types & DDI_INTR_TYPE_MSIX)) {
+ mlxcx_warn(mlxp, "MSI-X interrupts not available, but mlxcx "
+ "requires MSI-X");
+ return (B_FALSE);
+ }
+
+ ret = ddi_intr_get_nintrs(dip, DDI_INTR_TYPE_MSIX, &nintrs);
+ if (ret != DDI_SUCCESS) {
+ return (B_FALSE);
+ }
+ if (nintrs < 2) {
+ mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx "
+ "requires 2", nintrs);
+ return (B_FALSE);
+ }
+
+ ret = ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX, &navail);
+ if (ret != DDI_SUCCESS) {
+ return (B_FALSE);
+ }
+ if (navail < 2) {
+ mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx "
+ "requires 2", navail);
+ return (B_FALSE);
+ }
+
+ mlxp->mlx_intr_size = navail * sizeof (ddi_intr_handle_t);
+ mlxp->mlx_intr_handles = kmem_alloc(mlxp->mlx_intr_size, KM_SLEEP);
+
+ ret = ddi_intr_alloc(dip, mlxp->mlx_intr_handles, DDI_INTR_TYPE_MSIX,
+ 0, navail, &mlxp->mlx_intr_count, DDI_INTR_ALLOC_NORMAL);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+ if (mlxp->mlx_intr_count < 2) {
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+ mlxp->mlx_intr_type = DDI_INTR_TYPE_MSIX;
+
+ ret = ddi_intr_get_pri(mlxp->mlx_intr_handles[0], &mlxp->mlx_intr_pri);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+
+ mlxp->mlx_eqs_size = mlxp->mlx_intr_count *
+ sizeof (mlxcx_event_queue_t);
+ mlxp->mlx_eqs = kmem_zalloc(mlxp->mlx_eqs_size, KM_SLEEP);
+
+ /*
+ * EQ 0's mutex must be initialised before we install its handler
+ * (both the handler and teardown take it); the remaining EQs are
+ * set up in the loop below.
+ */
+ mutex_init(&mlxp->mlx_eqs[0].mleq_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+
+ ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[0], mlxcx_intr_0,
+ (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[0]);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+
+ /*
+ * If we have enough interrupts, set their "type" fields so that we
+ * avoid mixing RX and TX queues on the same EQs.
+ */
+ if (mlxp->mlx_intr_count >= 8) {
+ eqt = MLXCX_EQ_TYPE_RX;
+ }
+
+ for (i = 1; i < mlxp->mlx_intr_count; ++i) {
+ mutex_init(&mlxp->mlx_eqs[i].mleq_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ avl_create(&mlxp->mlx_eqs[i].mleq_cqs, mlxcx_cq_compare,
+ sizeof (mlxcx_completion_queue_t),
+ offsetof(mlxcx_completion_queue_t, mlcq_eq_entry));
+ mlxp->mlx_eqs[i].mleq_intr_index = i;
+
+ mlxp->mlx_eqs[i].mleq_type = eqt;
+ /*
+ * If eqt is still ANY, just leave it set to that
+ * (no else here).
+ */
+ if (eqt == MLXCX_EQ_TYPE_RX) {
+ eqt = MLXCX_EQ_TYPE_TX;
+ } else if (eqt == MLXCX_EQ_TYPE_TX) {
+ eqt = MLXCX_EQ_TYPE_RX;
+ }
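+ /*
+ * For example, with 8 interrupts: EQ 0 takes async events,
+ * EQs 1/3/5/7 are RX and EQs 2/4/6 are TX.
+ */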
+
+ ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[i],
+ mlxcx_intr_n, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[i]);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
new file mode 100644
index 0000000000..76d0da30e7
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -0,0 +1,2481 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+#ifndef _MLXCX_REG_H
+#define _MLXCX_REG_H
+
+#include <sys/types.h>
+#include <sys/byteorder.h>
+
+#include <mlxcx_endint.h>
+
+#if !defined(_BIT_FIELDS_HTOL) && !defined(_BIT_FIELDS_LTOH)
+#error "Need _BIT_FIELDS_HTOL or _BIT_FIELDS_LTOH"
+#endif
+
+/*
+ * Register offsets.
+ */
+
+#define MLXCX_ISS_FIRMWARE 0x0000
+#define MLXCX_ISS_FW_MAJOR(x) (((x) & 0xffff))
+#define MLXCX_ISS_FW_MINOR(x) (((x) >> 16) & 0xffff)
+#define MLXCX_ISS_FW_CMD 0x0004
+#define MLXCX_ISS_FW_REV(x) (((x) & 0xffff))
+#define MLXCX_ISS_CMD_REV(x) (((x) >> 16) & 0xffff)
+#define MLXCX_ISS_CMD_HIGH 0x0010
+#define MLXCX_ISS_CMD_LOW 0x0014
+#define MLXCX_ISS_CMDQ_SIZE(x) (((x) >> 4) & 0xf)
+#define MLXCX_ISS_CMDQ_STRIDE(x) ((x) & 0xf)
+
+#define MLXCX_ISS_CMD_DOORBELL 0x0018
+#define MLXCX_ISS_INIT 0x01fc
+#define MLXCX_ISS_INITIALIZING(x) (((x) >> 31) & 0x1)
+#define MLXCX_ISS_HEALTH_BUF 0x0200
+#define MLXCX_ISS_NO_DRAM_NIC 0x0240
+#define MLXCX_ISS_TIMER 0x1000
+#define MLXCX_ISS_HEALTH_COUNT 0x1010
+#define MLXCX_ISS_HEALTH_SYND 0x1013
+
+#define MLXCX_CMD_INLINE_INPUT_LEN 16
+#define MLXCX_CMD_INLINE_OUTPUT_LEN 16
+
+#define MLXCX_CMD_MAILBOX_LEN 512
+
+#define MLXCX_CMD_TRANSPORT_PCI 7
+#define MLXCX_CMD_HW_OWNED 0x01
+#define MLXCX_CMD_STATUS(x) ((x) >> 1)
+
+#define MLXCX_UAR_CQ_ARM 0x0020
+#define MLXCX_UAR_EQ_ARM 0x0040
+#define MLXCX_UAR_EQ_NOARM 0x0048
+
+/* Number of blue flame reg pairs per UAR */
+#define MLXCX_BF_PER_UAR 2
+#define MLXCX_BF_PER_UAR_MASK 0x1
+#define MLXCX_BF_SIZE 0x100
+#define MLXCX_BF_BASE 0x0800
+
+/* CSTYLED */
+#define MLXCX_EQ_ARM_EQN (bitdef_t){24, 0xff000000}
+/* CSTYLED */
+#define MLXCX_EQ_ARM_CI (bitdef_t){0, 0x00ffffff}
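+
+/*
+ * Taken together, an EQ arm doorbell write carries the EQ number in bits
+ * 31:24 and the 24-bit consumer counter in bits 23:0.
+ */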
+
+/*
+ * Hardware structure that is used to represent a command.
+ */
+#pragma pack(1)
+typedef struct {
+ uint8_t mce_type;
+ uint8_t mce_rsvd[3];
+ uint32be_t mce_in_length;
+ uint64be_t mce_in_mbox;
+ uint8_t mce_input[MLXCX_CMD_INLINE_INPUT_LEN];
+ uint8_t mce_output[MLXCX_CMD_INLINE_OUTPUT_LEN];
+ uint64be_t mce_out_mbox;
+ uint32be_t mce_out_length;
+ uint8_t mce_token;
+ uint8_t mce_sig;
+ uint8_t mce_rsvd1;
+ uint8_t mce_status;
+} mlxcx_cmd_ent_t;
+
+typedef struct {
+ uint8_t mlxb_data[MLXCX_CMD_MAILBOX_LEN];
+ uint8_t mlxb_rsvd[48];
+ uint64be_t mlxb_nextp;
+ uint32be_t mlxb_blockno;
+ uint8_t mlxb_rsvd1;
+ uint8_t mlxb_token;
+ uint8_t mlxb_ctrl_sig;
+ uint8_t mlxb_sig;
+} mlxcx_cmd_mailbox_t;
+
+typedef struct {
+ uint8_t mled_page_request_rsvd[2];
+ uint16be_t mled_page_request_function_id;
+ uint32be_t mled_page_request_num_pages;
+} mlxcx_evdata_page_request_t;
+
+/* CSTYLED */
+#define MLXCX_EVENT_PORT_NUM (bitdef_t){ .bit_shift = 4, .bit_mask = 0xF0 }
+
+typedef struct {
+ uint8_t mled_port_state_rsvd[8];
+ bits8_t mled_port_state_port_num;
+} mlxcx_evdata_port_state_t;
+
+typedef enum {
+ MLXCX_MODULE_INITIALIZING = 0x0,
+ MLXCX_MODULE_PLUGGED = 0x1,
+ MLXCX_MODULE_UNPLUGGED = 0x2,
+ MLXCX_MODULE_ERROR = 0x3
+} mlxcx_module_status_t;
+
+typedef enum {
+ MLXCX_MODULE_ERR_POWER_BUDGET = 0x0,
+ MLXCX_MODULE_ERR_LONG_RANGE = 0x1,
+ MLXCX_MODULE_ERR_BUS_STUCK = 0x2,
+ MLXCX_MODULE_ERR_NO_EEPROM = 0x3,
+ MLXCX_MODULE_ERR_ENFORCEMENT = 0x4,
+ MLXCX_MODULE_ERR_UNKNOWN_IDENT = 0x5,
+ MLXCX_MODULE_ERR_HIGH_TEMP = 0x6,
+ MLXCX_MODULE_ERR_CABLE_SHORTED = 0x7,
+} mlxcx_module_error_type_t;
+
+typedef struct {
+ uint8_t mled_port_mod_rsvd;
+ uint8_t mled_port_mod_module;
+ uint8_t mled_port_mod_rsvd2;
+ uint8_t mled_port_mod_module_status;
+ uint8_t mled_port_mod_rsvd3[2];
+ uint8_t mled_port_mod_error_type;
+ uint8_t mled_port_mod_rsvd4;
+} mlxcx_evdata_port_mod_t;
+
+typedef struct {
+ uint8_t mled_completion_rsvd[25];
+ uint24be_t mled_completion_cqn;
+} mlxcx_evdata_completion_t;
+
+typedef enum {
+ MLXCX_EV_QUEUE_TYPE_QP = 0x0,
+ MLXCX_EV_QUEUE_TYPE_RQ = 0x1,
+ MLXCX_EV_QUEUE_TYPE_SQ = 0x2,
+} mlxcx_evdata_queue_type_t;
+
+typedef struct {
+ uint8_t mled_queue_rsvd[20];
+ uint8_t mled_queue_type;
+ uint8_t mled_queue_rsvd2[4];
+ uint24be_t mled_queue_num;
+} mlxcx_evdata_queue_t;
+
+#define MLXCX_EQ_OWNER_INIT 1
+
+typedef struct {
+ uint8_t mleqe_rsvd[1];
+ uint8_t mleqe_event_type;
+ uint8_t mleqe_rsvd2[1];
+ uint8_t mleqe_event_sub_type;
+ uint8_t mleqe_rsvd3[28];
+ union {
+ uint8_t mleqe_unknown_data[28];
+ mlxcx_evdata_completion_t mleqe_completion;
+ mlxcx_evdata_page_request_t mleqe_page_request;
+ mlxcx_evdata_port_state_t mleqe_port_state;
+ mlxcx_evdata_port_mod_t mleqe_port_mod;
+ mlxcx_evdata_queue_t mleqe_queue;
+ };
+ uint8_t mleqe_rsvd4[2];
+ uint8_t mleqe_signature;
+ uint8_t mleqe_owner;
+} mlxcx_eventq_ent_t;
+
+typedef enum {
+ MLXCX_CQE_L3_HDR_NONE = 0x0,
+ MLXCX_CQE_L3_HDR_RCV_BUF = 0x1,
+ MLXCX_CQE_L3_HDR_CQE = 0x2,
+} mlxcx_cqe_l3_hdr_placement_t;
+
+typedef enum {
+ MLXCX_CQE_CSFLAGS_L4_OK = 1 << 2,
+ MLXCX_CQE_CSFLAGS_L3_OK = 1 << 1,
+ MLXCX_CQE_CSFLAGS_L2_OK = 1 << 0,
+} mlxcx_cqe_csflags_t;
+
+typedef enum {
+ MLXCX_CQE_L4_TYPE_NONE = 0,
+ MLXCX_CQE_L4_TYPE_TCP = 1,
+ MLXCX_CQE_L4_TYPE_UDP = 2,
+ MLXCX_CQE_L4_TYPE_TCP_EMPTY_ACK = 3,
+ MLXCX_CQE_L4_TYPE_TCP_ACK = 4,
+} mlxcx_cqe_l4_hdr_type_t;
+
+typedef enum {
+ MLXCX_CQE_L3_TYPE_NONE = 0,
+ MLXCX_CQE_L3_TYPE_IPv6 = 1,
+ MLXCX_CQE_L3_TYPE_IPv4 = 2,
+} mlxcx_cqe_l3_hdr_type_t;
+
+typedef enum {
+ MLXCX_CQE_RX_HASH_NONE = 0,
+ MLXCX_CQE_RX_HASH_IPv4 = 1,
+ MLXCX_CQE_RX_HASH_IPv6 = 2,
+ MLXCX_CQE_RX_HASH_IPSEC_SPI = 3,
+} mlxcx_cqe_rx_hash_type_t;
+/* BEGIN CSTYLED */
+#define MLXCX_CQE_RX_HASH_IP_SRC (bitdef_t){0, 0x3}
+#define MLXCX_CQE_RX_HASH_IP_DEST (bitdef_t){2, (0x3 << 2)}
+#define MLXCX_CQE_RX_HASH_L4_SRC (bitdef_t){4, (0x3 << 4)}
+#define MLXCX_CQE_RX_HASH_L4_DEST (bitdef_t){6, (0x3 << 6)}
+/* END CSTYLED */
+
+typedef enum {
+ MLXCX_CQE_OP_REQ = 0x0,
+ MLXCX_CQE_OP_RESP_RDMA = 0x1,
+ MLXCX_CQE_OP_RESP = 0x2,
+ MLXCX_CQE_OP_RESP_IMMEDIATE = 0x3,
+ MLXCX_CQE_OP_RESP_INVALIDATE = 0x4,
+ MLXCX_CQE_OP_RESIZE_CQ = 0x5,
+ MLXCX_CQE_OP_SIG_ERR = 0x12,
+ MLXCX_CQE_OP_REQ_ERR = 0xd,
+ MLXCX_CQE_OP_RESP_ERR = 0xe,
+ MLXCX_CQE_OP_INVALID = 0xf
+} mlxcx_cqe_opcode_t;
+
+typedef enum {
+ MLXCX_CQE_FORMAT_BASIC = 0,
+ MLXCX_CQE_FORMAT_INLINE_32 = 1,
+ MLXCX_CQE_FORMAT_INLINE_64 = 2,
+ MLXCX_CQE_FORMAT_COMPRESSED = 3,
+} mlxcx_cqe_format_t;
+
+typedef enum {
+ MLXCX_CQE_OWNER_INIT = 1
+} mlxcx_cqe_owner_t;
+
+typedef enum {
+ MLXCX_VLAN_TYPE_NONE,
+ MLXCX_VLAN_TYPE_CVLAN,
+ MLXCX_VLAN_TYPE_SVLAN,
+} mlxcx_vlan_type_t;
+
+typedef enum {
+ MLXCX_CQ_ERR_LOCAL_LENGTH = 0x1,
+ MLXCX_CQ_ERR_LOCAL_QP_OP = 0x2,
+ MLXCX_CQ_ERR_LOCAL_PROTECTION = 0x4,
+ MLXCX_CQ_ERR_WR_FLUSHED = 0x5,
+ MLXCX_CQ_ERR_MEM_WINDOW_BIND = 0x6,
+ MLXCX_CQ_ERR_BAD_RESPONSE = 0x10,
+ MLXCX_CQ_ERR_LOCAL_ACCESS = 0x11,
+ MLXCX_CQ_ERR_XPORT_RETRY_CTR = 0x15,
+ MLXCX_CQ_ERR_RNR_RETRY_CTR = 0x16,
+ MLXCX_CQ_ERR_ABORTED = 0x22
+} mlxcx_cq_error_syndrome_t;
+
+typedef struct {
+ uint8_t mlcqee_rsvd[2];
+ uint16be_t mlcqee_wqe_id;
+ uint8_t mlcqee_rsvd2[29];
+ uint24be_t mlcqee_user_index;
+ uint8_t mlcqee_rsvd3[8];
+ uint32be_t mlcqee_byte_cnt;
+ uint8_t mlcqee_rsvd4[6];
+ uint8_t mlcqee_vendor_error_syndrome;
+ uint8_t mlcqee_syndrome;
+ uint8_t mlcqee_wqe_opcode;
+ uint24be_t mlcqee_flow_tag;
+ uint16be_t mlcqee_wqe_counter;
+ uint8_t mlcqee_signature;
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcqe_opcode:4;
+ uint8_t mlcqe_rsvd5:3;
+ uint8_t mlcqe_owner:1;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcqe_owner:1;
+ uint8_t mlcqe_rsvd5:3;
+ uint8_t mlcqe_opcode:4;
+#endif
+ };
+} mlxcx_completionq_error_ent_t;
+
+typedef struct {
+ uint8_t mlcqe_tunnel_flags;
+ uint8_t mlcqe_rsvd[3];
+ uint8_t mlcqe_lro_flags;
+ uint8_t mlcqe_lro_min_ttl;
+ uint16be_t mlcqe_lro_tcp_win;
+ uint32be_t mlcqe_lro_ack_seq_num;
+ uint32be_t mlcqe_rx_hash_result;
+ bits8_t mlcqe_rx_hash_type;
+ uint8_t mlcqe_ml_path;
+ uint8_t mlcqe_rsvd2[2];
+ uint16be_t mlcqe_checksum;
+ uint16be_t mlcqe_slid_smac_lo;
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcqe_rsvd3:1;
+ uint8_t mlcqe_force_loopback:1;
+ uint8_t mlcqe_l3_hdr:2;
+ uint8_t mlcqe_sl_roce_pktype:4;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcqe_sl_roce_pktype:4;
+ uint8_t mlcqe_l3_hdr:2;
+ uint8_t mlcqe_force_loopback:1;
+ uint8_t mlcqe_rsvd3:1;
+#endif
+ };
+ uint24be_t mlcqe_rqpn;
+ bits8_t mlcqe_csflags;
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcqe_ip_frag:1;
+ uint8_t mlcqe_l4_hdr_type:3;
+ uint8_t mlcqe_l3_hdr_type:2;
+ uint8_t mlcqe_ip_ext_opts:1;
+ uint8_t mlcqe_cv:1;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcqe_cv:1;
+ uint8_t mlcqe_ip_ext_opts:1;
+ uint8_t mlcqe_l3_hdr_type:2;
+ uint8_t mlcqe_l4_hdr_type:3;
+ uint8_t mlcqe_ip_frag:1;
+#endif
+ };
+ uint16be_t mlcqe_up_cfi_vid;
+ uint8_t mlcqe_lro_num_seg;
+ uint24be_t mlcqe_user_index;
+ uint32be_t mlcqe_immediate;
+ uint8_t mlcqe_rsvd4[4];
+ uint32be_t mlcqe_byte_cnt;
+ union {
+ struct {
+ uint32be_t mlcqe_lro_timestamp_value;
+ uint32be_t mlcqe_lro_timestamp_echo;
+ };
+ uint64be_t mlcqe_timestamp;
+ };
+ union {
+ uint8_t mlcqe_rx_drop_counter;
+ uint8_t mlcqe_send_wqe_opcode;
+ };
+ uint24be_t mlcqe_flow_tag;
+ uint16be_t mlcqe_wqe_counter;
+ uint8_t mlcqe_signature;
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcqe_opcode:4;
+ uint8_t mlcqe_format:2;
+ uint8_t mlcqe_se:1;
+ uint8_t mlcqe_owner:1;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcqe_owner:1;
+ uint8_t mlcqe_se:1;
+ uint8_t mlcqe_format:2;
+ uint8_t mlcqe_opcode:4;
+#endif
+ };
+} mlxcx_completionq_ent_t;
+
+typedef struct {
+ uint8_t mlcqe_data[64];
+ mlxcx_completionq_ent_t mlcqe_ent;
+} mlxcx_completionq_ent128_t;
+
+typedef enum {
+ MLXCX_WQE_OP_NOP = 0x00,
+ MLXCX_WQE_OP_SEND_INVALIDATE = 0x01,
+ MLXCX_WQE_OP_RDMA_W = 0x08,
+ MLXCX_WQE_OP_RDMA_W_IMMEDIATE = 0x09,
+ MLXCX_WQE_OP_SEND = 0x0A,
+ MLXCX_WQE_OP_SEND_IMMEDIATE = 0x0B,
+ MLXCX_WQE_OP_LSO = 0x0E,
+ MLXCX_WQE_OP_WAIT = 0x0F,
+ MLXCX_WQE_OP_RDMA_R = 0x10,
+} mlxcx_wqe_opcode_t;
+
+#define MLXCX_SQE_MAX_DS ((1 << 6) - 1)
+#define MLXCX_SQE_MAX_PTRS 61
+
+typedef enum {
+ MLXCX_SQE_FENCE_NONE = 0x0,
+ MLXCX_SQE_FENCE_WAIT_OTHERS = 0x1,
+ MLXCX_SQE_FENCE_START = 0x2,
+ MLXCX_SQE_FENCE_STRONG_ORDER = 0x3,
+ MLXCX_SQE_FENCE_START_WAIT = 0x4
+} mlxcx_sqe_fence_mode_t;
+
+typedef enum {
+ MLXCX_SQE_CQE_ON_EACH_ERROR = 0x0,
+ MLXCX_SQE_CQE_ON_FIRST_ERROR = 0x1,
+ MLXCX_SQE_CQE_ALWAYS = 0x2,
+ MLXCX_SQE_CQE_ALWAYS_PLUS_EQE = 0x3
+} mlxcx_sqe_completion_mode_t;
+
+#define MLXCX_SQE_SOLICITED (1 << 1)
+/* CSTYLED */
+#define MLXCX_SQE_FENCE_MODE (bitdef_t){5, 0xe0}
+/* CSTYLED */
+#define MLXCX_SQE_COMPLETION_MODE (bitdef_t){2, 0x0c}
+
+typedef struct {
+ uint8_t mlcs_opcode_mod;
+ uint16be_t mlcs_wqe_index;
+ uint8_t mlcs_opcode;
+ uint24be_t mlcs_qp_or_sq;
+ uint8_t mlcs_ds;
+ uint8_t mlcs_signature;
+ uint8_t mlcs_rsvd2[2];
+ bits8_t mlcs_flags;
+ uint32be_t mlcs_immediate;
+} mlxcx_wqe_control_seg_t;
+
+typedef enum {
+ MLXCX_SQE_ETH_CSFLAG_L4_CHECKSUM = 1 << 7,
+ MLXCX_SQE_ETH_CSFLAG_L3_CHECKSUM = 1 << 6,
+ MLXCX_SQE_ETH_CSFLAG_L4_INNER_CHECKSUM = 1 << 5,
+ MLXCX_SQE_ETH_CSFLAG_L3_INNER_CHECKSUM = 1 << 4,
+} mlxcx_wqe_eth_flags_t;
+
+/* CSTYLED */
+#define MLXCX_SQE_ETH_INLINE_HDR_SZ (bitdef_t){0, 0x03ff}
+#define MLXCX_SQE_ETH_SZFLAG_VLAN (1 << 15)
+#define MLXCX_MAX_INLINE_HEADERLEN 64
+
+typedef struct {
+ uint8_t mles_rsvd[4];
+ bits8_t mles_csflags;
+ uint8_t mles_rsvd2[1];
+ uint16_t mles_mss;
+ uint8_t mles_rsvd3[4];
+ bits16_t mles_szflags;
+ uint8_t mles_inline_headers[18];
+} mlxcx_wqe_eth_seg_t;
+
+typedef struct {
+ uint32be_t mlds_byte_count;
+ uint32be_t mlds_lkey;
+ uint64be_t mlds_address;
+} mlxcx_wqe_data_seg_t;
+
+#define MLXCX_SENDQ_STRIDE_SHIFT 6
+
+typedef struct {
+ mlxcx_wqe_control_seg_t mlsqe_control;
+ mlxcx_wqe_eth_seg_t mlsqe_eth;
+ mlxcx_wqe_data_seg_t mlsqe_data[1];
+} mlxcx_sendq_ent_t;
+
+typedef struct {
+ uint64be_t mlsqbf_qwords[8];
+} mlxcx_sendq_bf_t;
+
+typedef struct {
+ mlxcx_wqe_data_seg_t mlsqe_data[4];
+} mlxcx_sendq_extra_ent_t;
+
+#define MLXCX_RECVQ_STRIDE_SHIFT 7
+/*
+ * Each mlxcx_wqe_data_seg_t is 1<<4 bytes long (there's a CTASSERT to verify
+ * this in mlxcx_cmd.c), so the number of pointers is 1 << (shift - 4).
+ */
+#define MLXCX_RECVQ_MAX_PTRS (1 << (MLXCX_RECVQ_STRIDE_SHIFT - 4))
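+/* With MLXCX_RECVQ_STRIDE_SHIFT == 7, that is 1 << 3 == 8 pointers. */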
+typedef struct {
+ mlxcx_wqe_data_seg_t mlrqe_data[MLXCX_RECVQ_MAX_PTRS];
+} mlxcx_recvq_ent_t;
+
+/* CSTYLED */
+#define MLXCX_CQ_ARM_CI (bitdef_t){ .bit_shift = 0, \
+ .bit_mask = 0x00ffffff }
+/* CSTYLED */
+#define MLXCX_CQ_ARM_SEQ (bitdef_t){ .bit_shift = 28, \
+ .bit_mask = 0x30000000 }
+#define MLXCX_CQ_ARM_SOLICITED (1 << 24)
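+/*
+ * mlxcx_arm_cq() combines these: the 2-bit arm sequence lands in bits
+ * 29:28 and the consumer counter in bits 23:0 of the arm_ci word.
+ */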
+
+typedef struct {
+ uint8_t mlcqd_rsvd;
+ uint24be_t mlcqd_update_ci;
+ bits32_t mlcqd_arm_ci;
+} mlxcx_completionq_doorbell_t;
+
+typedef struct {
+ uint16be_t mlwqd_rsvd;
+ uint16be_t mlwqd_recv_counter;
+ uint16be_t mlwqd_rsvd2;
+ uint16be_t mlwqd_send_counter;
+} mlxcx_workq_doorbell_t;
+
+#define MLXCX_EQ_STATUS_OK (0x0 << 4)
+#define MLXCX_EQ_STATUS_WRITE_FAILURE (0xA << 4)
+
+#define MLXCX_EQ_OI (1 << 1)
+#define MLXCX_EQ_EC (1 << 2)
+
+#define MLXCX_EQ_ST_ARMED 0x9
+#define MLXCX_EQ_ST_FIRED 0xA
+
+/* CSTYLED */
+#define MLXCX_EQ_LOG_PAGE_SIZE (bitdef_t){ .bit_shift = 24, \
+ .bit_mask = 0x1F000000 }
+
+typedef struct {
+ uint8_t mleqc_status;
+ uint8_t mleqc_ecoi;
+ uint8_t mleqc_state;
+ uint8_t mleqc_rsvd[7];
+ uint16be_t mleqc_page_offset;
+ uint8_t mleqc_log_eq_size;
+ uint24be_t mleqc_uar_page;
+ uint8_t mleqc_rsvd3[7];
+ uint8_t mleqc_intr;
+ uint32be_t mleqc_log_page;
+ uint8_t mleqc_rsvd4[13];
+ uint24be_t mleqc_consumer_counter;
+ uint8_t mleqc_rsvd5;
+ uint24be_t mleqc_producer_counter;
+ uint8_t mleqc_rsvd6[16];
+} mlxcx_eventq_ctx_t;
+
+typedef enum {
+ MLXCX_CQC_CQE_SIZE_64 = 0x0,
+ MLXCX_CQC_CQE_SIZE_128 = 0x1,
+} mlxcx_cqc_cqe_sz_t;
+
+typedef enum {
+ MLXCX_CQC_STATUS_OK = 0x0,
+ MLXCX_CQC_STATUS_OVERFLOW = 0x9,
+ MLXCX_CQC_STATUS_WRITE_FAIL = 0xA,
+ MLXCX_CQC_STATUS_INVALID = 0xF
+} mlxcx_cqc_status_t;
+
+typedef enum {
+ MLXCX_CQC_STATE_ARMED_SOLICITED = 0x6,
+ MLXCX_CQC_STATE_ARMED = 0x9,
+ MLXCX_CQC_STATE_FIRED = 0xA
+} mlxcx_cqc_state_t;
+
+/* CSTYLED */
+#define MLXCX_CQ_CTX_STATUS (bitdef_t){28, 0xf0000000}
+/* CSTYLED */
+#define MLXCX_CQ_CTX_CQE_SZ (bitdef_t){21, 0x00e00000}
+/* CSTYLED */
+#define MLXCX_CQ_CTX_PERIOD_MODE (bitdef_t){15, 0x00018000}
+/* CSTYLED */
+#define MLXCX_CQ_CTX_MINI_CQE_FORMAT (bitdef_t){12, 0x00003000}
+/* CSTYLED */
+#define MLXCX_CQ_CTX_STATE (bitdef_t){8, 0x00000f00}
+
+typedef struct mlxcx_completionq_ctx {
+ bits32_t mlcqc_flags;
+
+ uint8_t mlcqc_rsvd4[4];
+
+ uint8_t mlcqc_rsvd5[2];
+ uint16be_t mlcqc_page_offset;
+
+ uint8_t mlcqc_log_cq_size;
+ uint24be_t mlcqc_uar_page;
+
+ uint16be_t mlcqc_cq_period;
+ uint16be_t mlcqc_cq_max_count;
+
+ uint8_t mlcqc_rsvd7[3];
+ uint8_t mlcqc_eqn;
+
+ uint8_t mlcqc_log_page_size;
+ uint8_t mlcqc_rsvd8[3];
+
+ uint8_t mlcqc_rsvd9[4];
+
+ uint8_t mlcqc_rsvd10;
+ uint24be_t mlcqc_last_notified_index;
+ uint8_t mlcqc_rsvd11;
+ uint24be_t mlcqc_last_solicit_index;
+ uint8_t mlcqc_rsvd12;
+ uint24be_t mlcqc_consumer_counter;
+ uint8_t mlcqc_rsvd13;
+ uint24be_t mlcqc_producer_counter;
+
+ uint8_t mlcqc_rsvd14[8];
+
+ uint64be_t mlcqc_dbr_addr;
+} mlxcx_completionq_ctx_t;
+
+typedef enum {
+ MLXCX_WORKQ_TYPE_LINKED_LIST = 0x0,
+ MLXCX_WORKQ_TYPE_CYCLIC = 0x1,
+ MLXCX_WORKQ_TYPE_LINKED_LIST_STRIDING = 0x2,
+ MLXCX_WORKQ_TYPE_CYCLIC_STRIDING = 0x3
+} mlxcx_workq_ctx_type_t;
+
+typedef enum {
+ MLXCX_WORKQ_END_PAD_NONE = 0x0,
+ MLXCX_WORKQ_END_PAD_ALIGN = 0x1
+} mlxcx_workq_end_padding_t;
+
+/* CSTYLED */
+#define MLXCX_WORKQ_CTX_TYPE (bitdef_t){ \
+ .bit_shift = 28, \
+ .bit_mask = 0xf0000000 }
+#define MLXCX_WORKQ_CTX_SIGNATURE (1 << 27)
+#define MLXCX_WORKQ_CTX_CD_SLAVE (1 << 24)
+/* CSTYLED */
+#define MLXCX_WORKQ_CTX_END_PADDING (bitdef_t){ \
+ .bit_shift = 25, \
+ .bit_mask = 0x06000000 }
+
+#define MLXCX_WORKQ_CTX_MAX_ADDRESSES 128
+
+typedef struct mlxcx_workq_ctx {
+ bits32_t mlwqc_flags;
+ uint8_t mlwqc_rsvd[2];
+ uint16be_t mlwqc_lwm;
+ uint8_t mlwqc_rsvd2;
+ uint24be_t mlwqc_pd;
+ uint8_t mlwqc_rsvd3;
+ uint24be_t mlwqc_uar_page;
+ uint64be_t mlwqc_dbr_addr;
+ uint32be_t mlwqc_hw_counter;
+ uint32be_t mlwqc_sw_counter;
+ uint8_t mlwqc_rsvd4;
+ uint8_t mlwqc_log_wq_stride;
+ uint8_t mlwqc_log_wq_pg_sz;
+ uint8_t mlwqc_log_wq_sz;
+ uint8_t mlwqc_rsvd5[2];
+ bits16_t mlwqc_strides;
+ uint8_t mlwqc_rsvd6[152];
+ uint64be_t mlwqc_pas[MLXCX_WORKQ_CTX_MAX_ADDRESSES];
+} mlxcx_workq_ctx_t;
+
+#define MLXCX_RQ_FLAGS_RLKEY (1UL << 31)
+#define MLXCX_RQ_FLAGS_SCATTER_FCS (1 << 29)
+#define MLXCX_RQ_FLAGS_VLAN_STRIP_DISABLE (1 << 28)
+#define MLXCX_RQ_FLAGS_FLUSH_IN_ERROR (1 << 18)
+/* CSTYLED */
+#define MLXCX_RQ_MEM_RQ_TYPE (bitdef_t){ \
+ .bit_shift = 24, \
+ .bit_mask = 0x0f000000 }
+/* CSTYLED */
+#define MLXCX_RQ_STATE (bitdef_t){ \
+ .bit_shift = 20, \
+ .bit_mask = 0x00f00000 }
+
+typedef struct mlxcx_rq_ctx {
+ bits32_t mlrqc_flags;
+ uint8_t mlrqc_rsvd;
+ uint24be_t mlrqc_user_index;
+ uint8_t mlrqc_rsvd2;
+ uint24be_t mlrqc_cqn;
+ uint8_t mlrqc_counter_set_id;
+ uint8_t mlrqc_rsvd3[4];
+ uint24be_t mlrqc_rmpn;
+ uint8_t mlrqc_rsvd4[28];
+ mlxcx_workq_ctx_t mlrqc_wq;
+} mlxcx_rq_ctx_t;
+
+#define MLXCX_SQ_FLAGS_RLKEY (1UL << 31)
+#define MLXCX_SQ_FLAGS_CD_MASTER (1 << 30)
+#define MLXCX_SQ_FLAGS_FRE (1 << 29)
+#define MLXCX_SQ_FLAGS_FLUSH_IN_ERROR (1 << 28)
+#define MLXCX_SQ_FLAGS_ALLOW_MULTI_PKT (1 << 27)
+#define MLXCX_SQ_FLAGS_REG_UMR (1 << 19)
+
+typedef enum {
+ MLXCX_ETH_CAP_INLINE_REQUIRE_L2 = 0,
+ MLXCX_ETH_CAP_INLINE_VPORT_CTX = 1,
+ MLXCX_ETH_CAP_INLINE_NOT_REQUIRED = 2
+} mlxcx_eth_cap_inline_mode_t;
+
+typedef enum {
+ MLXCX_ETH_INLINE_NONE = 0,
+ MLXCX_ETH_INLINE_L2 = 1,
+ MLXCX_ETH_INLINE_L3 = 2,
+ MLXCX_ETH_INLINE_L4 = 3,
+ MLXCX_ETH_INLINE_INNER_L2 = 5,
+ MLXCX_ETH_INLINE_INNER_L3 = 6,
+ MLXCX_ETH_INLINE_INNER_L4 = 7
+} mlxcx_eth_inline_mode_t;
+
+/* CSTYLED */
+#define MLXCX_SQ_MIN_WQE_INLINE (bitdef_t){ \
+ .bit_shift = 24, \
+ .bit_mask = 0x07000000 }
+/* CSTYLED */
+#define MLXCX_SQ_STATE (bitdef_t){ \
+ .bit_shift = 20, \
+ .bit_mask = 0x00f00000 }
+
+typedef struct mlxcx_sq_ctx {
+ bits32_t mlsqc_flags;
+ uint8_t mlsqc_rsvd;
+ uint24be_t mlsqc_user_index;
+ uint8_t mlsqc_rsvd2;
+ uint24be_t mlsqc_cqn;
+ uint8_t mlsqc_rsvd3[18];
+ uint16be_t mlsqc_packet_pacing_rate_limit_index;
+ uint16be_t mlsqc_tis_lst_sz;
+ uint8_t mlsqc_rsvd4[11];
+ uint24be_t mlsqc_tis_num;
+ mlxcx_workq_ctx_t mlsqc_wq;
+} mlxcx_sq_ctx_t;
+
+#define MLXCX_NIC_VPORT_CTX_MAX_ADDRESSES 64
+
+typedef enum {
+ MLXCX_VPORT_PROMISC_UCAST = 1 << 15,
+ MLXCX_VPORT_PROMISC_MCAST = 1 << 14,
+ MLXCX_VPORT_PROMISC_ALL = 1 << 13
+} mlxcx_nic_vport_ctx_promisc_t;
+
+#define MLXCX_VPORT_LIST_TYPE_MASK 0x07
+#define MLXCX_VPORT_LIST_TYPE_SHIFT 0
+
+/* CSTYLED */
+#define MLXCX_VPORT_CTX_MIN_WQE_INLINE (bitdef_t){56, 0x0700000000000000}
+
+typedef struct {
+ bits64_t mlnvc_flags;
+ uint8_t mlnvc_rsvd[28];
+ uint8_t mlnvc_rsvd2[2];
+ uint16be_t mlnvc_mtu;
+ uint64be_t mlnvc_system_image_guid;
+ uint64be_t mlnvc_port_guid;
+ uint64be_t mlnvc_node_guid;
+ uint8_t mlnvc_rsvd3[40];
+ uint16be_t mlnvc_qkey_violation_counter;
+ uint8_t mlnvc_rsvd4[2];
+ uint8_t mlnvc_rsvd5[132];
+ bits16_t mlnvc_promisc_list_type;
+ uint16be_t mlnvc_allowed_list_size;
+ uint8_t mlnvc_rsvd6[2];
+ uint8_t mlnvc_permanent_address[6];
+ uint8_t mlnvc_rsvd7[4];
+ uint64be_t mlnvc_address[MLXCX_NIC_VPORT_CTX_MAX_ADDRESSES];
+} mlxcx_nic_vport_ctx_t;
+
+typedef struct {
+ uint8_t mlftc_flags;
+ uint8_t mlftc_level;
+ uint8_t mlftc_rsvd;
+ uint8_t mlftc_log_size;
+ uint8_t mlftc_rsvd2;
+ uint24be_t mlftc_table_miss_id;
+ uint8_t mlftc_rsvd3[4];
+ uint8_t mlftc_rsvd4[28];
+} mlxcx_flow_table_ctx_t;
+
+/* CSTYLED */
+#define MLXCX_FLOW_HDR_FIRST_VID (bitdef_t){0, 0x07ff}
+/* CSTYLED */
+#define MLXCX_FLOW_HDR_FIRST_PRIO (bitdef_t){13, 0x7000}
+#define MLXCX_FLOW_HDR_FIRST_CFI (1 << 12)
+
+#define MLXCX_FLOW_HDR_IP_DSCP_SHIFT 18
+#define MLXCX_FLOW_HDR_IP_DSCP_MASK 0xfc0000
+#define MLXCX_FLOW_HDR_IP_ECN_SHIFT 16
+#define MLXCX_FLOW_HDR_IP_ECN_MASK 0x030000
+#define MLXCX_FLOW_HDR_CVLAN_TAG (1 << 15)
+#define MLXCX_FLOW_HDR_SVLAN_TAG (1 << 14)
+#define MLXCX_FLOW_HDR_FRAG (1 << 13)
+/* CSTYLED */
+#define MLXCX_FLOW_HDR_IP_VERSION (bitdef_t){ \
+ .bit_shift = 9, \
+ .bit_mask = 0x001e00 }
+/* CSTYLED */
+#define MLXCX_FLOW_HDR_TCP_FLAGS (bitdef_t){ \
+ .bit_shift = 0, \
+ .bit_mask = 0x0001ff }
+
+typedef struct {
+ uint8_t mlfh_smac[6];
+ uint16be_t mlfh_ethertype;
+ uint8_t mlfh_dmac[6];
+ bits16_t mlfh_first_vid_flags;
+ uint8_t mlfh_ip_protocol;
+ bits24_t mlfh_tcp_ip_flags;
+ uint16be_t mlfh_tcp_sport;
+ uint16be_t mlfh_tcp_dport;
+ uint8_t mlfh_rsvd[3];
+ uint8_t mlfh_ip_ttl_hoplimit;
+ uint16be_t mlfh_udp_sport;
+ uint16be_t mlfh_udp_dport;
+ uint8_t mlfh_src_ip[16];
+ uint8_t mlfh_dst_ip[16];
+} mlxcx_flow_header_match_t;
+
+typedef struct {
+ uint8_t mlfp_rsvd;
+ uint24be_t mlfp_source_sqn;
+ uint8_t mlfp_rsvd2[2];
+ uint16be_t mlfp_source_port;
+ bits16_t mlfp_outer_second_vid_flags;
+ bits16_t mlfp_inner_second_vid_flags;
+ bits16_t mlfp_vlan_flags;
+ uint16be_t mlfp_gre_protocol;
+ uint32be_t mlfp_gre_key;
+ uint24be_t mlfp_vxlan_vni;
+ uint8_t mlfp_rsvd3;
+ uint8_t mlfp_rsvd4[4];
+ uint8_t mlfp_rsvd5;
+ uint24be_t mlfp_outer_ipv6_flow_label;
+ uint8_t mlfp_rsvd6;
+ uint24be_t mlfp_inner_ipv6_flow_label;
+ uint8_t mlfp_rsvd7[28];
+} mlxcx_flow_params_match_t;
+
+typedef struct {
+ mlxcx_flow_header_match_t mlfm_outer_headers;
+ mlxcx_flow_params_match_t mlfm_misc_parameters;
+ mlxcx_flow_header_match_t mlfm_inner_headers;
+ uint8_t mlfm_rsvd[320];
+} mlxcx_flow_match_t;
+
+#define MLXCX_FLOW_MAX_DESTINATIONS 64
+typedef enum {
+ MLXCX_FLOW_DEST_VPORT = 0x0,
+ MLXCX_FLOW_DEST_FLOW_TABLE = 0x1,
+ MLXCX_FLOW_DEST_TIR = 0x2,
+ MLXCX_FLOW_DEST_QP = 0x3
+} mlxcx_flow_destination_type_t;
+
+typedef struct {
+ uint8_t mlfd_destination_type;
+ uint24be_t mlfd_destination_id;
+ uint8_t mlfd_rsvd[4];
+} mlxcx_flow_dest_t;
+
+typedef enum {
+ MLXCX_FLOW_ACTION_ALLOW = 1 << 0,
+ MLXCX_FLOW_ACTION_DROP = 1 << 1,
+ MLXCX_FLOW_ACTION_FORWARD = 1 << 2,
+ MLXCX_FLOW_ACTION_COUNT = 1 << 3,
+ MLXCX_FLOW_ACTION_ENCAP = 1 << 4,
+ MLXCX_FLOW_ACTION_DECAP = 1 << 5
+} mlxcx_flow_action_t;
+
+typedef struct {
+ uint8_t mlfec_rsvd[4];
+ uint32be_t mlfec_group_id;
+ uint8_t mlfec_rsvd2;
+ uint24be_t mlfec_flow_tag;
+ uint8_t mlfec_rsvd3[2];
+ uint16be_t mlfec_action;
+ uint8_t mlfec_rsvd4;
+ uint24be_t mlfec_destination_list_size;
+ uint8_t mlfec_rsvd5;
+ uint24be_t mlfec_flow_counter_list_size;
+ uint32be_t mlfec_encap_id;
+ uint8_t mlfec_rsvd6[36];
+ mlxcx_flow_match_t mlfec_match_value;
+ uint8_t mlfec_rsvd7[192];
+ mlxcx_flow_dest_t mlfec_destination[MLXCX_FLOW_MAX_DESTINATIONS];
+} mlxcx_flow_entry_ctx_t;
+
+/* CSTYLED */
+#define MLXCX_TIR_CTX_DISP_TYPE (bitdef_t){ 4, 0xf0 }
+typedef enum {
+ MLXCX_TIR_DIRECT = 0x0,
+ MLXCX_TIR_INDIRECT = 0x1,
+} mlxcx_tir_type_t;
+
+/* CSTYLED */
+#define MLXCX_TIR_LRO_TIMEOUT (bitdef_t){ 12, 0x0ffff000 }
+/* CSTYLED */
+#define MLXCX_TIR_LRO_ENABLE_MASK (bitdef_t){ 8, 0x00000f00 }
+/* CSTYLED */
+#define MLXCX_TIR_LRO_MAX_MSG_SZ (bitdef_t){ 0, 0x000000ff }
+
+/* CSTYLED */
+#define MLXCX_TIR_RX_HASH_FN (bitdef_t){ 4, 0xf0 }
+typedef enum {
+ MLXCX_TIR_HASH_NONE = 0x0,
+ MLXCX_TIR_HASH_XOR8 = 0x1,
+ MLXCX_TIR_HASH_TOEPLITZ = 0x2
+} mlxcx_tir_hash_fn_t;
+#define MLXCX_TIR_LB_UNICAST (1 << 24)
+#define MLXCX_TIR_LB_MULTICAST (1 << 25)
+
+/* CSTYLED */
+#define MLXCX_RX_HASH_L3_TYPE (bitdef_t){ 31, 0x80000000 }
+typedef enum {
+ MLXCX_RX_HASH_L3_IPv4 = 0,
+ MLXCX_RX_HASH_L3_IPv6 = 1
+} mlxcx_tir_rx_hash_l3_type_t;
+/* CSTYLED */
+#define MLXCX_RX_HASH_L4_TYPE (bitdef_t){ 30, 0x40000000 }
+typedef enum {
+ MLXCX_RX_HASH_L4_TCP = 0,
+ MLXCX_RX_HASH_L4_UDP = 1
+} mlxcx_tir_rx_hash_l4_type_t;
+/* CSTYLED */
+#define MLXCX_RX_HASH_FIELDS (bitdef_t){ 0, 0x3fffffff }
+typedef enum {
+ MLXCX_RX_HASH_SRC_IP = 1 << 0,
+ MLXCX_RX_HASH_DST_IP = 1 << 1,
+ MLXCX_RX_HASH_L4_SPORT = 1 << 2,
+ MLXCX_RX_HASH_L4_DPORT = 1 << 3,
+ MLXCX_RX_HASH_IPSEC_SPI = 1 << 4
+} mlxcx_tir_rx_hash_fields_t;
+
+typedef struct {
+ uint8_t mltirc_rsvd[4];
+ bits8_t mltirc_disp_type;
+ uint8_t mltirc_rsvd2[11];
+ bits32_t mltirc_lro;
+ uint8_t mltirc_rsvd3[9];
+ uint24be_t mltirc_inline_rqn;
+ bits8_t mltirc_flags;
+ uint24be_t mltirc_indirect_table;
+ bits8_t mltirc_hash_lb;
+ uint24be_t mltirc_transport_domain;
+ uint8_t mltirc_rx_hash_toeplitz_key[40];
+ bits32_t mltirc_rx_hash_fields_outer;
+ bits32_t mltirc_rx_hash_fields_inner;
+ uint8_t mltirc_rsvd4[152];
+} mlxcx_tir_ctx_t;
+
+typedef struct {
+ uint8_t mltisc_rsvd;
+ uint8_t mltisc_prio_or_sl;
+ uint8_t mltisc_rsvd2[35];
+ uint24be_t mltisc_transport_domain;
+ uint8_t mltisc_rsvd3[120];
+} mlxcx_tis_ctx_t;
+
+#define MLXCX_RQT_MAX_RQ_REFS 64
+
+typedef struct {
+ uint8_t mlrqtr_rsvd;
+ uint24be_t mlrqtr_rqn;
+} mlxcx_rqtable_rq_ref_t;
+
+typedef struct {
+ uint8_t mlrqtc_rsvd[22];
+ uint16be_t mlrqtc_max_size;
+ uint8_t mlrqtc_rsvd2[2];
+ uint16be_t mlrqtc_actual_size;
+ uint8_t mlrqtc_rsvd3[212];
+ mlxcx_rqtable_rq_ref_t mlrqtc_rqref[MLXCX_RQT_MAX_RQ_REFS];
+} mlxcx_rqtable_ctx_t;
+
+#pragma pack()
+
+typedef enum {
+ MLXCX_EVENT_COMPLETION = 0x00,
+ MLXCX_EVENT_PATH_MIGRATED = 0x01,
+ MLXCX_EVENT_COMM_ESTABLISH = 0x02,
+ MLXCX_EVENT_SENDQ_DRAIN = 0x03,
+ MLXCX_EVENT_LAST_WQE = 0x13,
+ MLXCX_EVENT_SRQ_LIMIT = 0x14,
+ MLXCX_EVENT_DCT_ALL_CLOSED = 0x1C,
+ MLXCX_EVENT_DCT_ACCKEY_VIOL = 0x1D,
+ MLXCX_EVENT_CQ_ERROR = 0x04,
+ MLXCX_EVENT_WQ_CATASTROPHE = 0x05,
+ MLXCX_EVENT_PATH_MIGRATE_FAIL = 0x07,
+ MLXCX_EVENT_PAGE_FAULT = 0x0C,
+ MLXCX_EVENT_WQ_INVALID_REQ = 0x10,
+ MLXCX_EVENT_WQ_ACCESS_VIOL = 0x11,
+ MLXCX_EVENT_SRQ_CATASTROPHE = 0x12,
+ MLXCX_EVENT_INTERNAL_ERROR = 0x08,
+ MLXCX_EVENT_PORT_STATE = 0x09,
+ MLXCX_EVENT_GPIO = 0x15,
+ MLXCX_EVENT_PORT_MODULE = 0x16,
+ MLXCX_EVENT_TEMP_WARNING = 0x17,
+ MLXCX_EVENT_REMOTE_CONFIG = 0x19,
+ MLXCX_EVENT_DCBX_CHANGE = 0x1E,
+ MLXCX_EVENT_DOORBELL_CONGEST = 0x1A,
+ MLXCX_EVENT_STALL_VL = 0x1B,
+ MLXCX_EVENT_CMD_COMPLETION = 0x0A,
+ MLXCX_EVENT_PAGE_REQUEST = 0x0B,
+ MLXCX_EVENT_NIC_VPORT = 0x0D,
+ MLXCX_EVENT_EC_PARAMS_CHANGE = 0x0E,
+ MLXCX_EVENT_XRQ_ERROR = 0x18
+} mlxcx_event_t;
+
+typedef enum {
+ MLXCX_CMD_R_OK = 0x00,
+ MLXCX_CMD_R_INTERNAL_ERR = 0x01,
+ MLXCX_CMD_R_BAD_OP = 0x02,
+ MLXCX_CMD_R_BAD_PARAM = 0x03,
+ MLXCX_CMD_R_BAD_SYS_STATE = 0x04,
+ MLXCX_CMD_R_BAD_RESOURCE = 0x05,
+ MLXCX_CMD_R_RESOURCE_BUSY = 0x06,
+ MLXCX_CMD_R_EXCEED_LIM = 0x08,
+ MLXCX_CMD_R_BAD_RES_STATE = 0x09,
+ MLXCX_CMD_R_BAD_INDEX = 0x0a,
+ MLXCX_CMD_R_NO_RESOURCES = 0x0f,
+ MLXCX_CMD_R_BAD_INPUT_LEN = 0x50,
+ MLXCX_CMD_R_BAD_OUTPUT_LEN = 0x51,
+ MLXCX_CMD_R_BAD_RESOURCE_STATE = 0x10,
+ MLXCX_CMD_R_BAD_PKT = 0x30,
+ MLXCX_CMD_R_BAD_SIZE = 0x40,
+ MLXCX_CMD_R_TIMEOUT = 0xFF
+} mlxcx_cmd_ret_t;
+
+typedef enum {
+ MLXCX_OP_QUERY_HCA_CAP = 0x100,
+ MLXCX_OP_QUERY_ADAPTER = 0x101,
+ MLXCX_OP_INIT_HCA = 0x102,
+ MLXCX_OP_TEARDOWN_HCA = 0x103,
+ MLXCX_OP_ENABLE_HCA = 0x104,
+ MLXCX_OP_DISABLE_HCA = 0x105,
+ MLXCX_OP_QUERY_PAGES = 0x107,
+ MLXCX_OP_MANAGE_PAGES = 0x108,
+ MLXCX_OP_SET_HCA_CAP = 0x109,
+ MLXCX_OP_QUERY_ISSI = 0x10A,
+ MLXCX_OP_SET_ISSI = 0x10B,
+ MLXCX_OP_SET_DRIVER_VERSION = 0x10D,
+ MLXCX_OP_QUERY_OTHER_HCA_CAP = 0x10E,
+ MLXCX_OP_MODIFY_OTHER_HCA_CAP = 0x10F,
+ MLXCX_OP_SET_TUNNELED_OPERATIONS = 0x110,
+ MLXCX_OP_CREATE_MKEY = 0x200,
+ MLXCX_OP_QUERY_MKEY = 0x201,
+ MLXCX_OP_DESTROY_MKEY = 0x202,
+ MLXCX_OP_QUERY_SPECIAL_CONTEXTS = 0x203,
+ MLXCX_OP_PAGE_FAULT_RESUME = 0x204,
+ MLXCX_OP_CREATE_EQ = 0x301,
+ MLXCX_OP_DESTROY_EQ = 0x302,
+ MLXCX_OP_QUERY_EQ = 0x303,
+ MLXCX_OP_GEN_EQE = 0x304,
+ MLXCX_OP_CREATE_CQ = 0x400,
+ MLXCX_OP_DESTROY_CQ = 0x401,
+ MLXCX_OP_QUERY_CQ = 0x402,
+ MLXCX_OP_MODIFY_CQ = 0x403,
+ MLXCX_OP_CREATE_QP = 0x500,
+ MLXCX_OP_DESTROY_QP = 0x501,
+ MLXCX_OP_RST2INIT_QP = 0x502,
+ MLXCX_OP_INIT2RTR_QP = 0x503,
+ MLXCX_OP_RTR2RTS_QP = 0x504,
+ MLXCX_OP_RTS2RTS_QP = 0x505,
+ MLXCX_OP_SQERR2RTS_QP = 0x506,
+ MLXCX_OP__2ERR_QP = 0x507,
+ MLXCX_OP__2RST_QP = 0x50A,
+ MLXCX_OP_QUERY_QP = 0x50B,
+ MLXCX_OP_SQD_RTS_QP = 0x50C,
+ MLXCX_OP_INIT2INIT_QP = 0x50E,
+ MLXCX_OP_CREATE_PSV = 0x600,
+ MLXCX_OP_DESTROY_PSV = 0x601,
+ MLXCX_OP_CREATE_SRQ = 0x700,
+ MLXCX_OP_DESTROY_SRQ = 0x701,
+ MLXCX_OP_QUERY_SRQ = 0x702,
+ MLXCX_OP_ARM_RQ = 0x703,
+ MLXCX_OP_CREATE_XRC_SRQ = 0x705,
+ MLXCX_OP_DESTROY_XRC_SRQ = 0x706,
+ MLXCX_OP_QUERY_XRC_SRQ = 0x707,
+ MLXCX_OP_ARM_XRC_SRQ = 0x708,
+ MLXCX_OP_CREATE_DCT = 0x710,
+ MLXCX_OP_DESTROY_DCT = 0x711,
+ MLXCX_OP_DRAIN_DCT = 0x712,
+ MLXCX_OP_QUERY_DCT = 0x713,
+ MLXCX_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714,
+ MLXCX_OP_CREATE_XRQ = 0x717,
+ MLXCX_OP_DESTROY_XRQ = 0x718,
+ MLXCX_OP_QUERY_XRQ = 0x719,
+ MLXCX_OP_CREATE_NVMF_BACKEND_CONTROLLER = 0x720,
+ MLXCX_OP_DESTROY_NVMF_BACKEND_CONTROLLER = 0x721,
+ MLXCX_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722,
+ MLXCX_OP_ATTACH_NVMF_NAMESPACE = 0x723,
+ MLXCX_OP_DETACH_NVMF_NAMESPACE = 0x724,
+ MLXCX_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725,
+ MLXCX_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726,
+ MLXCX_OP_QUERY_XRQ_ERROR_PARAMS = 0x727,
+ MLXCX_OP_QUERY_VPORT_STATE = 0x750,
+ MLXCX_OP_MODIFY_VPORT_STATE = 0x751,
+ MLXCX_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
+ MLXCX_OP_MODIFY_ESW_VPORT_CONTEXT = 0x753,
+ MLXCX_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
+ MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755,
+ MLXCX_OP_QUERY_ROCE_ADDRESS = 0x760,
+ MLXCX_OP_SET_ROCE_ADDRESS = 0x761,
+ MLXCX_OP_QUERY_HCA_VPORT_CONTEXT = 0x762,
+ MLXCX_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763,
+ MLXCX_OP_QUERY_HCA_VPORT_GID = 0x764,
+ MLXCX_OP_QUERY_HCA_VPORT_PKEY = 0x765,
+ MLXCX_OP_QUERY_VPORT_COUNTER = 0x770,
+ MLXCX_OP_ALLOC_Q_COUNTER = 0x771,
+ MLXCX_OP_DEALLOC_Q_COUNTER = 0x772,
+ MLXCX_OP_QUERY_Q_COUNTER = 0x773,
+ MLXCX_OP_SET_PP_RATE_LIMIT = 0x780,
+ MLXCX_OP_QUERY_PP_RATE_LIMIT = 0x781,
+ MLXCX_OP_ALLOC_PD = 0x800,
+ MLXCX_OP_DEALLOC_PD = 0x801,
+ MLXCX_OP_ALLOC_UAR = 0x802,
+ MLXCX_OP_DEALLOC_UAR = 0x803,
+ MLXCX_OP_CONFIG_INT_MODERATION = 0x804,
+ MLXCX_OP_ACCESS_REG = 0x805,
+ MLXCX_OP_ATTACH_TO_MCG = 0x806,
+ MLXCX_OP_DETACH_FROM_MCG = 0x807,
+ MLXCX_OP_MAD_IFC = 0x50D,
+ MLXCX_OP_QUERY_MAD_DEMUX = 0x80B,
+ MLXCX_OP_SET_MAD_DEMUX = 0x80C,
+ MLXCX_OP_NOP = 0x80D,
+ MLXCX_OP_ALLOC_XRCD = 0x80E,
+ MLXCX_OP_DEALLOC_XRCD = 0x80F,
+ MLXCX_OP_ALLOC_TRANSPORT_DOMAIN = 0x816,
+ MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817,
+ MLXCX_OP_QUERY_CONG_STATUS = 0x822,
+ MLXCX_OP_MODIFY_CONG_STATUS = 0x823,
+ MLXCX_OP_QUERY_CONG_PARAMS = 0x824,
+ MLXCX_OP_MODIFY_CONG_PARAMS = 0x825,
+ MLXCX_OP_QUERY_CONG_STATISTICS = 0x826,
+ MLXCX_OP_ADD_VXLAN_UDP_DPORT = 0x827,
+ MLXCX_OP_DELETE_VXLAN_UDP_DPORT = 0x828,
+ MLXCX_OP_SET_L2_TABLE_ENTRY = 0x829,
+ MLXCX_OP_QUERY_L2_TABLE_ENTRY = 0x82A,
+ MLXCX_OP_DELETE_L2_TABLE_ENTRY = 0x82B,
+ MLXCX_OP_SET_WOL_ROL = 0x830,
+ MLXCX_OP_QUERY_WOL_ROL = 0x831,
+ MLXCX_OP_CREATE_TIR = 0x900,
+ MLXCX_OP_MODIFY_TIR = 0x901,
+ MLXCX_OP_DESTROY_TIR = 0x902,
+ MLXCX_OP_QUERY_TIR = 0x903,
+ MLXCX_OP_CREATE_SQ = 0x904,
+ MLXCX_OP_MODIFY_SQ = 0x905,
+ MLXCX_OP_DESTROY_SQ = 0x906,
+ MLXCX_OP_QUERY_SQ = 0x907,
+ MLXCX_OP_CREATE_RQ = 0x908,
+ MLXCX_OP_MODIFY_RQ = 0x909,
+ MLXCX_OP_DESTROY_RQ = 0x90A,
+ MLXCX_OP_QUERY_RQ = 0x90B,
+ MLXCX_OP_CREATE_RMP = 0x90C,
+ MLXCX_OP_MODIFY_RMP = 0x90D,
+ MLXCX_OP_DESTROY_RMP = 0x90E,
+ MLXCX_OP_QUERY_RMP = 0x90F,
+ MLXCX_OP_CREATE_TIS = 0x912,
+ MLXCX_OP_MODIFY_TIS = 0x913,
+ MLXCX_OP_DESTROY_TIS = 0x914,
+ MLXCX_OP_QUERY_TIS = 0x915,
+ MLXCX_OP_CREATE_RQT = 0x916,
+ MLXCX_OP_MODIFY_RQT = 0x917,
+ MLXCX_OP_DESTROY_RQT = 0x918,
+ MLXCX_OP_QUERY_RQT = 0x919,
+ MLXCX_OP_SET_FLOW_TABLE_ROOT = 0x92f,
+ MLXCX_OP_CREATE_FLOW_TABLE = 0x930,
+ MLXCX_OP_DESTROY_FLOW_TABLE = 0x931,
+ MLXCX_OP_QUERY_FLOW_TABLE = 0x932,
+ MLXCX_OP_CREATE_FLOW_GROUP = 0x933,
+ MLXCX_OP_DESTROY_FLOW_GROUP = 0x934,
+ MLXCX_OP_QUERY_FLOW_GROUP = 0x935,
+ MLXCX_OP_SET_FLOW_TABLE_ENTRY = 0x936,
+ MLXCX_OP_QUERY_FLOW_TABLE_ENTRY = 0x937,
+ MLXCX_OP_DELETE_FLOW_TABLE_ENTRY = 0x938,
+ MLXCX_OP_ALLOC_FLOW_COUNTER = 0x939,
+ MLXCX_OP_DEALLOC_FLOW_COUNTER = 0x93a,
+ MLXCX_OP_QUERY_FLOW_COUNTER = 0x93b,
+ MLXCX_OP_MODIFY_FLOW_TABLE = 0x93c,
+ MLXCX_OP_ALLOC_ENCAP_HEADER = 0x93d,
+ MLXCX_OP_DEALLOC_ENCAP_HEADER = 0x93e,
+ MLXCX_OP_QUERY_ENCAP_HEADER = 0x93f
+} mlxcx_cmd_op_t;
+
+/*
+ * Definitions for relevant commands
+ */
+#pragma pack(1)
+typedef struct {
+ uint16be_t mci_opcode;
+ uint8_t mci_rsvd[4];
+ uint16be_t mci_op_mod;
+} mlxcx_cmd_in_t;
+
+typedef struct {
+ uint8_t mco_status;
+ uint8_t mco_rsvd[3];
+ uint32be_t mco_syndrome;
+} mlxcx_cmd_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_enable_hca_head;
+ uint8_t mlxi_enable_hca_rsvd[2];
+ uint16be_t mlxi_enable_hca_func;
+ uint8_t mlxi_enable_hca_rsvd1[4];
+} mlxcx_cmd_enable_hca_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_enable_hca_head;
+ uint8_t mlxo_enable_hca_rsvd[8];
+} mlxcx_cmd_enable_hca_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_disable_hca_head;
+ uint8_t mlxi_disable_hca_rsvd[2];
+ uint16be_t mlxi_disable_hca_func;
+ uint8_t mlxi_disable_hca_rsvd1[4];
+} mlxcx_cmd_disable_hca_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_disable_hca_head;
+ uint8_t mlxo_disable_hca_rsvd[8];
+} mlxcx_cmd_disable_hca_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_issi_head;
+ uint8_t mlxi_query_issi_rsvd[8];
+} mlxcx_cmd_query_issi_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_issi_head;
+ uint8_t mlxo_query_issi_rsvd[2];
+ uint16be_t mlxo_query_issi_current;
+ uint8_t mlxo_query_issi_rsvd1[20];
+	/*
+	 * To date we only support version 1 of the ISSI. The last byte
+	 * carries the ISSI data we care about, so we lay the struct out
+	 * this way.
+	 */
+ uint8_t mlxo_query_issi_rsvd2[79];
+ uint8_t mlxo_supported_issi;
+} mlxcx_cmd_query_issi_out_t;
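+
+/*
+ * An illustrative sketch, not part of the command layout: supported ISSI
+ * versions are reported as a bitmask, so under that assumption a caller
+ * checking for version 1 support would test bit 1 of the final byte:
+ *
+ *	if (out.mlxo_supported_issi & (1 << 1))
+ *		issi = 1;
+ */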
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_set_issi_head;
+ uint8_t mlxi_set_issi_rsvd[2];
+ uint16be_t mlxi_set_issi_current;
+	uint8_t		mlxi_set_issi_rsvd1[4];
+} mlxcx_cmd_set_issi_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_set_issi_head;
+ uint8_t mlxo_set_issi_rsvd[8];
+} mlxcx_cmd_set_issi_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_init_hca_head;
+ uint8_t mlxi_init_hca_rsvd[8];
+} mlxcx_cmd_init_hca_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_init_hca_head;
+ uint8_t mlxo_init_hca_rsvd[8];
+} mlxcx_cmd_init_hca_out_t;
+
+#define MLXCX_TEARDOWN_HCA_GRACEFUL 0x00
+#define MLXCX_TEARDOWN_HCA_FORCE 0x01
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_teardown_hca_head;
+ uint8_t mlxi_teardown_hca_rsvd[2];
+ uint16be_t mlxi_teardown_hca_profile;
+ uint8_t mlxi_teardown_hca_rsvd1[4];
+} mlxcx_cmd_teardown_hca_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_teardown_hca_head;
+ uint8_t mlxo_teardown_hca_rsvd[7];
+ uint8_t mlxo_teardown_hca_state;
+} mlxcx_cmd_teardown_hca_out_t;
+
+#define MLXCX_QUERY_PAGES_OPMOD_BOOT 0x01
+#define MLXCX_QUERY_PAGES_OPMOD_INIT 0x02
+#define MLXCX_QUERY_PAGES_OPMOD_REGULAR 0x03
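+
+/*
+ * Roughly, the op_mod selects which stage's page requirement is being
+ * queried: BOOT for the earliest pre-init pages, INIT for pages that
+ * must be given before INIT_HCA, and REGULAR for post-initialization
+ * requests (e.g. in response to a pages-needed event).
+ */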
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_pages_head;
+ uint8_t mlxi_query_pages_rsvd[2];
+ uint16be_t mlxi_query_pages_func;
+ uint8_t mlxi_query_pages_rsvd1[4];
+} mlxcx_cmd_query_pages_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_pages_head;
+ uint8_t mlxo_query_pages_rsvd[2];
+ uint16be_t mlxo_query_pages_func;
+ uint32be_t mlxo_query_pages_npages;
+} mlxcx_cmd_query_pages_out_t;
+
+#define MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL 0x00
+#define MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES 0x01
+#define MLXCX_MANAGE_PAGES_OPMOD_RETURN_PAGES 0x02
+
+/*
+ * An artificial limit we impose on how many pages we will transfer to or
+ * from the firmware in a single MANAGE_PAGES command.
+ */
+#define MLXCX_MANAGE_PAGES_MAX_PAGES 512
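+
+/*
+ * A consequence worth noting (sketch, not driver code): satisfying a
+ * request for N pages takes ceil(N / 512) MANAGE_PAGES commands, e.g.
+ * 1300 pages would be given as batches of 512 + 512 + 276.
+ */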
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_manage_pages_head;
+ uint8_t mlxi_manage_pages_rsvd[2];
+ uint16be_t mlxi_manage_pages_func;
+ uint32be_t mlxi_manage_pages_npages;
+ uint64be_t mlxi_manage_pages_pas[MLXCX_MANAGE_PAGES_MAX_PAGES];
+} mlxcx_cmd_manage_pages_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_manage_pages_head;
+ uint32be_t mlxo_manage_pages_npages;
+ uint8_t mlxo_manage_pages_rsvd[4];
+ uint64be_t mlxo_manage_pages_pas[MLXCX_MANAGE_PAGES_MAX_PAGES];
+} mlxcx_cmd_manage_pages_out_t;
+
+typedef enum {
+ MLXCX_HCA_CAP_MODE_MAX = 0x0,
+ MLXCX_HCA_CAP_MODE_CURRENT = 0x1
+} mlxcx_hca_cap_mode_t;
+
+typedef enum {
+ MLXCX_HCA_CAP_GENERAL = 0x0,
+ MLXCX_HCA_CAP_ETHERNET = 0x1,
+ MLXCX_HCA_CAP_ODP = 0x2,
+ MLXCX_HCA_CAP_ATOMIC = 0x3,
+ MLXCX_HCA_CAP_ROCE = 0x4,
+ MLXCX_HCA_CAP_IPoIB = 0x5,
+ MLXCX_HCA_CAP_NIC_FLOW = 0x7,
+ MLXCX_HCA_CAP_ESWITCH_FLOW = 0x8,
+ MLXCX_HCA_CAP_ESWITCH = 0x9,
+ MLXCX_HCA_CAP_VECTOR = 0xb,
+ MLXCX_HCA_CAP_QoS = 0xc,
+ MLXCX_HCA_CAP_NVMEoF = 0xe
+} mlxcx_hca_cap_type_t;
+
+typedef enum {
+ MLXCX_CAP_GENERAL_PORT_TYPE_IB = 0x0,
+ MLXCX_CAP_GENERAL_PORT_TYPE_ETHERNET = 0x1,
+} mlxcx_hca_cap_general_port_type_t;
+
+typedef enum {
+ MLXCX_CAP_GENERAL_FLAGS_C_ESW_FLOW_TABLE = (1 << 8),
+ MLXCX_CAP_GENERAL_FLAGS_C_NIC_FLOW_TABLE = (1 << 9),
+} mlxcx_hca_cap_general_flags_c_t;
+
+typedef struct {
+ uint8_t mlcap_general_access_other_hca_roce;
+ uint8_t mlcap_general_rsvd[3];
+
+ uint8_t mlcap_general_rsvd2[12];
+
+ uint8_t mlcap_general_log_max_srq_sz;
+ uint8_t mlcap_general_log_max_qp_sz;
+ uint8_t mlcap_general_rsvd3[1];
+ uint8_t mlcap_general_log_max_qp;
+
+ uint8_t mlcap_general_rsvd4[1];
+ uint8_t mlcap_general_log_max_srq;
+ uint8_t mlcap_general_rsvd5[2];
+
+ uint8_t mlcap_general_rsvd6[1];
+ uint8_t mlcap_general_log_max_cq_sz;
+ uint8_t mlcap_general_rsvd7[1];
+ uint8_t mlcap_general_log_max_cq;
+
+ uint8_t mlcap_general_log_max_eq_sz;
+ uint8_t mlcap_general_log_max_mkey_flags;
+ uint8_t mlcap_general_rsvd8[1];
+ uint8_t mlcap_general_log_max_eq;
+
+ uint8_t mlcap_general_max_indirection;
+ uint8_t mlcap_general_log_max_mrw_sz_flags;
+ uint8_t mlcap_general_log_max_bsf_list_size_flags;
+ uint8_t mlcap_general_log_max_klm_list_size_flags;
+
+ uint8_t mlcap_general_rsvd9[1];
+ uint8_t mlcap_general_log_max_ra_req_dc;
+ uint8_t mlcap_general_rsvd10[1];
+ uint8_t mlcap_general_log_max_ra_res_dc;
+
+ uint8_t mlcap_general_rsvd11[1];
+ uint8_t mlcap_general_log_max_ra_req_qp;
+ uint8_t mlcap_general_rsvd12[1];
+ uint8_t mlcap_general_log_max_ra_res_qp;
+
+ uint16be_t mlcap_general_flags_a;
+ uint16be_t mlcap_general_gid_table_size;
+
+ bits16_t mlcap_general_flags_b;
+ uint16be_t mlcap_general_pkey_table_size;
+
+ bits16_t mlcap_general_flags_c;
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcap_general_flags_d:6;
+ uint8_t mlcap_general_port_type:2;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcap_general_port_type:2;
+ uint8_t mlcap_general_flags_d:6;
+#endif
+ };
+ uint8_t mlcap_general_num_ports;
+
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcap_general_rsvd13:3;
+ uint8_t mlcap_general_log_max_msg:5;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcap_general_log_max_msg:5;
+ uint8_t mlcap_general_rsvd13:3;
+#endif
+ };
+ uint8_t mlcap_general_max_tc;
+ bits16_t mlcap_general_flags_d_wol;
+
+ uint16be_t mlcap_general_state_rate_support;
+ uint8_t mlcap_general_rsvd14[1];
+ struct {
+#if defined(_BIT_FIELDS_HTOL)
+ uint8_t mlcap_general_rsvd15:4;
+ uint8_t mlcap_general_cqe_version:4;
+#elif defined(_BIT_FIELDS_LTOH)
+ uint8_t mlcap_general_cqe_version:4;
+ uint8_t mlcap_general_rsvd15:4;
+#endif
+ };
+
+ uint32be_t mlcap_general_flags_e;
+
+ uint32be_t mlcap_general_flags_f;
+
+ uint8_t mlcap_general_rsvd16[1];
+ uint8_t mlcap_general_uar_sz;
+ uint8_t mlcap_general_cnak;
+ uint8_t mlcap_general_log_pg_sz;
+ uint8_t mlcap_general_rsvd17[32];
+ bits8_t mlcap_general_log_max_rq_flags;
+ uint8_t mlcap_general_log_max_sq;
+ uint8_t mlcap_general_log_max_tir;
+ uint8_t mlcap_general_log_max_tis;
+} mlxcx_hca_cap_general_caps_t;
+
+typedef enum {
+ MLXCX_ETH_CAP_TUNNEL_STATELESS_VXLAN = 1 << 0,
+ MLXCX_ETH_CAP_TUNNEL_STATELESS_GRE = 1 << 1,
+ MLXCX_ETH_CAP_TUNNEL_LSO_CONST_OUT_IP_ID = 1 << 4,
+ MLXCX_ETH_CAP_SCATTER_FCS = 1 << 6,
+ MLXCX_ETH_CAP_REG_UMR_SQ = 1 << 7,
+ MLXCX_ETH_CAP_SELF_LB_UC = 1 << 21,
+ MLXCX_ETH_CAP_SELF_LB_MC = 1 << 22,
+ MLXCX_ETH_CAP_SELF_LB_EN_MODIFIABLE = 1 << 23,
+ MLXCX_ETH_CAP_WQE_VLAN_INSERT = 1 << 24,
+ MLXCX_ETH_CAP_LRO_TIME_STAMP = 1 << 27,
+ MLXCX_ETH_CAP_LRO_PSH_FLAG = 1 << 28,
+ MLXCX_ETH_CAP_LRO_CAP = 1 << 29,
+ MLXCX_ETH_CAP_VLAN_STRIP = 1 << 30,
+ MLXCX_ETH_CAP_CSUM_CAP = 1UL << 31
+} mlxcx_hca_eth_cap_flags_t;
+
+/* CSTYLED */
+#define MLXCX_ETH_CAP_RSS_IND_TBL_CAP (bitdef_t){8, 0x00000f00}
+/* CSTYLED */
+#define MLXCX_ETH_CAP_WQE_INLINE_MODE (bitdef_t){12, 0x00003000}
+/* CSTYLED */
+#define MLXCX_ETH_CAP_MULTI_PKT_SEND_WQE (bitdef_t){14, 0x0000c000}
+/* CSTYLED */
+#define MLXCX_ETH_CAP_MAX_LSO_CAP (bitdef_t){16, 0x001f0000}
+/* CSTYLED */
+#define MLXCX_ETH_CAP_LRO_MAX_MSG_SZ_MODE (bitdef_t){25, 0x06000000}
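+
+/*
+ * Illustrative only: a bitdef_t pairs a shift with a mask, so extracting
+ * a field such as the max LSO capability from mlcap_eth_flags looks
+ * roughly like this (be32_value() is a stand-in for whatever big-endian
+ * accessor the driver provides, not a real helper name):
+ *
+ *	bitdef_t bd = MLXCX_ETH_CAP_MAX_LSO_CAP;
+ *	uint32_t v = (be32_value(caps->mlcap_eth_flags) & bd.bit_mask) >>
+ *	    bd.bit_shift;
+ */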
+
+typedef struct {
+ bits32_t mlcap_eth_flags;
+ uint8_t mlcap_eth_rsvd[6];
+ uint16be_t mlcap_eth_lro_min_mss_size;
+ uint8_t mlcap_eth_rsvd2[36];
+ uint32be_t mlcap_eth_lro_timer_supported_periods[4];
+} mlxcx_hca_cap_eth_caps_t;
+
+typedef enum {
+ MLXCX_FLOW_CAP_PROPS_DECAP = 1 << 23,
+ MLXCX_FLOW_CAP_PROPS_ENCAP = 1 << 24,
+ MLXCX_FLOW_CAP_PROPS_MODIFY_TBL = 1 << 25,
+ MLXCX_FLOW_CAP_PROPS_MISS_TABLE = 1 << 26,
+ MLXCX_FLOW_CAP_PROPS_MODIFY_ROOT_TBL = 1 << 27,
+ MLXCX_FLOW_CAP_PROPS_MODIFY = 1 << 28,
+ MLXCX_FLOW_CAP_PROPS_COUNTER = 1 << 29,
+ MLXCX_FLOW_CAP_PROPS_TAG = 1 << 30,
+ MLXCX_FLOW_CAP_PROPS_SUPPORT = 1UL << 31
+} mlxcx_hca_cap_flow_cap_props_flags_t;
+
+typedef struct {
+ bits32_t mlcap_flow_prop_flags;
+ uint8_t mlcap_flow_prop_log_max_ft_size;
+ uint8_t mlcap_flow_prop_rsvd[2];
+ uint8_t mlcap_flow_prop_max_ft_level;
+ uint8_t mlcap_flow_prop_rsvd2[7];
+ uint8_t mlcap_flow_prop_log_max_ft_num;
+ uint8_t mlcap_flow_prop_rsvd3[2];
+ uint8_t mlcap_flow_prop_log_max_flow_counter;
+ uint8_t mlcap_flow_prop_log_max_destination;
+ uint8_t mlcap_flow_prop_rsvd4[3];
+ uint8_t mlcap_flow_prop_log_max_flow;
+ uint8_t mlcap_flow_prop_rsvd5[8];
+ bits32_t mlcap_flow_prop_support[4];
+ bits32_t mlcap_flow_prop_bitmask[4];
+} mlxcx_hca_cap_flow_cap_props_t;
+
+typedef struct {
+ bits32_t mlcap_flow_flags;
+ uint8_t mlcap_flow_rsvd[60];
+ mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_rx;
+ mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_rx_rdma;
+ mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_rx_sniffer;
+ mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_tx;
+ mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_tx_rdma;
+ mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_tx_sniffer;
+} mlxcx_hca_cap_flow_caps_t;
+
+/*
+ * Size of the buffer required to hold the QUERY_HCA_CAP output data.
+ */
+#define MLXCX_HCA_CAP_SIZE 0x1000
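+
+/*
+ * The data area is opaque at this level: callers overlay one of the
+ * capability structures above onto it, according to the cap type and
+ * mode encoded in op_mod. A sketch only:
+ *
+ *	mlxcx_hca_cap_general_caps_t *gen =
+ *	    (mlxcx_hca_cap_general_caps_t *)out->mlxo_query_hca_cap_data;
+ */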
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_hca_cap_head;
+ uint8_t mlxi_query_hca_cap_rsvd[8];
+} mlxcx_cmd_query_hca_cap_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_hca_cap_head;
+ uint8_t mlxo_query_hca_cap_rsvd[8];
+ uint8_t mlxo_query_hca_cap_data[MLXCX_HCA_CAP_SIZE];
+} mlxcx_cmd_query_hca_cap_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_set_driver_version_head;
+ uint8_t mlxi_set_driver_version_rsvd[8];
+ char mlxi_set_driver_version_version[64];
+} mlxcx_cmd_set_driver_version_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_set_driver_version_head;
+ uint8_t mlxo_set_driver_version_rsvd[8];
+} mlxcx_cmd_set_driver_version_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_alloc_uar_head;
+ uint8_t mlxi_alloc_uar_rsvd[8];
+} mlxcx_cmd_alloc_uar_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_alloc_uar_head;
+ uint8_t mlxo_alloc_uar_rsvd;
+ uint24be_t mlxo_alloc_uar_uar;
+ uint8_t mlxo_alloc_uar_rsvd2[4];
+} mlxcx_cmd_alloc_uar_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_dealloc_uar_head;
+ uint8_t mlxi_dealloc_uar_rsvd;
+ uint24be_t mlxi_dealloc_uar_uar;
+ uint8_t mlxi_dealloc_uar_rsvd2[4];
+} mlxcx_cmd_dealloc_uar_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_dealloc_uar_head;
+ uint8_t mlxo_dealloc_uar_rsvd[8];
+} mlxcx_cmd_dealloc_uar_out_t;
+
+/*
+ * An artificial limit we impose on how many pages we will use to back a
+ * single queue.
+ */
+#define MLXCX_CREATE_QUEUE_MAX_PAGES 128
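+
+/*
+ * Worked example (assuming 4KB hardware pages): a queue of 4096 entries
+ * at 64 bytes each occupies 256KB, i.e. 64 PAS entries -- comfortably
+ * under the 128-page cap, which tops out at a 512KB queue.
+ */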
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_eq_head;
+ uint8_t mlxi_create_eq_rsvd[8];
+ mlxcx_eventq_ctx_t mlxi_create_eq_context;
+ uint8_t mlxi_create_eq_rsvd2[8];
+ uint64be_t mlxi_create_eq_event_bitmask;
+ uint8_t mlxi_create_eq_rsvd3[176];
+ uint64be_t mlxi_create_eq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES];
+} mlxcx_cmd_create_eq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_eq_head;
+ uint8_t mlxo_create_eq_rsvd[3];
+ uint8_t mlxo_create_eq_eqn;
+ uint8_t mlxo_create_eq_rsvd2[4];
+} mlxcx_cmd_create_eq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_eq_head;
+ uint8_t mlxi_query_eq_rsvd[3];
+ uint8_t mlxi_query_eq_eqn;
+ uint8_t mlxi_query_eq_rsvd2[4];
+} mlxcx_cmd_query_eq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_eq_head;
+ uint8_t mlxo_query_eq_rsvd[8];
+ mlxcx_eventq_ctx_t mlxo_query_eq_context;
+	uint8_t	mlxo_query_eq_rsvd2[8];
+	uint64be_t mlxo_query_eq_event_bitmask;
+	uint8_t mlxo_query_eq_rsvd3[176];
+	uint64be_t mlxo_query_eq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES];
+} mlxcx_cmd_query_eq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_eq_head;
+ uint8_t mlxi_destroy_eq_rsvd[3];
+ uint8_t mlxi_destroy_eq_eqn;
+ uint8_t mlxi_destroy_eq_rsvd2[4];
+} mlxcx_cmd_destroy_eq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_eq_head;
+ uint8_t mlxo_destroy_eq_rsvd[8];
+} mlxcx_cmd_destroy_eq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_alloc_pd_head;
+ uint8_t mlxi_alloc_pd_rsvd[8];
+} mlxcx_cmd_alloc_pd_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_alloc_pd_head;
+ uint8_t mlxo_alloc_pd_rsvd;
+ uint24be_t mlxo_alloc_pd_pdn;
+ uint8_t mlxo_alloc_pd_rsvd2[4];
+} mlxcx_cmd_alloc_pd_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_dealloc_pd_head;
+ uint8_t mlxi_dealloc_pd_rsvd;
+ uint24be_t mlxi_dealloc_pd_pdn;
+ uint8_t mlxi_dealloc_pd_rsvd2[4];
+} mlxcx_cmd_dealloc_pd_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_dealloc_pd_head;
+ uint8_t mlxo_dealloc_pd_rsvd[8];
+} mlxcx_cmd_dealloc_pd_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_alloc_tdom_head;
+ uint8_t mlxi_alloc_tdom_rsvd[8];
+} mlxcx_cmd_alloc_tdom_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_alloc_tdom_head;
+ uint8_t mlxo_alloc_tdom_rsvd;
+ uint24be_t mlxo_alloc_tdom_tdomn;
+ uint8_t mlxo_alloc_tdom_rsvd2[4];
+} mlxcx_cmd_alloc_tdom_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_dealloc_tdom_head;
+ uint8_t mlxi_dealloc_tdom_rsvd;
+ uint24be_t mlxi_dealloc_tdom_tdomn;
+ uint8_t mlxi_dealloc_tdom_rsvd2[4];
+} mlxcx_cmd_dealloc_tdom_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_dealloc_tdom_head;
+ uint8_t mlxo_dealloc_tdom_rsvd[8];
+} mlxcx_cmd_dealloc_tdom_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_tir_head;
+ uint8_t mlxi_create_tir_rsvd[24];
+ mlxcx_tir_ctx_t mlxi_create_tir_context;
+} mlxcx_cmd_create_tir_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_tir_head;
+ uint8_t mlxo_create_tir_rsvd;
+ uint24be_t mlxo_create_tir_tirn;
+ uint8_t mlxo_create_tir_rsvd2[4];
+} mlxcx_cmd_create_tir_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_tir_head;
+ uint8_t mlxi_destroy_tir_rsvd;
+ uint24be_t mlxi_destroy_tir_tirn;
+ uint8_t mlxi_destroy_tir_rsvd2[4];
+} mlxcx_cmd_destroy_tir_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_tir_head;
+ uint8_t mlxo_destroy_tir_rsvd[8];
+} mlxcx_cmd_destroy_tir_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_tis_head;
+ uint8_t mlxi_create_tis_rsvd[24];
+ mlxcx_tis_ctx_t mlxi_create_tis_context;
+} mlxcx_cmd_create_tis_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_tis_head;
+ uint8_t mlxo_create_tis_rsvd;
+ uint24be_t mlxo_create_tis_tisn;
+ uint8_t mlxo_create_tis_rsvd2[4];
+} mlxcx_cmd_create_tis_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_tis_head;
+ uint8_t mlxi_destroy_tis_rsvd;
+ uint24be_t mlxi_destroy_tis_tisn;
+ uint8_t mlxi_destroy_tis_rsvd2[4];
+} mlxcx_cmd_destroy_tis_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_tis_head;
+ uint8_t mlxo_destroy_tis_rsvd[8];
+} mlxcx_cmd_destroy_tis_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_special_ctxs_head;
+ uint8_t mlxi_query_special_ctxs_rsvd[8];
+} mlxcx_cmd_query_special_ctxs_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_special_ctxs_head;
+ uint8_t mlxo_query_special_ctxs_rsvd[4];
+ uint32be_t mlxo_query_special_ctxs_resd_lkey;
+ uint32be_t mlxo_query_special_ctxs_null_mkey;
+ uint8_t mlxo_query_special_ctxs_rsvd2[12];
+} mlxcx_cmd_query_special_ctxs_out_t;
+
+typedef enum {
+ MLXCX_VPORT_TYPE_VNIC = 0x0,
+ MLXCX_VPORT_TYPE_ESWITCH = 0x1,
+ MLXCX_VPORT_TYPE_UPLINK = 0x2,
+} mlxcx_cmd_vport_op_mod_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_nic_vport_ctx_head;
+ uint8_t mlxi_query_nic_vport_ctx_other_vport;
+ uint8_t mlxi_query_nic_vport_ctx_rsvd[1];
+ uint16be_t mlxi_query_nic_vport_ctx_vport_number;
+ uint8_t mlxi_query_nic_vport_ctx_allowed_list_type;
+ uint8_t mlxi_query_nic_vport_ctx_rsvd2[3];
+} mlxcx_cmd_query_nic_vport_ctx_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_nic_vport_ctx_head;
+ uint8_t mlxo_query_nic_vport_ctx_rsvd[8];
+ mlxcx_nic_vport_ctx_t mlxo_query_nic_vport_ctx_context;
+} mlxcx_cmd_query_nic_vport_ctx_out_t;
+
+typedef enum {
+ MLXCX_MODIFY_NIC_VPORT_CTX_ROCE_EN = 1 << 1,
+ MLXCX_MODIFY_NIC_VPORT_CTX_ADDR_LIST = 1 << 2,
+ MLXCX_MODIFY_NIC_VPORT_CTX_PERM_ADDR = 1 << 3,
+ MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC = 1 << 4,
+ MLXCX_MODIFY_NIC_VPORT_CTX_EVENT = 1 << 5,
+ MLXCX_MODIFY_NIC_VPORT_CTX_MTU = 1 << 6,
+ MLXCX_MODIFY_NIC_VPORT_CTX_WQE_INLINE = 1 << 7,
+ MLXCX_MODIFY_NIC_VPORT_CTX_PORT_GUID = 1 << 8,
+ MLXCX_MODIFY_NIC_VPORT_CTX_NODE_GUID = 1 << 9,
+} mlxcx_modify_nic_vport_ctx_fields_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_modify_nic_vport_ctx_head;
+ uint8_t mlxi_modify_nic_vport_ctx_other_vport;
+ uint8_t mlxi_modify_nic_vport_ctx_rsvd[1];
+ uint16be_t mlxi_modify_nic_vport_ctx_vport_number;
+ uint32be_t mlxi_modify_nic_vport_ctx_field_select;
+ uint8_t mlxi_modify_nic_vport_ctx_rsvd2[240];
+ mlxcx_nic_vport_ctx_t mlxi_modify_nic_vport_ctx_context;
+} mlxcx_cmd_modify_nic_vport_ctx_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_modify_nic_vport_ctx_head;
+ uint8_t mlxo_modify_nic_vport_ctx_rsvd[8];
+} mlxcx_cmd_modify_nic_vport_ctx_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_vport_state_head;
+ uint8_t mlxi_query_vport_state_other_vport;
+ uint8_t mlxi_query_vport_state_rsvd[1];
+ uint16be_t mlxi_query_vport_state_vport_number;
+ uint8_t mlxi_query_vport_state_rsvd2[4];
+} mlxcx_cmd_query_vport_state_in_t;
+
+/* CSTYLED */
+#define MLXCX_VPORT_ADMIN_STATE (bitdef_t){4, 0xF0}
+/* CSTYLED */
+#define MLXCX_VPORT_OPER_STATE (bitdef_t){0, 0x0F}
+
+typedef enum {
+ MLXCX_VPORT_OPER_STATE_DOWN = 0x0,
+ MLXCX_VPORT_OPER_STATE_UP = 0x1,
+} mlxcx_vport_oper_state_t;
+
+typedef enum {
+ MLXCX_VPORT_ADMIN_STATE_DOWN = 0x0,
+ MLXCX_VPORT_ADMIN_STATE_UP = 0x1,
+ MLXCX_VPORT_ADMIN_STATE_FOLLOW = 0x2,
+} mlxcx_vport_admin_state_t;
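+
+/*
+ * Per the bitdefs above, the query output packs both fields into its
+ * single state byte: the operational state in the low nibble and the
+ * admin state in the high nibble.
+ */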
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_vport_state_head;
+ uint8_t mlxo_query_vport_state_rsvd[4];
+ uint16be_t mlxo_query_vport_state_max_tx_speed;
+ uint8_t mlxo_query_vport_state_rsvd2[1];
+ uint8_t mlxo_query_vport_state_state;
+} mlxcx_cmd_query_vport_state_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_cq_head;
+ uint8_t mlxi_create_cq_rsvd[8];
+ mlxcx_completionq_ctx_t mlxi_create_cq_context;
+ uint8_t mlxi_create_cq_rsvd2[192];
+ uint64be_t mlxi_create_cq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES];
+} mlxcx_cmd_create_cq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_cq_head;
+ uint8_t mlxo_create_cq_rsvd;
+ uint24be_t mlxo_create_cq_cqn;
+ uint8_t mlxo_create_cq_rsvd2[4];
+} mlxcx_cmd_create_cq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_cq_head;
+ uint8_t mlxi_destroy_cq_rsvd;
+ uint24be_t mlxi_destroy_cq_cqn;
+ uint8_t mlxi_destroy_cq_rsvd2[4];
+} mlxcx_cmd_destroy_cq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_cq_head;
+ uint8_t mlxo_destroy_cq_rsvd[8];
+} mlxcx_cmd_destroy_cq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_cq_head;
+ uint8_t mlxi_query_cq_rsvd;
+ uint24be_t mlxi_query_cq_cqn;
+ uint8_t mlxi_query_cq_rsvd2[4];
+} mlxcx_cmd_query_cq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_cq_head;
+ uint8_t mlxo_query_cq_rsvd[8];
+ mlxcx_completionq_ctx_t mlxo_query_cq_context;
+ uint8_t mlxo_query_cq_rsvd2[192];
+ uint64be_t mlxo_query_cq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES];
+} mlxcx_cmd_query_cq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_rq_head;
+ uint8_t mlxi_create_rq_rsvd[24];
+ mlxcx_rq_ctx_t mlxi_create_rq_context;
+} mlxcx_cmd_create_rq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_rq_head;
+ uint8_t mlxo_create_rq_rsvd;
+ uint24be_t mlxo_create_rq_rqn;
+ uint8_t mlxo_create_rq_rsvd2[4];
+} mlxcx_cmd_create_rq_out_t;
+
+/* CSTYLED */
+#define MLXCX_CMD_MODIFY_RQ_STATE (bitdef_t){ \
+ .bit_shift = 4, .bit_mask = 0xF0 }
+
+typedef enum {
+	MLXCX_MODIFY_RQ_LWM = 1 << 0,
+	MLXCX_MODIFY_RQ_VSD = 1 << 1,
+	MLXCX_MODIFY_RQ_SCATTER_FCS = 1 << 2,
+	MLXCX_MODIFY_RQ_COUNTER_SET_ID = 1 << 3
+} mlxcx_cmd_modify_rq_bitmask_t;
+
+typedef enum {
+ MLXCX_RQ_STATE_RST = 0x0,
+ MLXCX_RQ_STATE_RDY = 0x1,
+ MLXCX_RQ_STATE_ERR = 0x3
+} mlxcx_rq_state_t;
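+
+/*
+ * A usage note, inferred from these states rather than normative: an RQ
+ * is created in RST; starting it is a MODIFY_RQ moving RST -> RDY, and
+ * stopping it moves RDY -> RST again.
+ */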
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_modify_rq_head;
+ bits8_t mlxi_modify_rq_state;
+ uint24be_t mlxi_modify_rq_rqn;
+ uint8_t mlxi_modify_rq_rsvd[4];
+ uint64be_t mlxi_modify_rq_bitmask;
+ uint8_t mlxi_modify_rq_rsvd2[8];
+ mlxcx_rq_ctx_t mlxi_modify_rq_context;
+} mlxcx_cmd_modify_rq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_modify_rq_head;
+ uint8_t mlxo_modify_rq_rsvd[8];
+} mlxcx_cmd_modify_rq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_rq_head;
+ uint8_t mlxi_query_rq_rsvd;
+ uint24be_t mlxi_query_rq_rqn;
+ uint8_t mlxi_query_rq_rsvd2[4];
+} mlxcx_cmd_query_rq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_rq_head;
+ uint8_t mlxo_query_rq_rsvd[24];
+ mlxcx_rq_ctx_t mlxo_query_rq_context;
+} mlxcx_cmd_query_rq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_rq_head;
+ uint8_t mlxi_destroy_rq_rsvd;
+ uint24be_t mlxi_destroy_rq_rqn;
+ uint8_t mlxi_destroy_rq_rsvd2[4];
+} mlxcx_cmd_destroy_rq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_rq_head;
+ uint8_t mlxo_destroy_rq_rsvd[8];
+} mlxcx_cmd_destroy_rq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_sq_head;
+ uint8_t mlxi_create_sq_rsvd[24];
+ mlxcx_sq_ctx_t mlxi_create_sq_context;
+} mlxcx_cmd_create_sq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_sq_head;
+ uint8_t mlxo_create_sq_rsvd;
+ uint24be_t mlxo_create_sq_sqn;
+ uint8_t mlxo_create_sq_rsvd2[4];
+} mlxcx_cmd_create_sq_out_t;
+
+/* CSTYLED */
+#define MLXCX_CMD_MODIFY_SQ_STATE (bitdef_t){ \
+ .bit_shift = 4, .bit_mask = 0xF0 }
+
+typedef enum {
+ MLXCX_MODIFY_SQ_PACKET_PACING_INDEX = 1 << 0,
+} mlxcx_cmd_modify_sq_bitmask_t;
+
+typedef enum {
+ MLXCX_SQ_STATE_RST = 0x0,
+ MLXCX_SQ_STATE_RDY = 0x1,
+ MLXCX_SQ_STATE_ERR = 0x3
+} mlxcx_sq_state_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_modify_sq_head;
+ bits8_t mlxi_modify_sq_state;
+ uint24be_t mlxi_modify_sq_sqn;
+ uint8_t mlxi_modify_sq_rsvd[4];
+ uint64be_t mlxi_modify_sq_bitmask;
+ uint8_t mlxi_modify_sq_rsvd2[8];
+ mlxcx_sq_ctx_t mlxi_modify_sq_context;
+} mlxcx_cmd_modify_sq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_modify_sq_head;
+ uint8_t mlxo_modify_sq_rsvd[8];
+} mlxcx_cmd_modify_sq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_query_sq_head;
+ uint8_t mlxi_query_sq_rsvd;
+ uint24be_t mlxi_query_sq_sqn;
+ uint8_t mlxi_query_sq_rsvd2[4];
+} mlxcx_cmd_query_sq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_query_sq_head;
+ uint8_t mlxo_query_sq_rsvd[24];
+ mlxcx_sq_ctx_t mlxo_query_sq_context;
+} mlxcx_cmd_query_sq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_sq_head;
+ uint8_t mlxi_destroy_sq_rsvd;
+ uint24be_t mlxi_destroy_sq_sqn;
+ uint8_t mlxi_destroy_sq_rsvd2[4];
+} mlxcx_cmd_destroy_sq_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_sq_head;
+ uint8_t mlxo_destroy_sq_rsvd[8];
+} mlxcx_cmd_destroy_sq_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_rqt_head;
+ uint8_t mlxi_create_rqt_rsvd[24];
+ mlxcx_rqtable_ctx_t mlxi_create_rqt_context;
+} mlxcx_cmd_create_rqt_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_rqt_head;
+ uint8_t mlxo_create_rqt_rsvd;
+ uint24be_t mlxo_create_rqt_rqtn;
+ uint8_t mlxo_create_rqt_rsvd2[4];
+} mlxcx_cmd_create_rqt_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_rqt_head;
+ uint8_t mlxi_destroy_rqt_rsvd;
+ uint24be_t mlxi_destroy_rqt_rqtn;
+ uint8_t mlxi_destroy_rqt_rsvd2[4];
+} mlxcx_cmd_destroy_rqt_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_rqt_head;
+ uint8_t mlxo_destroy_rqt_rsvd[8];
+} mlxcx_cmd_destroy_rqt_out_t;
+
+typedef enum {
+ MLXCX_FLOW_TABLE_NIC_RX = 0x0,
+ MLXCX_FLOW_TABLE_NIC_TX = 0x1,
+ MLXCX_FLOW_TABLE_ESW_OUT = 0x2,
+ MLXCX_FLOW_TABLE_ESW_IN = 0x3,
+ MLXCX_FLOW_TABLE_ESW_FDB = 0x4,
+ MLXCX_FLOW_TABLE_NIC_RX_SNIFF = 0x5,
+ MLXCX_FLOW_TABLE_NIC_TX_SNIFF = 0x6,
+ MLXCX_FLOW_TABLE_NIC_RX_RDMA = 0x7,
+ MLXCX_FLOW_TABLE_NIC_TX_RDMA = 0x8
+} mlxcx_flow_table_type_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_flow_table_head;
+ uint8_t mlxi_create_flow_table_other_vport;
+ uint8_t mlxi_create_flow_table_rsvd;
+ uint16be_t mlxi_create_flow_table_vport_number;
+ uint8_t mlxi_create_flow_table_rsvd2[4];
+ uint8_t mlxi_create_flow_table_table_type;
+ uint8_t mlxi_create_flow_table_rsvd3[7];
+ mlxcx_flow_table_ctx_t mlxi_create_flow_table_context;
+} mlxcx_cmd_create_flow_table_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_flow_table_head;
+ uint8_t mlxo_create_flow_table_rsvd;
+ uint24be_t mlxo_create_flow_table_table_id;
+ uint8_t mlxo_create_flow_table_rsvd2[4];
+} mlxcx_cmd_create_flow_table_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_flow_table_head;
+ uint8_t mlxi_destroy_flow_table_other_vport;
+ uint8_t mlxi_destroy_flow_table_rsvd;
+ uint16be_t mlxi_destroy_flow_table_vport_number;
+ uint8_t mlxi_destroy_flow_table_rsvd2[4];
+ uint8_t mlxi_destroy_flow_table_table_type;
+ uint8_t mlxi_destroy_flow_table_rsvd3[4];
+ uint24be_t mlxi_destroy_flow_table_table_id;
+ uint8_t mlxi_destroy_flow_table_rsvd4[4];
+} mlxcx_cmd_destroy_flow_table_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_flow_table_head;
+ uint8_t mlxo_destroy_flow_table_rsvd[8];
+} mlxcx_cmd_destroy_flow_table_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_set_flow_table_root_head;
+ uint8_t mlxi_set_flow_table_root_other_vport;
+ uint8_t mlxi_set_flow_table_root_rsvd;
+ uint16be_t mlxi_set_flow_table_root_vport_number;
+ uint8_t mlxi_set_flow_table_root_rsvd2[4];
+ uint8_t mlxi_set_flow_table_root_table_type;
+ uint8_t mlxi_set_flow_table_root_rsvd3[4];
+ uint24be_t mlxi_set_flow_table_root_table_id;
+ uint8_t mlxi_set_flow_table_root_rsvd4[4];
+} mlxcx_cmd_set_flow_table_root_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_set_flow_table_root_head;
+ uint8_t mlxo_set_flow_table_root_rsvd[8];
+} mlxcx_cmd_set_flow_table_root_out_t;
+
+typedef enum {
+ MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS = 1 << 0,
+ MLXCX_FLOW_GROUP_MATCH_MISC_PARAMS = 1 << 1,
+ MLXCX_FLOW_GROUP_MATCH_INNER_HDRS = 1 << 2,
+} mlxcx_flow_group_match_criteria_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_create_flow_group_head;
+ uint8_t mlxi_create_flow_group_other_vport;
+ uint8_t mlxi_create_flow_group_rsvd;
+ uint16be_t mlxi_create_flow_group_vport_number;
+ uint8_t mlxi_create_flow_group_rsvd2[4];
+ uint8_t mlxi_create_flow_group_table_type;
+ uint8_t mlxi_create_flow_group_rsvd3[4];
+ uint24be_t mlxi_create_flow_group_table_id;
+ uint8_t mlxi_create_flow_group_rsvd4[4];
+ uint32be_t mlxi_create_flow_group_start_flow_index;
+ uint8_t mlxi_create_flow_group_rsvd5[4];
+ uint32be_t mlxi_create_flow_group_end_flow_index;
+ uint8_t mlxi_create_flow_group_rsvd6[23];
+ uint8_t mlxi_create_flow_group_match_criteria_en;
+ mlxcx_flow_match_t mlxi_create_flow_group_match_criteria;
+ uint8_t mlxi_create_flow_group_rsvd7[448];
+} mlxcx_cmd_create_flow_group_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_create_flow_group_head;
+ uint8_t mlxo_create_flow_group_rsvd;
+ uint24be_t mlxo_create_flow_group_group_id;
+ uint8_t mlxo_create_flow_group_rsvd2[4];
+} mlxcx_cmd_create_flow_group_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_destroy_flow_group_head;
+ uint8_t mlxi_destroy_flow_group_other_vport;
+ uint8_t mlxi_destroy_flow_group_rsvd;
+ uint16be_t mlxi_destroy_flow_group_vport_number;
+ uint8_t mlxi_destroy_flow_group_rsvd2[4];
+ uint8_t mlxi_destroy_flow_group_table_type;
+ uint8_t mlxi_destroy_flow_group_rsvd3[4];
+ uint24be_t mlxi_destroy_flow_group_table_id;
+ uint32be_t mlxi_destroy_flow_group_group_id;
+ uint8_t mlxi_destroy_flow_group_rsvd4[36];
+} mlxcx_cmd_destroy_flow_group_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_destroy_flow_group_head;
+ uint8_t mlxo_destroy_flow_group_rsvd[8];
+} mlxcx_cmd_destroy_flow_group_out_t;
+
+typedef enum {
+ MLXCX_CMD_FLOW_ENTRY_SET_NEW = 0,
+ MLXCX_CMD_FLOW_ENTRY_MODIFY = 1,
+} mlxcx_cmd_set_flow_table_entry_opmod_t;
+
+typedef enum {
+ MLXCX_CMD_FLOW_ENTRY_SET_ACTION = 1 << 0,
+ MLXCX_CMD_FLOW_ENTRY_SET_FLOW_TAG = 1 << 1,
+ MLXCX_CMD_FLOW_ENTRY_SET_DESTINATION = 1 << 2,
+ MLXCX_CMD_FLOW_ENTRY_SET_COUNTERS = 1 << 3,
+ MLXCX_CMD_FLOW_ENTRY_SET_ENCAP = 1 << 4
+} mlxcx_cmd_set_flow_table_entry_bitmask_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_set_flow_table_entry_head;
+ uint8_t mlxi_set_flow_table_entry_other_vport;
+ uint8_t mlxi_set_flow_table_entry_rsvd;
+ uint16be_t mlxi_set_flow_table_entry_vport_number;
+ uint8_t mlxi_set_flow_table_entry_rsvd2[4];
+ uint8_t mlxi_set_flow_table_entry_table_type;
+ uint8_t mlxi_set_flow_table_entry_rsvd3[4];
+ uint24be_t mlxi_set_flow_table_entry_table_id;
+ uint8_t mlxi_set_flow_table_entry_rsvd4[3];
+ bits8_t mlxi_set_flow_table_entry_modify_bitmask;
+ uint8_t mlxi_set_flow_table_entry_rsvd5[4];
+ uint32be_t mlxi_set_flow_table_entry_flow_index;
+ uint8_t mlxi_set_flow_table_entry_rsvd6[28];
+ mlxcx_flow_entry_ctx_t mlxi_set_flow_table_entry_context;
+} mlxcx_cmd_set_flow_table_entry_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_set_flow_table_entry_head;
+ uint8_t mlxo_set_flow_table_entry_rsvd[8];
+} mlxcx_cmd_set_flow_table_entry_out_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_delete_flow_table_entry_head;
+ uint8_t mlxi_delete_flow_table_entry_other_vport;
+ uint8_t mlxi_delete_flow_table_entry_rsvd;
+ uint16be_t mlxi_delete_flow_table_entry_vport_number;
+ uint8_t mlxi_delete_flow_table_entry_rsvd2[4];
+ uint8_t mlxi_delete_flow_table_entry_table_type;
+ uint8_t mlxi_delete_flow_table_entry_rsvd3[4];
+ uint24be_t mlxi_delete_flow_table_entry_table_id;
+ uint8_t mlxi_delete_flow_table_entry_rsvd4[8];
+ uint32be_t mlxi_delete_flow_table_entry_flow_index;
+ uint8_t mlxi_delete_flow_table_entry_rsvd5[28];
+} mlxcx_cmd_delete_flow_table_entry_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_delete_flow_table_entry_head;
+ uint8_t mlxo_delete_flow_table_entry_rsvd[8];
+} mlxcx_cmd_delete_flow_table_entry_out_t;
+
+typedef enum {
+ MLXCX_CMD_CONFIG_INT_MOD_READ = 1,
+ MLXCX_CMD_CONFIG_INT_MOD_WRITE = 0
+} mlxcx_cmd_config_int_mod_opmod_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_config_int_mod_head;
+ uint16be_t mlxi_config_int_mod_min_delay;
+ uint16be_t mlxi_config_int_mod_int_vector;
+ uint8_t mlxi_config_int_mod_rsvd[4];
+} mlxcx_cmd_config_int_mod_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_config_int_mod_head;
+ uint16be_t mlxo_config_int_mod_min_delay;
+ uint16be_t mlxo_config_int_mod_int_vector;
+ uint8_t mlxo_config_int_mod_rsvd[4];
+} mlxcx_cmd_config_int_mod_out_t;
+
+typedef struct {
+ uint8_t mlrd_pmtu_rsvd;
+ uint8_t mlrd_pmtu_local_port;
+ uint8_t mlrd_pmtu_rsvd2[2];
+
+ uint16be_t mlrd_pmtu_max_mtu;
+ uint8_t mlrd_pmtu_rsvd3[2];
+
+ uint16be_t mlrd_pmtu_admin_mtu;
+ uint8_t mlrd_pmtu_rsvd4[2];
+
+ uint16be_t mlrd_pmtu_oper_mtu;
+ uint8_t mlrd_pmtu_rsvd5[2];
+} mlxcx_reg_pmtu_t;
+
+typedef enum {
+ MLXCX_PORT_STATUS_UP = 1,
+ MLXCX_PORT_STATUS_DOWN = 2,
+ MLXCX_PORT_STATUS_UP_ONCE = 3,
+ MLXCX_PORT_STATUS_DISABLED = 4,
+} mlxcx_port_status_t;
+
+typedef enum {
+ MLXCX_PAOS_ADMIN_ST_EN = 1UL << 31,
+} mlxcx_paos_flags_t;
+
+typedef struct {
+ uint8_t mlrd_paos_swid;
+ uint8_t mlrd_paos_local_port;
+ uint8_t mlrd_paos_admin_status;
+ uint8_t mlrd_paos_oper_status;
+ bits32_t mlrd_paos_flags;
+ uint8_t mlrd_paos_rsvd[8];
+} mlxcx_reg_paos_t;
+
+typedef enum {
+ MLXCX_PROTO_SGMII = 1 << 0,
+ MLXCX_PROTO_1000BASE_KX = 1 << 1,
+ MLXCX_PROTO_10GBASE_CX4 = 1 << 2,
+ MLXCX_PROTO_10GBASE_KX4 = 1 << 3,
+ MLXCX_PROTO_10GBASE_KR = 1 << 4,
+ MLXCX_PROTO_UNKNOWN_1 = 1 << 5,
+ MLXCX_PROTO_40GBASE_CR4 = 1 << 6,
+ MLXCX_PROTO_40GBASE_KR4 = 1 << 7,
+ MLXCX_PROTO_UNKNOWN_2 = 1 << 8,
+ MLXCX_PROTO_SGMII_100BASE = 1 << 9,
+ MLXCX_PROTO_UNKNOWN_3 = 1 << 10,
+ MLXCX_PROTO_UNKNOWN_4 = 1 << 11,
+ MLXCX_PROTO_10GBASE_CR = 1 << 12,
+ MLXCX_PROTO_10GBASE_SR = 1 << 13,
+ MLXCX_PROTO_10GBASE_ER_LR = 1 << 14,
+ MLXCX_PROTO_40GBASE_SR4 = 1 << 15,
+ MLXCX_PROTO_40GBASE_LR4_ER4 = 1 << 16,
+ MLXCX_PROTO_UNKNOWN_5 = 1 << 17,
+ MLXCX_PROTO_50GBASE_SR2 = 1 << 18,
+ MLXCX_PROTO_UNKNOWN_6 = 1 << 19,
+ MLXCX_PROTO_100GBASE_CR4 = 1 << 20,
+ MLXCX_PROTO_100GBASE_SR4 = 1 << 21,
+ MLXCX_PROTO_100GBASE_KR4 = 1 << 22,
+ MLXCX_PROTO_UNKNOWN_7 = 1 << 23,
+ MLXCX_PROTO_UNKNOWN_8 = 1 << 24,
+ MLXCX_PROTO_UNKNOWN_9 = 1 << 25,
+ MLXCX_PROTO_UNKNOWN_10 = 1 << 26,
+ MLXCX_PROTO_25GBASE_CR = 1 << 27,
+ MLXCX_PROTO_25GBASE_KR = 1 << 28,
+ MLXCX_PROTO_25GBASE_SR = 1 << 29,
+ MLXCX_PROTO_50GBASE_CR2 = 1 << 30,
+ MLXCX_PROTO_50GBASE_KR2 = 1UL << 31,
+} mlxcx_eth_proto_t;
+
+typedef enum {
+ MLXCX_AUTONEG_DISABLE_CAP = 1 << 5,
+ MLXCX_AUTONEG_DISABLE = 1 << 6
+} mlxcx_autoneg_flags_t;
+
+typedef enum {
+ MLXCX_PTYS_PROTO_MASK_IB = 1 << 0,
+ MLXCX_PTYS_PROTO_MASK_ETH = 1 << 2,
+} mlxcx_reg_ptys_proto_mask_t;
+
+typedef struct {
+ bits8_t mlrd_ptys_autoneg_flags;
+ uint8_t mlrd_ptys_local_port;
+ uint8_t mlrd_ptys_rsvd;
+ bits8_t mlrd_ptys_proto_mask;
+
+ bits8_t mlrd_ptys_autoneg_status;
+ uint8_t mlrd_ptys_rsvd2;
+ uint16be_t mlrd_ptys_data_rate_oper;
+
+ uint8_t mlrd_ptys_rsvd3[4];
+
+ bits32_t mlrd_ptys_proto_cap;
+ uint8_t mlrd_ptys_rsvd4[8];
+ bits32_t mlrd_ptys_proto_admin;
+ uint8_t mlrd_ptys_rsvd5[8];
+ bits32_t mlrd_ptys_proto_oper;
+ uint8_t mlrd_ptys_rsvd6[8];
+ bits32_t mlrd_ptys_proto_partner_advert;
+ uint8_t mlrd_ptys_rsvd7[12];
+} mlxcx_reg_ptys_t;
+
+typedef enum {
+ MLXCX_LED_TYPE_BOTH = 0x0,
+ MLXCX_LED_TYPE_UID = 0x1,
+ MLXCX_LED_TYPE_PORT = 0x2,
+} mlxcx_led_type_t;
+
+#define MLXCX_MLCR_INDIVIDUAL_ONLY (1 << 4)
+/* CSTYLED */
+#define MLXCX_MLCR_LED_TYPE (bitdef_t){ 0, 0x0F }
+
+typedef struct {
+ uint8_t mlrd_mlcr_rsvd;
+ uint8_t mlrd_mlcr_local_port;
+ uint8_t mlrd_mlcr_rsvd2;
+ bits8_t mlrd_mlcr_flags;
+ uint8_t mlrd_mlcr_rsvd3[2];
+ uint16be_t mlrd_mlcr_beacon_duration;
+ uint8_t mlrd_mlcr_rsvd4[2];
+ uint16be_t mlrd_mlcr_beacon_remain;
+} mlxcx_reg_mlcr_t;
+
+typedef struct {
+ uint8_t mlrd_pmaos_rsvd;
+ uint8_t mlrd_pmaos_module;
+ uint8_t mlrd_pmaos_admin_status;
+ uint8_t mlrd_pmaos_oper_status;
+ bits8_t mlrd_pmaos_flags;
+ uint8_t mlrd_pmaos_rsvd2;
+ uint8_t mlrd_pmaos_error_type;
+ uint8_t mlrd_pmaos_event_en;
+ uint8_t mlrd_pmaos_rsvd3[8];
+} mlxcx_reg_pmaos_t;
+
+typedef enum {
+ MLXCX_MCIA_STATUS_OK = 0x0,
+ MLXCX_MCIA_STATUS_NO_EEPROM = 0x1,
+ MLXCX_MCIA_STATUS_NOT_SUPPORTED = 0x2,
+ MLXCX_MCIA_STATUS_NOT_CONNECTED = 0x3,
+ MLXCX_MCIA_STATUS_I2C_ERROR = 0x9,
+ MLXCX_MCIA_STATUS_DISABLED = 0x10
+} mlxcx_mcia_status_t;
+
+typedef struct {
+ bits8_t mlrd_mcia_flags;
+ uint8_t mlrd_mcia_module;
+ uint8_t mlrd_mcia_rsvd;
+ uint8_t mlrd_mcia_status;
+ uint8_t mlrd_mcia_i2c_device_addr;
+ uint8_t mlrd_mcia_page_number;
+ uint16be_t mlrd_mcia_device_addr;
+ uint8_t mlrd_mcia_rsvd2[2];
+ uint16be_t mlrd_mcia_size;
+ uint8_t mlrd_mcia_rsvd3[4];
+ uint8_t mlrd_mcia_data[48];
+} mlxcx_reg_mcia_t;
+
+typedef struct {
+ uint64be_t mlppc_ieee_802_3_frames_tx;
+ uint64be_t mlppc_ieee_802_3_frames_rx;
+ uint64be_t mlppc_ieee_802_3_fcs_err;
+ uint64be_t mlppc_ieee_802_3_align_err;
+ uint64be_t mlppc_ieee_802_3_bytes_tx;
+ uint64be_t mlppc_ieee_802_3_bytes_rx;
+ uint64be_t mlppc_ieee_802_3_mcast_tx;
+ uint64be_t mlppc_ieee_802_3_bcast_tx;
+ uint64be_t mlppc_ieee_802_3_mcast_rx;
+ uint64be_t mlppc_ieee_802_3_bcast_rx;
+ uint64be_t mlppc_ieee_802_3_in_range_len_err;
+ uint64be_t mlppc_ieee_802_3_out_of_range_len_err;
+ uint64be_t mlppc_ieee_802_3_frame_too_long_err;
+ uint64be_t mlppc_ieee_802_3_symbol_err;
+ uint64be_t mlppc_ieee_802_3_mac_ctrl_tx;
+ uint64be_t mlppc_ieee_802_3_mac_ctrl_rx;
+ uint64be_t mlppc_ieee_802_3_unsup_opcodes_rx;
+ uint64be_t mlppc_ieee_802_3_pause_rx;
+ uint64be_t mlppc_ieee_802_3_pause_tx;
+} mlxcx_ppcnt_ieee_802_3_t;
+
+typedef struct {
+ uint64be_t mlppc_rfc_2863_in_octets;
+ uint64be_t mlppc_rfc_2863_in_ucast_pkts;
+ uint64be_t mlppc_rfc_2863_in_discards;
+ uint64be_t mlppc_rfc_2863_in_errors;
+ uint64be_t mlppc_rfc_2863_in_unknown_protos;
+ uint64be_t mlppc_rfc_2863_out_octets;
+ uint64be_t mlppc_rfc_2863_out_ucast_pkts;
+ uint64be_t mlppc_rfc_2863_out_discards;
+ uint64be_t mlppc_rfc_2863_out_errors;
+ uint64be_t mlppc_rfc_2863_in_mcast_pkts;
+ uint64be_t mlppc_rfc_2863_in_bcast_pkts;
+ uint64be_t mlppc_rfc_2863_out_mcast_pkts;
+ uint64be_t mlppc_rfc_2863_out_bcast_pkts;
+} mlxcx_ppcnt_rfc_2863_t;
+
+typedef struct {
+ uint64be_t mlppc_phy_stats_time_since_last_clear;
+ uint64be_t mlppc_phy_stats_rx_bits;
+ uint64be_t mlppc_phy_stats_symbol_errs;
+ uint64be_t mlppc_phy_stats_corrected_bits;
+ uint8_t mlppc_phy_stats_rsvd[2];
+ uint8_t mlppc_phy_stats_raw_ber_mag;
+ uint8_t mlppc_phy_stats_raw_ber_coef;
+ uint8_t mlppc_phy_stats_rsvd2[2];
+ uint8_t mlppc_phy_stats_eff_ber_mag;
+ uint8_t mlppc_phy_stats_eff_ber_coef;
+} mlxcx_ppcnt_phy_stats_t;
+
+typedef enum {
+ MLXCX_PPCNT_GRP_IEEE_802_3 = 0x0,
+ MLXCX_PPCNT_GRP_RFC_2863 = 0x1,
+ MLXCX_PPCNT_GRP_RFC_2819 = 0x2,
+ MLXCX_PPCNT_GRP_RFC_3635 = 0x3,
+ MLXCX_PPCNT_GRP_ETH_EXTD = 0x5,
+ MLXCX_PPCNT_GRP_ETH_DISCARD = 0x6,
+ MLXCX_PPCNT_GRP_PER_PRIO = 0x10,
+ MLXCX_PPCNT_GRP_PER_TC = 0x11,
+ MLXCX_PPCNT_GRP_PER_TC_CONGEST = 0x13,
+ MLXCX_PPCNT_GRP_PHY_STATS = 0x16
+} mlxcx_ppcnt_grp_t;
+
+typedef enum {
+ MLXCX_PPCNT_CLEAR = (1 << 7),
+ MLXCX_PPCNT_NO_CLEAR = 0
+} mlxcx_ppcnt_clear_t;
+
+typedef struct {
+ uint8_t mlrd_ppcnt_swid;
+ uint8_t mlrd_ppcnt_local_port;
+ uint8_t mlrd_ppcnt_pnat;
+ uint8_t mlrd_ppcnt_grp;
+ uint8_t mlrd_ppcnt_clear;
+ uint8_t mlrd_ppcnt_rsvd[2];
+ uint8_t mlrd_ppcnt_prio_tc;
+ union {
+ uint8_t mlrd_ppcnt_data[248];
+ mlxcx_ppcnt_ieee_802_3_t mlrd_ppcnt_ieee_802_3;
+ mlxcx_ppcnt_rfc_2863_t mlrd_ppcnt_rfc_2863;
+ mlxcx_ppcnt_phy_stats_t mlrd_ppcnt_phy_stats;
+ };
+} mlxcx_reg_ppcnt_t;
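+
+/*
+ * A sketch of intended use (from_be64() is a stand-in, not a real helper
+ * name): the group field selects which counter set the firmware returns,
+ * and only the matching union arm is then valid:
+ *
+ *	data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_IEEE_802_3;
+ *	... issue MLXCX_OP_ACCESS_REG for MLXCX_REG_PPCNT ...
+ *	rx = from_be64(data.mlrd_ppcnt.mlrd_ppcnt_ieee_802_3.
+ *	    mlppc_ieee_802_3_frames_rx);
+ */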
+
+typedef enum {
+ MLXCX_REG_PMTU = 0x5003,
+ MLXCX_REG_PTYS = 0x5004,
+ MLXCX_REG_PAOS = 0x5006,
+ MLXCX_REG_PMAOS = 0x5012,
+ MLXCX_REG_MSGI = 0x9021,
+ MLXCX_REG_MLCR = 0x902B,
+ MLXCX_REG_MCIA = 0x9014,
+ MLXCX_REG_PPCNT = 0x5008,
+} mlxcx_register_id_t;
+
+typedef union {
+ mlxcx_reg_pmtu_t mlrd_pmtu;
+ mlxcx_reg_paos_t mlrd_paos;
+ mlxcx_reg_ptys_t mlrd_ptys;
+ mlxcx_reg_mlcr_t mlrd_mlcr;
+ mlxcx_reg_pmaos_t mlrd_pmaos;
+ mlxcx_reg_mcia_t mlrd_mcia;
+ mlxcx_reg_ppcnt_t mlrd_ppcnt;
+} mlxcx_register_data_t;
+
+typedef enum {
+ MLXCX_CMD_ACCESS_REGISTER_READ = 1,
+ MLXCX_CMD_ACCESS_REGISTER_WRITE = 0
+} mlxcx_cmd_reg_opmod_t;
+
+typedef struct {
+ mlxcx_cmd_in_t mlxi_access_register_head;
+ uint8_t mlxi_access_register_rsvd[2];
+ uint16be_t mlxi_access_register_register_id;
+ uint32be_t mlxi_access_register_argument;
+ mlxcx_register_data_t mlxi_access_register_data;
+} mlxcx_cmd_access_register_in_t;
+
+typedef struct {
+ mlxcx_cmd_out_t mlxo_access_register_head;
+ uint8_t mlxo_access_register_rsvd[8];
+ mlxcx_register_data_t mlxo_access_register_data;
+} mlxcx_cmd_access_register_out_t;
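+
+/*
+ * A hedged sketch of a register read via ACCESS_REG (to_be16() and
+ * from_be16() are stand-ins for the driver's byte-order accessors): fill
+ * in the register id and any addressing fields in the data union, issue
+ * the command with the READ op_mod, then pull the result out of the
+ * output:
+ *
+ *	in.mlxi_access_register_register_id = to_be16(MLXCX_REG_PMTU);
+ *	in.mlxi_access_register_data.mlrd_pmtu.mlrd_pmtu_local_port = 1;
+ *	... issue with op_mod MLXCX_CMD_ACCESS_REGISTER_READ ...
+ *	mtu = from_be16(out.mlxo_access_register_data.mlrd_pmtu.
+ *	    mlrd_pmtu_oper_mtu);
+ */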
+
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MLXCX_REG_H */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
new file mode 100644
index 0000000000..8337545b57
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -0,0 +1,2264 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Mellanox ConnectX-4/5/6 driver.
+ */
+
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/sysmacros.h>
+#include <sys/atomic.h>
+#include <sys/cpuvar.h>
+
+#include <sys/pattr.h>
+#include <sys/dlpi.h>
+
+#include <sys/mac_provider.h>
+
+#include <sys/random.h>
+
+#include <mlxcx.h>
+
+boolean_t
+mlxcx_wq_alloc_dma(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+ boolean_t ret;
+ size_t sz;
+
+ VERIFY0(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+
+ /* Receive and send queue entries might be different sizes. */
+ switch (mlwq->mlwq_type) {
+ case MLXCX_WQ_TYPE_SENDQ:
+ mlwq->mlwq_entshift = mlxp->mlx_props.mldp_sq_size_shift;
+ mlwq->mlwq_nents = (1 << mlwq->mlwq_entshift);
+ sz = mlwq->mlwq_nents * sizeof (mlxcx_sendq_ent_t);
+ break;
+ case MLXCX_WQ_TYPE_RECVQ:
+ mlwq->mlwq_entshift = mlxp->mlx_props.mldp_rq_size_shift;
+ mlwq->mlwq_nents = (1 << mlwq->mlwq_entshift);
+ sz = mlwq->mlwq_nents * sizeof (mlxcx_recvq_ent_t);
+ break;
+ default:
+ VERIFY(0);
+ return (B_FALSE);
+ }
+ ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0);
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_queue_attr(mlxp, &attr);
+
+ ret = mlxcx_dma_alloc(mlxp, &mlwq->mlwq_dma, &attr, &acc,
+ B_TRUE, sz, B_TRUE);
+ if (!ret) {
+ mlxcx_warn(mlxp, "failed to allocate WQ memory");
+ return (B_FALSE);
+ }
+
+ /*
+ * Just set the first pointer in the union. Yes, this is a strict
+ * aliasing violation. No, I don't care.
+ */
+ mlwq->mlwq_send_ent = (mlxcx_sendq_ent_t *)mlwq->mlwq_dma.mxdb_va;
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_qdbell_attr(mlxp, &attr);
+ sz = sizeof (mlxcx_workq_doorbell_t);
+ ret = mlxcx_dma_alloc(mlxp, &mlwq->mlwq_doorbell_dma, &attr, &acc,
+ B_TRUE, sz, B_TRUE);
+ if (!ret) {
+ mlxcx_warn(mlxp, "failed to allocate WQ doorbell memory");
+ mlxcx_dma_free(&mlwq->mlwq_dma);
+ mlwq->mlwq_send_ent = NULL;
+ return (B_FALSE);
+ }
+
+ mlwq->mlwq_doorbell =
+ (mlxcx_workq_doorbell_t *)mlwq->mlwq_doorbell_dma.mxdb_va;
+
+ mlwq->mlwq_state |= MLXCX_WQ_ALLOC;
+
+ return (B_TRUE);
+}
+
+void
+mlxcx_wq_rele_dma(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+ if (mlwq->mlwq_state & MLXCX_WQ_CREATED)
+ VERIFY(mlwq->mlwq_state & MLXCX_WQ_DESTROYED);
+
+ mlxcx_dma_free(&mlwq->mlwq_dma);
+ mlwq->mlwq_send_ent = NULL;
+ mlxcx_dma_free(&mlwq->mlwq_doorbell_dma);
+ mlwq->mlwq_doorbell = NULL;
+
+	mlwq->mlwq_state &= ~MLXCX_WQ_ALLOC;
+}
+
+boolean_t
+mlxcx_cq_alloc_dma(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+ boolean_t ret;
+ size_t sz, i;
+
+	VERIFY0(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+
+ mlcq->mlcq_entshift = mlxp->mlx_props.mldp_cq_size_shift;
+ mlcq->mlcq_nents = (1 << mlcq->mlcq_entshift);
+ sz = mlcq->mlcq_nents * sizeof (mlxcx_completionq_ent_t);
+ ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0);
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_queue_attr(mlxp, &attr);
+
+ ret = mlxcx_dma_alloc(mlxp, &mlcq->mlcq_dma, &attr, &acc,
+ B_TRUE, sz, B_TRUE);
+ if (!ret) {
+ mlxcx_warn(mlxp, "failed to allocate CQ memory");
+ return (B_FALSE);
+ }
+
+ mlcq->mlcq_ent = (mlxcx_completionq_ent_t *)mlcq->mlcq_dma.mxdb_va;
+
+ for (i = 0; i < mlcq->mlcq_nents; ++i) {
+ mlcq->mlcq_ent[i].mlcqe_opcode = MLXCX_CQE_OP_INVALID;
+ mlcq->mlcq_ent[i].mlcqe_owner = MLXCX_CQE_OWNER_INIT;
+ }
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_qdbell_attr(mlxp, &attr);
+ sz = sizeof (mlxcx_completionq_doorbell_t);
+ ret = mlxcx_dma_alloc(mlxp, &mlcq->mlcq_doorbell_dma, &attr, &acc,
+ B_TRUE, sz, B_TRUE);
+ if (!ret) {
+ mlxcx_warn(mlxp, "failed to allocate CQ doorbell memory");
+ mlxcx_dma_free(&mlcq->mlcq_dma);
+ mlcq->mlcq_ent = NULL;
+ return (B_FALSE);
+ }
+
+ mlcq->mlcq_doorbell =
+ (mlxcx_completionq_doorbell_t *)mlcq->mlcq_doorbell_dma.mxdb_va;
+
+ mlcq->mlcq_state |= MLXCX_CQ_ALLOC;
+
+ return (B_TRUE);
+}
+
+void
+mlxcx_cq_rele_dma(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+ if (mlcq->mlcq_state & MLXCX_CQ_CREATED)
+ VERIFY(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);
+
+ mlxcx_dma_free(&mlcq->mlcq_dma);
+ mlcq->mlcq_ent = NULL;
+ mlxcx_dma_free(&mlcq->mlcq_doorbell_dma);
+ mlcq->mlcq_doorbell = NULL;
+
+ mlcq->mlcq_state &= ~MLXCX_CQ_ALLOC;
+}
+
+void
+mlxcx_wq_teardown(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ mlxcx_completion_queue_t *mlcq;
+
+	/*
+	 * If anything is holding the WQ lock across a long operation,
+	 * such as a refill, setting this flag asks it to exit early if
+	 * possible.
+	 */
+ atomic_or_uint(&mlwq->mlwq_state, MLXCX_WQ_TEARDOWN);
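+
+	/*
+	 * The other side of this contract, as a sketch (illustrative, not
+	 * a quote of the refill path): the long-running lock holder
+	 * periodically does
+	 *
+	 *	if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN)
+	 *		break;
+	 *
+	 * and unwinds rather than completing its full pass.
+	 */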
+
+ mutex_enter(&mlwq->mlwq_mtx);
+
+ list_remove(&mlxp->mlx_wqs, mlwq);
+
+ if ((mlwq->mlwq_state & MLXCX_WQ_CREATED) &&
+ !(mlwq->mlwq_state & MLXCX_WQ_DESTROYED)) {
+ if (mlwq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
+ mlwq->mlwq_state & MLXCX_WQ_STARTED &&
+ !mlxcx_cmd_stop_rq(mlxp, mlwq)) {
+ mlxcx_warn(mlxp, "failed to stop "
+ "recv queue num %x", mlwq->mlwq_num);
+ }
+ if (mlwq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
+ mlwq->mlwq_state & MLXCX_WQ_STARTED &&
+ !mlxcx_cmd_stop_sq(mlxp, mlwq)) {
+ mlxcx_warn(mlxp, "failed to stop "
+ "send queue num %x", mlwq->mlwq_num);
+ }
+ if (mlwq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
+ !mlxcx_cmd_destroy_rq(mlxp, mlwq)) {
+ mlxcx_warn(mlxp, "failed to destroy "
+ "recv queue num %x", mlwq->mlwq_num);
+ }
+ if (mlwq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
+ !mlxcx_cmd_destroy_sq(mlxp, mlwq)) {
+ mlxcx_warn(mlxp, "failed to destroy "
+ "send queue num %x", mlwq->mlwq_num);
+ }
+ }
+ if (mlwq->mlwq_state & MLXCX_WQ_ALLOC) {
+ mlxcx_wq_rele_dma(mlxp, mlwq);
+ }
+ mlcq = mlwq->mlwq_cq;
+
+ /* These will be released by mlxcx_teardown_bufs() */
+ mlwq->mlwq_bufs = NULL;
+ mlwq->mlwq_foreign_bufs = NULL;
+
+ mutex_exit(&mlwq->mlwq_mtx);
+
+ mutex_enter(&mlcq->mlcq_mtx);
+ mutex_enter(&mlwq->mlwq_mtx);
+ ASSERT3P(mlcq->mlcq_wq, ==, mlwq);
+ mlcq->mlcq_wq = NULL;
+ mutex_exit(&mlwq->mlwq_mtx);
+ mutex_exit(&mlcq->mlcq_mtx);
+
+ mutex_destroy(&mlwq->mlwq_mtx);
+}
+
+void
+mlxcx_cq_teardown(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+ mlxcx_event_queue_t *mleq;
+ mlxcx_buffer_t *b;
+
+	/*
+	 * If anything is holding the CQ lock across a long operation,
+	 * such as polling (which we're about to abort anyway), this flag
+	 * asks it to exit early if possible.
+	 */
+ atomic_or_uint(&mlcq->mlcq_state, MLXCX_CQ_TEARDOWN);
+
+ mutex_enter(&mlcq->mlcq_mtx);
+
+ list_remove(&mlxp->mlx_cqs, mlcq);
+
+ if ((mlcq->mlcq_state & MLXCX_CQ_CREATED) &&
+ !(mlcq->mlcq_state & MLXCX_CQ_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_cq(mlxp, mlcq)) {
+ mlxcx_warn(mlxp, "failed to destroy "
+ "completion queue num %u",
+ mlcq->mlcq_num);
+ }
+ }
+ if (mlcq->mlcq_state & MLXCX_CQ_ALLOC) {
+ mlxcx_cq_rele_dma(mlxp, mlcq);
+ }
+ /*
+ * If we're on an EQ AVL tree, then we need to grab
+ * the EQ's mutex to take it off. The ISR always takes
+ * EQ mutex before CQ mutex, so we have to let go of
+ * the CQ mutex then come back again.
+ *
+	 * The ISR will bail out if it tries to touch this CQ now, since
+	 * we set the TEARDOWN flag on it above.
+ */
+ if (mlcq->mlcq_state & MLXCX_CQ_EQAVL) {
+ mleq = mlcq->mlcq_eq;
+ } else {
+ mleq = NULL;
+ }
+
+ /* Return any outstanding buffers to the free pool. */
+ while ((b = list_remove_head(&mlcq->mlcq_buffers)) != NULL) {
+ mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+ }
+ mutex_enter(&mlcq->mlcq_bufbmtx);
+ while ((b = list_remove_head(&mlcq->mlcq_buffers_b)) != NULL) {
+ mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+ }
+ mutex_exit(&mlcq->mlcq_bufbmtx);
+
+ /*
+ * Since the interrupt handlers take the EQ lock before the CQ one,
+ * we must do the same here. That means letting go of the lock
+ * for a brief window here (we'll double-check the state when we
+ * get back in).
+ */
+ mutex_exit(&mlcq->mlcq_mtx);
+
+ if (mleq != NULL) {
+ mutex_enter(&mleq->mleq_mtx);
+ mutex_enter(&mlcq->mlcq_mtx);
+ /*
+ * Double-check the state, we let go of the
+ * mutex briefly.
+ */
+ if (mlcq->mlcq_state & MLXCX_CQ_EQAVL) {
+ avl_remove(&mleq->mleq_cqs, mlcq);
+ mlcq->mlcq_state &= ~MLXCX_CQ_EQAVL;
+ }
+ mutex_exit(&mlcq->mlcq_mtx);
+ mutex_exit(&mleq->mleq_mtx);
+ }
+
+ mutex_enter(&mlcq->mlcq_mtx);
+ ASSERT0(mlcq->mlcq_state & ~(MLXCX_CQ_CREATED | MLXCX_CQ_DESTROYED |
+ MLXCX_CQ_TEARDOWN | MLXCX_CQ_ARMED));
+ mutex_exit(&mlcq->mlcq_mtx);
+
+ mutex_destroy(&mlcq->mlcq_mtx);
+ mutex_destroy(&mlcq->mlcq_bufbmtx);
+ list_destroy(&mlcq->mlcq_buffers);
+ list_destroy(&mlcq->mlcq_buffers_b);
+ kmem_free(mlcq, sizeof (mlxcx_completion_queue_t));
+}
+
+static boolean_t
+mlxcx_cq_setup(mlxcx_t *mlxp, mlxcx_event_queue_t *eq,
+ mlxcx_completion_queue_t **cqp)
+{
+ mlxcx_completion_queue_t *cq;
+
+ cq = kmem_zalloc(sizeof (mlxcx_completion_queue_t), KM_SLEEP);
+ mutex_init(&cq->mlcq_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ mutex_init(&cq->mlcq_bufbmtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ list_create(&cq->mlcq_buffers, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_cq_entry));
+ list_create(&cq->mlcq_buffers_b, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_cq_entry));
+
+ cq->mlcq_mlx = mlxp;
+ list_insert_tail(&mlxp->mlx_cqs, cq);
+
+ mutex_enter(&cq->mlcq_mtx);
+
+ if (!mlxcx_cq_alloc_dma(mlxp, cq)) {
+ mutex_exit(&cq->mlcq_mtx);
+ return (B_FALSE);
+ }
+
+ cq->mlcq_bufhwm = cq->mlcq_nents - MLXCX_CQ_HWM_GAP;
+ cq->mlcq_buflwm = cq->mlcq_nents - MLXCX_CQ_LWM_GAP;
+
+ cq->mlcq_uar = &mlxp->mlx_uar;
+ cq->mlcq_eq = eq;
+
+ cq->mlcq_cqemod_period_usec = mlxp->mlx_props.mldp_cqemod_period_usec;
+ cq->mlcq_cqemod_count = mlxp->mlx_props.mldp_cqemod_count;
+
+ if (!mlxcx_cmd_create_cq(mlxp, cq)) {
+ mutex_exit(&cq->mlcq_mtx);
+ return (B_FALSE);
+ }
+
+ mutex_exit(&cq->mlcq_mtx);
+
+ mutex_enter(&eq->mleq_mtx);
+ mutex_enter(&cq->mlcq_mtx);
+ ASSERT0(cq->mlcq_state & MLXCX_CQ_EQAVL);
+ avl_add(&eq->mleq_cqs, cq);
+ cq->mlcq_state |= MLXCX_CQ_EQAVL;
+ mlxcx_arm_cq(mlxp, cq);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&eq->mleq_mtx);
+
+ *cqp = cq;
+ return (B_TRUE);
+}
+
+static boolean_t
+mlxcx_rq_setup(mlxcx_t *mlxp, mlxcx_completion_queue_t *cq,
+ mlxcx_work_queue_t *wq)
+{
+ mutex_init(&wq->mlwq_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+
+ list_insert_tail(&mlxp->mlx_wqs, wq);
+
+ mutex_enter(&wq->mlwq_mtx);
+
+ wq->mlwq_mlx = mlxp;
+ wq->mlwq_type = MLXCX_WQ_TYPE_RECVQ;
+ wq->mlwq_cq = cq;
+ wq->mlwq_pd = &mlxp->mlx_pd;
+ wq->mlwq_uar = &mlxp->mlx_uar;
+
+ wq->mlwq_bufs = mlxcx_mlbs_create(mlxp);
+
+ if (!mlxcx_wq_alloc_dma(mlxp, wq)) {
+ mutex_exit(&wq->mlwq_mtx);
+ return (B_FALSE);
+ }
+
+ if (!mlxcx_cmd_create_rq(mlxp, wq)) {
+ mutex_exit(&wq->mlwq_mtx);
+ return (B_FALSE);
+ }
+
+ mutex_exit(&wq->mlwq_mtx);
+
+ mutex_enter(&cq->mlcq_mtx);
+ mutex_enter(&wq->mlwq_mtx);
+ ASSERT3P(cq->mlcq_wq, ==, NULL);
+ cq->mlcq_wq = wq;
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
+ return (B_TRUE);
+}
+
+static boolean_t
+mlxcx_sq_setup(mlxcx_t *mlxp, mlxcx_port_t *port, mlxcx_completion_queue_t *cq,
+ mlxcx_tis_t *tis, mlxcx_work_queue_t *wq)
+{
+ mutex_init(&wq->mlwq_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+
+ list_insert_tail(&mlxp->mlx_wqs, wq);
+
+ mutex_enter(&wq->mlwq_mtx);
+
+ wq->mlwq_mlx = mlxp;
+ wq->mlwq_type = MLXCX_WQ_TYPE_SENDQ;
+ wq->mlwq_cq = cq;
+ wq->mlwq_pd = &mlxp->mlx_pd;
+ wq->mlwq_uar = &mlxp->mlx_uar;
+ wq->mlwq_tis = tis;
+
+ wq->mlwq_bufs = mlxcx_mlbs_create(mlxp);
+ wq->mlwq_foreign_bufs = mlxcx_mlbs_create(mlxp);
+
+ VERIFY3U(port->mlp_wqe_min_inline, <=, MLXCX_ETH_INLINE_L2);
+ wq->mlwq_inline_mode = MLXCX_ETH_INLINE_L2;
+
+ if (!mlxcx_wq_alloc_dma(mlxp, wq)) {
+ mutex_exit(&wq->mlwq_mtx);
+ return (B_FALSE);
+ }
+
+ if (!mlxcx_cmd_create_sq(mlxp, wq)) {
+ mutex_exit(&wq->mlwq_mtx);
+ return (B_FALSE);
+ }
+
+ mutex_exit(&wq->mlwq_mtx);
+
+ mutex_enter(&cq->mlcq_mtx);
+ mutex_enter(&wq->mlwq_mtx);
+ ASSERT3P(cq->mlcq_wq, ==, NULL);
+ cq->mlcq_wq = wq;
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
+ return (B_TRUE);
+}
+
+void
+mlxcx_teardown_rx_group(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
+{
+ mlxcx_work_queue_t *wq;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_flow_entry_t *fe;
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_table_t *ft;
+ uint_t i;
+
+ mutex_enter(&g->mlg_port->mlp_mtx);
+ mutex_enter(&g->mlg_mtx);
+
+ if (g->mlg_state & MLXCX_GROUP_FLOWS) {
+ mlxcx_remove_all_umcast_entries(mlxp, g->mlg_port, g);
+
+ if (g->mlg_rx_vlan_ft != NULL)
+ mlxcx_remove_all_vlan_entries(mlxp, g);
+
+ if (g == &mlxp->mlx_rx_groups[0]) {
+ ft = g->mlg_port->mlp_rx_flow;
+ mutex_enter(&ft->mlft_mtx);
+
+ fg = g->mlg_port->mlp_bcast;
+ fe = list_head(&fg->mlfg_entries);
+ if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ (void) mlxcx_cmd_delete_flow_table_entry(
+ mlxp, fe);
+ }
+
+ fg = g->mlg_port->mlp_promisc;
+ fe = list_head(&fg->mlfg_entries);
+ if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ (void) mlxcx_cmd_delete_flow_table_entry(
+ mlxp, fe);
+ }
+
+ mutex_exit(&ft->mlft_mtx);
+ }
+
+ if (g->mlg_rx_vlan_ft != NULL) {
+ mutex_enter(&g->mlg_rx_vlan_ft->mlft_mtx);
+ ASSERT(list_is_empty(&g->mlg_rx_vlans));
+ fg = g->mlg_rx_vlan_def_fg;
+ fe = list_head(&fg->mlfg_entries);
+ if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ (void) mlxcx_cmd_delete_flow_table_entry(
+ mlxp, fe);
+ }
+ fg = g->mlg_rx_vlan_promisc_fg;
+ fe = list_head(&fg->mlfg_entries);
+ if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+ (void) mlxcx_cmd_delete_flow_table_entry(
+ mlxp, fe);
+ }
+ mlxcx_teardown_flow_table(mlxp, g->mlg_rx_vlan_ft);
+ list_destroy(&g->mlg_rx_vlans);
+
+ g->mlg_rx_vlan_ft = NULL;
+ }
+
+ mutex_enter(&g->mlg_rx_hash_ft->mlft_mtx);
+ mlxcx_teardown_flow_table(mlxp, g->mlg_rx_hash_ft);
+ g->mlg_rx_hash_ft = NULL;
+
+ avl_destroy(&g->mlg_rx_macs);
+ g->mlg_state &= ~MLXCX_GROUP_FLOWS;
+ }
+
+ if (g->mlg_state & MLXCX_GROUP_RUNNING) {
+ for (i = 0; i < g->mlg_nwqs; ++i) {
+ wq = &g->mlg_wqs[i];
+ mutex_enter(&wq->mlwq_mtx);
+ if (wq->mlwq_state & MLXCX_WQ_STARTED &&
+ !mlxcx_cmd_stop_rq(mlxp, wq)) {
+ mlxcx_warn(mlxp, "failed to stop rq %x",
+ wq->mlwq_num);
+ }
+ mutex_exit(&wq->mlwq_mtx);
+ }
+ g->mlg_state &= ~MLXCX_GROUP_RUNNING;
+ }
+
+ if (g->mlg_state & MLXCX_GROUP_TIRTIS) {
+ for (i = 0; i < MLXCX_TIRS_PER_GROUP; ++i) {
+ mlxcx_tir_t *tir = &g->mlg_tir[i];
+ if (tir->mltir_state & MLXCX_TIR_CREATED &&
+ !(tir->mltir_state & MLXCX_TIR_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_tir(mlxp, tir)) {
+ mlxcx_warn(mlxp,
+ "failed to destroy tir %u "
+ "for rx ring", tir->mltir_num);
+ }
+ }
+ }
+ g->mlg_state &= ~MLXCX_GROUP_TIRTIS;
+ }
+
+ if (g->mlg_state & MLXCX_GROUP_RQT) {
+ if (g->mlg_rqt->mlrqt_state & MLXCX_RQT_CREATED &&
+ !(g->mlg_rqt->mlrqt_state & MLXCX_RQT_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_rqt(mlxp, g->mlg_rqt)) {
+ mlxcx_warn(mlxp, "failed to destroy rqt %u "
+ "for rx ring", g->mlg_rqt->mlrqt_num);
+ }
+ kmem_free(g->mlg_rqt->mlrqt_rq,
+ g->mlg_rqt->mlrqt_rq_size);
+ g->mlg_rqt->mlrqt_rq = NULL;
+ kmem_free(g->mlg_rqt, sizeof (mlxcx_rqtable_t));
+ g->mlg_rqt = NULL;
+ }
+ g->mlg_state &= ~MLXCX_GROUP_RQT;
+ }
+
+ for (i = 0; i < g->mlg_nwqs; ++i) {
+ wq = &g->mlg_wqs[i];
+ cq = wq->mlwq_cq;
+ mlxcx_wq_teardown(mlxp, wq);
+ if (cq != NULL)
+ mlxcx_cq_teardown(mlxp, cq);
+ }
+ kmem_free(g->mlg_wqs, g->mlg_wqs_size);
+ g->mlg_wqs = NULL;
+ g->mlg_state &= ~MLXCX_GROUP_WQS;
+
+ mutex_exit(&g->mlg_mtx);
+ mutex_exit(&g->mlg_port->mlp_mtx);
+
+ mutex_destroy(&g->mlg_mtx);
+
+ g->mlg_state &= ~MLXCX_GROUP_INIT;
+ ASSERT3S(g->mlg_state, ==, 0);
+}
+
+void
+mlxcx_teardown_tx_group(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
+{
+ mlxcx_work_queue_t *wq;
+ mlxcx_completion_queue_t *cq;
+ uint_t i;
+
+ mutex_enter(&g->mlg_mtx);
+
+ if (g->mlg_state & MLXCX_GROUP_WQS) {
+ for (i = 0; i < g->mlg_nwqs; ++i) {
+ wq = &g->mlg_wqs[i];
+ mutex_enter(&wq->mlwq_mtx);
+ cq = wq->mlwq_cq;
+ if (wq->mlwq_state & MLXCX_WQ_STARTED &&
+ !mlxcx_cmd_stop_sq(mlxp, wq)) {
+ mlxcx_warn(mlxp, "failed to stop sq %x",
+ wq->mlwq_num);
+ }
+ mutex_exit(&wq->mlwq_mtx);
+ mlxcx_wq_teardown(mlxp, wq);
+ if (cq != NULL)
+ mlxcx_cq_teardown(mlxp, cq);
+ }
+ g->mlg_state &= ~MLXCX_GROUP_RUNNING;
+ kmem_free(g->mlg_wqs, g->mlg_wqs_size);
+ g->mlg_wqs = NULL;
+ g->mlg_state &= ~MLXCX_GROUP_WQS;
+ }
+
+ if ((g->mlg_state & MLXCX_GROUP_TIRTIS) &&
+ g->mlg_tis.mltis_state & MLXCX_TIS_CREATED &&
+ !(g->mlg_tis.mltis_state & MLXCX_TIS_DESTROYED)) {
+ if (!mlxcx_cmd_destroy_tis(mlxp, &g->mlg_tis)) {
+ mlxcx_warn(mlxp, "failed to destroy tis %u for tx ring",
+ g->mlg_tis.mltis_num);
+ }
+ }
+ g->mlg_state &= ~MLXCX_GROUP_TIRTIS;
+
+ mutex_exit(&g->mlg_mtx);
+ mutex_destroy(&g->mlg_mtx);
+ g->mlg_state &= ~MLXCX_GROUP_INIT;
+ ASSERT3S(g->mlg_state, ==, 0);
+}
+
+void
+mlxcx_teardown_groups(mlxcx_t *mlxp)
+{
+ mlxcx_ring_group_t *g;
+ uint_t i;
+
+ for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) {
+ g = &mlxp->mlx_rx_groups[i];
+ if (!(g->mlg_state & MLXCX_GROUP_INIT))
+ continue;
+ ASSERT3S(g->mlg_type, ==, MLXCX_GROUP_RX);
+ mlxcx_teardown_rx_group(mlxp, g);
+ }
+ kmem_free(mlxp->mlx_rx_groups, mlxp->mlx_rx_groups_size);
+ mlxp->mlx_rx_groups = NULL;
+
+ for (i = 0; i < mlxp->mlx_tx_ngroups; ++i) {
+ g = &mlxp->mlx_tx_groups[i];
+ if (!(g->mlg_state & MLXCX_GROUP_INIT))
+ continue;
+ ASSERT3S(g->mlg_type, ==, MLXCX_GROUP_TX);
+ mlxcx_teardown_tx_group(mlxp, g);
+ }
+ kmem_free(mlxp->mlx_tx_groups, mlxp->mlx_tx_groups_size);
+ mlxp->mlx_tx_groups = NULL;
+}
+
+boolean_t
+mlxcx_rx_group_setup(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
+{
+ mlxcx_event_queue_t *eq;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_work_queue_t *rq;
+ mlxcx_flow_table_t *ft;
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_entry_t *fe;
+ uint_t i, j;
+
+ ASSERT3S(g->mlg_state, ==, 0);
+
+ mutex_init(&g->mlg_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ mutex_enter(&g->mlg_mtx);
+ g->mlg_mlx = mlxp;
+ g->mlg_type = MLXCX_GROUP_RX;
+ g->mlg_port = &mlxp->mlx_ports[0];
+ g->mlg_state |= MLXCX_GROUP_INIT;
+
+ g->mlg_nwqs = mlxp->mlx_props.mldp_rx_nrings_per_small_group;
+ i = g - &mlxp->mlx_rx_groups[0];
+ if (i < mlxp->mlx_props.mldp_rx_ngroups_large)
+ g->mlg_nwqs = mlxp->mlx_props.mldp_rx_nrings_per_large_group;
+
+ g->mlg_wqs_size = g->mlg_nwqs * sizeof (mlxcx_work_queue_t);
+ g->mlg_wqs = kmem_zalloc(g->mlg_wqs_size, KM_SLEEP);
+ g->mlg_state |= MLXCX_GROUP_WQS;
+
+ g->mlg_rqt = kmem_zalloc(sizeof (mlxcx_rqtable_t), KM_SLEEP);
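+	/*
+	 * Size the RQ table to the next power of two at or above the
+	 * number of work queues in the group (minimum 2).
+	 */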
+ g->mlg_rqt->mlrqt_max = 2;
+ while (g->mlg_rqt->mlrqt_max < g->mlg_nwqs)
+ g->mlg_rqt->mlrqt_max <<= 1;
+ g->mlg_rqt->mlrqt_rq_size = g->mlg_rqt->mlrqt_max *
+ sizeof (mlxcx_work_queue_t *);
+ g->mlg_rqt->mlrqt_rq = kmem_zalloc(g->mlg_rqt->mlrqt_rq_size, KM_SLEEP);
+ g->mlg_state |= MLXCX_GROUP_RQT;
+
+ for (i = 0; i < g->mlg_nwqs; ++i) {
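+		/*
+		 * Pick an event queue round-robin, wrapping back to index 1
+		 * and skipping any EQ that is not available for RX use.
+		 */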
+ eq = NULL;
+ while (eq == NULL) {
+ eq = &mlxp->mlx_eqs[mlxp->mlx_next_eq++];
+ if (mlxp->mlx_next_eq >= mlxp->mlx_intr_count)
+ mlxp->mlx_next_eq = 1;
+ if (eq->mleq_type != MLXCX_EQ_TYPE_ANY &&
+ eq->mleq_type != MLXCX_EQ_TYPE_RX) {
+ /* Try the next one */
+ eq = NULL;
+ }
+ }
+
+ if (!mlxcx_cq_setup(mlxp, eq, &cq)) {
+ g->mlg_nwqs = i;
+ break;
+ }
+ cq->mlcq_stats = &g->mlg_port->mlp_stats;
+
+ rq = &g->mlg_wqs[i];
+ if (!mlxcx_rq_setup(mlxp, cq, rq)) {
+ g->mlg_nwqs = i;
+ break;
+ }
+ g->mlg_rqt->mlrqt_rq[g->mlg_rqt->mlrqt_used++] = rq;
+ g->mlg_rqt->mlrqt_state |= MLXCX_RQT_DIRTY;
+ rq->mlwq_group = g;
+ }
+ if (g->mlg_nwqs == 0) {
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ if (!mlxcx_cmd_create_rqt(mlxp, g->mlg_rqt)) {
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
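+	/*
+	 * Set up one TIR per role: a direct TIR for unclassified traffic,
+	 * and indirect TIRs that hash over the RQ table for RSS.
+	 */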
+ for (i = 0; i < MLXCX_TIRS_PER_GROUP; ++i) {
+ mlxcx_tir_t *tir = &g->mlg_tir[i];
+ tir->mltir_tdom = &mlxp->mlx_tdom;
+ switch (i) {
+ case MLXCX_TIR_ROLE_OTHER:
+ tir->mltir_type = MLXCX_TIR_DIRECT;
+ tir->mltir_rq = &g->mlg_wqs[0];
+ break;
+ case MLXCX_TIR_ROLE_IPv4:
+ case MLXCX_TIR_ROLE_IPv6:
+ case MLXCX_TIR_ROLE_TCPv4:
+ case MLXCX_TIR_ROLE_TCPv6:
+ case MLXCX_TIR_ROLE_UDPv4:
+ case MLXCX_TIR_ROLE_UDPv6:
+ tir->mltir_type = MLXCX_TIR_INDIRECT;
+ tir->mltir_rqtable = g->mlg_rqt;
+ tir->mltir_hash_fn = MLXCX_TIR_HASH_TOEPLITZ;
+ (void) random_get_pseudo_bytes(tir->mltir_toeplitz_key,
+ sizeof (tir->mltir_toeplitz_key));
+ break;
+ }
+ switch (i) {
+ case MLXCX_TIR_ROLE_OTHER:
+ break;
+ case MLXCX_TIR_ROLE_IPv4:
+ case MLXCX_TIR_ROLE_TCPv4:
+ case MLXCX_TIR_ROLE_UDPv4:
+ tir->mltir_l3_type = MLXCX_RX_HASH_L3_IPv4;
+ tir->mltir_hash_fields =
+ MLXCX_RX_HASH_SRC_IP | MLXCX_RX_HASH_DST_IP;
+ break;
+ case MLXCX_TIR_ROLE_IPv6:
+ case MLXCX_TIR_ROLE_TCPv6:
+ case MLXCX_TIR_ROLE_UDPv6:
+ tir->mltir_l3_type = MLXCX_RX_HASH_L3_IPv6;
+ tir->mltir_hash_fields =
+ MLXCX_RX_HASH_SRC_IP | MLXCX_RX_HASH_DST_IP;
+ break;
+ }
+ switch (i) {
+ case MLXCX_TIR_ROLE_OTHER:
+ case MLXCX_TIR_ROLE_IPv4:
+ case MLXCX_TIR_ROLE_IPv6:
+ break;
+ case MLXCX_TIR_ROLE_TCPv4:
+ case MLXCX_TIR_ROLE_TCPv6:
+ tir->mltir_l4_type = MLXCX_RX_HASH_L4_TCP;
+ tir->mltir_hash_fields |=
+ MLXCX_RX_HASH_L4_SPORT | MLXCX_RX_HASH_L4_DPORT;
+ break;
+ case MLXCX_TIR_ROLE_UDPv4:
+ case MLXCX_TIR_ROLE_UDPv6:
+ tir->mltir_l4_type = MLXCX_RX_HASH_L4_UDP;
+ tir->mltir_hash_fields |=
+ MLXCX_RX_HASH_L4_SPORT | MLXCX_RX_HASH_L4_DPORT;
+ break;
+ }
+
+ if (!mlxcx_cmd_create_tir(mlxp, tir)) {
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ g->mlg_state |= MLXCX_GROUP_TIRTIS;
+ }
+
+ /*
+ * Flow table: our RX hashing breakout table for RSS
+ */
+
+ g->mlg_rx_hash_ft = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t),
+ KM_SLEEP));
+ mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ avl_create(&g->mlg_rx_macs, mlxcx_grmac_compare,
+ sizeof (mlxcx_group_mac_t),
+ offsetof(mlxcx_group_mac_t, mlgm_group_entry));
+ g->mlg_state |= MLXCX_GROUP_FLOWS;
+
+ mutex_enter(&ft->mlft_mtx);
+
+ ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX;
+ ft->mlft_level = 2;
+ ft->mlft_port = g->mlg_port;
+ ft->mlft_entshift = MLXCX_RX_HASH_FT_SIZE_SHIFT;
+ ft->mlft_nents = (1 << ft->mlft_entshift);
+ ASSERT3U(ft->mlft_nents, >=, MLXCX_TIRS_PER_GROUP);
+ ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t);
+ ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP);
+ list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t),
+ offsetof(mlxcx_flow_group_t, mlfg_entry));
+
+ for (j = 0; j < ft->mlft_nents; ++j) {
+ ft->mlft_ent[j].mlfe_table = ft;
+ ft->mlft_ent[j].mlfe_index = j;
+ }
+
+ if (!mlxcx_cmd_create_flow_table(mlxp, ft)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ip_version = 6;
+ fe->mlfe_ip_proto = IPPROTO_UDP;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_UDPv6];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ip_version = 4;
+ fe->mlfe_ip_proto = IPPROTO_UDP;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_UDPv4];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ip_version = 6;
+ fe->mlfe_ip_proto = IPPROTO_TCP;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_TCPv6];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ip_version = 4;
+ fe->mlfe_ip_proto = IPPROTO_TCP;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_TCPv4];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ip_version = 6;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_IPv6];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ip_version = 4;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_IPv4];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir =
+ &g->mlg_tir[MLXCX_TIR_ROLE_OTHER];
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ mutex_exit(&ft->mlft_mtx);
+
+ /*
+ * Flow table: the VLAN breakout table for doing VLAN filtering after
+ * we've matched a MAC address.
+ */
+
+ g->mlg_rx_vlan_ft = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t),
+ KM_SLEEP));
+ mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ list_create(&g->mlg_rx_vlans, sizeof (mlxcx_group_vlan_t),
+ offsetof(mlxcx_group_vlan_t, mlgv_entry));
+
+ mutex_enter(&ft->mlft_mtx);
+
+ ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX;
+ ft->mlft_level = 1;
+ ft->mlft_port = g->mlg_port;
+ ft->mlft_entshift = mlxp->mlx_props.mldp_ftbl_vlan_size_shift;
+ ft->mlft_nents = (1 << ft->mlft_entshift);
+ ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t);
+ ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP);
+ list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t),
+ offsetof(mlxcx_flow_group_t, mlfg_entry));
+
+ for (j = 0; j < ft->mlft_nents; ++j) {
+ fe = &ft->mlft_ent[j];
+ fe->mlfe_table = ft;
+ fe->mlfe_index = j;
+ fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft;
+ }
+
+ if (!mlxcx_cmd_create_flow_table(mlxp, ft)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ /* First group is all actual matched VLANs */
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ g->mlg_rx_vlan_fg = fg;
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = ft->mlft_nents - 2;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_VLAN;
+ fg->mlfg_mask |= MLXCX_FLOW_MATCH_VID;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ /*
+ * Then the "default" entry which we enable when we have no VLAN IDs
+ * added to the group (we start with this enabled).
+ */
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ g->mlg_rx_vlan_def_fg = fg;
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ /*
+ * Finally, the promisc entry which points at the *hash ft* from the
+ * default group. We only enable this when we have promisc on.
+ */
+ fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
+ g->mlg_rx_vlan_promisc_fg = fg;
+ list_insert_tail(&ft->mlft_groups, fg);
+ fg->mlfg_table = ft;
+ fg->mlfg_size = 1;
+ if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_ndest = 1;
+ fe->mlfe_dest[0].mlfed_flow = mlxp->mlx_rx_groups[0].mlg_rx_hash_ft;
+
+ mutex_exit(&ft->mlft_mtx);
+
+ mutex_exit(&g->mlg_mtx);
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
+ mlxcx_work_queue_t *rq)
+{
+ uint_t j;
+ mlxcx_buffer_t *b;
+ mlxcx_completion_queue_t *cq;
+
+ mutex_enter(&g->mlg_mtx);
+ /*
+ * Sadly, even though MAC has the mgi_start callback, it is not always
+ * called -- in particular when we are being managed under an aggr, the
+ * mgi_start callback will only ever be called on the default group.
+ *
+ * So instead of asserting about the group state here, we have to
+ * check it and call group start if needed.
+ */
+ if (!(g->mlg_state & MLXCX_GROUP_RUNNING)) {
+ mutex_exit(&g->mlg_mtx);
+ if (!mlxcx_rx_group_start(mlxp, g))
+ return (B_FALSE);
+ mutex_enter(&g->mlg_mtx);
+ }
+ ASSERT(g->mlg_state & MLXCX_GROUP_RUNNING);
+
+ cq = rq->mlwq_cq;
+ ASSERT(cq != NULL);
+
+ mutex_enter(&cq->mlcq_mtx);
+ mutex_enter(&rq->mlwq_mtx);
+
+ if (rq->mlwq_state & MLXCX_WQ_STARTED) {
+ mutex_exit(&rq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_TRUE);
+ }
+
+ if (!mlxcx_cmd_start_rq(mlxp, rq)) {
+ mutex_exit(&rq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ ASSERT(rq->mlwq_state & MLXCX_WQ_STARTED);
+
+ ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
+ rq->mlwq_state |= MLXCX_WQ_BUFFERS;
+
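+	/*
+	 * Pre-create one and a half times the ring size in buffers before
+	 * the initial refill below.
+	 */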
+ for (j = 0; j < rq->mlwq_nents; ++j) {
+ if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
+ break;
+ mlxcx_buf_return(mlxp, b);
+ }
+ for (j = 0; j < rq->mlwq_nents / 2; ++j) {
+ if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
+ break;
+ mlxcx_buf_return(mlxp, b);
+ }
+
+ mlxcx_rq_refill(mlxp, rq);
+
+ mutex_exit(&rq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&g->mlg_mtx);
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_rx_group_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
+{
+ mlxcx_flow_table_t *ft;
+ mlxcx_flow_group_t *fg;
+ mlxcx_flow_entry_t *fe;
+
+ mutex_enter(&g->mlg_mtx);
+
+ if (g->mlg_state & MLXCX_GROUP_RUNNING) {
+ mutex_exit(&g->mlg_mtx);
+ return (B_TRUE);
+ }
+
+ ASSERT0(g->mlg_state & MLXCX_GROUP_RUNNING);
+
+ g->mlg_state |= MLXCX_GROUP_RUNNING;
+
+ if (g == &mlxp->mlx_rx_groups[0]) {
+ ft = g->mlg_port->mlp_rx_flow;
+ mutex_enter(&ft->mlft_mtx);
+
+ /*
+ * Broadcast and promisc entries go directly to group 0's
+ * RSS hash fanout flow table. They bypass VLAN filtering.
+ */
+ fg = g->mlg_port->mlp_bcast;
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft;
+ if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+ mutex_exit(&ft->mlft_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ fg = g->mlg_port->mlp_promisc;
+ fe = list_head(&fg->mlfg_entries);
+ fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft;
+ /*
+ * Don't actually set the promisc entry until promisc is
+ * enabled.
+ */
+
+ mutex_exit(&ft->mlft_mtx);
+ }
+
+ mutex_exit(&g->mlg_mtx);
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_tx_group_setup(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
+{
+ mlxcx_event_queue_t *eq;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_work_queue_t *sq;
+ uint_t i;
+
+ ASSERT3S(g->mlg_state, ==, 0);
+
+ mutex_init(&g->mlg_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ g->mlg_state |= MLXCX_GROUP_INIT;
+ mutex_enter(&g->mlg_mtx);
+
+ g->mlg_mlx = mlxp;
+ g->mlg_type = MLXCX_GROUP_TX;
+ g->mlg_port = &mlxp->mlx_ports[0];
+
+ g->mlg_nwqs = mlxp->mlx_props.mldp_tx_nrings_per_group;
+ g->mlg_wqs_size = g->mlg_nwqs * sizeof (mlxcx_work_queue_t);
+ g->mlg_wqs = kmem_zalloc(g->mlg_wqs_size, KM_SLEEP);
+ g->mlg_state |= MLXCX_GROUP_WQS;
+
+ g->mlg_tis.mltis_tdom = &mlxp->mlx_tdom;
+
+ if (!mlxcx_cmd_create_tis(mlxp, &g->mlg_tis)) {
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+
+ g->mlg_state |= MLXCX_GROUP_TIRTIS;
+
+ for (i = 0; i < g->mlg_nwqs; ++i) {
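+		/*
+		 * As on the RX side, pick EQs round-robin, wrapping back to
+		 * index 1 and skipping EQs not available for TX use.
+		 */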
+ eq = NULL;
+ while (eq == NULL) {
+ eq = &mlxp->mlx_eqs[mlxp->mlx_next_eq++];
+ if (mlxp->mlx_next_eq >= mlxp->mlx_intr_count)
+ mlxp->mlx_next_eq = 1;
+ if (eq->mleq_type != MLXCX_EQ_TYPE_ANY &&
+ eq->mleq_type != MLXCX_EQ_TYPE_TX) {
+ /* Try the next one */
+ eq = NULL;
+ }
+ }
+
+		if (!mlxcx_cq_setup(mlxp, eq, &cq)) {
+			mutex_exit(&g->mlg_mtx);
+			return (B_FALSE);
+		}
+ cq->mlcq_stats = &g->mlg_port->mlp_stats;
+
+ sq = &g->mlg_wqs[i];
+ if (!mlxcx_sq_setup(mlxp, g->mlg_port, cq, &g->mlg_tis, sq)) {
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ sq->mlwq_group = g;
+ }
+
+ mutex_exit(&g->mlg_mtx);
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
+ mlxcx_work_queue_t *sq)
+{
+ uint_t i;
+ mlxcx_buffer_t *b;
+ mlxcx_completion_queue_t *cq;
+
+ mutex_enter(&g->mlg_mtx);
+
+ cq = sq->mlwq_cq;
+ ASSERT(cq != NULL);
+
+ mutex_enter(&cq->mlcq_mtx);
+ mutex_enter(&sq->mlwq_mtx);
+ if (sq->mlwq_state & MLXCX_WQ_STARTED) {
+ mutex_exit(&sq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_TRUE);
+ }
+
+ ASSERT0(sq->mlwq_state & MLXCX_WQ_BUFFERS);
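+	/*
+	 * Pre-create one and a half times the ring size in foreign
+	 * (bindable) buffers, plus a full ring's worth of copy buffers.
+	 */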
+ for (i = 0; i < sq->mlwq_nents; ++i) {
+ if (!mlxcx_buf_create_foreign(mlxp, sq->mlwq_foreign_bufs, &b))
+ break;
+ mlxcx_buf_return(mlxp, b);
+ }
+ for (i = 0; i < sq->mlwq_nents / 2; ++i) {
+ if (!mlxcx_buf_create_foreign(mlxp, sq->mlwq_foreign_bufs, &b))
+ break;
+ mlxcx_buf_return(mlxp, b);
+ }
+ for (i = 0; i < sq->mlwq_nents; ++i) {
+ if (!mlxcx_buf_create(mlxp, sq->mlwq_bufs, &b))
+ break;
+ mlxcx_buf_return(mlxp, b);
+ }
+ sq->mlwq_state |= MLXCX_WQ_BUFFERS;
+
+ if (!mlxcx_cmd_start_sq(mlxp, sq)) {
+ mutex_exit(&sq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&g->mlg_mtx);
+ return (B_FALSE);
+ }
+ g->mlg_state |= MLXCX_GROUP_RUNNING;
+
+ (void) mlxcx_sq_add_nop(mlxp, sq);
+
+ mutex_exit(&sq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+ mutex_exit(&g->mlg_mtx);
+
+ return (B_TRUE);
+}
+
+static boolean_t
+mlxcx_sq_ring_dbell(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, uint_t first)
+{
+ uint_t idx;
+ mlxcx_bf_t *bf;
+ ddi_fm_error_t err;
+ uint_t try = 0;
+
+ ASSERT3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ);
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+
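+	/*
+	 * Publish the updated producer counter via the doorbell record
+	 * before writing to the doorbell register itself.
+	 */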
+ mlwq->mlwq_doorbell->mlwqd_send_counter = to_be16(mlwq->mlwq_pc);
+
+ ASSERT(mlwq->mlwq_cq != NULL);
+ ASSERT(mlwq->mlwq_cq->mlcq_eq != NULL);
+ idx = mlwq->mlwq_cq->mlcq_eq->mleq_intr_index & MLXCX_BF_PER_UAR_MASK;
+ bf = &mlwq->mlwq_uar->mlu_bf[idx];
+
+retry:
+ MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ if (try++ < mlxcx_doorbell_tries) {
+ ddi_fm_dma_err_clear(
+ mlwq->mlwq_doorbell_dma.mxdb_dma_handle,
+ DDI_FME_VERSION);
+ goto retry;
+ } else {
+ goto err;
+ }
+ }
+
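+	/*
+	 * Ring the doorbell by writing the first qword of the WQE to the
+	 * blue flame register for this UAR.
+	 */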
+ mlxcx_put64(mlxp, bf->mbf_even, from_be64(
+ mlwq->mlwq_bf_ent[first].mlsqbf_qwords[0]));
+ ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status == DDI_FM_OK)
+ return (B_TRUE);
+ if (try++ < mlxcx_doorbell_tries) {
+ ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
+ goto retry;
+ }
+
+err:
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+ return (B_FALSE);
+}
+
+boolean_t
+mlxcx_sq_add_nop(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ uint_t index, start_pc;
+ mlxcx_sendq_ent_t *ent0;
+ ddi_fm_error_t err;
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+
+ index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1);
+ ent0 = &mlwq->mlwq_send_ent[index];
+ start_pc = mlwq->mlwq_pc;
+ ++mlwq->mlwq_pc;
+
+ bzero(ent0, sizeof (mlxcx_sendq_ent_t));
+ ent0->mlsqe_control.mlcs_opcode = MLXCX_WQE_OP_NOP;
+ ent0->mlsqe_control.mlcs_qp_or_sq = to_be24(mlwq->mlwq_num);
+ ent0->mlsqe_control.mlcs_wqe_index = to_be16(start_pc);
+
+ set_bits8(&ent0->mlsqe_control.mlcs_flags,
+ MLXCX_SQE_FENCE_MODE, MLXCX_SQE_FENCE_NONE);
+ set_bits8(&ent0->mlsqe_control.mlcs_flags,
+ MLXCX_SQE_COMPLETION_MODE, MLXCX_SQE_CQE_ALWAYS);
+
+ ent0->mlsqe_control.mlcs_ds = 1;
+
+ VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
+ (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
+ sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV));
+ ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ return (B_FALSE);
+ }
+ if (!mlxcx_sq_ring_dbell(mlxp, mlwq, index)) {
+ return (B_FALSE);
+ }
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
+ uint8_t *inlinehdrs, size_t inlinelen, uint32_t chkflags,
+ mlxcx_buffer_t *b0)
+{
+ uint_t index, first, ents = 0;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_sendq_ent_t *ent0;
+ mlxcx_sendq_extra_ent_t *ent;
+ mlxcx_wqe_data_seg_t *seg;
+ uint_t ptri, nptr;
+ const ddi_dma_cookie_t *c;
+ size_t rem;
+ mlxcx_buffer_t *b;
+ ddi_fm_error_t err;
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ ASSERT3P(b0->mlb_tx_head, ==, b0);
+ ASSERT3U(b0->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
+ cq = mlwq->mlwq_cq;
+
+ index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1);
+ ent0 = &mlwq->mlwq_send_ent[index];
+ b0->mlb_wqe_index = mlwq->mlwq_pc;
+ ++mlwq->mlwq_pc;
+ ++ents;
+
+ first = index;
+
+ mutex_enter(&cq->mlcq_bufbmtx);
+ list_insert_tail(&cq->mlcq_buffers_b, b0);
+ atomic_inc_64(&cq->mlcq_bufcnt);
+ mutex_exit(&cq->mlcq_bufbmtx);
+
+ bzero(ent0, sizeof (mlxcx_sendq_ent_t));
+ ent0->mlsqe_control.mlcs_opcode = MLXCX_WQE_OP_SEND;
+ ent0->mlsqe_control.mlcs_qp_or_sq = to_be24(mlwq->mlwq_num);
+ ent0->mlsqe_control.mlcs_wqe_index = to_be16(b0->mlb_wqe_index);
+
+ set_bits8(&ent0->mlsqe_control.mlcs_flags,
+ MLXCX_SQE_FENCE_MODE, MLXCX_SQE_FENCE_WAIT_OTHERS);
+ set_bits8(&ent0->mlsqe_control.mlcs_flags,
+ MLXCX_SQE_COMPLETION_MODE, MLXCX_SQE_CQE_ALWAYS);
+
+ VERIFY3U(inlinelen, <=, sizeof (ent0->mlsqe_eth.mles_inline_headers));
+ set_bits16(&ent0->mlsqe_eth.mles_szflags,
+ MLXCX_SQE_ETH_INLINE_HDR_SZ, inlinelen);
+ if (inlinelen > 0) {
+ bcopy(inlinehdrs, ent0->mlsqe_eth.mles_inline_headers,
+ inlinelen);
+ }
+
+ ent0->mlsqe_control.mlcs_ds =
+ offsetof(mlxcx_sendq_ent_t, mlsqe_data) / 16;
+
+ if (chkflags & HCK_IPV4_HDRCKSUM) {
+ ASSERT(mlxp->mlx_caps->mlc_checksum);
+ set_bit8(&ent0->mlsqe_eth.mles_csflags,
+ MLXCX_SQE_ETH_CSFLAG_L3_CHECKSUM);
+ }
+ if (chkflags & HCK_FULLCKSUM) {
+ ASSERT(mlxp->mlx_caps->mlc_checksum);
+ set_bit8(&ent0->mlsqe_eth.mles_csflags,
+ MLXCX_SQE_ETH_CSFLAG_L4_CHECKSUM);
+ }
+
+ b = b0;
+ ptri = 0;
+ nptr = sizeof (ent0->mlsqe_data) / sizeof (mlxcx_wqe_data_seg_t);
+ seg = ent0->mlsqe_data;
+ while (b != NULL) {
+ rem = b->mlb_used;
+
+ c = NULL;
+ while (rem > 0 &&
+ (c = mlxcx_dma_cookie_iter(&b->mlb_dma, c)) != NULL) {
+ if (ptri >= nptr) {
+ index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1);
+ ent = &mlwq->mlwq_send_extra_ent[index];
+ ++mlwq->mlwq_pc;
+ ++ents;
+
+ seg = ent->mlsqe_data;
+ ptri = 0;
+ nptr = sizeof (ent->mlsqe_data) /
+ sizeof (mlxcx_wqe_data_seg_t);
+ }
+
+ seg->mlds_lkey = to_be32(mlxp->mlx_rsvd_lkey);
+ if (c->dmac_size > rem) {
+ seg->mlds_byte_count = to_be32(rem);
+ rem = 0;
+ } else {
+ seg->mlds_byte_count = to_be32(c->dmac_size);
+ rem -= c->dmac_size;
+ }
+ seg->mlds_address = to_be64(c->dmac_laddress);
+ ++seg;
+ ++ptri;
+ ++ent0->mlsqe_control.mlcs_ds;
+
+ ASSERT3U(ent0->mlsqe_control.mlcs_ds, <=,
+ MLXCX_SQE_MAX_DS);
+ }
+
+ if (b == b0) {
+ b = list_head(&b0->mlb_tx_chain);
+ } else {
+ b = list_next(&b0->mlb_tx_chain, b);
+ }
+ }
+
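+	/*
+	 * Fill any unused scatter pointers in the final entry with the
+	 * special null value, as on the receive side.
+	 */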
+ for (; ptri < nptr; ++ptri, ++seg) {
+ seg->mlds_lkey = to_be32(MLXCX_NULL_LKEY);
+ seg->mlds_byte_count = to_be32(0);
+ seg->mlds_address = to_be64(0);
+ }
+
+ /*
+ * Make sure the workqueue entry is flushed out before updating
+ * the doorbell.
+ */
+ VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
+ (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
+ ents * sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV));
+ ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ return (B_FALSE);
+ }
+ if (!mlxcx_sq_ring_dbell(mlxp, mlwq, first)) {
+ return (B_FALSE);
+ }
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_rq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
+ mlxcx_buffer_t *buf)
+{
+ return (mlxcx_rq_add_buffers(mlxp, mlwq, &buf, 1));
+}
+
+boolean_t
+mlxcx_rq_add_buffers(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
+ mlxcx_buffer_t **bufs, size_t nbufs)
+{
+ uint_t index;
+ mlxcx_recvq_ent_t *ent;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_wqe_data_seg_t *seg;
+ uint_t bi, ptri;
+ const ddi_dma_cookie_t *c;
+ mlxcx_buffer_t *buf;
+ ddi_fm_error_t err;
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ cq = mlwq->mlwq_cq;
+ ASSERT(mutex_owned(&cq->mlcq_mtx));
+
+ for (bi = 0; bi < nbufs; ++bi) {
+ buf = bufs[bi];
+ bufs[bi] = NULL;
+ ASSERT3U(buf->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
+
+ index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1);
+ ent = &mlwq->mlwq_recv_ent[index];
+ buf->mlb_wqe_index = mlwq->mlwq_pc;
+
+ ++mlwq->mlwq_pc;
+
+ mutex_enter(&cq->mlcq_bufbmtx);
+ list_insert_tail(&cq->mlcq_buffers, buf);
+ atomic_inc_64(&cq->mlcq_bufcnt);
+ mutex_exit(&cq->mlcq_bufbmtx);
+
+ ASSERT3U(buf->mlb_dma.mxdb_ncookies, <=, MLXCX_RECVQ_MAX_PTRS);
+ ptri = 0;
+ c = NULL;
+ while ((c = mlxcx_dma_cookie_iter(&buf->mlb_dma, c)) != NULL) {
+ seg = &ent->mlrqe_data[ptri++];
+ seg->mlds_lkey = to_be32(mlxp->mlx_rsvd_lkey);
+ seg->mlds_byte_count = to_be32(c->dmac_size);
+ seg->mlds_address = to_be64(c->dmac_laddress);
+ }
+ /*
+ * Fill any unused scatter pointers with the special null
+ * value.
+ */
+ for (; ptri < MLXCX_RECVQ_MAX_PTRS; ++ptri) {
+ seg = &ent->mlrqe_data[ptri];
+ seg->mlds_lkey = to_be32(MLXCX_NULL_LKEY);
+ seg->mlds_byte_count = to_be32(0);
+ seg->mlds_address = to_be64(0);
+ }
+
+ /*
+ * Make sure the workqueue entry is flushed out before updating
+ * the doorbell.
+ */
+ VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
+ (uintptr_t)ent - (uintptr_t)mlwq->mlwq_recv_ent,
+ sizeof (mlxcx_recvq_ent_t), DDI_DMA_SYNC_FORDEV));
+ ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ return (B_FALSE);
+ }
+ }
+
+ mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(mlwq->mlwq_pc);
+ /*
+ * Flush the CQ doorbell as well so that HW knows how many
+ * completions we've consumed.
+ */
+ MLXCX_DMA_SYNC(cq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(cq->mlcq_doorbell_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ return (B_FALSE);
+ }
+ MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ return (B_FALSE);
+ }
+ return (B_TRUE);
+}
+
+void
+mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+ size_t target, current, want, done, n;
+ mlxcx_completion_queue_t *cq;
+ mlxcx_buffer_t *b[MLXCX_RQ_REFILL_STEP];
+ uint_t i;
+
+ ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+ cq = mlwq->mlwq_cq;
+ ASSERT(mutex_owned(&cq->mlcq_mtx));
+
+ ASSERT(mlwq->mlwq_state & MLXCX_WQ_BUFFERS);
+
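+	/*
+	 * Aim to keep the queue stocked to within one refill step of its
+	 * full capacity.
+	 */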
+ target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
+
+ if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ return;
+
+ current = cq->mlcq_bufcnt;
+
+ if (current >= target - MLXCX_RQ_REFILL_STEP)
+ return;
+
+ want = target - current;
+ done = 0;
+
+ while (!(mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) && done < want) {
+ n = mlxcx_buf_take_n(mlxp, mlwq, b, MLXCX_RQ_REFILL_STEP);
+ if (n == 0) {
+			mlxcx_warn(mlxp, "!exiting rq refill early, done %lu "
+			    "but wanted %lu", done, want);
+ return;
+ }
+ if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ for (i = 0; i < n; ++i)
+ mlxcx_buf_return(mlxp, b[i]);
+ return;
+ }
+ if (!mlxcx_rq_add_buffers(mlxp, mlwq, b, n)) {
+ /*
+ * mlxcx_rq_add_buffers NULLs out the buffers as it
+ * enqueues them, so any that are non-NULL we have to
+ * free now. The others now belong to the WQ, even if
+ * we failed.
+ */
+ for (i = 0; i < n; ++i) {
+ if (b[i] != NULL) {
+ mlxcx_buf_return(mlxp, b[i]);
+ }
+ }
+ return;
+ }
+ done += n;
+ }
+}
+
+static const char *
+mlxcx_cq_err_syndrome_string(mlxcx_cq_error_syndrome_t sy)
+{
+ switch (sy) {
+ case MLXCX_CQ_ERR_LOCAL_LENGTH:
+ return ("LOCAL_LENGTH");
+ case MLXCX_CQ_ERR_LOCAL_QP_OP:
+ return ("LOCAL_QP_OP");
+ case MLXCX_CQ_ERR_LOCAL_PROTECTION:
+ return ("LOCAL_PROTECTION");
+ case MLXCX_CQ_ERR_WR_FLUSHED:
+ return ("WR_FLUSHED");
+ case MLXCX_CQ_ERR_MEM_WINDOW_BIND:
+ return ("MEM_WINDOW_BIND");
+ case MLXCX_CQ_ERR_BAD_RESPONSE:
+ return ("BAD_RESPONSE");
+ case MLXCX_CQ_ERR_LOCAL_ACCESS:
+ return ("LOCAL_ACCESS");
+ case MLXCX_CQ_ERR_XPORT_RETRY_CTR:
+ return ("XPORT_RETRY_CTR");
+ case MLXCX_CQ_ERR_RNR_RETRY_CTR:
+ return ("RNR_RETRY_CTR");
+ case MLXCX_CQ_ERR_ABORTED:
+ return ("ABORTED");
+ default:
+ return ("UNKNOWN");
+ }
+}
+
+static void
+mlxcx_fm_cqe_ereport(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
+ mlxcx_completionq_error_ent_t *ent)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+ const char *name = mlxcx_cq_err_syndrome_string(ent->mlcqee_syndrome);
+
+ if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
+ return;
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
+ MLXCX_FM_SERVICE_MLXCX, "cqe.err");
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+
+ ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
+ "syndrome", DATA_TYPE_STRING, name,
+ "syndrome_num", DATA_TYPE_UINT8, ent->mlcqee_syndrome,
+ "vendor_syndrome", DATA_TYPE_UINT8,
+ ent->mlcqee_vendor_error_syndrome,
+ "wqe_counter", DATA_TYPE_UINT16, from_be16(ent->mlcqee_wqe_counter),
+ "wq_type", DATA_TYPE_STRING,
+	    (mlcq->mlcq_wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ) ? "send" : "recv",
+ "cq_num", DATA_TYPE_UINT32, mlcq->mlcq_num,
+ "wq_num", DATA_TYPE_UINT32, mlcq->mlcq_wq->mlwq_num,
+ NULL);
+ ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_DEGRADED);
+}
+
+void
+mlxcx_tx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
+ mlxcx_completionq_ent_t *ent, mlxcx_buffer_t *buf)
+{
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+ if (ent->mlcqe_opcode == MLXCX_CQE_OP_REQ_ERR) {
+ mlxcx_completionq_error_ent_t *eent =
+ (mlxcx_completionq_error_ent_t *)ent;
+ mlxcx_fm_cqe_ereport(mlxp, mlcq, eent);
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+ mutex_enter(&mlcq->mlcq_wq->mlwq_mtx);
+ mlxcx_check_sq(mlxp, mlcq->mlcq_wq);
+ mutex_exit(&mlcq->mlcq_wq->mlwq_mtx);
+ return;
+ }
+
+ if (ent->mlcqe_opcode != MLXCX_CQE_OP_REQ) {
+ mlxcx_warn(mlxp, "!got weird cq opcode: %x", ent->mlcqe_opcode);
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+ return;
+ }
+
+ if (ent->mlcqe_send_wqe_opcode != MLXCX_WQE_OP_SEND) {
+ mlxcx_warn(mlxp, "!got weird cq wqe opcode: %x",
+ ent->mlcqe_send_wqe_opcode);
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+ return;
+ }
+
+ if (ent->mlcqe_format != MLXCX_CQE_FORMAT_BASIC) {
+ mlxcx_warn(mlxp, "!got weird cq format: %x", ent->mlcqe_format);
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+ return;
+ }
+
+ mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
+}
+
+mblk_t *
+mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
+ mlxcx_completionq_ent_t *ent, mlxcx_buffer_t *buf)
+{
+ uint32_t chkflags = 0;
+ ddi_fm_error_t err;
+
+ ASSERT(mutex_owned(&mlcq->mlcq_mtx));
+
+ if (ent->mlcqe_opcode == MLXCX_CQE_OP_RESP_ERR) {
+ mlxcx_completionq_error_ent_t *eent =
+ (mlxcx_completionq_error_ent_t *)ent;
+ mlxcx_fm_cqe_ereport(mlxp, mlcq, eent);
+ mlxcx_buf_return(mlxp, buf);
+ mutex_enter(&mlcq->mlcq_wq->mlwq_mtx);
+ mlxcx_check_rq(mlxp, mlcq->mlcq_wq);
+ mutex_exit(&mlcq->mlcq_wq->mlwq_mtx);
+ return (NULL);
+ }
+
+ if (ent->mlcqe_opcode != MLXCX_CQE_OP_RESP) {
+ mlxcx_warn(mlxp, "!got weird cq opcode: %x", ent->mlcqe_opcode);
+ mlxcx_buf_return(mlxp, buf);
+ return (NULL);
+ }
+
+ if (ent->mlcqe_format != MLXCX_CQE_FORMAT_BASIC) {
+ mlxcx_warn(mlxp, "!got weird cq format: %x", ent->mlcqe_format);
+ mlxcx_buf_return(mlxp, buf);
+ return (NULL);
+ }
+
+ if (ent->mlcqe_rx_drop_counter > 0) {
+ atomic_add_64(&mlcq->mlcq_stats->mlps_rx_drops,
+ ent->mlcqe_rx_drop_counter);
+ }
+
+ MLXCX_DMA_SYNC(buf->mlb_dma, DDI_DMA_SYNC_FORCPU);
+ ddi_fm_dma_err_get(buf->mlb_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ ddi_fm_dma_err_clear(buf->mlb_dma.mxdb_dma_handle,
+ DDI_FME_VERSION);
+ mlxcx_buf_return(mlxp, buf);
+ return (NULL);
+ }
+
+ if (!mlxcx_buf_loan(mlxp, buf)) {
+ mlxcx_warn(mlxp, "!loan failed, dropping packet");
+ mlxcx_buf_return(mlxp, buf);
+ return (NULL);
+ }
+
+ buf->mlb_mp->b_next = NULL;
+ buf->mlb_mp->b_cont = NULL;
+ buf->mlb_mp->b_wptr = buf->mlb_mp->b_rptr +
+ from_be32(ent->mlcqe_byte_cnt);
+
+ if (get_bit8(ent->mlcqe_csflags, MLXCX_CQE_CSFLAGS_L4_OK)) {
+ chkflags |= HCK_FULLCKSUM_OK;
+ }
+ if (get_bit8(ent->mlcqe_csflags, MLXCX_CQE_CSFLAGS_L3_OK)) {
+ chkflags |= HCK_IPV4_HDRCKSUM_OK;
+ }
+ if (chkflags != 0) {
+ mac_hcksum_set(buf->mlb_mp, 0, 0, 0,
+ from_be16(ent->mlcqe_checksum), chkflags);
+ }
+
+ /*
+ * Don't check if a refill is needed on every single completion,
+ * since checking involves taking the RQ lock.
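+	 * The index test below limits the check to roughly one in every
+	 * eight completions.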
+ */
+ if ((buf->mlb_wqe_index & 0x7) == 0) {
+ mlxcx_work_queue_t *wq = mlcq->mlcq_wq;
+ ASSERT(wq != NULL);
+ mutex_enter(&wq->mlwq_mtx);
+ if (!(wq->mlwq_state & MLXCX_WQ_TEARDOWN))
+ mlxcx_rq_refill(mlxp, wq);
+ mutex_exit(&wq->mlwq_mtx);
+ }
+
+ return (buf->mlb_mp);
+}
+
+static void
+mlxcx_buf_mp_return(caddr_t arg)
+{
+ mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
+ mlxcx_t *mlxp = b->mlb_mlx;
+
+ if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
+ b->mlb_mp = NULL;
+ return;
+ }
+ /*
+ * The mblk for this buffer_t (in its mlb_mp field) has been used now,
+ * so NULL it out.
+ */
+ b->mlb_mp = NULL;
+ mlxcx_buf_return(mlxp, b);
+}
+
+boolean_t
+mlxcx_buf_create(mlxcx_t *mlxp, mlxcx_buf_shard_t *shard, mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b;
+ ddi_device_acc_attr_t acc;
+ ddi_dma_attr_t attr;
+ boolean_t ret;
+
+ b = kmem_cache_alloc(mlxp->mlx_bufs_cache, KM_SLEEP);
+ b->mlb_shard = shard;
+ b->mlb_foreign = B_FALSE;
+
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_buf_attr(mlxp, &attr);
+
+ ret = mlxcx_dma_alloc_offset(mlxp, &b->mlb_dma, &attr, &acc,
+ B_FALSE, mlxp->mlx_ports[0].mlp_mtu, 2, B_TRUE);
+ if (!ret) {
+ kmem_cache_free(mlxp->mlx_bufs_cache, b);
+ return (B_FALSE);
+ }
+
+ b->mlb_frtn.free_func = mlxcx_buf_mp_return;
+ b->mlb_frtn.free_arg = (caddr_t)b;
+ b->mlb_mp = desballoc((unsigned char *)b->mlb_dma.mxdb_va,
+ b->mlb_dma.mxdb_len, 0, &b->mlb_frtn);
+
+ *bp = b;
+
+ return (B_TRUE);
+}
+
+boolean_t
+mlxcx_buf_create_foreign(mlxcx_t *mlxp, mlxcx_buf_shard_t *shard,
+ mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b;
+ ddi_dma_attr_t attr;
+ boolean_t ret;
+
+ b = kmem_cache_alloc(mlxp->mlx_bufs_cache, KM_SLEEP);
+ b->mlb_shard = shard;
+ b->mlb_foreign = B_TRUE;
+
+ mlxcx_dma_buf_attr(mlxp, &attr);
+
+ ret = mlxcx_dma_init(mlxp, &b->mlb_dma, &attr, B_TRUE);
+ if (!ret) {
+ kmem_cache_free(mlxp->mlx_bufs_cache, b);
+ return (B_FALSE);
+ }
+
+ *bp = b;
+
+ return (B_TRUE);
+}
+
+static void
+mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b;
+ mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
+
+ mutex_enter(&s->mlbs_mtx);
+ while (list_is_empty(&s->mlbs_free))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ b = list_remove_head(&s->mlbs_free);
+ ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
+ ASSERT(b->mlb_foreign);
+ b->mlb_state = MLXCX_BUFFER_ON_WQ;
+ list_insert_tail(&s->mlbs_busy, b);
+ mutex_exit(&s->mlbs_mtx);
+
+ *bp = b;
+}
+
+boolean_t
+mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mblk_t *mpb, size_t off, mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b, *b0 = NULL;
+ boolean_t first = B_TRUE;
+ ddi_fm_error_t err;
+ mblk_t *mp;
+ uint8_t *rptr;
+ size_t sz;
+ size_t ncookies = 0;
+ boolean_t ret;
+ uint_t attempts = 0;
+
+ for (mp = mpb; mp != NULL; mp = mp->b_cont) {
+ rptr = mp->b_rptr;
+ sz = MBLKL(mp);
+
+ if (off > 0)
+ ASSERT3U(off, <, sz);
+ rptr += off;
+ sz -= off;
+
+ if (sz < mlxp->mlx_props.mldp_tx_bind_threshold)
+ goto copyb;
+
+ mlxcx_buf_take_foreign(mlxp, wq, &b);
+ ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off, B_FALSE);
+
+ if (!ret) {
+ mlxcx_buf_return(mlxp, b);
+
+copyb:
+ mlxcx_buf_take(mlxp, wq, &b);
+ ASSERT3U(b->mlb_dma.mxdb_len, >=, sz);
+ bcopy(rptr, b->mlb_dma.mxdb_va, sz);
+ MLXCX_DMA_SYNC(b->mlb_dma, DDI_DMA_SYNC_FORDEV);
+ ddi_fm_dma_err_get(b->mlb_dma.mxdb_dma_handle, &err,
+ DDI_FME_VERSION);
+ if (err.fme_status != DDI_FM_OK) {
+ ddi_fm_dma_err_clear(b->mlb_dma.mxdb_dma_handle,
+ DDI_FME_VERSION);
+ mlxcx_buf_return(mlxp, b);
+ if (++attempts > MLXCX_BUF_BIND_MAX_ATTEMTPS) {
+ *bp = NULL;
+ return (B_FALSE);
+ }
+ goto copyb;
+ }
+ }
+
+ /*
+ * We might overestimate here when we've copied data, since
+ * the buffer might be longer than what we copied into it. This
+ * is safe since it's always wrong in the conservative
+ * direction (and we will blow up later when we actually
+ * generate the WQE anyway).
+ *
+ * If the assert below ever blows, we'll have to come and fix
+ * this up so we can transmit these packets.
+ */
+ ncookies += b->mlb_dma.mxdb_ncookies;
+
+ if (first)
+ b0 = b;
+
+ if (!first)
+ b->mlb_state = MLXCX_BUFFER_ON_CHAIN;
+
+ b->mlb_tx_mp = mp;
+ b->mlb_tx_head = b0;
+ b->mlb_used = sz;
+
+ if (!first)
+ list_insert_tail(&b0->mlb_tx_chain, b);
+ first = B_FALSE;
+ off = 0;
+ }
+
+ ASSERT3U(ncookies, <=, MLXCX_SQE_MAX_PTRS);
+
+ *bp = b0;
+ return (B_TRUE);
+}
+
+void
+mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, mlxcx_buffer_t **bp)
+{
+ mlxcx_buffer_t *b;
+ mlxcx_buf_shard_t *s = wq->mlwq_bufs;
+
+ mutex_enter(&s->mlbs_mtx);
+ while (list_is_empty(&s->mlbs_free))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ b = list_remove_head(&s->mlbs_free);
+ ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
+ b->mlb_state = MLXCX_BUFFER_ON_WQ;
+ list_insert_tail(&s->mlbs_busy, b);
+ mutex_exit(&s->mlbs_mtx);
+
+ *bp = b;
+}
+
+#define MLXCX_BUF_TAKE_N_TIMEOUT_USEC 5000
+#define MLXCX_BUF_TAKE_N_MAX_RETRIES 3
+
+size_t
+mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+ mlxcx_buffer_t **bp, size_t nbufs)
+{
+ mlxcx_buffer_t *b;
+ size_t done = 0, empty = 0;
+ clock_t wtime = drv_usectohz(MLXCX_BUF_TAKE_N_TIMEOUT_USEC);
+ mlxcx_buf_shard_t *s;
+
+ s = wq->mlwq_bufs;
+
+ mutex_enter(&s->mlbs_mtx);
+ while (done < nbufs) {
+ while (list_is_empty(&s->mlbs_free)) {
+ (void) cv_reltimedwait(&s->mlbs_free_nonempty,
+ &s->mlbs_mtx, wtime, TR_MILLISEC);
+ if (list_is_empty(&s->mlbs_free) &&
+ empty++ >= MLXCX_BUF_TAKE_N_MAX_RETRIES) {
+ mutex_exit(&s->mlbs_mtx);
+ return (done);
+ }
+ }
+ b = list_remove_head(&s->mlbs_free);
+ ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
+ b->mlb_state = MLXCX_BUFFER_ON_WQ;
+ list_insert_tail(&s->mlbs_busy, b);
+ bp[done++] = b;
+ }
+ mutex_exit(&s->mlbs_mtx);
+ return (done);
+}
+
+boolean_t
+mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
+{
+ VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
+ ASSERT3P(b->mlb_mlx, ==, mlxp);
+
+ if (b->mlb_mp == NULL) {
+ b->mlb_mp = desballoc((unsigned char *)b->mlb_dma.mxdb_va,
+ b->mlb_dma.mxdb_len, 0, &b->mlb_frtn);
+ if (b->mlb_mp == NULL)
+ return (B_FALSE);
+ }
+
+ b->mlb_state = MLXCX_BUFFER_ON_LOAN;
+ b->mlb_wqe_index = 0;
+ return (B_TRUE);
+}
+
+void
+mlxcx_buf_return_chain(mlxcx_t *mlxp, mlxcx_buffer_t *b0, boolean_t keepmp)
+{
+ mlxcx_buffer_t *b;
+
+ if (b0->mlb_tx_head != b0) {
+ mlxcx_buf_return(mlxp, b0);
+ return;
+ }
+
+ while ((b = list_head(&b0->mlb_tx_chain)) != NULL) {
+ mlxcx_buf_return(mlxp, b);
+ }
+ if (keepmp) {
+ b0->mlb_tx_mp = NULL;
+ b0->mlb_tx_head = NULL;
+ }
+ mlxcx_buf_return(mlxp, b0);
+}
+
+void
+mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
+{
+ mlxcx_buffer_state_t oldstate = b->mlb_state;
+ mlxcx_buffer_t *txhead = b->mlb_tx_head;
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+ mblk_t *mp = b->mlb_tx_mp;
+
+ VERIFY3U(oldstate, !=, MLXCX_BUFFER_FREE);
+ ASSERT3P(b->mlb_mlx, ==, mlxp);
+ b->mlb_state = MLXCX_BUFFER_FREE;
+ b->mlb_wqe_index = 0;
+ b->mlb_tx_head = NULL;
+ b->mlb_tx_mp = NULL;
+ b->mlb_used = 0;
+ ASSERT(list_is_empty(&b->mlb_tx_chain));
+
+ mutex_enter(&s->mlbs_mtx);
+ switch (oldstate) {
+ case MLXCX_BUFFER_INIT:
+ break;
+ case MLXCX_BUFFER_ON_WQ:
+ list_remove(&s->mlbs_busy, b);
+ break;
+ case MLXCX_BUFFER_ON_LOAN:
+ ASSERT(!b->mlb_foreign);
+ list_remove(&s->mlbs_busy, b);
+ break;
+ case MLXCX_BUFFER_FREE:
+ VERIFY(0);
+ break;
+ case MLXCX_BUFFER_ON_CHAIN:
+ ASSERT(txhead != NULL);
+ list_remove(&txhead->mlb_tx_chain, b);
+ list_remove(&s->mlbs_busy, b);
+ break;
+ }
+
+ if (b->mlb_foreign) {
+ if (b->mlb_dma.mxdb_flags & MLXCX_DMABUF_BOUND) {
+ mlxcx_dma_unbind(mlxp, &b->mlb_dma);
+ }
+ }
+
+ list_insert_tail(&s->mlbs_free, b);
+ cv_signal(&s->mlbs_free_nonempty);
+
+ mutex_exit(&s->mlbs_mtx);
+
+ /*
+ * For TX chain heads, free the mblk_t after we let go of the lock.
+ * This might be a borrowed buf that we in turn loaned to MAC, in which
+ * case calling freemsg() on it will re-enter this very function -- so
+ * we better not be holding the lock!
+ */
+ if (txhead == b)
+ freemsg(mp);
+}
+
+void
+mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
+{
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+ VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
+ b->mlb_state == MLXCX_BUFFER_INIT);
+ ASSERT(mutex_owned(&s->mlbs_mtx));
+ if (b->mlb_state == MLXCX_BUFFER_FREE)
+ list_remove(&s->mlbs_free, b);
+
+ /*
+ * This is going back to the kmem cache, so it needs to be set up in
+ * the same way we expect a new buffer to come out (state INIT, other
+ * fields NULL'd)
+ */
+ b->mlb_state = MLXCX_BUFFER_INIT;
+ b->mlb_shard = NULL;
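+	/*
+	 * freeb() invokes the desballoc free function
+	 * (mlxcx_buf_mp_return), which clears mlb_mp for us since this
+	 * buffer is not on loan; hence the assertion below.
+	 */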
+ if (b->mlb_mp != NULL) {
+ freeb(b->mlb_mp);
+ ASSERT(b->mlb_mp == NULL);
+ }
+ mlxcx_dma_free(&b->mlb_dma);
+ ASSERT(list_is_empty(&b->mlb_tx_chain));
+
+ kmem_cache_free(mlxp->mlx_bufs_cache, b);
+}
diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c
index 4d24932269..d4dfe83766 100644
--- a/usr/src/uts/common/io/ptm.c
+++ b/usr/src/uts/common/io/ptm.c
@@ -24,7 +24,9 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-
+/*
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ */
/*
* Pseudo Terminal Master Driver.
@@ -533,6 +535,13 @@ ptmwput(queue_t *qp, mblk_t *mp)
DBG(("ack the UNLKPT/ISPTM\n"));
miocack(qp, mp, 0, 0);
break;
+ case PTSSTTY:
+ mutex_enter(&ptmp->pt_lock);
+ ptmp->pt_state |= PTSTTY;
+ mutex_exit(&ptmp->pt_lock);
+ DBG(("ack PTSSTTY\n"));
+ miocack(qp, mp, 0, 0);
+ break;
case ZONEPT:
{
zoneid_t z;
diff --git a/usr/src/uts/common/io/pts.c b/usr/src/uts/common/io/pts.c
index d67beb255a..ff2d91f566 100644
--- a/usr/src/uts/common/io/pts.c
+++ b/usr/src/uts/common/io/pts.c
@@ -25,7 +25,9 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-
+/*
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ */
/*
* Pseudo Terminal Slave Driver.
@@ -106,6 +108,7 @@
#include <sys/sysmacros.h>
#include <sys/stream.h>
#include <sys/stropts.h>
+#include <sys/strsubr.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/debug.h>
@@ -337,7 +340,6 @@ ptsopen(
DDBGP("ptsopen: p = %p\n", (uintptr_t)ptsp);
DDBG("ptsopen: state = %x\n", ptsp->pt_state);
-
ASSERT(ptsp->pt_minor == dminor);
if ((ptsp->pt_state & PTLOCK) || !(ptsp->pt_state & PTMOPEN)) {
@@ -347,7 +349,7 @@ ptsopen(
}
/*
- * if already, open simply return...
+ * if already open, simply return...
*/
if (ptsp->pt_state & PTSOPEN) {
ASSERT(rqp->q_ptr == ptsp);
@@ -386,6 +388,9 @@ ptsopen(
mutex_exit(&ptsp->pt_lock);
mutex_exit(&ptms_lock);
+ if (ptsp->pt_state & PTSTTY)
+ STREAM(rqp)->sd_flag |= STRXPG4TTY;
+
qprocson(rqp);
/*
@@ -416,8 +421,6 @@ ptsopen(
return (0);
}
-
-
/*
* Find the address to private data identifying the slave's write
* queue. Send a 0-length msg up the slave's read queue to designate
diff --git a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c
index 0e342e8bcc..721aead599 100644
--- a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c
+++ b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c
@@ -1432,6 +1432,7 @@ ahci_tran_probe_port(dev_info_t *dip, sata_device_t *sd)
uint8_t port;
int rval = SATA_SUCCESS, rval_init;
+ port_state = 0;
ahci_ctlp = ddi_get_soft_state(ahci_statep, ddi_get_instance(dip));
port = ahci_ctlp->ahcictl_cport_to_port[cport];
@@ -1996,6 +1997,7 @@ ahci_claim_free_slot(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp,
ahci_portp->ahciport_pending_tags,
ahci_portp->ahciport_pending_ncq_tags);
+ free_slots = 0;
/*
* According to the AHCI spec, system software is responsible to
* ensure that queued and non-queued commands are not mixed in
@@ -9837,6 +9839,8 @@ ahci_watchdog_handler(ahci_ctl_t *ahci_ctlp)
AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp,
"ahci_watchdog_handler entered", NULL);
+ current_slot = 0;
+ current_tags = 0;
for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) {
if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) {
continue;
diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c
index e532a551e7..d75db5f258 100644
--- a/usr/src/uts/common/io/vnic/vnic_dev.c
+++ b/usr/src/uts/common/io/vnic/vnic_dev.c
@@ -22,6 +22,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2018 Joyent, Inc.
* Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
#include <sys/types.h>
@@ -198,7 +199,7 @@ vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
uint16_t vid, boolean_t req_hwgrp_flag)
{
- mac_diag_t mac_diag;
+ mac_diag_t mac_diag = MAC_DIAG_NONE;
uint16_t mac_flags = 0;
int err;
uint_t addr_len;
@@ -1060,7 +1061,7 @@ static int
vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
uint_t pr_valsize, const void *pr_val)
{
- int err = 0;
+ int err = 0;
vnic_t *vn = m_driver;
switch (pr_num) {
@@ -1158,7 +1159,7 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint_t pr_valsize, void *pr_val)
{
vnic_t *vn = arg;
- int ret = 0;
+ int ret = 0;
boolean_t out;
switch (pr_num) {
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index 1a1a734d5f..866fd3fc2c 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -25,7 +25,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2017 Joyent, Inc.
- * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
#include <sys/types.h>
@@ -79,6 +79,7 @@
#include <sys/dld.h>
#include <sys/zone.h>
#include <sys/limits.h>
+#include <sys/ptms.h>
#include <c2/audit.h>
/*
@@ -232,6 +233,50 @@ push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
return (0);
}
+static int
+xpg4_fixup(queue_t *qp, dev_t *devp, struct stdata *stp, cred_t *crp)
+{
+ static const char *ptsmods[] = {
+ "ptem", "ldterm", "ttcompat"
+ };
+ dev_t dummydev = *devp;
+ struct strioctl strioc;
+ zoneid_t zoneid;
+ int32_t rval;
+ uint_t i;
+
+ /*
+ * Push modules required for the slave PTY to have terminal
+ * semantics out of the box; this is required by XPG4v2.
+ * These three modules are flagged as single-instance so that
+ * the system will never end up with duplicate copies pushed
+ * onto a stream.
+ */
+
+ zoneid = crgetzoneid(crp);
+ for (i = 0; i < ARRAY_SIZE(ptsmods); i++) {
+ int error;
+
+ error = push_mod(qp, &dummydev, stp, ptsmods[i], 0,
+ crp, zoneid);
+ if (error != 0)
+ return (error);
+ }
+
+ /*
+ * Send PTSSTTY down the stream
+ */
+
+ strioc.ic_cmd = PTSSTTY;
+ strioc.ic_timout = 0;
+ strioc.ic_len = 0;
+ strioc.ic_dp = NULL;
+
+ (void) strdoioctl(stp, &strioc, FNATIVE, K_TO_K, crp, &rval);
+
+ return (0);
+}
+
/*
* Open a stream device.
*/
@@ -550,10 +595,15 @@ retryap:
opendone:
+ if (error == 0 &&
+ (stp->sd_flag & (STRISTTY|STRXPG4TTY)) == (STRISTTY|STRXPG4TTY)) {
+ error = xpg4_fixup(qp, devp, stp, crp);
+ }
+
/*
* let specfs know that open failed part way through
*/
- if (error) {
+ if (error != 0) {
mutex_enter(&stp->sd_lock);
stp->sd_flag |= STREOPENFAIL;
mutex_exit(&stp->sd_lock);
diff --git a/usr/src/uts/common/sys/fm/io/ddi.h b/usr/src/uts/common/sys/fm/io/ddi.h
index 75afff5c38..d8c772cdaf 100644
--- a/usr/src/uts/common/sys/fm/io/ddi.h
+++ b/usr/src/uts/common/sys/fm/io/ddi.h
@@ -66,6 +66,17 @@ extern "C" {
#define DVR_STACK_DEPTH "dvr-stack-depth"
#define DVR_ERR_SPECIFIC "dvr-error-specific"
+/* Generic NIC driver ereports. */
+#define DDI_FM_NIC "nic"
+#define DDI_FM_TXR_ERROR "txr-err"
+
+/* Valid values of the "error" field in txr-err ereports */
+#define DDI_FM_TXR_ERROR_WHITELIST "whitelist"
+#define DDI_FM_TXR_ERROR_NOTSUPP "notsupp"
+#define DDI_FM_TXR_ERROR_OVERTEMP "overtemp"
+#define DDI_FM_TXR_ERROR_HWFAIL "hwfail"
+#define DDI_FM_TXR_ERROR_UNKNOWN "unknown"
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/fs/namenode.h b/usr/src/uts/common/sys/fs/namenode.h
index 9ebf2cf1ca..24d276b6c3 100644
--- a/usr/src/uts/common/sys/fs/namenode.h
+++ b/usr/src/uts/common/sys/fs/namenode.h
@@ -26,6 +26,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ */
+
#ifndef _SYS_FS_NAMENODE_H
#define _SYS_FS_NAMENODE_H
@@ -93,6 +97,10 @@ extern struct vnodeops *nm_vnodeops;
extern const struct fs_operation_def nm_vnodeops_template[];
extern kmutex_t ntable_lock;
+typedef int nm_walk_mounts_f(const struct namenode *, cred_t *, void *);
+extern int nm_walk_mounts(const vnode_t *, nm_walk_mounts_f *, cred_t *,
+ void *);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h
index 21641b884d..0f8be50fde 100644
--- a/usr/src/uts/common/sys/mac_client_impl.h
+++ b/usr/src/uts/common/sys/mac_client_impl.h
@@ -230,7 +230,7 @@ extern int mac_tx_percpu_cnt;
&(mcip)->mci_flent->fe_resource_props)
#define MCIP_EFFECTIVE_PROPS(mcip) \
- (mcip->mci_flent == NULL ? NULL : \
+ (mcip->mci_flent == NULL ? NULL : \
&(mcip)->mci_flent->fe_effective_props)
#define MCIP_RESOURCE_PROPS_MASK(mcip) \
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index 21f2c10a8e..3c103c073a 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -290,6 +290,54 @@ struct mac_group_s {
#define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable
#define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable
+#define MAC_RING_TX(mhp, rh, mp, rest) { \
+ mac_ring_handle_t mrh = rh; \
+ mac_impl_t *mimpl = (mac_impl_t *)mhp; \
+ /* \
+ * Send packets through a selected tx ring, or through the \
+ * default handler if there is no selected ring. \
+ */ \
+ if (mrh == NULL) \
+ mrh = mimpl->mi_default_tx_ring; \
+ if (mrh == NULL) { \
+ rest = mimpl->mi_tx(mimpl->mi_driver, mp); \
+ } else { \
+ rest = mac_hwring_tx(mrh, mp); \
+ } \
+}
+
+/*
+ * This is the final stop before reaching the underlying driver
+ * or aggregation, so this is where the bridging hook is implemented.
+ * Packets that are bridged will return through mac_bridge_tx(), with
+ * rh nulled out if the bridge chooses to send output on a different
+ * link due to forwarding.
+ */
+#define MAC_TX(mip, rh, mp, src_mcip) { \
+ mac_ring_handle_t rhandle = (rh); \
+ /* \
+ * If there is a bound Hybrid I/O share, send packets through \
+ * the default tx ring. (When there's a bound Hybrid I/O share, \
+ * the tx rings of this client are mapped in the guest domain \
+ * and not accessible from here.) \
+ */ \
+ _NOTE(CONSTANTCONDITION) \
+ if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \
+ rhandle = (mip)->mi_default_tx_ring; \
+ if (mip->mi_promisc_list != NULL) \
+ mac_promisc_dispatch(mip, mp, src_mcip); \
+ /* \
+ * Grab the proper transmit pointer and handle. Special \
+ * optimization: we can test mi_bridge_link itself atomically, \
+ * and if that indicates no bridge send packets through tx ring.\
+ */ \
+ if (mip->mi_bridge_link == NULL) { \
+ MAC_RING_TX(mip, rhandle, mp, mp); \
+ } else { \
+ mp = mac_bridge_tx(mip, rhandle, mp); \
+ } \
+}
+
/* mci_tx_flag */
#define MCI_TX_QUIESCE 0x1
diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h
index 2dea3a4758..fc3b3892bd 100644
--- a/usr/src/uts/common/sys/mac_provider.h
+++ b/usr/src/uts/common/sys/mac_provider.h
@@ -567,14 +567,14 @@ extern void mac_free(mac_register_t *);
extern int mac_register(mac_register_t *, mac_handle_t *);
extern int mac_disable_nowait(mac_handle_t);
extern int mac_disable(mac_handle_t);
-extern int mac_unregister(mac_handle_t);
-extern void mac_rx(mac_handle_t, mac_resource_handle_t,
+extern int mac_unregister(mac_handle_t);
+extern void mac_rx(mac_handle_t, mac_resource_handle_t,
mblk_t *);
-extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t,
+extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t,
mblk_t *, uint64_t);
-extern void mac_link_update(mac_handle_t, link_state_t);
-extern void mac_link_redo(mac_handle_t, link_state_t);
-extern void mac_unicst_update(mac_handle_t,
+extern void mac_link_update(mac_handle_t, link_state_t);
+extern void mac_link_redo(mac_handle_t, link_state_t);
+extern void mac_unicst_update(mac_handle_t,
const uint8_t *);
extern void mac_dst_update(mac_handle_t, const uint8_t *);
extern void mac_tx_update(mac_handle_t);
diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h
index 65bdfb2e17..14e17c1c0c 100644
--- a/usr/src/uts/common/sys/strsubr.h
+++ b/usr/src/uts/common/sys/strsubr.h
@@ -29,7 +29,7 @@
*/
/*
- * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
#ifndef _SYS_STRSUBR_H
@@ -289,7 +289,7 @@ typedef struct stdata {
#define SNDMREAD 0x00008000 /* used for read notification */
#define OLDNDELAY 0x00010000 /* use old TTY semantics for */
/* NDELAY reads and writes */
- /* 0x00020000 unused */
+#define STRXPG4TTY 0x00020000 /* Use XPG4 TTY semantics */
/* 0x00040000 unused */
#define STRTOSTOP 0x00080000 /* block background writes */
#define STRCMDWAIT 0x00100000 /* someone is doing an _I_CMD */
diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c
index efb4c81092..aea7f2e856 100644
--- a/usr/src/uts/i86pc/io/mp_platform_common.c
+++ b/usr/src/uts/i86pc/io/mp_platform_common.c
@@ -24,6 +24,7 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017 by Delphix. All rights reserved.
* Copyright (c) 2019, Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -79,7 +80,7 @@
/*
* Local Function Prototypes
*/
-static int apic_handle_defconf();
+static int apic_handle_defconf(void);
static int apic_parse_mpct(caddr_t mpct, int bypass);
static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
static int apic_checksum(caddr_t bptr, int len);
@@ -182,7 +183,7 @@ int apic_num_rebind = 0;
* Maximum number of APIC CPUs in the system, -1 indicates that dynamic
* allocation of CPU ids is disabled.
*/
-int apic_max_nproc = -1;
+int apic_max_nproc = -1;
int apic_nproc = 0;
size_t apic_cpus_size = 0;
int apic_defconf = 0;
@@ -589,10 +590,22 @@ apic_free_apic_cpus(void)
}
}
+static uint32_t
+acpi_get_apic_lid(void)
+{
+ uint32_t id;
+
+ id = apic_reg_ops->apic_read(APIC_LID_REG);
+ if (apic_mode != LOCAL_X2APIC)
+ id >>= APIC_ID_BIT_OFFSET;
+
+ return (id);
+}
+
static int
acpi_probe(char *modname)
{
- int i, intmax, index;
+ int i, intmax;
uint32_t id, ver;
int acpi_verboseflags = 0;
int madt_seen, madt_size;
@@ -640,9 +653,9 @@ acpi_probe(char *modname)
return (PSM_FAILURE);
}
- id = apic_reg_ops->apic_read(APIC_LID_REG);
- local_ids[0] = (uchar_t)(id >> 24);
- apic_nproc = index = 1;
+ local_ids[0] = acpi_get_apic_lid();
+
+ apic_nproc = 1;
apic_io_max = 0;
ap = (ACPI_SUBTABLE_HEADER *) (acpi_mapic_dtp + 1);
@@ -657,25 +670,19 @@ acpi_probe(char *modname)
if (mpa->Id == 255) {
cmn_err(CE_WARN, "!%s: encountered "
"invalid entry in MADT: CPU %d "
- "has Local APIC Id equal to 255 ",
+ "has Local APIC Id equal to 255",
psm_name, mpa->ProcessorId);
}
if (mpa->Id == local_ids[0]) {
- ASSERT(index == 1);
proc_ids[0] = mpa->ProcessorId;
} else if (apic_nproc < NCPU && use_mp &&
apic_nproc < boot_ncpus) {
- local_ids[index] = mpa->Id;
- proc_ids[index] = mpa->ProcessorId;
- index++;
+ local_ids[apic_nproc] = mpa->Id;
+ proc_ids[apic_nproc] = mpa->ProcessorId;
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
cmn_err(CE_WARN, "%s: CPU limit "
- "exceeded"
-#if !defined(__amd64)
- " for 32-bit mode"
-#endif
- "; Solaris will use %d CPUs.",
+ "exceeded; will use %d CPUs.",
psm_name, NCPU);
warned = 1;
}
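This loop (and the x2APIC variant further down) can drop the separate index counter because of an existing invariant: slot 0 always holds the boot CPU, so apic_nproc, initialized to 1, is by construction the next free slot in local_ids[] and proc_ids[]. The resulting append pattern, with hypothetical placeholder values:

	/* new_id and new_uid stand in for values parsed from the MADT */
	local_ids[apic_nproc] = new_id;
	proc_ids[apic_nproc] = new_uid;
	apic_nproc++;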
@@ -716,7 +723,7 @@ acpi_probe(char *modname)
acpi_nmi_sp = mns;
acpi_nmi_scnt++;
- cmn_err(CE_NOTE, "!apic: nmi source: %d 0x%x\n",
+ cmn_err(CE_NOTE, "!apic: nmi source: %d 0x%x",
mns->GlobalIrq, mns->IntiFlags);
break;
@@ -727,7 +734,7 @@ acpi_probe(char *modname)
acpi_nmi_cp = mlan;
acpi_nmi_ccnt++;
- cmn_err(CE_NOTE, "!apic: local nmi: %d 0x%x %d\n",
+ cmn_err(CE_NOTE, "!apic: local nmi: %d 0x%x %d",
mlan->ProcessorId, mlan->IntiFlags,
mlan->Lint);
break;
@@ -735,7 +742,7 @@ acpi_probe(char *modname)
case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE:
/* UNIMPLEMENTED */
mao = (ACPI_MADT_LOCAL_APIC_OVERRIDE *) ap;
- cmn_err(CE_NOTE, "!apic: address override: %lx\n",
+ cmn_err(CE_NOTE, "!apic: address override: %lx",
(long)mao->Address);
break;
@@ -743,7 +750,7 @@ acpi_probe(char *modname)
/* UNIMPLEMENTED */
misa = (ACPI_MADT_IO_SAPIC *) ap;
- cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n",
+ cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx",
misa->Id, misa->GlobalIrqBase,
(long)misa->Address);
break;
@@ -753,7 +760,7 @@ acpi_probe(char *modname)
mis = (ACPI_MADT_INTERRUPT_SOURCE *) ap;
cmn_err(CE_NOTE,
- "!apic: irq source: %d %d %d 0x%x %d %d\n",
+ "!apic: irq source: %d %d %d 0x%x %d %d",
mis->Id, mis->Eid, mis->GlobalIrq,
mis->IntiFlags, mis->Type,
mis->IoSapicVector);
@@ -764,21 +771,16 @@ acpi_probe(char *modname)
if (mpx2a->LapicFlags & ACPI_MADT_ENABLED) {
if (mpx2a->LocalApicId == local_ids[0]) {
- ASSERT(index == 1);
proc_ids[0] = mpx2a->Uid;
} else if (apic_nproc < NCPU && use_mp &&
apic_nproc < boot_ncpus) {
- local_ids[index] = mpx2a->LocalApicId;
- proc_ids[index] = mpx2a->Uid;
- index++;
+ local_ids[apic_nproc] =
+ mpx2a->LocalApicId;
+ proc_ids[apic_nproc] = mpx2a->Uid;
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
cmn_err(CE_WARN, "%s: CPU limit "
- "exceeded"
-#if !defined(__amd64)
- " for 32-bit mode"
-#endif
- "; Solaris will use %d CPUs.",
+ "exceeded; will use %d CPUs.",
psm_name, NCPU);
warned = 1;
}
@@ -792,9 +794,9 @@ acpi_probe(char *modname)
if (mx2alan->Uid >> 8)
acpi_nmi_ccnt++;
-#ifdef DEBUG
+#ifdef DEBUG
cmn_err(CE_NOTE,
- "!apic: local x2apic nmi: %d 0x%x %d\n",
+ "!apic: local x2apic nmi: %d 0x%x %d",
mx2alan->Uid, mx2alan->IntiFlags, mx2alan->Lint);
#endif
@@ -848,19 +850,19 @@ acpi_probe(char *modname)
* The state for each apic CPU info structure will be assigned according
* to the following rules:
* Rule 1:
- * Slot index range: [0, min(apic_nproc, boot_ncpus))
+ * Slot index range: [0, min(apic_nproc, boot_ncpus))
* State flags: 0
* Note: cpu exists and will be configured/enabled at boot time
* Rule 2:
- * Slot index range: [boot_ncpus, apic_nproc)
+ * Slot index range: [boot_ncpus, apic_nproc)
* State flags: APIC_CPU_FREE | APIC_CPU_DIRTY
* Note: cpu exists but won't be configured/enabled at boot time
* Rule 3:
- * Slot index range: [apic_nproc, boot_ncpus)
+ * Slot index range: [apic_nproc, boot_ncpus)
* State flags: APIC_CPU_FREE
* Note: cpu doesn't exist at boot time
* Rule 4:
- * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus)
+ * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus)
* State flags: APIC_CPU_FREE
* Note: cpu doesn't exist at boot time
*/
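A worked example may help here; with illustrative counts of apic_nproc = 4, boot_ncpus = 2 and max_ncpus = 8, the rules yield:

	/*
	 * Slots [0, 2): flags 0                        (Rule 1)
	 * Slots [2, 4): APIC_CPU_FREE | APIC_CPU_DIRTY (Rule 2)
	 * Rule 3's range [4, 2) is empty; it applies only when
	 * boot_ncpus exceeds apic_nproc.
	 * Slots [4, 8): APIC_CPU_FREE                  (Rule 4)
	 */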
@@ -1014,10 +1016,8 @@ cleanup:
* Fill all details as MP table does not give any more info
*/
static int
-apic_handle_defconf()
+apic_handle_defconf(void)
{
- uint_t lid;
-
/* Failed to probe ACPI MADT tables, disable CPU DR. */
apic_max_nproc = -1;
apic_free_apic_cpus();
@@ -1035,8 +1035,7 @@ apic_handle_defconf()
CPUSET_ONLY(apic_cpumask, 0);
CPUSET_ADD(apic_cpumask, 1);
apic_nproc = 2;
- lid = apic_reg_ops->apic_read(APIC_LID_REG);
- apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET);
+ apic_cpus[0].aci_local_id = acpi_get_apic_lid();
/*
* According to the PC+MP spec 1.1, the local ids
 * for the default configuration have to be 0 or 1
@@ -1081,10 +1080,9 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
struct apic_io_entry *ioapicp;
struct apic_io_intr *intrp;
int ioapic_ix;
- uint_t lid;
- uint32_t id;
- uchar_t hid;
- int warned = 0;
+ uint32_t lid, id;
+ uchar_t hid;
+ int warned = 0;
/*LINTED: pointer cast may result in improper alignment */
procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
@@ -1103,11 +1101,7 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
cmn_err(CE_WARN, "%s: CPU limit "
- "exceeded"
-#if !defined(__amd64)
- " for 32-bit mode"
-#endif
- "; Solaris will use %d CPUs.",
+ "exceeded; will use %d CPUs.",
psm_name, NCPU);
warned = 1;
}
@@ -1137,10 +1131,9 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
if (!bypass_cpus_and_ioapics &&
procp->proc_cpuflags & CPUFLAGS_EN) {
if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */
- lid = apic_reg_ops->apic_read(APIC_LID_REG);
+ lid = acpi_get_apic_lid();
apic_cpus[0].aci_local_id = procp->proc_apicid;
- if (apic_cpus[0].aci_local_id !=
- (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) {
+ if (apic_cpus[0].aci_local_id != lid) {
return (PSM_FAILURE);
}
apic_cpus[0].aci_local_ver =
@@ -1624,7 +1617,8 @@ apic_allocate_irq(int irq)
if (freeirq == -1) {
/* This shouldn't happen, but just in case */
- cmn_err(CE_WARN, "%s: NO available IRQ", psm_name);
+ cmn_err(CE_WARN, "%s: NO available IRQ",
+ psm_name);
return (-1);
}
}
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 820e0a4e31..aed47948a9 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -318,6 +318,7 @@ DRV_KMODS += log
DRV_KMODS += logindmux
DRV_KMODS += mega_sas
DRV_KMODS += mc-amd
+DRV_KMODS += mlxcx
DRV_KMODS += mm
DRV_KMODS += mouse8042
DRV_KMODS += mpt_sas
diff --git a/usr/src/uts/intel/ipsecah/Makefile b/usr/src/uts/intel/ipsecah/Makefile
index d744c131f1..dd8485f210 100644
--- a/usr/src/uts/intel/ipsecah/Makefile
+++ b/usr/src/uts/intel/ipsecah/Makefile
@@ -42,7 +42,6 @@ UTSBASE = ../..
#
MODULE = ipsecah
OBJECTS = $(IPSECAH_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(IPSECAH_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
@@ -56,7 +55,6 @@ include $(UTSBASE)/intel/Makefile.intel
# Define targets
#
ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
@@ -64,24 +62,9 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
LDFLAGS += -dy -Ndrv/ip -Ndrv/tcp -Nmisc/kcf
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += $(CNOWARN_UNINIT)
-# needs work
-$(OBJS_DIR)/ipsecahddi.o := SMOFF += index_overflow
-$(OBJS_DIR)/ipsecah.o := SMOFF += deref_check
-$(OBJS_DIR)/sadb.o := SMOFF += signed_integer_overflow_check,deref_check,indenting,shift_to_zero
-
#
# Default build targets.
#
@@ -95,12 +78,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS)
clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
diff --git a/usr/src/uts/intel/ipsecesp/Makefile b/usr/src/uts/intel/ipsecesp/Makefile
index 713ad82d7c..3ae4a4cac1 100644
--- a/usr/src/uts/intel/ipsecesp/Makefile
+++ b/usr/src/uts/intel/ipsecesp/Makefile
@@ -26,7 +26,7 @@
# Copyright (c) 2018, Joyent, Inc.
#
-# This makefile drives the production of the ipsecesp driver
+# This makefile drives the production of the ipsecesp driver
# kernel module.
#
# intel implementation architecture dependent
@@ -42,7 +42,6 @@ UTSBASE = ../..
#
MODULE = ipsecesp
OBJECTS = $(IPSECESP_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(IPSECESP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
@@ -56,7 +55,6 @@ include $(UTSBASE)/intel/Makefile.intel
# Define targets
#
ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
@@ -64,21 +62,8 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
LDFLAGS += -dy -Ndrv/ip -Ndrv/ipsecah -Nmisc/kcf
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-
CERRWARN += $(CNOWARN_UNINIT)
-# needs work
-$(OBJS_DIR)/ipsecespddi.o := SMOFF += index_overflow
-$(OBJS_DIR)/ipsecesp.o := SMOFF += deref_check
-
#
# Default build targets.
#
@@ -92,12 +77,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS)
clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
diff --git a/usr/src/uts/intel/mlxcx/Makefile b/usr/src/uts/intel/mlxcx/Makefile
new file mode 100644
index 0000000000..27bdfa4b73
--- /dev/null
+++ b/usr/src/uts/intel/mlxcx/Makefile
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+MODULE = mlxcx
+OBJECTS = $(MLXCX_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/mlxcx
+
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/mlxcx
+
+ALL_TARGET = $(BINARY) $(CONFMOD)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+LDFLAGS += -dy -N misc/mac
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
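This per-platform Makefile is one half of the usual wiring for a new driver; the other half is the object list in usr/src/uts/common/Makefile.files (also touched in this merge), which defines the MLXCX_OBJS that the OBJECTS line above expands. A sketch of that side, with an illustrative rather than exact object list:

	MLXCX_OBJS += mlxcx.o mlxcx_cmd.o mlxcx_ring.o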
diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases
index f9b5129d3b..f5b0a08489 100644
--- a/usr/src/uts/intel/os/driver_aliases
+++ b/usr/src/uts/intel/os/driver_aliases
@@ -1085,6 +1085,19 @@ mega_sas "pci1028,15.1028.1f01"
mega_sas "pci1028,15.1028.1f02"
mega_sas "pci1028,15.1028.1f03"
mouse8042 "pnpPNP,f03"
+mlxcx "pciex15b3,1013"
+mlxcx "pciex15b3,1014"
+mlxcx "pciex15b3,1015"
+mlxcx "pciex15b3,1016"
+mlxcx "pciex15b3,1017"
+mlxcx "pciex15b3,1018"
+mlxcx "pciex15b3,1019"
+mlxcx "pciex15b3,101a"
+mlxcx "pciex15b3,101b"
+mlxcx "pciex15b3,101c"
+mlxcx "pciex15b3,101d"
+mlxcx "pciex15b3,101e"
+mlxcx "pciex15b3,101f"
mpt "pci1000,30"
mpt "pci1000,50"
mpt "pci1000,54"
diff --git a/usr/src/uts/intel/procfs/Makefile b/usr/src/uts/intel/procfs/Makefile
index 1db5848438..630b6a25d3 100644
--- a/usr/src/uts/intel/procfs/Makefile
+++ b/usr/src/uts/intel/procfs/Makefile
@@ -25,6 +25,7 @@
# Use is subject to license terms.
#
# Copyright 2019 Joyent, Inc.
+# Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
#
# This makefile drives the production of the procfs file system
@@ -83,6 +84,8 @@ $(OBJS_DIR)/prsubr.o := SMOFF += all_func_returns
$(OBJS_DIR)/prcontrol.o := SMOFF += all_func_returns
$(OBJS_DIR)/prioctl.o := SMOFF += signed
+LDFLAGS += -dy -Nfs/namefs
+
#
# Default build targets.
#
diff --git a/usr/src/uts/sparc/ipsecah/Makefile b/usr/src/uts/sparc/ipsecah/Makefile
index 55ee48c88f..ad14fa4e5b 100644
--- a/usr/src/uts/sparc/ipsecah/Makefile
+++ b/usr/src/uts/sparc/ipsecah/Makefile
@@ -24,7 +24,7 @@
#
#
-# This makefile drives the production of the ipsecah driver
+# This makefile drives the production of the ipsecah driver
# kernel module.
#
# sparc architecture dependent
@@ -40,7 +40,6 @@ UTSBASE = ../..
#
MODULE = ipsecah
OBJECTS = $(IPSECAH_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(IPSECAH_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
@@ -54,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc
# Define targets
#
ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
@@ -62,21 +60,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
LDFLAGS += -dy -Ndrv/ip -Ndrv/tcp -Nmisc/kcf
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
-
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += $(CNOWARN_UNINIT)
@@ -93,12 +76,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS)
clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
diff --git a/usr/src/uts/sparc/ipsecesp/Makefile b/usr/src/uts/sparc/ipsecesp/Makefile
index 1a36e4fbc7..931dc913a2 100644
--- a/usr/src/uts/sparc/ipsecesp/Makefile
+++ b/usr/src/uts/sparc/ipsecesp/Makefile
@@ -24,7 +24,7 @@
#
#
-# This makefile drives the production of the ipsecesp driver
+# This makefile drives the production of the ipsecesp driver
# kernel module.
#
# sparc architecture dependent
@@ -40,7 +40,6 @@ UTSBASE = ../..
#
MODULE = ipsecesp
OBJECTS = $(IPSECESP_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(IPSECESP_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE)
CONF_SRCDIR = $(UTSBASE)/common/inet/ip
@@ -54,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc
# Define targets
#
ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
@@ -62,20 +60,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
#
LDFLAGS += -dy -Ndrv/ip -Ndrv/ipsecah -Nmisc/kcf
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-
CERRWARN += $(CNOWARN_UNINIT)
#
@@ -91,12 +75,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS)
clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS) $(SISCHECK_DEPS)
$(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE)
diff --git a/usr/src/uts/sparc/procfs/Makefile b/usr/src/uts/sparc/procfs/Makefile
index 8dd05fe72b..3226238bd4 100644
--- a/usr/src/uts/sparc/procfs/Makefile
+++ b/usr/src/uts/sparc/procfs/Makefile
@@ -23,6 +23,7 @@
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
#
# This makefile drives the production of the procfs file system
@@ -41,7 +42,6 @@ UTSBASE = ../..
#
MODULE = procfs
OBJECTS = $(PROC_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(PROC_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
#
@@ -53,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc
# Define targets
#
ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
@@ -64,19 +63,12 @@ $(MODSTUBS_O) := AS_CPPFLAGS += -DPROC_MODULE
CLEANFILES += $(MODSTUBS_O)
CFLAGS += $(CCVERBOSE)
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += -_gcc=-Wno-switch
CERRWARN += $(CNOWARN_UNINIT)
+LDFLAGS += -dy -Nfs/namefs
+
#
# Default build targets.
#
@@ -90,12 +82,6 @@ clean: $(CLEAN_DEPS)
clobber: $(CLOBBER_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS)
#
diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c
index f30ef8e2d4..96fb04175d 100644
--- a/usr/src/uts/sun4v/io/vnet.c
+++ b/usr/src/uts/sun4v/io/vnet.c
@@ -1133,9 +1133,9 @@ vnet_mac_register(vnet_t *vnetp)
static int
vnet_read_mac_address(vnet_t *vnetp)
{
- uchar_t *macaddr;
- uint32_t size;
- int rv;
+ uchar_t *macaddr;
+ uint32_t size;
+ int rv;
rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
@@ -2318,7 +2318,7 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
*/
static void
vnet_get_group(void *arg, mac_ring_type_t type, const int index,
- mac_group_info_t *infop, mac_group_handle_t handle)
+ mac_group_info_t *infop, mac_group_handle_t handle)
{
vnet_t *vnetp = (vnet_t *)arg;
@@ -2631,7 +2631,7 @@ vnet_rx_poll(void *arg, int bytes_to_pickup)
/* ARGSUSED */
void
vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
- boolean_t loopback)
+ boolean_t loopback)
{
vnet_t *vnetp = (vnet_t *)arg;
vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;