author     Mike Zeller <mike@mikezeller.net>  2020-03-11 16:42:06 -0400
committer  Mike Zeller <mike@mikezeller.net>  2020-03-11 16:42:06 -0400
commit     2803e163cd303fbc63f832f544bc59c4ee562252
tree       c901ef8fa7580dcebfb11316f6bb49d498bd40a8 /usr/src/uts
parent     dd05dd221f0e26bb86692b7b69c8dbeab8f4c0e5
parent     1de02da27664d38cedeccf227bd4ae92d32619d9

Merge remote-tracking branch 'origin/master' into bhyve-sync

Diffstat (limited to 'usr/src/uts')
89 files changed, 16390 insertions, 318 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 79f7ca8b47..ce7b7a3e6a 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1440,6 +1440,7 @@ ZFS_COMMON_OBJS += \ zap_leaf.o \ zap_micro.o \ zcp.o \ + zcp_change_key.o \ zcp_get.o \ zcp_set.o \ zcp_global.o \ @@ -2317,3 +2318,9 @@ BNX_OBJS += \ bnx_lm_main.o \ bnx_lm_recv.o \ bnx_lm_send.o + +# +# mlxcx(7D) +# +MLXCX_OBJS += mlxcx.o mlxcx_dma.o mlxcx_cmd.o mlxcx_intr.o mlxcx_gld.o \ + mlxcx_ring.o diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index 1d052bdcc2..8a906a2e25 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -956,6 +956,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mii/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mlxcx/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/mr_sas/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) diff --git a/usr/src/uts/common/fs/namefs/namevfs.c b/usr/src/uts/common/fs/namefs/namevfs.c index 9952f0a742..63e618de11 100644 --- a/usr/src/uts/common/fs/namefs/namevfs.c +++ b/usr/src/uts/common/fs/namefs/namevfs.c @@ -21,6 +21,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2017 by Delphix. All rights reserved. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -182,6 +183,31 @@ namefind(vnode_t *vp, vnode_t *mnt) } /* + * For each namenode that has nm_filevp == vp, call the provided function + * with the namenode as an argument. This finds all of the namefs entries + * which are mounted on vp; note that there can be more than one. + */ +int +nm_walk_mounts(const vnode_t *vp, nm_walk_mounts_f *func, cred_t *cr, void *arg) +{ + struct namenode *np; + int ret = 0; + + mutex_enter(&ntable_lock); + + for (np = *NM_FILEVP_HASH(vp); np != NULL; np = np->nm_nextp) { + if (np->nm_filevp == vp) { + if ((ret = func(np, cr, arg)) != 0) + break; + } + } + + mutex_exit(&ntable_lock); + + return (ret); +} + +/* * Force the unmouting of a file descriptor from ALL of the nodes * that it was mounted to. * At the present time, the only usage for this routine is in the @@ -480,6 +506,7 @@ nm_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *crp) newvp->v_rdev = filevp->v_rdev; newvp->v_data = (caddr_t)nodep; VFS_HOLD(vfsp); + vn_copypath(mvp, newvp); vn_exists(newvp); /* diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c index 0a9589a373..08aee63610 100644 --- a/usr/src/uts/common/fs/proc/prsubr.c +++ b/usr/src/uts/common/fs/proc/prsubr.c @@ -75,6 +75,7 @@ #include <sys/autoconf.h> #include <sys/dtrace.h> #include <sys/timod.h> +#include <sys/fs/namenode.h> #include <netinet/udp.h> #include <netinet/tcp.h> #include <inet/cc.h> @@ -2552,7 +2553,11 @@ prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) size_t pathlen; size_t sz = 0; - pathlen = MAXPATHLEN + 1; + /* + * The global zone's path to a file in a non-global zone can exceed + * MAXPATHLEN. 
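The nm_walk_mounts() function introduced in namevfs.c above calls the supplied nm_walk_mounts_f callback once for every namenode whose nm_filevp matches the given vnode, under ntable_lock, and stops early if the callback returns non-zero. A minimal sketch of a caller, using a hypothetical counting callback against the signature shown in the hunk:

    /* Hypothetical example: count the namefs nodes mounted over vp. */
    static int
    count_mounts_cb(const struct namenode *np, cred_t *cr, void *arg)
    {
            uint_t *countp = arg;

            (*countp)++;            /* one namenode with nm_filevp == vp */
            return (0);             /* non-zero would terminate the walk */
    }

    static uint_t
    count_namefs_mounts(vnode_t *vp, cred_t *cr)
    {
            uint_t count = 0;

            (void) nm_walk_mounts(vp, count_mounts_cb, cr, &count);
            return (count);
    }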
+ */ + pathlen = MAXPATHLEN * 2 + 1; pathname = kmem_alloc(pathlen, KM_SLEEP); if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { @@ -2561,6 +2566,7 @@ prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) } kmem_free(pathname, pathlen); + return (sz); } @@ -2789,6 +2795,22 @@ prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) return (sz); } +typedef struct prfdinfo_nm_path_cbdata { + proc_t *nmp_p; + u_offset_t nmp_sz; + list_t *nmp_data; +} prfdinfo_nm_path_cbdata_t; + +static int +prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg) +{ + prfdinfo_nm_path_cbdata_t *cb = arg; + + cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred); + + return (0); +} + u_offset_t prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) { @@ -2801,8 +2823,23 @@ prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t); /* Pathname */ - if (vp->v_type != VSOCK && vp->v_type != VDOOR) + switch (vp->v_type) { + case VDOOR: { + prfdinfo_nm_path_cbdata_t cb = { + .nmp_p = p, + .nmp_data = NULL, + .nmp_sz = 0 + }; + + (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); + sz += cb.nmp_sz; + break; + } + case VSOCK: + break; + default: sz += prfdinfopath(p, vp, NULL, cred); + } /* Socket options */ if (vp->v_type == VSOCK) @@ -2946,14 +2983,31 @@ prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred, } } - /* - * Don't attempt to determine the vnode path for a socket or a door - * as it will cause a linear scan of the dnlc table given there is no - * v_path associated with the vnode. - */ - if (vp->v_type != VSOCK && vp->v_type != VDOOR) + /* pathname */ + + switch (vp->v_type) { + case VDOOR: { + prfdinfo_nm_path_cbdata_t cb = { + .nmp_p = p, + .nmp_data = data, + .nmp_sz = 0 + }; + + (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); + break; + } + case VSOCK: + /* + * Don't attempt to determine the path for a socket as the + * vnode has no associated v_path. It will cause a linear scan + * of the dnlc table and result in no path being found. 
+ */ + break; + default: (void) prfdinfopath(p, vp, data, cred); + } + /* socket options */ if (vp->v_type == VSOCK) (void) prfdinfosockopt(vp, data, cred); diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c index c9d02e1c57..a092326a9c 100644 --- a/usr/src/uts/common/fs/zfs/dsl_crypt.c +++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c @@ -1220,12 +1220,7 @@ dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) tx); } -typedef struct spa_keystore_change_key_args { - const char *skcka_dsname; - dsl_crypto_params_t *skcka_cp; -} spa_keystore_change_key_args_t; - -static int +int spa_keystore_change_key_check(void *arg, dmu_tx_t *tx) { int ret; @@ -1469,7 +1464,7 @@ spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, dsl_dir_rele(dd, FTAG); } -static void +void spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_t *ds; diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h index cf19665aae..5b7c1a9510 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h @@ -164,6 +164,11 @@ typedef struct spa_keystore { avl_tree_t sk_wkeys; } spa_keystore_t; +typedef struct spa_keystore_change_key_args { + const char *skcka_dsname; + dsl_crypto_params_t *skcka_cp; +} spa_keystore_change_key_args_t; + int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out); void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); @@ -199,6 +204,8 @@ int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj, dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key); int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp); +int spa_keystore_change_key_check(void *arg, dmu_tx_t *tx); +void spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx); int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent); int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin); void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, diff --git a/usr/src/uts/common/fs/zfs/sys/zcp_change_key.h b/usr/src/uts/common/fs/zfs/sys/zcp_change_key.h new file mode 100644 index 0000000000..fea520455f --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/zcp_change_key.h @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright 2020 Joyent, Inc. 
+ */ + +#ifndef _SYS_ZCP_CHANGE_KEY_H +#define _SYS_ZCP_CHANGE_KEY_H + +#include <sys/types.h> +#include <sys/dmu.h> +#include <sys/dsl_crypt.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void zcp_synctask_change_key_cleanup(void *arg); +int zcp_synctask_change_key_check(void *arg, dmu_tx_t *tx); +void zcp_synctask_change_key_sync(void *arg, dmu_tx_t *tx); +int zcp_synctask_change_key_create_params(const char *key, size_t keylen, + zfs_keyformat_t keyformat, dsl_crypto_params_t **dcpp); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZCP_CHANGE_KEY_H */ diff --git a/usr/src/uts/common/fs/zfs/zcp_change_key.c b/usr/src/uts/common/fs/zfs/zcp_change_key.c new file mode 100644 index 0000000000..be16a8d5c6 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/zcp_change_key.c @@ -0,0 +1,144 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright 2020 Joyent, Inc. + */ + +#include <sys/ctype.h> +#include <sys/zcp.h> +#include <sys/zcp_change_key.h> + +static uint8_t +hexval(char c) +{ + if (c >= '0' && c <= '9') + return (c - '0'); + else if (c >= 'a' && c <= 'f') + return (c - 'a' + 10); + else if (c >= 'A' && c <= 'F') + return (c - 'A' + 10); + + panic("invalid hex value"); +} + +static int +hex_to_raw(const char *key, uint8_t *buf, size_t buflen) +{ + uint8_t *p; + size_t srclen = strlen(key); + size_t i; + + if (buflen * 2 != srclen) + return (SET_ERROR(EINVAL)); + + for (i = 0, p = buf; i < srclen; i += 2, p++) { + if (!isxdigit(key[i]) || !isxdigit(key[i + 1])) + return (SET_ERROR(EINVAL)); + + *p = hexval(key[i]) << 4 | hexval(key[i + 1]); + } + + return (0); +} + +int +zcp_synctask_change_key_create_params(const char *key, size_t keylen, + zfs_keyformat_t keyformat, dsl_crypto_params_t **dcpp) +{ + nvlist_t *args = fnvlist_alloc(); + nvlist_t *hidden_args = fnvlist_alloc(); + uint8_t rawkey[WRAPPING_KEY_LEN]; + uint_t rawlen = 0; + int err = 0; + + /* + * Currently, only raw and hex keys are supported in channel + * programs (there is no pbkdf2 support in the kernel to convert + * a passphrase). + */ + switch (keyformat) { + case ZFS_KEYFORMAT_RAW: + /* + * dsl_crypto_params_create_nvlist() also verifies the + * raw key is WRAPPING_KEY_LEN bytes, so this is + * _almost_ redundant -- however we still want to + * guarantee we won't overflow rawkey when copying + * the contents over. 
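hex_to_raw() above enforces that the input string is exactly twice the output buffer length and consists solely of hex digits, converting high nibble first. A small illustration of that invariant; the key value here is made up:

    uint8_t rawkey[WRAPPING_KEY_LEN];       /* WRAPPING_KEY_LEN == 32 */
    const char *hex =
        "00112233445566778899aabbccddeeff"
        "00112233445566778899aabbccddeeff";  /* exactly 64 hex digits */

    /*
     * A length mismatch fails with EINVAL before anything is written;
     * a stray non-hex character aborts the conversion partway through,
     * also with EINVAL.
     */
    if (hex_to_raw(hex, rawkey, sizeof (rawkey)) == 0) {
            /* rawkey[0] == 0x00, rawkey[1] == 0x11, ... */
    }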
+ */ + if (keylen != WRAPPING_KEY_LEN) { + err = SET_ERROR(EINVAL); + goto done; + } + + bcopy(key, rawkey, keylen); + rawlen = keylen; + break; + case ZFS_KEYFORMAT_HEX: + /* + * hex_to_raw() will reject any input that doesn't exactly + * fit into rawkey + */ + err = hex_to_raw(key, rawkey, sizeof (rawkey)); + if (err != 0) + goto done; + rawlen = sizeof (rawkey); + break; + default: + err = SET_ERROR(EINVAL); + goto done; + } + + fnvlist_add_uint64(args, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + (uint64_t)keyformat); + fnvlist_add_uint8_array(hidden_args, "wkeydata", rawkey, rawlen); + + err = dsl_crypto_params_create_nvlist(DCP_CMD_NEW_KEY, args, + hidden_args, dcpp); + +done: + fnvlist_free(args); + fnvlist_free(hidden_args); + bzero(rawkey, sizeof (rawkey)); + + return (err); +} + +void +zcp_synctask_change_key_cleanup(void *arg) +{ + spa_keystore_change_key_args_t *skcka = arg; + + dsl_crypto_params_free(skcka->skcka_cp, B_TRUE); +} + +int +zcp_synctask_change_key_check(void *arg, dmu_tx_t *tx) +{ + /* + * zcp_synctask_change_key_create_params() already validates that + * the new key is in an acceptable format and size for a channel + * program. Any future channel program specific checks would go here. + * For now, we just perform all the same checks done for + * 'zfs change-key' by calling spa_keystore_change_key_check(). + */ + return (spa_keystore_change_key_check(arg, tx)); +} + +void +zcp_synctask_change_key_sync(void *arg, dmu_tx_t *tx) +{ + spa_keystore_change_key_sync(arg, tx); +} diff --git a/usr/src/uts/common/fs/zfs/zcp_synctask.c b/usr/src/uts/common/fs/zfs/zcp_synctask.c index 09af25c1c9..9a1dceb044 100644 --- a/usr/src/uts/common/fs/zfs/zcp_synctask.c +++ b/usr/src/uts/common/fs/zfs/zcp_synctask.c @@ -23,6 +23,8 @@ #include <sys/zcp.h> #include <sys/zcp_set.h> +#include <sys/zcp_change_key.h> +#include <sys/dsl_crypt.h> #include <sys/dsl_dir.h> #include <sys/dsl_pool.h> #include <sys/dsl_prop.h> @@ -399,6 +401,74 @@ zcp_synctask_set_prop(lua_State *state, boolean_t sync, nvlist_t *err_details) return (err); } +static int zcp_synctask_change_key(lua_State *, boolean_t, nvlist_t *); +static zcp_synctask_info_t zcp_synctask_change_key_info = { + .name = "change_key", + .func = zcp_synctask_change_key, + .pargs = { + { .za_name = "dataset", .za_lua_type = LUA_TSTRING }, + { .za_name = "key", .za_lua_type = LUA_TSTRING }, + { .za_name = "format", .za_lua_type = LUA_TSTRING }, + { NULL, 0 }, + }, + .kwargs = { + { NULL, 0 } + }, + .space_check = ZFS_SPACE_CHECK_RESERVED, + /* + * This is the same value that is used when zfs change-key is run. + * See spa_keystore_change_key() in dsl_crypt.c + */ + .blocks_modified = 15 +}; + +static int +zcp_synctask_change_key(lua_State *state, boolean_t sync, nvlist_t *err_details) +{ + int err; + spa_keystore_change_key_args_t skcka = { 0 }; + dsl_crypto_params_t *dcp = NULL; + const char *dsname; + const char *key; + const char *format; + size_t keylen; + uint64_t keyformat; + + dsname = lua_tostring(state, 1); + + /* + * The key may be raw key, which could contain NUL within it. + * Use lua_tolstring() instead of lua_tostring() to obtain the length. 
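This matters because Lua strings are counted, not NUL-terminated: lua_tolstring() hands back both the pointer and the true length, while strlen() on the same pointer would stop at the first embedded 0x00 byte of a raw key. In sketch form:

    size_t keylen;
    const char *key = lua_tolstring(state, 2, &keylen);

    /*
     * For a raw-format key, keylen must equal WRAPPING_KEY_LEN (32)
     * and is the value to trust; strlen(key) may be smaller whenever
     * the key material contains embedded NUL bytes.
     */
    ASSERT3U(strlen(key), <=, keylen);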
+ */ + key = lua_tolstring(state, 2, &keylen); + + format = lua_tostring(state, 3); + + if (zfs_prop_string_to_index(ZFS_PROP_KEYFORMAT, format, + &keyformat) != 0) + return (SET_ERROR(EINVAL)); + + err = zcp_synctask_change_key_create_params(key, keylen, keyformat, + &dcp); + if (err != 0) + goto done; + + skcka.skcka_dsname = dsname; + skcka.skcka_cp = dcp; + + zcp_cleanup_handler_t *zch = zcp_register_cleanup(state, + (zcp_cleanup_t *)&zcp_synctask_change_key_cleanup, &skcka); + + err = zcp_sync_task(state, zcp_synctask_change_key_check, + zcp_synctask_change_key_sync, &skcka, sync, dsname); + + zcp_deregister_cleanup(state, zch); + +done: + dsl_crypto_params_free(dcp, (err != 0 || !sync) ? B_TRUE : B_FALSE); + return (err); +} + static int zcp_synctask_wrapper(lua_State *state) { @@ -468,6 +538,7 @@ zcp_load_synctask_lib(lua_State *state, boolean_t sync) &zcp_synctask_snapshot_info, &zcp_synctask_inherit_prop_info, &zcp_synctask_set_prop_info, + &zcp_synctask_change_key_info, NULL }; diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 2b4c1d55e7..153dcf1502 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -3767,6 +3767,7 @@ zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl, uint64_t instrlimit, memlimit; boolean_t sync_flag; nvpair_t *nvarg = NULL; + nvlist_t *hidden_args = NULL; if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) { return (EINVAL); @@ -3784,6 +3785,16 @@ zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl, return (EINVAL); } + /* hidden args are optional */ + if (nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args) == 0) { + nvlist_t *argnvl = fnvpair_value_nvlist(nvarg); + int ret; + + ret = nvlist_add_nvlist(argnvl, ZPOOL_HIDDEN_ARGS, hidden_args); + if (ret != 0) + return (ret); + } + if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit) return (EINVAL); if (memlimit == 0 || memlimit > zfs_lua_max_memlimit) diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index f33a1abe4f..99011b83b4 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -23,7 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright 2017 Nexenta Systems, Inc. */ @@ -377,6 +377,46 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, return (0); } + case _FIODIRECTIO: + { + /* + * ZFS inherently provides the basic semantics for directio. + * This is the summary from the ZFS on Linux support for + * O_DIRECT, which is the common form of directio, and required + * no changes to ZFS. + * + * 1. Minimize cache effects of the I/O. + * + * By design the ARC is already scan-resistant, which helps + * mitigate the need for special O_DIRECT handling. + * + * 2. O_DIRECT _MAY_ impose restrictions on IO alignment and + * length. + * + * No additional alignment or length restrictions are + * imposed by ZFS. + * + * 3. O_DIRECT _MAY_ perform unbuffered IO operations directly + * between user memory and block device. + * + * No unbuffered IO operations are currently supported. In + * order to support features such as compression, encryption, + * and checksumming a copy must be made to transform the + * data. + * + * 4. O_DIRECT _MAY_ imply O_DSYNC (XFS). 
+ * + * O_DIRECT does not imply O_DSYNC for ZFS. + * + * 5. O_DIRECT _MAY_ disable file locking that serializes IO + * operations. + * + * All I/O in ZFS is locked for correctness and this locking + * is not disabled by O_DIRECT. + */ + return (0); + } + case _FIO_SEEK_DATA: case _FIO_SEEK_HOLE: { diff --git a/usr/src/uts/common/inet/ilb/ilb.c b/usr/src/uts/common/inet/ilb/ilb.c index 8ab2a90116..91cd671b12 100644 --- a/usr/src/uts/common/inet/ilb/ilb.c +++ b/usr/src/uts/common/inet/ilb/ilb.c @@ -1679,6 +1679,8 @@ ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src, uint16_t nat_src_idx; boolean_t busy; + ret = 0; + /* * We don't really need to switch here since both protocols's * ports are at the same offset. Just prepare for future protocol diff --git a/usr/src/uts/common/inet/ilb/ilb_conn.c b/usr/src/uts/common/inet/ilb/ilb_conn.c index 7f79d41dd6..24b0138fbf 100644 --- a/usr/src/uts/common/inet/ilb/ilb_conn.c +++ b/usr/src/uts/common/inet/ilb/ilb_conn.c @@ -132,6 +132,9 @@ ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s) ilb_conn_t **next, **prev; ilb_conn_t **next_prev, **prev_next; + next_prev = NULL; + prev_next = NULL; + if (c2s) { hash = connp->conn_c2s_hash; ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock)); @@ -698,6 +701,7 @@ update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len, uint32_t ack, seq; int32_t seg_len; + ack = 0; if (tcpha->tha_flags & TH_RST) return (B_FALSE); @@ -903,6 +907,11 @@ ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph, uint32_t adj_ip_sum; boolean_t full_nat; + in_iph4 = NULL; + in_iph6 = NULL; + icmph4 = NULL; + icmph6 = NULL; + if (l3 == IPPROTO_IP) { in6_addr_t in_src, in_dst; diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index e9a3fcdeeb..89574da71f 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -1730,8 +1730,6 @@ typedef struct ill_s { * Capabilities related fields. 
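Stepping back to the _FIODIRECTIO ioctl added in zfs_vnops.c above: the userland-visible effect is that directio(3C), which issues this ioctl, now succeeds on ZFS files as an advisory no-op rather than failing. A user-level sketch, assuming the standard directio(3C) interface:

    #include <sys/types.h>
    #include <sys/fcntl.h>      /* directio(3C), DIRECTIO_ON */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Request direct I/O on a file; the path here is illustrative. */
    static int
    request_directio(const char *path)
    {
            int fd = open(path, O_RDONLY);

            if (fd == -1) {
                    perror(path);
                    return (-1);
            }
            if (directio(fd, DIRECTIO_ON) == -1) {
                    perror("directio");     /* no longer expected on ZFS */
                    (void) close(fd);
                    return (-1);
            }
            return (fd);
    }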
*/ uint_t ill_dlpi_capab_state; /* State of capability query, IDCS_* */ - kcondvar_t ill_dlpi_capab_cv; /* CV for broadcasting state changes */ - kmutex_t ill_dlpi_capab_lock; /* Lock for accessing above Cond Var */ uint_t ill_capab_pending_cnt; uint64_t ill_capabilities; /* Enabled capabilities, ILL_CAPAB_* */ ill_hcksum_capab_t *ill_hcksum_capab; /* H/W cksumming capabilities */ diff --git a/usr/src/uts/common/inet/ip/conn_opt.c b/usr/src/uts/common/inet/ip/conn_opt.c index b4bff4d7b4..8a05a25b08 100644 --- a/usr/src/uts/common/inet/ip/conn_opt.c +++ b/usr/src/uts/common/inet/ip/conn_opt.c @@ -1209,6 +1209,7 @@ conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen, return (EINVAL); } + ifindex = UINT_MAX; switch (name) { case IP_TTL: /* Don't allow zero */ @@ -1529,6 +1530,7 @@ conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen, if (connp->conn_family != AF_INET6) return (EINVAL); + ifindex = UINT_MAX; switch (name) { case IPV6_MULTICAST_IF: /* diff --git a/usr/src/uts/common/inet/ip/icmp.c b/usr/src/uts/common/inet/ip/icmp.c index b1a77ae0cc..46c791298a 100644 --- a/usr/src/uts/common/inet/ip/icmp.c +++ b/usr/src/uts/common/inet/ip/icmp.c @@ -739,6 +739,11 @@ rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, } ASSERT(sa != NULL && len != 0); + sin = NULL; + sin6 = NULL; + dstport = 0; + flowinfo = 0; + v4dst = INADDR_ANY; /* * Determine packet type based on type of address passed in @@ -3592,6 +3597,7 @@ icmp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, } } else { /* Connected case */ + dstport = connp->conn_fport; v6dst = connp->conn_faddr_v6; flowinfo = connp->conn_flowinfo; } diff --git a/usr/src/uts/common/inet/ip/igmp.c b/usr/src/uts/common/inet/ip/igmp.c index 423bb2a816..de6a91877a 100644 --- a/usr/src/uts/common/inet/ip/igmp.c +++ b/usr/src/uts/common/inet/ip/igmp.c @@ -310,15 +310,15 @@ mld_start_timers(unsigned next, ip_stack_t *ipst) mblk_t * igmp_input(mblk_t *mp, ip_recv_attr_t *ira) { - igmpa_t *igmpa; + igmpa_t *igmpa; ipha_t *ipha = (ipha_t *)(mp->b_rptr); int iphlen, igmplen, mblklen; - ilm_t *ilm; + ilm_t *ilm; uint32_t src, dst; - uint32_t group; + uint32_t group; in6_addr_t v6group; uint_t next; - ipif_t *ipif; + ipif_t *ipif; ill_t *ill = ira->ira_ill; ip_stack_t *ipst = ill->ill_ipst; @@ -778,7 +778,7 @@ igmp_joingroup(ilm_t *ilm) ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { - ilm->ilm_rtx.rtx_timer = INFINITY; + ilm->ilm_rtx.rtx_timer = timer = INFINITY; ilm->ilm_state = IGMP_OTHERMEMBER; } else { ip1dbg(("Querier mode %d, sending report, group %x\n", @@ -857,11 +857,10 @@ mld_joingroup(ilm_t *ilm) ill = ilm->ilm_ill; ASSERT(ill->ill_isv6); - ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { - ilm->ilm_rtx.rtx_timer = INFINITY; + ilm->ilm_rtx.rtx_timer = timer = INFINITY; ilm->ilm_state = IGMP_OTHERMEMBER; } else { if (ill->ill_mcast_type == MLD_V1_ROUTER) { @@ -1435,7 +1434,7 @@ igmp_timeout_handler(void *arg) uint_t mld_timeout_handler_per_ill(ill_t *ill) { - ilm_t *ilm; + ilm_t *ilm; uint_t next = INFINITY, current; mrec_t *rp, *rtxrp; rtx_state_t *rtxp; @@ -1832,7 +1831,7 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) ipha_t *ipha; int hdrlen = sizeof (ipha_t) + RTRALERT_LEN; size_t size = hdrlen + sizeof (igmpa_t); - ill_t *ill = ilm->ilm_ill; + ill_t *ill = ilm->ilm_ill; ip_stack_t *ipst = ill->ill_ipst; ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock)); @@ 
-1859,15 +1858,15 @@ igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) ipha->ipha_version_and_hdr_length = (IP_VERSION << 4) | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS); - ipha->ipha_type_of_service = 0; + ipha->ipha_type_of_service = 0; ipha->ipha_length = htons(size); ipha->ipha_ident = 0; ipha->ipha_fragment_offset_and_flags = 0; - ipha->ipha_ttl = IGMP_TTL; - ipha->ipha_protocol = IPPROTO_IGMP; - ipha->ipha_hdr_checksum = 0; - ipha->ipha_dst = addr ? addr : igmpa->igmpa_group; - ipha->ipha_src = INADDR_ANY; + ipha->ipha_ttl = IGMP_TTL; + ipha->ipha_protocol = IPPROTO_IGMP; + ipha->ipha_hdr_checksum = 0; + ipha->ipha_dst = addr ? addr : igmpa->igmpa_group; + ipha->ipha_src = INADDR_ANY; ill_mcast_queue(ill, mp); @@ -2448,7 +2447,7 @@ mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) { mblk_t *mp; mld_hdr_t *mldh; - ip6_t *ip6h; + ip6_t *ip6h; ip6_hbh_t *ip6hbh; struct ip6_opt_router *ip6router; size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 760454861b..925d06c62b 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -2404,6 +2404,7 @@ ipoptp_next(ipoptp_t *optp) * its there, and make sure it points to either something * inside this option, or the end of the option. */ + pointer = IPOPT_EOL; switch (opt) { case IPOPT_RR: case IPOPT_TS: @@ -4124,8 +4125,6 @@ ip_modclose(ill_t *ill) rw_destroy(&ill->ill_mcast_lock); mutex_destroy(&ill->ill_mcast_serializer); list_destroy(&ill->ill_nce); - cv_destroy(&ill->ill_dlpi_capab_cv); - mutex_destroy(&ill->ill_dlpi_capab_lock); /* * Now we are done with the module close pieces that @@ -6340,6 +6339,9 @@ ip_opt_set_multicast_group(conn_t *connp, t_scalar_t name, optfn = ip_opt_delete_group; break; default: + /* Should not be reached. */ + fmode = MODE_IS_INCLUDE; + optfn = NULL; ASSERT(0); } @@ -6469,6 +6471,9 @@ ip_opt_set_multicast_sources(conn_t *connp, t_scalar_t name, optfn = ip_opt_delete_group; break; default: + /* Should not be reached. 
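A pattern repeated throughout this merge is visible in these multicast hunks: locals that are assigned only inside switch cases gain explicit fallback values in the default: arm, because ASSERT(0) compiles to nothing in non-DEBUG builds and the compiler therefore cannot prove the variables are always initialized before use. Condensed, with abbreviated case labels:

    switch (name) {
    case MCAST_JOIN_GROUP:                  /* and friends */
            fmode = MODE_IS_EXCLUDE;
            optfn = ip_opt_add_group;
            break;
    case MCAST_LEAVE_GROUP:                 /* and friends */
            fmode = MODE_IS_INCLUDE;
            optfn = ip_opt_delete_group;
            break;
    default:
            /* Should not be reached. */
            fmode = MODE_IS_INCLUDE;        /* placate the compiler */
            optfn = NULL;
            ASSERT(0);                      /* vanishes in non-DEBUG builds */
    }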
*/ + optfn = NULL; + fmode = 0; ASSERT(0); } @@ -8937,6 +8942,8 @@ ip_forward_options(mblk_t *mp, ipha_t *ipha, ill_t *dst_ill, ip2dbg(("ip_forward_options\n")); dst = ipha->ipha_dst; + opt = NULL; + for (optval = ipoptp_first(&opts, ipha); optval != IPOPT_EOL; optval = ipoptp_next(&opts)) { @@ -9023,6 +9030,7 @@ ip_forward_options(mblk_t *mp, ipha_t *ipha, ill_t *dst_ill, opt[IPOPT_OFFSET] += IP_ADDR_LEN; break; case IPOPT_TS: + off = 0; /* Insert timestamp if there is room */ switch (opt[IPOPT_POS_OV_FLG] & 0x0F) { case IPOPT_TS_TSONLY: @@ -9187,6 +9195,7 @@ ip_input_local_options(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) ip_stack_t *ipst = ill->ill_ipst; ip2dbg(("ip_input_local_options\n")); + opt = NULL; for (optval = ipoptp_first(&opts, ipha); optval != IPOPT_EOL; @@ -9249,6 +9258,7 @@ ip_input_local_options(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) opt[IPOPT_OFFSET] += IP_ADDR_LEN; break; case IPOPT_TS: + off = 0; /* Insert timestamp if there is romm */ switch (opt[IPOPT_POS_OV_FLG] & 0x0F) { case IPOPT_TS_TSONLY: @@ -9342,6 +9352,7 @@ ip_input_options(ipha_t *ipha, ipaddr_t dst, mblk_t *mp, ire_t *ire; ip2dbg(("ip_input_options\n")); + opt = NULL; *errorp = 0; for (optval = ipoptp_first(&opts, ipha); optval != IPOPT_EOL; @@ -11890,6 +11901,7 @@ ip_output_local_options(ipha_t *ipha, ip_stack_t *ipst) ipaddr_t dst; uint32_t ts; timestruc_t now; + uint32_t off = 0; for (optval = ipoptp_first(&opts, ipha); optval != IPOPT_EOL; @@ -11898,7 +11910,6 @@ ip_output_local_options(ipha_t *ipha, ip_stack_t *ipst) optlen = opts.ipoptp_len; ASSERT((opts.ipoptp_flags & IPOPTP_ERROR) == 0); switch (optval) { - uint32_t off; case IPOPT_SSRR: case IPOPT_LSRR: off = opt[IPOPT_OFFSET]; @@ -12546,6 +12557,7 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg) } ci.ci_ipif = NULL; + extract_funcp = NULL; switch (ipip->ipi_cmd_type) { case MISC_CMD: case MSFILT_CMD: @@ -12727,6 +12739,7 @@ ip_wput_nondata(queue_t *q, mblk_t *mp) else connp = NULL; + iocp = NULL; switch (DB_TYPE(mp)) { case M_IOCTL: /* @@ -12937,6 +12950,7 @@ ip_output_options(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa, ill_t *ill) ip2dbg(("ip_output_options\n")); + opt = NULL; dst = ipha->ipha_dst; for (optval = ipoptp_first(&opts, ipha); optval != IPOPT_EOL; diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index afaf01024f..26e7be2fe8 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -2766,7 +2766,7 @@ ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) { uint8_t opt_type; - uint_t optused; + uint_t optused = 0; int ret = 0; const char *errtype; ill_t *ill = ira->ira_ill; diff --git a/usr/src/uts/common/inet/ip/ip6_ire.c b/usr/src/uts/common/inet/ip/ip6_ire.c index ad738bc3b7..1145025588 100644 --- a/usr/src/uts/common/inet/ip/ip6_ire.c +++ b/usr/src/uts/common/inet/ip/ip6_ire.c @@ -687,7 +687,7 @@ ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, const in6_addr_t *gateway, int type, const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags) { - in6_addr_t gw_addr_v6; + in6_addr_t gw_addr_v6 = { 0 }; ill_t *ire_ill = NULL, *dst_ill; ip_stack_t *ipst = ire->ire_ipst; diff --git a/usr/src/uts/common/inet/ip/ip6_output.c b/usr/src/uts/common/inet/ip/ip6_output.c index dc074454e3..143077ed32 100644 --- a/usr/src/uts/common/inet/ip/ip6_output.c +++ b/usr/src/uts/common/inet/ip/ip6_output.c @@ -1023,7 +1023,7 @@ ire_send_wire_v6(ire_t 
*ire, mblk_t *mp, void *iph_arg, */ if (pktlen > ixa->ixa_fragsize || (ixaflags & (IXAF_IPSEC_SECURE|IXAF_IPV6_ADD_FRAGHDR))) { - uint32_t ident; + uint32_t ident = 0; if (ixaflags & IXAF_IPSEC_SECURE) pktlen += ipsec_out_extra_length(ixa); diff --git a/usr/src/uts/common/inet/ip/ip_ftable.c b/usr/src/uts/common/inet/ip/ip_ftable.c index 980436b578..408b9d0ea1 100644 --- a/usr/src/uts/common/inet/ip/ip_ftable.c +++ b/usr/src/uts/common/inet/ip/ip_ftable.c @@ -76,7 +76,7 @@ (((ire)->ire_type & IRE_DEFAULT) || \ (((ire)->ire_type & IRE_INTERFACE) && ((ire)->ire_addr == 0))) -#define IP_SRC_MULTIHOMING(isv6, ipst) \ +#define IP_SRC_MULTIHOMING(isv6, ipst) \ (isv6 ? ipst->ips_ipv6_strict_src_multihoming : \ ipst->ips_ip_strict_src_multihoming) @@ -470,7 +470,7 @@ ire_get_bucket(ire_t *ire) * routes to this destination, this routine will utilise the * first route it finds to IP address * Return values: - * 0 - FAILURE + * 0 - FAILURE * nonzero - ifindex */ uint_t @@ -807,7 +807,7 @@ ire_round_robin(irb_t *irb_ptr, ire_ftable_args_t *margs, uint_t hash, ire_t *orig_ire, ip_stack_t *ipst) { ire_t *ire, *maybe_ire = NULL; - uint_t maybe_badcnt; + uint_t maybe_badcnt = 0; uint_t maxwalk; /* Fold in more bits from the hint/hash */ diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 24e4c200d4..a2ddcb3547 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -1394,10 +1394,11 @@ ill_capability_wait(ill_t *ill) while (ill->ill_capab_pending_cnt != 0 && (ill->ill_state_flags & ILL_CONDEMNED) == 0) { - mutex_enter(&ill->ill_dlpi_capab_lock); + /* This may enable blocked callers of ill_capability_done(). */ ipsq_exit(ill->ill_phyint->phyint_ipsq); - cv_wait(&ill->ill_dlpi_capab_cv, &ill->ill_dlpi_capab_lock); - mutex_exit(&ill->ill_dlpi_capab_lock); + /* Pause a bit (1msec) before we re-enter the squeue. */ + delay(drv_usectohz(1000000)); + /* * If ipsq_enter() fails, someone set ILL_CONDEMNED * while we dropped the squeue. Indicate such to the caller. 
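The rework of ill_capability_wait() above trades the removed condvar (see the ip.h hunk) for a poll loop: drop the squeue so that ill_capability_done() can make progress, sleep, then try to re-enter. Note that drv_usectohz(9F) converts microseconds to clock ticks, so drv_usectohz(1000000) sleeps for one second per iteration, not the millisecond the in-line comment suggests. The shape of the pattern, with hypothetical helper names:

    /* Illustrative only; the helper names here are stand-ins. */
    while (work_pending(ill) && !condemned(ill)) {
            drop_squeue(ill);                   /* unblock the completion path */
            delay(drv_usectohz(MICROSEC));      /* MICROSEC == 1000000 us: 1 s */
            if (!reenter_squeue(ill))
                    return (B_FALSE);           /* ILL_CONDEMNED while away */
    }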
@@ -1508,9 +1509,9 @@ ill_capability_id_ack(ill_t *ill, mblk_t *mp, dl_capability_sub_t *outers) id_ic = (dl_capab_id_t *)(outers + 1); + inners = &id_ic->id_subcap; if (outers->dl_length < sizeof (*id_ic) || - (inners = &id_ic->id_subcap, - inners->dl_length > (outers->dl_length - sizeof (*inners)))) { + inners->dl_length > (outers->dl_length - sizeof (*inners))) { cmn_err(CE_WARN, "ill_capability_id_ack: malformed " "encapsulated capab type %d too long for mblk", inners->dl_cap); @@ -3513,9 +3514,6 @@ ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback, ill->ill_max_buf = ND_MAX_Q; ill->ill_refcnt = 0; - cv_init(&ill->ill_dlpi_capab_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&ill->ill_dlpi_capab_lock, NULL, MUTEX_DEFAULT, NULL); - return (0); } @@ -4027,6 +4025,7 @@ ill_get_next_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst) phyint_t *phyi_initial; uint_t ifindex; + phyi_initial = NULL; rw_enter(&ipst->ips_ill_g_lock, RW_READER); if (index == 0) { @@ -12935,6 +12934,7 @@ void ill_capability_done(ill_t *ill) { ASSERT(ill->ill_capab_pending_cnt != 0); + ASSERT(IAM_WRITER_ILL(ill)); ill_dlpi_done(ill, DL_CAPABILITY_REQ); @@ -12942,10 +12942,6 @@ ill_capability_done(ill_t *ill) if (ill->ill_capab_pending_cnt == 0 && ill->ill_dlpi_capab_state == IDCS_OK) ill_capability_reset_alloc(ill); - - mutex_enter(&ill->ill_dlpi_capab_lock); - cv_broadcast(&ill->ill_dlpi_capab_cv); - mutex_exit(&ill->ill_dlpi_capab_lock); } /* diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c index 69506f77d4..2cee123d4a 100644 --- a/usr/src/uts/common/inet/ip/ip_ndp.c +++ b/usr/src/uts/common/inet/ip/ip_ndp.c @@ -2943,6 +2943,8 @@ nce_update(ncec_t *ncec, uint16_t new_state, uchar_t *new_ll_addr) ASSERT(ncec->ncec_lladdr != NULL || new_state == ND_INITIAL || new_state == ND_INCOMPLETE); } + + tid = 0; if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) { tid = ncec->ncec_timeout_id; ncec->ncec_timeout_id = 0; @@ -4433,6 +4435,7 @@ nce_resolve_src(ncec_t *ncec, in6_addr_t *src) ASSERT(src != NULL); ASSERT(IN6_IS_ADDR_UNSPECIFIED(src)); + src4 = 0; src6 = *src; if (is_myaddr) { src6 = ncec->ncec_addr; @@ -4641,6 +4644,7 @@ nce_add_common(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len, ndp = ill->ill_ipst->ips_ndp4; *retnce = NULL; + state = 0; ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); diff --git a/usr/src/uts/common/inet/ip/ip_output.c b/usr/src/uts/common/inet/ip/ip_output.c index 169859707e..a6ca2aabd5 100644 --- a/usr/src/uts/common/inet/ip/ip_output.c +++ b/usr/src/uts/common/inet/ip/ip_output.c @@ -1100,7 +1100,7 @@ ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, int, 1); if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) { - int error; + int error = 0; DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); @@ -1156,7 +1156,7 @@ ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, } if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst)) { - int error; + int error = 0; DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill, ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp); diff --git a/usr/src/uts/common/inet/ip/ip_rts.c b/usr/src/uts/common/inet/ip/ip_rts.c index dece7be29d..5df5ad6447 100644 --- a/usr/src/uts/common/inet/ip/ip_rts.c +++ b/usr/src/uts/common/inet/ip/ip_rts.c @@ -114,7 +114,7 @@ rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags, ip_stack_t *ipst) { mblk_t *mp1; - conn_t *connp, *next_connp; + conn_t *connp, *next_connp; /* * Since we don't have an ill_t here, RTSQ_DEFAULT 
must already be @@ -190,7 +190,7 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) mblk_t *mp; rt_msghdr_t *rtm; int rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY); - sa_family_t af; + sa_family_t af = { 0 }; in6_addr_t gw_addr_v6; if (ire == NULL) @@ -199,6 +199,7 @@ ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst) ire->ire_ipversion == IPV6_VERSION); ASSERT(!(ire->ire_type & IRE_IF_CLONE)); + mp = NULL; if (ire->ire_flags & RTF_SETSRC) rtm_addrs |= RTA_SRC; @@ -306,10 +307,14 @@ ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr) ts_label_t *tsl = NULL; zoneid_t zoneid; ip_stack_t *ipst; - ill_t *ill = NULL; + ill_t *ill = NULL; zoneid = connp->conn_zoneid; ipst = connp->conn_netstack->netstack_ip; + net_mask = 0; + src_addr = 0; + dst_addr = 0; + gw_addr = 0; if (mp->b_cont != NULL && !pullupmsg(mp, -1)) { freemsg(mp); @@ -1239,6 +1244,9 @@ rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc, ipaddr_t v4setsrc; rtm = (rt_msghdr_t *)mp->b_rptr; + ifaddr = 0; + brdaddr = 0; + rtm_flags = 0; /* * Find the ill used to send packets. This will be NULL in case @@ -1406,7 +1414,7 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) ill_t *ill; ifrt_t *ifrt; mblk_t *mp; - in6_addr_t gw_addr_v6; + in6_addr_t gw_addr_v6 = { 0 }; /* Need to add back some metrics to the IRE? */ /* @@ -1422,6 +1430,7 @@ rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics) * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as * microseconds. */ + rtt = 0; if (which & RTV_RTT) rtt = metrics->rmx_rtt / 1000; if (which & RTV_RTTVAR) diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 77d9d8df7e..4f3ec2d817 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -613,6 +613,7 @@ ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) break; default: + conn_cache = NULL; connp = NULL; ASSERT(0); } diff --git a/usr/src/uts/common/inet/ip/ipmp.c b/usr/src/uts/common/inet/ip/ipmp.c index 912b489c40..3106b6e2de 100644 --- a/usr/src/uts/common/inet/ip/ipmp.c +++ b/usr/src/uts/common/inet/ip/ipmp.c @@ -1909,6 +1909,7 @@ ipmp_phyint_join_grp(phyint_t *phyi, ipmp_grp_t *grp) ASSERT(IAM_WRITER_IPSQ(ipsq)); ASSERT(phyi->phyint_illv4 != NULL || phyi->phyint_illv6 != NULL); + ill = NULL; /* * Send routing socket messages indicating that the phyint's ills diff --git a/usr/src/uts/common/inet/ip/ipsecah.c b/usr/src/uts/common/inet/ip/ipsecah.c index fc19d7f877..ced3696948 100644 --- a/usr/src/uts/common/inet/ip/ipsecah.c +++ b/usr/src/uts/common/inet/ip/ipsecah.c @@ -215,7 +215,7 @@ static int ah_kstat_update(kstat_t *kp, int rw) { ah_kstats_t *ekp; - netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstackid_t stackid; netstack_t *ns; ipsec_stack_t *ipss; @@ -225,6 +225,7 @@ ah_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); + stackid = (netstackid_t)(uintptr_t)kp->ks_private; ns = netstack_find_by_stackid(stackid); if (ns == NULL) return (-1); diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c index b3dc7d350a..e0efbbf3ce 100644 --- a/usr/src/uts/common/inet/ip/ipsecesp.c +++ b/usr/src/uts/common/inet/ip/ipsecesp.c @@ -208,7 +208,7 @@ static int esp_kstat_update(kstat_t *kp, int rw) { esp_kstats_t *ekp; - netstackid_t stackid = (zoneid_t)(uintptr_t)kp->ks_private; + netstackid_t stackid; netstack_t *ns; ipsec_stack_t *ipss; @@ -218,6 +218,7 @@ 
esp_kstat_update(kstat_t *kp, int rw) if (rw == KSTAT_WRITE) return (EACCES); + stackid = (zoneid_t)(uintptr_t)kp->ks_private; ns = netstack_find_by_stackid(stackid); if (ns == NULL) return (-1); diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c index 44ebb21db3..288c0e3e18 100644 --- a/usr/src/uts/common/inet/ip/sadb.c +++ b/usr/src/uts/common/inet/ip/sadb.c @@ -113,8 +113,8 @@ extern uint64_t ipsacq_maxpackets; if (((sa)->ipsa_ ## exp) == 0) \ (sa)->ipsa_ ## exp = tmp; \ else \ - (sa)->ipsa_ ## exp = \ - MIN((sa)->ipsa_ ## exp, tmp); \ + (sa)->ipsa_ ## exp = \ + MIN((sa)->ipsa_ ## exp, tmp); \ } \ } @@ -154,8 +154,6 @@ sadb_sa_refrele(void *target) static time_t sadb_add_time(time_t base, uint64_t delta) { - time_t sum; - /* * Clip delta to the maximum possible time_t value to * prevent "overwrapping" back into a shorter-than-desired @@ -163,18 +161,12 @@ sadb_add_time(time_t base, uint64_t delta) */ if (delta > TIME_MAX) delta = TIME_MAX; - /* - * This sum may still overflow. - */ - sum = base + delta; - /* - * .. so if the result is less than the base, we overflowed. - */ - if (sum < base) - sum = TIME_MAX; - - return (sum); + if (base > 0) { + if (TIME_MAX - base < delta) + return (TIME_MAX); /* Overflow */ + } + return (base + delta); } /* @@ -1695,8 +1687,7 @@ sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, mp->b_cont = mp1; break; default: - if (mp != NULL) - freemsg(mp); + freemsg(mp); return; } @@ -2941,7 +2932,7 @@ sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg, boolean_t isupdate = (newbie != NULL); uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr; ipsec_stack_t *ipss = ns->netstack_ipsec; - ip_stack_t *ipst = ns->netstack_ip; + ip_stack_t *ipst = ns->netstack_ip; ipsec_alginfo_t *alg; int rcode; boolean_t async = B_FALSE; @@ -4386,8 +4377,8 @@ sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard, if (assoc->ipsa_idletime != 0) { assoc->ipsa_idletime = min(assoc->ipsa_idletime, assoc->ipsa_idleuselt); - assoc->ipsa_idleexpiretime = - current + assoc->ipsa_idletime; + assoc->ipsa_idleexpiretime = + current + assoc->ipsa_idletime; } else { assoc->ipsa_idleexpiretime = current + assoc->ipsa_idleuselt; @@ -5450,7 +5441,7 @@ sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah, uint32_t seq; uint64_t unique_id = 0; boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0; - ts_label_t *tsl; + ts_label_t *tsl; netstack_t *ns = ixa->ixa_ipst->ips_netstack; ipsec_stack_t *ipss = ns->netstack_ipsec; ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; @@ -6102,7 +6093,8 @@ sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap) return (NULL); bsllow(&sl); - LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level); + LCLASS_SET((_bslabel_impl_t *)&sl, + (uint16_t)sens->sadb_sens_sens_level); bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments, bitmap_len); @@ -6629,7 +6621,7 @@ ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst) static void ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst) { - connf_t *connfp; + connf_t *connfp; conn_t *connp; uint32_t ports; uint16_t *pptr = (uint16_t *)&ports; diff --git a/usr/src/uts/common/inet/ip/spd.c b/usr/src/uts/common/inet/ip/spd.c index d703170c9f..85f06f3d02 100644 --- a/usr/src/uts/common/inet/ip/spd.c +++ b/usr/src/uts/common/inet/ip/spd.c @@ -163,7 +163,7 @@ int ipsec_weird_null_inbound_policy = 0; * Inbound traffic should have matching identities for both 
SA's. */ -#define SA_IDS_MATCH(sa1, sa2) \ +#define SA_IDS_MATCH(sa1, sa2) \ (((sa1) == NULL) || ((sa2) == NULL) || \ (((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) && \ (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid)))) @@ -3178,6 +3178,7 @@ ipsec_act_find(const ipsec_act_t *a, int n, netstack_t *ns) * TODO: should canonicalize a[] (i.e., zeroize any padding) * so we can use a non-trivial policy_hash function. */ + ap = NULL; for (i = n-1; i >= 0; i--) { hval = policy_hash(IPSEC_ACTION_HASH_SIZE, &a[i], &a[n]); @@ -6282,6 +6283,9 @@ ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *iramp, mblk_t *mp, #ifdef FRAGCACHE_DEBUG cmn_err(CE_WARN, "Fragcache: %s\n", inbound ? "INBOUND" : "OUTBOUND"); #endif + v6_proto = 0; + fraghdr = NULL; + /* * You're on the slow path, so insure that every packet in the * cache is a single-mblk one. diff --git a/usr/src/uts/common/inet/ip/tnet.c b/usr/src/uts/common/inet/ip/tnet.c index e8c7b0c6e2..37a7402d52 100644 --- a/usr/src/uts/common/inet/ip/tnet.c +++ b/usr/src/uts/common/inet/ip/tnet.c @@ -692,7 +692,7 @@ tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira) const void *src; const ip6_t *ip6h; cred_t *credp; - int proto; + int proto; ASSERT(DB_TYPE(mp) == M_DATA); @@ -1477,6 +1477,9 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira) */ af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6; + ipha = NULL; + ip6h = NULL; + gw_rhtp = NULL; if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { ASSERT(ire->ire_ipversion == IPV4_VERSION); diff --git a/usr/src/uts/common/inet/sctp/sctp_asconf.c b/usr/src/uts/common/inet/sctp/sctp_asconf.c index f5edd1994f..db770df30e 100644 --- a/usr/src/uts/common/inet/sctp/sctp_asconf.c +++ b/usr/src/uts/common/inet/sctp/sctp_asconf.c @@ -47,7 +47,7 @@ typedef struct sctp_asconf_s { mblk_t *head; - uint32_t cid; + uint32_t cid; } sctp_asconf_t; /* @@ -636,6 +636,12 @@ sctp_input_asconf_ack(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp) ASSERT(ch->sch_id == CHUNK_ASCONF_ACK); + ainfo = NULL; + alist = NULL; + dlist = NULL; + aptr = NULL; + dptr = NULL; + snp = (uint32_t *)(ch + 1); rlen = ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*snp); if (rlen < 0) { @@ -915,9 +921,9 @@ sctp_wput_asconf(sctp_t *sctp, sctp_faddr_t *fp) { #define SCTP_SET_SENT_FLAG(mp) ((mp)->b_flag = SCTP_CHUNK_FLAG_SENT) - mblk_t *mp; + mblk_t *mp; mblk_t *ipmp; - uint32_t *snp; + uint32_t *snp; sctp_parm_hdr_t *ph; boolean_t isv4; sctp_stack_t *sctps = sctp->sctp_sctps; @@ -1467,6 +1473,7 @@ sctp_add_ip(sctp_t *sctp, const void *addrs, uint32_t cnt) * If deleting: * o Must be part of the association */ + sin6 = NULL; for (i = 0; i < cnt; i++) { switch (connp->conn_family) { case AF_INET: diff --git a/usr/src/uts/common/inet/sctp/sctp_common.c b/usr/src/uts/common/inet/sctp/sctp_common.c index ef60f6d26a..a640ead3d1 100644 --- a/usr/src/uts/common/inet/sctp/sctp_common.c +++ b/usr/src/uts/common/inet/sctp/sctp_common.c @@ -804,6 +804,8 @@ sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp) { sctp_faddr_t *fpp; + fpp = NULL; + if (!sctp->sctp_faddrs) { return; } diff --git a/usr/src/uts/common/inet/sctp/sctp_cookie.c b/usr/src/uts/common/inet/sctp/sctp_cookie.c index 53c35183dc..da86faa252 100644 --- a/usr/src/uts/common/inet/sctp/sctp_cookie.c +++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c @@ -427,10 +427,10 @@ sctp_initialize_params(sctp_t *sctp, sctp_init_chunk_t *init, /* * Copy the peer's original source address into addr. 
This relies on the * following format (see sctp_send_initack() below): - * relative timestamp for the cookie (int64_t) + - * cookie lifetime (uint32_t) + - * local tie-tag (uint32_t) + peer tie-tag (uint32_t) + - * Peer's original src ... + * relative timestamp for the cookie (int64_t) + + * cookie lifetime (uint32_t) + + * local tie-tag (uint32_t) + peer tie-tag (uint32_t) + + * Peer's original src ... */ int cl_sctp_cookie_paddr(sctp_chunk_hdr_t *ch, in6_addr_t *addr) @@ -454,7 +454,7 @@ cl_sctp_cookie_paddr(sctp_chunk_hdr_t *ch, in6_addr_t *addr) sizeof (int64_t) + /* timestamp */ \ sizeof (uint32_t) + /* cookie lifetime */ \ sizeof (sctp_init_chunk_t) + /* INIT ACK */ \ - sizeof (in6_addr_t) + /* peer's original source */ \ + sizeof (in6_addr_t) + /* peer's original source */ \ ntohs((initcp)->sch_len) + /* peer's INIT */ \ sizeof (uint32_t) + /* local tie-tag */ \ sizeof (uint32_t) + /* peer tie-tag */ \ @@ -946,6 +946,8 @@ sctp_send_cookie_echo(sctp_t *sctp, sctp_chunk_hdr_t *iackch, mblk_t *iackmp, uint16_t old_num_str; sctp_stack_t *sctps = sctp->sctp_sctps; + sdc = NULL; + seglen = 0; iack = (sctp_init_chunk_t *)(iackch + 1); cph = NULL; diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c index 1b6449cfab..7d856fab28 100644 --- a/usr/src/uts/common/inet/sctp/sctp_input.c +++ b/usr/src/uts/common/inet/sctp/sctp_input.c @@ -831,7 +831,7 @@ sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp, * there is a break in the sequence. We want * to chop the reassembly list as follows (the * numbers are TSNs): - * 10 -> 11 -> (end of chunks) + * 10 -> 11 -> (end of chunks) * 10 -> 11 -> | 13 (break in sequence) */ prev = mp; @@ -943,6 +943,7 @@ sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error, uint32_t tsn; uint16_t fraglen = 0; + reassq_curr = NULL; *error = 0; /* diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c index 23abeccf96..476a6d921e 100644 --- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c +++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c @@ -1057,7 +1057,10 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, /* In all cases, the size of the option must be bigger than int */ if (inlen >= sizeof (int32_t)) { onoff = ONOFF(*i1); + } else { + return (EINVAL); } + retval = 0; RUN_SCTP(sctp); diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c index eced6eccba..0564f5a416 100644 --- a/usr/src/uts/common/inet/sctp/sctp_output.c +++ b/usr/src/uts/common/inet/sctp/sctp_output.c @@ -990,8 +990,8 @@ sctp_output(sctp_t *sctp, uint_t num_pkt) mblk_t *head; mblk_t *meta = sctp->sctp_xmit_tail; mblk_t *fill = NULL; - uint16_t chunklen; - uint32_t cansend; + uint16_t chunklen; + uint32_t cansend; int32_t seglen; int32_t xtralen; int32_t sacklen; @@ -1007,6 +1007,8 @@ sctp_output(sctp_t *sctp, uint_t num_pkt) sctp_stack_t *sctps = sctp->sctp_sctps; uint32_t tsn; + lfp = NULL; + if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) { sacklen = 0; } else { @@ -1651,7 +1653,7 @@ sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp) * - the chunk is unsent, i.e. new data. 
*/ #define SCTP_CHUNK_RX_CANBUNDLE(mp, fp) \ - (!SCTP_CHUNK_ABANDONED((mp)) && \ + (!SCTP_CHUNK_ABANDONED((mp)) && \ ((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) && \ !SCTP_CHUNK_ISACKED(mp))) || \ (((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \ @@ -1694,7 +1696,7 @@ sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp) * * if the advanced peer ack point includes the next * chunk to be retransmited - possibly the Forward - * TSN was lost. + * TSN was lost. * * if we are PRSCTP aware and the next chunk to be * retransmitted is now abandoned diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c index ec2a5d4e29..876e7d48e6 100644 --- a/usr/src/uts/common/inet/tcp/tcp_bind.c +++ b/usr/src/uts/common/inet/tcp/tcp_bind.c @@ -324,7 +324,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, boolean_t bind_to_req_port_only, cred_t *cr) { in_port_t mlp_port; - mlp_type_t addrtype, mlptype; + mlp_type_t addrtype, mlptype; boolean_t user_specified; in_port_t allocated_port; in_port_t requested_port = *requested_port_ptr; @@ -333,6 +333,7 @@ tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr, tcp_stack_t *tcps = tcp->tcp_tcps; in6_addr_t v6addr = connp->conn_laddr_v6; + zone = NULL; /* * XXX It's up to the caller to specify bind_to_req_port_only or not. */ @@ -697,7 +698,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, if (connp->conn_anon_priv_bind) { /* * loopmax = - * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1 + * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1 */ loopmax = IPPORT_RESERVED - tcps->tcps_min_anonpriv_port; diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c index e73c34de34..f2cb8f6dbd 100644 --- a/usr/src/uts/common/inet/tcp/tcp_fusion.c +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c @@ -160,7 +160,7 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcpha_t *tcpha) if (!tcp->tcp_unfusable && !peer_tcp->tcp_unfusable && tcp->tcp_xmit_head == NULL && peer_tcp->tcp_xmit_head == NULL) { - mblk_t *mp; + mblk_t *mp = NULL; queue_t *peer_rq = peer_connp->conn_rq; ASSERT(!TCP_IS_DETACHED(peer_tcp)); diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c index ece2abbc04..0aaad871ba 100644 --- a/usr/src/uts/common/inet/tcp/tcp_input.c +++ b/usr/src/uts/common/inet/tcp/tcp_input.c @@ -2469,6 +2469,7 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) tcp_unfuse(tcp); } + mss = 0; iphdr = mp->b_rptr; rptr = mp->b_rptr; ASSERT(OK_32PTR(rptr)); diff --git a/usr/src/uts/common/inet/tcp/tcp_misc.c b/usr/src/uts/common/inet/tcp/tcp_misc.c index 4f6399c433..0896dd7611 100644 --- a/usr/src/uts/common/inet/tcp/tcp_misc.c +++ b/usr/src/uts/common/inet/tcp/tcp_misc.c @@ -291,6 +291,7 @@ tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index, int *count, startover: nmatch = 0; + last = NULL; mutex_enter(&connfp->connf_lock); for (tconnp = connfp->connf_head; tconnp != NULL; diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c index ae9efe863d..7a0472f3dd 100644 --- a/usr/src/uts/common/inet/tcp/tcp_output.c +++ b/usr/src/uts/common/inet/tcp/tcp_output.c @@ -1787,7 +1787,7 @@ tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, uint32_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time) { int num_lso_seg = 1; - uint_t lso_usable; + uint_t lso_usable = 0; boolean_t do_lso_send = B_FALSE; tcp_stack_t *tcps = tcp->tcp_tcps; 
conn_t *connp = tcp->tcp_connp; diff --git a/usr/src/uts/common/inet/tcp/tcp_tpi.c b/usr/src/uts/common/inet/tcp/tcp_tpi.c index dbdc5b8dc7..6b32a0ad27 100644 --- a/usr/src/uts/common/inet/tcp/tcp_tpi.c +++ b/usr/src/uts/common/inet/tcp/tcp_tpi.c @@ -154,6 +154,10 @@ tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp, opt_offset = tcresp->OPT_offset; opt_lenp = (t_scalar_t *)&tcresp->OPT_length; break; + default: + opt_lenp = 0; + opt_offset = 0; + break; } *t_errorp = 0; diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c index 165adcb852..b2183405eb 100644 --- a/usr/src/uts/common/inet/udp/udp.c +++ b/usr/src/uts/common/inet/udp/udp.c @@ -4984,6 +4984,8 @@ udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, mlp_type_t addrtype, mlptype; udp_stack_t *us = udp->udp_us; + sin = NULL; + sin6 = NULL; switch (len) { case sizeof (sin_t): /* Complete IPv4 address */ sin = (sin_t *)sa; @@ -5697,6 +5699,10 @@ udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, udp = connp->conn_udp; us = udp->udp_us; + sin = NULL; + sin6 = NULL; + v4dst = INADDR_ANY; + flowinfo = 0; /* * Address has been verified by the caller diff --git a/usr/src/uts/common/inet/udp/udp_stats.c b/usr/src/uts/common/inet/udp/udp_stats.c index 2f5202f693..4ed1ab9773 100644 --- a/usr/src/uts/common/inet/udp/udp_stats.c +++ b/usr/src/uts/common/inet/udp/udp_stats.c @@ -93,7 +93,12 @@ udp_snmp_get(queue_t *q, mblk_t *mpctl, boolean_t legacy_req) */ mp2ctl = copymsg(mpctl); - mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; + mp6_info_ctl = NULL; + mp6_attr_ctl = NULL; + mp6_conn_ctl = NULL; + mp_info_ctl = NULL; + mp_attr_ctl = NULL; + mp_conn_ctl = NULL; if (mpctl == NULL || (mpdata = mpctl->b_cont) == NULL || (mp_conn_ctl = copymsg(mpctl)) == NULL || diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 9097e059b5..82f3989f42 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2020 Joyent, Inc. */ /* @@ -1442,8 +1442,7 @@ aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, grp->lg_rx_group_count = 1; - for (i = 0, port = grp->lg_ports; port != NULL; - i++, port = port->lp_next) { + for (port = grp->lg_ports; port != NULL; port = port->lp_next) { uint_t num_rgroups; mac_perim_enter_by_mh(port->lp_mh, &mph); diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index c8dbe00336..e764dd104e 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -22,7 +22,7 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2020 Joyent, Inc. */ /* diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c index d6200a93b4..b52483d3de 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c @@ -25,7 +25,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Saso Kiselkov. All rights reserved. 
* Copyright (c) 2013 OSN Online Service Nuernberg GmbH. All rights reserved. @@ -6546,8 +6546,10 @@ ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid) } vlp = ixgbe_find_vlan(rx_group, vid); - if (vlp == NULL) + if (vlp == NULL) { + mutex_exit(&ixgbe->gen_lock); return (ENOENT); + } /* * See the comment in ixgbe_addvlan() about is_def_grp and @@ -6601,8 +6603,10 @@ ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid) /* This shouldn't fail, but if it does return EIO. */ ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, B_TRUE); - if (ret != IXGBE_SUCCESS) + if (ret != IXGBE_SUCCESS) { + mutex_exit(&ixgbe->gen_lock); return (EIO); + } } } diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h index 0dbb3288c3..cfd987787a 100644 --- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h @@ -92,6 +92,7 @@ extern "C" { #define MAX_NUM_UNICAST_ADDRESSES 0x80 #define MAX_NUM_MULTICAST_ADDRESSES 0x1000 #define MAX_NUM_VLAN_FILTERS 0x40 + #define IXGBE_INTR_NONE 0 #define IXGBE_INTR_MSIX 1 #define IXGBE_INTR_MSI 2 diff --git a/usr/src/uts/common/io/ldterm.c b/usr/src/uts/common/io/ldterm.c index 97a9c1a478..46669ace0c 100644 --- a/usr/src/uts/common/io/ldterm.c +++ b/usr/src/uts/common/io/ldterm.c @@ -22,7 +22,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright (c) 2018, Joyent, Inc. - * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -4087,7 +4087,8 @@ ldterm_dosig(queue_t *q, int sig, uchar_t c, int mtype, int mode) if (c != '\0') { if ((tp->t_echomp = allocb(4, BPRI_HI)) != NULL) { - if (ldterm_echo(c, WR(q), 4, tp) > 0) + if (ldterm_echo(c, WR(q), 4, tp) > 0 || + (tp->t_state & TS_ISPTSTTY)) putnext(WR(q), tp->t_echomp); else freemsg(tp->t_echomp); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index f4074a2b91..d698862d81 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright 2015 Garrett D'Amore <garrett@damore.org> */ @@ -1648,7 +1648,8 @@ mac_hwrings_idx_get(mac_handle_t mh, uint_t idx, mac_group_handle_t *hwgh, if (rtype == MAC_RING_TYPE_RX) { grp = mip->mi_rx_groups; - } else if (rtype == MAC_RING_TYPE_TX) { + } else { + ASSERT(rtype == MAC_RING_TYPE_TX); grp = mip->mi_tx_groups; } @@ -5536,6 +5537,11 @@ mac_add_macaddr_vlan(mac_impl_t *mip, mac_group_t *group, uint8_t *addr, return (0); } + /* + * We failed to set promisc mode and we are about to free 'map'. + */ + map->ma_nusers = 0; + bail: if (hw_vlan) { int err2 = mac_group_remvlan(group, vid); @@ -5591,6 +5597,8 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid) if (map->ma_nusers > 0) return (0); + VERIFY3S(map->ma_nusers, ==, 0); + /* * The MAC address is no longer used by any MAC client, so * remove it from its associated group. Turn off promiscuous @@ -5615,7 +5623,16 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid) * If we fail to remove the MAC address HW * filter but then also fail to re-add the * VLAN HW filter then we are in a busted - * state and should just crash. + * state. We do our best by logging a warning + * and returning the original 'err' that got + * us here. 
At this point, traffic for this + * address + VLAN combination will be dropped + * until the user reboots the system. In the + * future, it would be nice to have a system + * that can compare the state of expected + * classification according to mac to the + * actual state of the provider, and report + * and fix any inconsistencies. */ if (MAC_GROUP_HW_VLAN(group)) { int err2; @@ -5629,6 +5646,7 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid) } } + map->ma_nusers = 1; return (err); } @@ -5642,8 +5660,10 @@ mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid) map->ma_type, __FILE__, __LINE__); } - if (err != 0) + if (err != 0) { + map->ma_nusers = 1; return (err); + } /* * We created MAC address for the primary one at registration, so we diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c index e26a028243..dcfb4803d6 100644 --- a/usr/src/uts/common/io/mac/mac_client.c +++ b/usr/src/uts/common/io/mac/mac_client.c @@ -23,6 +23,7 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright 2017 RackTop Systems. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -1287,7 +1288,7 @@ mac_addr_random(mac_client_handle_t mch, uint_t prefix_len, prefix_len, addr_len - prefix_len); } - *diag = 0; + *diag = MAC_DIAG_NONE; return (0); } @@ -2551,6 +2552,8 @@ i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags, */ ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE))); + *diag = MAC_DIAG_NONE; + /* * Can't unicast add if the client asked only for minimal datapath * setup. diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c index 656c598e53..e1dbf9a953 100644 --- a/usr/src/uts/common/io/mac/mac_datapath_setup.c +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c @@ -2892,8 +2892,8 @@ mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent, mac_group_t *default_rgroup; mac_group_t *default_tgroup; int err; - uint8_t *mac_addr; uint16_t vid; + uint8_t *mac_addr; mac_group_state_t next_state; mac_client_impl_t *group_only_mcip; mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c index 2c4ac0a1af..0f917cd8ca 100644 --- a/usr/src/uts/common/io/mac/mac_provider.c +++ b/usr/src/uts/common/io/mac/mac_provider.c @@ -735,7 +735,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) { mac_impl_t *mip = (mac_impl_t *)mh; mac_ring_t *mr = (mac_ring_t *)mrh; - mac_soft_ring_set_t *mac_srs; + mac_soft_ring_set_t *mac_srs; mblk_t *bp = mp_chain; /* diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c index 2244218f20..e1151565a6 100644 --- a/usr/src/uts/common/io/mac/mac_stat.c +++ b/usr/src/uts/common/io/mac/mac_stat.c @@ -262,7 +262,7 @@ static stat_info_t rx_srs_stats_list[] = { {RX_SRS_STAT_OFF(mrs_chaincntover50)}, {RX_SRS_STAT_OFF(mrs_ierrors)} }; -#define RX_SRS_STAT_SIZE \ +#define RX_SRS_STAT_SIZE \ (sizeof (rx_srs_stats_list) / sizeof (stat_info_t)) #define TX_SOFTRING_STAT_OFF(f) (offsetof(mac_tx_stats_t, f)) @@ -274,14 +274,14 @@ static stat_info_t tx_softring_stats_list[] = { {TX_SOFTRING_STAT_OFF(mts_unblockcnt)}, {TX_SOFTRING_STAT_OFF(mts_sdrops)}, }; -#define TX_SOFTRING_STAT_SIZE \ +#define TX_SOFTRING_STAT_SIZE \ (sizeof (tx_softring_stats_list) / sizeof (stat_info_t)) static void 
i_mac_add_stats(void *sum, void *op1, void *op2, stat_info_t stats_list[],
 uint_t size)
 {
- int i;
+ int i;
 for (i = 0; i < size; i++) {
 uint64_t *op1_val = (uint64_t *)
@@ -679,8 +679,8 @@ i_mac_rx_hwlane_stat_create(mac_soft_ring_set_t *mac_srs, const char *modname,
 static uint64_t
 i_mac_misc_stat_get(void *handle, uint_t stat)
 {
- flow_entry_t *flent = handle;
- mac_client_impl_t *mcip = flent->fe_mcip;
+ flow_entry_t *flent = handle;
+ mac_client_impl_t *mcip = flent->fe_mcip;
 mac_misc_stats_t *mac_misc_stat = &mcip->mci_misc_stat;
 mac_rx_stats_t *mac_rx_stat;
 mac_tx_stats_t *mac_tx_stat;
@@ -871,9 +871,9 @@ i_mac_tx_hwlane_stat_create(mac_soft_ring_t *ringp, const char *modname,
 static uint64_t
 i_mac_rx_fanout_stat_get(void *handle, uint_t stat)
 {
- mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle;
+ mac_soft_ring_t *tcp_ringp = (mac_soft_ring_t *)handle;
 mac_soft_ring_t *udp_ringp = NULL, *oth_ringp = NULL;
- mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set;
+ mac_soft_ring_set_t *mac_srs = tcp_ringp->s_ring_set;
 int index;
 uint64_t val;
@@ -1037,7 +1037,7 @@ void
 mac_srs_stat_create(mac_soft_ring_set_t *mac_srs)
 {
 flow_entry_t *flent = mac_srs->srs_flent;
- char statname[MAXNAMELEN];
+ char statname[MAXNAMELEN];
 boolean_t is_tx_srs;
 /* No hardware/software lanes for user defined flows */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
new file mode 100644
index 0000000000..12a8d52b3f
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -0,0 +1,2765 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Mellanox Connect-X 4/5/6 driver.
+ */
+
+/*
+ * The PRM for this family of parts is freely available, and can be found at:
+ * https://www.mellanox.com/related-docs/user_manuals/ \
+ * Ethernet_Adapters_Programming_Manual.pdf
+ */
+/*
+ * ConnectX glossary
+ * -----------------
+ *
+ * WR Work Request: something we've asked the hardware to do by
+ * creating a Work Queue Entry (WQE), e.g. send or recv a packet
+ *
+ * WQE Work Queue Entry: a descriptor on a work queue descriptor ring
+ *
+ * WQ Work Queue: a descriptor ring that we can place WQEs on, usually
+ * either a Send Queue (SQ) or Receive Queue (RQ). Different WQ
+ * types have different WQE structures, different commands for
+ * creating and destroying them, etc, but share a common context
+ * structure, counter setup and state graph.
+ * SQ Send Queue, a specific type of WQ that sends packets
+ * RQ Receive Queue, a specific type of WQ that receives packets
+ *
+ * CQ Completion Queue: completion of WRs from a WQ are reported to
+ * one of these, as a CQE on its entry ring.
+ * CQE Completion Queue Entry: an entry in a CQ ring. Contains error
+ * info, as well as packet size, the ID of the WQ, and the index
+ * of the WQE which completed. Does not contain any packet data.
+ *
+ * EQ Event Queue: a ring of event structs from the hardware informing
+ * us when particular events happen. Many events can point at
+ * a particular CQ which we should then go look at.
+ * EQE Event Queue Entry: an entry on the EQ ring
+ *
+ * UAR User Access Region, a page of the device's PCI BAR which is
+ * tied to particular EQ/CQ/WQ sets and contains doorbells to
+ * ring to arm them for interrupts or wake them up for new work
+ *
+ * RQT RQ Table, a collection of indexed RQs used to refer to the group
+ * as a single unit (for e.g. hashing/RSS).
+ *
+ * TIR Transport Interface Receive, a bucket of resources for the
+ * reception of packets. TIRs have to point at either a single RQ
+ * or a table of RQs (RQT). They then serve as a target for flow
+ * table entries (FEs). TIRs that point at an RQT also contain the
+ * settings for hashing for RSS.
+ *
+ * TIS Transport Interface Send, a bucket of resources associated with
+ * the transmission of packets. In particular, the temporary
+ * resources used for LSO internally in the card are accounted to
+ * a TIS.
+ *
+ * FT Flow Table, a collection of FEs and FGs that can be referred to
+ * as a single entity (e.g. used as a target from another flow
+ * entry or set as the "root" table to handle incoming or outgoing
+ * packets). Packets arriving at a FT are matched against the
+ * FEs in the table until either one matches with a terminating
+ * action or all FEs are exhausted (it's first-match-wins but with
+ * some actions that are non-terminal, like counting actions).
+ *
+ * FG Flow Group, a group of FEs which share a common "mask" (i.e.
+ * they match on the same attributes of packets coming into the
+ * flow).
+ *
+ * FE Flow Entry, an individual set of values to match against
+ * packets entering the flow table, combined with an action to
+ * take upon a successful match. The action we use most is
+ * "forward", which sends the packets to a TIR or another flow
+ * table and then stops further processing within the FE's FT.
+ *
+ * lkey/mkey A reference to something similar to a page table but in the
+ * device's internal onboard MMU. Since Connect-X parts double as
+ * IB cards (lots of RDMA) they have extensive onboard memory mgmt
+ * features which we try very hard not to use. For our WQEs we use
+ * the "reserved" lkey, which is a special value which indicates
+ * that addresses we give are linear addresses and should not be
+ * translated.
+ *
+ * PD Protection Domain, an IB concept. We have to allocate one to
+ * provide as a parameter for new WQs, but we don't do anything
+ * with it.
+ *
+ * TDOM/TD Transport Domain, an IB concept. We allocate one in order to
+ * provide it as a parameter to TIR/TIS creation, but we don't do
+ * anything with it.
+ */
+/*
+ *
+ * Data flow overview
+ * ------------------
+ *
+ * This driver is a MAC ring-enabled driver which maps rings to send and recv
+ * queues in hardware on the device.
+ *
+ * Each SQ and RQ is set up to report to its own individual CQ, to ensure
+ * sufficient space, and simplify the logic needed to work out which buffer
+ * was completed.
+ *
+ * The CQs are then round-robin allocated onto EQs, of which we set up one per
+ * interrupt that the system gives us for the device. Normally this means we
+ * have 8 EQs.
+ *
+ * When we have >= 8 EQs available, we try to allocate only RX or only TX
+ * CQs on each one. The EQs are chosen for RX and TX in an alternating fashion.
+ *
+ * EQ #0 is reserved for all event types other than completion events, and has
+ * no CQs associated with it at any time. EQs #1 and upwards are only used for
+ * handling CQ completion events.
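To make the placement policy above concrete before the diagram that follows, here is a small standalone sketch of the allocation it describes: EQ 0 is held back for non-completion events, the remaining vectors alternate between RX and TX duty, and CQs of each kind are dealt onto their EQ set round-robin. The vector and queue counts here are invented for illustration; the driver derives the real ones from the MSI-X allocation.

#include <stdio.h>

int
main(void)
{
	unsigned nintrs = 8;		/* MSI-X vectors granted (assumed) */
	unsigned nrqs = 6, nsqs = 6;	/* CQs to place (assumed) */
	unsigned rx_eqs[8], tx_eqs[8];
	unsigned nrx = 0, ntx = 0;

	/* EQ 0 is reserved; alternate the rest between RX and TX. */
	for (unsigned i = 1; i < nintrs; i++) {
		if (i % 2 == 1)
			rx_eqs[nrx++] = i;
		else
			tx_eqs[ntx++] = i;
	}

	/* Deal each kind of CQ onto its EQ set round-robin. */
	for (unsigned i = 0; i < nrqs; i++)
		printf("RX CQ %u -> EQ %u\n", i, rx_eqs[i % nrx]);
	for (unsigned i = 0; i < nsqs; i++)
		printf("TX CQ %u -> EQ %u\n", i, tx_eqs[i % ntx]);

	return (0);
}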
+ * + * +------+ +------+ +------+ +---------+ + * | SQ 0 |---->| CQ 0 |-----+ | EQ 0 |------> | MSI-X 0 | mlxcx_intr_0 + * +------+ +------+ | +------+ +---------+ + * | + * +------+ +------+ | + * | SQ 1 |---->| CQ 1 |---+ | +------+ + * +------+ +------+ | +---> | | + * | | | + * +------+ +------+ | | EQ 1 | +---------+ + * | SQ 2 |---->| CQ 2 |---------> | |------> | MSI-X 1 | mlxcx_intr_n + * +------+ +------+ | +---> | | +---------+ + * | | +------+ + * | | + * ... | | + * | | +------+ + * +------+ +------+ +-----> | | + * | RQ 0 |---->| CQ 3 |---------> | | +---------+ + * +------+ +------+ | | EQ 2 |------> | MSI-X 2 | mlxcx_intr_n + * | | | +---------+ + * +------+ +------+ | +-> | | + * | RQ 1 |---->| CQ 4 |-----+ | +------+ + * +------+ +------+ | + * | .... + * +------+ +------+ | + * | RQ 2 |---->| CQ 5 |-------+ + * +------+ +------+ + * + * ... (note this diagram does not show RX-only or TX-only EQs) + * + * For TX, we advertise all of the SQs we create as plain rings to MAC with + * no TX groups. This puts MAC in "virtual group" mode where it will allocate + * and use the rings as it sees fit. + * + * For RX, we advertise actual groups in order to make use of hardware + * classification. + * + * The hardware classification we use is based around Flow Tables, and we + * currently ignore all of the eswitch features of the card. The NIC VPORT + * is always set to promisc mode so that the eswitch sends us all of the + * traffic that arrives on the NIC, and we use flow entries to manage + * everything. + * + * We use 2 layers of flow tables for classification: traffic arrives at the + * root RX flow table which contains MAC address filters. Those then send + * matched traffic to the per-group L1 VLAN filter tables which contain VLAN + * presence and VID filters. + * + * Since these parts only support doing RSS hashing on a single protocol at a + * time, we have to use a third layer of flow tables as well to break traffic + * down by L4 and L3 protocol (TCPv6, TCPv4, UDPv6, UDPv4, IPv6, IPv4 etc) + * so that it can be sent to the appropriate TIR for hashing. + * + * Incoming packets + * + +---------+ +---------+ + * | +->| group 0 | | group 0 | + * | | | vlan ft | +-->| hash ft | + * v | | L1 | | | L2 | + * +----+----+ | +---------+ | +---------+ +-----+ +-----+------+ + * | eswitch | | | | | | TCPv6 |--->| TIR |--->| | RQ0 | + * +----+----+ | | | | +---------+ +-----+ | +------+ + * | | | | | | UDPv6 |--->| TIR |--->| | RQ1 | + * | | | | | +---------+ +-----+ | +------+ + * | | | | | | TCPv4 |--->| TIR |--->| | RQ2 | + * v | | | | +---------+ +-----+ | RQT +------+ + * +----+----+ | +---------+ | | UDPv4 |--->| TIR |--->| | ... | + * | root rx | | | default |--+ +---------+ +-----+ | | | + * | flow tb | | +---------+ | | IPv6 |--->| TIR |--->| | | + * | L0 | | | promisc |--+ +---------+ +-----+ | | | + * +---------+ | +---------+ ^ | IPv4 |--->| TIR |--->| | | + * | bcast |---|---------------+ +---------+ +-----+ +-----+------+ + * +---------+ | ^ | other |-+ + * | MAC 0 |---+ | +---------+ | +-----+ +-----+ + * +---------+ | +->| TIR |--->| RQ0 | + * | MAC 1 |-+ | +-----+ +-----+ + * +---------+ | +---------------+ + * | MAC 2 |-+ | ^ + * +---------+ | | | + * | MAC 3 |-+ | +---------+ | +---------+ + * +---------+ | | | group 1 | | | group 1 | + * | ..... 
| +--->| vlan ft | | +>| hash ft |
+ * | | | L1 | | | | L2 |
+ * +---------+ | +---------+ | | +---------+ +-----+ +-----+------+
+ * | promisc |---+ | VLAN 0 |----+ | TCPv6 |--->| TIR |--->| | RQ3 |
+ * +---------+ +---------+ | +---------+ +-----+ | +------+
+ * | ..... | | | UDPv6 |--->| TIR |--->| | RQ4 |
+ * | | | +---------+ +-----+ | +------+
+ * | | | | TCPv4 |--->| TIR |--->| | RQ5 |
+ * | | | +---------+ +-----+ | RQT +------+
+ * +---------+ | | UDPv4 |--->| TIR |--->| | ... |
+ * | | | +---------+ +-----+ | | |
+ * +---------+ | | IPv6 |--->| TIR |--->| | |
+ * | promisc |--+ +---------+ +-----+ | | |
+ * +---------+ | IPv4 |--->| TIR |--->| | |
+ * +---------+ +-----+ +-----+------+
+ * | other |-+
+ * +---------+ |
+ * ....... | +-----+ +-----+
+ * +->| TIR |--->| RQ3 |
+ * +-----+ +-----+
+ *
+ * Note that the "promisc" flow entries are only set/enabled when promisc
+ * mode is enabled for the NIC. All promisc flow entries point directly at
+ * group 0's hashing flowtable (so all promisc-only traffic lands on group 0,
+ * the "default group" in MAC).
+ *
+ * The "default" entry in the L1 VLAN filter flow tables is used when there
+ * are no VLANs set for the group, to accept any traffic regardless of tag. It
+ * is deleted as soon as a VLAN filter is added (and re-instated if the
+ * last VLAN filter is removed).
+ *
+ * The actual descriptor ring structures for RX on Connect-X4 don't contain any
+ * space for packet data (they're a collection of scatter pointers only). TX
+ * descriptors contain some space for "inline headers" (and the card requires
+ * us to put at least the L2 Ethernet headers there for the eswitch to look at)
+ * but all the rest of the data comes from the gather pointers.
+ *
+ * When we get completions back they simply contain the ring index number of
+ * the WR (work request) which completed. So, we manage the buffers for actual
+ * packet data completely independently of the descriptors in this driver. When
+ * a WR is enqueued in a WQE (work queue entry), we stamp the packet data buffer
+ * with the WQE index that we put it at, and therefore don't have to look at
+ * the original descriptor at all when handling completions.
+ *
+ * For RX, we create sufficient packet data buffers to fill 150% of the
+ * available descriptors for each ring. These are all pre-set-up for DMA and
+ * have an mblk_t associated with them (with desballoc()).
+ *
+ * For TX we either borrow the mblk's memory and DMA bind it (if the packet is
+ * large enough), or we copy it into a pre-allocated buffer set up in the same
+ * way as for RX.
+ */
+
+/*
+ * Buffer lifecycle: RX
+ * --------------------
+ *
+ * The lifecycle of an mlxcx_buffer_t (packet buffer) used for RX is pretty
+ * straightforward.
+ *
+ * It is created (and has all its memory allocated) at the time of starting up
+ * the RX ring it belongs to. Then it is placed on the "free" list in the
+ * mlxcx_buffer_shard_t associated with its RQ. When mlxcx_rq_refill() wants
+ * more buffers to add to the RQ, it takes one off and marks it as "on WQ"
+ * before making a WQE for it.
+ *
+ * After a completion event occurs, the packet is either discarded (and the
+ * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ *
+ * Once MAC and the rest of the system have finished with the packet, they call
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
+ * buffer_t to the free list.
+ *
+ * At detach/teardown time, buffers are only ever destroyed from the free list.
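The free-list handling that the state diagram below summarises is a simple mutex-and-list affair. A sketch of what take and return plausibly look like, using the shard fields this file manipulates in mlxcx_mlbs_create() and mlxcx_mlbs_teardown() further down; the real bodies live elsewhere in the driver, and the mlb_state bookkeeping is omitted here:

static mlxcx_buffer_t *
mlxcx_buf_take_sketch(mlxcx_buf_shard_t *s)
{
	mlxcx_buffer_t *b;

	mutex_enter(&s->mlbs_mtx);
	/* Move a buffer from the free list to the busy list, if any. */
	b = list_remove_head(&s->mlbs_free);
	if (b != NULL)
		list_insert_tail(&s->mlbs_busy, b);
	mutex_exit(&s->mlbs_mtx);
	return (b);
}

static void
mlxcx_buf_return_sketch(mlxcx_buf_shard_t *s, mlxcx_buffer_t *b)
{
	mutex_enter(&s->mlbs_mtx);
	list_remove(&s->mlbs_busy, b);
	list_insert_tail(&s->mlbs_free, b);
	/*
	 * Teardown waits on mlbs_free_nonempty until every buffer is
	 * back on the free list, so signal it on each return.
	 */
	cv_broadcast(&s->mlbs_free_nonempty);
	mutex_exit(&s->mlbs_mtx);
}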
+ * + * + * + + * | + * | mlxcx_buf_create + * | + * v + * +----+----+ + * | created | + * +----+----+ + * | + * | + * | mlxcx_buf_return + * | + * v + * mlxcx_buf_destroy +----+----+ + * +---------| free |<---------------+ + * | +----+----+ | + * | | | + * | | | mlxcx_buf_return + * v | mlxcx_buf_take | + * +---+--+ v | + * | dead | +---+---+ | + * +------+ | on WQ |- - - - - - - - >O + * +---+---+ ^ + * | | + * | | + * | mlxcx_buf_loan | mlxcx_buf_mp_return + * v | + * +-------+--------+ | + * | on loan to MAC |----------->O + * +----------------+ freemsg() + * + */ + +/* + * Buffer lifecycle: TX + * -------------------- + * + * mlxcx_buffer_ts used for TX are divided into two kinds: regular buffers, and + * "foreign" buffers. + * + * The former have their memory allocated and DMA bound by this driver, while + * the latter (the "foreign" buffers) are on loan from MAC. Their memory is + * not owned by us, though we do DMA bind it (and take responsibility for + * un-binding it when we're done with them). + * + * We use separate mlxcx_buf_shard_ts for foreign and local buffers on each + * SQ. Thus, there is a separate free list and mutex for each kind. + * + * Since a TX packet might consist of multiple mblks, we translate each mblk + * into exactly one buffer_t. The buffer_ts are chained together in the same + * order as the mblks, using the mlb_tx_chain/mlb_tx_chain_entry list_t. + * + * Each chain of TX buffers may consist of foreign or driver buffers, in any + * mixture. + * + * The head of a TX buffer chain has mlb_tx_head == itself, which distinguishes + * it from the rest of the chain buffers. + * + * TX buffer chains are always returned to the free list by + * mlxcx_buf_return_chain(), which takes care of walking the mlb_tx_chain and + * freeing all of the members. + * + * We only call freemsg() once, on the head of the TX buffer chain's original + * mblk. This is true whether we copied it or bound it in a foreign buffer. + */ + +/* + * Startup and command interface + * ----------------------------- + * + * The command interface is the primary way in which we give control orders to + * the hardware (e.g. actions like "create this queue" or "delete this flow + * entry"). The command interface is never used to transmit or receive packets + * -- that takes place only on the queues that are set up through it. + * + * In mlxcx_cmd.c we implement our use of the command interface on top of a + * simple taskq. Since it's not performance critical, we busy-wait on command + * completions and only process a single command at a time. + * + * If this becomes a problem later we can wire command completions up to EQ 0 + * once we have interrupts running. + * + * The startup/attach process for this card involves a bunch of different steps + * which are summarised pretty well in the PRM. We have to send a number of + * commands which do different things to start the card up, give it some pages + * of our own memory for it to use, then start creating all the entities that + * we need to use like EQs, CQs, WQs, as well as their dependencies like PDs + * and TDoms. + */ + +/* + * UARs + * ---- + * + * The pages of the PCI BAR other than the first few are reserved for use as + * "UAR" sections in this device. Each UAR section can be used as a set of + * doorbells for our queues. + * + * Currently we just make one single UAR for all of our queues. It doesn't + * seem to be a major limitation yet. 
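Since the UAR lives inside the BAR mapping we already have, ringing a doorbell is nothing more than a 32- or 64-bit store at an offset computed from three parts, as the mlxcx_uar_put32()/mlxcx_uar_put64() helpers later in this file show. A standalone sketch of that arithmetic, with all three values invented for illustration:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uintptr_t regs_base = 0xfe000000;	/* mapped BAR VA (invented) */
	uintptr_t mlu_base = 5 * 4096;		/* this UAR's page offset */
	uintptr_t off = 0x20;			/* doorbell within the page */

	/* Same shape as mlxcx_uar_put32(): off + mlu_base + regs_base. */
	uintptr_t addr = off + mlu_base + regs_base;
	printf("doorbell write lands at %#lx\n", (unsigned long)addr);
	return (0);
}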
+ * + * When we're sending packets through an SQ, the PRM is not awful clear about + * exactly how we're meant to use the first 16 bytes of the Blueflame buffers + * (it's clear on the pattern of alternation you're expected to use between + * even and odd for Blueflame sends, but not for regular doorbells). + * + * Currently we don't do the even-odd alternating pattern for ordinary + * doorbells, and we don't use Blueflame at all. This seems to work fine, at + * least on Connect-X4 Lx. + */ + +/* + * Lock ordering + * ------------- + * + * Interrupt side: + * + * - mleq_mtx + * - mlcq_mtx + * - mlcq_bufbmtx + * - mlwq_mtx + * - mlbs_mtx + * - mlp_mtx + * + * GLD side: + * + * - mlp_mtx + * - mlg_mtx + * - mlg_*.mlft_mtx + * - mlp_*.mlft_mtx + * - mlwq_mtx + * - mlbs_mtx + * - mlcq_bufbmtx + * - mleq_mtx + * - mlcq_mtx + * + */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/sysmacros.h> +#include <sys/time.h> + +#include <sys/mac_provider.h> + +#include <mlxcx.h> + +CTASSERT((1 << MLXCX_RX_HASH_FT_SIZE_SHIFT) >= MLXCX_TIRS_PER_GROUP); + +#define MLXCX_MODULE_NAME "mlxcx" +/* + * We give this to the firmware, so it has to be in a fixed format that it + * understands. + */ +#define MLXCX_DRIVER_VERSION "illumos,mlxcx,1.0.0,1,000,000000" + +/* + * Firmware may take a while to reclaim pages. Try a set number of times. + */ +clock_t mlxcx_reclaim_delay = 1000 * 50; /* 50 ms in us */ +uint_t mlxcx_reclaim_tries = 100; /* Wait at most 5000ms */ + +static void *mlxcx_softstate; + +/* + * Fault detection thresholds. + */ +uint_t mlxcx_doorbell_tries = MLXCX_DOORBELL_TRIES_DFLT; +uint_t mlxcx_stuck_intr_count = MLXCX_STUCK_INTR_COUNT_DFLT; + +static void +mlxcx_load_props(mlxcx_t *mlxp) +{ + mlxcx_drv_props_t *p = &mlxp->mlx_props; + + p->mldp_eq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "eq_size_shift", + MLXCX_EQ_SIZE_SHIFT_DFLT); + p->mldp_cq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cq_size_shift", + MLXCX_CQ_SIZE_SHIFT_DFLT); + p->mldp_sq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "sq_size_shift", + MLXCX_SQ_SIZE_SHIFT_DFLT); + p->mldp_rq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rq_size_shift", + MLXCX_RQ_SIZE_SHIFT_DFLT); + + p->mldp_cqemod_period_usec = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cqemod_period_usec", + MLXCX_CQEMOD_PERIOD_USEC_DFLT); + p->mldp_cqemod_count = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cqemod_count", + MLXCX_CQEMOD_COUNT_DFLT); + p->mldp_intrmod_period_usec = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "intrmod_period_usec", + MLXCX_INTRMOD_PERIOD_USEC_DFLT); + + p->mldp_tx_ngroups = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_ngroups", + MLXCX_TX_NGROUPS_DFLT); + p->mldp_tx_nrings_per_group = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_nrings_per_group", + MLXCX_TX_NRINGS_PER_GROUP_DFLT); + + p->mldp_rx_ngroups_large = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_ngroups_large", + MLXCX_RX_NGROUPS_LARGE_DFLT); + p->mldp_rx_ngroups_small = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_ngroups_small", + 
MLXCX_RX_NGROUPS_SMALL_DFLT); + p->mldp_rx_nrings_per_large_group = ddi_getprop(DDI_DEV_T_ANY, + mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, + "rx_nrings_per_large_group", MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT); + p->mldp_rx_nrings_per_small_group = ddi_getprop(DDI_DEV_T_ANY, + mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, + "rx_nrings_per_small_group", MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT); + + p->mldp_ftbl_root_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "ftbl_root_size_shift", + MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT); + + p->mldp_tx_bind_threshold = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_bind_threshold", + MLXCX_TX_BIND_THRESHOLD_DFLT); + + p->mldp_ftbl_vlan_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "ftbl_vlan_size_shift", + MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT); + + p->mldp_eq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY, + mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, + "eq_check_interval_sec", MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT); + p->mldp_cq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY, + mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, + "cq_check_interval_sec", MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT); + p->mldp_wq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY, + mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, + "wq_check_interval_sec", MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT); +} + +void +mlxcx_note(mlxcx_t *mlxp, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (mlxp != NULL && mlxp->mlx_dip != NULL) { + vdev_err(mlxp->mlx_dip, CE_NOTE, fmt, ap); + } else { + vcmn_err(CE_NOTE, fmt, ap); + } + va_end(ap); +} + +void +mlxcx_warn(mlxcx_t *mlxp, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (mlxp != NULL && mlxp->mlx_dip != NULL) { + vdev_err(mlxp->mlx_dip, CE_WARN, fmt, ap); + } else { + vcmn_err(CE_WARN, fmt, ap); + } + va_end(ap); +} + +void +mlxcx_panic(mlxcx_t *mlxp, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + if (mlxp != NULL && mlxp->mlx_dip != NULL) { + vdev_err(mlxp->mlx_dip, CE_PANIC, fmt, ap); + } else { + vcmn_err(CE_PANIC, fmt, ap); + } + va_end(ap); +} + +uint16_t +mlxcx_get16(mlxcx_t *mlxp, uintptr_t off) +{ + uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base; + return (ddi_get16(mlxp->mlx_regs_handle, (void *)addr)); +} + +uint32_t +mlxcx_get32(mlxcx_t *mlxp, uintptr_t off) +{ + uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base; + return (ddi_get32(mlxp->mlx_regs_handle, (void *)addr)); +} + +uint64_t +mlxcx_get64(mlxcx_t *mlxp, uintptr_t off) +{ + uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base; + return (ddi_get64(mlxp->mlx_regs_handle, (void *)addr)); +} + +void +mlxcx_put32(mlxcx_t *mlxp, uintptr_t off, uint32_t val) +{ + uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base; + ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val); +} + +void +mlxcx_put64(mlxcx_t *mlxp, uintptr_t off, uint64_t val) +{ + uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base; + ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val); +} + +void +mlxcx_uar_put32(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint32_t val) +{ + /* + * The UAR is always inside the first BAR, which we mapped as + * mlx_regs + */ + uintptr_t addr = off + (uintptr_t)mlu->mlu_base + + (uintptr_t)mlxp->mlx_regs_base; + ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val); +} + +void +mlxcx_uar_put64(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint64_t val) +{ + uintptr_t addr = off + (uintptr_t)mlu->mlu_base + + (uintptr_t)mlxp->mlx_regs_base; + ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val); +} + +static void +mlxcx_fm_fini(mlxcx_t *mlxp) +{ + if (mlxp->mlx_fm_caps == 0) + return; + + if (DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) + ddi_fm_handler_unregister(mlxp->mlx_dip); + + if (DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps) || + DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) + pci_ereport_teardown(mlxp->mlx_dip); + + ddi_fm_fini(mlxp->mlx_dip); + + mlxp->mlx_fm_caps = 0; +} + +void +mlxcx_fm_ereport(mlxcx_t *mlxp, const char *detail) +{ + uint64_t ena; + char buf[FM_MAX_CLASS]; + + if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps)) + return; + + (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail); + ena = fm_ena_generate(0, FM_ENA_FMT1); + ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP, + FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, + NULL); +} + +static int +mlxcx_fm_errcb(dev_info_t *dip, ddi_fm_error_t *err, const void *arg) +{ + /* + * as the driver can always deal with an error in any dma or + * access handle, we can just return the fme_status value. 
+ */ + pci_ereport_post(dip, err, NULL); + return (err->fme_status); +} + +static void +mlxcx_fm_init(mlxcx_t *mlxp) +{ + ddi_iblock_cookie_t iblk; + int def = DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE | + DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE; + + mlxp->mlx_fm_caps = ddi_prop_get_int(DDI_DEV_T_ANY, mlxp->mlx_dip, + DDI_PROP_DONTPASS, "fm_capable", def); + + if (mlxp->mlx_fm_caps < 0) { + mlxp->mlx_fm_caps = 0; + } + mlxp->mlx_fm_caps &= def; + + if (mlxp->mlx_fm_caps == 0) + return; + + ddi_fm_init(mlxp->mlx_dip, &mlxp->mlx_fm_caps, &iblk); + if (DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps) || + DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) { + pci_ereport_setup(mlxp->mlx_dip); + } + if (DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) { + ddi_fm_handler_register(mlxp->mlx_dip, mlxcx_fm_errcb, + (void *)mlxp); + } +} + +static void +mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s) +{ + mlxcx_buffer_t *buf; + + mutex_enter(&s->mlbs_mtx); + while (!list_is_empty(&s->mlbs_busy)) + cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); + while ((buf = list_head(&s->mlbs_free)) != NULL) { + mlxcx_buf_destroy(mlxp, buf); + } + list_destroy(&s->mlbs_free); + list_destroy(&s->mlbs_busy); + mutex_exit(&s->mlbs_mtx); + + cv_destroy(&s->mlbs_free_nonempty); + mutex_destroy(&s->mlbs_mtx); +} + +static void +mlxcx_teardown_bufs(mlxcx_t *mlxp) +{ + mlxcx_buf_shard_t *s; + + while ((s = list_remove_head(&mlxp->mlx_buf_shards)) != NULL) { + mlxcx_mlbs_teardown(mlxp, s); + kmem_free(s, sizeof (mlxcx_buf_shard_t)); + } + list_destroy(&mlxp->mlx_buf_shards); + + kmem_cache_destroy(mlxp->mlx_bufs_cache); +} + +static void +mlxcx_teardown_pages(mlxcx_t *mlxp) +{ + uint_t nzeros = 0; + + mutex_enter(&mlxp->mlx_pagemtx); + + while (mlxp->mlx_npages > 0) { + int32_t req, ret; + uint64_t pas[MLXCX_MANAGE_PAGES_MAX_PAGES]; + + ASSERT0(avl_is_empty(&mlxp->mlx_pages)); + req = MIN(mlxp->mlx_npages, MLXCX_MANAGE_PAGES_MAX_PAGES); + + if (!mlxcx_cmd_return_pages(mlxp, req, pas, &ret)) { + mlxcx_warn(mlxp, "hardware refused to return pages, " + "leaking %u remaining pages", mlxp->mlx_npages); + goto out; + } + + for (int32_t i = 0; i < ret; i++) { + mlxcx_dev_page_t *mdp, probe; + bzero(&probe, sizeof (probe)); + probe.mxdp_pa = pas[i]; + + mdp = avl_find(&mlxp->mlx_pages, &probe, NULL); + + if (mdp != NULL) { + avl_remove(&mlxp->mlx_pages, mdp); + mlxp->mlx_npages--; + mlxcx_dma_free(&mdp->mxdp_dma); + kmem_free(mdp, sizeof (mlxcx_dev_page_t)); + } else { + mlxcx_panic(mlxp, "hardware returned a page " + "with PA 0x%" PRIx64 " but we have no " + "record of giving out such a page", pas[i]); + } + } + + /* + * If no pages were returned, note that fact. 
+ */ + if (ret == 0) { + nzeros++; + if (nzeros > mlxcx_reclaim_tries) { + mlxcx_warn(mlxp, "hardware refused to return " + "pages, leaking %u remaining pages", + mlxp->mlx_npages); + goto out; + } + delay(drv_usectohz(mlxcx_reclaim_delay)); + } + } + + avl_destroy(&mlxp->mlx_pages); + +out: + mutex_exit(&mlxp->mlx_pagemtx); + mutex_destroy(&mlxp->mlx_pagemtx); +} + +static boolean_t +mlxcx_eq_alloc_dma(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq) +{ + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + boolean_t ret; + size_t sz, i; + + VERIFY0(mleq->mleq_state & MLXCX_EQ_ALLOC); + + mleq->mleq_entshift = mlxp->mlx_props.mldp_eq_size_shift; + mleq->mleq_nents = (1 << mleq->mleq_entshift); + sz = mleq->mleq_nents * sizeof (mlxcx_eventq_ent_t); + ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0); + + mlxcx_dma_acc_attr(mlxp, &acc); + mlxcx_dma_queue_attr(mlxp, &attr); + + ret = mlxcx_dma_alloc(mlxp, &mleq->mleq_dma, &attr, &acc, + B_TRUE, sz, B_TRUE); + if (!ret) { + mlxcx_warn(mlxp, "failed to allocate EQ memory"); + return (B_FALSE); + } + + mleq->mleq_ent = (mlxcx_eventq_ent_t *)mleq->mleq_dma.mxdb_va; + + for (i = 0; i < mleq->mleq_nents; ++i) + mleq->mleq_ent[i].mleqe_owner = MLXCX_EQ_OWNER_INIT; + + mleq->mleq_state |= MLXCX_EQ_ALLOC; + + return (B_TRUE); +} + +static void +mlxcx_eq_rele_dma(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq) +{ + VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC); + if (mleq->mleq_state & MLXCX_EQ_CREATED) + VERIFY(mleq->mleq_state & MLXCX_EQ_DESTROYED); + + mlxcx_dma_free(&mleq->mleq_dma); + mleq->mleq_ent = NULL; + + mleq->mleq_state &= ~MLXCX_EQ_ALLOC; +} + +void +mlxcx_teardown_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *ft) +{ + mlxcx_flow_group_t *fg; + mlxcx_flow_entry_t *fe; + int i; + + ASSERT(mutex_owned(&ft->mlft_mtx)); + + for (i = ft->mlft_nents - 1; i >= 0; --i) { + fe = &ft->mlft_ent[i]; + if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { + if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) { + mlxcx_panic(mlxp, "failed to delete flow " + "entry %u on table %u", i, + ft->mlft_num); + } + } + } + + while ((fg = list_remove_head(&ft->mlft_groups)) != NULL) { + if (fg->mlfg_state & MLXCX_FLOW_GROUP_CREATED && + !(fg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED)) { + if (!mlxcx_cmd_destroy_flow_group(mlxp, fg)) { + mlxcx_panic(mlxp, "failed to destroy flow " + "group %u", fg->mlfg_num); + } + } + kmem_free(fg, sizeof (mlxcx_flow_group_t)); + } + list_destroy(&ft->mlft_groups); + if (ft->mlft_state & MLXCX_FLOW_TABLE_CREATED && + !(ft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED)) { + if (!mlxcx_cmd_destroy_flow_table(mlxp, ft)) { + mlxcx_panic(mlxp, "failed to destroy flow table %u", + ft->mlft_num); + } + } + kmem_free(ft->mlft_ent, ft->mlft_entsize); + ft->mlft_ent = NULL; + mutex_exit(&ft->mlft_mtx); + mutex_destroy(&ft->mlft_mtx); + kmem_free(ft, sizeof (mlxcx_flow_table_t)); +} + +static void +mlxcx_teardown_ports(mlxcx_t *mlxp) +{ + uint_t i; + mlxcx_port_t *p; + mlxcx_flow_table_t *ft; + + for (i = 0; i < mlxp->mlx_nports; ++i) { + p = &mlxp->mlx_ports[i]; + if (!(p->mlp_init & MLXCX_PORT_INIT)) + continue; + mutex_enter(&p->mlp_mtx); + if ((ft = p->mlp_rx_flow) != NULL) { + mutex_enter(&ft->mlft_mtx); + /* + * teardown_flow_table() will destroy the mutex, so + * we don't release it here. 
+ */ + mlxcx_teardown_flow_table(mlxp, ft); + } + mutex_exit(&p->mlp_mtx); + mutex_destroy(&p->mlp_mtx); + p->mlp_init &= ~MLXCX_PORT_INIT; + } + + kmem_free(mlxp->mlx_ports, mlxp->mlx_ports_size); + mlxp->mlx_ports = NULL; +} + +static void +mlxcx_teardown_wqs(mlxcx_t *mlxp) +{ + mlxcx_work_queue_t *mlwq; + + while ((mlwq = list_head(&mlxp->mlx_wqs)) != NULL) { + mlxcx_wq_teardown(mlxp, mlwq); + } + list_destroy(&mlxp->mlx_wqs); +} + +static void +mlxcx_teardown_cqs(mlxcx_t *mlxp) +{ + mlxcx_completion_queue_t *mlcq; + + while ((mlcq = list_head(&mlxp->mlx_cqs)) != NULL) { + mlxcx_cq_teardown(mlxp, mlcq); + } + list_destroy(&mlxp->mlx_cqs); +} + +static void +mlxcx_teardown_eqs(mlxcx_t *mlxp) +{ + mlxcx_event_queue_t *mleq; + uint_t i; + + for (i = 0; i < mlxp->mlx_intr_count; ++i) { + mleq = &mlxp->mlx_eqs[i]; + mutex_enter(&mleq->mleq_mtx); + if ((mleq->mleq_state & MLXCX_EQ_CREATED) && + !(mleq->mleq_state & MLXCX_EQ_DESTROYED)) { + if (!mlxcx_cmd_destroy_eq(mlxp, mleq)) { + mlxcx_warn(mlxp, "failed to destroy " + "event queue idx %u eqn %u", + i, mleq->mleq_num); + } + } + if (mleq->mleq_state & MLXCX_EQ_ALLOC) { + mlxcx_eq_rele_dma(mlxp, mleq); + } + mutex_exit(&mleq->mleq_mtx); + } +} + +static void +mlxcx_teardown_checktimers(mlxcx_t *mlxp) +{ + if (mlxp->mlx_props.mldp_eq_check_interval_sec > 0) + ddi_periodic_delete(mlxp->mlx_eq_checktimer); + if (mlxp->mlx_props.mldp_cq_check_interval_sec > 0) + ddi_periodic_delete(mlxp->mlx_cq_checktimer); + if (mlxp->mlx_props.mldp_wq_check_interval_sec > 0) + ddi_periodic_delete(mlxp->mlx_wq_checktimer); +} + +static void +mlxcx_teardown(mlxcx_t *mlxp) +{ + uint_t i; + dev_info_t *dip = mlxp->mlx_dip; + + if (mlxp->mlx_attach & MLXCX_ATTACH_GROUPS) { + mlxcx_teardown_groups(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_GROUPS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_CHKTIMERS) { + mlxcx_teardown_checktimers(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_CHKTIMERS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_WQS) { + mlxcx_teardown_wqs(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_WQS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_CQS) { + mlxcx_teardown_cqs(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_CQS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_BUFS) { + mlxcx_teardown_bufs(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_BUFS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_PORTS) { + mlxcx_teardown_ports(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_PORTS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_INTRS) { + mlxcx_teardown_eqs(mlxp); + mlxcx_intr_teardown(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_INTRS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_UAR_PD_TD) { + if (mlxp->mlx_uar.mlu_allocated) { + if (!mlxcx_cmd_dealloc_uar(mlxp, &mlxp->mlx_uar)) { + mlxcx_warn(mlxp, "failed to release UAR"); + } + for (i = 0; i < MLXCX_BF_PER_UAR; ++i) + mutex_destroy(&mlxp->mlx_uar.mlu_bf[i].mbf_mtx); + } + if (mlxp->mlx_pd.mlpd_allocated && + !mlxcx_cmd_dealloc_pd(mlxp, &mlxp->mlx_pd)) { + mlxcx_warn(mlxp, "failed to release PD"); + } + if (mlxp->mlx_tdom.mltd_allocated && + !mlxcx_cmd_dealloc_tdom(mlxp, &mlxp->mlx_tdom)) { + mlxcx_warn(mlxp, "failed to release TDOM"); + } + mlxp->mlx_attach &= ~MLXCX_ATTACH_UAR_PD_TD; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_INIT_HCA) { + if (!mlxcx_cmd_teardown_hca(mlxp)) { + mlxcx_warn(mlxp, "failed to send teardown HCA " + "command during device detach"); + } + mlxp->mlx_attach &= ~MLXCX_ATTACH_INIT_HCA; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_PAGE_LIST) { + mlxcx_teardown_pages(mlxp); + mlxp->mlx_attach &= 
~MLXCX_ATTACH_PAGE_LIST; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_ENABLE_HCA) { + if (!mlxcx_cmd_disable_hca(mlxp)) { + mlxcx_warn(mlxp, "failed to send DISABLE HCA command " + "during device detach"); + } + mlxp->mlx_attach &= ~MLXCX_ATTACH_ENABLE_HCA; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_CMD) { + mlxcx_cmd_queue_fini(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_CMD; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_CAPS) { + kmem_free(mlxp->mlx_caps, sizeof (mlxcx_caps_t)); + mlxp->mlx_caps = NULL; + mlxp->mlx_attach &= ~MLXCX_ATTACH_CAPS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_REGS) { + ddi_regs_map_free(&mlxp->mlx_regs_handle); + mlxp->mlx_regs_handle = NULL; + mlxp->mlx_attach &= ~MLXCX_ATTACH_REGS; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_PCI_CONFIG) { + pci_config_teardown(&mlxp->mlx_cfg_handle); + mlxp->mlx_cfg_handle = NULL; + mlxp->mlx_attach &= ~MLXCX_ATTACH_PCI_CONFIG; + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_FM) { + mlxcx_fm_fini(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_FM; + } + + VERIFY3S(mlxp->mlx_attach, ==, 0); + ddi_soft_state_free(mlxcx_softstate, mlxp->mlx_inst); + ddi_set_driver_private(dip, NULL); +} + +static boolean_t +mlxcx_regs_map(mlxcx_t *mlxp) +{ + off_t memsize; + int ret; + ddi_device_acc_attr_t da; + + if (ddi_dev_regsize(mlxp->mlx_dip, MLXCX_REG_NUMBER, &memsize) != + DDI_SUCCESS) { + mlxcx_warn(mlxp, "failed to get register set size"); + return (B_FALSE); + } + + /* + * All data in the main BAR is kept in big-endian even though it's a PCI + * device. + */ + bzero(&da, sizeof (ddi_device_acc_attr_t)); + da.devacc_attr_version = DDI_DEVICE_ATTR_V0; + da.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC; + da.devacc_attr_dataorder = DDI_STRICTORDER_ACC; + if (DDI_FM_ACC_ERR_CAP(mlxp->mlx_fm_caps)) { + da.devacc_attr_access = DDI_FLAGERR_ACC; + } else { + da.devacc_attr_access = DDI_DEFAULT_ACC; + } + + ret = ddi_regs_map_setup(mlxp->mlx_dip, MLXCX_REG_NUMBER, + &mlxp->mlx_regs_base, 0, memsize, &da, &mlxp->mlx_regs_handle); + + if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "failed to map device registers: %d", ret); + return (B_FALSE); + } + + return (B_TRUE); +} + +static boolean_t +mlxcx_check_issi(mlxcx_t *mlxp) +{ + uint32_t issi; + + if (!mlxcx_cmd_query_issi(mlxp, &issi)) { + mlxcx_warn(mlxp, "failed to get ISSI"); + return (B_FALSE); + } + + if ((issi & (1 << MLXCX_CURRENT_ISSI)) == 0) { + mlxcx_warn(mlxp, "hardware does not support software ISSI, " + "hw vector 0x%x, sw version %u", issi, MLXCX_CURRENT_ISSI); + return (B_FALSE); + } + + if (!mlxcx_cmd_set_issi(mlxp, MLXCX_CURRENT_ISSI)) { + mlxcx_warn(mlxp, "failed to set ISSI to %u", + MLXCX_CURRENT_ISSI); + return (B_FALSE); + } + + return (B_TRUE); +} + +boolean_t +mlxcx_give_pages(mlxcx_t *mlxp, int32_t npages) +{ + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + int32_t i; + list_t plist; + mlxcx_dev_page_t *mdp; + const ddi_dma_cookie_t *ck; + + /* + * If there are no pages required, then we're done here. 
+ */
+ if (npages <= 0) {
+ return (B_TRUE);
+ }
+
+ list_create(&plist, sizeof (mlxcx_dev_page_t),
+ offsetof(mlxcx_dev_page_t, mxdp_list));
+
+ for (i = 0; i < npages; i++) {
+ mdp = kmem_zalloc(sizeof (mlxcx_dev_page_t), KM_SLEEP);
+ mlxcx_dma_acc_attr(mlxp, &acc);
+ mlxcx_dma_page_attr(mlxp, &attr);
+ if (!mlxcx_dma_alloc(mlxp, &mdp->mxdp_dma, &attr, &acc,
+ B_TRUE, MLXCX_HW_PAGE_SIZE, B_TRUE)) {
+ mlxcx_warn(mlxp, "failed to allocate 4k page %u/%u", i,
+ npages);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ goto cleanup_npages;
+ }
+ ck = mlxcx_dma_cookie_one(&mdp->mxdp_dma);
+ mdp->mxdp_pa = ck->dmac_laddress;
+
+ list_insert_tail(&plist, mdp);
+ }
+
+ /*
+ * Now that all of the pages have been allocated, give them to hardware
+ * in chunks.
+ */
+ while (npages > 0) {
+ mlxcx_dev_page_t *pages[MLXCX_MANAGE_PAGES_MAX_PAGES];
+ int32_t togive = MIN(MLXCX_MANAGE_PAGES_MAX_PAGES, npages);
+
+ for (i = 0; i < togive; i++) {
+ pages[i] = list_remove_head(&plist);
+ }
+
+ if (!mlxcx_cmd_give_pages(mlxp,
+ MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES, togive, pages)) {
+ mlxcx_warn(mlxp, "!hardware refused our gift of %u "
+ "pages!", togive);
+ for (i = 0; i < togive; i++) {
+ list_insert_tail(&plist, pages[i]);
+ }
+ goto cleanup_npages;
+ }
+
+ mutex_enter(&mlxp->mlx_pagemtx);
+ for (i = 0; i < togive; i++) {
+ avl_add(&mlxp->mlx_pages, pages[i]);
+ }
+ mlxp->mlx_npages += togive;
+ mutex_exit(&mlxp->mlx_pagemtx);
+ npages -= togive;
+ }
+
+ list_destroy(&plist);
+
+ return (B_TRUE);
+
+cleanup_npages:
+ while ((mdp = list_remove_head(&plist)) != NULL) {
+ mlxcx_dma_free(&mdp->mxdp_dma);
+ kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+ }
+ list_destroy(&plist);
+ return (B_FALSE);
+}
+
+static boolean_t
+mlxcx_init_pages(mlxcx_t *mlxp, uint_t type)
+{
+ int32_t npages;
+
+ if (!mlxcx_cmd_query_pages(mlxp, type, &npages)) {
+ mlxcx_warn(mlxp, "failed to determine boot pages");
+ return (B_FALSE);
+ }
+
+ return (mlxcx_give_pages(mlxp, npages));
+}
+
+static int
+mlxcx_bufs_cache_constr(void *arg, void *cookie, int kmflags)
+{
+ mlxcx_t *mlxp = cookie;
+ mlxcx_buffer_t *b = arg;
+
+ bzero(b, sizeof (mlxcx_buffer_t));
+ b->mlb_mlx = mlxp;
+ b->mlb_state = MLXCX_BUFFER_INIT;
+ list_create(&b->mlb_tx_chain, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_tx_chain_entry));
+
+ return (0);
+}
+
+static void
+mlxcx_bufs_cache_destr(void *arg, void *cookie)
+{
+ mlxcx_t *mlxp = cookie;
+ mlxcx_buffer_t *b = arg;
+ VERIFY3P(b->mlb_mlx, ==, mlxp);
+ VERIFY(b->mlb_state == MLXCX_BUFFER_INIT);
+ list_destroy(&b->mlb_tx_chain);
+}
+
+mlxcx_buf_shard_t *
+mlxcx_mlbs_create(mlxcx_t *mlxp)
+{
+ mlxcx_buf_shard_t *s;
+
+ s = kmem_zalloc(sizeof (mlxcx_buf_shard_t), KM_SLEEP);
+
+ mutex_init(&s->mlbs_mtx, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ list_create(&s->mlbs_busy, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
+ cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
+
+ list_insert_tail(&mlxp->mlx_buf_shards, s);
+
+ return (s);
+}
+
+static boolean_t
+mlxcx_setup_bufs(mlxcx_t *mlxp)
+{
+ char namebuf[KSTAT_STRLEN];
+
+ (void) snprintf(namebuf, KSTAT_STRLEN, "mlxcx%d_bufs_cache",
+ ddi_get_instance(mlxp->mlx_dip));
+ mlxp->mlx_bufs_cache = kmem_cache_create(namebuf,
+ sizeof (mlxcx_buffer_t), sizeof (uint64_t),
+ mlxcx_bufs_cache_constr, mlxcx_bufs_cache_destr,
+ NULL, mlxp, NULL, 0);
+
+ list_create(&mlxp->mlx_buf_shards, sizeof (mlxcx_buf_shard_t),
offsetof(mlxcx_buf_shard_t, mlbs_entry)); + + return (B_TRUE); +} + +static void +mlxcx_fm_qstate_ereport(mlxcx_t *mlxp, const char *qtype, uint32_t qnum, + const char *state, uint8_t statenum) +{ + uint64_t ena; + char buf[FM_MAX_CLASS]; + + if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps)) + return; + + (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", + MLXCX_FM_SERVICE_MLXCX, "qstate.err"); + ena = fm_ena_generate(0, FM_ENA_FMT1); + + ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP, + FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, + "state", DATA_TYPE_STRING, state, + "state_num", DATA_TYPE_UINT8, statenum, + "qtype", DATA_TYPE_STRING, qtype, + "qnum", DATA_TYPE_UINT32, qnum, + NULL); + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_DEGRADED); +} + +static void +mlxcx_eq_check(void *arg) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_event_queue_t *eq; + mlxcx_eventq_ctx_t ctx; + const char *str; + + uint_t i; + + for (i = 0; i < mlxp->mlx_intr_count; ++i) { + eq = &mlxp->mlx_eqs[i]; + if (!(eq->mleq_state & MLXCX_EQ_CREATED) || + (eq->mleq_state & MLXCX_EQ_DESTROYED)) + continue; + mutex_enter(&eq->mleq_mtx); + if (!mlxcx_cmd_query_eq(mlxp, eq, &ctx)) { + mutex_exit(&eq->mleq_mtx); + continue; + } + + str = "???"; + switch (ctx.mleqc_status) { + case MLXCX_EQ_STATUS_OK: + break; + case MLXCX_EQ_STATUS_WRITE_FAILURE: + str = "WRITE_FAILURE"; + break; + } + if (ctx.mleqc_status != MLXCX_EQ_STATUS_OK) { + mlxcx_fm_qstate_ereport(mlxp, "event", + eq->mleq_num, str, ctx.mleqc_status); + mlxcx_warn(mlxp, "EQ %u is in bad status: %x (%s)", + eq->mleq_intr_index, ctx.mleqc_status, str); + } + + if (ctx.mleqc_state != MLXCX_EQ_ST_ARMED && + (eq->mleq_state & MLXCX_EQ_ARMED)) { + if (eq->mleq_cc == eq->mleq_check_disarm_cc && + ++eq->mleq_check_disarm_cnt >= 3) { + mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_STALL); + mlxcx_warn(mlxp, "EQ %u isn't armed", + eq->mleq_intr_index); + } + eq->mleq_check_disarm_cc = eq->mleq_cc; + } else { + eq->mleq_check_disarm_cc = 0; + eq->mleq_check_disarm_cnt = 0; + } + + mutex_exit(&eq->mleq_mtx); + } +} + +static void +mlxcx_cq_check(void *arg) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_completion_queue_t *cq; + mlxcx_completionq_ctx_t ctx; + const char *str, *type; + uint_t v; + + for (cq = list_head(&mlxp->mlx_cqs); cq != NULL; + cq = list_next(&mlxp->mlx_cqs, cq)) { + mutex_enter(&cq->mlcq_mtx); + if (!(cq->mlcq_state & MLXCX_CQ_CREATED) || + (cq->mlcq_state & MLXCX_CQ_DESTROYED) || + (cq->mlcq_state & MLXCX_CQ_TEARDOWN)) { + mutex_exit(&cq->mlcq_mtx); + continue; + } + if (cq->mlcq_fm_repd_qstate) { + mutex_exit(&cq->mlcq_mtx); + continue; + } + if (!mlxcx_cmd_query_cq(mlxp, cq, &ctx)) { + mutex_exit(&cq->mlcq_mtx); + continue; + } + if (cq->mlcq_wq != NULL) { + mlxcx_work_queue_t *wq = cq->mlcq_wq; + if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ) + type = "rx "; + else if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ) + type = "tx "; + else + type = ""; + } else { + type = ""; + } + + str = "???"; + v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATUS); + switch (v) { + case MLXCX_CQC_STATUS_OK: + break; + case MLXCX_CQC_STATUS_OVERFLOW: + str = "OVERFLOW"; + break; + case MLXCX_CQC_STATUS_WRITE_FAIL: + str = "WRITE_FAIL"; + break; + case MLXCX_CQC_STATUS_INVALID: + str = "INVALID"; + break; + } + if (v != MLXCX_CQC_STATUS_OK) { + mlxcx_fm_qstate_ereport(mlxp, "completion", + cq->mlcq_num, str, v); + mlxcx_warn(mlxp, "%sCQ 0x%x is in bad status: %x (%s)", + type, cq->mlcq_num, v, str); + cq->mlcq_fm_repd_qstate = B_TRUE; + } + + v = get_bits32(ctx.mlcqc_flags, 
MLXCX_CQ_CTX_STATE); + if (v != MLXCX_CQC_STATE_ARMED && + (cq->mlcq_state & MLXCX_CQ_ARMED) && + !(cq->mlcq_state & MLXCX_CQ_POLLING)) { + if (cq->mlcq_cc == cq->mlcq_check_disarm_cc && + ++cq->mlcq_check_disarm_cnt >= 3) { + mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_STALL); + mlxcx_warn(mlxp, "%sCQ 0x%x (%p) isn't armed", + type, cq->mlcq_num, cq); + } + cq->mlcq_check_disarm_cc = cq->mlcq_cc; + } else { + cq->mlcq_check_disarm_cnt = 0; + cq->mlcq_check_disarm_cc = 0; + } + mutex_exit(&cq->mlcq_mtx); + } +} + +void +mlxcx_check_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *sq) +{ + mlxcx_sq_ctx_t ctx; + mlxcx_sq_state_t state; + + ASSERT(mutex_owned(&sq->mlwq_mtx)); + + if (!mlxcx_cmd_query_sq(mlxp, sq, &ctx)) + return; + + ASSERT3U(from_be24(ctx.mlsqc_cqn), ==, sq->mlwq_cq->mlcq_num); + state = get_bits32(ctx.mlsqc_flags, MLXCX_SQ_STATE); + switch (state) { + case MLXCX_SQ_STATE_RST: + if (sq->mlwq_state & MLXCX_WQ_STARTED) { + mlxcx_fm_qstate_ereport(mlxp, "send", + sq->mlwq_num, "RST", state); + sq->mlwq_fm_repd_qstate = B_TRUE; + } + break; + case MLXCX_SQ_STATE_RDY: + if (!(sq->mlwq_state & MLXCX_WQ_STARTED)) { + mlxcx_fm_qstate_ereport(mlxp, "send", + sq->mlwq_num, "RDY", state); + sq->mlwq_fm_repd_qstate = B_TRUE; + } + break; + case MLXCX_SQ_STATE_ERR: + mlxcx_fm_qstate_ereport(mlxp, "send", + sq->mlwq_num, "ERR", state); + sq->mlwq_fm_repd_qstate = B_TRUE; + break; + default: + mlxcx_fm_qstate_ereport(mlxp, "send", + sq->mlwq_num, "???", state); + sq->mlwq_fm_repd_qstate = B_TRUE; + break; + } +} + +void +mlxcx_check_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *rq) +{ + mlxcx_rq_ctx_t ctx; + mlxcx_rq_state_t state; + + ASSERT(mutex_owned(&rq->mlwq_mtx)); + + if (!mlxcx_cmd_query_rq(mlxp, rq, &ctx)) + return; + + ASSERT3U(from_be24(ctx.mlrqc_cqn), ==, rq->mlwq_cq->mlcq_num); + state = get_bits32(ctx.mlrqc_flags, MLXCX_RQ_STATE); + switch (state) { + case MLXCX_RQ_STATE_RST: + if (rq->mlwq_state & MLXCX_WQ_STARTED) { + mlxcx_fm_qstate_ereport(mlxp, "receive", + rq->mlwq_num, "RST", state); + rq->mlwq_fm_repd_qstate = B_TRUE; + } + break; + case MLXCX_RQ_STATE_RDY: + if (!(rq->mlwq_state & MLXCX_WQ_STARTED)) { + mlxcx_fm_qstate_ereport(mlxp, "receive", + rq->mlwq_num, "RDY", state); + rq->mlwq_fm_repd_qstate = B_TRUE; + } + break; + case MLXCX_RQ_STATE_ERR: + mlxcx_fm_qstate_ereport(mlxp, "receive", + rq->mlwq_num, "ERR", state); + rq->mlwq_fm_repd_qstate = B_TRUE; + break; + default: + mlxcx_fm_qstate_ereport(mlxp, "receive", + rq->mlwq_num, "???", state); + rq->mlwq_fm_repd_qstate = B_TRUE; + break; + } +} + +static void +mlxcx_wq_check(void *arg) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_work_queue_t *wq; + + for (wq = list_head(&mlxp->mlx_wqs); wq != NULL; + wq = list_next(&mlxp->mlx_wqs, wq)) { + mutex_enter(&wq->mlwq_mtx); + if (!(wq->mlwq_state & MLXCX_WQ_CREATED) || + (wq->mlwq_state & MLXCX_WQ_DESTROYED) || + (wq->mlwq_state & MLXCX_WQ_TEARDOWN)) { + mutex_exit(&wq->mlwq_mtx); + continue; + } + if (wq->mlwq_fm_repd_qstate) { + mutex_exit(&wq->mlwq_mtx); + continue; + } + switch (wq->mlwq_type) { + case MLXCX_WQ_TYPE_SENDQ: + mlxcx_check_sq(mlxp, wq); + break; + case MLXCX_WQ_TYPE_RECVQ: + mlxcx_check_rq(mlxp, wq); + break; + } + mutex_exit(&wq->mlwq_mtx); + } +} + +static boolean_t +mlxcx_setup_checktimers(mlxcx_t *mlxp) +{ + if (mlxp->mlx_props.mldp_eq_check_interval_sec > 0) { + mlxp->mlx_eq_checktimer = ddi_periodic_add(mlxcx_eq_check, mlxp, + mlxp->mlx_props.mldp_eq_check_interval_sec * NANOSEC, + DDI_IPL_0); + } + if (mlxp->mlx_props.mldp_cq_check_interval_sec > 0) { + 
mlxp->mlx_cq_checktimer = ddi_periodic_add(mlxcx_cq_check, mlxp, + mlxp->mlx_props.mldp_cq_check_interval_sec * NANOSEC, + DDI_IPL_0); + } + if (mlxp->mlx_props.mldp_wq_check_interval_sec > 0) { + mlxp->mlx_wq_checktimer = ddi_periodic_add(mlxcx_wq_check, mlxp, + mlxp->mlx_props.mldp_wq_check_interval_sec * NANOSEC, + DDI_IPL_0); + } + return (B_TRUE); +} + +int +mlxcx_dmac_fe_compare(const void *arg0, const void *arg1) +{ + const mlxcx_flow_entry_t *left = arg0; + const mlxcx_flow_entry_t *right = arg1; + int bcmpr; + + bcmpr = memcmp(left->mlfe_dmac, right->mlfe_dmac, + sizeof (left->mlfe_dmac)); + if (bcmpr < 0) + return (-1); + if (bcmpr > 0) + return (1); + if (left->mlfe_vid < right->mlfe_vid) + return (-1); + if (left->mlfe_vid > right->mlfe_vid) + return (1); + return (0); +} + +int +mlxcx_grmac_compare(const void *arg0, const void *arg1) +{ + const mlxcx_group_mac_t *left = arg0; + const mlxcx_group_mac_t *right = arg1; + int bcmpr; + + bcmpr = memcmp(left->mlgm_mac, right->mlgm_mac, + sizeof (left->mlgm_mac)); + if (bcmpr < 0) + return (-1); + if (bcmpr > 0) + return (1); + return (0); +} + +int +mlxcx_page_compare(const void *arg0, const void *arg1) +{ + const mlxcx_dev_page_t *p0 = arg0; + const mlxcx_dev_page_t *p1 = arg1; + + if (p0->mxdp_pa < p1->mxdp_pa) + return (-1); + if (p0->mxdp_pa > p1->mxdp_pa) + return (1); + return (0); +} + +static boolean_t +mlxcx_setup_ports(mlxcx_t *mlxp) +{ + uint_t i, j; + mlxcx_port_t *p; + mlxcx_flow_table_t *ft; + mlxcx_flow_group_t *fg; + mlxcx_flow_entry_t *fe; + + VERIFY3U(mlxp->mlx_nports, >, 0); + mlxp->mlx_ports_size = mlxp->mlx_nports * sizeof (mlxcx_port_t); + mlxp->mlx_ports = kmem_zalloc(mlxp->mlx_ports_size, KM_SLEEP); + + for (i = 0; i < mlxp->mlx_nports; ++i) { + p = &mlxp->mlx_ports[i]; + p->mlp_num = i; + p->mlp_init |= MLXCX_PORT_INIT; + mutex_init(&p->mlp_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + mutex_enter(&p->mlp_mtx); + if (!mlxcx_cmd_query_nic_vport_ctx(mlxp, p)) { + mutex_exit(&p->mlp_mtx); + goto err; + } + if (!mlxcx_cmd_query_port_mtu(mlxp, p)) { + mutex_exit(&p->mlp_mtx); + goto err; + } + if (!mlxcx_cmd_query_port_status(mlxp, p)) { + mutex_exit(&p->mlp_mtx); + goto err; + } + if (!mlxcx_cmd_query_port_speed(mlxp, p)) { + mutex_exit(&p->mlp_mtx); + goto err; + } + if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, p, + MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC)) { + mutex_exit(&p->mlp_mtx); + goto err; + } + + mutex_exit(&p->mlp_mtx); + } + + for (i = 0; i < mlxp->mlx_nports; ++i) { + p = &mlxp->mlx_ports[i]; + mutex_enter(&p->mlp_mtx); + p->mlp_rx_flow = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t), + KM_SLEEP)); + mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + + mutex_enter(&ft->mlft_mtx); + + ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX; + ft->mlft_port = p; + ft->mlft_entshift = mlxp->mlx_props.mldp_ftbl_root_size_shift; + if (ft->mlft_entshift > mlxp->mlx_caps->mlc_max_rx_ft_shift) + ft->mlft_entshift = mlxp->mlx_caps->mlc_max_rx_ft_shift; + ft->mlft_nents = (1 << ft->mlft_entshift); + ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t); + ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP); + list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t), + offsetof(mlxcx_flow_group_t, mlfg_entry)); + + for (j = 0; j < ft->mlft_nents; ++j) { + ft->mlft_ent[j].mlfe_table = ft; + ft->mlft_ent[j].mlfe_index = j; + } + + if (!mlxcx_cmd_create_flow_table(mlxp, ft)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&p->mlp_mtx); + goto err; + } + + if 
(!mlxcx_cmd_set_flow_table_root(mlxp, ft)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&p->mlp_mtx); + goto err; + } + + /* + * We match broadcast at the top of the root flow table, then + * all multicast/unicast MACs, then the promisc entry is down + * the very bottom. + * + * This way when promisc is on, that entry simply catches any + * remaining traffic that earlier flows haven't matched. + */ + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_DMAC; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&p->mlp_mtx); + goto err; + } + p->mlp_bcast = fg; + fe = list_head(&fg->mlfg_entries); + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + (void) memset(fe->mlfe_dmac, 0xff, sizeof (fe->mlfe_dmac)); + fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY; + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = ft->mlft_nents - 2; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_DMAC; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&p->mlp_mtx); + goto err; + } + p->mlp_umcast = fg; + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&p->mlp_mtx); + goto err; + } + p->mlp_promisc = fg; + fe = list_head(&fg->mlfg_entries); + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY; + + avl_create(&p->mlp_dmac_fe, mlxcx_dmac_fe_compare, + sizeof (mlxcx_flow_entry_t), offsetof(mlxcx_flow_entry_t, + mlfe_dmac_entry)); + + mutex_exit(&ft->mlft_mtx); + mutex_exit(&p->mlp_mtx); + } + + return (B_TRUE); + +err: + mlxcx_teardown_ports(mlxp); + return (B_FALSE); +} + +void +mlxcx_remove_all_vlan_entries(mlxcx_t *mlxp, mlxcx_ring_group_t *g) +{ + mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft; + mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg; + mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg; + mlxcx_flow_entry_t *fe; + mlxcx_group_vlan_t *v; + + ASSERT(mutex_owned(&g->mlg_mtx)); + + mutex_enter(&ft->mlft_mtx); + + if (!list_is_empty(&g->mlg_rx_vlans)) { + fe = list_head(&dfg->mlfg_entries); + (void) mlxcx_cmd_set_flow_table_entry(mlxp, fe); + } + + while ((v = list_remove_head(&g->mlg_rx_vlans)) != NULL) { + fe = v->mlgv_fe; + ASSERT3P(fe->mlfe_table, ==, ft); + ASSERT3P(fe->mlfe_group, ==, fg); + kmem_free(v, sizeof (mlxcx_group_vlan_t)); + + (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe); + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED; + } + + mutex_exit(&ft->mlft_mtx); +} + +boolean_t +mlxcx_remove_vlan_entry(mlxcx_t *mlxp, mlxcx_ring_group_t *g, + boolean_t tagged, uint16_t vid) +{ + mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft; + mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg; + mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg; + mlxcx_flow_entry_t *fe; + mlxcx_group_vlan_t *v; + boolean_t found = B_FALSE; + + ASSERT(mutex_owned(&g->mlg_mtx)); + + mutex_enter(&ft->mlft_mtx); + + for (v = list_head(&g->mlg_rx_vlans); v != NULL; + v = list_next(&g->mlg_rx_vlans, v)) { + if (v->mlgv_tagged == tagged && v->mlgv_vid == vid) { + found = B_TRUE; + break; + } + } + if (!found) { + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + + list_remove(&g->mlg_rx_vlans, v); + + /* + * If this is the last VLAN entry, we have to go back to 
accepting + * any VLAN (which means re-enabling the default entry). + * + * Do this before we remove the flow entry for the last specific + * VLAN so that we don't lose any traffic in the transition. + */ + if (list_is_empty(&g->mlg_rx_vlans)) { + fe = list_head(&dfg->mlfg_entries); + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + list_insert_tail(&g->mlg_rx_vlans, v); + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + } + + fe = v->mlgv_fe; + ASSERT(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED); + ASSERT(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED); + ASSERT3P(fe->mlfe_table, ==, ft); + ASSERT3P(fe->mlfe_group, ==, fg); + + if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) { + list_insert_tail(&g->mlg_rx_vlans, v); + fe = list_head(&dfg->mlfg_entries); + if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { + (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe); + } + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED; + + kmem_free(v, sizeof (mlxcx_group_vlan_t)); + + mutex_exit(&ft->mlft_mtx); + return (B_TRUE); +} + +boolean_t +mlxcx_add_vlan_entry(mlxcx_t *mlxp, mlxcx_ring_group_t *g, boolean_t tagged, + uint16_t vid) +{ + mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft; + mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg; + mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg; + mlxcx_flow_entry_t *fe; + mlxcx_group_vlan_t *v; + boolean_t found = B_FALSE; + boolean_t first = B_FALSE; + + ASSERT(mutex_owned(&g->mlg_mtx)); + + mutex_enter(&ft->mlft_mtx); + + for (v = list_head(&g->mlg_rx_vlans); v != NULL; + v = list_next(&g->mlg_rx_vlans, v)) { + if (v->mlgv_tagged == tagged && v->mlgv_vid == vid) { + mutex_exit(&ft->mlft_mtx); + return (B_TRUE); + } + } + if (list_is_empty(&g->mlg_rx_vlans)) + first = B_TRUE; + + for (fe = list_head(&fg->mlfg_entries); fe != NULL; + fe = list_next(&fg->mlfg_entries, fe)) { + if (!(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED)) { + found = B_TRUE; + break; + } + } + if (!found) { + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + + v = kmem_zalloc(sizeof (mlxcx_group_vlan_t), KM_SLEEP); + v->mlgv_fe = fe; + v->mlgv_tagged = tagged; + v->mlgv_vid = vid; + + fe->mlfe_state |= MLXCX_FLOW_ENTRY_RESERVED; + fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY; + fe->mlfe_vid = vid; + if (tagged) { + fe->mlfe_vlan_type = MLXCX_VLAN_TYPE_CVLAN; + } else { + fe->mlfe_vlan_type = MLXCX_VLAN_TYPE_NONE; + } + + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY; + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED; + kmem_free(v, sizeof (mlxcx_group_vlan_t)); + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + + list_insert_tail(&g->mlg_rx_vlans, v); + + /* + * If the vlan list was empty for this group before adding this one, + * then we no longer want the "default" entry to allow all VLANs + * through. 
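+ * + * The "default" entry lives in this group's mlg_rx_vlan_def_fg; deleting + * it below means that only the specific VLAN entries in mlg_rx_vlan_fg + * will match from here on.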
+ */ + if (first) { + fe = list_head(&dfg->mlfg_entries); + (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe); + } + + mutex_exit(&ft->mlft_mtx); + return (B_TRUE); +} + +void +mlxcx_remove_all_umcast_entries(mlxcx_t *mlxp, mlxcx_port_t *port, + mlxcx_ring_group_t *group) +{ + mlxcx_flow_entry_t *fe; + mlxcx_flow_table_t *ft = port->mlp_rx_flow; + mlxcx_group_mac_t *gm, *ngm; + + ASSERT(mutex_owned(&port->mlp_mtx)); + ASSERT(mutex_owned(&group->mlg_mtx)); + + mutex_enter(&ft->mlft_mtx); + + gm = avl_first(&group->mlg_rx_macs); + for (; gm != NULL; gm = ngm) { + ngm = AVL_NEXT(&group->mlg_rx_macs, gm); + + ASSERT3P(gm->mlgm_group, ==, group); + fe = gm->mlgm_fe; + ASSERT3P(fe->mlfe_table, ==, ft); + + avl_remove(&group->mlg_rx_macs, gm); + list_remove(&fe->mlfe_ring_groups, gm); + kmem_free(gm, sizeof (mlxcx_group_mac_t)); + + fe->mlfe_ndest = 0; + for (gm = list_head(&fe->mlfe_ring_groups); gm != NULL; + gm = list_next(&fe->mlfe_ring_groups, gm)) { + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = + gm->mlgm_group->mlg_rx_vlan_ft; + } + fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY; + + if (fe->mlfe_ndest > 0) { + (void) mlxcx_cmd_set_flow_table_entry(mlxp, fe); + continue; + } + + /* + * There are no more ring groups left for this MAC (it wasn't + * attached to any other groups since ndest == 0), so clean up + * its flow entry. + */ + avl_remove(&port->mlp_dmac_fe, fe); + (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe); + list_destroy(&fe->mlfe_ring_groups); + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED; + } + + mutex_exit(&ft->mlft_mtx); +} + +boolean_t +mlxcx_remove_umcast_entry(mlxcx_t *mlxp, mlxcx_port_t *port, + mlxcx_ring_group_t *group, const uint8_t *macaddr) +{ + mlxcx_flow_entry_t *fe; + mlxcx_flow_table_t *ft = port->mlp_rx_flow; + mlxcx_group_mac_t *gm, probe; + + ASSERT(mutex_owned(&port->mlp_mtx)); + ASSERT(mutex_owned(&group->mlg_mtx)); + + bzero(&probe, sizeof (probe)); + bcopy(macaddr, probe.mlgm_mac, sizeof (probe.mlgm_mac)); + + mutex_enter(&ft->mlft_mtx); + + gm = avl_find(&group->mlg_rx_macs, &probe, NULL); + if (gm == NULL) { + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + ASSERT3P(gm->mlgm_group, ==, group); + ASSERT0(bcmp(macaddr, gm->mlgm_mac, sizeof (gm->mlgm_mac))); + + fe = gm->mlgm_fe; + ASSERT3P(fe->mlfe_table, ==, ft); + ASSERT0(bcmp(macaddr, fe->mlfe_dmac, sizeof (fe->mlfe_dmac))); + + list_remove(&fe->mlfe_ring_groups, gm); + avl_remove(&group->mlg_rx_macs, gm); + kmem_free(gm, sizeof (mlxcx_group_mac_t)); + + fe->mlfe_ndest = 0; + for (gm = list_head(&fe->mlfe_ring_groups); gm != NULL; + gm = list_next(&fe->mlfe_ring_groups, gm)) { + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = + gm->mlgm_group->mlg_rx_vlan_ft; + } + fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY; + + if (fe->mlfe_ndest > 0) { + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + mutex_exit(&ft->mlft_mtx); + return (B_TRUE); + } + + /* + * There are no more ring groups left for this MAC (it wasn't attached + * to any other groups since ndest == 0), so clean up its flow entry. 
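+ * + * Clearing MLXCX_FLOW_ENTRY_RESERVED below returns the entry to its flow + * group's free pool, so a later mlxcx_add_umcast_entry() can reuse it.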
+ */ + avl_remove(&port->mlp_dmac_fe, fe); + (void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe); + list_destroy(&fe->mlfe_ring_groups); + + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED; + + mutex_exit(&ft->mlft_mtx); + + return (B_TRUE); +} + +boolean_t +mlxcx_add_umcast_entry(mlxcx_t *mlxp, mlxcx_port_t *port, + mlxcx_ring_group_t *group, const uint8_t *macaddr) +{ + mlxcx_flow_group_t *fg; + mlxcx_flow_entry_t *fe, probe; + mlxcx_flow_table_t *ft = port->mlp_rx_flow; + mlxcx_group_mac_t *gm; + boolean_t found = B_FALSE; + + ASSERT(mutex_owned(&port->mlp_mtx)); + ASSERT(mutex_owned(&group->mlg_mtx)); + + bzero(&probe, sizeof (probe)); + bcopy(macaddr, probe.mlfe_dmac, sizeof (probe.mlfe_dmac)); + + mutex_enter(&ft->mlft_mtx); + + fe = avl_find(&port->mlp_dmac_fe, &probe, NULL); + + if (fe == NULL) { + fg = port->mlp_umcast; + for (fe = list_head(&fg->mlfg_entries); fe != NULL; + fe = list_next(&fg->mlfg_entries, fe)) { + if (!(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED)) { + found = B_TRUE; + break; + } + } + if (!found) { + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + list_create(&fe->mlfe_ring_groups, sizeof (mlxcx_group_mac_t), + offsetof(mlxcx_group_mac_t, mlgm_fe_entry)); + fe->mlfe_state |= MLXCX_FLOW_ENTRY_RESERVED; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + bcopy(macaddr, fe->mlfe_dmac, sizeof (fe->mlfe_dmac)); + + avl_add(&port->mlp_dmac_fe, fe); + } + + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = group->mlg_rx_vlan_ft; + fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY; + + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY; + if (--fe->mlfe_ndest == 0) { + fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED; + } + mutex_exit(&ft->mlft_mtx); + return (B_FALSE); + } + + gm = kmem_zalloc(sizeof (mlxcx_group_mac_t), KM_SLEEP); + gm->mlgm_group = group; + gm->mlgm_fe = fe; + bcopy(macaddr, gm->mlgm_mac, sizeof (gm->mlgm_mac)); + avl_add(&group->mlg_rx_macs, gm); + list_insert_tail(&fe->mlfe_ring_groups, gm); + + mutex_exit(&ft->mlft_mtx); + + return (B_TRUE); +} + +boolean_t +mlxcx_setup_flow_group(mlxcx_t *mlxp, mlxcx_flow_table_t *ft, + mlxcx_flow_group_t *fg) +{ + mlxcx_flow_entry_t *fe; + uint_t i, idx; + + ASSERT(mutex_owned(&ft->mlft_mtx)); + ASSERT(ft->mlft_state & MLXCX_FLOW_TABLE_CREATED); + ASSERT3P(fg->mlfg_table, ==, ft); + + if (ft->mlft_next_ent + fg->mlfg_size > ft->mlft_nents) + return (B_FALSE); + fg->mlfg_start_idx = ft->mlft_next_ent; + + if (!mlxcx_cmd_create_flow_group(mlxp, fg)) { + return (B_FALSE); + } + + list_create(&fg->mlfg_entries, sizeof (mlxcx_flow_entry_t), + offsetof(mlxcx_flow_entry_t, mlfe_group_entry)); + for (i = 0; i < fg->mlfg_size; ++i) { + idx = fg->mlfg_start_idx + i; + fe = &ft->mlft_ent[idx]; + fe->mlfe_group = fg; + list_insert_tail(&fg->mlfg_entries, fe); + } + fg->mlfg_avail = fg->mlfg_size; + ft->mlft_next_ent += fg->mlfg_size; + + return (B_TRUE); +} + +static boolean_t +mlxcx_setup_eq0(mlxcx_t *mlxp) +{ + mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[0]; + + mutex_enter(&mleq->mleq_mtx); + if (!mlxcx_eq_alloc_dma(mlxp, mleq)) { + /* mlxcx_teardown_eqs() will clean this up */ + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + mleq->mleq_mlx = mlxp; + mleq->mleq_uar = &mlxp->mlx_uar; + mleq->mleq_events = + (1ULL << MLXCX_EVENT_PAGE_REQUEST) | + (1ULL << MLXCX_EVENT_PORT_STATE) | + (1ULL << MLXCX_EVENT_INTERNAL_ERROR) | + (1ULL << MLXCX_EVENT_PORT_MODULE) | + (1ULL << MLXCX_EVENT_SENDQ_DRAIN) | + (1ULL << MLXCX_EVENT_LAST_WQE) | + (1ULL << MLXCX_EVENT_CQ_ERROR) | + (1ULL << 
MLXCX_EVENT_WQ_CATASTROPHE) | + (1ULL << MLXCX_EVENT_PAGE_FAULT) | + (1ULL << MLXCX_EVENT_WQ_INVALID_REQ) | + (1ULL << MLXCX_EVENT_WQ_ACCESS_VIOL) | + (1ULL << MLXCX_EVENT_NIC_VPORT) | + (1ULL << MLXCX_EVENT_DOORBELL_CONGEST); + if (!mlxcx_cmd_create_eq(mlxp, mleq)) { + /* mlxcx_teardown_eqs() will clean this up */ + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + if (ddi_intr_enable(mlxp->mlx_intr_handles[0]) != DDI_SUCCESS) { + /* + * mlxcx_teardown_eqs() will handle calling cmd_destroy_eq and + * eq_rele_dma + */ + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + mlxcx_arm_eq(mlxp, mleq); + mutex_exit(&mleq->mleq_mtx); + return (B_TRUE); +} + +int +mlxcx_cq_compare(const void *arg0, const void *arg1) +{ + const mlxcx_completion_queue_t *left = arg0; + const mlxcx_completion_queue_t *right = arg1; + + if (left->mlcq_num < right->mlcq_num) { + return (-1); + } + if (left->mlcq_num > right->mlcq_num) { + return (1); + } + return (0); +} + +static boolean_t +mlxcx_setup_eqs(mlxcx_t *mlxp) +{ + uint_t i; + mlxcx_event_queue_t *mleq; + + ASSERT3S(mlxp->mlx_intr_count, >, 0); + + for (i = 1; i < mlxp->mlx_intr_count; ++i) { + mleq = &mlxp->mlx_eqs[i]; + mutex_enter(&mleq->mleq_mtx); + if (!mlxcx_eq_alloc_dma(mlxp, mleq)) { + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + mleq->mleq_uar = &mlxp->mlx_uar; + if (!mlxcx_cmd_create_eq(mlxp, mleq)) { + /* mlxcx_teardown() will handle calling eq_rele_dma */ + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + if (mlxp->mlx_props.mldp_intrmod_period_usec != 0 && + !mlxcx_cmd_set_int_mod(mlxp, i, + mlxp->mlx_props.mldp_intrmod_period_usec)) { + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + if (ddi_intr_enable(mlxp->mlx_intr_handles[i]) != DDI_SUCCESS) { + mutex_exit(&mleq->mleq_mtx); + return (B_FALSE); + } + mlxcx_arm_eq(mlxp, mleq); + mutex_exit(&mleq->mleq_mtx); + } + + mlxp->mlx_next_eq = 1; + + return (B_TRUE); +} + +/* + * Snapshot all of the hardware capabilities that we care about and then modify + * the HCA capabilities to get things moving. + */ +static boolean_t +mlxcx_init_caps(mlxcx_t *mlxp) +{ + mlxcx_caps_t *c; + + mlxp->mlx_caps = c = kmem_zalloc(sizeof (mlxcx_caps_t), KM_SLEEP); + + if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_GENERAL, + MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_hca_cur)) { + mlxcx_warn(mlxp, "failed to obtain current HCA general caps"); + } + + if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_GENERAL, + MLXCX_HCA_CAP_MODE_MAX, &c->mlc_hca_max)) { + mlxcx_warn(mlxp, "failed to obtain maximum HCA general caps"); + } + + if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_ETHERNET, + MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_ether_cur)) { + mlxcx_warn(mlxp, "failed to obtain current HCA eth caps"); + } + + if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_ETHERNET, + MLXCX_HCA_CAP_MODE_MAX, &c->mlc_ether_max)) { + mlxcx_warn(mlxp, "failed to obtain maximum HCA eth caps"); + } + + if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_NIC_FLOW, + MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_nic_flow_cur)) { + mlxcx_warn(mlxp, "failed to obtain current HCA flow caps"); + } + + if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_NIC_FLOW, + MLXCX_HCA_CAP_MODE_MAX, &c->mlc_nic_flow_max)) { + mlxcx_warn(mlxp, "failed to obtain maximum HCA flow caps"); + } + + /* + * Check the caps meet our requirements. 
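+ * + * We refuse to attach unless the device has 4k pages, version 1 CQEs, + * ethernet-only ports and modifiable RX flow tables, since the rest of + * the driver assumes all of these.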
+ */ + const mlxcx_hca_cap_general_caps_t *gen = &c->mlc_hca_cur.mhc_general; + + if (gen->mlcap_general_log_pg_sz != 12) { + mlxcx_warn(mlxp, "!hardware has page size != 4k " + "(log_pg_sz = %u)", (uint_t)gen->mlcap_general_log_pg_sz); + goto err; + } + if (gen->mlcap_general_cqe_version != 1) { + mlxcx_warn(mlxp, "!hardware does not support CQE v1 " + "(cqe_ver = %u)", (uint_t)gen->mlcap_general_cqe_version); + goto err; + } + if (gen->mlcap_general_port_type != + MLXCX_CAP_GENERAL_PORT_TYPE_ETHERNET) { + mlxcx_warn(mlxp, "!hardware has non-ethernet ports"); + goto err; + } + mlxp->mlx_nports = gen->mlcap_general_num_ports; + mlxp->mlx_max_sdu = (1 << (gen->mlcap_general_log_max_msg & 0x1F)); + + c->mlc_max_tir = (1 << gen->mlcap_general_log_max_tir); + + c->mlc_checksum = get_bit32(c->mlc_ether_cur.mhc_eth.mlcap_eth_flags, + MLXCX_ETH_CAP_CSUM_CAP); + c->mlc_vxlan = get_bit32(c->mlc_ether_cur.mhc_eth.mlcap_eth_flags, + MLXCX_ETH_CAP_TUNNEL_STATELESS_VXLAN); + + c->mlc_max_lso_size = (1 << get_bits32(c->mlc_ether_cur.mhc_eth. + mlcap_eth_flags, MLXCX_ETH_CAP_MAX_LSO_CAP)); + if (c->mlc_max_lso_size == 1) { + c->mlc_max_lso_size = 0; + c->mlc_lso = B_FALSE; + } else { + c->mlc_lso = B_TRUE; + } + + c->mlc_max_rqt_size = (1 << get_bits32(c->mlc_ether_cur.mhc_eth. + mlcap_eth_flags, MLXCX_ETH_CAP_RSS_IND_TBL_CAP)); + + if (!get_bit32(c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx. + mlcap_flow_prop_flags, MLXCX_FLOW_CAP_PROPS_SUPPORT)) { + mlxcx_warn(mlxp, "!hardware does not support rx flow tables"); + goto err; + } + if (!get_bit32(c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx. + mlcap_flow_prop_flags, MLXCX_FLOW_CAP_PROPS_MODIFY)) { + mlxcx_warn(mlxp, "!hardware does not support modifying rx " + "flow table entries"); + goto err; + } + + c->mlc_max_rx_ft_shift = c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx. + mlcap_flow_prop_log_max_ft_size; + c->mlc_max_rx_flows = (1 << c->mlc_nic_flow_cur.mhc_flow. + mlcap_flow_nic_rx.mlcap_flow_prop_log_max_flow); + c->mlc_max_rx_fe_dest = (1 << c->mlc_nic_flow_cur.mhc_flow. 
+ mlcap_flow_nic_rx.mlcap_flow_prop_log_max_destination); + + return (B_TRUE); + +err: + kmem_free(mlxp->mlx_caps, sizeof (mlxcx_caps_t)); + return (B_FALSE); +} + +static int +mlxcx_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + mlxcx_t *mlxp; + + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + mlxp = ddi_get_driver_private(dip); + if (mlxp == NULL) { + mlxcx_warn(NULL, "asked to detach, but missing instance " + "private data"); + return (DDI_FAILURE); + } + + if (mlxp->mlx_attach & MLXCX_ATTACH_MAC_HDL) { + if (mac_unregister(mlxp->mlx_mac_hdl) != DDI_SUCCESS) { + return (DDI_FAILURE); + } + mlxp->mlx_attach &= ~MLXCX_ATTACH_MAC_HDL; + } + + mlxcx_teardown(mlxp); + return (DDI_SUCCESS); +} + +static size_t +mlxcx_calc_rx_ngroups(mlxcx_t *mlxp) +{ + size_t ngroups = mlxp->mlx_props.mldp_rx_ngroups_large + + mlxp->mlx_props.mldp_rx_ngroups_small; + size_t tirlim, flowlim, gflowlim; + + tirlim = mlxp->mlx_caps->mlc_max_tir / MLXCX_TIRS_PER_GROUP; + if (tirlim < ngroups) { + mlxcx_note(mlxp, "limiting number of rx groups to %lu based " + "on number of TIRs available", tirlim); + ngroups = tirlim; + } + + flowlim = (1 << mlxp->mlx_caps->mlc_max_rx_ft_shift) - 2; + if (flowlim < ngroups) { + mlxcx_note(mlxp, "limiting number of rx groups to %lu based " + "on max size of RX flow tables", flowlim); + ngroups = flowlim; + } + + do { + gflowlim = mlxp->mlx_caps->mlc_max_rx_flows - 16 * ngroups - 2; + if (gflowlim < ngroups) { + mlxcx_note(mlxp, "limiting number of rx groups to %lu " + "based on max total RX flows", gflowlim); + --ngroups; + } + } while (gflowlim < ngroups); + + return (ngroups); +} + +static int +mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + mlxcx_t *mlxp; + uint_t i; + int inst, ret; + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + inst = ddi_get_instance(dip); + ret = ddi_soft_state_zalloc(mlxcx_softstate, inst); + if (ret != 0) + return (ret); + + mlxp = ddi_get_soft_state(mlxcx_softstate, inst); + if (mlxp == NULL) + return (DDI_FAILURE); + mlxp->mlx_dip = dip; + mlxp->mlx_inst = inst; + ddi_set_driver_private(dip, mlxp); + + mlxcx_load_props(mlxp); + + mlxcx_fm_init(mlxp); + mlxp->mlx_attach |= MLXCX_ATTACH_FM; + + if (pci_config_setup(mlxp->mlx_dip, &mlxp->mlx_cfg_handle) != + DDI_SUCCESS) { + mlxcx_warn(mlxp, "failed to initialise PCI config space"); + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_PCI_CONFIG; + + if (!mlxcx_regs_map(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_REGS; + + if (!mlxcx_cmd_queue_init(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_CMD; + + if (!mlxcx_cmd_enable_hca(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_ENABLE_HCA; + + if (!mlxcx_check_issi(mlxp)) { + goto err; + } + + /* + * We have to get our interrupts now so we know what priority to + * create pagemtx with. 
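+ * + * (pagemtx is initialised at interrupt priority below because pages can + * be given to the hardware from interrupt context when it raises a + * PAGE_REQUEST event.)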
+ */ + if (!mlxcx_intr_setup(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_INTRS; + + mutex_init(&mlxp->mlx_pagemtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + avl_create(&mlxp->mlx_pages, mlxcx_page_compare, + sizeof (mlxcx_dev_page_t), offsetof(mlxcx_dev_page_t, mxdp_tree)); + mlxp->mlx_attach |= MLXCX_ATTACH_PAGE_LIST; + + if (!mlxcx_init_pages(mlxp, MLXCX_QUERY_PAGES_OPMOD_BOOT)) { + goto err; + } + + if (!mlxcx_init_caps(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_CAPS; + + if (!mlxcx_init_pages(mlxp, MLXCX_QUERY_PAGES_OPMOD_INIT)) { + goto err; + } + + if (!mlxcx_cmd_init_hca(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_INIT_HCA; + + if (!mlxcx_cmd_set_driver_version(mlxp, MLXCX_DRIVER_VERSION)) { + goto err; + } + + /* + * The User Access Region (UAR) is needed so we can ring EQ and CQ + * doorbells. + */ + if (!mlxcx_cmd_alloc_uar(mlxp, &mlxp->mlx_uar)) { + goto err; + } + for (i = 0; i < MLXCX_BF_PER_UAR; ++i) { + mutex_init(&mlxp->mlx_uar.mlu_bf[i].mbf_mtx, NULL, + MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri)); + } + mlxp->mlx_attach |= MLXCX_ATTACH_UAR_PD_TD; + + /* + * Set up event queue #0 -- it's special and only handles control + * type events, like PAGE_REQUEST (which we will probably get during + * the commands below). + * + * This will enable and arm the interrupt on EQ 0, too. + */ + if (!mlxcx_setup_eq0(mlxp)) { + goto err; + } + + /* + * Allocate a protection and transport domain. These don't really do + * anything for us (they're IB concepts), but we need to give their + * ID numbers in other commands. + */ + if (!mlxcx_cmd_alloc_pd(mlxp, &mlxp->mlx_pd)) { + goto err; + } + if (!mlxcx_cmd_alloc_tdom(mlxp, &mlxp->mlx_tdom)) { + goto err; + } + /* + * Fetch the "reserved" lkey that lets us give linear addresses in + * work queue entries, rather than having to mess with the NIC's + * internal MMU. + */ + if (!mlxcx_cmd_query_special_ctxs(mlxp)) { + goto err; + } + + /* + * Query our port information and current state, populate the + * mlxcx_port_t structs. + * + * This also sets up the root flow tables and flow groups. + */ + if (!mlxcx_setup_ports(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_PORTS; + + /* + * Set up, enable and arm the rest of the interrupt EQs which will + * service events from CQs. + * + * The MLXCX_ATTACH_INTRS flag covers checking if these need to be + * cleaned up. + */ + if (!mlxcx_setup_eqs(mlxp)) { + goto err; + } + + /* Completion queues */ + list_create(&mlxp->mlx_cqs, sizeof (mlxcx_completion_queue_t), + offsetof(mlxcx_completion_queue_t, mlcq_entry)); + mlxp->mlx_attach |= MLXCX_ATTACH_CQS; + + /* Work queues (send queues, receive queues) */ + list_create(&mlxp->mlx_wqs, sizeof (mlxcx_work_queue_t), + offsetof(mlxcx_work_queue_t, mlwq_entry)); + mlxp->mlx_attach |= MLXCX_ATTACH_WQS; + + /* Set up periodic fault check timers which check the queue states */ + if (!mlxcx_setup_checktimers(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_CHKTIMERS; + + /* + * Construct our arrays of mlxcx_ring_group_ts, which represent the + * "groups" we advertise to MAC. 
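+ * + * The RX group count is clamped by mlxcx_calc_rx_ngroups() to fit the + * hardware's TIR and flow table limits; the TX group count comes + * straight from the tx_ngroups driver property.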
+ */ + mlxp->mlx_rx_ngroups = mlxcx_calc_rx_ngroups(mlxp); + mlxp->mlx_rx_groups_size = mlxp->mlx_rx_ngroups * + sizeof (mlxcx_ring_group_t); + mlxp->mlx_rx_groups = kmem_zalloc(mlxp->mlx_rx_groups_size, KM_SLEEP); + + mlxp->mlx_tx_ngroups = mlxp->mlx_props.mldp_tx_ngroups; + mlxp->mlx_tx_groups_size = mlxp->mlx_tx_ngroups * + sizeof (mlxcx_ring_group_t); + mlxp->mlx_tx_groups = kmem_zalloc(mlxp->mlx_tx_groups_size, KM_SLEEP); + + mlxp->mlx_attach |= MLXCX_ATTACH_GROUPS; + + /* + * Sets up the free/busy buffers list for keeping track of packet + * buffers. + */ + if (!mlxcx_setup_bufs(mlxp)) + goto err; + mlxp->mlx_attach |= MLXCX_ATTACH_BUFS; + + /* + * Before we tell MAC about our rings/groups, we need to do enough + * setup on them to be sure about the numbers and configuration that + * we have. This will do basically everything short of allocating + * packet buffers and starting the rings up. + */ + for (i = 0; i < mlxp->mlx_tx_ngroups; ++i) { + if (!mlxcx_tx_group_setup(mlxp, &mlxp->mlx_tx_groups[i])) + goto err; + } + for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) { + if (!mlxcx_rx_group_setup(mlxp, &mlxp->mlx_rx_groups[i])) + goto err; + } + + /* + * Finally, tell MAC that we exist! + */ + if (!mlxcx_register_mac(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_MAC_HDL; + + return (DDI_SUCCESS); + +err: + mlxcx_teardown(mlxp); + return (DDI_FAILURE); +} + +static struct cb_ops mlxcx_cb_ops = { + .cb_open = nulldev, + .cb_close = nulldev, + .cb_strategy = nodev, + .cb_print = nodev, + .cb_dump = nodev, + .cb_read = nodev, + .cb_write = nodev, + .cb_ioctl = nodev, + .cb_devmap = nodev, + .cb_mmap = nodev, + .cb_segmap = nodev, + .cb_chpoll = nochpoll, + .cb_prop_op = ddi_prop_op, + .cb_flag = D_MP, + .cb_rev = CB_REV, + .cb_aread = nodev, + .cb_awrite = nodev +}; + +static struct dev_ops mlxcx_dev_ops = { + .devo_rev = DEVO_REV, + .devo_refcnt = 0, + .devo_getinfo = NULL, + .devo_identify = nulldev, + .devo_probe = nulldev, + .devo_attach = mlxcx_attach, + .devo_detach = mlxcx_detach, + .devo_reset = nodev, + .devo_power = ddi_power, + .devo_quiesce = ddi_quiesce_not_supported, + .devo_cb_ops = &mlxcx_cb_ops +}; + +static struct modldrv mlxcx_modldrv = { + .drv_modops = &mod_driverops, + .drv_linkinfo = "Mellanox Connect-X 4/5/6", + .drv_dev_ops = &mlxcx_dev_ops +}; + +static struct modlinkage mlxcx_modlinkage = { + .ml_rev = MODREV_1, + .ml_linkage = { &mlxcx_modldrv, NULL } +}; + +int +_init(void) +{ + int ret; + + ret = ddi_soft_state_init(&mlxcx_softstate, sizeof (mlxcx_t), 0); + if (ret != 0) { + return (ret); + } + + mac_init_ops(&mlxcx_dev_ops, MLXCX_MODULE_NAME); + + if ((ret = mod_install(&mlxcx_modlinkage)) != DDI_SUCCESS) { + mac_fini_ops(&mlxcx_dev_ops); + ddi_soft_state_fini(&mlxcx_softstate); + return (ret); + } + + return (DDI_SUCCESS); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&mlxcx_modlinkage, modinfop)); +} + +int +_fini(void) +{ + int ret; + + if ((ret = mod_remove(&mlxcx_modlinkage)) != DDI_SUCCESS) { + return (ret); + } + + mac_fini_ops(&mlxcx_dev_ops); + + ddi_soft_state_fini(&mlxcx_softstate); + + return (DDI_SUCCESS); +} diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.conf b/usr/src/uts/common/io/mlxcx/mlxcx.conf new file mode 100644 index 0000000000..3569c4e5f5 --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx.conf @@ -0,0 +1,101 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018, Joyent, Inc. +# Copyright 2020, The University of Queensland +# + +# +# Driver.conf file for Mellanox Connect-X 4/5/6. +# See mlxcx(7D) for valid options. +# + +# +# Sizing of event and completion queues. +# +# The number of entries on each queue will be (1 << *_size_shift) -- so +# a value of 9 would mean 512 entries. +# +#eq_size_shift = 9; +#cq_size_shift = 10; + +# +# Sizing of send and receive queues. +# +# Note that this determines the size of the RX and TX rings that mlxcx will +# advertise to MAC. It also determines how many packet buffers we will allocate +# when starting the interface. +# +#sq_size_shift = 11; +#rq_size_shift = 10; + +# +# Number and configuration of TX groups and rings. +# +#tx_ngroups = 1; +#tx_nrings_per_group = 64; + +# +# Number and configuration of RX groups and rings. +# +#rx_ngroups_large = 2; +#rx_nrings_per_large_group = 16; +#rx_ngroups_small = 256; +#rx_nrings_per_small_group = 4; + +# +# Number of flow table entries allocated to root flow tables. +# +# This places a ceiling on how many MAC addresses can be filtered into +# groups across the whole NIC. If you have a lot of VNICs you might want to +# raise this (and probably also rx_ngroups_small). +# +#ftbl_root_size_shift = 12; + +# +# Number of flow table entries allocated to each L1 VLAN filtering table. +# +# This places a limit on the number of VLANs that one MAC address can be +# associated with before falling back to software classification. Two entries +# are always reserved for the non-VLAN catch-all and promisc entries. +# +# Note: illumos MAC today only supports giving a single VLAN per MAC address +# to hardware drivers anyway, so setting this higher is pointless until that +# changes. +# +#ftbl_vlan_size_shift = 4; + +# +# Interrupt and completion moderation. +# +#cqemod_period_usec = 50; +#cqemod_count = <80% of cq_size>; +#intrmod_period_usec = 10; + +# +# Minimum packet size before we use ddi_dma_addr_bind_handle(9F) rather than +# a bcopy() of the packet data. DMA binds are expensive and involve taking +# locks in the PCI nexus driver, so it's seldom worth doing them for small +# packets. +# +#tx_bind_threshold = 2048; + +# +# Interval between periodic double-checks of queue status against hardware +# state. This is used to detect hardware stalls or errors, as well as guard +# against driver bugs. +# +# If set to too high a frequency, checks may impact NIC performance. Can be +# set to zero to disable periodic checking entirely. +# +#eq_check_interval_sec = 30; +#cq_check_interval_sec = 300; +#wq_check_interval_sec = 300; diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h new file mode 100644 index 0000000000..3b58989961 --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx.h @@ -0,0 +1,1298 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2020, The University of Queensland + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * Mellanox Connect-X 4/5/6 driver. + * + * More details in mlxcx.c + */ + +#ifndef _MLXCX_H +#define _MLXCX_H + +/* + * mlxcx(7D) definitions + */ + +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/ddifm.h> +#include <sys/id_space.h> +#include <sys/list.h> +#include <sys/stddef.h> +#include <sys/stream.h> +#include <sys/strsun.h> +#include <sys/mac_provider.h> +#include <sys/mac_ether.h> +#include <sys/cpuvar.h> +#include <sys/ethernet.h> + +#include <inet/ip.h> +#include <inet/ip6.h> + +#include <sys/fm/protocol.h> +#include <sys/fm/util.h> +#include <sys/fm/io/ddi.h> + +#include <mlxcx_reg.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Get access to the first PCI BAR. + */ +#define MLXCX_REG_NUMBER 1 + +/* + * The command queue is supposed to be a page, which is 4k. + */ +#define MLXCX_CMD_DMA_PAGE_SIZE 4096 + +/* + * Queues can allocate in units of this much memory. + */ +#define MLXCX_QUEUE_DMA_PAGE_SIZE 4096 + +/* + * We advertise two sizes of groups to MAC -- a certain number of "large" + * groups (including the default group, which is sized to at least ncpus) + * followed by a certain number of "small" groups. + * + * This allows us to have a larger amount of classification resources available + * for zones/VMs without resorting to software classification. + */ +#define MLXCX_RX_NGROUPS_LARGE_DFLT 2 +#define MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT 16 +#define MLXCX_RX_NGROUPS_SMALL_DFLT 256 +#define MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT 4 + +#define MLXCX_TX_NGROUPS_DFLT 1 +#define MLXCX_TX_NRINGS_PER_GROUP_DFLT 64 + +/* + * Queues will be sized to (1 << *Q_SIZE_SHIFT) entries long. + */ +#define MLXCX_EQ_SIZE_SHIFT_DFLT 9 +#define MLXCX_CQ_SIZE_SHIFT_DFLT 10 + +/* + * Default to making SQs bigger than RQs for 9k MTU, since most packets will + * spill over into more than one slot. RQ WQEs are always 1 slot. + */ +#define MLXCX_SQ_SIZE_SHIFT_DFLT 11 +#define MLXCX_RQ_SIZE_SHIFT_DFLT 10 + +#define MLXCX_CQ_HWM_GAP 16 +#define MLXCX_CQ_LWM_GAP 24 + +#define MLXCX_RQ_REFILL_STEP 64 + +/* + * CQ event moderation + */ +#define MLXCX_CQEMOD_PERIOD_USEC_DFLT 50 +#define MLXCX_CQEMOD_COUNT_DFLT \ + (8 * ((1 << MLXCX_CQ_SIZE_SHIFT_DFLT) / 10)) + +/* + * EQ interrupt moderation + */ +#define MLXCX_INTRMOD_PERIOD_USEC_DFLT 10 + +/* Size of root flow tables */ +#define MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT 12 + +/* Size of 2nd level flow tables for VLAN filtering */ +#define MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT 4 + +/* + * How big does an mblk have to be before we dma_bind() it instead of + * bcopying? + */ +#define MLXCX_TX_BIND_THRESHOLD_DFLT 2048 + +/* + * How often to check the status of completion queues for overflow and + * other problems. + */ +#define MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT 300 +#define MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT 300 +#define MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT 30 + +#define MLXCX_DOORBELL_TRIES_DFLT 3 +extern uint_t mlxcx_doorbell_tries; + +#define MLXCX_STUCK_INTR_COUNT_DFLT 128 +extern uint_t mlxcx_stuck_intr_count; + +#define MLXCX_BUF_BIND_MAX_ATTEMTPS 50 + +#define MLXCX_MTU_OFFSET \ + (sizeof (struct ether_vlan_header) + ETHERFCSL) + +/* + * This is the current version of the command structure that the driver expects + * to be found in the ISS. 
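+ * + * ("ISS" here is the initialisation segment, the region at the start of + * the device's register space; the command interface revision advertised + * there must match this value before the command queue can be used.)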
+ */ +#define MLXCX_CMD_REVISION 5 + +#ifdef DEBUG +#define MLXCX_DMA_SYNC(dma, flag) VERIFY0(ddi_dma_sync( \ + (dma).mxdb_dma_handle, 0, 0, \ + (flag))) +#else +#define MLXCX_DMA_SYNC(dma, flag) (void) ddi_dma_sync( \ + (dma).mxdb_dma_handle, 0, 0, \ + (flag)) +#endif + +#define MLXCX_FM_SERVICE_MLXCX "mlxcx" + +/* + * This macro defines the expected value of the 'Interface Step Sequence ID' + * (issi) which represents the version of the start up and tear down sequence. + * We must check that hardware supports this and tell it which version we're + * using as well. + */ +#define MLXCX_CURRENT_ISSI 1 + +/* + * This is the size of a page that the hardware expects from us when + * manipulating pages. + */ +#define MLXCX_HW_PAGE_SIZE 4096 + +/* + * This is a special lkey value used to terminate a list of scatter pointers. + */ +#define MLXCX_NULL_LKEY 0x100 + +/* + * Forwards + */ +struct mlxcx; +typedef struct mlxcx mlxcx_t; + +typedef enum { + MLXCX_DMABUF_HDL_ALLOC = 1 << 0, + MLXCX_DMABUF_MEM_ALLOC = 1 << 1, + MLXCX_DMABUF_BOUND = 1 << 2, + MLXCX_DMABUF_FOREIGN = 1 << 3, +} mlxcx_dma_buffer_flags_t; + +typedef struct mlxcx_dma_buffer { + mlxcx_dma_buffer_flags_t mxdb_flags; + caddr_t mxdb_va; /* Buffer VA */ + size_t mxdb_len; /* Buffer logical len */ + ddi_acc_handle_t mxdb_acc_handle; + ddi_dma_handle_t mxdb_dma_handle; + uint_t mxdb_ncookies; +} mlxcx_dma_buffer_t; + +typedef struct mlxcx_dev_page { + list_node_t mxdp_list; + avl_node_t mxdp_tree; + uintptr_t mxdp_pa; + mlxcx_dma_buffer_t mxdp_dma; +} mlxcx_dev_page_t; + +/* + * Data structure to keep track of all information related to the command queue. + */ +typedef enum { + MLXCX_CMD_QUEUE_S_IDLE = 1, + MLXCX_CMD_QUEUE_S_BUSY, + MLXCX_CMD_QUEUE_S_BROKEN +} mlxcx_cmd_queue_status_t; + +typedef struct mlxcx_cmd_queue { + kmutex_t mcmd_lock; + kcondvar_t mcmd_cv; + mlxcx_dma_buffer_t mcmd_dma; + mlxcx_cmd_ent_t *mcmd_ent; + + uint8_t mcmd_size_l2; + uint8_t mcmd_stride_l2; + + mlxcx_cmd_queue_status_t mcmd_status; + + ddi_taskq_t *mcmd_taskq; + id_space_t *mcmd_tokens; +} mlxcx_cmd_queue_t; + +typedef struct mlxcd_cmd_mbox { + list_node_t mlbox_node; + mlxcx_dma_buffer_t mlbox_dma; + mlxcx_cmd_mailbox_t *mlbox_data; +} mlxcx_cmd_mbox_t; + +typedef enum { + MLXCX_EQ_ALLOC = 1 << 0, /* dma mem alloc'd, size set */ + MLXCX_EQ_CREATED = 1 << 1, /* CREATE_EQ sent to hw */ + MLXCX_EQ_DESTROYED = 1 << 2, /* DESTROY_EQ sent to hw */ + MLXCX_EQ_ARMED = 1 << 3, /* Armed through the UAR */ + MLXCX_EQ_POLLING = 1 << 4, /* Currently being polled */ +} mlxcx_eventq_state_t; + +typedef struct mlxcx_bf { + kmutex_t mbf_mtx; + uint_t mbf_cnt; + uint_t mbf_even; + uint_t mbf_odd; +} mlxcx_bf_t; + +typedef struct mlxcx_uar { + boolean_t mlu_allocated; + uint_t mlu_num; + uint_t mlu_base; + + volatile uint_t mlu_bfcnt; + mlxcx_bf_t mlu_bf[MLXCX_BF_PER_UAR]; +} mlxcx_uar_t; + +typedef struct mlxcx_pd { + boolean_t mlpd_allocated; + uint32_t mlpd_num; +} mlxcx_pd_t; + +typedef struct mlxcx_tdom { + boolean_t mltd_allocated; + uint32_t mltd_num; +} mlxcx_tdom_t; + +typedef enum { + MLXCX_PORT_VPORT_PROMISC = 1 << 0, +} mlxcx_port_flags_t; + +typedef struct mlxcx_flow_table mlxcx_flow_table_t; +typedef struct mlxcx_flow_group mlxcx_flow_group_t; + +typedef struct { + uint64_t mlps_rx_drops; +} mlxcx_port_stats_t; + +typedef enum { + MLXCX_PORT_INIT = 1 << 0 +} mlxcx_port_init_t; + +typedef struct mlxcx_port { + kmutex_t mlp_mtx; + mlxcx_port_init_t mlp_init; + mlxcx_t *mlp_mlx; + /* + * The mlp_num we have here starts at zero (it's an index), but the + * 
numbering we have to use for register access starts at 1. We + * currently write mlp_num into the other_vport fields in mlxcx_cmd.c + * (where 0 is a magic number meaning "my vport") so if we ever add + * support for virtualisation features and deal with more than one + * vport, we will probably have to change this. + */ + uint_t mlp_num; + mlxcx_port_flags_t mlp_flags; + uint64_t mlp_guid; + uint8_t mlp_mac_address[ETHERADDRL]; + + uint_t mlp_mtu; + uint_t mlp_max_mtu; + + mlxcx_port_status_t mlp_admin_status; + mlxcx_port_status_t mlp_oper_status; + + boolean_t mlp_autoneg; + mlxcx_eth_proto_t mlp_max_proto; + mlxcx_eth_proto_t mlp_admin_proto; + mlxcx_eth_proto_t mlp_oper_proto; + + mlxcx_eth_inline_mode_t mlp_wqe_min_inline; + + /* Root flow tables */ + mlxcx_flow_table_t *mlp_rx_flow; + mlxcx_flow_table_t *mlp_tx_flow; + + mlxcx_flow_group_t *mlp_promisc; + mlxcx_flow_group_t *mlp_bcast; + mlxcx_flow_group_t *mlp_umcast; + + avl_tree_t mlp_dmac_fe; + + mlxcx_port_stats_t mlp_stats; + + mlxcx_module_status_t mlp_last_modstate; + mlxcx_module_error_type_t mlp_last_moderr; +} mlxcx_port_t; + +typedef enum { + MLXCX_EQ_TYPE_ANY, + MLXCX_EQ_TYPE_RX, + MLXCX_EQ_TYPE_TX +} mlxcx_eventq_type_t; + +typedef struct mlxcx_event_queue { + kmutex_t mleq_mtx; + mlxcx_t *mleq_mlx; + mlxcx_eventq_state_t mleq_state; + mlxcx_eventq_type_t mleq_type; + + mlxcx_dma_buffer_t mleq_dma; + + size_t mleq_entshift; + size_t mleq_nents; + mlxcx_eventq_ent_t *mleq_ent; + uint32_t mleq_cc; /* consumer counter */ + uint32_t mleq_cc_armed; + + uint32_t mleq_events; + + uint32_t mleq_badintrs; + + /* Hardware eq number */ + uint_t mleq_num; + /* Index into the mlxcx_t's interrupts array */ + uint_t mleq_intr_index; + + /* UAR region that has this EQ's doorbell in it */ + mlxcx_uar_t *mleq_uar; + + /* Tree of CQn => mlxcx_completion_queue_t */ + avl_tree_t mleq_cqs; + + uint32_t mleq_check_disarm_cc; + uint_t mleq_check_disarm_cnt; +} mlxcx_event_queue_t; + +typedef enum { + MLXCX_TIS_CREATED = 1 << 0, + MLXCX_TIS_DESTROYED = 1 << 1, +} mlxcx_tis_state_t; + +typedef struct mlxcx_tis { + mlxcx_tis_state_t mltis_state; + list_node_t mltis_entry; + uint_t mltis_num; + mlxcx_tdom_t *mltis_tdom; +} mlxcx_tis_t; + +typedef enum { + MLXCX_BUFFER_INIT, + MLXCX_BUFFER_FREE, + MLXCX_BUFFER_ON_WQ, + MLXCX_BUFFER_ON_LOAN, + MLXCX_BUFFER_ON_CHAIN, +} mlxcx_buffer_state_t; + +typedef struct mlxcx_buf_shard { + list_node_t mlbs_entry; + kmutex_t mlbs_mtx; + list_t mlbs_busy; + list_t mlbs_free; + kcondvar_t mlbs_free_nonempty; +} mlxcx_buf_shard_t; + +typedef struct mlxcx_buffer { + mlxcx_buf_shard_t *mlb_shard; + list_node_t mlb_entry; + list_node_t mlb_cq_entry; + + struct mlxcx_buffer *mlb_tx_head; /* head of tx chain */ + list_t mlb_tx_chain; + list_node_t mlb_tx_chain_entry; + + boolean_t mlb_foreign; + size_t mlb_used; + mblk_t *mlb_tx_mp; + + mlxcx_t *mlb_mlx; + mlxcx_buffer_state_t mlb_state; + uint_t mlb_wqe_index; + mlxcx_dma_buffer_t mlb_dma; + mblk_t *mlb_mp; + frtn_t mlb_frtn; +} mlxcx_buffer_t; + +typedef enum { + MLXCX_CQ_ALLOC = 1 << 0, + MLXCX_CQ_CREATED = 1 << 1, + MLXCX_CQ_DESTROYED = 1 << 2, + MLXCX_CQ_EQAVL = 1 << 3, + MLXCX_CQ_BLOCKED_MAC = 1 << 4, + MLXCX_CQ_TEARDOWN = 1 << 5, + MLXCX_CQ_POLLING = 1 << 6, + MLXCX_CQ_ARMED = 1 << 7, +} mlxcx_completionq_state_t; + +typedef struct mlxcx_work_queue mlxcx_work_queue_t; + +typedef struct mlxcx_completion_queue { + kmutex_t mlcq_mtx; + mlxcx_t *mlcq_mlx; + mlxcx_completionq_state_t mlcq_state; + + mlxcx_port_stats_t *mlcq_stats; + + list_node_t mlcq_entry; + 
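+ /* Entry in the owning EQ's mleq_cqs tree (see MLXCX_CQ_EQAVL) */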
avl_node_t mlcq_eq_entry; + + uint_t mlcq_num; + + mlxcx_work_queue_t *mlcq_wq; + mlxcx_event_queue_t *mlcq_eq; + + /* UAR region that has this CQ's UAR doorbell in it */ + mlxcx_uar_t *mlcq_uar; + + mlxcx_dma_buffer_t mlcq_dma; + + size_t mlcq_entshift; + size_t mlcq_nents; + mlxcx_completionq_ent_t *mlcq_ent; + uint32_t mlcq_cc; /* consumer counter */ + uint32_t mlcq_cc_armed; /* cc at last arm */ + uint32_t mlcq_ec; /* event counter */ + uint32_t mlcq_ec_armed; /* ec at last arm */ + + mlxcx_dma_buffer_t mlcq_doorbell_dma; + mlxcx_completionq_doorbell_t *mlcq_doorbell; + + uint64_t mlcq_bufcnt; + size_t mlcq_bufhwm; + size_t mlcq_buflwm; + list_t mlcq_buffers; + kmutex_t mlcq_bufbmtx; + list_t mlcq_buffers_b; + + uint_t mlcq_check_disarm_cnt; + uint64_t mlcq_check_disarm_cc; + + uint_t mlcq_cqemod_period_usec; + uint_t mlcq_cqemod_count; + + mac_ring_handle_t mlcq_mac_hdl; + uint64_t mlcq_mac_gen; + + boolean_t mlcq_fm_repd_qstate; +} mlxcx_completion_queue_t; + +typedef enum { + MLXCX_WQ_ALLOC = 1 << 0, + MLXCX_WQ_CREATED = 1 << 1, + MLXCX_WQ_STARTED = 1 << 2, + MLXCX_WQ_DESTROYED = 1 << 3, + MLXCX_WQ_TEARDOWN = 1 << 4, + MLXCX_WQ_BUFFERS = 1 << 5, +} mlxcx_workq_state_t; + +typedef enum { + MLXCX_WQ_TYPE_SENDQ = 1, + MLXCX_WQ_TYPE_RECVQ +} mlxcx_workq_type_t; + +typedef struct mlxcx_ring_group mlxcx_ring_group_t; + +struct mlxcx_work_queue { + kmutex_t mlwq_mtx; + mlxcx_t *mlwq_mlx; + mlxcx_workq_type_t mlwq_type; + mlxcx_workq_state_t mlwq_state; + + list_node_t mlwq_entry; + list_node_t mlwq_group_entry; + + mlxcx_ring_group_t *mlwq_group; + + uint_t mlwq_num; + + mlxcx_completion_queue_t *mlwq_cq; + mlxcx_pd_t *mlwq_pd; + + /* Required for send queues */ + mlxcx_tis_t *mlwq_tis; + + /* UAR region that has this WQ's blueflame buffers in it */ + mlxcx_uar_t *mlwq_uar; + + mlxcx_dma_buffer_t mlwq_dma; + + mlxcx_eth_inline_mode_t mlwq_inline_mode; + size_t mlwq_entshift; + size_t mlwq_nents; + /* Discriminate based on mwq_type */ + union { + mlxcx_sendq_ent_t *mlwq_send_ent; + mlxcx_sendq_extra_ent_t *mlwq_send_extra_ent; + mlxcx_recvq_ent_t *mlwq_recv_ent; + mlxcx_sendq_bf_t *mlwq_bf_ent; + }; + uint64_t mlwq_pc; /* producer counter */ + + mlxcx_dma_buffer_t mlwq_doorbell_dma; + mlxcx_workq_doorbell_t *mlwq_doorbell; + + mlxcx_buf_shard_t *mlwq_bufs; + mlxcx_buf_shard_t *mlwq_foreign_bufs; + + boolean_t mlwq_fm_repd_qstate; +}; + +#define MLXCX_RQT_MAX_SIZE 64 + +typedef enum { + MLXCX_RQT_CREATED = 1 << 0, + MLXCX_RQT_DESTROYED = 1 << 1, + MLXCX_RQT_DIRTY = 1 << 2, +} mlxcx_rqtable_state_t; + +typedef struct mlxcx_rqtable { + mlxcx_rqtable_state_t mlrqt_state; + list_node_t mlrqt_entry; + uint_t mlrqt_num; + + size_t mlrqt_max; + size_t mlrqt_used; + + size_t mlrqt_rq_size; + mlxcx_work_queue_t **mlrqt_rq; +} mlxcx_rqtable_t; + +typedef enum { + MLXCX_TIR_CREATED = 1 << 0, + MLXCX_TIR_DESTROYED = 1 << 1, +} mlxcx_tir_state_t; + +typedef struct mlxcx_tir { + mlxcx_tir_state_t mltir_state; + list_node_t mltir_entry; + uint_t mltir_num; + mlxcx_tdom_t *mltir_tdom; + mlxcx_tir_type_t mltir_type; + union { + mlxcx_rqtable_t *mltir_rqtable; + mlxcx_work_queue_t *mltir_rq; + }; + mlxcx_tir_hash_fn_t mltir_hash_fn; + uint8_t mltir_toeplitz_key[40]; + mlxcx_tir_rx_hash_l3_type_t mltir_l3_type; + mlxcx_tir_rx_hash_l4_type_t mltir_l4_type; + mlxcx_tir_rx_hash_fields_t mltir_hash_fields; +} mlxcx_tir_t; + +typedef enum { + MLXCX_FLOW_GROUP_CREATED = 1 << 0, + MLXCX_FLOW_GROUP_BUSY = 1 << 1, + MLXCX_FLOW_GROUP_DESTROYED = 1 << 2, +} mlxcx_flow_group_state_t; + +typedef enum { + 
MLXCX_FLOW_MATCH_SMAC = 1 << 0, + MLXCX_FLOW_MATCH_DMAC = 1 << 1, + MLXCX_FLOW_MATCH_VLAN = 1 << 2, + MLXCX_FLOW_MATCH_VID = 1 << 3, + MLXCX_FLOW_MATCH_IP_VER = 1 << 4, + MLXCX_FLOW_MATCH_SRCIP = 1 << 5, + MLXCX_FLOW_MATCH_DSTIP = 1 << 6, + MLXCX_FLOW_MATCH_IP_PROTO = 1 << 7, + MLXCX_FLOW_MATCH_SQN = 1 << 8, + MLXCX_FLOW_MATCH_VXLAN = 1 << 9, +} mlxcx_flow_mask_t; + +struct mlxcx_flow_group { + list_node_t mlfg_entry; + list_node_t mlfg_role_entry; + mlxcx_flow_group_state_t mlfg_state; + mlxcx_flow_table_t *mlfg_table; + uint_t mlfg_num; + size_t mlfg_start_idx; + size_t mlfg_size; + size_t mlfg_avail; + list_t mlfg_entries; + mlxcx_flow_mask_t mlfg_mask; +}; + +typedef enum { + MLXCX_FLOW_ENTRY_RESERVED = 1 << 0, + MLXCX_FLOW_ENTRY_CREATED = 1 << 1, + MLXCX_FLOW_ENTRY_DELETED = 1 << 2, + MLXCX_FLOW_ENTRY_DIRTY = 1 << 3, +} mlxcx_flow_entry_state_t; + +typedef struct { + mlxcx_tir_t *mlfed_tir; + mlxcx_flow_table_t *mlfed_flow; +} mlxcx_flow_entry_dest_t; + +typedef struct mlxcx_flow_entry { + list_node_t mlfe_group_entry; + avl_node_t mlfe_dmac_entry; + mlxcx_flow_entry_state_t mlfe_state; + mlxcx_flow_table_t *mlfe_table; + mlxcx_flow_group_t *mlfe_group; + uint_t mlfe_index; + + mlxcx_flow_action_t mlfe_action; + + /* Criteria for match */ + uint8_t mlfe_smac[ETHERADDRL]; + uint8_t mlfe_dmac[ETHERADDRL]; + + mlxcx_vlan_type_t mlfe_vlan_type; + uint16_t mlfe_vid; + + uint_t mlfe_ip_version; + uint8_t mlfe_srcip[IPV6_ADDR_LEN]; + uint8_t mlfe_dstip[IPV6_ADDR_LEN]; + + uint_t mlfe_ip_proto; + uint16_t mlfe_sport; + uint16_t mlfe_dport; + + uint32_t mlfe_sqn; + uint32_t mlfe_vxlan_vni; + + /* Destinations */ + size_t mlfe_ndest; + mlxcx_flow_entry_dest_t mlfe_dest[MLXCX_FLOW_MAX_DESTINATIONS]; + + /* + * mlxcx_group_mac_ts joining this entry to N ring groups + * only used by FEs on the root rx flow table + */ + list_t mlfe_ring_groups; +} mlxcx_flow_entry_t; + +typedef enum { + MLXCX_FLOW_TABLE_CREATED = 1 << 0, + MLXCX_FLOW_TABLE_DESTROYED = 1 << 1, + MLXCX_FLOW_TABLE_ROOT = 1 << 2 +} mlxcx_flow_table_state_t; + +struct mlxcx_flow_table { + kmutex_t mlft_mtx; + mlxcx_flow_table_state_t mlft_state; + uint_t mlft_level; + uint_t mlft_num; + mlxcx_flow_table_type_t mlft_type; + + mlxcx_port_t *mlft_port; + + size_t mlft_entshift; + size_t mlft_nents; + + size_t mlft_entsize; + mlxcx_flow_entry_t *mlft_ent; + + /* First entry not yet claimed by a group */ + size_t mlft_next_ent; + + list_t mlft_groups; +}; + +typedef enum { + MLXCX_GROUP_RX, + MLXCX_GROUP_TX +} mlxcx_group_type_t; + +typedef enum { + MLXCX_GROUP_INIT = 1 << 0, + MLXCX_GROUP_WQS = 1 << 1, + MLXCX_GROUP_TIRTIS = 1 << 2, + MLXCX_GROUP_FLOWS = 1 << 3, + MLXCX_GROUP_RUNNING = 1 << 4, + MLXCX_GROUP_RQT = 1 << 5, +} mlxcx_group_state_t; + +#define MLXCX_RX_HASH_FT_SIZE_SHIFT 4 + +typedef enum { + MLXCX_TIR_ROLE_IPv4 = 0, + MLXCX_TIR_ROLE_IPv6, + MLXCX_TIR_ROLE_TCPv4, + MLXCX_TIR_ROLE_TCPv6, + MLXCX_TIR_ROLE_UDPv4, + MLXCX_TIR_ROLE_UDPv6, + MLXCX_TIR_ROLE_OTHER, + + MLXCX_TIRS_PER_GROUP +} mlxcx_tir_role_t; + +typedef struct { + avl_node_t mlgm_group_entry; + list_node_t mlgm_fe_entry; + mlxcx_ring_group_t *mlgm_group; + uint8_t mlgm_mac[6]; + mlxcx_flow_entry_t *mlgm_fe; +} mlxcx_group_mac_t; + +typedef struct { + list_node_t mlgv_entry; + boolean_t mlgv_tagged; + uint16_t mlgv_vid; + mlxcx_flow_entry_t *mlgv_fe; +} mlxcx_group_vlan_t; + +struct mlxcx_ring_group { + kmutex_t mlg_mtx; + mlxcx_t *mlg_mlx; + mlxcx_group_state_t mlg_state; + mlxcx_group_type_t mlg_type; + + mac_group_handle_t mlg_mac_hdl; + + union { + mlxcx_tis_t 
mlg_tis; + mlxcx_tir_t mlg_tir[MLXCX_TIRS_PER_GROUP]; + }; + mlxcx_port_t *mlg_port; + + size_t mlg_nwqs; + size_t mlg_wqs_size; + mlxcx_work_queue_t *mlg_wqs; + + mlxcx_rqtable_t *mlg_rqt; + + /* + * Flow table for matching VLAN IDs + */ + mlxcx_flow_table_t *mlg_rx_vlan_ft; + mlxcx_flow_group_t *mlg_rx_vlan_fg; + mlxcx_flow_group_t *mlg_rx_vlan_def_fg; + mlxcx_flow_group_t *mlg_rx_vlan_promisc_fg; + list_t mlg_rx_vlans; + + /* + * Flow table for separating out by protocol before hashing + */ + mlxcx_flow_table_t *mlg_rx_hash_ft; + + /* + * Links to flow entries on the root flow table which are pointing to + * our rx_vlan_ft. + */ + avl_tree_t mlg_rx_macs; +}; + +typedef enum mlxcx_cmd_state { + MLXCX_CMD_S_DONE = 1 << 0, + MLXCX_CMD_S_ERROR = 1 << 1 +} mlxcx_cmd_state_t; + +typedef struct mlxcx_cmd { + struct mlxcx *mlcmd_mlxp; + kmutex_t mlcmd_lock; + kcondvar_t mlcmd_cv; + + uint8_t mlcmd_token; + mlxcx_cmd_op_t mlcmd_op; + + /* + * Command data and extended mailboxes for responses. + */ + const void *mlcmd_in; + uint32_t mlcmd_inlen; + void *mlcmd_out; + uint32_t mlcmd_outlen; + list_t mlcmd_mbox_in; + uint8_t mlcmd_nboxes_in; + list_t mlcmd_mbox_out; + uint8_t mlcmd_nboxes_out; + /* + * Status information. + */ + mlxcx_cmd_state_t mlcmd_state; + uint8_t mlcmd_status; +} mlxcx_cmd_t; + +/* + * Our view of capabilities. + */ +typedef struct mlxcx_hca_cap { + mlxcx_hca_cap_mode_t mhc_mode; + mlxcx_hca_cap_type_t mhc_type; + union { + uint8_t mhc_bulk[MLXCX_HCA_CAP_SIZE]; + mlxcx_hca_cap_general_caps_t mhc_general; + mlxcx_hca_cap_eth_caps_t mhc_eth; + mlxcx_hca_cap_flow_caps_t mhc_flow; + }; +} mlxcx_hca_cap_t; + +typedef struct { + /* Cooked values */ + boolean_t mlc_checksum; + boolean_t mlc_lso; + boolean_t mlc_vxlan; + size_t mlc_max_lso_size; + size_t mlc_max_rqt_size; + + size_t mlc_max_rx_ft_shift; + size_t mlc_max_rx_fe_dest; + size_t mlc_max_rx_flows; + + size_t mlc_max_tir; + + /* Raw caps data */ + mlxcx_hca_cap_t mlc_hca_cur; + mlxcx_hca_cap_t mlc_hca_max; + mlxcx_hca_cap_t mlc_ether_cur; + mlxcx_hca_cap_t mlc_ether_max; + mlxcx_hca_cap_t mlc_nic_flow_cur; + mlxcx_hca_cap_t mlc_nic_flow_max; +} mlxcx_caps_t; + +typedef struct { + uint_t mldp_eq_size_shift; + uint_t mldp_cq_size_shift; + uint_t mldp_rq_size_shift; + uint_t mldp_sq_size_shift; + uint_t mldp_cqemod_period_usec; + uint_t mldp_cqemod_count; + uint_t mldp_intrmod_period_usec; + uint_t mldp_rx_ngroups_large; + uint_t mldp_rx_ngroups_small; + uint_t mldp_rx_nrings_per_large_group; + uint_t mldp_rx_nrings_per_small_group; + uint_t mldp_tx_ngroups; + uint_t mldp_tx_nrings_per_group; + uint_t mldp_ftbl_root_size_shift; + size_t mldp_tx_bind_threshold; + uint_t mldp_ftbl_vlan_size_shift; + uint64_t mldp_eq_check_interval_sec; + uint64_t mldp_cq_check_interval_sec; + uint64_t mldp_wq_check_interval_sec; +} mlxcx_drv_props_t; + +typedef enum { + MLXCX_ATTACH_FM = 1 << 0, + MLXCX_ATTACH_PCI_CONFIG = 1 << 1, + MLXCX_ATTACH_REGS = 1 << 2, + MLXCX_ATTACH_CMD = 1 << 3, + MLXCX_ATTACH_ENABLE_HCA = 1 << 4, + MLXCX_ATTACH_PAGE_LIST = 1 << 5, + MLXCX_ATTACH_INIT_HCA = 1 << 6, + MLXCX_ATTACH_UAR_PD_TD = 1 << 7, + MLXCX_ATTACH_INTRS = 1 << 8, + MLXCX_ATTACH_PORTS = 1 << 9, + MLXCX_ATTACH_MAC_HDL = 1 << 10, + MLXCX_ATTACH_CQS = 1 << 11, + MLXCX_ATTACH_WQS = 1 << 12, + MLXCX_ATTACH_GROUPS = 1 << 13, + MLXCX_ATTACH_BUFS = 1 << 14, + MLXCX_ATTACH_CAPS = 1 << 15, + MLXCX_ATTACH_CHKTIMERS = 1 << 16, +} mlxcx_attach_progress_t; + +struct mlxcx { + /* entry on the mlxcx_glist */ + list_node_t mlx_gentry; + + dev_info_t *mlx_dip; + int 
mlx_inst; + mlxcx_attach_progress_t mlx_attach; + + mlxcx_drv_props_t mlx_props; + + /* + * Misc. data + */ + uint16_t mlx_fw_maj; + uint16_t mlx_fw_min; + uint16_t mlx_fw_rev; + uint16_t mlx_cmd_rev; + + /* + * Various capabilities of hardware. + */ + mlxcx_caps_t *mlx_caps; + + uint_t mlx_max_sdu; + uint_t mlx_sdu; + + /* + * FM State + */ + int mlx_fm_caps; + + /* + * PCI Data + */ + ddi_acc_handle_t mlx_cfg_handle; + ddi_acc_handle_t mlx_regs_handle; + caddr_t mlx_regs_base; + + /* + * MAC handle + */ + mac_handle_t mlx_mac_hdl; + + /* + * Main command queue for issuing general FW control commands. + */ + mlxcx_cmd_queue_t mlx_cmd; + + /* + * Interrupts + */ + uint_t mlx_intr_pri; + uint_t mlx_intr_type; /* always MSI-X */ + int mlx_intr_count; + size_t mlx_intr_size; /* allocation size */ + ddi_intr_handle_t *mlx_intr_handles; + + /* + * Basic firmware resources which we use for a variety of things. + * The UAR is a reference to a page where CQ and EQ doorbells are + * located. It also holds all the BlueFlame stuff (which we don't + * use). + */ + mlxcx_uar_t mlx_uar; + /* + * The PD (Protection Domain) and TDOM (Transport Domain) are opaque + * entities to us (they're Infiniband constructs we don't actually care + * about) -- we just allocate them and shove their ID numbers in + * whenever we're asked for one. + * + * The "reserved" LKEY is what we should put in queue entries that + * have references to memory to indicate that they're using linear + * addresses (comes from the QUERY_SPECIAL_CONTEXTS cmd). + */ + mlxcx_pd_t mlx_pd; + mlxcx_tdom_t mlx_tdom; + uint_t mlx_rsvd_lkey; + + /* + * Our event queues. These are 1:1 with interrupts. + */ + size_t mlx_eqs_size; /* allocation size */ + mlxcx_event_queue_t *mlx_eqs; + + /* + * Page list. These represent the set of 4k pages we've given to + * hardware. + * + * We can add to this list at the request of hardware from interrupt + * context (the PAGE_REQUEST event), so it's protected by pagemtx. + */ + kmutex_t mlx_pagemtx; + uint_t mlx_npages; + avl_tree_t mlx_pages; + + /* + * Port state + */ + uint_t mlx_nports; + size_t mlx_ports_size; + mlxcx_port_t *mlx_ports; + + /* + * Completion queues (CQs). These are also indexed off the + * event_queue_ts that they each report to. + */ + list_t mlx_cqs; + + uint_t mlx_next_eq; + + /* + * Work queues (WQs). + */ + list_t mlx_wqs; + + /* + * Ring groups + */ + size_t mlx_rx_ngroups; + size_t mlx_rx_groups_size; + mlxcx_ring_group_t *mlx_rx_groups; + + size_t mlx_tx_ngroups; + size_t mlx_tx_groups_size; + mlxcx_ring_group_t *mlx_tx_groups; + + kmem_cache_t *mlx_bufs_cache; + list_t mlx_buf_shards; + + ddi_periodic_t mlx_eq_checktimer; + ddi_periodic_t mlx_cq_checktimer; + ddi_periodic_t mlx_wq_checktimer; +}; + +/* + * Register access + */ +extern uint16_t mlxcx_get16(mlxcx_t *, uintptr_t); +extern uint32_t mlxcx_get32(mlxcx_t *, uintptr_t); +extern uint64_t mlxcx_get64(mlxcx_t *, uintptr_t); + +extern void mlxcx_put32(mlxcx_t *, uintptr_t, uint32_t); +extern void mlxcx_put64(mlxcx_t *, uintptr_t, uint64_t); + +extern void mlxcx_uar_put32(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint32_t); +extern void mlxcx_uar_put64(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint64_t); + +/* + * Logging functions. 
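+ * + * These accept a NULL mlxcx_t where no instance context is available, + * as in mlxcx_detach() above.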
+ */ +extern void mlxcx_warn(mlxcx_t *, const char *, ...); +extern void mlxcx_note(mlxcx_t *, const char *, ...); +extern void mlxcx_panic(mlxcx_t *, const char *, ...); + +extern void mlxcx_fm_ereport(mlxcx_t *, const char *); + +extern void mlxcx_check_sq(mlxcx_t *, mlxcx_work_queue_t *); +extern void mlxcx_check_rq(mlxcx_t *, mlxcx_work_queue_t *); + +/* + * DMA Functions + */ +extern void mlxcx_dma_free(mlxcx_dma_buffer_t *); +extern boolean_t mlxcx_dma_alloc(mlxcx_t *, mlxcx_dma_buffer_t *, + ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t, size_t, boolean_t); +extern boolean_t mlxcx_dma_init(mlxcx_t *, mlxcx_dma_buffer_t *, + ddi_dma_attr_t *, boolean_t); +extern boolean_t mlxcx_dma_bind_mblk(mlxcx_t *, mlxcx_dma_buffer_t *, + const mblk_t *, size_t, boolean_t); +extern boolean_t mlxcx_dma_alloc_offset(mlxcx_t *, mlxcx_dma_buffer_t *, + ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t, + size_t, size_t, boolean_t); +extern void mlxcx_dma_unbind(mlxcx_t *, mlxcx_dma_buffer_t *); +extern void mlxcx_dma_acc_attr(mlxcx_t *, ddi_device_acc_attr_t *); +extern void mlxcx_dma_page_attr(mlxcx_t *, ddi_dma_attr_t *); +extern void mlxcx_dma_queue_attr(mlxcx_t *, ddi_dma_attr_t *); +extern void mlxcx_dma_qdbell_attr(mlxcx_t *, ddi_dma_attr_t *); +extern void mlxcx_dma_buf_attr(mlxcx_t *, ddi_dma_attr_t *); + +extern boolean_t mlxcx_give_pages(mlxcx_t *, int32_t); + +static inline const ddi_dma_cookie_t * +mlxcx_dma_cookie_iter(const mlxcx_dma_buffer_t *db, + const ddi_dma_cookie_t *prev) +{ + ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND); + return (ddi_dma_cookie_iter(db->mxdb_dma_handle, prev)); +} + +static inline const ddi_dma_cookie_t * +mlxcx_dma_cookie_one(const mlxcx_dma_buffer_t *db) +{ + ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND); + return (ddi_dma_cookie_one(db->mxdb_dma_handle)); +} + +/* + * From mlxcx_intr.c + */ +extern boolean_t mlxcx_intr_setup(mlxcx_t *); +extern void mlxcx_intr_teardown(mlxcx_t *); +extern void mlxcx_arm_eq(mlxcx_t *, mlxcx_event_queue_t *); +extern void mlxcx_arm_cq(mlxcx_t *, mlxcx_completion_queue_t *); + +extern mblk_t *mlxcx_rx_poll(mlxcx_t *, mlxcx_completion_queue_t *, size_t); + +/* + * From mlxcx_gld.c + */ +extern boolean_t mlxcx_register_mac(mlxcx_t *); + +/* + * From mlxcx_ring.c + */ +extern boolean_t mlxcx_cq_alloc_dma(mlxcx_t *, mlxcx_completion_queue_t *); +extern void mlxcx_cq_rele_dma(mlxcx_t *, mlxcx_completion_queue_t *); +extern boolean_t mlxcx_wq_alloc_dma(mlxcx_t *, mlxcx_work_queue_t *); +extern void mlxcx_wq_rele_dma(mlxcx_t *, mlxcx_work_queue_t *); + +extern boolean_t mlxcx_buf_create(mlxcx_t *, mlxcx_buf_shard_t *, + mlxcx_buffer_t **); +extern boolean_t mlxcx_buf_create_foreign(mlxcx_t *, mlxcx_buf_shard_t *, + mlxcx_buffer_t **); +extern void mlxcx_buf_take(mlxcx_t *, mlxcx_work_queue_t *, mlxcx_buffer_t **); +extern size_t mlxcx_buf_take_n(mlxcx_t *, mlxcx_work_queue_t *, + mlxcx_buffer_t **, size_t); +extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *); +extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *); +extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t); +extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *); + +extern boolean_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *, + mblk_t *, size_t, mlxcx_buffer_t **); + +extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *); +extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *); + +extern boolean_t mlxcx_rx_group_start(mlxcx_t *, mlxcx_ring_group_t *); +extern boolean_t 
mlxcx_tx_ring_start(mlxcx_t *, mlxcx_ring_group_t *, + mlxcx_work_queue_t *); +extern boolean_t mlxcx_rx_ring_start(mlxcx_t *, mlxcx_ring_group_t *, + mlxcx_work_queue_t *); + +extern boolean_t mlxcx_rq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *, + mlxcx_buffer_t *); +extern boolean_t mlxcx_rq_add_buffers(mlxcx_t *, mlxcx_work_queue_t *, + mlxcx_buffer_t **, size_t); +extern boolean_t mlxcx_sq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *, + uint8_t *, size_t, uint32_t, mlxcx_buffer_t *); +extern boolean_t mlxcx_sq_add_nop(mlxcx_t *, mlxcx_work_queue_t *); +extern void mlxcx_rq_refill(mlxcx_t *, mlxcx_work_queue_t *); + +extern void mlxcx_teardown_groups(mlxcx_t *); +extern void mlxcx_wq_teardown(mlxcx_t *, mlxcx_work_queue_t *); +extern void mlxcx_cq_teardown(mlxcx_t *, mlxcx_completion_queue_t *); +extern void mlxcx_teardown_rx_group(mlxcx_t *, mlxcx_ring_group_t *); +extern void mlxcx_teardown_tx_group(mlxcx_t *, mlxcx_ring_group_t *); + +extern void mlxcx_tx_completion(mlxcx_t *, mlxcx_completion_queue_t *, + mlxcx_completionq_ent_t *, mlxcx_buffer_t *); +extern mblk_t *mlxcx_rx_completion(mlxcx_t *, mlxcx_completion_queue_t *, + mlxcx_completionq_ent_t *, mlxcx_buffer_t *); + +extern mlxcx_buf_shard_t *mlxcx_mlbs_create(mlxcx_t *); + +/* + * Flow mgmt + */ +extern boolean_t mlxcx_add_umcast_entry(mlxcx_t *, mlxcx_port_t *, + mlxcx_ring_group_t *, const uint8_t *); +extern boolean_t mlxcx_remove_umcast_entry(mlxcx_t *, mlxcx_port_t *, + mlxcx_ring_group_t *, const uint8_t *); +extern void mlxcx_remove_all_umcast_entries(mlxcx_t *, mlxcx_port_t *, + mlxcx_ring_group_t *); +extern boolean_t mlxcx_setup_flow_group(mlxcx_t *, mlxcx_flow_table_t *, + mlxcx_flow_group_t *); +extern void mlxcx_teardown_flow_table(mlxcx_t *, mlxcx_flow_table_t *); + +extern void mlxcx_remove_all_vlan_entries(mlxcx_t *, mlxcx_ring_group_t *); +extern boolean_t mlxcx_remove_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *, + boolean_t, uint16_t); +extern boolean_t mlxcx_add_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *, + boolean_t, uint16_t); + +/* + * Command functions + */ +extern boolean_t mlxcx_cmd_queue_init(mlxcx_t *); +extern void mlxcx_cmd_queue_fini(mlxcx_t *); + +extern boolean_t mlxcx_cmd_enable_hca(mlxcx_t *); +extern boolean_t mlxcx_cmd_disable_hca(mlxcx_t *); + +extern boolean_t mlxcx_cmd_query_issi(mlxcx_t *, uint_t *); +extern boolean_t mlxcx_cmd_set_issi(mlxcx_t *, uint16_t); + +extern boolean_t mlxcx_cmd_query_pages(mlxcx_t *, uint_t, int32_t *); +extern boolean_t mlxcx_cmd_give_pages(mlxcx_t *, uint_t, int32_t, + mlxcx_dev_page_t **); +extern boolean_t mlxcx_cmd_return_pages(mlxcx_t *, int32_t, uint64_t *, + int32_t *); + +extern boolean_t mlxcx_cmd_query_hca_cap(mlxcx_t *, mlxcx_hca_cap_type_t, + mlxcx_hca_cap_mode_t, mlxcx_hca_cap_t *); + +extern boolean_t mlxcx_cmd_set_driver_version(mlxcx_t *, const char *); + +extern boolean_t mlxcx_cmd_init_hca(mlxcx_t *); +extern boolean_t mlxcx_cmd_teardown_hca(mlxcx_t *); + +extern boolean_t mlxcx_cmd_alloc_uar(mlxcx_t *, mlxcx_uar_t *); +extern boolean_t mlxcx_cmd_dealloc_uar(mlxcx_t *, mlxcx_uar_t *); + +extern boolean_t mlxcx_cmd_alloc_pd(mlxcx_t *, mlxcx_pd_t *); +extern boolean_t mlxcx_cmd_dealloc_pd(mlxcx_t *, mlxcx_pd_t *); + +extern boolean_t mlxcx_cmd_alloc_tdom(mlxcx_t *, mlxcx_tdom_t *); +extern boolean_t mlxcx_cmd_dealloc_tdom(mlxcx_t *, mlxcx_tdom_t *); + +extern boolean_t mlxcx_cmd_create_eq(mlxcx_t *, mlxcx_event_queue_t *); +extern boolean_t mlxcx_cmd_destroy_eq(mlxcx_t *, mlxcx_event_queue_t *); +extern boolean_t mlxcx_cmd_query_eq(mlxcx_t *, 
mlxcx_event_queue_t *, + mlxcx_eventq_ctx_t *); + +extern boolean_t mlxcx_cmd_create_cq(mlxcx_t *, mlxcx_completion_queue_t *); +extern boolean_t mlxcx_cmd_destroy_cq(mlxcx_t *, mlxcx_completion_queue_t *); +extern boolean_t mlxcx_cmd_query_cq(mlxcx_t *, mlxcx_completion_queue_t *, + mlxcx_completionq_ctx_t *); + +extern boolean_t mlxcx_cmd_create_rq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_start_rq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_stop_rq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_destroy_rq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_query_rq(mlxcx_t *, mlxcx_work_queue_t *, + mlxcx_rq_ctx_t *); + +extern boolean_t mlxcx_cmd_create_tir(mlxcx_t *, mlxcx_tir_t *); +extern boolean_t mlxcx_cmd_destroy_tir(mlxcx_t *, mlxcx_tir_t *); + +extern boolean_t mlxcx_cmd_create_sq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_start_sq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_stop_sq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_destroy_sq(mlxcx_t *, mlxcx_work_queue_t *); +extern boolean_t mlxcx_cmd_query_sq(mlxcx_t *, mlxcx_work_queue_t *, + mlxcx_sq_ctx_t *); + +extern boolean_t mlxcx_cmd_create_tis(mlxcx_t *, mlxcx_tis_t *); +extern boolean_t mlxcx_cmd_destroy_tis(mlxcx_t *, mlxcx_tis_t *); + +extern boolean_t mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *); +extern boolean_t mlxcx_cmd_query_special_ctxs(mlxcx_t *); + +extern boolean_t mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *, + mlxcx_modify_nic_vport_ctx_fields_t); + +extern boolean_t mlxcx_cmd_create_flow_table(mlxcx_t *, mlxcx_flow_table_t *); +extern boolean_t mlxcx_cmd_destroy_flow_table(mlxcx_t *, mlxcx_flow_table_t *); +extern boolean_t mlxcx_cmd_set_flow_table_root(mlxcx_t *, mlxcx_flow_table_t *); + +extern boolean_t mlxcx_cmd_create_flow_group(mlxcx_t *, mlxcx_flow_group_t *); +extern boolean_t mlxcx_cmd_set_flow_table_entry(mlxcx_t *, + mlxcx_flow_entry_t *); +extern boolean_t mlxcx_cmd_delete_flow_table_entry(mlxcx_t *, + mlxcx_flow_entry_t *); +extern boolean_t mlxcx_cmd_destroy_flow_group(mlxcx_t *, mlxcx_flow_group_t *); + +extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t, + mlxcx_register_id_t, mlxcx_register_data_t *); +extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *); +extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *); +extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *); + +extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *); + +extern boolean_t mlxcx_cmd_create_rqt(mlxcx_t *, mlxcx_rqtable_t *); +extern boolean_t mlxcx_cmd_destroy_rqt(mlxcx_t *, mlxcx_rqtable_t *); + +extern boolean_t mlxcx_cmd_set_int_mod(mlxcx_t *, uint_t, uint_t); + +extern boolean_t mlxcx_cmd_query_module_status(mlxcx_t *, uint_t, + mlxcx_module_status_t *, mlxcx_module_error_type_t *); +extern boolean_t mlxcx_cmd_set_port_led(mlxcx_t *, mlxcx_port_t *, uint16_t); + +/* Comparator for avl_ts */ +extern int mlxcx_cq_compare(const void *, const void *); +extern int mlxcx_dmac_fe_compare(const void *, const void *); +extern int mlxcx_grmac_compare(const void *, const void *); +extern int mlxcx_page_compare(const void *, const void *); + +extern void mlxcx_update_link_state(mlxcx_t *, mlxcx_port_t *); + +extern void mlxcx_eth_proto_to_string(mlxcx_eth_proto_t, char *, size_t); +extern const char *mlxcx_port_status_string(mlxcx_port_status_t); + +extern const char *mlxcx_event_name(mlxcx_event_t); + 
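+/* + * Usage sketch (illustrative only, not itself part of the driver): the + * mlxcx_cmd_* wrappers above are synchronous. Each one builds a command, + * sends it to the HCA, blocks until the response arrives, and returns + * B_TRUE only if the device reported success. A hypothetical caller: + * + *	mlxcx_uar_t uar; + *	bzero(&uar, sizeof (uar)); + *	if (!mlxcx_cmd_alloc_uar(mlxp, &uar)) + *		return (B_FALSE); + *	... use uar.mlu_base and uar.mlu_bf ... + *	(void) mlxcx_cmd_dealloc_uar(mlxp, &uar); + */ +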
+#ifdef __cplusplus +} +#endif + +#endif /* _MLXCX_H */ diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c new file mode 100644 index 0000000000..30fb7ca8ef --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c @@ -0,0 +1,3542 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020, The University of Queensland + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * Controls the management of commands that are issued to and from the HCA + * command queue. + */ + +#include <mlxcx.h> + +#include <sys/debug.h> +#include <sys/sysmacros.h> + +/* + * When we start up the command queue, it will undergo some internal + * initialization after we set the command queue address. These values allow us + * to control how much time we should wait for that to occur. + */ +clock_t mlxcx_cmd_init_delay = 1000 * 10; /* 10 ms in us */ +uint_t mlxcx_cmd_init_trys = 100; /* Wait at most 1s */ + +clock_t mlxcx_cmd_delay = 1000 * 1; /* 1 ms in us */ +uint_t mlxcx_cmd_tries = 5000; /* Wait at most 5s */ + +/* + * Commands may be addressed to a particular PCI function. We only ever + * communicate with our own function, which this value selects. + */ +#define MLXCX_FUNCTION_SELF (to_be16(0)) + +static const char * +mlxcx_cmd_response_string(mlxcx_cmd_ret_t ret) +{ + switch (ret) { + case MLXCX_CMD_R_OK: + return ("MLXCX_CMD_R_OK"); + case MLXCX_CMD_R_INTERNAL_ERR: + return ("MLXCX_CMD_R_INTERNAL_ERR"); + case MLXCX_CMD_R_BAD_OP: + return ("MLXCX_CMD_R_BAD_OP"); + case MLXCX_CMD_R_BAD_PARAM: + return ("MLXCX_CMD_R_BAD_PARAM"); + case MLXCX_CMD_R_BAD_SYS_STATE: + return ("MLXCX_CMD_R_BAD_SYS_STATE"); + case MLXCX_CMD_R_BAD_RESOURCE: + return ("MLXCX_CMD_R_BAD_RESOURCE"); + case MLXCX_CMD_R_RESOURCE_BUSY: + return ("MLXCX_CMD_R_RESOURCE_BUSY"); + case MLXCX_CMD_R_EXCEED_LIM: + return ("MLXCX_CMD_R_EXCEED_LIM"); + case MLXCX_CMD_R_BAD_RES_STATE: + return ("MLXCX_CMD_R_BAD_RES_STATE"); + case MLXCX_CMD_R_BAD_INDEX: + return ("MLXCX_CMD_R_BAD_INDEX"); + case MLXCX_CMD_R_NO_RESOURCES: + return ("MLXCX_CMD_R_NO_RESOURCES"); + case MLXCX_CMD_R_BAD_INPUT_LEN: + return ("MLXCX_CMD_R_BAD_INPUT_LEN"); + case MLXCX_CMD_R_BAD_OUTPUT_LEN: + return ("MLXCX_CMD_R_BAD_OUTPUT_LEN"); + case MLXCX_CMD_R_BAD_RESOURCE_STATE: + return ("MLXCX_CMD_R_BAD_RESOURCE_STATE"); + case MLXCX_CMD_R_BAD_PKT: + return ("MLXCX_CMD_R_BAD_PKT"); + case MLXCX_CMD_R_BAD_SIZE: + return ("MLXCX_CMD_R_BAD_SIZE"); + default: + return ("Unknown status"); + } +} + +static const char * +mlxcx_cmd_opcode_string(mlxcx_cmd_op_t op) +{ + switch (op) { + case MLXCX_OP_QUERY_HCA_CAP: + return ("MLXCX_OP_QUERY_HCA_CAP"); + case MLXCX_OP_QUERY_ADAPTER: + return ("MLXCX_OP_QUERY_ADAPTER"); + case MLXCX_OP_INIT_HCA: + return ("MLXCX_OP_INIT_HCA"); + case MLXCX_OP_TEARDOWN_HCA: + return ("MLXCX_OP_TEARDOWN_HCA"); + case MLXCX_OP_ENABLE_HCA: + return ("MLXCX_OP_ENABLE_HCA"); + case MLXCX_OP_DISABLE_HCA: + return ("MLXCX_OP_DISABLE_HCA"); + case MLXCX_OP_QUERY_PAGES: + return ("MLXCX_OP_QUERY_PAGES"); + case MLXCX_OP_MANAGE_PAGES: + return ("MLXCX_OP_MANAGE_PAGES"); + case MLXCX_OP_SET_HCA_CAP: + return ("MLXCX_OP_SET_HCA_CAP"); + case 
MLXCX_OP_QUERY_ISSI: + return ("MLXCX_OP_QUERY_ISSI"); + case MLXCX_OP_SET_ISSI: + return ("MLXCX_OP_SET_ISSI"); + case MLXCX_OP_SET_DRIVER_VERSION: + return ("MLXCX_OP_SET_DRIVER_VERSION"); + case MLXCX_OP_QUERY_OTHER_HCA_CAP: + return ("MLXCX_OP_QUERY_OTHER_HCA_CAP"); + case MLXCX_OP_MODIFY_OTHER_HCA_CAP: + return ("MLXCX_OP_MODIFY_OTHER_HCA_CAP"); + case MLXCX_OP_SET_TUNNELED_OPERATIONS: + return ("MLXCX_OP_SET_TUNNELED_OPERATIONS"); + case MLXCX_OP_CREATE_MKEY: + return ("MLXCX_OP_CREATE_MKEY"); + case MLXCX_OP_QUERY_MKEY: + return ("MLXCX_OP_QUERY_MKEY"); + case MLXCX_OP_DESTROY_MKEY: + return ("MLXCX_OP_DESTROY_MKEY"); + case MLXCX_OP_QUERY_SPECIAL_CONTEXTS: + return ("MLXCX_OP_QUERY_SPECIAL_CONTEXTS"); + case MLXCX_OP_PAGE_FAULT_RESUME: + return ("MLXCX_OP_PAGE_FAULT_RESUME"); + case MLXCX_OP_CREATE_EQ: + return ("MLXCX_OP_CREATE_EQ"); + case MLXCX_OP_DESTROY_EQ: + return ("MLXCX_OP_DESTROY_EQ"); + case MLXCX_OP_QUERY_EQ: + return ("MLXCX_OP_QUERY_EQ"); + case MLXCX_OP_GEN_EQE: + return ("MLXCX_OP_GEN_EQE"); + case MLXCX_OP_CREATE_CQ: + return ("MLXCX_OP_CREATE_CQ"); + case MLXCX_OP_DESTROY_CQ: + return ("MLXCX_OP_DESTROY_CQ"); + case MLXCX_OP_QUERY_CQ: + return ("MLXCX_OP_QUERY_CQ"); + case MLXCX_OP_MODIFY_CQ: + return ("MLXCX_OP_MODIFY_CQ"); + case MLXCX_OP_CREATE_QP: + return ("MLXCX_OP_CREATE_QP"); + case MLXCX_OP_DESTROY_QP: + return ("MLXCX_OP_DESTROY_QP"); + case MLXCX_OP_RST2INIT_QP: + return ("MLXCX_OP_RST2INIT_QP"); + case MLXCX_OP_INIT2RTR_QP: + return ("MLXCX_OP_INIT2RTR_QP"); + case MLXCX_OP_RTR2RTS_QP: + return ("MLXCX_OP_RTR2RTS_QP"); + case MLXCX_OP_RTS2RTS_QP: + return ("MLXCX_OP_RTS2RTS_QP"); + case MLXCX_OP_SQERR2RTS_QP: + return ("MLXCX_OP_SQERR2RTS_QP"); + case MLXCX_OP__2ERR_QP: + return ("MLXCX_OP__2ERR_QP"); + case MLXCX_OP__2RST_QP: + return ("MLXCX_OP__2RST_QP"); + case MLXCX_OP_QUERY_QP: + return ("MLXCX_OP_QUERY_QP"); + case MLXCX_OP_SQD_RTS_QP: + return ("MLXCX_OP_SQD_RTS_QP"); + case MLXCX_OP_INIT2INIT_QP: + return ("MLXCX_OP_INIT2INIT_QP"); + case MLXCX_OP_CREATE_PSV: + return ("MLXCX_OP_CREATE_PSV"); + case MLXCX_OP_DESTROY_PSV: + return ("MLXCX_OP_DESTROY_PSV"); + case MLXCX_OP_CREATE_SRQ: + return ("MLXCX_OP_CREATE_SRQ"); + case MLXCX_OP_DESTROY_SRQ: + return ("MLXCX_OP_DESTROY_SRQ"); + case MLXCX_OP_QUERY_SRQ: + return ("MLXCX_OP_QUERY_SRQ"); + case MLXCX_OP_ARM_RQ: + return ("MLXCX_OP_ARM_RQ"); + case MLXCX_OP_CREATE_XRC_SRQ: + return ("MLXCX_OP_CREATE_XRC_SRQ"); + case MLXCX_OP_DESTROY_XRC_SRQ: + return ("MLXCX_OP_DESTROY_XRC_SRQ"); + case MLXCX_OP_QUERY_XRC_SRQ: + return ("MLXCX_OP_QUERY_XRC_SRQ"); + case MLXCX_OP_ARM_XRC_SRQ: + return ("MLXCX_OP_ARM_XRC_SRQ"); + case MLXCX_OP_CREATE_DCT: + return ("MLXCX_OP_CREATE_DCT"); + case MLXCX_OP_DESTROY_DCT: + return ("MLXCX_OP_DESTROY_DCT"); + case MLXCX_OP_DRAIN_DCT: + return ("MLXCX_OP_DRAIN_DCT"); + case MLXCX_OP_QUERY_DCT: + return ("MLXCX_OP_QUERY_DCT"); + case MLXCX_OP_ARM_DCT_FOR_KEY_VIOLATION: + return ("MLXCX_OP_ARM_DCT_FOR_KEY_VIOLATION"); + case MLXCX_OP_CREATE_XRQ: + return ("MLXCX_OP_CREATE_XRQ"); + case MLXCX_OP_DESTROY_XRQ: + return ("MLXCX_OP_DESTROY_XRQ"); + case MLXCX_OP_QUERY_XRQ: + return ("MLXCX_OP_QUERY_XRQ"); + case MLXCX_OP_CREATE_NVMF_BACKEND_CONTROLLER: + return ("MLXCX_OP_CREATE_NVMF_BACKEND_CONTROLLER"); + case MLXCX_OP_DESTROY_NVMF_BACKEND_CONTROLLER: + return ("MLXCX_OP_DESTROY_NVMF_BACKEND_CONTROLLER"); + case MLXCX_OP_QUERY_NVMF_BACKEND_CONTROLLER: + return ("MLXCX_OP_QUERY_NVMF_BACKEND_CONTROLLER"); + case MLXCX_OP_ATTACH_NVMF_NAMESPACE: + return 
("MLXCX_OP_ATTACH_NVMF_NAMESPACE"); + case MLXCX_OP_DETACH_NVMF_NAMESPACE: + return ("MLXCX_OP_DETACH_NVMF_NAMESPACE"); + case MLXCX_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + return ("MLXCX_OP_QUERY_XRQ_DC_PARAMS_ENTRY"); + case MLXCX_OP_SET_XRQ_DC_PARAMS_ENTRY: + return ("MLXCX_OP_SET_XRQ_DC_PARAMS_ENTRY"); + case MLXCX_OP_QUERY_XRQ_ERROR_PARAMS: + return ("MLXCX_OP_QUERY_XRQ_ERROR_PARAMS"); + case MLXCX_OP_QUERY_VPORT_STATE: + return ("MLXCX_OP_QUERY_VPORT_STATE"); + case MLXCX_OP_MODIFY_VPORT_STATE: + return ("MLXCX_OP_MODIFY_VPORT_STATE"); + case MLXCX_OP_QUERY_ESW_VPORT_CONTEXT: + return ("MLXCX_OP_QUERY_ESW_VPORT_CONTEXT"); + case MLXCX_OP_MODIFY_ESW_VPORT_CONTEXT: + return ("MLXCX_OP_MODIFY_ESW_VPORT_CONTEXT"); + case MLXCX_OP_QUERY_NIC_VPORT_CONTEXT: + return ("MLXCX_OP_QUERY_NIC_VPORT_CONTEXT"); + case MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT: + return ("MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT"); + case MLXCX_OP_QUERY_ROCE_ADDRESS: + return ("MLXCX_OP_QUERY_ROCE_ADDRESS"); + case MLXCX_OP_SET_ROCE_ADDRESS: + return ("MLXCX_OP_SET_ROCE_ADDRESS"); + case MLXCX_OP_QUERY_HCA_VPORT_CONTEXT: + return ("MLXCX_OP_QUERY_HCA_VPORT_CONTEXT"); + case MLXCX_OP_MODIFY_HCA_VPORT_CONTEXT: + return ("MLXCX_OP_MODIFY_HCA_VPORT_CONTEXT"); + case MLXCX_OP_QUERY_HCA_VPORT_GID: + return ("MLXCX_OP_QUERY_HCA_VPORT_GID"); + case MLXCX_OP_QUERY_HCA_VPORT_PKEY: + return ("MLXCX_OP_QUERY_HCA_VPORT_PKEY"); + case MLXCX_OP_QUERY_VPORT_COUNTER: + return ("MLXCX_OP_QUERY_VPORT_COUNTER"); + case MLXCX_OP_ALLOC_Q_COUNTER: + return ("MLXCX_OP_ALLOC_Q_COUNTER"); + case MLXCX_OP_DEALLOC_Q_COUNTER: + return ("MLXCX_OP_DEALLOC_Q_COUNTER"); + case MLXCX_OP_QUERY_Q_COUNTER: + return ("MLXCX_OP_QUERY_Q_COUNTER"); + case MLXCX_OP_SET_PP_RATE_LIMIT: + return ("MLXCX_OP_SET_PP_RATE_LIMIT"); + case MLXCX_OP_QUERY_PP_RATE_LIMIT: + return ("MLXCX_OP_QUERY_PP_RATE_LIMIT"); + case MLXCX_OP_ALLOC_PD: + return ("MLXCX_OP_ALLOC_PD"); + case MLXCX_OP_DEALLOC_PD: + return ("MLXCX_OP_DEALLOC_PD"); + case MLXCX_OP_ALLOC_UAR: + return ("MLXCX_OP_ALLOC_UAR"); + case MLXCX_OP_DEALLOC_UAR: + return ("MLXCX_OP_DEALLOC_UAR"); + case MLXCX_OP_CONFIG_INT_MODERATION: + return ("MLXCX_OP_CONFIG_INT_MODERATION"); + case MLXCX_OP_ACCESS_REG: + return ("MLXCX_OP_ACCESS_REG"); + case MLXCX_OP_ATTACH_TO_MCG: + return ("MLXCX_OP_ATTACH_TO_MCG"); + case MLXCX_OP_DETACH_FROM_MCG: + return ("MLXCX_OP_DETACH_FROM_MCG"); + case MLXCX_OP_MAD_IFC: + return ("MLXCX_OP_MAD_IFC"); + case MLXCX_OP_QUERY_MAD_DEMUX: + return ("MLXCX_OP_QUERY_MAD_DEMUX"); + case MLXCX_OP_SET_MAD_DEMUX: + return ("MLXCX_OP_SET_MAD_DEMUX"); + case MLXCX_OP_NOP: + return ("MLXCX_OP_NOP"); + case MLXCX_OP_ALLOC_XRCD: + return ("MLXCX_OP_ALLOC_XRCD"); + case MLXCX_OP_DEALLOC_XRCD: + return ("MLXCX_OP_DEALLOC_XRCD"); + case MLXCX_OP_ALLOC_TRANSPORT_DOMAIN: + return ("MLXCX_OP_ALLOC_TRANSPORT_DOMAIN"); + case MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN: + return ("MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN"); + case MLXCX_OP_QUERY_CONG_STATUS: + return ("MLXCX_OP_QUERY_CONG_STATUS"); + case MLXCX_OP_MODIFY_CONG_STATUS: + return ("MLXCX_OP_MODIFY_CONG_STATUS"); + case MLXCX_OP_QUERY_CONG_PARAMS: + return ("MLXCX_OP_QUERY_CONG_PARAMS"); + case MLXCX_OP_MODIFY_CONG_PARAMS: + return ("MLXCX_OP_MODIFY_CONG_PARAMS"); + case MLXCX_OP_QUERY_CONG_STATISTICS: + return ("MLXCX_OP_QUERY_CONG_STATISTICS"); + case MLXCX_OP_ADD_VXLAN_UDP_DPORT: + return ("MLXCX_OP_ADD_VXLAN_UDP_DPORT"); + case MLXCX_OP_DELETE_VXLAN_UDP_DPORT: + return ("MLXCX_OP_DELETE_VXLAN_UDP_DPORT"); + case MLXCX_OP_SET_L2_TABLE_ENTRY: + return 
("MLXCX_OP_SET_L2_TABLE_ENTRY"); + case MLXCX_OP_QUERY_L2_TABLE_ENTRY: + return ("MLXCX_OP_QUERY_L2_TABLE_ENTRY"); + case MLXCX_OP_DELETE_L2_TABLE_ENTRY: + return ("MLXCX_OP_DELETE_L2_TABLE_ENTRY"); + case MLXCX_OP_SET_WOL_ROL: + return ("MLXCX_OP_SET_WOL_ROL"); + case MLXCX_OP_QUERY_WOL_ROL: + return ("MLXCX_OP_QUERY_WOL_ROL"); + case MLXCX_OP_CREATE_TIR: + return ("MLXCX_OP_CREATE_TIR"); + case MLXCX_OP_MODIFY_TIR: + return ("MLXCX_OP_MODIFY_TIR"); + case MLXCX_OP_DESTROY_TIR: + return ("MLXCX_OP_DESTROY_TIR"); + case MLXCX_OP_QUERY_TIR: + return ("MLXCX_OP_QUERY_TIR"); + case MLXCX_OP_CREATE_SQ: + return ("MLXCX_OP_CREATE_SQ"); + case MLXCX_OP_MODIFY_SQ: + return ("MLXCX_OP_MODIFY_SQ"); + case MLXCX_OP_DESTROY_SQ: + return ("MLXCX_OP_DESTROY_SQ"); + case MLXCX_OP_QUERY_SQ: + return ("MLXCX_OP_QUERY_SQ"); + case MLXCX_OP_CREATE_RQ: + return ("MLXCX_OP_CREATE_RQ"); + case MLXCX_OP_MODIFY_RQ: + return ("MLXCX_OP_MODIFY_RQ"); + case MLXCX_OP_DESTROY_RQ: + return ("MLXCX_OP_DESTROY_RQ"); + case MLXCX_OP_QUERY_RQ: + return ("MLXCX_OP_QUERY_RQ"); + case MLXCX_OP_CREATE_RMP: + return ("MLXCX_OP_CREATE_RMP"); + case MLXCX_OP_MODIFY_RMP: + return ("MLXCX_OP_MODIFY_RMP"); + case MLXCX_OP_DESTROY_RMP: + return ("MLXCX_OP_DESTROY_RMP"); + case MLXCX_OP_QUERY_RMP: + return ("MLXCX_OP_QUERY_RMP"); + case MLXCX_OP_CREATE_TIS: + return ("MLXCX_OP_CREATE_TIS"); + case MLXCX_OP_MODIFY_TIS: + return ("MLXCX_OP_MODIFY_TIS"); + case MLXCX_OP_DESTROY_TIS: + return ("MLXCX_OP_DESTROY_TIS"); + case MLXCX_OP_QUERY_TIS: + return ("MLXCX_OP_QUERY_TIS"); + case MLXCX_OP_CREATE_RQT: + return ("MLXCX_OP_CREATE_RQT"); + case MLXCX_OP_MODIFY_RQT: + return ("MLXCX_OP_MODIFY_RQT"); + case MLXCX_OP_DESTROY_RQT: + return ("MLXCX_OP_DESTROY_RQT"); + case MLXCX_OP_QUERY_RQT: + return ("MLXCX_OP_QUERY_RQT"); + case MLXCX_OP_SET_FLOW_TABLE_ROOT: + return ("MLXCX_OP_SET_FLOW_TABLE_ROOT"); + case MLXCX_OP_CREATE_FLOW_TABLE: + return ("MLXCX_OP_CREATE_FLOW_TABLE"); + case MLXCX_OP_DESTROY_FLOW_TABLE: + return ("MLXCX_OP_DESTROY_FLOW_TABLE"); + case MLXCX_OP_QUERY_FLOW_TABLE: + return ("MLXCX_OP_QUERY_FLOW_TABLE"); + case MLXCX_OP_CREATE_FLOW_GROUP: + return ("MLXCX_OP_CREATE_FLOW_GROUP"); + case MLXCX_OP_DESTROY_FLOW_GROUP: + return ("MLXCX_OP_DESTROY_FLOW_GROUP"); + case MLXCX_OP_QUERY_FLOW_GROUP: + return ("MLXCX_OP_QUERY_FLOW_GROUP"); + case MLXCX_OP_SET_FLOW_TABLE_ENTRY: + return ("MLXCX_OP_SET_FLOW_TABLE_ENTRY"); + case MLXCX_OP_QUERY_FLOW_TABLE_ENTRY: + return ("MLXCX_OP_QUERY_FLOW_TABLE_ENTRY"); + case MLXCX_OP_DELETE_FLOW_TABLE_ENTRY: + return ("MLXCX_OP_DELETE_FLOW_TABLE_ENTRY"); + case MLXCX_OP_ALLOC_FLOW_COUNTER: + return ("MLXCX_OP_ALLOC_FLOW_COUNTER"); + case MLXCX_OP_DEALLOC_FLOW_COUNTER: + return ("MLXCX_OP_DEALLOC_FLOW_COUNTER"); + case MLXCX_OP_QUERY_FLOW_COUNTER: + return ("MLXCX_OP_QUERY_FLOW_COUNTER"); + case MLXCX_OP_MODIFY_FLOW_TABLE: + return ("MLXCX_OP_MODIFY_FLOW_TABLE"); + case MLXCX_OP_ALLOC_ENCAP_HEADER: + return ("MLXCX_OP_ALLOC_ENCAP_HEADER"); + case MLXCX_OP_DEALLOC_ENCAP_HEADER: + return ("MLXCX_OP_DEALLOC_ENCAP_HEADER"); + case MLXCX_OP_QUERY_ENCAP_HEADER: + return ("MLXCX_OP_QUERY_ENCAP_HEADER"); + default: + return ("Unknown Opcode"); + } +} + +const char * +mlxcx_port_status_string(mlxcx_port_status_t st) +{ + switch (st) { + case MLXCX_PORT_STATUS_UP: + return ("UP"); + case MLXCX_PORT_STATUS_DOWN: + return ("DOWN"); + case MLXCX_PORT_STATUS_UP_ONCE: + return ("UP_ONCE"); + case MLXCX_PORT_STATUS_DISABLED: + return ("DISABLED"); + default: + return ("UNKNOWN"); + } +} + +void 
+mlxcx_eth_proto_to_string(mlxcx_eth_proto_t p, char *buf, size_t size) +{ + if (p & MLXCX_PROTO_SGMII) + (void) strlcat(buf, "SGMII|", size); + if (p & MLXCX_PROTO_1000BASE_KX) + (void) strlcat(buf, "1000BASE_KX|", size); + if (p & MLXCX_PROTO_10GBASE_CX4) + (void) strlcat(buf, "10GBASE_CX4|", size); + if (p & MLXCX_PROTO_10GBASE_KX4) + (void) strlcat(buf, "10GBASE_KX4|", size); + if (p & MLXCX_PROTO_10GBASE_KR) + (void) strlcat(buf, "10GBASE_KR|", size); + if (p & MLXCX_PROTO_40GBASE_CR4) + (void) strlcat(buf, "40GBASE_CR4|", size); + if (p & MLXCX_PROTO_40GBASE_KR4) + (void) strlcat(buf, "40GBASE_KR4|", size); + if (p & MLXCX_PROTO_SGMII_100BASE) + (void) strlcat(buf, "SGMII_100BASE|", size); + if (p & MLXCX_PROTO_10GBASE_CR) + (void) strlcat(buf, "10GBASE_CR|", size); + if (p & MLXCX_PROTO_10GBASE_SR) + (void) strlcat(buf, "10GBASE_SR|", size); + if (p & MLXCX_PROTO_10GBASE_ER_LR) + (void) strlcat(buf, "10GBASE_ER_LR|", size); + if (p & MLXCX_PROTO_40GBASE_SR4) + (void) strlcat(buf, "40GBASE_SR4|", size); + if (p & MLXCX_PROTO_40GBASE_LR4_ER4) + (void) strlcat(buf, "40GBASE_LR4_ER4|", size); + if (p & MLXCX_PROTO_50GBASE_SR2) + (void) strlcat(buf, "50GBASE_SR2|", size); + if (p & MLXCX_PROTO_100GBASE_CR4) + (void) strlcat(buf, "100GBASE_CR4|", size); + if (p & MLXCX_PROTO_100GBASE_SR4) + (void) strlcat(buf, "100GBASE_SR4|", size); + if (p & MLXCX_PROTO_100GBASE_KR4) + (void) strlcat(buf, "100GBASE_KR4|", size); + if (p & MLXCX_PROTO_25GBASE_CR) + (void) strlcat(buf, "25GBASE_CR|", size); + if (p & MLXCX_PROTO_25GBASE_KR) + (void) strlcat(buf, "25GBASE_KR|", size); + if (p & MLXCX_PROTO_25GBASE_SR) + (void) strlcat(buf, "25GBASE_SR|", size); + if (p & MLXCX_PROTO_50GBASE_CR2) + (void) strlcat(buf, "50GBASE_CR2|", size); + /* Chop off the trailing '|' */ + if (strlen(buf) > 0) + buf[strlen(buf) - 1] = '\0'; +} + +void +mlxcx_cmd_queue_fini(mlxcx_t *mlxp) +{ + mlxcx_cmd_queue_t *cmd = &mlxp->mlx_cmd; + + mutex_enter(&cmd->mcmd_lock); + VERIFY3S(cmd->mcmd_status, ==, MLXCX_CMD_QUEUE_S_IDLE); + mutex_exit(&cmd->mcmd_lock); + + if (cmd->mcmd_tokens != NULL) { + id_space_destroy(cmd->mcmd_tokens); + cmd->mcmd_tokens = NULL; + } + + if (cmd->mcmd_taskq != NULL) { + ddi_taskq_destroy(cmd->mcmd_taskq); + cmd->mcmd_taskq = NULL; + } + + cv_destroy(&cmd->mcmd_cv); + mutex_destroy(&cmd->mcmd_lock); + + cmd->mcmd_ent = NULL; + mlxcx_dma_free(&cmd->mcmd_dma); +} + +boolean_t +mlxcx_cmd_queue_init(mlxcx_t *mlxp) +{ + uint32_t tmp, cmd_low, cmd_high, i; + mlxcx_cmd_queue_t *cmd = &mlxp->mlx_cmd; + char buf[64]; + const ddi_dma_cookie_t *ck; + + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + + tmp = mlxcx_get32(mlxp, MLXCX_ISS_FIRMWARE); + mlxp->mlx_fw_maj = MLXCX_ISS_FW_MAJOR(tmp); + mlxp->mlx_fw_min = MLXCX_ISS_FW_MINOR(tmp); + + tmp = mlxcx_get32(mlxp, MLXCX_ISS_FW_CMD); + mlxp->mlx_fw_rev = MLXCX_ISS_FW_REV(tmp); + mlxp->mlx_cmd_rev = MLXCX_ISS_CMD_REV(tmp); + + if (mlxp->mlx_cmd_rev != MLXCX_CMD_REVISION) { + mlxcx_warn(mlxp, "found unsupported command revision: %u, " + "expected %u", mlxp->mlx_cmd_rev, MLXCX_CMD_REVISION); + return (B_FALSE); + } + + cmd_low = mlxcx_get32(mlxp, MLXCX_ISS_CMD_LOW); + cmd->mcmd_size_l2 = MLXCX_ISS_CMDQ_SIZE(cmd_low); + cmd->mcmd_stride_l2 = MLXCX_ISS_CMDQ_STRIDE(cmd_low); + + mutex_init(&cmd->mcmd_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&cmd->mcmd_cv, NULL, CV_DRIVER, NULL); + cmd->mcmd_status = MLXCX_CMD_QUEUE_S_IDLE; + + (void) snprintf(buf, sizeof (buf), "mlxcx_tokens_%d", mlxp->mlx_inst); + if ((cmd->mcmd_tokens = id_space_create(buf, 1, UINT8_MAX)) == 
NULL) { + mlxcx_warn(mlxp, "failed to allocate token id space"); + mlxcx_cmd_queue_fini(mlxp); + return (B_FALSE); + } + + (void) snprintf(buf, sizeof (buf), "mlxcx_cmdq_%d", mlxp->mlx_inst); + if ((cmd->mcmd_taskq = ddi_taskq_create(mlxp->mlx_dip, buf, 1, + TASKQ_DEFAULTPRI, 0)) == NULL) { + mlxcx_warn(mlxp, "failed to create command queue task queue"); + mlxcx_cmd_queue_fini(mlxp); + return (B_FALSE); + } + + mlxcx_dma_acc_attr(mlxp, &acc); + mlxcx_dma_page_attr(mlxp, &attr); + + if (!mlxcx_dma_alloc(mlxp, &cmd->mcmd_dma, &attr, &acc, B_TRUE, + MLXCX_CMD_DMA_PAGE_SIZE, B_TRUE)) { + mlxcx_warn(mlxp, "failed to allocate command dma buffer"); + mlxcx_cmd_queue_fini(mlxp); + return (B_FALSE); + } + + ck = mlxcx_dma_cookie_one(&cmd->mcmd_dma); + cmd_high = (uint32_t)(ck->dmac_laddress >> 32); + cmd_low = (uint32_t)(ck->dmac_laddress & UINT32_MAX); + + mlxcx_put32(mlxp, MLXCX_ISS_CMD_HIGH, cmd_high); + mlxcx_put32(mlxp, MLXCX_ISS_CMD_LOW, cmd_low); + + /* + * Before this is ready, the initializing bit must become zero. + */ + for (i = 0; i < mlxcx_cmd_init_trys; i++) { + uint32_t init = mlxcx_get32(mlxp, MLXCX_ISS_INIT); + + if (MLXCX_ISS_INITIALIZING(init) == 0) + break; + delay(drv_usectohz(mlxcx_cmd_init_delay)); + } + if (i == mlxcx_cmd_init_trys) { + mlxcx_warn(mlxp, "timed out initializing command queue"); + mlxcx_cmd_queue_fini(mlxp); + return (B_FALSE); + } + + cmd->mcmd_ent = (void *)cmd->mcmd_dma.mxdb_va; + + return (B_TRUE); +} + +static void +mlxcx_cmd_in_header_init(mlxcx_cmd_t *cmd, mlxcx_cmd_in_t *in, + mlxcx_cmd_op_t op, uint16_t mod) +{ + ASSERT3U(op, <=, UINT16_MAX); + in->mci_opcode = to_be16(op); + in->mci_op_mod = to_be16(mod); + cmd->mlcmd_op = op; +} + +static boolean_t +mlxcx_cmd_mbox_alloc(mlxcx_t *mlxp, list_t *listp, uint8_t nblocks) +{ + uint8_t i; + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + + mlxcx_dma_acc_attr(mlxp, &acc); + mlxcx_dma_page_attr(mlxp, &attr); + + for (i = 0; i < nblocks; i++) { + mlxcx_cmd_mbox_t *mbox; + + mbox = kmem_zalloc(sizeof (*mbox), KM_SLEEP); + if (!mlxcx_dma_alloc(mlxp, &mbox->mlbox_dma, &attr, &acc, + B_TRUE, sizeof (mlxcx_cmd_mailbox_t), B_TRUE)) { + mlxcx_warn(mlxp, "failed to allocate mailbox dma " + "buffer"); + kmem_free(mbox, sizeof (*mbox)); + /* + * mlxcx_cmd_fini will clean up any mboxes that we + * already placed onto listp. 
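+			 * (It frees each of them via + *			 mlxcx_cmd_mbox_free().)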
+ */ + return (B_FALSE); + } + mbox->mlbox_data = (void *)mbox->mlbox_dma.mxdb_va; + list_insert_tail(listp, mbox); + } + + return (B_TRUE); +} + +static void +mlxcx_cmd_mbox_free(mlxcx_cmd_mbox_t *mbox) +{ + mlxcx_dma_free(&mbox->mlbox_dma); + kmem_free(mbox, sizeof (mlxcx_cmd_mbox_t)); +} + +static void +mlxcx_cmd_fini(mlxcx_t *mlxp, mlxcx_cmd_t *cmd) +{ + mlxcx_cmd_mbox_t *mbox; + + while ((mbox = list_remove_head(&cmd->mlcmd_mbox_out)) != NULL) { + mlxcx_cmd_mbox_free(mbox); + } + list_destroy(&cmd->mlcmd_mbox_out); + while ((mbox = list_remove_head(&cmd->mlcmd_mbox_in)) != NULL) { + mlxcx_cmd_mbox_free(mbox); + } + list_destroy(&cmd->mlcmd_mbox_in); + id_free(mlxp->mlx_cmd.mcmd_tokens, cmd->mlcmd_token); + cv_destroy(&cmd->mlcmd_cv); + mutex_destroy(&cmd->mlcmd_lock); +} + +static void +mlxcx_cmd_init(mlxcx_t *mlxp, mlxcx_cmd_t *cmd) +{ + bzero(cmd, sizeof (*cmd)); + mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&cmd->mlcmd_cv, NULL, CV_DRIVER, NULL); + cmd->mlcmd_token = id_alloc(mlxp->mlx_cmd.mcmd_tokens); + list_create(&cmd->mlcmd_mbox_in, sizeof (mlxcx_cmd_mbox_t), + offsetof(mlxcx_cmd_mbox_t, mlbox_node)); + list_create(&cmd->mlcmd_mbox_out, sizeof (mlxcx_cmd_mbox_t), + offsetof(mlxcx_cmd_mbox_t, mlbox_node)); +} + +static void +mlxcx_cmd_prep_input(mlxcx_cmd_ent_t *ent, mlxcx_cmd_t *cmd) +{ + uint32_t rem = cmd->mlcmd_inlen; + uint8_t i; + const void *in = cmd->mlcmd_in; + uint32_t copy; + mlxcx_cmd_mbox_t *mbox; + const ddi_dma_cookie_t *ck; + + copy = MIN(MLXCX_CMD_INLINE_INPUT_LEN, rem); + bcopy(in, ent->mce_input, copy); + + rem -= copy; + in += copy; + + if (rem == 0) { + ent->mce_in_mbox = to_be64(0); + VERIFY3U(cmd->mlcmd_nboxes_in, ==, 0); + return; + } + + mbox = list_head(&cmd->mlcmd_mbox_in); + ck = mlxcx_dma_cookie_one(&mbox->mlbox_dma); + ent->mce_in_mbox = to_be64(ck->dmac_laddress); + for (i = 0; mbox != NULL; + mbox = list_next(&cmd->mlcmd_mbox_in, mbox), i++) { + mlxcx_cmd_mbox_t *next; + mlxcx_cmd_mailbox_t *mp = mbox->mlbox_data; + + copy = MIN(MLXCX_CMD_MAILBOX_LEN, rem); + bcopy(in, mp->mlxb_data, copy); + rem -= copy; + in += copy; + + mp->mlxb_token = cmd->mlcmd_token; + mp->mlxb_blockno = to_be32(i); + + next = list_next(&cmd->mlcmd_mbox_in, mbox); + if (next == NULL) { + mp->mlxb_nextp = to_be64(0); + } else { + ck = mlxcx_dma_cookie_one(&next->mlbox_dma); + mp->mlxb_nextp = to_be64(ck->dmac_laddress); + } + MLXCX_DMA_SYNC(mbox->mlbox_dma, DDI_DMA_SYNC_FORDEV); + } + VERIFY3U(i, ==, cmd->mlcmd_nboxes_in); + VERIFY0(rem); +} + +static void +mlxcx_cmd_prep_output(mlxcx_cmd_ent_t *ent, mlxcx_cmd_t *cmd) +{ + uint8_t i; + mlxcx_cmd_mbox_t *mbox; + const ddi_dma_cookie_t *ck; + + if (cmd->mlcmd_nboxes_out == 0) { + ent->mce_out_mbox = to_be64(0); + return; + } + + mbox = list_head(&cmd->mlcmd_mbox_out); + ck = mlxcx_dma_cookie_one(&mbox->mlbox_dma); + ent->mce_out_mbox = to_be64(ck->dmac_laddress); + for (i = 0, mbox = list_head(&cmd->mlcmd_mbox_out); mbox != NULL; + mbox = list_next(&cmd->mlcmd_mbox_out, mbox), i++) { + mlxcx_cmd_mbox_t *next; + mlxcx_cmd_mailbox_t *mp = mbox->mlbox_data; + + mp->mlxb_token = cmd->mlcmd_token; + mp->mlxb_blockno = to_be32(i); + + next = list_next(&cmd->mlcmd_mbox_out, mbox); + if (next == NULL) { + mp->mlxb_nextp = to_be64(0); + } else { + ck = mlxcx_dma_cookie_one(&next->mlbox_dma); + mp->mlxb_nextp = to_be64(ck->dmac_laddress); + } + MLXCX_DMA_SYNC(mbox->mlbox_dma, DDI_DMA_SYNC_FORDEV); + } + VERIFY3U(i, ==, cmd->mlcmd_nboxes_out); +} + +static void +mlxcx_cmd_copy_output(mlxcx_cmd_ent_t *ent, 
mlxcx_cmd_t *cmd) +{ + void *out = cmd->mlcmd_out; + uint32_t rem = cmd->mlcmd_outlen; + uint32_t copy; + mlxcx_cmd_mbox_t *mbox; + + copy = MIN(rem, MLXCX_CMD_INLINE_OUTPUT_LEN); + bcopy(ent->mce_output, out, copy); + out += copy; + rem -= copy; + + if (rem == 0) { + VERIFY0(cmd->mlcmd_nboxes_out); + return; + } + + for (mbox = list_head(&cmd->mlcmd_mbox_out); mbox != NULL; + mbox = list_next(&cmd->mlcmd_mbox_out, mbox)) { + MLXCX_DMA_SYNC(mbox->mlbox_dma, DDI_DMA_SYNC_FORKERNEL); + copy = MIN(MLXCX_CMD_MAILBOX_LEN, rem); + bcopy(mbox->mlbox_data->mlxb_data, out, copy); + out += copy; + rem -= copy; + } + VERIFY0(rem); +} + +static void +mlxcx_cmd_taskq(void *arg) +{ + mlxcx_cmd_t *cmd = arg; + mlxcx_t *mlxp = cmd->mlcmd_mlxp; + mlxcx_cmd_queue_t *cmdq = &mlxp->mlx_cmd; + mlxcx_cmd_ent_t *ent; + uint_t poll; + + ASSERT3S(cmd->mlcmd_op, !=, 0); + + mutex_enter(&cmdq->mcmd_lock); + while (cmdq->mcmd_status == MLXCX_CMD_QUEUE_S_BUSY) { + cv_wait(&cmdq->mcmd_cv, &cmdq->mcmd_lock); + } + + if (cmdq->mcmd_status != MLXCX_CMD_QUEUE_S_IDLE) { + mutex_exit(&cmdq->mcmd_lock); + + mutex_enter(&cmd->mlcmd_lock); + cmd->mlcmd_state = MLXCX_CMD_S_ERROR; + cv_broadcast(&cmd->mlcmd_cv); + mutex_exit(&cmd->mlcmd_lock); + return; + } + + cmdq->mcmd_status = MLXCX_CMD_QUEUE_S_BUSY; + ent = cmdq->mcmd_ent; + mutex_exit(&cmdq->mcmd_lock); + + /* + * Command queue is currently ours as we set busy. + */ + bzero(ent, sizeof (*ent)); + ent->mce_type = MLXCX_CMD_TRANSPORT_PCI; + ent->mce_in_length = to_be32(cmd->mlcmd_inlen); + ent->mce_out_length = to_be32(cmd->mlcmd_outlen); + ent->mce_token = cmd->mlcmd_token; + ent->mce_sig = 0; + ent->mce_status = MLXCX_CMD_HW_OWNED; + mlxcx_cmd_prep_input(ent, cmd); + mlxcx_cmd_prep_output(ent, cmd); + MLXCX_DMA_SYNC(cmdq->mcmd_dma, DDI_DMA_SYNC_FORDEV); + + /* This assumes we only ever use the first command */ + mlxcx_put32(mlxp, MLXCX_ISS_CMD_DOORBELL, 1); + + for (poll = 0; poll < mlxcx_cmd_tries; poll++) { + delay(drv_usectohz(mlxcx_cmd_delay)); + MLXCX_DMA_SYNC(cmdq->mcmd_dma, DDI_DMA_SYNC_FORKERNEL); + if ((ent->mce_status & MLXCX_CMD_HW_OWNED) == 0) + break; + } + + /* + * Command is done (or timed out). Save relevant data. Once we broadcast + * on the CV and drop the lock, we must not touch the cmd again. 
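+	 * The thread blocked in mlxcx_cmd_wait() may immediately move on + *	 to mlxcx_cmd_evaluate() and mlxcx_cmd_fini().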
+ */ + mutex_enter(&cmd->mlcmd_lock); + + if (poll == mlxcx_cmd_tries) { + cmd->mlcmd_status = MLXCX_CMD_R_TIMEOUT; + cmd->mlcmd_state = MLXCX_CMD_S_ERROR; + mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_NO_RESPONSE); + } else { + cmd->mlcmd_status = MLXCX_CMD_STATUS(ent->mce_status); + cmd->mlcmd_state = MLXCX_CMD_S_DONE; + if (cmd->mlcmd_status == 0) { + mlxcx_cmd_copy_output(ent, cmd); + } + } + cv_broadcast(&cmd->mlcmd_cv); + mutex_exit(&cmd->mlcmd_lock); + + mutex_enter(&cmdq->mcmd_lock); + cmdq->mcmd_status = MLXCX_CMD_QUEUE_S_IDLE; + cv_broadcast(&cmdq->mcmd_cv); + mutex_exit(&cmdq->mcmd_lock); +} + +static boolean_t +mlxcx_cmd_send(mlxcx_t *mlxp, mlxcx_cmd_t *cmd, const void *in, uint32_t inlen, + void *out, uint32_t outlen) +{ + if (inlen > MLXCX_CMD_INLINE_INPUT_LEN) { + uint32_t need = inlen - MLXCX_CMD_INLINE_INPUT_LEN; + uint8_t nblocks; + + if (need / MLXCX_CMD_MAILBOX_LEN + 1 > UINT8_MAX) { + mlxcx_warn(mlxp, "requested too many input blocks for " + "%u byte input len", inlen); + return (B_FALSE); + } + + nblocks = need / MLXCX_CMD_MAILBOX_LEN + 1; + if (!mlxcx_cmd_mbox_alloc(mlxp, &cmd->mlcmd_mbox_in, nblocks)) { + mlxcx_warn(mlxp, "failed to allocate %u blocks of " + "input mailbox", nblocks); + return (B_FALSE); + } + cmd->mlcmd_nboxes_in = nblocks; + } + + if (outlen > MLXCX_CMD_INLINE_OUTPUT_LEN) { + uint32_t need = outlen - MLXCX_CMD_INLINE_OUTPUT_LEN; + uint8_t nblocks; + + if (need / MLXCX_CMD_MAILBOX_LEN + 1 > UINT8_MAX) { + mlxcx_warn(mlxp, "requested too many output blocks for " + "%u byte output len", outlen); + return (B_FALSE); + } + + nblocks = need / MLXCX_CMD_MAILBOX_LEN + 1; + if (!mlxcx_cmd_mbox_alloc(mlxp, &cmd->mlcmd_mbox_out, + nblocks)) { + mlxcx_warn(mlxp, "failed to allocate %u blocks of " + "output mailbox", nblocks); + return (B_FALSE); + } + cmd->mlcmd_nboxes_out = nblocks; + } + + cmd->mlcmd_in = in; + cmd->mlcmd_inlen = inlen; + cmd->mlcmd_out = out; + cmd->mlcmd_outlen = outlen; + cmd->mlcmd_mlxp = mlxp; + + /* + * Now that all allocations have been done, all that remains is to + * dispatch the command to the taskq, where it will be processed. 
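+	 * The taskq callback, mlxcx_cmd_taskq(), writes the command queue + *	 doorbell and then polls the entry until the hardware releases + *	 ownership of it.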
+ */ + if (ddi_taskq_dispatch(mlxp->mlx_cmd.mcmd_taskq, mlxcx_cmd_taskq, cmd, + DDI_SLEEP) != DDI_SUCCESS) { + mlxcx_warn(mlxp, "failed to submit command to taskq"); + return (B_FALSE); + } + + return (B_TRUE); +} + +static void +mlxcx_cmd_wait(mlxcx_cmd_t *cmd) +{ + mutex_enter(&cmd->mlcmd_lock); + while (cmd->mlcmd_state == 0) { + cv_wait(&cmd->mlcmd_cv, &cmd->mlcmd_lock); + } + mutex_exit(&cmd->mlcmd_lock); +} + +static boolean_t +mlxcx_cmd_evaluate(mlxcx_t *mlxp, mlxcx_cmd_t *cmd) +{ + mlxcx_cmd_out_t *out; + + if ((cmd->mlcmd_state & MLXCX_CMD_S_ERROR) != 0) { + mlxcx_warn(mlxp, "command %s (0x%x) failed due to an internal " + "driver error", + mlxcx_cmd_opcode_string(cmd->mlcmd_op), + cmd->mlcmd_op); + return (B_FALSE); + } + + if (cmd->mlcmd_status != 0) { + mlxcx_warn(mlxp, "command %s (0x%x) failed with command queue " + "error 0x%x", + mlxcx_cmd_opcode_string(cmd->mlcmd_op), + cmd->mlcmd_op, cmd->mlcmd_status); + return (B_FALSE); + } + + out = cmd->mlcmd_out; + if (out->mco_status != MLXCX_CMD_R_OK) { + mlxcx_warn(mlxp, "command %s (0x%x) failed with status code %s " + "(0x%x)", mlxcx_cmd_opcode_string(cmd->mlcmd_op), + cmd->mlcmd_op, mlxcx_cmd_response_string(out->mco_status), + out->mco_status); + return (B_FALSE); + } + + return (B_TRUE); +} + +boolean_t +mlxcx_cmd_disable_hca(mlxcx_t *mlxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_disable_hca_in_t in; + mlxcx_cmd_disable_hca_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_disable_hca_head, + MLXCX_OP_DISABLE_HCA, 0); + in.mlxi_disable_hca_func = MLXCX_FUNCTION_SELF; + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_enable_hca(mlxcx_t *mlxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_enable_hca_in_t in; + mlxcx_cmd_enable_hca_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_enable_hca_head, + MLXCX_OP_ENABLE_HCA, 0); + in.mlxi_enable_hca_func = MLXCX_FUNCTION_SELF; + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_issi(mlxcx_t *mlxp, uint32_t *issip) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_issi_in_t in; + mlxcx_cmd_query_issi_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_issi_head, + MLXCX_OP_QUERY_ISSI, 0); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + *issip = out.mlxo_supported_issi; + } else if (cmd.mlcmd_status == 0 && + out.mlxo_query_issi_head.mco_status == MLXCX_CMD_R_BAD_OP) { + /* + * The PRM says that if we get a bad-operation error, this + * command isn't supported; the device then only supports + * version 1 of the ISSI, indicated by bit zero being set. 
+ */ + ret = B_TRUE; + *issip = 1; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_set_issi(mlxcx_t *mlxp, uint16_t issi) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_set_issi_in_t in; + mlxcx_cmd_set_issi_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_issi_head, + MLXCX_OP_SET_ISSI, 0); + in.mlxi_set_issi_current = to_be16(issi); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_pages(mlxcx_t *mlxp, uint_t type, int32_t *npages) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_pages_in_t in; + mlxcx_cmd_query_pages_out_t out; + boolean_t ret; + + switch (type) { + case MLXCX_QUERY_PAGES_OPMOD_BOOT: + case MLXCX_QUERY_PAGES_OPMOD_INIT: + case MLXCX_QUERY_PAGES_OPMOD_REGULAR: + break; + default: + mlxcx_warn(mlxp, "!passed invalid type to query pages: %u", + type); + return (B_FALSE); + } + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_pages_head, + MLXCX_OP_QUERY_PAGES, type); + in.mlxi_query_pages_func = MLXCX_FUNCTION_SELF; + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + *npages = from_be32(out.mlxo_query_pages_npages); + } + mlxcx_cmd_fini(mlxp, &cmd); + + return (ret); +} + +boolean_t +mlxcx_cmd_give_pages(mlxcx_t *mlxp, uint_t type, int32_t npages, + mlxcx_dev_page_t **pages) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_manage_pages_in_t in; + mlxcx_cmd_manage_pages_out_t out; + size_t insize, outsize; + boolean_t ret; + uint32_t i; + uint64_t pa; + const ddi_dma_cookie_t *ck; + + switch (type) { + case MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL: + if (npages != 0) { + mlxcx_warn(mlxp, "passed non-zero number of pages (%d) " + "but asked to fail page allocation", npages); + return (B_FALSE); + } + break; + case MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES: + if (npages <= 0 || npages > MLXCX_MANAGE_PAGES_MAX_PAGES) { + mlxcx_warn(mlxp, "passed invalid number of pages (%d) " + "to give pages", npages); + return (B_FALSE); + } + break; + default: + mlxcx_warn(mlxp, "!passed invalid type to give pages: %u", + type); + return (B_FALSE); + } + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + insize = offsetof(mlxcx_cmd_manage_pages_in_t, mlxi_manage_pages_pas) + + npages * sizeof (uint64_t); + outsize = offsetof(mlxcx_cmd_manage_pages_out_t, mlxo_manage_pages_pas); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_manage_pages_head, + MLXCX_OP_MANAGE_PAGES, type); + in.mlxi_manage_pages_func = MLXCX_FUNCTION_SELF; + in.mlxi_manage_pages_npages = to_be32(npages); + for (i = 0; i < npages; i++) { + ck = mlxcx_dma_cookie_one(&pages[i]->mxdp_dma); + pa = ck->dmac_laddress; + ASSERT3U(pa & 0xfff, ==, 0); + ASSERT3U(ck->dmac_size, ==, MLXCX_HW_PAGE_SIZE); + in.mlxi_manage_pages_pas[i] = to_be64(pa); + } + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, outsize)) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + + return (ret); +} + +boolean_t +mlxcx_cmd_return_pages(mlxcx_t *mlxp, int32_t 
nreq, uint64_t *pas, + int32_t *nret) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_manage_pages_in_t in; + mlxcx_cmd_manage_pages_out_t out; + size_t insize, outsize; + boolean_t ret; + uint32_t i; + + if (nreq <= 0) { + mlxcx_warn(mlxp, "passed invalid number of pages (%d) " + "to return pages", nreq); + return (B_FALSE); + } + VERIFY3S(nreq, <=, MLXCX_MANAGE_PAGES_MAX_PAGES); + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + insize = offsetof(mlxcx_cmd_manage_pages_in_t, mlxi_manage_pages_pas); + outsize = offsetof(mlxcx_cmd_manage_pages_out_t, + mlxo_manage_pages_pas) + nreq * sizeof (uint64_t); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_manage_pages_head, + MLXCX_OP_MANAGE_PAGES, MLXCX_MANAGE_PAGES_OPMOD_RETURN_PAGES); + in.mlxi_manage_pages_func = MLXCX_FUNCTION_SELF; + in.mlxi_manage_pages_npages = to_be32(nreq); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, outsize)) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + *nret = from_be32(out.mlxo_manage_pages_npages); + for (i = 0; i < *nret; i++) { + pas[i] = from_be64(out.mlxo_manage_pages_pas[i]); + } + } + mlxcx_cmd_fini(mlxp, &cmd); + + return (ret); +} + +boolean_t +mlxcx_cmd_query_hca_cap(mlxcx_t *mlxp, mlxcx_hca_cap_type_t type, + mlxcx_hca_cap_mode_t mode, mlxcx_hca_cap_t *capp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_hca_cap_in_t in; + mlxcx_cmd_query_hca_cap_out_t *out; + boolean_t ret; + uint16_t opmode; + + bzero(&in, sizeof (in)); + out = kmem_zalloc(sizeof (mlxcx_cmd_query_hca_cap_out_t), KM_SLEEP); + mlxcx_cmd_init(mlxp, &cmd); + + opmode = type << 1 | mode; + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_hca_cap_head, + MLXCX_OP_QUERY_HCA_CAP, opmode); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), out, sizeof (*out))) { + mlxcx_cmd_fini(mlxp, &cmd); + kmem_free(out, sizeof (mlxcx_cmd_query_hca_cap_out_t)); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + capp->mhc_mode = mode; + capp->mhc_type = type; + ASSERT3U(sizeof (out->mlxo_query_hca_cap_data), ==, + sizeof (capp->mhc_bulk)); + bcopy(out->mlxo_query_hca_cap_data, capp->mhc_bulk, + sizeof (capp->mhc_bulk)); + } + mlxcx_cmd_fini(mlxp, &cmd); + + kmem_free(out, sizeof (mlxcx_cmd_query_hca_cap_out_t)); + return (ret); +} + +boolean_t +mlxcx_cmd_init_hca(mlxcx_t *mlxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_init_hca_in_t in; + mlxcx_cmd_init_hca_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_init_hca_head, + MLXCX_OP_INIT_HCA, 0); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_set_driver_version(mlxcx_t *mlxp, const char *version) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_set_driver_version_in_t in; + mlxcx_cmd_set_driver_version_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_driver_version_head, + MLXCX_OP_SET_DRIVER_VERSION, 0); + VERIFY3U(strlcpy(in.mlxi_set_driver_version_version, version, + sizeof (in.mlxi_set_driver_version_version)), <=, + sizeof (in.mlxi_set_driver_version_version)); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, 
sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_alloc_uar(mlxcx_t *mlxp, mlxcx_uar_t *mlup) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_alloc_uar_in_t in; + mlxcx_cmd_alloc_uar_out_t out; + boolean_t ret; + size_t i; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_alloc_uar_head, + MLXCX_OP_ALLOC_UAR, 0); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlup->mlu_allocated = B_TRUE; + mlup->mlu_num = from_be24(out.mlxo_alloc_uar_uar); + VERIFY3U(mlup->mlu_num, >, 0); + mlup->mlu_base = mlup->mlu_num * MLXCX_HW_PAGE_SIZE; + + for (i = 0; i < MLXCX_BF_PER_UAR; ++i) { + mlup->mlu_bf[i].mbf_even = mlup->mlu_base + + MLXCX_BF_BASE + MLXCX_BF_SIZE * 2 * i; + mlup->mlu_bf[i].mbf_odd = mlup->mlu_bf[i].mbf_even + + MLXCX_BF_SIZE; + } + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_dealloc_uar(mlxcx_t *mlxp, mlxcx_uar_t *mlup) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_dealloc_uar_in_t in; + mlxcx_cmd_dealloc_uar_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_dealloc_uar_head, + MLXCX_OP_DEALLOC_UAR, 0); + VERIFY(mlup->mlu_allocated); + in.mlxi_dealloc_uar_uar = to_be24(mlup->mlu_num); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlup->mlu_allocated = B_FALSE; + mlup->mlu_num = 0; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_alloc_pd(mlxcx_t *mlxp, mlxcx_pd_t *mlpd) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_alloc_pd_in_t in; + mlxcx_cmd_alloc_pd_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_alloc_pd_head, + MLXCX_OP_ALLOC_PD, 0); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlpd->mlpd_allocated = B_TRUE; + mlpd->mlpd_num = from_be24(out.mlxo_alloc_pd_pdn); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_dealloc_pd(mlxcx_t *mlxp, mlxcx_pd_t *mlpd) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_dealloc_pd_in_t in; + mlxcx_cmd_dealloc_pd_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_dealloc_pd_head, + MLXCX_OP_DEALLOC_PD, 0); + VERIFY(mlpd->mlpd_allocated); + in.mlxi_dealloc_pd_pdn = to_be24(mlpd->mlpd_num); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlpd->mlpd_allocated = B_FALSE; + mlpd->mlpd_num = 0; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_alloc_tdom(mlxcx_t *mlxp, mlxcx_tdom_t *mltd) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_alloc_tdom_in_t in; + mlxcx_cmd_alloc_tdom_out_t out; + boolean_t ret; + + 
bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_alloc_tdom_head, + MLXCX_OP_ALLOC_TRANSPORT_DOMAIN, 0); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mltd->mltd_allocated = B_TRUE; + mltd->mltd_num = from_be24(out.mlxo_alloc_tdom_tdomn); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_dealloc_tdom(mlxcx_t *mlxp, mlxcx_tdom_t *mltd) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_dealloc_tdom_in_t in; + mlxcx_cmd_dealloc_tdom_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_dealloc_tdom_head, + MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN, 0); + VERIFY(mltd->mltd_allocated); + in.mlxi_dealloc_tdom_tdomn = to_be24(mltd->mltd_num); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mltd->mltd_allocated = B_FALSE; + mltd->mltd_num = 0; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_teardown_hca(mlxcx_t *mlxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_teardown_hca_in_t in; + mlxcx_cmd_teardown_hca_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_teardown_hca_head, + MLXCX_OP_TEARDOWN_HCA, 0); + in.mlxi_teardown_hca_profile = to_be16(MLXCX_TEARDOWN_HCA_GRACEFUL); + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_nic_vport_ctx_in_t in; + mlxcx_cmd_query_nic_vport_ctx_out_t out; + boolean_t ret; + const mlxcx_nic_vport_ctx_t *ctx; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlp->mlp_mtx)); + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_nic_vport_ctx_head, + MLXCX_OP_QUERY_NIC_VPORT_CONTEXT, MLXCX_VPORT_TYPE_VNIC); + + in.mlxi_query_nic_vport_ctx_vport_number = to_be16(mlp->mlp_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + ctx = &out.mlxo_query_nic_vport_ctx_context; + mlp->mlp_guid = from_be64(ctx->mlnvc_port_guid); + mlp->mlp_mtu = from_be16(ctx->mlnvc_mtu); + bcopy(ctx->mlnvc_permanent_address, mlp->mlp_mac_address, + sizeof (mlp->mlp_mac_address)); + mlp->mlp_wqe_min_inline = get_bits64(ctx->mlnvc_flags, + MLXCX_VPORT_CTX_MIN_WQE_INLINE); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +static const char * +mlxcx_reg_name(mlxcx_register_id_t rid) +{ + switch (rid) { + case MLXCX_REG_PMTU: + return ("PMTU"); + case MLXCX_REG_PAOS: + return ("PAOS"); + case MLXCX_REG_PTYS: + return ("PTYS"); + case MLXCX_REG_MSGI: + return ("MSGI"); + case MLXCX_REG_PMAOS: + return ("PMAOS"); + case MLXCX_REG_MLCR: + return ("MLCR"); + case MLXCX_REG_MCIA: + return ("MCIA"); + case MLXCX_REG_PPCNT: + return ("PPCNT"); + 
default: + return ("???"); + } +} + +boolean_t +mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod, + mlxcx_register_id_t rid, mlxcx_register_data_t *data) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_access_register_in_t in; + mlxcx_cmd_access_register_out_t out; + boolean_t ret; + size_t dsize, insize, outsize; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_access_register_head, + MLXCX_OP_ACCESS_REG, opmod); + + in.mlxi_access_register_register_id = to_be16(rid); + + switch (rid) { + case MLXCX_REG_PMTU: + dsize = sizeof (mlxcx_reg_pmtu_t); + break; + case MLXCX_REG_PAOS: + dsize = sizeof (mlxcx_reg_paos_t); + break; + case MLXCX_REG_PTYS: + dsize = sizeof (mlxcx_reg_ptys_t); + break; + case MLXCX_REG_MLCR: + dsize = sizeof (mlxcx_reg_mlcr_t); + break; + case MLXCX_REG_PMAOS: + dsize = sizeof (mlxcx_reg_pmaos_t); + break; + case MLXCX_REG_MCIA: + dsize = sizeof (mlxcx_reg_mcia_t); + break; + case MLXCX_REG_PPCNT: + dsize = sizeof (mlxcx_reg_ppcnt_t); + break; + default: + dsize = 0; + VERIFY(0); + return (B_FALSE); + } + insize = dsize + offsetof(mlxcx_cmd_access_register_in_t, + mlxi_access_register_data); + outsize = dsize + offsetof(mlxcx_cmd_access_register_out_t, + mlxo_access_register_data); + + bcopy(data, &in.mlxi_access_register_data, dsize); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, outsize)) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + bcopy(&out.mlxo_access_register_data, data, dsize); + } else { + mlxcx_warn(mlxp, "failed OP_ACCESS_REG was for register " + "%04x (%s)", rid, mlxcx_reg_name(rid)); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_port_mtu(mlxcx_t *mlxp, mlxcx_port_t *mlp) +{ + mlxcx_register_data_t data; + boolean_t ret; + + /* + * Since we modify the port here we require that the caller is holding + * the port mutex. 
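+	 * (This is asserted below; the other port register helpers in + *	 this file follow the same convention.)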
+ */ + ASSERT(mutex_owned(&mlp->mlp_mtx)); + bzero(&data, sizeof (data)); + data.mlrd_pmtu.mlrd_pmtu_local_port = mlp->mlp_num + 1; + + ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_PMTU, &data); + + if (ret) { + mlp->mlp_mtu = from_be16(data.mlrd_pmtu.mlrd_pmtu_admin_mtu); + mlp->mlp_max_mtu = from_be16(data.mlrd_pmtu.mlrd_pmtu_max_mtu); + } + + return (ret); +} + +boolean_t +mlxcx_cmd_query_module_status(mlxcx_t *mlxp, uint_t id, + mlxcx_module_status_t *pstatus, mlxcx_module_error_type_t *perr) +{ + mlxcx_register_data_t data; + boolean_t ret; + + bzero(&data, sizeof (data)); + ASSERT3U(id, <, 0xff); + data.mlrd_pmaos.mlrd_pmaos_module = (uint8_t)id; + + ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_PMAOS, &data); + + if (ret) { + if (pstatus != NULL) + *pstatus = data.mlrd_pmaos.mlrd_pmaos_oper_status; + if (perr != NULL) + *perr = data.mlrd_pmaos.mlrd_pmaos_error_type; + } + + return (ret); +} + +boolean_t +mlxcx_cmd_set_port_mtu(mlxcx_t *mlxp, mlxcx_port_t *mlp) +{ + mlxcx_register_data_t data; + boolean_t ret; + + ASSERT(mutex_owned(&mlp->mlp_mtx)); + bzero(&data, sizeof (data)); + data.mlrd_pmtu.mlrd_pmtu_local_port = mlp->mlp_num + 1; + data.mlrd_pmtu.mlrd_pmtu_admin_mtu = to_be16(mlp->mlp_mtu); + + ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE, + MLXCX_REG_PMTU, &data); + + return (ret); +} + +boolean_t +mlxcx_cmd_set_port_led(mlxcx_t *mlxp, mlxcx_port_t *mlp, uint16_t sec) +{ + mlxcx_register_data_t data; + boolean_t ret; + + ASSERT(mutex_owned(&mlp->mlp_mtx)); + bzero(&data, sizeof (data)); + data.mlrd_mlcr.mlrd_mlcr_local_port = mlp->mlp_num + 1; + set_bits8(&data.mlrd_mlcr.mlrd_mlcr_flags, MLXCX_MLCR_LED_TYPE, + MLXCX_LED_TYPE_PORT); + data.mlrd_mlcr.mlrd_mlcr_beacon_duration = to_be16(sec); + + ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_WRITE, + MLXCX_REG_MLCR, &data); + + return (ret); +} + +boolean_t +mlxcx_cmd_query_port_status(mlxcx_t *mlxp, mlxcx_port_t *mlp) +{ + mlxcx_register_data_t data; + boolean_t ret; + + ASSERT(mutex_owned(&mlp->mlp_mtx)); + bzero(&data, sizeof (data)); + data.mlrd_paos.mlrd_paos_local_port = mlp->mlp_num + 1; + + ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_PAOS, &data); + + if (ret) { + mlp->mlp_admin_status = data.mlrd_paos.mlrd_paos_admin_status; + mlp->mlp_oper_status = data.mlrd_paos.mlrd_paos_oper_status; + } + + return (ret); +} + +boolean_t +mlxcx_cmd_query_port_speed(mlxcx_t *mlxp, mlxcx_port_t *mlp) +{ + mlxcx_register_data_t data; + boolean_t ret; + + ASSERT(mutex_owned(&mlp->mlp_mtx)); + bzero(&data, sizeof (data)); + data.mlrd_ptys.mlrd_ptys_local_port = mlp->mlp_num + 1; + set_bit8(&data.mlrd_ptys.mlrd_ptys_proto_mask, + MLXCX_PTYS_PROTO_MASK_ETH); + + ret = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_PTYS, &data); + + if (ret) { + if (get_bit8(data.mlrd_ptys.mlrd_ptys_autoneg_flags, + MLXCX_AUTONEG_DISABLE)) { + mlp->mlp_autoneg = B_FALSE; + } else { + mlp->mlp_autoneg = B_TRUE; + } + mlp->mlp_max_proto = + from_bits32(data.mlrd_ptys.mlrd_ptys_proto_cap); + mlp->mlp_admin_proto = + from_bits32(data.mlrd_ptys.mlrd_ptys_proto_admin); + mlp->mlp_oper_proto = + from_bits32(data.mlrd_ptys.mlrd_ptys_proto_oper); + } + + return (ret); +} + +boolean_t +mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *mlxp, mlxcx_port_t *mlp, + mlxcx_modify_nic_vport_ctx_fields_t fields) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_modify_nic_vport_ctx_in_t in; + 
mlxcx_cmd_modify_nic_vport_ctx_out_t out; + boolean_t ret; + mlxcx_nic_vport_ctx_t *ctx; + + ASSERT(mutex_owned(&mlp->mlp_mtx)); + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_nic_vport_ctx_head, + MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT, MLXCX_VPORT_TYPE_VNIC); + + in.mlxi_modify_nic_vport_ctx_vport_number = to_be16(mlp->mlp_num); + in.mlxi_modify_nic_vport_ctx_field_select = to_be32(fields); + + ctx = &in.mlxi_modify_nic_vport_ctx_context; + if (fields & MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC) { + set_bit16(&ctx->mlnvc_promisc_list_type, + MLXCX_VPORT_PROMISC_ALL); + } + if (fields & MLXCX_MODIFY_NIC_VPORT_CTX_MTU) { + ctx->mlnvc_mtu = to_be16(mlp->mlp_mtu); + } + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + if (fields & MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC) { + mlp->mlp_flags |= MLXCX_PORT_VPORT_PROMISC; + } + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_create_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_create_eq_in_t in; + mlxcx_cmd_create_eq_out_t out; + boolean_t ret; + mlxcx_eventq_ctx_t *ctx; + size_t rem, insize; + const ddi_dma_cookie_t *c; + uint64_t pa, npages; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mleq->mleq_mtx)); + VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC); + VERIFY0(mleq->mleq_state & MLXCX_EQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_eq_head, + MLXCX_OP_CREATE_EQ, 0); + + ctx = &in.mlxi_create_eq_context; + ctx->mleqc_uar_page = to_be24(mleq->mleq_uar->mlu_num); + ctx->mleqc_log_eq_size = mleq->mleq_entshift; + ctx->mleqc_intr = mleq->mleq_intr_index; + + in.mlxi_create_eq_event_bitmask = to_be64(mleq->mleq_events); + + npages = 0; + c = NULL; + while ((c = mlxcx_dma_cookie_iter(&mleq->mleq_dma, c)) != NULL) { + pa = c->dmac_laddress; + rem = c->dmac_size; + while (rem > 0) { + ASSERT3U(pa & 0xfff, ==, 0); + ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE); + in.mlxi_create_eq_pas[npages++] = to_be64(pa); + rem -= MLXCX_HW_PAGE_SIZE; + pa += MLXCX_HW_PAGE_SIZE; + } + } + ASSERT3U(npages, <=, MLXCX_CREATE_QUEUE_MAX_PAGES); + + insize = offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_pas) + + sizeof (uint64_t) * npages; + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mleq->mleq_state |= MLXCX_EQ_CREATED; + mleq->mleq_num = out.mlxo_create_eq_eqn; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq, + mlxcx_eventq_ctx_t *ctxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_eq_in_t in; + mlxcx_cmd_query_eq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mleq->mleq_mtx)); + VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC); + VERIFY(mleq->mleq_state & MLXCX_EQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_eq_head, + MLXCX_OP_QUERY_EQ, 0); + + in.mlxi_query_eq_eqn = mleq->mleq_num; + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); 
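+	/* On success, copy the queried event queue context back to the caller. */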
+ if (ret) { + bcopy(&out.mlxo_query_eq_context, ctxp, + sizeof (mlxcx_eventq_ctx_t)); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_destroy_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_destroy_eq_in_t in; + mlxcx_cmd_destroy_eq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mleq->mleq_mtx)); + VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC); + VERIFY(mleq->mleq_state & MLXCX_EQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_eq_head, + MLXCX_OP_DESTROY_EQ, 0); + + in.mlxi_destroy_eq_eqn = mleq->mleq_num; + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mleq->mleq_state |= MLXCX_EQ_DESTROYED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_special_ctxs(mlxcx_t *mlxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_special_ctxs_in_t in; + mlxcx_cmd_query_special_ctxs_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_special_ctxs_head, + MLXCX_OP_QUERY_SPECIAL_CONTEXTS, 0); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlxp->mlx_rsvd_lkey = from_be32( + out.mlxo_query_special_ctxs_resd_lkey); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_create_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_create_cq_in_t in; + mlxcx_cmd_create_cq_out_t out; + boolean_t ret; + mlxcx_completionq_ctx_t *ctx; + size_t rem, insize; + const ddi_dma_cookie_t *c; + uint64_t pa, npages; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC); + VERIFY0(mlcq->mlcq_state & MLXCX_CQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_cq_head, + MLXCX_OP_CREATE_CQ, 0); + + ctx = &in.mlxi_create_cq_context; + ctx->mlcqc_uar_page = to_be24(mlcq->mlcq_uar->mlu_num); + ctx->mlcqc_log_cq_size = mlcq->mlcq_entshift; + ctx->mlcqc_eqn = mlcq->mlcq_eq->mleq_num; + ctx->mlcqc_cq_period = to_be16(mlcq->mlcq_cqemod_period_usec); + ctx->mlcqc_cq_max_count = to_be16(mlcq->mlcq_cqemod_count); + + c = mlxcx_dma_cookie_one(&mlcq->mlcq_doorbell_dma); + ctx->mlcqc_dbr_addr = to_be64(c->dmac_laddress); + ASSERT3U(c->dmac_size, >=, sizeof (mlxcx_completionq_doorbell_t)); + + npages = 0; + c = NULL; + while ((c = mlxcx_dma_cookie_iter(&mlcq->mlcq_dma, c)) != NULL) { + pa = c->dmac_laddress; + rem = c->dmac_size; + while (rem > 0) { + ASSERT3U(pa & 0xfff, ==, 0); + ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE); + in.mlxi_create_cq_pas[npages++] = to_be64(pa); + rem -= MLXCX_HW_PAGE_SIZE; + pa += MLXCX_HW_PAGE_SIZE; + } + } + ASSERT3U(npages, <=, MLXCX_CREATE_QUEUE_MAX_PAGES); + + insize = offsetof(mlxcx_cmd_create_cq_in_t, mlxi_create_cq_pas) + + sizeof (uint64_t) * npages; + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlcq->mlcq_state |= MLXCX_CQ_CREATED; + mlcq->mlcq_num = 
from_be24(out.mlxo_create_cq_cqn); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, + mlxcx_rq_ctx_t *ctxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_rq_in_t in; + mlxcx_cmd_query_rq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + ASSERT3S(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_rq_head, + MLXCX_OP_QUERY_RQ, 0); + + in.mlxi_query_rq_rqn = to_be24(mlwq->mlwq_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + bcopy(&out.mlxo_query_rq_context, ctxp, + sizeof (mlxcx_rq_ctx_t)); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, + mlxcx_sq_ctx_t *ctxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_sq_in_t in; + mlxcx_cmd_query_sq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + ASSERT3S(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_sq_head, + MLXCX_OP_QUERY_SQ, 0); + + in.mlxi_query_sq_sqn = to_be24(mlwq->mlwq_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + bcopy(&out.mlxo_query_sq_context, ctxp, + sizeof (mlxcx_sq_ctx_t)); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_query_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, + mlxcx_completionq_ctx_t *ctxp) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_query_cq_in_t in; + mlxcx_cmd_query_cq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC); + VERIFY(mlcq->mlcq_state & MLXCX_CQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_query_cq_head, + MLXCX_OP_QUERY_CQ, 0); + + in.mlxi_query_cq_cqn = to_be24(mlcq->mlcq_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + bcopy(&out.mlxo_query_cq_context, ctxp, + sizeof (mlxcx_completionq_ctx_t)); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_destroy_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_destroy_cq_in_t in; + mlxcx_cmd_destroy_cq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC); + VERIFY(mlcq->mlcq_state & MLXCX_CQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_cq_head, + MLXCX_OP_DESTROY_CQ, 0); + + in.mlxi_destroy_cq_cqn = to_be24(mlcq->mlcq_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + 
return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlcq->mlcq_state |= MLXCX_CQ_DESTROYED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_create_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_create_rq_in_t in; + mlxcx_cmd_create_rq_out_t out; + boolean_t ret; + mlxcx_rq_ctx_t *ctx; + size_t rem, insize; + const ddi_dma_cookie_t *c; + uint64_t pa, npages; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY0(mlwq->mlwq_state & MLXCX_WQ_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_rq_head, + MLXCX_OP_CREATE_RQ, 0); + + ctx = &in.mlxi_create_rq_context; + + set_bit32(&ctx->mlrqc_flags, MLXCX_RQ_FLAGS_RLKEY); + set_bit32(&ctx->mlrqc_flags, MLXCX_RQ_FLAGS_FLUSH_IN_ERROR); + set_bit32(&ctx->mlrqc_flags, MLXCX_RQ_FLAGS_VLAN_STRIP_DISABLE); + ctx->mlrqc_cqn = to_be24(mlwq->mlwq_cq->mlcq_num); + + set_bits32(&ctx->mlrqc_wq.mlwqc_flags, MLXCX_WORKQ_CTX_TYPE, + MLXCX_WORKQ_TYPE_CYCLIC); + ctx->mlrqc_wq.mlwqc_pd = to_be24(mlwq->mlwq_pd->mlpd_num); + ctx->mlrqc_wq.mlwqc_log_wq_sz = mlwq->mlwq_entshift; + ctx->mlrqc_wq.mlwqc_log_wq_stride = MLXCX_RECVQ_STRIDE_SHIFT; + + c = mlxcx_dma_cookie_one(&mlwq->mlwq_doorbell_dma); + ctx->mlrqc_wq.mlwqc_dbr_addr = to_be64(c->dmac_laddress); + ASSERT3U(c->dmac_size, >=, sizeof (mlxcx_workq_doorbell_t)); + + npages = 0; + c = NULL; + while ((c = mlxcx_dma_cookie_iter(&mlwq->mlwq_dma, c)) != NULL) { + pa = c->dmac_laddress; + rem = c->dmac_size; + while (rem > 0) { + ASSERT3U(pa & 0xfff, ==, 0); + ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE); + ctx->mlrqc_wq.mlwqc_pas[npages++] = to_be64(pa); + rem -= MLXCX_HW_PAGE_SIZE; + pa += MLXCX_HW_PAGE_SIZE; + } + } + ASSERT3U(npages, <=, MLXCX_WORKQ_CTX_MAX_ADDRESSES); + + insize = offsetof(mlxcx_cmd_create_rq_in_t, mlxi_create_rq_context) + + offsetof(mlxcx_rq_ctx_t, mlrqc_wq) + + offsetof(mlxcx_workq_ctx_t, mlwqc_pas) + + sizeof (uint64_t) * npages; + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state |= MLXCX_WQ_CREATED; + mlwq->mlwq_num = from_be24(out.mlxo_create_rq_rqn); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_start_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_modify_rq_in_t in; + mlxcx_cmd_modify_rq_out_t out; + boolean_t ret; + ddi_fm_error_t err; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED); + + /* + * Before starting the queue, we have to be sure that it is + * empty and the doorbell and counters are set to 0. 
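+	 * (Presumably the firmware treats the RST->RDY transition below as
+	 * starting from an empty ring, so any stale counter or doorbell
+	 * value here would desynchronise its view of the queue.)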
+ */ + ASSERT(mutex_owned(&mlwq->mlwq_cq->mlcq_mtx)); + ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers)); + ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers_b)); + + mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(0); + MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) + return (B_FALSE); + mlwq->mlwq_pc = 0; + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_rq_head, + MLXCX_OP_MODIFY_RQ, 0); + + in.mlxi_modify_rq_rqn = to_be24(mlwq->mlwq_num); + + /* From state */ + set_bits8(&in.mlxi_modify_rq_state, MLXCX_CMD_MODIFY_RQ_STATE, + MLXCX_RQ_STATE_RST); + /* To state */ + set_bits32(&in.mlxi_modify_rq_context.mlrqc_flags, MLXCX_RQ_STATE, + MLXCX_RQ_STATE_RDY); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state |= MLXCX_WQ_STARTED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_stop_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_modify_rq_in_t in; + mlxcx_cmd_modify_rq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_STARTED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_rq_head, + MLXCX_OP_MODIFY_RQ, 0); + + in.mlxi_modify_rq_rqn = to_be24(mlwq->mlwq_num); + + /* From state */ + set_bits8(&in.mlxi_modify_rq_state, MLXCX_CMD_MODIFY_RQ_STATE, + MLXCX_RQ_STATE_RDY); + /* To state */ + set_bits32(&in.mlxi_modify_rq_context.mlrqc_flags, MLXCX_RQ_STATE, + MLXCX_RQ_STATE_RST); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state &= ~MLXCX_WQ_STARTED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_destroy_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_destroy_rq_in_t in; + mlxcx_cmd_destroy_rq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_rq_head, + MLXCX_OP_DESTROY_RQ, 0); + + in.mlxi_destroy_rq_rqn = to_be24(mlwq->mlwq_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state |= MLXCX_WQ_DESTROYED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_create_tir(mlxcx_t *mlxp, mlxcx_tir_t *mltir) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_create_tir_in_t in; + mlxcx_cmd_create_tir_out_t out; + mlxcx_tir_ctx_t *ctx; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + VERIFY0(mltir->mltir_state & MLXCX_TIR_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_tir_head, + 
	    MLXCX_OP_CREATE_TIR, 0);
+
+	ctx = &in.mlxi_create_tir_context;
+	ctx->mltirc_transport_domain = to_be24(mltir->mltir_tdom->mltd_num);
+	set_bits8(&ctx->mltirc_disp_type, MLXCX_TIR_CTX_DISP_TYPE,
+	    mltir->mltir_type);
+	switch (mltir->mltir_type) {
+	case MLXCX_TIR_INDIRECT:
+		VERIFY(mltir->mltir_rqtable != NULL);
+		VERIFY(mltir->mltir_rqtable->mlrqt_state & MLXCX_RQT_CREATED);
+		ctx->mltirc_indirect_table =
+		    to_be24(mltir->mltir_rqtable->mlrqt_num);
+		set_bits8(&ctx->mltirc_hash_lb, MLXCX_TIR_RX_HASH_FN,
+		    mltir->mltir_hash_fn);
+		bcopy(mltir->mltir_toeplitz_key,
+		    ctx->mltirc_rx_hash_toeplitz_key,
+		    sizeof (ctx->mltirc_rx_hash_toeplitz_key));
+		set_bits32(&ctx->mltirc_rx_hash_fields_outer,
+		    MLXCX_RX_HASH_L3_TYPE, mltir->mltir_l3_type);
+		set_bits32(&ctx->mltirc_rx_hash_fields_outer,
+		    MLXCX_RX_HASH_L4_TYPE, mltir->mltir_l4_type);
+		set_bits32(&ctx->mltirc_rx_hash_fields_outer,
+		    MLXCX_RX_HASH_FIELDS, mltir->mltir_hash_fields);
+		break;
+	case MLXCX_TIR_DIRECT:
+		VERIFY(mltir->mltir_rq != NULL);
+		VERIFY(mltir->mltir_rq->mlwq_state & MLXCX_WQ_CREATED);
+		ctx->mltirc_inline_rqn = to_be24(mltir->mltir_rq->mlwq_num);
+		break;
+	default:
+		VERIFY(0);
+	}
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mltir->mltir_state |= MLXCX_TIR_CREATED;
+		mltir->mltir_num = from_be24(out.mlxo_create_tir_tirn);
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_tir(mlxcx_t *mlxp, mlxcx_tir_t *mltir)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_destroy_tir_in_t in;
+	mlxcx_cmd_destroy_tir_out_t out;
+	boolean_t ret;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	VERIFY(mltir->mltir_state & MLXCX_TIR_CREATED);
+	VERIFY0(mltir->mltir_state & MLXCX_TIR_DESTROYED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_tir_head,
+	    MLXCX_OP_DESTROY_TIR, 0);
+
+	in.mlxi_destroy_tir_tirn = to_be24(mltir->mltir_num);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mltir->mltir_state |= MLXCX_TIR_DESTROYED;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_tis(mlxcx_t *mlxp, mlxcx_tis_t *mltis)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_create_tis_in_t in;
+	mlxcx_cmd_create_tis_out_t out;
+	mlxcx_tis_ctx_t *ctx;
+	boolean_t ret;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	VERIFY0(mltis->mltis_state & MLXCX_TIS_CREATED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_tis_head,
+	    MLXCX_OP_CREATE_TIS, 0);
+
+	ctx = &in.mlxi_create_tis_context;
+	ctx->mltisc_transport_domain = to_be24(mltis->mltis_tdom->mltd_num);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mltis->mltis_state |= MLXCX_TIS_CREATED;
+		mltis->mltis_num = from_be24(out.mlxo_create_tis_tisn);
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_tis(mlxcx_t *mlxp, mlxcx_tis_t *mltis)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_destroy_tis_in_t in;
+	mlxcx_cmd_destroy_tis_out_t out;
+	boolean_t ret;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	VERIFY(mltis->mltis_state & MLXCX_TIS_CREATED);
+	VERIFY0(mltis->mltis_state & MLXCX_TIS_DESTROYED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_tis_head,
+	    MLXCX_OP_DESTROY_TIS, 0);
+
+	in.mlxi_destroy_tis_tisn = to_be24(mltis->mltis_num);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mltis->mltis_state |= MLXCX_TIS_DESTROYED;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *mlft)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_create_flow_table_in_t in;
+	mlxcx_cmd_create_flow_table_out_t out;
+	mlxcx_flow_table_ctx_t *ctx;
+	boolean_t ret;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_flow_table_head,
+	    MLXCX_OP_CREATE_FLOW_TABLE, 0);
+
+	in.mlxi_create_flow_table_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_create_flow_table_table_type = mlft->mlft_type;
+	ctx = &in.mlxi_create_flow_table_context;
+	ctx->mlftc_log_size = mlft->mlft_entshift;
+	ctx->mlftc_level = mlft->mlft_level;
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mlft->mlft_num = from_be24(out.mlxo_create_flow_table_table_id);
+		mlft->mlft_state |= MLXCX_FLOW_TABLE_CREATED;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *mlft)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_destroy_flow_table_in_t in;
+	mlxcx_cmd_destroy_flow_table_out_t out;
+	boolean_t ret;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_flow_table_head,
+	    MLXCX_OP_DESTROY_FLOW_TABLE, 0);
+
+	in.mlxi_destroy_flow_table_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_destroy_flow_table_table_type = mlft->mlft_type;
+	in.mlxi_destroy_flow_table_table_id = to_be24(mlft->mlft_num);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mlft->mlft_state |= MLXCX_FLOW_TABLE_DESTROYED;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_flow_table_root(mlxcx_t *mlxp, mlxcx_flow_table_t *mlft)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_set_flow_table_root_in_t in;
+	mlxcx_cmd_set_flow_table_root_out_t out;
+	boolean_t ret;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_flow_table_root_head,
+	    MLXCX_OP_SET_FLOW_TABLE_ROOT, 0);
+
+	in.mlxi_set_flow_table_root_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_set_flow_table_root_table_type = mlft->mlft_type;
+	in.mlxi_set_flow_table_root_table_id = to_be24(mlft->mlft_num);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mlft->mlft_state |= MLXCX_FLOW_TABLE_ROOT;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_flow_group(mlxcx_t *mlxp, mlxcx_flow_group_t *mlfg)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_create_flow_group_in_t in;
+	mlxcx_cmd_create_flow_group_out_t out;
+	boolean_t ret;
+	const mlxcx_flow_table_t *mlft;
+	mlxcx_flow_header_match_t *hdrs;
+	mlxcx_flow_params_match_t *params;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	mlft = mlfg->mlfg_table;
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+	VERIFY0(mlfg->mlfg_state & MLXCX_FLOW_GROUP_CREATED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_flow_group_head,
+	    MLXCX_OP_CREATE_FLOW_GROUP, 0);
+
+	in.mlxi_create_flow_group_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_create_flow_group_table_type = mlft->mlft_type;
+	in.mlxi_create_flow_group_table_id = to_be24(mlft->mlft_num);
+	in.mlxi_create_flow_group_start_flow_index =
+	    to_be32(mlfg->mlfg_start_idx);
+	in.mlxi_create_flow_group_end_flow_index =
+	    to_be32(mlfg->mlfg_start_idx + (mlfg->mlfg_size - 1));
+
+	hdrs = &in.mlxi_create_flow_group_match_criteria.mlfm_outer_headers;
+	params = &in.mlxi_create_flow_group_match_criteria.mlfm_misc_parameters;
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SMAC) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+		(void) memset(&hdrs->mlfh_smac, 0xff, sizeof (hdrs->mlfh_smac));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DMAC) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+		(void) memset(&hdrs->mlfh_dmac, 0xff, sizeof (hdrs->mlfh_dmac));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+		set_bit24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_CVLAN_TAG);
+		set_bit24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_SVLAN_TAG);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VID) {
+		ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN);
+		set_bits16(&hdrs->mlfh_first_vid_flags,
+		    MLXCX_FLOW_HDR_FIRST_VID, UINT16_MAX);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+		set_bits24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_IP_VERSION,
+		    UINT32_MAX);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SRCIP) {
+		ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+		(void) memset(&hdrs->mlfh_src_ip, 0xff,
+		    sizeof (hdrs->mlfh_src_ip));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DSTIP) {
+		ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+		(void) memset(&hdrs->mlfh_dst_ip, 0xff,
+		    sizeof (hdrs->mlfh_dst_ip));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_PROTO) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS;
+		hdrs->mlfh_ip_protocol = UINT8_MAX;
+	}
+
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SQN) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_MISC_PARAMS;
+		params->mlfp_source_sqn = to_be24(UINT32_MAX);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VXLAN) {
+		in.mlxi_create_flow_group_match_criteria_en |=
+		    MLXCX_FLOW_GROUP_MATCH_MISC_PARAMS;
+		params->mlfp_vxlan_vni = to_be24(UINT32_MAX);
+	}
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mlfg->mlfg_state |= MLXCX_FLOW_GROUP_CREATED;
+		mlfg->mlfg_num = from_be24(out.mlxo_create_flow_group_group_id);
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_destroy_flow_group(mlxcx_t *mlxp, mlxcx_flow_group_t *mlfg)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_destroy_flow_group_in_t in;
+	mlxcx_cmd_destroy_flow_group_out_t out;
+	boolean_t ret;
+	const mlxcx_flow_table_t *mlft;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	mlft = mlfg->mlfg_table;
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+	VERIFY(mlfg->mlfg_state & MLXCX_FLOW_GROUP_CREATED);
+	VERIFY0(mlfg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_flow_group_head,
+	    MLXCX_OP_DESTROY_FLOW_GROUP, 0);
+
+	in.mlxi_destroy_flow_group_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_destroy_flow_group_table_type = mlft->mlft_type;
+	in.mlxi_destroy_flow_group_table_id = to_be24(mlft->mlft_num);
+	in.mlxi_destroy_flow_group_group_id = to_be32(mlfg->mlfg_num);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mlfg->mlfg_state |= MLXCX_FLOW_GROUP_DESTROYED;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_set_flow_table_entry(mlxcx_t *mlxp, mlxcx_flow_entry_t *mlfe)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_set_flow_table_entry_in_t in;
+	mlxcx_cmd_set_flow_table_entry_out_t out;
+	boolean_t ret;
+	size_t insize;
+	mlxcx_flow_entry_ctx_t *ctx;
+	const mlxcx_flow_table_t *mlft;
+	mlxcx_flow_group_t *mlfg;
+	mlxcx_flow_dest_t *d;
+	uint_t i;
+	mlxcx_flow_header_match_t *hdrs;
+	mlxcx_flow_params_match_t *params;
+	mlxcx_cmd_set_flow_table_entry_opmod_t opmod;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	mlft = mlfe->mlfe_table;
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+	mlfg = mlfe->mlfe_group;
+	VERIFY(mlfg->mlfg_state & MLXCX_FLOW_GROUP_CREATED);
+	VERIFY0(mlfg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED);
+
+	opmod = MLXCX_CMD_FLOW_ENTRY_SET_NEW;
+	if (mlfe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+		ASSERT(mlfe->mlfe_state & MLXCX_FLOW_ENTRY_DIRTY);
+		opmod = MLXCX_CMD_FLOW_ENTRY_MODIFY;
+	}
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_set_flow_table_entry_head,
+	    MLXCX_OP_SET_FLOW_TABLE_ENTRY, opmod);
+
+	in.mlxi_set_flow_table_entry_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_set_flow_table_entry_table_type = mlft->mlft_type;
+	in.mlxi_set_flow_table_entry_table_id = to_be24(mlft->mlft_num);
+	in.mlxi_set_flow_table_entry_flow_index = to_be32(mlfe->mlfe_index);
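+
+	/*
+	 * When modifying an existing entry, tell the hardware which parts
+	 * of the context to take notice of: we always update both the
+	 * action and the destination list.
+	 */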
+	if (mlfe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
+		set_bit8(&in.mlxi_set_flow_table_entry_modify_bitmask,
+		    MLXCX_CMD_FLOW_ENTRY_SET_ACTION);
+		set_bit8(&in.mlxi_set_flow_table_entry_modify_bitmask,
+		    MLXCX_CMD_FLOW_ENTRY_SET_DESTINATION);
+	}
+
+	ctx = &in.mlxi_set_flow_table_entry_context;
+	ctx->mlfec_group_id = to_be32(mlfg->mlfg_num);
+
+	insize = offsetof(mlxcx_cmd_set_flow_table_entry_in_t,
+	    mlxi_set_flow_table_entry_context) +
+	    offsetof(mlxcx_flow_entry_ctx_t, mlfec_destination);
+
+	ctx->mlfec_action = to_be16(mlfe->mlfe_action);
+
+	switch (mlfe->mlfe_action) {
+	case MLXCX_FLOW_ACTION_ALLOW:
+	case MLXCX_FLOW_ACTION_DROP:
+		break;
+	case MLXCX_FLOW_ACTION_FORWARD:
+		ASSERT3U(mlfe->mlfe_ndest, <=, MLXCX_FLOW_MAX_DESTINATIONS);
+		ASSERT3U(mlfe->mlfe_ndest, <=,
+		    mlxp->mlx_caps->mlc_max_rx_fe_dest);
+		ctx->mlfec_destination_list_size = to_be24(mlfe->mlfe_ndest);
+		for (i = 0; i < mlfe->mlfe_ndest; ++i) {
+			insize += sizeof (mlxcx_flow_dest_t);
+			d = &ctx->mlfec_destination[i];
+			if (mlfe->mlfe_dest[i].mlfed_tir != NULL) {
+				d->mlfd_destination_type = MLXCX_FLOW_DEST_TIR;
+				d->mlfd_destination_id = to_be24(
+				    mlfe->mlfe_dest[i].mlfed_tir->mltir_num);
+			} else if (mlfe->mlfe_dest[i].mlfed_flow != NULL) {
+				d->mlfd_destination_type =
+				    MLXCX_FLOW_DEST_FLOW_TABLE;
+				d->mlfd_destination_id = to_be24(
+				    mlfe->mlfe_dest[i].mlfed_flow->mlft_num);
+			} else {
+				/* Invalid flow entry destination */
+				VERIFY(0);
+			}
+		}
+		break;
+	case MLXCX_FLOW_ACTION_COUNT:
+		/* We don't support count actions yet. */
+		VERIFY(0);
+		break;
+	case MLXCX_FLOW_ACTION_ENCAP:
+	case MLXCX_FLOW_ACTION_DECAP:
+		/* We don't support encap/decap actions yet. */
+		VERIFY(0);
+		break;
+	}
+
+	hdrs = &ctx->mlfec_match_value.mlfm_outer_headers;
+	params = &ctx->mlfec_match_value.mlfm_misc_parameters;
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SMAC) {
+		bcopy(mlfe->mlfe_smac, hdrs->mlfh_smac,
+		    sizeof (hdrs->mlfh_smac));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DMAC) {
+		bcopy(mlfe->mlfe_dmac, hdrs->mlfh_dmac,
+		    sizeof (hdrs->mlfh_dmac));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN) {
+		switch (mlfe->mlfe_vlan_type) {
+		case MLXCX_VLAN_TYPE_CVLAN:
+			set_bit24(&hdrs->mlfh_tcp_ip_flags,
+			    MLXCX_FLOW_HDR_CVLAN_TAG);
+			break;
+		case MLXCX_VLAN_TYPE_SVLAN:
+			set_bit24(&hdrs->mlfh_tcp_ip_flags,
+			    MLXCX_FLOW_HDR_SVLAN_TAG);
+			break;
+		default:
+			break;
+		}
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VID) {
+		ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VLAN);
+		set_bits16(&hdrs->mlfh_first_vid_flags,
+		    MLXCX_FLOW_HDR_FIRST_VID, mlfe->mlfe_vid);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER) {
+		set_bits24(&hdrs->mlfh_tcp_ip_flags, MLXCX_FLOW_HDR_IP_VERSION,
+		    mlfe->mlfe_ip_version);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SRCIP) {
+		ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+		bcopy(mlfe->mlfe_srcip, hdrs->mlfh_src_ip,
+		    sizeof (hdrs->mlfh_src_ip));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_DSTIP) {
+		ASSERT(mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_VER);
+		bcopy(mlfe->mlfe_dstip, hdrs->mlfh_dst_ip,
+		    sizeof (hdrs->mlfh_dst_ip));
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_IP_PROTO) {
+		hdrs->mlfh_ip_protocol = mlfe->mlfe_ip_proto;
+	}
+
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_SQN) {
+		params->mlfp_source_sqn = to_be24(mlfe->mlfe_sqn);
+	}
+	if (mlfg->mlfg_mask & MLXCX_FLOW_MATCH_VXLAN) {
+		params->mlfp_vxlan_vni = to_be24(mlfe->mlfe_vxlan_vni);
+	}
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		mlfe->mlfe_state |= MLXCX_FLOW_ENTRY_CREATED;
+		mlfe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY;
+		mlfg->mlfg_state |= MLXCX_FLOW_GROUP_BUSY;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_delete_flow_table_entry(mlxcx_t *mlxp, mlxcx_flow_entry_t *mlfe)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_delete_flow_table_entry_in_t in;
+	mlxcx_cmd_delete_flow_table_entry_out_t out;
+	boolean_t ret;
+	const mlxcx_flow_table_t *mlft;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	mlft = mlfe->mlfe_table;
+	ASSERT(mutex_owned(&mlft->mlft_mtx));
+	VERIFY(mlft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
+	VERIFY0(mlft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_delete_flow_table_entry_head,
+	    MLXCX_OP_DELETE_FLOW_TABLE_ENTRY, 0);
+
+	in.mlxi_delete_flow_table_entry_vport_number =
+	    to_be16(mlft->mlft_port->mlp_num);
+	in.mlxi_delete_flow_table_entry_table_type = mlft->mlft_type;
+	in.mlxi_delete_flow_table_entry_table_id = to_be24(mlft->mlft_num);
+	in.mlxi_delete_flow_table_entry_flow_index = to_be32(mlfe->mlfe_index);
+
+	if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) {
+		mlxcx_cmd_fini(mlxp, &cmd);
+		return (B_FALSE);
+	}
+	mlxcx_cmd_wait(&cmd);
+
+	ret = mlxcx_cmd_evaluate(mlxp, &cmd);
+	if (ret) {
+		/*
+		 * Note that flow entries have a different lifecycle to most
+		 * other things we create -- we have to be able to re-use them
+		 * after they have been deleted, since they exist at a fixed
+		 * position in their flow table.
+		 *
+		 * So we clear the CREATED bit here for them to let us call
+		 * mlxcx_cmd_set_flow_table_entry() on the same entry again
+		 * later.
+		 */
+		mlfe->mlfe_state &= ~MLXCX_FLOW_ENTRY_CREATED;
+		mlfe->mlfe_state |= MLXCX_FLOW_ENTRY_DELETED;
+	}
+	mlxcx_cmd_fini(mlxp, &cmd);
+	return (ret);
+}
+
+boolean_t
+mlxcx_cmd_create_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+	mlxcx_cmd_t cmd;
+	mlxcx_cmd_create_sq_in_t in;
+	mlxcx_cmd_create_sq_out_t out;
+	boolean_t ret;
+	mlxcx_sq_ctx_t *ctx;
+	size_t rem, insize;
+	const ddi_dma_cookie_t *c;
+	uint64_t pa, npages;
+
+	bzero(&in, sizeof (in));
+	bzero(&out, sizeof (out));
+
+	ASSERT(mutex_owned(&mlwq->mlwq_mtx));
+	VERIFY3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ);
+	VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+	VERIFY0(mlwq->mlwq_state & MLXCX_WQ_CREATED);
+
+	mlxcx_cmd_init(mlxp, &cmd);
+	mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_sq_head,
+	    MLXCX_OP_CREATE_SQ, 0);
+
+	ctx = &in.mlxi_create_sq_context;
+
+	set_bit32(&ctx->mlsqc_flags, MLXCX_SQ_FLAGS_RLKEY);
+	set_bit32(&ctx->mlsqc_flags, MLXCX_SQ_FLAGS_FLUSH_IN_ERROR);
+	set_bits32(&ctx->mlsqc_flags, MLXCX_SQ_MIN_WQE_INLINE,
+	    mlwq->mlwq_inline_mode);
+	ctx->mlsqc_cqn = to_be24(mlwq->mlwq_cq->mlcq_num);
+
+	VERIFY(mlwq->mlwq_tis != NULL);
+	ctx->mlsqc_tis_lst_sz = to_be16(1);
+	ctx->mlsqc_tis_num = to_be24(mlwq->mlwq_tis->mltis_num);
+
+	set_bits32(&ctx->mlsqc_wq.mlwqc_flags, MLXCX_WORKQ_CTX_TYPE,
+	    MLXCX_WORKQ_TYPE_CYCLIC);
+	ctx->mlsqc_wq.mlwqc_pd = to_be24(mlwq->mlwq_pd->mlpd_num);
+	ctx->mlsqc_wq.mlwqc_uar_page = to_be24(mlwq->mlwq_uar->mlu_num);
+	ctx->mlsqc_wq.mlwqc_log_wq_sz = mlwq->mlwq_entshift;
+	ctx->mlsqc_wq.mlwqc_log_wq_stride = MLXCX_SENDQ_STRIDE_SHIFT;
+
+	c = mlxcx_dma_cookie_one(&mlwq->mlwq_doorbell_dma);
+	ctx->mlsqc_wq.mlwqc_dbr_addr = to_be64(c->dmac_laddress);
+	ASSERT3U(c->dmac_size, >=, sizeof (mlxcx_workq_doorbell_t));
+
+	npages = 0;
+	c = NULL;
+	while ((c = 
mlxcx_dma_cookie_iter(&mlwq->mlwq_dma, c)) != NULL) { + pa = c->dmac_laddress; + rem = c->dmac_size; + while (rem > 0) { + ASSERT3U(pa & 0xfff, ==, 0); + ASSERT3U(rem, >=, MLXCX_HW_PAGE_SIZE); + ctx->mlsqc_wq.mlwqc_pas[npages++] = to_be64(pa); + rem -= MLXCX_HW_PAGE_SIZE; + pa += MLXCX_HW_PAGE_SIZE; + } + } + ASSERT3U(npages, <=, MLXCX_WORKQ_CTX_MAX_ADDRESSES); + + insize = offsetof(mlxcx_cmd_create_sq_in_t, mlxi_create_sq_context) + + offsetof(mlxcx_sq_ctx_t, mlsqc_wq) + + offsetof(mlxcx_workq_ctx_t, mlwqc_pas) + + sizeof (uint64_t) * npages; + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, insize, &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state |= MLXCX_WQ_CREATED; + mlwq->mlwq_num = from_be24(out.mlxo_create_sq_sqn); + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_start_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_modify_sq_in_t in; + mlxcx_cmd_modify_sq_out_t out; + boolean_t ret; + ddi_fm_error_t err; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + ASSERT(mlwq->mlwq_cq != NULL); + + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED); + + /* + * Before starting the queue, we have to be sure that it is + * empty and the doorbell and counters are set to 0. + */ + ASSERT(mutex_owned(&mlwq->mlwq_cq->mlcq_mtx)); + ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers)); + ASSERT(list_is_empty(&mlwq->mlwq_cq->mlcq_buffers_b)); + + mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(0); + MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) + return (B_FALSE); + mlwq->mlwq_pc = 0; + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_sq_head, + MLXCX_OP_MODIFY_SQ, 0); + + in.mlxi_modify_sq_sqn = to_be24(mlwq->mlwq_num); + + /* From state */ + set_bits8(&in.mlxi_modify_sq_state, MLXCX_CMD_MODIFY_SQ_STATE, + MLXCX_SQ_STATE_RST); + /* To state */ + set_bits32(&in.mlxi_modify_sq_context.mlsqc_flags, MLXCX_SQ_STATE, + MLXCX_SQ_STATE_RDY); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state |= MLXCX_WQ_STARTED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_stop_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_modify_sq_in_t in; + mlxcx_cmd_modify_sq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_STARTED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_modify_sq_head, + MLXCX_OP_MODIFY_SQ, 0); + + in.mlxi_modify_sq_sqn = to_be24(mlwq->mlwq_num); + + /* From state */ + set_bits8(&in.mlxi_modify_sq_state, MLXCX_CMD_MODIFY_SQ_STATE, + MLXCX_SQ_STATE_RDY); + /* To state */ + set_bits32(&in.mlxi_modify_sq_context.mlsqc_flags, MLXCX_SQ_STATE, + MLXCX_SQ_STATE_RST); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + 
return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state &= ~MLXCX_WQ_STARTED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_destroy_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_destroy_sq_in_t in; + mlxcx_cmd_destroy_sq_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + VERIFY(mlwq->mlwq_state & MLXCX_WQ_CREATED); + VERIFY0(mlwq->mlwq_state & MLXCX_WQ_STARTED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_sq_head, + MLXCX_OP_DESTROY_SQ, 0); + + in.mlxi_destroy_sq_sqn = to_be24(mlwq->mlwq_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlwq->mlwq_state |= MLXCX_WQ_DESTROYED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_create_rqt(mlxcx_t *mlxp, mlxcx_rqtable_t *mlrqt) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_create_rqt_in_t in; + mlxcx_cmd_create_rqt_out_t out; + mlxcx_rqtable_ctx_t *ctx; + boolean_t ret; + uint_t i; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + VERIFY0(mlrqt->mlrqt_state & MLXCX_RQT_CREATED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_create_rqt_head, + MLXCX_OP_CREATE_RQT, 0); + + ctx = &in.mlxi_create_rqt_context; + ASSERT3U(mlrqt->mlrqt_max, <=, MLXCX_RQT_MAX_RQ_REFS); + ASSERT3U(mlrqt->mlrqt_max, <=, mlxp->mlx_caps->mlc_max_rqt_size); + ctx->mlrqtc_max_size = to_be16(mlrqt->mlrqt_max); + ctx->mlrqtc_actual_size = to_be16(mlrqt->mlrqt_used); + for (i = 0; i < mlrqt->mlrqt_used; ++i) { + ctx->mlrqtc_rqref[i].mlrqtr_rqn = to_be24( + mlrqt->mlrqt_rq[i]->mlwq_num); + } + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlrqt->mlrqt_num = from_be24(out.mlxo_create_rqt_rqtn); + mlrqt->mlrqt_state |= MLXCX_RQT_CREATED; + mlrqt->mlrqt_state &= ~MLXCX_RQT_DIRTY; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_destroy_rqt(mlxcx_t *mlxp, mlxcx_rqtable_t *mlrqt) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_destroy_rqt_in_t in; + mlxcx_cmd_destroy_rqt_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + VERIFY(mlrqt->mlrqt_state & MLXCX_RQT_CREATED); + VERIFY0(mlrqt->mlrqt_state & MLXCX_RQT_DESTROYED); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_destroy_rqt_head, + MLXCX_OP_DESTROY_RQT, 0); + + in.mlxi_destroy_rqt_rqtn = to_be24(mlrqt->mlrqt_num); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + if (ret) { + mlrqt->mlrqt_state |= MLXCX_RQT_DESTROYED; + } + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +boolean_t +mlxcx_cmd_set_int_mod(mlxcx_t *mlxp, uint_t intr, uint_t min_delay) +{ + mlxcx_cmd_t cmd; + mlxcx_cmd_config_int_mod_in_t in; + mlxcx_cmd_config_int_mod_out_t out; + boolean_t ret; + + bzero(&in, sizeof (in)); + bzero(&out, sizeof (out)); + + mlxcx_cmd_init(mlxp, &cmd); + mlxcx_cmd_in_header_init(&cmd, &in.mlxi_config_int_mod_head, + 
MLXCX_OP_CONFIG_INT_MODERATION, MLXCX_CMD_CONFIG_INT_MOD_WRITE); + + in.mlxi_config_int_mod_int_vector = to_be16(intr); + in.mlxi_config_int_mod_min_delay = to_be16(min_delay); + + if (!mlxcx_cmd_send(mlxp, &cmd, &in, sizeof (in), &out, sizeof (out))) { + mlxcx_cmd_fini(mlxp, &cmd); + return (B_FALSE); + } + mlxcx_cmd_wait(&cmd); + + ret = mlxcx_cmd_evaluate(mlxp, &cmd); + mlxcx_cmd_fini(mlxp, &cmd); + return (ret); +} + +/* + * CTASSERTs here are for the structs in mlxcx_reg.h, to check they match + * against offsets from the PRM. + * + * They're not in the header file, to avoid them being used by multiple .c + * files. + */ + +CTASSERT(offsetof(mlxcx_eventq_ent_t, mleqe_unknown_data) == 0x20); +CTASSERT(offsetof(mlxcx_eventq_ent_t, mleqe_signature) == 0x3c + 2); +CTASSERT(sizeof (mlxcx_eventq_ent_t) == 64); + +CTASSERT(offsetof(mlxcx_completionq_error_ent_t, mlcqee_byte_cnt) == 0x2C); +CTASSERT(offsetof(mlxcx_completionq_error_ent_t, mlcqee_wqe_opcode) == 0x38); + +CTASSERT(sizeof (mlxcx_completionq_error_ent_t) == + sizeof (mlxcx_completionq_ent_t)); +CTASSERT(sizeof (mlxcx_wqe_control_seg_t) == (1 << 4)); + +CTASSERT(offsetof(mlxcx_wqe_eth_seg_t, mles_inline_headers) == 0x0e); +CTASSERT(sizeof (mlxcx_wqe_eth_seg_t) == (1 << 5)); + +CTASSERT(sizeof (mlxcx_wqe_data_seg_t) == (1 << 4)); + +CTASSERT(sizeof (mlxcx_sendq_ent_t) == (1 << MLXCX_SENDQ_STRIDE_SHIFT)); + +CTASSERT(sizeof (mlxcx_sendq_bf_t) == (1 << MLXCX_SENDQ_STRIDE_SHIFT)); + +CTASSERT(sizeof (mlxcx_sendq_extra_ent_t) == (1 << MLXCX_SENDQ_STRIDE_SHIFT)); + +CTASSERT(sizeof (mlxcx_recvq_ent_t) == (1 << MLXCX_RECVQ_STRIDE_SHIFT)); + +CTASSERT(offsetof(mlxcx_workq_ctx_t, mlwqc_dbr_addr) == 0x10); +CTASSERT(offsetof(mlxcx_workq_ctx_t, mlwqc_pas) == 0xc0); + +CTASSERT(offsetof(mlxcx_rq_ctx_t, mlrqc_cqn) == 0x09); +CTASSERT(offsetof(mlxcx_rq_ctx_t, mlrqc_wq) == 0x30); + +CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_cqn) == 0x09); +CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_tis_lst_sz) == 0x20); +CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_tis_num) == 0x2d); +CTASSERT(offsetof(mlxcx_sq_ctx_t, mlsqc_wq) == 0x30); + +CTASSERT(sizeof (mlxcx_tis_ctx_t) == 0xa0); +CTASSERT(offsetof(mlxcx_tis_ctx_t, mltisc_transport_domain) == 0x25); + +CTASSERT(offsetof(mlxcx_rqtable_ctx_t, mlrqtc_max_size) == 0x16); +CTASSERT(offsetof(mlxcx_rqtable_ctx_t, mlrqtc_rqref) == 0xF0); + +CTASSERT(offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_event_bitmask) == + 0x58); +CTASSERT(offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_pas) == 0x110); +CTASSERT(offsetof(mlxcx_cmd_create_eq_in_t, mlxi_create_eq_context) == 0x10); + +CTASSERT(offsetof(mlxcx_cmd_create_tir_in_t, mlxi_create_tir_context) == 0x20); + +CTASSERT(offsetof(mlxcx_cmd_create_tis_in_t, mlxi_create_tis_context) == 0x20); + +CTASSERT(offsetof(mlxcx_cmd_query_special_ctxs_out_t, + mlxo_query_special_ctxs_resd_lkey) == 0x0c); + +CTASSERT(offsetof(mlxcx_cmd_query_cq_out_t, mlxo_query_cq_context) == 0x10); +CTASSERT(offsetof(mlxcx_cmd_query_cq_out_t, mlxo_query_cq_pas) == 0x110); + +CTASSERT(offsetof(mlxcx_cmd_query_rq_out_t, mlxo_query_rq_context) == 0x20); + +CTASSERT(offsetof(mlxcx_cmd_create_sq_in_t, mlxi_create_sq_context) == 0x20); + +CTASSERT(offsetof(mlxcx_cmd_modify_sq_in_t, mlxi_modify_sq_context) == 0x20); + +CTASSERT(offsetof(mlxcx_cmd_query_sq_out_t, mlxo_query_sq_context) == 0x20); + +CTASSERT(offsetof(mlxcx_cmd_create_rqt_in_t, mlxi_create_rqt_context) == 0x20); + +CTASSERT(offsetof(mlxcx_reg_pmtu_t, mlrd_pmtu_oper_mtu) == 0x0C); + +CTASSERT(sizeof (mlxcx_reg_ptys_t) == 64); 
+CTASSERT(offsetof(mlxcx_reg_ptys_t, mlrd_ptys_proto_cap) == 0x0c);
+CTASSERT(offsetof(mlxcx_reg_ptys_t, mlrd_ptys_proto_admin) == 0x18);
+CTASSERT(offsetof(mlxcx_reg_ptys_t, mlrd_ptys_proto_partner_advert) == 0x30);
+
+CTASSERT(offsetof(mlxcx_reg_mcia_t, mlrd_mcia_data) == 0x10);
+
+CTASSERT(offsetof(mlxcx_ppcnt_ieee_802_3_t,
+    mlppc_ieee_802_3_in_range_len_err) == 0x50);
+CTASSERT(offsetof(mlxcx_ppcnt_ieee_802_3_t,
+    mlppc_ieee_802_3_pause_tx) == 0x90);
+
+CTASSERT(sizeof (mlxcx_reg_ppcnt_t) == 256);
+CTASSERT(offsetof(mlxcx_reg_ppcnt_t, mlrd_ppcnt_data) == 0x08);
+
+CTASSERT(offsetof(mlxcx_cmd_access_register_in_t,
+    mlxi_access_register_argument) == 0x0C);
+CTASSERT(offsetof(mlxcx_cmd_access_register_in_t,
+    mlxi_access_register_data) == 0x10);
+
+CTASSERT(offsetof(mlxcx_cmd_access_register_out_t,
+    mlxo_access_register_data) == 0x10);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_dma.c b/usr/src/uts/common/io/mlxcx/mlxcx_dma.c
new file mode 100644
index 0000000000..79b9bb3746
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_dma.c
@@ -0,0 +1,460 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020, The University of Queensland
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * DMA allocation and teardown routines.
+ */
+
+#include <mlxcx.h>
+
+void
+mlxcx_dma_acc_attr(mlxcx_t *mlxp, ddi_device_acc_attr_t *accp)
+{
+	bzero(accp, sizeof (*accp));
+	accp->devacc_attr_version = DDI_DEVICE_ATTR_V0;
+	accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
+	accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC;
+
+	if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+		accp->devacc_attr_access = DDI_FLAGERR_ACC;
+	} else {
+		accp->devacc_attr_access = DDI_DEFAULT_ACC;
+	}
+}
+
+void
+mlxcx_dma_page_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+	bzero(attrp, sizeof (*attrp));
+	attrp->dma_attr_version = DMA_ATTR_V0;
+
+	/*
+	 * This is a 64-bit PCIe device. We can use the entire address space.
+	 */
+	attrp->dma_attr_addr_lo = 0x0;
+	attrp->dma_attr_addr_hi = UINT64_MAX;
+
+	/*
+	 * The count max indicates the total amount that can fit into one
+	 * cookie. Because we're creating a single page for tracking purposes,
+	 * this can be a page in size. The alignment and segment are related
+	 * to this same requirement: the allocation needs to be page aligned,
+	 * and the segment is the boundary that a cookie can't cross, aka a
+	 * 4k page.
+	 */
+	attrp->dma_attr_count_max = MLXCX_CMD_DMA_PAGE_SIZE - 1;
+	attrp->dma_attr_align = MLXCX_CMD_DMA_PAGE_SIZE;
+	attrp->dma_attr_seg = MLXCX_CMD_DMA_PAGE_SIZE - 1;
+
+	attrp->dma_attr_burstsizes = 0xfff;
+
+	/*
+	 * The minimum and maximum sizes that we can send. We cap these based
+	 * on the use of this memory, which is a single page.
+	 */
+	attrp->dma_attr_minxfer = 0x1;
+	attrp->dma_attr_maxxfer = MLXCX_CMD_DMA_PAGE_SIZE;
+
+	/*
+	 * This is supposed to be used for static data structures, therefore
+	 * we keep it to just a single cookie (one page).
+	 */
+	attrp->dma_attr_sgllen = 1;
+
+	/*
+	 * The granularity describes the addressing granularity; that is, the
+	 * hardware can ask for chunks in units of this many bytes.
+	 */
+	attrp->dma_attr_granular = MLXCX_CMD_DMA_PAGE_SIZE;
+
+	if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+		attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+	} else {
+		attrp->dma_attr_flags = 0;
+	}
+}
+
+/*
+ * DMA attributes for queue memory (EQ, CQ, WQ etc.)
+ *
+ * These have to allocate in units of whole pages, but can be multiple
+ * pages and don't have to be physically contiguous.
+ */
+void
+mlxcx_dma_queue_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+	bzero(attrp, sizeof (*attrp));
+	attrp->dma_attr_version = DMA_ATTR_V0;
+
+	/*
+	 * This is a 64-bit PCIe device. We can use the entire address space.
+	 */
+	attrp->dma_attr_addr_lo = 0x0;
+	attrp->dma_attr_addr_hi = UINT64_MAX;
+
+	attrp->dma_attr_count_max = MLXCX_QUEUE_DMA_PAGE_SIZE - 1;
+
+	attrp->dma_attr_align = MLXCX_QUEUE_DMA_PAGE_SIZE;
+
+	attrp->dma_attr_burstsizes = 0xfff;
+
+	/*
+	 * The minimum and maximum sizes that we can send. The minimum is one
+	 * whole page, since queue memory is always allocated in whole pages.
+	 */
+	attrp->dma_attr_minxfer = MLXCX_QUEUE_DMA_PAGE_SIZE;
+	attrp->dma_attr_maxxfer = UINT32_MAX;
+
+	attrp->dma_attr_seg = UINT64_MAX;
+
+	attrp->dma_attr_granular = MLXCX_QUEUE_DMA_PAGE_SIZE;
+
+	/* But we can have more than one. */
+	attrp->dma_attr_sgllen = MLXCX_CREATE_QUEUE_MAX_PAGES;
+
+	if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+		attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+	} else {
+		attrp->dma_attr_flags = 0;
+	}
+}
+
+/*
+ * DMA attributes for packet buffers
+ */
+void
+mlxcx_dma_buf_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+	bzero(attrp, sizeof (*attrp));
+	attrp->dma_attr_version = DMA_ATTR_V0;
+
+	/*
+	 * This is a 64-bit PCIe device. We can use the entire address space.
+	 */
+	attrp->dma_attr_addr_lo = 0x0;
+	attrp->dma_attr_addr_hi = UINT64_MAX;
+
+	/*
+	 * Each scatter pointer has a 32-bit length field.
+	 */
+	attrp->dma_attr_count_max = UINT32_MAX;
+
+	/*
+	 * The PRM gives us no alignment requirements for scatter pointers,
+	 * but it implies that units < 16 bytes are a bad idea.
+	 */
+	attrp->dma_attr_align = 16;
+	attrp->dma_attr_granular = 1;
+
+	attrp->dma_attr_burstsizes = 0xfff;
+
+	attrp->dma_attr_minxfer = 1;
+	attrp->dma_attr_maxxfer = UINT64_MAX;
+
+	attrp->dma_attr_seg = UINT64_MAX;
+
+	/*
+	 * We choose how many scatter pointers we're allowed per packet when
+	 * we set the recv queue stride. This macro is from mlxcx_reg.h where
+	 * we fix that for all of our receive queues.
+	 */
+	attrp->dma_attr_sgllen = MLXCX_RECVQ_MAX_PTRS;
+
+	if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) {
+		attrp->dma_attr_flags = DDI_DMA_FLAGERR;
+	} else {
+		attrp->dma_attr_flags = 0;
+	}
+}
+
+/*
+ * DMA attributes for queue doorbells
+ */
+void
+mlxcx_dma_qdbell_attr(mlxcx_t *mlxp, ddi_dma_attr_t *attrp)
+{
+	bzero(attrp, sizeof (*attrp));
+	attrp->dma_attr_version = DMA_ATTR_V0;
+
+	/*
+	 * This is a 64-bit PCIe device. We can use the entire address space.
+	 */
+	attrp->dma_attr_addr_lo = 0x0;
+	attrp->dma_attr_addr_hi = UINT64_MAX;
+
+	/*
+	 * Queue doorbells are always exactly 16 bytes in length, but
+	 * the ddi_dma functions don't like such small values of count_max.
+	 *
+	 * We tell some lies here.
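+	 * (Presumably the page-sized count_max and seg below are those lies:
+	 * they overstate the real 16-byte transfer so that the DDI DMA
+	 * routines accept the attributes; maxxfer still bounds the size.)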
+ */ + attrp->dma_attr_count_max = MLXCX_QUEUE_DMA_PAGE_SIZE - 1; + attrp->dma_attr_align = 8; + attrp->dma_attr_burstsizes = 0x8; + attrp->dma_attr_minxfer = 1; + attrp->dma_attr_maxxfer = UINT16_MAX; + attrp->dma_attr_seg = MLXCX_QUEUE_DMA_PAGE_SIZE - 1; + attrp->dma_attr_granular = 1; + attrp->dma_attr_sgllen = 1; + + if (DDI_FM_DMA_ERR_CAP(mlxp->mlx_fm_caps)) { + attrp->dma_attr_flags = DDI_DMA_FLAGERR; + } else { + attrp->dma_attr_flags = 0; + } +} + +void +mlxcx_dma_free(mlxcx_dma_buffer_t *mxdb) +{ + int ret; + + if (mxdb->mxdb_flags & MLXCX_DMABUF_BOUND) { + VERIFY(mxdb->mxdb_dma_handle != NULL); + ret = ddi_dma_unbind_handle(mxdb->mxdb_dma_handle); + VERIFY3S(ret, ==, DDI_SUCCESS); + mxdb->mxdb_flags &= ~MLXCX_DMABUF_BOUND; + mxdb->mxdb_ncookies = 0; + } + + if (mxdb->mxdb_flags & MLXCX_DMABUF_MEM_ALLOC) { + ddi_dma_mem_free(&mxdb->mxdb_acc_handle); + mxdb->mxdb_acc_handle = NULL; + mxdb->mxdb_va = NULL; + mxdb->mxdb_len = 0; + mxdb->mxdb_flags &= ~MLXCX_DMABUF_MEM_ALLOC; + } + + if (mxdb->mxdb_flags & MLXCX_DMABUF_FOREIGN) { + /* The mblk will be freed separately */ + mxdb->mxdb_va = NULL; + mxdb->mxdb_len = 0; + mxdb->mxdb_flags &= ~MLXCX_DMABUF_FOREIGN; + } + + if (mxdb->mxdb_flags & MLXCX_DMABUF_HDL_ALLOC) { + ddi_dma_free_handle(&mxdb->mxdb_dma_handle); + mxdb->mxdb_dma_handle = NULL; + mxdb->mxdb_flags &= ~MLXCX_DMABUF_HDL_ALLOC; + } + + ASSERT3U(mxdb->mxdb_flags, ==, 0); + ASSERT3P(mxdb->mxdb_dma_handle, ==, NULL); + ASSERT3P(mxdb->mxdb_va, ==, NULL); + ASSERT3U(mxdb->mxdb_len, ==, 0); + ASSERT3U(mxdb->mxdb_ncookies, ==, 0); +} + +void +mlxcx_dma_unbind(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb) +{ + int ret; + + ASSERT(mxdb->mxdb_flags & MLXCX_DMABUF_HDL_ALLOC); + ASSERT(mxdb->mxdb_flags & MLXCX_DMABUF_BOUND); + + if (mxdb->mxdb_flags & MLXCX_DMABUF_FOREIGN) { + /* The mblk will be freed separately */ + mxdb->mxdb_va = NULL; + mxdb->mxdb_len = 0; + mxdb->mxdb_flags &= ~MLXCX_DMABUF_FOREIGN; + } + + ret = ddi_dma_unbind_handle(mxdb->mxdb_dma_handle); + VERIFY3S(ret, ==, DDI_SUCCESS); + mxdb->mxdb_flags &= ~MLXCX_DMABUF_BOUND; + mxdb->mxdb_ncookies = 0; +} + +boolean_t +mlxcx_dma_init(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb, + ddi_dma_attr_t *attrp, boolean_t wait) +{ + int ret; + int (*memcb)(caddr_t); + + if (wait == B_TRUE) { + memcb = DDI_DMA_SLEEP; + } else { + memcb = DDI_DMA_DONTWAIT; + } + + ASSERT3S(mxdb->mxdb_flags, ==, 0); + + ret = ddi_dma_alloc_handle(mlxp->mlx_dip, attrp, memcb, NULL, + &mxdb->mxdb_dma_handle); + if (ret != 0) { + mlxcx_warn(mlxp, "!failed to allocate DMA handle: %d", ret); + mxdb->mxdb_dma_handle = NULL; + return (B_FALSE); + } + mxdb->mxdb_flags |= MLXCX_DMABUF_HDL_ALLOC; + + return (B_TRUE); +} + +boolean_t +mlxcx_dma_bind_mblk(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb, + const mblk_t *mp, size_t off, boolean_t wait) +{ + int ret; + uint_t flags = DDI_DMA_STREAMING; + int (*memcb)(caddr_t); + + if (wait == B_TRUE) { + memcb = DDI_DMA_SLEEP; + } else { + memcb = DDI_DMA_DONTWAIT; + } + + ASSERT(mxdb->mxdb_flags & MLXCX_DMABUF_HDL_ALLOC); + ASSERT0(mxdb->mxdb_flags & + (MLXCX_DMABUF_FOREIGN | MLXCX_DMABUF_MEM_ALLOC)); + ASSERT0(mxdb->mxdb_flags & MLXCX_DMABUF_BOUND); + + ASSERT3U(off, <=, MBLKL(mp)); + mxdb->mxdb_va = (caddr_t)(mp->b_rptr + off); + mxdb->mxdb_len = MBLKL(mp) - off; + mxdb->mxdb_flags |= MLXCX_DMABUF_FOREIGN; + + ret = ddi_dma_addr_bind_handle(mxdb->mxdb_dma_handle, NULL, + mxdb->mxdb_va, mxdb->mxdb_len, DDI_DMA_WRITE | flags, memcb, NULL, + NULL, NULL); + if (ret != DDI_DMA_MAPPED) { + mxdb->mxdb_va = NULL; + mxdb->mxdb_len = 0; + 
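		/* Bind failed: undo the FOREIGN marking set above. */
+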
mxdb->mxdb_flags &= ~MLXCX_DMABUF_FOREIGN; + return (B_FALSE); + } + mxdb->mxdb_flags |= MLXCX_DMABUF_BOUND; + mxdb->mxdb_ncookies = ddi_dma_ncookies(mxdb->mxdb_dma_handle); + + return (B_TRUE); +} + +boolean_t +mlxcx_dma_alloc(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb, + ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero, + size_t size, boolean_t wait) +{ + int ret; + uint_t flags = DDI_DMA_CONSISTENT; + size_t len; + int (*memcb)(caddr_t); + + if (wait == B_TRUE) { + memcb = DDI_DMA_SLEEP; + } else { + memcb = DDI_DMA_DONTWAIT; + } + + ASSERT3U(mxdb->mxdb_flags, ==, 0); + + ret = ddi_dma_alloc_handle(mlxp->mlx_dip, attrp, memcb, NULL, + &mxdb->mxdb_dma_handle); + if (ret != 0) { + mlxcx_warn(mlxp, "!failed to allocate DMA handle: %d", ret); + mxdb->mxdb_dma_handle = NULL; + return (B_FALSE); + } + mxdb->mxdb_flags |= MLXCX_DMABUF_HDL_ALLOC; + + ret = ddi_dma_mem_alloc(mxdb->mxdb_dma_handle, size, accp, flags, memcb, + NULL, &mxdb->mxdb_va, &len, &mxdb->mxdb_acc_handle); + if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "!failed to allocate DMA memory: %d", ret); + mxdb->mxdb_va = NULL; + mxdb->mxdb_acc_handle = NULL; + mlxcx_dma_free(mxdb); + return (B_FALSE); + } + mxdb->mxdb_len = size; + mxdb->mxdb_flags |= MLXCX_DMABUF_MEM_ALLOC; + + if (zero == B_TRUE) + bzero(mxdb->mxdb_va, len); + + ret = ddi_dma_addr_bind_handle(mxdb->mxdb_dma_handle, NULL, + mxdb->mxdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, NULL, + NULL); + if (ret != 0) { + mlxcx_warn(mlxp, "!failed to bind DMA memory: %d", ret); + mlxcx_dma_free(mxdb); + return (B_FALSE); + } + mxdb->mxdb_flags |= MLXCX_DMABUF_BOUND; + mxdb->mxdb_ncookies = ddi_dma_ncookies(mxdb->mxdb_dma_handle); + + return (B_TRUE); +} + +boolean_t +mlxcx_dma_alloc_offset(mlxcx_t *mlxp, mlxcx_dma_buffer_t *mxdb, + ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero, + size_t size, size_t offset, boolean_t wait) +{ + int ret; + uint_t flags = DDI_DMA_STREAMING; + size_t len; + int (*memcb)(caddr_t); + + if (wait == B_TRUE) { + memcb = DDI_DMA_SLEEP; + } else { + memcb = DDI_DMA_DONTWAIT; + } + + ASSERT3U(mxdb->mxdb_flags, ==, 0); + + ret = ddi_dma_alloc_handle(mlxp->mlx_dip, attrp, memcb, NULL, + &mxdb->mxdb_dma_handle); + if (ret != 0) { + mlxcx_warn(mlxp, "!failed to allocate DMA handle: %d", ret); + mxdb->mxdb_dma_handle = NULL; + return (B_FALSE); + } + mxdb->mxdb_flags |= MLXCX_DMABUF_HDL_ALLOC; + + ret = ddi_dma_mem_alloc(mxdb->mxdb_dma_handle, size + offset, accp, + flags, memcb, NULL, &mxdb->mxdb_va, &len, &mxdb->mxdb_acc_handle); + if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "!failed to allocate DMA memory: %d", ret); + mxdb->mxdb_va = NULL; + mxdb->mxdb_acc_handle = NULL; + mlxcx_dma_free(mxdb); + return (B_FALSE); + } + + if (zero == B_TRUE) + bzero(mxdb->mxdb_va, len); + + mxdb->mxdb_va += offset; + len -= offset; + mxdb->mxdb_len = len; + mxdb->mxdb_flags |= MLXCX_DMABUF_MEM_ALLOC; + + ret = ddi_dma_addr_bind_handle(mxdb->mxdb_dma_handle, NULL, + mxdb->mxdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, NULL, + NULL); + if (ret != 0) { + mlxcx_warn(mlxp, "!failed to bind DMA memory: %d", ret); + mlxcx_dma_free(mxdb); + return (B_FALSE); + } + mxdb->mxdb_flags |= MLXCX_DMABUF_BOUND; + mxdb->mxdb_ncookies = ddi_dma_ncookies(mxdb->mxdb_dma_handle); + + return (B_TRUE); +} diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_endint.h b/usr/src/uts/common/io/mlxcx/mlxcx_endint.h new file mode 100644 index 0000000000..4ad69173c0 --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx_endint.h @@ -0,0 +1,305 @@ +/* + * This file and 
its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020, The University of Queensland + */ + +#ifndef _MLXCX_ENDINT_H +#define _MLXCX_ENDINT_H + +#include <sys/types.h> +#include <sys/byteorder.h> + +/* + * The inlines and structs in this file are used by mlxcx to ensure endian + * safety when dealing with memory-mapped structures from the device, and + * also simpler use of 24-bit integers (which Mellanox loves). + * + * By declaring all of these values in the memory-mapped structures as structs + * (e.g. uint32be_t) rather than bare integers (uint32_t) we ensure that the + * compiler will not allow them to be silently converted to integers and used + * without doing the necessary byte-swapping work. + * + * The uintXbe_t structs are designed to be used inside a #pragma pack(1) + * context only and we don't try to fix up their alignment. + * + * Also present in here are a number of bitsX_t types which can be used to + * gain a little bit of type safety when dealing with endian-swapped bitfields. + */ + +#pragma pack(1) +typedef struct { uint16_t be_val; } uint16be_t; +typedef struct { uint8_t be_val[3]; } uint24be_t; +typedef struct { uint32_t be_val; } uint32be_t; +typedef struct { uint64_t be_val; } uint64be_t; +#pragma pack() + +static inline uint16_t +from_be16(uint16be_t v) +{ + return (BE_16(v.be_val)); +} + +static inline uint32_t +from_be24(uint24be_t v) +{ + return (((uint32_t)v.be_val[0] << 16) | + ((uint32_t)v.be_val[1] << 8) | + ((uint32_t)v.be_val[2])); +} + +static inline uint32_t +from_be32(uint32be_t v) +{ + return (BE_32(v.be_val)); +} + +static inline uint64_t +from_be64(uint64be_t v) +{ + return (BE_64(v.be_val)); +} + +static inline uint16be_t +to_be16(uint16_t v) +{ + /* CSTYLED */ + return ((uint16be_t){ .be_val = BE_16(v) }); +} + +static inline uint24be_t +to_be24(uint32_t v) +{ + /* CSTYLED */ + return ((uint24be_t){ .be_val = { + (v & 0xFF0000) >> 16, + (v & 0x00FF00) >> 8, + (v & 0x0000FF) + }}); +} + +static inline uint32be_t +to_be32(uint32_t v) +{ + /* CSTYLED */ + return ((uint32be_t){ .be_val = BE_32(v) }); +} + +static inline uint64be_t +to_be64(uint64_t v) +{ + /* CSTYLED */ + return ((uint64be_t){ .be_val = BE_64(v) }); +} + +#pragma pack(1) +typedef struct { uint8_t bit_val; } bits8_t; +typedef struct { uint16_t bit_val; } bits16_t; +typedef struct { uint32_t bit_val; } bits32_t; +typedef struct { uint24be_t bit_val; } bits24_t; +typedef struct { uint64_t bit_val; } bits64_t; +typedef struct { uint64_t bit_shift; uint64_t bit_mask; } bitdef_t; +#pragma pack() + +static inline uint8_t +get_bits8(bits8_t v, bitdef_t d) +{ + return ((v.bit_val & d.bit_mask) >> d.bit_shift); +} +static inline void +set_bits8(bits8_t *v, bitdef_t d, uint8_t val) +{ + v->bit_val &= ~d.bit_mask; + v->bit_val |= (val << d.bit_shift) & d.bit_mask; +} +static inline uint8_t +get_bit8(bits8_t v, uint8_t mask) +{ + return ((v.bit_val & mask) != 0); +} +static inline void +set_bit8(bits8_t *v, uint8_t mask) +{ + v->bit_val |= mask; +} +static inline void +clear_bit8(bits8_t *v, uint8_t mask) +{ + v->bit_val &= ~mask; +} +static inline bits8_t +new_bits8(void) +{ + /* CSTYLED */ + return ((bits8_t){ .bit_val = 0 }); +} 
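+/*
+ * To illustrate how the bits8_t accessors above fit together (this is a
+ * hypothetical field, not one the driver defines): a 3-bit field at
+ * bits 4-6 of an 8-bit register would be described by a bitdef_t with
+ * bit_shift 4 and bit_mask 0x70, and used like so:
+ *
+ *	bitdef_t examplefield = { .bit_shift = 4, .bit_mask = 0x70 };
+ *	bits8_t r = new_bits8();
+ *	set_bits8(&r, examplefield, 5);		r.bit_val is now 0x50
+ *	(void) get_bits8(r, examplefield);	yields 5
+ *
+ * Concrete bitdefs such as MLXCX_EQ_ARM_EQN are used in just this way
+ * with the wider accessors below (see mlxcx_arm_eq() in mlxcx_intr.c).
+ */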
+static inline uint8_t +from_bits8(bits8_t v) +{ + return (v.bit_val); +} + +static inline uint16_t +get_bits16(bits16_t v, bitdef_t d) +{ + return ((BE_16(v.bit_val) & d.bit_mask) >> d.bit_shift); +} +static inline void +set_bits16(bits16_t *v, bitdef_t d, uint16_t val) +{ + v->bit_val &= BE_16(~d.bit_mask); + v->bit_val |= BE_16((val << d.bit_shift) & d.bit_mask); +} +static inline uint16_t +get_bit16(bits16_t v, uint16_t mask) +{ + return ((BE_16(v.bit_val) & mask) != 0); +} +static inline void +set_bit16(bits16_t *v, uint16_t mask) +{ + v->bit_val |= BE_16(mask); +} +static inline void +clear_bit16(bits16_t *v, uint16_t mask) +{ + v->bit_val &= BE_16(~mask); +} +static inline bits16_t +new_bits16(void) +{ + /* CSTYLED */ + return ((bits16_t){ .bit_val = 0 }); +} +static inline uint16_t +from_bits16(bits16_t v) +{ + return (BE_16(v.bit_val)); +} + +static inline uint32_t +get_bits32(bits32_t v, bitdef_t d) +{ + return ((BE_32(v.bit_val) & d.bit_mask) >> d.bit_shift); +} +static inline void +set_bits32(bits32_t *v, bitdef_t d, uint32_t val) +{ + v->bit_val &= BE_32(~d.bit_mask); + v->bit_val |= BE_32((val << d.bit_shift) & d.bit_mask); +} +static inline uint32_t +get_bit32(bits32_t v, uint32_t mask) +{ + return ((BE_32(v.bit_val) & mask) != 0); +} +static inline void +set_bit32(bits32_t *v, uint32_t mask) +{ + v->bit_val |= BE_32(mask); +} +static inline void +clear_bit32(bits32_t *v, uint32_t mask) +{ + v->bit_val &= BE_32(~mask); +} +static inline bits32_t +new_bits32(void) +{ + /* CSTYLED */ + return ((bits32_t){ .bit_val = 0 }); +} +static inline uint32_t +from_bits32(bits32_t v) +{ + return (BE_32(v.bit_val)); +} + +static inline uint32_t +get_bits24(bits24_t v, bitdef_t d) +{ + return ((from_be24(v.bit_val) & d.bit_mask) >> d.bit_shift); +} +static inline void +set_bits24(bits24_t *v, bitdef_t d, uint32_t val) +{ + uint32_t vv = from_be24(v->bit_val); + vv &= ~d.bit_mask; + vv |= (val << d.bit_shift) & d.bit_mask; + v->bit_val = to_be24(vv); +} +static inline uint32_t +get_bit24(bits24_t v, uint32_t mask) +{ + return ((from_be24(v.bit_val) & mask) != 0); +} +static inline void +set_bit24(bits24_t *v, uint32_t mask) +{ + v->bit_val = to_be24(from_be24(v->bit_val) | mask); +} +static inline void +clear_bit24(bits24_t *v, uint32_t mask) +{ + v->bit_val = to_be24(from_be24(v->bit_val) & ~mask); +} +static inline bits24_t +new_bits24(void) +{ + /* CSTYLED */ + return ((bits24_t){ .bit_val = to_be24(0) }); +} +static inline uint32_t +from_bits24(bits24_t v) +{ + return (from_be24(v.bit_val)); +} + +static inline uint64_t +get_bits64(bits64_t v, bitdef_t d) +{ + return ((BE_64(v.bit_val) & d.bit_mask) >> d.bit_shift); +} +static inline void +set_bits64(bits64_t *v, bitdef_t d, uint64_t val) +{ + v->bit_val &= BE_64(~d.bit_mask); + v->bit_val |= BE_64((val << d.bit_shift) & d.bit_mask); +} +static inline uint64_t +get_bit64(bits64_t v, uint64_t mask) +{ + return ((BE_64(v.bit_val) & mask) != 0); +} +static inline void +set_bit64(bits64_t *v, uint64_t mask) +{ + v->bit_val |= BE_64(mask); +} +static inline void +clear_bit64(bits64_t *v, uint64_t mask) +{ + v->bit_val &= BE_64(~mask); +} +static inline bits64_t +new_bits64(void) +{ + /* CSTYLED */ + return ((bits64_t){ .bit_val = 0 }); +} +static inline uint64_t +from_bits64(bits64_t v) +{ + return (BE_64(v.bit_val)); +} + +#endif /* _MLXCX_ENDINT_H */ diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c new file mode 100644 index 0000000000..871c4f30b3 --- /dev/null +++ 
b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c @@ -0,0 +1,1254 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2020, the University of Queensland + */ + +/* + * Mellanox Connect-X 4/5/6 driver. + */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/sysmacros.h> +#include <sys/vlan.h> + +#include <sys/pattr.h> +#include <sys/dlpi.h> + +#include <sys/mac_provider.h> + +/* Need these for mac_vlan_header_info() */ +#include <sys/mac_client.h> +#include <sys/mac_client_priv.h> + +#include <mlxcx.h> + +static char *mlxcx_priv_props[] = { + NULL +}; + +#define MBITS 1000000ULL +#define GBITS (1000ULL * MBITS) + +static uint64_t +mlxcx_speed_to_bits(mlxcx_eth_proto_t v) +{ + switch (v) { + case MLXCX_PROTO_SGMII_100BASE: + return (100ULL * MBITS); + case MLXCX_PROTO_SGMII: + case MLXCX_PROTO_1000BASE_KX: + return (1000ULL * MBITS); + case MLXCX_PROTO_10GBASE_CX4: + case MLXCX_PROTO_10GBASE_KX4: + case MLXCX_PROTO_10GBASE_KR: + case MLXCX_PROTO_10GBASE_CR: + case MLXCX_PROTO_10GBASE_SR: + case MLXCX_PROTO_10GBASE_ER_LR: + return (10ULL * GBITS); + case MLXCX_PROTO_40GBASE_CR4: + case MLXCX_PROTO_40GBASE_KR4: + case MLXCX_PROTO_40GBASE_SR4: + case MLXCX_PROTO_40GBASE_LR4_ER4: + return (40ULL * GBITS); + case MLXCX_PROTO_25GBASE_CR: + case MLXCX_PROTO_25GBASE_KR: + case MLXCX_PROTO_25GBASE_SR: + return (25ULL * GBITS); + case MLXCX_PROTO_50GBASE_SR2: + case MLXCX_PROTO_50GBASE_CR2: + case MLXCX_PROTO_50GBASE_KR2: + return (50ULL * GBITS); + case MLXCX_PROTO_100GBASE_CR4: + case MLXCX_PROTO_100GBASE_SR4: + case MLXCX_PROTO_100GBASE_KR4: + return (100ULL * GBITS); + default: + return (0); + } +} + +static int +mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat, + uint64_t *val) +{ + int ret = 0; + boolean_t ok; + mlxcx_register_data_t data; + mlxcx_ppcnt_rfc_2863_t *st; + + ASSERT(mutex_owned(&port->mlp_mtx)); + + bzero(&data, sizeof (data)); + data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1; + data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_RFC_2863; + data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR; + + ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_PPCNT, &data); + if (!ok) + return (EIO); + st = &data.mlrd_ppcnt.mlrd_ppcnt_rfc_2863; + + switch (stat) { + case MAC_STAT_RBYTES: + *val = from_be64(st->mlppc_rfc_2863_in_octets); + break; + case MAC_STAT_MULTIRCV: + *val = from_be64(st->mlppc_rfc_2863_in_mcast_pkts); + break; + case MAC_STAT_BRDCSTRCV: + *val = from_be64(st->mlppc_rfc_2863_in_bcast_pkts); + break; + case MAC_STAT_MULTIXMT: + *val = from_be64(st->mlppc_rfc_2863_out_mcast_pkts); + break; + case MAC_STAT_BRDCSTXMT: + *val = from_be64(st->mlppc_rfc_2863_out_bcast_pkts); + break; + case MAC_STAT_IERRORS: + *val = from_be64(st->mlppc_rfc_2863_in_errors); + break; + case MAC_STAT_UNKNOWNS: + *val = from_be64(st->mlppc_rfc_2863_in_unknown_protos); + break; + case MAC_STAT_OERRORS: + *val = from_be64(st->mlppc_rfc_2863_out_errors); + break; + case MAC_STAT_OBYTES: + *val = from_be64(st->mlppc_rfc_2863_out_octets); + break; + default: + ret = ENOTSUP; + } + + return (ret); +} + +static int 
+mlxcx_mac_stat_ieee_802_3(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat, + uint64_t *val) +{ + int ret = 0; + boolean_t ok; + mlxcx_register_data_t data; + mlxcx_ppcnt_ieee_802_3_t *st; + + ASSERT(mutex_owned(&port->mlp_mtx)); + + bzero(&data, sizeof (data)); + data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1; + data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_IEEE_802_3; + data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR; + + ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_PPCNT, &data); + if (!ok) + return (EIO); + st = &data.mlrd_ppcnt.mlrd_ppcnt_ieee_802_3; + + switch (stat) { + case MAC_STAT_IPACKETS: + *val = from_be64(st->mlppc_ieee_802_3_frames_rx); + break; + case MAC_STAT_OPACKETS: + *val = from_be64(st->mlppc_ieee_802_3_frames_tx); + break; + case ETHER_STAT_ALIGN_ERRORS: + *val = from_be64(st->mlppc_ieee_802_3_align_err); + break; + case ETHER_STAT_FCS_ERRORS: + *val = from_be64(st->mlppc_ieee_802_3_fcs_err); + break; + case ETHER_STAT_TOOLONG_ERRORS: + *val = from_be64(st->mlppc_ieee_802_3_frame_too_long_err); + break; + default: + ret = ENOTSUP; + } + + return (ret); +} + +static int +mlxcx_mac_stat(void *arg, uint_t stat, uint64_t *val) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_port_t *port = &mlxp->mlx_ports[0]; + int ret = 0; + + mutex_enter(&port->mlp_mtx); + + switch (stat) { + case MAC_STAT_IFSPEED: + *val = mlxcx_speed_to_bits(port->mlp_oper_proto); + break; + case ETHER_STAT_LINK_DUPLEX: + *val = LINK_DUPLEX_FULL; + break; + case MAC_STAT_RBYTES: + case MAC_STAT_MULTIRCV: + case MAC_STAT_BRDCSTRCV: + case MAC_STAT_MULTIXMT: + case MAC_STAT_BRDCSTXMT: + case MAC_STAT_IERRORS: + case MAC_STAT_UNKNOWNS: + case MAC_STAT_OERRORS: + case MAC_STAT_OBYTES: + ret = mlxcx_mac_stat_rfc_2863(mlxp, port, stat, val); + break; + case MAC_STAT_IPACKETS: + case MAC_STAT_OPACKETS: + case ETHER_STAT_ALIGN_ERRORS: + case ETHER_STAT_FCS_ERRORS: + case ETHER_STAT_TOOLONG_ERRORS: + ret = mlxcx_mac_stat_ieee_802_3(mlxp, port, stat, val); + break; + case MAC_STAT_NORCVBUF: + *val = port->mlp_stats.mlps_rx_drops; + break; + default: + ret = ENOTSUP; + } + + mutex_exit(&port->mlp_mtx); + + return (ret); +} + +static int +mlxcx_mac_led_set(void *arg, mac_led_mode_t mode, uint_t flags) +{ + mlxcx_t *mlxp = arg; + mlxcx_port_t *port = &mlxp->mlx_ports[0]; + int ret = 0; + + if (flags != 0) { + return (EINVAL); + } + + mutex_enter(&port->mlp_mtx); + + switch (mode) { + case MAC_LED_DEFAULT: + case MAC_LED_OFF: + if (!mlxcx_cmd_set_port_led(mlxp, port, 0)) { + ret = EIO; + break; + } + break; + case MAC_LED_IDENT: + if (!mlxcx_cmd_set_port_led(mlxp, port, UINT16_MAX)) { + ret = EIO; + break; + } + break; + default: + ret = ENOTSUP; + } + + mutex_exit(&port->mlp_mtx); + + return (ret); +} + +static int +mlxcx_mac_txr_info(void *arg, uint_t id, mac_transceiver_info_t *infop) +{ + mlxcx_t *mlxp = arg; + mlxcx_module_status_t st; + + if (!mlxcx_cmd_query_module_status(mlxp, id, &st, NULL)) + return (EIO); + + if (st != MLXCX_MODULE_UNPLUGGED) + mac_transceiver_info_set_present(infop, B_TRUE); + + if (st == MLXCX_MODULE_PLUGGED) + mac_transceiver_info_set_usable(infop, B_TRUE); + + return (0); +} + +static int +mlxcx_mac_txr_read(void *arg, uint_t id, uint_t page, void *vbuf, + size_t nbytes, off_t offset, size_t *nread) +{ + mlxcx_t *mlxp = arg; + mlxcx_register_data_t data; + uint8_t *buf = vbuf; + boolean_t ok; + size_t take, done = 0; + uint8_t i2c_addr; + + if (id != 0 || vbuf == NULL || nbytes == 0 || nread == NULL) + return (EINVAL); + + if 
(nbytes > 256 || offset >= 256 || (offset + nbytes > 256)) + return (EINVAL); + + /* + * The PRM is really not very clear about any of this, but it seems + * that the i2c_device_addr field in MCIA is the SFP+ spec "page" + * number shifted right by 1 bit. They're written in the SFF spec + * like "1010000X" so Mellanox just dropped the X. + * + * This means that if we want page 0xA0, we put 0x50 in the + * i2c_device_addr field. + * + * The "page_number" field in MCIA means something else. Don't ask me + * what. FreeBSD leaves it as zero, so we will too! + */ + i2c_addr = page >> 1; + + while (done < nbytes) { + take = nbytes - done; + if (take > sizeof (data.mlrd_mcia.mlrd_mcia_data)) + take = sizeof (data.mlrd_mcia.mlrd_mcia_data); + + bzero(&data, sizeof (data)); + ASSERT3U(id, <=, 0xff); + data.mlrd_mcia.mlrd_mcia_module = (uint8_t)id; + data.mlrd_mcia.mlrd_mcia_i2c_device_addr = i2c_addr; + data.mlrd_mcia.mlrd_mcia_device_addr = to_be16(offset); + data.mlrd_mcia.mlrd_mcia_size = to_be16(take); + + ok = mlxcx_cmd_access_register(mlxp, + MLXCX_CMD_ACCESS_REGISTER_READ, MLXCX_REG_MCIA, &data); + if (!ok) { + *nread = 0; + return (EIO); + } + + if (data.mlrd_mcia.mlrd_mcia_status != MLXCX_MCIA_STATUS_OK) { + *nread = 0; + return (EIO); + } + + bcopy(data.mlrd_mcia.mlrd_mcia_data, &buf[done], take); + + done += take; + offset += take; + } + *nread = done; + return (0); +} + +static int +mlxcx_mac_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) +{ + mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh; + (void) wq; + + /* + * We should add support for using hw flow counters and such to + * get per-ring statistics. Not done yet though! + */ + + switch (stat) { + default: + *val = 0; + return (ENOTSUP); + } + + return (0); +} + +static int +mlxcx_mac_start(void *arg) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + (void) mlxp; + return (0); +} + +static void +mlxcx_mac_stop(void *arg) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + (void) mlxp; +} + +static mblk_t * +mlxcx_mac_ring_tx(void *arg, mblk_t *mp) +{ + mlxcx_work_queue_t *sq = (mlxcx_work_queue_t *)arg; + mlxcx_t *mlxp = sq->mlwq_mlx; + mlxcx_completion_queue_t *cq; + mlxcx_buffer_t *b; + mac_header_info_t mhi; + mblk_t *kmp, *nmp; + uint8_t inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN]; + size_t inline_hdrlen, rem, off; + uint32_t chkflags = 0; + boolean_t ok; + size_t take = 0; + + VERIFY(mp->b_next == NULL); + + mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &chkflags); + + if (mac_vlan_header_info(mlxp->mlx_mac_hdl, mp, &mhi) != 0) { + /* + * We got given a frame without a valid L2 header on it. We + * can't really transmit that (mlx parts don't like it), so + * we will just drop it on the floor. + */ + freemsg(mp); + return (NULL); + } + + inline_hdrlen = rem = mhi.mhi_hdrsize; + + kmp = mp; + off = 0; + while (rem > 0) { + const ptrdiff_t sz = MBLKL(kmp); + ASSERT3S(sz, >=, 0); + ASSERT3U(sz, <=, SIZE_MAX); + take = sz; + if (take > rem) + take = rem; + bcopy(kmp->b_rptr, inline_hdrs + off, take); + rem -= take; + off += take; + if (take == sz) { + take = 0; + kmp = kmp->b_cont; + } + } + + if (!mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b)) { + /* + * Something went really wrong, and we probably will never be + * able to TX again (all our buffers are broken and DMA is + * failing). Drop the packet on the floor -- FMA should be + * reporting this error elsewhere. 
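+		 * (Nothing has been enqueued yet at this point, and the
+		 * b_cont chain is still intact, so the freemsg() below
+		 * releases the entire message, headers included.)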
+		 */
+		freemsg(mp);
+		return (NULL);
+	}
+
+	mutex_enter(&sq->mlwq_mtx);
+	VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2);
+	cq = sq->mlwq_cq;
+
+	/*
+	 * state is a single int, so read-only access without the CQ lock
+	 * should be fine.
+	 */
+	if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) {
+		mutex_exit(&sq->mlwq_mtx);
+		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+		return (NULL);
+	}
+
+	if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+		mutex_exit(&sq->mlwq_mtx);
+		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+		return (NULL);
+	}
+
+	/*
+	 * Similar logic here: bufcnt is only manipulated atomically, and
+	 * bufhwm is set at startup.
+	 */
+	if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm) {
+		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
+		mutex_exit(&sq->mlwq_mtx);
+		mlxcx_buf_return_chain(mlxp, b, B_TRUE);
+		return (mp);
+	}
+
+	ok = mlxcx_sq_add_buffer(mlxp, sq, inline_hdrs, inline_hdrlen,
+	    chkflags, b);
+	if (!ok) {
+		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
+		mutex_exit(&sq->mlwq_mtx);
+		mlxcx_buf_return_chain(mlxp, b, B_TRUE);
+		return (mp);
+	}
+
+	/*
+	 * Now that we've successfully enqueued the rest of the packet,
+	 * free any mblks that we cut off while inlining headers.
+	 */
+	for (; mp != kmp; mp = nmp) {
+		nmp = mp->b_cont;
+		freeb(mp);
+	}
+
+	mutex_exit(&sq->mlwq_mtx);
+
+	return (NULL);
+}
+
+static int
+mlxcx_mac_setpromisc(void *arg, boolean_t on)
+{
+	mlxcx_t *mlxp = (mlxcx_t *)arg;
+	mlxcx_port_t *port = &mlxp->mlx_ports[0];
+	mlxcx_flow_group_t *fg;
+	mlxcx_flow_entry_t *fe;
+	mlxcx_flow_table_t *ft;
+	mlxcx_ring_group_t *g;
+	int ret = 0;
+	uint_t idx;
+
+	mutex_enter(&port->mlp_mtx);
+
+	/*
+	 * First, do the top-level flow entry on the root flow table for
+	 * the port. This catches all traffic that doesn't match any MAC
+	 * filters.
+	 */
+	ft = port->mlp_rx_flow;
+	mutex_enter(&ft->mlft_mtx);
+	fg = port->mlp_promisc;
+	fe = list_head(&fg->mlfg_entries);
+	if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
+		if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
+			ret = EIO;
+		}
+	} else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
+		if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
+			ret = EIO;
+		}
+	}
+	mutex_exit(&ft->mlft_mtx);
+
+	/*
+	 * If we failed to change the top-level entry, don't bother with
+	 * trying the per-group ones.
+	 */
+	if (ret != 0) {
+		mutex_exit(&port->mlp_mtx);
+		return (ret);
+	}
+
+	/*
+	 * Then, do the per-rx-group flow entries which catch traffic that
+	 * matched a MAC filter but failed to match a VLAN filter.
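+	 * For example, with promiscuous mode on, a frame whose destination
+	 * MAC matches one of a group's filters but which carries a VLAN ID
+	 * the group has no entry for would otherwise match nothing; these
+	 * entries catch it so that it is still delivered.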
+ */ + for (idx = 0; idx < mlxp->mlx_rx_ngroups; ++idx) { + g = &mlxp->mlx_rx_groups[idx]; + + mutex_enter(&g->mlg_mtx); + + ft = g->mlg_rx_vlan_ft; + mutex_enter(&ft->mlft_mtx); + + fg = g->mlg_rx_vlan_promisc_fg; + fe = list_head(&fg->mlfg_entries); + if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) { + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + ret = EIO; + } + } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) { + if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) { + ret = EIO; + } + } + + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + } + + mutex_exit(&port->mlp_mtx); + return (ret); +} + +static int +mlxcx_mac_multicast(void *arg, boolean_t add, const uint8_t *addr) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_port_t *port = &mlxp->mlx_ports[0]; + mlxcx_ring_group_t *g = &mlxp->mlx_rx_groups[0]; + int ret = 0; + + mutex_enter(&port->mlp_mtx); + mutex_enter(&g->mlg_mtx); + if (add) { + if (!mlxcx_add_umcast_entry(mlxp, port, g, addr)) { + ret = EIO; + } + } else { + if (!mlxcx_remove_umcast_entry(mlxp, port, g, addr)) { + ret = EIO; + } + } + mutex_exit(&g->mlg_mtx); + mutex_exit(&port->mlp_mtx); + return (ret); +} + +static int +mlxcx_group_add_mac(void *arg, const uint8_t *mac_addr) +{ + mlxcx_ring_group_t *g = arg; + mlxcx_t *mlxp = g->mlg_mlx; + mlxcx_port_t *port = g->mlg_port; + int ret = 0; + + mutex_enter(&port->mlp_mtx); + mutex_enter(&g->mlg_mtx); + if (!mlxcx_add_umcast_entry(mlxp, port, g, mac_addr)) { + ret = EIO; + } + mutex_exit(&g->mlg_mtx); + mutex_exit(&port->mlp_mtx); + + return (ret); +} + +/* + * Support for VLAN steering into groups is not yet available in upstream + * illumos. + */ +#if defined(MAC_VLAN_UNTAGGED) + +static int +mlxcx_group_add_vlan(mac_group_driver_t gh, uint16_t vid) +{ + mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh; + mlxcx_t *mlxp = g->mlg_mlx; + int ret = 0; + boolean_t tagged = B_TRUE; + + if (vid == MAC_VLAN_UNTAGGED) { + vid = 0; + tagged = B_FALSE; + } + + mutex_enter(&g->mlg_mtx); + if (!mlxcx_add_vlan_entry(mlxp, g, tagged, vid)) { + ret = EIO; + } + mutex_exit(&g->mlg_mtx); + + return (ret); +} + +static int +mlxcx_group_remove_vlan(mac_group_driver_t gh, uint16_t vid) +{ + mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh; + mlxcx_t *mlxp = g->mlg_mlx; + int ret = 0; + boolean_t tagged = B_TRUE; + + if (vid == MAC_VLAN_UNTAGGED) { + vid = 0; + tagged = B_FALSE; + } + + mutex_enter(&g->mlg_mtx); + if (!mlxcx_remove_vlan_entry(mlxp, g, tagged, vid)) { + ret = EIO; + } + mutex_exit(&g->mlg_mtx); + + return (ret); +} + +#endif /* MAC_VLAN_UNTAGGED */ + +static int +mlxcx_group_remove_mac(void *arg, const uint8_t *mac_addr) +{ + mlxcx_ring_group_t *g = arg; + mlxcx_t *mlxp = g->mlg_mlx; + mlxcx_port_t *port = g->mlg_port; + int ret = 0; + + mutex_enter(&port->mlp_mtx); + mutex_enter(&g->mlg_mtx); + if (!mlxcx_remove_umcast_entry(mlxp, port, g, mac_addr)) { + ret = EIO; + } + mutex_exit(&g->mlg_mtx); + mutex_exit(&port->mlp_mtx); + + return (ret); +} + +static int +mlxcx_mac_ring_start(mac_ring_driver_t rh, uint64_t gen_num) +{ + mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh; + mlxcx_completion_queue_t *cq = wq->mlwq_cq; + mlxcx_ring_group_t *g = wq->mlwq_group; + mlxcx_t *mlxp = wq->mlwq_mlx; + + ASSERT(cq != NULL); + ASSERT(g != NULL); + + ASSERT(wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ || + wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ); + if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ && + !mlxcx_tx_ring_start(mlxp, g, wq)) + return (EIO); + if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ && + 
!mlxcx_rx_ring_start(mlxp, g, wq)) + return (EIO); + + mutex_enter(&cq->mlcq_mtx); + cq->mlcq_mac_gen = gen_num; + mutex_exit(&cq->mlcq_mtx); + + return (0); +} + +static void +mlxcx_mac_ring_stop(mac_ring_driver_t rh) +{ + mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh; + mlxcx_completion_queue_t *cq = wq->mlwq_cq; + mlxcx_t *mlxp = wq->mlwq_mlx; + mlxcx_buf_shard_t *s; + mlxcx_buffer_t *buf; + + mutex_enter(&cq->mlcq_mtx); + mutex_enter(&wq->mlwq_mtx); + if (wq->mlwq_state & MLXCX_WQ_STARTED) { + if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ && + !mlxcx_cmd_stop_rq(mlxp, wq)) { + mutex_exit(&wq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + return; + } + if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ && + !mlxcx_cmd_stop_sq(mlxp, wq)) { + mutex_exit(&wq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + return; + } + } + ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED); + + if (wq->mlwq_state & MLXCX_WQ_BUFFERS) { + /* Return any outstanding buffers to the free pool. */ + while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) { + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); + } + mutex_enter(&cq->mlcq_bufbmtx); + while ((buf = list_remove_head(&cq->mlcq_buffers_b)) != NULL) { + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); + } + mutex_exit(&cq->mlcq_bufbmtx); + cq->mlcq_bufcnt = 0; + + s = wq->mlwq_bufs; + mutex_enter(&s->mlbs_mtx); + while (!list_is_empty(&s->mlbs_busy)) + cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); + while ((buf = list_head(&s->mlbs_free)) != NULL) { + mlxcx_buf_destroy(mlxp, buf); + } + mutex_exit(&s->mlbs_mtx); + + s = wq->mlwq_foreign_bufs; + if (s != NULL) { + mutex_enter(&s->mlbs_mtx); + while (!list_is_empty(&s->mlbs_busy)) + cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); + while ((buf = list_head(&s->mlbs_free)) != NULL) { + mlxcx_buf_destroy(mlxp, buf); + } + mutex_exit(&s->mlbs_mtx); + } + + wq->mlwq_state &= ~MLXCX_WQ_BUFFERS; + } + ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS); + + mutex_exit(&wq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); +} + +static int +mlxcx_mac_group_start(mac_group_driver_t gh) +{ + mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh; + mlxcx_t *mlxp = g->mlg_mlx; + + VERIFY3S(g->mlg_type, ==, MLXCX_GROUP_RX); + ASSERT(mlxp != NULL); + + if (g->mlg_state & MLXCX_GROUP_RUNNING) + return (0); + + if (!mlxcx_rx_group_start(mlxp, g)) + return (EIO); + + return (0); +} + +static void +mlxcx_mac_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index, + const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_ring_group_t *g; + mlxcx_work_queue_t *wq; + mac_intr_t *mintr = &infop->mri_intr; + + if (rtype != MAC_RING_TYPE_TX) + return; + ASSERT3S(group_index, ==, -1); + + g = &mlxp->mlx_tx_groups[0]; + ASSERT(g->mlg_state & MLXCX_GROUP_INIT); + mutex_enter(&g->mlg_mtx); + + ASSERT3S(ring_index, >=, 0); + ASSERT3S(ring_index, <, g->mlg_nwqs); + + wq = &g->mlg_wqs[ring_index]; + + wq->mlwq_cq->mlcq_mac_hdl = rh; + + infop->mri_driver = (mac_ring_driver_t)wq; + infop->mri_start = mlxcx_mac_ring_start; + infop->mri_stop = mlxcx_mac_ring_stop; + infop->mri_tx = mlxcx_mac_ring_tx; + infop->mri_stat = mlxcx_mac_ring_stat; + + mintr->mi_ddi_handle = mlxp->mlx_intr_handles[ + wq->mlwq_cq->mlcq_eq->mleq_intr_index]; + + mutex_exit(&g->mlg_mtx); +} + +static int +mlxcx_mac_ring_intr_enable(mac_intr_handle_t intrh) +{ + mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh; + mlxcx_event_queue_t *eq = cq->mlcq_eq; + mlxcx_t *mlxp = cq->mlcq_mlx; + + /* + * We are going to call mlxcx_arm_cq() here, so we 
take the EQ lock + * as well as the CQ one to make sure we don't race against + * mlxcx_intr_n(). + */ + mutex_enter(&eq->mleq_mtx); + mutex_enter(&cq->mlcq_mtx); + if (cq->mlcq_state & MLXCX_CQ_POLLING) { + cq->mlcq_state &= ~MLXCX_CQ_POLLING; + if (!(cq->mlcq_state & MLXCX_CQ_ARMED)) + mlxcx_arm_cq(mlxp, cq); + } + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&eq->mleq_mtx); + + return (0); +} + +static int +mlxcx_mac_ring_intr_disable(mac_intr_handle_t intrh) +{ + mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh; + + atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_POLLING); + mutex_enter(&cq->mlcq_mtx); + VERIFY(cq->mlcq_state & MLXCX_CQ_POLLING); + mutex_exit(&cq->mlcq_mtx); + + return (0); +} + +static mblk_t * +mlxcx_mac_ring_rx_poll(void *arg, int poll_bytes) +{ + mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)arg; + mlxcx_completion_queue_t *cq = wq->mlwq_cq; + mlxcx_t *mlxp = wq->mlwq_mlx; + mblk_t *mp; + + ASSERT(cq != NULL); + ASSERT3S(poll_bytes, >, 0); + if (poll_bytes == 0) + return (NULL); + + mutex_enter(&cq->mlcq_mtx); + mp = mlxcx_rx_poll(mlxp, cq, poll_bytes); + mutex_exit(&cq->mlcq_mtx); + + return (mp); +} + +static void +mlxcx_mac_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index, + const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_ring_group_t *g; + mlxcx_work_queue_t *wq; + mac_intr_t *mintr = &infop->mri_intr; + + if (rtype != MAC_RING_TYPE_RX) + return; + ASSERT3S(group_index, >=, 0); + ASSERT3S(group_index, <, mlxp->mlx_rx_ngroups); + + g = &mlxp->mlx_rx_groups[group_index]; + ASSERT(g->mlg_state & MLXCX_GROUP_INIT); + mutex_enter(&g->mlg_mtx); + + ASSERT3S(ring_index, >=, 0); + ASSERT3S(ring_index, <, g->mlg_nwqs); + + ASSERT(g->mlg_state & MLXCX_GROUP_WQS); + wq = &g->mlg_wqs[ring_index]; + + wq->mlwq_cq->mlcq_mac_hdl = rh; + + infop->mri_driver = (mac_ring_driver_t)wq; + infop->mri_start = mlxcx_mac_ring_start; + infop->mri_stop = mlxcx_mac_ring_stop; + infop->mri_poll = mlxcx_mac_ring_rx_poll; + infop->mri_stat = mlxcx_mac_ring_stat; + + mintr->mi_handle = (mac_intr_handle_t)wq->mlwq_cq; + mintr->mi_enable = mlxcx_mac_ring_intr_enable; + mintr->mi_disable = mlxcx_mac_ring_intr_disable; + + mintr->mi_ddi_handle = mlxp->mlx_intr_handles[ + wq->mlwq_cq->mlcq_eq->mleq_intr_index]; + + mutex_exit(&g->mlg_mtx); +} + +static void +mlxcx_mac_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index, + mac_group_info_t *infop, mac_group_handle_t gh) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_ring_group_t *g; + + if (rtype != MAC_RING_TYPE_RX) + return; + + ASSERT3S(index, >=, 0); + ASSERT3S(index, <, mlxp->mlx_rx_ngroups); + g = &mlxp->mlx_rx_groups[index]; + ASSERT(g->mlg_state & MLXCX_GROUP_INIT); + + g->mlg_mac_hdl = gh; + + infop->mgi_driver = (mac_group_driver_t)g; + infop->mgi_start = mlxcx_mac_group_start; + infop->mgi_stop = NULL; + infop->mgi_addmac = mlxcx_group_add_mac; + infop->mgi_remmac = mlxcx_group_remove_mac; +#if defined(MAC_VLAN_UNTAGGED) + infop->mgi_addvlan = mlxcx_group_add_vlan; + infop->mgi_remvlan = mlxcx_group_remove_vlan; +#endif /* MAC_VLAN_UNTAGGED */ + + infop->mgi_count = g->mlg_nwqs; +} + +static boolean_t +mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mac_capab_rings_t *cap_rings; + mac_capab_led_t *cap_leds; + mac_capab_transceiver_t *cap_txr; + uint_t i, n = 0; + + switch (cap) { + + case MAC_CAPAB_RINGS: + cap_rings = cap_data; + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + 
switch (cap_rings->mr_type) {
+		case MAC_RING_TYPE_TX:
+			cap_rings->mr_gnum = 0;
+			cap_rings->mr_rnum = mlxp->mlx_tx_groups[0].mlg_nwqs;
+			cap_rings->mr_rget = mlxcx_mac_fill_tx_ring;
+			cap_rings->mr_gget = NULL;
+			cap_rings->mr_gaddring = NULL;
+			cap_rings->mr_gremring = NULL;
+			break;
+		case MAC_RING_TYPE_RX:
+			cap_rings->mr_gnum = mlxp->mlx_rx_ngroups;
+			for (i = 0; i < mlxp->mlx_rx_ngroups; ++i)
+				n += mlxp->mlx_rx_groups[i].mlg_nwqs;
+			cap_rings->mr_rnum = n;
+			cap_rings->mr_rget = mlxcx_mac_fill_rx_ring;
+			cap_rings->mr_gget = mlxcx_mac_fill_rx_group;
+			cap_rings->mr_gaddring = NULL;
+			cap_rings->mr_gremring = NULL;
+			break;
+		default:
+			return (B_FALSE);
+		}
+		break;
+
+	case MAC_CAPAB_HCKSUM:
+		if (mlxp->mlx_caps->mlc_checksum) {
+			*(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 |
+			    HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM;
+		}
+		break;
+
+	case MAC_CAPAB_LED:
+		cap_leds = cap_data;
+
+		cap_leds->mcl_flags = 0;
+		cap_leds->mcl_modes = MAC_LED_DEFAULT | MAC_LED_OFF |
+		    MAC_LED_IDENT;
+		cap_leds->mcl_set = mlxcx_mac_led_set;
+		break;
+
+	case MAC_CAPAB_TRANSCEIVER:
+		cap_txr = cap_data;
+
+		cap_txr->mct_flags = 0;
+		cap_txr->mct_ntransceivers = 1;
+		cap_txr->mct_info = mlxcx_mac_txr_info;
+		cap_txr->mct_read = mlxcx_mac_txr_read;
+		break;
+
+	default:
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+static void
+mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+    mac_prop_info_handle_t prh)
+{
+	mlxcx_t *mlxp = (mlxcx_t *)arg;
+	mlxcx_port_t *port = &mlxp->mlx_ports[0];
+
+	mutex_enter(&port->mlp_mtx);
+
+	switch (pr_num) {
+	case MAC_PROP_DUPLEX:
+	case MAC_PROP_SPEED:
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+		break;
+	case MAC_PROP_MTU:
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+		mac_prop_info_set_range_uint32(prh, MLXCX_MTU_OFFSET,
+		    port->mlp_max_mtu);
+		mac_prop_info_set_default_uint32(prh,
+		    port->mlp_mtu - MLXCX_MTU_OFFSET);
+		break;
+	case MAC_PROP_AUTONEG:
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+		mac_prop_info_set_default_uint8(prh, 1);
+		break;
+	default:
+		break;
+	}
+
+	mutex_exit(&port->mlp_mtx);
+}
+
+static int
+mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+    uint_t pr_valsize, const void *pr_val)
+{
+	mlxcx_t *mlxp = (mlxcx_t *)arg;
+	mlxcx_port_t *port = &mlxp->mlx_ports[0];
+	int ret = 0;
+	uint32_t new_mtu, new_hw_mtu, old_mtu;
+	mlxcx_buf_shard_t *sh;
+	boolean_t allocd = B_FALSE;
+
+	mutex_enter(&port->mlp_mtx);
+
+	switch (pr_num) {
+	case MAC_PROP_MTU:
+		bcopy(pr_val, &new_mtu, sizeof (new_mtu));
+		new_hw_mtu = new_mtu + MLXCX_MTU_OFFSET;
+		if (new_hw_mtu == port->mlp_mtu)
+			break;
+		if (new_hw_mtu > port->mlp_max_mtu) {
+			ret = EINVAL;
+			break;
+		}
+		sh = list_head(&mlxp->mlx_buf_shards);
+		for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
+			mutex_enter(&sh->mlbs_mtx);
+			if (!list_is_empty(&sh->mlbs_free) ||
+			    !list_is_empty(&sh->mlbs_busy)) {
+				allocd = B_TRUE;
+				mutex_exit(&sh->mlbs_mtx);
+				break;
+			}
+			mutex_exit(&sh->mlbs_mtx);
+		}
+		if (allocd) {
+			ret = EBUSY;
+			break;
+		}
+		old_mtu = port->mlp_mtu;
+		ret = mac_maxsdu_update(mlxp->mlx_mac_hdl, new_mtu);
+		if (ret != 0)
+			break;
+		port->mlp_mtu = new_hw_mtu;
+		if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, port,
+		    MLXCX_MODIFY_NIC_VPORT_CTX_MTU)) {
+			port->mlp_mtu = old_mtu;
+			(void) mac_maxsdu_update(mlxp->mlx_mac_hdl,
+			    old_mtu - MLXCX_MTU_OFFSET);
+			ret = EIO;
+			break;
+		}
+		if (!mlxcx_cmd_set_port_mtu(mlxp, port)) {
+			port->mlp_mtu = old_mtu;
+			(void) mac_maxsdu_update(mlxp->mlx_mac_hdl,
+			    old_mtu - MLXCX_MTU_OFFSET);
+			ret = EIO;
+			break;
+		}
+		break;
+	
default: + ret = ENOTSUP; + break; + } + + mutex_exit(&port->mlp_mtx); + + return (ret); +} + +static int +mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, + uint_t pr_valsize, void *pr_val) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_port_t *port = &mlxp->mlx_ports[0]; + uint64_t speed; + int ret = 0; + + mutex_enter(&port->mlp_mtx); + + switch (pr_num) { + case MAC_PROP_DUPLEX: + if (pr_valsize < sizeof (link_duplex_t)) { + ret = EOVERFLOW; + break; + } + /* connectx parts only support full duplex */ + *(link_duplex_t *)pr_val = LINK_DUPLEX_FULL; + break; + case MAC_PROP_SPEED: + if (pr_valsize < sizeof (uint64_t)) { + ret = EOVERFLOW; + break; + } + speed = mlxcx_speed_to_bits(port->mlp_oper_proto); + bcopy(&speed, pr_val, sizeof (speed)); + break; + case MAC_PROP_STATUS: + if (pr_valsize < sizeof (link_state_t)) { + ret = EOVERFLOW; + break; + } + switch (port->mlp_oper_status) { + case MLXCX_PORT_STATUS_UP: + case MLXCX_PORT_STATUS_UP_ONCE: + *(link_state_t *)pr_val = LINK_STATE_UP; + break; + case MLXCX_PORT_STATUS_DOWN: + *(link_state_t *)pr_val = LINK_STATE_DOWN; + break; + default: + *(link_state_t *)pr_val = LINK_STATE_UNKNOWN; + } + break; + case MAC_PROP_AUTONEG: + if (pr_valsize < sizeof (uint8_t)) { + ret = EOVERFLOW; + break; + } + *(uint8_t *)pr_val = port->mlp_autoneg; + break; + case MAC_PROP_MTU: + if (pr_valsize < sizeof (uint32_t)) { + ret = EOVERFLOW; + break; + } + *(uint32_t *)pr_val = port->mlp_mtu - MLXCX_MTU_OFFSET; + break; + default: + ret = ENOTSUP; + break; + } + + mutex_exit(&port->mlp_mtx); + + return (ret); +} + +#define MLXCX_MAC_CALLBACK_FLAGS \ + (MC_GETCAPAB | MC_GETPROP | MC_PROPINFO | MC_SETPROP) + +static mac_callbacks_t mlxcx_mac_callbacks = { + .mc_callbacks = MLXCX_MAC_CALLBACK_FLAGS, + .mc_getstat = mlxcx_mac_stat, + .mc_start = mlxcx_mac_start, + .mc_stop = mlxcx_mac_stop, + .mc_setpromisc = mlxcx_mac_setpromisc, + .mc_multicst = mlxcx_mac_multicast, + .mc_ioctl = NULL, + .mc_getcapab = mlxcx_mac_getcapab, + .mc_setprop = mlxcx_mac_setprop, + .mc_getprop = mlxcx_mac_getprop, + .mc_propinfo = mlxcx_mac_propinfo, + .mc_tx = NULL, + .mc_unicst = NULL, +}; + +boolean_t +mlxcx_register_mac(mlxcx_t *mlxp) +{ + mac_register_t *mac = mac_alloc(MAC_VERSION); + mlxcx_port_t *port; + int ret; + + if (mac == NULL) + return (B_FALSE); + + VERIFY3U(mlxp->mlx_nports, ==, 1); + port = &mlxp->mlx_ports[0]; + + mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; + mac->m_driver = mlxp; + mac->m_dip = mlxp->mlx_dip; + mac->m_src_addr = port->mlp_mac_address; + mac->m_callbacks = &mlxcx_mac_callbacks; + mac->m_min_sdu = MLXCX_MTU_OFFSET; + mac->m_max_sdu = port->mlp_mtu - MLXCX_MTU_OFFSET; + mac->m_margin = VLAN_TAGSZ; + mac->m_priv_props = mlxcx_priv_props; + mac->m_v12n = MAC_VIRT_LEVEL1; + + ret = mac_register(mac, &mlxp->mlx_mac_hdl); + if (ret != 0) { + mlxcx_warn(mlxp, "mac_register() returned %d", ret); + } + mac_free(mac); + + mlxcx_update_link_state(mlxp, port); + + return (ret == 0); +} diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c new file mode 100644 index 0000000000..0516f86d6b --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c @@ -0,0 +1,1010 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2020, the University of Queensland + */ + +/* + * Mellanox Connect-X 4/5/6 driver. + */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/sysmacros.h> + +#include <sys/mac_provider.h> + +#include <mlxcx.h> + +void +mlxcx_intr_teardown(mlxcx_t *mlxp) +{ + int i; + int ret; + + for (i = 0; i < mlxp->mlx_intr_count; ++i) { + mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[i]; + mutex_enter(&mleq->mleq_mtx); + VERIFY0(mleq->mleq_state & MLXCX_EQ_ALLOC); + if (mleq->mleq_state & MLXCX_EQ_CREATED) + VERIFY(mleq->mleq_state & MLXCX_EQ_DESTROYED); + if (i != 0) { + VERIFY(avl_is_empty(&mleq->mleq_cqs)); + avl_destroy(&mleq->mleq_cqs); + } + mutex_exit(&mleq->mleq_mtx); + (void) ddi_intr_disable(mlxp->mlx_intr_handles[i]); + (void) ddi_intr_remove_handler(mlxp->mlx_intr_handles[i]); + ret = ddi_intr_free(mlxp->mlx_intr_handles[i]); + if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "failed to free interrupt %d: %d", + i, ret); + } + mutex_destroy(&mleq->mleq_mtx); + } + kmem_free(mlxp->mlx_intr_handles, mlxp->mlx_intr_size); + kmem_free(mlxp->mlx_eqs, mlxp->mlx_eqs_size); + mlxp->mlx_intr_handles = NULL; + mlxp->mlx_eqs = NULL; +} + +/* + * Get the next SW-owned entry on the event queue, or NULL if we reach the end. + */ +static mlxcx_eventq_ent_t * +mlxcx_eq_next(mlxcx_event_queue_t *mleq) +{ + mlxcx_eventq_ent_t *ent; + ddi_fm_error_t err; + uint_t ci; + const uint_t swowner = ((mleq->mleq_cc >> mleq->mleq_entshift) & 1); + + ASSERT(mutex_owned(&mleq->mleq_mtx)); + ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED); + ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED); + + /* mleq_nents is always a power of 2 */ + ci = mleq->mleq_cc & (mleq->mleq_nents - 1); + + ent = &mleq->mleq_ent[ci]; + VERIFY0(ddi_dma_sync(mleq->mleq_dma.mxdb_dma_handle, + (uintptr_t)ent - (uintptr_t)mleq->mleq_ent, + sizeof (mlxcx_eventq_ent_t), DDI_DMA_SYNC_FORCPU)); + ddi_fm_dma_err_get(mleq->mleq_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status == DDI_FM_OK && (ent->mleqe_owner & 1) == swowner) { + /* The PRM says we have to membar here, so we're doing it */ + membar_consumer(); + ++mleq->mleq_cc; + return (ent); + } + /* + * In the case of a DMA error, we should re-arm this EQ and then come + * back and try again when the device wakes us back up. + * + * Hopefully the fault will be gone by then. 
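+	 * (mleq_cc is only advanced on the success path above, so the
+	 * entry we failed on here will be examined again on that retry.)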
+ */ + ddi_fm_dma_err_clear(mleq->mleq_dma.mxdb_dma_handle, DDI_FME_VERSION); + + return (NULL); +} + +void +mlxcx_arm_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq) +{ + uint_t try = 0; + ddi_fm_error_t err; + bits32_t v = new_bits32(); + + ASSERT(mutex_owned(&mleq->mleq_mtx)); + ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED); + ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED); + ASSERT0(mleq->mleq_state & MLXCX_EQ_ARMED); + ASSERT0(mleq->mleq_state & MLXCX_EQ_POLLING); + + mleq->mleq_state |= MLXCX_EQ_ARMED; + mleq->mleq_cc_armed = mleq->mleq_cc; + + set_bits32(&v, MLXCX_EQ_ARM_EQN, mleq->mleq_num); + set_bits32(&v, MLXCX_EQ_ARM_CI, mleq->mleq_cc); + +retry: + mlxcx_uar_put32(mlxp, mleq->mleq_uar, MLXCX_UAR_EQ_ARM, + from_bits32(v)); + ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err, + DDI_FME_VERSION); + if (err.fme_status == DDI_FM_OK) + return; + if (try++ < mlxcx_doorbell_tries) { + ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION); + goto retry; + } + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST); +} + +static void +mlxcx_update_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq) +{ + bits32_t v = new_bits32(); + ddi_fm_error_t err; + + ASSERT(mutex_owned(&mleq->mleq_mtx)); + ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED); + ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED); + ASSERT0(mleq->mleq_state & MLXCX_EQ_ARMED); + + set_bits32(&v, MLXCX_EQ_ARM_EQN, mleq->mleq_num); + set_bits32(&v, MLXCX_EQ_ARM_CI, mleq->mleq_cc); + + mlxcx_uar_put32(mlxp, mleq->mleq_uar, MLXCX_UAR_EQ_NOARM, + from_bits32(v)); + ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err, + DDI_FME_VERSION); + ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION); + /* + * Ignore the error, if it's still happening when we try to re-arm the + * EQ, we will note the impact then. 
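+	 * (Unlike mlxcx_arm_eq(), this function writes the
+	 * MLXCX_UAR_EQ_NOARM doorbell: the consumer counter is updated
+	 * but the EQ is left un-armed.)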
+ */ +} + +static mlxcx_completionq_ent_t * +mlxcx_cq_next(mlxcx_completion_queue_t *mlcq) +{ + mlxcx_completionq_ent_t *ent; + ddi_fm_error_t err; + uint_t ci; + const uint_t swowner = ((mlcq->mlcq_cc >> mlcq->mlcq_entshift) & 1); + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + ASSERT(mlcq->mlcq_state & MLXCX_CQ_CREATED); + ASSERT0(mlcq->mlcq_state & MLXCX_CQ_DESTROYED); + + /* mlcq_nents is always a power of 2 */ + ci = mlcq->mlcq_cc & (mlcq->mlcq_nents - 1); + + ent = &mlcq->mlcq_ent[ci]; + VERIFY0(ddi_dma_sync(mlcq->mlcq_dma.mxdb_dma_handle, + (uintptr_t)ent - (uintptr_t)mlcq->mlcq_ent, + sizeof (mlxcx_completionq_ent_t), DDI_DMA_SYNC_FORCPU)); + ddi_fm_dma_err_get(mlcq->mlcq_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status == DDI_FM_OK && (ent->mlcqe_owner & 1) == swowner) { + /* The PRM says we have to membar here, so we're doing it */ + membar_consumer(); + ++mlcq->mlcq_cc; + return (ent); + } + ddi_fm_dma_err_clear(mlcq->mlcq_dma.mxdb_dma_handle, DDI_FME_VERSION); + + return (NULL); +} + +void +mlxcx_arm_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq) +{ + bits32_t dbval = new_bits32(); + uint64_t udbval; + ddi_fm_error_t err; + uint_t try = 0; + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + ASSERT(mlcq->mlcq_state & MLXCX_CQ_CREATED); + ASSERT0(mlcq->mlcq_state & MLXCX_CQ_DESTROYED); + + if (mlcq->mlcq_state & MLXCX_CQ_ARMED) + ASSERT3U(mlcq->mlcq_ec, >, mlcq->mlcq_ec_armed); + + if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN) + return; + + mlcq->mlcq_state |= MLXCX_CQ_ARMED; + mlcq->mlcq_cc_armed = mlcq->mlcq_cc; + mlcq->mlcq_ec_armed = mlcq->mlcq_ec; + + set_bits32(&dbval, MLXCX_CQ_ARM_SEQ, mlcq->mlcq_ec); + set_bits32(&dbval, MLXCX_CQ_ARM_CI, mlcq->mlcq_cc); + + udbval = (uint64_t)from_bits32(dbval) << 32; + udbval |= mlcq->mlcq_num & 0xffffff; + + mlcq->mlcq_doorbell->mlcqd_update_ci = to_be24(mlcq->mlcq_cc); + mlcq->mlcq_doorbell->mlcqd_arm_ci = dbval; + +retry: + MLXCX_DMA_SYNC(mlcq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(mlcq->mlcq_doorbell_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + if (try++ < mlxcx_doorbell_tries) { + ddi_fm_dma_err_clear( + mlcq->mlcq_doorbell_dma.mxdb_dma_handle, + DDI_FME_VERSION); + goto retry; + } else { + goto err; + } + } + + mlxcx_uar_put64(mlxp, mlcq->mlcq_uar, MLXCX_UAR_CQ_ARM, udbval); + ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err, + DDI_FME_VERSION); + if (err.fme_status == DDI_FM_OK) + return; + if (try++ < mlxcx_doorbell_tries) { + ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION); + goto retry; + } + +err: + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST); +} + +const char * +mlxcx_event_name(mlxcx_event_t evt) +{ + switch (evt) { + case MLXCX_EVENT_COMPLETION: + return ("COMPLETION"); + case MLXCX_EVENT_PATH_MIGRATED: + return ("PATH_MIGRATED"); + case MLXCX_EVENT_COMM_ESTABLISH: + return ("COMM_ESTABLISH"); + case MLXCX_EVENT_SENDQ_DRAIN: + return ("SENDQ_DRAIN"); + case MLXCX_EVENT_LAST_WQE: + return ("LAST_WQE"); + case MLXCX_EVENT_SRQ_LIMIT: + return ("SRQ_LIMIT"); + case MLXCX_EVENT_DCT_ALL_CLOSED: + return ("DCT_ALL_CLOSED"); + case MLXCX_EVENT_DCT_ACCKEY_VIOL: + return ("DCT_ACCKEY_VIOL"); + case MLXCX_EVENT_CQ_ERROR: + return ("CQ_ERROR"); + case MLXCX_EVENT_WQ_CATASTROPHE: + return ("WQ_CATASTROPHE"); + case MLXCX_EVENT_PATH_MIGRATE_FAIL: + return ("PATH_MIGRATE_FAIL"); + case MLXCX_EVENT_PAGE_FAULT: + return ("PAGE_FAULT"); + case MLXCX_EVENT_WQ_INVALID_REQ: + return ("WQ_INVALID_REQ"); + case MLXCX_EVENT_WQ_ACCESS_VIOL: + return 
("WQ_ACCESS_VIOL"); + case MLXCX_EVENT_SRQ_CATASTROPHE: + return ("SRQ_CATASTROPHE"); + case MLXCX_EVENT_INTERNAL_ERROR: + return ("INTERNAL_ERROR"); + case MLXCX_EVENT_PORT_STATE: + return ("PORT_STATE"); + case MLXCX_EVENT_GPIO: + return ("GPIO"); + case MLXCX_EVENT_PORT_MODULE: + return ("PORT_MODULE"); + case MLXCX_EVENT_TEMP_WARNING: + return ("TEMP_WARNING"); + case MLXCX_EVENT_REMOTE_CONFIG: + return ("REMOTE_CONFIG"); + case MLXCX_EVENT_DCBX_CHANGE: + return ("DCBX_CHANGE"); + case MLXCX_EVENT_DOORBELL_CONGEST: + return ("DOORBELL_CONGEST"); + case MLXCX_EVENT_STALL_VL: + return ("STALL_VL"); + case MLXCX_EVENT_CMD_COMPLETION: + return ("CMD_COMPLETION"); + case MLXCX_EVENT_PAGE_REQUEST: + return ("PAGE_REQUEST"); + case MLXCX_EVENT_NIC_VPORT: + return ("NIC_VPORT"); + case MLXCX_EVENT_EC_PARAMS_CHANGE: + return ("EC_PARAMS_CHANGE"); + case MLXCX_EVENT_XRQ_ERROR: + return ("XRQ_ERROR"); + } + return ("UNKNOWN"); +} + +/* Should be called only when link state has changed. */ +void +mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port) +{ + link_state_t ls; + + mutex_enter(&port->mlp_mtx); + (void) mlxcx_cmd_query_port_status(mlxp, port); + (void) mlxcx_cmd_query_port_speed(mlxp, port); + + switch (port->mlp_oper_status) { + case MLXCX_PORT_STATUS_UP: + case MLXCX_PORT_STATUS_UP_ONCE: + ls = LINK_STATE_UP; + break; + case MLXCX_PORT_STATUS_DOWN: + ls = LINK_STATE_DOWN; + break; + default: + ls = LINK_STATE_UNKNOWN; + } + mac_link_update(mlxp->mlx_mac_hdl, ls); + + mutex_exit(&port->mlp_mtx); +} + +static void +mlxcx_give_pages_once(mlxcx_t *mlxp, size_t npages) +{ + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + mlxcx_dev_page_t *mdp; + int32_t togive; + mlxcx_dev_page_t *pages[MLXCX_MANAGE_PAGES_MAX_PAGES]; + uint_t i; + const ddi_dma_cookie_t *ck; + + togive = MIN(npages, MLXCX_MANAGE_PAGES_MAX_PAGES); + + for (i = 0; i < togive; i++) { + mdp = kmem_zalloc(sizeof (mlxcx_dev_page_t), KM_SLEEP); + mlxcx_dma_acc_attr(mlxp, &acc); + mlxcx_dma_page_attr(mlxp, &attr); + if (!mlxcx_dma_alloc(mlxp, &mdp->mxdp_dma, &attr, &acc, + B_TRUE, MLXCX_HW_PAGE_SIZE, B_TRUE)) { + mlxcx_warn(mlxp, "failed to allocate 4k page %u/%u", i, + togive); + goto cleanup_npages; + } + ck = mlxcx_dma_cookie_one(&mdp->mxdp_dma); + mdp->mxdp_pa = ck->dmac_laddress; + pages[i] = mdp; + } + + mutex_enter(&mlxp->mlx_pagemtx); + + if (!mlxcx_cmd_give_pages(mlxp, + MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES, togive, pages)) { + mlxcx_warn(mlxp, "!hardware refused our gift of %u " + "pages!", togive); + goto cleanup_npages; + } + + for (i = 0; i < togive; i++) { + avl_add(&mlxp->mlx_pages, pages[i]); + } + mlxp->mlx_npages += togive; + mutex_exit(&mlxp->mlx_pagemtx); + + return; + +cleanup_npages: + for (i = 0; i < togive; i++) { + mdp = pages[i]; + mlxcx_dma_free(&mdp->mxdp_dma); + kmem_free(mdp, sizeof (mlxcx_dev_page_t)); + } + /* Tell the hardware we had an allocation failure. 
*/
+	(void) mlxcx_cmd_give_pages(mlxp, MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL,
+	    0, NULL);
+	mutex_exit(&mlxp->mlx_pagemtx);
+}
+
+static void
+mlxcx_take_pages_once(mlxcx_t *mlxp, size_t npages)
+{
+	uint_t i;
+	int32_t req, ret;
+	uint64_t pas[MLXCX_MANAGE_PAGES_MAX_PAGES];
+	mlxcx_dev_page_t *mdp, probe;
+
+	mutex_enter(&mlxp->mlx_pagemtx);
+
+	ASSERT0(avl_is_empty(&mlxp->mlx_pages));
+	req = MIN(npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
+
+	if (!mlxcx_cmd_return_pages(mlxp, req, pas, &ret)) {
+		mutex_exit(&mlxp->mlx_pagemtx);
+		return;
+	}
+
+	for (i = 0; i < ret; i++) {
+		bzero(&probe, sizeof (probe));
+		probe.mxdp_pa = pas[i];
+
+		mdp = avl_find(&mlxp->mlx_pages, &probe, NULL);
+
+		if (mdp != NULL) {
+			avl_remove(&mlxp->mlx_pages, mdp);
+			mlxp->mlx_npages--;
+			mlxcx_dma_free(&mdp->mxdp_dma);
+			kmem_free(mdp, sizeof (mlxcx_dev_page_t));
+		} else {
+			mlxcx_warn(mlxp, "hardware returned a page "
+			    "with PA 0x%" PRIx64 " but we have no "
+			    "record of giving out such a page", pas[i]);
+		}
+	}
+
+	mutex_exit(&mlxp->mlx_pagemtx);
+}
+
+static const char *
+mlxcx_module_error_string(mlxcx_module_error_type_t err)
+{
+	switch (err) {
+	case MLXCX_MODULE_ERR_POWER_BUDGET:
+		return ("POWER_BUDGET");
+	case MLXCX_MODULE_ERR_LONG_RANGE:
+		return ("LONG_RANGE");
+	case MLXCX_MODULE_ERR_BUS_STUCK:
+		return ("BUS_STUCK");
+	case MLXCX_MODULE_ERR_NO_EEPROM:
+		return ("NO_EEPROM");
+	case MLXCX_MODULE_ERR_ENFORCEMENT:
+		return ("ENFORCEMENT");
+	case MLXCX_MODULE_ERR_UNKNOWN_IDENT:
+		return ("UNKNOWN_IDENT");
+	case MLXCX_MODULE_ERR_HIGH_TEMP:
+		return ("HIGH_TEMP");
+	case MLXCX_MODULE_ERR_CABLE_SHORTED:
+		return ("CABLE_SHORTED");
+	default:
+		return ("UNKNOWN");
+	}
+}
+
+static void
+mlxcx_report_module_error(mlxcx_t *mlxp, mlxcx_evdata_port_mod_t *evd)
+{
+	uint64_t ena;
+	char buf[FM_MAX_CLASS];
+	const char *lename;
+	const char *ename;
+	const char *stname;
+	uint_t eno = 0;
+	mlxcx_module_status_t state = evd->mled_port_mod_module_status;
+
+	switch (state) {
+	case MLXCX_MODULE_ERROR:
+		stname = "error";
+		eno = evd->mled_port_mod_error_type;
+		lename = mlxcx_module_error_string(eno);
+		switch (eno) {
+		case MLXCX_MODULE_ERR_ENFORCEMENT:
+			ename = DDI_FM_TXR_ERROR_WHITELIST;
+			break;
+		case MLXCX_MODULE_ERR_UNKNOWN_IDENT:
+		case MLXCX_MODULE_ERR_NO_EEPROM:
+			ename = DDI_FM_TXR_ERROR_NOTSUPP;
+			break;
+		case MLXCX_MODULE_ERR_HIGH_TEMP:
+			ename = DDI_FM_TXR_ERROR_OVERTEMP;
+			break;
+		case MLXCX_MODULE_ERR_POWER_BUDGET:
+		case MLXCX_MODULE_ERR_LONG_RANGE:
+		case MLXCX_MODULE_ERR_CABLE_SHORTED:
+			ename = DDI_FM_TXR_ERROR_HWFAIL;
+			break;
+		case MLXCX_MODULE_ERR_BUS_STUCK:
+		default:
+			ename = DDI_FM_TXR_ERROR_UNKNOWN;
+		}
+		break;
+	default:
+		return;
+	}
+
+	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
+	    DDI_FM_NIC, DDI_FM_TXR_ERROR);
+	ena = fm_ena_generate(0, FM_ENA_FMT1);
+	if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
+		return;
+
+	ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
+	    /* compulsory FM props */
+	    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
+	    /* generic NIC txr error event props */
+	    "error", DATA_TYPE_STRING, ename,
+	    "port_index", DATA_TYPE_UINT8, 0,
+	    "txr_index", DATA_TYPE_UINT8, evd->mled_port_mod_module,
+	    /* local props */
+	    "mlxcx_state", DATA_TYPE_STRING, stname,
+	    "mlxcx_error", DATA_TYPE_STRING, lename,
+	    "mlxcx_error_num", DATA_TYPE_UINT8, eno,
+	    NULL);
+	ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
+}
+
+static uint_t
+mlxcx_intr_0(caddr_t arg, caddr_t arg2)
+{
+	mlxcx_t *mlxp = (mlxcx_t *)arg;
+	mlxcx_event_queue_t *mleq = (mlxcx_event_queue_t *)arg2;
+	mlxcx_eventq_ent_t *ent;
+	
mlxcx_port_t *port; + uint_t portn; + int32_t npages = 0; + + mutex_enter(&mleq->mleq_mtx); + + if (!(mleq->mleq_state & MLXCX_EQ_ALLOC) || + !(mleq->mleq_state & MLXCX_EQ_CREATED) || + (mleq->mleq_state & MLXCX_EQ_DESTROYED)) { + mlxcx_warn(mlxp, "int0 on bad eq state"); + mutex_exit(&mleq->mleq_mtx); + return (DDI_INTR_UNCLAIMED); + } + + ent = mlxcx_eq_next(mleq); + if (ent == NULL) { + mlxcx_warn(mlxp, "spurious int 0?"); + mutex_exit(&mleq->mleq_mtx); + return (DDI_INTR_UNCLAIMED); + } + + ASSERT(mleq->mleq_state & MLXCX_EQ_ARMED); + mleq->mleq_state &= ~MLXCX_EQ_ARMED; + + for (; ent != NULL; ent = mlxcx_eq_next(mleq)) { + switch (ent->mleqe_event_type) { + case MLXCX_EVENT_PAGE_REQUEST: + VERIFY3U(from_be16(ent->mleqe_page_request. + mled_page_request_function_id), ==, 0); + npages += (int32_t)from_be32(ent->mleqe_page_request. + mled_page_request_num_pages); + break; + case MLXCX_EVENT_PORT_STATE: + portn = get_bits8( + ent->mleqe_port_state.mled_port_state_port_num, + MLXCX_EVENT_PORT_NUM) - 1; + if (portn >= mlxp->mlx_nports) + break; + port = &mlxp->mlx_ports[portn]; + mlxcx_update_link_state(mlxp, port); + break; + case MLXCX_EVENT_PORT_MODULE: + mlxcx_report_module_error(mlxp, &ent->mleqe_port_mod); + break; + default: + mlxcx_warn(mlxp, "unhandled event 0x%x on int0", + ent->mleqe_event_type); + } + } + + if (npages > 0) { + mlxcx_give_pages_once(mlxp, npages); + } else if (npages < 0) { + mlxcx_take_pages_once(mlxp, -1 * npages); + } + + mlxcx_arm_eq(mlxp, mleq); + mutex_exit(&mleq->mleq_mtx); + + return (DDI_INTR_CLAIMED); +} + +mblk_t * +mlxcx_rx_poll(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, size_t bytelim) +{ + mlxcx_buffer_t *buf; + mblk_t *mp, *cmp, *nmp; + mlxcx_completionq_ent_t *cent; + size_t bytes = 0; + boolean_t found; + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + + ASSERT(mlcq->mlcq_wq != NULL); + ASSERT3U(mlcq->mlcq_wq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ); + + if (!(mlcq->mlcq_state & MLXCX_CQ_ALLOC) || + !(mlcq->mlcq_state & MLXCX_CQ_CREATED) || + (mlcq->mlcq_state & MLXCX_CQ_DESTROYED) || + (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)) { + return (NULL); + } + + ASSERT(mlcq->mlcq_state & MLXCX_CQ_POLLING); + + nmp = cmp = mp = NULL; + + cent = mlxcx_cq_next(mlcq); + for (; cent != NULL; cent = mlxcx_cq_next(mlcq)) { + /* + * Teardown and ring stop can atomic_or this flag + * into our state if they want us to stop early. + */ + if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN) + break; + + if (cent->mlcqe_opcode == MLXCX_CQE_OP_REQ && + cent->mlcqe_send_wqe_opcode == MLXCX_WQE_OP_NOP) { + /* NOP */ + goto nextcq; + } + + buf = list_head(&mlcq->mlcq_buffers); + found = B_FALSE; + while (buf != NULL) { + if ((buf->mlb_wqe_index & UINT16_MAX) == + from_be16(cent->mlcqe_wqe_counter)) { + found = B_TRUE; + break; + } + buf = list_next(&mlcq->mlcq_buffers, buf); + } + if (!found) { + buf = list_head(&mlcq->mlcq_buffers); + mlxcx_warn(mlxp, "got completion on CQ %x but " + "no buffer matching wqe found: %x (first " + "buffer counter = %x)", mlcq->mlcq_num, + from_be16(cent->mlcqe_wqe_counter), + buf == NULL ? 
UINT32_MAX : buf->mlb_wqe_index); + mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_INVAL_STATE); + goto nextcq; + } + list_remove(&mlcq->mlcq_buffers, buf); + atomic_dec_64(&mlcq->mlcq_bufcnt); + + nmp = mlxcx_rx_completion(mlxp, mlcq, cent, buf); + if (nmp != NULL) { + bytes += from_be32(cent->mlcqe_byte_cnt); + if (cmp != NULL) { + cmp->b_next = nmp; + cmp = nmp; + } else { + mp = cmp = nmp; + } + } +nextcq: + mlcq->mlcq_doorbell->mlcqd_update_ci = to_be24(mlcq->mlcq_cc); + + if (bytelim != 0 && bytes > bytelim) + break; + } + + return (mp); +} + +static uint_t +mlxcx_intr_n(caddr_t arg, caddr_t arg2) +{ + mlxcx_t *mlxp = (mlxcx_t *)arg; + mlxcx_event_queue_t *mleq = (mlxcx_event_queue_t *)arg2; + mlxcx_eventq_ent_t *ent; + mlxcx_completionq_ent_t *cent; + mlxcx_completion_queue_t *mlcq, probe; + mlxcx_buffer_t *buf; + mblk_t *mp, *cmp, *nmp; + boolean_t found, tellmac = B_FALSE, added; + + mutex_enter(&mleq->mleq_mtx); + + if (!(mleq->mleq_state & MLXCX_EQ_ALLOC) || + !(mleq->mleq_state & MLXCX_EQ_CREATED) || + (mleq->mleq_state & MLXCX_EQ_DESTROYED)) { + mutex_exit(&mleq->mleq_mtx); + return (DDI_INTR_CLAIMED); + } + + ent = mlxcx_eq_next(mleq); + if (ent == NULL) { + if (++mleq->mleq_badintrs > mlxcx_stuck_intr_count) { + mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_BADINT_LIMIT); + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST); + (void) ddi_intr_disable(mlxp->mlx_intr_handles[ + mleq->mleq_intr_index]); + } + mutex_exit(&mleq->mleq_mtx); + return (DDI_INTR_CLAIMED); + } + mleq->mleq_badintrs = 0; + + ASSERT(mleq->mleq_state & MLXCX_EQ_ARMED); + mleq->mleq_state &= ~MLXCX_EQ_ARMED; + + for (; ent != NULL; ent = mlxcx_eq_next(mleq)) { + if (ent->mleqe_event_type != MLXCX_EVENT_COMPLETION) { + mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_INVAL_STATE); + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST); + (void) ddi_intr_disable(mlxp->mlx_intr_handles[ + mleq->mleq_intr_index]); + mutex_exit(&mleq->mleq_mtx); + return (DDI_INTR_CLAIMED); + } + ASSERT3U(ent->mleqe_event_type, ==, MLXCX_EVENT_COMPLETION); + + probe.mlcq_num = + from_be24(ent->mleqe_completion.mled_completion_cqn); + mlcq = avl_find(&mleq->mleq_cqs, &probe, NULL); + + if (mlcq == NULL) + continue; + + /* + * The polling function might have the mutex and stop us from + * getting the lock here, so we increment the event counter + * atomically from outside. + * + * This way at the end of polling when we go back to interrupts + * from this CQ, the event counter is still correct. + * + * Note that mlxcx_mac_ring_intr_enable() takes the EQ lock so + * as to avoid any possibility of racing against us here, so we + * only have to consider mlxcx_rx_poll(). + */ + atomic_inc_32(&mlcq->mlcq_ec); + atomic_and_uint(&mlcq->mlcq_state, ~MLXCX_CQ_ARMED); + + if (mutex_tryenter(&mlcq->mlcq_mtx) == 0) { + /* + * If we failed to take the mutex because the polling + * function has it, just move on. We don't want to + * block other CQs behind this one. + */ + if (mlcq->mlcq_state & MLXCX_CQ_POLLING) + continue; + /* Otherwise we will wait. 
*/ + mutex_enter(&mlcq->mlcq_mtx); + } + + if (!(mlcq->mlcq_state & MLXCX_CQ_ALLOC) || + !(mlcq->mlcq_state & MLXCX_CQ_CREATED) || + (mlcq->mlcq_state & MLXCX_CQ_DESTROYED) || + (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN) || + (mlcq->mlcq_state & MLXCX_CQ_POLLING)) { + mutex_exit(&mlcq->mlcq_mtx); + continue; + } + + nmp = cmp = mp = NULL; + tellmac = B_FALSE; + + cent = mlxcx_cq_next(mlcq); + for (; cent != NULL; cent = mlxcx_cq_next(mlcq)) { + /* + * Teardown and ring stop can atomic_or this flag + * into our state if they want us to stop early. + */ + if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN) + break; + if (mlcq->mlcq_state & MLXCX_CQ_POLLING) + break; + + if (cent->mlcqe_opcode == MLXCX_CQE_OP_REQ && + cent->mlcqe_send_wqe_opcode == MLXCX_WQE_OP_NOP) { + /* NOP */ + goto nextcq; + } + +lookagain: + /* + * Generally the buffer we're looking for will be + * at the front of the list, so this loop won't + * need to look far. + */ + buf = list_head(&mlcq->mlcq_buffers); + found = B_FALSE; + while (buf != NULL) { + if ((buf->mlb_wqe_index & UINT16_MAX) == + from_be16(cent->mlcqe_wqe_counter)) { + found = B_TRUE; + break; + } + buf = list_next(&mlcq->mlcq_buffers, buf); + } + if (!found) { + /* + * If there's any buffers waiting on the + * buffers_b list, then merge those into + * the main list and have another look. + * + * The wq enqueue routines push new buffers + * into buffers_b so that they can avoid + * taking the mlcq_mtx and blocking us for + * every single packet. + */ + added = B_FALSE; + mutex_enter(&mlcq->mlcq_bufbmtx); + if (!list_is_empty(&mlcq->mlcq_buffers_b)) { + list_move_tail(&mlcq->mlcq_buffers, + &mlcq->mlcq_buffers_b); + added = B_TRUE; + } + mutex_exit(&mlcq->mlcq_bufbmtx); + if (added) + goto lookagain; + } + if (!found) { + buf = list_head(&mlcq->mlcq_buffers); + mlxcx_warn(mlxp, "got completion on CQ %x but " + "no buffer matching wqe found: %x (first " + "buffer counter = %x)", mlcq->mlcq_num, + from_be16(cent->mlcqe_wqe_counter), + buf == NULL ? UINT32_MAX : + buf->mlb_wqe_index); + mlxcx_fm_ereport(mlxp, + DDI_FM_DEVICE_INVAL_STATE); + goto nextcq; + } + list_remove(&mlcq->mlcq_buffers, buf); + atomic_dec_64(&mlcq->mlcq_bufcnt); + + switch (mlcq->mlcq_wq->mlwq_type) { + case MLXCX_WQ_TYPE_SENDQ: + mlxcx_tx_completion(mlxp, mlcq, cent, buf); + break; + case MLXCX_WQ_TYPE_RECVQ: + nmp = mlxcx_rx_completion(mlxp, mlcq, cent, + buf); + if (nmp != NULL) { + if (cmp != NULL) { + cmp->b_next = nmp; + cmp = nmp; + } else { + mp = cmp = nmp; + } + } + break; + } + +nextcq: + /* + * Update the "doorbell" consumer counter for the queue + * every time. Unlike a UAR write, this is relatively + * cheap and doesn't require us to go out on the bus + * straight away (since it's our memory). + */ + mlcq->mlcq_doorbell->mlcqd_update_ci = + to_be24(mlcq->mlcq_cc); + + if ((mlcq->mlcq_state & MLXCX_CQ_BLOCKED_MAC) && + mlcq->mlcq_bufcnt < mlcq->mlcq_buflwm) { + mlcq->mlcq_state &= ~MLXCX_CQ_BLOCKED_MAC; + tellmac = B_TRUE; + } + } + + mlxcx_arm_cq(mlxp, mlcq); + mutex_exit(&mlcq->mlcq_mtx); + + if (tellmac) { + mac_tx_ring_update(mlxp->mlx_mac_hdl, + mlcq->mlcq_mac_hdl); + } + if (mp != NULL) { + mac_rx_ring(mlxp->mlx_mac_hdl, mlcq->mlcq_mac_hdl, + mp, mlcq->mlcq_mac_gen); + } + + /* + * Updating the consumer counter for an EQ requires a write + * to the UAR, which is possibly expensive. + * + * Try to do it only often enough to stop us wrapping around. 
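+		 * Masking the consumer counter with 0x7 below achieves
+		 * this: the UAR write happens at most once for every
+		 * eight events we process.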
+ */ + if ((mleq->mleq_cc & 0x7) == 0) + mlxcx_update_eq(mlxp, mleq); + } + + mlxcx_arm_eq(mlxp, mleq); + mutex_exit(&mleq->mleq_mtx); + + return (DDI_INTR_CLAIMED); +} + +boolean_t +mlxcx_intr_setup(mlxcx_t *mlxp) +{ + dev_info_t *dip = mlxp->mlx_dip; + int ret; + int nintrs = 0; + int navail = 0; + int types, i; + mlxcx_eventq_type_t eqt = MLXCX_EQ_TYPE_ANY; + + ret = ddi_intr_get_supported_types(dip, &types); + if (ret != DDI_SUCCESS) { + return (B_FALSE); + } + + if (!(types & DDI_INTR_TYPE_MSIX)) { + mlxcx_warn(mlxp, "MSI-X interrupts not available, but mlxcx " + "requires MSI-X"); + return (B_FALSE); + } + + ret = ddi_intr_get_nintrs(dip, DDI_INTR_TYPE_MSIX, &nintrs); + if (ret != DDI_SUCCESS) { + return (B_FALSE); + } + if (nintrs < 2) { + mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx " + "requires 2", nintrs); + return (B_FALSE); + } + + ret = ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX, &navail); + if (navail < 2) { + mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx " + "requires 2", navail); + return (B_FALSE); + } + + mlxp->mlx_intr_size = navail * sizeof (ddi_intr_handle_t); + mlxp->mlx_intr_handles = kmem_alloc(mlxp->mlx_intr_size, KM_SLEEP); + + ret = ddi_intr_alloc(dip, mlxp->mlx_intr_handles, DDI_INTR_TYPE_MSIX, + 0, navail, &mlxp->mlx_intr_count, DDI_INTR_ALLOC_NORMAL); + if (ret != DDI_SUCCESS) { + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + if (mlxp->mlx_intr_count < 2) { + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + mlxp->mlx_intr_type = DDI_INTR_TYPE_MSIX; + + ret = ddi_intr_get_pri(mlxp->mlx_intr_handles[0], &mlxp->mlx_intr_pri); + if (ret != DDI_SUCCESS) { + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + + mlxp->mlx_eqs_size = mlxp->mlx_intr_count * + sizeof (mlxcx_event_queue_t); + mlxp->mlx_eqs = kmem_zalloc(mlxp->mlx_eqs_size, KM_SLEEP); + + ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[0], mlxcx_intr_0, + (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[0]); + if (ret != DDI_SUCCESS) { + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + + /* + * If we have enough interrupts, set their "type" fields so that we + * avoid mixing RX and TX queues on the same EQs. + */ + if (mlxp->mlx_intr_count >= 8) { + eqt = MLXCX_EQ_TYPE_RX; + } + + for (i = 1; i < mlxp->mlx_intr_count; ++i) { + mutex_init(&mlxp->mlx_eqs[i].mleq_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + avl_create(&mlxp->mlx_eqs[i].mleq_cqs, mlxcx_cq_compare, + sizeof (mlxcx_completion_queue_t), + offsetof(mlxcx_completion_queue_t, mlcq_eq_entry)); + mlxp->mlx_eqs[i].mleq_intr_index = i; + + mlxp->mlx_eqs[i].mleq_type = eqt; + /* + * If eqt is still ANY, just leave it set to that + * (no else here). + */ + if (eqt == MLXCX_EQ_TYPE_RX) { + eqt = MLXCX_EQ_TYPE_TX; + } else if (eqt == MLXCX_EQ_TYPE_TX) { + eqt = MLXCX_EQ_TYPE_RX; + } + + ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[i], + mlxcx_intr_n, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[i]); + if (ret != DDI_SUCCESS) { + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + } + + return (B_TRUE); +} diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h new file mode 100644 index 0000000000..76d0da30e7 --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h @@ -0,0 +1,2481 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. 
+ * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020, The University of Queensland + * Copyright (c) 2018, Joyent, Inc. + */ + +#ifndef _MLXCX_REG_H +#define _MLXCX_REG_H + +#include <sys/types.h> +#include <sys/byteorder.h> + +#include <mlxcx_endint.h> + +#if !defined(_BIT_FIELDS_HTOL) && !defined(_BIT_FIELDS_LTOH) +#error "Need _BIT_FIELDS_HTOL or _BIT_FIELDS_LTOH" +#endif + +/* + * Register offsets. + */ + +#define MLXCX_ISS_FIRMWARE 0x0000 +#define MLXCX_ISS_FW_MAJOR(x) (((x) & 0xffff)) +#define MLXCX_ISS_FW_MINOR(x) (((x) >> 16) & 0xffff) +#define MLXCX_ISS_FW_CMD 0x0004 +#define MLXCX_ISS_FW_REV(x) (((x) & 0xffff)) +#define MLXCX_ISS_CMD_REV(x) (((x) >> 16) & 0xffff) +#define MLXCX_ISS_CMD_HIGH 0x0010 +#define MLXCX_ISS_CMD_LOW 0x0014 +#define MLXCX_ISS_CMDQ_SIZE(x) (((x) >> 4) & 0xf) +#define MLXCX_ISS_CMDQ_STRIDE(x) ((x) & 0xf) + +#define MLXCX_ISS_CMD_DOORBELL 0x0018 +#define MLXCX_ISS_INIT 0x01fc +#define MLXCX_ISS_INITIALIZING(x) (((x) >> 31) & 0x1) +#define MLXCX_ISS_HEALTH_BUF 0x0200 +#define MLXCX_ISS_NO_DRAM_NIC 0x0240 +#define MLXCX_ISS_TIMER 0x1000 +#define MLXCX_ISS_HEALTH_COUNT 0x1010 +#define MLXCX_ISS_HEALTH_SYND 0x1013 + +#define MLXCX_CMD_INLINE_INPUT_LEN 16 +#define MLXCX_CMD_INLINE_OUTPUT_LEN 16 + +#define MLXCX_CMD_MAILBOX_LEN 512 + +#define MLXCX_CMD_TRANSPORT_PCI 7 +#define MLXCX_CMD_HW_OWNED 0x01 +#define MLXCX_CMD_STATUS(x) ((x) >> 1) + +#define MLXCX_UAR_CQ_ARM 0x0020 +#define MLXCX_UAR_EQ_ARM 0x0040 +#define MLXCX_UAR_EQ_NOARM 0x0048 + +/* Number of blue flame reg pairs per UAR */ +#define MLXCX_BF_PER_UAR 2 +#define MLXCX_BF_PER_UAR_MASK 0x1 +#define MLXCX_BF_SIZE 0x100 +#define MLXCX_BF_BASE 0x0800 + +/* CSTYLED */ +#define MLXCX_EQ_ARM_EQN (bitdef_t){24, 0xff000000} +/* CSTYLED */ +#define MLXCX_EQ_ARM_CI (bitdef_t){0, 0x00ffffff} + +/* + * Hardware structure that is used to represent a command. 
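+ * Up to MLXCX_CMD_INLINE_INPUT_LEN (16) bytes of input and
+ * MLXCX_CMD_INLINE_OUTPUT_LEN (16) bytes of output travel inline in
+ * the entry itself; larger payloads are chained through
+ * mlxcx_cmd_mailbox_t blocks reached via mce_in_mbox/mce_out_mbox.
+ *
+ * As a rough sketch (helper names such as to_be32() assumed to come
+ * from mlxcx_endint.h; error handling omitted), posting a command
+ * whose input fits inline might look like:
+ *
+ *	ent->mce_type = MLXCX_CMD_TRANSPORT_PCI;
+ *	ent->mce_in_length = to_be32(sizeof (in));
+ *	bcopy(&in, ent->mce_input, sizeof (in));
+ *	ent->mce_out_length = to_be32(sizeof (out));
+ *	ent->mce_token = token;
+ *	ent->mce_status = MLXCX_CMD_HW_OWNED;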
+ */ +#pragma pack(1) +typedef struct { + uint8_t mce_type; + uint8_t mce_rsvd[3]; + uint32be_t mce_in_length; + uint64be_t mce_in_mbox; + uint8_t mce_input[MLXCX_CMD_INLINE_INPUT_LEN]; + uint8_t mce_output[MLXCX_CMD_INLINE_OUTPUT_LEN]; + uint64be_t mce_out_mbox; + uint32be_t mce_out_length; + uint8_t mce_token; + uint8_t mce_sig; + uint8_t mce_rsvd1; + uint8_t mce_status; +} mlxcx_cmd_ent_t; + +typedef struct { + uint8_t mlxb_data[MLXCX_CMD_MAILBOX_LEN]; + uint8_t mlxb_rsvd[48]; + uint64be_t mlxb_nextp; + uint32be_t mlxb_blockno; + uint8_t mlxb_rsvd1; + uint8_t mlxb_token; + uint8_t mlxb_ctrl_sig; + uint8_t mlxb_sig; +} mlxcx_cmd_mailbox_t; + +typedef struct { + uint8_t mled_page_request_rsvd[2]; + uint16be_t mled_page_request_function_id; + uint32be_t mled_page_request_num_pages; +} mlxcx_evdata_page_request_t; + +/* CSTYLED */ +#define MLXCX_EVENT_PORT_NUM (bitdef_t){ .bit_shift = 4, .bit_mask = 0xF0 } + +typedef struct { + uint8_t mled_port_state_rsvd[8]; + bits8_t mled_port_state_port_num; +} mlxcx_evdata_port_state_t; + +typedef enum { + MLXCX_MODULE_INITIALIZING = 0x0, + MLXCX_MODULE_PLUGGED = 0x1, + MLXCX_MODULE_UNPLUGGED = 0x2, + MLXCX_MODULE_ERROR = 0x3 +} mlxcx_module_status_t; + +typedef enum { + MLXCX_MODULE_ERR_POWER_BUDGET = 0x0, + MLXCX_MODULE_ERR_LONG_RANGE = 0x1, + MLXCX_MODULE_ERR_BUS_STUCK = 0x2, + MLXCX_MODULE_ERR_NO_EEPROM = 0x3, + MLXCX_MODULE_ERR_ENFORCEMENT = 0x4, + MLXCX_MODULE_ERR_UNKNOWN_IDENT = 0x5, + MLXCX_MODULE_ERR_HIGH_TEMP = 0x6, + MLXCX_MODULE_ERR_CABLE_SHORTED = 0x7, +} mlxcx_module_error_type_t; + +typedef struct { + uint8_t mled_port_mod_rsvd; + uint8_t mled_port_mod_module; + uint8_t mled_port_mod_rsvd2; + uint8_t mled_port_mod_module_status; + uint8_t mled_port_mod_rsvd3[2]; + uint8_t mled_port_mod_error_type; + uint8_t mled_port_mod_rsvd4; +} mlxcx_evdata_port_mod_t; + +typedef struct { + uint8_t mled_completion_rsvd[25]; + uint24be_t mled_completion_cqn; +} mlxcx_evdata_completion_t; + +typedef enum { + MLXCX_EV_QUEUE_TYPE_QP = 0x0, + MLXCX_EV_QUEUE_TYPE_RQ = 0x1, + MLXCX_EV_QUEUE_TYPE_SQ = 0x2, +} mlxcx_evdata_queue_type_t; + +typedef struct { + uint8_t mled_queue_rsvd[20]; + uint8_t mled_queue_type; + uint8_t mled_queue_rsvd2[4]; + uint24be_t mled_queue_num; +} mlxcx_evdata_queue_t; + +#define MLXCX_EQ_OWNER_INIT 1 + +typedef struct { + uint8_t mleqe_rsvd[1]; + uint8_t mleqe_event_type; + uint8_t mleqe_rsvd2[1]; + uint8_t mleqe_event_sub_type; + uint8_t mleqe_rsvd3[28]; + union { + uint8_t mleqe_unknown_data[28]; + mlxcx_evdata_completion_t mleqe_completion; + mlxcx_evdata_page_request_t mleqe_page_request; + mlxcx_evdata_port_state_t mleqe_port_state; + mlxcx_evdata_port_mod_t mleqe_port_mod; + mlxcx_evdata_queue_t mleqe_queue; + }; + uint8_t mleqe_rsvd4[2]; + uint8_t mleqe_signature; + uint8_t mleqe_owner; +} mlxcx_eventq_ent_t; + +typedef enum { + MLXCX_CQE_L3_HDR_NONE = 0x0, + MLXCX_CQE_L3_HDR_RCV_BUF = 0x1, + MLXCX_CQE_L3_HDR_CQE = 0x2, +} mlxcx_cqe_l3_hdr_placement_t; + +typedef enum { + MLXCX_CQE_CSFLAGS_L4_OK = 1 << 2, + MLXCX_CQE_CSFLAGS_L3_OK = 1 << 1, + MLXCX_CQE_CSFLAGS_L2_OK = 1 << 0, +} mlxcx_cqe_csflags_t; + +typedef enum { + MLXCX_CQE_L4_TYPE_NONE = 0, + MLXCX_CQE_L4_TYPE_TCP = 1, + MLXCX_CQE_L4_TYPE_UDP = 2, + MLXCX_CQE_L4_TYPE_TCP_EMPTY_ACK = 3, + MLXCX_CQE_L4_TYPE_TCP_ACK = 4, +} mlxcx_cqe_l4_hdr_type_t; + +typedef enum { + MLXCX_CQE_L3_TYPE_NONE = 0, + MLXCX_CQE_L3_TYPE_IPv6 = 1, + MLXCX_CQE_L3_TYPE_IPv4 = 2, +} mlxcx_cqe_l3_hdr_type_t; + +typedef enum { + MLXCX_CQE_RX_HASH_NONE = 0, + MLXCX_CQE_RX_HASH_IPv4 = 1, + 
MLXCX_CQE_RX_HASH_IPv6 = 2, + MLXCX_CQE_RX_HASH_IPSEC_SPI = 3, +} mlxcx_cqe_rx_hash_type_t; +/* BEGIN CSTYLED */ +#define MLXCX_CQE_RX_HASH_IP_SRC (bitdef_t){0, 0x3} +#define MLXCX_CQE_RX_HASH_IP_DEST (bitdef_t){2, (0x3 << 2)} +#define MLXCX_CQE_RX_HASH_L4_SRC (bitdef_t){4, (0x3 << 4)} +#define MLXCX_CQE_RX_HASH_L4_DEST (bitdef_t){6, (0x3 << 6)} +/* END CSTYLED */ + +typedef enum { + MLXCX_CQE_OP_REQ = 0x0, + MLXCX_CQE_OP_RESP_RDMA = 0x1, + MLXCX_CQE_OP_RESP = 0x2, + MLXCX_CQE_OP_RESP_IMMEDIATE = 0x3, + MLXCX_CQE_OP_RESP_INVALIDATE = 0x4, + MLXCX_CQE_OP_RESIZE_CQ = 0x5, + MLXCX_CQE_OP_SIG_ERR = 0x12, + MLXCX_CQE_OP_REQ_ERR = 0xd, + MLXCX_CQE_OP_RESP_ERR = 0xe, + MLXCX_CQE_OP_INVALID = 0xf +} mlxcx_cqe_opcode_t; + +typedef enum { + MLXCX_CQE_FORMAT_BASIC = 0, + MLXCX_CQE_FORMAT_INLINE_32 = 1, + MLXCX_CQE_FORMAT_INLINE_64 = 2, + MLXCX_CQE_FORMAT_COMPRESSED = 3, +} mlxcx_cqe_format_t; + +typedef enum { + MLXCX_CQE_OWNER_INIT = 1 +} mlxcx_cqe_owner_t; + +typedef enum { + MLXCX_VLAN_TYPE_NONE, + MLXCX_VLAN_TYPE_CVLAN, + MLXCX_VLAN_TYPE_SVLAN, +} mlxcx_vlan_type_t; + +typedef enum { + MLXCX_CQ_ERR_LOCAL_LENGTH = 0x1, + MLXCX_CQ_ERR_LOCAL_QP_OP = 0x2, + MLXCX_CQ_ERR_LOCAL_PROTECTION = 0x4, + MLXCX_CQ_ERR_WR_FLUSHED = 0x5, + MLXCX_CQ_ERR_MEM_WINDOW_BIND = 0x6, + MLXCX_CQ_ERR_BAD_RESPONSE = 0x10, + MLXCX_CQ_ERR_LOCAL_ACCESS = 0x11, + MLXCX_CQ_ERR_XPORT_RETRY_CTR = 0x15, + MLXCX_CQ_ERR_RNR_RETRY_CTR = 0x16, + MLXCX_CQ_ERR_ABORTED = 0x22 +} mlxcx_cq_error_syndrome_t; + +typedef struct { + uint8_t mlcqee_rsvd[2]; + uint16be_t mlcqee_wqe_id; + uint8_t mlcqee_rsvd2[29]; + uint24be_t mlcqee_user_index; + uint8_t mlcqee_rsvd3[8]; + uint32be_t mlcqee_byte_cnt; + uint8_t mlcqee_rsvd4[6]; + uint8_t mlcqee_vendor_error_syndrome; + uint8_t mlcqee_syndrome; + uint8_t mlcqee_wqe_opcode; + uint24be_t mlcqee_flow_tag; + uint16be_t mlcqee_wqe_counter; + uint8_t mlcqee_signature; + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcqe_opcode:4; + uint8_t mlcqe_rsvd5:3; + uint8_t mlcqe_owner:1; +#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcqe_owner:1; + uint8_t mlcqe_rsvd5:3; + uint8_t mlcqe_opcode:4; +#endif + }; +} mlxcx_completionq_error_ent_t; + +typedef struct { + uint8_t mlcqe_tunnel_flags; + uint8_t mlcqe_rsvd[3]; + uint8_t mlcqe_lro_flags; + uint8_t mlcqe_lro_min_ttl; + uint16be_t mlcqe_lro_tcp_win; + uint32be_t mlcqe_lro_ack_seq_num; + uint32be_t mlcqe_rx_hash_result; + bits8_t mlcqe_rx_hash_type; + uint8_t mlcqe_ml_path; + uint8_t mlcqe_rsvd2[2]; + uint16be_t mlcqe_checksum; + uint16be_t mlcqe_slid_smac_lo; + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcqe_rsvd3:1; + uint8_t mlcqe_force_loopback:1; + uint8_t mlcqe_l3_hdr:2; + uint8_t mlcqe_sl_roce_pktype:4; +#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcqe_sl_roce_pktype:4; + uint8_t mlcqe_l3_hdr:2; + uint8_t mlcqe_force_loopback:1; + uint8_t mlcqe_rsvd3:1; +#endif + }; + uint24be_t mlcqe_rqpn; + bits8_t mlcqe_csflags; + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcqe_ip_frag:1; + uint8_t mlcqe_l4_hdr_type:3; + uint8_t mlcqe_l3_hdr_type:2; + uint8_t mlcqe_ip_ext_opts:1; + uint8_t mlcqe_cv:1; +#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcqe_cv:1; + uint8_t mlcqe_ip_ext_opts:1; + uint8_t mlcqe_l3_hdr_type:2; + uint8_t mlcqe_l4_hdr_type:3; + uint8_t mlcqe_ip_frag:1; +#endif + }; + uint16be_t mlcqe_up_cfi_vid; + uint8_t mlcqe_lro_num_seg; + uint24be_t mlcqe_user_index; + uint32be_t mlcqe_immediate; + uint8_t mlcqe_rsvd4[4]; + uint32be_t mlcqe_byte_cnt; + union { + struct { + uint32be_t mlcqe_lro_timestamp_value; + uint32be_t 
mlcqe_lro_timestamp_echo; + }; + uint64be_t mlcqe_timestamp; + }; + union { + uint8_t mlcqe_rx_drop_counter; + uint8_t mlcqe_send_wqe_opcode; + }; + uint24be_t mlcqe_flow_tag; + uint16be_t mlcqe_wqe_counter; + uint8_t mlcqe_signature; + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcqe_opcode:4; + uint8_t mlcqe_format:2; + uint8_t mlcqe_se:1; + uint8_t mlcqe_owner:1; +#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcqe_owner:1; + uint8_t mlcqe_se:1; + uint8_t mlcqe_format:2; + uint8_t mlcqe_opcode:4; +#endif + }; +} mlxcx_completionq_ent_t; + +typedef struct { + uint8_t mlcqe_data[64]; + mlxcx_completionq_ent_t mlcqe_ent; +} mlxcx_completionq_ent128_t; + +typedef enum { + MLXCX_WQE_OP_NOP = 0x00, + MLXCX_WQE_OP_SEND_INVALIDATE = 0x01, + MLXCX_WQE_OP_RDMA_W = 0x08, + MLXCX_WQE_OP_RDMA_W_IMMEDIATE = 0x09, + MLXCX_WQE_OP_SEND = 0x0A, + MLXCX_WQE_OP_SEND_IMMEDIATE = 0x0B, + MLXCX_WQE_OP_LSO = 0x0E, + MLXCX_WQE_OP_WAIT = 0x0F, + MLXCX_WQE_OP_RDMA_R = 0x10, +} mlxcx_wqe_opcode_t; + +#define MLXCX_SQE_MAX_DS ((1 << 6) - 1) +#define MLXCX_SQE_MAX_PTRS 61 + +typedef enum { + MLXCX_SQE_FENCE_NONE = 0x0, + MLXCX_SQE_FENCE_WAIT_OTHERS = 0x1, + MLXCX_SQE_FENCE_START = 0x2, + MLXCX_SQE_FENCE_STRONG_ORDER = 0x3, + MLXCX_SQE_FENCE_START_WAIT = 0x4 +} mlxcx_sqe_fence_mode_t; + +typedef enum { + MLXCX_SQE_CQE_ON_EACH_ERROR = 0x0, + MLXCX_SQE_CQE_ON_FIRST_ERROR = 0x1, + MLXCX_SQE_CQE_ALWAYS = 0x2, + MLXCX_SQE_CQE_ALWAYS_PLUS_EQE = 0x3 +} mlxcx_sqe_completion_mode_t; + +#define MLXCX_SQE_SOLICITED (1 << 1) +/* CSTYLED */ +#define MLXCX_SQE_FENCE_MODE (bitdef_t){5, 0xe0} +/* CSTYLED */ +#define MLXCX_SQE_COMPLETION_MODE (bitdef_t){2, 0x0c} + +typedef struct { + uint8_t mlcs_opcode_mod; + uint16be_t mlcs_wqe_index; + uint8_t mlcs_opcode; + uint24be_t mlcs_qp_or_sq; + uint8_t mlcs_ds; + uint8_t mlcs_signature; + uint8_t mlcs_rsvd2[2]; + bits8_t mlcs_flags; + uint32be_t mlcs_immediate; +} mlxcx_wqe_control_seg_t; + +typedef enum { + MLXCX_SQE_ETH_CSFLAG_L4_CHECKSUM = 1 << 7, + MLXCX_SQE_ETH_CSFLAG_L3_CHECKSUM = 1 << 6, + MLXCX_SQE_ETH_CSFLAG_L4_INNER_CHECKSUM = 1 << 5, + MLXCX_SQE_ETH_CSFLAG_L3_INNER_CHECKSUM = 1 << 4, +} mlxcx_wqe_eth_flags_t; + +/* CSTYLED */ +#define MLXCX_SQE_ETH_INLINE_HDR_SZ (bitdef_t){0, 0x03ff} +#define MLXCX_SQE_ETH_SZFLAG_VLAN (1 << 15) +#define MLXCX_MAX_INLINE_HEADERLEN 64 + +typedef struct { + uint8_t mles_rsvd[4]; + bits8_t mles_csflags; + uint8_t mles_rsvd2[1]; + uint16_t mles_mss; + uint8_t mles_rsvd3[4]; + bits16_t mles_szflags; + uint8_t mles_inline_headers[18]; +} mlxcx_wqe_eth_seg_t; + +typedef struct { + uint32be_t mlds_byte_count; + uint32be_t mlds_lkey; + uint64be_t mlds_address; +} mlxcx_wqe_data_seg_t; + +#define MLXCX_SENDQ_STRIDE_SHIFT 6 + +typedef struct { + mlxcx_wqe_control_seg_t mlsqe_control; + mlxcx_wqe_eth_seg_t mlsqe_eth; + mlxcx_wqe_data_seg_t mlsqe_data[1]; +} mlxcx_sendq_ent_t; + +typedef struct { + uint64be_t mlsqbf_qwords[8]; +} mlxcx_sendq_bf_t; + +typedef struct { + mlxcx_wqe_data_seg_t mlsqe_data[4]; +} mlxcx_sendq_extra_ent_t; + +#define MLXCX_RECVQ_STRIDE_SHIFT 7 +/* + * Each mlxcx_wqe_data_seg_t is 1<<4 bytes long (there's a CTASSERT to verify + * this in mlxcx_cmd.c), so the number of pointers is 1 << (shift - 4). 
+ */ +#define MLXCX_RECVQ_MAX_PTRS (1 << (MLXCX_RECVQ_STRIDE_SHIFT - 4)) +typedef struct { + mlxcx_wqe_data_seg_t mlrqe_data[MLXCX_RECVQ_MAX_PTRS]; +} mlxcx_recvq_ent_t; + +/* CSTYLED */ +#define MLXCX_CQ_ARM_CI (bitdef_t){ .bit_shift = 0, \ + .bit_mask = 0x00ffffff } +/* CSTYLED */ +#define MLXCX_CQ_ARM_SEQ (bitdef_t){ .bit_shift = 28, \ + .bit_mask = 0x30000000 } +#define MLXCX_CQ_ARM_SOLICITED (1 << 24) + +typedef struct { + uint8_t mlcqd_rsvd; + uint24be_t mlcqd_update_ci; + bits32_t mlcqd_arm_ci; +} mlxcx_completionq_doorbell_t; + +typedef struct { + uint16be_t mlwqd_rsvd; + uint16be_t mlwqd_recv_counter; + uint16be_t mlwqd_rsvd2; + uint16be_t mlwqd_send_counter; +} mlxcx_workq_doorbell_t; + +#define MLXCX_EQ_STATUS_OK (0x0 << 4) +#define MLXCX_EQ_STATUS_WRITE_FAILURE (0xA << 4) + +#define MLXCX_EQ_OI (1 << 1) +#define MLXCX_EQ_EC (1 << 2) + +#define MLXCX_EQ_ST_ARMED 0x9 +#define MLXCX_EQ_ST_FIRED 0xA + +/* CSTYLED */ +#define MLXCX_EQ_LOG_PAGE_SIZE (bitdef_t){ .bit_shift = 24, \ + .bit_mask = 0x1F000000 } + +typedef struct { + uint8_t mleqc_status; + uint8_t mleqc_ecoi; + uint8_t mleqc_state; + uint8_t mleqc_rsvd[7]; + uint16be_t mleqc_page_offset; + uint8_t mleqc_log_eq_size; + uint24be_t mleqc_uar_page; + uint8_t mleqc_rsvd3[7]; + uint8_t mleqc_intr; + uint32be_t mleqc_log_page; + uint8_t mleqc_rsvd4[13]; + uint24be_t mleqc_consumer_counter; + uint8_t mleqc_rsvd5; + uint24be_t mleqc_producer_counter; + uint8_t mleqc_rsvd6[16]; +} mlxcx_eventq_ctx_t; + +typedef enum { + MLXCX_CQC_CQE_SIZE_64 = 0x0, + MLXCX_CQC_CQE_SIZE_128 = 0x1, +} mlxcx_cqc_cqe_sz_t; + +typedef enum { + MLXCX_CQC_STATUS_OK = 0x0, + MLXCX_CQC_STATUS_OVERFLOW = 0x9, + MLXCX_CQC_STATUS_WRITE_FAIL = 0xA, + MLXCX_CQC_STATUS_INVALID = 0xF +} mlxcx_cqc_status_t; + +typedef enum { + MLXCX_CQC_STATE_ARMED_SOLICITED = 0x6, + MLXCX_CQC_STATE_ARMED = 0x9, + MLXCX_CQC_STATE_FIRED = 0xA +} mlxcx_cqc_state_t; + +/* CSTYLED */ +#define MLXCX_CQ_CTX_STATUS (bitdef_t){28, 0xf0000000} +/* CSTYLED */ +#define MLXCX_CQ_CTX_CQE_SZ (bitdef_t){21, 0x00e00000} +/* CSTYLED */ +#define MLXCX_CQ_CTX_PERIOD_MODE (bitdef_t){15, 0x00018000} +/* CSTYLED */ +#define MLXCX_CQ_CTX_MINI_CQE_FORMAT (bitdef_t){12, 0x00003000} +/* CSTYLED */ +#define MLXCX_CQ_CTX_STATE (bitdef_t){8, 0x00000f00} + +typedef struct mlxcx_completionq_ctx { + bits32_t mlcqc_flags; + + uint8_t mlcqc_rsvd4[4]; + + uint8_t mlcqc_rsvd5[2]; + uint16be_t mlcqc_page_offset; + + uint8_t mlcqc_log_cq_size; + uint24be_t mlcqc_uar_page; + + uint16be_t mlcqc_cq_period; + uint16be_t mlcqc_cq_max_count; + + uint8_t mlcqc_rsvd7[3]; + uint8_t mlcqc_eqn; + + uint8_t mlcqc_log_page_size; + uint8_t mlcqc_rsvd8[3]; + + uint8_t mlcqc_rsvd9[4]; + + uint8_t mlcqc_rsvd10; + uint24be_t mlcqc_last_notified_index; + uint8_t mlcqc_rsvd11; + uint24be_t mlcqc_last_solicit_index; + uint8_t mlcqc_rsvd12; + uint24be_t mlcqc_consumer_counter; + uint8_t mlcqc_rsvd13; + uint24be_t mlcqc_producer_counter; + + uint8_t mlcqc_rsvd14[8]; + + uint64be_t mlcqc_dbr_addr; +} mlxcx_completionq_ctx_t; + +typedef enum { + MLXCX_WORKQ_TYPE_LINKED_LIST = 0x0, + MLXCX_WORKQ_TYPE_CYCLIC = 0x1, + MLXCX_WORKQ_TYPE_LINKED_LIST_STRIDING = 0x2, + MLXCX_WORKQ_TYPE_CYCLIC_STRIDING = 0x3 +} mlxcx_workq_ctx_type_t; + +typedef enum { + MLXCX_WORKQ_END_PAD_NONE = 0x0, + MLXCX_WORKQ_END_PAD_ALIGN = 0x1 +} mlxcx_workq_end_padding_t; + +/* CSTYLED */ +#define MLXCX_WORKQ_CTX_TYPE (bitdef_t){ \ + .bit_shift = 28, \ + .bit_mask = 0xf0000000 } +#define MLXCX_WORKQ_CTX_SIGNATURE (1 << 27) +#define MLXCX_WORKQ_CTX_CD_SLAVE (1 << 24) +/* 
CSTYLED */ +#define MLXCX_WORKQ_CTX_END_PADDING (bitdef_t){ \ + .bit_shift = 25, \ + .bit_mask = 0x06000000 } + +#define MLXCX_WORKQ_CTX_MAX_ADDRESSES 128 + +typedef struct mlxcx_workq_ctx { + bits32_t mlwqc_flags; + uint8_t mlwqc_rsvd[2]; + uint16be_t mlwqc_lwm; + uint8_t mlwqc_rsvd2; + uint24be_t mlwqc_pd; + uint8_t mlwqc_rsvd3; + uint24be_t mlwqc_uar_page; + uint64be_t mlwqc_dbr_addr; + uint32be_t mlwqc_hw_counter; + uint32be_t mlwqc_sw_counter; + uint8_t mlwqc_rsvd4; + uint8_t mlwqc_log_wq_stride; + uint8_t mlwqc_log_wq_pg_sz; + uint8_t mlwqc_log_wq_sz; + uint8_t mlwqc_rsvd5[2]; + bits16_t mlwqc_strides; + uint8_t mlwqc_rsvd6[152]; + uint64be_t mlwqc_pas[MLXCX_WORKQ_CTX_MAX_ADDRESSES]; +} mlxcx_workq_ctx_t; + +#define MLXCX_RQ_FLAGS_RLKEY (1UL << 31) +#define MLXCX_RQ_FLAGS_SCATTER_FCS (1 << 29) +#define MLXCX_RQ_FLAGS_VLAN_STRIP_DISABLE (1 << 28) +#define MLXCX_RQ_FLAGS_FLUSH_IN_ERROR (1 << 18) +/* CSTYLED */ +#define MLXCX_RQ_MEM_RQ_TYPE (bitdef_t){ \ + .bit_shift = 24, \ + .bit_mask = 0x0f000000 } +/* CSTYLED */ +#define MLXCX_RQ_STATE (bitdef_t){ \ + .bit_shift = 20, \ + .bit_mask = 0x00f00000 } + +typedef struct mlxcx_rq_ctx { + bits32_t mlrqc_flags; + uint8_t mlrqc_rsvd; + uint24be_t mlrqc_user_index; + uint8_t mlrqc_rsvd2; + uint24be_t mlrqc_cqn; + uint8_t mlrqc_counter_set_id; + uint8_t mlrqc_rsvd3[4]; + uint24be_t mlrqc_rmpn; + uint8_t mlrqc_rsvd4[28]; + mlxcx_workq_ctx_t mlrqc_wq; +} mlxcx_rq_ctx_t; + +#define MLXCX_SQ_FLAGS_RLKEY (1UL << 31) +#define MLXCX_SQ_FLAGS_CD_MASTER (1 << 30) +#define MLXCX_SQ_FLAGS_FRE (1 << 29) +#define MLXCX_SQ_FLAGS_FLUSH_IN_ERROR (1 << 28) +#define MLXCX_SQ_FLAGS_ALLOW_MULTI_PKT (1 << 27) +#define MLXCX_SQ_FLAGS_REG_UMR (1 << 19) + +typedef enum { + MLXCX_ETH_CAP_INLINE_REQUIRE_L2 = 0, + MLXCX_ETH_CAP_INLINE_VPORT_CTX = 1, + MLXCX_ETH_CAP_INLINE_NOT_REQUIRED = 2 +} mlxcx_eth_cap_inline_mode_t; + +typedef enum { + MLXCX_ETH_INLINE_NONE = 0, + MLXCX_ETH_INLINE_L2 = 1, + MLXCX_ETH_INLINE_L3 = 2, + MLXCX_ETH_INLINE_L4 = 3, + MLXCX_ETH_INLINE_INNER_L2 = 5, + MLXCX_ETH_INLINE_INNER_L3 = 6, + MLXCX_ETH_INLINE_INNER_L4 = 7 +} mlxcx_eth_inline_mode_t; + +/* CSTYLED */ +#define MLXCX_SQ_MIN_WQE_INLINE (bitdef_t){ \ + .bit_shift = 24, \ + .bit_mask = 0x07000000 } +/* CSTYLED */ +#define MLXCX_SQ_STATE (bitdef_t){ \ + .bit_shift = 20, \ + .bit_mask = 0x00f00000 } + +typedef struct mlxcx_sq_ctx { + bits32_t mlsqc_flags; + uint8_t mlsqc_rsvd; + uint24be_t mlsqc_user_index; + uint8_t mlsqc_rsvd2; + uint24be_t mlsqc_cqn; + uint8_t mlsqc_rsvd3[18]; + uint16be_t mlsqc_packet_pacing_rate_limit_index; + uint16be_t mlsqc_tis_lst_sz; + uint8_t mlsqc_rsvd4[11]; + uint24be_t mlsqc_tis_num; + mlxcx_workq_ctx_t mlsqc_wq; +} mlxcx_sq_ctx_t; + +#define MLXCX_NIC_VPORT_CTX_MAX_ADDRESSES 64 + +typedef enum { + MLXCX_VPORT_PROMISC_UCAST = 1 << 15, + MLXCX_VPORT_PROMISC_MCAST = 1 << 14, + MLXCX_VPORT_PROMISC_ALL = 1 << 13 +} mlxcx_nic_vport_ctx_promisc_t; + +#define MLXCX_VPORT_LIST_TYPE_MASK 0x07 +#define MLXCX_VPORT_LIST_TYPE_SHIFT 0 + +/* CSTYLED */ +#define MLXCX_VPORT_CTX_MIN_WQE_INLINE (bitdef_t){56, 0x0700000000000000} + +typedef struct { + bits64_t mlnvc_flags; + uint8_t mlnvc_rsvd[28]; + uint8_t mlnvc_rsvd2[2]; + uint16be_t mlnvc_mtu; + uint64be_t mlnvc_system_image_guid; + uint64be_t mlnvc_port_guid; + uint64be_t mlnvc_node_guid; + uint8_t mlnvc_rsvd3[40]; + uint16be_t mlnvc_qkey_violation_counter; + uint8_t mlnvc_rsvd4[2]; + uint8_t mlnvc_rsvd5[132]; + bits16_t mlnvc_promisc_list_type; + uint16be_t mlnvc_allowed_list_size; + uint8_t mlnvc_rsvd6[2]; + uint8_t 
mlnvc_permanent_address[6]; + uint8_t mlnvc_rsvd7[4]; + uint64be_t mlnvc_address[MLXCX_NIC_VPORT_CTX_MAX_ADDRESSES]; +} mlxcx_nic_vport_ctx_t; + +typedef struct { + uint8_t mlftc_flags; + uint8_t mlftc_level; + uint8_t mlftc_rsvd; + uint8_t mlftc_log_size; + uint8_t mlftc_rsvd2; + uint24be_t mlftc_table_miss_id; + uint8_t mlftc_rsvd3[4]; + uint8_t mlftc_rsvd4[28]; +} mlxcx_flow_table_ctx_t; + +/* CSTYLED */ +#define MLXCX_FLOW_HDR_FIRST_VID (bitdef_t){0, 0x07ff} +/* CSTYLED */ +#define MLXCX_FLOW_HDR_FIRST_PRIO (bitdef_t){13,0x7000} +#define MLXCX_FLOW_HDR_FIRST_CFI (1 << 12) + +#define MLXCX_FLOW_HDR_IP_DSCP_SHIFT 18 +#define MLXCX_FLOW_HDR_IP_DSCP_MASK 0xfc0000 +#define MLXCX_FLOW_HDR_IP_ECN_SHIFT 16 +#define MLXCX_FLOW_HDR_IP_ECN_MASK 0x030000 +#define MLXCX_FLOW_HDR_CVLAN_TAG (1 << 15) +#define MLXCX_FLOW_HDR_SVLAN_TAG (1 << 14) +#define MLXCX_FLOW_HDR_FRAG (1 << 13) +/* CSTYLED */ +#define MLXCX_FLOW_HDR_IP_VERSION (bitdef_t){ \ + .bit_shift = 9, \ + .bit_mask = 0x001e00 } +/* CSTYLED */ +#define MLXCX_FLOW_HDR_TCP_FLAGS (bitdef_t){ \ + .bit_shift = 0, \ + .bit_mask = 0x0001ff } + +typedef struct { + uint8_t mlfh_smac[6]; + uint16be_t mlfh_ethertype; + uint8_t mlfh_dmac[6]; + bits16_t mlfh_first_vid_flags; + uint8_t mlfh_ip_protocol; + bits24_t mlfh_tcp_ip_flags; + uint16be_t mlfh_tcp_sport; + uint16be_t mlfh_tcp_dport; + uint8_t mlfh_rsvd[3]; + uint8_t mlfh_ip_ttl_hoplimit; + uint16be_t mlfh_udp_sport; + uint16be_t mlfh_udp_dport; + uint8_t mlfh_src_ip[16]; + uint8_t mlfh_dst_ip[16]; +} mlxcx_flow_header_match_t; + +typedef struct { + uint8_t mlfp_rsvd; + uint24be_t mlfp_source_sqn; + uint8_t mlfp_rsvd2[2]; + uint16be_t mlfp_source_port; + bits16_t mlfp_outer_second_vid_flags; + bits16_t mlfp_inner_second_vid_flags; + bits16_t mlfp_vlan_flags; + uint16be_t mlfp_gre_protocol; + uint32be_t mlfp_gre_key; + uint24be_t mlfp_vxlan_vni; + uint8_t mlfp_rsvd3; + uint8_t mlfp_rsvd4[4]; + uint8_t mlfp_rsvd5; + uint24be_t mlfp_outer_ipv6_flow_label; + uint8_t mlfp_rsvd6; + uint24be_t mlfp_inner_ipv6_flow_label; + uint8_t mlfp_rsvd7[28]; +} mlxcx_flow_params_match_t; + +typedef struct { + mlxcx_flow_header_match_t mlfm_outer_headers; + mlxcx_flow_params_match_t mlfm_misc_parameters; + mlxcx_flow_header_match_t mlfm_inner_headers; + uint8_t mlfm_rsvd[320]; +} mlxcx_flow_match_t; + +#define MLXCX_FLOW_MAX_DESTINATIONS 64 +typedef enum { + MLXCX_FLOW_DEST_VPORT = 0x0, + MLXCX_FLOW_DEST_FLOW_TABLE = 0x1, + MLXCX_FLOW_DEST_TIR = 0x2, + MLXCX_FLOW_DEST_QP = 0x3 +} mlxcx_flow_destination_type_t; + +typedef struct { + uint8_t mlfd_destination_type; + uint24be_t mlfd_destination_id; + uint8_t mlfd_rsvd[4]; +} mlxcx_flow_dest_t; + +typedef enum { + MLXCX_FLOW_ACTION_ALLOW = 1 << 0, + MLXCX_FLOW_ACTION_DROP = 1 << 1, + MLXCX_FLOW_ACTION_FORWARD = 1 << 2, + MLXCX_FLOW_ACTION_COUNT = 1 << 3, + MLXCX_FLOW_ACTION_ENCAP = 1 << 4, + MLXCX_FLOW_ACTION_DECAP = 1 << 5 +} mlxcx_flow_action_t; + +typedef struct { + uint8_t mlfec_rsvd[4]; + uint32be_t mlfec_group_id; + uint8_t mlfec_rsvd2; + uint24be_t mlfec_flow_tag; + uint8_t mlfec_rsvd3[2]; + uint16be_t mlfec_action; + uint8_t mlfec_rsvd4; + uint24be_t mlfec_destination_list_size; + uint8_t mlfec_rsvd5; + uint24be_t mlfec_flow_counter_list_size; + uint32be_t mlfec_encap_id; + uint8_t mlfec_rsvd6[36]; + mlxcx_flow_match_t mlfec_match_value; + uint8_t mlfec_rsvd7[192]; + mlxcx_flow_dest_t mlfec_destination[MLXCX_FLOW_MAX_DESTINATIONS]; +} mlxcx_flow_entry_ctx_t; + +/* CSTYLED */ +#define MLXCX_TIR_CTX_DISP_TYPE (bitdef_t){ 4, 0xf0 } +typedef enum { + MLXCX_TIR_DIRECT 
= 0x0, + MLXCX_TIR_INDIRECT = 0x1, +} mlxcx_tir_type_t; + +/* CSTYLED */ +#define MLXCX_TIR_LRO_TIMEOUT (bitdef_t){ 12, 0x0ffff000 } +/* CSTYLED */ +#define MLXCX_TIR_LRO_ENABLE_MASK (bitdef_t){ 8, 0x00000f00 } +/* CSTYLED */ +#define MLXCX_TIR_LRO_MAX_MSG_SZ (bitdef_t){ 0, 0x000000ff } + +/* CSTYLED */ +#define MLXCX_TIR_RX_HASH_FN (bitdef_t){ 4, 0xf0 } +typedef enum { + MLXCX_TIR_HASH_NONE = 0x0, + MLXCX_TIR_HASH_XOR8 = 0x1, + MLXCX_TIR_HASH_TOEPLITZ = 0x2 +} mlxcx_tir_hash_fn_t; +#define MLXCX_TIR_LB_UNICAST (1 << 24) +#define MLXCX_TIR_LB_MULTICAST (1 << 25) + +/* CSTYLED */ +#define MLXCX_RX_HASH_L3_TYPE (bitdef_t){ 31, 0x80000000 } +typedef enum { + MLXCX_RX_HASH_L3_IPv4 = 0, + MLXCX_RX_HASH_L3_IPv6 = 1 +} mlxcx_tir_rx_hash_l3_type_t; +/* CSTYLED */ +#define MLXCX_RX_HASH_L4_TYPE (bitdef_t){ 30, 0x40000000 } +typedef enum { + MLXCX_RX_HASH_L4_TCP = 0, + MLXCX_RX_HASH_L4_UDP = 1 +} mlxcx_tir_rx_hash_l4_type_t; +/* CSTYLED */ +#define MLXCX_RX_HASH_FIELDS (bitdef_t){ 0, 0x3fffffff } +typedef enum { + MLXCX_RX_HASH_SRC_IP = 1 << 0, + MLXCX_RX_HASH_DST_IP = 1 << 1, + MLXCX_RX_HASH_L4_SPORT = 1 << 2, + MLXCX_RX_HASH_L4_DPORT = 1 << 3, + MLXCX_RX_HASH_IPSEC_SPI = 1 << 4 +} mlxcx_tir_rx_hash_fields_t; + +typedef struct { + uint8_t mltirc_rsvd[4]; + bits8_t mltirc_disp_type; + uint8_t mltirc_rsvd2[11]; + bits32_t mltirc_lro; + uint8_t mltirc_rsvd3[9]; + uint24be_t mltirc_inline_rqn; + bits8_t mltirc_flags; + uint24be_t mltirc_indirect_table; + bits8_t mltirc_hash_lb; + uint24be_t mltirc_transport_domain; + uint8_t mltirc_rx_hash_toeplitz_key[40]; + bits32_t mltirc_rx_hash_fields_outer; + bits32_t mltirc_rx_hash_fields_inner; + uint8_t mltirc_rsvd4[152]; +} mlxcx_tir_ctx_t; + +typedef struct { + uint8_t mltisc_rsvd; + uint8_t mltisc_prio_or_sl; + uint8_t mltisc_rsvd2[35]; + uint24be_t mltisc_transport_domain; + uint8_t mltisc_rsvd3[120]; +} mlxcx_tis_ctx_t; + +#define MLXCX_RQT_MAX_RQ_REFS 64 + +typedef struct { + uint8_t mlrqtr_rsvd; + uint24be_t mlrqtr_rqn; +} mlxcx_rqtable_rq_ref_t; + +typedef struct { + uint8_t mlrqtc_rsvd[22]; + uint16be_t mlrqtc_max_size; + uint8_t mlrqtc_rsvd2[2]; + uint16be_t mlrqtc_actual_size; + uint8_t mlrqtc_rsvd3[212]; + mlxcx_rqtable_rq_ref_t mlrqtc_rqref[MLXCX_RQT_MAX_RQ_REFS]; +} mlxcx_rqtable_ctx_t; + +#pragma pack() + +typedef enum { + MLXCX_EVENT_COMPLETION = 0x00, + MLXCX_EVENT_PATH_MIGRATED = 0x01, + MLXCX_EVENT_COMM_ESTABLISH = 0x02, + MLXCX_EVENT_SENDQ_DRAIN = 0x03, + MLXCX_EVENT_LAST_WQE = 0x13, + MLXCX_EVENT_SRQ_LIMIT = 0x14, + MLXCX_EVENT_DCT_ALL_CLOSED = 0x1C, + MLXCX_EVENT_DCT_ACCKEY_VIOL = 0x1D, + MLXCX_EVENT_CQ_ERROR = 0x04, + MLXCX_EVENT_WQ_CATASTROPHE = 0x05, + MLXCX_EVENT_PATH_MIGRATE_FAIL = 0x07, + MLXCX_EVENT_PAGE_FAULT = 0x0C, + MLXCX_EVENT_WQ_INVALID_REQ = 0x10, + MLXCX_EVENT_WQ_ACCESS_VIOL = 0x11, + MLXCX_EVENT_SRQ_CATASTROPHE = 0x12, + MLXCX_EVENT_INTERNAL_ERROR = 0x08, + MLXCX_EVENT_PORT_STATE = 0x09, + MLXCX_EVENT_GPIO = 0x15, + MLXCX_EVENT_PORT_MODULE = 0x16, + MLXCX_EVENT_TEMP_WARNING = 0x17, + MLXCX_EVENT_REMOTE_CONFIG = 0x19, + MLXCX_EVENT_DCBX_CHANGE = 0x1E, + MLXCX_EVENT_DOORBELL_CONGEST = 0x1A, + MLXCX_EVENT_STALL_VL = 0x1B, + MLXCX_EVENT_CMD_COMPLETION = 0x0A, + MLXCX_EVENT_PAGE_REQUEST = 0x0B, + MLXCX_EVENT_NIC_VPORT = 0x0D, + MLXCX_EVENT_EC_PARAMS_CHANGE = 0x0E, + MLXCX_EVENT_XRQ_ERROR = 0x18 +} mlxcx_event_t; + +typedef enum { + MLXCX_CMD_R_OK = 0x00, + MLXCX_CMD_R_INTERNAL_ERR = 0x01, + MLXCX_CMD_R_BAD_OP = 0x02, + MLXCX_CMD_R_BAD_PARAM = 0x03, + MLXCX_CMD_R_BAD_SYS_STATE = 0x04, + MLXCX_CMD_R_BAD_RESOURCE = 0x05, + 
MLXCX_CMD_R_RESOURCE_BUSY = 0x06, + MLXCX_CMD_R_EXCEED_LIM = 0x08, + MLXCX_CMD_R_BAD_RES_STATE = 0x09, + MLXCX_CMD_R_BAD_INDEX = 0x0a, + MLXCX_CMD_R_NO_RESOURCES = 0x0f, + MLXCX_CMD_R_BAD_INPUT_LEN = 0x50, + MLXCX_CMD_R_BAD_OUTPUT_LEN = 0x51, + MLXCX_CMD_R_BAD_RESOURCE_STATE = 0x10, + MLXCX_CMD_R_BAD_PKT = 0x30, + MLXCX_CMD_R_BAD_SIZE = 0x40, + MLXCX_CMD_R_TIMEOUT = 0xFF +} mlxcx_cmd_ret_t; + +typedef enum { + MLXCX_OP_QUERY_HCA_CAP = 0x100, + MLXCX_OP_QUERY_ADAPTER = 0x101, + MLXCX_OP_INIT_HCA = 0x102, + MLXCX_OP_TEARDOWN_HCA = 0x103, + MLXCX_OP_ENABLE_HCA = 0x104, + MLXCX_OP_DISABLE_HCA = 0x105, + MLXCX_OP_QUERY_PAGES = 0x107, + MLXCX_OP_MANAGE_PAGES = 0x108, + MLXCX_OP_SET_HCA_CAP = 0x109, + MLXCX_OP_QUERY_ISSI = 0x10A, + MLXCX_OP_SET_ISSI = 0x10B, + MLXCX_OP_SET_DRIVER_VERSION = 0x10D, + MLXCX_OP_QUERY_OTHER_HCA_CAP = 0x10E, + MLXCX_OP_MODIFY_OTHER_HCA_CAP = 0x10F, + MLXCX_OP_SET_TUNNELED_OPERATIONS = 0x110, + MLXCX_OP_CREATE_MKEY = 0x200, + MLXCX_OP_QUERY_MKEY = 0x201, + MLXCX_OP_DESTROY_MKEY = 0x202, + MLXCX_OP_QUERY_SPECIAL_CONTEXTS = 0x203, + MLXCX_OP_PAGE_FAULT_RESUME = 0x204, + MLXCX_OP_CREATE_EQ = 0x301, + MLXCX_OP_DESTROY_EQ = 0x302, + MLXCX_OP_QUERY_EQ = 0x303, + MLXCX_OP_GEN_EQE = 0x304, + MLXCX_OP_CREATE_CQ = 0x400, + MLXCX_OP_DESTROY_CQ = 0x401, + MLXCX_OP_QUERY_CQ = 0x402, + MLXCX_OP_MODIFY_CQ = 0x403, + MLXCX_OP_CREATE_QP = 0x500, + MLXCX_OP_DESTROY_QP = 0x501, + MLXCX_OP_RST2INIT_QP = 0x502, + MLXCX_OP_INIT2RTR_QP = 0x503, + MLXCX_OP_RTR2RTS_QP = 0x504, + MLXCX_OP_RTS2RTS_QP = 0x505, + MLXCX_OP_SQERR2RTS_QP = 0x506, + MLXCX_OP__2ERR_QP = 0x507, + MLXCX_OP__2RST_QP = 0x50A, + MLXCX_OP_QUERY_QP = 0x50B, + MLXCX_OP_SQD_RTS_QP = 0x50C, + MLXCX_OP_INIT2INIT_QP = 0x50E, + MLXCX_OP_CREATE_PSV = 0x600, + MLXCX_OP_DESTROY_PSV = 0x601, + MLXCX_OP_CREATE_SRQ = 0x700, + MLXCX_OP_DESTROY_SRQ = 0x701, + MLXCX_OP_QUERY_SRQ = 0x702, + MLXCX_OP_ARM_RQ = 0x703, + MLXCX_OP_CREATE_XRC_SRQ = 0x705, + MLXCX_OP_DESTROY_XRC_SRQ = 0x706, + MLXCX_OP_QUERY_XRC_SRQ = 0x707, + MLXCX_OP_ARM_XRC_SRQ = 0x708, + MLXCX_OP_CREATE_DCT = 0x710, + MLXCX_OP_DESTROY_DCT = 0x711, + MLXCX_OP_DRAIN_DCT = 0x712, + MLXCX_OP_QUERY_DCT = 0x713, + MLXCX_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714, + MLXCX_OP_CREATE_XRQ = 0x717, + MLXCX_OP_DESTROY_XRQ = 0x718, + MLXCX_OP_QUERY_XRQ = 0x719, + MLXCX_OP_CREATE_NVMF_BACKEND_CONTROLLER = 0x720, + MLXCX_OP_DESTROY_NVMF_BACKEND_CONTROLLER = 0x721, + MLXCX_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722, + MLXCX_OP_ATTACH_NVMF_NAMESPACE = 0x723, + MLXCX_OP_DETACH_NVMF_NAMESPACE = 0x724, + MLXCX_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, + MLXCX_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, + MLXCX_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, + MLXCX_OP_QUERY_VPORT_STATE = 0x750, + MLXCX_OP_MODIFY_VPORT_STATE = 0x751, + MLXCX_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, + MLXCX_OP_MODIFY_ESW_VPORT_CONTEXT = 0x753, + MLXCX_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, + MLXCX_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755, + MLXCX_OP_QUERY_ROCE_ADDRESS = 0x760, + MLXCX_OP_SET_ROCE_ADDRESS = 0x761, + MLXCX_OP_QUERY_HCA_VPORT_CONTEXT = 0x762, + MLXCX_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763, + MLXCX_OP_QUERY_HCA_VPORT_GID = 0x764, + MLXCX_OP_QUERY_HCA_VPORT_PKEY = 0x765, + MLXCX_OP_QUERY_VPORT_COUNTER = 0x770, + MLXCX_OP_ALLOC_Q_COUNTER = 0x771, + MLXCX_OP_DEALLOC_Q_COUNTER = 0x772, + MLXCX_OP_QUERY_Q_COUNTER = 0x773, + MLXCX_OP_SET_PP_RATE_LIMIT = 0x780, + MLXCX_OP_QUERY_PP_RATE_LIMIT = 0x781, + MLXCX_OP_ALLOC_PD = 0x800, + MLXCX_OP_DEALLOC_PD = 0x801, + MLXCX_OP_ALLOC_UAR = 0x802, + MLXCX_OP_DEALLOC_UAR = 0x803, + MLXCX_OP_CONFIG_INT_MODERATION = 
0x804, + MLXCX_OP_ACCESS_REG = 0x805, + MLXCX_OP_ATTACH_TO_MCG = 0x806, + MLXCX_OP_DETACH_FROM_MCG = 0x807, + MLXCX_OP_MAD_IFC = 0x50D, + MLXCX_OP_QUERY_MAD_DEMUX = 0x80B, + MLXCX_OP_SET_MAD_DEMUX = 0x80C, + MLXCX_OP_NOP = 0x80D, + MLXCX_OP_ALLOC_XRCD = 0x80E, + MLXCX_OP_DEALLOC_XRCD = 0x80F, + MLXCX_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, + MLXCX_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817, + MLXCX_OP_QUERY_CONG_STATUS = 0x822, + MLXCX_OP_MODIFY_CONG_STATUS = 0x823, + MLXCX_OP_QUERY_CONG_PARAMS = 0x824, + MLXCX_OP_MODIFY_CONG_PARAMS = 0x825, + MLXCX_OP_QUERY_CONG_STATISTICS = 0x826, + MLXCX_OP_ADD_VXLAN_UDP_DPORT = 0x827, + MLXCX_OP_DELETE_VXLAN_UDP_DPORT = 0x828, + MLXCX_OP_SET_L2_TABLE_ENTRY = 0x829, + MLXCX_OP_QUERY_L2_TABLE_ENTRY = 0x82A, + MLXCX_OP_DELETE_L2_TABLE_ENTRY = 0x82B, + MLXCX_OP_SET_WOL_ROL = 0x830, + MLXCX_OP_QUERY_WOL_ROL = 0x831, + MLXCX_OP_CREATE_TIR = 0x900, + MLXCX_OP_MODIFY_TIR = 0x901, + MLXCX_OP_DESTROY_TIR = 0x902, + MLXCX_OP_QUERY_TIR = 0x903, + MLXCX_OP_CREATE_SQ = 0x904, + MLXCX_OP_MODIFY_SQ = 0x905, + MLXCX_OP_DESTROY_SQ = 0x906, + MLXCX_OP_QUERY_SQ = 0x907, + MLXCX_OP_CREATE_RQ = 0x908, + MLXCX_OP_MODIFY_RQ = 0x909, + MLXCX_OP_DESTROY_RQ = 0x90A, + MLXCX_OP_QUERY_RQ = 0x90B, + MLXCX_OP_CREATE_RMP = 0x90C, + MLXCX_OP_MODIFY_RMP = 0x90D, + MLXCX_OP_DESTROY_RMP = 0x90E, + MLXCX_OP_QUERY_RMP = 0x90F, + MLXCX_OP_CREATE_TIS = 0x912, + MLXCX_OP_MODIFY_TIS = 0x913, + MLXCX_OP_DESTROY_TIS = 0x914, + MLXCX_OP_QUERY_TIS = 0x915, + MLXCX_OP_CREATE_RQT = 0x916, + MLXCX_OP_MODIFY_RQT = 0x917, + MLXCX_OP_DESTROY_RQT = 0x918, + MLXCX_OP_QUERY_RQT = 0x919, + MLXCX_OP_SET_FLOW_TABLE_ROOT = 0x92f, + MLXCX_OP_CREATE_FLOW_TABLE = 0x930, + MLXCX_OP_DESTROY_FLOW_TABLE = 0x931, + MLXCX_OP_QUERY_FLOW_TABLE = 0x932, + MLXCX_OP_CREATE_FLOW_GROUP = 0x933, + MLXCX_OP_DESTROY_FLOW_GROUP = 0x934, + MLXCX_OP_QUERY_FLOW_GROUP = 0x935, + MLXCX_OP_SET_FLOW_TABLE_ENTRY = 0x936, + MLXCX_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, + MLXCX_OP_DELETE_FLOW_TABLE_ENTRY = 0x938, + MLXCX_OP_ALLOC_FLOW_COUNTER = 0x939, + MLXCX_OP_DEALLOC_FLOW_COUNTER = 0x93a, + MLXCX_OP_QUERY_FLOW_COUNTER = 0x93b, + MLXCX_OP_MODIFY_FLOW_TABLE = 0x93c, + MLXCX_OP_ALLOC_ENCAP_HEADER = 0x93d, + MLXCX_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLXCX_OP_QUERY_ENCAP_HEADER = 0x93f +} mlxcx_cmd_op_t; + +/* + * Definitions for relevant commands + */ +#pragma pack(1) +typedef struct { + uint16be_t mci_opcode; + uint8_t mci_rsvd[4]; + uint16be_t mci_op_mod; +} mlxcx_cmd_in_t; + +typedef struct { + uint8_t mco_status; + uint8_t mco_rsvd[3]; + uint32be_t mco_syndrome; +} mlxcx_cmd_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_enable_hca_head; + uint8_t mlxi_enable_hca_rsvd[2]; + uint16be_t mlxi_enable_hca_func; + uint8_t mlxi_enable_hca_rsvd1[4]; +} mlxcx_cmd_enable_hca_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_enable_hca_head; + uint8_t mlxo_enable_hca_rsvd[8]; +} mlxcx_cmd_enable_hca_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_disable_hca_head; + uint8_t mlxi_disable_hca_rsvd[2]; + uint16be_t mlxi_disable_hca_func; + uint8_t mlxi_disable_hca_rsvd1[4]; +} mlxcx_cmd_disable_hca_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_disable_hca_head; + uint8_t mlxo_disable_hca_rsvd[8]; +} mlxcx_cmd_disable_hca_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_issi_head; + uint8_t mlxi_query_issi_rsvd[8]; +} mlxcx_cmd_query_issi_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_issi_head; + uint8_t mlxo_query_issi_rsv[2]; + uint16be_t mlxo_query_issi_current; + uint8_t mlxo_query_issi_rsvd1[20]; + /* + * To date we only support version 1 of 
the ISSI. The last byte has the + * ISSI data that we care about, therefore we phrase the struct this + * way. + */ + uint8_t mlxo_query_issi_rsvd2[79]; + uint8_t mlxo_supported_issi; +} mlxcx_cmd_query_issi_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_set_issi_head; + uint8_t mlxi_set_issi_rsvd[2]; + uint16be_t mlxi_set_issi_current; + uint8_t mlxi_set_iss_rsvd1[4]; +} mlxcx_cmd_set_issi_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_set_issi_head; + uint8_t mlxo_set_issi_rsvd[8]; +} mlxcx_cmd_set_issi_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_init_hca_head; + uint8_t mlxi_init_hca_rsvd[8]; +} mlxcx_cmd_init_hca_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_init_hca_head; + uint8_t mlxo_init_hca_rsvd[8]; +} mlxcx_cmd_init_hca_out_t; + +#define MLXCX_TEARDOWN_HCA_GRACEFUL 0x00 +#define MLXCX_TEARDOWN_HCA_FORCE 0x01 + +typedef struct { + mlxcx_cmd_in_t mlxi_teardown_hca_head; + uint8_t mlxi_teardown_hca_rsvd[2]; + uint16be_t mlxi_teardown_hca_profile; + uint8_t mlxi_teardown_hca_rsvd1[4]; +} mlxcx_cmd_teardown_hca_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_teardown_hca_head; + uint8_t mlxo_teardown_hca_rsvd[7]; + uint8_t mlxo_teardown_hca_state; +} mlxcx_cmd_teardown_hca_out_t; + +#define MLXCX_QUERY_PAGES_OPMOD_BOOT 0x01 +#define MLXCX_QUERY_PAGES_OPMOD_INIT 0x02 +#define MLXCX_QUERY_PAGES_OPMOD_REGULAR 0x03 + +typedef struct { + mlxcx_cmd_in_t mlxi_query_pages_head; + uint8_t mlxi_query_pages_rsvd[2]; + uint16be_t mlxi_query_pages_func; + uint8_t mlxi_query_pages_rsvd1[4]; +} mlxcx_cmd_query_pages_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_pages_head; + uint8_t mlxo_query_pages_rsvd[2]; + uint16be_t mlxo_query_pages_func; + uint32be_t mlxo_query_pages_npages; +} mlxcx_cmd_query_pages_out_t; + +#define MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL 0x00 +#define MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES 0x01 +#define MLXCX_MANAGE_PAGES_OPMOD_RETURN_PAGES 0x02 + +/* + * This is an artificial limit that we're imposing on our actions. 
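+ * It caps each MANAGE_PAGES command at 512 eight-byte page addresses
+ * (a 4KB array of PAs); moving more pages than this to or from the
+ * device takes multiple commands.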
+ */ +#define MLXCX_MANAGE_PAGES_MAX_PAGES 512 + +typedef struct { + mlxcx_cmd_in_t mlxi_manage_pages_head; + uint8_t mlxi_manage_pages_rsvd[2]; + uint16be_t mlxi_manage_pages_func; + uint32be_t mlxi_manage_pages_npages; + uint64be_t mlxi_manage_pages_pas[MLXCX_MANAGE_PAGES_MAX_PAGES]; +} mlxcx_cmd_manage_pages_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_manage_pages_head; + uint32be_t mlxo_manage_pages_npages; + uint8_t mlxo_manage_pages_rsvd[4]; + uint64be_t mlxo_manage_pages_pas[MLXCX_MANAGE_PAGES_MAX_PAGES]; +} mlxcx_cmd_manage_pages_out_t; + +typedef enum { + MLXCX_HCA_CAP_MODE_MAX = 0x0, + MLXCX_HCA_CAP_MODE_CURRENT = 0x1 +} mlxcx_hca_cap_mode_t; + +typedef enum { + MLXCX_HCA_CAP_GENERAL = 0x0, + MLXCX_HCA_CAP_ETHERNET = 0x1, + MLXCX_HCA_CAP_ODP = 0x2, + MLXCX_HCA_CAP_ATOMIC = 0x3, + MLXCX_HCA_CAP_ROCE = 0x4, + MLXCX_HCA_CAP_IPoIB = 0x5, + MLXCX_HCA_CAP_NIC_FLOW = 0x7, + MLXCX_HCA_CAP_ESWITCH_FLOW = 0x8, + MLXCX_HCA_CAP_ESWITCH = 0x9, + MLXCX_HCA_CAP_VECTOR = 0xb, + MLXCX_HCA_CAP_QoS = 0xc, + MLXCX_HCA_CAP_NVMEoF = 0xe +} mlxcx_hca_cap_type_t; + +typedef enum { + MLXCX_CAP_GENERAL_PORT_TYPE_IB = 0x0, + MLXCX_CAP_GENERAL_PORT_TYPE_ETHERNET = 0x1, +} mlxcx_hca_cap_general_port_type_t; + +typedef enum { + MLXCX_CAP_GENERAL_FLAGS_C_ESW_FLOW_TABLE = (1 << 8), + MLXCX_CAP_GENERAL_FLAGS_C_NIC_FLOW_TABLE = (1 << 9), +} mlxcx_hca_cap_general_flags_c_t; + +typedef struct { + uint8_t mlcap_general_access_other_hca_roce; + uint8_t mlcap_general_rsvd[3]; + + uint8_t mlcap_general_rsvd2[12]; + + uint8_t mlcap_general_log_max_srq_sz; + uint8_t mlcap_general_log_max_qp_sz; + uint8_t mlcap_general_rsvd3[1]; + uint8_t mlcap_general_log_max_qp; + + uint8_t mlcap_general_rsvd4[1]; + uint8_t mlcap_general_log_max_srq; + uint8_t mlcap_general_rsvd5[2]; + + uint8_t mlcap_general_rsvd6[1]; + uint8_t mlcap_general_log_max_cq_sz; + uint8_t mlcap_general_rsvd7[1]; + uint8_t mlcap_general_log_max_cq; + + uint8_t mlcap_general_log_max_eq_sz; + uint8_t mlcap_general_log_max_mkey_flags; + uint8_t mlcap_general_rsvd8[1]; + uint8_t mlcap_general_log_max_eq; + + uint8_t mlcap_general_max_indirection; + uint8_t mlcap_general_log_max_mrw_sz_flags; + uint8_t mlcap_general_log_max_bsf_list_size_flags; + uint8_t mlcap_general_log_max_klm_list_size_flags; + + uint8_t mlcap_general_rsvd9[1]; + uint8_t mlcap_general_log_max_ra_req_dc; + uint8_t mlcap_general_rsvd10[1]; + uint8_t mlcap_general_log_max_ra_res_dc; + + uint8_t mlcap_general_rsvd11[1]; + uint8_t mlcap_general_log_max_ra_req_qp; + uint8_t mlcap_general_rsvd12[1]; + uint8_t mlcap_general_log_max_ra_res_qp; + + uint16be_t mlcap_general_flags_a; + uint16be_t mlcap_general_gid_table_size; + + bits16_t mlcap_general_flags_b; + uint16be_t mlcap_general_pkey_table_size; + + bits16_t mlcap_general_flags_c; + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcap_general_flags_d:6; + uint8_t mlcap_general_port_type:2; +#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcap_general_port_type:2; + uint8_t mlcap_general_flags_d:6; +#endif + }; + uint8_t mlcap_general_num_ports; + + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcap_general_rsvd13:3; + uint8_t mlcap_general_log_max_msg:5; +#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcap_general_log_max_msg:5; + uint8_t mlcap_general_rsvd13:3; +#endif + }; + uint8_t mlcap_general_max_tc; + bits16_t mlcap_general_flags_d_wol; + + uint16be_t mlcap_general_state_rate_support; + uint8_t mlcap_general_rsvd14[1]; + struct { +#if defined(_BIT_FIELDS_HTOL) + uint8_t mlcap_general_rsvd15:4; + uint8_t mlcap_general_cqe_version:4; 
+#elif defined(_BIT_FIELDS_LTOH) + uint8_t mlcap_general_cqe_version:4; + uint8_t mlcap_general_rsvd15:4; +#endif + }; + + uint32be_t mlcap_general_flags_e; + + uint32be_t mlcap_general_flags_f; + + uint8_t mlcap_general_rsvd16[1]; + uint8_t mlcap_general_uar_sz; + uint8_t mlcap_general_cnak; + uint8_t mlcap_general_log_pg_sz; + uint8_t mlcap_general_rsvd17[32]; + bits8_t mlcap_general_log_max_rq_flags; + uint8_t mlcap_general_log_max_sq; + uint8_t mlcap_general_log_max_tir; + uint8_t mlcap_general_log_max_tis; +} mlxcx_hca_cap_general_caps_t; + +typedef enum { + MLXCX_ETH_CAP_TUNNEL_STATELESS_VXLAN = 1 << 0, + MLXCX_ETH_CAP_TUNNEL_STATELESS_GRE = 1 << 1, + MLXCX_ETH_CAP_TUNNEL_LSO_CONST_OUT_IP_ID = 1 << 4, + MLXCX_ETH_CAP_SCATTER_FCS = 1 << 6, + MLXCX_ETH_CAP_REG_UMR_SQ = 1 << 7, + MLXCX_ETH_CAP_SELF_LB_UC = 1 << 21, + MLXCX_ETH_CAP_SELF_LB_MC = 1 << 22, + MLXCX_ETH_CAP_SELF_LB_EN_MODIFIABLE = 1 << 23, + MLXCX_ETH_CAP_WQE_VLAN_INSERT = 1 << 24, + MLXCX_ETH_CAP_LRO_TIME_STAMP = 1 << 27, + MLXCX_ETH_CAP_LRO_PSH_FLAG = 1 << 28, + MLXCX_ETH_CAP_LRO_CAP = 1 << 29, + MLXCX_ETH_CAP_VLAN_STRIP = 1 << 30, + MLXCX_ETH_CAP_CSUM_CAP = 1UL << 31 +} mlxcx_hca_eth_cap_flags_t; + +/* CSTYLED */ +#define MLXCX_ETH_CAP_RSS_IND_TBL_CAP (bitdef_t){8, 0x00000f00} +/* CSTYLED */ +#define MLXCX_ETH_CAP_WQE_INLINE_MODE (bitdef_t){12, 0x00003000} +/* CSTYLED */ +#define MLXCX_ETH_CAP_MULTI_PKT_SEND_WQE (bitdef_t){14, 0x0000c000} +/* CSTYLED */ +#define MLXCX_ETH_CAP_MAX_LSO_CAP (bitdef_t){16, 0x001f0000} +/* CSTYLED */ +#define MLXCX_ETH_CAP_LRO_MAX_MSG_SZ_MODE (bitdef_t){25, 0x06000000} + +typedef struct { + bits32_t mlcap_eth_flags; + uint8_t mlcap_eth_rsvd[6]; + uint16be_t mlcap_eth_lro_min_mss_size; + uint8_t mlcap_eth_rsvd2[36]; + uint32be_t mlcap_eth_lro_timer_supported_periods[4]; +} mlxcx_hca_cap_eth_caps_t; + +typedef enum { + MLXCX_FLOW_CAP_PROPS_DECAP = 1 << 23, + MLXCX_FLOW_CAP_PROPS_ENCAP = 1 << 24, + MLXCX_FLOW_CAP_PROPS_MODIFY_TBL = 1 << 25, + MLXCX_FLOW_CAP_PROPS_MISS_TABLE = 1 << 26, + MLXCX_FLOW_CAP_PROPS_MODIFY_ROOT_TBL = 1 << 27, + MLXCX_FLOW_CAP_PROPS_MODIFY = 1 << 28, + MLXCX_FLOW_CAP_PROPS_COUNTER = 1 << 29, + MLXCX_FLOW_CAP_PROPS_TAG = 1 << 30, + MLXCX_FLOW_CAP_PROPS_SUPPORT = 1UL << 31 +} mlxcx_hca_cap_flow_cap_props_flags_t; + +typedef struct { + bits32_t mlcap_flow_prop_flags; + uint8_t mlcap_flow_prop_log_max_ft_size; + uint8_t mlcap_flow_prop_rsvd[2]; + uint8_t mlcap_flow_prop_max_ft_level; + uint8_t mlcap_flow_prop_rsvd2[7]; + uint8_t mlcap_flow_prop_log_max_ft_num; + uint8_t mlcap_flow_prop_rsvd3[2]; + uint8_t mlcap_flow_prop_log_max_flow_counter; + uint8_t mlcap_flow_prop_log_max_destination; + uint8_t mlcap_flow_prop_rsvd4[3]; + uint8_t mlcap_flow_prop_log_max_flow; + uint8_t mlcap_flow_prop_rsvd5[8]; + bits32_t mlcap_flow_prop_support[4]; + bits32_t mlcap_flow_prop_bitmask[4]; +} mlxcx_hca_cap_flow_cap_props_t; + +typedef struct { + bits32_t mlcap_flow_flags; + uint8_t mlcap_flow_rsvd[60]; + mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_rx; + mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_rx_rdma; + mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_rx_sniffer; + mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_tx; + mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_tx_rdma; + mlxcx_hca_cap_flow_cap_props_t mlcap_flow_nic_tx_sniffer; +} mlxcx_hca_cap_flow_caps_t; + +/* + * Size of a buffer that is required to hold the output data. 
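+ * The capability structures defined above (general, ethernet, flow)
+ * are all decoded from within this single 0x1000-byte output area.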
+ */ +#define MLXCX_HCA_CAP_SIZE 0x1000 + +typedef struct { + mlxcx_cmd_in_t mlxi_query_hca_cap_head; + uint8_t mlxi_query_hca_cap_rsvd[8]; +} mlxcx_cmd_query_hca_cap_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_hca_cap_head; + uint8_t mlxo_query_hca_cap_rsvd[8]; + uint8_t mlxo_query_hca_cap_data[MLXCX_HCA_CAP_SIZE]; +} mlxcx_cmd_query_hca_cap_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_set_driver_version_head; + uint8_t mlxi_set_driver_version_rsvd[8]; + char mlxi_set_driver_version_version[64]; +} mlxcx_cmd_set_driver_version_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_set_driver_version_head; + uint8_t mlxo_set_driver_version_rsvd[8]; +} mlxcx_cmd_set_driver_version_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_alloc_uar_head; + uint8_t mlxi_alloc_uar_rsvd[8]; +} mlxcx_cmd_alloc_uar_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_alloc_uar_head; + uint8_t mlxo_alloc_uar_rsvd; + uint24be_t mlxo_alloc_uar_uar; + uint8_t mlxo_alloc_uar_rsvd2[4]; +} mlxcx_cmd_alloc_uar_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_dealloc_uar_head; + uint8_t mlxi_dealloc_uar_rsvd; + uint24be_t mlxi_dealloc_uar_uar; + uint8_t mlxi_dealloc_uar_rsvd2[4]; +} mlxcx_cmd_dealloc_uar_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_dealloc_uar_head; + uint8_t mlxo_dealloc_uar_rsvd[8]; +} mlxcx_cmd_dealloc_uar_out_t; + +/* + * This is an artificial limit that we're imposing on our actions. + */ +#define MLXCX_CREATE_QUEUE_MAX_PAGES 128 + +typedef struct { + mlxcx_cmd_in_t mlxi_create_eq_head; + uint8_t mlxi_create_eq_rsvd[8]; + mlxcx_eventq_ctx_t mlxi_create_eq_context; + uint8_t mlxi_create_eq_rsvd2[8]; + uint64be_t mlxi_create_eq_event_bitmask; + uint8_t mlxi_create_eq_rsvd3[176]; + uint64be_t mlxi_create_eq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES]; +} mlxcx_cmd_create_eq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_eq_head; + uint8_t mlxo_create_eq_rsvd[3]; + uint8_t mlxo_create_eq_eqn; + uint8_t mlxo_create_eq_rsvd2[4]; +} mlxcx_cmd_create_eq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_eq_head; + uint8_t mlxi_query_eq_rsvd[3]; + uint8_t mlxi_query_eq_eqn; + uint8_t mlxi_query_eq_rsvd2[4]; +} mlxcx_cmd_query_eq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_eq_head; + uint8_t mlxo_query_eq_rsvd[8]; + mlxcx_eventq_ctx_t mlxo_query_eq_context; + uint8_t mlxi_query_eq_rsvd2[8]; + uint64be_t mlxi_query_eq_event_bitmask; + uint8_t mlxi_query_eq_rsvd3[176]; + uint64be_t mlxi_create_eq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES]; +} mlxcx_cmd_query_eq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_eq_head; + uint8_t mlxi_destroy_eq_rsvd[3]; + uint8_t mlxi_destroy_eq_eqn; + uint8_t mlxi_destroy_eq_rsvd2[4]; +} mlxcx_cmd_destroy_eq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_eq_head; + uint8_t mlxo_destroy_eq_rsvd[8]; +} mlxcx_cmd_destroy_eq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_alloc_pd_head; + uint8_t mlxi_alloc_pd_rsvd[8]; +} mlxcx_cmd_alloc_pd_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_alloc_pd_head; + uint8_t mlxo_alloc_pd_rsvd; + uint24be_t mlxo_alloc_pd_pdn; + uint8_t mlxo_alloc_pd_rsvd2[4]; +} mlxcx_cmd_alloc_pd_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_dealloc_pd_head; + uint8_t mlxi_dealloc_pd_rsvd; + uint24be_t mlxi_dealloc_pd_pdn; + uint8_t mlxi_dealloc_pd_rsvd2[4]; +} mlxcx_cmd_dealloc_pd_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_dealloc_pd_head; + uint8_t mlxo_dealloc_pd_rsvd[8]; +} mlxcx_cmd_dealloc_pd_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_alloc_tdom_head; + uint8_t mlxi_alloc_tdom_rsvd[8]; +} 
mlxcx_cmd_alloc_tdom_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_alloc_tdom_head; + uint8_t mlxo_alloc_tdom_rsvd; + uint24be_t mlxo_alloc_tdom_tdomn; + uint8_t mlxo_alloc_tdom_rsvd2[4]; +} mlxcx_cmd_alloc_tdom_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_dealloc_tdom_head; + uint8_t mlxi_dealloc_tdom_rsvd; + uint24be_t mlxi_dealloc_tdom_tdomn; + uint8_t mlxi_dealloc_tdom_rsvd2[4]; +} mlxcx_cmd_dealloc_tdom_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_dealloc_tdom_head; + uint8_t mlxo_dealloc_tdom_rsvd[8]; +} mlxcx_cmd_dealloc_tdom_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_tir_head; + uint8_t mlxi_create_tir_rsvd[24]; + mlxcx_tir_ctx_t mlxi_create_tir_context; +} mlxcx_cmd_create_tir_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_tir_head; + uint8_t mlxo_create_tir_rsvd; + uint24be_t mlxo_create_tir_tirn; + uint8_t mlxo_create_tir_rsvd2[4]; +} mlxcx_cmd_create_tir_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_tir_head; + uint8_t mlxi_destroy_tir_rsvd; + uint24be_t mlxi_destroy_tir_tirn; + uint8_t mlxi_destroy_tir_rsvd2[4]; +} mlxcx_cmd_destroy_tir_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_tir_head; + uint8_t mlxo_destroy_tir_rsvd[8]; +} mlxcx_cmd_destroy_tir_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_tis_head; + uint8_t mlxi_create_tis_rsvd[24]; + mlxcx_tis_ctx_t mlxi_create_tis_context; +} mlxcx_cmd_create_tis_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_tis_head; + uint8_t mlxo_create_tis_rsvd; + uint24be_t mlxo_create_tis_tisn; + uint8_t mlxo_create_tis_rsvd2[4]; +} mlxcx_cmd_create_tis_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_tis_head; + uint8_t mlxi_destroy_tis_rsvd; + uint24be_t mlxi_destroy_tis_tisn; + uint8_t mlxi_destroy_tis_rsvd2[4]; +} mlxcx_cmd_destroy_tis_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_tis_head; + uint8_t mlxo_destroy_tis_rsvd[8]; +} mlxcx_cmd_destroy_tis_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_special_ctxs_head; + uint8_t mlxi_query_special_ctxs_rsvd[8]; +} mlxcx_cmd_query_special_ctxs_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_special_ctxs_head; + uint8_t mlxo_query_special_ctxs_rsvd[4]; + uint32be_t mlxo_query_special_ctxs_resd_lkey; + uint32be_t mlxo_query_special_ctxs_null_mkey; + uint8_t mlxo_query_special_ctxs_rsvd2[12]; +} mlxcx_cmd_query_special_ctxs_out_t; + +typedef enum { + MLXCX_VPORT_TYPE_VNIC = 0x0, + MLXCX_VPORT_TYPE_ESWITCH = 0x1, + MLXCX_VPORT_TYPE_UPLINK = 0x2, +} mlxcx_cmd_vport_op_mod_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_nic_vport_ctx_head; + uint8_t mlxi_query_nic_vport_ctx_other_vport; + uint8_t mlxi_query_nic_vport_ctx_rsvd[1]; + uint16be_t mlxi_query_nic_vport_ctx_vport_number; + uint8_t mlxi_query_nic_vport_ctx_allowed_list_type; + uint8_t mlxi_query_nic_vport_ctx_rsvd2[3]; +} mlxcx_cmd_query_nic_vport_ctx_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_nic_vport_ctx_head; + uint8_t mlxo_query_nic_vport_ctx_rsvd[8]; + mlxcx_nic_vport_ctx_t mlxo_query_nic_vport_ctx_context; +} mlxcx_cmd_query_nic_vport_ctx_out_t; + +typedef enum { + MLXCX_MODIFY_NIC_VPORT_CTX_ROCE_EN = 1 << 1, + MLXCX_MODIFY_NIC_VPORT_CTX_ADDR_LIST = 1 << 2, + MLXCX_MODIFY_NIC_VPORT_CTX_PERM_ADDR = 1 << 3, + MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC = 1 << 4, + MLXCX_MODIFY_NIC_VPORT_CTX_EVENT = 1 << 5, + MLXCX_MODIFY_NIC_VPORT_CTX_MTU = 1 << 6, + MLXCX_MODIFY_NIC_VPORT_CTX_WQE_INLINE = 1 << 7, + MLXCX_MODIFY_NIC_VPORT_CTX_PORT_GUID = 1 << 8, + MLXCX_MODIFY_NIC_VPORT_CTX_NODE_GUID = 1 << 9, +} 
mlxcx_modify_nic_vport_ctx_fields_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_modify_nic_vport_ctx_head; + uint8_t mlxi_modify_nic_vport_ctx_other_vport; + uint8_t mlxi_modify_nic_vport_ctx_rsvd[1]; + uint16be_t mlxi_modify_nic_vport_ctx_vport_number; + uint32be_t mlxi_modify_nic_vport_ctx_field_select; + uint8_t mlxi_modify_nic_vport_ctx_rsvd2[240]; + mlxcx_nic_vport_ctx_t mlxi_modify_nic_vport_ctx_context; +} mlxcx_cmd_modify_nic_vport_ctx_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_modify_nic_vport_ctx_head; + uint8_t mlxo_modify_nic_vport_ctx_rsvd[8]; +} mlxcx_cmd_modify_nic_vport_ctx_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_vport_state_head; + uint8_t mlxi_query_vport_state_other_vport; + uint8_t mlxi_query_vport_state_rsvd[1]; + uint16be_t mlxi_query_vport_state_vport_number; + uint8_t mlxi_query_vport_state_rsvd2[4]; +} mlxcx_cmd_query_vport_state_in_t; + +/* CSTYLED */ +#define MLXCX_VPORT_ADMIN_STATE (bitdef_t){4, 0xF0} +/* CSTYLED */ +#define MLXCX_VPORT_OPER_STATE (bitdef_t){0, 0x0F} + +typedef enum { + MLXCX_VPORT_OPER_STATE_DOWN = 0x0, + MLXCX_VPORT_OPER_STATE_UP = 0x1, +} mlxcx_vport_oper_state_t; + +typedef enum { + MLXCX_VPORT_ADMIN_STATE_DOWN = 0x0, + MLXCX_VPORT_ADMIN_STATE_UP = 0x1, + MLXCX_VPORT_ADMIN_STATE_FOLLOW = 0x2, +} mlxcx_vport_admin_state_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_vport_state_head; + uint8_t mlxo_query_vport_state_rsvd[4]; + uint16be_t mlxo_query_vport_state_max_tx_speed; + uint8_t mlxo_query_vport_state_rsvd2[1]; + uint8_t mlxo_query_vport_state_state; +} mlxcx_cmd_query_vport_state_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_cq_head; + uint8_t mlxi_create_cq_rsvd[8]; + mlxcx_completionq_ctx_t mlxi_create_cq_context; + uint8_t mlxi_create_cq_rsvd2[192]; + uint64be_t mlxi_create_cq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES]; +} mlxcx_cmd_create_cq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_cq_head; + uint8_t mlxo_create_cq_rsvd; + uint24be_t mlxo_create_cq_cqn; + uint8_t mlxo_create_cq_rsvd2[4]; +} mlxcx_cmd_create_cq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_cq_head; + uint8_t mlxi_destroy_cq_rsvd; + uint24be_t mlxi_destroy_cq_cqn; + uint8_t mlxi_destroy_cq_rsvd2[4]; +} mlxcx_cmd_destroy_cq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_cq_head; + uint8_t mlxo_destroy_cq_rsvd[8]; +} mlxcx_cmd_destroy_cq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_cq_head; + uint8_t mlxi_query_cq_rsvd; + uint24be_t mlxi_query_cq_cqn; + uint8_t mlxi_query_cq_rsvd2[4]; +} mlxcx_cmd_query_cq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_cq_head; + uint8_t mlxo_query_cq_rsvd[8]; + mlxcx_completionq_ctx_t mlxo_query_cq_context; + uint8_t mlxo_query_cq_rsvd2[192]; + uint64be_t mlxo_query_cq_pas[MLXCX_CREATE_QUEUE_MAX_PAGES]; +} mlxcx_cmd_query_cq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_rq_head; + uint8_t mlxi_create_rq_rsvd[24]; + mlxcx_rq_ctx_t mlxi_create_rq_context; +} mlxcx_cmd_create_rq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_rq_head; + uint8_t mlxo_create_rq_rsvd; + uint24be_t mlxo_create_rq_rqn; + uint8_t mlxo_create_rq_rsvd2[4]; +} mlxcx_cmd_create_rq_out_t; + +/* CSTYLED */ +#define MLXCX_CMD_MODIFY_RQ_STATE (bitdef_t){ \ + .bit_shift = 4, .bit_mask = 0xF0 } + +typedef enum { + MLXCX_MODIFY_RQ_SCATTER_FCS = 1 << 2, + MLXCX_MODIFY_RQ_VSD = 1 << 1, + MLXCX_MODIFY_RQ_COUNTER_SET_ID = 1 << 3, + MLXCX_MODIFY_RQ_LWM = 1 << 0 +} mlxcx_cmd_modify_rq_bitmask_t; + +typedef enum { + MLXCX_RQ_STATE_RST = 0x0, + MLXCX_RQ_STATE_RDY = 0x1, + 
MLXCX_RQ_STATE_ERR = 0x3 +} mlxcx_rq_state_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_modify_rq_head; + bits8_t mlxi_modify_rq_state; + uint24be_t mlxi_modify_rq_rqn; + uint8_t mlxi_modify_rq_rsvd[4]; + uint64be_t mlxi_modify_rq_bitmask; + uint8_t mlxi_modify_rq_rsvd2[8]; + mlxcx_rq_ctx_t mlxi_modify_rq_context; +} mlxcx_cmd_modify_rq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_modify_rq_head; + uint8_t mlxo_modify_rq_rsvd[8]; +} mlxcx_cmd_modify_rq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_rq_head; + uint8_t mlxi_query_rq_rsvd; + uint24be_t mlxi_query_rq_rqn; + uint8_t mlxi_query_rq_rsvd2[4]; +} mlxcx_cmd_query_rq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_rq_head; + uint8_t mlxo_query_rq_rsvd[24]; + mlxcx_rq_ctx_t mlxo_query_rq_context; +} mlxcx_cmd_query_rq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_rq_head; + uint8_t mlxi_destroy_rq_rsvd; + uint24be_t mlxi_destroy_rq_rqn; + uint8_t mlxi_destroy_rq_rsvd2[4]; +} mlxcx_cmd_destroy_rq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_rq_head; + uint8_t mlxo_destroy_rq_rsvd[8]; +} mlxcx_cmd_destroy_rq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_sq_head; + uint8_t mlxi_create_sq_rsvd[24]; + mlxcx_sq_ctx_t mlxi_create_sq_context; +} mlxcx_cmd_create_sq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_sq_head; + uint8_t mlxo_create_sq_rsvd; + uint24be_t mlxo_create_sq_sqn; + uint8_t mlxo_create_sq_rsvd2[4]; +} mlxcx_cmd_create_sq_out_t; + +/* CSTYLED */ +#define MLXCX_CMD_MODIFY_SQ_STATE (bitdef_t){ \ + .bit_shift = 4, .bit_mask = 0xF0 } + +typedef enum { + MLXCX_MODIFY_SQ_PACKET_PACING_INDEX = 1 << 0, +} mlxcx_cmd_modify_sq_bitmask_t; + +typedef enum { + MLXCX_SQ_STATE_RST = 0x0, + MLXCX_SQ_STATE_RDY = 0x1, + MLXCX_SQ_STATE_ERR = 0x3 +} mlxcx_sq_state_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_modify_sq_head; + bits8_t mlxi_modify_sq_state; + uint24be_t mlxi_modify_sq_sqn; + uint8_t mlxi_modify_sq_rsvd[4]; + uint64be_t mlxi_modify_sq_bitmask; + uint8_t mlxi_modify_sq_rsvd2[8]; + mlxcx_sq_ctx_t mlxi_modify_sq_context; +} mlxcx_cmd_modify_sq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_modify_sq_head; + uint8_t mlxo_modify_sq_rsvd[8]; +} mlxcx_cmd_modify_sq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_query_sq_head; + uint8_t mlxi_query_sq_rsvd; + uint24be_t mlxi_query_sq_sqn; + uint8_t mlxi_query_sq_rsvd2[4]; +} mlxcx_cmd_query_sq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_query_sq_head; + uint8_t mlxo_query_sq_rsvd[24]; + mlxcx_sq_ctx_t mlxo_query_sq_context; +} mlxcx_cmd_query_sq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_sq_head; + uint8_t mlxi_destroy_sq_rsvd; + uint24be_t mlxi_destroy_sq_sqn; + uint8_t mlxi_destroy_sq_rsvd2[4]; +} mlxcx_cmd_destroy_sq_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_sq_head; + uint8_t mlxo_destroy_sq_rsvd[8]; +} mlxcx_cmd_destroy_sq_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_rqt_head; + uint8_t mlxi_create_rqt_rsvd[24]; + mlxcx_rqtable_ctx_t mlxi_create_rqt_context; +} mlxcx_cmd_create_rqt_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_rqt_head; + uint8_t mlxo_create_rqt_rsvd; + uint24be_t mlxo_create_rqt_rqtn; + uint8_t mlxo_create_rqt_rsvd2[4]; +} mlxcx_cmd_create_rqt_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_rqt_head; + uint8_t mlxi_destroy_rqt_rsvd; + uint24be_t mlxi_destroy_rqt_rqtn; + uint8_t mlxi_destroy_rqt_rsvd2[4]; +} mlxcx_cmd_destroy_rqt_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_rqt_head; + uint8_t mlxo_destroy_rqt_rsvd[8]; +} 
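
RQ state changes (RST to RDY at startup, for example) are driven through the MODIFY_RQ input defined above, with the new state packed into bits 7:4 of the first byte per the MLXCX_CMD_MODIFY_RQ_STATE bitdef. A minimal sketch, not part of the patch: the example_* helper is hypothetical and command execution is elided, while set_bits8() and to_be24() are the helpers this driver uses elsewhere.

static void
example_modify_rq_to_rdy(mlxcx_cmd_modify_rq_in_t *in, uint32_t rqn)
{
	bzero(in, sizeof (*in));
	/* The new state lives in bits 7:4 of the first byte after the head. */
	set_bits8(&in->mlxi_modify_rq_state, MLXCX_CMD_MODIFY_RQ_STATE,
	    MLXCX_RQ_STATE_RDY);
	in->mlxi_modify_rq_rqn = to_be24(rqn);
	/* Leave mlxi_modify_rq_bitmask zero: no optional fields changed. */
}
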
mlxcx_cmd_destroy_rqt_out_t; + +typedef enum { + MLXCX_FLOW_TABLE_NIC_RX = 0x0, + MLXCX_FLOW_TABLE_NIC_TX = 0x1, + MLXCX_FLOW_TABLE_ESW_OUT = 0x2, + MLXCX_FLOW_TABLE_ESW_IN = 0x3, + MLXCX_FLOW_TABLE_ESW_FDB = 0x4, + MLXCX_FLOW_TABLE_NIC_RX_SNIFF = 0x5, + MLXCX_FLOW_TABLE_NIC_TX_SNIFF = 0x6, + MLXCX_FLOW_TABLE_NIC_RX_RDMA = 0x7, + MLXCX_FLOW_TABLE_NIC_TX_RDMA = 0x8 +} mlxcx_flow_table_type_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_flow_table_head; + uint8_t mlxi_create_flow_table_other_vport; + uint8_t mlxi_create_flow_table_rsvd; + uint16be_t mlxi_create_flow_table_vport_number; + uint8_t mlxi_create_flow_table_rsvd2[4]; + uint8_t mlxi_create_flow_table_table_type; + uint8_t mlxi_create_flow_table_rsvd3[7]; + mlxcx_flow_table_ctx_t mlxi_create_flow_table_context; +} mlxcx_cmd_create_flow_table_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_flow_table_head; + uint8_t mlxo_create_flow_table_rsvd; + uint24be_t mlxo_create_flow_table_table_id; + uint8_t mlxo_create_flow_table_rsvd2[4]; +} mlxcx_cmd_create_flow_table_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_destroy_flow_table_head; + uint8_t mlxi_destroy_flow_table_other_vport; + uint8_t mlxi_destroy_flow_table_rsvd; + uint16be_t mlxi_destroy_flow_table_vport_number; + uint8_t mlxi_destroy_flow_table_rsvd2[4]; + uint8_t mlxi_destroy_flow_table_table_type; + uint8_t mlxi_destroy_flow_table_rsvd3[4]; + uint24be_t mlxi_destroy_flow_table_table_id; + uint8_t mlxi_destroy_flow_table_rsvd4[4]; +} mlxcx_cmd_destroy_flow_table_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_flow_table_head; + uint8_t mlxo_destroy_flow_table_rsvd[8]; +} mlxcx_cmd_destroy_flow_table_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_set_flow_table_root_head; + uint8_t mlxi_set_flow_table_root_other_vport; + uint8_t mlxi_set_flow_table_root_rsvd; + uint16be_t mlxi_set_flow_table_root_vport_number; + uint8_t mlxi_set_flow_table_root_rsvd2[4]; + uint8_t mlxi_set_flow_table_root_table_type; + uint8_t mlxi_set_flow_table_root_rsvd3[4]; + uint24be_t mlxi_set_flow_table_root_table_id; + uint8_t mlxi_set_flow_table_root_rsvd4[4]; +} mlxcx_cmd_set_flow_table_root_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_set_flow_table_root_head; + uint8_t mlxo_set_flow_table_root_rsvd[8]; +} mlxcx_cmd_set_flow_table_root_out_t; + +typedef enum { + MLXCX_FLOW_GROUP_MATCH_OUTER_HDRS = 1 << 0, + MLXCX_FLOW_GROUP_MATCH_MISC_PARAMS = 1 << 1, + MLXCX_FLOW_GROUP_MATCH_INNER_HDRS = 1 << 2, +} mlxcx_flow_group_match_criteria_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_create_flow_group_head; + uint8_t mlxi_create_flow_group_other_vport; + uint8_t mlxi_create_flow_group_rsvd; + uint16be_t mlxi_create_flow_group_vport_number; + uint8_t mlxi_create_flow_group_rsvd2[4]; + uint8_t mlxi_create_flow_group_table_type; + uint8_t mlxi_create_flow_group_rsvd3[4]; + uint24be_t mlxi_create_flow_group_table_id; + uint8_t mlxi_create_flow_group_rsvd4[4]; + uint32be_t mlxi_create_flow_group_start_flow_index; + uint8_t mlxi_create_flow_group_rsvd5[4]; + uint32be_t mlxi_create_flow_group_end_flow_index; + uint8_t mlxi_create_flow_group_rsvd6[23]; + uint8_t mlxi_create_flow_group_match_criteria_en; + mlxcx_flow_match_t mlxi_create_flow_group_match_criteria; + uint8_t mlxi_create_flow_group_rsvd7[448]; +} mlxcx_cmd_create_flow_group_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_create_flow_group_head; + uint8_t mlxo_create_flow_group_rsvd; + uint24be_t mlxo_create_flow_group_group_id; + uint8_t mlxo_create_flow_group_rsvd2[4]; +} mlxcx_cmd_create_flow_group_out_t; + +typedef 
struct { + mlxcx_cmd_in_t mlxi_destroy_flow_group_head; + uint8_t mlxi_destroy_flow_group_other_vport; + uint8_t mlxi_destroy_flow_group_rsvd; + uint16be_t mlxi_destroy_flow_group_vport_number; + uint8_t mlxi_destroy_flow_group_rsvd2[4]; + uint8_t mlxi_destroy_flow_group_table_type; + uint8_t mlxi_destroy_flow_group_rsvd3[4]; + uint24be_t mlxi_destroy_flow_group_table_id; + uint32be_t mlxi_destroy_flow_group_group_id; + uint8_t mlxi_destroy_flow_group_rsvd4[36]; +} mlxcx_cmd_destroy_flow_group_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_destroy_flow_group_head; + uint8_t mlxo_destroy_flow_group_rsvd[8]; +} mlxcx_cmd_destroy_flow_group_out_t; + +typedef enum { + MLXCX_CMD_FLOW_ENTRY_SET_NEW = 0, + MLXCX_CMD_FLOW_ENTRY_MODIFY = 1, +} mlxcx_cmd_set_flow_table_entry_opmod_t; + +typedef enum { + MLXCX_CMD_FLOW_ENTRY_SET_ACTION = 1 << 0, + MLXCX_CMD_FLOW_ENTRY_SET_FLOW_TAG = 1 << 1, + MLXCX_CMD_FLOW_ENTRY_SET_DESTINATION = 1 << 2, + MLXCX_CMD_FLOW_ENTRY_SET_COUNTERS = 1 << 3, + MLXCX_CMD_FLOW_ENTRY_SET_ENCAP = 1 << 4 +} mlxcx_cmd_set_flow_table_entry_bitmask_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_set_flow_table_entry_head; + uint8_t mlxi_set_flow_table_entry_other_vport; + uint8_t mlxi_set_flow_table_entry_rsvd; + uint16be_t mlxi_set_flow_table_entry_vport_number; + uint8_t mlxi_set_flow_table_entry_rsvd2[4]; + uint8_t mlxi_set_flow_table_entry_table_type; + uint8_t mlxi_set_flow_table_entry_rsvd3[4]; + uint24be_t mlxi_set_flow_table_entry_table_id; + uint8_t mlxi_set_flow_table_entry_rsvd4[3]; + bits8_t mlxi_set_flow_table_entry_modify_bitmask; + uint8_t mlxi_set_flow_table_entry_rsvd5[4]; + uint32be_t mlxi_set_flow_table_entry_flow_index; + uint8_t mlxi_set_flow_table_entry_rsvd6[28]; + mlxcx_flow_entry_ctx_t mlxi_set_flow_table_entry_context; +} mlxcx_cmd_set_flow_table_entry_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_set_flow_table_entry_head; + uint8_t mlxo_set_flow_table_entry_rsvd[8]; +} mlxcx_cmd_set_flow_table_entry_out_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_delete_flow_table_entry_head; + uint8_t mlxi_delete_flow_table_entry_other_vport; + uint8_t mlxi_delete_flow_table_entry_rsvd; + uint16be_t mlxi_delete_flow_table_entry_vport_number; + uint8_t mlxi_delete_flow_table_entry_rsvd2[4]; + uint8_t mlxi_delete_flow_table_entry_table_type; + uint8_t mlxi_delete_flow_table_entry_rsvd3[4]; + uint24be_t mlxi_delete_flow_table_entry_table_id; + uint8_t mlxi_delete_flow_table_entry_rsvd4[8]; + uint32be_t mlxi_delete_flow_table_entry_flow_index; + uint8_t mlxi_delete_flow_table_entry_rsvd5[28]; +} mlxcx_cmd_delete_flow_table_entry_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_delete_flow_table_entry_head; + uint8_t mlxo_delete_flow_table_entry_rsvd[8]; +} mlxcx_cmd_delete_flow_table_entry_out_t; + +typedef enum { + MLXCX_CMD_CONFIG_INT_MOD_READ = 1, + MLXCX_CMD_CONFIG_INT_MOD_WRITE = 0 +} mlxcx_cmd_config_int_mod_opmod_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_config_int_mod_head; + uint16be_t mlxi_config_int_mod_min_delay; + uint16be_t mlxi_config_int_mod_int_vector; + uint8_t mlxi_config_int_mod_rsvd[4]; +} mlxcx_cmd_config_int_mod_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_config_int_mod_head; + uint16be_t mlxo_config_int_mod_min_delay; + uint16be_t mlxo_config_int_mod_int_vector; + uint8_t mlxo_config_int_mod_rsvd[4]; +} mlxcx_cmd_config_int_mod_out_t; + +typedef struct { + uint8_t mlrd_pmtu_rsvd; + uint8_t mlrd_pmtu_local_port; + uint8_t mlrd_pmtu_rsvd2[2]; + + uint16be_t mlrd_pmtu_max_mtu; + uint8_t mlrd_pmtu_rsvd3[2]; + + uint16be_t mlrd_pmtu_admin_mtu; + 
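+	/*
+	 * max_mtu above is read-only; admin_mtu is the value software
+	 * requests, and oper_mtu below is the MTU currently in effect
+	 * (the usual ConnectX PRM semantics -- an assumption here).
+	 */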
uint8_t mlrd_pmtu_rsvd4[2]; + + uint16be_t mlrd_pmtu_oper_mtu; + uint8_t mlrd_pmtu_rsvd5[2]; +} mlxcx_reg_pmtu_t; + +typedef enum { + MLXCX_PORT_STATUS_UP = 1, + MLXCX_PORT_STATUS_DOWN = 2, + MLXCX_PORT_STATUS_UP_ONCE = 3, + MLXCX_PORT_STATUS_DISABLED = 4, +} mlxcx_port_status_t; + +typedef enum { + MLXCX_PAOS_ADMIN_ST_EN = 1UL << 31, +} mlxcx_paos_flags_t; + +typedef struct { + uint8_t mlrd_paos_swid; + uint8_t mlrd_paos_local_port; + uint8_t mlrd_paos_admin_status; + uint8_t mlrd_paos_oper_status; + bits32_t mlrd_paos_flags; + uint8_t mlrd_paos_rsvd[8]; +} mlxcx_reg_paos_t; + +typedef enum { + MLXCX_PROTO_SGMII = 1 << 0, + MLXCX_PROTO_1000BASE_KX = 1 << 1, + MLXCX_PROTO_10GBASE_CX4 = 1 << 2, + MLXCX_PROTO_10GBASE_KX4 = 1 << 3, + MLXCX_PROTO_10GBASE_KR = 1 << 4, + MLXCX_PROTO_UNKNOWN_1 = 1 << 5, + MLXCX_PROTO_40GBASE_CR4 = 1 << 6, + MLXCX_PROTO_40GBASE_KR4 = 1 << 7, + MLXCX_PROTO_UNKNOWN_2 = 1 << 8, + MLXCX_PROTO_SGMII_100BASE = 1 << 9, + MLXCX_PROTO_UNKNOWN_3 = 1 << 10, + MLXCX_PROTO_UNKNOWN_4 = 1 << 11, + MLXCX_PROTO_10GBASE_CR = 1 << 12, + MLXCX_PROTO_10GBASE_SR = 1 << 13, + MLXCX_PROTO_10GBASE_ER_LR = 1 << 14, + MLXCX_PROTO_40GBASE_SR4 = 1 << 15, + MLXCX_PROTO_40GBASE_LR4_ER4 = 1 << 16, + MLXCX_PROTO_UNKNOWN_5 = 1 << 17, + MLXCX_PROTO_50GBASE_SR2 = 1 << 18, + MLXCX_PROTO_UNKNOWN_6 = 1 << 19, + MLXCX_PROTO_100GBASE_CR4 = 1 << 20, + MLXCX_PROTO_100GBASE_SR4 = 1 << 21, + MLXCX_PROTO_100GBASE_KR4 = 1 << 22, + MLXCX_PROTO_UNKNOWN_7 = 1 << 23, + MLXCX_PROTO_UNKNOWN_8 = 1 << 24, + MLXCX_PROTO_UNKNOWN_9 = 1 << 25, + MLXCX_PROTO_UNKNOWN_10 = 1 << 26, + MLXCX_PROTO_25GBASE_CR = 1 << 27, + MLXCX_PROTO_25GBASE_KR = 1 << 28, + MLXCX_PROTO_25GBASE_SR = 1 << 29, + MLXCX_PROTO_50GBASE_CR2 = 1 << 30, + MLXCX_PROTO_50GBASE_KR2 = 1UL << 31, +} mlxcx_eth_proto_t; + +typedef enum { + MLXCX_AUTONEG_DISABLE_CAP = 1 << 5, + MLXCX_AUTONEG_DISABLE = 1 << 6 +} mlxcx_autoneg_flags_t; + +typedef enum { + MLXCX_PTYS_PROTO_MASK_IB = 1 << 0, + MLXCX_PTYS_PROTO_MASK_ETH = 1 << 2, +} mlxcx_reg_ptys_proto_mask_t; + +typedef struct { + bits8_t mlrd_ptys_autoneg_flags; + uint8_t mlrd_ptys_local_port; + uint8_t mlrd_ptys_rsvd; + bits8_t mlrd_ptys_proto_mask; + + bits8_t mlrd_ptys_autoneg_status; + uint8_t mlrd_ptys_rsvd2; + uint16be_t mlrd_ptys_data_rate_oper; + + uint8_t mlrd_ptys_rsvd3[4]; + + bits32_t mlrd_ptys_proto_cap; + uint8_t mlrd_ptys_rsvd4[8]; + bits32_t mlrd_ptys_proto_admin; + uint8_t mlrd_ptys_rsvd5[8]; + bits32_t mlrd_ptys_proto_oper; + uint8_t mlrd_ptys_rsvd6[8]; + bits32_t mlrd_ptys_proto_partner_advert; + uint8_t mlrd_ptys_rsvd7[12]; +} mlxcx_reg_ptys_t; + +typedef enum { + MLXCX_LED_TYPE_BOTH = 0x0, + MLXCX_LED_TYPE_UID = 0x1, + MLXCX_LED_TYPE_PORT = 0x2, +} mlxcx_led_type_t; + +#define MLXCX_MLCR_INDIVIDUAL_ONLY (1 << 4) +/* CSTYLED */ +#define MLXCX_MLCR_LED_TYPE (bitdef_t){ 0, 0x0F } + +typedef struct { + uint8_t mlrd_mlcr_rsvd; + uint8_t mlrd_mlcr_local_port; + uint8_t mlrd_mlcr_rsvd2; + bits8_t mlrd_mlcr_flags; + uint8_t mlrd_mlcr_rsvd3[2]; + uint16be_t mlrd_mlcr_beacon_duration; + uint8_t mlrd_mlcr_rsvd4[2]; + uint16be_t mlrd_mlcr_beacon_remain; +} mlxcx_reg_mlcr_t; + +typedef struct { + uint8_t mlrd_pmaos_rsvd; + uint8_t mlrd_pmaos_module; + uint8_t mlrd_pmaos_admin_status; + uint8_t mlrd_pmaos_oper_status; + bits8_t mlrd_pmaos_flags; + uint8_t mlrd_pmaos_rsvd2; + uint8_t mlrd_pmaos_error_type; + uint8_t mlrd_pmaos_event_en; + uint8_t mlrd_pmaos_rsvd3[8]; +} mlxcx_reg_pmaos_t; + +typedef enum { + MLXCX_MCIA_STATUS_OK = 0x0, + MLXCX_MCIA_STATUS_NO_EEPROM = 0x1, + MLXCX_MCIA_STATUS_NOT_SUPPORTED = 
0x2, + MLXCX_MCIA_STATUS_NOT_CONNECTED = 0x3, + MLXCX_MCIA_STATUS_I2C_ERROR = 0x9, + MLXCX_MCIA_STATUS_DISABLED = 0x10 +} mlxcx_mcia_status_t; + +typedef struct { + bits8_t mlrd_mcia_flags; + uint8_t mlrd_mcia_module; + uint8_t mlrd_mcia_rsvd; + uint8_t mlrd_mcia_status; + uint8_t mlrd_mcia_i2c_device_addr; + uint8_t mlrd_mcia_page_number; + uint16be_t mlrd_mcia_device_addr; + uint8_t mlrd_mcia_rsvd2[2]; + uint16be_t mlrd_mcia_size; + uint8_t mlrd_mcia_rsvd3[4]; + uint8_t mlrd_mcia_data[48]; +} mlxcx_reg_mcia_t; + +typedef struct { + uint64be_t mlppc_ieee_802_3_frames_tx; + uint64be_t mlppc_ieee_802_3_frames_rx; + uint64be_t mlppc_ieee_802_3_fcs_err; + uint64be_t mlppc_ieee_802_3_align_err; + uint64be_t mlppc_ieee_802_3_bytes_tx; + uint64be_t mlppc_ieee_802_3_bytes_rx; + uint64be_t mlppc_ieee_802_3_mcast_tx; + uint64be_t mlppc_ieee_802_3_bcast_tx; + uint64be_t mlppc_ieee_802_3_mcast_rx; + uint64be_t mlppc_ieee_802_3_bcast_rx; + uint64be_t mlppc_ieee_802_3_in_range_len_err; + uint64be_t mlppc_ieee_802_3_out_of_range_len_err; + uint64be_t mlppc_ieee_802_3_frame_too_long_err; + uint64be_t mlppc_ieee_802_3_symbol_err; + uint64be_t mlppc_ieee_802_3_mac_ctrl_tx; + uint64be_t mlppc_ieee_802_3_mac_ctrl_rx; + uint64be_t mlppc_ieee_802_3_unsup_opcodes_rx; + uint64be_t mlppc_ieee_802_3_pause_rx; + uint64be_t mlppc_ieee_802_3_pause_tx; +} mlxcx_ppcnt_ieee_802_3_t; + +typedef struct { + uint64be_t mlppc_rfc_2863_in_octets; + uint64be_t mlppc_rfc_2863_in_ucast_pkts; + uint64be_t mlppc_rfc_2863_in_discards; + uint64be_t mlppc_rfc_2863_in_errors; + uint64be_t mlppc_rfc_2863_in_unknown_protos; + uint64be_t mlppc_rfc_2863_out_octets; + uint64be_t mlppc_rfc_2863_out_ucast_pkts; + uint64be_t mlppc_rfc_2863_out_discards; + uint64be_t mlppc_rfc_2863_out_errors; + uint64be_t mlppc_rfc_2863_in_mcast_pkts; + uint64be_t mlppc_rfc_2863_in_bcast_pkts; + uint64be_t mlppc_rfc_2863_out_mcast_pkts; + uint64be_t mlppc_rfc_2863_out_bcast_pkts; +} mlxcx_ppcnt_rfc_2863_t; + +typedef struct { + uint64be_t mlppc_phy_stats_time_since_last_clear; + uint64be_t mlppc_phy_stats_rx_bits; + uint64be_t mlppc_phy_stats_symbol_errs; + uint64be_t mlppc_phy_stats_corrected_bits; + uint8_t mlppc_phy_stats_rsvd[2]; + uint8_t mlppc_phy_stats_raw_ber_mag; + uint8_t mlppc_phy_stats_raw_ber_coef; + uint8_t mlppc_phy_stats_rsvd2[2]; + uint8_t mlppc_phy_stats_eff_ber_mag; + uint8_t mlppc_phy_stats_eff_ber_coef; +} mlxcx_ppcnt_phy_stats_t; + +typedef enum { + MLXCX_PPCNT_GRP_IEEE_802_3 = 0x0, + MLXCX_PPCNT_GRP_RFC_2863 = 0x1, + MLXCX_PPCNT_GRP_RFC_2819 = 0x2, + MLXCX_PPCNT_GRP_RFC_3635 = 0x3, + MLXCX_PPCNT_GRP_ETH_EXTD = 0x5, + MLXCX_PPCNT_GRP_ETH_DISCARD = 0x6, + MLXCX_PPCNT_GRP_PER_PRIO = 0x10, + MLXCX_PPCNT_GRP_PER_TC = 0x11, + MLXCX_PPCNT_GRP_PER_TC_CONGEST = 0x13, + MLXCX_PPCNT_GRP_PHY_STATS = 0x16 +} mlxcx_ppcnt_grp_t; + +typedef enum { + MLXCX_PPCNT_CLEAR = (1 << 7), + MLXCX_PPCNT_NO_CLEAR = 0 +} mlxcx_ppcnt_clear_t; + +typedef struct { + uint8_t mlrd_ppcnt_swid; + uint8_t mlrd_ppcnt_local_port; + uint8_t mlrd_ppcnt_pnat; + uint8_t mlrd_ppcnt_grp; + uint8_t mlrd_ppcnt_clear; + uint8_t mlrd_ppcnt_rsvd[2]; + uint8_t mlrd_ppcnt_prio_tc; + union { + uint8_t mlrd_ppcnt_data[248]; + mlxcx_ppcnt_ieee_802_3_t mlrd_ppcnt_ieee_802_3; + mlxcx_ppcnt_rfc_2863_t mlrd_ppcnt_rfc_2863; + mlxcx_ppcnt_phy_stats_t mlrd_ppcnt_phy_stats; + }; +} mlxcx_reg_ppcnt_t; + +typedef enum { + MLXCX_REG_PMTU = 0x5003, + MLXCX_REG_PTYS = 0x5004, + MLXCX_REG_PAOS = 0x5006, + MLXCX_REG_PMAOS = 0x5012, + MLXCX_REG_MSGI = 0x9021, + MLXCX_REG_MLCR = 0x902B, + MLXCX_REG_MCIA = 
0x9014, + MLXCX_REG_PPCNT = 0x5008, +} mlxcx_register_id_t; + +typedef union { + mlxcx_reg_pmtu_t mlrd_pmtu; + mlxcx_reg_paos_t mlrd_paos; + mlxcx_reg_ptys_t mlrd_ptys; + mlxcx_reg_mlcr_t mlrd_mlcr; + mlxcx_reg_pmaos_t mlrd_pmaos; + mlxcx_reg_mcia_t mlrd_mcia; + mlxcx_reg_ppcnt_t mlrd_ppcnt; +} mlxcx_register_data_t; + +typedef enum { + MLXCX_CMD_ACCESS_REGISTER_READ = 1, + MLXCX_CMD_ACCESS_REGISTER_WRITE = 0 +} mlxcx_cmd_reg_opmod_t; + +typedef struct { + mlxcx_cmd_in_t mlxi_access_register_head; + uint8_t mlxi_access_register_rsvd[2]; + uint16be_t mlxi_access_register_register_id; + uint32be_t mlxi_access_register_argument; + mlxcx_register_data_t mlxi_access_register_data; +} mlxcx_cmd_access_register_in_t; + +typedef struct { + mlxcx_cmd_out_t mlxo_access_register_head; + uint8_t mlxo_access_register_rsvd[8]; + mlxcx_register_data_t mlxo_access_register_data; +} mlxcx_cmd_access_register_out_t; + +#pragma pack() + +#ifdef __cplusplus +} +#endif + +#endif /* _MLXCX_REG_H */ diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c new file mode 100644 index 0000000000..8337545b57 --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c @@ -0,0 +1,2264 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020, The University of Queensland + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * Mellanox Connect-X 4/5/6 driver. + */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/sysmacros.h> +#include <sys/atomic.h> +#include <sys/cpuvar.h> + +#include <sys/pattr.h> +#include <sys/dlpi.h> + +#include <sys/mac_provider.h> + +#include <sys/random.h> + +#include <mlxcx.h> + +boolean_t +mlxcx_wq_alloc_dma(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + boolean_t ret; + size_t sz; + + VERIFY0(mlwq->mlwq_state & MLXCX_WQ_ALLOC); + + /* Receive and send queue entries might be different sizes. */ + switch (mlwq->mlwq_type) { + case MLXCX_WQ_TYPE_SENDQ: + mlwq->mlwq_entshift = mlxp->mlx_props.mldp_sq_size_shift; + mlwq->mlwq_nents = (1 << mlwq->mlwq_entshift); + sz = mlwq->mlwq_nents * sizeof (mlxcx_sendq_ent_t); + break; + case MLXCX_WQ_TYPE_RECVQ: + mlwq->mlwq_entshift = mlxp->mlx_props.mldp_rq_size_shift; + mlwq->mlwq_nents = (1 << mlwq->mlwq_entshift); + sz = mlwq->mlwq_nents * sizeof (mlxcx_recvq_ent_t); + break; + default: + VERIFY(0); + return (B_FALSE); + } + ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0); + + mlxcx_dma_acc_attr(mlxp, &acc); + mlxcx_dma_queue_attr(mlxp, &attr); + + ret = mlxcx_dma_alloc(mlxp, &mlwq->mlwq_dma, &attr, &acc, + B_TRUE, sz, B_TRUE); + if (!ret) { + mlxcx_warn(mlxp, "failed to allocate WQ memory"); + return (B_FALSE); + } + + /* + * Just set the first pointer in the union. Yes, this is a strict + * aliasing violation. No, I don't care. 
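+	 * (mlwq_send_ent and mlwq_recv_ent overlay the same DMA buffer;
+	 * only the pointer matching mlwq_type is ever dereferenced.)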
+	 */
+	mlwq->mlwq_send_ent = (mlxcx_sendq_ent_t *)mlwq->mlwq_dma.mxdb_va;
+
+	mlxcx_dma_acc_attr(mlxp, &acc);
+	mlxcx_dma_qdbell_attr(mlxp, &attr);
+	sz = sizeof (mlxcx_workq_doorbell_t);
+	ret = mlxcx_dma_alloc(mlxp, &mlwq->mlwq_doorbell_dma, &attr, &acc,
+	    B_TRUE, sz, B_TRUE);
+	if (!ret) {
+		mlxcx_warn(mlxp, "failed to allocate WQ doorbell memory");
+		mlxcx_dma_free(&mlwq->mlwq_dma);
+		mlwq->mlwq_send_ent = NULL;
+		return (B_FALSE);
+	}
+
+	mlwq->mlwq_doorbell =
+	    (mlxcx_workq_doorbell_t *)mlwq->mlwq_doorbell_dma.mxdb_va;
+
+	mlwq->mlwq_state |= MLXCX_WQ_ALLOC;
+
+	return (B_TRUE);
+}
+
+void
+mlxcx_wq_rele_dma(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+	VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
+	if (mlwq->mlwq_state & MLXCX_WQ_CREATED)
+		VERIFY(mlwq->mlwq_state & MLXCX_WQ_DESTROYED);
+
+	mlxcx_dma_free(&mlwq->mlwq_dma);
+	mlwq->mlwq_send_ent = NULL;
+	mlxcx_dma_free(&mlwq->mlwq_doorbell_dma);
+	mlwq->mlwq_doorbell = NULL;
+
+	mlwq->mlwq_state &= ~MLXCX_WQ_ALLOC;
+}
+
+boolean_t
+mlxcx_cq_alloc_dma(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+	ddi_device_acc_attr_t acc;
+	ddi_dma_attr_t attr;
+	boolean_t ret;
+	size_t sz, i;
+
+	VERIFY0(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+
+	mlcq->mlcq_entshift = mlxp->mlx_props.mldp_cq_size_shift;
+	mlcq->mlcq_nents = (1 << mlcq->mlcq_entshift);
+	sz = mlcq->mlcq_nents * sizeof (mlxcx_completionq_ent_t);
+	ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0);
+
+	mlxcx_dma_acc_attr(mlxp, &acc);
+	mlxcx_dma_queue_attr(mlxp, &attr);
+
+	ret = mlxcx_dma_alloc(mlxp, &mlcq->mlcq_dma, &attr, &acc,
+	    B_TRUE, sz, B_TRUE);
+	if (!ret) {
+		mlxcx_warn(mlxp, "failed to allocate CQ memory");
+		return (B_FALSE);
+	}
+
+	mlcq->mlcq_ent = (mlxcx_completionq_ent_t *)mlcq->mlcq_dma.mxdb_va;
+
+	for (i = 0; i < mlcq->mlcq_nents; ++i) {
+		mlcq->mlcq_ent[i].mlcqe_opcode = MLXCX_CQE_OP_INVALID;
+		mlcq->mlcq_ent[i].mlcqe_owner = MLXCX_CQE_OWNER_INIT;
+	}
+
+	mlxcx_dma_acc_attr(mlxp, &acc);
+	mlxcx_dma_qdbell_attr(mlxp, &attr);
+	sz = sizeof (mlxcx_completionq_doorbell_t);
+	ret = mlxcx_dma_alloc(mlxp, &mlcq->mlcq_doorbell_dma, &attr, &acc,
+	    B_TRUE, sz, B_TRUE);
+	if (!ret) {
+		mlxcx_warn(mlxp, "failed to allocate CQ doorbell memory");
+		mlxcx_dma_free(&mlcq->mlcq_dma);
+		mlcq->mlcq_ent = NULL;
+		return (B_FALSE);
+	}
+
+	mlcq->mlcq_doorbell =
+	    (mlxcx_completionq_doorbell_t *)mlcq->mlcq_doorbell_dma.mxdb_va;
+
+	mlcq->mlcq_state |= MLXCX_CQ_ALLOC;
+
+	return (B_TRUE);
+}
+
+void
+mlxcx_cq_rele_dma(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+	VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
+	if (mlcq->mlcq_state & MLXCX_CQ_CREATED)
+		VERIFY(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);
+
+	mlxcx_dma_free(&mlcq->mlcq_dma);
+	mlcq->mlcq_ent = NULL;
+	mlxcx_dma_free(&mlcq->mlcq_doorbell_dma);
+	mlcq->mlcq_doorbell = NULL;
+
+	mlcq->mlcq_state &= ~MLXCX_CQ_ALLOC;
+}
+
+void
+mlxcx_wq_teardown(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
+{
+	mlxcx_completion_queue_t *mlcq;
+
+	/*
+	 * If something is holding the lock on a long operation like a
+	 * refill, setting this flag asks them to exit early if possible.
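+	 * The flag is set with atomic_or_uint() because we do not yet
+	 * hold mlwq_mtx at this point.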
+	 */
+	atomic_or_uint(&mlwq->mlwq_state, MLXCX_WQ_TEARDOWN);
+
+	mutex_enter(&mlwq->mlwq_mtx);
+
+	list_remove(&mlxp->mlx_wqs, mlwq);
+
+	if ((mlwq->mlwq_state & MLXCX_WQ_CREATED) &&
+	    !(mlwq->mlwq_state & MLXCX_WQ_DESTROYED)) {
+		if (mlwq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
+		    mlwq->mlwq_state & MLXCX_WQ_STARTED &&
+		    !mlxcx_cmd_stop_rq(mlxp, mlwq)) {
+			mlxcx_warn(mlxp, "failed to stop "
+			    "recv queue num %x", mlwq->mlwq_num);
+		}
+		if (mlwq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
+		    mlwq->mlwq_state & MLXCX_WQ_STARTED &&
+		    !mlxcx_cmd_stop_sq(mlxp, mlwq)) {
+			mlxcx_warn(mlxp, "failed to stop "
+			    "send queue num %x", mlwq->mlwq_num);
+		}
+		if (mlwq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
+		    !mlxcx_cmd_destroy_rq(mlxp, mlwq)) {
+			mlxcx_warn(mlxp, "failed to destroy "
+			    "recv queue num %x", mlwq->mlwq_num);
+		}
+		if (mlwq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
+		    !mlxcx_cmd_destroy_sq(mlxp, mlwq)) {
+			mlxcx_warn(mlxp, "failed to destroy "
+			    "send queue num %x", mlwq->mlwq_num);
+		}
+	}
+	if (mlwq->mlwq_state & MLXCX_WQ_ALLOC) {
+		mlxcx_wq_rele_dma(mlxp, mlwq);
+	}
+	mlcq = mlwq->mlwq_cq;
+
+	/* These will be released by mlxcx_teardown_bufs() */
+	mlwq->mlwq_bufs = NULL;
+	mlwq->mlwq_foreign_bufs = NULL;
+
+	mutex_exit(&mlwq->mlwq_mtx);
+
+	mutex_enter(&mlcq->mlcq_mtx);
+	mutex_enter(&mlwq->mlwq_mtx);
+	ASSERT3P(mlcq->mlcq_wq, ==, mlwq);
+	mlcq->mlcq_wq = NULL;
+	mutex_exit(&mlwq->mlwq_mtx);
+	mutex_exit(&mlcq->mlcq_mtx);
+
+	mutex_destroy(&mlwq->mlwq_mtx);
+}
+
+void
+mlxcx_cq_teardown(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
+{
+	mlxcx_event_queue_t *mleq;
+	mlxcx_buffer_t *b;
+
+	/*
+	 * If something is holding the lock on a long operation like polling
+	 * which we're going to abort anyway, this flag asks them to exit
+	 * early if possible.
+	 */
+	atomic_or_uint(&mlcq->mlcq_state, MLXCX_CQ_TEARDOWN);
+
+	mutex_enter(&mlcq->mlcq_mtx);
+
+	list_remove(&mlxp->mlx_cqs, mlcq);
+
+	if ((mlcq->mlcq_state & MLXCX_CQ_CREATED) &&
+	    !(mlcq->mlcq_state & MLXCX_CQ_DESTROYED)) {
+		if (!mlxcx_cmd_destroy_cq(mlxp, mlcq)) {
+			mlxcx_warn(mlxp, "failed to destroy "
+			    "completion queue num %u",
+			    mlcq->mlcq_num);
+		}
+	}
+	if (mlcq->mlcq_state & MLXCX_CQ_ALLOC) {
+		mlxcx_cq_rele_dma(mlxp, mlcq);
+	}
+	/*
+	 * If we're on an EQ AVL tree, then we need to grab
+	 * the EQ's mutex to take it off. The ISR always takes
+	 * EQ mutex before CQ mutex, so we have to let go of
+	 * the CQ mutex then come back again.
+	 *
+	 * The ISR will bail out if it tries to touch this CQ now since
+	 * we added the CQ_DESTROYED flag above.
+	 */
+	if (mlcq->mlcq_state & MLXCX_CQ_EQAVL) {
+		mleq = mlcq->mlcq_eq;
+	} else {
+		mleq = NULL;
+	}
+
+	/* Return any outstanding buffers to the free pool. */
+	while ((b = list_remove_head(&mlcq->mlcq_buffers)) != NULL) {
+		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+	}
+	mutex_enter(&mlcq->mlcq_bufbmtx);
+	while ((b = list_remove_head(&mlcq->mlcq_buffers_b)) != NULL) {
+		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
+	}
+	mutex_exit(&mlcq->mlcq_bufbmtx);
+
+	/*
+	 * Since the interrupt handlers take the EQ lock before the CQ one,
+	 * we must do the same here. That means letting go of the lock
+	 * for a brief window here (we'll double-check the state when we
+	 * get back in).
+	 */
+	mutex_exit(&mlcq->mlcq_mtx);
+
+	if (mleq != NULL) {
+		mutex_enter(&mleq->mleq_mtx);
+		mutex_enter(&mlcq->mlcq_mtx);
+		/*
+		 * Double-check the state: we let go of the
+		 * mutex briefly.
+ */ + if (mlcq->mlcq_state & MLXCX_CQ_EQAVL) { + avl_remove(&mleq->mleq_cqs, mlcq); + mlcq->mlcq_state &= ~MLXCX_CQ_EQAVL; + } + mutex_exit(&mlcq->mlcq_mtx); + mutex_exit(&mleq->mleq_mtx); + } + + mutex_enter(&mlcq->mlcq_mtx); + ASSERT0(mlcq->mlcq_state & ~(MLXCX_CQ_CREATED | MLXCX_CQ_DESTROYED | + MLXCX_CQ_TEARDOWN | MLXCX_CQ_ARMED)); + mutex_exit(&mlcq->mlcq_mtx); + + mutex_destroy(&mlcq->mlcq_mtx); + mutex_destroy(&mlcq->mlcq_bufbmtx); + list_destroy(&mlcq->mlcq_buffers); + list_destroy(&mlcq->mlcq_buffers_b); + kmem_free(mlcq, sizeof (mlxcx_completion_queue_t)); +} + +static boolean_t +mlxcx_cq_setup(mlxcx_t *mlxp, mlxcx_event_queue_t *eq, + mlxcx_completion_queue_t **cqp) +{ + mlxcx_completion_queue_t *cq; + + cq = kmem_zalloc(sizeof (mlxcx_completion_queue_t), KM_SLEEP); + mutex_init(&cq->mlcq_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + mutex_init(&cq->mlcq_bufbmtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + list_create(&cq->mlcq_buffers, sizeof (mlxcx_buffer_t), + offsetof(mlxcx_buffer_t, mlb_cq_entry)); + list_create(&cq->mlcq_buffers_b, sizeof (mlxcx_buffer_t), + offsetof(mlxcx_buffer_t, mlb_cq_entry)); + + cq->mlcq_mlx = mlxp; + list_insert_tail(&mlxp->mlx_cqs, cq); + + mutex_enter(&cq->mlcq_mtx); + + if (!mlxcx_cq_alloc_dma(mlxp, cq)) { + mutex_exit(&cq->mlcq_mtx); + return (B_FALSE); + } + + cq->mlcq_bufhwm = cq->mlcq_nents - MLXCX_CQ_HWM_GAP; + cq->mlcq_buflwm = cq->mlcq_nents - MLXCX_CQ_LWM_GAP; + + cq->mlcq_uar = &mlxp->mlx_uar; + cq->mlcq_eq = eq; + + cq->mlcq_cqemod_period_usec = mlxp->mlx_props.mldp_cqemod_period_usec; + cq->mlcq_cqemod_count = mlxp->mlx_props.mldp_cqemod_count; + + if (!mlxcx_cmd_create_cq(mlxp, cq)) { + mutex_exit(&cq->mlcq_mtx); + return (B_FALSE); + } + + mutex_exit(&cq->mlcq_mtx); + + mutex_enter(&eq->mleq_mtx); + mutex_enter(&cq->mlcq_mtx); + ASSERT0(cq->mlcq_state & MLXCX_CQ_EQAVL); + avl_add(&eq->mleq_cqs, cq); + cq->mlcq_state |= MLXCX_CQ_EQAVL; + mlxcx_arm_cq(mlxp, cq); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&eq->mleq_mtx); + + *cqp = cq; + return (B_TRUE); +} + +static boolean_t +mlxcx_rq_setup(mlxcx_t *mlxp, mlxcx_completion_queue_t *cq, + mlxcx_work_queue_t *wq) +{ + mutex_init(&wq->mlwq_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + + list_insert_tail(&mlxp->mlx_wqs, wq); + + mutex_enter(&wq->mlwq_mtx); + + wq->mlwq_mlx = mlxp; + wq->mlwq_type = MLXCX_WQ_TYPE_RECVQ; + wq->mlwq_cq = cq; + wq->mlwq_pd = &mlxp->mlx_pd; + wq->mlwq_uar = &mlxp->mlx_uar; + + wq->mlwq_bufs = mlxcx_mlbs_create(mlxp); + + if (!mlxcx_wq_alloc_dma(mlxp, wq)) { + mutex_exit(&wq->mlwq_mtx); + return (B_FALSE); + } + + if (!mlxcx_cmd_create_rq(mlxp, wq)) { + mutex_exit(&wq->mlwq_mtx); + return (B_FALSE); + } + + mutex_exit(&wq->mlwq_mtx); + + mutex_enter(&cq->mlcq_mtx); + mutex_enter(&wq->mlwq_mtx); + ASSERT3P(cq->mlcq_wq, ==, NULL); + cq->mlcq_wq = wq; + mutex_exit(&wq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + + return (B_TRUE); +} + +static boolean_t +mlxcx_sq_setup(mlxcx_t *mlxp, mlxcx_port_t *port, mlxcx_completion_queue_t *cq, + mlxcx_tis_t *tis, mlxcx_work_queue_t *wq) +{ + mutex_init(&wq->mlwq_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + + list_insert_tail(&mlxp->mlx_wqs, wq); + + mutex_enter(&wq->mlwq_mtx); + + wq->mlwq_mlx = mlxp; + wq->mlwq_type = MLXCX_WQ_TYPE_SENDQ; + wq->mlwq_cq = cq; + wq->mlwq_pd = &mlxp->mlx_pd; + wq->mlwq_uar = &mlxp->mlx_uar; + wq->mlwq_tis = tis; + + wq->mlwq_bufs = mlxcx_mlbs_create(mlxp); + wq->mlwq_foreign_bufs = mlxcx_mlbs_create(mlxp); + + 
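+	/*
+	 * We always inline at least the Ethernet (L2) header into send
+	 * WQEs; the VERIFY below checks that the port does not require
+	 * inlining more than that.
+	 */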
VERIFY3U(port->mlp_wqe_min_inline, <=, MLXCX_ETH_INLINE_L2); + wq->mlwq_inline_mode = MLXCX_ETH_INLINE_L2; + + if (!mlxcx_wq_alloc_dma(mlxp, wq)) { + mutex_exit(&wq->mlwq_mtx); + return (B_FALSE); + } + + if (!mlxcx_cmd_create_sq(mlxp, wq)) { + mutex_exit(&wq->mlwq_mtx); + return (B_FALSE); + } + + mutex_exit(&wq->mlwq_mtx); + + mutex_enter(&cq->mlcq_mtx); + mutex_enter(&wq->mlwq_mtx); + ASSERT3P(cq->mlcq_wq, ==, NULL); + cq->mlcq_wq = wq; + mutex_exit(&wq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + + return (B_TRUE); +} + +void +mlxcx_teardown_rx_group(mlxcx_t *mlxp, mlxcx_ring_group_t *g) +{ + mlxcx_work_queue_t *wq; + mlxcx_completion_queue_t *cq; + mlxcx_flow_entry_t *fe; + mlxcx_flow_group_t *fg; + mlxcx_flow_table_t *ft; + uint_t i; + + mutex_enter(&g->mlg_port->mlp_mtx); + mutex_enter(&g->mlg_mtx); + + if (g->mlg_state & MLXCX_GROUP_FLOWS) { + mlxcx_remove_all_umcast_entries(mlxp, g->mlg_port, g); + + if (g->mlg_rx_vlan_ft != NULL) + mlxcx_remove_all_vlan_entries(mlxp, g); + + if (g == &mlxp->mlx_rx_groups[0]) { + ft = g->mlg_port->mlp_rx_flow; + mutex_enter(&ft->mlft_mtx); + + fg = g->mlg_port->mlp_bcast; + fe = list_head(&fg->mlfg_entries); + if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { + (void) mlxcx_cmd_delete_flow_table_entry( + mlxp, fe); + } + + fg = g->mlg_port->mlp_promisc; + fe = list_head(&fg->mlfg_entries); + if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { + (void) mlxcx_cmd_delete_flow_table_entry( + mlxp, fe); + } + + mutex_exit(&ft->mlft_mtx); + } + + if (g->mlg_rx_vlan_ft != NULL) { + mutex_enter(&g->mlg_rx_vlan_ft->mlft_mtx); + ASSERT(list_is_empty(&g->mlg_rx_vlans)); + fg = g->mlg_rx_vlan_def_fg; + fe = list_head(&fg->mlfg_entries); + if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { + (void) mlxcx_cmd_delete_flow_table_entry( + mlxp, fe); + } + fg = g->mlg_rx_vlan_promisc_fg; + fe = list_head(&fg->mlfg_entries); + if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { + (void) mlxcx_cmd_delete_flow_table_entry( + mlxp, fe); + } + mlxcx_teardown_flow_table(mlxp, g->mlg_rx_vlan_ft); + list_destroy(&g->mlg_rx_vlans); + + g->mlg_rx_vlan_ft = NULL; + } + + mutex_enter(&g->mlg_rx_hash_ft->mlft_mtx); + mlxcx_teardown_flow_table(mlxp, g->mlg_rx_hash_ft); + g->mlg_rx_hash_ft = NULL; + + avl_destroy(&g->mlg_rx_macs); + g->mlg_state &= ~MLXCX_GROUP_FLOWS; + } + + if (g->mlg_state & MLXCX_GROUP_RUNNING) { + for (i = 0; i < g->mlg_nwqs; ++i) { + wq = &g->mlg_wqs[i]; + mutex_enter(&wq->mlwq_mtx); + if (wq->mlwq_state & MLXCX_WQ_STARTED && + !mlxcx_cmd_stop_rq(mlxp, wq)) { + mlxcx_warn(mlxp, "failed to stop rq %x", + wq->mlwq_num); + } + mutex_exit(&wq->mlwq_mtx); + } + g->mlg_state &= ~MLXCX_GROUP_RUNNING; + } + + if (g->mlg_state & MLXCX_GROUP_TIRTIS) { + for (i = 0; i < MLXCX_TIRS_PER_GROUP; ++i) { + mlxcx_tir_t *tir = &g->mlg_tir[i]; + if (tir->mltir_state & MLXCX_TIR_CREATED && + !(tir->mltir_state & MLXCX_TIR_DESTROYED)) { + if (!mlxcx_cmd_destroy_tir(mlxp, tir)) { + mlxcx_warn(mlxp, + "failed to destroy tir %u " + "for rx ring", tir->mltir_num); + } + } + } + g->mlg_state &= ~MLXCX_GROUP_TIRTIS; + } + + if (g->mlg_state & MLXCX_GROUP_RQT) { + if (g->mlg_rqt->mlrqt_state & MLXCX_RQT_CREATED && + !(g->mlg_rqt->mlrqt_state & MLXCX_RQT_DESTROYED)) { + if (!mlxcx_cmd_destroy_rqt(mlxp, g->mlg_rqt)) { + mlxcx_warn(mlxp, "failed to destroy rqt %u " + "for rx ring", g->mlg_rqt->mlrqt_num); + } + kmem_free(g->mlg_rqt->mlrqt_rq, + g->mlg_rqt->mlrqt_rq_size); + g->mlg_rqt->mlrqt_rq = NULL; + kmem_free(g->mlg_rqt, sizeof (mlxcx_rqtable_t)); + g->mlg_rqt = NULL; + } + g->mlg_state &= 
~MLXCX_GROUP_RQT; + } + + for (i = 0; i < g->mlg_nwqs; ++i) { + wq = &g->mlg_wqs[i]; + cq = wq->mlwq_cq; + mlxcx_wq_teardown(mlxp, wq); + if (cq != NULL) + mlxcx_cq_teardown(mlxp, cq); + } + kmem_free(g->mlg_wqs, g->mlg_wqs_size); + g->mlg_wqs = NULL; + g->mlg_state &= ~MLXCX_GROUP_WQS; + + mutex_exit(&g->mlg_mtx); + mutex_exit(&g->mlg_port->mlp_mtx); + + mutex_destroy(&g->mlg_mtx); + + g->mlg_state &= ~MLXCX_GROUP_INIT; + ASSERT3S(g->mlg_state, ==, 0); +} + +void +mlxcx_teardown_tx_group(mlxcx_t *mlxp, mlxcx_ring_group_t *g) +{ + mlxcx_work_queue_t *wq; + mlxcx_completion_queue_t *cq; + uint_t i; + + mutex_enter(&g->mlg_mtx); + + if (g->mlg_state & MLXCX_GROUP_WQS) { + for (i = 0; i < g->mlg_nwqs; ++i) { + wq = &g->mlg_wqs[i]; + mutex_enter(&wq->mlwq_mtx); + cq = wq->mlwq_cq; + if (wq->mlwq_state & MLXCX_WQ_STARTED && + !mlxcx_cmd_stop_sq(mlxp, wq)) { + mlxcx_warn(mlxp, "failed to stop sq %x", + wq->mlwq_num); + } + mutex_exit(&wq->mlwq_mtx); + mlxcx_wq_teardown(mlxp, wq); + if (cq != NULL) + mlxcx_cq_teardown(mlxp, cq); + } + g->mlg_state &= ~MLXCX_GROUP_RUNNING; + kmem_free(g->mlg_wqs, g->mlg_wqs_size); + g->mlg_wqs = NULL; + g->mlg_state &= ~MLXCX_GROUP_WQS; + } + + if ((g->mlg_state & MLXCX_GROUP_TIRTIS) && + g->mlg_tis.mltis_state & MLXCX_TIS_CREATED && + !(g->mlg_tis.mltis_state & MLXCX_TIS_DESTROYED)) { + if (!mlxcx_cmd_destroy_tis(mlxp, &g->mlg_tis)) { + mlxcx_warn(mlxp, "failed to destroy tis %u for tx ring", + g->mlg_tis.mltis_num); + } + } + g->mlg_state &= ~MLXCX_GROUP_TIRTIS; + + mutex_exit(&g->mlg_mtx); + mutex_destroy(&g->mlg_mtx); + g->mlg_state &= ~MLXCX_GROUP_INIT; + ASSERT3S(g->mlg_state, ==, 0); +} + +void +mlxcx_teardown_groups(mlxcx_t *mlxp) +{ + mlxcx_ring_group_t *g; + uint_t i; + + for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) { + g = &mlxp->mlx_rx_groups[i]; + if (!(g->mlg_state & MLXCX_GROUP_INIT)) + continue; + ASSERT3S(g->mlg_type, ==, MLXCX_GROUP_RX); + mlxcx_teardown_rx_group(mlxp, g); + } + kmem_free(mlxp->mlx_rx_groups, mlxp->mlx_rx_groups_size); + mlxp->mlx_rx_groups = NULL; + + for (i = 0; i < mlxp->mlx_tx_ngroups; ++i) { + g = &mlxp->mlx_tx_groups[i]; + if (!(g->mlg_state & MLXCX_GROUP_INIT)) + continue; + ASSERT3S(g->mlg_type, ==, MLXCX_GROUP_TX); + mlxcx_teardown_tx_group(mlxp, g); + } + kmem_free(mlxp->mlx_tx_groups, mlxp->mlx_tx_groups_size); + mlxp->mlx_tx_groups = NULL; +} + +boolean_t +mlxcx_rx_group_setup(mlxcx_t *mlxp, mlxcx_ring_group_t *g) +{ + mlxcx_event_queue_t *eq; + mlxcx_completion_queue_t *cq; + mlxcx_work_queue_t *rq; + mlxcx_flow_table_t *ft; + mlxcx_flow_group_t *fg; + mlxcx_flow_entry_t *fe; + uint_t i, j; + + ASSERT3S(g->mlg_state, ==, 0); + + mutex_init(&g->mlg_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + mutex_enter(&g->mlg_mtx); + g->mlg_mlx = mlxp; + g->mlg_type = MLXCX_GROUP_RX; + g->mlg_port = &mlxp->mlx_ports[0]; + g->mlg_state |= MLXCX_GROUP_INIT; + + g->mlg_nwqs = mlxp->mlx_props.mldp_rx_nrings_per_small_group; + i = g - &mlxp->mlx_rx_groups[0]; + if (i < mlxp->mlx_props.mldp_rx_ngroups_large) + g->mlg_nwqs = mlxp->mlx_props.mldp_rx_nrings_per_large_group; + + g->mlg_wqs_size = g->mlg_nwqs * sizeof (mlxcx_work_queue_t); + g->mlg_wqs = kmem_zalloc(g->mlg_wqs_size, KM_SLEEP); + g->mlg_state |= MLXCX_GROUP_WQS; + + g->mlg_rqt = kmem_zalloc(sizeof (mlxcx_rqtable_t), KM_SLEEP); + g->mlg_rqt->mlrqt_max = 2; + while (g->mlg_rqt->mlrqt_max < g->mlg_nwqs) + g->mlg_rqt->mlrqt_max <<= 1; + g->mlg_rqt->mlrqt_rq_size = g->mlg_rqt->mlrqt_max * + sizeof (mlxcx_work_queue_t *); + g->mlg_rqt->mlrqt_rq = 
kmem_zalloc(g->mlg_rqt->mlrqt_rq_size, KM_SLEEP); + g->mlg_state |= MLXCX_GROUP_RQT; + + for (i = 0; i < g->mlg_nwqs; ++i) { + eq = NULL; + while (eq == NULL) { + eq = &mlxp->mlx_eqs[mlxp->mlx_next_eq++]; + if (mlxp->mlx_next_eq >= mlxp->mlx_intr_count) + mlxp->mlx_next_eq = 1; + if (eq->mleq_type != MLXCX_EQ_TYPE_ANY && + eq->mleq_type != MLXCX_EQ_TYPE_RX) { + /* Try the next one */ + eq = NULL; + } + } + + if (!mlxcx_cq_setup(mlxp, eq, &cq)) { + g->mlg_nwqs = i; + break; + } + cq->mlcq_stats = &g->mlg_port->mlp_stats; + + rq = &g->mlg_wqs[i]; + if (!mlxcx_rq_setup(mlxp, cq, rq)) { + g->mlg_nwqs = i; + break; + } + g->mlg_rqt->mlrqt_rq[g->mlg_rqt->mlrqt_used++] = rq; + g->mlg_rqt->mlrqt_state |= MLXCX_RQT_DIRTY; + rq->mlwq_group = g; + } + if (g->mlg_nwqs == 0) { + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + if (!mlxcx_cmd_create_rqt(mlxp, g->mlg_rqt)) { + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + for (i = 0; i < MLXCX_TIRS_PER_GROUP; ++i) { + mlxcx_tir_t *tir = &g->mlg_tir[i]; + tir->mltir_tdom = &mlxp->mlx_tdom; + switch (i) { + case MLXCX_TIR_ROLE_OTHER: + tir->mltir_type = MLXCX_TIR_DIRECT; + tir->mltir_rq = &g->mlg_wqs[0]; + break; + case MLXCX_TIR_ROLE_IPv4: + case MLXCX_TIR_ROLE_IPv6: + case MLXCX_TIR_ROLE_TCPv4: + case MLXCX_TIR_ROLE_TCPv6: + case MLXCX_TIR_ROLE_UDPv4: + case MLXCX_TIR_ROLE_UDPv6: + tir->mltir_type = MLXCX_TIR_INDIRECT; + tir->mltir_rqtable = g->mlg_rqt; + tir->mltir_hash_fn = MLXCX_TIR_HASH_TOEPLITZ; + (void) random_get_pseudo_bytes(tir->mltir_toeplitz_key, + sizeof (tir->mltir_toeplitz_key)); + break; + } + switch (i) { + case MLXCX_TIR_ROLE_OTHER: + break; + case MLXCX_TIR_ROLE_IPv4: + case MLXCX_TIR_ROLE_TCPv4: + case MLXCX_TIR_ROLE_UDPv4: + tir->mltir_l3_type = MLXCX_RX_HASH_L3_IPv4; + tir->mltir_hash_fields = + MLXCX_RX_HASH_SRC_IP | MLXCX_RX_HASH_DST_IP; + break; + case MLXCX_TIR_ROLE_IPv6: + case MLXCX_TIR_ROLE_TCPv6: + case MLXCX_TIR_ROLE_UDPv6: + tir->mltir_l3_type = MLXCX_RX_HASH_L3_IPv6; + tir->mltir_hash_fields = + MLXCX_RX_HASH_SRC_IP | MLXCX_RX_HASH_DST_IP; + break; + } + switch (i) { + case MLXCX_TIR_ROLE_OTHER: + case MLXCX_TIR_ROLE_IPv4: + case MLXCX_TIR_ROLE_IPv6: + break; + case MLXCX_TIR_ROLE_TCPv4: + case MLXCX_TIR_ROLE_TCPv6: + tir->mltir_l4_type = MLXCX_RX_HASH_L4_TCP; + tir->mltir_hash_fields |= + MLXCX_RX_HASH_L4_SPORT | MLXCX_RX_HASH_L4_DPORT; + break; + case MLXCX_TIR_ROLE_UDPv4: + case MLXCX_TIR_ROLE_UDPv6: + tir->mltir_l4_type = MLXCX_RX_HASH_L4_UDP; + tir->mltir_hash_fields |= + MLXCX_RX_HASH_L4_SPORT | MLXCX_RX_HASH_L4_DPORT; + break; + } + + if (!mlxcx_cmd_create_tir(mlxp, tir)) { + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + g->mlg_state |= MLXCX_GROUP_TIRTIS; + } + + /* + * Flow table: our RX hashing breakout table for RSS + */ + + g->mlg_rx_hash_ft = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t), + KM_SLEEP)); + mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + avl_create(&g->mlg_rx_macs, mlxcx_grmac_compare, + sizeof (mlxcx_group_mac_t), + offsetof(mlxcx_group_mac_t, mlgm_group_entry)); + g->mlg_state |= MLXCX_GROUP_FLOWS; + + mutex_enter(&ft->mlft_mtx); + + ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX; + ft->mlft_level = 2; + ft->mlft_port = g->mlg_port; + ft->mlft_entshift = MLXCX_RX_HASH_FT_SIZE_SHIFT; + ft->mlft_nents = (1 << ft->mlft_entshift); + ASSERT3U(ft->mlft_nents, >=, MLXCX_TIRS_PER_GROUP); + ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t); + ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP); + list_create(&ft->mlft_groups, sizeof 
(mlxcx_flow_group_t), + offsetof(mlxcx_flow_group_t, mlfg_entry)); + + for (j = 0; j < ft->mlft_nents; ++j) { + ft->mlft_ent[j].mlfe_table = ft; + ft->mlft_ent[j].mlfe_index = j; + } + + if (!mlxcx_cmd_create_flow_table(mlxp, ft)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ip_version = 6; + fe->mlfe_ip_proto = IPPROTO_UDP; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_UDPv6]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ip_version = 4; + fe->mlfe_ip_proto = IPPROTO_UDP; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_UDPv4]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ip_version = 6; + fe->mlfe_ip_proto = IPPROTO_TCP; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_TCPv6]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ip_version = 4; + fe->mlfe_ip_proto = IPPROTO_TCP; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_TCPv4]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ip_version = 6; + fe->mlfe_action = 
MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_IPv6]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ip_version = 4; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_IPv4]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = + &g->mlg_tir[MLXCX_TIR_ROLE_OTHER]; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + mutex_exit(&ft->mlft_mtx); + + /* + * Flow table: the VLAN breakout table for doing VLAN filtering after + * we've matched a MAC address. + */ + + g->mlg_rx_vlan_ft = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t), + KM_SLEEP)); + mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + list_create(&g->mlg_rx_vlans, sizeof (mlxcx_group_vlan_t), + offsetof(mlxcx_group_vlan_t, mlgv_entry)); + + mutex_enter(&ft->mlft_mtx); + + ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX; + ft->mlft_level = 1; + ft->mlft_port = g->mlg_port; + ft->mlft_entshift = mlxp->mlx_props.mldp_ftbl_vlan_size_shift; + ft->mlft_nents = (1 << ft->mlft_entshift); + ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t); + ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP); + list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t), + offsetof(mlxcx_flow_group_t, mlfg_entry)); + + for (j = 0; j < ft->mlft_nents; ++j) { + fe = &ft->mlft_ent[j]; + fe->mlfe_table = ft; + fe->mlfe_index = j; + fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft; + } + + if (!mlxcx_cmd_create_flow_table(mlxp, ft)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + /* First group is all actual matched VLANs */ + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + g->mlg_rx_vlan_fg = fg; + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = ft->mlft_nents - 2; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_VLAN; + fg->mlfg_mask |= MLXCX_FLOW_MATCH_VID; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + /* + * Then the "default" entry which we enable when we have no VLAN IDs + * added to the group (we start with this enabled). 
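+	 * This group has no match mask, so its single entry catches every
+	 * packet that falls through the VLAN-match group above it.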
+ */ + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + g->mlg_rx_vlan_def_fg = fg; + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + /* + * Finally, the promisc entry which points at the *hash ft* from the + * default group. We only enable this when we have promisc on. + */ + fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); + g->mlg_rx_vlan_promisc_fg = fg; + list_insert_tail(&ft->mlft_groups, fg); + fg->mlfg_table = ft; + fg->mlfg_size = 1; + if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + fe = list_head(&fg->mlfg_entries); + fe->mlfe_ndest = 1; + fe->mlfe_dest[0].mlfed_flow = mlxp->mlx_rx_groups[0].mlg_rx_hash_ft; + + mutex_exit(&ft->mlft_mtx); + + mutex_exit(&g->mlg_mtx); + + return (B_TRUE); +} + +boolean_t +mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g, + mlxcx_work_queue_t *rq) +{ + uint_t j; + mlxcx_buffer_t *b; + mlxcx_completion_queue_t *cq; + + mutex_enter(&g->mlg_mtx); + /* + * Sadly, even though MAC has the mgi_start callback, it is not always + * called -- in particular when we are being managed under an aggr, the + * mgi_start callback will only ever be called on the default group. + * + * So instead of asserting about the group state here, we have to + * check it and call group start if needed. + */ + if (!(g->mlg_state & MLXCX_GROUP_RUNNING)) { + mutex_exit(&g->mlg_mtx); + if (!mlxcx_rx_group_start(mlxp, g)) + return (B_FALSE); + mutex_enter(&g->mlg_mtx); + } + ASSERT(g->mlg_state & MLXCX_GROUP_RUNNING); + + cq = rq->mlwq_cq; + ASSERT(cq != NULL); + + mutex_enter(&cq->mlcq_mtx); + mutex_enter(&rq->mlwq_mtx); + + if (rq->mlwq_state & MLXCX_WQ_STARTED) { + mutex_exit(&rq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&g->mlg_mtx); + return (B_TRUE); + } + + if (!mlxcx_cmd_start_rq(mlxp, rq)) { + mutex_exit(&rq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + ASSERT(rq->mlwq_state & MLXCX_WQ_STARTED); + + ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS); + rq->mlwq_state |= MLXCX_WQ_BUFFERS; + + for (j = 0; j < rq->mlwq_nents; ++j) { + if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b)) + break; + mlxcx_buf_return(mlxp, b); + } + for (j = 0; j < rq->mlwq_nents / 2; ++j) { + if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b)) + break; + mlxcx_buf_return(mlxp, b); + } + + mlxcx_rq_refill(mlxp, rq); + + mutex_exit(&rq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&g->mlg_mtx); + + return (B_TRUE); +} + +boolean_t +mlxcx_rx_group_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g) +{ + mlxcx_flow_table_t *ft; + mlxcx_flow_group_t *fg; + mlxcx_flow_entry_t *fe; + + mutex_enter(&g->mlg_mtx); + + if (g->mlg_state & MLXCX_GROUP_RUNNING) { + mutex_exit(&g->mlg_mtx); + return (B_TRUE); + } + + ASSERT0(g->mlg_state & MLXCX_GROUP_RUNNING); + + g->mlg_state |= MLXCX_GROUP_RUNNING; + + if (g == &mlxp->mlx_rx_groups[0]) { + ft = g->mlg_port->mlp_rx_flow; + mutex_enter(&ft->mlft_mtx); + + /* + * Broadcast and promisc entries go directly to group 0's + * RSS hash fanout flow table. They bypass VLAN filtering. 
+ */ + fg = g->mlg_port->mlp_bcast; + fe = list_head(&fg->mlfg_entries); + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft; + if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { + mutex_exit(&ft->mlft_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + fg = g->mlg_port->mlp_promisc; + fe = list_head(&fg->mlfg_entries); + fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft; + /* + * Don't actually set the promisc entry until promisc is + * enabled. + */ + + mutex_exit(&ft->mlft_mtx); + } + + mutex_exit(&g->mlg_mtx); + + return (B_TRUE); +} + +boolean_t +mlxcx_tx_group_setup(mlxcx_t *mlxp, mlxcx_ring_group_t *g) +{ + mlxcx_event_queue_t *eq; + mlxcx_completion_queue_t *cq; + mlxcx_work_queue_t *sq; + uint_t i; + + ASSERT3S(g->mlg_state, ==, 0); + + mutex_init(&g->mlg_mtx, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_intr_pri)); + g->mlg_state |= MLXCX_GROUP_INIT; + mutex_enter(&g->mlg_mtx); + + g->mlg_mlx = mlxp; + g->mlg_type = MLXCX_GROUP_TX; + g->mlg_port = &mlxp->mlx_ports[0]; + + g->mlg_nwqs = mlxp->mlx_props.mldp_tx_nrings_per_group; + g->mlg_wqs_size = g->mlg_nwqs * sizeof (mlxcx_work_queue_t); + g->mlg_wqs = kmem_zalloc(g->mlg_wqs_size, KM_SLEEP); + g->mlg_state |= MLXCX_GROUP_WQS; + + g->mlg_tis.mltis_tdom = &mlxp->mlx_tdom; + + if (!mlxcx_cmd_create_tis(mlxp, &g->mlg_tis)) { + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + + g->mlg_state |= MLXCX_GROUP_TIRTIS; + + for (i = 0; i < g->mlg_nwqs; ++i) { + eq = NULL; + while (eq == NULL) { + eq = &mlxp->mlx_eqs[mlxp->mlx_next_eq++]; + if (mlxp->mlx_next_eq >= mlxp->mlx_intr_count) + mlxp->mlx_next_eq = 1; + if (eq->mleq_type != MLXCX_EQ_TYPE_ANY && + eq->mleq_type != MLXCX_EQ_TYPE_TX) { + /* Try the next one */ + eq = NULL; + } + } + + if (!mlxcx_cq_setup(mlxp, eq, &cq)) + return (B_FALSE); + cq->mlcq_stats = &g->mlg_port->mlp_stats; + + sq = &g->mlg_wqs[i]; + if (!mlxcx_sq_setup(mlxp, g->mlg_port, cq, &g->mlg_tis, sq)) { + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + sq->mlwq_group = g; + } + + mutex_exit(&g->mlg_mtx); + + return (B_TRUE); +} + +boolean_t +mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g, + mlxcx_work_queue_t *sq) +{ + uint_t i; + mlxcx_buffer_t *b; + mlxcx_completion_queue_t *cq; + + mutex_enter(&g->mlg_mtx); + + cq = sq->mlwq_cq; + ASSERT(cq != NULL); + + mutex_enter(&cq->mlcq_mtx); + mutex_enter(&sq->mlwq_mtx); + if (sq->mlwq_state & MLXCX_WQ_STARTED) { + mutex_exit(&sq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&g->mlg_mtx); + return (B_TRUE); + } + + ASSERT0(sq->mlwq_state & MLXCX_WQ_BUFFERS); + for (i = 0; i < sq->mlwq_nents; ++i) { + if (!mlxcx_buf_create_foreign(mlxp, sq->mlwq_foreign_bufs, &b)) + break; + mlxcx_buf_return(mlxp, b); + } + for (i = 0; i < sq->mlwq_nents / 2; ++i) { + if (!mlxcx_buf_create_foreign(mlxp, sq->mlwq_foreign_bufs, &b)) + break; + mlxcx_buf_return(mlxp, b); + } + for (i = 0; i < sq->mlwq_nents; ++i) { + if (!mlxcx_buf_create(mlxp, sq->mlwq_bufs, &b)) + break; + mlxcx_buf_return(mlxp, b); + } + sq->mlwq_state |= MLXCX_WQ_BUFFERS; + + if (!mlxcx_cmd_start_sq(mlxp, sq)) { + mutex_exit(&sq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&g->mlg_mtx); + return (B_FALSE); + } + g->mlg_state |= MLXCX_GROUP_RUNNING; + + (void) mlxcx_sq_add_nop(mlxp, sq); + + mutex_exit(&sq->mlwq_mtx); + mutex_exit(&cq->mlcq_mtx); + mutex_exit(&g->mlg_mtx); + + return (B_TRUE); +} + +static boolean_t +mlxcx_sq_ring_dbell(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, uint_t first) +{ + uint_t idx; + mlxcx_bf_t *bf; + ddi_fm_error_t err; + uint_t try = 
0; + + ASSERT3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ); + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + + mlwq->mlwq_doorbell->mlwqd_send_counter = to_be16(mlwq->mlwq_pc); + + ASSERT(mlwq->mlwq_cq != NULL); + ASSERT(mlwq->mlwq_cq->mlcq_eq != NULL); + idx = mlwq->mlwq_cq->mlcq_eq->mleq_intr_index & MLXCX_BF_PER_UAR_MASK; + bf = &mlwq->mlwq_uar->mlu_bf[idx]; + +retry: + MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + if (try++ < mlxcx_doorbell_tries) { + ddi_fm_dma_err_clear( + mlwq->mlwq_doorbell_dma.mxdb_dma_handle, + DDI_FME_VERSION); + goto retry; + } else { + goto err; + } + } + + mlxcx_put64(mlxp, bf->mbf_even, from_be64( + mlwq->mlwq_bf_ent[first].mlsqbf_qwords[0])); + ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err, + DDI_FME_VERSION); + if (err.fme_status == DDI_FM_OK) + return (B_TRUE); + if (try++ < mlxcx_doorbell_tries) { + ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION); + goto retry; + } + +err: + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST); + return (B_FALSE); +} + +boolean_t +mlxcx_sq_add_nop(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + uint_t index, start_pc; + mlxcx_sendq_ent_t *ent0; + ddi_fm_error_t err; + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + + index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); + ent0 = &mlwq->mlwq_send_ent[index]; + start_pc = mlwq->mlwq_pc; + ++mlwq->mlwq_pc; + + bzero(ent0, sizeof (mlxcx_sendq_ent_t)); + ent0->mlsqe_control.mlcs_opcode = MLXCX_WQE_OP_NOP; + ent0->mlsqe_control.mlcs_qp_or_sq = to_be24(mlwq->mlwq_num); + ent0->mlsqe_control.mlcs_wqe_index = to_be16(start_pc); + + set_bits8(&ent0->mlsqe_control.mlcs_flags, + MLXCX_SQE_FENCE_MODE, MLXCX_SQE_FENCE_NONE); + set_bits8(&ent0->mlsqe_control.mlcs_flags, + MLXCX_SQE_COMPLETION_MODE, MLXCX_SQE_CQE_ALWAYS); + + ent0->mlsqe_control.mlcs_ds = 1; + + VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle, + (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent, + sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV)); + ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + return (B_FALSE); + } + if (!mlxcx_sq_ring_dbell(mlxp, mlwq, index)) { + return (B_FALSE); + } + return (B_TRUE); +} + +boolean_t +mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, + uint8_t *inlinehdrs, size_t inlinelen, uint32_t chkflags, + mlxcx_buffer_t *b0) +{ + uint_t index, first, ents = 0; + mlxcx_completion_queue_t *cq; + mlxcx_sendq_ent_t *ent0; + mlxcx_sendq_extra_ent_t *ent; + mlxcx_wqe_data_seg_t *seg; + uint_t ptri, nptr; + const ddi_dma_cookie_t *c; + size_t rem; + mlxcx_buffer_t *b; + ddi_fm_error_t err; + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + ASSERT3P(b0->mlb_tx_head, ==, b0); + ASSERT3U(b0->mlb_state, ==, MLXCX_BUFFER_ON_WQ); + cq = mlwq->mlwq_cq; + + index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); + ent0 = &mlwq->mlwq_send_ent[index]; + b0->mlb_wqe_index = mlwq->mlwq_pc; + ++mlwq->mlwq_pc; + ++ents; + + first = index; + + mutex_enter(&cq->mlcq_bufbmtx); + list_insert_tail(&cq->mlcq_buffers_b, b0); + atomic_inc_64(&cq->mlcq_bufcnt); + mutex_exit(&cq->mlcq_bufbmtx); + + bzero(ent0, sizeof (mlxcx_sendq_ent_t)); + ent0->mlsqe_control.mlcs_opcode = MLXCX_WQE_OP_SEND; + ent0->mlsqe_control.mlcs_qp_or_sq = to_be24(mlwq->mlwq_num); + ent0->mlsqe_control.mlcs_wqe_index = to_be16(b0->mlb_wqe_index); + + set_bits8(&ent0->mlsqe_control.mlcs_flags, + MLXCX_SQE_FENCE_MODE, MLXCX_SQE_FENCE_WAIT_OTHERS); + 
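+	/*
+	 * CQE_ALWAYS requests a completion for every send WQE; the CQ
+	 * handler relies on that to take b0 back off mlcq_buffers_b.
+	 */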
set_bits8(&ent0->mlsqe_control.mlcs_flags, + MLXCX_SQE_COMPLETION_MODE, MLXCX_SQE_CQE_ALWAYS); + + VERIFY3U(inlinelen, <=, sizeof (ent0->mlsqe_eth.mles_inline_headers)); + set_bits16(&ent0->mlsqe_eth.mles_szflags, + MLXCX_SQE_ETH_INLINE_HDR_SZ, inlinelen); + if (inlinelen > 0) { + bcopy(inlinehdrs, ent0->mlsqe_eth.mles_inline_headers, + inlinelen); + } + + ent0->mlsqe_control.mlcs_ds = + offsetof(mlxcx_sendq_ent_t, mlsqe_data) / 16; + + if (chkflags & HCK_IPV4_HDRCKSUM) { + ASSERT(mlxp->mlx_caps->mlc_checksum); + set_bit8(&ent0->mlsqe_eth.mles_csflags, + MLXCX_SQE_ETH_CSFLAG_L3_CHECKSUM); + } + if (chkflags & HCK_FULLCKSUM) { + ASSERT(mlxp->mlx_caps->mlc_checksum); + set_bit8(&ent0->mlsqe_eth.mles_csflags, + MLXCX_SQE_ETH_CSFLAG_L4_CHECKSUM); + } + + b = b0; + ptri = 0; + nptr = sizeof (ent0->mlsqe_data) / sizeof (mlxcx_wqe_data_seg_t); + seg = ent0->mlsqe_data; + while (b != NULL) { + rem = b->mlb_used; + + c = NULL; + while (rem > 0 && + (c = mlxcx_dma_cookie_iter(&b->mlb_dma, c)) != NULL) { + if (ptri >= nptr) { + index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); + ent = &mlwq->mlwq_send_extra_ent[index]; + ++mlwq->mlwq_pc; + ++ents; + + seg = ent->mlsqe_data; + ptri = 0; + nptr = sizeof (ent->mlsqe_data) / + sizeof (mlxcx_wqe_data_seg_t); + } + + seg->mlds_lkey = to_be32(mlxp->mlx_rsvd_lkey); + if (c->dmac_size > rem) { + seg->mlds_byte_count = to_be32(rem); + rem = 0; + } else { + seg->mlds_byte_count = to_be32(c->dmac_size); + rem -= c->dmac_size; + } + seg->mlds_address = to_be64(c->dmac_laddress); + ++seg; + ++ptri; + ++ent0->mlsqe_control.mlcs_ds; + + ASSERT3U(ent0->mlsqe_control.mlcs_ds, <=, + MLXCX_SQE_MAX_DS); + } + + if (b == b0) { + b = list_head(&b0->mlb_tx_chain); + } else { + b = list_next(&b0->mlb_tx_chain, b); + } + } + + for (; ptri < nptr; ++ptri, ++seg) { + seg->mlds_lkey = to_be32(MLXCX_NULL_LKEY); + seg->mlds_byte_count = to_be32(0); + seg->mlds_address = to_be64(0); + } + + /* + * Make sure the workqueue entry is flushed out before updating + * the doorbell. 
+ */ + VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle, + (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent, + ents * sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV)); + ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + return (B_FALSE); + } + if (!mlxcx_sq_ring_dbell(mlxp, mlwq, first)) { + return (B_FALSE); + } + return (B_TRUE); +} + +boolean_t +mlxcx_rq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, + mlxcx_buffer_t *buf) +{ + return (mlxcx_rq_add_buffers(mlxp, mlwq, &buf, 1)); +} + +boolean_t +mlxcx_rq_add_buffers(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, + mlxcx_buffer_t **bufs, size_t nbufs) +{ + uint_t index; + mlxcx_recvq_ent_t *ent; + mlxcx_completion_queue_t *cq; + mlxcx_wqe_data_seg_t *seg; + uint_t bi, ptri; + const ddi_dma_cookie_t *c; + mlxcx_buffer_t *buf; + ddi_fm_error_t err; + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + cq = mlwq->mlwq_cq; + ASSERT(mutex_owned(&cq->mlcq_mtx)); + + for (bi = 0; bi < nbufs; ++bi) { + buf = bufs[bi]; + bufs[bi] = NULL; + ASSERT3U(buf->mlb_state, ==, MLXCX_BUFFER_ON_WQ); + + index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); + ent = &mlwq->mlwq_recv_ent[index]; + buf->mlb_wqe_index = mlwq->mlwq_pc; + + ++mlwq->mlwq_pc; + + mutex_enter(&cq->mlcq_bufbmtx); + list_insert_tail(&cq->mlcq_buffers, buf); + atomic_inc_64(&cq->mlcq_bufcnt); + mutex_exit(&cq->mlcq_bufbmtx); + + ASSERT3U(buf->mlb_dma.mxdb_ncookies, <=, MLXCX_RECVQ_MAX_PTRS); + ptri = 0; + c = NULL; + while ((c = mlxcx_dma_cookie_iter(&buf->mlb_dma, c)) != NULL) { + seg = &ent->mlrqe_data[ptri++]; + seg->mlds_lkey = to_be32(mlxp->mlx_rsvd_lkey); + seg->mlds_byte_count = to_be32(c->dmac_size); + seg->mlds_address = to_be64(c->dmac_laddress); + } + /* + * Fill any unused scatter pointers with the special null + * value. + */ + for (; ptri < MLXCX_RECVQ_MAX_PTRS; ++ptri) { + seg = &ent->mlrqe_data[ptri]; + seg->mlds_lkey = to_be32(MLXCX_NULL_LKEY); + seg->mlds_byte_count = to_be32(0); + seg->mlds_address = to_be64(0); + } + + /* + * Make sure the workqueue entry is flushed out before updating + * the doorbell. + */ + VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle, + (uintptr_t)ent - (uintptr_t)mlwq->mlwq_recv_ent, + sizeof (mlxcx_recvq_ent_t), DDI_DMA_SYNC_FORDEV)); + ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + return (B_FALSE); + } + } + + mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(mlwq->mlwq_pc); + /* + * Flush the CQ doorbell as well so that HW knows how many + * completions we've consumed. 
+ */ + MLXCX_DMA_SYNC(cq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(cq->mlcq_doorbell_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + return (B_FALSE); + } + MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + return (B_FALSE); + } + return (B_TRUE); +} + +void +mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) +{ + size_t target, current, want, done, n; + mlxcx_completion_queue_t *cq; + mlxcx_buffer_t *b[MLXCX_RQ_REFILL_STEP]; + uint_t i; + + ASSERT(mutex_owned(&mlwq->mlwq_mtx)); + cq = mlwq->mlwq_cq; + ASSERT(mutex_owned(&cq->mlcq_mtx)); + + ASSERT(mlwq->mlwq_state & MLXCX_WQ_BUFFERS); + + target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP; + cq = mlwq->mlwq_cq; + + if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) + return; + + current = cq->mlcq_bufcnt; + + if (current >= target - MLXCX_RQ_REFILL_STEP) + return; + + want = target - current; + done = 0; + + while (!(mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) && done < want) { + n = mlxcx_buf_take_n(mlxp, mlwq, b, MLXCX_RQ_REFILL_STEP); + if (n == 0) { + mlxcx_warn(mlxp, "!exiting rq refill early, done %u " + "but wanted %u", done, want); + return; + } + if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) { + for (i = 0; i < n; ++i) + mlxcx_buf_return(mlxp, b[i]); + return; + } + if (!mlxcx_rq_add_buffers(mlxp, mlwq, b, n)) { + /* + * mlxcx_rq_add_buffers NULLs out the buffers as it + * enqueues them, so any that are non-NULL we have to + * free now. The others now belong to the WQ, even if + * we failed. + */ + for (i = 0; i < n; ++i) { + if (b[i] != NULL) { + mlxcx_buf_return(mlxp, b[i]); + } + } + return; + } + done += n; + } +} + +static const char * +mlxcx_cq_err_syndrome_string(mlxcx_cq_error_syndrome_t sy) +{ + switch (sy) { + case MLXCX_CQ_ERR_LOCAL_LENGTH: + return ("LOCAL_LENGTH"); + case MLXCX_CQ_ERR_LOCAL_QP_OP: + return ("LOCAL_QP_OP"); + case MLXCX_CQ_ERR_LOCAL_PROTECTION: + return ("LOCAL_PROTECTION"); + case MLXCX_CQ_ERR_WR_FLUSHED: + return ("WR_FLUSHED"); + case MLXCX_CQ_ERR_MEM_WINDOW_BIND: + return ("MEM_WINDOW_BIND"); + case MLXCX_CQ_ERR_BAD_RESPONSE: + return ("BAD_RESPONSE"); + case MLXCX_CQ_ERR_LOCAL_ACCESS: + return ("LOCAL_ACCESS"); + case MLXCX_CQ_ERR_XPORT_RETRY_CTR: + return ("XPORT_RETRY_CTR"); + case MLXCX_CQ_ERR_RNR_RETRY_CTR: + return ("RNR_RETRY_CTR"); + case MLXCX_CQ_ERR_ABORTED: + return ("ABORTED"); + default: + return ("UNKNOWN"); + } +} + +static void +mlxcx_fm_cqe_ereport(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, + mlxcx_completionq_error_ent_t *ent) +{ + uint64_t ena; + char buf[FM_MAX_CLASS]; + const char *name = mlxcx_cq_err_syndrome_string(ent->mlcqee_syndrome); + + if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps)) + return; + + (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", + MLXCX_FM_SERVICE_MLXCX, "cqe.err"); + ena = fm_ena_generate(0, FM_ENA_FMT1); + + ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP, + FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, + "syndrome", DATA_TYPE_STRING, name, + "syndrome_num", DATA_TYPE_UINT8, ent->mlcqee_syndrome, + "vendor_syndrome", DATA_TYPE_UINT8, + ent->mlcqee_vendor_error_syndrome, + "wqe_counter", DATA_TYPE_UINT16, from_be16(ent->mlcqee_wqe_counter), + "wq_type", DATA_TYPE_STRING, + (mlcq->mlcq_wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ) ? 
"send": "recv", + "cq_num", DATA_TYPE_UINT32, mlcq->mlcq_num, + "wq_num", DATA_TYPE_UINT32, mlcq->mlcq_wq->mlwq_num, + NULL); + ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_DEGRADED); +} + +void +mlxcx_tx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, + mlxcx_completionq_ent_t *ent, mlxcx_buffer_t *buf) +{ + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + if (ent->mlcqe_opcode == MLXCX_CQE_OP_REQ_ERR) { + mlxcx_completionq_error_ent_t *eent = + (mlxcx_completionq_error_ent_t *)ent; + mlxcx_fm_cqe_ereport(mlxp, mlcq, eent); + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); + mutex_enter(&mlcq->mlcq_wq->mlwq_mtx); + mlxcx_check_sq(mlxp, mlcq->mlcq_wq); + mutex_exit(&mlcq->mlcq_wq->mlwq_mtx); + return; + } + + if (ent->mlcqe_opcode != MLXCX_CQE_OP_REQ) { + mlxcx_warn(mlxp, "!got weird cq opcode: %x", ent->mlcqe_opcode); + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); + return; + } + + if (ent->mlcqe_send_wqe_opcode != MLXCX_WQE_OP_SEND) { + mlxcx_warn(mlxp, "!got weird cq wqe opcode: %x", + ent->mlcqe_send_wqe_opcode); + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); + return; + } + + if (ent->mlcqe_format != MLXCX_CQE_FORMAT_BASIC) { + mlxcx_warn(mlxp, "!got weird cq format: %x", ent->mlcqe_format); + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); + return; + } + + mlxcx_buf_return_chain(mlxp, buf, B_FALSE); +} + +mblk_t * +mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, + mlxcx_completionq_ent_t *ent, mlxcx_buffer_t *buf) +{ + uint32_t chkflags = 0; + ddi_fm_error_t err; + + ASSERT(mutex_owned(&mlcq->mlcq_mtx)); + + if (ent->mlcqe_opcode == MLXCX_CQE_OP_RESP_ERR) { + mlxcx_completionq_error_ent_t *eent = + (mlxcx_completionq_error_ent_t *)ent; + mlxcx_fm_cqe_ereport(mlxp, mlcq, eent); + mlxcx_buf_return(mlxp, buf); + mutex_enter(&mlcq->mlcq_wq->mlwq_mtx); + mlxcx_check_rq(mlxp, mlcq->mlcq_wq); + mutex_exit(&mlcq->mlcq_wq->mlwq_mtx); + return (NULL); + } + + if (ent->mlcqe_opcode != MLXCX_CQE_OP_RESP) { + mlxcx_warn(mlxp, "!got weird cq opcode: %x", ent->mlcqe_opcode); + mlxcx_buf_return(mlxp, buf); + return (NULL); + } + + if (ent->mlcqe_format != MLXCX_CQE_FORMAT_BASIC) { + mlxcx_warn(mlxp, "!got weird cq format: %x", ent->mlcqe_format); + mlxcx_buf_return(mlxp, buf); + return (NULL); + } + + if (ent->mlcqe_rx_drop_counter > 0) { + atomic_add_64(&mlcq->mlcq_stats->mlps_rx_drops, + ent->mlcqe_rx_drop_counter); + } + + MLXCX_DMA_SYNC(buf->mlb_dma, DDI_DMA_SYNC_FORCPU); + ddi_fm_dma_err_get(buf->mlb_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + ddi_fm_dma_err_clear(buf->mlb_dma.mxdb_dma_handle, + DDI_FME_VERSION); + mlxcx_buf_return(mlxp, buf); + return (NULL); + } + + if (!mlxcx_buf_loan(mlxp, buf)) { + mlxcx_warn(mlxp, "!loan failed, dropping packet"); + mlxcx_buf_return(mlxp, buf); + return (NULL); + } + + buf->mlb_mp->b_next = NULL; + buf->mlb_mp->b_cont = NULL; + buf->mlb_mp->b_wptr = buf->mlb_mp->b_rptr + + from_be32(ent->mlcqe_byte_cnt); + + if (get_bit8(ent->mlcqe_csflags, MLXCX_CQE_CSFLAGS_L4_OK)) { + chkflags |= HCK_FULLCKSUM_OK; + } + if (get_bit8(ent->mlcqe_csflags, MLXCX_CQE_CSFLAGS_L3_OK)) { + chkflags |= HCK_IPV4_HDRCKSUM_OK; + } + if (chkflags != 0) { + mac_hcksum_set(buf->mlb_mp, 0, 0, 0, + from_be16(ent->mlcqe_checksum), chkflags); + } + + /* + * Don't check if a refill is needed on every single completion, + * since checking involves taking the RQ lock. 
+ */ + if ((buf->mlb_wqe_index & 0x7) == 0) { + mlxcx_work_queue_t *wq = mlcq->mlcq_wq; + ASSERT(wq != NULL); + mutex_enter(&wq->mlwq_mtx); + if (!(wq->mlwq_state & MLXCX_WQ_TEARDOWN)) + mlxcx_rq_refill(mlxp, wq); + mutex_exit(&wq->mlwq_mtx); + } + + return (buf->mlb_mp); +} + +static void +mlxcx_buf_mp_return(caddr_t arg) +{ + mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg; + mlxcx_t *mlxp = b->mlb_mlx; + + if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) { + b->mlb_mp = NULL; + return; + } + /* + * The mblk for this buffer_t (in its mlb_mp field) has been used now, + * so NULL it out. + */ + b->mlb_mp = NULL; + mlxcx_buf_return(mlxp, b); +} + +boolean_t +mlxcx_buf_create(mlxcx_t *mlxp, mlxcx_buf_shard_t *shard, mlxcx_buffer_t **bp) +{ + mlxcx_buffer_t *b; + ddi_device_acc_attr_t acc; + ddi_dma_attr_t attr; + boolean_t ret; + + b = kmem_cache_alloc(mlxp->mlx_bufs_cache, KM_SLEEP); + b->mlb_shard = shard; + b->mlb_foreign = B_FALSE; + + mlxcx_dma_acc_attr(mlxp, &acc); + mlxcx_dma_buf_attr(mlxp, &attr); + + ret = mlxcx_dma_alloc_offset(mlxp, &b->mlb_dma, &attr, &acc, + B_FALSE, mlxp->mlx_ports[0].mlp_mtu, 2, B_TRUE); + if (!ret) { + kmem_cache_free(mlxp->mlx_bufs_cache, b); + return (B_FALSE); + } + + b->mlb_frtn.free_func = mlxcx_buf_mp_return; + b->mlb_frtn.free_arg = (caddr_t)b; + b->mlb_mp = desballoc((unsigned char *)b->mlb_dma.mxdb_va, + b->mlb_dma.mxdb_len, 0, &b->mlb_frtn); + + *bp = b; + + return (B_TRUE); +} + +boolean_t +mlxcx_buf_create_foreign(mlxcx_t *mlxp, mlxcx_buf_shard_t *shard, + mlxcx_buffer_t **bp) +{ + mlxcx_buffer_t *b; + ddi_dma_attr_t attr; + boolean_t ret; + + b = kmem_cache_alloc(mlxp->mlx_bufs_cache, KM_SLEEP); + b->mlb_shard = shard; + b->mlb_foreign = B_TRUE; + + mlxcx_dma_buf_attr(mlxp, &attr); + + ret = mlxcx_dma_init(mlxp, &b->mlb_dma, &attr, B_TRUE); + if (!ret) { + kmem_cache_free(mlxp->mlx_bufs_cache, b); + return (B_FALSE); + } + + *bp = b; + + return (B_TRUE); +} + +static void +mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, + mlxcx_buffer_t **bp) +{ + mlxcx_buffer_t *b; + mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs; + + mutex_enter(&s->mlbs_mtx); + while (list_is_empty(&s->mlbs_free)) + cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); + b = list_remove_head(&s->mlbs_free); + ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE); + ASSERT(b->mlb_foreign); + b->mlb_state = MLXCX_BUFFER_ON_WQ; + list_insert_tail(&s->mlbs_busy, b); + mutex_exit(&s->mlbs_mtx); + + *bp = b; +} + +boolean_t +mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, + mblk_t *mpb, size_t off, mlxcx_buffer_t **bp) +{ + mlxcx_buffer_t *b, *b0 = NULL; + boolean_t first = B_TRUE; + ddi_fm_error_t err; + mblk_t *mp; + uint8_t *rptr; + size_t sz; + size_t ncookies = 0; + boolean_t ret; + uint_t attempts = 0; + + for (mp = mpb; mp != NULL; mp = mp->b_cont) { + rptr = mp->b_rptr; + sz = MBLKL(mp); + + if (off > 0) + ASSERT3U(off, <, sz); + rptr += off; + sz -= off; + + if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) + goto copyb; + + mlxcx_buf_take_foreign(mlxp, wq, &b); + ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off, B_FALSE); + + if (!ret) { + mlxcx_buf_return(mlxp, b); + +copyb: + mlxcx_buf_take(mlxp, wq, &b); + ASSERT3U(b->mlb_dma.mxdb_len, >=, sz); + bcopy(rptr, b->mlb_dma.mxdb_va, sz); + MLXCX_DMA_SYNC(b->mlb_dma, DDI_DMA_SYNC_FORDEV); + ddi_fm_dma_err_get(b->mlb_dma.mxdb_dma_handle, &err, + DDI_FME_VERSION); + if (err.fme_status != DDI_FM_OK) { + ddi_fm_dma_err_clear(b->mlb_dma.mxdb_dma_handle, + DDI_FME_VERSION); + mlxcx_buf_return(mlxp, b); + if (++attempts > 
MLXCX_BUF_BIND_MAX_ATTEMTPS) { + *bp = NULL; + return (B_FALSE); + } + goto copyb; + } + } + + /* + * We might overestimate here when we've copied data, since + * the buffer might be longer than what we copied into it. This + * is safe since it's always wrong in the conservative + * direction (and we will blow up later when we actually + * generate the WQE anyway). + * + * If the assert below ever blows, we'll have to come and fix + * this up so we can transmit these packets. + */ + ncookies += b->mlb_dma.mxdb_ncookies; + + if (first) + b0 = b; + + if (!first) + b->mlb_state = MLXCX_BUFFER_ON_CHAIN; + + b->mlb_tx_mp = mp; + b->mlb_tx_head = b0; + b->mlb_used = sz; + + if (!first) + list_insert_tail(&b0->mlb_tx_chain, b); + first = B_FALSE; + off = 0; + } + + ASSERT3U(ncookies, <=, MLXCX_SQE_MAX_PTRS); + + *bp = b0; + return (B_TRUE); +} + +void +mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, mlxcx_buffer_t **bp) +{ + mlxcx_buffer_t *b; + mlxcx_buf_shard_t *s = wq->mlwq_bufs; + + mutex_enter(&s->mlbs_mtx); + while (list_is_empty(&s->mlbs_free)) + cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); + b = list_remove_head(&s->mlbs_free); + ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE); + b->mlb_state = MLXCX_BUFFER_ON_WQ; + list_insert_tail(&s->mlbs_busy, b); + mutex_exit(&s->mlbs_mtx); + + *bp = b; +} + +#define MLXCX_BUF_TAKE_N_TIMEOUT_USEC 5000 +#define MLXCX_BUF_TAKE_N_MAX_RETRIES 3 + +size_t +mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, + mlxcx_buffer_t **bp, size_t nbufs) +{ + mlxcx_buffer_t *b; + size_t done = 0, empty = 0; + clock_t wtime = drv_usectohz(MLXCX_BUF_TAKE_N_TIMEOUT_USEC); + mlxcx_buf_shard_t *s; + + s = wq->mlwq_bufs; + + mutex_enter(&s->mlbs_mtx); + while (done < nbufs) { + while (list_is_empty(&s->mlbs_free)) { + (void) cv_reltimedwait(&s->mlbs_free_nonempty, + &s->mlbs_mtx, wtime, TR_MILLISEC); + if (list_is_empty(&s->mlbs_free) && + empty++ >= MLXCX_BUF_TAKE_N_MAX_RETRIES) { + mutex_exit(&s->mlbs_mtx); + return (done); + } + } + b = list_remove_head(&s->mlbs_free); + ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE); + b->mlb_state = MLXCX_BUFFER_ON_WQ; + list_insert_tail(&s->mlbs_busy, b); + bp[done++] = b; + } + mutex_exit(&s->mlbs_mtx); + return (done); +} + +boolean_t +mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b) +{ + VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ); + ASSERT3P(b->mlb_mlx, ==, mlxp); + + if (b->mlb_mp == NULL) { + b->mlb_mp = desballoc((unsigned char *)b->mlb_dma.mxdb_va, + b->mlb_dma.mxdb_len, 0, &b->mlb_frtn); + if (b->mlb_mp == NULL) + return (B_FALSE); + } + + b->mlb_state = MLXCX_BUFFER_ON_LOAN; + b->mlb_wqe_index = 0; + return (B_TRUE); +} + +void +mlxcx_buf_return_chain(mlxcx_t *mlxp, mlxcx_buffer_t *b0, boolean_t keepmp) +{ + mlxcx_buffer_t *b; + + if (b0->mlb_tx_head != b0) { + mlxcx_buf_return(mlxp, b0); + return; + } + + while ((b = list_head(&b0->mlb_tx_chain)) != NULL) { + mlxcx_buf_return(mlxp, b); + } + if (keepmp) { + b0->mlb_tx_mp = NULL; + b0->mlb_tx_head = NULL; + } + mlxcx_buf_return(mlxp, b0); +} + +void +mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b) +{ + mlxcx_buffer_state_t oldstate = b->mlb_state; + mlxcx_buffer_t *txhead = b->mlb_tx_head; + mlxcx_buf_shard_t *s = b->mlb_shard; + mblk_t *mp = b->mlb_tx_mp; + + VERIFY3U(oldstate, !=, MLXCX_BUFFER_FREE); + ASSERT3P(b->mlb_mlx, ==, mlxp); + b->mlb_state = MLXCX_BUFFER_FREE; + b->mlb_wqe_index = 0; + b->mlb_tx_head = NULL; + b->mlb_tx_mp = NULL; + b->mlb_used = 0; + ASSERT(list_is_empty(&b->mlb_tx_chain)); + + mutex_enter(&s->mlbs_mtx); + switch (oldstate) { + 
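/*
 * A reading aid for this switch, summarising the buffer lifecycle as
 * implemented by the functions above (no new states are introduced):
 *
 *	FREE   -> ON_WQ     via mlxcx_buf_take{,_n,_foreign}()
 *	ON_WQ  -> ON_LOAN   via mlxcx_buf_loan() (rx buf handed to MAC)
 *	ON_WQ  -> ON_CHAIN  via mlxcx_buf_bind_or_copy(), for non-head
 *			    entries of a tx chain
 *	ON_*   -> FREE      via mlxcx_buf_return()
 *
 * INIT is the fresh-from-kmem-cache state, and mlxcx_buf_destroy()
 * accepts only FREE or INIT. ON_LOAN buffers come back through the
 * desballoc free routine (mlxcx_buf_mp_return) once MAC frees the
 * mblk.
 */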
case MLXCX_BUFFER_INIT: + break; + case MLXCX_BUFFER_ON_WQ: + list_remove(&s->mlbs_busy, b); + break; + case MLXCX_BUFFER_ON_LOAN: + ASSERT(!b->mlb_foreign); + list_remove(&s->mlbs_busy, b); + break; + case MLXCX_BUFFER_FREE: + VERIFY(0); + break; + case MLXCX_BUFFER_ON_CHAIN: + ASSERT(txhead != NULL); + list_remove(&txhead->mlb_tx_chain, b); + list_remove(&s->mlbs_busy, b); + break; + } + + if (b->mlb_foreign) { + if (b->mlb_dma.mxdb_flags & MLXCX_DMABUF_BOUND) { + mlxcx_dma_unbind(mlxp, &b->mlb_dma); + } + } + + list_insert_tail(&s->mlbs_free, b); + cv_signal(&s->mlbs_free_nonempty); + + mutex_exit(&s->mlbs_mtx); + + /* + * For TX chain heads, free the mblk_t after we let go of the lock. + * This might be a borrowed buf that we in turn loaned to MAC, in which + * case calling freemsg() on it will re-enter this very function -- so + * we better not be holding the lock! + */ + if (txhead == b) + freemsg(mp); +} + +void +mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b) +{ + mlxcx_buf_shard_t *s = b->mlb_shard; + VERIFY(b->mlb_state == MLXCX_BUFFER_FREE || + b->mlb_state == MLXCX_BUFFER_INIT); + ASSERT(mutex_owned(&s->mlbs_mtx)); + if (b->mlb_state == MLXCX_BUFFER_FREE) + list_remove(&s->mlbs_free, b); + + /* + * This is going back to the kmem cache, so it needs to be set up in + * the same way we expect a new buffer to come out (state INIT, other + * fields NULL'd) + */ + b->mlb_state = MLXCX_BUFFER_INIT; + b->mlb_shard = NULL; + if (b->mlb_mp != NULL) { + freeb(b->mlb_mp); + ASSERT(b->mlb_mp == NULL); + } + mlxcx_dma_free(&b->mlb_dma); + ASSERT(list_is_empty(&b->mlb_tx_chain)); + + kmem_cache_free(mlxp->mlx_bufs_cache, b); +} diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c index 4d24932269..d4dfe83766 100644 --- a/usr/src/uts/common/io/ptm.c +++ b/usr/src/uts/common/io/ptm.c @@ -24,7 +24,9 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - +/* + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + */ /* * Pseudo Terminal Master Driver. @@ -533,6 +535,13 @@ ptmwput(queue_t *qp, mblk_t *mp) DBG(("ack the UNLKPT/ISPTM\n")); miocack(qp, mp, 0, 0); break; + case PTSSTTY: + mutex_enter(&ptmp->pt_lock); + ptmp->pt_state |= PTSTTY; + mutex_exit(&ptmp->pt_lock); + DBG(("ack PTSSTTY\n")); + miocack(qp, mp, 0, 0); + break; case ZONEPT: { zoneid_t z; diff --git a/usr/src/uts/common/io/pts.c b/usr/src/uts/common/io/pts.c index d67beb255a..ff2d91f566 100644 --- a/usr/src/uts/common/io/pts.c +++ b/usr/src/uts/common/io/pts.c @@ -25,7 +25,9 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - +/* + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + */ /* * Pseudo Terminal Slave Driver. @@ -106,6 +108,7 @@ #include <sys/sysmacros.h> #include <sys/stream.h> #include <sys/stropts.h> +#include <sys/strsubr.h> #include <sys/stat.h> #include <sys/errno.h> #include <sys/debug.h> @@ -337,7 +340,6 @@ ptsopen( DDBGP("ptsopen: p = %p\n", (uintptr_t)ptsp); DDBG("ptsopen: state = %x\n", ptsp->pt_state); - ASSERT(ptsp->pt_minor == dminor); if ((ptsp->pt_state & PTLOCK) || !(ptsp->pt_state & PTMOPEN)) { @@ -347,7 +349,7 @@ ptsopen( } /* - * if already, open simply return... + * if already open, simply return... 
*/ if (ptsp->pt_state & PTSOPEN) { ASSERT(rqp->q_ptr == ptsp); @@ -386,6 +388,9 @@ ptsopen( mutex_exit(&ptsp->pt_lock); mutex_exit(&ptms_lock); + if (ptsp->pt_state & PTSTTY) + STREAM(rqp)->sd_flag |= STRXPG4TTY; + qprocson(rqp); /* @@ -416,8 +421,6 @@ ptsopen( return (0); } - - /* * Find the address to private data identifying the slave's write * queue. Send a 0-length msg up the slave's read queue to designate diff --git a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c index 0e342e8bcc..721aead599 100644 --- a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c +++ b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c @@ -1432,6 +1432,7 @@ ahci_tran_probe_port(dev_info_t *dip, sata_device_t *sd) uint8_t port; int rval = SATA_SUCCESS, rval_init; + port_state = 0; ahci_ctlp = ddi_get_soft_state(ahci_statep, ddi_get_instance(dip)); port = ahci_ctlp->ahcictl_cport_to_port[cport]; @@ -1996,6 +1997,7 @@ ahci_claim_free_slot(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, ahci_portp->ahciport_pending_tags, ahci_portp->ahciport_pending_ncq_tags); + free_slots = 0; /* * According to the AHCI spec, system software is responsible to * ensure that queued and non-queued commands are not mixed in @@ -9837,6 +9839,8 @@ ahci_watchdog_handler(ahci_ctl_t *ahci_ctlp) AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, "ahci_watchdog_handler entered", NULL); + current_slot = 0; + current_tags = 0; for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) { if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) { continue; diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c index e532a551e7..d75db5f258 100644 --- a/usr/src/uts/common/io/vnic/vnic_dev.c +++ b/usr/src/uts/common/io/vnic/vnic_dev.c @@ -22,6 +22,7 @@ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2018 Joyent, Inc. * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ #include <sys/types.h> @@ -198,7 +199,7 @@ vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type, uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag, uint16_t vid, boolean_t req_hwgrp_flag) { - mac_diag_t mac_diag; + mac_diag_t mac_diag = MAC_DIAG_NONE; uint16_t mac_flags = 0; int err; uint_t addr_len; @@ -1060,7 +1061,7 @@ static int vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, const void *pr_val) { - int err = 0; + int err = 0; vnic_t *vn = m_driver; switch (pr_num) { @@ -1158,7 +1159,7 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, uint_t pr_valsize, void *pr_val) { vnic_t *vn = arg; - int ret = 0; + int ret = 0; boolean_t out; switch (pr_num) { diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index 1a1a734d5f..866fd3fc2c 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -25,7 +25,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2017 Joyent, Inc. - * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 
*/ #include <sys/types.h> @@ -79,6 +79,7 @@ #include <sys/dld.h> #include <sys/zone.h> #include <sys/limits.h> +#include <sys/ptms.h> #include <c2/audit.h> /* @@ -232,6 +233,50 @@ push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name, return (0); } +static int +xpg4_fixup(queue_t *qp, dev_t *devp, struct stdata *stp, cred_t *crp) +{ + static const char *ptsmods[] = { + "ptem", "ldterm", "ttcompat" + }; + dev_t dummydev = *devp; + struct strioctl strioc; + zoneid_t zoneid; + int32_t rval; + uint_t i; + + /* + * Push modules required for the slave PTY to have terminal + * semantics out of the box; this is required by XPG4v2. + * These three modules are flagged as single-instance so that + * the system will never end up with duplicate copies pushed + * onto a stream. + */ + + zoneid = crgetzoneid(crp); + for (i = 0; i < ARRAY_SIZE(ptsmods); i++) { + int error; + + error = push_mod(qp, &dummydev, stp, ptsmods[i], 0, + crp, zoneid); + if (error != 0) + return (error); + } + + /* + * Send PTSSTTY down the stream + */ + + strioc.ic_cmd = PTSSTTY; + strioc.ic_timout = 0; + strioc.ic_len = 0; + strioc.ic_dp = NULL; + + (void) strdoioctl(stp, &strioc, FNATIVE, K_TO_K, crp, &rval); + + return (0); +} + /* * Open a stream device. */ @@ -550,10 +595,15 @@ retryap: opendone: + if (error == 0 && + (stp->sd_flag & (STRISTTY|STRXPG4TTY)) == (STRISTTY|STRXPG4TTY)) { + error = xpg4_fixup(qp, devp, stp, crp); + } + /* * let specfs know that open failed part way through */ - if (error) { + if (error != 0) { mutex_enter(&stp->sd_lock); stp->sd_flag |= STREOPENFAIL; mutex_exit(&stp->sd_lock); diff --git a/usr/src/uts/common/sys/fm/io/ddi.h b/usr/src/uts/common/sys/fm/io/ddi.h index 75afff5c38..d8c772cdaf 100644 --- a/usr/src/uts/common/sys/fm/io/ddi.h +++ b/usr/src/uts/common/sys/fm/io/ddi.h @@ -66,6 +66,17 @@ extern "C" { #define DVR_STACK_DEPTH "dvr-stack-depth" #define DVR_ERR_SPECIFIC "dvr-error-specific" +/* Generic NIC driver ereports. */ +#define DDI_FM_NIC "nic" +#define DDI_FM_TXR_ERROR "txr-err" + +/* Valid values of the "error" field in txr-err ereports */ +#define DDI_FM_TXR_ERROR_WHITELIST "whitelist" +#define DDI_FM_TXR_ERROR_NOTSUPP "notsupp" +#define DDI_FM_TXR_ERROR_OVERTEMP "overtemp" +#define DDI_FM_TXR_ERROR_HWFAIL "hwfail" +#define DDI_FM_TXR_ERROR_UNKNOWN "unknown" + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/fs/namenode.h b/usr/src/uts/common/sys/fs/namenode.h index 9ebf2cf1ca..24d276b6c3 100644 --- a/usr/src/uts/common/sys/fs/namenode.h +++ b/usr/src/uts/common/sys/fs/namenode.h @@ -26,6 +26,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + */ + #ifndef _SYS_FS_NAMENODE_H #define _SYS_FS_NAMENODE_H @@ -93,6 +97,10 @@ extern struct vnodeops *nm_vnodeops; extern const struct fs_operation_def nm_vnodeops_template[]; extern kmutex_t ntable_lock; +typedef int nm_walk_mounts_f(const struct namenode *, cred_t *, void *); +extern int nm_walk_mounts(const vnode_t *, nm_walk_mounts_f *, cred_t *, + void *); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index 21641b884d..0f8be50fde 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -230,7 +230,7 @@ extern int mac_tx_percpu_cnt; &(mcip)->mci_flent->fe_resource_props) #define MCIP_EFFECTIVE_PROPS(mcip) \ - (mcip->mci_flent == NULL ? NULL : \ + (mcip->mci_flent == NULL ? 
NULL : \ &(mcip)->mci_flent->fe_effective_props) #define MCIP_RESOURCE_PROPS_MASK(mcip) \ diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 21f2c10a8e..3c103c073a 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -290,6 +290,54 @@ struct mac_group_s { #define GROUP_INTR_ENABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_enable #define GROUP_INTR_DISABLE_FUNC(g) (g)->mrg_info.mgi_intr.mi_disable +#define MAC_RING_TX(mhp, rh, mp, rest) { \ + mac_ring_handle_t mrh = rh; \ + mac_impl_t *mimpl = (mac_impl_t *)mhp; \ + /* \ + * Send packets through a selected tx ring, or through the \ + * default handler if there is no selected ring. \ + */ \ + if (mrh == NULL) \ + mrh = mimpl->mi_default_tx_ring; \ + if (mrh == NULL) { \ + rest = mimpl->mi_tx(mimpl->mi_driver, mp); \ + } else { \ + rest = mac_hwring_tx(mrh, mp); \ + } \ +} + +/* + * This is the final stop before reaching the underlying driver + * or aggregation, so this is where the bridging hook is implemented. + * Packets that are bridged will return through mac_bridge_tx(), with + * rh nulled out if the bridge chooses to send output on a different + * link due to forwarding. + */ +#define MAC_TX(mip, rh, mp, src_mcip) { \ + mac_ring_handle_t rhandle = (rh); \ + /* \ + * If there is a bound Hybrid I/O share, send packets through \ + * the default tx ring. (When there's a bound Hybrid I/O share, \ + * the tx rings of this client are mapped in the guest domain \ + * and not accessible from here.) \ + */ \ + _NOTE(CONSTANTCONDITION) \ + if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \ + rhandle = (mip)->mi_default_tx_ring; \ + if (mip->mi_promisc_list != NULL) \ + mac_promisc_dispatch(mip, mp, src_mcip); \ + /* \ + * Grab the proper transmit pointer and handle. 
Special \ + * optimization: we can test mi_bridge_link itself atomically, \ + * and if that indicates no bridge send packets through tx ring.\ + */ \ + if (mip->mi_bridge_link == NULL) { \ + MAC_RING_TX(mip, rhandle, mp, mp); \ + } else { \ + mp = mac_bridge_tx(mip, rhandle, mp); \ + } \ +} + /* mci_tx_flag */ #define MCI_TX_QUIESCE 0x1 diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 2dea3a4758..fc3b3892bd 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -567,14 +567,14 @@ extern void mac_free(mac_register_t *); extern int mac_register(mac_register_t *, mac_handle_t *); extern int mac_disable_nowait(mac_handle_t); extern int mac_disable(mac_handle_t); -extern int mac_unregister(mac_handle_t); -extern void mac_rx(mac_handle_t, mac_resource_handle_t, +extern int mac_unregister(mac_handle_t); +extern void mac_rx(mac_handle_t, mac_resource_handle_t, mblk_t *); -extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t, +extern void mac_rx_ring(mac_handle_t, mac_ring_handle_t, mblk_t *, uint64_t); -extern void mac_link_update(mac_handle_t, link_state_t); -extern void mac_link_redo(mac_handle_t, link_state_t); -extern void mac_unicst_update(mac_handle_t, +extern void mac_link_update(mac_handle_t, link_state_t); +extern void mac_link_redo(mac_handle_t, link_state_t); +extern void mac_unicst_update(mac_handle_t, const uint8_t *); extern void mac_dst_update(mac_handle_t, const uint8_t *); extern void mac_tx_update(mac_handle_t); diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index 65bdfb2e17..14e17c1c0c 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -29,7 +29,7 @@ */ /* - * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ #ifndef _SYS_STRSUBR_H @@ -289,7 +289,7 @@ typedef struct stdata { #define SNDMREAD 0x00008000 /* used for read notification */ #define OLDNDELAY 0x00010000 /* use old TTY semantics for */ /* NDELAY reads and writes */ - /* 0x00020000 unused */ +#define STRXPG4TTY 0x00020000 /* Use XPG4 TTY semantics */ /* 0x00040000 unused */ #define STRTOSTOP 0x00080000 /* block background writes */ #define STRCMDWAIT 0x00100000 /* someone is doing an _I_CMD */ diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c index efb4c81092..aea7f2e856 100644 --- a/usr/src/uts/i86pc/io/mp_platform_common.c +++ b/usr/src/uts/i86pc/io/mp_platform_common.c @@ -24,6 +24,7 @@ * Copyright 2017 Joyent, Inc. * Copyright (c) 2017 by Delphix. All rights reserved. * Copyright (c) 2019, Joyent, Inc. + * Copyright 2020 RackTop Systems, Inc. */ /* * Copyright (c) 2010, Intel Corporation. @@ -79,7 +80,7 @@ /* * Local Function Prototypes */ -static int apic_handle_defconf(); +static int apic_handle_defconf(void); static int apic_parse_mpct(caddr_t mpct, int bypass); static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size); static int apic_checksum(caddr_t bptr, int len); @@ -182,7 +183,7 @@ int apic_num_rebind = 0; * Maximum number of APIC CPUs in the system, -1 indicates that dynamic * allocation of CPU ids is disabled. 
*/ -int apic_max_nproc = -1; +int apic_max_nproc = -1; int apic_nproc = 0; size_t apic_cpus_size = 0; int apic_defconf = 0; @@ -589,10 +590,22 @@ apic_free_apic_cpus(void) } } +static uint32_t +acpi_get_apic_lid(void) +{ + uint32_t id; + + id = apic_reg_ops->apic_read(APIC_LID_REG); + if (apic_mode != LOCAL_X2APIC) + id >>= APIC_ID_BIT_OFFSET; + + return (id); +} + static int acpi_probe(char *modname) { - int i, intmax, index; + int i, intmax; uint32_t id, ver; int acpi_verboseflags = 0; int madt_seen, madt_size; @@ -640,9 +653,9 @@ acpi_probe(char *modname) return (PSM_FAILURE); } - id = apic_reg_ops->apic_read(APIC_LID_REG); - local_ids[0] = (uchar_t)(id >> 24); - apic_nproc = index = 1; + local_ids[0] = acpi_get_apic_lid(); + + apic_nproc = 1; apic_io_max = 0; ap = (ACPI_SUBTABLE_HEADER *) (acpi_mapic_dtp + 1); @@ -657,25 +670,19 @@ acpi_probe(char *modname) if (mpa->Id == 255) { cmn_err(CE_WARN, "!%s: encountered " "invalid entry in MADT: CPU %d " - "has Local APIC Id equal to 255 ", + "has Local APIC Id equal to 255", psm_name, mpa->ProcessorId); } if (mpa->Id == local_ids[0]) { - ASSERT(index == 1); proc_ids[0] = mpa->ProcessorId; } else if (apic_nproc < NCPU && use_mp && apic_nproc < boot_ncpus) { - local_ids[index] = mpa->Id; - proc_ids[index] = mpa->ProcessorId; - index++; + local_ids[apic_nproc] = mpa->Id; + proc_ids[apic_nproc] = mpa->ProcessorId; apic_nproc++; } else if (apic_nproc == NCPU && !warned) { cmn_err(CE_WARN, "%s: CPU limit " - "exceeded" -#if !defined(__amd64) - " for 32-bit mode" -#endif - "; Solaris will use %d CPUs.", + "exceeded; will use %d CPUs.", psm_name, NCPU); warned = 1; } @@ -716,7 +723,7 @@ acpi_probe(char *modname) acpi_nmi_sp = mns; acpi_nmi_scnt++; - cmn_err(CE_NOTE, "!apic: nmi source: %d 0x%x\n", + cmn_err(CE_NOTE, "!apic: nmi source: %d 0x%x", mns->GlobalIrq, mns->IntiFlags); break; @@ -727,7 +734,7 @@ acpi_probe(char *modname) acpi_nmi_cp = mlan; acpi_nmi_ccnt++; - cmn_err(CE_NOTE, "!apic: local nmi: %d 0x%x %d\n", + cmn_err(CE_NOTE, "!apic: local nmi: %d 0x%x %d", mlan->ProcessorId, mlan->IntiFlags, mlan->Lint); break; @@ -735,7 +742,7 @@ acpi_probe(char *modname) case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE: /* UNIMPLEMENTED */ mao = (ACPI_MADT_LOCAL_APIC_OVERRIDE *) ap; - cmn_err(CE_NOTE, "!apic: address override: %lx\n", + cmn_err(CE_NOTE, "!apic: address override: %lx", (long)mao->Address); break; @@ -743,7 +750,7 @@ acpi_probe(char *modname) /* UNIMPLEMENTED */ misa = (ACPI_MADT_IO_SAPIC *) ap; - cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", + cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx", misa->Id, misa->GlobalIrqBase, (long)misa->Address); break; @@ -753,7 +760,7 @@ acpi_probe(char *modname) mis = (ACPI_MADT_INTERRUPT_SOURCE *) ap; cmn_err(CE_NOTE, - "!apic: irq source: %d %d %d 0x%x %d %d\n", + "!apic: irq source: %d %d %d 0x%x %d %d", mis->Id, mis->Eid, mis->GlobalIrq, mis->IntiFlags, mis->Type, mis->IoSapicVector); @@ -764,21 +771,16 @@ acpi_probe(char *modname) if (mpx2a->LapicFlags & ACPI_MADT_ENABLED) { if (mpx2a->LocalApicId == local_ids[0]) { - ASSERT(index == 1); proc_ids[0] = mpx2a->Uid; } else if (apic_nproc < NCPU && use_mp && apic_nproc < boot_ncpus) { - local_ids[index] = mpx2a->LocalApicId; - proc_ids[index] = mpx2a->Uid; - index++; + local_ids[apic_nproc] = + mpx2a->LocalApicId; + proc_ids[apic_nproc] = mpx2a->Uid; apic_nproc++; } else if (apic_nproc == NCPU && !warned) { cmn_err(CE_WARN, "%s: CPU limit " - "exceeded" -#if !defined(__amd64) - " for 32-bit mode" -#endif - "; Solaris will use %d CPUs.", + "exceeded; will use 
%d CPUs.", psm_name, NCPU); warned = 1; } @@ -792,9 +794,9 @@ acpi_probe(char *modname) if (mx2alan->Uid >> 8) acpi_nmi_ccnt++; -#ifdef DEBUG +#ifdef DEBUG cmn_err(CE_NOTE, - "!apic: local x2apic nmi: %d 0x%x %d\n", + "!apic: local x2apic nmi: %d 0x%x %d", mx2alan->Uid, mx2alan->IntiFlags, mx2alan->Lint); #endif @@ -848,19 +850,19 @@ acpi_probe(char *modname) * The state for each apic CPU info structure will be assigned according * to the following rules: * Rule 1: - * Slot index range: [0, min(apic_nproc, boot_ncpus)) + * Slot index range: [0, min(apic_nproc, boot_ncpus)) * State flags: 0 * Note: cpu exists and will be configured/enabled at boot time * Rule 2: - * Slot index range: [boot_ncpus, apic_nproc) + * Slot index range: [boot_ncpus, apic_nproc) * State flags: APIC_CPU_FREE | APIC_CPU_DIRTY * Note: cpu exists but won't be configured/enabled at boot time * Rule 3: - * Slot index range: [apic_nproc, boot_ncpus) + * Slot index range: [apic_nproc, boot_ncpus) * State flags: APIC_CPU_FREE * Note: cpu doesn't exist at boot time * Rule 4: - * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus) + * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus) * State flags: APIC_CPU_FREE * Note: cpu doesn't exist at boot time */ @@ -1014,10 +1016,8 @@ cleanup: * Fill all details as MP table does not give any more info */ static int -apic_handle_defconf() +apic_handle_defconf(void) { - uint_t lid; - /* Failed to probe ACPI MADT tables, disable CPU DR. */ apic_max_nproc = -1; apic_free_apic_cpus(); @@ -1035,8 +1035,7 @@ apic_handle_defconf() CPUSET_ONLY(apic_cpumask, 0); CPUSET_ADD(apic_cpumask, 1); apic_nproc = 2; - lid = apic_reg_ops->apic_read(APIC_LID_REG); - apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); + apic_cpus[0].aci_local_id = acpi_get_apic_lid(); /* * According to the PC+MP spec 1.1, the local ids * for the default configuration has to be 0 or 1 @@ -1081,10 +1080,9 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) struct apic_io_entry *ioapicp; struct apic_io_intr *intrp; int ioapic_ix; - uint_t lid; - uint32_t id; - uchar_t hid; - int warned = 0; + uint32_t lid, id; + uchar_t hid; + int warned = 0; /*LINTED: pointer cast may result in improper alignment */ procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); @@ -1103,11 +1101,7 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) apic_nproc++; } else if (apic_nproc == NCPU && !warned) { cmn_err(CE_WARN, "%s: CPU limit " - "exceeded" -#if !defined(__amd64) - " for 32-bit mode" -#endif - "; Solaris will use %d CPUs.", + "exceeded; will use %d CPUs.", psm_name, NCPU); warned = 1; } @@ -1137,10 +1131,9 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) if (!bypass_cpus_and_ioapics && procp->proc_cpuflags & CPUFLAGS_EN) { if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ - lid = apic_reg_ops->apic_read(APIC_LID_REG); + lid = acpi_get_apic_lid(); apic_cpus[0].aci_local_id = procp->proc_apicid; - if (apic_cpus[0].aci_local_id != - (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { + if (apic_cpus[0].aci_local_id != lid) { return (PSM_FAILURE); } apic_cpus[0].aci_local_ver = @@ -1624,7 +1617,8 @@ apic_allocate_irq(int irq) if (freeirq == -1) { /* This shouldn't happen, but just in case */ - cmn_err(CE_WARN, "%s: NO available IRQ", psm_name); + cmn_err(CE_WARN, "%s: NO available IRQ", + psm_name); return (-1); } } diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 820e0a4e31..aed47948a9 100644 --- a/usr/src/uts/intel/Makefile.intel 
+++ b/usr/src/uts/intel/Makefile.intel @@ -318,6 +318,7 @@ DRV_KMODS += log DRV_KMODS += logindmux DRV_KMODS += mega_sas DRV_KMODS += mc-amd +DRV_KMODS += mlxcx DRV_KMODS += mm DRV_KMODS += mouse8042 DRV_KMODS += mpt_sas diff --git a/usr/src/uts/intel/ipsecah/Makefile b/usr/src/uts/intel/ipsecah/Makefile index d744c131f1..dd8485f210 100644 --- a/usr/src/uts/intel/ipsecah/Makefile +++ b/usr/src/uts/intel/ipsecah/Makefile @@ -42,7 +42,6 @@ UTSBASE = ../.. # MODULE = ipsecah OBJECTS = $(IPSECAH_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(IPSECAH_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) CONF_SRCDIR = $(UTSBASE)/common/inet/ip @@ -56,7 +55,6 @@ include $(UTSBASE)/intel/Makefile.intel # Define targets # ALL_TARGET = $(BINARY) $(SRC_CONFFILE) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # @@ -64,24 +62,9 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # LDFLAGS += -dy -Ndrv/ip -Ndrv/tcp -Nmisc/kcf -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - CERRWARN += -_gcc=-Wno-parentheses CERRWARN += $(CNOWARN_UNINIT) -# needs work -$(OBJS_DIR)/ipsecahddi.o := SMOFF += index_overflow -$(OBJS_DIR)/ipsecah.o := SMOFF += deref_check -$(OBJS_DIR)/sadb.o := SMOFF += signed_integer_overflow_check,deref_check,indenting,shift_to_zero - # # Default build targets. # @@ -95,12 +78,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS) clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) $(SISCHECK_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) diff --git a/usr/src/uts/intel/ipsecesp/Makefile b/usr/src/uts/intel/ipsecesp/Makefile index 713ad82d7c..3ae4a4cac1 100644 --- a/usr/src/uts/intel/ipsecesp/Makefile +++ b/usr/src/uts/intel/ipsecesp/Makefile @@ -26,7 +26,7 @@ # Copyright (c) 2018, Joyent, Inc. # -# This makefile drives the production of the ipsecesp driver +# This makefile drives the production of the ipsecesp driver # kernel module. # # intel implementation architecture dependent @@ -42,7 +42,6 @@ UTSBASE = ../.. # MODULE = ipsecesp OBJECTS = $(IPSECESP_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(IPSECESP_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) CONF_SRCDIR = $(UTSBASE)/common/inet/ip @@ -56,7 +55,6 @@ include $(UTSBASE)/intel/Makefile.intel # Define targets # ALL_TARGET = $(BINARY) $(SRC_CONFFILE) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # @@ -64,21 +62,8 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # LDFLAGS += -dy -Ndrv/ip -Ndrv/ipsecah -Nmisc/kcf -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - CERRWARN += $(CNOWARN_UNINIT) -# needs work -$(OBJS_DIR)/ipsecespddi.o := SMOFF += index_overflow -$(OBJS_DIR)/ipsecesp.o := SMOFF += deref_check - # # Default build targets. 
# @@ -92,12 +77,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS) clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) $(SISCHECK_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) diff --git a/usr/src/uts/intel/mlxcx/Makefile b/usr/src/uts/intel/mlxcx/Makefile new file mode 100644 index 0000000000..27bdfa4b73 --- /dev/null +++ b/usr/src/uts/intel/mlxcx/Makefile @@ -0,0 +1,44 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +UTSBASE = ../.. + +MODULE = mlxcx +OBJECTS = $(MLXCX_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/io/mlxcx + +include $(UTSBASE)/intel/Makefile.intel + +CPPFLAGS += -I$(UTSBASE)/common/io/mlxcx + +ALL_TARGET = $(BINARY) $(CONFMOD) +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +LDFLAGS += -dy -N misc/mac + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases index f9b5129d3b..f5b0a08489 100644 --- a/usr/src/uts/intel/os/driver_aliases +++ b/usr/src/uts/intel/os/driver_aliases @@ -1085,6 +1085,19 @@ mega_sas "pci1028,15.1028.1f01" mega_sas "pci1028,15.1028.1f02" mega_sas "pci1028,15.1028.1f03" mouse8042 "pnpPNP,f03" +mlxcx "pciex15b3,1013" +mlxcx "pciex15b3,1014" +mlxcx "pciex15b3,1015" +mlxcx "pciex15b3,1016" +mlxcx "pciex15b3,1017" +mlxcx "pciex15b3,1018" +mlxcx "pciex15b3,1019" +mlxcx "pciex15b3,101a" +mlxcx "pciex15b3,101b" +mlxcx "pciex15b3,101c" +mlxcx "pciex15b3,101d" +mlxcx "pciex15b3,101e" +mlxcx "pciex15b3,101f" mpt "pci1000,30" mpt "pci1000,50" mpt "pci1000,54" diff --git a/usr/src/uts/intel/procfs/Makefile b/usr/src/uts/intel/procfs/Makefile index 1db5848438..630b6a25d3 100644 --- a/usr/src/uts/intel/procfs/Makefile +++ b/usr/src/uts/intel/procfs/Makefile @@ -25,6 +25,7 @@ # Use is subject to license terms. # # Copyright 2019 Joyent, Inc. +# Copyright 2020 OmniOS Community Edition (OmniOSce) Association. # # This makefile drives the production of the procfs file system @@ -83,6 +84,8 @@ $(OBJS_DIR)/prsubr.o := SMOFF += all_func_returns $(OBJS_DIR)/prcontrol.o := SMOFF += all_func_returns $(OBJS_DIR)/prioctl.o := SMOFF += signed +LDFLAGS += -dy -Nfs/namefs + # # Default build targets. # diff --git a/usr/src/uts/sparc/ipsecah/Makefile b/usr/src/uts/sparc/ipsecah/Makefile index 55ee48c88f..ad14fa4e5b 100644 --- a/usr/src/uts/sparc/ipsecah/Makefile +++ b/usr/src/uts/sparc/ipsecah/Makefile @@ -24,7 +24,7 @@ # # -# This makefile drives the production of the ipsecah driver +# This makefile drives the production of the ipsecah driver # kernel module. # # sparc architecture dependent @@ -40,7 +40,6 @@ UTSBASE = ../.. 
# MODULE = ipsecah OBJECTS = $(IPSECAH_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(IPSECAH_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) CONF_SRCDIR = $(UTSBASE)/common/inet/ip @@ -54,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc # Define targets # ALL_TARGET = $(BINARY) $(SRC_CONFFILE) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # @@ -62,21 +60,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # LDFLAGS += -dy -Ndrv/ip -Ndrv/tcp -Nmisc/kcf -# -# lint pass one enforcement -# -CFLAGS += $(CCVERBOSE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON - CERRWARN += -_gcc=-Wno-parentheses CERRWARN += $(CNOWARN_UNINIT) @@ -93,12 +76,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS) clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) $(SISCHECK_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) diff --git a/usr/src/uts/sparc/ipsecesp/Makefile b/usr/src/uts/sparc/ipsecesp/Makefile index 1a36e4fbc7..931dc913a2 100644 --- a/usr/src/uts/sparc/ipsecesp/Makefile +++ b/usr/src/uts/sparc/ipsecesp/Makefile @@ -24,7 +24,7 @@ # # -# This makefile drives the production of the ipsecesp driver +# This makefile drives the production of the ipsecesp driver # kernel module. # # sparc architecture dependent @@ -40,7 +40,6 @@ UTSBASE = ../.. # MODULE = ipsecesp OBJECTS = $(IPSECESP_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(IPSECESP_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) ROOTLINK = $(ROOT_STRMOD_DIR)/$(MODULE) CONF_SRCDIR = $(UTSBASE)/common/inet/ip @@ -54,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc # Define targets # ALL_TARGET = $(BINARY) $(SRC_CONFFILE) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # @@ -62,20 +60,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) # LDFLAGS += -dy -Ndrv/ip -Ndrv/ipsecah -Nmisc/kcf -# -# lint pass one enforcement -# -CFLAGS += $(CCVERBOSE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - CERRWARN += $(CNOWARN_UNINIT) # @@ -91,12 +75,6 @@ clean: $(CLEAN_DEPS) $(SISCLEAN_DEPS) clobber: $(CLOBBER_DEPS) $(SISCLEAN_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) $(SISCHECK_DEPS) $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) diff --git a/usr/src/uts/sparc/procfs/Makefile b/usr/src/uts/sparc/procfs/Makefile index 8dd05fe72b..3226238bd4 100644 --- a/usr/src/uts/sparc/procfs/Makefile +++ b/usr/src/uts/sparc/procfs/Makefile @@ -23,6 +23,7 @@ # Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # +# Copyright 2020 OmniOS Community Edition (OmniOSce) Association. # # This makefile drives the production of the procfs file system @@ -41,7 +42,6 @@ UTSBASE = ../.. 
# MODULE = procfs OBJECTS = $(PROC_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(PROC_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE) # @@ -53,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc # Define targets # ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # @@ -64,19 +63,12 @@ $(MODSTUBS_O) := AS_CPPFLAGS += -DPROC_MODULE CLEANFILES += $(MODSTUBS_O) CFLAGS += $(CCVERBOSE) -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - CERRWARN += -_gcc=-Wno-parentheses CERRWARN += -_gcc=-Wno-switch CERRWARN += $(CNOWARN_UNINIT) +LDFLAGS += -dy -Nfs/namefs + # # Default build targets. # @@ -90,12 +82,6 @@ clean: $(CLEAN_DEPS) clobber: $(CLOBBER_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) # diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c index f30ef8e2d4..96fb04175d 100644 --- a/usr/src/uts/sun4v/io/vnet.c +++ b/usr/src/uts/sun4v/io/vnet.c @@ -1133,9 +1133,9 @@ vnet_mac_register(vnet_t *vnetp) static int vnet_read_mac_address(vnet_t *vnetp) { - uchar_t *macaddr; - uint32_t size; - int rv; + uchar_t *macaddr; + uint32_t size; + int rv; rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip, DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size); @@ -2318,7 +2318,7 @@ vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, */ static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, - mac_group_info_t *infop, mac_group_handle_t handle) + mac_group_info_t *infop, mac_group_handle_t handle) { vnet_t *vnetp = (vnet_t *)arg; @@ -2631,7 +2631,7 @@ vnet_rx_poll(void *arg, int bytes_to_pickup) /* ARGSUSED */ void vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, - boolean_t loopback) + boolean_t loopback) { vnet_t *vnetp = (vnet_t *)arg; vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;
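A pattern that recurs throughout the mlxcx additions above deserves a note: every doorbell write is bracketed by a DMA sync and a fault-management check, with a bounded number of clear-and-retry passes (mlxcx_doorbell_tries) before the device is declared lost. Below is a stripped-down sketch of that shape using only standard DDI routines; the function name, parameters, and retry bound are illustrative stand-ins, not the driver's own code.

	#include <sys/types.h>
	#include <sys/ddi.h>
	#include <sys/sunddi.h>
	#include <sys/ddifm.h>

	static boolean_t
	ring_doorbell_sketch(dev_info_t *dip, ddi_dma_handle_t dh,
	    uint_t max_tries)
	{
		ddi_fm_error_t err;
		uint_t try = 0;

	retry:
		/* Push the doorbell record out to the device. */
		(void) ddi_dma_sync(dh, 0, 0, DDI_DMA_SYNC_FORDEV);
		ddi_fm_dma_err_get(dh, &err, DDI_FME_VERSION);
		if (err.fme_status != DDI_FM_OK) {
			if (try++ < max_tries) {
				/* Clear the fault and try again. */
				ddi_fm_dma_err_clear(dh, DDI_FME_VERSION);
				goto retry;
			}
			/* Out of retries: report the service as lost. */
			ddi_fm_service_impact(dip, DDI_SERVICE_LOST);
			return (B_FALSE);
		}
		return (B_TRUE);
	}

The same check-clear-retry dance is applied to the register access handle (ddi_fm_acc_err_get()/ddi_fm_acc_err_clear()) around the mlxcx_put64() write in mlxcx_sq_ring_dbell().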