author      ha137994 <none@none>    2008-04-10 18:31:44 -0700
committer   ha137994 <none@none>    2008-04-10 18:31:44 -0700
commit      20ae46ebaff1237662e05edf9db61538aa85d448 (patch)
tree        3ea68e409475f0ef216d592f5c86d317aa4e9e54 /usr/src
parent      bfc848c632c9eacb2a640246d96e198f1b185c03 (diff)
download    illumos-joyent-20ae46ebaff1237662e05edf9db61538aa85d448.tar.gz
6666476 LDC mode names don't conform to FWARC 2006/571
6666840 ldc.c should be split up for better maintainability
6671853 cnex interrupt distribution is imbalanced
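Note: 6666476 is a mechanical rename: the LDC "stream" mode becomes "reliable" to match FWARC 2006/571, so every consumer that previously set LDC_MODE_STREAM on a vldc channel now sets LDC_MODE_RELIABLE. A minimal userland sketch of the resulting idiom, mirroring the etm_xport_api_dd.c, ldmsvcs_utils.c and libpcp.c hunks below; the header paths, the vldc_opt_op_t type name and the open/close handling are assumptions for illustration, not part of this patch.

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/ldc.h>	/* LDC_MODE_RELIABLE (assumed header path) */
	#include <sys/vldc.h>	/* VLDC_IOCTL_OPT_OP, VLDC_OP_SET, VLDC_OPT_MODE (assumed) */

	/* Open a vldc channel device and switch it to reliable mode. */
	static int
	open_reliable_chan(const char *devpath)
	{
		vldc_opt_op_t op;	/* assumed name of the option-op struct */
		int fd;

		if ((fd = open(devpath, O_RDWR)) < 0)
			return (-1);

		/* Request reliable mode; LDC_MODE_STREAM no longer exists. */
		op.op_sel = VLDC_OP_SET;
		op.opt_sel = VLDC_OPT_MODE;
		op.opt_val = LDC_MODE_RELIABLE;

		if (ioctl(fd, VLDC_IOCTL_OPT_OP, &op) != 0) {
			(void) close(fd);
			return (-1);
		}
		return (fd);
	}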
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c      4
-rw-r--r--  usr/src/cmd/mdb/sun4v/modules/ldc/ldc.c                  4
-rw-r--r--  usr/src/lib/fm/libldom/sparc/ldmsvcs_utils.c             2
-rw-r--r--  usr/src/lib/libpcp/common/libpcp.c                       4
-rw-r--r--  usr/src/uts/sun4v/Makefile.files                         5
-rw-r--r--  usr/src/uts/sun4v/io/cnex.c                            319
-rw-r--r--  usr/src/uts/sun4v/io/ds.c                                2
-rw-r--r--  usr/src/uts/sun4v/io/ldc.c                            2255
-rw-r--r--  usr/src/uts/sun4v/io/ldc_shm.c                        2236
-rw-r--r--  usr/src/uts/sun4v/io/vldc.c                              5
-rw-r--r--  usr/src/uts/sun4v/sys/cnex.h                             6
-rw-r--r--  usr/src/uts/sun4v/sys/ldc.h                              6
-rw-r--r--  usr/src/uts/sun4v/sys/ldc_impl.h                        21
13 files changed, 2489 insertions, 2380 deletions
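Note: 6671853 is addressed by replacing cnex's flat devclass-to-PIL table with a devclass-to-{PIL, weight} table and by registering the driver with the weighted interrupt-distribution framework (intr_dist_add_weighted), so heavily used VIO channel RX interrupts are spread across CPUs in proportion to their expected load while the unused TX interrupts carry no weight. A condensed sketch of that lookup, paraphrasing the cnex.c hunks below; the struct layout and field types are assumptions, and the surrounding kernel headers are omitted.

	/* Per-devclass interrupt properties (condensed from cnex.c below). */
	typedef struct cnex_intr_map {
		ldc_dev_t	devclass;	/* LDC device class */
		uint32_t	pil;		/* PIL used for this class */
		int32_t		weight;		/* interrupt-distribution weight */
	} cnex_intr_map_t;

	static cnex_intr_map_t cnex_class_to_intr[] = {
		{ LDC_DEV_GENERIC,	PIL_3,	0 },
		{ LDC_DEV_BLK,		PIL_4,	10 },
		{ LDC_DEV_BLK_SVC,	PIL_3,	10 },
		{ LDC_DEV_NT,		PIL_6,	35 },
		{ LDC_DEV_NT_SVC,	PIL_4,	35 },
		{ LDC_DEV_SERIAL,	PIL_6,	0 }
	};
	#define	CNEX_MAX_DEVS	(sizeof (cnex_class_to_intr) / \
				sizeof (cnex_class_to_intr[0]))

	/* TX interrupts are unused, so they never influence distribution. */
	#define	CNEX_TX_INTR_WEIGHT	0

	/* Weight applied when a channel's RX interrupt is added or retargeted. */
	static int32_t
	cnex_class_weight(ldc_dev_t devclass)
	{
		int idx;

		for (idx = 0; idx < CNEX_MAX_DEVS; idx++) {
			if (devclass == cnex_class_to_intr[idx].devclass)
				return (cnex_class_to_intr[idx].weight);
		}
		ASSERT(0);	/* unknown devclass: extend cnex_class_to_intr */
		return (0);
	}

During redistribution, the new cnex_intr_redist() callback is invoked once per distinct weight and retargets each enabled channel interrupt exactly once: on the pass whose weight matches the channel's, or on the weight_max pass for anything heavier, as described in the block comment in the cnex.c diff.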
diff --git a/usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c b/usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c index 19c2e91a3d..42b417f76f 100644 --- a/usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c +++ b/usr/src/cmd/fm/modules/sun4v/etm/etm_xport_api_dd.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -839,7 +839,7 @@ etm_xport_open(fmd_hdl_t *hdl, etm_xport_addr_t addr) /* Set the channel to reliable mode */ op.op_sel = VLDC_OP_SET; op.opt_sel = VLDC_OPT_MODE; - op.opt_val = LDC_MODE_STREAM; + op.opt_val = LDC_MODE_RELIABLE; if (ioctl(_conn->fd, VLDC_IOCTL_OPT_OP, &op) != 0) { /* errno assumed set by above call */ diff --git a/usr/src/cmd/mdb/sun4v/modules/ldc/ldc.c b/usr/src/cmd/mdb/sun4v/modules/ldc/ldc.c index 24e61dd465..8cd92bc435 100644 --- a/usr/src/cmd/mdb/sun4v/modules/ldc/ldc.c +++ b/usr/src/cmd/mdb/sun4v/modules/ldc/ldc.c @@ -40,7 +40,7 @@ const mdb_bitmask_t ldc_mode_bits[] = { { "raw ", ALLBITS, LDC_MODE_RAW }, { "unrel ", ALLBITS, LDC_MODE_UNRELIABLE }, - { "stream", ALLBITS, LDC_MODE_STREAM }, + { "rel ", ALLBITS, LDC_MODE_RELIABLE }, { NULL, 0, 0} }; @@ -203,7 +203,7 @@ ldcinfo(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("Rx Info: 0x%p len=0x%lx intr=%b\n", ldcp.rx_q_va, ldcp.rx_q_entries, ldcp.rx_intr_state, ldc_intrstate_bits); - if (ldcp.mode == LDC_MODE_STREAM) { + if (ldcp.mode == LDC_MODE_RELIABLE) { mdb_printf("Rx Dq Info: 0x%p len=0x%lx hd=0x%lx " "tl=0x%lx ackhd=0x%lx", ldcp.rx_dq_va, ldcp.rx_dq_entries, ldcp.rx_dq_head, diff --git a/usr/src/lib/fm/libldom/sparc/ldmsvcs_utils.c b/usr/src/lib/fm/libldom/sparc/ldmsvcs_utils.c index a4d11c306b..c32bc8c412 100644 --- a/usr/src/lib/fm/libldom/sparc/ldmsvcs_utils.c +++ b/usr/src/lib/fm/libldom/sparc/ldmsvcs_utils.c @@ -1081,7 +1081,7 @@ channel_openreset(struct ldmsvcs_info *lsp) op.op_sel = VLDC_OP_SET; op.opt_sel = VLDC_OPT_MODE; - op.opt_val = LDC_MODE_STREAM; + op.opt_val = LDC_MODE_RELIABLE; if (ioctl(lsp->fds_chan.fd, VLDC_IOCTL_OPT_OP, &op) != 0) { diff --git a/usr/src/lib/libpcp/common/libpcp.c b/usr/src/lib/libpcp/common/libpcp.c index 7888f7588f..714781e513 100644 --- a/usr/src/lib/libpcp/common/libpcp.c +++ b/usr/src/lib/libpcp/common/libpcp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -225,7 +225,7 @@ pcp_init(char *channel_name) op.op_sel = VLDC_OP_SET; op.opt_sel = VLDC_OPT_MODE; - op.opt_val = LDC_MODE_STREAM; + op.opt_val = LDC_MODE_RELIABLE; if (ioctl(channel_fd, VLDC_IOCTL_OPT_OP, &op) != 0) { (void) close(channel_fd); return (PCPL_GLVC_ERROR); diff --git a/usr/src/uts/sun4v/Makefile.files b/usr/src/uts/sun4v/Makefile.files index 9cc13305af..c9ca629f68 100644 --- a/usr/src/uts/sun4v/Makefile.files +++ b/usr/src/uts/sun4v/Makefile.files @@ -20,7 +20,7 @@ # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -142,7 +142,8 @@ VNEX_OBJS = vnex.o CNEX_OBJS = cnex.o GLVC_OBJS = glvc.o glvc_hcall.o MDESC_OBJS = mdesc.o -LDC_OBJS = ldc.o vio_util.o vdsk_common.o vgen_stats.o vnet_common.o +LDC_OBJS = ldc.o ldc_shm.o vio_util.o vdsk_common.o vgen_stats.o \ + vnet_common.o NTWDT_OBJS = ntwdt.o VLDC_OBJS = vldc.o VCC_OBJS = vcc.o diff --git a/usr/src/uts/sun4v/io/cnex.c b/usr/src/uts/sun4v/io/cnex.c index fe831809e8..a38c311a36 100644 --- a/usr/src/uts/sun4v/io/cnex.c +++ b/usr/src/uts/sun4v/io/cnex.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -61,16 +61,18 @@ /* * Internal functions/information */ -static struct cnex_pil_map cnex_class_to_pil[] = { - {LDC_DEV_GENERIC, PIL_3}, - {LDC_DEV_BLK, PIL_4}, - {LDC_DEV_BLK_SVC, PIL_3}, - {LDC_DEV_NT, PIL_6}, - {LDC_DEV_NT_SVC, PIL_4}, - {LDC_DEV_SERIAL, PIL_6} +static struct cnex_intr_map cnex_class_to_intr[] = { + {LDC_DEV_GENERIC, PIL_3, 0}, + {LDC_DEV_BLK, PIL_4, 10}, + {LDC_DEV_BLK_SVC, PIL_3, 10}, + {LDC_DEV_NT, PIL_6, 35}, + {LDC_DEV_NT_SVC, PIL_4, 35}, + {LDC_DEV_SERIAL, PIL_6, 0} }; -#define CNEX_MAX_DEVS (sizeof (cnex_class_to_pil) / \ - sizeof (cnex_class_to_pil[0])) +#define CNEX_MAX_DEVS (sizeof (cnex_class_to_intr) / \ + sizeof (cnex_class_to_intr[0])) + +#define CNEX_TX_INTR_WEIGHT 0 #define SUN4V_REG_SPEC2CFG_HDL(x) ((x >> 32) & ~(0xfull << 28)) @@ -78,12 +80,43 @@ static clock_t cnex_wait_usecs = 1000; /* wait time in usecs */ static int cnex_wait_retries = 3; static void *cnex_state; -static void cnex_intr_redist(void *arg); static uint_t cnex_intr_wrapper(caddr_t arg); static dev_info_t *cnex_find_chan_dip(dev_info_t *dip, uint64_t chan_id, md_t *mdp, mde_cookie_t mde); /* + * Channel Interrupt Distribution + * + * In order to balance interrupts among available CPUs, we use + * the intr_dist_cpuid_{add,remove}_device_weight() interface to + * assign weights to channel interrupts. These weights, which are + * defined in the cnex_intr_map structure, influence which CPU + * is returned by intr_dist_cpuid() when called via the cnex + * interrupt redistribution callback cnex_intr_redist(). + * Interrupts for VIO devclass channels are given more weight than + * other interrupts because they are expected to occur more + * frequently and have a larger impact on overall performance. + * Transmit interrupts are given a zero weight because they are + * not used. + * + * The interrupt weights influence the target CPU selection when + * interrupts are redistributed and when they are added. However, + * removal of interrupts can unbalance the distribution even if + * they are removed in converse order--compared to the order they + * are added. This can occur when interrupts are removed after + * redistribution occurs. + * + * Channel interrupt weights affect interrupt-CPU distribution + * relative to other weighted interrupts on the system. For VIO + * devclass channels, values are chosen to match those used by + * the PCI express nexus driver for net and storage devices. 
+ */ +static void cnex_intr_redist(void *arg, int32_t weight_max, int32_t weight); +static int cnex_intr_new_cpu(cnex_soft_state_t *ssp, cnex_intr_t *iinfo); +static int cnex_intr_dis_wait(cnex_soft_state_t *ssp, cnex_intr_t *iinfo); +static int32_t cnex_class_weight(ldc_dev_t devclass); + +/* * Debug info */ #ifdef DEBUG @@ -280,15 +313,18 @@ _info(struct modinfo *modinfop) * Callback function invoked by the interrupt redistribution * framework. This will redirect interrupts at CPUs that are * currently available in the system. + * + * Note: any interrupts with weight greater than or equal to + * weight_max must be redistributed when this callback is + * invoked with (weight == weight_max) which will be once per + * redistribution. */ +/*ARGSUSED*/ static void -cnex_intr_redist(void *arg) +cnex_intr_redist(void *arg, int32_t weight_max, int32_t weight) { cnex_ldc_t *cldcp; cnex_soft_state_t *cnex_ssp = arg; - int intr_state; - uint64_t cpuid; - int rv, retries = 0; ASSERT(cnex_ssp != NULL); mutex_enter(&cnex_ssp->clist_lock); @@ -298,130 +334,14 @@ cnex_intr_redist(void *arg) mutex_enter(&cldcp->lock); - if (cldcp->tx.hdlr) { - /* - * Don't do anything for disabled interrupts. - */ - rv = hvldc_intr_getvalid(cnex_ssp->cfghdl, - cldcp->tx.ino, &intr_state); - if (rv) { - DWARN("cnex_intr_redist: tx ino=0x%llx, " - "can't get valid\n", cldcp->tx.ino); - mutex_exit(&cldcp->lock); - mutex_exit(&cnex_ssp->clist_lock); - return; - } - if (intr_state == HV_INTR_NOTVALID) { - mutex_exit(&cldcp->lock); - cldcp = cldcp->next; - continue; - } - - cpuid = intr_dist_cpuid(); - - /* disable interrupts */ - rv = hvldc_intr_setvalid(cnex_ssp->cfghdl, - cldcp->tx.ino, HV_INTR_NOTVALID); - if (rv) { - DWARN("cnex_intr_redist: tx ino=0x%llx, " - "can't set valid\n", cldcp->tx.ino); - mutex_exit(&cldcp->lock); - mutex_exit(&cnex_ssp->clist_lock); - return; - } - - /* - * Make a best effort to wait for pending interrupts - * to finish. There is not much we can do if we timeout. - */ - retries = 0; - - do { - rv = hvldc_intr_getstate(cnex_ssp->cfghdl, - cldcp->tx.ino, &intr_state); - if (rv) { - DWARN("cnex_intr_redist: tx ino=0x%llx," - "can't get state\n", cldcp->tx.ino); - mutex_exit(&cldcp->lock); - mutex_exit(&cnex_ssp->clist_lock); - return; - } - - if (intr_state != HV_INTR_DELIVERED_STATE) - break; - - drv_usecwait(cnex_wait_usecs); - - } while (!panicstr && ++retries <= cnex_wait_retries); - - cldcp->tx.cpuid = cpuid; - (void) hvldc_intr_settarget(cnex_ssp->cfghdl, - cldcp->tx.ino, cpuid); - (void) hvldc_intr_setvalid(cnex_ssp->cfghdl, - cldcp->tx.ino, HV_INTR_VALID); + if (cldcp->tx.hdlr && (cldcp->tx.weight == weight || + (weight_max == weight && cldcp->tx.weight > weight))) { + (void) cnex_intr_new_cpu(cnex_ssp, &cldcp->tx); } - if (cldcp->rx.hdlr) { - /* - * Don't do anything for disabled interrupts. 
- */ - rv = hvldc_intr_getvalid(cnex_ssp->cfghdl, - cldcp->rx.ino, &intr_state); - if (rv) { - DWARN("cnex_intr_redist: rx ino=0x%llx, " - "can't get valid\n", cldcp->rx.ino); - mutex_exit(&cldcp->lock); - mutex_exit(&cnex_ssp->clist_lock); - return; - } - if (intr_state == HV_INTR_NOTVALID) { - mutex_exit(&cldcp->lock); - cldcp = cldcp->next; - continue; - } - - cpuid = intr_dist_cpuid(); - - /* disable interrupts */ - rv = hvldc_intr_setvalid(cnex_ssp->cfghdl, - cldcp->rx.ino, HV_INTR_NOTVALID); - if (rv) { - DWARN("cnex_intr_redist: rx ino=0x%llx, " - "can't set valid\n", cldcp->rx.ino); - mutex_exit(&cldcp->lock); - mutex_exit(&cnex_ssp->clist_lock); - return; - } - - /* - * Make a best effort to wait for pending interrupts - * to finish. There is not much we can do if we timeout. - */ - retries = 0; - - do { - rv = hvldc_intr_getstate(cnex_ssp->cfghdl, - cldcp->rx.ino, &intr_state); - if (rv) { - DWARN("cnex_intr_redist: rx ino=0x%llx," - "can't get state\n", cldcp->rx.ino); - mutex_exit(&cldcp->lock); - mutex_exit(&cnex_ssp->clist_lock); - return; - } - - if (intr_state != HV_INTR_DELIVERED_STATE) - break; - - drv_usecwait(cnex_wait_usecs); - - } while (!panicstr && ++retries <= cnex_wait_retries); - - cldcp->rx.cpuid = cpuid; - (void) hvldc_intr_settarget(cnex_ssp->cfghdl, - cldcp->rx.ino, cpuid); - (void) hvldc_intr_setvalid(cnex_ssp->cfghdl, - cldcp->rx.ino, HV_INTR_VALID); + if (cldcp->rx.hdlr && (cldcp->rx.weight == weight || + (weight_max == weight && cldcp->rx.weight > weight))) { + (void) cnex_intr_new_cpu(cnex_ssp, &cldcp->rx); } mutex_exit(&cldcp->lock); @@ -434,6 +354,112 @@ cnex_intr_redist(void *arg) } /* + * Internal function to replace the CPU used by an interrupt + * during interrupt redistribution. + */ +static int +cnex_intr_new_cpu(cnex_soft_state_t *ssp, cnex_intr_t *iinfo) +{ + int intr_state; + int rv; + + /* Determine if the interrupt is enabled */ + rv = hvldc_intr_getvalid(ssp->cfghdl, iinfo->ino, &intr_state); + if (rv) { + DWARN("cnex_intr_new_cpu: rx ino=0x%llx, can't get valid\n", + iinfo->ino); + return (rv); + } + + /* If it is enabled, disable it */ + if (intr_state == HV_INTR_VALID) { + rv = cnex_intr_dis_wait(ssp, iinfo); + if (rv) { + return (rv); + } + } + + /* Target the interrupt at a new CPU. */ + iinfo->cpuid = intr_dist_cpuid(); + (void) hvldc_intr_settarget(ssp->cfghdl, iinfo->ino, iinfo->cpuid); + intr_dist_cpuid_add_device_weight(iinfo->cpuid, iinfo->dip, + iinfo->weight); + + /* Re-enable the interrupt if it was enabled */ + if (intr_state == HV_INTR_VALID) { + (void) hvldc_intr_setvalid(ssp->cfghdl, iinfo->ino, + HV_INTR_VALID); + } + + return (0); +} + +/* + * Internal function to disable an interrupt and wait + * for any pending interrupts to finish. + */ +static int +cnex_intr_dis_wait(cnex_soft_state_t *ssp, cnex_intr_t *iinfo) +{ + int rv, intr_state, retries; + + /* disable interrupts */ + rv = hvldc_intr_setvalid(ssp->cfghdl, iinfo->ino, HV_INTR_NOTVALID); + if (rv) { + DWARN("cnex_intr_dis_wait: ino=0x%llx, can't set valid\n", + iinfo->ino); + return (ENXIO); + } + + /* + * Make a best effort to wait for pending interrupts + * to finish. There is not much we can do if we timeout. 
+ */ + retries = 0; + + do { + rv = hvldc_intr_getstate(ssp->cfghdl, iinfo->ino, &intr_state); + if (rv) { + DWARN("cnex_intr_dis_wait: ino=0x%llx, can't get " + "state\n", iinfo->ino); + return (ENXIO); + } + + if (intr_state != HV_INTR_DELIVERED_STATE) + break; + + drv_usecwait(cnex_wait_usecs); + + } while (!panicstr && ++retries <= cnex_wait_retries); + + return (0); +} + +/* + * Returns the interrupt weight to use for the specified devclass. + */ +static int32_t +cnex_class_weight(ldc_dev_t devclass) +{ + int idx; + + for (idx = 0; idx < CNEX_MAX_DEVS; idx++) { + if (devclass == cnex_class_to_intr[idx].devclass) { + return (cnex_class_to_intr[idx].weight); + } + } + + /* + * If this code is reached, the specified devclass is + * invalid. New devclasses should be added to + * cnex_class_to_intr. + */ + ASSERT(0); + + return (0); +} + +/* * Exported interface to register a LDC endpoint with * the channel nexus */ @@ -551,6 +577,8 @@ cnex_reg_chan(dev_info_t *dip, uint64_t id, ldc_dev_t devclass) cldcp->tx.ino = txino; cldcp->rx.ino = rxino; cldcp->devclass = devclass; + cldcp->tx.weight = CNEX_TX_INTR_WEIGHT; + cldcp->rx.weight = cnex_class_weight(devclass); cldcp->dip = chan_dip; /* add channel to nexus channel list */ @@ -643,8 +671,8 @@ cnex_add_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype, /* Pick a PIL on the basis of the channel's devclass */ for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) { - if (cldcp->devclass == cnex_class_to_pil[idx].devclass) { - pil = cnex_class_to_pil[idx].pil; + if (cldcp->devclass == cnex_class_to_intr[idx].devclass) { + pil = cnex_class_to_intr[idx].pil; break; } } @@ -685,6 +713,9 @@ cnex_add_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype, goto hv_error; } + intr_dist_cpuid_add_device_weight(iinfo->cpuid, iinfo->dip, + iinfo->weight); + mutex_exit(&cldcp->lock); return (0); @@ -834,12 +865,14 @@ cnex_rem_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype) /* Pick a PIL on the basis of the channel's devclass */ for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) { - if (cldcp->devclass == cnex_class_to_pil[idx].devclass) { - pil = cnex_class_to_pil[idx].pil; + if (cldcp->devclass == cnex_class_to_intr[idx].devclass) { + pil = cnex_class_to_intr[idx].pil; break; } } + intr_dist_cpuid_rem_device_weight(iinfo->cpuid, iinfo->dip); + /* remove interrupt */ (void) rem_ivintr(iinfo->icookie, pil); @@ -1036,7 +1069,7 @@ cnex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) } /* Add interrupt redistribution callback. */ - intr_dist_add(cnex_intr_redist, cnex_ssp); + intr_dist_add_weighted(cnex_intr_redist, cnex_ssp); ddi_report_dev(devi); return (DDI_SUCCESS); @@ -1074,7 +1107,7 @@ cnex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) (void) ldc_unregister(&cinfo); /* Remove interrupt redistribution callback. 
*/ - intr_dist_rem(cnex_intr_redist, cnex_ssp); + intr_dist_rem_weighted(cnex_intr_redist, cnex_ssp); /* destroy mutex */ mutex_destroy(&cnex_ssp->clist_lock); diff --git a/usr/src/uts/sun4v/io/ds.c b/usr/src/uts/sun4v/io/ds.c index b443bdfabd..4e81501ff9 100644 --- a/usr/src/uts/sun4v/io/ds.c +++ b/usr/src/uts/sun4v/io/ds.c @@ -610,7 +610,7 @@ ds_ldc_init(ds_port_t *port) ldc_attr.devclass = LDC_DEV_GENERIC; ldc_attr.instance = 0; - ldc_attr.mode = LDC_MODE_STREAM; + ldc_attr.mode = LDC_MODE_RELIABLE; ldc_attr.mtu = DS_STREAM_MTU; if ((rv = ldc_init(port->ldc.id, &ldc_attr, &port->ldc.hdl)) != 0) { diff --git a/usr/src/uts/sun4v/io/ldc.c b/usr/src/uts/sun4v/io/ldc.c index bd932c1d36..4a8e08c42b 100644 --- a/usr/src/uts/sun4v/io/ldc.c +++ b/usr/src/uts/sun4v/io/ldc.c @@ -41,7 +41,6 @@ #include <sys/modctl.h> #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */ #include <sys/debug.h> -#include <sys/types.h> #include <sys/cred.h> #include <sys/promif.h> #include <sys/ddi.h> @@ -70,12 +69,13 @@ #include <sys/sdt.h> /* Core internal functions */ -static int i_ldc_h2v_error(int h_error); +int i_ldc_h2v_error(int h_error); +void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset); + static int i_ldc_txq_reconf(ldc_chan_t *ldcp); static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset); static int i_ldc_rxq_drain(ldc_chan_t *ldcp); static void i_ldc_reset_state(ldc_chan_t *ldcp); -static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset); static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail); static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head); @@ -125,12 +125,6 @@ static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg); static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg); static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg); -/* Memory synchronization internal functions */ -static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, - uint8_t direction, uint64_t offset, size_t size); -static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, - uint8_t direction, uint64_t start, uint64_t end); - /* LDC Version */ static ldc_ver_t ldc_versions[] = { {1, 0} }; @@ -142,7 +136,7 @@ static ldc_ver_t ldc_versions[] = { {1, 0} }; /* Module State Pointer */ -static ldc_soft_state_t *ldcssp; +ldc_soft_state_t *ldcssp; static struct modldrv md = { &mod_miscops, /* This is a misc module */ @@ -161,14 +155,6 @@ static hsvc_info_t ldc_hsvc = { }; /* - * LDC framework supports mapping remote domain's memory - * either directly or via shadow memory pages. Default - * support is currently implemented via shadow copy. - * Direct map can be enabled by setting 'ldc_shmem_enabled' - */ -int ldc_shmem_enabled = 0; - -/* * The no. of MTU size messages that can be stored in * the LDC Tx queue. The number of Tx queue entries is * then computed as (mtu * mtu_msgs)/sizeof(queue_entry) @@ -196,14 +182,6 @@ uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES; uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER; /* - * Pages exported for remote access over each channel is - * maintained in a table registered with the Hypervisor. - * The default number of entries in the table is set to - * 'ldc_mtbl_entries'. - */ -uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES; - -/* * LDC retry count and delay - when the HV returns EWOULDBLOCK * the operation is retried 'ldc_max_retries' times with a * wait of 'ldc_delay' usecs between each retry. 
@@ -232,13 +210,11 @@ clock_t ldc_close_delay = LDC_CLOSE_DELAY; * NOTE: ldcdbgchan has no effect on error messages */ -#define DBG_ALL_LDCS -1 - int ldcdbg = 0x0; int64_t ldcdbgchan = DBG_ALL_LDCS; uint64_t ldc_inject_err_flag = 0; -static void +void ldcdebug(int64_t id, const char *fmt, ...) { char buf[512]; @@ -515,7 +491,7 @@ _fini(void) /* * Translate HV Errors to sun4v error codes */ -static int +int i_ldc_h2v_error(int h_error) { switch (h_error) { @@ -682,7 +658,7 @@ i_ldc_reset_state(ldc_chan_t *ldcp) /* * Reset a LDC channel */ -static void +void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset) { DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id); @@ -792,7 +768,7 @@ i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head) * up to the current location of tx_head. This needs to be done * as the peer will only ACK DATA/INFO pkts. */ - if (ldcp->mode == LDC_MODE_RELIABLE || ldcp->mode == LDC_MODE_STREAM) { + if (ldcp->mode == LDC_MODE_RELIABLE) { while (ldcp->tx_ackd_head != ldcp->tx_head) { pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head); if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) { @@ -990,7 +966,7 @@ i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2) (void) i_ldc_rx_process_hvq(ldcp, ¬ify, &event); - if (ldcp->mode != LDC_MODE_STREAM) { + if (ldcp->mode != LDC_MODE_RELIABLE) { /* * If there are no data packets on the queue, clear * the interrupt. Otherwise, the ldc_read will clear @@ -1019,7 +995,7 @@ i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2) ldcp->cb_inprogress = B_FALSE; } - if (ldcp->mode == LDC_MODE_STREAM) { + if (ldcp->mode == LDC_MODE_RELIABLE) { /* * If we are using a secondary data queue, clear the * interrupt. We should have processed all CTRL packets @@ -2146,8 +2122,8 @@ i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2) * processed by the ldc_read code path. Control packets are processed * inline if they are encountered before any data packets. * - * STEAMING MODE - * For streaming mode channels, all packets on the receive queue + * RELIABLE MODE + * For reliable mode channels, all packets on the receive queue * are processed: data packets are copied to the data queue and * control packets are processed inline. Packets are only left on * the receive queue when the data queue is full. @@ -2275,8 +2251,8 @@ force_reset: } else { uint64_t dq_head, dq_tail; - /* process only STREAM mode data packets */ - if (ldcp->mode != LDC_MODE_STREAM) { + /* process only RELIABLE mode data packets */ + if (ldcp->mode != LDC_MODE_RELIABLE) { if ((ldcp->tstate & TS_IN_RESET) == 0) *notify_client = B_TRUE; *notify_event |= LDC_EVT_READ; @@ -2390,7 +2366,7 @@ force_reset: } if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { - ASSERT(ldcp->mode == LDC_MODE_STREAM); + ASSERT(ldcp->mode == LDC_MODE_RELIABLE); /* * Copy the data packet to the data queue. Note @@ -2417,7 +2393,7 @@ force_reset: loop_exit: - if (ldcp->mode == LDC_MODE_STREAM) { + if (ldcp->mode == LDC_MODE_RELIABLE) { /* ACK data packets */ if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) { @@ -2465,7 +2441,7 @@ skip_ackpeek: /* * Process any ACK packets on the HV receive queue. * - * This function is only used by STREAMING mode channels when the + * This function is only used by RELIABLE mode channels when the * secondary data queue fills up and there are packets remaining on * the HV receive queue. 
*/ @@ -2619,11 +2595,6 @@ ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle) break; case LDC_MODE_RELIABLE: ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; - ldcp->read_p = i_ldc_read_packet; - ldcp->write_p = i_ldc_write_packet; - break; - case LDC_MODE_STREAM: - ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE; ldcp->stream_remains = 0; ldcp->stream_offset = 0; @@ -2692,7 +2663,7 @@ ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle) ldcp->tstate |= TS_RXQ_RDY; /* Setup a separate read data queue */ - if (ldcp->mode == LDC_MODE_STREAM) { + if (ldcp->mode == LDC_MODE_RELIABLE) { ldcp->readq_get_state = i_ldc_dq_rx_get_state; ldcp->readq_set_head = i_ldc_set_rxdq_head; @@ -2749,7 +2720,7 @@ ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle) cleanup_on_exit: - if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) + if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp) kmem_free(ldcp->stream_bufferp, ldcp->mtu); if (ldcp->tstate & TS_TXQ_RDY) @@ -2841,8 +2812,8 @@ ldc_fini(ldc_handle_t handle) mutex_destroy(&ldcp->imp_dlist_lock); mutex_destroy(&ldcp->mlist_lock); - /* Free the stream buffer for STREAM_MODE */ - if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp) + /* Free the stream buffer for RELIABLE_MODE */ + if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp) kmem_free(ldcp->stream_bufferp, ldcp->mtu); /* Free the RX queue */ @@ -2851,7 +2822,7 @@ ldc_fini(ldc_handle_t handle) ldcp->tstate &= ~TS_RXQ_RDY; /* Free the RX data queue */ - if (ldcp->mode == LDC_MODE_STREAM) { + if (ldcp->mode == LDC_MODE_RELIABLE) { kmem_free((caddr_t)ldcp->rx_dq_va, (ldcp->rx_dq_entries << LDC_PACKET_SHIFT)); } @@ -2987,8 +2958,7 @@ ldc_open(ldc_handle_t handle) } /* - * set the ACKd head to current head location for reliable & - * streaming mode + * set the ACKd head to current head location for reliable */ ldcp->tx_ackd_head = ldcp->tx_head; @@ -3604,9 +3574,9 @@ ldc_chkq(ldc_handle_t handle, boolean_t *hasdata) *hasdata = (i_ldc_chkq(ldcp) == 0); break; - case LDC_MODE_STREAM: + case LDC_MODE_RELIABLE: /* - * In stream mode, first check for 'stream_remains' > 0. + * In reliable mode, first check for 'stream_remains' > 0. * Otherwise, if the data queue head and tail pointers * differ, there must be data to read. */ @@ -3657,7 +3627,7 @@ ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep) "ldc_read: (0x%llx) channel is not in UP state\n", ldcp->id); exit_val = ECONNRESET; - } else if (ldcp->mode == LDC_MODE_STREAM) { + } else if (ldcp->mode == LDC_MODE_RELIABLE) { TRACE_RXDQ_LENGTH(ldcp); exit_val = ldcp->read_p(ldcp, bufp, sizep); mutex_exit(&ldcp->lock); @@ -3806,7 +3776,7 @@ i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) } /* Set q_va and compute increment mask for the appropriate queue */ - if (ldcp->mode == LDC_MODE_STREAM) { + if (ldcp->mode == LDC_MODE_RELIABLE) { q_va = ldcp->rx_dq_va; q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT; } else { @@ -3842,7 +3812,7 @@ i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) * queue. This will copy over any new data packets * that have arrived. 
*/ - if (ldcp->mode == LDC_MODE_STREAM) + if (ldcp->mode == LDC_MODE_RELIABLE) (void) i_ldc_chkq(ldcp); rv = ldcp->readq_get_state(ldcp, @@ -3872,7 +3842,7 @@ i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) continue; *sizep = 0; - if (ldcp->mode != LDC_MODE_STREAM) + if (ldcp->mode != LDC_MODE_RELIABLE) ldcp->last_msg_rcd = first_fragment - 1; DWARN(DBG_ALL_LDCS, "ldc_read: " @@ -3896,7 +3866,7 @@ i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) ldcp->rx_q_va + curr_head); /* Check the message ID for the message received */ - if (ldcp->mode != LDC_MODE_STREAM) { + if (ldcp->mode != LDC_MODE_RELIABLE) { if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) { DWARN(ldcp->id, "ldc_read: (0x%llx) seqid " @@ -3980,8 +3950,7 @@ i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { uint8_t *msgbuf = (uint8_t *)( - (ldcp->mode == LDC_MODE_RELIABLE || - ldcp->mode == LDC_MODE_STREAM) ? + (ldcp->mode == LDC_MODE_RELIABLE) ? msg->rdata : msg->udata); D2(ldcp->id, @@ -4079,7 +4048,7 @@ i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) } /* set the message id */ - if (ldcp->mode != LDC_MODE_STREAM) + if (ldcp->mode != LDC_MODE_RELIABLE) ldcp->last_msg_rcd = msg->seqid; /* move the head one position */ @@ -4130,8 +4099,7 @@ channel_is_reset: } /* - * Use underlying reliable packet mechanism to fetch - * and buffer incoming packets so we can hand them back as + * Fetch and buffer incoming packets so we can hand them back as * a basic byte stream. * * Enter and exit with ldcp->lock held by caller @@ -4384,8 +4352,7 @@ i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) ASSERT(MUTEX_HELD(&ldcp->tx_lock)); ASSERT(ldcp->mode == LDC_MODE_RELIABLE || - ldcp->mode == LDC_MODE_UNRELIABLE || - ldcp->mode == LDC_MODE_STREAM); + ldcp->mode == LDC_MODE_UNRELIABLE); /* compute mask for increment */ txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT; @@ -4464,8 +4431,7 @@ i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); - msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE || - ldcp->mode == LDC_MODE_STREAM) ? + msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ? 
ldcmsg->rdata : ldcmsg->udata); ldcmsg->type = LDC_DATA; @@ -4560,7 +4526,7 @@ static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) { ASSERT(MUTEX_HELD(&ldcp->tx_lock)); - ASSERT(ldcp->mode == LDC_MODE_STREAM); + ASSERT(ldcp->mode == LDC_MODE_RELIABLE); /* Truncate packet to max of MTU size */ if (*sizep > ldcp->mtu) *sizep = ldcp->mtu; @@ -4640,2152 +4606,3 @@ ldc_unregister(ldc_cnex_t *cinfo) return (0); } - - -/* ------------------------------------------------------------------------- */ - -/* - * Allocate a memory handle for the channel and link it into the list - * Also choose which memory table to use if this is the first handle - * being assigned to this channel - */ -int -ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) -{ - ldc_chan_t *ldcp; - ldc_mhdl_t *mhdl; - - if (handle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_alloc_handle: invalid channel handle\n"); - return (EINVAL); - } - ldcp = (ldc_chan_t *)handle; - - mutex_enter(&ldcp->lock); - - /* check to see if channel is initalized */ - if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { - DWARN(ldcp->id, - "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", - ldcp->id); - mutex_exit(&ldcp->lock); - return (EINVAL); - } - - /* allocate handle for channel */ - mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); - - /* initialize the lock */ - mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); - - mhdl->myshadow = B_FALSE; - mhdl->memseg = NULL; - mhdl->ldcp = ldcp; - mhdl->status = LDC_UNBOUND; - - /* insert memory handle (@ head) into list */ - if (ldcp->mhdl_list == NULL) { - ldcp->mhdl_list = mhdl; - mhdl->next = NULL; - } else { - /* insert @ head */ - mhdl->next = ldcp->mhdl_list; - ldcp->mhdl_list = mhdl; - } - - /* return the handle */ - *mhandle = (ldc_mem_handle_t)mhdl; - - mutex_exit(&ldcp->lock); - - D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", - ldcp->id, mhdl); - - return (0); -} - -/* - * Free memory handle for the channel and unlink it from the list - */ -int -ldc_mem_free_handle(ldc_mem_handle_t mhandle) -{ - ldc_mhdl_t *mhdl, *phdl; - ldc_chan_t *ldcp; - - if (mhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_free_handle: invalid memory handle\n"); - return (EINVAL); - } - mhdl = (ldc_mhdl_t *)mhandle; - - mutex_enter(&mhdl->lock); - - ldcp = mhdl->ldcp; - - if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { - DWARN(ldcp->id, - "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", - mhdl); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - mutex_exit(&mhdl->lock); - - mutex_enter(&ldcp->mlist_lock); - - phdl = ldcp->mhdl_list; - - /* first handle */ - if (phdl == mhdl) { - ldcp->mhdl_list = mhdl->next; - mutex_destroy(&mhdl->lock); - kmem_cache_free(ldcssp->memhdl_cache, mhdl); - - D1(ldcp->id, - "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", - ldcp->id, mhdl); - } else { - /* walk the list - unlink and free */ - while (phdl != NULL) { - if (phdl->next == mhdl) { - phdl->next = mhdl->next; - mutex_destroy(&mhdl->lock); - kmem_cache_free(ldcssp->memhdl_cache, mhdl); - D1(ldcp->id, - "ldc_mem_free_handle: (0x%llx) freed " - "handle 0x%llx\n", ldcp->id, mhdl); - break; - } - phdl = phdl->next; - } - } - - if (phdl == NULL) { - DWARN(ldcp->id, - "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); - mutex_exit(&ldcp->mlist_lock); - return (EINVAL); - } - - mutex_exit(&ldcp->mlist_lock); - - return (0); -} - -/* - * Bind a memory handle to a virtual address. 
- * The virtual address is converted to the corresponding real addresses. - * Returns pointer to the first ldc_mem_cookie and the total number - * of cookies for this virtual address. Other cookies can be obtained - * using the ldc_mem_nextcookie() call. If the pages are stored in - * consecutive locations in the table, a single cookie corresponding to - * the first location is returned. The cookie size spans all the entries. - * - * If the VA corresponds to a page that is already being exported, reuse - * the page and do not export it again. Bump the page's use count. - */ -int -ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, - uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) -{ - ldc_mhdl_t *mhdl; - ldc_chan_t *ldcp; - ldc_mtbl_t *mtbl; - ldc_memseg_t *memseg; - ldc_mte_t tmp_mte; - uint64_t index, prev_index = 0; - int64_t cookie_idx; - uintptr_t raddr, ra_aligned; - uint64_t psize, poffset, v_offset; - uint64_t pg_shift, pg_size, pg_size_code, pg_mask; - pgcnt_t npages; - caddr_t v_align, addr; - int i, rv; - - if (mhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_bind_handle: invalid memory handle\n"); - return (EINVAL); - } - mhdl = (ldc_mhdl_t *)mhandle; - ldcp = mhdl->ldcp; - - /* clear count */ - *ccount = 0; - - mutex_enter(&mhdl->lock); - - if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { - DWARN(ldcp->id, - "ldc_mem_bind_handle: (0x%x) handle already bound\n", - mhandle); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - /* Force address and size to be 8-byte aligned */ - if ((((uintptr_t)vaddr | len) & 0x7) != 0) { - DWARN(ldcp->id, - "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - /* - * If this channel is binding a memory handle for the - * first time allocate it a memory map table and initialize it - */ - if ((mtbl = ldcp->mtbl) == NULL) { - - mutex_enter(&ldcp->lock); - - /* Allocate and initialize the map table structure */ - mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); - mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; - mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); - mtbl->next_entry = NULL; - mtbl->contigmem = B_TRUE; - - /* Allocate the table itself */ - mtbl->table = (ldc_mte_slot_t *) - contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); - if (mtbl->table == NULL) { - - /* allocate a page of memory using kmem_alloc */ - mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); - mtbl->size = MMU_PAGESIZE; - mtbl->contigmem = B_FALSE; - mtbl->num_entries = mtbl->num_avail = - mtbl->size / sizeof (ldc_mte_slot_t); - DWARN(ldcp->id, - "ldc_mem_bind_handle: (0x%llx) reduced tbl size " - "to %lx entries\n", ldcp->id, mtbl->num_entries); - } - - /* zero out the memory */ - bzero(mtbl->table, mtbl->size); - - /* initialize the lock */ - mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); - - /* register table for this channel */ - rv = hv_ldc_set_map_table(ldcp->id, - va_to_pa(mtbl->table), mtbl->num_entries); - if (rv != 0) { - cmn_err(CE_WARN, - "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", - ldcp->id, rv); - if (mtbl->contigmem) - contig_mem_free(mtbl->table, mtbl->size); - else - kmem_free(mtbl->table, mtbl->size); - mutex_destroy(&mtbl->lock); - kmem_free(mtbl, sizeof (ldc_mtbl_t)); - mutex_exit(&ldcp->lock); - mutex_exit(&mhdl->lock); - return (EIO); - } - - ldcp->mtbl = mtbl; - mutex_exit(&ldcp->lock); - - D1(ldcp->id, - "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", - ldcp->id, ldcp->mtbl->table); - } 
- - /* FUTURE: get the page size, pgsz code, and shift */ - pg_size = MMU_PAGESIZE; - pg_size_code = page_szc(pg_size); - pg_shift = page_get_shift(pg_size_code); - pg_mask = ~(pg_size - 1); - - D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding " - "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", - ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); - - /* aligned VA and its offset */ - v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); - v_offset = ((uintptr_t)vaddr) & (pg_size - 1); - - npages = (len+v_offset)/pg_size; - npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1; - - D1(ldcp->id, "ldc_mem_bind_handle: binding " - "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", - ldcp->id, vaddr, v_align, v_offset, npages); - - /* lock the memory table - exclusive access to channel */ - mutex_enter(&mtbl->lock); - - if (npages > mtbl->num_avail) { - D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", - ldcp->id); - mutex_exit(&mtbl->lock); - mutex_exit(&mhdl->lock); - return (ENOMEM); - } - - /* Allocate a memseg structure */ - memseg = mhdl->memseg = - kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); - - /* Allocate memory to store all pages and cookies */ - memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); - memseg->cookies = - kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); - - D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", - ldcp->id, npages); - - addr = v_align; - - /* - * Check if direct shared memory map is enabled, if not change - * the mapping type to include SHADOW_MAP. - */ - if (ldc_shmem_enabled == 0) - mtype = LDC_SHADOW_MAP; - - /* - * Table slots are used in a round-robin manner. The algorithm permits - * inserting duplicate entries. Slots allocated earlier will typically - * get freed before we get back to reusing the slot.Inserting duplicate - * entries should be OK as we only lookup entries using the cookie addr - * i.e. tbl index, during export, unexport and copy operation. - * - * One implementation what was tried was to search for a duplicate - * page entry first and reuse it. The search overhead is very high and - * in the vnet case dropped the perf by almost half, 50 to 24 mbps. - * So it does make sense to avoid searching for duplicates. - * - * But during the process of searching for a free slot, if we find a - * duplicate entry we will go ahead and use it, and bump its use count. - */ - - /* index to start searching from */ - index = mtbl->next_entry; - cookie_idx = -1; - - tmp_mte.ll = 0; /* initialise fields to 0 */ - - if (mtype & LDC_DIRECT_MAP) { - tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; - tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; - tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; - } - - if (mtype & LDC_SHADOW_MAP) { - tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; - tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0; - } - - if (mtype & LDC_IO_MAP) { - tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; - tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 
1 : 0; - } - - D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); - - tmp_mte.mte_pgszc = pg_size_code; - - /* initialize each mem table entry */ - for (i = 0; i < npages; i++) { - - /* check if slot is available in the table */ - while (mtbl->table[index].entry.ll != 0) { - - index = (index + 1) % mtbl->num_entries; - - if (index == mtbl->next_entry) { - /* we have looped around */ - DWARN(DBG_ALL_LDCS, - "ldc_mem_bind_handle: (0x%llx) cannot find " - "entry\n", ldcp->id); - *ccount = 0; - - /* NOTE: free memory, remove previous entries */ - /* this shouldnt happen as num_avail was ok */ - - mutex_exit(&mtbl->lock); - mutex_exit(&mhdl->lock); - return (ENOMEM); - } - } - - /* get the real address */ - raddr = va_to_pa((void *)addr); - ra_aligned = ((uintptr_t)raddr & pg_mask); - - /* build the mte */ - tmp_mte.mte_rpfn = ra_aligned >> pg_shift; - - D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); - - /* update entry in table */ - mtbl->table[index].entry = tmp_mte; - - D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx" - " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index); - - /* calculate the size and offset for this export range */ - if (i == 0) { - /* first page */ - psize = min((pg_size - v_offset), len); - poffset = v_offset; - - } else if (i == (npages - 1)) { - /* last page */ - psize = (((uintptr_t)(vaddr + len)) & - ((uint64_t)(pg_size-1))); - if (psize == 0) - psize = pg_size; - poffset = 0; - - } else { - /* middle pages */ - psize = pg_size; - poffset = 0; - } - - /* store entry for this page */ - memseg->pages[i].index = index; - memseg->pages[i].raddr = raddr; - memseg->pages[i].offset = poffset; - memseg->pages[i].size = psize; - memseg->pages[i].mte = &(mtbl->table[index]); - - /* create the cookie */ - if (i == 0 || (index != prev_index + 1)) { - cookie_idx++; - memseg->cookies[cookie_idx].addr = - IDX2COOKIE(index, pg_size_code, pg_shift); - memseg->cookies[cookie_idx].addr |= poffset; - memseg->cookies[cookie_idx].size = psize; - - } else { - memseg->cookies[cookie_idx].size += psize; - } - - D1(ldcp->id, "ldc_mem_bind_handle: bound " - "(0x%llx) va=0x%llx, idx=0x%llx, " - "ra=0x%llx(sz=0x%x,off=0x%x)\n", - ldcp->id, addr, index, raddr, psize, poffset); - - /* decrement number of available entries */ - mtbl->num_avail--; - - /* increment va by page size */ - addr += pg_size; - - /* increment index */ - prev_index = index; - index = (index + 1) % mtbl->num_entries; - - /* save the next slot */ - mtbl->next_entry = index; - } - - mutex_exit(&mtbl->lock); - - /* memory handle = bound */ - mhdl->mtype = mtype; - mhdl->perm = perm; - mhdl->status = LDC_BOUND; - - /* update memseg_t */ - memseg->vaddr = vaddr; - memseg->raddr = memseg->pages[0].raddr; - memseg->size = len; - memseg->npages = npages; - memseg->ncookies = cookie_idx + 1; - memseg->next_cookie = (memseg->ncookies > 1) ? 
1 : 0; - - /* return count and first cookie */ - *ccount = memseg->ncookies; - cookie->addr = memseg->cookies[0].addr; - cookie->size = memseg->cookies[0].size; - - D1(ldcp->id, - "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, " - "pgs=0x%llx cookies=0x%llx\n", - ldcp->id, mhdl, vaddr, npages, memseg->ncookies); - - mutex_exit(&mhdl->lock); - return (0); -} - -/* - * Return the next cookie associated with the specified memory handle - */ -int -ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie) -{ - ldc_mhdl_t *mhdl; - ldc_chan_t *ldcp; - ldc_memseg_t *memseg; - - if (mhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_nextcookie: invalid memory handle\n"); - return (EINVAL); - } - mhdl = (ldc_mhdl_t *)mhandle; - - mutex_enter(&mhdl->lock); - - ldcp = mhdl->ldcp; - memseg = mhdl->memseg; - - if (cookie == 0) { - DWARN(ldcp->id, - "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n", - ldcp->id); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - if (memseg->next_cookie != 0) { - cookie->addr = memseg->cookies[memseg->next_cookie].addr; - cookie->size = memseg->cookies[memseg->next_cookie].size; - memseg->next_cookie++; - if (memseg->next_cookie == memseg->ncookies) - memseg->next_cookie = 0; - - } else { - DWARN(ldcp->id, - "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id); - cookie->addr = 0; - cookie->size = 0; - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - D1(ldcp->id, - "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n", - ldcp->id, cookie->addr, cookie->size); - - mutex_exit(&mhdl->lock); - return (0); -} - -/* - * Unbind the virtual memory region associated with the specified - * memory handle. Allassociated cookies are freed and the corresponding - * RA space is no longer exported. - */ -int -ldc_mem_unbind_handle(ldc_mem_handle_t mhandle) -{ - ldc_mhdl_t *mhdl; - ldc_chan_t *ldcp; - ldc_mtbl_t *mtbl; - ldc_memseg_t *memseg; - uint64_t cookie_addr; - uint64_t pg_shift, pg_size_code; - int i, rv; - - if (mhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_unbind_handle: invalid memory handle\n"); - return (EINVAL); - } - mhdl = (ldc_mhdl_t *)mhandle; - - mutex_enter(&mhdl->lock); - - if (mhdl->status == LDC_UNBOUND) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_unbind_handle: (0x%x) handle is not bound\n", - mhandle); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - ldcp = mhdl->ldcp; - mtbl = ldcp->mtbl; - - memseg = mhdl->memseg; - - /* lock the memory table - exclusive access to channel */ - mutex_enter(&mtbl->lock); - - /* undo the pages exported */ - for (i = 0; i < memseg->npages; i++) { - - /* check for mapped pages, revocation cookie != 0 */ - if (memseg->pages[i].mte->cookie) { - - pg_size_code = page_szc(memseg->pages[i].size); - pg_shift = page_get_shift(memseg->pages[i].size); - cookie_addr = IDX2COOKIE(memseg->pages[i].index, - pg_size_code, pg_shift); - - D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke " - "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id, - cookie_addr, memseg->pages[i].mte->cookie); - rv = hv_ldc_revoke(ldcp->id, cookie_addr, - memseg->pages[i].mte->cookie); - if (rv) { - DWARN(ldcp->id, - "ldc_mem_unbind_handle: (0x%llx) cannot " - "revoke mapping, cookie %llx\n", ldcp->id, - cookie_addr); - } - } - - /* clear the entry from the table */ - memseg->pages[i].mte->entry.ll = 0; - mtbl->num_avail++; - } - mutex_exit(&mtbl->lock); - - /* free the allocated memseg and page structures */ - kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); - kmem_free(memseg->cookies, - (sizeof 
(ldc_mem_cookie_t) * memseg->npages)); - kmem_cache_free(ldcssp->memseg_cache, memseg); - - /* uninitialize the memory handle */ - mhdl->memseg = NULL; - mhdl->status = LDC_UNBOUND; - - D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n", - ldcp->id, mhdl); - - mutex_exit(&mhdl->lock); - return (0); -} - -/* - * Get information about the dring. The base address of the descriptor - * ring along with the type and permission are returned back. - */ -int -ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo) -{ - ldc_mhdl_t *mhdl; - - if (mhandle == NULL) { - DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n"); - return (EINVAL); - } - mhdl = (ldc_mhdl_t *)mhandle; - - if (minfo == NULL) { - DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n"); - return (EINVAL); - } - - mutex_enter(&mhdl->lock); - - minfo->status = mhdl->status; - if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { - minfo->vaddr = mhdl->memseg->vaddr; - minfo->raddr = mhdl->memseg->raddr; - minfo->mtype = mhdl->mtype; - minfo->perm = mhdl->perm; - } - mutex_exit(&mhdl->lock); - - return (0); -} - -/* - * Copy data either from or to the client specified virtual address - * space to or from the exported memory associated with the cookies. - * The direction argument determines whether the data is read from or - * written to exported memory. - */ -int -ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size, - ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction) -{ - ldc_chan_t *ldcp; - uint64_t local_voff, local_valign; - uint64_t cookie_addr, cookie_size; - uint64_t pg_shift, pg_size, pg_size_code; - uint64_t export_caddr, export_poff, export_psize, export_size; - uint64_t local_ra, local_poff, local_psize; - uint64_t copy_size, copied_len = 0, total_bal = 0, idx = 0; - pgcnt_t npages; - size_t len = *size; - int i, rv = 0; - - uint64_t chid; - - if (handle == NULL) { - DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n"); - return (EINVAL); - } - ldcp = (ldc_chan_t *)handle; - chid = ldcp->id; - - /* check to see if channel is UP */ - if (ldcp->tstate != TS_UP) { - DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n", - chid); - return (ECONNRESET); - } - - /* Force address and size to be 8-byte aligned */ - if ((((uintptr_t)vaddr | len) & 0x7) != 0) { - DWARN(chid, - "ldc_mem_copy: addr/sz is not 8-byte aligned\n"); - return (EINVAL); - } - - /* Find the size of the exported memory */ - export_size = 0; - for (i = 0; i < ccount; i++) - export_size += cookies[i].size; - - /* check to see if offset is valid */ - if (off > export_size) { - DWARN(chid, - "ldc_mem_copy: (0x%llx) start offset > export mem size\n", - chid); - return (EINVAL); - } - - /* - * Check to see if the export size is smaller than the size we - * are requesting to copy - if so flag an error - */ - if ((export_size - off) < *size) { - DWARN(chid, - "ldc_mem_copy: (0x%llx) copy size > export mem size\n", - chid); - return (EINVAL); - } - - total_bal = min(export_size, *size); - - /* FUTURE: get the page size, pgsz code, and shift */ - pg_size = MMU_PAGESIZE; - pg_size_code = page_szc(pg_size); - pg_shift = page_get_shift(pg_size_code); - - D1(chid, "ldc_mem_copy: copying data " - "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", - chid, vaddr, pg_size, pg_size_code, pg_shift); - - /* aligned VA and its offset */ - local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1)); - local_voff = ((uintptr_t)vaddr) & (pg_size - 1); - - npages = (len+local_voff)/pg_size; 
- npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1; - - D1(chid, - "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", - chid, vaddr, local_valign, local_voff, npages); - - local_ra = va_to_pa((void *)local_valign); - local_poff = local_voff; - local_psize = min(len, (pg_size - local_voff)); - - len -= local_psize; - - /* - * find the first cookie in the list of cookies - * if the offset passed in is not zero - */ - for (idx = 0; idx < ccount; idx++) { - cookie_size = cookies[idx].size; - if (off < cookie_size) - break; - off -= cookie_size; - } - - cookie_addr = cookies[idx].addr + off; - cookie_size = cookies[idx].size - off; - - export_caddr = cookie_addr & ~(pg_size - 1); - export_poff = cookie_addr & (pg_size - 1); - export_psize = min(cookie_size, (pg_size - export_poff)); - - for (;;) { - - copy_size = min(export_psize, local_psize); - - D1(chid, - "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx," - " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx," - " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," - " total_bal=0x%llx\n", - chid, direction, export_caddr, local_ra, export_poff, - local_poff, export_psize, local_psize, copy_size, - total_bal); - - rv = hv_ldc_copy(chid, direction, - (export_caddr + export_poff), (local_ra + local_poff), - copy_size, &copied_len); - - if (rv != 0) { - int error = EIO; - uint64_t rx_hd, rx_tl; - - DWARN(chid, - "ldc_mem_copy: (0x%llx) err %d during copy\n", - (unsigned long long)chid, rv); - DWARN(chid, - "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, " - "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx," - " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx," - " copied_len=0x%lx, total_bal=0x%lx\n", - chid, direction, export_caddr, local_ra, - export_poff, local_poff, export_psize, local_psize, - copy_size, copied_len, total_bal); - - *size = *size - total_bal; - - /* - * check if reason for copy error was due to - * a channel reset. we need to grab the lock - * just in case we have to do a reset. 
- */ - mutex_enter(&ldcp->lock); - mutex_enter(&ldcp->tx_lock); - - rv = hv_ldc_rx_get_state(ldcp->id, - &rx_hd, &rx_tl, &(ldcp->link_state)); - if (ldcp->link_state == LDC_CHANNEL_DOWN || - ldcp->link_state == LDC_CHANNEL_RESET) { - i_ldc_reset(ldcp, B_FALSE); - error = ECONNRESET; - } - - mutex_exit(&ldcp->tx_lock); - mutex_exit(&ldcp->lock); - - return (error); - } - - ASSERT(copied_len <= copy_size); - - D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len); - export_poff += copied_len; - local_poff += copied_len; - export_psize -= copied_len; - local_psize -= copied_len; - cookie_size -= copied_len; - - total_bal -= copied_len; - - if (copy_size != copied_len) - continue; - - if (export_psize == 0 && total_bal != 0) { - - if (cookie_size == 0) { - idx++; - cookie_addr = cookies[idx].addr; - cookie_size = cookies[idx].size; - - export_caddr = cookie_addr & ~(pg_size - 1); - export_poff = cookie_addr & (pg_size - 1); - export_psize = - min(cookie_size, (pg_size-export_poff)); - } else { - export_caddr += pg_size; - export_poff = 0; - export_psize = min(cookie_size, pg_size); - } - } - - if (local_psize == 0 && total_bal != 0) { - local_valign += pg_size; - local_ra = va_to_pa((void *)local_valign); - local_poff = 0; - local_psize = min(pg_size, len); - len -= local_psize; - } - - /* check if we are all done */ - if (total_bal == 0) - break; - } - - - D1(chid, - "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n", - chid, *size); - - return (0); -} - -/* - * Copy data either from or to the client specified virtual address - * space to or from HV physical memory. - * - * The direction argument determines whether the data is read from or - * written to HV memory. direction values are LDC_COPY_IN/OUT similar - * to the ldc_mem_copy interface - */ -int -ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size, - caddr_t paddr, uint8_t direction) -{ - ldc_chan_t *ldcp; - uint64_t local_voff, local_valign; - uint64_t pg_shift, pg_size, pg_size_code; - uint64_t target_pa, target_poff, target_psize, target_size; - uint64_t local_ra, local_poff, local_psize; - uint64_t copy_size, copied_len = 0; - pgcnt_t npages; - size_t len = *size; - int rv = 0; - - if (handle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_rdwr_cookie: invalid channel handle\n"); - return (EINVAL); - } - ldcp = (ldc_chan_t *)handle; - - mutex_enter(&ldcp->lock); - - /* check to see if channel is UP */ - if (ldcp->tstate != TS_UP) { - DWARN(ldcp->id, - "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n", - ldcp->id); - mutex_exit(&ldcp->lock); - return (ECONNRESET); - } - - /* Force address and size to be 8-byte aligned */ - if ((((uintptr_t)vaddr | len) & 0x7) != 0) { - DWARN(ldcp->id, - "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n"); - mutex_exit(&ldcp->lock); - return (EINVAL); - } - - target_size = *size; - - /* FUTURE: get the page size, pgsz code, and shift */ - pg_size = MMU_PAGESIZE; - pg_size_code = page_szc(pg_size); - pg_shift = page_get_shift(pg_size_code); - - D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data " - "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", - ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); - - /* aligned VA and its offset */ - local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1); - local_voff = ((uintptr_t)vaddr) & (pg_size - 1); - - npages = (len + local_voff) / pg_size; - npages = ((len + local_voff) % pg_size == 0) ? 
npages : npages+1; - - D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " - "val=0x%llx,off=0x%x,pgs=0x%x\n", - ldcp->id, vaddr, local_valign, local_voff, npages); - - local_ra = va_to_pa((void *)local_valign); - local_poff = local_voff; - local_psize = min(len, (pg_size - local_voff)); - - len -= local_psize; - - target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); - target_poff = ((uintptr_t)paddr) & (pg_size - 1); - target_psize = pg_size - target_poff; - - for (;;) { - - copy_size = min(target_psize, local_psize); - - D1(ldcp->id, - "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," - " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," - " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," - " total_bal=0x%llx\n", - ldcp->id, direction, target_pa, local_ra, target_poff, - local_poff, target_psize, local_psize, copy_size, - target_size); - - rv = hv_ldc_copy(ldcp->id, direction, - (target_pa + target_poff), (local_ra + local_poff), - copy_size, &copied_len); - - if (rv != 0) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", - ldcp->id, rv); - DWARN(DBG_ALL_LDCS, - "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " - "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " - "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " - "copy_sz=0x%llx, total_bal=0x%llx\n", - ldcp->id, direction, target_pa, local_ra, - target_poff, local_poff, target_psize, local_psize, - copy_size, target_size); - - *size = *size - target_size; - mutex_exit(&ldcp->lock); - return (i_ldc_h2v_error(rv)); - } - - D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", - copied_len); - target_poff += copied_len; - local_poff += copied_len; - target_psize -= copied_len; - local_psize -= copied_len; - - target_size -= copied_len; - - if (copy_size != copied_len) - continue; - - if (target_psize == 0 && target_size != 0) { - target_pa += pg_size; - target_poff = 0; - target_psize = min(pg_size, target_size); - } - - if (local_psize == 0 && target_size != 0) { - local_valign += pg_size; - local_ra = va_to_pa((void *)local_valign); - local_poff = 0; - local_psize = min(pg_size, len); - len -= local_psize; - } - - /* check if we are all done */ - if (target_size == 0) - break; - } - - mutex_exit(&ldcp->lock); - - D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", - ldcp->id, *size); - - return (0); -} - -/* - * Map an exported memory segment into the local address space. If the - * memory range was exported for direct map access, a HV call is made - * to allocate a RA range. If the map is done via a shadow copy, local - * shadow memory is allocated and the base VA is returned in 'vaddr'. If - * the mapping is a direct map then the RA is returned in 'raddr'. 
- */ -int -ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, - uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) -{ - int i, j, idx, rv, retries; - ldc_chan_t *ldcp; - ldc_mhdl_t *mhdl; - ldc_memseg_t *memseg; - caddr_t tmpaddr; - uint64_t map_perm = perm; - uint64_t pg_size, pg_shift, pg_size_code, pg_mask; - uint64_t exp_size = 0, base_off, map_size, npages; - uint64_t cookie_addr, cookie_off, cookie_size; - tte_t ldc_tte; - - if (mhandle == NULL) { - DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n"); - return (EINVAL); - } - mhdl = (ldc_mhdl_t *)mhandle; - - mutex_enter(&mhdl->lock); - - if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED || - mhdl->memseg != NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - ldcp = mhdl->ldcp; - - mutex_enter(&ldcp->lock); - - if (ldcp->tstate != TS_UP) { - DWARN(ldcp->id, - "ldc_mem_dring_map: (0x%llx) channel is not UP\n", - ldcp->id); - mutex_exit(&ldcp->lock); - mutex_exit(&mhdl->lock); - return (ECONNRESET); - } - - if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { - DWARN(ldcp->id, "ldc_mem_map: invalid map type\n"); - mutex_exit(&ldcp->lock); - mutex_exit(&mhdl->lock); - return (EINVAL); - } - - D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n", - ldcp->id, cookie->addr, cookie->size); - - /* FUTURE: get the page size, pgsz code, and shift */ - pg_size = MMU_PAGESIZE; - pg_size_code = page_szc(pg_size); - pg_shift = page_get_shift(pg_size_code); - pg_mask = ~(pg_size - 1); - - /* calculate the number of pages in the exported cookie */ - base_off = cookie[0].addr & (pg_size - 1); - for (idx = 0; idx < ccount; idx++) - exp_size += cookie[idx].size; - map_size = P2ROUNDUP((exp_size + base_off), pg_size); - npages = (map_size >> pg_shift); - - /* Allocate memseg structure */ - memseg = mhdl->memseg = - kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); - - /* Allocate memory to store all pages and cookies */ - memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); - memseg->cookies = - kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP); - - D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx," - "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages); - - /* - * Check if direct map over shared memory is enabled, if not change - * the mapping type to SHADOW_MAP. 
- */ - if (ldc_shmem_enabled == 0) - mtype = LDC_SHADOW_MAP; - - /* - * Check to see if the client is requesting direct or shadow map - * If direct map is requested, try to map remote memory first, - * and if that fails, revert to shadow map - */ - if (mtype == LDC_DIRECT_MAP) { - - /* Allocate kernel virtual space for mapping */ - memseg->vaddr = vmem_xalloc(heap_arena, map_size, - pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); - if (memseg->vaddr == NULL) { - cmn_err(CE_WARN, - "ldc_mem_map: (0x%lx) memory map failed\n", - ldcp->id); - kmem_free(memseg->cookies, - (sizeof (ldc_mem_cookie_t) * ccount)); - kmem_free(memseg->pages, - (sizeof (ldc_page_t) * npages)); - kmem_cache_free(ldcssp->memseg_cache, memseg); - - mutex_exit(&ldcp->lock); - mutex_exit(&mhdl->lock); - return (ENOMEM); - } - - /* Unload previous mapping */ - hat_unload(kas.a_hat, memseg->vaddr, map_size, - HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); - - /* for each cookie passed in - map into address space */ - idx = 0; - cookie_size = 0; - tmpaddr = memseg->vaddr; - - for (i = 0; i < npages; i++) { - - if (cookie_size == 0) { - ASSERT(idx < ccount); - cookie_addr = cookie[idx].addr & pg_mask; - cookie_off = cookie[idx].addr & (pg_size - 1); - cookie_size = - P2ROUNDUP((cookie_off + cookie[idx].size), - pg_size); - idx++; - } - - D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " - "cookie 0x%llx, bal=0x%llx\n", ldcp->id, - cookie_addr, cookie_size); - - /* map the cookie into address space */ - for (retries = 0; retries < ldc_max_retries; - retries++) { - - rv = hv_ldc_mapin(ldcp->id, cookie_addr, - &memseg->pages[i].raddr, &map_perm); - if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) - break; - - drv_usecwait(ldc_delay); - } - - if (rv || memseg->pages[i].raddr == 0) { - DWARN(ldcp->id, - "ldc_mem_map: (0x%llx) hv mapin err %d\n", - ldcp->id, rv); - - /* remove previous mapins */ - hat_unload(kas.a_hat, memseg->vaddr, map_size, - HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); - for (j = 0; j < i; j++) { - rv = hv_ldc_unmap( - memseg->pages[j].raddr); - if (rv) { - DWARN(ldcp->id, - "ldc_mem_map: (0x%llx) " - "cannot unmap ra=0x%llx\n", - ldcp->id, - memseg->pages[j].raddr); - } - } - - /* free kernel virtual space */ - vmem_free(heap_arena, (void *)memseg->vaddr, - map_size); - - /* direct map failed - revert to shadow map */ - mtype = LDC_SHADOW_MAP; - break; - - } else { - - D1(ldcp->id, - "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " - "0x%llx, cookie=0x%llx, perm=0x%llx\n", - ldcp->id, tmpaddr, memseg->pages[i].raddr, - cookie_addr, perm); - - /* - * NOTE: Calling hat_devload directly, causes it - * to look for page_t using the pfn. 
Since this - * addr is greater than the memlist, it treates - * it as non-memory - */ - sfmmu_memtte(&ldc_tte, - (pfn_t)(memseg->pages[i].raddr >> pg_shift), - PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K); - - D1(ldcp->id, - "ldc_mem_map: (0x%llx) ra 0x%llx -> " - "tte 0x%llx\n", ldcp->id, - memseg->pages[i].raddr, ldc_tte); - - sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr, - NULL, HAT_LOAD_LOCK); - - cookie_size -= pg_size; - cookie_addr += pg_size; - tmpaddr += pg_size; - } - } - } - - if (mtype == LDC_SHADOW_MAP) { - if (*vaddr == NULL) { - memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP); - mhdl->myshadow = B_TRUE; - - D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated " - "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr); - } else { - /* - * Use client supplied memory for memseg->vaddr - * WARNING: assuming that client mem is >= exp_size - */ - memseg->vaddr = *vaddr; - } - - /* Save all page and cookie information */ - for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) { - memseg->pages[i].raddr = va_to_pa(tmpaddr); - memseg->pages[i].size = pg_size; - tmpaddr += pg_size; - } - - } - - /* save all cookies */ - bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t)); - - /* update memseg_t */ - memseg->raddr = memseg->pages[0].raddr; - memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size; - memseg->npages = npages; - memseg->ncookies = ccount; - memseg->next_cookie = 0; - - /* memory handle = mapped */ - mhdl->mtype = mtype; - mhdl->perm = perm; - mhdl->status = LDC_MAPPED; - - D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, " - "va=0x%llx, pgs=0x%llx cookies=0x%llx\n", - ldcp->id, mhdl, memseg->raddr, memseg->vaddr, - memseg->npages, memseg->ncookies); - - if (mtype == LDC_SHADOW_MAP) - base_off = 0; - if (raddr) - *raddr = (caddr_t)(memseg->raddr | base_off); - if (vaddr) - *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off); - - mutex_exit(&ldcp->lock); - mutex_exit(&mhdl->lock); - return (0); -} - -/* - * Unmap a memory segment. Free shadow memory (if any). 
- */ -int -ldc_mem_unmap(ldc_mem_handle_t mhandle) -{ - int i, rv; - ldc_mhdl_t *mhdl = (ldc_mhdl_t *)mhandle; - ldc_chan_t *ldcp; - ldc_memseg_t *memseg; - - if (mhdl == 0 || mhdl->status != LDC_MAPPED) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_unmap: (0x%llx) handle is not mapped\n", - mhandle); - return (EINVAL); - } - - mutex_enter(&mhdl->lock); - - ldcp = mhdl->ldcp; - memseg = mhdl->memseg; - - D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n", - ldcp->id, mhdl); - - /* if we allocated shadow memory - free it */ - if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) { - kmem_free(memseg->vaddr, memseg->size); - } else if (mhdl->mtype == LDC_DIRECT_MAP) { - - /* unmap in the case of DIRECT_MAP */ - hat_unload(kas.a_hat, memseg->vaddr, memseg->size, - HAT_UNLOAD_UNLOCK); - - for (i = 0; i < memseg->npages; i++) { - rv = hv_ldc_unmap(memseg->pages[i].raddr); - if (rv) { - cmn_err(CE_WARN, - "ldc_mem_map: (0x%lx) hv unmap err %d\n", - ldcp->id, rv); - } - } - - vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size); - } - - /* free the allocated memseg and page structures */ - kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); - kmem_free(memseg->cookies, - (sizeof (ldc_mem_cookie_t) * memseg->ncookies)); - kmem_cache_free(ldcssp->memseg_cache, memseg); - - /* uninitialize the memory handle */ - mhdl->memseg = NULL; - mhdl->status = LDC_UNBOUND; - - D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n", - ldcp->id, mhdl); - - mutex_exit(&mhdl->lock); - return (0); -} - -/* - * Internal entry point for LDC mapped memory entry consistency - * semantics. Acquire copies the contents of the remote memory - * into the local shadow copy. The release operation copies the local - * contents into the remote memory. The offset and size specify the - * bounds for the memory range being synchronized. 
- */
-static int
-i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
- uint64_t offset, size_t size)
-{
- int err;
- ldc_mhdl_t *mhdl;
- ldc_chan_t *ldcp;
- ldc_memseg_t *memseg;
- caddr_t local_vaddr;
- size_t copy_size;
-
- if (mhandle == NULL) {
- DWARN(DBG_ALL_LDCS,
- "i_ldc_mem_acquire_release: invalid memory handle\n");
- return (EINVAL);
- }
- mhdl = (ldc_mhdl_t *)mhandle;
-
- mutex_enter(&mhdl->lock);
-
- if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
- DWARN(DBG_ALL_LDCS,
- "i_ldc_mem_acquire_release: not mapped memory\n");
- mutex_exit(&mhdl->lock);
- return (EINVAL);
- }
-
- /* do nothing for direct map */
- if (mhdl->mtype == LDC_DIRECT_MAP) {
- mutex_exit(&mhdl->lock);
- return (0);
- }
-
- /* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
- if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
- (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
- mutex_exit(&mhdl->lock);
- return (0);
- }
-
- if (offset >= mhdl->memseg->size ||
- (offset + size) > mhdl->memseg->size) {
- DWARN(DBG_ALL_LDCS,
- "i_ldc_mem_acquire_release: memory out of range\n");
- mutex_exit(&mhdl->lock);
- return (EINVAL);
- }
-
- /* get the channel handle and memory segment */
- ldcp = mhdl->ldcp;
- memseg = mhdl->memseg;
-
- if (mhdl->mtype == LDC_SHADOW_MAP) {
-
- local_vaddr = memseg->vaddr + offset;
- copy_size = size;
-
- /* copy to/from remote from/to local memory */
- err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
- &copy_size, memseg->cookies, memseg->ncookies,
- direction);
- if (err || copy_size != size) {
- DWARN(ldcp->id,
- "i_ldc_mem_acquire_release: copy failed\n");
- mutex_exit(&mhdl->lock);
- return (err);
- }
- }
-
- mutex_exit(&mhdl->lock);
-
- return (0);
-}
-
-/*
- * Ensure that the contents in the remote memory seg are consistent
- * with the contents of the local segment
- */
-int
-ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
-{
- return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
-}
-
-
-/*
- * Ensure that the contents in the local memory seg are consistent
- * with the contents of the remote segment
- */
-int
-ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
-{
- return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
-}
-
-/*
- * Allocate a descriptor ring. The size of each descriptor
- * must be 8-byte aligned and the entire ring should be a multiple
- * of MMU_PAGESIZE.
- */ -int -ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle) -{ - ldc_dring_t *dringp; - size_t size = (dsize * len); - - D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n", - len, dsize); - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n"); - return (EINVAL); - } - - if (len == 0) { - DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n"); - return (EINVAL); - } - - /* descriptor size should be 8-byte aligned */ - if (dsize == 0 || (dsize & 0x7)) { - DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n"); - return (EINVAL); - } - - *dhandle = 0; - - /* Allocate a desc ring structure */ - dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); - - /* Initialize dring */ - dringp->length = len; - dringp->dsize = dsize; - - /* round off to multiple of pagesize */ - dringp->size = (size & MMU_PAGEMASK); - if (size & MMU_PAGEOFFSET) - dringp->size += MMU_PAGESIZE; - - dringp->status = LDC_UNBOUND; - - /* allocate descriptor ring memory */ - dringp->base = kmem_zalloc(dringp->size, KM_SLEEP); - - /* initialize the desc ring lock */ - mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); - - /* Add descriptor ring to the head of global list */ - mutex_enter(&ldcssp->lock); - dringp->next = ldcssp->dring_list; - ldcssp->dring_list = dringp; - mutex_exit(&ldcssp->lock); - - *dhandle = (ldc_dring_handle_t)dringp; - - D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n"); - - return (0); -} - - -/* - * Destroy a descriptor ring. - */ -int -ldc_mem_dring_destroy(ldc_dring_handle_t dhandle) -{ - ldc_dring_t *dringp; - ldc_dring_t *tmp_dringp; - - D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n"); - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_destroy: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - - if (dringp->status == LDC_BOUND) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_destroy: desc ring is bound\n"); - return (EACCES); - } - - mutex_enter(&dringp->lock); - mutex_enter(&ldcssp->lock); - - /* remove from linked list - if not bound */ - tmp_dringp = ldcssp->dring_list; - if (tmp_dringp == dringp) { - ldcssp->dring_list = dringp->next; - dringp->next = NULL; - - } else { - while (tmp_dringp != NULL) { - if (tmp_dringp->next == dringp) { - tmp_dringp->next = dringp->next; - dringp->next = NULL; - break; - } - tmp_dringp = tmp_dringp->next; - } - if (tmp_dringp == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_destroy: invalid descriptor\n"); - mutex_exit(&ldcssp->lock); - mutex_exit(&dringp->lock); - return (EINVAL); - } - } - - mutex_exit(&ldcssp->lock); - - /* free the descriptor ring */ - kmem_free(dringp->base, dringp->size); - - mutex_exit(&dringp->lock); - - /* destroy dring lock */ - mutex_destroy(&dringp->lock); - - /* free desc ring object */ - kmem_free(dringp, sizeof (ldc_dring_t)); - - return (0); -} - -/* - * Bind a previously allocated dring to a channel. The channel should - * be OPEN in order to bind the ring to the channel. Returns back a - * descriptor ring cookie. The descriptor ring is exported for remote - * access by the client at the other end of the channel. An entry for - * dring pages is stored in map table (via call to ldc_mem_bind_handle). 
- */ -int -ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle, - uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) -{ - int err; - ldc_chan_t *ldcp; - ldc_dring_t *dringp; - ldc_mem_handle_t mhandle; - - /* check to see if channel is initalized */ - if (handle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_bind: invalid channel handle\n"); - return (EINVAL); - } - ldcp = (ldc_chan_t *)handle; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_bind: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - - if (cookie == NULL) { - DWARN(ldcp->id, - "ldc_mem_dring_bind: invalid cookie arg\n"); - return (EINVAL); - } - - mutex_enter(&dringp->lock); - - if (dringp->status == LDC_BOUND) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n", - ldcp->id); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - if ((perm & LDC_MEM_RW) == 0) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_bind: invalid permissions\n"); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { - DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n"); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - dringp->ldcp = ldcp; - - /* create an memory handle */ - err = ldc_mem_alloc_handle(handle, &mhandle); - if (err || mhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n", - ldcp->id); - mutex_exit(&dringp->lock); - return (err); - } - dringp->mhdl = mhandle; - - /* bind the descriptor ring to channel */ - err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size, - mtype, perm, cookie, ccount); - if (err) { - DWARN(ldcp->id, - "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n", - ldcp->id); - mutex_exit(&dringp->lock); - return (err); - } - - /* - * For now return error if we get more than one cookie - * FUTURE: Return multiple cookies .. - */ - if (*ccount > 1) { - (void) ldc_mem_unbind_handle(mhandle); - (void) ldc_mem_free_handle(mhandle); - - dringp->ldcp = NULL; - dringp->mhdl = NULL; - *ccount = 0; - - mutex_exit(&dringp->lock); - return (EAGAIN); - } - - /* Add descriptor ring to channel's exported dring list */ - mutex_enter(&ldcp->exp_dlist_lock); - dringp->ch_next = ldcp->exp_dring_list; - ldcp->exp_dring_list = dringp; - mutex_exit(&ldcp->exp_dlist_lock); - - dringp->status = LDC_BOUND; - - mutex_exit(&dringp->lock); - - return (0); -} - -/* - * Return the next cookie associated with the specified dring handle - */ -int -ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie) -{ - int rv = 0; - ldc_dring_t *dringp; - ldc_chan_t *ldcp; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_nextcookie: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - mutex_enter(&dringp->lock); - - if (dringp->status != LDC_BOUND) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_nextcookie: descriptor ring 0x%llx " - "is not bound\n", dringp); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - ldcp = dringp->ldcp; - - if (cookie == NULL) { - DWARN(ldcp->id, - "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n", - ldcp->id); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie); - mutex_exit(&dringp->lock); - - return (rv); -} -/* - * Unbind a previously bound dring from a channel. 
- */ -int -ldc_mem_dring_unbind(ldc_dring_handle_t dhandle) -{ - ldc_dring_t *dringp; - ldc_dring_t *tmp_dringp; - ldc_chan_t *ldcp; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_unbind: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - - mutex_enter(&dringp->lock); - - if (dringp->status == LDC_UNBOUND) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n", - dringp); - mutex_exit(&dringp->lock); - return (EINVAL); - } - ldcp = dringp->ldcp; - - mutex_enter(&ldcp->exp_dlist_lock); - - tmp_dringp = ldcp->exp_dring_list; - if (tmp_dringp == dringp) { - ldcp->exp_dring_list = dringp->ch_next; - dringp->ch_next = NULL; - - } else { - while (tmp_dringp != NULL) { - if (tmp_dringp->ch_next == dringp) { - tmp_dringp->ch_next = dringp->ch_next; - dringp->ch_next = NULL; - break; - } - tmp_dringp = tmp_dringp->ch_next; - } - if (tmp_dringp == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_unbind: invalid descriptor\n"); - mutex_exit(&ldcp->exp_dlist_lock); - mutex_exit(&dringp->lock); - return (EINVAL); - } - } - - mutex_exit(&ldcp->exp_dlist_lock); - - (void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl); - (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); - - dringp->ldcp = NULL; - dringp->mhdl = NULL; - dringp->status = LDC_UNBOUND; - - mutex_exit(&dringp->lock); - - return (0); -} - -/* - * Get information about the dring. The base address of the descriptor - * ring along with the type and permission are returned back. - */ -int -ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo) -{ - ldc_dring_t *dringp; - int rv; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_info: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - - mutex_enter(&dringp->lock); - - if (dringp->mhdl) { - rv = ldc_mem_info(dringp->mhdl, minfo); - if (rv) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_info: error reading mem info\n"); - mutex_exit(&dringp->lock); - return (rv); - } - } else { - minfo->vaddr = dringp->base; - minfo->raddr = NULL; - minfo->status = dringp->status; - } - - mutex_exit(&dringp->lock); - - return (0); -} - -/* - * Map an exported descriptor ring into the local address space. If the - * descriptor ring was exported for direct map access, a HV call is made - * to allocate a RA range. If the map is done via a shadow copy, local - * shadow memory is allocated. 
- */ -int -ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie, - uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype, - ldc_dring_handle_t *dhandle) -{ - int err; - ldc_chan_t *ldcp = (ldc_chan_t *)handle; - ldc_mem_handle_t mhandle; - ldc_dring_t *dringp; - size_t dring_size; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_map: invalid dhandle\n"); - return (EINVAL); - } - - /* check to see if channel is initalized */ - if (handle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_map: invalid channel handle\n"); - return (EINVAL); - } - ldcp = (ldc_chan_t *)handle; - - if (cookie == NULL) { - DWARN(ldcp->id, - "ldc_mem_dring_map: (0x%llx) invalid cookie\n", - ldcp->id); - return (EINVAL); - } - - /* FUTURE: For now we support only one cookie per dring */ - ASSERT(ccount == 1); - - if (cookie->size < (dsize * len)) { - DWARN(ldcp->id, - "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n", - ldcp->id); - return (EINVAL); - } - - *dhandle = 0; - - /* Allocate an dring structure */ - dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); - - D1(ldcp->id, - "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n", - mtype, len, dsize, cookie->addr, cookie->size); - - /* Initialize dring */ - dringp->length = len; - dringp->dsize = dsize; - - /* round of to multiple of page size */ - dring_size = len * dsize; - dringp->size = (dring_size & MMU_PAGEMASK); - if (dring_size & MMU_PAGEOFFSET) - dringp->size += MMU_PAGESIZE; - - dringp->ldcp = ldcp; - - /* create an memory handle */ - err = ldc_mem_alloc_handle(handle, &mhandle); - if (err || mhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_map: cannot alloc hdl err=%d\n", - err); - kmem_free(dringp, sizeof (ldc_dring_t)); - return (ENOMEM); - } - - dringp->mhdl = mhandle; - dringp->base = NULL; - - /* map the dring into local memory */ - err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW, - &(dringp->base), NULL); - if (err || dringp->base == NULL) { - cmn_err(CE_WARN, - "ldc_mem_dring_map: cannot map desc ring err=%d\n", err); - (void) ldc_mem_free_handle(mhandle); - kmem_free(dringp, sizeof (ldc_dring_t)); - return (ENOMEM); - } - - /* initialize the desc ring lock */ - mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); - - /* Add descriptor ring to channel's imported dring list */ - mutex_enter(&ldcp->imp_dlist_lock); - dringp->ch_next = ldcp->imp_dring_list; - ldcp->imp_dring_list = dringp; - mutex_exit(&ldcp->imp_dlist_lock); - - dringp->status = LDC_MAPPED; - - *dhandle = (ldc_dring_handle_t)dringp; - - return (0); -} - -/* - * Unmap a descriptor ring. Free shadow memory (if any). 
- */ -int -ldc_mem_dring_unmap(ldc_dring_handle_t dhandle) -{ - ldc_dring_t *dringp; - ldc_dring_t *tmp_dringp; - ldc_chan_t *ldcp; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_unmap: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - - if (dringp->status != LDC_MAPPED) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_unmap: not a mapped desc ring\n"); - return (EINVAL); - } - - mutex_enter(&dringp->lock); - - ldcp = dringp->ldcp; - - mutex_enter(&ldcp->imp_dlist_lock); - - /* find and unlink the desc ring from channel import list */ - tmp_dringp = ldcp->imp_dring_list; - if (tmp_dringp == dringp) { - ldcp->imp_dring_list = dringp->ch_next; - dringp->ch_next = NULL; - - } else { - while (tmp_dringp != NULL) { - if (tmp_dringp->ch_next == dringp) { - tmp_dringp->ch_next = dringp->ch_next; - dringp->ch_next = NULL; - break; - } - tmp_dringp = tmp_dringp->ch_next; - } - if (tmp_dringp == NULL) { - DWARN(DBG_ALL_LDCS, - "ldc_mem_dring_unmap: invalid descriptor\n"); - mutex_exit(&ldcp->imp_dlist_lock); - mutex_exit(&dringp->lock); - return (EINVAL); - } - } - - mutex_exit(&ldcp->imp_dlist_lock); - - /* do a LDC memory handle unmap and free */ - (void) ldc_mem_unmap(dringp->mhdl); - (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); - - dringp->status = 0; - dringp->ldcp = NULL; - - mutex_exit(&dringp->lock); - - /* destroy dring lock */ - mutex_destroy(&dringp->lock); - - /* free desc ring object */ - kmem_free(dringp, sizeof (ldc_dring_t)); - - return (0); -} - -/* - * Internal entry point for descriptor ring access entry consistency - * semantics. Acquire copies the contents of the remote descriptor ring - * into the local shadow copy. The release operation copies the local - * contents into the remote dring. The start and end locations specify - * bounds for the entries being synchronized. - */ -static int -i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, - uint8_t direction, uint64_t start, uint64_t end) -{ - int err; - ldc_dring_t *dringp; - ldc_chan_t *ldcp; - uint64_t soff; - size_t copy_size; - - if (dhandle == NULL) { - DWARN(DBG_ALL_LDCS, - "i_ldc_dring_acquire_release: invalid desc ring handle\n"); - return (EINVAL); - } - dringp = (ldc_dring_t *)dhandle; - mutex_enter(&dringp->lock); - - if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) { - DWARN(DBG_ALL_LDCS, - "i_ldc_dring_acquire_release: not a mapped desc ring\n"); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - if (start >= dringp->length || end >= dringp->length) { - DWARN(DBG_ALL_LDCS, - "i_ldc_dring_acquire_release: index out of range\n"); - mutex_exit(&dringp->lock); - return (EINVAL); - } - - /* get the channel handle */ - ldcp = dringp->ldcp; - - copy_size = (start <= end) ? 
(((end - start) + 1) * dringp->dsize) : - ((dringp->length - start) * dringp->dsize); - - /* Calculate the relative offset for the first desc */ - soff = (start * dringp->dsize); - - /* copy to/from remote from/to local memory */ - D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n", - soff, copy_size); - err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, - direction, soff, copy_size); - if (err) { - DWARN(ldcp->id, - "i_ldc_dring_acquire_release: copy failed\n"); - mutex_exit(&dringp->lock); - return (err); - } - - /* do the balance */ - if (start > end) { - copy_size = ((end + 1) * dringp->dsize); - soff = 0; - - /* copy to/from remote from/to local memory */ - D1(ldcp->id, "i_ldc_dring_acquire_release: c2 " - "off=0x%llx sz=0x%llx\n", soff, copy_size); - err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, - direction, soff, copy_size); - if (err) { - DWARN(ldcp->id, - "i_ldc_dring_acquire_release: copy failed\n"); - mutex_exit(&dringp->lock); - return (err); - } - } - - mutex_exit(&dringp->lock); - - return (0); -} - -/* - * Ensure that the contents in the local dring are consistent - * with the contents if of remote dring - */ -int -ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) -{ - return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end)); -} - -/* - * Ensure that the contents in the remote dring are consistent - * with the contents if of local dring - */ -int -ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) -{ - return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end)); -} - - -/* ------------------------------------------------------------------------- */ diff --git a/usr/src/uts/sun4v/io/ldc_shm.c b/usr/src/uts/sun4v/io/ldc_shm.c new file mode 100644 index 0000000000..26d03c0521 --- /dev/null +++ b/usr/src/uts/sun4v/io/ldc_shm.c @@ -0,0 +1,2236 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v LDC Link Layer Shared Memory Routines
+ */
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/ksynch.h>
+#include <sys/debug.h>
+#include <sys/cyclic.h>
+#include <sys/machsystm.h>
+#include <sys/vm.h>
+#include <sys/machcpuvar.h>
+#include <sys/mmu.h>
+#include <sys/pte.h>
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/hat_sfmmu.h>
+#include <sys/vm_machparam.h>
+#include <vm/seg_kmem.h>
+#include <vm/seg_kpm.h>
+#include <sys/hypervisor_api.h>
+#include <sys/ldc.h>
+#include <sys/ldc_impl.h>
+
+/* LDC variables used by shared memory routines */
+extern ldc_soft_state_t *ldcssp;
+extern int ldc_max_retries;
+extern clock_t ldc_delay;
+
+#ifdef DEBUG
+extern int ldcdbg;
+#endif
+
+/* LDC internal functions used by shared memory routines */
+extern void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
+extern int i_ldc_h2v_error(int h_error);
+
+#ifdef DEBUG
+extern void ldcdebug(int64_t id, const char *fmt, ...);
+#endif
+
+/* Memory synchronization internal functions */
+static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
+ uint8_t direction, uint64_t offset, size_t size);
+static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
+ uint8_t direction, uint64_t start, uint64_t end);
+
+/*
+ * The LDC framework supports mapping a remote domain's memory
+ * either directly or via shadow memory pages. Default
+ * support is currently implemented via shadow copy.
+ * Direct map can be enabled by setting 'ldc_shmem_enabled'.
+ */
+int ldc_shmem_enabled = 0;
+
+/*
+ * Pages exported for remote access over each channel are
+ * maintained in a table registered with the Hypervisor.
+ * The default number of entries in the table is given by
+ * 'ldc_maptable_entries'.
+ */ +uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES; + +#define IDX2COOKIE(idx, pg_szc, pg_shift) \ + (((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift))) + +/* + * Allocate a memory handle for the channel and link it into the list + * Also choose which memory table to use if this is the first handle + * being assigned to this channel + */ +int +ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle) +{ + ldc_chan_t *ldcp; + ldc_mhdl_t *mhdl; + + if (handle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_alloc_handle: invalid channel handle\n"); + return (EINVAL); + } + ldcp = (ldc_chan_t *)handle; + + mutex_enter(&ldcp->lock); + + /* check to see if channel is initalized */ + if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { + DWARN(ldcp->id, + "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n", + ldcp->id); + mutex_exit(&ldcp->lock); + return (EINVAL); + } + + /* allocate handle for channel */ + mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP); + + /* initialize the lock */ + mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL); + + mhdl->myshadow = B_FALSE; + mhdl->memseg = NULL; + mhdl->ldcp = ldcp; + mhdl->status = LDC_UNBOUND; + + /* insert memory handle (@ head) into list */ + if (ldcp->mhdl_list == NULL) { + ldcp->mhdl_list = mhdl; + mhdl->next = NULL; + } else { + /* insert @ head */ + mhdl->next = ldcp->mhdl_list; + ldcp->mhdl_list = mhdl; + } + + /* return the handle */ + *mhandle = (ldc_mem_handle_t)mhdl; + + mutex_exit(&ldcp->lock); + + D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n", + ldcp->id, mhdl); + + return (0); +} + +/* + * Free memory handle for the channel and unlink it from the list + */ +int +ldc_mem_free_handle(ldc_mem_handle_t mhandle) +{ + ldc_mhdl_t *mhdl, *phdl; + ldc_chan_t *ldcp; + + if (mhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_free_handle: invalid memory handle\n"); + return (EINVAL); + } + mhdl = (ldc_mhdl_t *)mhandle; + + mutex_enter(&mhdl->lock); + + ldcp = mhdl->ldcp; + + if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { + DWARN(ldcp->id, + "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n", + mhdl); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + mutex_exit(&mhdl->lock); + + mutex_enter(&ldcp->mlist_lock); + + phdl = ldcp->mhdl_list; + + /* first handle */ + if (phdl == mhdl) { + ldcp->mhdl_list = mhdl->next; + mutex_destroy(&mhdl->lock); + kmem_cache_free(ldcssp->memhdl_cache, mhdl); + + D1(ldcp->id, + "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n", + ldcp->id, mhdl); + } else { + /* walk the list - unlink and free */ + while (phdl != NULL) { + if (phdl->next == mhdl) { + phdl->next = mhdl->next; + mutex_destroy(&mhdl->lock); + kmem_cache_free(ldcssp->memhdl_cache, mhdl); + D1(ldcp->id, + "ldc_mem_free_handle: (0x%llx) freed " + "handle 0x%llx\n", ldcp->id, mhdl); + break; + } + phdl = phdl->next; + } + } + + if (phdl == NULL) { + DWARN(ldcp->id, + "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl); + mutex_exit(&ldcp->mlist_lock); + return (EINVAL); + } + + mutex_exit(&ldcp->mlist_lock); + + return (0); +} + +/* + * Bind a memory handle to a virtual address. + * The virtual address is converted to the corresponding real addresses. + * Returns pointer to the first ldc_mem_cookie and the total number + * of cookies for this virtual address. Other cookies can be obtained + * using the ldc_mem_nextcookie() call. If the pages are stored in + * consecutive locations in the table, a single cookie corresponding to + * the first location is returned. 
The cookie size spans all the entries. + * + * If the VA corresponds to a page that is already being exported, reuse + * the page and do not export it again. Bump the page's use count. + */ +int +ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len, + uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) +{ + ldc_mhdl_t *mhdl; + ldc_chan_t *ldcp; + ldc_mtbl_t *mtbl; + ldc_memseg_t *memseg; + ldc_mte_t tmp_mte; + uint64_t index, prev_index = 0; + int64_t cookie_idx; + uintptr_t raddr, ra_aligned; + uint64_t psize, poffset, v_offset; + uint64_t pg_shift, pg_size, pg_size_code, pg_mask; + pgcnt_t npages; + caddr_t v_align, addr; + int i, rv; + + if (mhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_bind_handle: invalid memory handle\n"); + return (EINVAL); + } + mhdl = (ldc_mhdl_t *)mhandle; + ldcp = mhdl->ldcp; + + /* clear count */ + *ccount = 0; + + mutex_enter(&mhdl->lock); + + if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) { + DWARN(ldcp->id, + "ldc_mem_bind_handle: (0x%x) handle already bound\n", + mhandle); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + /* Force address and size to be 8-byte aligned */ + if ((((uintptr_t)vaddr | len) & 0x7) != 0) { + DWARN(ldcp->id, + "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n"); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + /* + * If this channel is binding a memory handle for the + * first time allocate it a memory map table and initialize it + */ + if ((mtbl = ldcp->mtbl) == NULL) { + + mutex_enter(&ldcp->lock); + + /* Allocate and initialize the map table structure */ + mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP); + mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries; + mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t); + mtbl->next_entry = NULL; + mtbl->contigmem = B_TRUE; + + /* Allocate the table itself */ + mtbl->table = (ldc_mte_slot_t *) + contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE); + if (mtbl->table == NULL) { + + /* allocate a page of memory using kmem_alloc */ + mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP); + mtbl->size = MMU_PAGESIZE; + mtbl->contigmem = B_FALSE; + mtbl->num_entries = mtbl->num_avail = + mtbl->size / sizeof (ldc_mte_slot_t); + DWARN(ldcp->id, + "ldc_mem_bind_handle: (0x%llx) reduced tbl size " + "to %lx entries\n", ldcp->id, mtbl->num_entries); + } + + /* zero out the memory */ + bzero(mtbl->table, mtbl->size); + + /* initialize the lock */ + mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL); + + /* register table for this channel */ + rv = hv_ldc_set_map_table(ldcp->id, + va_to_pa(mtbl->table), mtbl->num_entries); + if (rv != 0) { + cmn_err(CE_WARN, + "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl", + ldcp->id, rv); + if (mtbl->contigmem) + contig_mem_free(mtbl->table, mtbl->size); + else + kmem_free(mtbl->table, mtbl->size); + mutex_destroy(&mtbl->lock); + kmem_free(mtbl, sizeof (ldc_mtbl_t)); + mutex_exit(&ldcp->lock); + mutex_exit(&mhdl->lock); + return (EIO); + } + + ldcp->mtbl = mtbl; + mutex_exit(&ldcp->lock); + + D1(ldcp->id, + "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n", + ldcp->id, ldcp->mtbl->table); + } + + /* FUTURE: get the page size, pgsz code, and shift */ + pg_size = MMU_PAGESIZE; + pg_size_code = page_szc(pg_size); + pg_shift = page_get_shift(pg_size_code); + pg_mask = ~(pg_size - 1); + + D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding " + "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", + ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); + + /* aligned VA and its 
offset */ + v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1)); + v_offset = ((uintptr_t)vaddr) & (pg_size - 1); + + npages = (len+v_offset)/pg_size; + npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1; + + D1(ldcp->id, "ldc_mem_bind_handle: binding " + "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", + ldcp->id, vaddr, v_align, v_offset, npages); + + /* lock the memory table - exclusive access to channel */ + mutex_enter(&mtbl->lock); + + if (npages > mtbl->num_avail) { + D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n", + ldcp->id); + mutex_exit(&mtbl->lock); + mutex_exit(&mhdl->lock); + return (ENOMEM); + } + + /* Allocate a memseg structure */ + memseg = mhdl->memseg = + kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); + + /* Allocate memory to store all pages and cookies */ + memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); + memseg->cookies = + kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP); + + D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n", + ldcp->id, npages); + + addr = v_align; + + /* + * Check if direct shared memory map is enabled, if not change + * the mapping type to include SHADOW_MAP. + */ + if (ldc_shmem_enabled == 0) + mtype = LDC_SHADOW_MAP; + + /* + * Table slots are used in a round-robin manner. The algorithm permits + * inserting duplicate entries. Slots allocated earlier will typically + * get freed before we get back to reusing the slot.Inserting duplicate + * entries should be OK as we only lookup entries using the cookie addr + * i.e. tbl index, during export, unexport and copy operation. + * + * One implementation what was tried was to search for a duplicate + * page entry first and reuse it. The search overhead is very high and + * in the vnet case dropped the perf by almost half, 50 to 24 mbps. + * So it does make sense to avoid searching for duplicates. + * + * But during the process of searching for a free slot, if we find a + * duplicate entry we will go ahead and use it, and bump its use count. + */ + + /* index to start searching from */ + index = mtbl->next_entry; + cookie_idx = -1; + + tmp_mte.ll = 0; /* initialise fields to 0 */ + + if (mtype & LDC_DIRECT_MAP) { + tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0; + tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0; + tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0; + } + + if (mtype & LDC_SHADOW_MAP) { + tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0; + tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0; + } + + if (mtype & LDC_IO_MAP) { + tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0; + tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 
1 : 0; + } + + D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); + + tmp_mte.mte_pgszc = pg_size_code; + + /* initialize each mem table entry */ + for (i = 0; i < npages; i++) { + + /* check if slot is available in the table */ + while (mtbl->table[index].entry.ll != 0) { + + index = (index + 1) % mtbl->num_entries; + + if (index == mtbl->next_entry) { + /* we have looped around */ + DWARN(DBG_ALL_LDCS, + "ldc_mem_bind_handle: (0x%llx) cannot find " + "entry\n", ldcp->id); + *ccount = 0; + + /* NOTE: free memory, remove previous entries */ + /* this shouldnt happen as num_avail was ok */ + + mutex_exit(&mtbl->lock); + mutex_exit(&mhdl->lock); + return (ENOMEM); + } + } + + /* get the real address */ + raddr = va_to_pa((void *)addr); + ra_aligned = ((uintptr_t)raddr & pg_mask); + + /* build the mte */ + tmp_mte.mte_rpfn = ra_aligned >> pg_shift; + + D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll); + + /* update entry in table */ + mtbl->table[index].entry = tmp_mte; + + D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx" + " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index); + + /* calculate the size and offset for this export range */ + if (i == 0) { + /* first page */ + psize = min((pg_size - v_offset), len); + poffset = v_offset; + + } else if (i == (npages - 1)) { + /* last page */ + psize = (((uintptr_t)(vaddr + len)) & + ((uint64_t)(pg_size-1))); + if (psize == 0) + psize = pg_size; + poffset = 0; + + } else { + /* middle pages */ + psize = pg_size; + poffset = 0; + } + + /* store entry for this page */ + memseg->pages[i].index = index; + memseg->pages[i].raddr = raddr; + memseg->pages[i].offset = poffset; + memseg->pages[i].size = psize; + memseg->pages[i].mte = &(mtbl->table[index]); + + /* create the cookie */ + if (i == 0 || (index != prev_index + 1)) { + cookie_idx++; + memseg->cookies[cookie_idx].addr = + IDX2COOKIE(index, pg_size_code, pg_shift); + memseg->cookies[cookie_idx].addr |= poffset; + memseg->cookies[cookie_idx].size = psize; + + } else { + memseg->cookies[cookie_idx].size += psize; + } + + D1(ldcp->id, "ldc_mem_bind_handle: bound " + "(0x%llx) va=0x%llx, idx=0x%llx, " + "ra=0x%llx(sz=0x%x,off=0x%x)\n", + ldcp->id, addr, index, raddr, psize, poffset); + + /* decrement number of available entries */ + mtbl->num_avail--; + + /* increment va by page size */ + addr += pg_size; + + /* increment index */ + prev_index = index; + index = (index + 1) % mtbl->num_entries; + + /* save the next slot */ + mtbl->next_entry = index; + } + + mutex_exit(&mtbl->lock); + + /* memory handle = bound */ + mhdl->mtype = mtype; + mhdl->perm = perm; + mhdl->status = LDC_BOUND; + + /* update memseg_t */ + memseg->vaddr = vaddr; + memseg->raddr = memseg->pages[0].raddr; + memseg->size = len; + memseg->npages = npages; + memseg->ncookies = cookie_idx + 1; + memseg->next_cookie = (memseg->ncookies > 1) ? 
1 : 0; + + /* return count and first cookie */ + *ccount = memseg->ncookies; + cookie->addr = memseg->cookies[0].addr; + cookie->size = memseg->cookies[0].size; + + D1(ldcp->id, + "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, " + "pgs=0x%llx cookies=0x%llx\n", + ldcp->id, mhdl, vaddr, npages, memseg->ncookies); + + mutex_exit(&mhdl->lock); + return (0); +} + +/* + * Return the next cookie associated with the specified memory handle + */ +int +ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie) +{ + ldc_mhdl_t *mhdl; + ldc_chan_t *ldcp; + ldc_memseg_t *memseg; + + if (mhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_nextcookie: invalid memory handle\n"); + return (EINVAL); + } + mhdl = (ldc_mhdl_t *)mhandle; + + mutex_enter(&mhdl->lock); + + ldcp = mhdl->ldcp; + memseg = mhdl->memseg; + + if (cookie == 0) { + DWARN(ldcp->id, + "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n", + ldcp->id); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + if (memseg->next_cookie != 0) { + cookie->addr = memseg->cookies[memseg->next_cookie].addr; + cookie->size = memseg->cookies[memseg->next_cookie].size; + memseg->next_cookie++; + if (memseg->next_cookie == memseg->ncookies) + memseg->next_cookie = 0; + + } else { + DWARN(ldcp->id, + "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id); + cookie->addr = 0; + cookie->size = 0; + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + D1(ldcp->id, + "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n", + ldcp->id, cookie->addr, cookie->size); + + mutex_exit(&mhdl->lock); + return (0); +} + +/* + * Unbind the virtual memory region associated with the specified + * memory handle. Allassociated cookies are freed and the corresponding + * RA space is no longer exported. + */ +int +ldc_mem_unbind_handle(ldc_mem_handle_t mhandle) +{ + ldc_mhdl_t *mhdl; + ldc_chan_t *ldcp; + ldc_mtbl_t *mtbl; + ldc_memseg_t *memseg; + uint64_t cookie_addr; + uint64_t pg_shift, pg_size_code; + int i, rv; + + if (mhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_unbind_handle: invalid memory handle\n"); + return (EINVAL); + } + mhdl = (ldc_mhdl_t *)mhandle; + + mutex_enter(&mhdl->lock); + + if (mhdl->status == LDC_UNBOUND) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_unbind_handle: (0x%x) handle is not bound\n", + mhandle); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + ldcp = mhdl->ldcp; + mtbl = ldcp->mtbl; + + memseg = mhdl->memseg; + + /* lock the memory table - exclusive access to channel */ + mutex_enter(&mtbl->lock); + + /* undo the pages exported */ + for (i = 0; i < memseg->npages; i++) { + + /* check for mapped pages, revocation cookie != 0 */ + if (memseg->pages[i].mte->cookie) { + + pg_size_code = page_szc(memseg->pages[i].size); + pg_shift = page_get_shift(memseg->pages[i].size); + cookie_addr = IDX2COOKIE(memseg->pages[i].index, + pg_size_code, pg_shift); + + D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke " + "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id, + cookie_addr, memseg->pages[i].mte->cookie); + rv = hv_ldc_revoke(ldcp->id, cookie_addr, + memseg->pages[i].mte->cookie); + if (rv) { + DWARN(ldcp->id, + "ldc_mem_unbind_handle: (0x%llx) cannot " + "revoke mapping, cookie %llx\n", ldcp->id, + cookie_addr); + } + } + + /* clear the entry from the table */ + memseg->pages[i].mte->entry.ll = 0; + mtbl->num_avail++; + } + mutex_exit(&mtbl->lock); + + /* free the allocated memseg and page structures */ + kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); + kmem_free(memseg->cookies, + (sizeof 
(ldc_mem_cookie_t) * memseg->npages)); + kmem_cache_free(ldcssp->memseg_cache, memseg); + + /* uninitialize the memory handle */ + mhdl->memseg = NULL; + mhdl->status = LDC_UNBOUND; + + D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n", + ldcp->id, mhdl); + + mutex_exit(&mhdl->lock); + return (0); +} + +/* + * Get information about the dring. The base address of the descriptor + * ring along with the type and permission are returned back. + */ +int +ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo) +{ + ldc_mhdl_t *mhdl; + + if (mhandle == NULL) { + DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n"); + return (EINVAL); + } + mhdl = (ldc_mhdl_t *)mhandle; + + if (minfo == NULL) { + DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n"); + return (EINVAL); + } + + mutex_enter(&mhdl->lock); + + minfo->status = mhdl->status; + if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) { + minfo->vaddr = mhdl->memseg->vaddr; + minfo->raddr = mhdl->memseg->raddr; + minfo->mtype = mhdl->mtype; + minfo->perm = mhdl->perm; + } + mutex_exit(&mhdl->lock); + + return (0); +} + +/* + * Copy data either from or to the client specified virtual address + * space to or from the exported memory associated with the cookies. + * The direction argument determines whether the data is read from or + * written to exported memory. + */ +int +ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size, + ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction) +{ + ldc_chan_t *ldcp; + uint64_t local_voff, local_valign; + uint64_t cookie_addr, cookie_size; + uint64_t pg_shift, pg_size, pg_size_code; + uint64_t export_caddr, export_poff, export_psize, export_size; + uint64_t local_ra, local_poff, local_psize; + uint64_t copy_size, copied_len = 0, total_bal = 0, idx = 0; + pgcnt_t npages; + size_t len = *size; + int i, rv = 0; + + uint64_t chid; + + if (handle == NULL) { + DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n"); + return (EINVAL); + } + ldcp = (ldc_chan_t *)handle; + chid = ldcp->id; + + /* check to see if channel is UP */ + if (ldcp->tstate != TS_UP) { + DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n", + chid); + return (ECONNRESET); + } + + /* Force address and size to be 8-byte aligned */ + if ((((uintptr_t)vaddr | len) & 0x7) != 0) { + DWARN(chid, + "ldc_mem_copy: addr/sz is not 8-byte aligned\n"); + return (EINVAL); + } + + /* Find the size of the exported memory */ + export_size = 0; + for (i = 0; i < ccount; i++) + export_size += cookies[i].size; + + /* check to see if offset is valid */ + if (off > export_size) { + DWARN(chid, + "ldc_mem_copy: (0x%llx) start offset > export mem size\n", + chid); + return (EINVAL); + } + + /* + * Check to see if the export size is smaller than the size we + * are requesting to copy - if so flag an error + */ + if ((export_size - off) < *size) { + DWARN(chid, + "ldc_mem_copy: (0x%llx) copy size > export mem size\n", + chid); + return (EINVAL); + } + + total_bal = min(export_size, *size); + + /* FUTURE: get the page size, pgsz code, and shift */ + pg_size = MMU_PAGESIZE; + pg_size_code = page_szc(pg_size); + pg_shift = page_get_shift(pg_size_code); + + D1(chid, "ldc_mem_copy: copying data " + "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", + chid, vaddr, pg_size, pg_size_code, pg_shift); + + /* aligned VA and its offset */ + local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1)); + local_voff = ((uintptr_t)vaddr) & (pg_size - 1); + + npages = (len+local_voff)/pg_size; 
+ npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1; + + D1(chid, + "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n", + chid, vaddr, local_valign, local_voff, npages); + + local_ra = va_to_pa((void *)local_valign); + local_poff = local_voff; + local_psize = min(len, (pg_size - local_voff)); + + len -= local_psize; + + /* + * find the first cookie in the list of cookies + * if the offset passed in is not zero + */ + for (idx = 0; idx < ccount; idx++) { + cookie_size = cookies[idx].size; + if (off < cookie_size) + break; + off -= cookie_size; + } + + cookie_addr = cookies[idx].addr + off; + cookie_size = cookies[idx].size - off; + + export_caddr = cookie_addr & ~(pg_size - 1); + export_poff = cookie_addr & (pg_size - 1); + export_psize = min(cookie_size, (pg_size - export_poff)); + + for (;;) { + + copy_size = min(export_psize, local_psize); + + D1(chid, + "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx," + " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx," + " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," + " total_bal=0x%llx\n", + chid, direction, export_caddr, local_ra, export_poff, + local_poff, export_psize, local_psize, copy_size, + total_bal); + + rv = hv_ldc_copy(chid, direction, + (export_caddr + export_poff), (local_ra + local_poff), + copy_size, &copied_len); + + if (rv != 0) { + int error = EIO; + uint64_t rx_hd, rx_tl; + + DWARN(chid, + "ldc_mem_copy: (0x%llx) err %d during copy\n", + (unsigned long long)chid, rv); + DWARN(chid, + "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, " + "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx," + " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx," + " copied_len=0x%lx, total_bal=0x%lx\n", + chid, direction, export_caddr, local_ra, + export_poff, local_poff, export_psize, local_psize, + copy_size, copied_len, total_bal); + + *size = *size - total_bal; + + /* + * check if reason for copy error was due to + * a channel reset. we need to grab the lock + * just in case we have to do a reset. 
+ */ + mutex_enter(&ldcp->lock); + mutex_enter(&ldcp->tx_lock); + + rv = hv_ldc_rx_get_state(ldcp->id, + &rx_hd, &rx_tl, &(ldcp->link_state)); + if (ldcp->link_state == LDC_CHANNEL_DOWN || + ldcp->link_state == LDC_CHANNEL_RESET) { + i_ldc_reset(ldcp, B_FALSE); + error = ECONNRESET; + } + + mutex_exit(&ldcp->tx_lock); + mutex_exit(&ldcp->lock); + + return (error); + } + + ASSERT(copied_len <= copy_size); + + D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len); + export_poff += copied_len; + local_poff += copied_len; + export_psize -= copied_len; + local_psize -= copied_len; + cookie_size -= copied_len; + + total_bal -= copied_len; + + if (copy_size != copied_len) + continue; + + if (export_psize == 0 && total_bal != 0) { + + if (cookie_size == 0) { + idx++; + cookie_addr = cookies[idx].addr; + cookie_size = cookies[idx].size; + + export_caddr = cookie_addr & ~(pg_size - 1); + export_poff = cookie_addr & (pg_size - 1); + export_psize = + min(cookie_size, (pg_size-export_poff)); + } else { + export_caddr += pg_size; + export_poff = 0; + export_psize = min(cookie_size, pg_size); + } + } + + if (local_psize == 0 && total_bal != 0) { + local_valign += pg_size; + local_ra = va_to_pa((void *)local_valign); + local_poff = 0; + local_psize = min(pg_size, len); + len -= local_psize; + } + + /* check if we are all done */ + if (total_bal == 0) + break; + } + + + D1(chid, + "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n", + chid, *size); + + return (0); +} + +/* + * Copy data either from or to the client specified virtual address + * space to or from HV physical memory. + * + * The direction argument determines whether the data is read from or + * written to HV memory. direction values are LDC_COPY_IN/OUT similar + * to the ldc_mem_copy interface + */ +int +ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size, + caddr_t paddr, uint8_t direction) +{ + ldc_chan_t *ldcp; + uint64_t local_voff, local_valign; + uint64_t pg_shift, pg_size, pg_size_code; + uint64_t target_pa, target_poff, target_psize, target_size; + uint64_t local_ra, local_poff, local_psize; + uint64_t copy_size, copied_len = 0; + pgcnt_t npages; + size_t len = *size; + int rv = 0; + + if (handle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_rdwr_cookie: invalid channel handle\n"); + return (EINVAL); + } + ldcp = (ldc_chan_t *)handle; + + mutex_enter(&ldcp->lock); + + /* check to see if channel is UP */ + if (ldcp->tstate != TS_UP) { + DWARN(ldcp->id, + "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n", + ldcp->id); + mutex_exit(&ldcp->lock); + return (ECONNRESET); + } + + /* Force address and size to be 8-byte aligned */ + if ((((uintptr_t)vaddr | len) & 0x7) != 0) { + DWARN(ldcp->id, + "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n"); + mutex_exit(&ldcp->lock); + return (EINVAL); + } + + target_size = *size; + + /* FUTURE: get the page size, pgsz code, and shift */ + pg_size = MMU_PAGESIZE; + pg_size_code = page_szc(pg_size); + pg_shift = page_get_shift(pg_size_code); + + D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data " + "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n", + ldcp->id, vaddr, pg_size, pg_size_code, pg_shift); + + /* aligned VA and its offset */ + local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1); + local_voff = ((uintptr_t)vaddr) & (pg_size - 1); + + npages = (len + local_voff) / pg_size; + npages = ((len + local_voff) % pg_size == 0) ? 
npages : npages+1; + + D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, " + "val=0x%llx,off=0x%x,pgs=0x%x\n", + ldcp->id, vaddr, local_valign, local_voff, npages); + + local_ra = va_to_pa((void *)local_valign); + local_poff = local_voff; + local_psize = min(len, (pg_size - local_voff)); + + len -= local_psize; + + target_pa = ((uintptr_t)paddr) & ~(pg_size - 1); + target_poff = ((uintptr_t)paddr) & (pg_size - 1); + target_psize = pg_size - target_poff; + + for (;;) { + + copy_size = min(target_psize, local_psize); + + D1(ldcp->id, + "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx," + " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx," + " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx," + " total_bal=0x%llx\n", + ldcp->id, direction, target_pa, local_ra, target_poff, + local_poff, target_psize, local_psize, copy_size, + target_size); + + rv = hv_ldc_copy(ldcp->id, direction, + (target_pa + target_poff), (local_ra + local_poff), + copy_size, &copied_len); + + if (rv != 0) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n", + ldcp->id, rv); + DWARN(DBG_ALL_LDCS, + "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, " + "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, " + "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, " + "copy_sz=0x%llx, total_bal=0x%llx\n", + ldcp->id, direction, target_pa, local_ra, + target_poff, local_poff, target_psize, local_psize, + copy_size, target_size); + + *size = *size - target_size; + mutex_exit(&ldcp->lock); + return (i_ldc_h2v_error(rv)); + } + + D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n", + copied_len); + target_poff += copied_len; + local_poff += copied_len; + target_psize -= copied_len; + local_psize -= copied_len; + + target_size -= copied_len; + + if (copy_size != copied_len) + continue; + + if (target_psize == 0 && target_size != 0) { + target_pa += pg_size; + target_poff = 0; + target_psize = min(pg_size, target_size); + } + + if (local_psize == 0 && target_size != 0) { + local_valign += pg_size; + local_ra = va_to_pa((void *)local_valign); + local_poff = 0; + local_psize = min(pg_size, len); + len -= local_psize; + } + + /* check if we are all done */ + if (target_size == 0) + break; + } + + mutex_exit(&ldcp->lock); + + D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n", + ldcp->id, *size); + + return (0); +} + +/* + * Map an exported memory segment into the local address space. If the + * memory range was exported for direct map access, a HV call is made + * to allocate a RA range. If the map is done via a shadow copy, local + * shadow memory is allocated and the base VA is returned in 'vaddr'. If + * the mapping is a direct map then the RA is returned in 'raddr'. 
+ */ +int +ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount, + uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr) +{ + int i, j, idx, rv, retries; + ldc_chan_t *ldcp; + ldc_mhdl_t *mhdl; + ldc_memseg_t *memseg; + caddr_t tmpaddr; + uint64_t map_perm = perm; + uint64_t pg_size, pg_shift, pg_size_code, pg_mask; + uint64_t exp_size = 0, base_off, map_size, npages; + uint64_t cookie_addr, cookie_off, cookie_size; + tte_t ldc_tte; + + if (mhandle == NULL) { + DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n"); + return (EINVAL); + } + mhdl = (ldc_mhdl_t *)mhandle; + + mutex_enter(&mhdl->lock); + + if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED || + mhdl->memseg != NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + ldcp = mhdl->ldcp; + + mutex_enter(&ldcp->lock); + + if (ldcp->tstate != TS_UP) { + DWARN(ldcp->id, + "ldc_mem_dring_map: (0x%llx) channel is not UP\n", + ldcp->id); + mutex_exit(&ldcp->lock); + mutex_exit(&mhdl->lock); + return (ECONNRESET); + } + + if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { + DWARN(ldcp->id, "ldc_mem_map: invalid map type\n"); + mutex_exit(&ldcp->lock); + mutex_exit(&mhdl->lock); + return (EINVAL); + } + + D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n", + ldcp->id, cookie->addr, cookie->size); + + /* FUTURE: get the page size, pgsz code, and shift */ + pg_size = MMU_PAGESIZE; + pg_size_code = page_szc(pg_size); + pg_shift = page_get_shift(pg_size_code); + pg_mask = ~(pg_size - 1); + + /* calculate the number of pages in the exported cookie */ + base_off = cookie[0].addr & (pg_size - 1); + for (idx = 0; idx < ccount; idx++) + exp_size += cookie[idx].size; + map_size = P2ROUNDUP((exp_size + base_off), pg_size); + npages = (map_size >> pg_shift); + + /* Allocate memseg structure */ + memseg = mhdl->memseg = + kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP); + + /* Allocate memory to store all pages and cookies */ + memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP); + memseg->cookies = + kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP); + + D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx," + "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages); + + /* + * Check if direct map over shared memory is enabled, if not change + * the mapping type to SHADOW_MAP. 
+ */ + if (ldc_shmem_enabled == 0) + mtype = LDC_SHADOW_MAP; + + /* + * Check to see if the client is requesting direct or shadow map + * If direct map is requested, try to map remote memory first, + * and if that fails, revert to shadow map + */ + if (mtype == LDC_DIRECT_MAP) { + + /* Allocate kernel virtual space for mapping */ + memseg->vaddr = vmem_xalloc(heap_arena, map_size, + pg_size, 0, 0, NULL, NULL, VM_NOSLEEP); + if (memseg->vaddr == NULL) { + cmn_err(CE_WARN, + "ldc_mem_map: (0x%lx) memory map failed\n", + ldcp->id); + kmem_free(memseg->cookies, + (sizeof (ldc_mem_cookie_t) * ccount)); + kmem_free(memseg->pages, + (sizeof (ldc_page_t) * npages)); + kmem_cache_free(ldcssp->memseg_cache, memseg); + + mutex_exit(&ldcp->lock); + mutex_exit(&mhdl->lock); + return (ENOMEM); + } + + /* Unload previous mapping */ + hat_unload(kas.a_hat, memseg->vaddr, map_size, + HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); + + /* for each cookie passed in - map into address space */ + idx = 0; + cookie_size = 0; + tmpaddr = memseg->vaddr; + + for (i = 0; i < npages; i++) { + + if (cookie_size == 0) { + ASSERT(idx < ccount); + cookie_addr = cookie[idx].addr & pg_mask; + cookie_off = cookie[idx].addr & (pg_size - 1); + cookie_size = + P2ROUNDUP((cookie_off + cookie[idx].size), + pg_size); + idx++; + } + + D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping " + "cookie 0x%llx, bal=0x%llx\n", ldcp->id, + cookie_addr, cookie_size); + + /* map the cookie into address space */ + for (retries = 0; retries < ldc_max_retries; + retries++) { + + rv = hv_ldc_mapin(ldcp->id, cookie_addr, + &memseg->pages[i].raddr, &map_perm); + if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY) + break; + + drv_usecwait(ldc_delay); + } + + if (rv || memseg->pages[i].raddr == 0) { + DWARN(ldcp->id, + "ldc_mem_map: (0x%llx) hv mapin err %d\n", + ldcp->id, rv); + + /* remove previous mapins */ + hat_unload(kas.a_hat, memseg->vaddr, map_size, + HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK); + for (j = 0; j < i; j++) { + rv = hv_ldc_unmap( + memseg->pages[j].raddr); + if (rv) { + DWARN(ldcp->id, + "ldc_mem_map: (0x%llx) " + "cannot unmap ra=0x%llx\n", + ldcp->id, + memseg->pages[j].raddr); + } + } + + /* free kernel virtual space */ + vmem_free(heap_arena, (void *)memseg->vaddr, + map_size); + + /* direct map failed - revert to shadow map */ + mtype = LDC_SHADOW_MAP; + break; + + } else { + + D1(ldcp->id, + "ldc_mem_map: (0x%llx) vtop map 0x%llx -> " + "0x%llx, cookie=0x%llx, perm=0x%llx\n", + ldcp->id, tmpaddr, memseg->pages[i].raddr, + cookie_addr, perm); + + /* + * NOTE: Calling hat_devload directly, causes it + * to look for page_t using the pfn. 
Since this + * addr is greater than the memlist, it treates + * it as non-memory + */ + sfmmu_memtte(&ldc_tte, + (pfn_t)(memseg->pages[i].raddr >> pg_shift), + PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K); + + D1(ldcp->id, + "ldc_mem_map: (0x%llx) ra 0x%llx -> " + "tte 0x%llx\n", ldcp->id, + memseg->pages[i].raddr, ldc_tte); + + sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr, + NULL, HAT_LOAD_LOCK); + + cookie_size -= pg_size; + cookie_addr += pg_size; + tmpaddr += pg_size; + } + } + } + + if (mtype == LDC_SHADOW_MAP) { + if (*vaddr == NULL) { + memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP); + mhdl->myshadow = B_TRUE; + + D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated " + "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr); + } else { + /* + * Use client supplied memory for memseg->vaddr + * WARNING: assuming that client mem is >= exp_size + */ + memseg->vaddr = *vaddr; + } + + /* Save all page and cookie information */ + for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) { + memseg->pages[i].raddr = va_to_pa(tmpaddr); + memseg->pages[i].size = pg_size; + tmpaddr += pg_size; + } + + } + + /* save all cookies */ + bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t)); + + /* update memseg_t */ + memseg->raddr = memseg->pages[0].raddr; + memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size; + memseg->npages = npages; + memseg->ncookies = ccount; + memseg->next_cookie = 0; + + /* memory handle = mapped */ + mhdl->mtype = mtype; + mhdl->perm = perm; + mhdl->status = LDC_MAPPED; + + D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, " + "va=0x%llx, pgs=0x%llx cookies=0x%llx\n", + ldcp->id, mhdl, memseg->raddr, memseg->vaddr, + memseg->npages, memseg->ncookies); + + if (mtype == LDC_SHADOW_MAP) + base_off = 0; + if (raddr) + *raddr = (caddr_t)(memseg->raddr | base_off); + if (vaddr) + *vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off); + + mutex_exit(&ldcp->lock); + mutex_exit(&mhdl->lock); + return (0); +} + +/* + * Unmap a memory segment. Free shadow memory (if any). 
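+ *
+ * Illustrative teardown order for an imported segment (hypothetical
+ * caller; 'mhandle' is the handle previously passed to ldc_mem_map()):
+ *
+ *	(void) ldc_mem_unmap(mhandle);
+ *	(void) ldc_mem_free_handle(mhandle);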
+ */ +int +ldc_mem_unmap(ldc_mem_handle_t mhandle) +{ + int i, rv; + ldc_mhdl_t *mhdl = (ldc_mhdl_t *)mhandle; + ldc_chan_t *ldcp; + ldc_memseg_t *memseg; + + if (mhdl == 0 || mhdl->status != LDC_MAPPED) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_unmap: (0x%llx) handle is not mapped\n", + mhandle); + return (EINVAL); + } + + mutex_enter(&mhdl->lock); + + ldcp = mhdl->ldcp; + memseg = mhdl->memseg; + + D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n", + ldcp->id, mhdl); + + /* if we allocated shadow memory - free it */ + if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) { + kmem_free(memseg->vaddr, memseg->size); + } else if (mhdl->mtype == LDC_DIRECT_MAP) { + + /* unmap in the case of DIRECT_MAP */ + hat_unload(kas.a_hat, memseg->vaddr, memseg->size, + HAT_UNLOAD_UNLOCK); + + for (i = 0; i < memseg->npages; i++) { + rv = hv_ldc_unmap(memseg->pages[i].raddr); + if (rv) { + cmn_err(CE_WARN, + "ldc_mem_map: (0x%lx) hv unmap err %d\n", + ldcp->id, rv); + } + } + + vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size); + } + + /* free the allocated memseg and page structures */ + kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages)); + kmem_free(memseg->cookies, + (sizeof (ldc_mem_cookie_t) * memseg->ncookies)); + kmem_cache_free(ldcssp->memseg_cache, memseg); + + /* uninitialize the memory handle */ + mhdl->memseg = NULL; + mhdl->status = LDC_UNBOUND; + + D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n", + ldcp->id, mhdl); + + mutex_exit(&mhdl->lock); + return (0); +} + +/* + * Internal entry point for LDC mapped memory entry consistency + * semantics. Acquire copies the contents of the remote memory + * into the local shadow copy. The release operation copies the local + * contents into the remote memory. The offset and size specify the + * bounds for the memory range being synchronized. 
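+ *
+ * Callers use the ldc_mem_acquire()/ldc_mem_release() wrappers below.
+ * A typical pattern for a shadow-mapped segment is sketched here
+ * (illustrative only; 'mhandle', 'off' and 'len' are placeholders):
+ *
+ *	if (ldc_mem_acquire(mhandle, off, len) == 0) {
+ *		... read or update the shadow copy in [off, off + len) ...
+ *		(void) ldc_mem_release(mhandle, off, len);
+ *	}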
+ */
+static int
+i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
+ uint64_t offset, size_t size)
+{
+ int err;
+ ldc_mhdl_t *mhdl;
+ ldc_chan_t *ldcp;
+ ldc_memseg_t *memseg;
+ caddr_t local_vaddr;
+ size_t copy_size;
+
+ if (mhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_mem_acquire_release: invalid memory handle\n");
+ return (EINVAL);
+ }
+ mhdl = (ldc_mhdl_t *)mhandle;
+
+ mutex_enter(&mhdl->lock);
+
+ if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_mem_acquire_release: not mapped memory\n");
+ mutex_exit(&mhdl->lock);
+ return (EINVAL);
+ }
+
+ /* do nothing for direct map */
+ if (mhdl->mtype == LDC_DIRECT_MAP) {
+ mutex_exit(&mhdl->lock);
+ return (0);
+ }
+
+ /* do nothing for a COPY_IN without LDC_MEM_R or a COPY_OUT without LDC_MEM_W */
+ if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
+ (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
+ mutex_exit(&mhdl->lock);
+ return (0);
+ }
+
+ if (offset >= mhdl->memseg->size ||
+ (offset + size) > mhdl->memseg->size) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_mem_acquire_release: memory out of range\n");
+ mutex_exit(&mhdl->lock);
+ return (EINVAL);
+ }
+
+ /* get the channel handle and memory segment */
+ ldcp = mhdl->ldcp;
+ memseg = mhdl->memseg;
+
+ if (mhdl->mtype == LDC_SHADOW_MAP) {
+
+ local_vaddr = memseg->vaddr + offset;
+ copy_size = size;
+
+ /* copy to/from remote from/to local memory */
+ err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
+ &copy_size, memseg->cookies, memseg->ncookies,
+ direction);
+ if (err || copy_size != size) {
+ DWARN(ldcp->id,
+ "i_ldc_mem_acquire_release: copy failed\n");
+ mutex_exit(&mhdl->lock);
+ return (err);
+ }
+ }
+
+ mutex_exit(&mhdl->lock);
+
+ return (0);
+}
+
+/*
+ * Ensure that the contents of the local memory segment are consistent
+ * with the contents of the remote memory segment
+ */
+int
+ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
+{
+ return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
+}
+
+
+/*
+ * Ensure that the contents of the remote memory segment are consistent
+ * with the contents of the local memory segment
+ */
+int
+ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
+{
+ return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
+}
+
+/*
+ * Allocate a descriptor ring. The size of each descriptor
+ * must be 8-byte aligned and the entire ring should be a multiple
+ * of MMU_PAGESIZE. 
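+ *
+ * For example (illustrative values, assuming the 8K base page size used
+ * on sun4v), a ring of 128 descriptors of 64 bytes each is exactly one
+ * MMU_PAGESIZE page:
+ *
+ *	ldc_dring_handle_t dhandle;
+ *	int rv;
+ *
+ *	rv = ldc_mem_dring_create(128, 64, &dhandle);
+ *	if (rv == 0)
+ *		(void) ldc_mem_dring_destroy(dhandle);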
+ */ +int +ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle) +{ + ldc_dring_t *dringp; + size_t size = (dsize * len); + + D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n", + len, dsize); + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n"); + return (EINVAL); + } + + if (len == 0) { + DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n"); + return (EINVAL); + } + + /* descriptor size should be 8-byte aligned */ + if (dsize == 0 || (dsize & 0x7)) { + DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n"); + return (EINVAL); + } + + *dhandle = 0; + + /* Allocate a desc ring structure */ + dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP); + + /* Initialize dring */ + dringp->length = len; + dringp->dsize = dsize; + + /* round off to multiple of pagesize */ + dringp->size = (size & MMU_PAGEMASK); + if (size & MMU_PAGEOFFSET) + dringp->size += MMU_PAGESIZE; + + dringp->status = LDC_UNBOUND; + + /* allocate descriptor ring memory */ + dringp->base = kmem_zalloc(dringp->size, KM_SLEEP); + + /* initialize the desc ring lock */ + mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL); + + /* Add descriptor ring to the head of global list */ + mutex_enter(&ldcssp->lock); + dringp->next = ldcssp->dring_list; + ldcssp->dring_list = dringp; + mutex_exit(&ldcssp->lock); + + *dhandle = (ldc_dring_handle_t)dringp; + + D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n"); + + return (0); +} + + +/* + * Destroy a descriptor ring. + */ +int +ldc_mem_dring_destroy(ldc_dring_handle_t dhandle) +{ + ldc_dring_t *dringp; + ldc_dring_t *tmp_dringp; + + D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n"); + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_destroy: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + + if (dringp->status == LDC_BOUND) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_destroy: desc ring is bound\n"); + return (EACCES); + } + + mutex_enter(&dringp->lock); + mutex_enter(&ldcssp->lock); + + /* remove from linked list - if not bound */ + tmp_dringp = ldcssp->dring_list; + if (tmp_dringp == dringp) { + ldcssp->dring_list = dringp->next; + dringp->next = NULL; + + } else { + while (tmp_dringp != NULL) { + if (tmp_dringp->next == dringp) { + tmp_dringp->next = dringp->next; + dringp->next = NULL; + break; + } + tmp_dringp = tmp_dringp->next; + } + if (tmp_dringp == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_destroy: invalid descriptor\n"); + mutex_exit(&ldcssp->lock); + mutex_exit(&dringp->lock); + return (EINVAL); + } + } + + mutex_exit(&ldcssp->lock); + + /* free the descriptor ring */ + kmem_free(dringp->base, dringp->size); + + mutex_exit(&dringp->lock); + + /* destroy dring lock */ + mutex_destroy(&dringp->lock); + + /* free desc ring object */ + kmem_free(dringp, sizeof (ldc_dring_t)); + + return (0); +} + +/* + * Bind a previously allocated dring to a channel. The channel should + * be OPEN in order to bind the ring to the channel. Returns back a + * descriptor ring cookie. The descriptor ring is exported for remote + * access by the client at the other end of the channel. An entry for + * dring pages is stored in map table (via call to ldc_mem_bind_handle). 
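+ *
+ * Exporter-side sketch (illustrative; 'chan' is an ldc_handle_t for a
+ * channel that has been opened, and the returned cookie is sent to the
+ * peer by a higher-level protocol):
+ *
+ *	ldc_dring_handle_t dhandle;
+ *	ldc_mem_cookie_t cookie;
+ *	uint32_t ccount;
+ *
+ *	if (ldc_mem_dring_create(128, 64, &dhandle) == 0 &&
+ *	    ldc_mem_dring_bind(chan, dhandle, LDC_SHADOW_MAP, LDC_MEM_RW,
+ *	    &cookie, &ccount) == 0) {
+ *		... advertise 'cookie' and 'ccount' to the peer ...
+ *	}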
+ */ +int +ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle, + uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount) +{ + int err; + ldc_chan_t *ldcp; + ldc_dring_t *dringp; + ldc_mem_handle_t mhandle; + + /* check to see if channel is initalized */ + if (handle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_bind: invalid channel handle\n"); + return (EINVAL); + } + ldcp = (ldc_chan_t *)handle; + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_bind: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + + if (cookie == NULL) { + DWARN(ldcp->id, + "ldc_mem_dring_bind: invalid cookie arg\n"); + return (EINVAL); + } + + mutex_enter(&dringp->lock); + + if (dringp->status == LDC_BOUND) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n", + ldcp->id); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + if ((perm & LDC_MEM_RW) == 0) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_bind: invalid permissions\n"); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) { + DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n"); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + dringp->ldcp = ldcp; + + /* create an memory handle */ + err = ldc_mem_alloc_handle(handle, &mhandle); + if (err || mhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n", + ldcp->id); + mutex_exit(&dringp->lock); + return (err); + } + dringp->mhdl = mhandle; + + /* bind the descriptor ring to channel */ + err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size, + mtype, perm, cookie, ccount); + if (err) { + DWARN(ldcp->id, + "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n", + ldcp->id); + mutex_exit(&dringp->lock); + return (err); + } + + /* + * For now return error if we get more than one cookie + * FUTURE: Return multiple cookies .. + */ + if (*ccount > 1) { + (void) ldc_mem_unbind_handle(mhandle); + (void) ldc_mem_free_handle(mhandle); + + dringp->ldcp = NULL; + dringp->mhdl = NULL; + *ccount = 0; + + mutex_exit(&dringp->lock); + return (EAGAIN); + } + + /* Add descriptor ring to channel's exported dring list */ + mutex_enter(&ldcp->exp_dlist_lock); + dringp->ch_next = ldcp->exp_dring_list; + ldcp->exp_dring_list = dringp; + mutex_exit(&ldcp->exp_dlist_lock); + + dringp->status = LDC_BOUND; + + mutex_exit(&dringp->lock); + + return (0); +} + +/* + * Return the next cookie associated with the specified dring handle + */ +int +ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie) +{ + int rv = 0; + ldc_dring_t *dringp; + ldc_chan_t *ldcp; + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_nextcookie: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + mutex_enter(&dringp->lock); + + if (dringp->status != LDC_BOUND) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_nextcookie: descriptor ring 0x%llx " + "is not bound\n", dringp); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + ldcp = dringp->ldcp; + + if (cookie == NULL) { + DWARN(ldcp->id, + "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n", + ldcp->id); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie); + mutex_exit(&dringp->lock); + + return (rv); +} +/* + * Unbind a previously bound dring from a channel. 
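+ *
+ * Exporter-side teardown sketch (illustrative; undoes the bind shown
+ * above before the ring is destroyed):
+ *
+ *	(void) ldc_mem_dring_unbind(dhandle);
+ *	(void) ldc_mem_dring_destroy(dhandle);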
+ */ +int +ldc_mem_dring_unbind(ldc_dring_handle_t dhandle) +{ + ldc_dring_t *dringp; + ldc_dring_t *tmp_dringp; + ldc_chan_t *ldcp; + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_unbind: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + + mutex_enter(&dringp->lock); + + if (dringp->status == LDC_UNBOUND) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n", + dringp); + mutex_exit(&dringp->lock); + return (EINVAL); + } + ldcp = dringp->ldcp; + + mutex_enter(&ldcp->exp_dlist_lock); + + tmp_dringp = ldcp->exp_dring_list; + if (tmp_dringp == dringp) { + ldcp->exp_dring_list = dringp->ch_next; + dringp->ch_next = NULL; + + } else { + while (tmp_dringp != NULL) { + if (tmp_dringp->ch_next == dringp) { + tmp_dringp->ch_next = dringp->ch_next; + dringp->ch_next = NULL; + break; + } + tmp_dringp = tmp_dringp->ch_next; + } + if (tmp_dringp == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_unbind: invalid descriptor\n"); + mutex_exit(&ldcp->exp_dlist_lock); + mutex_exit(&dringp->lock); + return (EINVAL); + } + } + + mutex_exit(&ldcp->exp_dlist_lock); + + (void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl); + (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); + + dringp->ldcp = NULL; + dringp->mhdl = NULL; + dringp->status = LDC_UNBOUND; + + mutex_exit(&dringp->lock); + + return (0); +} + +/* + * Get information about the dring. The base address of the descriptor + * ring along with the type and permission are returned back. + */ +int +ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo) +{ + ldc_dring_t *dringp; + int rv; + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_info: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + + mutex_enter(&dringp->lock); + + if (dringp->mhdl) { + rv = ldc_mem_info(dringp->mhdl, minfo); + if (rv) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_info: error reading mem info\n"); + mutex_exit(&dringp->lock); + return (rv); + } + } else { + minfo->vaddr = dringp->base; + minfo->raddr = NULL; + minfo->status = dringp->status; + } + + mutex_exit(&dringp->lock); + + return (0); +} + +/* + * Map an exported descriptor ring into the local address space. If the + * descriptor ring was exported for direct map access, a HV call is made + * to allocate a RA range. If the map is done via a shadow copy, local + * shadow memory is allocated. 
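+ *
+ * Importer-side sketch (illustrative; the cookie, length and descriptor
+ * size are assumed to match what the exporting peer advertised over the
+ * channel 'chan'):
+ *
+ *	ldc_dring_handle_t dhandle;
+ *	int rv;
+ *
+ *	rv = ldc_mem_dring_map(chan, &cookie, 1, 128, 64,
+ *	    LDC_SHADOW_MAP, &dhandle);
+ *	if (rv == 0) {
+ *		(void) ldc_mem_dring_acquire(dhandle, 0, 127);
+ *		... read the descriptors, then ...
+ *		(void) ldc_mem_dring_unmap(dhandle);
+ *	}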
+ */
+int
+ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
+ uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
+ ldc_dring_handle_t *dhandle)
+{
+ int err;
+ ldc_chan_t *ldcp = (ldc_chan_t *)handle;
+ ldc_mem_handle_t mhandle;
+ ldc_dring_t *dringp;
+ size_t dring_size;
+
+ if (dhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_map: invalid dhandle\n");
+ return (EINVAL);
+ }
+
+ /* check to see if channel is initialized */
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_map: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ if (cookie == NULL) {
+ DWARN(ldcp->id,
+ "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
+ ldcp->id);
+ return (EINVAL);
+ }
+
+ /* FUTURE: For now we support only one cookie per dring */
+ ASSERT(ccount == 1);
+
+ if (cookie->size < (dsize * len)) {
+ DWARN(ldcp->id,
+ "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
+ ldcp->id);
+ return (EINVAL);
+ }
+
+ *dhandle = 0;
+
+ /* Allocate a dring structure */
+ dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
+
+ D1(ldcp->id,
+ "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
+ mtype, len, dsize, cookie->addr, cookie->size);
+
+ /* Initialize dring */
+ dringp->length = len;
+ dringp->dsize = dsize;
+
+ /* round off to multiple of page size */
+ dring_size = len * dsize;
+ dringp->size = (dring_size & MMU_PAGEMASK);
+ if (dring_size & MMU_PAGEOFFSET)
+ dringp->size += MMU_PAGESIZE;
+
+ dringp->ldcp = ldcp;
+
+ /* create a memory handle */
+ err = ldc_mem_alloc_handle(handle, &mhandle);
+ if (err || mhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
+ err);
+ kmem_free(dringp, sizeof (ldc_dring_t));
+ return (ENOMEM);
+ }
+
+ dringp->mhdl = mhandle;
+ dringp->base = NULL;
+
+ /* map the dring into local memory */
+ err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
+ &(dringp->base), NULL);
+ if (err || dringp->base == NULL) {
+ cmn_err(CE_WARN,
+ "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
+ (void) ldc_mem_free_handle(mhandle);
+ kmem_free(dringp, sizeof (ldc_dring_t));
+ return (ENOMEM);
+ }
+
+ /* initialize the desc ring lock */
+ mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* Add descriptor ring to channel's imported dring list */
+ mutex_enter(&ldcp->imp_dlist_lock);
+ dringp->ch_next = ldcp->imp_dring_list;
+ ldcp->imp_dring_list = dringp;
+ mutex_exit(&ldcp->imp_dlist_lock);
+
+ dringp->status = LDC_MAPPED;
+
+ *dhandle = (ldc_dring_handle_t)dringp;
+
+ return (0);
+}
+
+/*
+ * Unmap a descriptor ring. Free shadow memory (if any). 
+ */ +int +ldc_mem_dring_unmap(ldc_dring_handle_t dhandle) +{ + ldc_dring_t *dringp; + ldc_dring_t *tmp_dringp; + ldc_chan_t *ldcp; + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_unmap: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + + if (dringp->status != LDC_MAPPED) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_unmap: not a mapped desc ring\n"); + return (EINVAL); + } + + mutex_enter(&dringp->lock); + + ldcp = dringp->ldcp; + + mutex_enter(&ldcp->imp_dlist_lock); + + /* find and unlink the desc ring from channel import list */ + tmp_dringp = ldcp->imp_dring_list; + if (tmp_dringp == dringp) { + ldcp->imp_dring_list = dringp->ch_next; + dringp->ch_next = NULL; + + } else { + while (tmp_dringp != NULL) { + if (tmp_dringp->ch_next == dringp) { + tmp_dringp->ch_next = dringp->ch_next; + dringp->ch_next = NULL; + break; + } + tmp_dringp = tmp_dringp->ch_next; + } + if (tmp_dringp == NULL) { + DWARN(DBG_ALL_LDCS, + "ldc_mem_dring_unmap: invalid descriptor\n"); + mutex_exit(&ldcp->imp_dlist_lock); + mutex_exit(&dringp->lock); + return (EINVAL); + } + } + + mutex_exit(&ldcp->imp_dlist_lock); + + /* do a LDC memory handle unmap and free */ + (void) ldc_mem_unmap(dringp->mhdl); + (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl); + + dringp->status = 0; + dringp->ldcp = NULL; + + mutex_exit(&dringp->lock); + + /* destroy dring lock */ + mutex_destroy(&dringp->lock); + + /* free desc ring object */ + kmem_free(dringp, sizeof (ldc_dring_t)); + + return (0); +} + +/* + * Internal entry point for descriptor ring access entry consistency + * semantics. Acquire copies the contents of the remote descriptor ring + * into the local shadow copy. The release operation copies the local + * contents into the remote dring. The start and end locations specify + * bounds for the entries being synchronized. + */ +static int +i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle, + uint8_t direction, uint64_t start, uint64_t end) +{ + int err; + ldc_dring_t *dringp; + ldc_chan_t *ldcp; + uint64_t soff; + size_t copy_size; + + if (dhandle == NULL) { + DWARN(DBG_ALL_LDCS, + "i_ldc_dring_acquire_release: invalid desc ring handle\n"); + return (EINVAL); + } + dringp = (ldc_dring_t *)dhandle; + mutex_enter(&dringp->lock); + + if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) { + DWARN(DBG_ALL_LDCS, + "i_ldc_dring_acquire_release: not a mapped desc ring\n"); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + if (start >= dringp->length || end >= dringp->length) { + DWARN(DBG_ALL_LDCS, + "i_ldc_dring_acquire_release: index out of range\n"); + mutex_exit(&dringp->lock); + return (EINVAL); + } + + /* get the channel handle */ + ldcp = dringp->ldcp; + + copy_size = (start <= end) ? 
(((end - start) + 1) * dringp->dsize) : + ((dringp->length - start) * dringp->dsize); + + /* Calculate the relative offset for the first desc */ + soff = (start * dringp->dsize); + + /* copy to/from remote from/to local memory */ + D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n", + soff, copy_size); + err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, + direction, soff, copy_size); + if (err) { + DWARN(ldcp->id, + "i_ldc_dring_acquire_release: copy failed\n"); + mutex_exit(&dringp->lock); + return (err); + } + + /* do the balance */ + if (start > end) { + copy_size = ((end + 1) * dringp->dsize); + soff = 0; + + /* copy to/from remote from/to local memory */ + D1(ldcp->id, "i_ldc_dring_acquire_release: c2 " + "off=0x%llx sz=0x%llx\n", soff, copy_size); + err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl, + direction, soff, copy_size); + if (err) { + DWARN(ldcp->id, + "i_ldc_dring_acquire_release: copy failed\n"); + mutex_exit(&dringp->lock); + return (err); + } + } + + mutex_exit(&dringp->lock); + + return (0); +} + +/* + * Ensure that the contents in the local dring are consistent + * with the contents if of remote dring + */ +int +ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) +{ + return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end)); +} + +/* + * Ensure that the contents in the remote dring are consistent + * with the contents if of local dring + */ +int +ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end) +{ + return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end)); +} diff --git a/usr/src/uts/sun4v/io/vldc.c b/usr/src/uts/sun4v/io/vldc.c index ee54b14f12..703cccd34e 100644 --- a/usr/src/uts/sun4v/io/vldc.c +++ b/usr/src/uts/sun4v/io/vldc.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1100,12 +1100,11 @@ vldc_set_ldc_mode(vldc_port_t *vport, vldc_t *vldcp, int channel_mode) /* validate mode */ switch (channel_mode) { - case LDC_MODE_STREAM: + case LDC_MODE_RELIABLE: vport->is_stream = B_TRUE; break; case LDC_MODE_RAW: case LDC_MODE_UNRELIABLE: - case LDC_MODE_RELIABLE: vport->is_stream = B_FALSE; break; default: diff --git a/usr/src/uts/sun4v/sys/cnex.h b/usr/src/uts/sun4v/sys/cnex.h index 83e1c17feb..ff3b42f868 100644 --- a/usr/src/uts/sun4v/sys/cnex.h +++ b/usr/src/uts/sun4v/sys/cnex.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,9 +44,10 @@ typedef struct cnex_regspec { /* * Channel nexus interrupt map */ -struct cnex_pil_map { +struct cnex_intr_map { ldc_dev_t devclass; /* LDC device class */ uint32_t pil; /* PIL for device class */ + int32_t weight; /* Interrupt weight for device class */ }; /* @@ -61,6 +62,7 @@ typedef struct cnex_intr { uint_t (*hdlr)(); /* intr handler */ caddr_t arg1; /* intr argument 1 */ caddr_t arg2; /* intr argument 2 */ + int32_t weight; /* intr weight */ } cnex_intr_t; /* cnex interrupt types */ diff --git a/usr/src/uts/sun4v/sys/ldc.h b/usr/src/uts/sun4v/sys/ldc.h index 5a1287b008..c24ede002f 100644 --- a/usr/src/uts/sun4v/sys/ldc.h +++ b/usr/src/uts/sun4v/sys/ldc.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms.
 */
@@ -48,8 +48,8 @@ typedef uint64_t ldc_dring_handle_t; /* Descriptor ring handle */
 typedef enum {
 LDC_MODE_RAW, /* Raw mode */
 LDC_MODE_UNRELIABLE, /* Unreliable packet mode */
- LDC_MODE_RELIABLE, /* Reliable packet mode */
- LDC_MODE_STREAM /* Reliable byte stream */
+ _LDC_MODE_RESERVED_, /* reserved */
+ LDC_MODE_RELIABLE /* Reliable packet mode */
 } ldc_mode_t;
 /* LDC message payload sizes */
diff --git a/usr/src/uts/sun4v/sys/ldc_impl.h b/usr/src/uts/sun4v/sys/ldc_impl.h
index fabab5886c..da28695afd 100644
--- a/usr/src/uts/sun4v/sys/ldc_impl.h
+++ b/usr/src/uts/sun4v/sys/ldc_impl.h
@@ -509,6 +509,27 @@ typedef struct ldc_soft_state {
 kmem_cache_t *memseg_cache; /* Memory segment cache */
 } ldc_soft_state_t;
+
+/*
+ * Debugging Utilities
+ */
+#define DBG_ALL_LDCS -1
+#ifdef DEBUG
+#define D1 \
+if (ldcdbg & 0x01) \
+ ldcdebug
+#define D2 \
+if (ldcdbg & 0x02) \
+ ldcdebug
+#define DWARN \
+if (ldcdbg & 0x04) \
+ ldcdebug
+#else
+#define D1
+#define D2
+#define DWARN
+#endif
+
 #ifdef __cplusplus
 }
 #endif