Diffstat (limited to 'usr/src/uts/common/avs/ns/rdc')
23 files changed, 0 insertions, 27612 deletions
diff --git a/usr/src/uts/common/avs/ns/rdc/Makefile b/usr/src/uts/common/avs/ns/rdc/Makefile deleted file mode 100644 index 91b1eb3d5b..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# include global definitions -include ../../../../../Makefile.master - -HDRS= rdc.h \ - rdc_io.h \ - rdc_ioctl.h \ - rdc_bitmap.h \ - rdc_diskq.h - -DERIVED_HDRS= rdc_prot.h - -ROOTDIR= $(ROOT)/usr/include/sys/nsctl - -ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) $(DERIVED_HDRS:%=$(ROOTDIR)/%) - -# install rule -$(ROOTDIR)/%: % - $(INS.file) - -DERIVED_FILES= rdc_prot.h - -CHECKHDRS= $(HDRS:%.h=%.check) - -.KEEP_STATE: - -.PARALLEL: $(CHECKHDRS) - -install_h: $(ROOTDIRS) $(ROOTHDRS) - -clobber clean: - $(RM) $(DERIVED_FILES) -rdc_prot.h: rdc_prot.x - $(RPCGEN) -h rdc_prot.x > $@ - -$(ROOTDIR): - $(INS.dir) - -check: $(CHECKHDRS) diff --git a/usr/src/uts/common/avs/ns/rdc/rdc.c b/usr/src/uts/common/avs/ns/rdc/rdc.c deleted file mode 100644 index 28750c6e25..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc.c +++ /dev/null @@ -1,1108 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#define _RDC_ -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/conf.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/cred.h> -#include <sys/ddi.h> -#include <sys/sysmacros.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc.h" -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_ioctl.h" -#include "rdcsrv.h" -#include "rdc_diskq.h" - -#define DIDINIT 0x01 -#define DIDNODES 0x02 -#define DIDCONFIG 0x04 - -static int rdcopen(dev_t *devp, int flag, int otyp, cred_t *crp); -static int rdcclose(dev_t dev, int flag, int otyp, cred_t *crp); -static int rdcprint(dev_t dev, char *str); -static int rdcioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, - int *rvp); -static int rdcattach(dev_info_t *dip, ddi_attach_cmd_t cmd); -static int rdcdetach(dev_info_t *dip, ddi_detach_cmd_t cmd); -static int rdcgetinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, - void **result); -#ifdef DEBUG -static int rdc_clrkstat(void *); -#endif - -/* - * kstat interface - */ -static kstat_t *sndr_kstats; - -int sndr_info_stats_update(kstat_t *ksp, int rw); - -static sndr_m_stats_t sndr_info_stats = { - {RDC_MKSTAT_MAXSETS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_MAXFBAS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_RPC_TIMEOUT, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_HEALTH_THRES, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_BITMAP_WRITES, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_CLNT_COTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_CLNT_CLTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_SVC_COTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_SVC_CLTS_CALLS, KSTAT_DATA_ULONG}, - {RDC_MKSTAT_BITMAP_REF_DELAY, KSTAT_DATA_ULONG} -}; - -int rdc_info_stats_update(kstat_t *ksp, int rw); - -static rdc_info_stats_t rdc_info_stats = { - {RDC_IKSTAT_FLAGS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_SYNCFLAGS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_BMPFLAGS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_SYNCPOS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_VOLSIZE, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_BITSSET, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_AUTOSYNC, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_MAXQFBAS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_MAXQITEMS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_FILE, KSTAT_DATA_STRING}, - {RDC_IKSTAT_SECFILE, KSTAT_DATA_STRING}, - {RDC_IKSTAT_BITMAP, KSTAT_DATA_STRING}, - {RDC_IKSTAT_PRIMARY_HOST, KSTAT_DATA_STRING}, - {RDC_IKSTAT_SECONDARY_HOST, KSTAT_DATA_STRING}, - {RDC_IKSTAT_TYPE_FLAG, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_BMP_SIZE, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_DISK_STATUS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_IF_DOWN, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_IF_RPC_VERSION, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_BLOCK_HWM, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_ITEM_HWM, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_THROTTLE_DELAY, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_ITEMS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_ASYNC_BLOCKS, KSTAT_DATA_ULONG}, - {RDC_IKSTAT_QUEUE_TYPE, KSTAT_DATA_CHAR} -}; - -static struct cb_ops rdc_cb_ops = { - rdcopen, - rdcclose, - nulldev, /* no strategy */ - rdcprint, - nodev, /* no dump */ - nodev, /* no read */ - nodev, /* no write */ - rdcioctl, - nodev, /* no devmap */ - nodev, /* no mmap */ - nodev, /* no segmap */ - nochpoll, - ddi_prop_op, - NULL, /* not STREAMS */ - D_NEW | D_MP | D_64BIT, - CB_REV, - nodev, /* no aread */ - nodev, 
/* no awrite */ -}; - -static struct dev_ops rdc_ops = { - DEVO_REV, - 0, - rdcgetinfo, - nulldev, /* identify */ - nulldev, /* probe */ - rdcattach, - rdcdetach, - nodev, /* no reset */ - &rdc_cb_ops, - (struct bus_ops *)NULL -}; - -static struct modldrv rdc_ldrv = { - &mod_driverops, - "nws:Remote Mirror:" ISS_VERSION_STR, - &rdc_ops -}; - -static struct modlinkage rdc_modlinkage = { - MODREV_1, - &rdc_ldrv, - NULL -}; - -const int sndr_major_rev = ISS_VERSION_MAJ; -const int sndr_minor_rev = ISS_VERSION_MIN; -const int sndr_micro_rev = ISS_VERSION_MIC; -const int sndr_baseline_rev = ISS_VERSION_NUM; -static char sndr_version[16]; - -static void *rdc_dip; - -extern int _rdc_init_dev(); -extern void _rdc_deinit_dev(); -extern void rdc_link_down_free(); - -int rdc_bitmap_mode; -int rdc_auto_sync; -int rdc_max_sets; -extern int rdc_health_thres; - -kmutex_t rdc_sync_mutex; -rdc_sync_event_t rdc_sync_event; -clock_t rdc_sync_event_timeout; - -static void -rdc_sync_event_init() -{ - mutex_init(&rdc_sync_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_sync_event.mutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&rdc_sync_event.cv, NULL, CV_DRIVER, NULL); - cv_init(&rdc_sync_event.done_cv, NULL, CV_DRIVER, NULL); - rdc_sync_event.master[0] = 0; - rdc_sync_event.lbolt = (clock_t)0; - rdc_sync_event_timeout = RDC_SYNC_EVENT_TIMEOUT; -} - - -static void -rdc_sync_event_destroy() -{ - mutex_destroy(&rdc_sync_mutex); - mutex_destroy(&rdc_sync_event.mutex); - cv_destroy(&rdc_sync_event.cv); - cv_destroy(&rdc_sync_event.done_cv); -} - - - -int -_init(void) -{ - return (mod_install(&rdc_modlinkage)); -} - -int -_fini(void) -{ - return (mod_remove(&rdc_modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&rdc_modlinkage, modinfop)); -} - -static int -rdcattach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - intptr_t flags; - int instance; - int i; - - /*CONSTCOND*/ - ASSERT(sizeof (u_longlong_t) == 8); - - if (cmd != DDI_ATTACH) - return (DDI_FAILURE); - - (void) strncpy(sndr_version, _VERSION_, sizeof (sndr_version)); - - instance = ddi_get_instance(dip); - rdc_dip = dip; - - flags = 0; - - rdc_sync_event_init(); - - /* - * rdc_max_sets must be set before calling _rdc_load(). 
- */ - - rdc_max_sets = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "rdc_max_sets", 64); - - if (_rdc_init_dev()) { - cmn_err(CE_WARN, "!rdc: _rdc_init_dev failed"); - goto out; - } - flags |= DIDINIT; - - if (_rdc_load() != 0) { - cmn_err(CE_WARN, "!rdc: _rdc_load failed"); - goto out; - } - - if (_rdc_configure()) { - cmn_err(CE_WARN, "!rdc: _rdc_configure failed"); - goto out; - } - flags |= DIDCONFIG; - - if (ddi_create_minor_node(dip, "rdc", S_IFCHR, instance, DDI_PSEUDO, 0) - != DDI_SUCCESS) { - cmn_err(CE_WARN, "!rdc: could not create node."); - goto out; - } - flags |= DIDNODES; - - rdc_bitmap_mode = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "rdc_bitmap_mode", 0); - - switch (rdc_bitmap_mode) { - case RDC_BMP_AUTO: /* 0 */ - break; - case RDC_BMP_ALWAYS: /* 1 */ - break; - case RDC_BMP_NEVER: /* 2 */ - cmn_err(CE_NOTE, "!SNDR bitmap mode override"); - cmn_err(CE_CONT, - "!SNDR: bitmaps will only be written on shutdown\n"); - break; - default: /* unknown */ - cmn_err(CE_NOTE, - "!SNDR: unknown bitmap mode %d - autodetecting mode", - rdc_bitmap_mode); - rdc_bitmap_mode = RDC_BMP_AUTO; - break; - } - - rdc_bitmap_init(); - - rdc_auto_sync = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "rdc_auto_sync", 0); - - i = ddi_prop_get_int(DDI_DEV_T_ANY, dip, - DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "rdc_health_thres", RDC_HEALTH_THRESHOLD); - if (i >= RDC_MIN_HEALTH_THRES) - rdc_health_thres = i; - else - cmn_err(CE_WARN, "!value rdc_heath_thres from rdc.conf ignored " - "as it is smaller than the min value of %d", - RDC_MIN_HEALTH_THRES); - - ddi_set_driver_private(dip, (caddr_t)flags); - ddi_report_dev(dip); - - sndr_kstats = kstat_create(RDC_KSTAT_MODULE, 0, - RDC_KSTAT_MINFO, RDC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (sndr_m_stats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - if (sndr_kstats) { - sndr_kstats->ks_data = &sndr_info_stats; - sndr_kstats->ks_update = sndr_info_stats_update; - sndr_kstats->ks_private = &rdc_k_info[0]; - kstat_install(sndr_kstats); - } else - cmn_err(CE_WARN, "!SNDR: module kstats failed"); - - return (DDI_SUCCESS); - -out: - DTRACE_PROBE(rdc_attach_failed); - ddi_set_driver_private(dip, (caddr_t)flags); - (void) rdcdetach(dip, DDI_DETACH); - return (DDI_FAILURE); -} - -static int -rdcdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rdcd; - intptr_t flags; - - - if (cmd != DDI_DETACH) { - DTRACE_PROBE(rdc_detach_unknown_cmd); - return (DDI_FAILURE); - } - - if (rdc_k_info == NULL || rdc_u_info == NULL) - goto cleanup; - - mutex_enter(&rdc_conf_lock); - - for (rdcd = 0; rdcd < rdc_max_sets; rdcd++) { - krdc = &rdc_k_info[rdcd]; - urdc = &rdc_u_info[rdcd]; - - if (IS_ENABLED(urdc) || krdc->devices) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc: cannot detach, rdcd %d still in use", rdcd); -#endif - mutex_exit(&rdc_conf_lock); - DTRACE_PROBE(rdc_detach_err_busy); - return (DDI_FAILURE); - } - } - - mutex_exit(&rdc_conf_lock); - -cleanup: - flags = (intptr_t)ddi_get_driver_private(dip); - - if (flags & DIDNODES) - ddi_remove_minor_node(dip, NULL); - - if (sndr_kstats) { - kstat_delete(sndr_kstats); - } - if (flags & DIDINIT) - _rdc_deinit_dev(); - - if (flags & DIDCONFIG) { - (void) _rdc_deconfigure(); - (void) _rdc_unload(); - rdcsrv_unload(); - } - - rdc_sync_event_destroy(); - rdc_link_down_free(); - - rdc_dip = NULL; - return (DDI_SUCCESS); -} - -/* ARGSUSED */ -static int -rdcgetinfo(dev_info_t *dip, 
ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int rc = DDI_FAILURE; - - switch (infocmd) { - - case DDI_INFO_DEVT2DEVINFO: - *result = rdc_dip; - rc = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2INSTANCE: - /* We only have a single instance */ - *result = 0; - rc = DDI_SUCCESS; - break; - - default: - break; - } - - return (rc); -} - - -/* ARGSUSED */ - -static int -rdcopen(dev_t *devp, int flag, int otyp, cred_t *crp) -{ - return (0); -} - - -/* ARGSUSED */ - -static int -rdcclose(dev_t dev, int flag, int otyp, cred_t *crp) -{ - return (0); -} - -/* ARGSUSED */ - -static int -rdcprint(dev_t dev, char *str) -{ - int instance = 0; - - cmn_err(CE_WARN, "!rdc%d: %s", instance, str); - return (0); -} - - -static int -convert_ioctl_args(int cmd, intptr_t arg, int mode, _rdc_ioctl_t *args) -{ - _rdc_ioctl32_t args32; - - if (ddi_copyin((void *)arg, &args32, sizeof (_rdc_ioctl32_t), mode)) - return (EFAULT); - - bzero((void *)args, sizeof (_rdc_ioctl_t)); - - switch (cmd) { - case RDC_CONFIG: - args->arg0 = (uint32_t)args32.arg0; /* _rdc_config_t * */ - args->arg1 = (uint32_t)args32.arg1; /* pointer */ - args->arg2 = (uint32_t)args32.arg2; /* size */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_STATUS: - args->arg0 = (uint32_t)args32.arg0; /* pointer */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_ENABLE_SVR: - args->arg0 = (uint32_t)args32.arg0; /* _rdc_svc_args * */ - break; - - case RDC_VERSION: - args->arg0 = (uint32_t)args32.arg0; /* _rdc_version_t * */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_SYNC_EVENT: - args->arg0 = (uint32_t)args32.arg0; /* char * */ - args->arg1 = (uint32_t)args32.arg1; /* char * */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - - case RDC_LINK_DOWN: - args->arg0 = (uint32_t)args32.arg0; /* char * */ - args->ustatus = (spcs_s_info_t)args32.ustatus; - break; - case RDC_POOL_CREATE: - args->arg0 = (uint32_t)args32.arg0; /* svcpool_args * */ - break; - case RDC_POOL_WAIT: - args->arg0 = (uint32_t)args32.arg0; /* int */ - break; - case RDC_POOL_RUN: - args->arg0 = (uint32_t)args32.arg0; /* int */ - break; - - default: - return (EINVAL); - } - - return (0); -} - -/* - * Build a 32bit rdc_set structure and copyout to the user level. - */ -int -rdc_status_copy32(const void *arg, void *usetp, size_t size, int mode) -{ - rdc_u_info_t *urdc = (rdc_u_info_t *)arg; - struct rdc_set32 set32; - size_t tailsize; -#ifdef DEBUG - size_t tailsize32; -#endif - - bzero(&set32, sizeof (set32)); - - tailsize = sizeof (struct rdc_addr32) - - offsetof(struct rdc_addr32, intf); - - /* primary address structure, avoiding netbuf */ - bcopy(&urdc->primary.intf[0], &set32.primary.intf[0], tailsize); - - /* secondary address structure, avoiding netbuf */ - bcopy(&urdc->secondary.intf[0], &set32.secondary.intf[0], tailsize); - - /* - * the rest, avoiding netconfig - * note: the tail must be the same size in both structures - */ - tailsize = sizeof (struct rdc_set) - offsetof(struct rdc_set, flags); -#ifdef DEBUG - /* - * ASSERT is calling for debug reason, and tailsize32 is only declared - * for ASSERT, put them under debug to avoid lint warning. - */ - tailsize32 = sizeof (struct rdc_set32) - - offsetof(struct rdc_set32, flags); - ASSERT(tailsize == tailsize32); -#endif - - bcopy(&urdc->flags, &set32.flags, tailsize); - - /* copyout to user level */ - return (ddi_copyout(&set32, usetp, size, mode)); -} - - -/* - * Status ioctl. 
- */ -static int -rdcstatus(_rdc_ioctl_t *args, int mode) -{ - int (*copyout)(const void *, void *, size_t, int); - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - disk_queue *dqp; - char *usetp; /* pointer to user rdc_set structure */ - size_t size; /* sizeof user rdc_set structure */ - int32_t *maxsetsp; /* address of status->maxsets; */ - int nset, max, i, j; - - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - struct rdc_status32 status32; - - if (ddi_copyin((void *)args->arg0, &status32, - sizeof (status32), mode)) { - return (EFAULT); - } - - usetp = ((char *)args->arg0) + - offsetof(struct rdc_status32, rdc_set); - maxsetsp = (int32_t *)((char *)args->arg0 + - offsetof(struct rdc_status32, maxsets)); - nset = status32.nset; - - size = sizeof (struct rdc_set32); - copyout = rdc_status_copy32; - } else { - struct rdc_status status; - - if (ddi_copyin((void *)args->arg0, &status, - sizeof (status), mode)) { - return (EFAULT); - } - - usetp = ((char *)args->arg0) + - offsetof(struct rdc_status, rdc_set); - maxsetsp = (int32_t *)((char *)args->arg0 + - offsetof(struct rdc_status, maxsets)); - nset = status.nset; - - size = sizeof (struct rdc_set); - copyout = ddi_copyout; - } - - max = min(nset, rdc_max_sets); - - for (i = 0, j = 0; i < max; i++) { - urdc = &rdc_u_info[i]; - krdc = &rdc_k_info[i]; - - if (!IS_ENABLED(urdc)) - continue; - - /* - * sneak out qstate in urdc->flags - * this is harmless because it's value is not used - * in urdc->flags. the real qstate is kept in - * group->diskq->disk_hdr.h.state - */ - if (RDC_IS_DISKQ(krdc->group)) { - dqp = &krdc->group->diskq; - if (IS_QSTATE(dqp, RDC_QNOBLOCK)) - urdc->flags |= RDC_QNOBLOCK; - } - - j++; - if ((*copyout)(urdc, usetp, size, mode) != 0) - return (EFAULT); - - urdc->flags &= ~RDC_QNOBLOCK; /* clear qstate */ - usetp += size; - } - - /* copyout rdc_max_sets value */ - - if (ddi_copyout(&rdc_max_sets, maxsetsp, sizeof (*maxsetsp), mode) != 0) - return (EFAULT); - - /* copyout number of sets manipulated */ - - /*CONSTCOND*/ - ASSERT(offsetof(struct rdc_status32, nset) == 0); - /*CONSTCOND*/ - ASSERT(offsetof(struct rdc_status, nset) == 0); - - return (ddi_copyout(&j, (void *)args->arg0, sizeof (int), mode)); -} - - -/* ARGSUSED */ - -static int -rdcioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvp) -{ - spcs_s_info_t kstatus = NULL; - _rdc_ioctl_t args; - int error; - int rc = 0; - - if (cmd != RDC_STATUS) { - if ((error = drv_priv(crp)) != 0) - return (error); - } -#ifdef DEBUG - if (cmd == RDC_ASYNC6) { - rc = rdc_async6((void *)arg, mode, rvp); - return (rc); - } - - if (cmd == RDC_CLRKSTAT) { - rc = rdc_clrkstat((void *)arg); - return (rc); - } - - if (cmd == RDC_STALL0) { - if (((int)arg > 1) || ((int)arg < 0)) - return (EINVAL); - rdc_stallzero((int)arg); - return (0); - } - if (cmd == RDC_READGEN) { - rc = rdc_readgen((void *)arg, mode, rvp); - return (rc); - } -#endif - if (cmd == RDC_BITMAPOP) { - rdc_bitmap_op_t bmop; - rdc_bitmap_op32_t bmop32; - - if (ddi_model_convert_from(mode & FMODELS) - == DDI_MODEL_ILP32) { - if (ddi_copyin((void *)arg, &bmop32, sizeof (bmop32), - mode)) - return (EFAULT); - bmop.offset = bmop32.offset; - bmop.op = bmop32.op; - (void) strncpy(bmop.sechost, bmop32.sechost, - MAX_RDC_HOST_SIZE); - (void) strncpy(bmop.secfile, bmop32.secfile, - NSC_MAXPATH); - bmop.len = bmop32.len; - bmop.addr = (unsigned long)bmop32.addr; - } else { - if (ddi_copyin((void *)arg, &bmop, sizeof (bmop), - mode)) - return (EFAULT); - } - rc = rdc_bitmapset(bmop.op, 
bmop.sechost, bmop.secfile, - (void *)bmop.addr, bmop.len, bmop.offset, mode); - return (rc); - } - - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - if ((rc = convert_ioctl_args(cmd, arg, mode, &args)) != 0) - return (rc); - } else { - if (ddi_copyin((void *)arg, &args, - sizeof (_rdc_ioctl_t), mode)) { - return (EFAULT); - } - } - - kstatus = spcs_s_kcreate(); - if (!kstatus) { - return (ENOMEM); - } - - - switch (cmd) { - - case RDC_POOL_CREATE: { - struct svcpool_args p; - - if (ddi_copyin((void *)arg, &p, sizeof (p), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - error = svc_pool_create(&p); - - break; - } - case RDC_POOL_WAIT: { - int id; - - if (ddi_copyin((void *)arg, &id, sizeof (id), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - - error = svc_wait(id); - break; - } - case RDC_POOL_RUN: { - int id; - - if (ddi_copyin((void *)arg, &id, sizeof (id), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - error = svc_do_run(id); - break; - } - case RDC_ENABLE_SVR: - { - STRUCT_DECL(rdc_svc_args, parms); - - STRUCT_INIT(parms, mode); - /* Only used by sndrd which does not use unistat */ - - if (ddi_copyin((void *)args.arg0, STRUCT_BUF(parms), - STRUCT_SIZE(parms), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - rc = rdc_start_server(STRUCT_BUF(parms), mode); - } - break; - - case RDC_STATUS: - rc = rdcstatus(&args, mode); - break; - - case RDC_CONFIG: - rc = _rdc_config((void *)args.arg0, mode, kstatus, rvp); - spcs_s_copyoutf(&kstatus, args.ustatus); - return (rc); - - case RDC_VERSION: - { - STRUCT_DECL(rdc_version, parms); - - STRUCT_INIT(parms, mode); - - STRUCT_FSET(parms, major, sndr_major_rev); - STRUCT_FSET(parms, minor, sndr_minor_rev); - STRUCT_FSET(parms, micro, sndr_micro_rev); - STRUCT_FSET(parms, baseline, sndr_baseline_rev); - - if (ddi_copyout(STRUCT_BUF(parms), (void *)args.arg0, - STRUCT_SIZE(parms), mode)) { - spcs_s_kfree(kstatus); - return (EFAULT); - } - break; - } - - case RDC_LINK_DOWN: - /* char *host from user */ - rc = _rdc_link_down((void *)args.arg0, mode, kstatus, rvp); - spcs_s_copyoutf(&kstatus, args.ustatus); - - return (rc); - - case RDC_SYNC_EVENT: - rc = _rdc_sync_event_wait((void *)args.arg0, (void *)args.arg1, - mode, kstatus, rvp); - spcs_s_copyoutf(&kstatus, args.ustatus); - - return (rc); - - - default: - rc = EINVAL; - break; - } - - spcs_s_kfree(kstatus); - return (rc); -} - -int -sndr_info_stats_update(kstat_t *ksp, int rw) -{ - extern int rdc_rpc_tmout; - extern int rdc_health_thres; - extern int rdc_bitmap_delay; - extern long rdc_clnt_count; - extern long rdc_svc_count; - sndr_m_stats_t *info_stats; - rdc_k_info_t *krdc; - - info_stats = (sndr_m_stats_t *)(ksp->ks_data); - krdc = (rdc_k_info_t *)(ksp->ks_private); - - /* no writes currently allowed */ - - if (rw == KSTAT_WRITE) { - return (EACCES); - } - - /* default to READ */ - info_stats->m_maxsets.value.ul = rdc_max_sets; - info_stats->m_maxfbas.value.ul = krdc->maxfbas; - info_stats->m_rpc_timeout.value.ul = rdc_rpc_tmout; - info_stats->m_health_thres.value.ul = rdc_health_thres; - info_stats->m_bitmap_writes.value.ul = krdc->bitmap_write; - info_stats->m_bitmap_ref_delay.value.ul = rdc_bitmap_delay; - - /* clts counters not implemented yet */ - info_stats->m_clnt_cots_calls.value.ul = rdc_clnt_count; - info_stats->m_clnt_clts_calls.value.ul = 0; - info_stats->m_svc_cots_calls.value.ul = rdc_svc_count; - info_stats->m_svc_clts_calls.value.ul = 0; - - return (0); -} - -/* - * copy tailsize-1 bytes of tail of s to s1. 
- */ -void -rdc_str_tail_cpy(char *s1, char *s, size_t tailsize) -{ - /* To avoid un-terminated string, max size is 16 - 1 */ - ssize_t offset = strlen(s) - (tailsize - 1); - - offset = (offset > 0) ? offset : 0; - - /* ensure it's null terminated */ - (void) strlcpy(s1, (const char *)(s + offset), tailsize); -} - -int -rdc_info_stats_update(kstat_t *ksp, int rw) -{ - rdc_info_stats_t *rdc_info_stats; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - rdc_info_stats = (rdc_info_stats_t *)(ksp->ks_data); - krdc = (rdc_k_info_t *)(ksp->ks_private); - urdc = &rdc_u_info[krdc->index]; - - /* no writes currently allowed */ - - if (rw == KSTAT_WRITE) { - return (EACCES); - } - - /* default to READ */ - rdc_info_stats->s_flags.value.ul = urdc->flags; - rdc_info_stats->s_syncflags.value.ul = - urdc->sync_flags; - rdc_info_stats->s_bmpflags.value.ul = - urdc->bmap_flags; - rdc_info_stats->s_syncpos.value.ul = - urdc->sync_pos; - rdc_info_stats->s_volsize.value.ul = - urdc->volume_size; - rdc_info_stats->s_bits_set.value.ul = - urdc->bits_set; - rdc_info_stats->s_autosync.value.ul = - urdc->autosync; - rdc_info_stats->s_maxqfbas.value.ul = - urdc->maxqfbas; - rdc_info_stats->s_maxqitems.value.ul = - urdc->maxqitems; - - kstat_named_setstr(&rdc_info_stats->s_primary_vol, - urdc->primary.file); - - kstat_named_setstr(&rdc_info_stats->s_secondary_vol, - urdc->secondary.file); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - kstat_named_setstr(&rdc_info_stats->s_bitmap, - urdc->primary.bitmap); - } else { - kstat_named_setstr(&rdc_info_stats->s_bitmap, - urdc->secondary.bitmap); - } - - kstat_named_setstr(&rdc_info_stats->s_primary_intf, - urdc->primary.intf); - - kstat_named_setstr(&rdc_info_stats->s_secondary_intf, - urdc->secondary.intf); - - rdc_info_stats->s_type_flag.value.ul = krdc->type_flag; - rdc_info_stats->s_bitmap_size.value.ul = krdc->bitmap_size; - rdc_info_stats->s_disk_status.value.ul = krdc->disk_status; - - if (krdc->intf) { - rdc_info_stats->s_if_if_down.value.ul = krdc->intf->if_down; - rdc_info_stats->s_if_rpc_version.value.ul = - krdc->intf->rpc_version; - } - - /* the type can change without disable/re-enable so... 
*/ - bzero(rdc_info_stats->s_aqueue_type.value.c, KSTAT_DATA_CHAR_LEN); - if (RDC_IS_MEMQ(krdc->group)) { - (void) strcpy(rdc_info_stats->s_aqueue_type.value.c, "memory"); - rdc_info_stats->s_aqueue_blk_hwm.value.ul = - krdc->group->ra_queue.blocks_hwm; - rdc_info_stats->s_aqueue_itm_hwm.value.ul = - krdc->group->ra_queue.nitems_hwm; - rdc_info_stats->s_aqueue_throttle.value.ul = - krdc->group->ra_queue.throttle_delay; - rdc_info_stats->s_aqueue_items.value.ul = - krdc->group->ra_queue.nitems; - rdc_info_stats->s_aqueue_blocks.value.ul = - krdc->group->ra_queue.blocks; - - } else if (RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - rdc_info_stats->s_aqueue_blk_hwm.value.ul = - krdc->group->diskq.blocks_hwm; - rdc_info_stats->s_aqueue_itm_hwm.value.ul = - krdc->group->diskq.nitems_hwm; - rdc_info_stats->s_aqueue_throttle.value.ul = - krdc->group->diskq.throttle_delay; - rdc_info_stats->s_aqueue_items.value.ul = QNITEMS(q); - rdc_info_stats->s_aqueue_blocks.value.ul = QBLOCKS(q); - (void) strcpy(rdc_info_stats->s_aqueue_type.value.c, "disk"); - } - - return (0); -} - -void -rdc_kstat_create(int index) -{ - int j = index; - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - size_t varsize; - - if (!krdc->set_kstats) { - krdc->set_kstats = kstat_create(RDC_KSTAT_MODULE, j, - RDC_KSTAT_INFO, RDC_KSTAT_CLASS, KSTAT_TYPE_NAMED, - sizeof (rdc_info_stats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); -#ifdef DEBUG - if (!krdc->set_kstats) - cmn_err(CE_NOTE, "!krdc:u_kstat null"); -#endif - - if (krdc->set_kstats) { - /* calculate exact size of KSTAT_DATA_STRINGs */ - varsize = strlen(urdc->primary.file) + 1 - + strlen(urdc->secondary.file) + 1 - + strlen(urdc->primary.intf) + 1 - + strlen(urdc->secondary.intf) + 1; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - varsize += strlen(urdc->primary.bitmap) + 1; - } else { - varsize += strlen(urdc->secondary.bitmap) + 1; - } - - krdc->set_kstats->ks_data_size += varsize; - krdc->set_kstats->ks_data = &rdc_info_stats; - krdc->set_kstats->ks_update = rdc_info_stats_update; - krdc->set_kstats->ks_private = &rdc_k_info[j]; - kstat_install(krdc->set_kstats); - } else - cmn_err(CE_WARN, "!SNDR: k-kstats failed"); - } - - krdc->io_kstats = kstat_create(RDC_KSTAT_MODULE, j, NULL, - "disk", KSTAT_TYPE_IO, 1, 0); - if (krdc->io_kstats) { - krdc->io_kstats->ks_lock = &krdc->kstat_mutex; - kstat_install(krdc->io_kstats); - } - krdc->bmp_kstats = kstat_create("sndrbmp", j, NULL, - "disk", KSTAT_TYPE_IO, 1, 0); - if (krdc->bmp_kstats) { - krdc->bmp_kstats->ks_lock = &krdc->bmp_kstat_mutex; - kstat_install(krdc->bmp_kstats); - } -} - -void -rdc_kstat_delete(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - - if (krdc->set_kstats) { - kstat_delete(krdc->set_kstats); - krdc->set_kstats = NULL; - } - - if (krdc->io_kstats) { - kstat_delete(krdc->io_kstats); - krdc->io_kstats = NULL; - } - if (krdc->bmp_kstats) { - kstat_delete(krdc->bmp_kstats); - krdc->bmp_kstats = NULL; - } -} - -#ifdef DEBUG -/* - * Reset the io_kstat structure of the krdc specified - * by the arg index. 
- */ -static int -rdc_clrkstat(void *arg) -{ - int index; - rdc_k_info_t *krdc; - - index = (int)(unsigned long)arg; - if ((index < 0) || (index >= rdc_max_sets)) { - return (EINVAL); - } - krdc = &rdc_k_info[index]; - if (krdc->io_kstats) { - kstat_delete(krdc->io_kstats); - krdc->io_kstats = NULL; - } else { - return (EINVAL); - } - krdc->io_kstats = kstat_create(RDC_KSTAT_MODULE, index, NULL, - "disk", KSTAT_TYPE_IO, 1, 0); - if (krdc->io_kstats) { - krdc->io_kstats->ks_lock = &krdc->kstat_mutex; - kstat_install(krdc->io_kstats); - } else { - return (EINVAL); - } - /* - * clear the high water marks and throttle. - */ - if (krdc->group) { - krdc->group->ra_queue.nitems_hwm = 0; - krdc->group->ra_queue.blocks_hwm = 0; - krdc->group->ra_queue.throttle_delay = 0; - } - return (0); -} -#endif diff --git a/usr/src/uts/common/avs/ns/rdc/rdc.conf b/usr/src/uts/common/avs/ns/rdc/rdc.conf deleted file mode 100644 index 1ef5e0e420..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc.conf +++ /dev/null @@ -1,55 +0,0 @@ -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# -name="rdc" parent="pseudo"; - -# -# rdc_bitmap_mode -# - Sets the mode of the RDC bitmap operation, acceptable values are: -# 0 - autodetect bitmap mode depending on the state of SDBC (default). -# 1 - force bitmap writes for every write operation, so an update resync -# can be performed after a crash or reboot. -# 2 - only write the bitmap on shutdown, so a full resync is -# required after a crash, but an update resync is required after -# a reboot. -# -rdc_bitmap_mode=1; - -# -# rdc_max_sets -# - Configure the maximum number of RDC sets that can be enabled on -# this host. The actual maximum number of sets that can be enabled -# will be the minimum of this value and nsc_max_devices (see -# nsctl.conf) at the time the rdc kernel module is loaded. -# -rdc_max_sets=64; - -# -# rdc_health_thres -# - Set the timeout (in seconds) for RDC health monitoring. If IPMP is in -# use over the RDC link this value should be set higher than in.mpathd's -# timeout. -# -#rdc_health_thres=20; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc.h b/usr/src/uts/common/avs/ns/rdc/rdc.h deleted file mode 100644 index 8ebb22ad17..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_H -#define _RDC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define RDCDEV "/dev/rdc" -#define RDC_KSTAT_CLASS "storedge" -#define RDC_KSTAT_MINFO "modinfo" -#define RDC_KSTAT_INFO "setinfo" -#define RDC_KSTAT_MODULE "sndr" - - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.c b/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.c deleted file mode 100644 index bbea681e09..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.c +++ /dev/null @@ -1,2659 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/errno.h> - -#include "../solaris/nsc_thread.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/ddi.h> - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_clnt.h" -#include "rdc_diskq.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifndef UINT8_MAX -#define UINT8_MAX 255 -#endif - -#ifndef UINT_MAX -#define UINT_MAX 0xffffffff -#endif - -/* - * RDC bitmap functions. - */ - -/* - * RDC cluster integration notes. - * - * 1. Configuration - * - * 1.1. Change 'rdc_bitmap_mode' in /usr/kernel/drv/rdc.conf to '1'. - * - * 2. Operation - * - * 2.1. SunCluster ensures that only one physical host has any rdc - * controlled device imported at any one time. Hence rdc will - * only be active on a single node for any set at a time. - * - * 2.2. So operation from the kernel perspective looks just like - * operation on a single, standalone, node. 
- * - */ - -struct rdc_bitmap_ops *rdc_bitmap_ops; /* the bitmap ops switch */ -static int rdc_wrflag; /* write flag for io */ -int rdc_bitmap_delay = 0; -extern nsc_io_t *_rdc_io_hc; - -int rdc_suspend_diskq(rdc_k_info_t *krdc); - -/* - * rdc_ns_io - * Perform read or write on an underlying ns device - * - * fd - nsc file descriptor - * flag - nsc io direction and characteristics flag - * fba_pos - offset from beginning of device in FBAs - * io_addr - pointer to data buffer - * io_len - length of io in bytes - */ - -int -rdc_ns_io(nsc_fd_t *fd, int flag, nsc_off_t fba_pos, uchar_t *io_addr, - nsc_size_t io_len) -{ - nsc_buf_t *tmp; - nsc_vec_t *vecp; - uchar_t *vaddr; - size_t copy_len; - int vlen; - int rc; - nsc_size_t fba_req, fba_len; - nsc_size_t maxfbas = 0; - nsc_size_t tocopy; - unsigned char *toaddr; - - rc = nsc_maxfbas(fd, 0, &maxfbas); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_ns_io: maxfbas failed (%d)", rc); -#endif - maxfbas = 256; - } - toaddr = io_addr; - fba_req = FBA_LEN(io_len); -loop: - tmp = NULL; - fba_len = min(fba_req, maxfbas); - tocopy = min(io_len, FBA_SIZE(fba_len)); - ASSERT(tocopy < INT32_MAX); - - rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp); - if (!RDC_SUCCESS(rc)) { - if (tmp) { - (void) nsc_free_buf(tmp); - } - return (EIO); - } - - if ((flag & NSC_WRITE) != 0 && (flag & NSC_READ) == 0 && - FBA_OFF(io_len) != 0) { - /* - * Not overwriting all of the last FBA, so read in the - * old contents now before we overwrite it with the new - * data. - */ - rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - return (EIO); - } - } - - vecp = tmp->sb_vec; - vlen = vecp->sv_len; - vaddr = vecp->sv_addr; - - while (tocopy > 0) { - if (vecp->sv_addr == 0 || vecp->sv_len == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_ns_io: ran off end of handle"); -#endif - break; - } - - copy_len = (size_t)min(vlen, (int)tocopy); - - if (flag & NSC_WRITE) - bcopy(toaddr, vaddr, copy_len); - else - bcopy(vaddr, toaddr, copy_len); - - toaddr += copy_len; - io_addr += copy_len; /* adjust position in callers buffer */ - io_len -= copy_len; /* adjust total byte length remaining */ - tocopy -= copy_len; /* adjust chunk byte length remaining */ - vaddr += copy_len; /* adjust location in sv_vec_t */ - vlen -= copy_len; /* adjust length left in sv_vec_t */ - - if (vlen <= 0) { - vecp++; - vaddr = vecp->sv_addr; - vlen = vecp->sv_len; - } - } - - if (flag & NSC_WRITE) { - rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(tmp); - return (rc); - } - } - - (void) nsc_free_buf(tmp); - - fba_pos += fba_len; - fba_req -= fba_len; - if (fba_req > 0) - goto loop; - - return (0); -} - -/* - * Must be called with krdc->bmapmutex held. 
- */ -static void -rdc_fill_header(rdc_u_info_t *urdc, rdc_header_t *header) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; -#ifdef DEBUG - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); -#endif - - header->magic = RDC_HDR_MAGIC; - (void) strncpy(header->primary.file, urdc->primary.file, NSC_MAXPATH); - (void) strncpy(header->primary.bitmap, urdc->primary.bitmap, - NSC_MAXPATH); - (void) strncpy(header->secondary.file, urdc->secondary.file, - NSC_MAXPATH); - (void) strncpy(header->secondary.bitmap, urdc->secondary.bitmap, - NSC_MAXPATH); - header->flags = urdc->flags | urdc->sync_flags | urdc->bmap_flags; - header->autosync = urdc->autosync; - header->maxqfbas = urdc->maxqfbas; - header->maxqitems = urdc->maxqitems; - header->asyncthr = urdc->asyncthr; - header->syshostid = urdc->syshostid; - header->refcntsize = rdc_refcntsize(krdc); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif -} - -/* - * Must be called with krdc->bmapmutex held. - */ -static int -rdc_read_header(rdc_k_info_t *krdc, rdc_header_t *header) -{ - int sts; - rdc_u_info_t *urdc; - union { - rdc_header_t *current; - rdc_headerv4_t *v4; - } u_hdrp; - - if (krdc == NULL) { - return (-1); - } - - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return (-1); - - if (krdc->bitmapfd == NULL) { - return (-1); - } - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, NSC_RDBUF, 0, (uchar_t *)header, - sizeof (rdc_header_t)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->reads++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nread += sizeof (rdc_header_t); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_read_header: %s read failed %d", - urdc->primary.file, sts); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "read header failed"); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) - return (-1); - switch (header->magic) { - case RDC_HDR_V4: - /* - * old header format - upgrade incore copy, disk copy will - * be changed when state is re-written. - */ -#ifdef DEBUG - cmn_err(CE_NOTE, "!sndr: old style (V4) bit map header"); -#endif - header->magic = RDC_HDR_MAGIC; - u_hdrp.current = header; - /* copy down items moved by new maxq??? sizes */ - u_hdrp.current->asyncthr = u_hdrp.v4->asyncthr; - u_hdrp.current->syshostid = u_hdrp.v4->syshostid; - u_hdrp.current->maxqitems = u_hdrp.v4->maxqitems; - u_hdrp.current->maxqfbas = u_hdrp.v4->maxqfbas; - u_hdrp.current->refcntsize = 1; /* new field */ -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)u_hdrp.current->refcntsize, __LINE__, __FILE__); -#endif - return (0); - case RDC_HDR_MAGIC: - /* current header type */ - return (0); - default: - /* not a header we currently understand */ - return (0); - } -} - -/* - * Must be called with krdc->bmapmutex held. 
- */ -static int -rdc_write_header(rdc_k_info_t *krdc, rdc_header_t *header) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return (-1); - - if (krdc->bitmapfd == NULL) { - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, 0, (uchar_t *)header, - sizeof (rdc_header_t)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += - sizeof (rdc_header_t); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_header: %s write failed %d", - urdc->primary.file, sts); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) - return (-1); - else - return (0); -} - -struct bm_ref_ops rdc_ref_byte_ops; -struct bm_ref_ops rdc_ref_int_ops; - -static void -rdc_set_refcnt_ops(rdc_k_info_t *krdc, size_t refcntsize) -{ - switch (refcntsize) { - default: - /* FALLTHRU */ - case sizeof (unsigned char): - krdc->bm_refs = &rdc_ref_byte_ops; - break; - case sizeof (unsigned int): - krdc->bm_refs = &rdc_ref_int_ops; - break; - } -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: set refcnt ops for refcntsize %d - %d:%s", - (int)refcntsize, __LINE__, __FILE__); -#endif -} - -size_t -rdc_refcntsize(rdc_k_info_t *krdc) -{ - if (krdc->bm_refs == &rdc_ref_int_ops) - return (sizeof (unsigned int)); - return (sizeof (unsigned char)); -} - -int -rdc_read_state(rdc_k_info_t *krdc, int *statep, int *hostidp) -{ - rdc_header_t header; - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - sts = rdc_read_header(krdc, &header); - mutex_exit(&krdc->bmapmutex); - - if (!RDC_SUCCESS(sts)) { - return (-1); - } - - switch (header.magic) { - case RDC_HDR_MAGIC: - *statep = header.flags; - *hostidp = header.syshostid; - rdc_set_refcnt_ops(krdc, header.refcntsize); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - sts = 0; - break; - default: - sts = -1; - break; - } - - return (sts); -} - -int -rdc_clear_state(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - int sts; - rdc_header_t header; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - bzero(&header, sizeof (header)); - - if (krdc->bmp_kstats) { - 
mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, 0, - (uchar_t *)&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += - sizeof (rdc_header_t); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_clear_state: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - - if (!RDC_SUCCESS(sts)) - return (-1); - else - return (0); -} - -void -rdc_write_state(rdc_u_info_t *urdc) -{ - rdc_k_info_t *krdc; - int sts; - rdc_header_t header; - - if (urdc == NULL) { - return; - } - - krdc = &rdc_k_info[urdc->index]; - - mutex_enter(&krdc->bmapmutex); - - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return; - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return; - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return; - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, NSC_RDBUF, 0, (uchar_t *)&header, - sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->reads++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nread += sizeof (header); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_state: %s read failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "read failed"); - goto done; - } - - rdc_fill_header(urdc, &header); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, 0, - (uchar_t *)&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += sizeof (header); - } - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_state: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - } - -done: - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); -} - - -struct bitmapdata { - uchar_t *data; - size_t len; -}; - -static int -rdc_read_bitmap(rdc_k_info_t *krdc, struct bitmapdata *data) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - if (data != NULL) { - data->data = kmem_alloc(krdc->bitmap_size, KM_SLEEP); - data->len = krdc->bitmap_size; - - if (data->data == NULL) { - return (-1); - } - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (data == NULL && 
krdc->dcio_bitmap == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - cmn_err(CE_WARN, "!rdc_read_bitmap: %s reserve failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, NSC_RDBUF, RDC_BITMAP_FBA, - data ? data->data : krdc->dcio_bitmap, krdc->bitmap_size); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->reads++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nread += krdc->bitmap_size; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_read_bitmap: %s read failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "read failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - mutex_exit(&krdc->bmapmutex); - return (0); -} - -int -rdc_write_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->dcio_bitmap == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, RDC_BITMAP_FBA, - krdc->dcio_bitmap, krdc->bitmap_size); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += krdc->bitmap_size; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_bitmap: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - mutex_exit(&krdc->bmapmutex); - return (0); -} - -int -rdc_write_bitmap_fba(rdc_k_info_t *krdc, nsc_off_t fba) -{ - rdc_u_info_t *urdc; - int sts; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->dcio_bitmap == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_fba: %s reserve failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - 
mutex_exit(krdc->bmp_kstats->ks_lock); - } - sts = rdc_ns_io(krdc->bitmapfd, rdc_wrflag, RDC_BITMAP_FBA + fba, - krdc->dcio_bitmap + fba * 512, 512); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += 512; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_fba: %s write failed", - urdc->primary.file); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - mutex_exit(&krdc->bmapmutex); - return (0); -} - - -static int -rdc_write_bitmap_pattern(rdc_k_info_t *krdc, const char pattern) -{ - rdc_u_info_t *urdc; - char *buffer; - nsc_buf_t *h; - nsc_vec_t *v; - int rc; - size_t i; - nsc_size_t len; - int off; - size_t buffer_size; - size_t iolen; - nsc_size_t fba_req; - nsc_off_t fba_len, fba_pos; - nsc_size_t maxfbas = 0; - nsc_size_t tocopy; - - if (krdc == NULL) { - return (-1); - } - - mutex_enter(&krdc->bmapmutex); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (krdc->bitmapfd == NULL) { - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reserve failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - buffer_size = FBA_SIZE(1); - ASSERT(buffer_size < INT32_MAX); - buffer = kmem_alloc(buffer_size, KM_SLEEP); - - for (i = 0; i < buffer_size; i++) { - buffer[i] = pattern; - } - - rc = nsc_maxfbas(krdc->bitmapfd, 0, &maxfbas); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_write_bitmap_pattern: maxfbas failed (%d)", rc); -#endif - maxfbas = 256; - } - - fba_req = FBA_LEN(krdc->bitmap_size); /* total FBAs left to copy */ - fba_pos = RDC_BITMAP_FBA; /* current FBA position */ - tocopy = krdc->bitmap_size; /* total bytes left to copy */ -loop: - h = NULL; - fba_len = min(fba_req, maxfbas); /* FBAs to alloc this time */ - - rc = nsc_alloc_buf(krdc->bitmapfd, fba_pos, fba_len, rdc_wrflag, &h); - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_pattern: %s " - "write failed %d", urdc->primary.file, rc); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "nsc_alloc_buf failed"); - if (h) { - (void) nsc_free_handle(h); - } - - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - rc = -1; - goto finish; - } - - /* bytes to copy this time */ - len = min(tocopy, FBA_SIZE(fba_len)); - v = h->sb_vec; - off = 0; - - while (len) { - if (off >= v->sv_len) { - off = 0; - v++; - } - - if (v->sv_addr == 0 || v->sv_len == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_write_bitmap_pattern: ran off end of handle"); -#endif - break; - } - - iolen = (size_t)min(len, buffer_size); - - bcopy(buffer, (char *)(v->sv_addr + off), iolen); - off += iolen; - len -= iolen; - } - - rc = nsc_write(h, h->sb_pos, h->sb_len, 0); - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!rdc_write_bitmap_pattern: " - "%s write failed %d", urdc->primary.file, rc); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "write failed"); - (void) nsc_free_buf(h); - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - rc = -1; - goto finish; - } - - (void) nsc_free_buf(h); - - fba_pos += fba_len; - fba_req -= fba_len; - tocopy -= FBA_SIZE(fba_len); /* adjust byte length remaining */ - if (fba_req > 0) - goto 
loop; - - _rdc_rlse_devs(krdc, RDC_BMP); - mutex_exit(&krdc->bmapmutex); - rc = 0; -finish: - kmem_free(buffer, buffer_size); - return (rc); -} - - -/* - * rdc_write_bitmap_fill() - * - * Write a bitmap full of 1's out to disk without touching the - * in-memory bitmap. - */ -int -rdc_write_bitmap_fill(rdc_k_info_t *krdc) -{ - return (rdc_write_bitmap_pattern(krdc, 0xff)); -} - - -void -rdc_merge_bitmaps(rdc_k_info_t *src, rdc_k_info_t *dst) -{ - if (src->dcio_bitmap == NULL || dst->dcio_bitmap == NULL) - return; - - rdc_lor(src->dcio_bitmap, dst->dcio_bitmap, - min(src->bitmap_size, dst->bitmap_size)); - if (dst->bitmap_write > 0) - (void) rdc_write_bitmap(dst); -} - - -/* - * bitmap size in bytes, vol_size fba's - */ - -size_t -rdc_ref_size_possible(nsc_size_t bitmap_size, nsc_size_t vol_size) -{ - nsc_size_t ref_size; - nsc_size_t bitmap_end_fbas; - - bitmap_end_fbas = RDC_BITMAP_FBA + FBA_LEN(bitmap_size); - ref_size = FBA_LEN(bitmap_size * BITS_IN_BYTE * sizeof (unsigned char)); - if (bitmap_end_fbas + ref_size > vol_size) - return ((size_t)0); - - ref_size = FBA_LEN(bitmap_size * BITS_IN_BYTE * sizeof (unsigned int)); - if (bitmap_end_fbas + ref_size > vol_size) - return (sizeof (unsigned char)); - return (sizeof (unsigned int)); -} - -int -rdc_move_bitmap(rdc_k_info_t *krdc, char *newbitmap) -{ - rdc_u_info_t *urdc; - nsc_fd_t *oldfd; - nsc_fd_t *newfd = NULL; - rdc_header_t header; - int sts; - nsc_size_t vol_size; - nsc_size_t req_size; - size_t ref_size; - - if (krdc == NULL) { - return (-1); - } - - if (krdc->bitmapfd == NULL) { - return (-1); - } - - req_size = RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size); - if (RDC_IS_DISKQ(krdc->group)) { - /* new volume must support at least the old refcntsize */ - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE * - rdc_refcntsize(krdc)); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - } - - mutex_enter(&krdc->bmapmutex); - - if (rdc_read_header(krdc, &header) < 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_move_bitmap: Read old header failed"); -#endif - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - oldfd = krdc->bitmapfd; - - newfd = nsc_open(newbitmap, NSC_RDCHR_ID|NSC_FILE|NSC_RDWR, 0, 0, 0); - if (newfd == NULL) { - newfd = nsc_open(newbitmap, - NSC_RDCHR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, 0, 0, 0); - if (newfd == NULL) { - /* Can't open new bitmap */ - cmn_err(CE_WARN, - "!rdc_move_bitmap: Cannot open new bitmap %s", - newbitmap); - goto fail; - } - } - - sts = nsc_reserve(newfd, 0); - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_move_bitmap: Reserve failed for %s", - newbitmap); - goto fail; - } - sts = nsc_partsize(newfd, &vol_size); - nsc_release(newfd); - - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: nsc_partsize failed for %s", newbitmap); - goto fail; - } - - ref_size = rdc_ref_size_possible(krdc->bitmap_size, vol_size); - - if (vol_size < req_size) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: bitmap %s too small: %" NSC_SZFMT - " vs %" NSC_SZFMT " blocks", newbitmap, vol_size, req_size); - goto fail; - } - - mutex_enter(&krdc->devices->id_rlock); - krdc->bitmapfd = newfd; /* swap under lock */ - if (krdc->bmaprsrv > 0) { - sts = nsc_reserve(krdc->bitmapfd, 0); - if (!RDC_SUCCESS(sts)) { - krdc->bitmapfd = oldfd; /* replace under lock */ - mutex_exit(&krdc->devices->id_rlock); - cmn_err(CE_WARN, - "!rdc_move_bitmap: Reserve failed for %s", - newbitmap); - goto fail; - } - } - rdc_set_refcnt_ops(krdc, ref_size); 
-#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - mutex_exit(&krdc->devices->id_rlock); - - /* Forget newfd now it is krdc->bitmapfd */ - newfd = NULL; - - /* Put new bitmap name into header and user-visible data structure */ - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - (void) strncpy(header.primary.bitmap, newbitmap, NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, newbitmap, NSC_MAXPATH); - } else { - (void) strncpy(header.secondary.bitmap, newbitmap, NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, newbitmap, NSC_MAXPATH); - } - - if (rdc_write_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: Write header %s failed", newbitmap); - goto fail; - } - - mutex_exit(&krdc->bmapmutex); - - if (rdc_write_bitmap(krdc) < 0) { - mutex_enter(&krdc->bmapmutex); - cmn_err(CE_WARN, - "!rdc_move_bitmap: Write bitmap %s failed", newbitmap); - goto fail; - } - - /* Unintercept the old bitmap */ - if (krdc->b_tok) { - int rc; - - rdc_group_exit(krdc); - rc = nsc_unregister_path(krdc->b_tok, 0); - if (rc) - cmn_err(CE_WARN, "!rdc_move_bitmap: " - "unregister bitmap failed %d", rc); - else - krdc->b_tok = nsc_register_path(newbitmap, - NSC_CACHE | NSC_DEVICE, _rdc_io_hc); - rdc_group_enter(krdc); - } - - /* clear the old bitmap header */ - bzero(&header, sizeof (header)); - - sts = nsc_held(oldfd) ? 0 : nsc_reserve(oldfd, 0); - if (sts == 0) { - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - } - - sts = rdc_ns_io(oldfd, rdc_wrflag, 0, - (uchar_t *)&header, sizeof (header)); - - if (krdc->bmp_kstats) { - mutex_enter(krdc->bmp_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->bmp_kstats)); - mutex_exit(krdc->bmp_kstats->ks_lock); - KSTAT_IO_PTR(krdc->bmp_kstats)->writes++; - KSTAT_IO_PTR(krdc->bmp_kstats)->nwritten += - sizeof (header); - } - - } -#ifdef DEBUG - if (sts != 0) { - cmn_err(CE_WARN, - "!rdc_move_bitmap: unable to clear bitmap header on %s", - nsc_pathname(oldfd)); - } -#endif - - /* nsc_close will undo any reservation */ - if (nsc_close(oldfd) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_move_bitmap: close old bitmap failed"); -#else - ; - /*EMPTY*/ -#endif - } - - return (0); - -fail: - /* Close newfd if it was unused */ - if (newfd && newfd != krdc->bitmapfd) { - (void) nsc_close(newfd); - newfd = NULL; - } - - mutex_exit(&krdc->bmapmutex); - return (-1); -} - - -void -rdc_close_bitmap(rdc_k_info_t *krdc) -{ - - if (krdc == NULL) { - return; - } - - mutex_enter(&krdc->bmapmutex); - - if (krdc->bitmapfd) { - if (nsc_close(krdc->bitmapfd) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_close on bitmap failed"); -#else - ; - /*EMPTY*/ -#endif - } - krdc->bitmapfd = 0; - } - - mutex_exit(&krdc->bmapmutex); -} - -void -rdc_free_bitmap(rdc_k_info_t *krdc, int cmd) -{ - rdc_header_t header; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - if (krdc == NULL) { - return; - } - - mutex_enter(&krdc->bmapmutex); - - if (cmd != RDC_CMD_SUSPEND) { - - bzero((char *)&header, sizeof (rdc_header_t)); - - if (krdc->bitmapfd) - (void) rdc_write_header(krdc, &header); - } else { - mutex_exit(&krdc->bmapmutex); - /* gotta drop mutex, in case q needs to fail */ - if (RDC_IS_DISKQ(krdc->group) && rdc_suspend_diskq(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_free_bitmap: diskq suspend failed"); - } - - mutex_enter(&krdc->bmapmutex); - if 
(rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_free_bitmap: Read header failed"); - } else { - rdc_fill_header(urdc, &header); - - (void) rdc_write_header(krdc, &header); - } - } - - mutex_exit(&krdc->bmapmutex); - - if (krdc->dcio_bitmap != NULL) { - if (cmd == RDC_CMD_SUSPEND) { - if (krdc->bitmapfd) - (void) rdc_write_bitmap(krdc); - } - - kmem_free(krdc->dcio_bitmap, krdc->bitmap_size); - krdc->dcio_bitmap = NULL; - } - if (krdc->bitmap_ref != NULL) { - kmem_free(krdc->bitmap_ref, (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE)); - krdc->bitmap_ref = NULL; - } - - krdc->bitmap_size = 0; -} - -static int -rdc_alloc_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - char *bitmapname; - nsc_size_t bitmap_ref_size; - - if (krdc == NULL) { - return (-1); - } - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - if (krdc->dcio_bitmap) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_bitmap: bitmap %s already allocated", - bitmapname); -#endif - return (0); - } - - if (urdc->volume_size == 0) - return (-1); - - krdc->bitmap_size = BMAP_LOG_BYTES(urdc->volume_size); - /* Round up */ - krdc->bitmap_size = (krdc->bitmap_size + 511) / 512 * 512; - - krdc->dcio_bitmap = (uchar_t *)kmem_zalloc(krdc->bitmap_size, - KM_SLEEP); - if (krdc->dcio_bitmap == NULL) { - cmn_err(CE_WARN, "!rdc_alloc_bitmap: alloc %" NSC_SZFMT - " failed for %s", krdc->bitmap_size, bitmapname); - return (-1); - } - - /* - * use largest ref count type size as we haven't opened the bitmap - * volume yet to find out what has acutally be used. - */ - bitmap_ref_size = krdc->bitmap_size * BITS_IN_BYTE * BMAP_REF_PREF_SIZE; - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) { - krdc->bitmap_ref = (uchar_t *)kmem_zalloc(bitmap_ref_size, - KM_SLEEP); - if (krdc->bitmap_ref == NULL) { - cmn_err(CE_WARN, - "!rdc_alloc_bitmap: ref alloc %" NSC_SZFMT - " failed for %s", - bitmap_ref_size, bitmapname); - return (-1); - } - } - - return (0); -} - - -static int -rdc_open_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - int sts; - uint_t hints = 0; - nsc_size_t vol_size; - char *bitmapname; - nsc_size_t req_size; - nsc_size_t bit_size; - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - urdc->bits_set = 0; - - bit_size = req_size = RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size); - if (RDC_IS_DISKQ(krdc->group)) { - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE * - sizeof (unsigned char)); - } - - mutex_enter(&krdc->bmapmutex); - - rdc_set_refcnt_ops(krdc, sizeof (unsigned char)); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - if (krdc->bitmapfd == NULL) - krdc->bitmapfd = nsc_open(bitmapname, - NSC_RDCHR_ID|NSC_FILE|NSC_RDWR, 0, 0, 0); - if (krdc->bitmapfd == NULL) { - krdc->bitmapfd = nsc_open(bitmapname, - NSC_RDCHR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, 0, 0, 0); - if (krdc->bitmapfd == NULL) { - cmn_err(CE_WARN, "!rdc_open_bitmap: Unable to open %s", - bitmapname); - goto fail; - } - } - - sts = _rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL); - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_open_bitmap: Reserve failed for %s", - bitmapname); - goto fail; - } - sts = nsc_partsize(krdc->bitmapfd, &vol_size); - _rdc_rlse_devs(krdc, RDC_BMP); - - 
if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, - "!rdc_open_bitmap: nsc_partsize failed for %s", bitmapname); - goto fail; - } - - if (vol_size < req_size) { - /* minimum size supports unsigned char reference counts */ - cmn_err(CE_WARN, - "!rdc_open_bitmap: bitmap %s too small: %" NSC_SZFMT " vs %" - NSC_SZFMT "blocks", - bitmapname, vol_size, req_size); - goto fail; - } - - if (rdc_bitmap_mode == RDC_BMP_NEVER) { - krdc->bitmap_write = 0; /* forced off */ - } else if (rdc_bitmap_mode == RDC_BMP_ALWAYS || - (nsc_node_hints(&hints) == 0 && (hints & NSC_FORCED_WRTHRU) == 0)) { - krdc->bitmap_write = 1; /* forced or autodetect on */ - } else { - /* autodetect off */ - krdc->bitmap_write = 0; - } - - mutex_exit(&krdc->bmapmutex); - if (RDC_IS_DISKQ(krdc->group) && (rdc_refcntsize(krdc) < - BMAP_REF_PREF_SIZE)) { - /* test for larger ref counts */ -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - req_size = bit_size; - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE * - sizeof (unsigned int)); - if (vol_size >= req_size) - rdc_set_refcnt_ops(krdc, sizeof (unsigned int)); - } -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - return (0); - -fail: - mutex_exit(&krdc->bmapmutex); - return (-1); -} - -int -rdc_enable_bitmap(rdc_k_info_t *krdc, int set) -{ - rdc_header_t header; - rdc_u_info_t *urdc; - char *bitmapname; - - urdc = &rdc_u_info[krdc->index]; - - if (rdc_alloc_bitmap(krdc) < 0) - goto fail; - - if (rdc_open_bitmap(krdc) < 0) - goto fail; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - mutex_enter(&krdc->bmapmutex); - - rdc_clr_flags(urdc, RDC_BMP_FAILED); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_enable_bitmap: Read header %s failed", bitmapname); - mutex_exit(&krdc->bmapmutex); - goto fail; - } - - rdc_fill_header(urdc, &header); - rdc_set_refcnt_ops(krdc, (size_t)header.refcntsize); - - if (set) - (void) RDC_FILL_BITMAP(krdc, FALSE); - - if (rdc_write_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_enable_bitmap: Write header %s failed", - bitmapname); - mutex_exit(&krdc->bmapmutex); - goto fail; - } - mutex_exit(&krdc->bmapmutex); - - if (rdc_write_bitmap(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_enable_bitmap: Write bitmap %s failed", - bitmapname); - goto fail; - } - - return (0); - -fail: - rdc_free_bitmap(krdc, RDC_CMD_ENABLE); - rdc_close_bitmap(krdc); - - mutex_enter(&krdc->bmapmutex); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "I/O failed"); - mutex_exit(&krdc->bmapmutex); - return (-1); -} - -static int -_rdc_rdwr_refcnt(rdc_k_info_t *krdc, int rwflg) -{ - rdc_u_info_t *urdc; - int rc; - nsc_off_t offset; - nsc_size_t len; - - urdc = &rdc_u_info[krdc->index]; - -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!rdc_rdwr_refcnt: %s refcount for %s", - (rwflg == NSC_READ) ? 
"resuming" : "writing", - urdc->primary.bitmap); -#endif - ASSERT(MUTEX_HELD(QLOCK((&krdc->group->diskq)))); - mutex_enter(&krdc->bmapmutex); - - if (_rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL)) { - cmn_err(CE_WARN, "!rdc_rdwr_refcnt: reserve failed"); - goto fail; - } - - if (krdc->bitmap_size == 0) { - cmn_err(CE_WARN, "!rdc_rdwr_refcnt: NULL bitmap!"); - goto fail; - } - - offset = RDC_BITREF_FBA(krdc); - len = krdc->bitmap_size * BITS_IN_BYTE * rdc_refcntsize(krdc); - - rc = rdc_ns_io(krdc->bitmapfd, rwflg, offset, - (uchar_t *)krdc->bitmap_ref, len); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!unable to %s refcount from bitmap %s", - (rwflg == NSC_READ) ? "retrieve" : "write", - urdc->primary.bitmap); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "refcount I/O failed"); - goto fail; - } - - _rdc_rlse_devs(krdc, RDC_BMP); - - mutex_exit(&krdc->bmapmutex); - -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!rdc_rdwr_refcnt: %s refcount for %s", - (rwflg == NSC_READ) ? "resumed" : "wrote", - urdc->primary.bitmap); -#endif - return (0); - - fail: - _rdc_rlse_devs(krdc, RDC_BMP); - - mutex_exit(&krdc->bmapmutex); - - return (-1); - -} - -/* - * rdc_read_refcount - * read the stored refcount from disk - * queue lock is held - */ -int -rdc_read_refcount(rdc_k_info_t *krdc) -{ - int rc; - - rc = _rdc_rdwr_refcnt(krdc, NSC_READ); - - return (rc); -} - -/* - * rdc_write_refcount - * writes krdc->bitmap_ref to the diskq - * called with qlock held - */ -int -rdc_write_refcount(rdc_k_info_t *krdc) -{ - int rc; - - rc = _rdc_rdwr_refcnt(krdc, NSC_WRBUF); - - return (rc); -} - -static int -rdc_resume_state(rdc_k_info_t *krdc, const rdc_header_t *header) -{ - rdc_u_info_t *urdc; - char *bitmapname; - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - if (header->magic != RDC_HDR_MAGIC) { - cmn_err(CE_WARN, "!rdc_resume_state: Bad magic in %s", - bitmapname); - return (-1); - } - - if (strncmp(urdc->primary.file, header->primary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->primary.file, urdc->primary.file); -#endif /* DEBUG */ - return (-1); - } - - if (strncmp(urdc->secondary.file, header->secondary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->secondary.file, urdc->secondary.file); -#endif /* DEBUG */ - return (-1); - } - - if (strncmp(urdc->primary.bitmap, header->primary.bitmap, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->primary.bitmap, urdc->primary.bitmap); -#endif /* DEBUG */ - return (-1); - } - - if (strncmp(urdc->secondary.bitmap, header->secondary.bitmap, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_state: Found %s Expected %s", - header->secondary.bitmap, urdc->secondary.bitmap); -#endif /* DEBUG */ - return (-1); - } - - if (header->maxqfbas) - urdc->maxqfbas = header->maxqfbas; - - if (header->maxqitems) - urdc->maxqitems = header->maxqitems; - - if (header->autosync >= 0) - urdc->autosync = header->autosync; - - if (header->asyncthr) - urdc->asyncthr = header->asyncthr; - - rdc_many_enter(krdc); - rdc_set_refcnt_ops(krdc, header->refcntsize); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - if (header->flags & RDC_VOL_FAILED) - rdc_set_flags(urdc, 
RDC_VOL_FAILED); - if (header->flags & RDC_QUEUING) - rdc_set_flags(urdc, RDC_QUEUING); - - rdc_clr_flags(urdc, RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED); - rdc_set_mflags(urdc, (header->flags & RDC_RSYNC_NEEDED)); - rdc_set_flags(urdc, (header->flags & RDC_SYNC_NEEDED)); - rdc_many_exit(krdc); - - if (urdc->flags & RDC_VOL_FAILED) { - - /* Our disk was failed so set all the bits in the bitmap */ - - if (RDC_FILL_BITMAP(krdc, TRUE) != 0) { - cmn_err(CE_WARN, - "!rdc_resume_state: Fill bitmap %s failed", - bitmapname); - return (-1); - } - rdc_many_enter(krdc); - if (IS_STATE(urdc, RDC_QUEUING)) - rdc_clr_flags(urdc, RDC_QUEUING); - rdc_many_exit(krdc); - } else { - /* Header was good, so read in the bitmap */ - - if (rdc_read_bitmap(krdc, NULL) < 0) { - cmn_err(CE_WARN, - "!rdc_resume_state: Read bitmap %s failed", - bitmapname); - return (-1); - } - - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - - /* - * Check if another node went down with bits set, but - * without setting logging mode. - */ - if (urdc->bits_set != 0 && - (rdc_get_vflags(urdc) & RDC_ENABLED) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - rdc_group_log(krdc, RDC_NOFLUSH | RDC_NOREMOTE, NULL); - } - } - - /* if we are using a disk queue, read in the reference count bits */ - if (RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - mutex_enter(QLOCK(q)); - if ((rdc_read_refcount(krdc) < 0)) { - cmn_err(CE_WARN, - "!rdc_resume_state: Resume bitmap %s's refcount" - "failed", - urdc->primary.bitmap); - mutex_exit(QLOCK(q)); - rdc_many_enter(krdc); - if (IS_STATE(urdc, RDC_QUEUING)) - rdc_clr_flags(urdc, RDC_QUEUING); - rdc_many_exit(krdc); - return (-1); - } - mutex_exit(QLOCK(q)); - } - - return (0); -} - - -int -rdc_resume_bitmap(rdc_k_info_t *krdc) -{ - rdc_header_t header; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - char *bitmapname; - - if (rdc_alloc_bitmap(krdc) < 0) - goto allocfail; - - if (rdc_open_bitmap(krdc) < 0) - goto fail; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - mutex_enter(&krdc->bmapmutex); - - rdc_clr_flags(urdc, RDC_BMP_FAILED); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Read header %s failed", bitmapname); - mutex_exit(&krdc->bmapmutex); - goto fail; - } - - mutex_exit(&krdc->bmapmutex); - - /* Resuming from the bitmap, so do some checking */ - - /*CONSTCOND*/ - ASSERT(FBA_LEN(sizeof (rdc_header_t)) <= RDC_BITMAP_FBA); - /*CONSTCOND*/ - ASSERT(sizeof (rdc_header_t) >= sizeof (rdc_headerv2_t)); - - if (header.magic == RDC_HDR_V2) { - rdc_headerv2_t *hdr_v2 = (rdc_headerv2_t *)&header; - rdc_header_t new_header; - -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Converting v2 header for bitmap %s", - bitmapname); -#endif - bzero((char *)&new_header, sizeof (rdc_header_t)); - - new_header.autosync = -1; - new_header.magic = RDC_HDR_MAGIC; - new_header.syshostid = urdc->syshostid; - - if (hdr_v2->volume_failed) - new_header.flags |= RDC_VOL_FAILED; - if (hdr_v2->sync_needed == RDC_SYNC) - new_header.flags |= RDC_SYNC_NEEDED; - if (hdr_v2->sync_needed == RDC_FULL_SYNC) - new_header.flags |= RDC_SYNC_NEEDED; - if (hdr_v2->sync_needed == RDC_REV_SYNC) - new_header.flags |= RDC_RSYNC_NEEDED; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - (void) strncpy(new_header.primary.file, - hdr_v2->filename, NSC_MAXPATH); - (void) strncpy(new_header.primary.bitmap, - hdr_v2->bitmapname, NSC_MAXPATH); - (void) strncpy(new_header.secondary.file, - 
urdc->secondary.file, NSC_MAXPATH); - (void) strncpy(new_header.secondary.bitmap, - urdc->secondary.bitmap, NSC_MAXPATH); - } else { - (void) strncpy(new_header.secondary.file, - hdr_v2->filename, NSC_MAXPATH); - (void) strncpy(new_header.secondary.bitmap, - hdr_v2->bitmapname, NSC_MAXPATH); - (void) strncpy(new_header.primary.file, - urdc->primary.file, NSC_MAXPATH); - (void) strncpy(new_header.primary.bitmap, - urdc->primary.bitmap, NSC_MAXPATH); - } - - bcopy(&new_header, &header, sizeof (rdc_header_t)); - - mutex_enter(&krdc->bmapmutex); - if (rdc_write_header(krdc, &header) < 0) { - mutex_exit(&krdc->bmapmutex); - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Write header %s failed", - bitmapname); - goto fail; - } - mutex_exit(&krdc->bmapmutex); - - } else if (header.magic == RDC_HDR_V3) { - /* - * just update asyncthr and magic, and then we're done - */ - header.magic = RDC_HDR_MAGIC; - header.asyncthr = RDC_ASYNCTHR; - mutex_enter(&krdc->bmapmutex); - if (rdc_write_header(krdc, &header) < 0) { - mutex_exit(&krdc->bmapmutex); - cmn_err(CE_WARN, - "!rdc_resume_bitmap: Write header %s failed", - bitmapname); - goto fail; - } - mutex_exit(&krdc->bmapmutex); - } - - if (rdc_resume_state(krdc, &header) == 0) - return (0); - - rdc_close_bitmap(krdc); - -fail: - (void) RDC_FILL_BITMAP(krdc, FALSE); - rdc_clr_flags(urdc, RDC_QUEUING); - if (krdc->bitmap_ref) - bzero(krdc->bitmap_ref, krdc->bitmap_size * BITS_IN_BYTE * - rdc_refcntsize(krdc)); - -allocfail: - mutex_enter(&krdc->bmapmutex); - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "resume bitmap failed"); - mutex_exit(&krdc->bmapmutex); - - return (-1); -} - -void -rdc_std_zero_bitref(rdc_k_info_t *krdc) -{ - nsc_size_t vol_size; - int sts; - size_t newrefcntsize; - - if (krdc->bitmap_ref) { - mutex_enter(&krdc->bmapmutex); - bzero(krdc->bitmap_ref, krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - if (RDC_IS_DISKQ(krdc->group) && rdc_refcntsize(krdc) != - BMAP_REF_PREF_SIZE) { - /* see if we can upgrade the size of the ref counters */ - sts = _rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL); - if (!RDC_SUCCESS(sts)) { - goto nochange; - } - sts = nsc_partsize(krdc->bitmapfd, &vol_size); - - newrefcntsize = rdc_ref_size_possible(krdc->bitmap_size, - vol_size); - if (newrefcntsize > rdc_refcntsize(krdc)) { - rdc_set_refcnt_ops(krdc, newrefcntsize); -#ifdef DEBUG_REFCNT - cmn_err(CE_NOTE, "!sndr: refcntsize %d - %d:%s", - (int)rdc_refcntsize(krdc), __LINE__, __FILE__); -#endif - } -nochange: - _rdc_rlse_devs(krdc, RDC_BMP); - } - mutex_exit(&krdc->bmapmutex); - } -} - -int -rdc_reset_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_header_t header; - char *bitmapname; - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - bitmapname = &urdc->primary.bitmap[0]; - else - bitmapname = &urdc->secondary.bitmap[0]; - - mutex_enter(&krdc->bmapmutex); - - rdc_clr_flags(urdc, RDC_BMP_FAILED); - if (rdc_read_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Read header %s failed", bitmapname); - goto fail_with_mutex; - } - - rdc_fill_header(urdc, &header); - - if (rdc_write_header(krdc, &header) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Write header %s failed", - bitmapname); - goto fail_with_mutex; - } - mutex_exit(&krdc->bmapmutex); - - if (krdc->bitmap_write == -1) - krdc->bitmap_write = 0; - - if (krdc->bitmap_write == 0) { - if (rdc_write_bitmap_fill(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Write bitmap %s failed", - bitmapname); - goto fail; - } - krdc->bitmap_write = -1; - } 
else if (rdc_write_bitmap(krdc) < 0) { - cmn_err(CE_WARN, - "!rdc_reset_bitmap: Write bitmap %s failed", - bitmapname); - goto fail; - } - - return (0); - -fail: - mutex_enter(&krdc->bmapmutex); -fail_with_mutex: - rdc_set_flags_log(urdc, RDC_BMP_FAILED, "reset failed"); - mutex_exit(&krdc->bmapmutex); -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: unable to reset bitmap for %s:%s", - urdc->secondary.intf, urdc->secondary.file); -#endif - return (-1); -} - - -/* - * General bitmap operations - */ - -/* - * rdc_set_bitmap_many() - * - * Used during reverse syncs to a 1-to-many primary to keep the 'many' - * bitmaps up to date. - */ -void -rdc_set_bitmap_many(rdc_k_info_t *krdc, nsc_off_t pos, nsc_size_t len) -{ - uint_t dummy; - -#ifdef DEBUG - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - cmn_err(CE_PANIC, "rdc_set_bitmap_many: not primary, urdc %p", - (void *) urdc); - } -#endif - - if (IS_MANY(krdc)) { - rdc_k_info_t *krd; - rdc_u_info_t *urd; - - rdc_many_enter(krdc); - - for (krd = krdc->many_next; krd != krdc; krd = krd->many_next) { - urd = &rdc_u_info[krd->index]; - if (!IS_ENABLED(urd)) - continue; - ASSERT(urd->flags & RDC_PRIMARY); - (void) RDC_SET_BITMAP(krd, pos, len, &dummy); - } - - rdc_many_exit(krdc); - } -} - - -static int -_rdc_net_bmap(const struct bmap6 *b6, net_bdata6 *bd6) -{ - rdc_k_info_t *krdc = &rdc_k_info[b6->cd]; - struct timeval t; - int e, ret; - uint64_t left; - uint64_t bmap_blksize; - - bmap_blksize = krdc->rpc_version < RDC_VERSION7 ? - BMAP_BLKSIZE : BMAP_BLKSIZEV7; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (bd6->data.data_val == NULL) { - return (EINVAL); - } - - left = b6->size; - bd6->endoblk = 0; - while (left) { - if (left >= bmap_blksize) - bd6->size = (int)bmap_blksize; - else - bd6->size = (int)left; - - bd6->data.data_len = bd6->size; - - if ((uint64_t)bd6->size > left) { - left = 0; - } else { - left -= bd6->size; - } - /* - * mark the last block sent. - */ - if (left == 0) { - bd6->endoblk = 1; - } - ASSERT(krdc->rpc_version); - if (krdc->rpc_version <= RDC_VERSION5) { - struct net_bdata bd; - bd.cd = bd6->cd; - bd.offset = bd6->offset; - bd.size = bd6->size; - bd.data.data_len = bd6->data.data_len; - bd.data.data_val = bd6->data.data_val; - e = rdc_clnt_call(krdc->lsrv, RDCPROC_BDATA, - krdc->rpc_version, xdr_net_bdata, (char *)&bd, - xdr_int, (char *)&ret, &t); - } else { - e = rdc_clnt_call(krdc->lsrv, RDCPROC_BDATA6, - krdc->rpc_version, xdr_net_bdata6, (char *)bd6, - xdr_int, (char *)&ret, &t); - } - if (e || ret) { - if (e) - ret = e; - return (ret); - } - bd6->offset += bmap_blksize; - bd6->data.data_val += bmap_blksize; - } - return (0); -} - - -/* - * Standard bitmap operations (combined kmem/disk bitmaps). - */ - -/* - * rdc_std_set_bitmask(pos, len, &bitmask) - * set a bitmask for this range. used to clear the correct - * bits after flushing - */ -static void -rdc_std_set_bitmask(const nsc_off_t fba_pos, const nsc_size_t fba_len, - uint_t *bitmask) -{ - int first, st, en; - if (bitmask) - *bitmask = 0; - else - return; - - first = st = FBA_TO_LOG_NUM(fba_pos); - en = FBA_TO_LOG_NUM(fba_pos + fba_len - 1); - while (st <= en) { - BMAP_BIT_SET((uchar_t *)bitmask, st - first); - st++; - } - -} -/* - * rdc_std_set_bitmap(krdc, fba_pos, fba_len, &bitmask) - * - * Mark modified segments in the dual copy file bitmap - * to provide fast recovery - * Note that bitmask allows for 32 segments, which at 32k per segment equals - * 1 megabyte. 
If we ever allow more than this to be transferred in one - * operation, or decrease the segment size, then this code will have to be - * changed accordingly. - */ - -static int -rdc_std_set_bitmap(rdc_k_info_t *krdc, const nsc_off_t fba_pos, - const nsc_size_t fba_len, uint_t *bitmask) -{ - int first, st, en; - int fbaset = 0; - nsc_off_t fba = 0; - int printerr = 10; - int tries = RDC_FUTILE_ATTEMPTS; - int queuing = RDC_QUEUING; - rdc_u_info_t *urdc; - - if (bitmask) - *bitmask = 0; - else - return (-1); - - urdc = &rdc_u_info[krdc->index]; - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return (-1); - - if (krdc->bitmap_write == 0) { - if (rdc_write_bitmap_fill(krdc) < 0) - return (-1); - krdc->bitmap_write = -1; - } - first = st = FBA_TO_LOG_NUM(fba_pos); - en = FBA_TO_LOG_NUM(fba_pos + fba_len - 1); - ASSERT(st <= en); - while (st <= en) { - int use_ref; -again: - mutex_enter(&krdc->bmapmutex); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_std_set_bitmap: " - "recovery bitmaps not allocated"); -#endif - mutex_exit(&krdc->bmapmutex); - return (-1); - } - - use_ref = IS_PRIMARY(urdc) && IS_ASYNC(urdc) && - ((rdc_get_vflags(urdc) & RDC_QUEUING) || - !(rdc_get_vflags(urdc) & RDC_LOGGING)); - - - if (!BMAP_BIT_ISSET(krdc->dcio_bitmap, st)) { - BMAP_BIT_SET(krdc->dcio_bitmap, st); - if (use_ref) { - ASSERT(BMAP_REF_ISSET(krdc, st) == - 0); - BMAP_REF_FORCE(krdc, st, 1); - } - BMAP_BIT_SET((uchar_t *)bitmask, st - first); - urdc->bits_set++; - if ((!fbaset) || fba != BIT_TO_FBA(st)) { - if (fbaset && krdc->bitmap_write > 0) { - mutex_exit(&krdc->bmapmutex); - if (rdc_write_bitmap_fba(krdc, fba) < 0) - return (-1); - mutex_enter(&krdc->bmapmutex); - } - fba = BIT_TO_FBA(st); - fbaset = 1; - } - } else { - /* - * Just bump reference count - * For logging or syncing we do not care what the reference - * is as it will be forced back on the state transition. - */ - if (use_ref) { - if (BMAP_REF_ISSET(krdc, st) == - BMAP_REF_MAXVAL(krdc)) { - /* - * Rollover of reference count. 
- */ - - if (!(rdc_get_vflags(urdc) & - RDC_VOL_FAILED)) { - /* - * Impose throttle to help dump - * queue - */ - mutex_exit(&krdc->bmapmutex); - delay(4); - rdc_bitmap_delay++; - if (printerr--) { - cmn_err(CE_WARN, "!SNDR: bitmap reference count maxed out for %s:%s", - urdc->secondary.intf, urdc->secondary.file); - - } - - if ((tries-- <= 0) && - IS_STATE(urdc, queuing)) { - cmn_err(CE_WARN, "!SNDR: giving up on reference count, logging set" - " %s:%s", urdc->secondary.intf, urdc->secondary.file); - rdc_group_enter(krdc); - rdc_group_log(krdc, - RDC_NOFLUSH | - RDC_NOREMOTE| - RDC_FORCE_GROUP, - "ref count retry limit exceeded"); - rdc_group_exit(krdc); - } - goto again; - } - } else { - BMAP_REF_SET(krdc, st); - } - } - } - mutex_exit(&krdc->bmapmutex); - st++; - } - if (fbaset && krdc->bitmap_write > 0) { - if (rdc_write_bitmap_fba(krdc, fba) < 0) - return (-1); - } - return (0); -} - -static void -rdc_std_clr_bitmap(rdc_k_info_t *krdc, const nsc_off_t fba_pos, - const nsc_size_t fba_len, const uint_t bitmask, const int force) -{ - int first, st, en; - nsc_off_t fba = 0; - int fbaset = 0; - uint_t bm = bitmask; - uchar_t *ptr = (uchar_t *)&bm; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return; - - first = st = FBA_TO_LOG_NUM(fba_pos); - en = FBA_TO_LOG_NUM(fba_pos + fba_len - 1); - ASSERT(st <= en); - while (st <= en) { - mutex_enter(&krdc->bmapmutex); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_std_clr_bitmap: " - "recovery bitmaps not allocated"); -#endif - mutex_exit(&krdc->bmapmutex); - return; - } - - if (((bitmask == 0xffffffff) || - (BMAP_BIT_ISSET(ptr, st - first))) && - BMAP_BIT_ISSET(krdc->dcio_bitmap, st)) { - - int use_ref = IS_PRIMARY(urdc) && IS_ASYNC(urdc) && - ((rdc_get_vflags(urdc) & RDC_QUEUING) || - !(rdc_get_vflags(urdc) & RDC_LOGGING)); - - if (force || (use_ref == 0)) { - if (krdc->bitmap_ref) - BMAP_REF_FORCE(krdc, st, 0); - } else if (use_ref) { - if (BMAP_REF_ISSET(krdc, st) != 0) - BMAP_REF_CLR(krdc, st); - - } - - if ((use_ref == 0) || (use_ref && - !BMAP_REF_ISSET(krdc, st))) { - BMAP_BIT_CLR(krdc->dcio_bitmap, st); - - urdc->bits_set--; - if (!fbaset || fba != BIT_TO_FBA(st)) { - if (fbaset && - krdc->bitmap_write > 0) { - mutex_exit(&krdc->bmapmutex); - if (rdc_write_bitmap_fba(krdc, - fba) < 0) - return; - mutex_enter(&krdc->bmapmutex); - } - fba = BIT_TO_FBA(st); - fbaset = 1; - } - } - } - mutex_exit(&krdc->bmapmutex); - st++; - } - if (fbaset && krdc->bitmap_write > 0) { - if (rdc_write_bitmap_fba(krdc, fba) < 0) - return; - } -} - -/* - * make sure that this bit is set. if it isn't, set it - * used when transitioning from async to sync while going - * from rep to log. an overlapping sync write may unconditionally - * clear the bit that has not been replicated. 
when the queue - * is being dumped or this is called just to make sure pending stuff - * is in the bitmap - */ -void -rdc_std_check_bit(rdc_k_info_t *krdc, nsc_off_t pos, nsc_size_t len) -{ - int st; - int en; - nsc_off_t fba; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - st = FBA_TO_LOG_NUM(pos); - en = FBA_TO_LOG_NUM(pos + len - 1); - - if (rdc_get_vflags(urdc) & RDC_BMP_FAILED) - return; - - while (st <= en) { - mutex_enter(&krdc->bmapmutex); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_std_check_bit: " - "recovery bitmaps not allocated"); -#endif - mutex_exit(&krdc->bmapmutex); - return; - } - - if (!BMAP_BIT_ISSET(krdc->dcio_bitmap, st)) { - BMAP_BIT_SET(krdc->dcio_bitmap, st); - if (krdc->bitmap_write > 0) { - fba = BIT_TO_FBA(st); - mutex_exit(&krdc->bmapmutex); - (void) rdc_write_bitmap_fba(krdc, fba); - mutex_enter(&krdc->bmapmutex); - } - urdc->bits_set++; - - } - mutex_exit(&krdc->bmapmutex); - st++; - } - -} - -/* - * rdc_std_count_dirty(krdc): - * - * Determine the number of segments that need to be flushed, This should - * agree with the number of segments logged, but since we don't lock when - * we increment, we force these values to agree - */ -static int -rdc_std_count_dirty(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int i, count, size; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_std_count_dirty: no bitmap configured for %s", - urdc->primary.file); -#endif - return (0); - } - - count = 0; - ASSERT(urdc->volume_size != 0); - size = FBA_TO_LOG_LEN(urdc->volume_size); - for (i = 0; i < size; i++) - if (BMAP_BIT_ISSET(krdc->dcio_bitmap, i)) - count++; - - if (count > size) - count = size; - - return (count); -} - - -static int -rdc_std_bit_isset(rdc_k_info_t *krdc, const int bit) -{ - return (BMAP_BIT_ISSET(krdc->dcio_bitmap, bit)); -} - - -/* - * rdc_std_fill_bitmap(krdc, write) - * - * Called to force bitmaps to a fully dirty state - */ -static int -rdc_std_fill_bitmap(rdc_k_info_t *krdc, const int write) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int i, size; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_std_fill_bitmap: no bitmap configured for %s", - urdc->primary.file); -#endif - return (-1); - } - - ASSERT(urdc->volume_size != 0); - size = FBA_TO_LOG_LEN(urdc->volume_size); - for (i = 0; i < size; i++) - BMAP_BIT_SET(krdc->dcio_bitmap, i); - - urdc->bits_set = size; - - if (write) - return (rdc_write_bitmap(krdc)); - - return (0); -} - - -/* - * rdc_std_zero_bitmap(krdc) - * - * Called on the secondary after a sync has completed to force bitmaps - * to a fully clean state - */ -static void -rdc_std_zero_bitmap(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int i, size; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_std_zero_bitmap: no bitmap configured for %s", - urdc->primary.file); -#endif - return; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!Clearing bitmap for %s", urdc->secondary.file); -#endif - - ASSERT(urdc->volume_size != 0); - size = FBA_TO_LOG_LEN(urdc->volume_size); - for (i = 0; i < size; i++) - BMAP_BIT_CLR(krdc->dcio_bitmap, i); - if (krdc->bitmap_write > 0) - (void) rdc_write_bitmap(krdc); - - urdc->bits_set = 0; -} - - -/* - * rdc_std_net_bmap() - * - * WARNING acts as both client and server - */ -static int -rdc_std_net_bmap(const struct bmap6 *b) -{ - rdc_k_info_t *krdc = &rdc_k_info[b->cd]; - struct net_bdata6 bd; - - bd.data.data_val = (char 
*)krdc->dcio_bitmap; - bd.cd = b->dual; - bd.offset = 0; - - return (_rdc_net_bmap(b, &bd)); -} - - -/* - * rdc_std_net_bdata - */ -static int -rdc_std_net_bdata(const struct net_bdata6 *bd) -{ - rdc_k_info_t *krdc = &rdc_k_info[bd->cd]; - - rdc_lor((uchar_t *)bd->data.data_val, - (uchar_t *)(((char *)krdc->dcio_bitmap) + bd->offset), bd->size); - - return (0); -} - - -static struct rdc_bitmap_ops rdc_std_bitmap_ops = { - rdc_std_set_bitmap, - rdc_std_clr_bitmap, - rdc_std_count_dirty, - rdc_std_bit_isset, - rdc_std_fill_bitmap, - rdc_std_zero_bitmap, - rdc_std_net_bmap, - rdc_std_net_bdata, - rdc_std_zero_bitref, - rdc_std_set_bitmask, - rdc_std_check_bit -}; - - -void -rdc_bitmap_init() -{ - rdc_bitmap_ops = &rdc_std_bitmap_ops; - rdc_wrflag = NSC_WRITE; -} - -static void -rdc_bmap_ref_byte_set(rdc_k_info_t *krdc, int ind) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - bmap[ind]++; -} - -static void -rdc_bmap_ref_byte_clr(rdc_k_info_t *krdc, int ind) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - bmap[ind]--; -} - -static unsigned int -rdc_bmap_ref_byte_isset(rdc_k_info_t *krdc, int ind) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - return ((unsigned int)(bmap[ind])); -} - -static void -rdc_bmap_ref_byte_force(rdc_k_info_t *krdc, int ind, unsigned int val) -{ - unsigned char *bmap = (unsigned char *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - bmap[ind] = (unsigned char) val; -} - -/* ARGSUSED */ -static unsigned int -rdc_bmap_ref_byte_maxval(rdc_k_info_t *krdc) -{ - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned char)); - return ((unsigned int)(UINT8_MAX)); -} - -struct bm_ref_ops rdc_ref_byte_ops = { - rdc_bmap_ref_byte_set, - rdc_bmap_ref_byte_clr, - rdc_bmap_ref_byte_isset, - rdc_bmap_ref_byte_force, - rdc_bmap_ref_byte_maxval, - sizeof (unsigned char) -}; - -static void -rdc_bmap_ref_int_set(rdc_k_info_t *krdc, int ind) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - bmap[ind]++; -} - -static void -rdc_bmap_ref_int_clr(rdc_k_info_t *krdc, int ind) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - bmap[ind]--; -} - -static unsigned int -rdc_bmap_ref_int_isset(rdc_k_info_t *krdc, int ind) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - return ((bmap[ind])); -} - -static void -rdc_bmap_ref_int_force(rdc_k_info_t *krdc, int ind, unsigned int val) -{ - unsigned int *bmap = (unsigned int *)krdc->bitmap_ref; - - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - bmap[ind] = val; -} - -/* ARGSUSED */ -static unsigned int -rdc_bmap_ref_int_maxval(rdc_k_info_t *krdc) -{ - ASSERT(BMAP_REF_SIZE(krdc) == sizeof (unsigned int)); - return ((unsigned int)(UINT_MAX)); -} - -struct bm_ref_ops rdc_ref_int_ops = { - rdc_bmap_ref_int_set, - rdc_bmap_ref_int_clr, - rdc_bmap_ref_int_isset, - rdc_bmap_ref_int_force, - rdc_bmap_ref_int_maxval, - sizeof (unsigned int) -}; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.h b/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.h deleted file mode 100644 index 72ee2b791a..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_bitmap.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * CDDL HEADER START - * - 
* The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_BITMAP_H -#define _RDC_BITMAP_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -extern int rdc_bitmap_mode; /* property from rdc.conf */ - -/* - * Possible values of rdc_bitmap_mode - integer flag. - */ -#define RDC_BMP_AUTO 0x0 /* auto detect bitmap mode */ -#define RDC_BMP_ALWAYS 0x1 /* always write the bitmap */ -#define RDC_BMP_NEVER 0x2 /* never write the bitmap */ - -#endif /* _KERNEL */ - -/* - * Public bitmap interface - * The bitmaps are maintained on 32 Kbyte segments - */ - -#define LOG_SHFT 15 -#define IND_BYTE(ind) ((ind) >> 3) -#define IND_BIT(ind) (1 << ((ind) & 0x7)) - -#define FBA_LOG_SHFT (LOG_SHFT - FBA_SHFT) -#define FBA_TO_LOG_NUM(x) ((x) >> FBA_LOG_SHFT) -#define LOG_TO_FBA_NUM(x) ((x) << FBA_LOG_SHFT) -#define FBA_TO_LOG_LEN(x) (FBA_TO_LOG_NUM((x)-1) + 1) - -#define BMAP_LOG_BYTES(fbas) (IND_BYTE(FBA_TO_LOG_NUM((fbas)-1))+1) - -#define BITS_IN_BYTE 8 - -/* - * Private macros for bitmap manipulation - */ - -#define BMAP_BIT_SET(bmap, ind) ((bmap)[IND_BYTE(ind)] |= IND_BIT(ind)) -#define BMAP_BIT_CLR(bmap, ind) ((bmap)[IND_BYTE(ind)] &= ~IND_BIT(ind)) -#define BMAP_BIT_ISSET(bmap, ind) \ - ((bmap)[IND_BYTE(ind)] & IND_BIT(ind)) - -#define BIT_TO_FBA(b) (FBA_NUM(b) >> 3) - -#define BMAP_REF_SET(krdc, ind) (((krdc)->bm_refs->bmap_ref_set)(krdc, ind)) -#define BMAP_REF_CLR(krdc, ind) (((krdc)->bm_refs->bmap_ref_clr)(krdc, ind)) -#define BMAP_REF_ISSET(krdc, ind) (((krdc)->bm_refs->bmap_ref_isset)(krdc, ind)) -#define BMAP_REF_FORCE(krdc, ind, val) \ - (((krdc)->bm_refs->bmap_ref_force)(krdc, ind, val)) -#define BMAP_REF_MAXVAL(krdc) (((krdc)->bm_refs->bmap_ref_maxval)(krdc)) -#define BMAP_REF_SIZE(krdc) ((krdc)->bm_refs->bmap_ref_size) -#define BMAP_REF_PREF_SIZE (sizeof (unsigned int)) - -#ifndef _KERNEL - -struct bm_ref_ops { - void (*bmap_ref_set)(void *, int); - void (*bmap_ref_clr)(void *, int); - unsigned int (*bmap_ref_isset)(void *, int); - void (*bmap_ref_force)(void *, int, unsigned int); - unsigned int (*bmap_ref_maxval)(void *); - size_t bmap_ref_size; -}; - -#else - -struct bm_ref_ops { - void (*bmap_ref_set)(rdc_k_info_t *, int); - void (*bmap_ref_clr)(rdc_k_info_t *, int); - unsigned int (*bmap_ref_isset)(rdc_k_info_t *, int); - void (*bmap_ref_force)(rdc_k_info_t *, int, unsigned int); - unsigned int (*bmap_ref_maxval)(rdc_k_info_t *); - size_t bmap_ref_size; -}; - - -/* convert fba to block number */ -#define _BNUM(x) (FBA_TO_LOG_NUM(x)) - -/* force reference clear during sync */ -#define RDC_BIT_BUMP 0x0 -#define RDC_BIT_FORCE 0x1 -#define RDC_BIT_FLUSHER 0x2 - -/* check for overlap, taking account of blocking factor */ 
-#define RDC_OVERLAP(p1, l1, p2, l2) \ - ((_BNUM(((p1) + (l1) - 1)) >= _BNUM((p2))) && \ - (_BNUM((p1)) <= _BNUM(((p2) + (l2) - 1)))) - -struct rdc_bitmap_ops { - int (*set_bitmap)(rdc_k_info_t *, const nsc_off_t, const nsc_size_t, - uint_t *); - void (*clr_bitmap)(rdc_k_info_t *, const nsc_off_t, const nsc_size_t, - const uint_t, const int); - int (*count_dirty)(rdc_k_info_t *); - int (*bit_isset)(rdc_k_info_t *, const int); - int (*fill_bitmap)(rdc_k_info_t *, const int); - void (*zero_bitmap)(rdc_k_info_t *); - int (*net_bmap)(const struct bmap6 *); - int (*net_b_data)(const struct net_bdata6 *); - void (*zero_bitref)(rdc_k_info_t *); - void (*set_bitmask)(const nsc_off_t, const nsc_size_t, uint_t *); - void (*check_bit)(rdc_k_info_t *, nsc_off_t, nsc_size_t); -}; - -extern struct rdc_bitmap_ops *rdc_bitmap_ops; - -#define RDC_SET_BITMAP(krdc, pos, len, bitmaskp) \ - (*rdc_bitmap_ops->set_bitmap)(krdc, pos, len, bitmaskp) -#define RDC_CLR_BITMAP(krdc, pos, len, bitmask, flag) \ - (*rdc_bitmap_ops->clr_bitmap)(krdc, pos, len, bitmask, flag) -#define RDC_COUNT_BITMAP(krdc) \ - (*rdc_bitmap_ops->count_dirty)(krdc) -#define RDC_BIT_ISSET(krdc, bit) \ - (*rdc_bitmap_ops->bit_isset)(krdc, bit) -#define RDC_FILL_BITMAP(krdc, write) \ - (*rdc_bitmap_ops->fill_bitmap)(krdc, write) -#define RDC_ZERO_BITMAP(krdc) \ - (*rdc_bitmap_ops->zero_bitmap)(krdc) -#define RDC_SEND_BITMAP(argp) \ - (*rdc_bitmap_ops->net_bmap)(argp) -#define RDC_OR_BITMAP(argp) \ - (*rdc_bitmap_ops->net_b_data)(argp) -#define RDC_ZERO_BITREF(krdc) \ - (*rdc_bitmap_ops->zero_bitref)(krdc) -#define RDC_SET_BITMASK(off, len, maskp) \ - (*rdc_bitmap_ops->set_bitmask)(off, len, maskp) -#define RDC_CHECK_BIT(krdc, pos, len) \ - (*rdc_bitmap_ops->check_bit)(krdc, pos, len) - -/* - * Functions - */ - -extern void rdc_bitmap_init(void); -extern int rdc_move_bitmap(rdc_k_info_t *, char *); -extern int rdc_enable_bitmap(rdc_k_info_t *, int); -extern int rdc_resume_bitmap(rdc_k_info_t *); -extern int rdc_reset_bitmap(rdc_k_info_t *); -extern void rdc_free_bitmap(rdc_k_info_t *, int); -extern void rdc_close_bitmap(rdc_k_info_t *); -extern int rdc_write_bitmap(rdc_k_info_t *); -extern int rdc_write_bitmap_fill(rdc_k_info_t *); -extern void rdc_set_bitmap_many(rdc_k_info_t *, nsc_off_t, nsc_size_t); -extern void rdc_merge_bitmaps(rdc_k_info_t *, rdc_k_info_t *); - -extern int rdc_read_state(rdc_k_info_t *, int *, int *); -extern int rdc_clear_state(rdc_k_info_t *); -extern void rdc_write_state(rdc_u_info_t *); -extern int rdc_ns_io(nsc_fd_t *, int, nsc_off_t, uchar_t *, nsc_size_t); -extern int rdc_read_refcount(rdc_k_info_t *); -extern int rdc_write_refcount(rdc_k_info_t *); -extern size_t rdc_refcntsize(rdc_k_info_t *); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_BITMAP_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.c b/usr/src/uts/common/avs/ns/rdc/rdc_clnt.c deleted file mode 100644 index 971cb09ec0..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.c +++ /dev/null @@ -1,3381 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Network data replicator Client side */ - - -#include <sys/types.h> -#include <sys/debug.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/byteorder.h> -#include <sys/errno.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#ifndef DS_DDICT -#include <rpc/auth.h> -#include <rpc/svc.h> -#include <rpc/xdr.h> -#endif -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_clnt.h" -#include "rdc_bitmap.h" -#include "rdc_diskq.h" - - -kmutex_t rdc_clnt_lock; - -#ifdef DEBUG -int noflush = 0; -#endif - -int rdc_rpc_tmout = RDC_CLNT_TMOUT; -static void rdc_clnt_free(struct chtab *, CLIENT *); -static void _rdc_remote_flush(rdc_aio_t *); - -void rdc_flush_memq(int index); -void rdc_flush_diskq(int index); -int rdc_drain_net_queue(int index); -void rdc_flusher_thread(int index); -int rdc_diskq_enqueue(rdc_k_info_t *krdc, rdc_aio_t *); -void rdc_init_diskq_header(rdc_group_t *grp, dqheader *hd); -void rdc_dump_iohdrs(disk_queue *dq); -rdc_aio_t *rdc_dequeue(rdc_k_info_t *krdc, int *rc); -void rdc_clr_iohdr(rdc_k_info_t *krdc, nsc_off_t qpos); -void rdc_close_diskq(rdc_group_t *krdc); - -int rdc_writer(int index); - -static struct chtab *rdc_chtable = NULL; -static int rdc_clnt_toomany; -#ifdef DEBUG -static int rdc_ooreply; -#endif - -extern void rdc_fail_diskq(rdc_k_info_t *krdc, int wait, int flag); -extern int _rdc_rsrv_diskq(rdc_group_t *group); -extern void _rdc_rlse_diskq(rdc_group_t *group); - -static enum clnt_stat -cl_call_sig(struct __client *rh, rpcproc_t proc, - xdrproc_t xargs, caddr_t argsp, xdrproc_t xres, - caddr_t resp, struct timeval secs) -{ - enum clnt_stat stat; - k_sigset_t smask; - sigintr(&smask, 0); - rh->cl_nosignal = TRUE; - stat = ((*(rh)->cl_ops->cl_call)\ - (rh, proc, xargs, argsp, xres, resp, secs)); - rh->cl_nosignal = FALSE; - sigunintr(&smask); - return (stat); -} - -int -rdc_net_getsize(int index, uint64_t *sizeptr) -{ - struct timeval t; - int err, size; - rdc_k_info_t *krdc = &rdc_k_info[index]; - int remote_index = krdc->remote_index; - - *sizeptr = 0; - if (krdc->remote_index < 0) - return (EINVAL); - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_getsize: null intf for index %d", index); -#endif - if (krdc->rpc_version <= RDC_VERSION5) { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_GETSIZE, - krdc->rpc_version, xdr_int, (char *)&remote_index, - xdr_int, (char *)&size, &t); - if (err == 0) - *sizeptr = size; - } else { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_GETSIZE6, - 
krdc->rpc_version, xdr_int, (char *)&remote_index, - xdr_u_longlong_t, (char *)sizeptr, &t); - } - return (err); -} - - -int -rdc_net_state(int index, int options) -{ - struct timeval t; - int err; - int remote_index = -1; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_k_info_t *krdc = &rdc_k_info[index]; - struct set_state s; - struct set_state4 s4; - char neta[32], rneta[32]; - unsigned short *sp; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (krdc->rpc_version < RDC_VERSION7) { - s4.netaddrlen = urdc->primary.addr.len; - s4.rnetaddrlen = urdc->secondary.addr.len; - bcopy(urdc->primary.addr.buf, s4.netaddr, s4.netaddrlen); - bcopy(urdc->secondary.addr.buf, s4.rnetaddr, s4.rnetaddrlen); - (void) strncpy(s4.pfile, urdc->primary.file, RDC_MAXNAMLEN); - (void) strncpy(s4.sfile, urdc->secondary.file, RDC_MAXNAMLEN); - s4.flag = options; - - err = rdc_clnt_call(krdc->lsrv, RDCPROC_STATE, - krdc->rpc_version, xdr_set_state4, (char *)&s4, xdr_int, - (char *)&remote_index, &t); - } else { - s.netaddrlen = urdc->primary.addr.len; - s.rnetaddrlen = urdc->secondary.addr.len; - s.netaddr.buf = neta; - s.rnetaddr.buf = rneta; - bcopy(urdc->primary.addr.buf, s.netaddr.buf, s.netaddrlen); - bcopy(urdc->secondary.addr.buf, s.rnetaddr.buf, s.rnetaddrlen); - s.netaddr.len = urdc->primary.addr.len; - s.rnetaddr.len = urdc->secondary.addr.len; - s.netaddr.maxlen = urdc->primary.addr.len; - s.rnetaddr.maxlen = urdc->secondary.addr.len; - sp = (unsigned short *)s.netaddr.buf; - *sp = htons(*sp); - sp = (unsigned short *)s.rnetaddr.buf; - *sp = htons(*sp); - s.pfile = urdc->primary.file; - s.sfile = urdc->secondary.file; - s.flag = options; - - err = rdc_clnt_call(krdc->lsrv, RDCPROC_STATE, - krdc->rpc_version, xdr_set_state, (char *)&s, xdr_int, - (char *)&remote_index, &t); - } - - if (err) - return (-1); - else - return (remote_index); -} - - -/* - * rdc_net_getbmap - * gets the bitmaps from remote side and or's them with remote bitmap - */ -int -rdc_net_getbmap(int index, int size) -{ - struct timeval t; - int err; - struct bmap b; - struct bmap6 b6; - rdc_k_info_t *krdc; - - krdc = &rdc_k_info[index]; - - if (krdc->remote_index < 0) - return (EINVAL); - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_getbmap: null intf for index %d", index); -#endif - - if (krdc->rpc_version <= RDC_VERSION5) { - b.cd = krdc->remote_index; - b.dual = index; - b.size = size; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_BMAP, - krdc->rpc_version, xdr_bmap, (char *)&b, xdr_int, - (char *)&err, &t); - - } else { - b6.cd = krdc->remote_index; - b6.dual = index; - b6.size = size; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_BMAP6, - krdc->rpc_version, xdr_bmap6, (char *)&b6, xdr_int, - (char *)&err, &t); - } - return (err); -} - -int sndr_proto = 0; - -/* - * return state corresponding to rdc_host - */ -int -rdc_net_getstate(rdc_k_info_t *krdc, int *serial_mode, int *use_mirror, - int *mirror_down, int network) -{ - int err; - struct timeval t; - int state; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - struct set_state s; -#ifdef sparc - struct set_state4 s4; -#endif - char neta[32]; - char rneta[32]; - unsigned short *sp; - char *setp = (char *)&s; - xdrproc_t xdr_proc = xdr_set_state; - - if (krdc->lsrv && (krdc->intf == NULL || krdc->intf->if_down) && - network) /* fail fast */ - return (-1); - - s.netaddrlen = urdc->primary.addr.len; - s.rnetaddrlen = urdc->secondary.addr.len; - s.pfile = urdc->primary.file; - s.sfile = urdc->secondary.file; - 
s.netaddr.buf = neta; - s.rnetaddr.buf = rneta; - bcopy(urdc->primary.addr.buf, s.netaddr.buf, s.netaddrlen); - bcopy(urdc->secondary.addr.buf, s.rnetaddr.buf, s.rnetaddrlen); - sp = (unsigned short *) s.netaddr.buf; - *sp = htons(*sp); - sp = (unsigned short *) s.rnetaddr.buf; - *sp = htons(*sp); - s.netaddr.len = urdc->primary.addr.len; - s.rnetaddr.len = urdc->secondary.addr.len; - s.netaddr.maxlen = urdc->primary.addr.maxlen; - s.rnetaddr.maxlen = urdc->secondary.addr.maxlen; - s.flag = 0; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (sndr_proto) - krdc->rpc_version = sndr_proto; - else - krdc->rpc_version = RDC_VERS_MAX; - -again: - err = rdc_clnt_call(krdc->lsrv, RDCPROC_GETSTATE4, krdc->rpc_version, - xdr_proc, setp, xdr_int, (char *)&state, &t); - - if (err == RPC_PROGVERSMISMATCH && (krdc->rpc_version != - RDC_VERS_MIN)) { - if (krdc->rpc_version-- == RDC_VERSION7) { - /* set_state struct changed with v7 of protocol */ -#ifdef sparc - s4.netaddrlen = urdc->primary.addr.len; - s4.rnetaddrlen = urdc->secondary.addr.len; - bcopy(urdc->primary.addr.buf, s4.netaddr, - s4.netaddrlen); - bcopy(urdc->secondary.addr.buf, s4.rnetaddr, - s4.rnetaddrlen); - (void) strncpy(s4.pfile, urdc->primary.file, - RDC_MAXNAMLEN); - (void) strncpy(s4.sfile, urdc->secondary.file, - RDC_MAXNAMLEN); - s4.flag = 0; - xdr_proc = xdr_set_state4; - setp = (char *)&s4; -#else - /* x64 can not use protocols < 7 */ - return (-1); -#endif - } - goto again; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!sndr get_state: Protocol ver %d", krdc->rpc_version); -#endif - - if (err) { - return (-1); - } - - if (state == -1) - return (-1); - - if (serial_mode) - *serial_mode = (state >> 2) & 1; - if (use_mirror) - *use_mirror = (state >> 1) & 1; - if (mirror_down) - *mirror_down = state & 1; - - return (0); -} - - -static struct xdr_discrim rdres_discrim[2] = { - { (int)RDC_OK, xdr_readok }, - { __dontcare__, NULL_xdrproc_t } -}; - - -/* - * Reply from remote read (client side) - */ -static bool_t -xdr_rdresult(XDR *xdrs, readres *rr) -{ - - return (xdr_union(xdrs, (enum_t *)&(rr->rr_status), - (caddr_t)&(rr->rr_ok), rdres_discrim, xdr_void)); -} - -static int -rdc_rrstatus_decode(int status) -{ - int ret = 0; - - if (status != RDC_OK) { - switch (status) { - case RDCERR_NOENT: - ret = ENOENT; - break; - case RDCERR_NOMEM: - ret = ENOMEM; - break; - default: - ret = EIO; - break; - } - } - - return (ret); -} - - -int -rdc_net_read(int local_index, int remote_index, nsc_buf_t *handle, - nsc_off_t fba_pos, nsc_size_t fba_len) -{ - struct rdcrdresult rr; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct rread list; - struct rread6 list6; - struct timeval t; - uchar_t *sv_addr; - nsc_vec_t *vec; - int rpc_flag; - nsc_size_t sv_len; - int err; - int ret; - nsc_size_t len; - nsc_size_t maxfbas; - int transflag; - - if (handle == NULL) - return (EINVAL); - - if (!RDC_HANDLE_LIMITS(handle, fba_pos, fba_len)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: handle bounds"); -#endif - return (EINVAL); - } - - krdc = &rdc_k_info[local_index]; - urdc = &rdc_u_info[local_index]; - - maxfbas = MAX_RDC_FBAS; - - if (krdc->remote_fd && !(rdc_get_vflags(urdc) & RDC_FCAL_FAILED)) { - nsc_buf_t *remote_h = NULL; - int reserved = 0; - - ret = nsc_reserve(krdc->remote_fd, NSC_MULTI); - if (RDC_SUCCESS(ret)) { - reserved = 1; - ret = nsc_alloc_buf(krdc->remote_fd, fba_pos, fba_len, - NSC_RDBUF, &remote_h); - } - if (RDC_SUCCESS(ret)) { - ret = nsc_copy(remote_h, handle, fba_pos, fba_pos, - fba_len); - if (RDC_SUCCESS(ret)) { - (void) 
nsc_free_buf(remote_h); - nsc_release(krdc->remote_fd); - return (0); - } - } - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_FCAL_FAILED); - rdc_group_exit(krdc); - if (remote_h) - (void) nsc_free_buf(remote_h); - if (reserved) - nsc_release(krdc->remote_fd); - } - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (rdc_get_vflags(urdc) & RDC_VOL_FAILED) - rpc_flag = RDC_RREAD_FAIL; - else - rpc_flag = 0; - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_read: null intf for index %d", local_index); -#endif - /* - * switch on proto version. - */ - len = fba_len; /* length (FBAs) still to xfer */ - rr.rr_bufsize = 0; /* rpc data buffer length (bytes) */ - rr.rr_data = NULL; /* rpc data buffer */ - transflag = rpc_flag | RDC_RREAD_START; /* setup rpc */ - if (krdc->rpc_version <= RDC_VERSION5) { - ASSERT(fba_pos <= INT32_MAX); - list.pos = (int)fba_pos; /* fba position of start of chunk */ - list.cd = remote_index; /* remote end cd */ - /* send setup rpc */ - list.flag = transflag; - ASSERT(len <= INT32_MAX); - list.len = (int)len; /* total fba length */ - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - krdc->rpc_version, xdr_rread, (char *)&list, xdr_int, - (char *)&ret, &t); - - } else { - list6.pos = fba_pos; /* fba position of start of chunk */ - list6.cd = remote_index; /* remote end cd */ - /* send setup rpc */ - list6.flag = transflag; /* setup rpc */ - ASSERT(len <= INT32_MAX); - list6.len = (int)len; /* total fba length */ - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - krdc->rpc_version, xdr_rread6, (char *)&list6, xdr_int, - (char *)&ret, &t); - } - - if (err) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: setup err %d", err); -#endif - if (err == RPC_INTR) - ret = EINTR; - else - ret = ENOLINK; - - goto remote_rerror; - } - - if (ret == 0) { /* No valid index from r_net_read */ -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_read: no valid index from r_net_read"); -#endif - return (ENOBUFS); - } - transflag = rpc_flag | RDC_RREAD_DATA; - if (krdc->rpc_version <= RDC_VERSION5) { - list.idx = ret; /* save idx to return to server */ - list.flag = transflag; - /* move onto to data xfer rpcs */ - } else { - list6.idx = ret; /* save idx to return to server */ - list6.flag = transflag; - } - - /* find starting position in handle */ - - vec = handle->sb_vec; - - fba_pos -= handle->sb_pos; - - for (; fba_pos >= FBA_NUM(vec->sv_len); vec++) - fba_pos -= FBA_NUM(vec->sv_len); - - sv_addr = vec->sv_addr + FBA_SIZE(fba_pos); /* data in vector */ - sv_len = vec->sv_len - FBA_SIZE(fba_pos); /* bytes in vector */ - - while (len) { - nsc_size_t translen; - if (len > maxfbas) { - translen = maxfbas; - } else { - translen = len; - } - - if (FBA_SIZE(translen) > sv_len) { - translen = FBA_NUM(sv_len); - } - - len -= translen; - if (len == 0) { - /* last data xfer rpc - tell server to cleanup */ - transflag |= RDC_RREAD_END; - } - - if (!rr.rr_data || (nsc_size_t)rr.rr_bufsize != - FBA_SIZE(translen)) { - if (rr.rr_data) - kmem_free(rr.rr_data, rr.rr_bufsize); - - ASSERT(FBA_SIZE(translen) <= INT32_MAX); - rr.rr_bufsize = FBA_SIZE(translen); - rr.rr_data = kmem_alloc(rr.rr_bufsize, KM_NOSLEEP); - } - - if (!rr.rr_data) { - /* error */ -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: kmem_alloc failed"); -#endif - return (ENOMEM); - } - - /* get data from remote end */ - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_read: null intf for index %d", - local_index); -#endif - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - 
kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - /*CONSTCOND*/ - ASSERT(RDC_MAXDATA <= INT32_MAX); - ASSERT(translen <= RDC_MAXDATA); - if (krdc->rpc_version <= RDC_VERSION5) { - list.len = (int)translen; - list.flag = transflag; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - krdc->rpc_version, xdr_rread, (char *)&list, - xdr_rdresult, (char *)&rr, &t); - } else { - list6.len = (int)translen; - list6.flag = transflag; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - krdc->rpc_version, xdr_rread6, (char *)&list6, - xdr_rdresult, (char *)&rr, &t); - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - if (err) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_read: rpc err %d", err); -#endif - if (err == RPC_INTR) { - ret = EINTR; - } else { - ret = ENOLINK; - } - - goto remote_rerror; - } - - if (rr.rr_status != RDC_OK) { - ret = rdc_rrstatus_decode(rr.rr_status); - if (!ret) - ret = EIO; - - goto remote_rerror; - } - - /* copy into handle */ - - bcopy(rr.rr_data, sv_addr, (size_t)rr.rr_bufsize); - - /* update counters */ - - sv_addr += rr.rr_bufsize; - if (krdc->rpc_version <= RDC_VERSION5) { - list.pos += translen; - } else { - list6.pos += translen; - } - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->reads++; - KSTAT_IO_PTR(krdc->io_kstats)->nread += rr.rr_bufsize; - } - ASSERT(sv_len <= INT32_MAX); - ASSERT(sv_len >= (nsc_size_t)rr.rr_bufsize); - sv_len -= rr.rr_bufsize; - - if (sv_len == 0) { - /* goto next vector */ - vec++; - sv_addr = vec->sv_addr; - sv_len = vec->sv_len; - } - } - - if (rr.rr_data) - kmem_free(rr.rr_data, rr.rr_bufsize); - - return (0); - -remote_rerror: - if (rr.rr_data) - kmem_free(rr.rr_data, rr.rr_bufsize); - - return (ret ? 
ret : ENOLINK); -} - -/* - * rdc_net_write - * Main remote write client side - * Handles protocol selection as well as requests for remote allocation - * and data transfer - * Does local IO for FCAL - * caller must clear bitmap on success - */ - -int -rdc_net_write(int local_index, int remote_index, nsc_buf_t *handle, - nsc_off_t fba_pos, nsc_size_t fba_len, uint_t aseq, int qpos, - netwriteres *netres) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct timeval t; - nsc_vec_t *vec; - int sv_len; - nsc_off_t fpos; - int err; - struct netwriteres netret; - struct netwriteres *netresptr; - struct net_data5 dlist5; - struct net_data6 dlist6; - int ret; - nsc_size_t maxfbas; - int transflag; - int translen; - int transendoblk; - char *transptr; - int vflags; - - if (handle == NULL) - return (EINVAL); - - /* if not a diskq buffer */ - if ((qpos == -1) && (!RDC_HANDLE_LIMITS(handle, fba_pos, fba_len))) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_net_write: handle bounds"); -#endif - return (EINVAL); - } - - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - krdc = &rdc_k_info[local_index]; - urdc = &rdc_u_info[local_index]; - - maxfbas = MAX_RDC_FBAS; - - /* FCAL IO */ - if (krdc->remote_fd && !(rdc_get_vflags(urdc) & RDC_FCAL_FAILED)) { - nsc_buf_t *remote_h = NULL; - int reserved = 0; - - ret = nsc_reserve(krdc->remote_fd, NSC_MULTI); - if (RDC_SUCCESS(ret)) { - reserved = 1; - ret = nsc_alloc_buf(krdc->remote_fd, fba_pos, fba_len, - NSC_WRBUF, &remote_h); - } - if (RDC_SUCCESS(ret)) { - ret = nsc_copy(handle, remote_h, fba_pos, fba_pos, - fba_len); - if (RDC_SUCCESS(ret)) - ret = nsc_write(remote_h, fba_pos, fba_len, 0); - if (RDC_SUCCESS(ret)) { - (void) nsc_free_buf(remote_h); - nsc_release(krdc->remote_fd); - return (0); - } - } - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_FCAL_FAILED); - rdc_group_exit(krdc); - if (remote_h) - (void) nsc_free_buf(remote_h); - if (reserved) - nsc_release(krdc->remote_fd); - } - - /* - * At this point we must decide which protocol we are using and - * do the right thing - */ - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - if (netres) { - netresptr = netres; - } else { - netresptr = &netret; - } - - vflags = rdc_get_vflags(urdc); - - if (vflags & (RDC_VOL_FAILED|RDC_BMP_FAILED)) - transflag = RDC_RWRITE_FAIL; - else - transflag = 0; - - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, "!rdc_net_write: null intf for index %d", - local_index); -#endif - - vec = handle->sb_vec; - - /* - * find starting position in vector - */ - if ((qpos == -1) || (handle->sb_user == RDC_NULLBUFREAD)) - fpos = fba_pos - handle->sb_pos; - else - fpos = (qpos + 1) - handle->sb_pos; - - for (; fpos >= FBA_NUM(vec->sv_len); vec++) - fpos -= FBA_NUM(vec->sv_len); - sv_len = vec->sv_len - FBA_SIZE(fpos); /* bytes in vector */ - transptr = (char *)vec->sv_addr + FBA_SIZE(fpos); - - if (krdc->rpc_version <= RDC_VERSION5) { - dlist5.local_cd = local_index; - dlist5.cd = remote_index; - ASSERT(fba_len <= INT32_MAX); - ASSERT(fba_pos <= INT32_MAX); - dlist5.len = (int)fba_len; - dlist5.pos = (int)fba_pos; - dlist5.idx = -1; /* Starting index */ - dlist5.flag = transflag; - dlist5.seq = aseq; /* sequence number */ - dlist5.sfba = (int)fba_pos; /* starting fba for this xfer */ - } else { - dlist6.local_cd = local_index; - dlist6.cd = remote_index; - ASSERT(fba_len <= INT32_MAX); - dlist6.len = (int)fba_len; - dlist6.qpos = qpos; - dlist6.pos = fba_pos; - dlist6.idx = -1; /* Starting index */ - dlist6.flag = transflag; - dlist6.seq = aseq; /* sequence 
number */ - dlist6.sfba = fba_pos; /* starting fba for this xfer */ - } - - transendoblk = 0; - while (fba_len) { - if (!transptr) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_net_write: walked off end of handle!"); -#endif - ret = EINVAL; - goto remote_error; - } - - if (fba_len > maxfbas) { - ASSERT(maxfbas <= INT32_MAX); - translen = (int)maxfbas; - } else { - ASSERT(fba_len <= INT32_MAX); - translen = (int)fba_len; - } - - if (FBA_SIZE(translen) > sv_len) { - translen = FBA_NUM(sv_len); - } - - fba_len -= translen; - if (fba_len == 0) { - /* last data xfer - tell server to commit */ - transendoblk = 1; - } - - -#ifdef DEBUG - if (krdc->intf == NULL) - cmn_err(CE_WARN, - "!rdc_net_write: null intf for index %d", - local_index); -#endif - DTRACE_PROBE(rdc_netwrite_clntcall_start); - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - if (krdc->rpc_version <= RDC_VERSION5) { - ret = 0; - dlist5.nfba = translen; - dlist5.endoblk = transendoblk; - dlist5.data.data_len = FBA_SIZE(translen); - dlist5.data.data_val = transptr; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_WRITE5, - krdc->rpc_version, xdr_net_data5, - (char *)&dlist5, xdr_int, - (char *)&ret, &t); - if (ret >= 0) { - netresptr->result = 0; - netresptr->index = ret; - } else { - netresptr->result = ret; - } - } else { - netresptr->result = 0; - dlist6.nfba = translen; - dlist6.endoblk = transendoblk; - dlist6.data.data_len = FBA_SIZE(translen); - dlist6.data.data_val = transptr; - err = rdc_clnt_call(krdc->lsrv, RDCPROC_WRITE6, - krdc->rpc_version, xdr_net_data6, - (char *)&dlist6, xdr_netwriteres, - (char *)netresptr, &t); - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - DTRACE_PROBE(rdc_netwrite_clntcall_end); - ret = netresptr->result; - if (err) { - if (err == RPC_INTR) - ret = EINTR; - else if (err && ret != EPROTO) - ret = ENOLINK; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_write(5): cd %d err %d ret %d", - remote_index, err, ret); -#endif - goto remote_error; - } - /* Error from r_net_write5 */ - if (netresptr->result < 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_write: r_net_write(5) " - "returned: %d", - -netresptr->result); -#endif - ret = -netresptr->result; - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - goto remote_error; - } else if (netresptr->index == 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc_net_write: no valid index from " - "r_net_write(5)"); -#endif - ret = ENOBUFS; - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - goto remote_error; - } - if (krdc->rpc_version <= RDC_VERSION5) { - dlist5.idx = netresptr->index; - dlist5.sfba += dlist5.nfba; - } else { - dlist6.idx = netresptr->index; - dlist6.sfba += dlist6.nfba; - } - /* update counters */ - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->writes++; - KSTAT_IO_PTR(krdc->io_kstats)->nwritten += - FBA_SIZE(translen); - } - transptr += FBA_SIZE(translen); - sv_len -= FBA_SIZE(translen); - - if (sv_len <= 0) { - /* goto next vector */ - vec++; - transptr = (char *)vec->sv_addr; - sv_len = vec->sv_len; - } - } - /* - * this can't happen..... 
- */ - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - - return (0); - -remote_error: - return (ret ? ret : ENOLINK); -} - -void -rdc_fixlen(rdc_aio_t *aio) -{ - nsc_vec_t *vecp = aio->qhandle->sb_vec; - nsc_size_t len = 0; - - while (vecp->sv_addr) { - len += FBA_NUM(vecp->sv_len); - vecp++; - } - aio->qhandle->sb_len = len; -} - -/* - * rdc_dump_alloc_bufs_cd - * Dump allocated buffers (rdc_net_hnd's) for the specified cd. - * this could be the flusher failing, if so, don't do the delay forever - * Returns: 0 (success), EAGAIN (caller needs to try again). - */ -int -rdc_dump_alloc_bufs_cd(int index) -{ - rdc_k_info_t *krdc; - rdc_aio_t *aio; - net_queue *q; - disk_queue *dq; - kmutex_t *qlock; - - krdc = &rdc_k_info[index]; - - - if (!krdc->c_fd) { - /* cannot do anything! */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dump_alloc_bufs_cd(%d): c_fd NULL", - index); -#endif - return (0); - } - rdc_dump_dsets(index); - - dq = &krdc->group->diskq; - - if (RDC_IS_DISKQ(krdc->group)) { - qlock = QLOCK(dq); - (void) _rdc_rsrv_diskq(krdc->group); - } else { - qlock = &krdc->group->ra_queue.net_qlock; - } - - /* - * Now dump the async queue anonymous buffers - * if we are a diskq, the we are using the diskq mutex. - * However, we are flushing from diskq to memory queue - * so we now need to grab the memory lock also - */ - - q = &krdc->group->ra_queue; - - if (RDC_IS_DISKQ(krdc->group)) { - mutex_enter(&q->net_qlock); - if (q->qfill_sleeping == RDC_QFILL_AWAKE) { - int tries = 5; -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, - "!dumpalloccd sending diskq->memq flush to sleep"); -#endif - q->qfflags |= RDC_QFILLSLEEP; - mutex_exit(&q->net_qlock); - - while (q->qfill_sleeping == RDC_QFILL_AWAKE && tries--) - delay(5); - mutex_enter(&q->net_qlock); - } - } - - mutex_enter(qlock); - - while ((q->net_qhead != NULL)) { - rdc_k_info_t *tmpkrdc; - aio = q->net_qhead; - tmpkrdc = &rdc_k_info[aio->index]; - - if (RDC_IS_DISKQ(krdc->group)) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - } - } else { - if (aio->handle) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - } - - if (tmpkrdc->io_kstats && !RDC_IS_DISKQ(krdc->group)) { - mutex_enter(tmpkrdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(tmpkrdc->io_kstats)); - mutex_exit(tmpkrdc->io_kstats->ks_lock); - } - q->net_qhead = q->net_qhead->next; - q->blocks -= aio->len; - q->nitems--; - - RDC_CHECK_BIT(tmpkrdc, aio->pos, aio->len); - - kmem_free(aio, sizeof (*aio)); - } - q->net_qtail = NULL; - - if (krdc->group->asyncstall) { - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - } - if (krdc->group->sleepq) { - rdc_sleepqdiscard(krdc->group); - } - - krdc->group->seq = RDC_NEWSEQ; - krdc->group->seqack = RDC_NEWSEQ; - if (RDC_IS_DISKQ(krdc->group)) { - rdc_dump_iohdrs(dq); - SET_QNXTIO(dq, QHEAD(dq)); - SET_QCOALBOUNDS(dq, QHEAD(dq)); - } - mutex_exit(qlock); - - if (RDC_IS_DISKQ(krdc->group)) { - mutex_exit(&q->net_qlock); - _rdc_rlse_diskq(krdc->group); - } - - return (0); -} - - -/* - * rdc_dump_alloc_bufs - * We have an error on the link - * Try to dump all of the allocated bufs so we can cleanly recover - * and not hang - */ -void -rdc_dump_alloc_bufs(rdc_if_t *ip) -{ - rdc_k_info_t *krdc; - int repeat; - int index; - - for (index = 0; index < rdc_max_sets; index++) { - do { - krdc = &rdc_k_info[index]; - repeat = 0; - 
if (krdc->intf == ip) { - if (rdc_dump_alloc_bufs_cd(index) == EAGAIN) { - repeat = 1; - delay(2); - } - } - } while (repeat); - } -} - -/* - * returns 1 if the the throttle should throttle, 0 if not. - */ -int -_rdc_diskq_isfull(disk_queue *q, long len) -{ - /* ---T----H----N--- */ - mutex_enter(QLOCK(q)); - - if (FITSONQ(q, len + 1)) { - mutex_exit(QLOCK(q)); - return (0); - } - mutex_exit(QLOCK(q)); - return (1); -} - -void -_rdc_async_throttle(rdc_k_info_t *this, long len) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int print_msg = 1; - int tries = RDC_FUTILE_ATTEMPTS; - - /* - * Throttle entries on queue - */ - - /* Need to take the 1-many case into account, checking all sets */ - - /* ADD HANDY HUERISTIC HERE TO SLOW DOWN IO */ - for (krdc = this; /* CSTYLED */; krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - - /* - * this may be the last set standing in a one to many setup. - * we may also be stuck in unintercept, after marking - * the volume as not enabled, but have not removed it - * from the many list resulting in an endless loop if - * we just continue here. Lets jump over this stuff - * and check to see if we are the only dude here. - */ - if (!IS_ENABLED(urdc)) - goto thischeck; - - if (IS_ASYNC(urdc) && RDC_IS_MEMQ(krdc->group)) { - net_queue *q = &krdc->group->ra_queue; - while ((q->blocks + q->inflbls) > urdc->maxqfbas || - (q->nitems + q->inflitems) > urdc->maxqitems) { - - if (!IS_ENABLED(urdc)) /* disable race */ - goto thischeck; - - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - delay(2); - q->throttle_delay++; - } - } - - /* do a much more aggressive delay, get disk flush going */ - if (IS_ASYNC(urdc) && RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - while ((!IS_QSTATE(q, RDC_QNOBLOCK)) && - (_rdc_diskq_isfull(q, len)) && - (!IS_STATE(urdc, RDC_DISKQ_FAILED))) { - if (print_msg) { - cmn_err(CE_WARN, "!rdc async throttle:" - " disk queue %s full", - &urdc->disk_queue[0]); - - print_msg = 0; - } - if (!IS_ENABLED(urdc)) /* disable race */ - goto thischeck; - - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - delay(10); - q->throttle_delay += 10; - - if (!(tries--) && IS_STATE(urdc, RDC_QUEUING)) { - cmn_err(CE_WARN, "!SNDR: disk queue " - "%s full & not flushing. 
giving up", - &urdc->disk_queue[0]); - cmn_err(CE_WARN, "!SNDR: %s:%s entering" - " logging mode", - urdc->secondary.intf, - urdc->secondary.file); - rdc_fail_diskq(krdc, RDC_WAIT, - RDC_DOLOG | RDC_NOFAIL); - mutex_enter(QLOCK(q)); - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); - } - - } - if ((IS_QSTATE(q, RDC_QNOBLOCK)) && - _rdc_diskq_isfull(q, len) && - !IS_STATE(urdc, RDC_DISKQ_FAILED)) { - if (print_msg) { - cmn_err(CE_WARN, "!disk queue %s full", - &urdc->disk_queue[0]); - print_msg = 0; - } - rdc_fail_diskq(krdc, RDC_WAIT, - RDC_DOLOG | RDC_NOFAIL); - mutex_enter(QLOCK(q)); - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); - } - } - -thischeck: - if (krdc->many_next == this) - break; - } -} - -int rdc_coalesce = 1; -static int rdc_joins = 0; - -int -rdc_aio_coalesce(rdc_aio_t *queued, rdc_aio_t *new) -{ - nsc_buf_t *h = NULL; - int rc; - rdc_k_info_t *krdc; - uint_t bitmask; - - if (rdc_coalesce == 0) - return (0); /* don't even try */ - - if ((queued == NULL) || - (queued->handle == NULL) || - (new->handle == NULL)) { - return (0); /* existing queue is empty */ - } - if (queued->index != new->index || queued->len + new->len > - MAX_RDC_FBAS) { - return (0); /* I/O to big */ - } - if ((queued->pos + queued->len == new->pos) || - (new->pos + new->len == queued->pos)) { - rc = nsc_alloc_abuf(queued->pos, queued->len + new->len, 0, - &h); - if (!RDC_SUCCESS(rc)) { - if (h != NULL) - (void) nsc_free_buf(h); - return (0); /* couldn't do coalesce */ - } - rc = nsc_copy(queued->handle, h, queued->pos, queued->pos, - queued->len); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(h); - return (0); /* couldn't do coalesce */ - } - rc = nsc_copy(new->handle, h, new->pos, new->pos, - new->len); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(h); - return (0); /* couldn't do coalesce */ - } - - krdc = &rdc_k_info[queued->index]; - - RDC_SET_BITMASK(queued->pos, queued->len, &bitmask); - RDC_CLR_BITMAP(krdc, queued->pos, queued->len, \ - bitmask, RDC_BIT_BUMP); - - RDC_SET_BITMASK(new->pos, new->len, &bitmask); - RDC_CLR_BITMAP(krdc, new->pos, new->len, \ - bitmask, RDC_BIT_BUMP); - - (void) nsc_free_buf(queued->handle); - (void) nsc_free_buf(new->handle); - queued->handle = h; - queued->len += new->len; - bitmask = 0; - /* - * bump the ref count back up - */ - - RDC_SET_BITMAP(krdc, queued->pos, queued->len, &bitmask); - return (1); /* new I/O succeeds last I/O queued */ - } - return (0); -} - -int -rdc_memq_enqueue(rdc_k_info_t *krdc, rdc_aio_t *aio) -{ - net_queue *q; - rdc_group_t *group; - - group = krdc->group; - q = &group->ra_queue; - - mutex_enter(&q->net_qlock); - - if (rdc_aio_coalesce(q->net_qtail, aio)) { - rdc_joins++; - q->blocks += aio->len; - kmem_free(aio, sizeof (*aio)); - goto out; - } - aio->seq = group->seq++; - if (group->seq < aio->seq) - group->seq = RDC_NEWSEQ + 1; /* skip magics */ - - if (q->net_qhead == NULL) { - /* adding to empty q */ - q->net_qhead = q->net_qtail = aio; - -#ifdef DEBUG - if (q->blocks != 0 || q->nitems != 0) { - cmn_err(CE_PANIC, - "rdc enqueue: q %p, qhead 0, q blocks %" NSC_SZFMT - ", nitems %" NSC_SZFMT, - (void *) q, q->blocks, q->nitems); - } -#endif - - } else { - /* discontiguous, add aio to q tail */ - q->net_qtail->next = aio; - q->net_qtail = aio; - } - - q->blocks += aio->len; - q->nitems++; - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } -out: -#ifdef DEBUG - /* sum the q and check for sanity */ - { - nsc_size_t 
qblocks = 0; - uint64_t nitems = 0; - rdc_aio_t *a; - - for (a = q->net_qhead; a != NULL; a = a->next) { - qblocks += a->len; - nitems++; - } - - if (qblocks != q->blocks || nitems != q->nitems) { - cmn_err(CE_PANIC, - "rdc enqueue: q %p, q blocks %" NSC_SZFMT " (%" - NSC_SZFMT "), nitems %" NSC_SZFMT " (%" - NSC_SZFMT ")", (void *) q, q->blocks, qblocks, - q->nitems, nitems); - } - } -#endif - - mutex_exit(&q->net_qlock); - - if (q->nitems > q->nitems_hwm) { - q->nitems_hwm = q->nitems; - } - - if (q->blocks > q->blocks_hwm) { - q->blocks_hwm = q->blocks; - } - - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - return (0); -} - -int -_rdc_enqueue_write(rdc_k_info_t *krdc, nsc_off_t pos, nsc_size_t len, int flag, - nsc_buf_t *h) -{ - rdc_aio_t *aio; - rdc_group_t *group; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc; - - aio = kmem_zalloc(sizeof (*aio), KM_NOSLEEP); - if (!aio) { - return (ENOMEM); - } - - group = krdc->group; - - aio->pos = pos; - aio->qpos = -1; - aio->len = len; - aio->flag = flag; - aio->index = krdc->index; - aio->handle = h; - - if (group->flags & RDC_MEMQUE) { - return (rdc_memq_enqueue(krdc, aio)); - } else if ((group->flags & RDC_DISKQUE) && - !IS_STATE(urdc, RDC_DISKQ_FAILED)) { - rc = rdc_diskq_enqueue(krdc, aio); - kmem_free(aio, sizeof (*aio)); - return (rc); - } - return (-1); /* keep lint quiet */ -} - - - - -/* - * Async Network RDC flusher - */ - -/* - * don't allow any new writer threads to start if a member of the set - * is disable pending - */ -int -is_disable_pending(rdc_k_info_t *krdc) -{ - rdc_k_info_t *this = krdc; - int rc = 0; - - do { - if (krdc->type_flag & RDC_DISABLEPEND) { - krdc = this; - rc = 1; - break; - } - krdc = krdc->group_next; - - } while (krdc != this); - - return (rc); -} - -/* - * rdc_writer -- spawn new writer if not running already - * called after enqueing the dirty blocks - */ -int -rdc_writer(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - nsthread_t *t; - rdc_group_t *group; - kmutex_t *qlock; - int tries; - const int MAX_TRIES = 16; - - group = krdc->group; - - if (RDC_IS_DISKQ(group)) - qlock = &group->diskq.disk_qlock; - else - qlock = &group->ra_queue.net_qlock; - - mutex_enter(qlock); - -#ifdef DEBUG - if (noflush) { - mutex_exit(qlock); - return (0); - } -#endif - - if ((group->rdc_writer) || is_disable_pending(krdc)) { - mutex_exit(qlock); - return (0); - } - - if ((group->rdc_thrnum >= 1) && (group->seqack == RDC_NEWSEQ)) { - /* - * We also need to check if we are starting a new - * sequence, and if so don't create a new thread, - * as we must ensure that the start of new sequence - * requests arrives first to re-init the server. - */ - mutex_exit(qlock); - return (0); - } - /* - * For version 6, - * see if we can fit in another thread. - */ - group->rdc_thrnum++; - - if (krdc->intf && (krdc->intf->rpc_version >= RDC_VERSION6)) { - rdc_u_info_t *urdc = &rdc_u_info[index]; - if (group->rdc_thrnum >= urdc->asyncthr) - group->rdc_writer = 1; - } else { - group->rdc_writer = 1; - } - - mutex_exit(qlock); - - - /* - * If we got here, we know that we have not exceeded the allowed - * number of async threads for our group. If we run out of threads - * in _rdc_flset, we add a new thread to the set. 
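The thread dispatch that follows is a grab-or-grow loop: hand the flush to a free thread in the set if one exists, otherwise add a single thread and retry, giving up after a bounded number of attempts so a broken pool cannot spin forever. Below is a minimal user-level sketch of the same pattern, assuming a toy pool made of two counters in place of the kernel's nst_create()/nst_add_thread() thread set.

#include <stdio.h>

#define MAX_TRIES       16
#define POOL_LIMIT      4

/*
 * Toy worker pool: a pair of counters stands in for the thread set,
 * so the retry logic can be shown without any kernel interfaces.
 */
static int pool_free;           /* workers waiting for work */
static int pool_size;           /* workers created so far */

static int
pool_try_dispatch(void (*func)(int), int arg)
{
        if (pool_free == 0)
                return (0);     /* no free worker available */
        pool_free--;
        func(arg);              /* run inline; a real pool would hand off */
        return (1);
}

static int
pool_grow(int n)
{
        if (pool_size + n > POOL_LIMIT)
                return (0);     /* refuse to grow past the limit */
        pool_size += n;
        pool_free += n;
        return (n);
}

static void
flush_work(int index)
{
        (void) printf("flushing set %d\n", index);
}

/*
 * Grab-or-grow: try an existing worker first, add one to the pool on
 * failure, and give up after a bounded number of attempts.
 */
static int
dispatch_flusher(int index)
{
        int tries = 0;

        do {
                if (pool_try_dispatch(flush_work, index))
                        return (0);
                if (pool_grow(1) != 1)
                        return (-1);
        } while (++tries < MAX_TRIES);

        return (-1);
}

int
main(void)
{
        return (dispatch_flusher(3) == 0 ? 0 : 1);
}

Bounding the growth attempts matters: if the pool cannot be extended, the caller falls back to tearing down its claim on the group rather than looping indefinitely.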
- */ - tries = 0; - do { - /* first try to grab a thread from the free list */ - if (t = nst_create(_rdc_flset, rdc_flusher_thread, - (blind_t)(unsigned long)index, 0)) { - break; - } - - /* that failed; add a thread to the set and try again */ - if (nst_add_thread(_rdc_flset, 1) != 1) { - cmn_err(CE_WARN, "!rdc_writer index %d nst_add_thread " - "error, tries: %d", index, tries); - break; - } - } while (++tries < MAX_TRIES); - - if (tries) { - mutex_enter(&group->addthrnumlk); - group->rdc_addthrnum += tries; - mutex_exit(&group->addthrnumlk); - } - - if (t) { - return (1); - } - - cmn_err(CE_WARN, "!rdc_writer: index %d nst_create error", index); - rdc_many_enter(krdc); - mutex_enter(qlock); - group->rdc_thrnum--; - group->rdc_writer = 0; - if ((group->count == 0) && (group->rdc_thrnum == 0)) { - mutex_exit(qlock); - /* - * Race with remove_from_group while write thread was - * failing to be created. - */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_writer: group being destroyed"); -#endif - rdc_delgroup(group); - krdc->group = NULL; - rdc_many_exit(krdc); - return (-1); - } - mutex_exit(qlock); - rdc_many_exit(krdc); - return (-1); -} - -/* - * Either we need to flush the - * kmem (net_queue) queue or the disk (disk_queue) - * determine which, and do it. - */ -void -rdc_flusher_thread(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - - if (krdc->group->flags & RDC_MEMQUE) { - rdc_flush_memq(index); - return; - } else if (krdc->group->flags & RDC_DISKQUE) { - rdc_flush_diskq(index); - return; - } else { /* uh-oh, big time */ - cmn_err(CE_PANIC, "flusher trying to flush unknown queue type"); - } - -} - -void -rdc_flush_memq(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_aio_t *aio; - net_queue *q; - int dowork; - rdc_group_t *group = krdc->group; - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: no group left!"); -#endif - return; - } - - if (!krdc->c_fd) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: no c_fd!"); -#endif - goto thread_death; - } - -#ifdef DEBUG_DISABLE - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - cmn_err(CE_WARN, "!rdc_flush_memq: DISABLE PENDING!"); - /* - * Need to continue as we may be trying to flush IO - * while trying to disable or suspend - */ - } -#endif - - q = &group->ra_queue; - - dowork = 1; - /* CONSTCOND */ - while (dowork) { - if (net_exit == ATM_EXIT) - break; - - group = krdc->group; - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: no group left!"); -#endif - break; - } - - mutex_enter(&q->net_qlock); - aio = q->net_qhead; - - if (aio == NULL) { -#ifdef DEBUG - if (q->nitems != 0 || - q->blocks != 0 || - q->net_qtail != 0) { - cmn_err(CE_PANIC, - "rdc_flush_memq(1): q %p, q blocks %" - NSC_SZFMT ", nitems %" NSC_SZFMT - ", qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) aio, (void *) q->net_qtail); - } -#endif - mutex_exit(&q->net_qlock); - break; - } - - /* aio remove from q */ - - q->net_qhead = aio->next; - aio->next = NULL; - - if (q->net_qtail == aio) - q->net_qtail = q->net_qhead; - - q->blocks -= aio->len; - q->nitems--; - - /* - * in flight numbers. 
- */ - q->inflbls += aio->len; - q->inflitems++; - -#ifdef DEBUG - if (q->net_qhead == NULL) { - if (q->nitems != 0 || - q->blocks != 0 || - q->net_qtail != 0) { - cmn_err(CE_PANIC, - "rdc_flush_memq(2): q %p, q blocks %" - NSC_SZFMT ", nitems %" NSC_SZFMT - ", qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) q->net_qhead, - (void *) q->net_qtail); - } - } - -#ifndef NSC_MULTI_TERABYTE - if (q->blocks < 0) { - cmn_err(CE_PANIC, - "rdc_flush_memq(3): q %p, q blocks %" NSC_SZFMT - ", nitems %d, qhead %p, qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) q->net_qhead, (void *) q->net_qtail); - } -#else - /* blocks and nitems are unsigned for NSC_MULTI_TERABYTE */ -#endif -#endif - - mutex_exit(&q->net_qlock); - - aio->iostatus = RDC_IO_INIT; - - _rdc_remote_flush(aio); - - mutex_enter(&q->net_qlock); - q->inflbls -= aio->len; - q->inflitems--; - if ((group->seqack == RDC_NEWSEQ) && - (group->seq != RDC_NEWSEQ + 1)) { - if ((q->net_qhead == NULL) || - (q->net_qhead->seq != RDC_NEWSEQ + 1)) { - /* - * We are an old thread, and the - * queue sequence has been reset - * during the network write above. - * As such we mustn't pull another - * job from the queue until the - * first sequence message has been ack'ed. - * Just die instead. Unless this thread - * is the first sequence that has just - * been ack'ed - */ - dowork = 0; - } - } - mutex_exit(&q->net_qlock); - - if ((aio->iostatus != RDC_IO_DONE) && (group->count)) { - rdc_k_info_t *krdctmp = &rdc_k_info[aio->index]; - if (krdctmp->type_flag & RDC_DISABLEPEND) { - kmem_free(aio, sizeof (*aio)); - goto thread_death; - } - rdc_group_enter(krdc); - ASSERT(krdc->group); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_ALLREMOTE, - "memq flush aio status not RDC_IO_DONE"); - rdc_group_exit(krdc); - rdc_dump_queue(aio->index); - } - kmem_free(aio, sizeof (*aio)); - - if (krdc->remote_index < 0 || !krdc->lsrv || !krdc->intf) - break; - } - -thread_death: - rdc_many_enter(krdc); - mutex_enter(&group->ra_queue.net_qlock); - group->rdc_thrnum--; - group->rdc_writer = 0; - /* - * all threads must be dead. 
- */ - if ((group->count == 0) && (group->rdc_thrnum == 0)) { - mutex_exit(&group->ra_queue.net_qlock); - /* - * Group now empty, so destroy - * Race with remove_from_group while write thread was running - */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_memq: group being destroyed"); -#endif - rdc_delgroup(group); - krdc->group = NULL; - rdc_many_exit(krdc); - return; - } - mutex_exit(&group->ra_queue.net_qlock); - rdc_many_exit(krdc); -} - -/* - * rdc_flush_diskq - * disk queue flusher - */ -void -rdc_flush_diskq(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_aio_t *aio = NULL; - disk_queue *q; - net_queue *nq; - int dowork; - int rc; - rdc_group_t *group = krdc->group; - - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: no group left!"); -#endif - return; - } - - if (!krdc->c_fd) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: no c_fd!"); -#endif - return; - } - -#ifdef DEBUG_DISABLE - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - cmn_err(CE_WARN, "!rdc_flush_diskq: DISABLE PENDING!"); - /* - * Need to continue as we may be trying to flush IO - * while trying to disable or suspend - */ - } -#endif - q = &group->diskq; - nq = &group->ra_queue; - - if (IS_QSTATE(q, RDC_QDISABLEPEND) || IS_STATE(urdc, RDC_LOGGING)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!flusher thread death 1 %x", QSTATE(q)); -#endif - goto thread_death; - } - - dowork = 1; - /* CONSTCOND */ - while (dowork) { - if (net_exit == ATM_EXIT) - break; - - group = krdc->group; - if (!group || group->count == 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: no group left!"); -#endif - break; - } - - do { - rc = 0; - if ((IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_SYNCING)) || - (nq->qfflags & RDC_QFILLSLEEP)) - goto thread_death; - - aio = rdc_dequeue(krdc, &rc); - - if ((IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_SYNCING)) || - (nq->qfflags & RDC_QFILLSLEEP)) { - goto thread_death; - } - if (rc == EAGAIN) { - delay(40); - } - - } while (rc == EAGAIN); - - if (aio == NULL) { - break; - } - - aio->iostatus = RDC_IO_INIT; - - mutex_enter(QLOCK(q)); - q->inflbls += aio->len; - q->inflitems++; - mutex_exit(QLOCK(q)); - - _rdc_remote_flush(aio); - - mutex_enter(QLOCK(q)); - q->inflbls -= aio->len; - q->inflitems--; - - if ((group->seqack == RDC_NEWSEQ) && - (group->seq != RDC_NEWSEQ + 1)) { - if ((nq->net_qhead == NULL) || - (nq->net_qhead->seq != RDC_NEWSEQ + 1)) { - /* - * We are an old thread, and the - * queue sequence has been reset - * during the network write above. - * As such we mustn't pull another - * job from the queue until the - * first sequence message has been ack'ed. - * Just die instead. Unless of course, - * this thread is the first sequence that - * has just been ack'ed. 
- */ - dowork = 0; - } - } - mutex_exit(QLOCK(q)); - - if (aio->iostatus == RDC_IO_CANCELLED) { - rdc_dump_queue(aio->index); - kmem_free(aio, sizeof (*aio)); - aio = NULL; - if (group) { /* seq gets bumped on dequeue */ - mutex_enter(QLOCK(q)); - rdc_dump_iohdrs(q); - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, QHEAD(q)); - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - mutex_exit(QLOCK(q)); - } - break; - } - - if ((aio->iostatus != RDC_IO_DONE) && (group->count)) { - rdc_k_info_t *krdctmp = &rdc_k_info[aio->index]; - if (krdctmp->type_flag & RDC_DISABLEPEND) { - kmem_free(aio, sizeof (*aio)); - aio = NULL; - goto thread_death; - } - rdc_group_enter(krdc); - rdc_group_log(krdc, - RDC_NOFLUSH | RDC_ALLREMOTE | RDC_QUEUING, - "diskq flush aio status not RDC_IO_DONE"); - rdc_group_exit(krdc); - rdc_dump_queue(aio->index); - } - - kmem_free(aio, sizeof (*aio)); - aio = NULL; - -#ifdef DEBUG_DISABLE - if (krdc->type_flag & RDC_DISABLEPEND) { - cmn_err(CE_WARN, - "!rdc_flush_diskq: DISABLE PENDING after IO!"); - } -#endif - if (krdc->remote_index < 0 || !krdc->lsrv || !krdc->intf) - break; - - if (IS_QSTATE(q, RDC_QDISABLEPEND)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!flusher thread death 2"); -#endif - break; - } - } -thread_death: - rdc_many_enter(krdc); - mutex_enter(QLOCK(q)); - group->rdc_thrnum--; - group->rdc_writer = 0; - - if (aio && aio->qhandle) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - (void) _rdc_rsrv_diskq(krdc->group); - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - _rdc_rlse_diskq(krdc->group); - } - } - if ((group->count == 0) && (group->rdc_thrnum == 0)) { - mutex_exit(QLOCK(q)); - /* - * Group now empty, so destroy - * Race with remove_from_group while write thread was running - */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_flush_diskq: group being destroyed"); -#endif - mutex_enter(&group->diskqmutex); - rdc_close_diskq(group); - mutex_exit(&group->diskqmutex); - rdc_delgroup(group); - krdc->group = NULL; - rdc_many_exit(krdc); - return; - } - mutex_exit(QLOCK(q)); - rdc_many_exit(krdc); -} - -/* - * _rdc_remote_flush - * Flush a single block ANON block - * this function will flush from either the disk queue - * or the memory queue. The appropriate locks must be - * taken out etc, etc ... - */ -static void -_rdc_remote_flush(rdc_aio_t *aio) -{ - rdc_k_info_t *krdc = &rdc_k_info[aio->index]; - rdc_u_info_t *urdc = &rdc_u_info[aio->index]; - disk_queue *q = &krdc->group->diskq; - kmutex_t *qlock; - rdc_group_t *group; - nsc_buf_t *h = NULL; - int reserved = 0; - int rtype = RDC_RAW; - int rc; - uint_t maxseq; - struct netwriteres netret; - int waitq = 1; - int vflags; - - group = krdc->group; - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - - /* Where did we get this aio from anyway? */ - if (RDC_IS_DISKQ(group)) { - qlock = &group->diskq.disk_qlock; - } else { - qlock = &group->ra_queue.net_qlock; - } - - /* - * quench transmission if we are too far ahead of the - * server Q, or it will overflow. - * Must fail all requests while asyncdis is set. - * It will be cleared when the last thread to be discarded - * sets the asyncstall counter to zero. - * Note the thread within rdc_net_write - * also bumps the asyncstall counter. - */ - - mutex_enter(qlock); - if (group->asyncdis) { - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - /* don't go to sleep if we have gone logging! 
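The quench loop that follows keeps a sender from running more than a fixed number of requests ahead of the last acknowledged sequence number, using comparisons that stay correct when the 32-bit counter wraps. Below is a stripped-down sketch of that style of window check; seq_before() and MAXPEND are illustrative stand-ins, not the driver's RDC_INFRONT()/RDC_MAXPENDQ macros, and the reserved low "new sequence" values the driver skips after a wrap are ignored here.

#include <stdio.h>
#include <stdint.h>

#define MAXPEND 64U     /* window size: how far a sender may run ahead */

/*
 * Wrap-safe "a is before b" test for 32-bit sequence numbers
 * (serial-number arithmetic).
 */
static int
seq_before(uint32_t a, uint32_t b)
{
        return ((int32_t)(a - b) < 0);
}

/* May a request with sequence 'seq' be sent, given the last acked one? */
static int
window_has_room(uint32_t seq, uint32_t seqack)
{
        uint32_t maxseq = seqack + MAXPEND + 1; /* wraps naturally */

        return (seq_before(seq, maxseq));
}

int
main(void)
{
        /* in range, window full, and across a wrap of the counter */
        (void) printf("%d\n", window_has_room(10, 5));              /* 1 */
        (void) printf("%d\n", window_has_room(5 + MAXPEND + 1, 5)); /* 0 */
        (void) printf("%d\n", window_has_room(3, 0xfffffff0U));     /* 1 */
        return (0);
}

A sender that finds no room simply sleeps on the condition variable until an acknowledgement advances seqack and the window reopens.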
*/ - vflags = rdc_get_vflags(urdc); - if ((vflags & (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING))) { - if ((vflags & RDC_LOGGING) && RDC_IS_DISKQ(group)) - aio->iostatus = RDC_IO_CANCELLED; - - mutex_exit(qlock); - goto failed; - } - - while (maxseq = group->seqack + RDC_MAXPENDQ + 1, - maxseq = (maxseq < group->seqack) ? maxseq + RDC_NEWSEQ + 1 - : maxseq, !RDC_INFRONT(aio->seq, maxseq)) { - group->asyncstall++; - ASSERT(!IS_STATE(urdc, RDC_LOGGING)); - cv_wait(&group->asyncqcv, qlock); - group->asyncstall--; - ASSERT(group->asyncstall >= 0); - if (group->asyncdis) { - if (group->asyncstall == 0) { - group->asyncdis = 0; - } - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - /* - * See if we have gone into logging mode - * since sleeping. - */ - vflags = rdc_get_vflags(urdc); - if (vflags & (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING)) { - if ((vflags & RDC_LOGGING) && RDC_IS_DISKQ(group)) - aio->iostatus = RDC_IO_CANCELLED; - - mutex_exit(qlock); - goto failed; - } - } - mutex_exit(qlock); - - if ((krdc->io_kstats) && (!RDC_IS_DISKQ(krdc->group))) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - waitq = 0; - } - - - rc = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_remote_flush: reserve, index %d, rc %d", - aio->index, rc); -#endif - goto failed; - } - - reserved = 1; - /* - * Case where we are multihop and calling with no ANON bufs - * Need to do the read to fill the buf. - */ - if (!aio->handle) { - rc = nsc_alloc_buf(RDC_U_FD(krdc), aio->pos, aio->len, - (aio->flag & ~NSC_WRITE) | NSC_READ, &h); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_remote_flush: alloc_buf, index %d, pos %" - NSC_SZFMT ", len %" NSC_SZFMT ", rc %d", - aio->index, aio->pos, aio->len, rc); -#endif - - goto failed; - } - aio->handle = h; - aio->handle->sb_user = RDC_NULLBUFREAD; - } - - mutex_enter(qlock); - if (group->asyncdis) { - if (group->asyncstall == 0) { - group->asyncdis = 0; - } - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - group->asyncstall++; - mutex_exit(qlock); - - - if (krdc->remote_index < 0) { - /* - * this should be ok, we are flushing, not rev syncing. - * remote_index could be -1 if we lost a race with - * resume and the flusher trys to flush an io from - * another set that has not resumed - */ - krdc->remote_index = rdc_net_state(krdc->index, CCIO_SLAVE); - DTRACE_PROBE1(remote_index_negative, int, krdc->remote_index); - - } - - /* - * double check for logging, no check in net_write() - * skip the write if you can, otherwise, if logging - * avoid clearing the bit .. you don't know whose bit it may - * also be. 
- */ - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) { - aio->iostatus = RDC_IO_CANCELLED; - mutex_enter(qlock); - group->asyncstall--; - mutex_exit(qlock); - goto failed; - } - - rc = rdc_net_write(krdc->index, krdc->remote_index, - aio->handle, aio->pos, aio->len, aio->seq, aio->qpos, &netret); - - mutex_enter(qlock); - group->asyncstall--; - if (group->asyncdis) { - if (group->asyncstall == 0) { - group->asyncdis = 0; - } - aio->iostatus = RDC_IO_CANCELLED; - mutex_exit(qlock); - goto failed; - } - - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) { - mutex_exit(qlock); - aio->iostatus = RDC_IO_CANCELLED; - goto failed; - } - - ASSERT(aio->handle); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_remote_flush: write, index %d, pos %" NSC_SZFMT - ", len %" NSC_SZFMT ", " - "rc %d seq %u group seq %u seqack %u qpos %" NSC_SZFMT, - aio->index, aio->pos, aio->len, rc, aio->seq, - group->seq, group->seqack, aio->qpos); -#endif - if (rc == ENOLINK) { - cmn_err(CE_WARN, - "!Hard timeout detected (%d sec) " - "on SNDR set %s:%s", - rdc_rpc_tmout, urdc->secondary.intf, - urdc->secondary.file); - } - mutex_exit(qlock); - goto failed; - } else { - aio->iostatus = RDC_IO_DONE; - } - - if (RDC_IS_DISKQ(group)) { - /* free locally alloc'd handle */ - if (aio->handle->sb_user == RDC_NULLBUFREAD) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - (void) _rdc_rsrv_diskq(group); - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - _rdc_rlse_diskq(group); - } - - } else { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - - mutex_exit(qlock); - - _rdc_rlse_devs(krdc, rtype); - - if (netret.result == 0) { - vflags = rdc_get_vflags(urdc); - - if (!(vflags & (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING))) { - RDC_CLR_BITMAP(krdc, aio->pos, aio->len, \ - 0xffffffff, RDC_BIT_BUMP); - - if (RDC_IS_DISKQ(krdc->group)) { - if (!IS_STATE(urdc, RDC_LOGGING)) { - /* tell queue data has been flushed */ - rdc_clr_iohdr(krdc, aio->qpos); - } else { /* throw away queue, logging */ - mutex_enter(qlock); - rdc_dump_iohdrs(q); - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, QHEAD(q)); - mutex_exit(qlock); - } - } - } - - mutex_enter(qlock); - /* - * Check to see if the reply has arrived out of - * order, if so don't update seqack. - */ - if (!RDC_INFRONT(aio->seq, group->seqack)) { - group->seqack = aio->seq; - } -#ifdef DEBUG - else { - rdc_ooreply++; - } -#endif - if (group->asyncstall) { - cv_broadcast(&group->asyncqcv); - } - mutex_exit(qlock); - } else if (netret.result < 0) { - aio->iostatus = RDC_IO_FAILED; - } - - /* - * see if we have any pending async requests we can mark - * as done. - */ - - if (netret.vecdata.vecdata_len) { - net_pendvec_t *vecp; - net_pendvec_t *vecpe; - vecp = netret.vecdata.vecdata_val; - vecpe = netret.vecdata.vecdata_val + netret.vecdata.vecdata_len; - while (vecp < vecpe) { - rdc_k_info_t *krdcp = &rdc_k_info[vecp->pindex]; - rdc_u_info_t *urdcp = &rdc_u_info[vecp->pindex]; - /* - * we must always still be in the same group. 
- */ - ASSERT(krdcp->group == group); - vflags = rdc_get_vflags(urdcp); - - if (!(vflags & - (RDC_BMP_FAILED|RDC_VOL_FAILED|RDC_LOGGING))) { - RDC_CLR_BITMAP(krdcp, vecp->apos, vecp->alen, \ - 0xffffffff, RDC_BIT_BUMP); - if (RDC_IS_DISKQ(krdcp->group)) { - if (!IS_STATE(urdc, RDC_LOGGING)) { - /* update queue info */ - rdc_clr_iohdr(krdc, vecp->qpos); - } else { /* we've gone logging */ - mutex_enter(qlock); - rdc_dump_iohdrs(q); - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, QHEAD(q)); - mutex_exit(qlock); - } - } - } - - /* - * see if we can re-start transmission - */ - mutex_enter(qlock); - if (!RDC_INFRONT(vecp->seq, group->seqack)) { - group->seqack = vecp->seq; - } -#ifdef DEBUG - else { - rdc_ooreply++; - } -#endif - DTRACE_PROBE1(pendvec_return, int, vecp->seq); - - if (group->asyncstall) { - cv_broadcast(&group->asyncqcv); - } - mutex_exit(qlock); - vecp++; - } - } - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * sizeof (net_pendvec_t)); - return; -failed: - - /* perhaps we have a few threads stuck .. */ - if (group->asyncstall) { - group->asyncdis = 1; - cv_broadcast(&group->asyncqcv); - } - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * sizeof (net_pendvec_t)); - - mutex_enter(qlock); - if (RDC_IS_DISKQ(group)) { - /* free locally alloc'd hanlde */ - if ((aio->handle) && - (aio->handle->sb_user == RDC_NULLBUFREAD)) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - (void) _rdc_rsrv_diskq(group); - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - _rdc_rlse_diskq(group); - } - } else { - if (aio->handle) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - } - mutex_exit(qlock); - - if (reserved) { - _rdc_rlse_devs(krdc, rtype); - } - - if ((waitq && krdc->io_kstats) && (!RDC_IS_DISKQ(krdc->group))) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - /* make sure that the bit is still set */ - RDC_CHECK_BIT(krdc, aio->pos, aio->len); - - if (aio->iostatus != RDC_IO_CANCELLED) - aio->iostatus = RDC_IO_FAILED; -} - - -/* - * rdc_drain_disk_queue - * drain the async network queue for the whole group. Bail out if nothing - * happens in 20 sec - * returns -1 if it bails before the queues are drained. - */ -#define NUM_RETRIES 15 /* Number of retries to wait if no progress */ -int -rdc_drain_disk_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - volatile rdc_group_t *group; - volatile disk_queue *diskq; - int threads, counter; - long blocks; - - /* Sanity checking */ - if (index > rdc_max_sets) - return (0); - - /* - * If there is no group or diskq configured, we can leave now - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - /* - * No need to wait if EMPTY and threads are gone - */ - counter = 0; - while (!QEMPTY(diskq) || group->rdc_thrnum) { - - /* - * Capture counters to determine if progress is being made - */ - blocks = QBLOCKS(diskq); - threads = group->rdc_thrnum; - - /* - * Wait - */ - delay(HZ); - - /* - * Has the group or disk queue gone away while delayed? - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - /* - * Are we still seeing progress? 
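The test that follows implements a progress-based timeout rather than a fixed one: a retry is only charged when a one-second poll sees no change in either the queued block count or the number of flusher threads, so a slow but live flush never trips the limit. Below is a self-contained sketch of the idea; the queue is just a pair of variables drained by queue_poll(), standing in for QBLOCKS() and the group's thread count.

#include <stdio.h>
#include <unistd.h>

#define NUM_RETRIES 15          /* consecutive idle polls before giving up */

/*
 * Toy queue that drains a little on each poll; stands in for the
 * group's disk queue and flusher thread count.
 */
static long q_blocks = 40;
static int  q_threads = 1;

static void
queue_poll(void)
{
        if (q_blocks > 0)
                q_blocks -= 10;         /* pretend the flusher made progress */
        if (q_blocks <= 0) {
                q_blocks = 0;
                q_threads = 0;
        }
}

/*
 * Progress-based timeout: only count a retry when neither counter
 * changed across a one second delay.
 */
static int
drain_queue(void)
{
        int counter = 0;

        while (q_blocks != 0 || q_threads != 0) {
                long blocks = q_blocks;
                int threads = q_threads;

                (void) sleep(1);
                queue_poll();

                if (blocks == q_blocks && threads == q_threads) {
                        if (counter++ > NUM_RETRIES)
                                return (-1);    /* stuck: no progress */
                } else {
                        counter = 0;            /* progress: reset the clock */
                }
        }
        return (0);
}

int
main(void)
{
        return (drain_queue() == 0 ? 0 : 1);
}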
- */ - if (blocks == QBLOCKS(diskq) && threads == group->rdc_thrnum) { - /* - * No progress seen, increment retry counter - */ - if (counter++ > NUM_RETRIES) { - return (-1); - } - } else { - /* - * Reset counter, as we've made progress - */ - counter = 0; - } - } - - return (0); -} - -/* - * decide what needs to be drained, disk or core - * and drain it - */ -int -rdc_drain_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_group_t *group = krdc->group; - - if (!group) - return (0); - - if (RDC_IS_DISKQ(group)) - return (rdc_drain_disk_queue(index)); - if (RDC_IS_MEMQ(group)) - return (rdc_drain_net_queue(index)); - /* oops.. */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_drain_queue: " - "attempting drain of unknown Q type"); -#endif - return (0); -} - -/* - * rdc_drain_net_queue - * drain the async network queue for the whole group. Bail out if nothing - * happens in 20 sec - * returns -1 if it bails before the queues are drained. - */ -int -rdc_drain_net_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - volatile net_queue *q; - int bail = 20; /* bail out in about 20 secs */ - nsc_size_t blocks; - - /* Sanity checking */ - if (index > rdc_max_sets) - return (0); - if (!krdc->group) - return (0); - /* LINTED */ - if (!(q = &krdc->group->ra_queue)) - return (0); - - /* CONSTCOND */ - while (1) { - - if (((volatile rdc_aio_t *)q->net_qhead == NULL) && - (krdc->group->rdc_thrnum == 0)) { - break; - } - - blocks = q->blocks; - - q = (volatile net_queue *)&krdc->group->ra_queue; - - if ((blocks == q->blocks) && - (--bail <= 0)) { - break; - } - - delay(HZ); - } - - if (bail <= 0) - return (-1); - - return (0); -} - -/* - * rdc_dump_queue - * We want to release all the blocks currently on the network flushing queue - * We already have them logged in the bitmap. 
- */ -void -rdc_dump_queue(int index) -{ - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_aio_t *aio; - net_queue *q; - rdc_group_t *group; - disk_queue *dq; - kmutex_t *qlock; - - group = krdc->group; - - q = &group->ra_queue; - dq = &group->diskq; - - /* - * gotta have both locks here for diskq - */ - - if (RDC_IS_DISKQ(group)) { - mutex_enter(&q->net_qlock); - if (q->qfill_sleeping == RDC_QFILL_AWAKE) { - int tries = 3; -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, - "!dumpq sending diskq->memq flusher to sleep"); -#endif - q->qfflags |= RDC_QFILLSLEEP; - mutex_exit(&q->net_qlock); - while (q->qfill_sleeping == RDC_QFILL_AWAKE && tries--) - delay(5); - mutex_enter(&q->net_qlock); - } - } - - if (RDC_IS_DISKQ(group)) { - qlock = &dq->disk_qlock; - (void) _rdc_rsrv_diskq(group); - } else { - qlock = &q->net_qlock; - } - - mutex_enter(qlock); - - group->seq = RDC_NEWSEQ; /* reset the sequence number */ - group->seqack = RDC_NEWSEQ; - - /* if the q is on disk, dump the q->iohdr chain */ - if (RDC_IS_DISKQ(group)) { - rdc_dump_iohdrs(dq); - - /* back up the nxtio pointer */ - SET_QNXTIO(dq, QHEAD(dq)); - SET_QCOALBOUNDS(dq, QHEAD(dq)); - } - - while (q->net_qhead) { - rdc_k_info_t *tmpkrdc; - aio = q->net_qhead; - tmpkrdc = &rdc_k_info[aio->index]; - - if (RDC_IS_DISKQ(group)) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - aio->qhandle = NULL; - aio->handle = NULL; - } - } else { - if (aio->handle) { - (void) nsc_free_buf(aio->handle); - aio->handle = NULL; - } - } - - q->net_qhead = aio->next; - RDC_CHECK_BIT(tmpkrdc, aio->pos, aio->len); - - kmem_free(aio, sizeof (*aio)); - if (tmpkrdc->io_kstats && !RDC_IS_DISKQ(group)) { - mutex_enter(tmpkrdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(tmpkrdc->io_kstats)); - mutex_exit(tmpkrdc->io_kstats->ks_lock); - } - - } - - q->net_qtail = NULL; - q->blocks = 0; - q->nitems = 0; - - /* - * See if we have stalled threads. 
- */ -done: - if (group->asyncstall) { - group->asyncdis = 1; - cv_broadcast(&group->asyncqcv); - } - mutex_exit(qlock); - if (RDC_IS_DISKQ(group)) { - mutex_exit(&q->net_qlock); - _rdc_rlse_diskq(group); - } - -} - - -/* - * rdc_clnt_get - * Get a CLIENT handle and cache it - */ - -static int -rdc_clnt_get(rdc_srv_t *svp, rpcvers_t vers, struct chtab **rch, CLIENT **clp) -{ - uint_t max_msgsize; - int retries; - int ret; - struct cred *cred; - int num_clnts = 0; - register struct chtab *ch; - struct chtab **plistp; - CLIENT *client = 0; - - if (rch) { - *rch = 0; - } - - if (clp) { - *clp = 0; - } - - retries = 6; /* Never used for COTS in Solaris */ - cred = ddi_get_cred(); - max_msgsize = RDC_RPC_MAX; - - mutex_enter(&rdc_clnt_lock); - - ch = rdc_chtable; - plistp = &rdc_chtable; - - /* find the right ch_list chain */ - - for (ch = rdc_chtable; ch != NULL; ch = ch->ch_next) { - if (ch->ch_prog == RDC_PROGRAM && - ch->ch_vers == vers && - ch->ch_dev == svp->ri_knconf->knc_rdev && - ch->ch_protofmly != NULL && - strcmp(ch->ch_protofmly, - svp->ri_knconf->knc_protofmly) == 0) { - /* found the correct chain to walk */ - break; - } - plistp = &ch->ch_next; - } - - if (ch != NULL) { - /* walk the ch_list and try and find a free client */ - - for (num_clnts = 0; ch != NULL; ch = ch->ch_list, num_clnts++) { - if (ch->ch_inuse == FALSE) { - /* suitable handle to reuse */ - break; - } - plistp = &ch->ch_list; - } - } - - if (ch == NULL && num_clnts >= MAXCLIENTS) { - /* alloc a temporary handle and return */ - - rdc_clnt_toomany++; - mutex_exit(&rdc_clnt_lock); - - ret = clnt_tli_kcreate(svp->ri_knconf, &(svp->ri_addr), - RDC_PROGRAM, vers, max_msgsize, retries, cred, &client); - - if (ret != 0) { - cmn_err(CE_NOTE, - "!rdc_call: tli_kcreate failed %d", ret); - return (ret); - } - - *rch = 0; - *clp = client; - (void) CLNT_CONTROL(client, CLSET_PROGRESS, NULL); - return (ret); - } - - if (ch != NULL) { - /* reuse a cached handle */ - - ch->ch_inuse = TRUE; - ch->ch_timesused++; - mutex_exit(&rdc_clnt_lock); - - *rch = ch; - - if (ch->ch_client == NULL) { - ret = clnt_tli_kcreate(svp->ri_knconf, &(svp->ri_addr), - RDC_PROGRAM, vers, max_msgsize, retries, - cred, &ch->ch_client); - if (ret != 0) { - ch->ch_inuse = FALSE; - return (ret); - } - - (void) CLNT_CONTROL(ch->ch_client, CLSET_PROGRESS, - NULL); - *clp = ch->ch_client; - - return (0); - } else { - /* - * Consecutive calls to CLNT_CALL() on the same client handle - * get the same transaction ID. We want a new xid per call, - * so we first reinitialise the handle. 
- */ - (void) clnt_tli_kinit(ch->ch_client, svp->ri_knconf, - &(svp->ri_addr), max_msgsize, retries, cred); - - *clp = ch->ch_client; - return (0); - } - } - - /* create new handle and cache it */ - ch = (struct chtab *)kmem_zalloc(sizeof (*ch), KM_SLEEP); - - if (ch) { - ch->ch_inuse = TRUE; - ch->ch_prog = RDC_PROGRAM; - ch->ch_vers = vers; - ch->ch_dev = svp->ri_knconf->knc_rdev; - ch->ch_protofmly = (char *)kmem_zalloc( - strlen(svp->ri_knconf->knc_protofmly)+1, KM_SLEEP); - if (ch->ch_protofmly) - (void) strcpy(ch->ch_protofmly, - svp->ri_knconf->knc_protofmly); - *plistp = ch; - } - - mutex_exit(&rdc_clnt_lock); - - ret = clnt_tli_kcreate(svp->ri_knconf, &(svp->ri_addr), - RDC_PROGRAM, vers, max_msgsize, retries, cred, clp); - - if (ret != 0) { - if (ch) - ch->ch_inuse = FALSE; - cmn_err(CE_NOTE, "!rdc_call: tli_kcreate failed %d", ret); - return (ret); - } - - *rch = ch; - if (ch) - ch->ch_client = *clp; - - (void) CLNT_CONTROL(*clp, CLSET_PROGRESS, NULL); - - return (ret); -} - - -long rdc_clnt_count = 0; - -/* - * rdc_clnt_call - * Arguments: - * rdc_srv_t *svp - rdc servinfo - * rpcproc_t proc; - rpcid - * rpcvers_t vers; - protocol version - * xdrproc_t xargs;- xdr function - * caddr_t argsp;- args to xdr function - * xdrproc_t xres;- xdr function - * caddr_t resp;- args to xdr function - * struct timeval timeout; - * Performs RPC client call using specific protocol and version - */ - -int -rdc_clnt_call(rdc_srv_t *svp, rpcproc_t proc, rpcvers_t vers, - xdrproc_t xargs, caddr_t argsp, - xdrproc_t xres, caddr_t resp, struct timeval *timeout) -{ - CLIENT *rh = NULL; - int err; - int tries = 0; - struct chtab *ch = NULL; - - err = rdc_clnt_get(svp, vers, &ch, &rh); - if (err || !rh) - return (err); - - do { - DTRACE_PROBE3(rdc_clnt_call_1, - CLIENT *, rh, rpcproc_t, proc, xdrproc_t, xargs); - - err = cl_call_sig(rh, proc, xargs, argsp, xres, resp, *timeout); - - DTRACE_PROBE1(rdc_clnt_call_end, int, err); - - switch (err) { - case RPC_SUCCESS: /* bail now */ - goto done; - case RPC_INTR: /* No recovery from this */ - goto done; - case RPC_PROGVERSMISMATCH: - goto done; - case RPC_TLIERROR: - /* fall thru */ - case RPC_XPRTFAILED: - /* Delay here to err on side of caution */ - /* fall thru */ - case RPC_VERSMISMATCH: - - default: - if (IS_UNRECOVERABLE_RPC(err)) { - goto done; - } - tries++; - /* - * The call is in progress (over COTS) - * Try the CLNT_CALL again, but don't - * print a noisy error message - */ - if (err == RPC_INPROGRESS) - break; - cmn_err(CE_NOTE, "!SNDR client: err %d %s", - err, clnt_sperrno(err)); - } - } while (tries && (tries < 2)); -done: - ++rdc_clnt_count; - rdc_clnt_free(ch, rh); - return (err); -} - - -/* - * Call an rpc from the client side, not caring which protocol is used. - */ -int -rdc_clnt_call_any(rdc_srv_t *svp, rdc_if_t *ip, rpcproc_t proc, - xdrproc_t xargs, caddr_t argsp, - xdrproc_t xres, caddr_t resp, struct timeval *timeout) -{ - rpcvers_t vers; - int rc; - - if (ip != NULL) { - vers = ip->rpc_version; - } else { - vers = RDC_VERS_MAX; - } - - do { - rc = rdc_clnt_call(svp, proc, vers, xargs, argsp, - xres, resp, timeout); - - if (rc == RPC_PROGVERSMISMATCH) { - /* - * Downgrade and try again. 
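The downgrade walk here starts at the newest protocol version either end might speak and steps back one version on every version-mismatch error until a call succeeds or the floor is reached, then remembers the agreed version for later calls. Below is a self-contained sketch of that negotiation; try_call() is a hypothetical stub for the remote call, not clnt_call(), and the version numbers are only examples.

#include <stdio.h>

#define VERS_MIN        5
#define VERS_MAX        7
#define E_VERSMISMATCH  (-2)    /* stand-in for RPC_PROGVERSMISMATCH */

/* Hypothetical peer that only speaks protocol version 6 or lower. */
static int
try_call(int vers)
{
        return (vers <= 6 ? 0 : E_VERSMISMATCH);
}

/*
 * Walk downwards from the highest supported version until the call
 * succeeds or we run out of versions; remember what was agreed.
 */
static int
call_any(int *cached_vers)
{
        int vers = *cached_vers ? *cached_vers : VERS_MAX;
        int rc;

        do {
                rc = try_call(vers);
                if (rc == E_VERSMISMATCH)
                        vers--;         /* downgrade and try again */
        } while (vers >= VERS_MIN && rc == E_VERSMISMATCH);

        if (rc == 0 && vers != *cached_vers)
                *cached_vers = vers;    /* cache the negotiated version */

        return (rc);
}

int
main(void)
{
        int cached = 0;

        if (call_any(&cached) == 0)
                (void) printf("negotiated protocol version %d\n", cached);
        return (0);
}

Caching the negotiated version means the walk only pays the mismatch round-trips once per interface rather than on every call.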
- */ - vers--; - } - } while ((vers >= RDC_VERS_MIN) && (rc == RPC_PROGVERSMISMATCH)); - - if ((rc == 0) && (ip != NULL) && (vers != ip->rpc_version)) { - mutex_enter(&rdc_ping_lock); - ip->rpc_version = vers; - mutex_exit(&rdc_ping_lock); - } - - return (rc); -} - -/* - * Call an rpc from the client side, starting with protocol specified - */ -int -rdc_clnt_call_walk(rdc_k_info_t *krdc, rpcproc_t proc, xdrproc_t xargs, - caddr_t argsp, xdrproc_t xres, caddr_t resp, - struct timeval *timeout) -{ - int rc; - rpcvers_t vers; - rdc_srv_t *svp = krdc->lsrv; - rdc_if_t *ip = krdc->intf; - vers = krdc->rpc_version; - - do { - rc = rdc_clnt_call(svp, proc, vers, xargs, argsp, - xres, resp, timeout); - - if (rc == RPC_PROGVERSMISMATCH) { - /* - * Downgrade and try again. - */ - vers--; - } - } while ((vers >= RDC_VERS_MIN) && (rc == RPC_PROGVERSMISMATCH)); - - if ((rc == 0) && (ip != NULL) && (vers != ip->rpc_version)) { - mutex_enter(&rdc_ping_lock); - ip->rpc_version = vers; - mutex_exit(&rdc_ping_lock); - } - - return (rc); -} - -/* - * rdc_clnt_free - * Free a client structure into the cache, or if this was a temporary - * handle allocated above MAXCLIENTS, destroy it. - */ -static void -rdc_clnt_free(struct chtab *ch, CLIENT *clp) -{ - if (ch != NULL) { - /* cached client, just clear inuse flag and return */ - ASSERT(ch->ch_client == clp); - ch->ch_inuse = FALSE; - return; - } - - /* temporary handle allocated above MAXCLIENTS, so destroy it */ - - if (clp->cl_auth) { - AUTH_DESTROY(clp->cl_auth); - clp->cl_auth = 0; - } - - CLNT_DESTROY(clp); -} - - -/* - * _rdc_clnt_destroy - * Free a chain (ch_list or ch_next) of cached clients - */ -static int -_rdc_clnt_destroy(struct chtab **p, const int list) -{ - struct chtab *ch; - int leak = 0; - - if (!p) - return (0); - - while (*p != NULL) { - ch = *p; - - /* - * unlink from the chain - * - this leaks the client if it was inuse - */ - - *p = list ? ch->ch_list : ch->ch_next; - - if (!ch->ch_inuse) { - /* unused client - destroy it */ - - if (ch->ch_client) { - if (ch->ch_client->cl_auth) { - AUTH_DESTROY(ch->ch_client->cl_auth); - ch->ch_client->cl_auth = 0; - } - - CLNT_DESTROY(ch->ch_client); - ch->ch_client = 0; - } - - if (ch->ch_protofmly) - kmem_free(ch->ch_protofmly, - strlen(ch->ch_protofmly)+1); - - kmem_free(ch, sizeof (*ch)); - } else { - /* remember client leak */ - leak++; - } - } - - return (leak); -} - - -/* - * rdc_clnt_destroy - * Free client caching table on unconfigure - */ -void -rdc_clnt_destroy(void) -{ - struct chtab *ch; - int leak = 0; - - mutex_enter(&rdc_clnt_lock); - - /* destroy each ch_list chain */ - - for (ch = rdc_chtable; ch; ch = ch->ch_next) { - leak += _rdc_clnt_destroy(&ch->ch_list, 1); - } - - /* destroy the main ch_next chain */ - leak += _rdc_clnt_destroy(&rdc_chtable, 0); - - if (leak) { - /* we are about to leak clients */ - cmn_err(CE_WARN, - "!rdc_clnt_destroy: leaking %d inuse clients", leak); - } - - mutex_exit(&rdc_clnt_lock); -} - -#ifdef DEBUG -/* - * Function to send an asynchronous net_data6 request - * direct to a server to allow the generation of - * out of order requests for ZatoIchi tests. - */ -int -rdc_async6(void *arg, int mode, int *rvp) -{ - int index; - rdc_async6_t async6; - struct net_data6 data6; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - char *data; - int datasz; - char *datap; - int rc; - struct timeval t; - struct netwriteres netret; - int i; - - rc = 0; - *rvp = 0; - /* - * copyin the user's arguments. 
- */ - if (ddi_copyin(arg, &async6, sizeof (async6), mode) < 0) { - return (EFAULT); - } - - /* - * search by the secondary host and file. - */ - mutex_enter(&rdc_conf_lock); - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - if (!IS_CONFIGURED(krdc)) - continue; - if (!IS_ENABLED(urdc)) - continue; - if (!IS_ASYNC(urdc)) - continue; - if (krdc->rpc_version < RDC_VERSION6) - continue; - - if ((strncmp(urdc->secondary.intf, async6.sechost, - MAX_RDC_HOST_SIZE) == 0) && - (strncmp(urdc->secondary.file, async6.secfile, - NSC_MAXPATH) == 0)) { - break; - } - } - mutex_exit(&rdc_conf_lock); - if (index >= rdc_max_sets) { - return (ENOENT); - } - - if (async6.spos != -1) { - if ((async6.spos < async6.pos) || - ((async6.spos + async6.slen) > - (async6.pos + async6.len))) { - cmn_err(CE_WARN, "!Sub task not within range " - "start %d length %d sub start %d sub length %d", - async6.pos, async6.len, async6.spos, async6.slen); - return (EIO); - } - } - - datasz = FBA_SIZE(1); - data = kmem_alloc(datasz, KM_SLEEP); - datap = data; - while (datap < &data[datasz]) { - /* LINTED */ - *datap++ = async6.pat; - } - - /* - * Fill in the net databuffer prior to transmission. - */ - - data6.local_cd = krdc->index; - if (krdc->remote_index == -1) { - cmn_err(CE_WARN, "!Remote index not known"); - kmem_free(data, datasz); - return (EIO); - } else { - data6.cd = krdc->remote_index; - } - data6.pos = async6.pos; - data6.len = async6.len; - data6.flag = 0; - data6.idx = async6.idx; - data6.seq = async6.seq; - - if (async6.spos == -1) { - data6.sfba = async6.pos; - data6.nfba = async6.len; - data6.endoblk = 1; - - } else { - data6.sfba = async6.spos; - data6.nfba = async6.slen; - data6.endoblk = async6.endind; - } - - data6.data.data_len = datasz; - data6.data.data_val = data; - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - - - rc = rdc_clnt_call(krdc->lsrv, RDCPROC_WRITE6, krdc->rpc_version, - xdr_net_data6, (char *)&data6, xdr_netwriteres, (char *)&netret, - &t); - - kmem_free(data, datasz); - if (rc == 0) { - if (netret.result < 0) { - rc = -netret.result; - } - cmn_err(CE_NOTE, "!async6: seq %u result %d index %d " - "pendcnt %d", - netret.seq, netret.result, netret.index, - netret.vecdata.vecdata_len); - for (i = 0; i < netret.vecdata.vecdata_len; i++) { - net_pendvec_t pvec; - bcopy(netret.vecdata.vecdata_val + i, &pvec, - sizeof (net_pendvec_t)); - cmn_err(CE_NOTE, "!Seq %u pos %llu len %llu", - pvec.seq, (unsigned long long)pvec.apos, - (unsigned long long)pvec.alen); - } - if (netret.vecdata.vecdata_val) - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * - sizeof (net_pendvec_t)); - } else { - cmn_err(CE_NOTE, "!async6: rpc call failed %d", rc); - } - *rvp = netret.index; - return (rc); -} - -/* - * Function to send an net_read6 request - * direct to a server to allow the generation of - * read requests. 
- */ -int -rdc_readgen(void *arg, int mode, int *rvp) -{ - int index; - rdc_readgen_t readgen; - rdc_readgen32_t readgen32; - struct rread6 read6; - struct rread read5; - rdc_k_info_t *krdc; - int ret; - struct timeval t; - struct rdcrdresult rr; - int err; - - *rvp = 0; - rr.rr_bufsize = 0; /* rpc data buffer length (bytes) */ - rr.rr_data = NULL; /* rpc data buffer */ - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - if (ddi_copyin(arg, &readgen32, sizeof (readgen32), mode)) { - return (EFAULT); - } - (void) strncpy(readgen.sechost, readgen32.sechost, - MAX_RDC_HOST_SIZE); - (void) strncpy(readgen.secfile, readgen32.secfile, NSC_MAXPATH); - readgen.len = readgen32.len; - readgen.pos = readgen32.pos; - readgen.idx = readgen32.idx; - readgen.flag = readgen32.flag; - readgen.data = (void *)(unsigned long)readgen32.data; - readgen.rpcversion = readgen32.rpcversion; - } else { - if (ddi_copyin(arg, &readgen, sizeof (readgen), mode)) { - return (EFAULT); - } - } - switch (readgen.rpcversion) { - case 5: - case 6: - break; - default: - return (EINVAL); - } - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byhostdev(readgen.sechost, readgen.secfile); - if (index >= 0) { - krdc = &rdc_k_info[index]; - } - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - return (ENODEV); - } - /* - * we should really call setbusy here. - */ - mutex_exit(&rdc_conf_lock); - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - if (krdc->remote_index == -1) { - cmn_err(CE_WARN, "!Remote index not known"); - ret = EIO; - goto out; - } - if (readgen.rpcversion == 6) { - read6.cd = krdc->remote_index; - read6.len = readgen.len; - read6.pos = readgen.pos; - read6.idx = readgen.idx; - read6.flag = readgen.flag; - } else { - read5.cd = krdc->remote_index; - read5.len = readgen.len; - read5.pos = readgen.pos; - read5.idx = readgen.idx; - read5.flag = readgen.flag; - } - - if (readgen.flag & RDC_RREAD_START) { - if (readgen.rpcversion == 6) { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - RDC_VERSION6, xdr_rread6, (char *)&read6, - xdr_int, (char *)&ret, &t); - } else { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - RDC_VERSION5, xdr_rread, (char *)&read5, - xdr_int, (char *)&ret, &t); - } - if (err == 0) { - *rvp = ret; - ret = 0; - } else { - ret = EPROTO; - } - } else { - if (readgen.rpcversion == 6) { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ6, - RDC_VERSION6, xdr_rread6, (char *)&read6, - xdr_rdresult, (char *)&rr, &t); - } else { - err = rdc_clnt_call(krdc->lsrv, RDCPROC_READ5, - RDC_VERSION5, xdr_rread, (char *)&read5, - xdr_rdresult, (char *)&rr, &t); - } - if (err == 0) { - if (rr.rr_status != RDC_OK) { - ret = EIO; - goto out; - } - *rvp = rr.rr_bufsize; - if (ddi_copyout(rr.rr_data, readgen.data, - rr.rr_bufsize, mode) != 0) { - ret = EFAULT; - goto out; - } - ret = 0; - } else { - ret = EPROTO; - goto out; - } - } -out: - if (rr.rr_data) { - kmem_free(rr.rr_data, rr.rr_bufsize); - } - return (ret); -} - - -#endif diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.h b/usr/src/uts/common/avs/ns/rdc/rdc_clnt.h deleted file mode 100644 index d58a0bdc8f..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_clnt.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_CLNT_H -#define _RDC_CLNT_H - -#ifdef __cplusplus -extern "C" { -#endif - -extern kmutex_t rdc_clnt_lock; - -struct chtab { - uint_t ch_timesused; - bool_t ch_inuse; - ulong_t ch_prog; - rpcvers_t ch_vers; - dev_t ch_dev; - char *ch_protofmly; - CLIENT *ch_client; - struct chtab *ch_next; /* chain of different prog/vers/dev/proto */ - struct chtab *ch_list; /* chain of similar clients */ -}; - -#define MAXCLIENTS 64 - -extern int rdc_clnt_call(rdc_srv_t *, rpcproc_t, rpcvers_t, xdrproc_t, - caddr_t, xdrproc_t, caddr_t, struct timeval *); -extern int rdc_clnt_call_any(rdc_srv_t *, rdc_if_t *, rpcproc_t, - xdrproc_t, caddr_t, xdrproc_t, caddr_t, - struct timeval *); -extern int rdc_clnt_call_walk(rdc_k_info_t *, rpcproc_t, xdrproc_t, caddr_t, - xdrproc_t, caddr_t, struct timeval *); - -extern int rdc_rpc_tmout; - -extern int rdc_aio_coalesce(rdc_aio_t *, rdc_aio_t *); - - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_CLNT_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_dev.c b/usr/src/uts/common/avs/ns/rdc/rdc_dev.c deleted file mode 100644 index 7957999c59..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_dev.c +++ /dev/null @@ -1,3019 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/cred.h> -#include <sys/file.h> -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_errors.h> - -#include <sys/unistat/spcs_s_k.h> -#ifdef DS_DDICT -#include "../contract.h" -#endif - -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc.h" -#include "rdc_io.h" -#include "rdc_bitmap.h" - -/* - * Remote Dual Copy - * - * This file contains the nsctl io provider functionality for RDC. 
- * - * RDC is implemented as a simple filter module that pushes itself between - * user (SIMCKD, STE, etc.) and SDBC. - */ - - -static int _rdc_open_count; -int rdc_eio_nobmp = 0; - -nsc_io_t *_rdc_io_hc; -static nsc_io_t *_rdc_io_hr; -static nsc_def_t _rdc_fd_def[], _rdc_io_def[], _rdc_ior_def[]; - -void _rdc_deinit_dev(); -int rdc_diskq_enqueue(rdc_k_info_t *, rdc_aio_t *); -extern void rdc_unintercept_diskq(rdc_group_t *); -rdc_aio_t *rdc_aio_tbuf_get(void *, void *, int, int, int, int, int); - -static nsc_buf_t *_rdc_alloc_handle(void (*)(), void (*)(), - void (*)(), rdc_fd_t *); -static int _rdc_free_handle(rdc_buf_t *, rdc_fd_t *); - -#ifdef DEBUG -int rdc_overlap_cnt; -int rdc_overlap_hnd_cnt; -#endif - -static rdc_info_dev_t *rdc_devices; - -extern int _rdc_rsrv_diskq(rdc_group_t *group); -extern void _rdc_rlse_diskq(rdc_group_t *group); - -/* - * _rdc_init_dev - * Initialise the io provider. - */ - -int -_rdc_init_dev() -{ - _rdc_io_hc = nsc_register_io("rdc-high-cache", - NSC_RDCH_ID|NSC_REFCNT|NSC_FILTER, _rdc_io_def); - if (_rdc_io_hc == NULL) - cmn_err(CE_WARN, "!rdc: nsc_register_io (high, cache) failed."); - - _rdc_io_hr = nsc_register_io("rdc-high-raw", - NSC_RDCHR_ID|NSC_REFCNT|NSC_FILTER, _rdc_ior_def); - if (_rdc_io_hr == NULL) - cmn_err(CE_WARN, "!rdc: nsc_register_io (high, raw) failed."); - - if (!_rdc_io_hc || !_rdc_io_hr) { - _rdc_deinit_dev(); - return (ENOMEM); - } - - return (0); -} - - -/* - * _rdc_deinit_dev - * De-initialise the io provider. - * - */ - -void -_rdc_deinit_dev() -{ - int rc; - - if (_rdc_io_hc) { - if ((rc = nsc_unregister_io(_rdc_io_hc, 0)) != 0) - cmn_err(CE_WARN, - "!rdc: nsc_unregister_io (high, cache) failed: %d", - rc); - } - - if (_rdc_io_hr) { - if ((rc = nsc_unregister_io(_rdc_io_hr, 0)) != 0) - cmn_err(CE_WARN, - "!rdc: nsc_unregister_io (high, raw) failed: %d", - rc); - } -} - - -/* - * rdc_idev_open - * - Open the nsctl file descriptors for the data devices. - * - * Must be called with rdc_conf_lock held. - * id_sets is protected by rdc_conf_lock. - */ -static rdc_info_dev_t * -rdc_idev_open(rdc_k_info_t *krdc, char *pathname, int *rc) -{ - rdc_info_dev_t *dp; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (dp = rdc_devices; dp; dp = dp->id_next) { - if (dp->id_cache_dev.bi_fd && - strcmp(pathname, nsc_pathname(dp->id_cache_dev.bi_fd)) == 0) - break; - } - - if (!dp) { - dp = kmem_zalloc(sizeof (*dp), KM_SLEEP); - if (!dp) - return (NULL); - - dp->id_cache_dev.bi_krdc = krdc; - dp->id_cache_dev.bi_fd = nsc_open(pathname, - NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE, - _rdc_fd_def, (blind_t)&dp->id_cache_dev, rc); - if (!dp->id_cache_dev.bi_fd) { - kmem_free(dp, sizeof (*dp)); - return (NULL); - } - - dp->id_raw_dev.bi_krdc = krdc; - dp->id_raw_dev.bi_fd = nsc_open(pathname, - NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE, - _rdc_fd_def, (blind_t)&dp->id_raw_dev, rc); - if (!dp->id_raw_dev.bi_fd) { - (void) nsc_close(dp->id_cache_dev.bi_fd); - kmem_free(dp, sizeof (*dp)); - return (NULL); - } - - mutex_init(&dp->id_rlock, NULL, MUTEX_DRIVER, NULL); - cv_init(&dp->id_rcv, NULL, CV_DRIVER, NULL); - - dp->id_next = rdc_devices; - rdc_devices = dp; - } - - dp->id_sets++; - return (dp); -} - - -/* - * rdc_idev_close - * - Close the nsctl file descriptors for the data devices. - * - * Must be called with rdc_conf_lock and dp->id_rlock held. - * Will release dp->id_rlock before returning. - * - * id_sets is protected by rdc_conf_lock. 
- */ -static void -rdc_idev_close(rdc_k_info_t *krdc, rdc_info_dev_t *dp) -{ - rdc_info_dev_t **dpp; -#ifdef DEBUG - int count = 0; -#endif - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - ASSERT(MUTEX_HELD(&dp->id_rlock)); - - dp->id_sets--; - if (dp->id_sets > 0) { - mutex_exit(&dp->id_rlock); - return; - } - - /* external references must have gone */ - ASSERT((krdc->c_ref + krdc->r_ref + krdc->b_ref) == 0); - - /* unlink from chain */ - - for (dpp = &rdc_devices; *dpp; dpp = &((*dpp)->id_next)) { - if (*dpp == dp) { - /* unlink */ - *dpp = dp->id_next; - break; - } - } - - /* - * Wait for all reserves to go away - the rpc server is - * running asynchronously with this close, and so we - * have to wait for it to spot that the krdc is !IS_ENABLED() - * and throw away the nsc_buf_t's that it has allocated - * and release the device. - */ - - while (IS_CRSRV(krdc) || IS_RRSRV(krdc)) { -#ifdef DEBUG - if (!(++count % 16)) { - cmn_err(CE_NOTE, - "!_rdc_idev_close(%s): waiting for nsc_release", - rdc_u_info[krdc->index].primary.file); - } - if (count > (16*20)) { - /* waited for 20 seconds - too long - panic */ - cmn_err(CE_PANIC, - "!_rdc_idev_close(%s, %p): lost nsc_release", - rdc_u_info[krdc->index].primary.file, (void *)krdc); - } -#endif - mutex_exit(&dp->id_rlock); - delay(HZ>>4); - mutex_enter(&dp->id_rlock); - } - - if (dp->id_cache_dev.bi_fd) { - (void) nsc_close(dp->id_cache_dev.bi_fd); - dp->id_cache_dev.bi_fd = NULL; - } - - if (dp->id_raw_dev.bi_fd) { - (void) nsc_close(dp->id_raw_dev.bi_fd); - dp->id_raw_dev.bi_fd = NULL; - } - - mutex_exit(&dp->id_rlock); - mutex_destroy(&dp->id_rlock); - cv_destroy(&dp->id_rcv); - - kmem_free(dp, sizeof (*dp)); -} - - -/* - * This function provokes an nsc_reserve() for the device which - * if successful will populate krdc->maxfbas and urdc->volume_size - * via the _rdc_attach_fd() callback. - */ -void -rdc_get_details(rdc_k_info_t *krdc) -{ - int rc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - nsc_size_t vol_size, maxfbas; - - if (_rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL) == 0) { - /* - * if the vol is already reserved, - * volume_size won't be populated on enable because - * it is a *fake* reserve and does not make it to - * _rdc_attach_fd(). So do it here. - */ - rc = nsc_partsize(RDC_U_FD(krdc), &vol_size); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_get_details: partsize failed (%d)", rc); -#endif /* DEBUG */ - urdc->volume_size = vol_size = 0; - } - - urdc->volume_size = vol_size; - rc = nsc_maxfbas(RDC_U_FD(krdc), 0, &maxfbas); - if (rc != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_get_details: maxfbas failed (%d)", rc); -#endif /* DEBUG */ - maxfbas = 0; - } - krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas); - - _rdc_rlse_devs(krdc, RDC_RAW); - } -} - - -/* - * Should only be used by the config code. 
- */ - -int -rdc_dev_open(rdc_set_t *rdc_set, int options) -{ - rdc_k_info_t *krdc; - int index; - int rc; - char *pathname; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (options & RDC_OPT_PRIMARY) - pathname = rdc_set->primary.file; - else - pathname = rdc_set->secondary.file; - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - - if (!IS_CONFIGURED(krdc)) - break; - } - - if (index == rdc_max_sets) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dev_open: out of cd\'s"); -#endif - index = -EINVAL; - goto out; - } - - if (krdc->devices && (krdc->c_fd || krdc->r_fd)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dev_open: %s already open", pathname); -#endif - index = -EINVAL; - goto out; - } - - _rdc_open_count++; - - krdc->devices = rdc_idev_open(krdc, pathname, &rc); - if (!krdc->devices) { - index = -rc; - goto open_fail; - } - - /* - * Grab the device size and maxfbas now. - */ - - rdc_get_details(krdc); - -out: - return (index); - -open_fail: - _rdc_open_count--; - - return (index); -} - - -void -rdc_dev_close(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - mutex_enter(&rdc_conf_lock); - - if (krdc->devices) - mutex_enter(&krdc->devices->id_rlock); - -#ifdef DEBUG - if (!krdc->devices || !krdc->c_fd || !krdc->r_fd) { - cmn_err(CE_WARN, - "!rdc_dev_close(%p): c_fd %p r_fd %p", (void *)krdc, - (void *) (krdc->devices ? krdc->c_fd : 0), - (void *) (krdc->devices ? krdc->r_fd : 0)); - } -#endif - - if (krdc->devices) { - /* rdc_idev_close will release id_rlock */ - rdc_idev_close(krdc, krdc->devices); - krdc->devices = NULL; - } - - urdc->primary.file[0] = '\0'; - - if (_rdc_open_count <= 0) { - cmn_err(CE_WARN, "!rdc: _rdc_open_count corrupt: %d", - _rdc_open_count); - } - - _rdc_open_count--; - - mutex_exit(&rdc_conf_lock); -} - - -/* - * rdc_intercept - * - * Register for IO on this device with nsctl. - * - * For a 1-to-many primary we register for each krdc and let nsctl sort - * out which it wants to be using. This means that we cannot tell which - * krdc will receive the incoming io from nsctl, though we do know that - * at any one time only one krdc will be 'attached' and so get io from - * nsctl. - * - * So the krdc->many_next pointer is maintained as a circular list. The - * result of these multiple nsc_register_paths is that we will see a - * few more attach and detach io provider calls during enable/resume - * and disable/suspend of the 1-to-many whilst nsctl settles down to - * using a single krdc. - * - * The major advantage of this scheme is that nsctl sorts out all the - * rdc_fd_t's so that they can only point to krdc's that are currently - * active. 
- */ -int -rdc_intercept(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - char *pathname; - char *bitmap; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - pathname = urdc->primary.file; - bitmap = urdc->primary.bitmap; - } else { - pathname = urdc->secondary.file; - bitmap = urdc->secondary.bitmap; - } - - if (!krdc->b_tok) - krdc->b_tok = nsc_register_path(bitmap, NSC_CACHE | NSC_DEVICE, - _rdc_io_hc); - - if (!krdc->c_tok) - krdc->c_tok = nsc_register_path(pathname, NSC_CACHE, - _rdc_io_hc); - - if (!krdc->r_tok) - krdc->r_tok = nsc_register_path(pathname, NSC_DEVICE, - _rdc_io_hr); - - if (!krdc->c_tok || !krdc->r_tok) { - (void) rdc_unintercept(krdc); - return (ENXIO); - } - - return (0); -} - - -static void -wait_unregistering(rdc_k_info_t *krdc) -{ - while (krdc->group->unregistering > 0) - (void) cv_wait_sig(&krdc->group->unregistercv, &rdc_conf_lock); -} - -static void -set_unregistering(rdc_k_info_t *krdc) -{ - wait_unregistering(krdc); - - krdc->group->unregistering++; -} - -static void -wakeup_unregistering(rdc_k_info_t *krdc) -{ - if (krdc->group->unregistering <= 0) - return; - - krdc->group->unregistering--; - cv_broadcast(&krdc->group->unregistercv); -} - - -/* - * rdc_unintercept - * - * Unregister for IO on this device. - * - * See comments above rdc_intercept. - */ -int -rdc_unintercept(rdc_k_info_t *krdc) -{ - int err = 0; - int rc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - mutex_enter(&rdc_conf_lock); - set_unregistering(krdc); - krdc->type_flag |= RDC_UNREGISTER; - mutex_exit(&rdc_conf_lock); - - if (krdc->r_tok) { - rc = nsc_unregister_path(krdc->r_tok, 0); - if (rc) { - cmn_err(CE_WARN, "!rdc: unregister rawfd %d", rc); - err = rc; - } - krdc->r_tok = NULL; - } - - if (krdc->c_tok) { - rc = nsc_unregister_path(krdc->c_tok, 0); - if (rc) { - cmn_err(CE_WARN, "!rdc: unregister cachefd %d", rc); - if (!err) - err = rc; - } - krdc->c_tok = NULL; - } - - if (krdc->b_tok) { - rc = nsc_unregister_path(krdc->b_tok, 0); - if (rc) { - cmn_err(CE_WARN, "!rdc: unregister bitmap %d", rc); - err = rc; - } - krdc->b_tok = NULL; - } - - rdc_group_enter(krdc); - - /* Wait for all necessary _rdc_close() calls to complete */ - while ((krdc->c_ref + krdc->r_ref + krdc->b_ref) != 0) { - krdc->closing++; - cv_wait(&krdc->closingcv, &krdc->group->lock); - krdc->closing--; - } - - rdc_clr_flags(urdc, RDC_ENABLED); - rdc_group_exit(krdc); - - - /* - * Check there are no outstanding writes in progress. - * This can happen when a set is being disabled which - * is one of the 'one_to_many' chain, that did not - * intercept the original write call. - */ - - for (;;) { - rdc_group_enter(krdc); - if (krdc->aux_state & RDC_AUXWRITE) { - rdc_group_exit(krdc); - /* - * This doesn't happen very often, - * just delay a bit and re-look. - */ - delay(50); - } else { - rdc_group_exit(krdc); - break; - } - } - - mutex_enter(&rdc_conf_lock); - krdc->type_flag &= ~RDC_UNREGISTER; - wakeup_unregistering(krdc); - mutex_exit(&rdc_conf_lock); - - return (err); -} - - -/* - * _rdc_rlse_d - * Internal version of _rdc_rlse_devs(), only concerned with the - * data device, not the bitmap. 
- */ - -static void -_rdc_rlse_d(rdc_k_info_t *krdc, int devs) -{ - _rdc_info_dev_t *cip; - _rdc_info_dev_t *rip; - int raw = (devs & RDC_RAW); - - if (!krdc) { - cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs null krdc"); - return; - } - - ASSERT((devs & (~RDC_BMP)) != 0); - - cip = &krdc->devices->id_cache_dev; - rip = &krdc->devices->id_raw_dev; - - if (IS_RSRV(cip)) { - /* decrement count */ - - if (raw) { - if (cip->bi_ofailed > 0) { - cip->bi_ofailed--; - } else if (cip->bi_orsrv > 0) { - cip->bi_orsrv--; - } - } else { - if (cip->bi_failed > 0) { - cip->bi_failed--; - } else if (cip->bi_rsrv > 0) { - cip->bi_rsrv--; - } - } - - /* - * reset nsc_fd ownership back link, it is only set if - * we have really done an underlying reserve, not for - * failed (faked) reserves. - */ - - if (cip->bi_rsrv > 0 || cip->bi_orsrv > 0) { - nsc_set_owner(cip->bi_fd, krdc->iodev); - } else { - nsc_set_owner(cip->bi_fd, NULL); - } - - /* release nsc_fd */ - - if (!IS_RSRV(cip)) { - nsc_release(cip->bi_fd); - } - } else if (IS_RSRV(rip)) { - /* decrement count */ - - if (raw) { - if (rip->bi_failed > 0) { - rip->bi_failed--; - } else if (rip->bi_rsrv > 0) { - rip->bi_rsrv--; - } - } else { - if (rip->bi_ofailed > 0) { - rip->bi_ofailed--; - } else if (rip->bi_orsrv > 0) { - rip->bi_orsrv--; - } - } - - /* - * reset nsc_fd ownership back link, it is only set if - * we have really done an underlying reserve, not for - * failed (faked) reserves. - */ - - if (rip->bi_rsrv > 0 || rip->bi_orsrv > 0) { - nsc_set_owner(rip->bi_fd, krdc->iodev); - } else { - nsc_set_owner(rip->bi_fd, NULL); - } - - /* release nsc_fd and any waiters */ - - if (!IS_RSRV(rip)) { - rip->bi_flag = 0; - nsc_release(rip->bi_fd); - cv_broadcast(&krdc->devices->id_rcv); - } - } else { - cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs no reserve? krdc %p", - (void *) krdc); - } -} - -/* - * _rdc_rlse_devs - * Release named underlying devices and take care of setting the - * back link on the nsc_fd to the correct parent iodev. - * - * NOTE: the 'devs' argument must be the same as that passed to - * the preceding _rdc_rsrv_devs call. - */ - -void -_rdc_rlse_devs(rdc_k_info_t *krdc, int devs) -{ - - DTRACE_PROBE(_rdc_rlse_devs_start); - mutex_enter(&krdc->devices->id_rlock); - - ASSERT(!(devs & RDC_CACHE)); - - if ((devs & (~RDC_BMP)) != 0) { - _rdc_rlse_d(krdc, devs); - } - - if ((devs & RDC_BMP) != 0) { - if (krdc->bmaprsrv > 0 && --krdc->bmaprsrv == 0) { - nsc_release(krdc->bitmapfd); - } - } - - mutex_exit(&krdc->devices->id_rlock); - -} - -/* - * _rdc_rsrv_d - * Reserve device flagged, unless its companion is already reserved, - * in that case increase the reserve on the companion. Take care - * of setting the nsc_fd ownership back link to the correct parent - * iodev pointer. 
- */ - -static int -_rdc_rsrv_d(int raw, _rdc_info_dev_t *rid, _rdc_info_dev_t *cid, int flag, - rdc_k_info_t *krdc) -{ - _rdc_info_dev_t *p = NULL; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int other = 0; - int rc; - - -#ifdef DEBUG - if ((rid->bi_rsrv < 0) || - (cid->bi_rsrv < 0) || - (rid->bi_orsrv < 0) || - (cid->bi_orsrv < 0) || - (rid->bi_failed < 0) || - (cid->bi_failed < 0) || - (rid->bi_ofailed < 0) || - (cid->bi_ofailed < 0)) { - cmn_err(CE_WARN, - "!_rdc_rsrv_d: negative counts (rsrv %d %d orsrv %d %d)", - rid->bi_rsrv, cid->bi_rsrv, - rid->bi_orsrv, cid->bi_orsrv); - cmn_err(CE_WARN, - "!_rdc_rsrv_d: negative counts (fail %d %d ofail %d %d)", - rid->bi_failed, cid->bi_failed, - rid->bi_ofailed, cid->bi_ofailed); - cmn_err(CE_PANIC, "_rdc_rsrv_d: negative counts (krdc %p)", - (void *) krdc); - } -#endif - - /* - * If user wants to do a cache reserve and it's already - * raw reserved internally, we need to do a real nsc_reserve, so wait - * until the release has been done. - */ - if (IS_RSRV(rid) && (flag == RDC_EXTERNAL) && - (raw == 0) && (rid->bi_flag != RDC_EXTERNAL)) { - krdc->devices->id_release++; - while (IS_RSRV(rid)) - cv_wait(&krdc->devices->id_rcv, - &krdc->devices->id_rlock); - krdc->devices->id_release--; - } - - /* select underlying device to use */ - - if (IS_RSRV(rid)) { - p = rid; - if (!raw) { - other = 1; - } - } else if (IS_RSRV(cid)) { - p = cid; - if (raw) { - other = 1; - } - } - - /* just increment count and return if already reserved */ - - if (p && !RFAILED(p)) { - if (other) { - p->bi_orsrv++; - } else { - p->bi_rsrv++; - } - - /* set nsc_fd ownership back link */ - nsc_set_owner(p->bi_fd, krdc->iodev); - return (0); - } - - /* attempt reserve */ - - if (!p) { - p = raw ? rid : cid; - } - - if (!p->bi_fd) { - /* rpc server raced with rdc_dev_close() */ - return (EIO); - } - if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) { - /* - * convert failed counts into reserved counts, and add - * in this reserve. - */ - - p->bi_orsrv = p->bi_ofailed; - p->bi_rsrv = p->bi_failed; - - if (other) { - p->bi_orsrv++; - } else { - p->bi_rsrv++; - } - - p->bi_ofailed = 0; - p->bi_failed = 0; - - /* set nsc_fd ownership back link */ - - nsc_set_owner(p->bi_fd, krdc->iodev); - } else if (rc != EINTR) { - /* - * If this is the master, and the secondary is not - * failed, then just fake this external reserve so that - * we can do remote io to the secondary and continue to - * provide service to the client. - * - * Subsequent calls to _rdc_rsrv_d() will re-try the - * nsc_reserve() until it succeeds. - */ - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING) && - !((rdc_get_vflags(urdc) & RDC_SLAVE) && - (rdc_get_vflags(urdc) & RDC_SYNCING))) { - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_reserve failed"); - rdc_many_exit(krdc); - rc = -1; -#ifdef DEBUG - cmn_err(CE_NOTE, "!nsc_reserve failed " - "with rc == %d\n", rc); -#endif - } else { - rc = 0; - } - - if (other) { - p->bi_ofailed++; - } else { - p->bi_failed++; - } - - if (krdc->maxfbas == 0) { - /* - * fake a maxfbas value for remote i/o, - * this will get reset when the next - * successful reserve happens as part - * of the rdc_attach_fd() callback. - */ - krdc->maxfbas = 128; - } - } - } - - if (rc == 0 && raw) { - p->bi_flag = flag; - } - - - return (rc); -} - -/* - * _rdc_rsrv_devs - * Reserve named underlying devices. 
- * - */ - -int -_rdc_rsrv_devs(rdc_k_info_t *krdc, int devs, int flag) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int write = 0; - int rc = 0; - int got = 0; - - if (!krdc) { - return (EINVAL); - } - - ASSERT(!(devs & RDC_CACHE)); - - mutex_enter(&krdc->devices->id_rlock); - - if ((devs & (~RDC_BMP)) != 0) { - if ((rc = _rdc_rsrv_d((devs & RDC_CACHE) == 0, - &krdc->devices->id_raw_dev, &krdc->devices->id_cache_dev, - flag, krdc)) != 0) { - if (rc == -1) { - /* - * we need to call rdc_write_state() - * after we drop the mutex - */ - write = 1; - rc = 0; - } else { - cmn_err(CE_WARN, - "!rdc: nsc_reserve(%s) failed %d\n", - nsc_pathname(krdc->c_fd), rc); - } - } else { - got |= (devs & (~RDC_BMP)); - } - } - - if (rc == 0 && (devs & RDC_BMP) != 0) { - if (krdc->bitmapfd == NULL) - rc = EIO; - else if ((krdc->bmaprsrv == 0) && - (rc = nsc_reserve(krdc->bitmapfd, 0)) != 0) { - cmn_err(CE_WARN, "!rdc: nsc_reserve(%s) failed %d\n", - nsc_pathname(krdc->bitmapfd), rc); - } else { - krdc->bmaprsrv++; - got |= RDC_BMP; - } - if (!RDC_SUCCESS(rc)) { - /* Undo any previous reserve */ - if (got != 0) - _rdc_rlse_d(krdc, got); - } - } - - mutex_exit(&krdc->devices->id_rlock); - - if (write) { - rdc_write_state(urdc); - } - - return (rc); -} - - -/* - * Read from the remote end, ensuring that if this is a many group in - * slave mode that we only remote read from the secondary with the - * valid data. - */ -int -_rdc_remote_read(rdc_k_info_t *krdc, nsc_buf_t *h, nsc_off_t pos, - nsc_size_t len, int flag) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this = krdc; /* krdc that was requested */ - int rc; - - if (flag & NSC_RDAHEAD) { - /* - * no point in doing readahead remotely, - * just say we did it ok - the client is about to - * throw this buffer away as soon as we return. - */ - return (NSC_DONE); - } - - /* - * If this is a many group with a reverse sync in progress and - * this is not the slave krdc/urdc, then search for the slave - * so that we can do the remote io from the correct secondary. - */ - if ((rdc_get_mflags(urdc) & RDC_SLAVE) && - !(rdc_get_vflags(urdc) & RDC_SLAVE)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - break; - } - rdc_many_exit(krdc); - - this = krdc; - } - -read1: - if (rdc_get_vflags(urdc) & RDC_LOGGING) { - /* cannot do remote io without the remote node! 
*/ - rc = ENETDOWN; - goto read2; - } - - - /* wait for the remote end to have the latest data */ - - if (IS_ASYNC(urdc)) { - while (krdc->group->ra_queue.blocks != 0) { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - } - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - rc = rdc_net_read(krdc->index, krdc->remote_index, h, pos, len); - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - /* If read error keep trying every secondary until no more */ -read2: - if (!RDC_SUCCESS(rc) && IS_MANY(krdc) && - !(rdc_get_mflags(urdc) & RDC_SLAVE)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rdc_many_exit(krdc); - goto read1; - } - rdc_many_exit(krdc); - } - - return (rc); -} - - -/* - * _rdc_alloc_buf - * Allocate a buffer of data - * - * Calling/Exit State: - * Returns NSC_DONE or NSC_HIT for success, NSC_PENDING for async - * I/O, > 0 is an error code. - * - * Description: - */ -int rdcbufs = 0; - -static int -_rdc_alloc_buf(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len, int flag, - rdc_buf_t **ptr) -{ - rdc_k_info_t *krdc = rfd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - nsc_vec_t *vec = NULL; - rdc_buf_t *h; - size_t size; - int ioflag; - int rc = 0; - - if (RDC_IS_BMP(rfd) || RDC_IS_QUE(rfd)) - return (EIO); - - if (len == 0) - return (EINVAL); - - if (flag & NSC_WRBUF) { - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - /* - * Forbid writes to secondary unless logging. - */ - return (EIO); - } - } - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED)) { - /* - * Forbid any io to secondary if it needs a sync. - */ - return (EIO); - } - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_vflags(urdc) & RDC_RSYNC_NEEDED) && - !(rdc_get_vflags(urdc) & RDC_VOL_FAILED) && - !(rdc_get_vflags(urdc) & RDC_SLAVE)) { - /* - * Forbid any io to primary if it needs a reverse sync - * and is not actively syncing. 
- */ - return (EIO); - } - - /* Bounds checking */ - ASSERT(urdc->volume_size != 0); - if (pos + len > urdc->volume_size) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: Attempt to access beyond end of rdc volume"); -#endif - return (EIO); - } - - h = *ptr; - if (h == NULL) { - /* should never happen (nsctl does this for us) */ -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_alloc_buf entered without buffer!"); -#endif - h = (rdc_buf_t *)_rdc_alloc_handle(NULL, NULL, NULL, rfd); - if (h == NULL) - return (ENOMEM); - - h->rdc_bufh.sb_flag &= ~NSC_HALLOCATED; - *ptr = h; - } - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_alloc_buf: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - h->rdc_bufh.sb_error = 0; - h->rdc_bufh.sb_flag |= flag; - h->rdc_bufh.sb_pos = pos; - h->rdc_bufh.sb_len = len; - ioflag = flag; - - bzero(&h->rdc_sync, sizeof (h->rdc_sync)); - mutex_init(&h->rdc_sync.lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&h->rdc_sync.cv, NULL, CV_DRIVER, NULL); - - if (flag & NSC_WRBUF) - _rdc_async_throttle(krdc, len); /* throttle incoming io */ - - /* - * Use remote io when: - * - local volume is failed - * - reserve status is failed - */ - if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) || IS_RFAILED(krdc)) { - rc = EIO; - } else { - rc = nsc_alloc_buf(RDC_U_FD(krdc), pos, len, - ioflag, &h->rdc_bufp); - if (!RDC_SUCCESS(rc)) { - rdc_many_enter(krdc); - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - } else { - /* Secondary, so forward sync needed */ - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - } - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_alloc_buf failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } - - if (RDC_SUCCESS(rc)) { - h->rdc_bufh.sb_vec = h->rdc_bufp->sb_vec; - h->rdc_flags |= RDC_ALLOC; - - /* - * If in slave and reading data, remote read on top of - * the buffer to ensure that we have the latest data. - */ - if ((flag & NSC_READ) && - (rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_mflags(urdc) & RDC_SLAVE)) { - rc = _rdc_remote_read(krdc, &h->rdc_bufh, - pos, len, flag); - /* - * Set NSC_MIXED so that the - * cache will throw away this buffer when we free - * it since we have combined data from multiple - * sources into a single buffer. 
- */ - h->rdc_bufp->sb_flag |= NSC_MIXED; - } - } - - /* - * If nsc_alloc_buf above fails, or local volume is failed or - * bitmap is failed or reserve, then we fill the buf from remote - */ - - if ((!RDC_SUCCESS(rc)) && (rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - if (flag & NSC_NODATA) { - ASSERT(!(flag & NSC_READ)); - h->rdc_flags |= RDC_REMOTE_BUF; - h->rdc_bufh.sb_vec = NULL; - } else { - size = sizeof (nsc_vec_t) * 2; - h->rdc_vsize = size + FBA_SIZE(len); - vec = kmem_zalloc(h->rdc_vsize, KM_SLEEP); - - if (!vec) { - rc = ENOMEM; - goto error; - } - - /* single flat buffer */ - - vec[0].sv_addr = (uchar_t *)vec + size; - vec[0].sv_len = FBA_SIZE(len); - vec[0].sv_vme = 0; - - /* null terminator */ - - vec[1].sv_addr = NULL; - vec[1].sv_len = 0; - vec[1].sv_vme = 0; - - h->rdc_bufh.sb_vec = vec; - h->rdc_flags |= RDC_REMOTE_BUF; - h->rdc_flags |= RDC_VEC_ALLOC; - } - - if (flag & NSC_READ) { - rc = _rdc_remote_read(krdc, &h->rdc_bufh, - pos, len, flag); - } else { - rc = NSC_DONE; - } - } -error: - if (!RDC_SUCCESS(rc)) { - h->rdc_bufh.sb_error = rc; - } - - return (rc); -} - - -/* - * _rdc_free_buf - */ - -static int -_rdc_free_buf(rdc_buf_t *h) -{ - int rc = 0; - - if (h->rdc_flags & RDC_ALLOC) { - if (h->rdc_bufp) { - rc = nsc_free_buf(h->rdc_bufp); - } - h->rdc_flags &= ~(RDC_ALLOC); - - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_free_buf(%p): nsc_free_buf(%p) returned %d", - (void *) h, (void *) h->rdc_bufp, rc); -#endif - return (rc); - } - } - - if (h->rdc_flags & (RDC_REMOTE_BUF|RDC_VEC_ALLOC)) { - if (h->rdc_flags & RDC_VEC_ALLOC) { - kmem_free(h->rdc_bufh.sb_vec, h->rdc_vsize); - } - h->rdc_flags &= ~(RDC_REMOTE_BUF|RDC_VEC_ALLOC); - } - - if (h->rdc_anon) { - /* anon buffers still pending */ - DTRACE_PROBE1(rdc_free_buf_err, aio_buf_t, h->rdc_anon); - } - - if ((h->rdc_bufh.sb_flag & NSC_HALLOCATED) == 0) { - rc = _rdc_free_handle(h, h->rdc_fd); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_free_buf(%p): _rdc_free_handle returned %d", - (void *) h, rc); -#endif - return (rc); - } - } else { - h->rdc_bufh.sb_flag = NSC_HALLOCATED; - h->rdc_bufh.sb_vec = NULL; - h->rdc_bufh.sb_error = 0; - h->rdc_bufh.sb_pos = 0; - h->rdc_bufh.sb_len = 0; - h->rdc_anon = NULL; - h->rdc_vsize = 0; - - cv_destroy(&h->rdc_sync.cv); - mutex_destroy(&h->rdc_sync.lock); - - } - - return (0); -} - - -/* - * _rdc_open - * Open a device - * - * Calling/Exit State: - * Returns a token to identify the device. - * - * Description: - * Performs the housekeeping operations associated with an upper layer - * of the nsctl stack opening a device. - */ - -/* ARGSUSED */ - -static int -_rdc_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - rdc_k_info_t *krdc; -#ifdef DEBUG - rdc_u_info_t *urdc; -#endif - rdc_fd_t *rfd; - int raw = ((flag & NSC_CACHE) == 0); - int index; - int bmp = 0; - int queue = 0; - - rfd = kmem_zalloc(sizeof (*rfd), KM_SLEEP); - if (!rfd) - return (ENOMEM); - - /* - * Take config lock to prevent a race with the - * (de)configuration code. 
- */ - - mutex_enter(&rdc_conf_lock); - - index = rdc_lookup_enabled(path, 0); - if (index < 0) { - index = rdc_lookup_bitmap(path); - if (index >= 0) - bmp = 1; - } - if (index < 0) { - index = rdc_lookup_diskq(path); - if (index >= 0) - queue = 1; - } - if (index < 0) { - /* not found in config */ - mutex_exit(&rdc_conf_lock); - kmem_free(rfd, sizeof (*rfd)); - return (ENXIO); - } -#ifdef DEBUG - urdc = &rdc_u_info[index]; -#endif - krdc = &rdc_k_info[index]; - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - ASSERT(IS_ENABLED(urdc)); - - if (bmp) { - krdc->b_ref++; - } else if (raw) { - krdc->r_ref++; - } else if (!queue) { - krdc->c_ref++; - } - - rfd->rdc_info = krdc; - if (bmp) - rfd->rdc_type = RDC_BMP; - else if (queue) - rfd->rdc_type = RDC_QUE; - else - rfd->rdc_oflags = flag; - - rdc_group_exit(krdc); - - *cdp = (blind_t)rfd; - - return (0); -} - -static int -_rdc_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - return (_rdc_open(path, NSC_CACHE|flag, cdp, iodev)); -} - -static int -_rdc_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev) -{ - return (_rdc_open(path, NSC_DEVICE|flag, cdp, iodev)); -} - - -/* - * _rdc_close - * Close a device - * - * Calling/Exit State: - * Always succeeds - returns 0 - * - * Description: - * Performs the housekeeping operations associated with an upper layer - * of the sd stack closing a shadowed device. - */ - -static int -_rdc_close(rfd) -rdc_fd_t *rfd; -{ - rdc_k_info_t *krdc = rfd->rdc_info; - int bmp = RDC_IS_BMP(rfd); - int raw = RDC_IS_RAW(rfd); - int queue = RDC_IS_QUE(rfd); - - /* - * we don't keep ref counts for the queue, so skip this stuff. - * we may not even have a valid krdc at this point - */ - if (queue) - goto queue; - rdc_group_enter(krdc); - - if (bmp) { - krdc->b_ref--; - } else if (raw && !queue) { - krdc->r_ref--; - } else if (!queue) { - krdc->c_ref--; - } - - if (krdc->closing) { - cv_broadcast(&krdc->closingcv); - } - - rdc_group_exit(krdc); -queue: - kmem_free(rfd, sizeof (*rfd)); - return (0); -} - -/* - * _rdc_alloc_handle - * Allocate a handle - * - */ - -static nsc_buf_t * -_rdc_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), rdc_fd_t *rfd) -{ - rdc_buf_t *h; - - h = kmem_zalloc(sizeof (*h), KM_SLEEP); - if (!h) - return (NULL); - - h->rdc_bufp = nsc_alloc_handle(RDC_FD(rfd), d_cb, r_cb, w_cb); - if (!h->rdc_bufp) { - if (!IS_RFAILED(rfd->rdc_info)) { - /* - * This is a real failure from the io provider below. - */ - kmem_free(h, sizeof (*h)); - return (NULL); - } else { - /* EMPTY */ - /* - * This is just a failed primary device where - * we can do remote io to the secondary. - */ - } - } - - h->rdc_bufh.sb_flag = NSC_HALLOCATED; - h->rdc_fd = rfd; - mutex_init(&h->aio_lock, NULL, MUTEX_DRIVER, NULL); - - return (&h->rdc_bufh); -} - - -/* - * _rdc_free_handle - * Free a handle - * - */ - -/* ARGSUSED */ -static int -_rdc_free_handle(rdc_buf_t *h, rdc_fd_t *rfd) -{ - int rc; - - mutex_destroy(&h->aio_lock); - if (h->rdc_bufp) { - rc = nsc_free_handle(h->rdc_bufp); - if (!RDC_SUCCESS(rc)) - return (rc); - } - kmem_free(h, sizeof (rdc_buf_t)); - return (0); -} - - -/* - * _rdc_attach - * Attach - * - * Calling/Exit State: - * Returns 0 for success, errno on failure. 
- * - * Description: - */ - -static int -_rdc_attach(rdc_fd_t *rfd, nsc_iodev_t *iodev) -{ - rdc_k_info_t *krdc; - int raw = RDC_IS_RAW(rfd); - int rc; - - if ((RDC_IS_BMP(rfd)) || RDC_IS_QUE(rfd)) - return (EINVAL); - - krdc = rfd->rdc_info; - if (krdc == NULL) - return (EINVAL); - - mutex_enter(&krdc->devices->id_rlock); - krdc->iodev = iodev; - mutex_exit(&krdc->devices->id_rlock); - - rc = _rdc_rsrv_devs(krdc, (raw ? RDC_RAW : RDC_CACHE), RDC_EXTERNAL); - return (rc); -} - - -/* - * _rdc_detach - * Detach - * - * Calling/Exit State: - * Returns 0 for success, always succeeds - * - * Description: - */ - -static int -_rdc_detach(rdc_fd_t *rfd, nsc_iodev_t *iodev) -{ - rdc_k_info_t *krdc = rfd->rdc_info; - int raw = RDC_IS_RAW(rfd); - - /* - * Flush the async queue if necessary. - */ - - if (IS_ASYNC(&rdc_u_info[krdc->index]) && !RDC_IS_DISKQ(krdc->group)) { - int tries = 1; - - while (krdc->group->ra_queue.blocks != 0 && tries--) { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - } - - /* force disgard of possibly blocked flusher threads */ - if (rdc_drain_queue(krdc->index) != 0) { -#ifdef DEBUG - net_queue *qp = &krdc->group->ra_queue; -#endif - do { - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - cmn_err(CE_WARN, - "!RDC: async I/O pending and not drained " - "for %s during detach", - rdc_u_info[krdc->index].primary.file); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nitems: %" NSC_SZFMT " nblocks: %" - NSC_SZFMT " head: 0x%p tail: 0x%p", - qp->nitems, qp->blocks, - (void *)qp->net_qhead, - (void *)qp->net_qtail); -#endif - } while (krdc->group->rdc_thrnum > 0); - } - } - - mutex_enter(&krdc->devices->id_rlock); - if (krdc->iodev != iodev) - cmn_err(CE_WARN, "!_rdc_detach: iodev mismatch %p : %p", - (void *) krdc->iodev, (void *) iodev); - - krdc->iodev = NULL; - mutex_exit(&krdc->devices->id_rlock); - - _rdc_rlse_devs(krdc, (raw ? RDC_RAW : RDC_CACHE)); - - return (0); -} - -/* - * _rdc_get_pinned - * - * only affects local node. - */ - -static int -_rdc_get_pinned(rdc_fd_t *rfd) -{ - return (nsc_get_pinned(RDC_FD(rfd))); -} - -/* - * _rdc_discard_pinned - * - * only affects local node. - */ - -static int -_rdc_discard_pinned(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len) -{ - return (nsc_discard_pinned(RDC_FD(rfd), pos, len)); -} - -/* - * _rdc_partsize - * - * only affects the local node. - */ - -static int -_rdc_partsize(rdc_fd_t *rfd, nsc_size_t *ptr) -{ - rdc_u_info_t *urdc; - - urdc = &rdc_u_info[rfd->rdc_info->index]; - /* Always return saved size */ - ASSERT(urdc->volume_size != 0); - *ptr = urdc->volume_size; - return (0); -} - -/* - * _rdc_maxfbas - * - * only affects local node - */ - -/* ARGSUSED */ -static int -_rdc_maxfbas(rdc_fd_t *rfd, int flag, nsc_size_t *ptr) -{ - rdc_k_info_t *krdc = rfd->rdc_info; - int raw = RDC_IS_RAW(rfd); - int rtype = raw ? 
RDC_RAW : RDC_CACHE; - int rc = 0; - - if (krdc == NULL) - return (EINVAL); - if (flag == NSC_RDAHEAD || flag == NSC_CACHEBLK) { - rc = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (rc == 0) { - rc = nsc_maxfbas(RDC_U_FD(krdc), flag, ptr); - _rdc_rlse_devs(krdc, rtype); - } - } else { - /* Always return saved size */ - ASSERT(krdc->maxfbas != 0); - *ptr = krdc->maxfbas - 1; - } - - return (rc); -} - -/* ARGSUSED */ -static int -_rdc_control(rdc_fd_t *rfd, int cmd, void *ptr, int len) -{ - return (nsc_control(RDC_FD(rfd), cmd, ptr, len)); -} - -/* - * _rdc_attach_fd - * - * called by nsctl as part of nsc_reserve() processing when one of - * SNDR's underlying file descriptors becomes available and metadata - * should be re-acquired. - */ -static int -_rdc_attach_fd(blind_t arg) -{ - _rdc_info_dev_t *dip = (_rdc_info_dev_t *)arg; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - nsc_size_t maxfbas, partsize; - int rc; - - krdc = dip->bi_krdc; - urdc = &rdc_u_info[krdc->index]; - - if ((rc = nsc_partsize(dip->bi_fd, &partsize)) != 0) { - cmn_err(CE_WARN, - "!SNDR: cannot get volume size of %s, error %d", - nsc_pathname(dip->bi_fd), rc); - } else if (urdc->volume_size == 0 && partsize > 0) { - /* set volume size for the first time */ - urdc->volume_size = partsize; - } else if (urdc->volume_size != partsize) { - /* - * SNDR cannot yet cope with a volume being resized, - * so fail it. - */ - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - else - rdc_set_mflags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "volume resized"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - - cmn_err(CE_WARN, - "!SNDR: %s changed size from %" NSC_SZFMT " to %" NSC_SZFMT, - nsc_pathname(dip->bi_fd), urdc->volume_size, partsize); - } - - if ((rc = nsc_maxfbas(dip->bi_fd, 0, &maxfbas)) != 0) { - cmn_err(CE_WARN, - "!SNDR: cannot get max transfer size for %s, error %d", - nsc_pathname(dip->bi_fd), rc); - } else if (maxfbas > 0) { - krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas); - } - - return (0); -} - - -/* - * _rdc_pinned - * - * only affects local node - */ - -static void -_rdc_pinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) -{ - nsc_pinned_data(dip->bi_krdc->iodev, pos, len); -} - - -/* - * _rdc_unpinned - * - * only affects local node. - */ - -static void -_rdc_unpinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len) -{ - nsc_unpinned_data(dip->bi_krdc->iodev, pos, len); -} - - -/* - * _rdc_read - * - * read the specified data into the buffer - go remote if local down, - * or the remote end has more recent data because an reverse sync is - * in progress. 
- */ - -static int -_rdc_read(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - rdc_k_info_t *krdc = h->rdc_fd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int remote = (RDC_REMOTE(h) || (rdc_get_mflags(urdc) & RDC_SLAVE)); - int rc1, rc2; - - rc1 = rc2 = 0; - - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_read: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_read: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - - if (!remote) { - rc1 = nsc_read(h->rdc_bufp, pos, len, flag); - } - - if (remote || !RDC_SUCCESS(rc1)) { - rc2 = _rdc_remote_read(krdc, &h->rdc_bufh, pos, len, flag); - } - - if (remote && !RDC_SUCCESS(rc2)) - h->rdc_bufh.sb_error = rc2; - else if (!RDC_SUCCESS(rc1) && !RDC_SUCCESS(rc2)) - h->rdc_bufh.sb_error = rc1; - - return (h->rdc_bufh.sb_error); -} - - -static int -_rdc_remote_write(rdc_k_info_t *krdc, rdc_buf_t *h, nsc_buf_t *nsc_h, - nsc_off_t pos, nsc_size_t len, int flag, uint_t bitmask) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc = 0; - nsc_size_t plen, syncblockpos; - aio_buf_t *anon = NULL; - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) - return (EINVAL); - - if ((rdc_get_vflags(urdc) & RDC_LOGGING) && - (!IS_STATE(urdc, RDC_QUEUING))) { - goto done; - } - - /* - * this check for RDC_SYNCING may seem redundant, but there is a window - * in rdc_sync, where an async set has not yet been transformed into a - * sync set. - */ - if ((!IS_ASYNC(urdc) || IS_STATE(urdc, RDC_SYNCING)) || - RDC_REMOTE(h) || - krdc->group->synccount > 0 || - (rdc_get_vflags(urdc) & RDC_SLAVE) || - (rdc_get_vflags(urdc) & RDC_VOL_FAILED) || - (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) { - - /* sync mode, or remote io mode, or local device is dead */ - rc = rdc_net_write(krdc->index, krdc->remote_index, - nsc_h, pos, len, RDC_NOSEQ, RDC_NOQUE, NULL); - - if ((rc == 0) && - !(rdc_get_vflags(urdc) & RDC_BMP_FAILED) && - !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - if (IS_STATE(urdc, RDC_SYNCING) && - !IS_STATE(urdc, RDC_FULL) || - !IS_STATE(urdc, RDC_SLAVE)) { - mutex_enter(&krdc->syncbitmutex); - - syncblockpos = LOG_TO_FBA_NUM(krdc->syncbitpos); - - DTRACE_PROBE4(rdc_remote_write, - nsc_off_t, krdc->syncbitpos, - nsc_off_t, syncblockpos, - nsc_off_t, pos, - nsc_size_t, len); - - /* - * If the current I/O's position plus length is - * greater then the sync block position, only - * clear those blocks upto sync block position - */ - if (pos < syncblockpos) { - if ((pos + len) > syncblockpos) - plen = syncblockpos - pos; - else - plen = len; - RDC_CLR_BITMAP(krdc, pos, plen, bitmask, - RDC_BIT_BUMP); - } - mutex_exit(&krdc->syncbitmutex); - } else { - RDC_CLR_BITMAP(krdc, pos, len, bitmask, - RDC_BIT_BUMP); - } - } else if (rc != 0) { - rdc_group_enter(krdc); - rdc_set_flags_log(urdc, RDC_LOGGING, - "net write failed"); - rdc_write_state(urdc); - if (rdc_get_vflags(urdc) & RDC_SYNCING) - krdc->disk_status = 1; - rdc_group_exit(krdc); - } - } else if (!IS_STATE(urdc, RDC_SYNCING)) { - DTRACE_PROBE1(async_enque_start, rdc_buf_t *, h); - - ASSERT(krdc->group->synccount == 0); - /* async mode */ - if ((h == NULL) || ((h->rdc_flags & RDC_ASYNC_VEC) == 0)) { - - rc = _rdc_enqueue_write(krdc, pos, len, flag, NULL); - - } else { - anon = rdc_aio_buf_get(h, krdc->index); - if 
(anon == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!enqueue write failed for handle %p", - (void *) h); -#endif - return (EINVAL); - } - rc = _rdc_enqueue_write(krdc, pos, len, flag, - anon->rdc_abufp); - - /* - * get rid of the aio_buf_t now, as this - * may not be the set that this rdc_buf - * was allocated on, we are done with it anyways - * enqueuing code frees the nsc_abuf - */ - rdc_aio_buf_del(h, krdc); - } - - } else { - ASSERT(IS_STATE(urdc, RDC_SYNCING)); - ASSERT(0); - } - -done: - if ((anon == NULL) && h && (h->rdc_flags & RDC_ASYNC_VEC)) { - /* - * Toss the anonymous buffer if we have one allocated. - */ - anon = rdc_aio_buf_get(h, krdc->index); - if (anon) { - (void) nsc_free_buf(anon->rdc_abufp); - rdc_aio_buf_del(h, krdc); - } - } - - return (rc); -} - -/* - * _rdc_multi_write - * - * Send to multihop remote. Obeys 1 to many if present and we are crazy - * enough to support it. - * - */ -int -_rdc_multi_write(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag, - rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this = krdc; /* krdc that was requested */ - int rc, retval; - uint_t bitmask; - - retval = rc = 0; - if (!RDC_HANDLE_LIMITS(h, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_multi_write: bounds check: io(handle) pos %" - NSC_XSZFMT "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" - NSC_XSZFMT ")", pos, h->sb_pos, len, h->sb_len); - return (EINVAL); - } - - /* if this is a 1 to many, set all the bits for all the sets */ - do { - if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) { - (void) nsc_uncommit(h, pos, len, flag); - /* set the error, but try other sets */ - retval = EIO; - } - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - } while (krdc != this); - - urdc = &rdc_u_info[krdc->index]; - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_multi_write: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - -multiwrite1: - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (!IS_STATE(urdc, RDC_LOGGING) || - (IS_STATE(urdc, RDC_LOGGING) && - IS_STATE(urdc, RDC_QUEUING)))) { - rc = _rdc_remote_write(krdc, NULL, h, pos, len, flag, bitmask); - } - - if (!RDC_SUCCESS(rc) && retval == 0) { - retval = rc; - } - -multiwrite2: - if (IS_MANY(krdc) && (rdc_get_vflags(urdc) && RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rc = 0; - rdc_many_exit(krdc); - - goto multiwrite1; - } - rdc_many_exit(krdc); - } - - return (retval); -} - -void -_rdc_diskq_enqueue_thr(rdc_aio_t *p) -{ - rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next; - rdc_k_info_t *krdc = &rdc_k_info[p->index]; - int rc2; - - - rc2 = rdc_diskq_enqueue(krdc, p); - - /* - * overload flag with error return if any - */ - if (!RDC_SUCCESS(rc2)) { - p->flag = rc2; - } else { - p->flag = 0; - } - mutex_enter(&sync->lock); - sync->complete++; - cv_broadcast(&sync->cv); - mutex_exit(&sync->lock); -} - -/* - * _rdc_sync_write_thr - * syncronous write thread which writes to network while - * local write is occuring - */ -void -_rdc_sync_write_thr(rdc_aio_t *p) -{ - rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next; - rdc_buf_t *h = (rdc_buf_t *)p->handle; - rdc_k_info_t *krdc = &rdc_k_info[p->index]; -#ifdef DEBUG - rdc_u_info_t *urdc; 
-#endif - int rc2; - int bitmask; - - rdc_group_enter(krdc); - krdc->aux_state |= RDC_AUXWRITE; -#ifdef DEBUG - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) { - cmn_err(CE_WARN, "!rdc_sync_write_thr: set not enabled %s:%s", - urdc->secondary.file, - urdc->secondary.bitmap); - } -#endif - rdc_group_exit(krdc); - bitmask = p->iostatus; /* overload */ - rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, p->pos, p->len, - p->flag, bitmask); - - - /* - * overload flag with error return if any - */ - if (!RDC_SUCCESS(rc2)) { - p->flag = rc2; - } else { - p->flag = 0; - } - - rdc_group_enter(krdc); - krdc->aux_state &= ~RDC_AUXWRITE; - rdc_group_exit(krdc); - - mutex_enter(&sync->lock); - sync->complete++; - cv_broadcast(&sync->cv); - mutex_exit(&sync->lock); -} - -/* - * _rdc_write - * - * Commit changes to the buffer locally and send remote. - * - * If this write is whilst the local primary volume is being synced, - * then we write the remote end first to ensure that the new data - * cannot be overwritten by a concurrent sync operation. - */ - -static int -_rdc_write(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - rdc_k_info_t *krdc = h->rdc_fd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this; - rdc_k_info_t *multi = NULL; - int remote = RDC_REMOTE(h); - int rc1, rc2; - uint_t bitmask; - int first; - int rsync; - int nthr; - int winddown; - int thrrc = 0; - rdc_aio_t *bp[SNDR_MAXTHREADS]; - aio_buf_t *anon; - nsthread_t *tp; - rdc_thrsync_t *sync = &h->rdc_sync; - - /* If this is the multi-hop secondary, move along to the primary */ - if (IS_MULTI(krdc) && !IS_PRIMARY(urdc)) { - multi = krdc; - krdc = krdc->multi_next; - urdc = &rdc_u_info[krdc->index]; - - if (!IS_ENABLED(urdc)) { - krdc = h->rdc_fd->rdc_info; - urdc = &rdc_u_info[krdc->index]; - multi = NULL; - } - } - this = krdc; - - rsync = (IS_PRIMARY(urdc)) && (IS_SLAVE(urdc)); - - /* - * If this is a many group with a reverse sync in progress and - * this is not the slave krdc/urdc, then search for the slave - * so that we can do the remote io to the correct secondary - * before the local io. 
- */ - if (rsync && !(IS_SLAVE(urdc))) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - break; - } - rdc_many_exit(krdc); - - this = krdc; - } - - urdc = &rdc_u_info[krdc->index]; - - rc1 = rc2 = 0; - first = 1; - nthr = 0; - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_write: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - - DTRACE_PROBE(rdc_write_bitmap_start); - - /* if this is a 1 to many, set all the bits for all the sets */ - do { - if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) { - if (rdc_eio_nobmp) { - (void) nsc_uncommit - (h->rdc_bufp, pos, len, flag); - /* set the error, but try the other sets */ - h->rdc_bufh.sb_error = EIO; - } - } - - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - - } while (krdc != this); - - urdc = &rdc_u_info[krdc->index]; - - DTRACE_PROBE(rdc_write_bitmap_end); - -write1: - /* just in case we switch mode during write */ - if (IS_ASYNC(urdc) && (!IS_STATE(urdc, RDC_SYNCING)) && - (!IS_STATE(urdc, RDC_LOGGING) || - IS_STATE(urdc, RDC_QUEUING))) { - h->rdc_flags |= RDC_ASYNC_BUF; - } - if (BUF_IS_ASYNC(h)) { - /* - * We are async mode - */ - aio_buf_t *p; - DTRACE_PROBE(rdc_write_async_start); - - if ((krdc->type_flag & RDC_DISABLEPEND) || - ((IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING)))) { - goto localwrite; - } - if (IS_STATE(urdc, RDC_VOL_FAILED)) { - /* - * overload remote as we don't want to do local - * IO later. 
forge ahead with async - */ - remote++; - } - if ((IS_STATE(urdc, RDC_SYNCING)) || - (IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING))) { - goto localwrite; - } - - p = rdc_aio_buf_add(krdc->index, h); - if (p == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf aio_buf allocation failed"); -#endif - goto localwrite; - } - - mutex_enter(&h->aio_lock); - - DTRACE_PROBE(rdc_write_async__allocabuf_start); - rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp); - DTRACE_PROBE(rdc_write_async__allocabuf_end); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf NSC_ANON allocation failed rc %d", - rc1); -#endif - mutex_exit(&h->aio_lock); - goto localwrite; - } - h->rdc_flags |= RDC_ASYNC_VEC; - mutex_exit(&h->aio_lock); - - /* - * Copy buffer into anonymous buffer - */ - - DTRACE_PROBE(rdc_write_async_nsccopy_start); - rc1 = - nsc_copy(&h->rdc_bufh, p->rdc_abufp, pos, pos, len); - DTRACE_PROBE(rdc_write_async_nsccopy_end); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_write: nsc_copy failed rc=%d state %x", - rc1, rdc_get_vflags(urdc)); -#endif - rc1 = nsc_free_buf(p->rdc_abufp); - rdc_aio_buf_del(h, krdc); - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_FLUSH|RDC_OTHERREMOTE, - "nsc_copy failure"); - rdc_group_exit(krdc); - } - DTRACE_PROBE(rdc_write_async_end); - - /* - * using a diskq, launch a thread to queue it - * and free the aio->h and aio - * if the thread fails, do it the old way (see localwrite) - */ - - if (RDC_IS_DISKQ(krdc->group)) { - - if (nthr >= SNDR_MAXTHREADS) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!nthr overrun in _rdc_write"); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - - anon = rdc_aio_buf_get(h, krdc->index); - if (anon == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_aio_buf_get failed for " - "%p", (void *)h); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - - /* get a populated rdc_aio_t */ - bp[nthr] = - rdc_aio_tbuf_get(sync, anon->rdc_abufp, pos, len, - flag, krdc->index, bitmask); - - if (bp[nthr] == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdcwrite: " - "kmem_alloc failed bp aio (1)"); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - /* start the queue io */ - tp = nst_create(_rdc_ioset, _rdc_diskq_enqueue_thr, - (void *)bp[nthr], NST_SLEEP); - - if (tp == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!_rdcwrite: nst_create failure"); -#endif - thrrc = ENOEXEC; - } else { - mutex_enter(&(sync->lock)); - sync->threads++; - mutex_exit(&(sync->lock)); - nthr++; - - } - /* - * the handle that is to be enqueued is now in - * the rdc_aio_t, and will be freed there. - * dump the aio_t now. If this is 1 to many - * we may not do this in _rdc_free_buf() - * if this was not the index that the rdc_buf_t - * was allocated on. - */ - rdc_aio_buf_del(h, krdc); - - } - } /* end of async */ - - /* - * We try to overlap local and network IO for the sync case - * (we already do it for async) - * If one to many, we need to track the resulting nst_thread - * so we don't trash the nsc_buf on a free - * Start network IO first then do local (sync only) - */ - - if (IS_PRIMARY(urdc) && !IS_STATE(urdc, RDC_LOGGING) && - !BUF_IS_ASYNC(h)) { - /* - * if forward syncing, we must do local IO first - * then remote io. 
Don't spawn thread - */ - if (!rsync && (IS_STATE(urdc, RDC_SYNCING))) { - thrrc = ENOEXEC; - goto localwrite; - } - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - multi = ktmp; - } - if (nthr >= SNDR_MAXTHREADS) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!nthr overrun in _rdc_write"); -#endif - thrrc = ENOEXEC; - goto localwrite; - } - - bp[nthr] = rdc_aio_tbuf_get(sync, h, pos, len, - flag, krdc->index, bitmask); - - if (bp[nthr] == NULL) { - thrrc = ENOEXEC; - goto localwrite; - } - tp = nst_create(_rdc_ioset, _rdc_sync_write_thr, - (void *)bp[nthr], NST_SLEEP); - if (tp == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdcwrite: nst_create failure"); -#endif - thrrc = ENOEXEC; - } else { - mutex_enter(&(sync->lock)); - sync->threads++; - mutex_exit(&(sync->lock)); - nthr++; - } - } -localwrite: - if (!remote && !rsync && first) { - DTRACE_PROBE(rdc_write_nscwrite_start); - rc1 = nsc_write(h->rdc_bufp, pos, len, flag); - DTRACE_PROBE(rdc_write_nscwrite_end); - if (!RDC_SUCCESS(rc1)) { - rdc_many_enter(krdc); - if (IS_PRIMARY(urdc)) - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - else - /* Secondary, so sync needed */ - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "local write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } - - /* - * This is where we either enqueue async IO for the flusher - * or do sync IO in the case of an error in thread creation - * or we are doing a forward sync - * NOTE: if we are async, and using a diskq, we have - * already enqueued this write. - * _rdc_remote_write will end up enqueuueing to memory, - * or in case of a thread creation error above, try again - * enqueue the diskq write if thrrc == ENOEXEC - */ - if ((IS_PRIMARY(urdc)) && (thrrc == ENOEXEC) || - (BUF_IS_ASYNC(h) && !RDC_IS_DISKQ(krdc->group))) { - thrrc = 0; - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - multi = ktmp; - } - - DTRACE_PROBE(rdc_write_remote_start); - - rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, - pos, len, flag, bitmask); - - DTRACE_PROBE(rdc_rdcwrite_remote_end); - } - - if (!RDC_SUCCESS(rc1)) { - if ((IS_PRIMARY(urdc)) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc1; - } - } else if ((remote || rsync) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc2; - } -write2: - /* - * If one to many, jump back into the loop to continue IO - */ - if (IS_MANY(krdc) && (IS_PRIMARY(urdc))) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rc2 = first = 0; - h->rdc_flags &= ~RDC_ASYNC_BUF; - rdc_many_exit(krdc); - goto write1; - } - rdc_many_exit(krdc); - } - urdc = &rdc_u_info[krdc->index]; - - /* - * collect all of our threads if any - */ - if (nthr) { - - mutex_enter(&(sync->lock)); - /* wait for the threads */ - while (sync->complete != sync->threads) { - cv_wait(&(sync->cv), &(sync->lock)); - } - mutex_exit(&(sync->lock)); - - /* collect status */ - - winddown = 0; - while (winddown < nthr) { - /* - * Get any error return from thread - */ - if ((remote || rsync) && bp[winddown]->flag) { - h->rdc_bufh.sb_error = bp[winddown]->flag; - } - if (bp[winddown]) - kmem_free(bp[winddown], sizeof (rdc_aio_t)); - winddown++; - } - } - - if (rsync && !(IS_STATE(urdc, 
RDC_VOL_FAILED))) { - rc1 = nsc_write(h->rdc_bufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { - /* rsync, so reverse sync needed already set */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "rsync local write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - - /* - * only report the error if a remote error - * occurred as well. - */ - if (h->rdc_bufh.sb_error) - h->rdc_bufh.sb_error = rc1; - } - } - - if (multi) { - /* Multi-hop secondary, just set bits in the bitmap */ - (void) RDC_SET_BITMAP(multi, pos, len, &bitmask); - } - - return (h->rdc_bufh.sb_error); -} - - -static void -_rdc_bzero(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len) -{ - nsc_vec_t *v; - uchar_t *a; - size_t sz; - int l; - - if (!RDC_HANDLE_LIMITS(h, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_bzero: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->sb_pos, len, h->sb_len); - return; - } - - if (!len) - return; - - /* find starting point */ - - v = h->sb_vec; - pos -= h->sb_pos; - - for (; pos >= FBA_NUM(v->sv_len); v++) - pos -= FBA_NUM(v->sv_len); - - a = v->sv_addr + FBA_SIZE(pos); - l = v->sv_len - FBA_SIZE(pos); - - /* zero */ - - len = FBA_SIZE(len); /* convert to bytes */ - - while (len) { - if (!a) /* end of vec */ - break; - - sz = (size_t)min((nsc_size_t)l, len); - - bzero(a, sz); - - len -= sz; - l -= sz; - a += sz; - - if (!l) { - v++; - a = v->sv_addr; - l = v->sv_len; - } - } -} - - -/* - * _rdc_zero - * - * Zero and commit the specified area of the buffer. - * - * If this write is whilst the local primary volume is being synced, - * then we write the remote end first to ensure that the new data - * cannot be overwritten by a concurrent sync operation. - */ - -static int -_rdc_zero(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - rdc_k_info_t *krdc = h->rdc_fd->rdc_info; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *this; - rdc_k_info_t *multi = NULL; - int remote = RDC_REMOTE(h); - int rc1, rc2; - uint_t bitmask; - int first; - int rsync; - - /* If this is the multi-hop secondary, move along to the primary */ - if (IS_MULTI(krdc) && !(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - multi = krdc; - krdc = krdc->multi_next; - urdc = &rdc_u_info[krdc->index]; - - if (!IS_ENABLED(urdc)) { - krdc = h->rdc_fd->rdc_info; - urdc = &rdc_u_info[krdc->index]; - multi = NULL; - } - } - this = krdc; - - rsync = ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_mflags(urdc) & RDC_SLAVE)); - - /* - * If this is a many group with a reverse sync in progress and - * this is not the slave krdc/urdc, then search for the slave - * so that we can do the remote io to the correct secondary - * before the local io. 
- */ - if (rsync && !(rdc_get_vflags(urdc) & RDC_SLAVE)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - break; - } - rdc_many_exit(krdc); - - this = krdc; - } - - rc1 = rc2 = 0; - first = 1; - - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_zero: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - -zero1: - if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) { - (void) nsc_uncommit(h->rdc_bufp, pos, len, flag); - h->rdc_bufh.sb_error = EIO; - goto zero2; - } - - if (IS_ASYNC(urdc)) { - /* - * We are async mode - */ - aio_buf_t *p; - - if ((krdc->type_flag & RDC_DISABLEPEND) || - (rdc_get_vflags(urdc) & RDC_LOGGING)) { - mutex_exit(&krdc->group->ra_queue.net_qlock); - goto localzero; - } - - if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) || - (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) { - mutex_exit(&krdc->group->ra_queue.net_qlock); - goto zero2; - } - if (rdc_get_vflags(urdc) & RDC_LOGGING) { - mutex_exit(&krdc->group->ra_queue.net_qlock); - goto localzero; - } - p = rdc_aio_buf_add(krdc->index, h); - if (p == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf aio_buf allocation failed"); -#endif - goto localzero; - } - mutex_enter(&h->aio_lock); - rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_alloc_buf NSC_ANON allocation failed rc %d", - rc1); -#endif - mutex_exit(&h->aio_lock); - goto localzero; - } - h->rdc_flags |= RDC_ASYNC_VEC; - mutex_exit(&h->aio_lock); - - /* - * Copy buffer into anonymous buffer - */ - - rc1 = nsc_zero(p->rdc_abufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!_rdc_zero: nsc_zero failed rc=%d state %x", - rc1, rdc_get_vflags(urdc)); -#endif - rc1 = nsc_free_buf(p->rdc_abufp); - rdc_aio_buf_del(h, krdc); - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_FLUSH | RDC_OTHERREMOTE, - "nsc_zero failed"); - rdc_group_exit(krdc); - } - } /* end of async */ - -localzero: - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_zero: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - if (!remote && !rsync && first) { - rc1 = nsc_zero(h->rdc_bufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { - ASSERT(rdc_get_vflags(urdc) & RDC_PRIMARY); - rdc_many_enter(krdc); - /* Primary, so reverse sync needed */ - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_zero failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } - - /* - * send new data to remote end - nsc_zero has zero'd - * the data in the buffer, or _rdc_bzero will be used below. 
- */ - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (first && (remote || rsync || !RDC_SUCCESS(rc1))) { - /* bzero so that we can send new data to remote node */ - _rdc_bzero(&h->rdc_bufh, pos, len); - } - - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - multi = ktmp; - } - - rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, - pos, len, flag, bitmask); - } - - if (!RDC_SUCCESS(rc1)) { - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc1; - } - } else if ((remote || rsync) && !RDC_SUCCESS(rc2)) { - h->rdc_bufh.sb_error = rc2; - } - -zero2: - if (IS_MANY(krdc) && (rdc_get_vflags(urdc) & RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - rc2 = first = 0; - rdc_many_exit(krdc); - goto zero1; - } - rdc_many_exit(krdc); - } - - if (rsync && !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rc1 = nsc_write(h->rdc_bufp, pos, len, flag); - if (!RDC_SUCCESS(rc1)) { - /* rsync, so reverse sync needed already set */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "nsc_write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - - /* - * only report the error if a remote error - * occurred as well. - */ - if (h->rdc_bufh.sb_error) - h->rdc_bufh.sb_error = rc1; - } - } - - if (multi) { - /* Multi-hop secondary, just set bits in the bitmap */ - (void) RDC_SET_BITMAP(multi, pos, len, &bitmask); - } - - return (h->rdc_bufh.sb_error); -} - - -/* - * _rdc_uncommit - * - refresh specified data region in the buffer to prevent the cache - * serving the scribbled on data back to another client. - * - * Only needs to happen on the local node. If in remote io mode, then - * just return 0 - we do not cache the data on the local node and the - * changed data will not have made it to the cache on the other node, - * so it has no need to uncommit. - */ - -static int -_rdc_uncommit(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag) -{ - int remote = RDC_REMOTE(h); - int rc = 0; - - if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) { - cmn_err(CE_WARN, - "!_rdc_uncommit: bounds check: io(handle) pos %" NSC_XSZFMT - "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")", - pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len); - h->rdc_bufh.sb_error = EINVAL; - return (h->rdc_bufh.sb_error); - } - - if (flag & NSC_NOBLOCK) { - cmn_err(CE_WARN, - "!_rdc_uncommit: removing unsupported NSC_NOBLOCK flag"); - flag &= ~(NSC_NOBLOCK); - } - - if (!remote) { - rc = nsc_uncommit(h->rdc_bufp, pos, len, flag); - } - - if (!RDC_SUCCESS(rc)) - h->rdc_bufh.sb_error = rc; - - return (rc); -} - - -/* - * _rdc_trksize - * - * only needs to happen on local node. 
- */ - -static int -_rdc_trksize(rdc_fd_t *rfd, nsc_size_t trksize) -{ - return (nsc_set_trksize(RDC_FD(rfd), trksize)); -} - - -static nsc_def_t _rdc_fd_def[] = { - "Attach", (uintptr_t)_rdc_attach_fd, 0, - "Pinned", (uintptr_t)_rdc_pinned, 0, - "Unpinned", (uintptr_t)_rdc_unpinned, 0, - 0, 0, 0 -}; - - -static nsc_def_t _rdc_io_def[] = { - "Open", (uintptr_t)_rdc_openc, 0, - "Close", (uintptr_t)_rdc_close, 0, - "Attach", (uintptr_t)_rdc_attach, 0, - "Detach", (uintptr_t)_rdc_detach, 0, - "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0, - "FreeHandle", (uintptr_t)_rdc_free_handle, 0, - "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0, - "FreeBuf", (uintptr_t)_rdc_free_buf, 0, - "GetPinned", (uintptr_t)_rdc_get_pinned, 0, - "Discard", (uintptr_t)_rdc_discard_pinned, 0, - "PartSize", (uintptr_t)_rdc_partsize, 0, - "MaxFbas", (uintptr_t)_rdc_maxfbas, 0, - "Control", (uintptr_t)_rdc_control, 0, - "Read", (uintptr_t)_rdc_read, 0, - "Write", (uintptr_t)_rdc_write, 0, - "Zero", (uintptr_t)_rdc_zero, 0, - "Uncommit", (uintptr_t)_rdc_uncommit, 0, - "TrackSize", (uintptr_t)_rdc_trksize, 0, - "Provide", 0, 0, - 0, 0, 0 -}; - -static nsc_def_t _rdc_ior_def[] = { - "Open", (uintptr_t)_rdc_openr, 0, - "Close", (uintptr_t)_rdc_close, 0, - "Attach", (uintptr_t)_rdc_attach, 0, - "Detach", (uintptr_t)_rdc_detach, 0, - "AllocHandle", (uintptr_t)_rdc_alloc_handle, 0, - "FreeHandle", (uintptr_t)_rdc_free_handle, 0, - "AllocBuf", (uintptr_t)_rdc_alloc_buf, 0, - "FreeBuf", (uintptr_t)_rdc_free_buf, 0, - "GetPinned", (uintptr_t)_rdc_get_pinned, 0, - "Discard", (uintptr_t)_rdc_discard_pinned, 0, - "PartSize", (uintptr_t)_rdc_partsize, 0, - "MaxFbas", (uintptr_t)_rdc_maxfbas, 0, - "Control", (uintptr_t)_rdc_control, 0, - "Read", (uintptr_t)_rdc_read, 0, - "Write", (uintptr_t)_rdc_write, 0, - "Zero", (uintptr_t)_rdc_zero, 0, - "Uncommit", (uintptr_t)_rdc_uncommit, 0, - "TrackSize", (uintptr_t)_rdc_trksize, 0, - "Provide", 0, 0, - 0, 0, 0 -}; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c b/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c deleted file mode 100644 index b01866c9cc..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c +++ /dev/null @@ -1,3252 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/errno.h> - -#include "../solaris/nsc_thread.h" -#ifdef DS_DDICT -#include "../contract.h" -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/kmem.h> -#include <sys/ddi.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_diskq.h" -#include "rdc_clnt.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -extern nsc_io_t *_rdc_io_hc; - -int rdc_diskq_coalesce = 0; - -int -_rdc_rsrv_diskq(rdc_group_t *group) -{ - int rc = 0; - - mutex_enter(&group->diskqmutex); - if (group->diskqfd == NULL) { - mutex_exit(&group->diskqmutex); - return (EIO); - } else if ((group->diskqrsrv == 0) && - (rc = nsc_reserve(group->diskqfd, 0)) != 0) { - cmn_err(CE_WARN, - "!rdc: nsc_reserve(%s) failed %d\n", - nsc_pathname(group->diskqfd), rc); - } else { - group->diskqrsrv++; - } - - mutex_exit(&group->diskqmutex); - return (rc); -} - -void -_rdc_rlse_diskq(rdc_group_t *group) -{ - mutex_enter(&group->diskqmutex); - if (group->diskqrsrv > 0 && --group->diskqrsrv == 0) { - nsc_release(group->diskqfd); - } - mutex_exit(&group->diskqmutex); -} - -void -rdc_wait_qbusy(disk_queue *q) -{ - ASSERT(MUTEX_HELD(QLOCK(q))); - while (q->busycnt > 0) - cv_wait(&q->busycv, QLOCK(q)); -} - -void -rdc_set_qbusy(disk_queue *q) -{ - ASSERT(MUTEX_HELD(QLOCK(q))); - q->busycnt++; -} - -void -rdc_clr_qbusy(disk_queue *q) -{ - ASSERT(MUTEX_HELD(QLOCK(q))); - q->busycnt--; - if (q->busycnt == 0) - cv_broadcast(&q->busycv); -} - -int -rdc_lookup_diskq(char *pathname) -{ - rdc_u_info_t *urdc; -#ifdef DEBUG - rdc_k_info_t *krdc; -#endif - int index; - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; -#ifdef DEBUG - krdc = &rdc_k_info[index]; -#endif - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - if (!IS_ENABLED(urdc)) - continue; - - if (strncmp(pathname, urdc->disk_queue, - NSC_MAXPATH) == 0) - return (index); - } - - return (-1); -} - -void -rdc_unintercept_diskq(rdc_group_t *grp) -{ - if (!RDC_IS_DISKQ(grp)) - return; - if (grp->q_tok) - (void) nsc_unregister_path(grp->q_tok, 0); - grp->q_tok = NULL; -} - -void -rdc_close_diskq(rdc_group_t *grp) -{ - - if (grp == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_close_diskq: NULL group!"); -#endif - return; - } - - if (grp->diskqfd) { - if (nsc_close(grp->diskqfd) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_close on diskq failed"); -#else - ; - /*EMPTY*/ -#endif - } - grp->diskqfd = 0; - grp->diskqrsrv = 0; - } - bzero(&grp->diskq.disk_hdr, sizeof (diskq_header)); -} - -/* - * nsc_open the diskq and attach - * the nsc_fd to krdc->diskqfd - */ -int -rdc_open_diskq(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_group_t *grp; - int sts; - nsc_size_t size; - char *diskqname; - int mutexheld = 0; - - grp = krdc->group; - urdc = &rdc_u_info[krdc->index]; - - mutex_enter(&grp->diskqmutex); - mutexheld++; - if (urdc->disk_queue[0] == '\0') { - goto fail; - } - - diskqname = &urdc->disk_queue[0]; - - if (grp->diskqfd == NULL) { - grp->diskqfd = nsc_open(diskqname, - NSC_RDCHR_ID|NSC_DEVICE|NSC_WRITE, 0, 0, 0); - if (grp->diskqfd == NULL) { - cmn_err(CE_WARN, "!rdc_open_diskq: Unable to open %s", - diskqname); - goto fail; - } - } - if (!grp->q_tok) - grp->q_tok = nsc_register_path(urdc->disk_queue, - NSC_DEVICE | NSC_CACHE, _rdc_io_hc); - - grp->diskqrsrv = 0; /* init reserve 
count */ - - mutex_exit(&grp->diskqmutex); - mutexheld--; - /* just test a reserve release */ - sts = _rdc_rsrv_diskq(grp); - if (!RDC_SUCCESS(sts)) { - cmn_err(CE_WARN, "!rdc_open_diskq: Reserve failed for %s", - diskqname); - goto fail; - } - sts = nsc_partsize(grp->diskqfd, &size); - _rdc_rlse_diskq(grp); - - if ((sts == 0) && (size < 1)) { - rdc_unintercept_diskq(grp); - rdc_close_diskq(grp); - goto fail; - } - - return (0); - -fail: - bzero(&urdc->disk_queue, NSC_MAXPATH); - if (mutexheld) - mutex_exit(&grp->diskqmutex); - return (-1); - -} - -/* - * rdc_count_vecs - * simply vec++'s until sv_addr is null - * returns number of vectors encountered - */ -int -rdc_count_vecs(nsc_vec_t *vec) -{ - nsc_vec_t *vecp; - int i = 0; - vecp = vec; - while (vecp->sv_addr) { - vecp++; - i++; - } - return (i+1); -} -/* - * rdc_setid2idx - * given setid, return index - */ -int -rdc_setid2idx(int setid) -{ - - int index = 0; - - for (index = 0; index < rdc_max_sets; index++) { - if (rdc_u_info[index].setid == setid) - break; - } - if (index >= rdc_max_sets) - index = -1; - return (index); -} - -/* - * rdc_idx2setid - * given an index, return its setid - */ -int -rdc_idx2setid(int index) -{ - return (rdc_u_info[index].setid); -} - -/* - * rdc_fill_ioheader - * fill in all the stuff you want to save on disk - * at the beginning of each queued write - */ -void -rdc_fill_ioheader(rdc_aio_t *aio, io_hdr *hd, int qpos) -{ - ASSERT(MUTEX_HELD(&rdc_k_info[aio->index].group->diskq.disk_qlock)); - - hd->dat.magic = RDC_IOHDR_MAGIC; - hd->dat.type = RDC_QUEUEIO; - hd->dat.pos = aio->pos; - hd->dat.hpos = aio->pos; - hd->dat.qpos = qpos; - hd->dat.len = aio->len; - hd->dat.flag = aio->flag; - hd->dat.iostatus = aio->iostatus; - hd->dat.setid = rdc_idx2setid(aio->index); - hd->dat.time = nsc_time(); - if (!aio->handle) - hd->dat.flag |= RDC_NULL_BUF; /* no real data to queue */ -} - -/* - * rdc_dump_iohdrs - * give back the iohdr list - * and clear out q->lastio - */ -void -rdc_dump_iohdrs(disk_queue *q) -{ - io_hdr *p, *r; - - ASSERT(MUTEX_HELD(QLOCK(q))); - - p = q->iohdrs; - while (p) { - r = p->dat.next; - kmem_free(p, sizeof (*p)); - q->hdrcnt--; - p = r; - } - q->iohdrs = q->hdr_last = NULL; - q->hdrcnt = 0; - if (q->lastio->handle) - (void) nsc_free_buf(q->lastio->handle); - bzero(&(*q->lastio), sizeof (*q->lastio)); -} - -/* - * rdc_fail_diskq - * set flags, throw away q info - * clean up what you can - * wait for flusher threads to stop (taking into account this may be one) - * takes group_lock, so conf, many, and bitmap may not be held - */ -void -rdc_fail_diskq(rdc_k_info_t *krdc, int wait, int flag) -{ - rdc_k_info_t *p; - rdc_u_info_t *q = &rdc_u_info[krdc->index]; - rdc_group_t *group = krdc->group; - disk_queue *dq = &krdc->group->diskq; - - if (IS_STATE(q, RDC_DISKQ_FAILED)) - return; - - if (!(flag & RDC_NOFAIL)) - cmn_err(CE_WARN, "!disk queue %s failure", q->disk_queue); - - if (flag & RDC_DOLOG) { - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_ALLREMOTE, - "disk queue failed"); - rdc_group_exit(krdc); - } - mutex_enter(QHEADLOCK(dq)); - mutex_enter(QLOCK(dq)); - /* - * quick stop of the flushers - * other cleanup is done on the un-failing of the diskq - */ - SET_QHEAD(dq, RDC_DISKQ_DATA_OFF); - SET_QTAIL(dq, RDC_DISKQ_DATA_OFF); - SET_QNXTIO(dq, RDC_DISKQ_DATA_OFF); - SET_LASTQTAIL(dq, 0); - - rdc_dump_iohdrs(dq); - - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - - bzero(krdc->bitmap_ref, krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - - if (flag & 
RDC_DOLOG) /* otherwise, we already have the conf lock */ - rdc_group_enter(krdc); - - else if (!(flag & RDC_GROUP_LOCKED)) - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (!(flag & RDC_NOFAIL)) { - rdc_set_flags(q, RDC_DISKQ_FAILED); - } - rdc_clr_flags(q, RDC_QUEUING); - - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - if (!(flag & RDC_NOFAIL)) { - rdc_set_flags(q, RDC_DISKQ_FAILED); - } - rdc_clr_flags(q, RDC_QUEUING); - bzero(p->bitmap_ref, p->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - /* RDC_QUEUING is cleared in group_log() */ - } - - if (flag & RDC_DOLOG) - rdc_group_exit(krdc); - - /* can't wait for myself to go away, I'm a flusher */ - if (wait & RDC_WAIT) - while (group->rdc_thrnum) - delay(2); - -} - -/* - * rdc_stamp_diskq - * write out diskq header info - * must have disk_qlock held - * if rsrvd flag is 0, the nsc_reserve is done - */ -int -rdc_stamp_diskq(rdc_k_info_t *krdc, int rsrvd, int failflags) -{ - nsc_vec_t vec[2]; - nsc_buf_t *head = NULL; - rdc_group_t *grp; - rdc_u_info_t *urdc; - disk_queue *q; - int rc, flags; - - grp = krdc->group; - q = &krdc->group->diskq; - - ASSERT(MUTEX_HELD(&q->disk_qlock)); - - urdc = &rdc_u_info[krdc->index]; - - if (!rsrvd && _rdc_rsrv_diskq(grp)) { - cmn_err(CE_WARN, "!rdc_stamp_diskq: %s reserve failed", - urdc->disk_queue); - mutex_exit(QLOCK(q)); - rdc_fail_diskq(krdc, RDC_NOWAIT, failflags); - mutex_enter(QLOCK(q)); - return (-1); - } - flags = NSC_WRITE | NSC_NOCACHE | NSC_NODATA; - rc = nsc_alloc_buf(grp->diskqfd, 0, 1, flags, &head); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!Alloc buf failed for disk queue %s", - &urdc->disk_queue[0]); - mutex_exit(QLOCK(q)); - rdc_fail_diskq(krdc, RDC_NOWAIT, failflags); - mutex_enter(QLOCK(q)); - return (-1); - } - vec[0].sv_len = FBA_SIZE(1); - vec[0].sv_addr = (uchar_t *)&q->disk_hdr; - vec[1].sv_len = 0; - vec[1].sv_addr = NULL; - - head->sb_vec = &vec[0]; - -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, "!rdc_stamp_diskq: hdr: %p magic: %x state: " - "%x head: %d tail: %d size: %d nitems: %d blocks: %d", - q, QMAGIC(q), QSTATE(q), QHEAD(q), - QTAIL(q), QSIZE(q), QNITEMS(q), QBLOCKS(q)); -#endif - - rc = nsc_write(head, 0, 1, 0); - - if (!RDC_SUCCESS(rc)) { - if (!rsrvd) - _rdc_rlse_diskq(grp); - cmn_err(CE_CONT, "!disk queue %s failed rc %d", - &urdc->disk_queue[0], rc); - mutex_exit(QLOCK(q)); - rdc_fail_diskq(krdc, RDC_NOWAIT, failflags); - mutex_enter(QLOCK(q)); - return (-1); - } - - (void) nsc_free_buf(head); - if (!rsrvd) - _rdc_rlse_diskq(grp); - - return (0); -} - -/* - * rdc_init_diskq_header - * load initial values into the header - */ -void -rdc_init_diskq_header(rdc_group_t *grp, dqheader *header) -{ - int rc; - int type = 0; - disk_queue *q = &grp->diskq; - - ASSERT(MUTEX_HELD(QLOCK(q))); - - /* save q type if this is a failure */ - if (QSTATE(q) & RDC_QNOBLOCK) - type = RDC_QNOBLOCK; - bzero(header, sizeof (*header)); - header->h.magic = RDC_DISKQ_MAGIC; - header->h.vers = RDC_DISKQ_VERS; - header->h.state |= (RDC_SHUTDOWN_BAD|type); /* SHUTDOWN_OK on suspend */ - header->h.head_offset = RDC_DISKQ_DATA_OFF; - header->h.tail_offset = RDC_DISKQ_DATA_OFF; - header->h.nitems = 0; - header->h.blocks = 0; - header->h.qwrap = 0; - SET_QNXTIO(q, QHEAD(q)); - SET_QCOALBOUNDS(q, RDC_DISKQ_DATA_OFF); - - /* do this last, as this might be a failure. 
get the kernel state ok */ - rc = _rdc_rsrv_diskq(grp); - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!init_diskq_hdr: Reserve failed for queue"); - return; - } - (void) nsc_partsize(grp->diskqfd, &header->h.disk_size); - _rdc_rlse_diskq(grp); - -} - -/* - * rdc_unfail_diskq - * the diskq failed for some reason, lets try and re-start it - * the old stuff has already been thrown away - * should just be called from rdc_sync - */ -void -rdc_unfail_diskq(rdc_k_info_t *krdc) -{ - rdc_k_info_t *p; - rdc_u_info_t *q = &rdc_u_info[krdc->index]; - rdc_group_t *group = krdc->group; - disk_queue *dq = &group->diskq; - - rdc_group_enter(krdc); - rdc_clr_flags(q, RDC_ASYNC); - /* someone else won the race... */ - if (!IS_STATE(q, RDC_DISKQ_FAILED)) { - rdc_group_exit(krdc); - return; - } - rdc_clr_flags(q, RDC_DISKQ_FAILED); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - rdc_clr_flags(q, RDC_DISKQ_FAILED); - rdc_clr_flags(q, RDC_ASYNC); - if (IS_STATE(q, RDC_QUEUING)) - rdc_clr_flags(q, RDC_QUEUING); - } - rdc_group_exit(krdc); - - mutex_enter(QLOCK(dq)); - - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - /* real i/o to the queue */ - /* clear RDC_AUXSYNCIP because we cannot halt a sync that's not here */ - krdc->aux_state &= ~RDC_AUXSYNCIP; - if (rdc_stamp_diskq(krdc, 0, RDC_GROUP_LOCKED | RDC_DOLOG) < 0) { - mutex_exit(QLOCK(dq)); - goto fail; - } - - SET_QNXTIO(dq, QHEAD(dq)); - SET_QHDRCNT(dq, 0); - SET_QSTATE(dq, RDC_SHUTDOWN_BAD); /* only suspend can write good */ - dq->iohdrs = NULL; - dq->hdr_last = NULL; - - /* should be none, but.. */ - rdc_dump_iohdrs(dq); - - mutex_exit(QLOCK(dq)); - - -fail: - krdc->aux_state |= RDC_AUXSYNCIP; - return; - -} - -int -rdc_read_diskq_header(rdc_k_info_t *krdc) -{ - int rc; - diskq_header *header; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - if (krdc->group->diskqfd == NULL) { - char buf[NSC_MAXPATH]; - (void) snprintf(buf, NSC_MAXPATH, "%s:%s", urdc->secondary.intf, - &urdc->secondary.intf[0]); - cmn_err(CE_WARN, "!Disk Queue Header read failed for %s", - urdc->group_name[0] == '\0' ? buf: - &urdc->group_name[0]); - return (-1); - } - - header = &krdc->group->diskq.disk_hdr.h; - if (_rdc_rsrv_diskq(krdc->group)) { - return (-1); - } - - rc = rdc_ns_io(krdc->group->diskqfd, NSC_RDBUF, 0, - (uchar_t *)header, sizeof (diskq_header)); - - _rdc_rlse_diskq(krdc->group); - - if (!RDC_SUCCESS(rc)) { - char buf[NSC_MAXPATH]; - (void) snprintf(buf, NSC_MAXPATH, "%s:%s", urdc->secondary.intf, - &urdc->secondary.file[0]); - cmn_err(CE_WARN, "!Disk Queue Header read failed(%d) for %s", - rc, urdc->group_name[0] == '\0' ? 
buf : - &urdc->group_name[0]); - return (-1); - } - return (0); -} - -/* - * rdc_stop_diskq_flusher - */ -void -rdc_stop_diskq_flusher(rdc_k_info_t *krdc) -{ - disk_queue q, *qp; - rdc_group_t *group; -#ifdef DEBUG - cmn_err(CE_NOTE, "!stopping flusher threads"); -#endif - group = krdc->group; - qp = &krdc->group->diskq; - - /* save the queue info */ - q = *qp; - - /* lie a little */ - SET_QTAIL(qp, RDC_DISKQ_DATA_OFF); - SET_QHEAD(qp, RDC_DISKQ_DATA_OFF); - SET_QSTATE(qp, RDC_QDISABLEPEND); - SET_QSTATE(qp, RDC_STOPPINGFLUSH); - - /* drop locks to allow flushers to die */ - mutex_exit(QLOCK(qp)); - mutex_exit(QHEADLOCK(qp)); - rdc_group_exit(krdc); - - while (group->rdc_thrnum) - delay(2); - - rdc_group_enter(krdc); - mutex_enter(QHEADLOCK(qp)); - mutex_enter(QLOCK(qp)); - - CLR_QSTATE(qp, RDC_STOPPINGFLUSH); - *qp = q; -} - -/* - * rdc_enable_diskq - * open the diskq - * and stamp the header onto it. - */ -int -rdc_enable_diskq(rdc_k_info_t *krdc) -{ - rdc_group_t *group; - disk_queue *q; - - group = krdc->group; - q = &group->diskq; - - if (rdc_open_diskq(krdc) < 0) - goto fail; - - mutex_enter(QLOCK(q)); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - - if (rdc_stamp_diskq(krdc, 0, RDC_NOLOG) < 0) { - mutex_exit(QLOCK(q)); - goto fail; - } - - SET_QNXTIO(q, QHEAD(q)); - - mutex_exit(QLOCK(q)); - return (0); - -fail: - mutex_enter(&group->diskqmutex); - rdc_close_diskq(group); - mutex_exit(&group->diskqmutex); - - /* caller has to fail diskq after dropping conf & many locks */ - return (RDC_EQNOADD); -} - -/* - * rdc_resume_diskq - * open the diskq and read the header - */ -int -rdc_resume_diskq(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_group_t *group; - disk_queue *q; - int rc = 0; - - urdc = &rdc_u_info[krdc->index]; - group = krdc->group; - q = &group->diskq; - - if (rdc_open_diskq(krdc) < 0) { - rc = RDC_EQNOADD; - goto fail; - } - - mutex_enter(QLOCK(q)); - - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - - if (rdc_read_diskq_header(krdc) < 0) { - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } - - /* check diskq magic number */ - if (QMAGIC(q) != RDC_DISKQ_MAGIC) { - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " incorrect magic number in header", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } else switch (QVERS(q)) { - diskq_header1 h1; /* version 1 header */ - diskq_header *hc; /* current header */ - -#ifdef NSC_MULTI_TERABYTE - case RDC_DISKQ_VER_ORIG: - /* version 1 diskq header, upgrade to 64bit version */ - h1 = *(diskq_header1 *)(&group->diskq.disk_hdr.h); - hc = &group->diskq.disk_hdr.h; - - cmn_err(CE_WARN, "!SNDR: old version header for diskq %s," - " upgrading to current version", urdc->disk_queue); - hc->vers = RDC_DISKQ_VERS; - hc->state = h1.state; - hc->head_offset = h1.head_offset; - hc->tail_offset = h1.tail_offset; - hc->disk_size = h1.disk_size; - hc->nitems = h1.nitems; - hc->blocks = h1.blocks; - hc->qwrap = h1.qwrap; - hc->auxqwrap = h1.auxqwrap; - hc->seq_last = h1.seq_last; - hc->ack_last = h1.ack_last; - - if (hc->nitems > 0) { - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " old version Q contains data", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } - break; -#else - case RDC_DISKQ_VER_64BIT: - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " diskq header newer than current version", - urdc->disk_queue); - 
rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - break; -#endif - case RDC_DISKQ_VERS: - /* okay, current version diskq */ - break; - default: - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " unknown diskq header version", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - break; - } - if (IS_QSTATE(q, RDC_SHUTDOWN_BAD)) { - cmn_err(CE_WARN, "!SNDR: unable to resume diskq %s," - " unsafe shutdown", urdc->disk_queue); - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QSTATE(q, RDC_QBADRESUME); - rc = RDC_EQNOADD; - } - - CLR_QSTATE(q, RDC_SHUTDOWN_OK); - SET_QSTATE(q, RDC_SHUTDOWN_BAD); - - /* bad, until proven not bad */ - if (rdc_stamp_diskq(krdc, 0, RDC_NOLOG) < 0) { - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_NOLOG); - rc = RDC_EQNOADD; - } - - SET_QNXTIO(q, QHEAD(q)); - group->diskq.nitems_hwm = QNITEMS(q); - group->diskq.blocks_hwm = QBLOCKS(q); - - mutex_exit(QLOCK(q)); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_resume_diskq: resuming diskq %s \n", - urdc->disk_queue); - cmn_err(CE_NOTE, "!qinfo: " QDISPLAY(q)); -#endif - if (rc == 0) - return (0); - -fail: - - /* caller has to set the diskq failed after dropping it's locks */ - return (rc); - -} - -int -rdc_suspend_diskq(rdc_k_info_t *krdc) -{ - int rc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - disk_queue *q; - - q = &krdc->group->diskq; - - /* grab both diskq locks as we are going to kill the flusher */ - mutex_enter(QHEADLOCK(q)); - mutex_enter(QLOCK(q)); - - if ((krdc->group->rdc_thrnum) && (!IS_QSTATE(q, RDC_STOPPINGFLUSH))) { - SET_QSTATE(q, RDC_STOPPINGFLUSH); - rdc_stop_diskq_flusher(krdc); - CLR_QSTATE(q, RDC_STOPPINGFLUSH); - } - - krdc->group->diskq.disk_hdr.h.state &= ~RDC_SHUTDOWN_BAD; - krdc->group->diskq.disk_hdr.h.state |= RDC_SHUTDOWN_OK; - krdc->group->diskq.disk_hdr.h.state &= ~RDC_QBADRESUME; - - /* let's make sure that the flusher has stopped.. 
*/ - if (krdc->group->rdc_thrnum) { - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - rdc_group_exit(krdc); - - while (krdc->group->rdc_thrnum) - delay(5); - - rdc_group_enter(krdc); - mutex_enter(QLOCK(q)); - mutex_enter(QHEADLOCK(q)); - } - /* write refcount to the bitmap */ - if ((rc = rdc_write_refcount(krdc)) < 0) { - rdc_group_exit(krdc); - goto fail; - } - - if (!QEMPTY(q)) { - rdc_set_flags(urdc, RDC_QUEUING); - } else { - rdc_clr_flags(urdc, RDC_QUEUING); - } - - /* fill in diskq header info */ - krdc->group->diskq.disk_hdr.h.state &= ~RDC_QDISABLEPEND; - -#ifdef DEBUG - cmn_err(CE_NOTE, "!suspending disk queue\n" QDISPLAY(q)); -#endif - - /* to avoid a possible deadlock, release in order, and reacquire */ - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - - if (krdc->group->count > 1) { - rdc_group_exit(krdc); - goto fail; /* just stamp on the last suspend */ - } - rdc_group_exit(krdc); /* in case this stamp fails */ - mutex_enter(QLOCK(q)); - - rc = rdc_stamp_diskq(krdc, 0, RDC_NOLOG); - - mutex_exit(QLOCK(q)); - -fail: - rdc_group_enter(krdc); - - /* diskq already failed if stamp failed */ - - return (rc); -} - -/* - * copy orig aio to copy, including the nsc_buf_t - */ -int -rdc_dup_aio(rdc_aio_t *orig, rdc_aio_t *copy) -{ - int rc; - bcopy(orig, copy, sizeof (*orig)); - copy->handle = NULL; - - if (orig->handle == NULL) /* no buf to alloc/copy */ - return (0); - - rc = nsc_alloc_abuf(orig->pos, orig->len, 0, ©->handle); - if (!RDC_SUCCESS(rc)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dup_aio: alloc_buf failed (%d)", rc); -#endif - return (rc); - } - rc = nsc_copy(orig->handle, copy->handle, orig->pos, - orig->pos, orig->len); - if (!RDC_SUCCESS(rc)) { - (void) nsc_free_buf(copy->handle); -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_dup_aio: copy buf failed (%d)", rc); -#endif - return (rc); - } - return (0); -} - -/* - * rdc_qfill_shldwakeup() - * 0 if the memory queue has filled, and the low water - * mark has not been reached. 0 if diskq is empty. 
- 1 if less than low water mark - * net_queue mutex is already held - */ -int -rdc_qfill_shldwakeup(rdc_k_info_t *krdc) -{ - rdc_group_t *group = krdc->group; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - net_queue *nq = &group->ra_queue; - disk_queue *dq = &group->diskq; - - ASSERT(MUTEX_HELD(&nq->net_qlock)); - - if (!RDC_IS_DISKQ(krdc->group)) - return (0); - - if (nq->qfill_sleeping != RDC_QFILL_ASLEEP) - return (0); - - if (nq->qfflags & RDC_QFILLSTOP) - return (1); - - if (nq->qfflags & RDC_QFILLSLEEP) - return (0); - - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) - return (0); - - mutex_enter(QLOCK(dq)); - if ((QNXTIO(dq) == QTAIL(dq)) && !IS_QSTATE(dq, RDC_QFULL)) { - mutex_exit(QLOCK(dq)); - return (0); - } - mutex_exit(QLOCK(dq)); - - if (nq->qfill_sleeping == RDC_QFILL_ASLEEP) { - if (nq->hwmhit) { - if (nq->blocks <= RDC_LOW_QBLOCKS) { - nq->hwmhit = 0; - } else { - return (0); - } - } -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Waking up diskq->memq flusher, flags 0x%x" - " idx: %d", rdc_get_vflags(urdc), urdc->index); -#endif - return (1); - } - return (0); - -} - -/* - * rdc_diskq_enqueue - * enqueue one i/o to the diskq - * after appending some metadata to the front - */ -int -rdc_diskq_enqueue(rdc_k_info_t *krdc, rdc_aio_t *aio) -{ - nsc_vec_t *vec = NULL; - nsc_buf_t *bp = NULL; - nsc_buf_t *qbuf = NULL; - io_hdr *iohdr = NULL; - disk_queue *q; - rdc_group_t *group; - int numvecs; - int i, j, rc = 0; - int retries = 0; - rdc_u_info_t *urdc; - nsc_size_t iofbas; /* len of io + io header len */ - int qtail; - int delay_time = 2; - int print_msg = 1; - -#ifdef DEBUG_WRITER_UBERNOISE - int qhead; -#endif - urdc = &rdc_u_info[krdc->index]; - group = krdc->group; - q = &group->diskq; - - mutex_enter(QLOCK(q)); - - /* - * there is a thread that is blocking because the queue is full, - * don't try to set up this write until all is clear - * check before and after for logging or failed queue just - * in case a thread was in flight while the queue was full, - * and in the process of failing - */ - while (IS_QSTATE(q, RDC_QFULL)) { - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - (IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING))) { - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (-1); - } - cv_wait(&q->qfullcv, QLOCK(q)); - - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - (IS_STATE(urdc, RDC_LOGGING) && - !IS_STATE(urdc, RDC_QUEUING))) { - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (-1); - } - - } - - SET_QSTATE(q, QTAILBUSY); - - if (aio->handle == NULL) { - /* we're only going to write the header to the queue */ - numvecs = 2; /* kmem_alloc io header + null terminate */ - iofbas = FBA_LEN(sizeof (io_hdr)); - - } else { - /* find out how many vecs */ - numvecs = rdc_count_vecs(aio->handle->sb_vec) + 1; - iofbas = aio->len + FBA_LEN(sizeof (io_hdr)); - } - - /* - * this, in conjunction with QTAILBUSY, will prevent - * premature dequeuing - */ - - SET_LASTQTAIL(q, QTAIL(q)); - - iohdr = (io_hdr *) kmem_zalloc(sizeof (io_hdr), KM_NOSLEEP); - vec = (nsc_vec_t *) kmem_zalloc(sizeof (nsc_vec_t) * numvecs, - KM_NOSLEEP); - - if (!vec || !iohdr) { - if (!vec) { - cmn_err(CE_WARN, "!vec kmem alloc failed"); - } else { - cmn_err(CE_WARN, "!iohdr kmem alloc failed"); - } - if (vec) - kmem_free(vec, sizeof (*vec)); - if (iohdr) - kmem_free(iohdr, sizeof (*iohdr)); - CLR_QSTATE(q, QTAILBUSY); - SET_LASTQTAIL(q, 0); - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) 
nsc_free_buf(aio->handle); - return (ENOMEM); - } - - vec[numvecs - 1].sv_len = 0; - vec[numvecs - 1].sv_addr = 0; - - /* now add the write itself */ - bp = aio->handle; - - for (i = 1, j = 0; bp && bp->sb_vec[j].sv_addr && - i < numvecs; i++, j++) { - vec[i].sv_len = bp->sb_vec[j].sv_len; - vec[i].sv_addr = bp->sb_vec[j].sv_addr; - } - -retry: - - /* check for queue wrap, then check for overflow */ - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - (IS_STATE(urdc, RDC_LOGGING) && !IS_STATE(urdc, RDC_QUEUING))) { - kmem_free(iohdr, sizeof (*iohdr)); - kmem_free(vec, sizeof (*vec) * numvecs); - CLR_QSTATE(q, QTAILBUSY); - SET_LASTQTAIL(q, 0); - if (IS_QSTATE(q, RDC_QFULL)) { /* wakeup blocked threads */ - CLR_QSTATE(q, RDC_QFULL); - cv_broadcast(&q->qfullcv); - } - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - - return (-1); - } - - if (QTAILSHLDWRAP(q, iofbas)) { - /* - * just go back to the beginning of the disk - * it's not worth the trouble breaking up the write - */ -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q tail: " QDISPLAY(q)); -#endif - /*LINTED*/ - WRAPQTAIL(q); - } - - /* - * prepend the write's metadata - */ - rdc_fill_ioheader(aio, iohdr, QTAIL(q)); - - vec[0].sv_len = FBA_SIZE(1); - vec[0].sv_addr = (uchar_t *)iohdr; - - /* check for tail < head */ - - if (!(FITSONQ(q, iofbas))) { - /* - * don't allow any more writes to start - */ - SET_QSTATE(q, RDC_QFULL); - mutex_exit(QLOCK(q)); - - if ((!group->rdc_writer) && !IS_STATE(urdc, RDC_LOGGING)) - (void) rdc_writer(krdc->index); - - delay(delay_time); - q->throttle_delay += delay_time; - retries++; - delay_time *= 2; /* fairly aggressive */ - if ((retries >= 8) || (delay_time >= 256)) { - delay_time = 2; - if (print_msg) { - cmn_err(CE_WARN, "!enqueue: disk queue %s full", - &urdc->disk_queue[0]); - print_msg = 0; -#ifdef DEBUG - cmn_err(CE_WARN, "!qinfo: " QDISPLAY(q)); -#else - cmn_err(CE_CONT, "!qinfo: " QDISPLAYND(q)); -#endif - } - /* - * if this is a no-block queue, or this is a blocking - * queue that is not flushing. reset and log - */ - if ((QSTATE(q) & RDC_QNOBLOCK) || - (IS_STATE(urdc, RDC_QUEUING))) { - - if (IS_STATE(urdc, RDC_QUEUING)) { - cmn_err(CE_WARN, "!SNDR: disk queue %s full and not flushing. 
" - "giving up", &urdc->disk_queue[0]); - cmn_err(CE_WARN, "!SNDR: %s:%s entering logging mode", - urdc->secondary.intf, urdc->secondary.file); - } - - rdc_fail_diskq(krdc, RDC_WAIT, - RDC_DOLOG | RDC_NOFAIL); - kmem_free(iohdr, sizeof (*iohdr)); - kmem_free(vec, sizeof (*vec) * numvecs); - mutex_enter(QLOCK(q)); - CLR_QSTATE(q, QTAILBUSY | RDC_QFULL); - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); - SET_LASTQTAIL(q, 0); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (ENOMEM); - } - } - - mutex_enter(QLOCK(q)); - goto retry; - - } - - qtail = QTAIL(q); -#ifdef DEBUG_WRITER_UBERNOISE - qhead = QHEAD(q); -#endif - - /* update tail pointer, nitems on queue and blocks on queue */ - INC_QTAIL(q, iofbas); /* increment tail over i/o size + ioheader size */ - INC_QNITEMS(q, 1); - /* increment counter for i/o blocks only */ - INC_QBLOCKS(q, (iofbas - FBA_LEN(sizeof (io_hdr)))); - - if (QNITEMS(q) > q->nitems_hwm) - q->nitems_hwm = QNITEMS(q); - if (QBLOCKS(q) > q->blocks_hwm) - q->blocks_hwm = QBLOCKS(q); - - if (IS_QSTATE(q, RDC_QFULL)) { - CLR_QSTATE(q, RDC_QFULL); - cv_broadcast(&q->qfullcv); - } - - mutex_exit(QLOCK(q)); - - /* - * if (krdc->io_kstats) { - * mutex_enter(krdc->io_kstats->ks_lock); - * kstat_waitq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - * mutex_exit(krdc->io_kstats->ks_lock); - * } - */ - - DTRACE_PROBE(rdc_diskq_rsrv); - - if (_rdc_rsrv_diskq(group)) { - cmn_err(CE_WARN, "!rdc_enqueue: %s reserve failed", - &urdc->disk_queue[0]); - rdc_fail_diskq(krdc, RDC_WAIT, RDC_DOLOG); - kmem_free(iohdr, sizeof (*iohdr)); - kmem_free(vec, sizeof (*vec) * numvecs); - mutex_enter(QLOCK(q)); - CLR_QSTATE(q, QTAILBUSY); - SET_LASTQTAIL(q, 0); - mutex_exit(QLOCK(q)); - if (aio->handle) - (void) nsc_free_buf(aio->handle); - return (-1); - } - -/* XXX for now do this, but later pre-alloc handle in enable/resume */ - - DTRACE_PROBE(rdc_diskq_alloc_start); - rc = nsc_alloc_buf(group->diskqfd, qtail, iofbas, - NSC_NOCACHE | NSC_WRITE | NSC_NODATA, &qbuf); - - DTRACE_PROBE(rdc_diskq_alloc_end); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!disk queue %s alloc failed(%d) %" NSC_SZFMT, - &urdc->disk_queue[0], rc, iofbas); - rdc_fail_diskq(krdc, RDC_WAIT, RDC_DOLOG); - rc = ENOMEM; - goto fail; - } - /* move vec and write to queue */ - qbuf->sb_vec = &vec[0]; - -#ifdef DEBUG_WRITER_UBERNOISE - - cmn_err(CE_NOTE, "!about to write to queue, qbuf: %p, qhead: %d, " - "qtail: %d, len: %d contents: %c%c%c%c%c", - (void *) qbuf, qhead, qtail, iofbas, - qbuf->sb_vec[1].sv_addr[0], - qbuf->sb_vec[1].sv_addr[1], - qbuf->sb_vec[1].sv_addr[2], - qbuf->sb_vec[1].sv_addr[3], - qbuf->sb_vec[1].sv_addr[4]); - cmn_err(CE_CONT, "!qinfo: " QDISPLAYND(q)); - -#endif - - DTRACE_PROBE2(rdc_diskq_nswrite_start, int, qtail, nsc_size_t, iofbas); - rc = nsc_write(qbuf, qtail, iofbas, 0); - DTRACE_PROBE2(rdc_diskq_nswrite_end, int, qtail, nsc_size_t, iofbas); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!disk queue %s write failed %d", - &urdc->disk_queue[0], rc); - rdc_fail_diskq(krdc, RDC_WAIT, RDC_DOLOG); - goto fail; - - } - - mutex_enter(QLOCK(q)); - - SET_LASTQTAIL(q, 0); - CLR_QSTATE(q, QTAILBUSY); - - mutex_exit(QLOCK(q)); - -fail: - - /* - * return what should be returned - * the aio is returned in _rdc_write after status is gathered. 
- */ - - if (qbuf) - qbuf->sb_vec = 0; - (void) nsc_free_buf(qbuf); - - if (aio->handle) - (void) nsc_free_buf(aio->handle); - - _rdc_rlse_diskq(group); - DTRACE_PROBE(rdc_diskq_rlse); - - /* free the iohdr and the vecs */ - - if (iohdr) - kmem_free(iohdr, sizeof (*iohdr)); - if (vec) - kmem_free(vec, sizeof (*vec) * numvecs); - - /* if no flusher running, start one */ - if ((!krdc->group->rdc_writer) && !IS_STATE(urdc, RDC_LOGGING)) - (void) rdc_writer(krdc->index); - - return (rc); -} - -/* - * place this on the pending list of io_hdr's out for flushing - */ -void -rdc_add_iohdr(io_hdr *header, rdc_group_t *group) -{ - disk_queue *q = NULL; -#ifdef DEBUG - io_hdr *p; -#endif - - q = &group->diskq; - - /* paranoia */ - header->dat.next = NULL; - - mutex_enter(QLOCK(q)); -#ifdef DEBUG /* AAAH! double flush!? */ - p = q->iohdrs; - while (p) { - if (p->dat.qpos == header->dat.qpos) { - cmn_err(CE_WARN, "!ADDING DUPLICATE HEADER %" NSC_SZFMT, - p->dat.qpos); - kmem_free(header, sizeof (*header)); - mutex_exit(QLOCK(q)); - return; - } - p = p->dat.next; - } -#endif - if (q->iohdrs == NULL) { - q->iohdrs = q->hdr_last = header; - q->hdrcnt = 1; - mutex_exit(QLOCK(q)); - return; - } - - q->hdr_last->dat.next = header; - q->hdr_last = header; - q->hdrcnt++; - mutex_exit(QLOCK(q)); - return; - -} - -/* - * mark an io header as flushed. If it is the qhead, - * then update the qpointers - * free the io_hdrs - * called after the bitmap is cleared by flusher - */ -void -rdc_clr_iohdr(rdc_k_info_t *krdc, nsc_size_t qpos) -{ - rdc_group_t *group = krdc->group; - disk_queue *q = NULL; - io_hdr *hp = NULL; - io_hdr *p = NULL; - int found = 0; - int cnt = 0; - -#ifndef NSC_MULTI_TERABYTE - ASSERT(qpos >= 0); /* assertion to validate change for 64bit */ - if (qpos < 0) /* not a diskq offset */ - return; -#endif - - q = &group->diskq; - mutex_enter(QLOCK(q)); - - hp = p = q->iohdrs; - - /* find outstanding io_hdr */ - while (hp) { - if (hp->dat.qpos == qpos) { - found++; - break; - } - cnt++; - p = hp; - hp = hp->dat.next; - } - - if (!found) { - if (RDC_BETWEEN(QHEAD(q), QNXTIO(q), qpos)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!iohdr already cleared? " - "qpos %" NSC_SZFMT " cnt %d ", qpos, cnt); - cmn_err(CE_WARN, "!Qinfo: " QDISPLAY(q)); -#endif - mutex_exit(QLOCK(q)); - return; - } - mutex_exit(QLOCK(q)); - return; - } - - /* mark it as flushed */ - hp->dat.iostatus = RDC_IOHDR_DONE; - - /* - * if it is the head pointer, travel the list updating the queue - * pointers until the next unflushed is reached, freeing on the way. 
- */ - while (hp && (hp->dat.qpos == QHEAD(q)) && - (hp->dat.iostatus == RDC_IOHDR_DONE)) { -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_NOTE, "!clr_iohdr info: magic %x type %d pos %d" - " qpos %d hpos %d len %d flag 0x%x iostatus %x setid %d", - hp->dat.magic, hp->dat.type, hp->dat.pos, hp->dat.qpos, - hp->dat.hpos, hp->dat.len, hp->dat.flag, - hp->dat.iostatus, hp->dat.setid); -#endif - if (hp->dat.flag & RDC_NULL_BUF) { - INC_QHEAD(q, FBA_LEN(sizeof (io_hdr))); - } else { - INC_QHEAD(q, FBA_LEN(sizeof (io_hdr)) + hp->dat.len); - DEC_QBLOCKS(q, hp->dat.len); - } - - DEC_QNITEMS(q, 1); - - if (QHEADSHLDWRAP(q)) { /* simple enough */ -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q head: " QDISPLAY(q)); -#endif - /*LINTED*/ - WRAPQHEAD(q); - } - - /* get rid of the iohdr */ - if (hp == q->iohdrs) { - q->iohdrs = hp->dat.next; - kmem_free(hp, sizeof (*hp)); - hp = q->iohdrs; - } else { - if (hp == q->hdr_last) - q->hdr_last = p; - p->dat.next = hp->dat.next; - kmem_free(hp, sizeof (*hp)); - hp = p->dat.next; - } - q->hdrcnt--; - } - - if (QEMPTY(q) && !IS_QSTATE(q, RDC_QFULL) && - !(IS_QSTATE(q, RDC_QDISABLEPEND))) { -#ifdef DEBUG_FLUSHER_UBERNOISE - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - cmn_err(CE_NOTE, "!clr_iohdr: diskq %s empty, " - "resetting defaults", urdc->disk_queue); -#endif - - rdc_init_diskq_header(group, &q->disk_hdr); - SET_QNXTIO(q, QHEAD(q)); - } - - /* wakeup any blocked enqueue threads */ - cv_broadcast(&q->qfullcv); - mutex_exit(QLOCK(q)); -} - -/* - * put in whatever useful checks we can on the io header - */ -int -rdc_iohdr_ok(io_hdr *hdr) -{ - if (hdr->dat.magic != RDC_IOHDR_MAGIC) - goto bad; - return (1); -bad: - -#ifdef DEBUG - cmn_err(CE_WARN, "!Bad io header magic %x type %d pos %" NSC_SZFMT - " hpos %" NSC_SZFMT " qpos %" NSC_SZFMT " len %" NSC_SZFMT - " flag %d iostatus %d setid %d", hdr->dat.magic, - hdr->dat.type, hdr->dat.pos, hdr->dat.hpos, hdr->dat.qpos, - hdr->dat.len, hdr->dat.flag, hdr->dat.iostatus, hdr->dat.setid); -#else - cmn_err(CE_WARN, "!Bad io header retrieved"); -#endif - return (0); -} - -/* - * rdc_netqueue_insert() - * add an item to a netqueue. No locks necessary as it should only - * be used in a single threaded manner. If that changes, then - * a lock or assertion should be done here - */ -void -rdc_netqueue_insert(rdc_aio_t *aio, net_queue *q) -{ - rdc_k_info_t *krdc = &rdc_k_info[aio->index]; - - /* paranoid check for bit set */ - RDC_CHECK_BIT(krdc, aio->pos, aio->len); - - if (q->net_qhead == NULL) { - q->net_qhead = q->net_qtail = aio; - - } else { - q->net_qtail->next = aio; - q->net_qtail = aio; - } - q->blocks += aio->len; - q->nitems++; - - if (q->nitems > q->nitems_hwm) { - q->nitems_hwm = q->nitems; - } - if (q->blocks > q->blocks_hwm) { - q->blocks_hwm = q->blocks; - } -} - -/* - * rdc_fill_aio(aio, hdr) - * take the pertinent info from an io_hdr and stick it in - * an aio, including seq number, abuf. 
- */ -void -rdc_fill_aio(rdc_group_t *grp, rdc_aio_t *aio, io_hdr *hdr, nsc_buf_t *abuf) -{ - if (hdr->dat.flag & RDC_NULL_BUF) { - aio->handle = NULL; - } else { - aio->handle = abuf; - } - aio->qhandle = abuf; - aio->pos = hdr->dat.pos; - aio->qpos = hdr->dat.qpos; - aio->len = hdr->dat.len; - aio->flag = hdr->dat.flag; - if ((aio->index = rdc_setid2idx(hdr->dat.setid)) < 0) - return; - mutex_enter(&grp->diskq.disk_qlock); - if (grp->ra_queue.qfflags & RDC_QFILLSLEEP) { - mutex_exit(&grp->diskq.disk_qlock); - aio->seq = RDC_NOSEQ; - return; - } - if (abuf && aio->qhandle) { - abuf->sb_user++; - } - aio->seq = grp->seq++; - if (grp->seq < aio->seq) - grp->seq = RDC_NEWSEQ + 1; - mutex_exit(&grp->diskq.disk_qlock); - hdr->dat.iostatus = aio->seq; - -} - -#ifdef DEBUG -int maxaios_perbuf = 0; -int midaios_perbuf = 0; -int aveaios_perbuf = 0; -int totaios_perbuf = 0; -int buf2qcalls = 0; - -void -calc_perbuf(int items) -{ - if (totaios_perbuf < 0) { - maxaios_perbuf = 0; - midaios_perbuf = 0; - aveaios_perbuf = 0; - totaios_perbuf = 0; - buf2qcalls = 0; - } - - if (items > maxaios_perbuf) - maxaios_perbuf = items; - midaios_perbuf = maxaios_perbuf / 2; - totaios_perbuf += items; - aveaios_perbuf = totaios_perbuf / buf2qcalls; -} -#endif - -/* - * rdc_discard_tmpq() - * free up the passed temporary queue - * NOTE: no cv's or mutexes have been initialized - */ -void -rdc_discard_tmpq(net_queue *q) -{ - rdc_aio_t *aio; - - if (q == NULL) - return; - - while (q->net_qhead) { - aio = q->net_qhead; - q->net_qhead = q->net_qhead->next; - if (aio->qhandle) { - aio->qhandle->sb_user--; - if (aio->qhandle->sb_user == 0) { - rdc_fixlen(aio); - (void) nsc_free_buf(aio->qhandle); - } - } - kmem_free(aio, sizeof (*aio)); - q->nitems--; - } - kmem_free(q, sizeof (*q)); - -} - -/* - * rdc_diskq_buf2queue() - * take a chunk of the diskq, parse it and assemble - * a chain of rdc_aio_t's. 
- * updates QNXTIO() - */ -net_queue * -rdc_diskq_buf2queue(rdc_group_t *grp, nsc_buf_t **abuf, int index) -{ - rdc_aio_t *aio = NULL; - nsc_vec_t *vecp = NULL; - uchar_t *vaddr = NULL; - uchar_t *ioaddr = NULL; - net_queue *netq = NULL; - io_hdr *hdr = NULL; - nsc_buf_t *buf = *abuf; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_k_info_t *krdc = &rdc_k_info[index]; - disk_queue *dq = &grp->diskq; - net_queue *nq = &grp->ra_queue; - int nullbuf = 0; - nsc_off_t endobuf; - nsc_off_t bufoff; - int vlen; - nsc_off_t fpos; - long bufcnt = 0; - int nullblocks = 0; - int fail = 1; - - if (buf == NULL) - return (NULL); - - netq = kmem_zalloc(sizeof (*netq), KM_NOSLEEP); - if (netq == NULL) { - cmn_err(CE_WARN, "!SNDR: unable to allocate net queue"); - return (NULL); - } - - vecp = buf->sb_vec; - vlen = vecp->sv_len; - vaddr = vecp->sv_addr; - bufoff = buf->sb_pos; - endobuf = bufoff + buf->sb_len; - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_WARN, "!BUFFOFFENTER %d", bufoff); -#endif - /* CONSTCOND */ - while (1) { - if (IS_STATE(urdc, RDC_LOGGING) || - (nq->qfflags & RDC_QFILLSLEEP)) { - fail = 0; - goto fail; - } -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_WARN, "!BUFFOFF_0 %d", bufoff); -#endif - - if ((vaddr == NULL) || (vlen == 0)) - break; - - if (vlen <= 0) { - vecp++; - vaddr = vecp->sv_addr; - vlen = vecp->sv_len; - if (vaddr == NULL) - break; - } - - /* get the iohdr information */ - - hdr = kmem_zalloc(sizeof (*hdr), KM_NOSLEEP); - if (hdr == NULL) { - cmn_err(CE_WARN, - "!SNDR: unable to allocate net queue header"); - goto fail; - } - - ioaddr = (uchar_t *)hdr; - - bcopy(vaddr, ioaddr, sizeof (*hdr)); - - if (!rdc_iohdr_ok(hdr)) { - cmn_err(CE_WARN, - "!unable to retrieve i/o data from queue %s " - "at offset %" NSC_SZFMT " bp: %" NSC_SZFMT " bl: %" - NSC_SZFMT, urdc->disk_queue, - bufoff, buf->sb_pos, buf->sb_len); -#ifdef DEBUG_DISKQ - cmn_err(CE_WARN, "!FAILING QUEUE state: %x", - rdc_get_vflags(urdc)); - cmn_err(CE_WARN, "!qinfo: " QDISPLAY(dq)); - cmn_err(CE_WARN, "!VADDR %p, IOADDR %p", vaddr, ioaddr); - cmn_err(CE_WARN, "!BUF %p", buf); -#endif - cmn_err(CE_WARN, "!qinfo: " QDISPLAYND(dq)); - - goto fail; - } - - nullbuf = hdr->dat.flag & RDC_NULL_BUF; - - bufoff += FBA_NUM(sizeof (*hdr)); - - /* out of buffer, set nxtio to re-read this last hdr */ - if (!nullbuf && ((bufoff + hdr->dat.len) > endobuf)) { - break; - } - - bufcnt += FBA_NUM(sizeof (*hdr)); - - aio = kmem_zalloc(sizeof (*aio), KM_NOSLEEP); - if (aio == NULL) { - bufcnt -= FBA_NUM(sizeof (*hdr)); - cmn_err(CE_WARN, "!SNDR: net queue aio alloc failed"); - goto fail; - } - - if (!nullbuf) { - /* move to next iohdr in big buf */ - bufoff += hdr->dat.len; - bufcnt += hdr->dat.len; - } - - rdc_fill_aio(grp, aio, hdr, buf); - - if (aio->index < 0) { - cmn_err(CE_WARN, "!Set id %d not found or no longer " - "enabled, failing disk queue", hdr->dat.setid); - kmem_free(aio, sizeof (*aio)); - goto fail; - } - if (aio->seq == RDC_NOSEQ) { - kmem_free(aio, sizeof (*aio)); - fail = 0; - goto fail; - } - if (aio->handle == NULL) - nullblocks += aio->len; - - rdc_add_iohdr(hdr, grp); - hdr = NULL; /* don't accidentally free on break or fail */ - rdc_netqueue_insert(aio, netq); - - /* no more buffer, skip the below logic */ - if ((bufoff + FBA_NUM(sizeof (*hdr))) >= endobuf) { - break; - } - - fpos = bufoff - buf->sb_pos; - vecp = buf->sb_vec; - for (; fpos >= FBA_NUM(vecp->sv_len); vecp++) - fpos -= FBA_NUM(vecp->sv_len); - vlen = vecp->sv_len - FBA_SIZE(fpos); - vaddr = vecp->sv_addr + FBA_SIZE(fpos); - /* abuf = NULL; */ - 
- } - - /* free extraneous header */ - if (hdr) { - kmem_free(hdr, sizeof (*hdr)); - hdr = NULL; - } - - /* - * probably won't happen, but if we didn't goto fail, but - * we don't contain anything meaningful.. return NULL - * and let the flusher or the sleep/wakeup routines - * decide - */ - if (netq && netq->nitems == 0) { - kmem_free(netq, sizeof (*netq)); - return (NULL); - } - -#ifdef DEBUG - buf2qcalls++; - calc_perbuf(netq->nitems); -#endif - if (IS_STATE(urdc, RDC_LOGGING) || - nq->qfflags & RDC_QFILLSLEEP) { - fail = 0; - goto fail; - } - - mutex_enter(QLOCK(dq)); - INC_QNXTIO(dq, bufcnt); - mutex_exit(QLOCK(dq)); - - netq->net_qtail->orig_len = nullblocks; /* overload */ - - return (netq); - -fail: - - if (hdr) { - kmem_free(hdr, sizeof (*hdr)); - } - - if (netq) { - if (netq->nitems > 0) { - /* the never can happen case ... */ - if ((netq->nitems == 1) && - (netq->net_qhead->handle == NULL)) { - (void) nsc_free_buf(buf); - *abuf = NULL; - } - - } - rdc_discard_tmpq(netq); - } - - mutex_enter(QLOCK(dq)); - rdc_dump_iohdrs(dq); - mutex_exit(QLOCK(dq)); - - if (fail) { /* real failure, not just state change */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_diskq_buf2queue: failing disk queue %s", - urdc->disk_queue); -#endif - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_DOLOG); - } - - return (NULL); - -} - -/* - * rdc_diskq_unqueue - * remove one chunk from the diskq belonging to - * rdc_k_info[index] - * updates the head and tail pointers in the disk header - * but does not write. The header should be written on ack - * flusher should free whatever.. - */ -rdc_aio_t * -rdc_diskq_unqueue(int index) -{ - int rc, rc1, rc2; - nsc_off_t qhead; - int nullhandle = 0; - io_hdr *iohdr; - rdc_aio_t *aio = NULL; - nsc_buf_t *buf = NULL; - nsc_buf_t *abuf = NULL; - rdc_group_t *group = NULL; - disk_queue *q = NULL; - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - - group = krdc->group; - q = &group->diskq; - - if (group->diskqfd == NULL) /* we've been disabled */ - return (NULL); - - aio = kmem_zalloc(sizeof (*aio), KM_NOSLEEP); - if (!aio) { - return (NULL); - } - - iohdr = kmem_zalloc(sizeof (*iohdr), KM_NOSLEEP); - if (!iohdr) { - kmem_free(aio, sizeof (*aio)); - return (NULL); - } - - mutex_enter(QLOCK(q)); - rdc_set_qbusy(q); /* make sure no one disables the queue */ - mutex_exit(QLOCK(q)); - - DTRACE_PROBE(rdc_diskq_unq_rsrv); - - if (_rdc_rsrv_diskq(group)) { - cmn_err(CE_WARN, "!rdc_unqueue: %s reserve failed", - urdc->disk_queue); - goto fail; - } - - mutex_enter(QHEADLOCK(q)); - mutex_enter(QLOCK(q)); - - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || IS_STATE(urdc, RDC_LOGGING)) { - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - kmem_free(aio, sizeof (*aio)); - kmem_free(iohdr, sizeof (*iohdr)); - return (NULL); - } - - if (QNXTIOSHLDWRAP(q)) { -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q nxtio: " QDISPLAY(q)); -#endif - /*LINTED*/ - WRAPQNXTIO(q); - } - - /* read the metainfo at q->nxt_io first */ - if (QNXTIO(q) == QTAIL(q)) { /* empty */ - - _rdc_rlse_diskq(group); - if (q->lastio->handle) - (void) nsc_free_buf(q->lastio->handle); - bzero(&(*q->lastio), sizeof (*q->lastio)); - - mutex_exit(QHEADLOCK(q)); - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - kmem_free(aio, sizeof (*aio)); - kmem_free(iohdr, sizeof (*iohdr)); - return (NULL); - } - - qhead = QNXTIO(q); - - /* - * have to drop the lock here, sigh. Cannot block incoming io - * we have to wait until after this read to find out how - * much to increment QNXTIO. 
Might as well grab the seq then too - */ - - while ((qhead == LASTQTAIL(q)) && (IS_QSTATE(q, QTAILBUSY))) { - mutex_exit(QLOCK(q)); -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, "!Qtail busy delay lastqtail: %d", qhead); -#endif - delay(5); - mutex_enter(QLOCK(q)); - } - mutex_exit(QLOCK(q)); - - DTRACE_PROBE(rdc_diskq_iohdr_read_start); - - rc = rdc_ns_io(group->diskqfd, NSC_READ, qhead, - (uchar_t *)iohdr, FBA_SIZE(1)); - - DTRACE_PROBE(rdc_diskq_iohdr_read_end); - - if (!RDC_SUCCESS(rc) || !rdc_iohdr_ok(iohdr)) { - cmn_err(CE_WARN, "!unable to retrieve i/o data from queue %s" - " at offset %" NSC_SZFMT " rc %d", urdc->disk_queue, - qhead, rc); -#ifdef DEBUG_DISKQ - cmn_err(CE_WARN, "!qinfo: " QDISPLAY(q)); -#endif - mutex_exit(QHEADLOCK(q)); - goto fail; - } - -/* XXX process buffer here, creating rdc_aio_t's */ - - mutex_enter(QLOCK(q)); - /* update the next pointer */ - if (iohdr->dat.flag == RDC_NULL_BUF) { - INC_QNXTIO(q, FBA_LEN(sizeof (io_hdr))); - nullhandle = 1; - } else { - INC_QNXTIO(q, (FBA_LEN(sizeof (io_hdr)) + iohdr->dat.len)); - } - - aio->seq = group->seq++; - if (group->seq < aio->seq) - group->seq = RDC_NEWSEQ + 1; - - mutex_exit(QLOCK(q)); - mutex_exit(QHEADLOCK(q)); - -#ifdef DEBUG_FLUSHER_UBERNOISE - p = &iohdr->dat; - cmn_err(CE_NOTE, "!unqueued iohdr from %d pos: %d len: %d flag: %d " - "iostatus: %d setid: %d time: %d", qhead, p->pos, p->len, - p->flag, p->iostatus, p->setid, p->time); -#endif - - if (nullhandle) /* nothing to get from queue */ - goto nullbuf; - - /* now that we know how much to get (iohdr.dat.len), get it */ - DTRACE_PROBE(rdc_diskq_unq_allocbuf1_start); - - rc = nsc_alloc_buf(group->diskqfd, qhead + 1, iohdr->dat.len, - NSC_NOCACHE | NSC_READ, &buf); - - DTRACE_PROBE(rdc_diskq_unq_allocbuf1_end); - - /* and get somewhere to keep it for a bit */ - DTRACE_PROBE(rdc_diskq_unq_allocbuf2_start); - - rc1 = nsc_alloc_abuf(qhead + 1, iohdr->dat.len, 0, &abuf); - - DTRACE_PROBE(rdc_diskq_unq_allocbuf2_end); - - if (!RDC_SUCCESS(rc) || !RDC_SUCCESS(rc1)) { /* uh-oh */ - cmn_err(CE_WARN, "!disk queue %s read failure", - urdc->disk_queue); - goto fail; - } - - /* move it on over... */ - rc2 = nsc_copy(buf, abuf, qhead + 1, qhead + 1, iohdr->dat.len); - - if (!RDC_SUCCESS(rc2)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!nsc_copy failed for diskq unqueue"); -#endif - goto fail; - } - - /* let go of the real buf, we've got the abuf */ - (void) nsc_free_buf(buf); - buf = NULL; - - aio->handle = abuf; - /* Hack in the original sb_pos */ - aio->handle->sb_pos = iohdr->dat.hpos; - - /* skip the RDC_HANDLE_LIMITS check */ - abuf->sb_user |= RDC_DISKQUE; - -nullbuf: - if (nullhandle) { - aio->handle = NULL; - } - - /* set up the rest of the aio values, seq set above ... 
*/ - aio->pos = iohdr->dat.pos; - aio->qpos = iohdr->dat.qpos; - aio->len = iohdr->dat.len; - aio->flag = iohdr->dat.flag; - aio->index = rdc_setid2idx(iohdr->dat.setid); - if (aio->index < 0) { /* uh-oh */ -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_diskq_unqueue: index < 0"); -#endif - goto fail; - } - - -#ifdef DEBUG_FLUSHER_UBERNOISE_STAMP - h = &q->disk_hdr.h; - cmn_err(CE_NOTE, "!stamping diskq header:\n" - "magic: %x\nstate: %d\nhead_offset: %d\n" - "tail_offset: %d\ndisk_size: %d\nnitems: %d\nblocks: %d\n", - h->magic, h->state, h->head_offset, h->tail_offset, - h->disk_size, h->nitems, h->blocks); -#endif - - _rdc_rlse_diskq(group); - - mutex_enter(QLOCK(q)); - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - - DTRACE_PROBE(rdc_diskq_unq_rlse); - - iohdr->dat.iostatus = aio->seq; - rdc_add_iohdr(iohdr, group); - -#ifdef DEBUG_FLUSHER_UBERNOISE - if (!nullhandle) { - cmn_err(CE_NOTE, "!UNQUEUING, %p" - " contents: %c%c%c%c%c pos: %d len: %d", - (void *)aio->handle, - aio->handle->sb_vec[0].sv_addr[0], - aio->handle->sb_vec[0].sv_addr[1], - aio->handle->sb_vec[0].sv_addr[2], - aio->handle->sb_vec[0].sv_addr[3], - aio->handle->sb_vec[0].sv_addr[4], - aio->handle->sb_pos, aio->handle->sb_len); - } else { - cmn_err(CE_NOTE, "!UNQUEUING, NULL " QDISPLAY(q)); - } - cmn_err(CE_NOTE, "!qinfo: " QDISPLAY(q)); -#endif - - return (aio); - -fail: - if (aio) - kmem_free(aio, sizeof (*aio)); - if (iohdr) - kmem_free(iohdr, sizeof (*iohdr)); - if (buf) - (void) nsc_free_buf(buf); - if (abuf) - (void) nsc_free_buf(abuf); - - _rdc_rlse_diskq(group); -#ifdef DEBUG - cmn_err(CE_WARN, "!diskq_unqueue: failing diskq"); -#endif - mutex_enter(QLOCK(q)); - rdc_clr_qbusy(q); - mutex_exit(QLOCK(q)); - - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_DOLOG); - - return (NULL); -} - -int -rdc_diskq_inuse(rdc_set_t *set, char *diskq) -{ - rdc_u_info_t *urdc; - char *group; - int index; - - group = set->group_name; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if ((rdc_lookup_bitmap(diskq) >= 0) || - (rdc_lookup_configured(diskq) >= 0)) { - return (1); - } - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) - continue; - - /* same diskq different group */ - if ((strcmp(urdc->disk_queue, diskq) == 0) && - (urdc->group_name[0] == '\0' || - strcmp(urdc->group_name, group))) { - return (1); - } - } - /* last, but not least, lets see if someone is getting really funky */ - if ((strcmp(set->disk_queue, set->primary.file) == 0) || - (strcmp(set->disk_queue, set->primary.bitmap) == 0)) { - return (1); - } - - return (0); - -} - -#ifdef DEBUG -int maxlen = 0; -int avelen = 0; -int totalen = 0; -int lencalls = 0; - -void -update_lenstats(int len) -{ - if (lencalls == 0) { - lencalls = 1; - avelen = 0; - maxlen = 0; - totalen = 0; - } - - if (len > maxlen) - maxlen = len; - totalen += len; - avelen = totalen / lencalls; -} -#endif - -/* - * rdc_calc_len() - * returns the size of the diskq that can be read for dequeuing - * always <= RDC_MAX_DISKQREAD - */ -int -rdc_calc_len(rdc_k_info_t *krdc, disk_queue *dq) -{ - nsc_size_t len = 0; - - ASSERT(MUTEX_HELD(QLOCK(dq))); - - /* ---H-----N-----T--- */ - if (QNXTIO(dq) < QTAIL(dq)) { - - len = min(RDC_MAX_DISKQREAD, QTAIL(dq) - QNXTIO(dq)); - - /* ---T-----H-----N--- */ - } else if (QNXTIO(dq) > QTAIL(dq)) { - if (QWRAP(dq)) { - len = min(RDC_MAX_DISKQREAD, QWRAP(dq) - QNXTIO(dq)); - } else { /* should never happen */ - len = min(RDC_MAX_DISKQREAD, QSIZE(dq) - QNXTIO(dq)); - } - } else if (QNXTIO(dq) == QTAIL(dq)) { - if (QWRAP(dq) && 
!IS_QSTATE(dq, QNXTIOWRAPD)) - len = min(RDC_MAX_DISKQREAD, QWRAP(dq) - QNXTIO(dq)); - } - - len = min(len, krdc->maxfbas); - -#ifdef DEBUG - lencalls++; - update_lenstats(len); -#endif - - return ((int)len); -} - -/* - * lie a little if we can, so we don't get tied up in - * _nsc_wait_dbuf() on the next read. sb_len MUST be - * restored before nsc_free_buf() however, or we will - * be looking at memory leak city.. - * so update the entire queue with the info as well - * and the one that ends up freeing it, can fix the len - * IMPORTANT: This assumes that we are not cached, in - * 3.2 caching was turned off for data volumes, if that - * changes, then this must too - */ -void -rdc_trim_buf(nsc_buf_t *buf, net_queue *q) -{ - rdc_aio_t *p; - int len; - - if (buf == NULL || q == NULL) - return; - - if (q && (buf->sb_len > - (q->blocks + q->nitems - q->net_qtail->orig_len))) { - len = buf->sb_len; - buf->sb_len = (q->blocks + q->nitems - q->net_qtail->orig_len); - } - - p = q->net_qhead; - do { - p->orig_len = len; - p = p->next; - - } while (p); - -} - -/* - * rdc_read_diskq_buf() - * read a large as possible chunk of the diskq into a nsc_buf_t - * and convert it to a net_queue of rdc_aio_t's to be appended - * to the group's netqueue - */ -net_queue * -rdc_read_diskq_buf(int index) -{ - nsc_buf_t *buf = NULL; - net_queue *tmpnq = NULL; - disk_queue *dq = NULL; - rdc_k_info_t *krdc = &rdc_k_info[index]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - rdc_group_t *group = krdc->group; - net_queue *nq = &group->ra_queue; - int len = 0; - int rc; - int fail = 0; - int offset = 0; - - if (group == NULL || group->diskqfd == NULL) { - DTRACE_PROBE(rdc_read_diskq_buf_bail1); - return (NULL); - } - - dq = &group->diskq; - - mutex_enter(QLOCK(dq)); - rdc_set_qbusy(dq); /* prevent disables on the queue */ - mutex_exit(QLOCK(dq)); - - if (_rdc_rsrv_diskq(group)) { - cmn_err(CE_WARN, "!rdc_readdiskqbuf: %s reserve failed", - urdc->disk_queue); - mutex_enter(QLOCK(dq)); - rdc_clr_qbusy(dq); /* prevent disables on the queue */ - mutex_exit(QLOCK(dq)); - return (NULL); - } - - mutex_enter(QHEADLOCK(dq)); - mutex_enter(QLOCK(dq)); - - if (IS_STATE(urdc, RDC_DISKQ_FAILED) || - IS_STATE(urdc, RDC_LOGGING) || - (nq->qfflags & RDC_QFILLSLEEP)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - DTRACE_PROBE(rdc_read_diskq_buf_bail2); - goto done; - } - - /* - * real corner case here, we need to let the flusher wrap first. - * we've gotten too far ahead, so just delay and try again - */ - if (IS_QSTATE(dq, QNXTIOWRAPD) && AUXQWRAP(dq)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - goto done; - } - - if (QNXTIOSHLDWRAP(dq)) { -#ifdef DEBUG_DISKQWRAP - cmn_err(CE_NOTE, "!wrapping Q nxtio: " QDISPLAY(dq)); -#endif - /*LINTED*/ - WRAPQNXTIO(dq); - } - - /* read the metainfo at q->nxt_io first */ - if (!QNITEMS(dq)) { /* empty */ - - if (dq->lastio->handle) - (void) nsc_free_buf(dq->lastio->handle); - bzero(&(*dq->lastio), sizeof (*dq->lastio)); - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - DTRACE_PROBE(rdc_read_diskq_buf_bail3); - goto done; - } - - - len = rdc_calc_len(krdc, dq); - - if ((len <= 0) || (IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_DISKQ_FAILED)) || - (nq->qfflags & RDC_QFILLSLEEP)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - /* - * a write could be trying to get on the queue, or if - * the queue is really really small, a complete image - * of it could be on the net queue waiting for flush. 
- * the latter being a fairly stupid scenario and a gross - * misconfiguration.. but what the heck, why make the thread - * thrash around.. just pause a little here. - */ - if (len <= 0) - delay(50); - - DTRACE_PROBE3(rdc_read_diskq_buf_bail4, int, len, - int, rdc_get_vflags(urdc), int, nq->qfflags); - - goto done; - } - - DTRACE_PROBE2(rdc_calc_len, int, len, int, (int)QNXTIO(dq)); - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_WARN, "!CALC_LEN(%d) h:%d n%d t%d, w%d", - len, QHEAD(dq), QNXTIO(dq), QTAIL(dq), QWRAP(dq)); - cmn_err(CE_CONT, "!qinfo: " QDISPLAYND(dq)); -#endif - SET_QCOALBOUNDS(dq, QNXTIO(dq) + len); - - while ((LASTQTAIL(dq) > 0) && !QWRAP(dq) && - ((QNXTIO(dq) + len) >= LASTQTAIL(dq)) && - (IS_QSTATE(dq, QTAILBUSY))) { - mutex_exit(QLOCK(dq)); - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_NOTE, "!Qtail busy delay nxtio %d len %d " - "lastqtail: %d", QNXTIO(dq), len, LASTQTAIL(dq)); -#endif - delay(20); - mutex_enter(QLOCK(dq)); - } - - offset = QNXTIO(dq); - - /* - * one last check to see if we have gone logging, or should. - * we may have released the mutex above, so check again - */ - if ((IS_STATE(urdc, RDC_LOGGING)) || - (IS_STATE(urdc, RDC_DISKQ_FAILED)) || - (nq->qfflags & RDC_QFILLSLEEP)) { - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - goto done; - } - - mutex_exit(QLOCK(dq)); - mutex_exit(QHEADLOCK(dq)); - - DTRACE_PROBE2(rdc_buf2q_preread, int, offset, int, len); - - rc = nsc_alloc_buf(group->diskqfd, offset, len, - NSC_NOCACHE | NSC_READ, &buf); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!disk queue %s read failure pos %" NSC_SZFMT - " len %d", urdc->disk_queue, QNXTIO(dq), len); - fail++; - buf = NULL; - DTRACE_PROBE(rdc_read_diskq_buf_bail5); - goto done; - } - - DTRACE_PROBE2(rdc_buf2q_postread, int, offset, nsc_size_t, buf->sb_len); - - /* - * convert buf to a net_queue. 
buf2queue will - * update the QNXTIO pointer for us, based on - * the last readable queue item - */ - tmpnq = rdc_diskq_buf2queue(group, &buf, index); - -#ifdef DEBUG_FLUSHER_UBERNOISE - cmn_err(CE_NOTE, "!QBUF p: %d l: %d p+l: %d users: %d qblocks: %d ", - "qitems: %d WASTED: %d", buf->sb_pos, buf->sb_len, - buf->sb_pos+buf->sb_len, buf->sb_user, tmpnq?tmpnq->blocks:-1, - tmpnq?tmpnq->nitems:-1, - tmpnq?((buf->sb_len-tmpnq->nitems) - tmpnq->blocks):-1); -#endif - - DTRACE_PROBE3(rdc_buf2que_returned, net_queue *, tmpnq?tmpnq:0, - uint64_t, tmpnq?tmpnq->nitems:0, - uint_t, tmpnq?tmpnq->net_qhead->seq:0); -done: - - /* we don't need to retain the buf */ - if (tmpnq == NULL) - if (buf) { - (void) nsc_free_buf(buf); - buf = NULL; - } - - rdc_trim_buf(buf, tmpnq); - - mutex_enter(QLOCK(dq)); - rdc_clr_qbusy(dq); - mutex_exit(QLOCK(dq)); - - _rdc_rlse_diskq(group); - - if (fail) { - rdc_fail_diskq(krdc, RDC_NOWAIT, RDC_DOLOG); - tmpnq = NULL; - } - - return (tmpnq); -} - -/* - * rdc_dequeue() - * removes the head of the memory queue - */ -rdc_aio_t * -rdc_dequeue(rdc_k_info_t *krdc, int *rc) -{ - net_queue *q = &krdc->group->ra_queue; - disk_queue *dq = &krdc->group->diskq; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_aio_t *aio; - - *rc = 0; - - if (q == NULL) - return (NULL); - - mutex_enter(&q->net_qlock); - - aio = q->net_qhead; - - if (aio == NULL) { -#ifdef DEBUG - if (q->nitems != 0 || q->blocks != 0 || q->net_qtail != 0) { - cmn_err(CE_PANIC, - "rdc_dequeue(1): q %p, q blocks %" NSC_SZFMT - " , nitems %" NSC_SZFMT ", qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) aio, (void *) q->net_qtail); - } -#endif - - mutex_exit(&q->net_qlock); - - if ((!IS_STATE(urdc, RDC_LOGGING)) && - (!(q->qfflags & RDC_QFILLSLEEP)) && - (!IS_STATE(urdc, RDC_SYNCING)) && (QNITEMS(dq) > 0)) { - *rc = EAGAIN; - } - - goto done; - } - - /* aio remove from q */ - - q->net_qhead = aio->next; - aio->next = NULL; - - if (q->net_qtail == aio) - q->net_qtail = q->net_qhead; - - q->blocks -= aio->len; - q->nitems--; - -#ifdef DEBUG - if (q->net_qhead == NULL) { - if (q->nitems != 0 || q->blocks != 0 || q->net_qtail != 0) { - cmn_err(CE_PANIC, "rdc_dequeue(2): q %p, q blocks %" - NSC_SZFMT " nitems %" NSC_SZFMT - " , qhead %p qtail %p", - (void *) q, q->blocks, q->nitems, - (void *) q->net_qhead, (void *) q->net_qtail); - } - } -#endif - mutex_exit(&q->net_qlock); -done: - - mutex_enter(&q->net_qlock); - - if (rdc_qfill_shldwakeup(krdc)) - cv_broadcast(&q->qfcv); - - /* - * clear EAGAIN if - * logging or q filler thread is sleeping or stopping altogether - * or if q filler thread is dead already - * or if syncing, this will return a null aio, with no error code set - * telling the flusher to die - */ - if (*rc == EAGAIN) { - if (IS_STATE(urdc, RDC_LOGGING) || - (q->qfflags & (RDC_QFILLSLEEP | RDC_QFILLSTOP)) || - (IS_QSTATE(dq, (RDC_QDISABLEPEND | RDC_STOPPINGFLUSH))) || - (q->qfill_sleeping == RDC_QFILL_DEAD) || - (IS_STATE(urdc, RDC_SYNCING))) - *rc = 0; - } - - mutex_exit(&q->net_qlock); - - return (aio); - -} - -/* - * rdc_qfill_shldsleep() - * returns 1 if the qfilling code should cv_wait() 0 if not. - * reasons for going into cv_wait(); - * there is nothing in the diskq to flush to mem. - * the memory queue has gotten too big and needs more flushing attn. 
- */ -int -rdc_qfill_shldsleep(rdc_k_info_t *krdc) -{ - net_queue *nq = &krdc->group->ra_queue; - disk_queue *dq = &krdc->group->diskq; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&nq->net_qlock)); - - if (!RDC_IS_DISKQ(krdc->group)) - return (1); - - if (nq->qfflags & RDC_QFILLSLEEP) { -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: QFILLSLEEP idx: %d", - krdc->index); -#endif - return (1); - } - - if (IS_STATE(urdc, RDC_LOGGING) || IS_STATE(urdc, RDC_SYNCING)) { -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: Sync|Log (0x%x)" - " idx: %d", rdc_get_vflags(urdc), urdc->index); -#endif - return (1); - } - - mutex_enter(QLOCK(dq)); - if ((QNXTIO(dq) == QTAIL(dq)) && !IS_QSTATE(dq, RDC_QFULL)) { -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: QEMPTY"); -#endif - mutex_exit(QLOCK(dq)); - return (1); - } - mutex_exit(QLOCK(dq)); - - if (nq->blocks >= RDC_MAX_QBLOCKS) { - nq->hwmhit = 1; - /* stuck flushers ? */ -#ifdef DEBUG_DISKQ_NOISY - cmn_err(CE_NOTE, "!Sleeping diskq->memq flusher: memq full:" - " seq: %d seqack %d", krdc->group->seq, - krdc->group->seqack); -#endif - return (1); - } - - return (0); -} - -/* - * rdc_join_netqueues(a, b) - * appends queue b to queue a updating all the queue info - * as it is assumed queue a is the important one, - * it's mutex must be held. no one can add to queue b - */ -void -rdc_join_netqueues(net_queue *q, net_queue *tmpq) -{ - ASSERT(MUTEX_HELD(&q->net_qlock)); - - if (q->net_qhead == NULL) { /* empty */ -#ifdef DEBUG - if (q->blocks != 0 || q->nitems != 0) { - cmn_err(CE_PANIC, "rdc filler: q %p, qhead 0, " - " q blocks %" NSC_SZFMT ", nitems %" NSC_SZFMT, - (void *) q, q->blocks, q->nitems); - } -#endif - q->net_qhead = tmpq->net_qhead; - q->net_qtail = tmpq->net_qtail; - q->nitems = tmpq->nitems; - q->blocks = tmpq->blocks; - } else { - q->net_qtail->next = tmpq->net_qhead; - q->net_qtail = tmpq->net_qtail; - q->nitems += tmpq->nitems; - q->blocks += tmpq->blocks; - } - - if (q->nitems > q->nitems_hwm) { - q->nitems_hwm = q->nitems; - } - - if (q->blocks > q->blocks_hwm) { - q->blocks_hwm = q->blocks; - } -} - -/* - * rdc_qfiller_thr() single thread that moves - * data from the diskq to a memory queue for - * the flusher to pick up. - */ -void -rdc_qfiller_thr(rdc_k_info_t *krdc) -{ - rdc_group_t *grp = krdc->group; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - net_queue *q = &grp->ra_queue; - net_queue *tmpq = NULL; - int index = krdc->index; - - q->qfill_sleeping = RDC_QFILL_AWAKE; - while (!(q->qfflags & RDC_QFILLSTOP)) { - if (!RDC_IS_DISKQ(grp) || - IS_STATE(urdc, RDC_LOGGING) || - IS_STATE(urdc, RDC_DISKQ_FAILED) || - (q->qfflags & RDC_QFILLSLEEP)) { - goto nulltmpq; - } - - DTRACE_PROBE(qfiller_top); - tmpq = rdc_read_diskq_buf(index); - - if (tmpq == NULL) - goto nulltmpq; - - if ((q->qfflags & RDC_QFILLSLEEP) || - IS_STATE(urdc, RDC_LOGGING)) { - rdc_discard_tmpq(tmpq); - goto nulltmpq; - } - - mutex_enter(&q->net_qlock); - - /* race with log, redundant yet paranoid */ - if ((q->qfflags & RDC_QFILLSLEEP) || - IS_STATE(urdc, RDC_LOGGING)) { - rdc_discard_tmpq(tmpq); - mutex_exit(&q->net_qlock); - goto nulltmpq; - } - - - rdc_join_netqueues(q, tmpq); - kmem_free(tmpq, sizeof (*tmpq)); - tmpq = NULL; - - mutex_exit(&q->net_qlock); -nulltmpq: - /* - * sleep for a while if we can. - * the enqueuing or flushing code will - * wake us if if necessary. 
- */ - mutex_enter(&q->net_qlock); - while (rdc_qfill_shldsleep(krdc)) { - q->qfill_sleeping = RDC_QFILL_ASLEEP; - DTRACE_PROBE(qfiller_sleep); - cv_wait(&q->qfcv, &q->net_qlock); - DTRACE_PROBE(qfiller_wakeup); - q->qfill_sleeping = RDC_QFILL_AWAKE; - if (q->qfflags & RDC_QFILLSTOP) { -#ifdef DEBUG_DISKQ - cmn_err(CE_NOTE, - "!rdc_qfiller_thr: recieved kill signal"); -#endif - mutex_exit(&q->net_qlock); - goto done; - } - } - mutex_exit(&q->net_qlock); - - DTRACE_PROBE(qfiller_bottom); - } -done: - DTRACE_PROBE(qfiller_done); - q->qfill_sleeping = RDC_QFILL_DEAD; /* the big sleep */ - -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_qfiller_thr stopping"); -#endif - q->qfflags &= ~RDC_QFILLSTOP; - -} - -int -_rdc_add_diskq(int index, char *diskq) -{ - rdc_k_info_t *krdc, *kp; - rdc_u_info_t *urdc, *up; - rdc_group_t *group; - int rc; - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - group = krdc->group; - - if (!diskq || urdc->disk_queue[0]) { /* how'd that happen? */ -#ifdef DEBUG - cmn_err(CE_WARN, "!NULL diskq in _rdc_add_diskq"); -#endif - rc = -1; - goto fail; - } - - /* if the enable fails, this is bzero'ed */ - (void) strncpy(urdc->disk_queue, diskq, NSC_MAXPATH); - group->flags &= ~RDC_MEMQUE; - group->flags |= RDC_DISKQUE; - -#ifdef DEBUG - cmn_err(CE_NOTE, "!adding diskq to group %s", urdc->group_name); -#endif - mutex_enter(&rdc_conf_lock); - rc = rdc_enable_diskq(krdc); - mutex_exit(&rdc_conf_lock); - - if (rc == RDC_EQNOADD) { - goto fail; - } - - RDC_ZERO_BITREF(krdc); - for (kp = krdc->group_next; kp != krdc; kp = kp->group_next) { - up = &rdc_u_info[kp->index]; - (void) strncpy(up->disk_queue, diskq, NSC_MAXPATH); - /* size lives in the diskq structure, already set by enable */ - RDC_ZERO_BITREF(kp); - } - -fail: - return (rc); - -} - -/* - * add a diskq to an existing set/group - */ -int -rdc_add_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - char *diskq; - int rc; - int index; - rdc_k_info_t *krdc, *this; - rdc_u_info_t *urdc; - rdc_group_t *group; - nsc_size_t vol_size = 0; - nsc_size_t req_size = 0; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - mutex_exit(&rdc_conf_lock); - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto failed; - } - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - this = &rdc_k_info[index]; - group = krdc->group; - diskq = uparms->rdc_set->disk_queue; - - if (!IS_ASYNC(urdc)) { - spcs_s_add(kstatus, RDC_EQWRONGMODE, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQNOQUEUE; - goto failed; - } - - do { - if (!IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_EQNOTLOGGING, - uparms->rdc_set->disk_queue); - rc = RDC_EQNOTLOGGING; - goto failed; - } - /* make sure that we have enough bitmap vol */ - req_size = RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size); - req_size += FBA_LEN(krdc->bitmap_size * BITS_IN_BYTE); - - rc = _rdc_rsrv_devs(krdc, RDC_BMP, RDC_INTERNAL); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, - "!rdc_open_diskq: Bitmap reserve failed"); - spcs_s_add(kstatus, RDC_EBITMAP, - urdc->primary.bitmap); - rc = RDC_EBITMAP; - goto failed; - } - - (void) nsc_partsize(krdc->bitmapfd, &vol_size); - - _rdc_rlse_devs(krdc, RDC_BMP); - - if (vol_size < req_size) { - spcs_s_add(kstatus, RDC_EBITMAP2SMALL, - urdc->primary.bitmap); - rc = RDC_EBITMAP2SMALL; - goto failed; - } - - krdc = krdc->group_next; - urdc = &rdc_u_info[krdc->index]; - - } 
while (krdc != this); - - if (urdc->disk_queue[0] != '\0') { - spcs_s_add(kstatus, RDC_EQALREADY, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQALREADY; - goto failed; - } - - if (uparms->options & RDC_OPT_SECONDARY) { /* how'd we get here? */ - spcs_s_add(kstatus, RDC_EQWRONGMODE); - rc = RDC_EQWRONGMODE; - goto failed; - } - - mutex_enter(&rdc_conf_lock); - if (rdc_diskq_inuse(uparms->rdc_set, uparms->rdc_set->disk_queue)) { - spcs_s_add(kstatus, RDC_EDISKQINUSE, - uparms->rdc_set->disk_queue); - rc = RDC_EDISKQINUSE; - mutex_exit(&rdc_conf_lock); - goto failed; - } - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - rc = _rdc_add_diskq(urdc->index, diskq); - if (rc < 0 || rc == RDC_EQNOADD) { - group->flags &= ~RDC_DISKQUE; - group->flags |= RDC_MEMQUE; - spcs_s_add(kstatus, RDC_EQNOADD, uparms->rdc_set->disk_queue); - rc = RDC_EQNOADD; - } - rdc_group_exit(krdc); -failed: - return (rc); -} - -int -_rdc_init_diskq(rdc_k_info_t *krdc) -{ - rdc_group_t *group = krdc->group; - disk_queue *q = &group->diskq; - - rdc_init_diskq_header(group, &group->diskq.disk_hdr); - SET_QNXTIO(q, QHEAD(q)); - - if (rdc_stamp_diskq(krdc, 0, RDC_NOLOG) < 0) - goto fail; - - return (0); -fail: - return (-1); -} - -/* - * inititalize the disk queue. This is a destructive - * operation that will not check for emptiness of the queue. - */ -int -rdc_init_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int rc = 0; - int index; - rdc_k_info_t *krdc, *kp; - rdc_u_info_t *urdc, *up; - rdc_set_t *uset; - rdc_group_t *group; - disk_queue *qp; - - uset = uparms->rdc_set; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uset); - mutex_exit(&rdc_conf_lock); - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, uset->primary.file, - uset->secondary.file); - rc = RDC_EALREADY; - goto fail; - } - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - group = krdc->group; - qp = &group->diskq; - - if (!IS_STATE(urdc, RDC_SYNCING) && !IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_EQUEISREP, urdc->disk_queue); - rc = RDC_EQUEISREP; - goto fail; - } - - /* - * a couple of big "ifs" here. in the first implementation - * neither of these will be possible. 
This will come into - * play when we persist the queue across reboots - */ - if (!(uparms->options & RDC_OPT_FORCE_QINIT)) { - if (!QEMPTY(qp)) { - if (group->rdc_writer) { - spcs_s_add(kstatus, RDC_EQFLUSHING, - urdc->disk_queue); - rc = RDC_EQFLUSHING; - } else { - spcs_s_add(kstatus, RDC_EQNOTEMPTY, - urdc->disk_queue); - rc = RDC_EQNOTEMPTY; - } - goto fail; - } - } - - mutex_enter(QLOCK(qp)); - if (_rdc_init_diskq(krdc) < 0) { - mutex_exit(QLOCK(qp)); - goto fail; - } - rdc_dump_iohdrs(qp); - - rdc_group_enter(krdc); - - rdc_clr_flags(urdc, RDC_QUEUING); - for (kp = krdc->group_next; kp != krdc; kp = kp->group_next) { - up = &rdc_u_info[kp->index]; - rdc_clr_flags(up, RDC_QUEUING); - } - rdc_group_exit(krdc); - - mutex_exit(QLOCK(qp)); - - return (0); -fail: - /* generic queue failure */ - if (!rc) { - spcs_s_add(kstatus, RDC_EQINITFAIL, urdc->disk_queue); - rc = RDC_EQINITFAIL; - } - - return (rc); -} - -int -_rdc_kill_diskq(rdc_u_info_t *urdc) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_group_t *group = krdc->group; - disk_queue *q = &group->diskq; - rdc_u_info_t *up; - rdc_k_info_t *p; - - group->flags |= RDC_DISKQ_KILL; -#ifdef DEBUG - cmn_err(CE_NOTE, "!disabling disk queue %s", urdc->disk_queue); -#endif - - mutex_enter(QLOCK(q)); - rdc_init_diskq_header(group, &q->disk_hdr); - rdc_dump_iohdrs(q); - - /* - * nsc_close the queue and zero out the queue name - */ - rdc_wait_qbusy(q); - rdc_close_diskq(group); - mutex_exit(QLOCK(q)); - SET_QSIZE(q, 0); - rdc_clr_flags(urdc, RDC_DISKQ_FAILED); - bzero(urdc->disk_queue, NSC_MAXPATH); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - up = &rdc_u_info[p->index]; - rdc_clr_flags(up, RDC_DISKQ_FAILED); - bzero(up->disk_queue, NSC_MAXPATH); - } - -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdc_kill_diskq: enabling memory queue"); -#endif - group->flags &= ~(RDC_DISKQUE|RDC_DISKQ_KILL); - group->flags |= RDC_MEMQUE; - return (0); -} - -/* - * remove this diskq regardless of whether it is draining or not - * stops the flusher by invalidating the qdata (ie, instant empty) - * remove the disk qeueue from the group, leaving the group with a memory - * queue. - */ -int -rdc_kill_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int rc; - int index; - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - rdc_set_t *rdc_set = uparms->rdc_set; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - mutex_exit(&rdc_conf_lock); - - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto failed; - } - - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - if (!RDC_IS_DISKQ(krdc->group)) { - spcs_s_add(kstatus, RDC_EQNOQUEUE, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_EQNOQUEUE; - goto failed; - } - -/* - * if (!IS_STATE(urdc, RDC_LOGGING)) { - * spcs_s_add(kstatus, RDC_EQNOTLOGGING, - * uparms->rdc_set->disk_queue); - * rc = RDC_EQNOTLOGGING; - * goto failed; - * } - */ - rdc_unintercept_diskq(krdc->group); /* stop protecting queue */ - rdc_group_enter(krdc); /* to prevent further flushing */ - rc = _rdc_kill_diskq(urdc); - rdc_group_exit(krdc); - -failed: - return (rc); -} - -/* - * remove a diskq from a group. - * removal of a diskq from a set, or rather - * a set from a queue, is done by reconfigging out - * of the group. 
This removes the diskq from a whole - * group and replaces it with a memory based queue - */ -#define NUM_RETRIES 15 /* Number of retries to wait if no progress */ -int -rdc_rem_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - int index; - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - rdc_k_info_t *this; - volatile rdc_group_t *group; - volatile disk_queue *diskq; - int threads, counter; - long blocks; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - mutex_exit(&rdc_conf_lock); - if (index < 0) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - this = &rdc_k_info[index]; - krdc = &rdc_k_info[index]; - - do { - if (!IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_EQNOTLOGGING, - urdc->disk_queue); - return (RDC_EQNOTLOGGING); - } - krdc = krdc->group_next; - urdc = &rdc_u_info[krdc->index]; - - } while (krdc != this); - - /* - * If there is no group or diskq configured, we can leave now - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - - /* - * Wait if not QEMPTY or threads still active - */ - counter = 0; - while (!QEMPTY(diskq) || group->rdc_thrnum) { - - /* - * Capture counters to determine if progress is being made - */ - blocks = QBLOCKS(diskq); - threads = group->rdc_thrnum; - - /* - * Wait - */ - delay(HZ); - - /* - * Has the group or disk queue gone away while delayed? - */ - if (!(group = krdc->group) || !(diskq = &group->diskq)) - return (0); - - /* - * Are we still seeing progress? - */ - if (blocks == QBLOCKS(diskq) && threads == group->rdc_thrnum) { - /* - * No progress see, decrement retry counter - */ - if (counter++ > NUM_RETRIES) { - /* - * No progress seen, increment retry counter - */ - int rc = group->rdc_thrnum ? - RDC_EQFLUSHING : RDC_EQNOTEMPTY; - spcs_s_add(kstatus, rc, urdc->disk_queue); - return (rc); - } - } else { - /* - * Reset counter, as we've made progress - */ - counter = 0; - } - } - - return (0); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.h b/usr/src/uts/common/avs/ns/rdc/rdc_diskq.h deleted file mode 100644 index 27b476d293..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.h +++ /dev/null @@ -1,332 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_DISKQ_H -#define _RDC_DISKQ_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -#define RDC_DISKQ_HEADER_OFF 0 /* beginning of disk */ -#define RDC_DISKQ_DATA_OFF FBA_LEN(1024) /* beginning of queue */ - -typedef struct qentry { - int magic; - int type; /* special data ? io? bitmap? 
*/ - nsc_off_t pos; /* position it will be in the rdc_aio_t */ - nsc_off_t hpos; /* starting pos of orig nsc_buf_t */ - nsc_off_t qpos; /* where this info is in the queue */ - nsc_size_t len; /* len */ - int flag; - int iostatus; - uint32_t setid; /* krdc */ - time_t time; - void *next; -} q_data; - -typedef union io_dat { - q_data dat; - char dummy[512]; -} io_hdr; - -#define RDC_IOHDR_MAGIC 0x494F4844 /* IOHD */ -#define RDC_IOHDR_DONE 0xDEADCAFE /* this q entry has been flushed */ -#define RDC_IOHDR_WAITING 0xBEEFCAFE /* this q entry is waiting for ack */ - -/* type */ -#define RDC_QUEUEIO 0x02 - -#define RDC_DISKQ_MAGIC 0x44534B51 -#define RDC_DISKQ_VER_ORIG 0x01 -#define RDC_DISKQ_VER_64BIT 0x02 - -#ifdef NSC_MULTI_TERABYTE -#define RDC_DISKQ_VERS RDC_DISKQ_VER_64BIT -#else -#define RDC_DISKQ_VERS RDC_DISKQ_VER_ORIG -#endif - -typedef struct diskqheader1 { - int magic; - int vers; - int state; - int head_offset; /* offset of meta-info of head (fbas) */ - int tail_offset; /* addr of next write (fbas) */ - int disk_size; /* allow growing ? (fbas) */ - long nitems; /* items */ - long blocks; /* fbas */ - int qwrap; /* where the tail wrapped */ - int auxqwrap; /* if the tail wraps again, before head wraps once */ - uint_t seq_last; /* last sequence before suspend */ - uint_t ack_last; /* last ack before suspend */ -} diskq_header1; - -typedef struct diskqheader2 { - int magic; - int vers; - int state; - uint64_t head_offset; /* offset of meta-info of head (fbas) */ - uint64_t tail_offset; /* addr of next write (fbas) */ - uint64_t disk_size; /* allow growing ? (fbas) */ - uint64_t nitems; /* items */ - uint64_t blocks; /* fbas */ - uint64_t qwrap; /* where the tail wrapped */ - uint64_t auxqwrap; /* if the tail wraps again, before head wraps once */ - uint_t seq_last; /* last sequence before suspend */ - uint_t ack_last; /* last ack before suspend */ -} diskq_header2; - -#ifdef NSC_MULTI_TERABYTE -typedef diskq_header2 diskq_header; -#ifdef _LP64 -#define RDC_DQFMT "lu" -#else -#define RDC_DQFMT "llu" -#endif -#else -typedef diskq_header1 diskq_header; -#define RDC_DQFMT "ld" -#endif -typedef union headr { - diskq_header h; - char dummy[512]; -} dqheader; - -/* flags for the state field in the header */ - -#define RDC_SHUTDOWN_OK 0x01 -#define RDC_SHUTDOWN_BAD 0x02 -#define QNXTIOWRAPD 0x04 -#define QHEADWRAPD 0x08 -#define QTAILBUSY 0x10 /* tell flusher not to grab, incomplete */ -#define RDC_QNOBLOCK 0x10000 /* can also be passed out by status */ -#define RDC_QBADRESUME 0x20 /* don't resume bit ref */ -#define RDC_QFULL 0x40 /* the queue is in a full delay loop */ -#define RDC_STOPPINGFLUSH 0x80 - -#define RDC_QFILLSTOP 0x01 /* diskq->memq flusher kill switch */ -#define RDC_QFILLSLEEP 0x02 /* explicit diskq->memq flusher sleep */ - -#define RDC_MAX_DISKQREAD 0x1000 /* max 2 mb q read */ - -typedef struct diskqueue { /* the incore info about the diskq */ - dqheader disk_hdr; /* info about the queue */ - long nitems_hwm; - long blocks_hwm; - long throttle_delay; - nsc_off_t last_tail; /* pos of the last tail write */ - volatile int inflbls; /* number of inflight blocks */ - volatile int inflitems; /* number of inflight blocks */ - - kmutex_t disk_qlock; /* protects all things in diskq */ - /* and all things in dqheader */ - - kmutex_t head_lock; - kcondvar_t busycv; - int busycnt; - nsc_off_t nxt_io; /* flushers head pointer */ - int hdrcnt; /* number of io_hdrs on list */ - nsc_off_t coalesc_bounds; /* don't coalesce below this offset */ - rdc_aio_t *lastio; /* cached copy of the last 
write on q */ - io_hdr *iohdrs; /* flushed, not ack'd on queue */ - io_hdr *hdr_last; /* tail of iohdr list */ - kcondvar_t qfullcv; /* block, queue is full */ -} disk_queue; - -/* diskq macros (gets) */ - -#define QHEAD(q) q->disk_hdr.h.head_offset -#define QNXTIO(q) q->nxt_io -#define QTAIL(q) q->disk_hdr.h.tail_offset -#define QNITEMS(q) q->disk_hdr.h.nitems -#define QBLOCKS(q) q->disk_hdr.h.blocks -#define QSTATE(q) q->disk_hdr.h.state -#define IS_QSTATE(q, s) (q->disk_hdr.h.state & s) -#define QSIZE(q) q->disk_hdr.h.disk_size -#define QMAGIC(q) q->disk_hdr.h.magic -#define QVERS(q) q->disk_hdr.h.vers -#define QSEQ(q) q->disk_hdr.h.seq_last -#define QACK(q) q->disk_hdr.h.ack_last -#define QEMPTY(q) ((QTAIL(q) == QHEAD(q))&&(!(QNITEMS(q)))) -#define QWRAP(q) q->disk_hdr.h.qwrap -#define AUXQWRAP(q) q->disk_hdr.h.auxqwrap -#define LASTQTAIL(q) q->last_tail -#define QCOALBOUNDS(q) q->coalesc_bounds - -/* diskq macros (sets) */ - -#define INC_QHEAD(q, n) q->disk_hdr.h.head_offset += n -#define INC_QNXTIO(q, n) q->nxt_io += n -#define DEC_QNXTIO(q, n) q->nxt_io -= n -#define DEC_QHEAD(q, n) q->disk_hdr.h.head_offset -= n -#define INC_QTAIL(q, n) q->disk_hdr.h.tail_offset += n -#define DEC_QTAIL(q, n) q->disk_hdr.h.tail_offset -= n -#define INC_QNITEMS(q, n) q->disk_hdr.h.nitems += n -#define DEC_QNITEMS(q, n) q->disk_hdr.h.nitems -= n -#define INC_QBLOCKS(q, n) q->disk_hdr.h.blocks += n -#define DEC_QBLOCKS(q, n) q->disk_hdr.h.blocks -= n - -#define SET_QMAGIC(q, n) q->disk_hdr.h.magic = n -#define SET_QSTATE(q, n) q->disk_hdr.h.state |= n -#define CLR_QSTATE(q, n) q->disk_hdr.h.state &= ~n -#define SET_QHEAD(q, n) q->disk_hdr.h.head_offset = n -#define SET_QNXTIO(q, n) q->nxt_io = n -#define SET_QHDRCNT(q, n) q->hdrcnt = n -#define SET_QTAIL(q, n) q->disk_hdr.h.tail_offset = n -#define SET_LASTQTAIL(q, n) q->last_tail = n -#define SET_LASTQWRITE(q, w) q->last_qwrite = w -#define SET_QSIZE(q, n) q->disk_hdr.h.disk_size = n -#define SET_QNITEMS(q, n) q->disk_hdr.h.nitems = n -#define SET_QBLOCKS(q, n) q->disk_hdr.h.blocks = n - -#define SET_QWRAP(q, n) q->disk_hdr.h.qwrap = n -#define CLR_QWRAP(q) q->disk_hdr.h.qwrap = 0 -#define SET_AUXQWRAP(q, n) q->disk_hdr.h.auxqwrap = n -#define CLR_AUXQWRAP(q) q->disk_hdr.h.auxqwrap = 0 -#define SET_QCOALBOUNDS(q, n) q->coalesc_bounds = n - -#define WRAPQTAIL(q) \ - do { \ - if (QWRAP(q)) { \ - SET_AUXQWRAP(q, QTAIL(q)); \ - } else { \ - SET_QWRAP(q, QTAIL(q)); \ - } \ - SET_QTAIL(q, RDC_DISKQ_DATA_OFF); \ - } while (0) - -#define DO_AUXQWRAP(q) \ - do { \ - SET_QWRAP(q, AUXQWRAP(q)); \ - SET_AUXQWRAP(q, 0); \ - } while (0) - -/* these can be wrapped by different threads, avoid the race */ -#define WRAPQHEAD(q) \ - do { \ - if (IS_QSTATE(q, QNXTIOWRAPD)) { \ - if (AUXQWRAP(q)) { \ - DO_AUXQWRAP(q); \ - } else { \ - SET_QWRAP(q, 0); \ - } \ - CLR_QSTATE(q, QNXTIOWRAPD); \ - } else { \ - SET_QSTATE(q, QHEADWRAPD); \ - } \ - SET_QHEAD(q, RDC_DISKQ_DATA_OFF); \ - } while (0) - -#define WRAPQNXTIO(q) \ - do { \ - if (IS_QSTATE(q, QHEADWRAPD)) { \ - if (AUXQWRAP(q)) { \ - DO_AUXQWRAP(q); \ - } else { \ - SET_QWRAP(q, 0); \ - } \ - CLR_QSTATE(q, QHEADWRAPD); \ - } else { \ - SET_QSTATE(q, QNXTIOWRAPD); \ - } \ - SET_QNXTIO(q, RDC_DISKQ_DATA_OFF); \ - } while (0) - -#define DQEND(q) (QWRAP(q)?QWRAP(q):QSIZE(q)) - -#define FITSONQ(q, n) \ - (((QBLOCKS(q)+QNITEMS(q)+RDC_DISKQ_DATA_OFF+n) >= \ - (uint64_t)DQEND(q))?0:1) - -/* diskq defines/macros (non-specific) */ - -#define RDC_NOLOG 0x00 -#define RDC_WAIT 0x01 -#define RDC_NOWAIT 0x02 -#define RDC_DOLOG 0x04 /* 
put the group into logging */ -#define RDC_NOFAIL 0x08 /* don't fail the queue, just init */ -#define RDC_GROUP_LOCKED 0x10 /* trust me, I have the group lock */ - -#define RDC_WRITTEN 0x10 /* data has been commited to queue */ -#define RDC_LAST 0x20 /* end of dequeued buffer, discard */ - -/* CSTYLED */ -#define RDC_BETWEEN(a,b,c) (a<b?((c>=a)&&(c<=b)):((a!=b)&&((c<b)||(c>=a)))) -/* CSTYLED */ - -#define QHEADSHLDWRAP(q) (QWRAP(q) && (QHEAD(q) >= QWRAP(q))) -#define QNXTIOSHLDWRAP(q) (QWRAP(q) && (QNXTIO(q) >= QWRAP(q))) -#define QTAILSHLDWRAP(q, size) (QTAIL(q) + size > QSIZE(q)) -#define QCOALESCEOK(q, dec) ((q->lastio->iostatus & RDC_WRITTEN) && \ - ((QTAIL(q) > QNXTIO(q)) ? \ - (((QTAIL(q) - dec) > QNXTIO(q)) && ((QTAIL(q) - dec) > \ - QCOALBOUNDS(q))):\ - (QNXTIOSHLDWRAP(q) && QTAIL(q) > RDC_DISKQ_DATA_OFF))) - -#define QLOCK(q) &q->disk_qlock -#define QTAILLOCK(q) &q->tail_lock -#define QHEADLOCK(q) &q->head_lock - -#define QDISPLAY(q) "qmagic: %x qvers: %d qstate: %x qhead: %" \ - NSC_SZFMT " qnxtio: %" NSC_SZFMT " qtail: %" NSC_SZFMT " qtaillast: %" \ - NSC_SZFMT " qsize: %" NSC_SZFMT " qnitems: %" RDC_DQFMT \ - " qblocks: %" RDC_DQFMT " coalbounds %" NSC_SZFMT, QMAGIC(q), \ - QVERS(q), QSTATE(q), QHEAD(q), QNXTIO(q), QTAIL(q), LASTQTAIL(q), \ - QSIZE(q), QNITEMS(q), QBLOCKS(q), QCOALBOUNDS(q) - -#define QDISPLAYND(q) "m: %x v: %d s: %d h: %" NSC_SZFMT " n: %" \ - NSC_SZFMT " t: %" NSC_SZFMT " l: %" NSC_SZFMT " z: %" NSC_SZFMT \ - " i: %" RDC_DQFMT " b: %" RDC_DQFMT " w: %" NSC_SZFMT \ - " a: %" NSC_SZFMT, \ - QMAGIC(q), QVERS(q), QSTATE(q), QHEAD(q), \ - QNXTIO(q), QTAIL(q), LASTQTAIL(q), QSIZE(q), QNITEMS(q), \ - QBLOCKS(q), QWRAP(q), AUXQWRAP(q) - -/* Disk queue flusher state */ -#define RDC_QFILL_AWAKE (0) -#define RDC_QFILL_ASLEEP (1) -#define RDC_QFILL_DEAD (-1) - -/* functions */ - -int rdc_add_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_rem_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_kill_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_init_diskq(rdc_config_t *uparms, spcs_s_info_t kstatus); -int rdc_lookup_diskq(char *path); -int rdc_diskq_inuse(rdc_set_t *set, char *diskq); -void rdc_dump_iohdrs(disk_queue *q); -extern void rdc_fixlen(rdc_aio_t *aio); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_DISKQ_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_health.c b/usr/src/uts/common/avs/ns/rdc/rdc_health.c deleted file mode 100644 index 16bc34242d..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_health.c +++ /dev/null @@ -1,800 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -/* - * RDC interface health monitoring code. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/debug.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> - -#include <sys/errno.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#include "rdc_io.h" -#include "rdc_clnt.h" - - -/* - * Forward declarations. - */ - -static void rdc_update_health(rdc_if_t *); - -/* - * Global data. - */ - -/* - * These structures are added when a new host name is introduced to the - * kernel. They never disappear (but that won't waste much space at all). - */ -typedef struct rdc_link_down { - char host[MAX_RDC_HOST_SIZE]; /* The host name of this link */ - int waiting; /* A user is waiting to be woken up */ - int link_down; /* The current state of the link */ - struct rdc_link_down *next; /* Chain */ - kcondvar_t syncd_cv; /* Syncd wakeup */ - kmutex_t syncd_mutex; /* Lock for syncd_cv */ -} rdc_link_down_t; -static rdc_link_down_t *rdc_link_down = NULL; - -int rdc_health_thres = RDC_HEALTH_THRESHOLD; -rdc_if_t *rdc_if_top; - - -/* - * IPv6 addresses are represented as 16bit hexadecimal integers - * separated by colons. Contiguous runs of zeros can be abbreviated by - * double colons: - * FF02:0:0:0:0:1:200E:8C6C - * | - * v - * FF02::1:200E:8C6C - */ -void -rdc_if_ipv6(const uint16_t *addr, char *buf) -{ - const int end = 8; /* 8 shorts, 128 bits in an IPv6 address */ - int i; - - for (i = 0; i < end; i++) { - if (i > 0) - (void) sprintf(buf, "%s:", buf); - - if (addr[i] != 0 || i == 0 || i == (end - 1)) { - /* first, last, or non-zero value */ - (void) sprintf(buf, "%s%x", buf, (int)addr[i]); - } else { - if ((i + 1) < end && addr[i + 1] != 0) { - /* single zero */ - (void) sprintf(buf, "%s%x", buf, (int)addr[i]); - } else { - /* skip contiguous zeros */ - while ((i + 1) < end && addr[i + 1] == 0) - i++; - } - } - } -} - -static void -rdc_if_xxx(rdc_if_t *ip, char *updown) -{ - if (strcmp("inet6", ip->srv->ri_knconf->knc_protofmly) == 0) { - uint16_t *this = (uint16_t *)ip->ifaddr.buf; - uint16_t *other = (uint16_t *)ip->r_ifaddr.buf; - char this_str[256], other_str[256]; - - bzero(this_str, sizeof (this_str)); - bzero(other_str, sizeof (other_str)); - rdc_if_ipv6(&this[4], this_str); - rdc_if_ipv6(&other[4], other_str); - - cmn_err(CE_NOTE, "!SNDR: Interface %s <==> %s : %s", - this_str, other_str, updown); - } else { - uchar_t *this = (uchar_t *)ip->ifaddr.buf; - uchar_t *other = (uchar_t *)ip->r_ifaddr.buf; - - cmn_err(CE_NOTE, - "!SNDR: Interface %d.%d.%d.%d <==> %d.%d.%d.%d : %s", - (int)this[4], (int)this[5], (int)this[6], (int)this[7], - (int)other[4], (int)other[5], (int)other[6], (int)other[7], - updown); - } -} - - -static void -rdc_if_down(rdc_if_t *ip) -{ - rdc_if_xxx(ip, "Down"); -} - - -static void -rdc_if_up(rdc_if_t *ip) -{ - rdc_if_xxx(ip, "Up"); -} - - -/* - * Health monitor for a single interface. - * - * The secondary sends ping RPCs to the primary. 
- * The primary just stores the results and updates its structures. - */ -static void -rdc_health_thread(void *arg) -{ - rdc_if_t *ip = (rdc_if_t *)arg; - struct rdc_ping ping; - struct rdc_ping6 ping6; - struct timeval t; - int down = 1; - int ret, err; - int sec = 0; - char ifaddr[RDC_MAXADDR]; - char r_ifaddr[RDC_MAXADDR]; - uint16_t *sp; - - bcopy(ip->ifaddr.buf, ifaddr, ip->ifaddr.len); - sp = (uint16_t *)ifaddr; - *sp = htons(*sp); - bcopy(ip->r_ifaddr.buf, r_ifaddr, ip->r_ifaddr.len); - sp = (uint16_t *)r_ifaddr; - *sp = htons(*sp); - - while ((ip->exiting != 1) && (net_exit != ATM_EXIT)) { - delay(HZ); - - /* setup RPC timeout */ - - t.tv_sec = rdc_rpc_tmout; - t.tv_usec = 0; - - if (ip->issecondary && !ip->no_ping) { - if (ip->rpc_version < RDC_VERSION7) { - bcopy(ip->r_ifaddr.buf, ping6.p_ifaddr, - RDC_MAXADDR); - /* primary ifaddr */ - bcopy(ip->ifaddr.buf, ping6.s_ifaddr, - RDC_MAXADDR); - /* secondary ifaddr */ - err = rdc_clnt_call_any(ip->srv, ip, - RDCPROC_PING4, xdr_rdc_ping6, - (char *)&ping6, xdr_int, (char *)&ret, &t); - } else { - ping.p_ifaddr.buf = r_ifaddr; - ping.p_ifaddr.len = ip->r_ifaddr.len; - ping.p_ifaddr.maxlen = ip->r_ifaddr.len; - ping.s_ifaddr.buf = ifaddr; - ping.s_ifaddr.len = ip->ifaddr.len; - ping.s_ifaddr.maxlen = ip->ifaddr.len; - err = rdc_clnt_call_any(ip->srv, ip, - RDCPROC_PING4, xdr_rdc_ping, (char *)&ping, - xdr_int, (char *)&ret, &t); - } - - - if (err || ret) { - /* RPC failed - link is down */ - if (!down && !ip->isprimary) { - /* - * don't print messages if also - * a primary - the primary will - * take care of it. - */ - rdc_if_down(ip); - down = 1; - } - rdc_dump_alloc_bufs(ip); - ip->no_ping = 1; - - /* - * Start back at the max possible version - * since the remote server could come back - * on a different protocol version. - */ - mutex_enter(&rdc_ping_lock); - ip->rpc_version = RDC_VERS_MAX; - mutex_exit(&rdc_ping_lock); - } else { - if (down && !ip->isprimary) { - /* - * was failed, but now ok - * - * don't print messages if also - * a primary - the primary will - * take care of it. - */ - rdc_if_up(ip); - down = 0; - } - } - } - if (!ip->isprimary && down && ++sec == 5) { - sec = 0; - rdc_dump_alloc_bufs(ip); - } - - if (ip->isprimary) - rdc_update_health(ip); - } - - /* signal that this thread is done */ - ip->exiting = 2; -} - - -int -rdc_isactive_if(struct netbuf *addr, struct netbuf *r_addr) -{ - rdc_if_t *ip; - int rc = 0; - - /* search for existing interface structure */ - - mutex_enter(&rdc_ping_lock); - for (ip = rdc_if_top; ip; ip = ip->next) { - if (ip->exiting != 0) - continue; - if (((bcmp(ip->ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->r_ifaddr.buf, r_addr->buf, r_addr->len) == 0)) || - ((bcmp(ip->r_ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->ifaddr.buf, r_addr->buf, r_addr->len) == 0))) { - /* found matching interface structure */ - if (ip->isprimary && !ip->if_down) { - rc = 1; - } else if (ip->issecondary && !ip->no_ping) { - rc = 1; - } - break; - } - } - mutex_exit(&rdc_ping_lock); - return (rc); -} - -/* - * Set the rdc rpc version of the rdc_if_t. - * - * Called from incoming rpc calls which start before - * the health service becomes established. 
- */ -void -rdc_set_if_vers(rdc_u_info_t *urdc, rpcvers_t vers) -{ - rdc_if_t *ip; - struct netbuf *addr, *r_addr; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - addr = &(urdc->primary.addr); - r_addr = &(urdc->secondary.addr); - } else { - addr = &(urdc->secondary.addr); - r_addr = &(urdc->primary.addr); - } - - /* search for existing interface structure */ - - mutex_enter(&rdc_ping_lock); - for (ip = rdc_if_top; ip; ip = ip->next) { - if (ip->exiting != 0) - continue; - if (((bcmp(ip->ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->r_ifaddr.buf, r_addr->buf, r_addr->len) == 0)) || - ((bcmp(ip->r_ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->ifaddr.buf, r_addr->buf, r_addr->len) == 0))) { - /* found matching interface structure */ - ip->rpc_version = vers; -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc intf %p rpc version set to %u", - (void *)ip, vers); -#endif - break; - } - } - mutex_exit(&rdc_ping_lock); -} - -/* - * Free all the rdc_link_down structures (only at module unload time) - */ -void -rdc_link_down_free() -{ - rdc_link_down_t *p; - rdc_link_down_t *q; - - if (rdc_link_down == NULL) - return; - - for (p = rdc_link_down->next; p != rdc_link_down; ) { - q = p; - p = p->next; - kmem_free(q, sizeof (*q)); - } - kmem_free(rdc_link_down, sizeof (*q)); - rdc_link_down = NULL; -} - - -/* - * Look up the supplied hostname in the rdc_link_down chain. Add a new - * entry if it isn't found. Return a pointer to the new or found entry. - */ -static rdc_link_down_t * -rdc_lookup_host(char *host) -{ - rdc_link_down_t *p; - - mutex_enter(&rdc_ping_lock); - - if (rdc_link_down == NULL) { - rdc_link_down = kmem_zalloc(sizeof (*rdc_link_down), KM_SLEEP); - rdc_link_down->next = rdc_link_down; - } - - for (p = rdc_link_down->next; p != rdc_link_down; p = p->next) { - if (strcmp(host, p->host) == 0) { - /* Match */ - mutex_exit(&rdc_ping_lock); - return (p); - } - } - - /* No match, must create a new entry */ - - p = kmem_zalloc(sizeof (*p), KM_SLEEP); - p->link_down = 1; - p->next = rdc_link_down->next; - rdc_link_down->next = p; - (void) strncpy(p->host, host, MAX_RDC_HOST_SIZE); - mutex_init(&p->syncd_mutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&p->syncd_cv, NULL, CV_DRIVER, NULL); - - mutex_exit(&rdc_ping_lock); - return (p); -} - - -/* - * Handle the RDC_LINK_DOWN ioctl. - * The user specifies which host they're interested in. - * This function is woken up when the link to that host goes down. - */ - -/* ARGSUSED3 */ -int -_rdc_link_down(void *arg, int mode, spcs_s_info_t kstatus, int *rvp) -{ - char host[MAX_RDC_HOST_SIZE]; - rdc_link_down_t *syncdp; - clock_t timeout = RDC_SYNC_EVENT_TIMEOUT * 2; /* 2 min */ - int rc = 0; - - if (ddi_copyin(arg, host, MAX_RDC_HOST_SIZE, mode)) - return (EFAULT); - - - syncdp = rdc_lookup_host(host); - - mutex_enter(&syncdp->syncd_mutex); - if (!syncdp->link_down) { - syncdp->waiting = 1; - if (cv_timedwait_sig(&syncdp->syncd_cv, &syncdp->syncd_mutex, - nsc_lbolt() + timeout) == 0) { - /* Woken by a signal, not a link down event */ - syncdp->waiting = 0; - rc = EAGAIN; - spcs_s_add(kstatus, rc); - } - - } - mutex_exit(&syncdp->syncd_mutex); - - return (rc); -} - - -/* - * Add an RDC set to an interface - * - * If the interface is new, add it to the list of interfaces. 
- */ -rdc_if_t * -rdc_add_to_if(rdc_srv_t *svp, struct netbuf *addr, struct netbuf *r_addr, - int primary) -{ - rdc_if_t *new, *ip; - - if ((addr->buf == NULL) || (r_addr->buf == NULL)) - return (NULL); - - /* setup a new interface structure */ - new = (rdc_if_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); - if (!new) - return (NULL); - - dup_rdc_netbuf(addr, &new->ifaddr); - dup_rdc_netbuf(r_addr, &new->r_ifaddr); - new->rpc_version = RDC_VERS_MAX; - new->srv = rdc_create_svinfo(svp->ri_hostname, &svp->ri_addr, - svp->ri_knconf); - new->old_pulse = -1; - new->new_pulse = 0; - - if (!new->srv) { - free_rdc_netbuf(&new->r_ifaddr); - free_rdc_netbuf(&new->ifaddr); - kmem_free(new, sizeof (*new)); - return (NULL); - } - - /* search for existing interface structure */ - - mutex_enter(&rdc_ping_lock); - - for (ip = rdc_if_top; ip; ip = ip->next) { - if ((bcmp(ip->ifaddr.buf, addr->buf, addr->len) == 0) && - (bcmp(ip->r_ifaddr.buf, r_addr->buf, r_addr->len) == 0) && - ip->exiting == 0) { - /* found matching interface structure */ - break; - } - } - - if (!ip) { - /* add new into the chain */ - - new->next = rdc_if_top; - rdc_if_top = new; - ip = new; - - /* start daemon */ - - ip->last = nsc_time(); - ip->deadness = 1; - ip->if_down = 1; - - if (nsc_create_process(rdc_health_thread, ip, TRUE)) { - mutex_exit(&rdc_ping_lock); - return (NULL); - } - } - - /* mark usage type */ - - if (primary) { - ip->isprimary = 1; - } else { - ip->issecondary = 1; - ip->no_ping = 0; - } - - mutex_exit(&rdc_ping_lock); - - /* throw away new if it was not used */ - - if (ip != new) { - free_rdc_netbuf(&new->r_ifaddr); - free_rdc_netbuf(&new->ifaddr); - rdc_destroy_svinfo(new->srv); - kmem_free(new, sizeof (*new)); - } - - return (ip); -} - - -/* - * Update an interface following the removal of an RDC set. - * - * If there are no more RDC sets using the interface, delete it from - * the list of interfaces. - * - * Either clear krdc->intf, or ensure !IS_CONFIGURED(krdc) before calling this. - */ -void -rdc_remove_from_if(rdc_if_t *ip) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_if_t **ipp; - int pfound = 0; - int sfound = 0; - int delete = 1; - int index; - - mutex_enter(&rdc_ping_lock); - - /* - * search for RDC sets using this interface and update - * the isprimary and issecondary flags. - */ - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - if (IS_CONFIGURED(krdc) && krdc->intf == ip) { - delete = 0; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - pfound = 1; - } else { - sfound = 1; - } - - if (pfound && sfound) - break; - } - } - - ip->isprimary = pfound; - ip->issecondary = sfound; - - if (!delete || ip->exiting > 0) { - mutex_exit(&rdc_ping_lock); - return; - } - - /* mark and wait for daemon to exit */ - - ip->exiting = 1; - - mutex_exit(&rdc_ping_lock); - - while (ip->exiting == 1) - delay(drv_usectohz(10)); - - mutex_enter(&rdc_ping_lock); - - ASSERT(ip->exiting == 2); - - /* remove from chain */ - - for (ipp = &rdc_if_top; *ipp; ipp = &((*ipp)->next)) { - if (*ipp == ip) { - *ipp = ip->next; - break; - } - } - - mutex_exit(&rdc_ping_lock); - - /* free unused interface structure */ - - free_rdc_netbuf(&ip->r_ifaddr); - free_rdc_netbuf(&ip->ifaddr); - rdc_destroy_svinfo(ip->srv); - kmem_free(ip, sizeof (*ip)); -} - - -/* - * Check the status of the link to the secondary, and optionally update - * the primary-side ping variables. - * - * For use on a primary only. - * - * Returns: - * TRUE - interface up. - * FALSE - interface down. 
- */ -int -rdc_check_secondary(rdc_if_t *ip, int update) -{ - int rc = TRUE; - - if (!ip || !ip->isprimary) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_check_secondary: ip %p, isprimary %d, issecondary %d", - (void *) ip, ip ? ip->isprimary : 0, - ip ? ip->issecondary : 0); -#endif - return (FALSE); - } - - if (!ip->deadness) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check_secondary: ip %p, ip->deadness %d", - (void *) ip, ip->deadness); -#endif - return (FALSE); - } - - if (!update) { - /* quick look */ - return ((ip->deadness > rdc_health_thres) ? FALSE : TRUE); - } - - /* update (slow) with lock */ - - mutex_enter(&rdc_ping_lock); - - if (ip->old_pulse == ip->new_pulse) { - /* - * ping has not been received since last update - * or we have not yet been pinged, - * the health thread has started only as a - * local client so far, not so on the other side - */ - - if (ip->last != nsc_time()) { - /* time has passed, so move closer to death */ - - ip->last = nsc_time(); - ip->deadness++; - - if (ip->deadness <= 0) { - /* avoid the wrap */ - ip->deadness = rdc_health_thres + 1; - } - } - - if (ip->deadness > rdc_health_thres) { - rc = FALSE; - /* - * Start back at the max possible version - * since the remote server could come back - * on a different protocol version. - */ - ip->rpc_version = RDC_VERS_MAX; - } - } else { - ip->old_pulse = ip->new_pulse; - } - - mutex_exit(&rdc_ping_lock); - return (rc); -} - - -/* - * Update the interface structure with the latest ping info, and - * perform interface up/down transitions if required. - * - * For use on a primary only. - */ -static void -rdc_update_health(rdc_if_t *ip) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int index; - rdc_link_down_t *syncdp; - - if (!ip->isprimary) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc_update_health: ip %p, isprimary %d, issecondary %d", - (void *) ip, ip ? ip->isprimary : 0, - ip ? 
ip->issecondary : 0); -#endif - return; - } - - if (!rdc_check_secondary(ip, TRUE)) { - /* interface down */ - if (!ip->if_down) { - rdc_if_down(ip); - ip->if_down = 1; - - /* scan rdc sets and update status */ - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - if (IS_ENABLED(urdc) && (krdc->intf == ip) && - (rdc_get_vflags(urdc) & RDC_PRIMARY) && - !(rdc_get_vflags(urdc) & RDC_LOGGING)) { - /* mark down */ - - rdc_group_enter(krdc); - /* - * check for possible race with - * with delete logic - */ - if (!IS_ENABLED(urdc)) { - rdc_group_exit(krdc); - continue; - } - rdc_group_log(krdc, RDC_NOFLUSH | - RDC_NOREMOTE | RDC_QUEUING, - "hm detected secondary " - "interface down"); - - rdc_group_exit(krdc); - - /* dump async queues */ - rdc_dump_queue(index); - } - } - - /* dump allocated bufs */ - rdc_dump_alloc_bufs(ip); - } - - syncdp = rdc_lookup_host(ip->srv->ri_hostname); - mutex_enter(&syncdp->syncd_mutex); - if (syncdp->link_down == 0) { - /* Link has gone down, notify rdcsyncd daemon */ - syncdp->link_down = 1; - if (syncdp->waiting) { - syncdp->waiting = 0; - cv_signal(&syncdp->syncd_cv); - } - } - mutex_exit(&syncdp->syncd_mutex); - } else { - /* interface up */ - if (ip->if_down && ip->isprimary) { - rdc_if_up(ip); - ip->if_down = 0; - } - - syncdp = rdc_lookup_host(ip->srv->ri_hostname); - mutex_enter(&syncdp->syncd_mutex); - if (syncdp->link_down) { - /* Link has come back up */ - syncdp->link_down = 0; - } - mutex_exit(&syncdp->syncd_mutex); - } -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_io.c b/usr/src/uts/common/avs/ns/rdc/rdc_io.c deleted file mode 100644 index 89949b0b33..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_io.c +++ /dev/null @@ -1,6718 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/conf.h> -#include <sys/errno.h> -#include <sys/sysmacros.h> - -#ifdef _SunOS_5_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. 
- */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_5_6 */ - -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdc_update.h" -#include "rdc_ioctl.h" -#include "rdcsrv.h" -#include "rdc_diskq.h" - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -volatile int net_exit; -nsc_size_t MAX_RDC_FBAS; - -#ifdef DEBUG -int RDC_MAX_SYNC_THREADS = 8; -int rdc_maxthreads_last = 8; -#endif - -kmutex_t rdc_ping_lock; /* Ping lock */ -static kmutex_t net_blk_lock; - -/* - * rdc_conf_lock is used as a global device configuration lock. - * It is also used by enable/resume and disable/suspend code to ensure that - * the transition of an rdc set between configured and unconfigured is - * atomic. - * - * krdc->group->lock is used to protect state changes of a configured rdc - * set (e.g. changes to urdc->flags), such as enabled to disabled and vice - * versa. - * - * rdc_many_lock is also used to protect changes in group membership. A group - * linked list cannot change while this lock is held. The many list and the - * multi-hop list are both protected by rdc_many_lock. - */ -kmutex_t rdc_conf_lock; -kmutex_t rdc_many_lock; /* Many/multi-list lock */ - -static kmutex_t rdc_net_hnd_id_lock; /* Network handle id lock */ -int rdc_debug = 0; -int rdc_debug_sleep = 0; - -static int rdc_net_hnd_id = 1; - -extern kmutex_t rdc_clnt_lock; - -static void rdc_ditemsfree(rdc_net_dataset_t *); -void rdc_clnt_destroy(void); - -rdc_k_info_t *rdc_k_info; -rdc_u_info_t *rdc_u_info; - -unsigned long rdc_async_timeout; - -nsc_size_t rdc_maxthres_queue = RDC_MAXTHRES_QUEUE; -int rdc_max_qitems = RDC_MAX_QITEMS; -int rdc_asyncthr = RDC_ASYNCTHR; -static nsc_svc_t *rdc_volume_update; -static int rdc_prealloc_handle = 1; - -extern int _rdc_rsrv_diskq(rdc_group_t *group); -extern void _rdc_rlse_diskq(rdc_group_t *group); - -/* - * Forward declare all statics that are used before defined - * to enforce parameter checking - * - * Some (if not all) of these could be removed if the code were reordered - */ - -static void rdc_volume_update_svc(intptr_t); -static void halt_sync(rdc_k_info_t *krdc); -void rdc_kstat_create(int index); -void rdc_kstat_delete(int index); -static int rdc_checkforbitmap(int, nsc_off_t); -static int rdc_installbitmap(int, void *, int, nsc_off_t, int, int *, int); -static rdc_group_t *rdc_newgroup(); - -int rdc_enable_diskq(rdc_k_info_t *krdc); -void rdc_close_diskq(rdc_group_t *group); -int rdc_suspend_diskq(rdc_k_info_t *krdc); -int rdc_resume_diskq(rdc_k_info_t *krdc); -void rdc_init_diskq_header(rdc_group_t *grp, dqheader *header); -void rdc_fail_diskq(rdc_k_info_t *krdc, int wait, int dolog); -void rdc_unfail_diskq(rdc_k_info_t *krdc); -void rdc_unintercept_diskq(rdc_group_t *grp); -int rdc_stamp_diskq(rdc_k_info_t *krdc, int rsrvd, int flags); -void rdc_qfiller_thr(rdc_k_info_t *krdc); - -nstset_t *_rdc_ioset; -nstset_t *_rdc_flset; - -/* - * RDC threadset tunables - */ -int rdc_threads = 64; /* default number of threads */ -int rdc_threads_inc = 8; /* increment for changing the size of the set */ - -/* - * Private threadset manipulation variables - */ -static int rdc_threads_hysteresis = 2; - /* hysteresis for threadset resizing */ -static int rdc_sets_active; /* number of sets currently enabled */ - -#ifdef DEBUG -kmutex_t 
rdc_cntlock; -#endif - -/* - * rdc_thread_deconfigure - rdc is being deconfigured, stop any - * thread activity. - * - * Inherently single-threaded by the Solaris module unloading code. - */ -static void -rdc_thread_deconfigure(void) -{ - nst_destroy(_rdc_ioset); - _rdc_ioset = NULL; - - nst_destroy(_rdc_flset); - _rdc_flset = NULL; - - nst_destroy(sync_info.rdc_syncset); - sync_info.rdc_syncset = NULL; -} - -/* - * rdc_thread_configure - rdc is being configured, initialize the - * threads we need for flushing aync volumes. - * - * Must be called with rdc_conf_lock held. - */ -static int -rdc_thread_configure(void) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if ((_rdc_ioset = nst_init("rdc_thr", rdc_threads)) == NULL) - return (EINVAL); - - if ((_rdc_flset = nst_init("rdc_flushthr", 2)) == NULL) - return (EINVAL); - - if ((sync_info.rdc_syncset = - nst_init("rdc_syncthr", RDC_MAX_SYNC_THREADS)) == NULL) - return (EINVAL); - - return (0); -} - - -/* - * rdc_thread_tune - called to tune the size of the rdc threadset. - * - * Called from the config code when an rdc_set has been enabled or disabled. - * 'sets' is the increment to the number of active rdc_sets. - * - * Must be called with rdc_conf_lock held. - */ -static void -rdc_thread_tune(int sets) -{ - int incr = (sets > 0) ? 1 : -1; - int change = 0; - int nthreads; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (sets < 0) - sets = -sets; - - while (sets--) { - nthreads = nst_nthread(_rdc_ioset); - rdc_sets_active += incr; - - if (rdc_sets_active >= nthreads) - change += nst_add_thread(_rdc_ioset, rdc_threads_inc); - else if ((rdc_sets_active < - (nthreads - (rdc_threads_inc + rdc_threads_hysteresis))) && - ((nthreads - rdc_threads_inc) >= rdc_threads)) - change -= nst_del_thread(_rdc_ioset, rdc_threads_inc); - } - -#ifdef DEBUG - if (change) { - cmn_err(CE_NOTE, "!rdc_thread_tune: " - "nsets %d, nthreads %d, nthreads change %d", - rdc_sets_active, nst_nthread(_rdc_ioset), change); - } -#endif -} - - -/* - * _rdc_unload() - cache is being unloaded, - * deallocate any dual copy structures allocated during cache - * loading. - */ -void -_rdc_unload(void) -{ - int i; - rdc_k_info_t *krdc; - - if (rdc_volume_update) { - (void) nsc_unregister_svc(rdc_volume_update); - rdc_volume_update = NULL; - } - - rdc_thread_deconfigure(); - - if (rdc_k_info != NULL) { - for (i = 0; i < rdc_max_sets; i++) { - krdc = &rdc_k_info[i]; - mutex_destroy(&krdc->dc_sleep); - mutex_destroy(&krdc->bmapmutex); - mutex_destroy(&krdc->kstat_mutex); - mutex_destroy(&krdc->bmp_kstat_mutex); - mutex_destroy(&krdc->syncbitmutex); - cv_destroy(&krdc->busycv); - cv_destroy(&krdc->closingcv); - cv_destroy(&krdc->haltcv); - cv_destroy(&krdc->synccv); - } - } - - mutex_destroy(&sync_info.lock); - mutex_destroy(&rdc_ping_lock); - mutex_destroy(&net_blk_lock); - mutex_destroy(&rdc_conf_lock); - mutex_destroy(&rdc_many_lock); - mutex_destroy(&rdc_net_hnd_id_lock); - mutex_destroy(&rdc_clnt_lock); -#ifdef DEBUG - mutex_destroy(&rdc_cntlock); -#endif - net_exit = ATM_EXIT; - - if (rdc_k_info != NULL) - kmem_free(rdc_k_info, sizeof (*rdc_k_info) * rdc_max_sets); - if (rdc_u_info != NULL) - kmem_free(rdc_u_info, sizeof (*rdc_u_info) * rdc_max_sets); - rdc_k_info = NULL; - rdc_u_info = NULL; - rdc_max_sets = 0; -} - - -/* - * _rdc_load() - rdc is being loaded, Allocate anything - * that will be needed while the cache is loaded but doesn't really - * depend on configuration parameters. 
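rdc_thread_tune() above grows and shrinks the I/O threadset in steps of rdc_threads_inc, and the shrink test is offset by rdc_threads_hysteresis so a set count hovering near a step boundary does not resize the pool on every enable and disable. The same policy in isolation, with a plain integer standing in for the nstset_t size and the default tunables (64/8/2) from above:

/* Illustrative resize policy; "pool" is the current thread count. */
#define	POOL_MIN	64	/* rdc_threads: never shrink below this */
#define	POOL_STEP	8	/* rdc_threads_inc */
#define	POOL_SLACK	2	/* rdc_threads_hysteresis */

static int
pool_retune(int pool, int active_sets)
{
	if (active_sets >= pool)
		pool += POOL_STEP;	/* demand caught up: grow one step */
	else if (active_sets < pool - (POOL_STEP + POOL_SLACK) &&
	    pool - POOL_STEP >= POOL_MIN)
		pool -= POOL_STEP;	/* well under the boundary: shrink */
	return (pool);
}

As in rdc_thread_tune(), the caller applies this once per set enabled or disabled rather than jumping straight to the final size.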
- * - */ -int -_rdc_load(void) -{ - int i; - rdc_k_info_t *krdc; - - mutex_init(&rdc_ping_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&net_blk_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_conf_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_many_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_net_hnd_id_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&rdc_clnt_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&sync_info.lock, NULL, MUTEX_DRIVER, NULL); - -#ifdef DEBUG - mutex_init(&rdc_cntlock, NULL, MUTEX_DRIVER, NULL); -#endif - - if ((i = nsc_max_devices()) < rdc_max_sets) - rdc_max_sets = i; - /* following case for partial installs that may fail */ - if (!rdc_max_sets) - rdc_max_sets = 1024; - - rdc_k_info = kmem_zalloc(sizeof (*rdc_k_info) * rdc_max_sets, KM_SLEEP); - if (!rdc_k_info) - return (ENOMEM); - - rdc_u_info = kmem_zalloc(sizeof (*rdc_u_info) * rdc_max_sets, KM_SLEEP); - if (!rdc_u_info) { - kmem_free(rdc_k_info, sizeof (*rdc_k_info) * rdc_max_sets); - return (ENOMEM); - } - - net_exit = ATM_NONE; - for (i = 0; i < rdc_max_sets; i++) { - krdc = &rdc_k_info[i]; - bzero(krdc, sizeof (*krdc)); - krdc->index = i; - mutex_init(&krdc->dc_sleep, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->bmapmutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->kstat_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->bmp_kstat_mutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&krdc->syncbitmutex, NULL, MUTEX_DRIVER, NULL); - cv_init(&krdc->busycv, NULL, CV_DRIVER, NULL); - cv_init(&krdc->closingcv, NULL, CV_DRIVER, NULL); - cv_init(&krdc->haltcv, NULL, CV_DRIVER, NULL); - cv_init(&krdc->synccv, NULL, CV_DRIVER, NULL); - } - - rdc_volume_update = nsc_register_svc("RDCVolumeUpdated", - rdc_volume_update_svc); - - return (0); -} - -static void -rdc_u_init(rdc_u_info_t *urdc) -{ - const int index = (int)(urdc - &rdc_u_info[0]); - - if (urdc->secondary.addr.maxlen) - free_rdc_netbuf(&urdc->secondary.addr); - if (urdc->primary.addr.maxlen) - free_rdc_netbuf(&urdc->primary.addr); - - bzero(urdc, sizeof (rdc_u_info_t)); - - urdc->index = index; - urdc->maxqfbas = rdc_maxthres_queue; - urdc->maxqitems = rdc_max_qitems; - urdc->asyncthr = rdc_asyncthr; -} - -/* - * _rdc_configure() - cache is being configured. - * - * Initialize dual copy structures - */ -int -_rdc_configure(void) -{ - int index; - rdc_k_info_t *krdc; - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - - krdc->remote_index = -1; - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - krdc->many_next = krdc; - - rdc_u_init(&rdc_u_info[index]); - } - - rdc_async_timeout = 120 * HZ; /* Seconds * HZ */ - MAX_RDC_FBAS = FBA_LEN(RDC_MAXDATA); - if (net_exit != ATM_INIT) { - net_exit = ATM_INIT; - return (0); - } - return (0); -} - -/* - * _rdc_deconfigure - rdc is being deconfigured, shut down any - * dual copy operations and return to an unconfigured state. 
- */ -void -_rdc_deconfigure(void) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int index; - - for (index = 0; index < rdc_max_sets; index++) { - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - krdc->remote_index = -1; - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - krdc->many_next = krdc; - - if (urdc->primary.addr.maxlen) - free_rdc_netbuf(&(urdc->primary.addr)); - - if (urdc->secondary.addr.maxlen) - free_rdc_netbuf(&(urdc->secondary.addr)); - - bzero(urdc, sizeof (rdc_u_info_t)); - urdc->index = index; - } - net_exit = ATM_EXIT; - rdc_clnt_destroy(); - -} - - -/* - * Lock primitives, containing checks that lock ordering isn't broken - */ -/*ARGSUSED*/ -void -rdc_many_enter(rdc_k_info_t *krdc) -{ - ASSERT(!MUTEX_HELD(&krdc->bmapmutex)); - - mutex_enter(&rdc_many_lock); -} - -/* ARGSUSED */ -void -rdc_many_exit(rdc_k_info_t *krdc) -{ - mutex_exit(&rdc_many_lock); -} - -void -rdc_group_enter(rdc_k_info_t *krdc) -{ - ASSERT(!MUTEX_HELD(&rdc_many_lock)); - ASSERT(!MUTEX_HELD(&rdc_conf_lock)); - ASSERT(!MUTEX_HELD(&krdc->bmapmutex)); - - mutex_enter(&krdc->group->lock); -} - -void -rdc_group_exit(rdc_k_info_t *krdc) -{ - mutex_exit(&krdc->group->lock); -} - -/* - * Suspend and disable operations use this function to wait until it is safe - * to do continue, without trashing data structures used by other ioctls. - */ -static void -wait_busy(rdc_k_info_t *krdc) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - while (krdc->busy_count > 0) - cv_wait(&krdc->busycv, &rdc_conf_lock); -} - - -/* - * Other ioctls use this function to hold off disable and suspend. - */ -void -set_busy(rdc_k_info_t *krdc) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - wait_busy(krdc); - - krdc->busy_count++; -} - - -/* - * Other ioctls use this function to allow disable and suspend to continue. - */ -void -wakeup_busy(rdc_k_info_t *krdc) -{ - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - if (krdc->busy_count <= 0) - return; - - krdc->busy_count--; - cv_broadcast(&krdc->busycv); -} - - -/* - * Remove the rdc set from its group, and destroy the group if no longer in - * use. - */ -static void -remove_from_group(rdc_k_info_t *krdc) -{ - rdc_k_info_t *p; - rdc_group_t *group; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - rdc_many_enter(krdc); - group = krdc->group; - - group->count--; - - /* - * lock queue while looking at thrnum - */ - mutex_enter(&group->ra_queue.net_qlock); - if ((group->rdc_thrnum == 0) && (group->count == 0)) { - - /* - * Assure the we've stopped and the flusher thread has not - * fallen back to sleep - */ - if (krdc->group->ra_queue.qfill_sleeping != RDC_QFILL_DEAD) { - group->ra_queue.qfflags |= RDC_QFILLSTOP; - while (krdc->group->ra_queue.qfflags & RDC_QFILLSTOP) { - if (krdc->group->ra_queue.qfill_sleeping == - RDC_QFILL_ASLEEP) - cv_broadcast(&group->ra_queue.qfcv); - mutex_exit(&group->ra_queue.net_qlock); - delay(2); - mutex_enter(&group->ra_queue.net_qlock); - } - } - mutex_exit(&group->ra_queue.net_qlock); - - mutex_enter(&group->diskqmutex); - rdc_close_diskq(group); - mutex_exit(&group->diskqmutex); - rdc_delgroup(group); - rdc_many_exit(krdc); - krdc->group = NULL; - return; - } - mutex_exit(&group->ra_queue.net_qlock); - /* - * Always clear the group field. - * no, you need it set in rdc_flush_memq(). 
- * to call rdc_group_log() - * krdc->group = NULL; - */ - - /* Take this rdc structure off the group list */ - - for (p = krdc->group_next; p->group_next != krdc; p = p->group_next) - ; - p->group_next = krdc->group_next; - - rdc_many_exit(krdc); -} - - -/* - * Add the rdc set to its group, setting up a new group if it's the first one. - */ -static int -add_to_group(rdc_k_info_t *krdc, int options, int cmd) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_u_info_t *utmp; - rdc_k_info_t *ktmp; - int index; - rdc_group_t *group; - int rc = 0; - nsthread_t *trc; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - /* - * Look for matching group name, primary host name and secondary - * host name. - */ - - rdc_many_enter(krdc); - for (index = 0; index < rdc_max_sets; index++) { - utmp = &rdc_u_info[index]; - ktmp = &rdc_k_info[index]; - - if (urdc->group_name[0] == 0) - break; - - if (!IS_CONFIGURED(ktmp)) - continue; - - if (strncmp(utmp->group_name, urdc->group_name, - NSC_MAXPATH) != 0) - continue; - if (strncmp(utmp->primary.intf, urdc->primary.intf, - MAX_RDC_HOST_SIZE) != 0) { - /* Same group name, different primary interface */ - rdc_many_exit(krdc); - return (-1); - } - if (strncmp(utmp->secondary.intf, urdc->secondary.intf, - MAX_RDC_HOST_SIZE) != 0) { - /* Same group name, different secondary interface */ - rdc_many_exit(krdc); - return (-1); - } - - /* Group already exists, so add this set to the group */ - - if (((options & RDC_OPT_ASYNC) == 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) != 0)) { - /* Must be same mode as existing group members */ - rdc_many_exit(krdc); - return (-1); - } - if (((options & RDC_OPT_ASYNC) != 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) == 0)) { - /* Must be same mode as existing group members */ - rdc_many_exit(krdc); - return (-1); - } - - /* cannont reconfigure existing group into new queue this way */ - if ((cmd != RDC_CMD_RESUME) && - !RDC_IS_DISKQ(ktmp->group) && urdc->disk_queue[0] != '\0') { - rdc_many_exit(krdc); - return (RDC_EQNOADD); - } - - ktmp->group->count++; - krdc->group = ktmp->group; - krdc->group_next = ktmp->group_next; - ktmp->group_next = krdc; - - urdc->autosync = utmp->autosync; /* Same as rest */ - - (void) strncpy(urdc->disk_queue, utmp->disk_queue, NSC_MAXPATH); - - rdc_many_exit(krdc); - return (0); - } - - /* This must be a new group */ - group = rdc_newgroup(); - krdc->group = group; - krdc->group_next = krdc; - urdc->autosync = -1; /* Unknown */ - - /* - * Tune the thread set by one for each thread created - */ - rdc_thread_tune(1); - - trc = nst_create(_rdc_ioset, rdc_qfiller_thr, (void *)krdc, NST_SLEEP); - if (trc == NULL) { - rc = -1; - cmn_err(CE_NOTE, "!unable to create queue filler daemon"); - goto fail; - } - - if (urdc->disk_queue[0] == '\0') { - krdc->group->flags |= RDC_MEMQUE; - } else { - krdc->group->flags |= RDC_DISKQUE; - - /* XXX check here for resume or enable and act accordingly */ - - if (cmd == RDC_CMD_RESUME) { - rc = rdc_resume_diskq(krdc); - - } else if (cmd == RDC_CMD_ENABLE) { - rc = rdc_enable_diskq(krdc); - if ((rc == RDC_EQNOADD) && (cmd != RDC_CMD_ENABLE)) { - cmn_err(CE_WARN, "!disk queue %s enable failed," - " enabling memory queue", - urdc->disk_queue); - krdc->group->flags &= ~RDC_DISKQUE; - krdc->group->flags |= RDC_MEMQUE; - bzero(urdc->disk_queue, NSC_MAXPATH); - } - } - } -fail: - rdc_many_exit(krdc); - return (rc); -} - - -/* - * Move the set to a new group if possible - */ -static int -change_group(rdc_k_info_t *krdc, int options) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - 
rdc_u_info_t *utmp; - rdc_k_info_t *ktmp; - rdc_k_info_t *next; - char tmpq[NSC_MAXPATH]; - int index; - int rc = -1; - rdc_group_t *group, *old_group; - nsthread_t *trc; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - /* - * Look for matching group name, primary host name and secondary - * host name. - */ - - bzero(&tmpq, sizeof (tmpq)); - rdc_many_enter(krdc); - - old_group = krdc->group; - next = krdc->group_next; - - if (RDC_IS_DISKQ(old_group)) { /* can't keep your own queue */ - (void) strncpy(tmpq, urdc->disk_queue, NSC_MAXPATH); - bzero(urdc->disk_queue, sizeof (urdc->disk_queue)); - } - for (index = 0; index < rdc_max_sets; index++) { - utmp = &rdc_u_info[index]; - ktmp = &rdc_k_info[index]; - - if (ktmp == krdc) - continue; - - if (urdc->group_name[0] == 0) - break; - - if (!IS_CONFIGURED(ktmp)) - continue; - - if (strncmp(utmp->group_name, urdc->group_name, - NSC_MAXPATH) != 0) - continue; - if (strncmp(utmp->primary.intf, urdc->primary.intf, - MAX_RDC_HOST_SIZE) != 0) - goto bad; - if (strncmp(utmp->secondary.intf, urdc->secondary.intf, - MAX_RDC_HOST_SIZE) != 0) - goto bad; - - /* Group already exists, so add this set to the group */ - - if (((options & RDC_OPT_ASYNC) == 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) != 0)) { - /* Must be same mode as existing group members */ - goto bad; - } - if (((options & RDC_OPT_ASYNC) != 0) && - ((ktmp->type_flag & RDC_ASYNCMODE) == 0)) { - /* Must be same mode as existing group members */ - goto bad; - } - - ktmp->group->count++; - krdc->group = ktmp->group; - krdc->group_next = ktmp->group_next; - ktmp->group_next = krdc; - bzero(urdc->disk_queue, sizeof (urdc->disk_queue)); - (void) strncpy(urdc->disk_queue, utmp->disk_queue, NSC_MAXPATH); - - goto good; - } - - /* This must be a new group */ - group = rdc_newgroup(); - krdc->group = group; - krdc->group_next = krdc; - - trc = nst_create(_rdc_ioset, rdc_qfiller_thr, (void *)krdc, NST_SLEEP); - if (trc == NULL) { - rc = -1; - cmn_err(CE_NOTE, "!unable to create queue filler daemon"); - goto bad; - } - - if (urdc->disk_queue[0] == 0) { - krdc->group->flags |= RDC_MEMQUE; - } else { - krdc->group->flags |= RDC_DISKQUE; - if ((rc = rdc_enable_diskq(krdc)) < 0) - goto bad; - } -good: - if (options & RDC_OPT_ASYNC) { - krdc->type_flag |= RDC_ASYNCMODE; - rdc_set_flags(urdc, RDC_ASYNC); - } else { - krdc->type_flag &= ~RDC_ASYNCMODE; - rdc_clr_flags(urdc, RDC_ASYNC); - } - - old_group->count--; - if (!old_group->rdc_writer && old_group->count == 0) { - /* Group now empty, so destroy */ - if (RDC_IS_DISKQ(old_group)) { - rdc_unintercept_diskq(old_group); - mutex_enter(&old_group->diskqmutex); - rdc_close_diskq(old_group); - mutex_exit(&old_group->diskqmutex); - } - - mutex_enter(&old_group->ra_queue.net_qlock); - - /* - * Assure the we've stopped and the flusher thread has not - * fallen back to sleep - */ - if (old_group->ra_queue.qfill_sleeping != RDC_QFILL_DEAD) { - old_group->ra_queue.qfflags |= RDC_QFILLSTOP; - while (old_group->ra_queue.qfflags & RDC_QFILLSTOP) { - if (old_group->ra_queue.qfill_sleeping == - RDC_QFILL_ASLEEP) - cv_broadcast(&old_group->ra_queue.qfcv); - mutex_exit(&old_group->ra_queue.net_qlock); - delay(2); - mutex_enter(&old_group->ra_queue.net_qlock); - } - } - mutex_exit(&old_group->ra_queue.net_qlock); - - rdc_delgroup(old_group); - rdc_many_exit(krdc); - return (0); - } - - /* Take this rdc structure off the old group list */ - - for (ktmp = next; ktmp->group_next != krdc; ktmp = ktmp->group_next) - ; - ktmp->group_next = next; - - rdc_many_exit(krdc); - return (0); - 
-bad: - /* Leave existing group status alone */ - (void) strncpy(urdc->disk_queue, tmpq, NSC_MAXPATH); - rdc_many_exit(krdc); - return (rc); -} - - -/* - * Set flags for an rdc set, setting the group flags as necessary. - */ -void -rdc_set_flags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - int vflags, sflags, bflags, ssflags; - - DTRACE_PROBE2(rdc_set_flags, int, krdc->index, int, flags); - vflags = flags & RDC_VFLAGS; - sflags = flags & RDC_SFLAGS; - bflags = flags & RDC_BFLAGS; - ssflags = flags & RDC_SYNC_STATE_FLAGS; - - if (vflags) { - /* normal volume flags */ - ASSERT(MUTEX_HELD(&rdc_conf_lock) || - MUTEX_HELD(&krdc->group->lock)); - if (ssflags) - mutex_enter(&krdc->bmapmutex); - - urdc->flags |= vflags; - - if (ssflags) - mutex_exit(&krdc->bmapmutex); - } - - if (sflags) { - /* Sync state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&rdc_many_lock)); - urdc->sync_flags |= sflags; - } - - if (bflags) { - /* Bmap state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - urdc->bmap_flags |= bflags; - } - -} - - -/* - * Clear flags for an rdc set, clearing the group flags as necessary. - */ -void -rdc_clr_flags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - int vflags, sflags, bflags; - - DTRACE_PROBE2(rdc_clr_flags, int, krdc->index, int, flags); - vflags = flags & RDC_VFLAGS; - sflags = flags & RDC_SFLAGS; - bflags = flags & RDC_BFLAGS; - - if (vflags) { - /* normal volume flags */ - ASSERT(MUTEX_HELD(&rdc_conf_lock) || - MUTEX_HELD(&krdc->group->lock)); - urdc->flags &= ~vflags; - - } - - if (sflags) { - /* Sync state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&rdc_many_lock)); - urdc->sync_flags &= ~sflags; - } - - if (bflags) { - /* Bmap state flags that are protected by a different lock */ - ASSERT(MUTEX_HELD(&krdc->bmapmutex)); - urdc->bmap_flags &= ~bflags; - } -} - - -/* - * Get the flags for an rdc set. - */ -int -rdc_get_vflags(rdc_u_info_t *urdc) -{ - return (urdc->flags | urdc->sync_flags | urdc->bmap_flags); -} - - -/* - * Initialise flags for an rdc set. - */ -static void -rdc_init_flags(rdc_u_info_t *urdc) -{ - urdc->flags = 0; - urdc->mflags = 0; - urdc->sync_flags = 0; - urdc->bmap_flags = 0; -} - - -/* - * Set flags for a many group. - */ -void -rdc_set_mflags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *this = krdc; - - ASSERT(!(flags & ~RDC_MFLAGS)); - - if (flags == 0) - return; - - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - rdc_set_flags(urdc, flags); /* set flags on local urdc */ - - urdc->mflags |= flags; - for (krdc = krdc->many_next; krdc != this; krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - urdc->mflags |= flags; - } -} - - -/* - * Clear flags for a many group. - */ -void -rdc_clr_mflags(rdc_u_info_t *urdc, int flags) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *this = krdc; - rdc_u_info_t *utmp; - - ASSERT(!(flags & ~RDC_MFLAGS)); - - if (flags == 0) - return; - - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - rdc_clr_flags(urdc, flags); /* clear flags on local urdc */ - - /* - * We must maintain the mflags based on the set of flags for - * all the urdc's that are chained up. - */ - - /* - * First look through all the urdc's and remove bits from - * the 'flags' variable that are in use elsewhere. 
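rdc_set_flags() and rdc_clr_flags() just above split what is logically one state word into three fields, each guarded by a different lock: the RDC_VFLAGS bits by the configuration or group lock, the RDC_SFLAGS bits by rdc_many_lock, and the RDC_BFLAGS bits by the bitmap mutex. Readers get a single combined view because rdc_get_vflags() simply ORs the three words together. A compressed sketch of that layout, with stand-in masks and the locking elided:

/* Illustrative split of one logical flag word across lock domains. */
#define	VFLAGS_MASK	0x00ff	/* guarded by the group/config lock */
#define	SFLAGS_MASK	0x0f00	/* guarded by the many-chain lock */
#define	BFLAGS_MASK	0xf000	/* guarded by the bitmap mutex */

struct set_state {
	unsigned vflags;
	unsigned sflags;
	unsigned bflags;
};

/* Caller must hold whichever lock guards the bits being changed. */
static void
state_set(struct set_state *s, unsigned flags)
{
	s->vflags |= (flags & VFLAGS_MASK);
	s->sflags |= (flags & SFLAGS_MASK);
	s->bflags |= (flags & BFLAGS_MASK);
}

/* Readers see one combined word, as rdc_get_vflags() returns. */
static unsigned
state_get(const struct set_state *s)
{
	return (s->vflags | s->sflags | s->bflags);
}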
- */ - - for (krdc = krdc->many_next; krdc != this; krdc = krdc->many_next) { - utmp = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(utmp)) - continue; - flags &= ~(rdc_get_vflags(utmp) & RDC_MFLAGS); - if (flags == 0) - break; - } - - /* - * Now clear flags as necessary. - */ - - if (flags != 0) { - urdc->mflags &= ~flags; - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - utmp = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(utmp)) - continue; - utmp->mflags &= ~flags; - } - } -} - - -int -rdc_get_mflags(rdc_u_info_t *urdc) -{ - return (urdc->mflags); -} - - -void -rdc_set_flags_log(rdc_u_info_t *urdc, int flags, char *why) -{ - DTRACE_PROBE2(rdc_set_flags_log, int, urdc->index, int, flags); - - rdc_set_flags(urdc, flags); - - if (why == NULL) - return; - - if (flags & RDC_LOGGING) - cmn_err(CE_NOTE, "!sndr: %s:%s entered logging mode: %s", - urdc->secondary.intf, urdc->secondary.file, why); - if (flags & RDC_VOL_FAILED) - cmn_err(CE_NOTE, "!sndr: %s:%s volume failed: %s", - urdc->secondary.intf, urdc->secondary.file, why); - if (flags & RDC_BMP_FAILED) - cmn_err(CE_NOTE, "!sndr: %s:%s bitmap failed: %s", - urdc->secondary.intf, urdc->secondary.file, why); -} -/* - * rdc_lor(source, dest, len) - * logically OR memory pointed to by source and dest, copying result into dest. - */ -void -rdc_lor(const uchar_t *source, uchar_t *dest, int len) -{ - int i; - - if (source == NULL) - return; - - for (i = 0; i < len; i++) - *dest++ |= *source++; -} - - -static int -check_filesize(int index, spcs_s_info_t kstatus) -{ - uint64_t remote_size; - char tmp1[16], tmp2[16]; - rdc_u_info_t *urdc = &rdc_u_info[index]; - int status; - - status = rdc_net_getsize(index, &remote_size); - if (status) { - (void) spcs_s_inttostring(status, tmp1, sizeof (tmp1), 0); - spcs_s_add(kstatus, RDC_EGETSIZE, urdc->secondary.intf, - urdc->secondary.file, tmp1); - (void) rdc_net_state(index, CCIO_ENABLELOG); - return (RDC_EGETSIZE); - } - if (remote_size < (unsigned long long)urdc->volume_size) { - (void) spcs_s_inttostring( - urdc->volume_size, tmp1, sizeof (tmp1), 0); - /* - * Cheat, and covert to int, until we have - * spcs_s_unsignedlonginttostring(). 
- */ - status = (int)remote_size; - (void) spcs_s_inttostring(status, tmp2, sizeof (tmp2), 0); - spcs_s_add(kstatus, RDC_ESIZE, urdc->primary.intf, - urdc->primary.file, tmp1, urdc->secondary.intf, - urdc->secondary.file, tmp2); - (void) rdc_net_state(index, CCIO_ENABLELOG); - return (RDC_ESIZE); - } - return (0); -} - - -static void -rdc_volume_update_svc(intptr_t arg) -{ - rdc_update_t *update = (rdc_update_t *)arg; - rdc_k_info_t *krdc; - rdc_k_info_t *this; - rdc_u_info_t *urdc; - struct net_bdata6 bd; - int index; - int rc; - -#ifdef DEBUG_IIUPDATE - cmn_err(CE_NOTE, "!SNDR received update request for %s", - update->volume); -#endif - - if ((update->protocol != RDC_SVC_ONRETURN) && - (update->protocol != RDC_SVC_VOL_ENABLED)) { - /* don't understand what the client intends to do */ - update->denied = 1; - spcs_s_add(update->status, RDC_EVERSION); - return; - } - - index = rdc_lookup_enabled(update->volume, 0); - if (index < 0) - return; - - /* - * warn II that this volume is in use by sndr so - * II can validate the sizes of the master vs shadow - * and avoid trouble later down the line with - * size mis-matches between urdc->volume_size and - * what is returned from nsc_partsize() which may - * be the size of the master when replicating the shadow - */ - if (update->protocol == RDC_SVC_VOL_ENABLED) { - if (index >= 0) - update->denied = 1; - return; - } - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - this = krdc; - - do { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { -#ifdef DEBUG_IIUPDATE - cmn_err(CE_NOTE, "!SNDR refused update request for %s", - update->volume); -#endif - update->denied = 1; - spcs_s_add(update->status, RDC_EMIRRORUP); - return; - } - /* 1->many - all must be logging */ - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - urdc = &rdc_u_info[krdc->index]; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - } while (krdc != this); - -#ifdef DEBUG_IIUPDATE - cmn_err(CE_NOTE, "!SNDR allowed update request for %s", update->volume); -#endif - urdc = &rdc_u_info[krdc->index]; - do { - - bd.size = min(krdc->bitmap_size, (nsc_size_t)update->size); - bd.data.data_val = (char *)update->bitmap; - bd.offset = 0; - bd.cd = index; - - if ((rc = RDC_OR_BITMAP(&bd)) != 0) { - update->denied = 1; - spcs_s_add(update->status, rc); - return; - } - urdc = &rdc_u_info[index]; - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) { - rdc_many_enter(krdc); - for (krdc = krdc->many_next; krdc != this; - krdc = krdc->many_next) { - index = krdc->index; - if (!IS_ENABLED(urdc)) - continue; - break; - } - rdc_many_exit(krdc); - } - } while (krdc != this); - - - /* II (or something else) has updated us, so no need for a sync */ - if (rdc_get_vflags(urdc) & (RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED)) { - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - } - - if (krdc->bitmap_write > 0) - (void) rdc_write_bitmap(krdc); -} - - -/* - * rdc_check() - * - * Return 0 if the set is configured, enabled and the supplied - * addressing information matches the in-kernel config, otherwise - * return 1. 
- */ -static int -rdc_check(rdc_k_info_t *krdc, rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&krdc->group->lock)); - - if (!IS_ENABLED(urdc)) - return (1); - - if (strncmp(urdc->primary.file, rdc_set->primary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: primary file mismatch %s vs %s", - urdc->primary.file, rdc_set->primary.file); -#endif - return (1); - } - - if (rdc_set->primary.addr.len != 0 && - bcmp(urdc->primary.addr.buf, rdc_set->primary.addr.buf, - urdc->primary.addr.len) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: primary address mismatch for %s", - urdc->primary.file); -#endif - return (1); - } - - if (strncmp(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: secondary file mismatch %s vs %s", - urdc->secondary.file, rdc_set->secondary.file); -#endif - return (1); - } - - if (rdc_set->secondary.addr.len != 0 && - bcmp(urdc->secondary.addr.buf, rdc_set->secondary.addr.buf, - urdc->secondary.addr.len) != 0) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_check: secondary addr mismatch for %s", - urdc->secondary.file); -#endif - return (1); - } - - return (0); -} - - -/* - * Lookup enabled sets for a bitmap match - */ - -int -rdc_lookup_bitmap(char *pathname) -{ - rdc_u_info_t *urdc; -#ifdef DEBUG - rdc_k_info_t *krdc; -#endif - int index; - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; -#ifdef DEBUG - krdc = &rdc_k_info[index]; -#endif - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_ENABLED(urdc)) - continue; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (strncmp(pathname, urdc->primary.bitmap, - NSC_MAXPATH) == 0) - return (index); - } else { - if (strncmp(pathname, urdc->secondary.bitmap, - NSC_MAXPATH) == 0) - return (index); - } - } - - return (-1); -} - - -/* - * Translate a pathname to index into rdc_k_info[]. - * Returns first match that is enabled. - */ - -int -rdc_lookup_enabled(char *pathname, int allow_disabling) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - -restart: - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_ENABLED(urdc)) - continue; - - if (allow_disabling == 0 && krdc->type_flag & RDC_UNREGISTER) - continue; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (strncmp(pathname, urdc->primary.file, - NSC_MAXPATH) == 0) - return (index); - } else { - if (strncmp(pathname, urdc->secondary.file, - NSC_MAXPATH) == 0) - return (index); - } - } - - if (allow_disabling == 0) { - /* None found, or only a disabling one found, so try again */ - allow_disabling = 1; - goto restart; - } - - return (-1); -} - - -/* - * Translate a pathname to index into rdc_k_info[]. - * Returns first match that is configured. - * - * Used by enable & resume code. - * Must be called with rdc_conf_lock held. 
- */ - -int -rdc_lookup_configured(char *pathname) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - if (strncmp(pathname, urdc->primary.file, - NSC_MAXPATH) == 0) - return (index); - } else { - if (strncmp(pathname, urdc->secondary.file, - NSC_MAXPATH) == 0) - return (index); - } - } - - return (-1); -} - - -/* - * Looks up a configured set with matching secondary interface:volume - * to check for illegal many-to-one volume configs. To be used during - * enable and resume processing. - * - * Must be called with rdc_conf_lock held. - */ - -static int -rdc_lookup_many2one(rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - if (!IS_CONFIGURED(krdc)) - continue; - - if (strncmp(urdc->secondary.file, - rdc_set->secondary.file, NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->secondary.intf, - rdc_set->secondary.intf, MAX_RDC_HOST_SIZE) != 0) - continue; - - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - - -/* - * Looks up an rdc set to check if it is already configured, to be used from - * functions called from the config ioctl where the interface names can be - * used for comparison. - * - * Must be called with rdc_conf_lock held. - */ - -int -rdc_lookup_byname(rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (strncmp(urdc->primary.file, rdc_set->primary.file, - NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->primary.intf, rdc_set->primary.intf, - MAX_RDC_HOST_SIZE) != 0) - continue; - if (strncmp(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->secondary.intf, rdc_set->secondary.intf, - MAX_RDC_HOST_SIZE) != 0) - continue; - - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - -/* - * Looks up a secondary hostname and device, to be used from - * functions called from the config ioctl where the interface names can be - * used for comparison. - * - * Must be called with rdc_conf_lock held. 
- */ - -int -rdc_lookup_byhostdev(char *intf, char *file) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (strncmp(urdc->secondary.file, file, - NSC_MAXPATH) != 0) - continue; - if (strncmp(urdc->secondary.intf, intf, - MAX_RDC_HOST_SIZE) != 0) - continue; - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - - -/* - * Looks up an rdc set to see if it is currently enabled, to be used on the - * server so that the interface addresses must be used for comparison, as - * the interface names may differ from those used on the client. - * - */ - -int -rdc_lookup_byaddr(rdc_set_t *rdc_set) -{ - rdc_u_info_t *urdc; -#ifdef DEBUG - rdc_k_info_t *krdc; -#endif - int index; - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; -#ifdef DEBUG - krdc = &rdc_k_info[index]; -#endif - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_ENABLED(urdc)) - continue; - - if (strcmp(urdc->primary.file, rdc_set->primary.file) != 0) - continue; - - if (strcmp(urdc->secondary.file, rdc_set->secondary.file) != 0) - continue; - - if (bcmp(urdc->primary.addr.buf, rdc_set->primary.addr.buf, - urdc->primary.addr.len) != 0) { - continue; - } - - if (bcmp(urdc->secondary.addr.buf, rdc_set->secondary.addr.buf, - urdc->secondary.addr.len) != 0) { - continue; - } - - break; - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - - -/* - * Return index of first multihop or 1-to-many - * Behavior controlled by setting ismany. - * ismany TRUE (one-to-many) - * ismany FALSE (multihops) - * - */ -static int -rdc_lookup_multimany(rdc_k_info_t *krdc, const int ismany) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_u_info_t *utmp; - rdc_k_info_t *ktmp; - char *pathname; - int index; - int role; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - /* this host is the primary of the krdc set */ - pathname = urdc->primary.file; - if (ismany) { - /* - * 1-many sets are linked by primary : - * look for matching primary on this host - */ - role = RDC_PRIMARY; - } else { - /* - * multihop sets link primary to secondary : - * look for matching secondary on this host - */ - role = 0; - } - } else { - /* this host is the secondary of the krdc set */ - pathname = urdc->secondary.file; - if (ismany) { - /* - * 1-many sets are linked by primary, so if - * this host is the secondary of the set this - * cannot require 1-many linkage. - */ - return (-1); - } else { - /* - * multihop sets link primary to secondary : - * look for matching primary on this host - */ - role = RDC_PRIMARY; - } - } - - for (index = 0; index < rdc_max_sets; index++) { - utmp = &rdc_u_info[index]; - ktmp = &rdc_k_info[index]; - - if (!IS_CONFIGURED(ktmp)) { - continue; - } - - if (role == RDC_PRIMARY) { - /* - * Find a primary that is this host and is not - * krdc but shares the same data volume as krdc. - */ - if ((rdc_get_vflags(utmp) & RDC_PRIMARY) && - strncmp(utmp->primary.file, pathname, - NSC_MAXPATH) == 0 && (krdc != ktmp)) { - break; - } - } else { - /* - * Find a secondary that is this host and is not - * krdc but shares the same data volume as krdc. 
- */ - if (!(rdc_get_vflags(utmp) & RDC_PRIMARY) && - strncmp(utmp->secondary.file, pathname, - NSC_MAXPATH) == 0 && (krdc != ktmp)) { - break; - } - } - } - - if (index < rdc_max_sets) - return (index); - else - return (-1); -} - -/* - * Returns secondary match that is configured. - * - * Used by enable & resume code. - * Must be called with rdc_conf_lock held. - */ - -static int -rdc_lookup_secondary(char *pathname) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - int index; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - for (index = 0; index < rdc_max_sets; index++) { - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - ASSERT(krdc->index == index); - ASSERT(urdc->index == index); - - if (!IS_CONFIGURED(krdc)) - continue; - - if (!IS_STATE(urdc, RDC_PRIMARY)) { - if (strncmp(pathname, urdc->secondary.file, - NSC_MAXPATH) == 0) - return (index); - } - } - - return (-1); -} - - -static nsc_fd_t * -rdc_open_direct(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc; - - if (krdc->remote_fd == NULL) - krdc->remote_fd = nsc_open(urdc->direct_file, - NSC_RDCHR_ID|NSC_DEVICE|NSC_RDWR, 0, 0, &rc); - return (krdc->remote_fd); -} - -static void -rdc_close_direct(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - urdc->direct_file[0] = 0; - if (krdc->remote_fd) { - if (nsc_close(krdc->remote_fd) == 0) { - krdc->remote_fd = NULL; - } - } -} - - -#ifdef DEBUG_MANY -static void -print_many(rdc_k_info_t *start) -{ - rdc_k_info_t *p = start; - rdc_u_info_t *q = &rdc_u_info[p->index]; - - do { - cmn_err(CE_CONT, "!krdc %p, %s %s (many_nxt %p multi_nxt %p)\n", - p, q->primary.file, q->secondary.file, p->many_next, - p->multi_next); - delay(10); - p = p->many_next; - q = &rdc_u_info[p->index]; - } while (p && p != start); -} -#endif /* DEBUG_MANY */ - - -static int -add_to_multi(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - int mindex; - int domulti; - - urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - /* Now find companion krdc */ - mindex = rdc_lookup_multimany(krdc, FALSE); - -#ifdef DEBUG_MANY - cmn_err(CE_NOTE, - "!add_to_multi: lookup_multimany: mindex %d prim %s sec %s", - mindex, urdc->primary.file, urdc->secondary.file); -#endif - - if (mindex >= 0) { - ktmp = &rdc_k_info[mindex]; - utmp = &rdc_u_info[mindex]; - - domulti = 1; - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - ktmp->multi_next != NULL) { - /* - * We are adding a new primary to a many - * group that is the target of a multihop, just - * ignore it since we are linked in elsewhere. - */ - domulti = 0; - } - - if (domulti) { - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - /* Is previous leg using direct file I/O? */ - if (utmp->direct_file[0] != 0) { - /* It is, so cannot proceed */ - return (-1); - } - } else { - /* Is this leg using direct file I/O? */ - if (urdc->direct_file[0] != 0) { - /* It is, so cannot proceed */ - return (-1); - } - } - krdc->multi_next = ktmp; - ktmp->multi_next = krdc; - } - } else { - krdc->multi_next = NULL; -#ifdef DEBUG_MANY - cmn_err(CE_NOTE, "!add_to_multi: NULL multi_next index %d", - krdc->index); -#endif - } - - return (0); -} - - -/* - * Add a new set to the circular list of 1-to-many primaries and chain - * up any multihop as well. 
- */ -static int -add_to_many(rdc_k_info_t *krdc) -{ - rdc_k_info_t *okrdc; - int oindex; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - rdc_many_enter(krdc); - - if (add_to_multi(krdc) < 0) { - rdc_many_exit(krdc); - return (-1); - } - - oindex = rdc_lookup_multimany(krdc, TRUE); - if (oindex < 0) { -#ifdef DEBUG_MANY - print_many(krdc); -#endif - rdc_many_exit(krdc); - return (0); - } - - okrdc = &rdc_k_info[oindex]; - -#ifdef DEBUG_MANY - print_many(okrdc); -#endif - krdc->many_next = okrdc->many_next; - okrdc->many_next = krdc; - -#ifdef DEBUG_MANY - print_many(okrdc); -#endif - rdc_many_exit(krdc); - return (0); -} - - -/* - * Remove a set from the circular list of 1-to-many primaries. - */ -static void -remove_from_many(rdc_k_info_t *old) -{ - rdc_u_info_t *uold = &rdc_u_info[old->index]; - rdc_k_info_t *p, *q; - - ASSERT(MUTEX_HELD(&rdc_conf_lock)); - - rdc_many_enter(old); - -#ifdef DEBUG_MANY - cmn_err(CE_NOTE, "!rdc: before remove_from_many"); - print_many(old); -#endif - - if (old->many_next == old) { - /* remove from multihop */ - if ((q = old->multi_next) != NULL) { - ASSERT(q->multi_next == old); - q->multi_next = NULL; - old->multi_next = NULL; - } - - rdc_many_exit(old); - return; - } - - /* search */ - for (p = old->many_next; p->many_next != old; p = p->many_next) - ; - - p->many_next = old->many_next; - old->many_next = old; - - if ((q = old->multi_next) != NULL) { - /* - * old was part of a multihop, so switch multi pointers - * to someone remaining on the many chain - */ - ASSERT(p->multi_next == NULL); - - q->multi_next = p; - p->multi_next = q; - old->multi_next = NULL; - } - -#ifdef DEBUG_MANY - if (p == old) { - cmn_err(CE_NOTE, "!rdc: after remove_from_many empty"); - } else { - cmn_err(CE_NOTE, "!rdc: after remove_from_many"); - print_many(p); - } -#endif - - rdc_clr_mflags(&rdc_u_info[p->index], - (rdc_get_vflags(uold) & RDC_MFLAGS)); - - rdc_many_exit(old); -} - - -static int -_rdc_enable(rdc_set_t *rdc_set, int options, spcs_s_info_t kstatus) -{ - int index; - char *rhost; - struct netbuf *addrp; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_srv_t *svp = NULL; - char *local_file; - char *local_bitmap; - char *diskq; - int rc; - nsc_size_t maxfbas; - rdc_group_t *grp; - - if ((rdc_set->primary.intf[0] == 0) || - (rdc_set->primary.addr.len == 0) || - (rdc_set->primary.file[0] == 0) || - (rdc_set->primary.bitmap[0] == 0) || - (rdc_set->secondary.intf[0] == 0) || - (rdc_set->secondary.addr.len == 0) || - (rdc_set->secondary.file[0] == 0) || - (rdc_set->secondary.bitmap[0] == 0)) { - spcs_s_add(kstatus, RDC_EEMPTY); - return (RDC_EEMPTY); - } - - /* Next check there aren't any enabled rdc sets which match. 
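The add_to_many() and remove_from_many() routines just above keep the 1-to-many primaries on a circular, singly linked list in which a set that stands alone points at itself (many_next == krdc); insertion splices the new set in behind an existing member, and removal walks the ring to find the predecessor. The ring discipline reduced to a stand-alone sketch with illustrative names:

/* Illustrative self-referencing ring, as used for the many_next chain. */
struct node {
	struct node *next;
};

static void
ring_init(struct node *n)
{
	n->next = n;			/* a singleton points at itself */
}

static void
ring_insert_after(struct node *anchor, struct node *n)
{
	n->next = anchor->next;
	anchor->next = n;
}

static void
ring_remove(struct node *n)
{
	struct node *p;

	if (n->next == n)
		return;			/* already a singleton */
	for (p = n->next; p->next != n; p = p->next)
		;			/* walk the ring to the predecessor */
	p->next = n->next;
	n->next = n;			/* removed node becomes a singleton */
}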
*/ - - mutex_enter(&rdc_conf_lock); - - if (rdc_lookup_byname(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EENABLED, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EENABLED); - } - - if (rdc_lookup_many2one(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EMANY2ONE, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EMANY2ONE); - } - - if (rdc_set->netconfig->knc_proto == NULL) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETCONFIG); - return (RDC_ENETCONFIG); - } - - if (rdc_set->primary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->primary.file); - return (RDC_ENETBUF); - } - - if (rdc_set->secondary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->secondary.file); - return (RDC_ENETBUF); - } - - /* Check that the local data volume isn't in use as a bitmap */ - if (options & RDC_OPT_PRIMARY) - local_file = rdc_set->primary.file; - else - local_file = rdc_set->secondary.file; - if (rdc_lookup_bitmap(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - - /* check that the secondary data volume isn't in use */ - if (!(options & RDC_OPT_PRIMARY)) { - local_file = rdc_set->secondary.file; - if (rdc_lookup_secondary(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - } - - /* check that the local data vol is not in use as a diskqueue */ - if (options & RDC_OPT_PRIMARY) { - if (rdc_lookup_diskq(rdc_set->primary.file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, - RDC_EVOLINUSE, rdc_set->primary.file); - return (RDC_EVOLINUSE); - } - } - - /* Check that the bitmap isn't in use as a data volume */ - if (options & RDC_OPT_PRIMARY) - local_bitmap = rdc_set->primary.bitmap; - else - local_bitmap = rdc_set->secondary.bitmap; - if (rdc_lookup_configured(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* Check that the bitmap isn't already in use as a bitmap */ - if (rdc_lookup_bitmap(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* check that the diskq (if here) is not in use */ - diskq = rdc_set->disk_queue; - if (diskq[0] && rdc_diskq_inuse(rdc_set, diskq)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EDISKQINUSE, diskq); - return (RDC_EDISKQINUSE); - } - - - /* Set urdc->volume_size */ - index = rdc_dev_open(rdc_set, options); - if (index < 0) { - mutex_exit(&rdc_conf_lock); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->primary.intf, - rdc_set->primary.file); - else - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EOPEN); - } - - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - /* copy relevant parts of rdc_set to urdc field by field */ - - (void) strncpy(urdc->primary.intf, rdc_set->primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->secondary.intf, rdc_set->secondary.intf, - MAX_RDC_HOST_SIZE); - - (void) strncpy(urdc->group_name, rdc_set->group_name, NSC_MAXPATH); - (void) strncpy(urdc->disk_queue, rdc_set->disk_queue, NSC_MAXPATH); - - 
dup_rdc_netbuf(&rdc_set->primary.addr, &urdc->primary.addr); - (void) strncpy(urdc->primary.file, rdc_set->primary.file, NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, rdc_set->primary.bitmap, - NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->secondary.addr, &urdc->secondary.addr); - (void) strncpy(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, rdc_set->secondary.bitmap, - NSC_MAXPATH); - - urdc->setid = rdc_set->setid; - - /* - * before we try to add to group, or create one, check out - * if we are doing the wrong thing with the diskq - */ - - if (urdc->disk_queue[0] && (options & RDC_OPT_SYNC)) { - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - spcs_s_add(kstatus, RDC_EQWRONGMODE); - return (RDC_EQWRONGMODE); - } - - if ((rc = add_to_group(krdc, options, RDC_CMD_ENABLE)) != 0) { - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - if (rc == RDC_EQNOADD) { - spcs_s_add(kstatus, RDC_EQNOADD, rdc_set->disk_queue); - return (RDC_EQNOADD); - } else { - spcs_s_add(kstatus, RDC_EGROUP, - rdc_set->primary.intf, rdc_set->primary.file, - rdc_set->secondary.intf, rdc_set->secondary.file, - rdc_set->group_name); - return (RDC_EGROUP); - } - } - - /* - * maxfbas was set in rdc_dev_open as primary's maxfbas. - * If diskq's maxfbas is smaller, then use diskq's. - */ - grp = krdc->group; - if (grp && RDC_IS_DISKQ(grp) && (grp->diskqfd != 0)) { - rc = _rdc_rsrv_diskq(grp); - if (RDC_SUCCESS(rc)) { - rc = nsc_maxfbas(grp->diskqfd, 0, &maxfbas); - if (rc == 0) { -#ifdef DEBUG - if (krdc->maxfbas != maxfbas) - cmn_err(CE_NOTE, - "!_rdc_enable: diskq maxfbas = %" - NSC_SZFMT ", primary maxfbas = %" - NSC_SZFMT, maxfbas, krdc->maxfbas); -#endif - krdc->maxfbas = min(krdc->maxfbas, maxfbas); - } else { - cmn_err(CE_WARN, - "!_rdc_enable: diskq maxfbas failed (%d)", - rc); - } - _rdc_rlse_diskq(grp); - } else { - cmn_err(CE_WARN, - "!_rdc_enable: diskq reserve failed (%d)", rc); - } - } - - rdc_init_flags(urdc); - (void) strncpy(urdc->direct_file, rdc_set->direct_file, NSC_MAXPATH); - if ((options & RDC_OPT_PRIMARY) && rdc_set->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - } - - krdc->many_next = krdc; - - ASSERT(krdc->type_flag == 0); - krdc->type_flag = RDC_CONFIGURED; - - if (options & RDC_OPT_PRIMARY) - rdc_set_flags(urdc, RDC_PRIMARY); - - if (options & RDC_OPT_ASYNC) - krdc->type_flag |= RDC_ASYNCMODE; - - set_busy(krdc); - urdc->syshostid = rdc_set->syshostid; - - if (add_to_many(krdc) < 0) { - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - spcs_s_add(kstatus, RDC_EMULTI); - rc = RDC_EMULTI; - goto fail; - } - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* - * The rdc set is configured but not yet enabled. Other operations must - * ignore this set until it is enabled. 
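Note that the enable path has already marked the set RDC_CONFIGURED and taken a busy reference with set_busy(), so disable and suspend, which drain the same counter through wait_busy(), cannot tear the half-built set down during this window. That busy gate is a small counting pattern over rdc_conf_lock and busycv; a user-level sketch of the same idea, with a pthread mutex and condition variable standing in for the kernel primitives:

#include <pthread.h>

/* Illustrative busy gate mirroring set_busy()/wakeup_busy()/wait_busy(). */
struct busy_gate {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	int		busy_count;
};

/* An operation holds the gate while it works on the set. */
static void
gate_enter(struct busy_gate *g)
{
	pthread_mutex_lock(&g->lock);
	g->busy_count++;
	pthread_mutex_unlock(&g->lock);
}

static void
gate_exit(struct busy_gate *g)
{
	pthread_mutex_lock(&g->lock);
	if (--g->busy_count == 0)
		pthread_cond_broadcast(&g->cv);
	pthread_mutex_unlock(&g->lock);
}

/* Teardown waits until no operation holds the gate. */
static void
gate_drain(struct busy_gate *g)
{
	pthread_mutex_lock(&g->lock);
	while (g->busy_count > 0)
		pthread_cond_wait(&g->cv, &g->lock);
	pthread_mutex_unlock(&g->lock);
}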
- */ - - urdc->sync_pos = 0; - - if (rdc_set->maxqfbas > 0) - urdc->maxqfbas = rdc_set->maxqfbas; - else - urdc->maxqfbas = rdc_maxthres_queue; - - if (rdc_set->maxqitems > 0) - urdc->maxqitems = rdc_set->maxqitems; - else - urdc->maxqitems = rdc_max_qitems; - - if (rdc_set->asyncthr > 0) - urdc->asyncthr = rdc_set->asyncthr; - else - urdc->asyncthr = rdc_asyncthr; - - if (urdc->autosync == -1) { - /* Still unknown */ - if (rdc_set->autosync > 0) - urdc->autosync = 1; - else - urdc->autosync = 0; - } - - urdc->netconfig = rdc_set->netconfig; - - if (options & RDC_OPT_PRIMARY) { - rhost = rdc_set->secondary.intf; - addrp = &rdc_set->secondary.addr; - } else { - rhost = rdc_set->primary.intf; - addrp = &rdc_set->primary.addr; - } - - if (options & RDC_OPT_ASYNC) - rdc_set_flags(urdc, RDC_ASYNC); - - svp = rdc_create_svinfo(rhost, addrp, urdc->netconfig); - if (svp == NULL) { - spcs_s_add(kstatus, ENOMEM); - rc = ENOMEM; - goto fail; - } - urdc->netconfig = NULL; /* This will be no good soon */ - - rdc_kstat_create(index); - - /* Don't set krdc->intf here */ - - if (rdc_enable_bitmap(krdc, options & RDC_OPT_SETBMP) < 0) - goto bmpfail; - - RDC_ZERO_BITREF(krdc); - if (krdc->lsrv == NULL) - krdc->lsrv = svp; - else { -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_enable: krdc->lsrv already set: %p", - (void *) krdc->lsrv); -#endif - rdc_destroy_svinfo(svp); - } - svp = NULL; - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* And finally */ - - krdc->remote_index = -1; - /* Should we set the whole group logging? */ - rdc_set_flags(urdc, RDC_ENABLED | RDC_LOGGING); - - rdc_group_exit(krdc); - - if (rdc_intercept(krdc) != 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ENABLED); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EREGISTER, urdc->primary.file); - else - spcs_s_add(kstatus, RDC_EREGISTER, - urdc->secondary.file); -#ifdef DEBUG - cmn_err(CE_NOTE, "!nsc_register_path failed %s", - urdc->primary.file); -#endif - rc = RDC_EREGISTER; - goto bmpfail; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: enabled %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - rdc_write_state(urdc); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (0); - -bmpfail: - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EBITMAP, rdc_set->primary.bitmap); - else - spcs_s_add(kstatus, RDC_EBITMAP, rdc_set->secondary.bitmap); - rc = RDC_EBITMAP; - if (rdc_get_vflags(urdc) & RDC_ENABLED) { - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - rdc_group_enter(krdc); - } - -fail: - rdc_kstat_delete(index); - rdc_group_exit(krdc); - if (krdc->intf) { - rdc_if_t *ip = krdc->intf; - mutex_enter(&rdc_conf_lock); - krdc->intf = NULL; - rdc_remove_from_if(ip); - mutex_exit(&rdc_conf_lock); - } - rdc_group_enter(krdc); - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - rdc_destroy_svinfo(svp); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - remove_from_group(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - rdc_u_init(urdc); - - ASSERT(krdc->type_flag & RDC_CONFIGURED); - krdc->type_flag = 0; - wakeup_busy(krdc); - - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -static int -rdc_enable(rdc_config_t 
*uparms, spcs_s_info_t kstatus) -{ - int rc; - char itmp[10]; - - if (!(uparms->options & RDC_OPT_SYNC) && - !(uparms->options & RDC_OPT_ASYNC)) { - rc = RDC_EEINVAL; - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - goto done; - } - - if (!(uparms->options & RDC_OPT_PRIMARY) && - !(uparms->options & RDC_OPT_SECONDARY)) { - rc = RDC_EEINVAL; - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - goto done; - } - - if (!(uparms->options & RDC_OPT_SETBMP) && - !(uparms->options & RDC_OPT_CLRBMP)) { - rc = RDC_EEINVAL; - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - goto done; - } - - rc = _rdc_enable(uparms->rdc_set, uparms->options, kstatus); -done: - return (rc); -} - -/* ARGSUSED */ -static int -_rdc_disable(rdc_k_info_t *krdc, rdc_config_t *uap, spcs_s_info_t kstatus) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_if_t *ip; - int index = krdc->index; - disk_queue *q; - rdc_set_t *rdc_set = uap->rdc_set; - - ASSERT(krdc->group != NULL); - rdc_group_enter(krdc); -#ifdef DEBUG - ASSERT(rdc_check(krdc, rdc_set) == 0); -#else - if (((uap->options & RDC_OPT_FORCE_DISABLE) == 0) && - rdc_check(krdc, rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - return (RDC_EALREADY); - } -#endif - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - halt_sync(krdc); - ASSERT(IS_ENABLED(urdc)); - } - q = &krdc->group->diskq; - - if (IS_ASYNC(urdc) && RDC_IS_DISKQ(krdc->group) && - ((!IS_STATE(urdc, RDC_LOGGING)) && (!QEMPTY(q)))) { - krdc->type_flag &= ~RDC_DISABLEPEND; - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EQNOTEMPTY, urdc->disk_queue); - return (RDC_EQNOTEMPTY); - } - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: disabled %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* - * No new io can come in through the io provider. - * Wait for the async flusher to finish. - */ - - if (IS_ASYNC(urdc) && !RDC_IS_DISKQ(krdc->group)) { - int tries = 2; /* in case of hopelessly stuck flusher threads */ -#ifdef DEBUG - net_queue *qp = &krdc->group->ra_queue; -#endif - do { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - - } while (krdc->group->rdc_writer && tries--); - - /* ok, force it to happen... 
*/ - if (rdc_drain_queue(krdc->index) != 0) { - do { - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - cmn_err(CE_WARN, - "!SNDR: async I/O pending and not flushed " - "for %s during disable", - urdc->primary.file); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nitems: %" NSC_SZFMT " nblocks: %" - NSC_SZFMT " head: 0x%p tail: 0x%p", - qp->nitems, qp->blocks, - (void *)qp->net_qhead, - (void *)qp->net_qtail); -#endif - } while (krdc->group->rdc_thrnum > 0); - } - } - - mutex_enter(&rdc_conf_lock); - ip = krdc->intf; - krdc->intf = 0; - - if (ip) { - rdc_remove_from_if(ip); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* Must not hold group lock during this function */ - rdc_group_exit(krdc); - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - rdc_group_enter(krdc); - - (void) rdc_clear_state(krdc); - - rdc_free_bitmap(krdc, RDC_CMD_DISABLE); - rdc_close_bitmap(krdc); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - /* - * we should now unregister the queue, with no conflicting - * locks held. This is the last(only) member of the group - */ - if (krdc->group && RDC_IS_DISKQ(krdc->group) && - krdc->group->count == 1) { /* stop protecting queue */ - rdc_unintercept_diskq(krdc->group); - } - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - wait_busy(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - remove_from_group(krdc); - - krdc->remote_index = -1; - ASSERT(krdc->type_flag & RDC_CONFIGURED); - ASSERT(krdc->type_flag & RDC_DISABLEPEND); - krdc->type_flag = 0; -#ifdef DEBUG - if (krdc->dcio_bitmap) - cmn_err(CE_WARN, "!_rdc_disable: possible mem leak, " - "dcio_bitmap"); -#endif - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->maxfbas = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - rdc_destroy_svinfo(krdc->lsrv); - krdc->lsrv = NULL; - krdc->multi_next = NULL; - - rdc_u_init(urdc); - - mutex_exit(&rdc_conf_lock); - rdc_kstat_delete(index); - - return (0); -} - -static int -rdc_disable(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - int index; - int rc; - - mutex_enter(&rdc_conf_lock); - - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - krdc->type_flag |= RDC_DISABLEPEND; - wait_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rc = _rdc_disable(krdc, uparms, kstatus); - return (rc); -} - - -/* - * Checks whether the state of one of the other sets in the 1-many or - * multi-hop config should prevent a sync from starting on this one. - * Return NULL if no just cause or impediment is found, otherwise return - * a pointer to the offending set. 
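 * In outline, mirroring the checks in the body below:
 *   forward sync  - refused if any other enabled one-to-many set still has
 *                   RDC_RSYNC_NEEDED set, as this primary may then contain
 *                   data from an incomplete reverse sync and must not be
 *                   copied to its secondary.
 *   reverse sync  - refused if any other enabled one-to-many set is not in
 *                   logging mode, or if the enabled previous multi-hop leg
 *                   is replicating or syncing; otherwise any pending
 *                   RDC_SYNC_NEEDED / RDC_RSYNC_NEEDED flags are cleared,
 *                   since this requested sync supersedes those aborted ones.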
- */ -static rdc_u_info_t * -rdc_allow_pri_sync(rdc_u_info_t *urdc, int options) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - rdc_k_info_t *kmulti = NULL; - - ASSERT(rdc_get_vflags(urdc) & RDC_PRIMARY); - - rdc_many_enter(krdc); - - /* - * In the reverse sync case we need to check the previous leg of - * the multi-hop config. The link to that set can be from any of - * the 1-many list, so as we go through we keep an eye open for it. - */ - if ((options & RDC_OPT_REVERSE) && (IS_MULTI(krdc))) { - /* This set links to the first leg */ - ktmp = krdc->multi_next; - utmp = &rdc_u_info[ktmp->index]; - if (IS_ENABLED(utmp)) - kmulti = ktmp; - } - - if (IS_MANY(krdc)) { - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - utmp = &rdc_u_info[ktmp->index]; - - if (!IS_ENABLED(utmp)) - continue; - - if (options & RDC_OPT_FORWARD) { - /* - * Reverse sync needed is bad, as it means a - * reverse sync in progress or started and - * didn't complete, so this primary volume - * is not consistent. So we shouldn't copy - * it to its secondary. - */ - if (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED) { - rdc_many_exit(krdc); - return (utmp); - } - } else { - /* Reverse, so see if we need to spot kmulti */ - if ((kmulti == NULL) && (IS_MULTI(ktmp))) { - /* This set links to the first leg */ - kmulti = ktmp->multi_next; - if (!IS_ENABLED( - &rdc_u_info[kmulti->index])) - kmulti = NULL; - } - - /* - * Non-logging is bad, as the bitmap will - * be updated with the bits for this sync. - */ - if (!(rdc_get_vflags(utmp) & RDC_LOGGING)) { - rdc_many_exit(krdc); - return (utmp); - } - } - } - } - - if (kmulti) { - utmp = &rdc_u_info[kmulti->index]; - ktmp = kmulti; /* In case we decide we do need to use ktmp */ - - ASSERT(options & RDC_OPT_REVERSE); - - if (IS_REPLICATING(utmp)) { - /* - * Replicating is bad as data is already flowing to - * the target of the requested sync operation. - */ - rdc_many_exit(krdc); - return (utmp); - } - - if (rdc_get_vflags(utmp) & RDC_SYNCING) { - /* - * Forward sync in progress is bad, as data is - * already flowing to the target of the requested - * sync operation. - * Reverse sync in progress is bad, as the primary - * has already decided which data to copy. - */ - rdc_many_exit(krdc); - return (utmp); - } - - /* - * Clear the "sync needed" flags, as the multi-hop secondary - * will be updated via this requested sync operation, so does - * not need to complete its aborted forward sync. - */ - if (rdc_get_vflags(utmp) & RDC_SYNC_NEEDED) - rdc_clr_flags(utmp, RDC_SYNC_NEEDED); - } - - if (IS_MANY(krdc) && (options & RDC_OPT_REVERSE)) { - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - utmp = &rdc_u_info[ktmp->index]; - if (!IS_ENABLED(utmp)) - continue; - - /* - * Clear any "reverse sync needed" flags, as the - * volume will be updated via this requested - * sync operation, so does not need to complete - * its aborted reverse sync. 
- */ - if (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED) - rdc_clr_mflags(utmp, RDC_RSYNC_NEEDED); - } - } - - rdc_many_exit(krdc); - - return (NULL); -} - -static void -_rdc_sync_wrthr(void *thrinfo) -{ - rdc_syncthr_t *syncinfo = (rdc_syncthr_t *)thrinfo; - nsc_buf_t *handle = NULL; - rdc_k_info_t *krdc = syncinfo->krdc; - int rc; - int tries = 0; - - DTRACE_PROBE2(rdc_sync_loop_netwrite_start, int, krdc->index, - nsc_buf_t *, handle); - -retry: - rc = nsc_alloc_buf(RDC_U_FD(krdc), syncinfo->offset, syncinfo->len, - NSC_READ | NSC_NOCACHE, &handle); - - if (!RDC_SUCCESS(rc) || krdc->remote_index < 0) { - DTRACE_PROBE(rdc_sync_wrthr_alloc_buf_err); - goto failed; - } - - rdc_group_enter(krdc); - if ((krdc->disk_status == 1) || (krdc->dcio_bitmap == NULL)) { - rdc_group_exit(krdc); - goto failed; - } - rdc_group_exit(krdc); - - if ((rc = rdc_net_write(krdc->index, krdc->remote_index, handle, - handle->sb_pos, handle->sb_len, RDC_NOSEQ, RDC_NOQUE, NULL)) > 0) { - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - /* - * The following is to handle - * the case where the secondary side - * has thrown our buffer handle token away in a - * attempt to preserve its health on restart - */ - if ((rc == EPROTO) && (tries < 3)) { - (void) nsc_free_buf(handle); - handle = NULL; - tries++; - delay(HZ >> 2); - goto retry; - } - - DTRACE_PROBE(rdc_sync_wrthr_remote_write_err); - cmn_err(CE_WARN, "!rdc_sync_wrthr: remote write failed (%d) " - "0x%x", rc, rdc_get_vflags(urdc)); - - goto failed; - } - (void) nsc_free_buf(handle); - handle = NULL; - - return; -failed: - (void) nsc_free_buf(handle); - syncinfo->status->offset = syncinfo->offset; -} - -/* - * see above comments on _rdc_sync_wrthr - */ -static void -_rdc_sync_rdthr(void *thrinfo) -{ - rdc_syncthr_t *syncinfo = (rdc_syncthr_t *)thrinfo; - nsc_buf_t *handle = NULL; - rdc_k_info_t *krdc = syncinfo->krdc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rc; - - rc = nsc_alloc_buf(RDC_U_FD(krdc), syncinfo->offset, syncinfo->len, - NSC_WRITE | NSC_WRTHRU | NSC_NOCACHE, &handle); - - if (!RDC_SUCCESS(rc) || krdc->remote_index < 0) { - goto failed; - } - rdc_group_enter(krdc); - if ((krdc->disk_status == 1) || (krdc->dcio_bitmap == NULL)) { - rdc_group_exit(krdc); - goto failed; - } - rdc_group_exit(krdc); - - rc = rdc_net_read(krdc->index, krdc->remote_index, handle, - handle->sb_pos, handle->sb_len); - - if (!RDC_SUCCESS(rc)) { - cmn_err(CE_WARN, "!rdc_sync_rdthr: remote read failed(%d)", rc); - goto failed; - } - if (!IS_STATE(urdc, RDC_FULL)) - rdc_set_bitmap_many(krdc, handle->sb_pos, handle->sb_len); - - rc = nsc_write(handle, handle->sb_pos, handle->sb_len, 0); - - if (!RDC_SUCCESS(rc)) { - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, "nsc_write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - goto failed; - } - - (void) nsc_free_buf(handle); - handle = NULL; - - return; -failed: - (void) nsc_free_buf(handle); - syncinfo->status->offset = syncinfo->offset; -} - -/* - * _rdc_sync_wrthr - * sync loop write thread - * if there are avail threads, we have not - * used up the pipe, so the sync loop will, if - * possible use these to multithread the write/read - */ -void -_rdc_sync_thread(void *thrinfo) -{ - rdc_syncthr_t *syncinfo = (rdc_syncthr_t *)thrinfo; - rdc_k_info_t *krdc = syncinfo->krdc; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_thrsync_t *sync = &krdc->syncs; - uint_t bitmask; - int rc; - - rc = _rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL); - if (!RDC_SUCCESS(rc)) - goto failed; - - if 
(IS_STATE(urdc, RDC_SLAVE)) - _rdc_sync_rdthr(thrinfo); - else - _rdc_sync_wrthr(thrinfo); - - _rdc_rlse_devs(krdc, RDC_RAW); - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdc_sync_wrthr: NULL bitmap"); -#else - /*EMPTY*/ -#endif - } else if (syncinfo->status->offset < 0) { - - RDC_SET_BITMASK(syncinfo->offset, syncinfo->len, &bitmask); - RDC_CLR_BITMAP(krdc, syncinfo->offset, syncinfo->len, \ - bitmask, RDC_BIT_FORCE); - } - -failed: - /* - * done with this, get rid of it. - * the status is not freed, it should still be a status chain - * that _rdc_sync() has the head of - */ - kmem_free(syncinfo, sizeof (*syncinfo)); - - /* - * decrement the global sync thread num - */ - mutex_enter(&sync_info.lock); - sync_info.active_thr--; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - mutex_exit(&sync_info.lock); - - /* - * krdc specific stuff - */ - mutex_enter(&sync->lock); - sync->complete++; - cv_broadcast(&sync->cv); - mutex_exit(&sync->lock); -} - -int -_rdc_setup_syncthr(rdc_syncthr_t **synthr, nsc_off_t offset, - nsc_size_t len, rdc_k_info_t *krdc, sync_status_t *stats) -{ - rdc_syncthr_t *tmp; - /* alloc here, free in the sync thread */ - tmp = - (rdc_syncthr_t *)kmem_zalloc(sizeof (rdc_syncthr_t), KM_NOSLEEP); - - if (tmp == NULL) - return (-1); - tmp->offset = offset; - tmp->len = len; - tmp->status = stats; - tmp->krdc = krdc; - - *synthr = tmp; - return (0); -} - -sync_status_t * -_rdc_new_sync_status() -{ - sync_status_t *s; - - s = (sync_status_t *)kmem_zalloc(sizeof (*s), KM_NOSLEEP); - s->offset = -1; - return (s); -} - -void -_rdc_free_sync_status(sync_status_t *status) -{ - sync_status_t *s; - - while (status) { - s = status->next; - kmem_free(status, sizeof (*status)); - status = s; - } -} -int -_rdc_sync_status_ok(sync_status_t *status, int *offset) -{ -#ifdef DEBUG_SYNCSTATUS - int i = 0; -#endif - while (status) { - if (status->offset >= 0) { - *offset = status->offset; - return (-1); - } - status = status->next; -#ifdef DEBUG_SYNCSTATUS - i++; -#endif - } -#ifdef DEBUGSYNCSTATUS - cmn_err(CE_NOTE, "!rdc_sync_status_ok: checked %d statuses", i); -#endif - return (0); -} - -int mtsync = 1; -/* - * _rdc_sync() : rdc sync loop - * - */ -static void -_rdc_sync(rdc_k_info_t *krdc) -{ - nsc_size_t size = 0; - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - int rtype; - int sts; - int reserved = 0; - nsc_buf_t *alloc_h = NULL; - nsc_buf_t *handle = NULL; - nsc_off_t mask; - nsc_size_t maxbit; - nsc_size_t len; - nsc_off_t offset = 0; - int sync_completed = 0; - int tries = 0; - int rc; - int queuing = 0; - uint_t bitmask; - sync_status_t *ss, *sync_status = NULL; - rdc_thrsync_t *sync = &krdc->syncs; - rdc_syncthr_t *syncinfo; - nsthread_t *trc = NULL; - - if (IS_STATE(urdc, RDC_QUEUING) && !IS_STATE(urdc, RDC_FULL)) { - /* flusher is handling the sync in the update case */ - queuing = 1; - goto sync_done; - } - - /* - * Main sync/resync loop - */ - DTRACE_PROBE(rdc_sync_loop_start); - - rtype = RDC_RAW; - sts = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - - DTRACE_PROBE(rdc_sync_loop_rsrv); - - if (sts != 0) - goto failed_noincr; - - reserved = 1; - - /* - * pre-allocate a handle if we can - speeds up the sync. 
- */ - - if (rdc_prealloc_handle) { - alloc_h = nsc_alloc_handle(RDC_U_FD(krdc), NULL, NULL, NULL); -#ifdef DEBUG - if (!alloc_h) { - cmn_err(CE_WARN, - "!rdc sync: failed to pre-alloc handle"); - } -#endif - } else { - alloc_h = NULL; - } - - ASSERT(urdc->volume_size != 0); - size = urdc->volume_size; - mask = ~(LOG_TO_FBA_NUM(1) - 1); - maxbit = FBA_TO_LOG_NUM(size - 1); - - /* - * as this while loop can also move data, it is counted as a - * sync loop thread - */ - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_set_flags(urdc, RDC_SYNCING); - krdc->group->synccount++; - rdc_group_exit(krdc); - mutex_enter(&sync_info.lock); - sync_info.active_thr++; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - mutex_exit(&sync_info.lock); - - while (offset < size) { - rdc_group_enter(krdc); - ASSERT(krdc->aux_state & RDC_AUXSYNCIP); - if (krdc->disk_status == 1 || krdc->dcio_bitmap == NULL) { - rdc_group_exit(krdc); - if (krdc->disk_status == 1) { - DTRACE_PROBE(rdc_sync_loop_disk_status_err); - } else { - DTRACE_PROBE(rdc_sync_loop_dcio_bitmap_err); - } - goto failed; /* halt sync */ - } - rdc_group_exit(krdc); - - if (!(rdc_get_vflags(urdc) & RDC_FULL)) { - mutex_enter(&krdc->syncbitmutex); - krdc->syncbitpos = FBA_TO_LOG_NUM(offset); - len = 0; - - /* skip unnecessary chunks */ - - while (krdc->syncbitpos <= maxbit && - !RDC_BIT_ISSET(krdc, krdc->syncbitpos)) { - offset += LOG_TO_FBA_NUM(1); - krdc->syncbitpos++; - } - - /* check for boundary */ - - if (offset >= size) { - mutex_exit(&krdc->syncbitmutex); - goto sync_done; - } - - /* find maximal length we can transfer */ - - while (krdc->syncbitpos <= maxbit && - RDC_BIT_ISSET(krdc, krdc->syncbitpos)) { - len += LOG_TO_FBA_NUM(1); - krdc->syncbitpos++; - /* we can only read maxfbas anyways */ - if (len >= krdc->maxfbas) - break; - } - - len = min(len, (size - offset)); - - } else { - len = size - offset; - } - - /* truncate to the io provider limit */ - ASSERT(krdc->maxfbas != 0); - len = min(len, krdc->maxfbas); - - if (len > LOG_TO_FBA_NUM(1)) { - /* - * If the update is larger than a bitmap chunk, - * then truncate to a whole number of bitmap - * chunks. - * - * If the update is smaller than a bitmap - * chunk, this must be the last write. - */ - len &= mask; - } - - if (!(rdc_get_vflags(urdc) & RDC_FULL)) { - krdc->syncbitpos = FBA_TO_LOG_NUM(offset + len); - mutex_exit(&krdc->syncbitmutex); - } - - /* - * Find out if we can reserve a thread here ... 
- * note: skip the mutex for the first check, if the number - * is up there, why bother even grabbing the mutex to - * only realize that we can't have a thread anyways - */ - - if (mtsync && sync_info.active_thr < RDC_MAX_SYNC_THREADS) { - - mutex_enter(&sync_info.lock); - if (sync_info.avail_thr >= 1) { - if (sync_status == NULL) { - ss = sync_status = - _rdc_new_sync_status(); - } else { - ss = ss->next = _rdc_new_sync_status(); - } - if (ss == NULL) { - mutex_exit(&sync_info.lock); -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_sync: can't " - "allocate status for mt sync"); -#endif - goto retry; - } - /* - * syncinfo protected by sync_info lock but - * not part of the sync_info structure - * be careful if moving - */ - if (_rdc_setup_syncthr(&syncinfo, - offset, len, krdc, ss) < 0) { - _rdc_free_sync_status(ss); - } - - trc = nst_create(sync_info.rdc_syncset, - _rdc_sync_thread, syncinfo, NST_SLEEP); - - if (trc == NULL) { - mutex_exit(&sync_info.lock); -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_sync: unable to " - "mt sync"); -#endif - _rdc_free_sync_status(ss); - kmem_free(syncinfo, sizeof (*syncinfo)); - syncinfo = NULL; - goto retry; - } else { - mutex_enter(&sync->lock); - sync->threads++; - mutex_exit(&sync->lock); - } - - sync_info.active_thr++; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - - mutex_exit(&sync_info.lock); - goto threaded; - } - mutex_exit(&sync_info.lock); - } -retry: - handle = alloc_h; - DTRACE_PROBE(rdc_sync_loop_allocbuf_start); - if (rdc_get_vflags(urdc) & RDC_SLAVE) - sts = nsc_alloc_buf(RDC_U_FD(krdc), offset, len, - NSC_WRITE | NSC_WRTHRU | NSC_NOCACHE, &handle); - else - sts = nsc_alloc_buf(RDC_U_FD(krdc), offset, len, - NSC_READ | NSC_NOCACHE, &handle); - - DTRACE_PROBE(rdc_sync_loop_allocbuf_end); - if (sts > 0) { - if (handle && handle != alloc_h) { - (void) nsc_free_buf(handle); - } - - handle = NULL; - DTRACE_PROBE(rdc_sync_loop_allocbuf_err); - goto failed; - } - - if (rdc_get_vflags(urdc) & RDC_SLAVE) { - /* overwrite buffer with remote data */ - sts = rdc_net_read(krdc->index, krdc->remote_index, - handle, handle->sb_pos, handle->sb_len); - - if (!RDC_SUCCESS(sts)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc sync: remote read failed (%d)", sts); -#endif - DTRACE_PROBE(rdc_sync_loop_remote_read_err); - goto failed; - } - if (!(rdc_get_vflags(urdc) & RDC_FULL)) - rdc_set_bitmap_many(krdc, handle->sb_pos, - handle->sb_len); - - /* commit locally */ - - sts = nsc_write(handle, handle->sb_pos, - handle->sb_len, 0); - - if (!RDC_SUCCESS(sts)) { - /* reverse sync needed already set */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "write failed during sync"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - DTRACE_PROBE(rdc_sync_loop_nsc_write_err); - goto failed; - } - } else { - /* send local data to remote */ - DTRACE_PROBE2(rdc_sync_loop_netwrite_start, - int, krdc->index, nsc_buf_t *, handle); - - if ((sts = rdc_net_write(krdc->index, - krdc->remote_index, handle, handle->sb_pos, - handle->sb_len, RDC_NOSEQ, RDC_NOQUE, NULL)) > 0) { - - /* - * The following is to handle - * the case where the secondary side - * has thrown our buffer handle token away in a - * attempt to preserve its health on restart - */ - if ((sts == EPROTO) && (tries < 3)) { - (void) nsc_free_buf(handle); - handle = NULL; - tries++; - delay(HZ >> 2); - goto retry; - } -#ifdef DEBUG - cmn_err(CE_WARN, - "!rdc sync: remote write failed (%d) 0x%x", - sts, rdc_get_vflags(urdc)); -#endif - DTRACE_PROBE(rdc_sync_loop_netwrite_err); - goto failed; - } - 
DTRACE_PROBE(rdc_sync_loop_netwrite_end); - } - - (void) nsc_free_buf(handle); - handle = NULL; - - if (krdc->dcio_bitmap == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdc_sync: NULL bitmap"); -#else - ; - /*EMPTY*/ -#endif - } else { - - RDC_SET_BITMASK(offset, len, &bitmask); - RDC_CLR_BITMAP(krdc, offset, len, bitmask, \ - RDC_BIT_FORCE); - ASSERT(!IS_ASYNC(urdc)); - } - - /* - * Only release/reserve if someone is waiting - */ - if (krdc->devices->id_release || nsc_waiting(RDC_U_FD(krdc))) { - DTRACE_PROBE(rdc_sync_loop_rlse_start); - if (alloc_h) { - (void) nsc_free_handle(alloc_h); - alloc_h = NULL; - } - - _rdc_rlse_devs(krdc, rtype); - reserved = 0; - delay(2); - - rtype = RDC_RAW; - sts = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (sts != 0) { - handle = NULL; - DTRACE_PROBE(rdc_sync_loop_rdc_rsrv_err); - goto failed; - } - - reserved = 1; - - if (rdc_prealloc_handle) { - alloc_h = nsc_alloc_handle(RDC_U_FD(krdc), - NULL, NULL, NULL); -#ifdef DEBUG - if (!alloc_h) { - cmn_err(CE_WARN, "!rdc_sync: " - "failed to pre-alloc handle"); - } -#endif - } - DTRACE_PROBE(rdc_sync_loop_rlse_end); - } -threaded: - offset += len; - urdc->sync_pos = offset; - } - -sync_done: - sync_completed = 1; - -failed: - krdc->group->synccount--; -failed_noincr: - mutex_enter(&sync->lock); - while (sync->complete != sync->threads) { - cv_wait(&sync->cv, &sync->lock); - } - sync->complete = 0; - sync->threads = 0; - mutex_exit(&sync->lock); - - /* - * if sync_completed is 0 here, - * we know that the main sync thread failed anyway - * so just free the statuses and fail - */ - if (sync_completed && (_rdc_sync_status_ok(sync_status, &rc) < 0)) { - urdc->sync_pos = rc; - sync_completed = 0; /* at least 1 thread failed */ - } - - _rdc_free_sync_status(sync_status); - - /* - * we didn't increment, we didn't even sync, - * so don't dec sync_info.active_thr - */ - if (!queuing) { - mutex_enter(&sync_info.lock); - sync_info.active_thr--; - /* LINTED */ - RDC_AVAIL_THR_TUNE(sync_info); - mutex_exit(&sync_info.lock); - } - - if (handle) { - (void) nsc_free_buf(handle); - } - - if (alloc_h) { - (void) nsc_free_handle(alloc_h); - } - - if (reserved) { - _rdc_rlse_devs(krdc, rtype); - } - -notstarted: - rdc_group_enter(krdc); - ASSERT(krdc->aux_state & RDC_AUXSYNCIP); - if (IS_STATE(urdc, RDC_QUEUING)) - rdc_clr_flags(urdc, RDC_QUEUING); - - if (sync_completed) { - (void) rdc_net_state(krdc->index, CCIO_DONE); - } else { - (void) rdc_net_state(krdc->index, CCIO_ENABLELOG); - } - - rdc_clr_flags(urdc, RDC_SYNCING); - if (rdc_get_vflags(urdc) & RDC_SLAVE) { - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - if (krdc->type_flag & RDC_ASYNCMODE) - rdc_set_flags(urdc, RDC_ASYNC); - if (sync_completed) { - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - } else { - krdc->remote_index = -1; - rdc_set_flags_log(urdc, RDC_LOGGING, "sync failed to complete"); - } - rdc_group_exit(krdc); - rdc_write_state(urdc); - - mutex_enter(&net_blk_lock); - if (sync_completed) - krdc->sync_done = RDC_COMPLETED; - else - krdc->sync_done = RDC_FAILED; - cv_broadcast(&krdc->synccv); - mutex_exit(&net_blk_lock); - -} - - -static int -rdc_sync(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_set_t *rdc_set = uparms->rdc_set; - int options = uparms->options; - int rc = 0; - int busy = 0; - int index; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_k_info_t *kmulti; - rdc_u_info_t *umulti; - rdc_group_t *group; - rdc_srv_t *svp; - int sm, um, md; - int 
sync_completed = 0; - int thrcount; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto notstarted; - } - - urdc = &rdc_u_info[index]; - group = krdc->group; - set_busy(krdc); - busy = 1; - if ((krdc->type_flag == 0) || (krdc->type_flag & RDC_DISABLEPEND)) { - /* A resume or enable failed or we raced with a teardown */ - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto notstarted; - } - mutex_exit(&rdc_conf_lock); - rdc_group_enter(krdc); - - if (!IS_STATE(urdc, RDC_LOGGING)) { - spcs_s_add(kstatus, RDC_ESETNOTLOGGING, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_ENOTLOGGING; - goto notstarted_unlock; - } - - if (rdc_check(krdc, rdc_set)) { - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - rc = RDC_EALREADY; - goto notstarted_unlock; - } - - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - spcs_s_add(kstatus, RDC_ENOTPRIMARY, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_ENOTPRIMARY; - goto notstarted_unlock; - } - - if ((options & RDC_OPT_REVERSE) && (IS_STATE(urdc, RDC_QUEUING))) { - /* - * cannot reverse sync when queuing, need to go logging first - */ - spcs_s_add(kstatus, RDC_EQNORSYNC, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_EQNORSYNC; - goto notstarted_unlock; - } - - svp = krdc->lsrv; - krdc->intf = rdc_add_to_if(svp, &(urdc->primary.addr), - &(urdc->secondary.addr), 1); - - if (!krdc->intf) { - spcs_s_add(kstatus, RDC_EADDTOIF, urdc->primary.intf, - urdc->secondary.intf); - rc = RDC_EADDTOIF; - goto notstarted_unlock; - } - - if (urdc->volume_size == 0) { - /* Implies reserve failed when previous resume was done */ - rdc_get_details(krdc); - } - if (urdc->volume_size == 0) { - spcs_s_add(kstatus, RDC_ENOBMAP); - rc = RDC_ENOBMAP; - goto notstarted_unlock; - } - - if (krdc->dcio_bitmap == NULL) { - if (rdc_resume_bitmap(krdc) < 0) { - spcs_s_add(kstatus, RDC_ENOBMAP); - rc = RDC_ENOBMAP; - goto notstarted_unlock; - } - } - - if ((rdc_get_vflags(urdc) & RDC_BMP_FAILED) && (krdc->bitmapfd)) { - if (rdc_reset_bitmap(krdc)) { - spcs_s_add(kstatus, RDC_EBITMAP); - rc = RDC_EBITMAP; - goto notstarted_unlock; - } - } - - if (IS_MANY(krdc) || IS_MULTI(krdc)) { - rdc_u_info_t *ubad; - - if ((ubad = rdc_allow_pri_sync(urdc, options)) != NULL) { - spcs_s_add(kstatus, RDC_ESTATE, - ubad->primary.intf, ubad->primary.file, - ubad->secondary.intf, ubad->secondary.file); - rc = RDC_ESTATE; - goto notstarted_unlock; - } - } - - /* - * there is a small window where _rdc_sync is still - * running, but has cleared the RDC_SYNCING flag. - * Use aux_state which is only cleared - * after _rdc_sync had done its 'death' broadcast. 
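 * (RDC_SYNCING is cleared by the _rdc_sync() thread itself, under the
 * group lock, whereas RDC_AUXSYNCIP is only cleared at this routine's
 * "failed:" label after _rdc_sync()'s 'death' broadcast has been seen,
 * so aux_state is the reliable indicator here.)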
- */ - if (krdc->aux_state & RDC_AUXSYNCIP) { -#ifdef DEBUG - if (!rdc_get_vflags(urdc) & RDC_SYNCING) { - cmn_err(CE_WARN, "!rdc_sync: " - "RDC_AUXSYNCIP set, SYNCING off"); - } -#endif - spcs_s_add(kstatus, RDC_ESYNCING, rdc_set->primary.file); - rc = RDC_ESYNCING; - goto notstarted_unlock; - } - if (krdc->disk_status == 1) { - spcs_s_add(kstatus, RDC_ESYNCING, rdc_set->primary.file); - rc = RDC_ESYNCING; - goto notstarted_unlock; - } - - if ((options & RDC_OPT_FORWARD) && - (rdc_get_mflags(urdc) & RDC_RSYNC_NEEDED)) { - /* cannot forward sync if a reverse sync is needed */ - spcs_s_add(kstatus, RDC_ERSYNCNEEDED, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - rc = RDC_ERSYNCNEEDED; - goto notstarted_unlock; - } - - urdc->sync_pos = 0; - - /* Check if the rdc set is accessible on the remote node */ - if (rdc_net_getstate(krdc, &sm, &um, &md, FALSE) < 0) { - /* - * Remote end may be inaccessible, or the rdc set is not - * enabled at the remote end. - */ - spcs_s_add(kstatus, RDC_ECONNOPEN, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_ECONNOPEN; - goto notstarted_unlock; - } - if (options & RDC_OPT_REVERSE) - krdc->remote_index = rdc_net_state(index, CCIO_RSYNC); - else - krdc->remote_index = rdc_net_state(index, CCIO_SLAVE); - if (krdc->remote_index < 0) { - /* - * Remote note probably not in a valid state to be synced, - * as the state was fetched OK above. - */ - spcs_s_add(kstatus, RDC_ERSTATE, urdc->secondary.intf, - urdc->secondary.file, urdc->primary.intf, - urdc->primary.file); - rc = RDC_ERSTATE; - goto notstarted_unlock; - } - - rc = check_filesize(index, kstatus); - if (rc != 0) { - (void) rdc_net_state(krdc->index, CCIO_ENABLELOG); - goto notstarted_unlock; - } - - krdc->sync_done = 0; - - mutex_enter(&krdc->bmapmutex); - krdc->aux_state |= RDC_AUXSYNCIP; - mutex_exit(&krdc->bmapmutex); - - if (options & RDC_OPT_REVERSE) { - rdc_many_enter(krdc); - rdc_set_mflags(urdc, RDC_SLAVE | RDC_RSYNC_NEEDED); - mutex_enter(&krdc->bmapmutex); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - mutex_exit(&krdc->bmapmutex); - rdc_write_state(urdc); - /* LINTED */ - if (kmulti = krdc->multi_next) { - umulti = &rdc_u_info[kmulti->index]; - if (IS_ENABLED(umulti) && (rdc_get_vflags(umulti) & - (RDC_VOL_FAILED | RDC_SYNC_NEEDED))) { - rdc_clr_flags(umulti, RDC_SYNC_NEEDED); - rdc_clr_flags(umulti, RDC_VOL_FAILED); - rdc_write_state(umulti); - } - } - rdc_many_exit(krdc); - } else { - rdc_clr_flags(urdc, RDC_FCAL_FAILED); - rdc_write_state(urdc); - } - - if (options & RDC_OPT_UPDATE) { - ASSERT(urdc->volume_size != 0); - if (rdc_net_getbmap(index, - BMAP_LOG_BYTES(urdc->volume_size)) > 0) { - spcs_s_add(kstatus, RDC_ENOBMAP); - rc = RDC_ENOBMAP; - - (void) rdc_net_state(index, CCIO_ENABLELOG); - - rdc_clr_flags(urdc, RDC_SYNCING); - if (options & RDC_OPT_REVERSE) { - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - if (krdc->type_flag & RDC_ASYNCMODE) - rdc_set_flags(urdc, RDC_ASYNC); - krdc->remote_index = -1; - rdc_set_flags_log(urdc, RDC_LOGGING, - "failed to read remote bitmap"); - rdc_write_state(urdc); - goto failed; - } - rdc_clr_flags(urdc, RDC_FULL); - } else { - /* - * This is a full sync (not an update sync), mark the - * entire bitmap dirty - */ - (void) RDC_FILL_BITMAP(krdc, FALSE); - - rdc_set_flags(urdc, RDC_FULL); - } - - rdc_group_exit(krdc); - - /* - * allow diskq->memq flusher to wake up - */ - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->ra_queue.qfflags &= 
~RDC_QFILLSLEEP; - mutex_exit(&krdc->group->ra_queue.net_qlock); - - /* - * if this is a full sync on a non-diskq set or - * a diskq set that has failed, clear the async flag - */ - if (krdc->type_flag & RDC_ASYNCMODE) { - if ((!(options & RDC_OPT_UPDATE)) || - (!RDC_IS_DISKQ(krdc->group)) || - (!(IS_STATE(urdc, RDC_QUEUING)))) { - /* full syncs, or core queue are synchronous */ - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - } - - /* - * if the queue failed because it was full, lets see - * if we can restart it. After _rdc_sync() is done - * the modes will switch and we will begin disk - * queuing again. NOTE: this should only be called - * once per group, as it clears state for all group - * members, also clears the async flag for all members - */ - if (IS_STATE(urdc, RDC_DISKQ_FAILED)) { - rdc_unfail_diskq(krdc); - } else { - /* don't add insult to injury by flushing a dead queue */ - - /* - * if we are updating, and a diskq and - * the async thread isn't active, start - * it up. - */ - if ((options & RDC_OPT_UPDATE) && - (IS_STATE(urdc, RDC_QUEUING))) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_group_exit(krdc); - mutex_enter(&krdc->group->ra_queue.net_qlock); - if (krdc->group->ra_queue.qfill_sleeping == - RDC_QFILL_ASLEEP) - cv_broadcast(&group->ra_queue.qfcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - thrcount = urdc->asyncthr; - while ((thrcount-- > 0) && - !krdc->group->rdc_writer) { - (void) rdc_writer(krdc->index); - } - } - } - } - - /* - * For a reverse sync, merge the current bitmap with all other sets - * that share this volume. - */ - if (options & RDC_OPT_REVERSE) { -retry_many: - rdc_many_enter(krdc); - if (IS_MANY(krdc)) { - rdc_k_info_t *kmany; - rdc_u_info_t *umany; - - for (kmany = krdc->many_next; kmany != krdc; - kmany = kmany->many_next) { - umany = &rdc_u_info[kmany->index]; - if (!IS_ENABLED(umany)) - continue; - ASSERT(umany->flags & RDC_PRIMARY); - - if (!mutex_tryenter(&kmany->group->lock)) { - rdc_many_exit(krdc); - /* May merge more than once */ - goto retry_many; - } - rdc_merge_bitmaps(krdc, kmany); - mutex_exit(&kmany->group->lock); - } - } - rdc_many_exit(krdc); - -retry_multi: - rdc_many_enter(krdc); - if (IS_MULTI(krdc)) { - rdc_k_info_t *kmulti = krdc->multi_next; - rdc_u_info_t *umulti = &rdc_u_info[kmulti->index]; - - if (IS_ENABLED(umulti)) { - ASSERT(!(umulti->flags & RDC_PRIMARY)); - - if (!mutex_tryenter(&kmulti->group->lock)) { - rdc_many_exit(krdc); - goto retry_multi; - } - rdc_merge_bitmaps(krdc, kmulti); - mutex_exit(&kmulti->group->lock); - } - } - rdc_many_exit(krdc); - } - - rdc_group_enter(krdc); - - if (krdc->bitmap_write == 0) { - if (rdc_write_bitmap_fill(krdc) >= 0) - krdc->bitmap_write = -1; - } - - if (krdc->bitmap_write > 0) - (void) rdc_write_bitmap(krdc); - - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - - rdc_group_exit(krdc); - - if (options & RDC_OPT_REVERSE) { - (void) _rdc_sync_event_notify(RDC_SYNC_START, - urdc->primary.file, urdc->group_name); - } - - /* Now set off the sync itself */ - - mutex_enter(&net_blk_lock); - if (nsc_create_process( - (void (*)(void *))_rdc_sync, (void *)krdc, FALSE)) { - mutex_exit(&net_blk_lock); - spcs_s_add(kstatus, RDC_ENOPROC); - /* - * We used to just return here, - * but we need to clear the AUXSYNCIP bit - * and there is a very small chance that - * someone may be waiting on the disk_status flag. - */ - rc = RDC_ENOPROC; - /* - * need the group lock held at failed. 
- */ - rdc_group_enter(krdc); - goto failed; - } - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - busy = 0; - mutex_exit(&rdc_conf_lock); - - while (krdc->sync_done == 0) - cv_wait(&krdc->synccv, &net_blk_lock); - mutex_exit(&net_blk_lock); - - rdc_group_enter(krdc); - - if (krdc->sync_done == RDC_FAILED) { - char siztmp1[16]; - (void) spcs_s_inttostring( - urdc->sync_pos, siztmp1, sizeof (siztmp1), - 0); - spcs_s_add(kstatus, RDC_EFAIL, siztmp1); - rc = RDC_EFAIL; - } else - sync_completed = 1; - -failed: - /* - * We use this flag now to make halt_sync() wait for - * us to terminate and let us take the group lock. - */ - krdc->aux_state &= ~RDC_AUXSYNCIP; - if (krdc->disk_status == 1) { - krdc->disk_status = 0; - cv_broadcast(&krdc->haltcv); - } - -notstarted_unlock: - rdc_group_exit(krdc); - - if (sync_completed && (options & RDC_OPT_REVERSE)) { - (void) _rdc_sync_event_notify(RDC_SYNC_DONE, - urdc->primary.file, urdc->group_name); - } - -notstarted: - if (busy) { - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - } - - return (rc); -} - -/* ARGSUSED */ -static int -_rdc_suspend(rdc_k_info_t *krdc, rdc_set_t *rdc_set, spcs_s_info_t kstatus) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_if_t *ip; - int index = krdc->index; - - ASSERT(krdc->group != NULL); - rdc_group_enter(krdc); -#ifdef DEBUG - ASSERT(rdc_check(krdc, rdc_set) == 0); -#else - if (rdc_check(krdc, rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - return (RDC_EALREADY); - } -#endif - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - halt_sync(krdc); - ASSERT(IS_ENABLED(urdc)); - } - - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: suspended %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - - if (IS_ASYNC(urdc) && !RDC_IS_DISKQ(krdc->group)) { - int tries = 2; /* in case of possibly stuck flusher threads */ -#ifdef DEBUG - net_queue *qp = &krdc->group->ra_queue; -#endif - do { - if (!krdc->group->rdc_writer) - (void) rdc_writer(krdc->index); - - (void) rdc_drain_queue(krdc->index); - - } while (krdc->group->rdc_writer && tries--); - - /* ok, force it to happen... 
*/ - if (rdc_drain_queue(krdc->index) != 0) { - do { - mutex_enter(&krdc->group->ra_queue.net_qlock); - krdc->group->asyncdis = 1; - cv_broadcast(&krdc->group->asyncqcv); - mutex_exit(&krdc->group->ra_queue.net_qlock); - cmn_err(CE_WARN, - "!SNDR: async I/O pending and not flushed " - "for %s during suspend", - urdc->primary.file); -#ifdef DEBUG - cmn_err(CE_WARN, - "!nitems: %" NSC_SZFMT " nblocks: %" - NSC_SZFMT " head: 0x%p tail: 0x%p", - qp->nitems, qp->blocks, - (void *)qp->net_qhead, - (void *)qp->net_qtail); -#endif - } while (krdc->group->rdc_thrnum > 0); - } - } - - mutex_enter(&rdc_conf_lock); - ip = krdc->intf; - krdc->intf = 0; - - if (ip) { - rdc_remove_from_if(ip); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - /* Must not hold group lock during this function */ - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - rdc_group_enter(krdc); - - /* Don't rdc_clear_state, unlike _rdc_disable */ - - rdc_free_bitmap(krdc, RDC_CMD_SUSPEND); - rdc_close_bitmap(krdc); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - /* - * we should now unregister the queue, with no conflicting - * locks held. This is the last(only) member of the group - */ - if (krdc->group && RDC_IS_DISKQ(krdc->group) && - krdc->group->count == 1) { /* stop protecting queue */ - rdc_unintercept_diskq(krdc->group); - } - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - wait_busy(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - remove_from_group(krdc); - - krdc->remote_index = -1; - ASSERT(krdc->type_flag & RDC_CONFIGURED); - ASSERT(krdc->type_flag & RDC_DISABLEPEND); - krdc->type_flag = 0; -#ifdef DEBUG - if (krdc->dcio_bitmap) - cmn_err(CE_WARN, "!_rdc_suspend: possible mem leak, " - "dcio_bitmap"); -#endif - krdc->dcio_bitmap = NULL; - krdc->bitmap_ref = NULL; - krdc->bitmap_size = 0; - krdc->maxfbas = 0; - krdc->bitmap_write = 0; - krdc->disk_status = 0; - rdc_destroy_svinfo(krdc->lsrv); - krdc->lsrv = NULL; - krdc->multi_next = NULL; - - rdc_u_init(urdc); - - mutex_exit(&rdc_conf_lock); - rdc_kstat_delete(index); - return (0); -} - -static int -rdc_suspend(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - int index; - int rc; - - mutex_enter(&rdc_conf_lock); - - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - krdc->type_flag |= RDC_DISABLEPEND; - wait_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rc = _rdc_suspend(krdc, uparms->rdc_set, kstatus); - return (rc); -} - -static int -_rdc_resume(rdc_set_t *rdc_set, int options, spcs_s_info_t kstatus) -{ - int index; - char *rhost; - struct netbuf *addrp; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_srv_t *svp = NULL; - char *local_file; - char *local_bitmap; - int rc, rc1; - nsc_size_t 
maxfbas; - rdc_group_t *grp; - - if ((rdc_set->primary.intf[0] == 0) || - (rdc_set->primary.addr.len == 0) || - (rdc_set->primary.file[0] == 0) || - (rdc_set->primary.bitmap[0] == 0) || - (rdc_set->secondary.intf[0] == 0) || - (rdc_set->secondary.addr.len == 0) || - (rdc_set->secondary.file[0] == 0) || - (rdc_set->secondary.bitmap[0] == 0)) { - spcs_s_add(kstatus, RDC_EEMPTY); - return (RDC_EEMPTY); - } - - /* Next check there aren't any enabled rdc sets which match. */ - - mutex_enter(&rdc_conf_lock); - - if (rdc_lookup_byname(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EENABLED, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EENABLED); - } - - if (rdc_lookup_many2one(rdc_set) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EMANY2ONE, rdc_set->primary.intf, - rdc_set->primary.file, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EMANY2ONE); - } - - if (rdc_set->netconfig->knc_proto == NULL) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETCONFIG); - return (RDC_ENETCONFIG); - } - - if (rdc_set->primary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->primary.file); - return (RDC_ENETBUF); - } - - if (rdc_set->secondary.addr.len == 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENETBUF, rdc_set->secondary.file); - return (RDC_ENETBUF); - } - - /* Check that the local data volume isn't in use as a bitmap */ - if (options & RDC_OPT_PRIMARY) - local_file = rdc_set->primary.file; - else - local_file = rdc_set->secondary.file; - if (rdc_lookup_bitmap(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - - /* check that the secondary data volume isn't in use */ - if (!(options & RDC_OPT_PRIMARY)) { - local_file = rdc_set->secondary.file; - if (rdc_lookup_secondary(local_file) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EVOLINUSE, local_file); - return (RDC_EVOLINUSE); - } - } - - /* Check that the bitmap isn't in use as a data volume */ - if (options & RDC_OPT_PRIMARY) - local_bitmap = rdc_set->primary.bitmap; - else - local_bitmap = rdc_set->secondary.bitmap; - if (rdc_lookup_configured(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* Check that the bitmap isn't already in use as a bitmap */ - if (rdc_lookup_bitmap(local_bitmap) >= 0) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EBMPINUSE, local_bitmap); - return (RDC_EBMPINUSE); - } - - /* Set urdc->volume_size */ - index = rdc_dev_open(rdc_set, options); - if (index < 0) { - mutex_exit(&rdc_conf_lock); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->primary.intf, - rdc_set->primary.file); - else - spcs_s_add(kstatus, RDC_EOPEN, rdc_set->secondary.intf, - rdc_set->secondary.file); - return (RDC_EOPEN); - } - - urdc = &rdc_u_info[index]; - krdc = &rdc_k_info[index]; - - /* copy relevant parts of rdc_set to urdc field by field */ - - (void) strncpy(urdc->primary.intf, rdc_set->primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->secondary.intf, rdc_set->secondary.intf, - MAX_RDC_HOST_SIZE); - - (void) strncpy(urdc->group_name, rdc_set->group_name, NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->primary.addr, &urdc->primary.addr); - (void) strncpy(urdc->primary.file, rdc_set->primary.file, NSC_MAXPATH); - (void) 
strncpy(urdc->primary.bitmap, rdc_set->primary.bitmap, - NSC_MAXPATH); - - dup_rdc_netbuf(&rdc_set->secondary.addr, &urdc->secondary.addr); - (void) strncpy(urdc->secondary.file, rdc_set->secondary.file, - NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, rdc_set->secondary.bitmap, - NSC_MAXPATH); - (void) strncpy(urdc->disk_queue, rdc_set->disk_queue, NSC_MAXPATH); - urdc->setid = rdc_set->setid; - - if ((options & RDC_OPT_SYNC) && urdc->disk_queue[0]) { - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - spcs_s_add(kstatus, RDC_EQWRONGMODE); - return (RDC_EQWRONGMODE); - } - - /* - * init flags now so that state left by failures in add_to_group() - * are preserved. - */ - rdc_init_flags(urdc); - - if ((rc1 = add_to_group(krdc, options, RDC_CMD_RESUME)) != 0) { - if (rc1 == RDC_EQNOADD) { /* something went wrong with queue */ - rdc_fail_diskq(krdc, RDC_WAIT, RDC_NOLOG); - /* don't return a failure here, continue with resume */ - - } else { /* some other group add failure */ - mutex_exit(&rdc_conf_lock); - rdc_dev_close(krdc); - spcs_s_add(kstatus, RDC_EGROUP, - rdc_set->primary.intf, rdc_set->primary.file, - rdc_set->secondary.intf, rdc_set->secondary.file, - rdc_set->group_name); - return (RDC_EGROUP); - } - } - - /* - * maxfbas was set in rdc_dev_open as primary's maxfbas. - * If diskq's maxfbas is smaller, then use diskq's. - */ - grp = krdc->group; - if (grp && RDC_IS_DISKQ(grp) && (grp->diskqfd != 0)) { - rc = _rdc_rsrv_diskq(grp); - if (RDC_SUCCESS(rc)) { - rc = nsc_maxfbas(grp->diskqfd, 0, &maxfbas); - if (rc == 0) { -#ifdef DEBUG - if (krdc->maxfbas != maxfbas) - cmn_err(CE_NOTE, - "!_rdc_resume: diskq maxfbas = %" - NSC_SZFMT ", primary maxfbas = %" - NSC_SZFMT, maxfbas, krdc->maxfbas); -#endif - krdc->maxfbas = min(krdc->maxfbas, - maxfbas); - } else { - cmn_err(CE_WARN, - "!_rdc_resume: diskq maxfbas failed (%d)", - rc); - } - _rdc_rlse_diskq(grp); - } else { - cmn_err(CE_WARN, - "!_rdc_resume: diskq reserve failed (%d)", rc); - } - } - - (void) strncpy(urdc->direct_file, rdc_set->direct_file, NSC_MAXPATH); - if ((options & RDC_OPT_PRIMARY) && rdc_set->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - } - - krdc->many_next = krdc; - - ASSERT(krdc->type_flag == 0); - krdc->type_flag = RDC_CONFIGURED; - - if (options & RDC_OPT_PRIMARY) - rdc_set_flags(urdc, RDC_PRIMARY); - - if (options & RDC_OPT_ASYNC) - krdc->type_flag |= RDC_ASYNCMODE; - - set_busy(krdc); - - urdc->syshostid = rdc_set->syshostid; - - if (add_to_many(krdc) < 0) { - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - spcs_s_add(kstatus, RDC_EMULTI); - rc = RDC_EMULTI; - goto fail; - } - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - mutex_exit(&rdc_conf_lock); - - if (urdc->volume_size == 0) { - rdc_many_enter(krdc); - if (options & RDC_OPT_PRIMARY) - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - else - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags(urdc, RDC_VOL_FAILED); - rdc_many_exit(krdc); - } - - rdc_group_enter(krdc); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* - * The rdc set is configured but not yet enabled. Other operations must - * ignore this set until it is enabled. 
- */ - - urdc->sync_pos = 0; - - /* Set tunable defaults, we'll pick up tunables from the header later */ - - urdc->maxqfbas = rdc_maxthres_queue; - urdc->maxqitems = rdc_max_qitems; - urdc->autosync = 0; - urdc->asyncthr = rdc_asyncthr; - - urdc->netconfig = rdc_set->netconfig; - - if (options & RDC_OPT_PRIMARY) { - rhost = rdc_set->secondary.intf; - addrp = &rdc_set->secondary.addr; - } else { - rhost = rdc_set->primary.intf; - addrp = &rdc_set->primary.addr; - } - - if (options & RDC_OPT_ASYNC) - rdc_set_flags(urdc, RDC_ASYNC); - - svp = rdc_create_svinfo(rhost, addrp, urdc->netconfig); - if (svp == NULL) { - spcs_s_add(kstatus, ENOMEM); - rc = ENOMEM; - goto fail; - } - - urdc->netconfig = NULL; /* This will be no good soon */ - - /* Don't set krdc->intf here */ - rdc_kstat_create(index); - - /* if the bitmap resume isn't clean, it will clear queuing flag */ - - (void) rdc_resume_bitmap(krdc); - - if (RDC_IS_DISKQ(krdc->group)) { - disk_queue *q = &krdc->group->diskq; - if ((rc1 == RDC_EQNOADD) || - IS_QSTATE(q, RDC_QBADRESUME)) { - rdc_clr_flags(urdc, RDC_QUEUING); - RDC_ZERO_BITREF(krdc); - } - } - - if (krdc->lsrv == NULL) - krdc->lsrv = svp; - else { -#ifdef DEBUG - cmn_err(CE_WARN, "!_rdc_resume: krdc->lsrv already set: %p", - (void *) krdc->lsrv); -#endif - rdc_destroy_svinfo(svp); - } - svp = NULL; - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - /* And finally */ - - krdc->remote_index = -1; - - /* Should we set the whole group logging? */ - rdc_set_flags(urdc, RDC_ENABLED | RDC_LOGGING); - - rdc_group_exit(krdc); - - if (rdc_intercept(krdc) != 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ENABLED); - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EREGISTER, urdc->primary.file); - else - spcs_s_add(kstatus, RDC_EREGISTER, - urdc->secondary.file); -#ifdef DEBUG - cmn_err(CE_NOTE, "!nsc_register_path failed %s", - urdc->primary.file); -#endif - rc = RDC_EREGISTER; - goto bmpfail; - } -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: resumed %s %s", urdc->primary.file, - urdc->secondary.file); -#endif - - rdc_write_state(urdc); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (0); - -bmpfail: - if (options & RDC_OPT_PRIMARY) - spcs_s_add(kstatus, RDC_EBITMAP, urdc->primary.bitmap); - else - spcs_s_add(kstatus, RDC_EBITMAP, urdc->secondary.bitmap); - rc = RDC_EBITMAP; - if (rdc_get_vflags(urdc) & RDC_ENABLED) { - rdc_group_exit(krdc); - (void) rdc_unintercept(krdc); - rdc_group_enter(krdc); - } - -fail: - rdc_kstat_delete(index); - /* Don't unset krdc->intf here, unlike _rdc_enable */ - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_dev_close(krdc); - rdc_close_direct(krdc); - rdc_destroy_svinfo(svp); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - rdc_group_exit(krdc); - - mutex_enter(&rdc_conf_lock); - - /* Configured but not enabled */ - ASSERT(IS_CONFIGURED(krdc) && !IS_ENABLED(urdc)); - - remove_from_group(krdc); - - if (IS_MANY(krdc) || IS_MULTI(krdc)) - remove_from_many(krdc); - - rdc_u_init(urdc); - - ASSERT(krdc->type_flag & RDC_CONFIGURED); - krdc->type_flag = 0; - wakeup_busy(krdc); - - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -static int -rdc_resume(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - char itmp[10]; - int rc; - - if (!(uparms->options & RDC_OPT_SYNC) && - !(uparms->options & RDC_OPT_ASYNC)) { - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof 
(itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - rc = RDC_EEINVAL; - goto done; - } - - if (!(uparms->options & RDC_OPT_PRIMARY) && - !(uparms->options & RDC_OPT_SECONDARY)) { - (void) spcs_s_inttostring( - uparms->options, itmp, sizeof (itmp), 1); - spcs_s_add(kstatus, RDC_EEINVAL, itmp); - rc = RDC_EEINVAL; - goto done; - } - - rc = _rdc_resume(uparms->rdc_set, uparms->options, kstatus); -done: - return (rc); -} - -/* - * if rdc_group_log is called because a volume has failed, - * we must disgard the queue to preserve write ordering. - * later perhaps, we can keep queuing, but we would have to - * rewrite the i/o path to acommodate that. currently, if there - * is a volume failure, the buffers are satisfied remotely and - * there is no way to satisfy them from the current diskq config - * phew, if we do that.. it will be difficult - */ -int -rdc_can_queue(rdc_k_info_t *krdc) -{ - rdc_k_info_t *p; - rdc_u_info_t *q; - - for (p = krdc->group_next; ; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (IS_STATE(q, RDC_VOL_FAILED)) - return (0); - if (p == krdc) - break; - } - return (1); -} - -/* - * wait here, until all in flight async i/o's have either - * finished or failed. Avoid the race with r_net_state() - * which tells remote end to log. - */ -void -rdc_inflwait(rdc_group_t *grp) -{ - int bail = RDC_CLNT_TMOUT * 2; /* to include retries */ - volatile int *inflitems; - - if (RDC_IS_DISKQ(grp)) - inflitems = (&(grp->diskq.inflitems)); - else - inflitems = (&(grp->ra_queue.inflitems)); - - while (*inflitems && (--bail > 0)) - delay(HZ); -} - -void -rdc_group_log(rdc_k_info_t *krdc, int flag, char *why) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_k_info_t *p; - rdc_u_info_t *q; - int do_group; - int sm, um, md; - disk_queue *dq; - - void (*flag_op)(rdc_u_info_t *urdc, int flag); - - ASSERT(MUTEX_HELD(&krdc->group->lock)); - - if (!IS_ENABLED(urdc)) - return; - - rdc_many_enter(krdc); - - if ((flag & RDC_QUEUING) && (!IS_STATE(urdc, RDC_SYNCING)) && - (rdc_can_queue(krdc))) { - flag_op = rdc_set_flags; /* keep queuing, link error */ - flag &= ~RDC_FLUSH; - } else { - flag_op = rdc_clr_flags; /* stop queuing, user request */ - } - - do_group = 1; - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) - do_group = 0; - else if ((urdc->group_name[0] == 0) || - (rdc_get_vflags(urdc) & RDC_LOGGING) || - (rdc_get_vflags(urdc) & RDC_SYNCING)) - do_group = 0; - if (do_group) { - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - if ((rdc_get_vflags(q) & RDC_LOGGING) || - (rdc_get_vflags(q) & RDC_SYNCING)) { - do_group = 0; - break; - } - } - } - if (!do_group && (flag & RDC_FORCE_GROUP)) - do_group = 1; - - rdc_many_exit(krdc); - dq = &krdc->group->diskq; - if (do_group) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR:Group point-in-time for grp: %s %s:%s", - urdc->group_name, urdc->primary.intf, urdc->secondary.intf); -#endif - DTRACE_PROBE(rdc_diskq_group_PIT); - - /* Set group logging at the same PIT under rdc_many_lock */ - rdc_many_enter(krdc); - rdc_set_flags_log(urdc, RDC_LOGGING, why); - if (RDC_IS_DISKQ(krdc->group)) - flag_op(urdc, RDC_QUEUING); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - rdc_set_flags_log(q, RDC_LOGGING, - "consistency group member following leader"); - if (RDC_IS_DISKQ(p->group)) - flag_op(q, RDC_QUEUING); - } - - rdc_many_exit(krdc); - - /* - * This can cause the async threads to fail, - * which in turn 
will call rdc_group_log() - * again. Release the lock and re-aquire. - */ - rdc_group_exit(krdc); - - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - if (!RDC_IS_DISKQ(krdc->group)) - RDC_ZERO_BITREF(krdc); - - rdc_inflwait(krdc->group); - - /* - * a little lazy, but neat. recall dump_alloc_bufs to - * ensure that the queue pointers & seq are reset properly - * after we have waited for inflight stuff - */ - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - - rdc_group_enter(krdc); - if (RDC_IS_DISKQ(krdc->group) && (!(flag & RDC_QUEUING))) { - /* fail or user request */ - RDC_ZERO_BITREF(krdc); - mutex_enter(&krdc->group->diskq.disk_qlock); - rdc_init_diskq_header(krdc->group, - &krdc->group->diskq.disk_hdr); - SET_QNXTIO(dq, QHEAD(dq)); - mutex_exit(&krdc->group->diskq.disk_qlock); - } - - if (flag & RDC_ALLREMOTE) { - /* Tell other node to start logging */ - if (krdc->lsrv && krdc->intf && !krdc->intf->if_down) - (void) rdc_net_state(krdc->index, - CCIO_ENABLELOG); - } - - if (flag & (RDC_ALLREMOTE | RDC_OTHERREMOTE)) { - rdc_many_enter(krdc); - for (p = krdc->group_next; p != krdc; - p = p->group_next) { - if (p->lsrv && krdc->intf && - !krdc->intf->if_down) { - (void) rdc_net_state(p->index, - CCIO_ENABLELOG); - } - } - rdc_many_exit(krdc); - } - - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - if (!IS_ENABLED(q)) - continue; - rdc_write_state(q); - } - } else { - /* No point in time is possible, just deal with single set */ - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - halt_sync(krdc); - } else { - if (rdc_net_getstate(krdc, &sm, &um, &md, TRUE) < 0) { - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_set_flags_log(urdc, RDC_LOGGING, - "failed to read remote state"); - - rdc_write_state(urdc); - while (rdc_dump_alloc_bufs_cd(krdc->index) - == EAGAIN) - delay(2); - if ((RDC_IS_DISKQ(krdc->group)) && - (!(flag & RDC_QUEUING))) { /* fail! */ - mutex_enter(QLOCK(dq)); - rdc_init_diskq_header(krdc->group, - &krdc->group->diskq.disk_hdr); - SET_QNXTIO(dq, QHEAD(dq)); - mutex_exit(QLOCK(dq)); - } - - return; - } - } - - if (rdc_get_vflags(urdc) & RDC_SYNCING) - return; - - if (RDC_IS_DISKQ(krdc->group)) - flag_op(urdc, RDC_QUEUING); - - if ((RDC_IS_DISKQ(krdc->group)) && - (!(flag & RDC_QUEUING))) { /* fail! */ - RDC_ZERO_BITREF(krdc); - mutex_enter(QLOCK(dq)); - rdc_init_diskq_header(krdc->group, - &krdc->group->diskq.disk_hdr); - SET_QNXTIO(dq, QHEAD(dq)); - mutex_exit(QLOCK(dq)); - } - - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { - rdc_set_flags_log(urdc, RDC_LOGGING, why); - - rdc_write_state(urdc); - - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - if (!RDC_IS_DISKQ(krdc->group)) - RDC_ZERO_BITREF(krdc); - - rdc_inflwait(krdc->group); - /* - * a little lazy, but neat. 
recall dump_alloc_bufs to - * ensure that the queue pointers & seq are reset - * properly after we have waited for inflight stuff - */ - while (rdc_dump_alloc_bufs_cd(krdc->index) == EAGAIN) - delay(2); - - if (flag & RDC_ALLREMOTE) { - /* Tell other node to start logging */ - if (krdc->lsrv && krdc->intf && - !krdc->intf->if_down) { - (void) rdc_net_state(krdc->index, - CCIO_ENABLELOG); - } - } - } - } - /* - * just in case any threads were in flight during log cleanup - */ - if (RDC_IS_DISKQ(krdc->group)) { - mutex_enter(QLOCK(dq)); - cv_broadcast(&dq->qfullcv); - mutex_exit(QLOCK(dq)); - } -} - -static int -_rdc_log(rdc_k_info_t *krdc, rdc_set_t *rdc_set, spcs_s_info_t kstatus) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - rdc_srv_t *svp; - - rdc_group_enter(krdc); - if (rdc_check(krdc, rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, rdc_set->primary.file, - rdc_set->secondary.file); - return (RDC_EALREADY); - } - - svp = krdc->lsrv; - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - krdc->intf = rdc_add_to_if(svp, &(urdc->primary.addr), - &(urdc->secondary.addr), 1); - else - krdc->intf = rdc_add_to_if(svp, &(urdc->secondary.addr), - &(urdc->primary.addr), 0); - - if (!krdc->intf) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EADDTOIF, urdc->primary.intf, - urdc->secondary.intf); - return (RDC_EADDTOIF); - } - - rdc_group_log(krdc, RDC_FLUSH | RDC_ALLREMOTE, NULL); - - if (rdc_get_vflags(urdc) & RDC_SYNCING) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_ESYNCING, urdc->primary.file); - return (RDC_ESYNCING); - } - - rdc_group_exit(krdc); - - return (0); -} - -static int -rdc_log(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - int rc = 0; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rc = _rdc_log(krdc, uparms->rdc_set, kstatus); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - - -static int -rdc_wait(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int index; - int need_check = 0; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - mutex_exit(&rdc_conf_lock); - return (0); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - 
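
The flag_op selection at the top of rdc_group_log() is a small function-pointer idiom: decide once whether the members should keep queuing (link error) or stop queuing (volume failure or user request), then apply the same operation to every member of the group. A minimal, hypothetical sketch of that idiom, with invented names:

#define FLAG_QUEUING	0x1		/* invented flag value */

struct vol {
	unsigned flags;
};

static void set_flag(struct vol *v, unsigned f) { v->flags |= f; }
static void clr_flag(struct vol *v, unsigned f) { v->flags &= ~f; }

static void
log_group(struct vol *vols, int nvols, int keep_queuing)
{
	/* Choose the operation once, then apply it uniformly. */
	void (*flag_op)(struct vol *, unsigned) =
	    keep_queuing ? set_flag : clr_flag;
	int i;

	for (i = 0; i < nvols; i++)
		flag_op(&vols[i], FLAG_QUEUING);
}
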
rdc_group_exit(krdc); - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - if ((rdc_get_vflags(urdc) & (RDC_SYNCING | RDC_PRIMARY)) != - (RDC_SYNCING | RDC_PRIMARY)) { - rdc_group_exit(krdc); - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - return (0); - } - if (rdc_get_vflags(urdc) & RDC_SYNCING) { - need_check = 1; - } - rdc_group_exit(krdc); - - mutex_enter(&net_blk_lock); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - (void) cv_wait_sig(&krdc->synccv, &net_blk_lock); - - mutex_exit(&net_blk_lock); - if (need_check) { - if (krdc->sync_done == RDC_COMPLETED) { - return (0); - } else if (krdc->sync_done == RDC_FAILED) { - return (EIO); - } - } - return (0); -} - - -static int -rdc_health(rdc_config_t *uparms, spcs_s_info_t kstatus, int *rvp) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc = 0; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - urdc = &rdc_u_info[index]; - if (rdc_isactive_if(&(urdc->primary.addr), &(urdc->secondary.addr))) - *rvp = RDC_ACTIVE; - else - *rvp = RDC_INACTIVE; - - rdc_group_exit(krdc); - -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - - -static int -rdc_reconfig(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc = -2; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - if ((rdc_get_vflags(urdc) & RDC_BMP_FAILED) && (krdc->bitmapfd)) - (void) rdc_reset_bitmap(krdc); - - /* Move to a new bitmap if necessary */ - if (strncmp(urdc->primary.bitmap, uparms->rdc_set->primary.bitmap, - NSC_MAXPATH) != 0) { - if 
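
rdc_log(), rdc_wait() and rdc_health() all follow the same convention: look the set up and bump its busy count while holding the global configuration lock, drop the lock for the real work, then reacquire it to wake anyone (such as a pending disable) waiting for the count to drain. A hypothetical pthreads rendering of that convention, not the driver's own code:

#include <pthread.h>

struct set {
	int busy;			/* operations in progress */
	pthread_cond_t busycv;		/* initialised with pthread_cond_init() */
};

static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;

static void
set_busy(struct set *s)			/* conf_lock held */
{
	s->busy++;
}

static void
wakeup_busy(struct set *s)		/* conf_lock held */
{
	if (--s->busy == 0)
		(void) pthread_cond_broadcast(&s->busycv);
}

static void
guarded_op(struct set *s, void (*work)(struct set *))
{
	pthread_mutex_lock(&conf_lock);
	set_busy(s);
	pthread_mutex_unlock(&conf_lock);

	work(s);			/* long-running work, lock not held */

	pthread_mutex_lock(&conf_lock);
	wakeup_busy(s);
	pthread_mutex_unlock(&conf_lock);
}
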
(rdc_get_vflags(urdc) & RDC_PRIMARY) { - rc = rdc_move_bitmap(krdc, - uparms->rdc_set->primary.bitmap); - } else { - (void) strncpy(urdc->primary.bitmap, - uparms->rdc_set->primary.bitmap, NSC_MAXPATH); - /* simulate a succesful rdc_move_bitmap */ - rc = 0; - } - } - if (strncmp(urdc->secondary.bitmap, uparms->rdc_set->secondary.bitmap, - NSC_MAXPATH) != 0) { - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - (void) strncpy(urdc->secondary.bitmap, - uparms->rdc_set->secondary.bitmap, NSC_MAXPATH); - /* simulate a succesful rdc_move_bitmap */ - rc = 0; - } else { - rc = rdc_move_bitmap(krdc, - uparms->rdc_set->secondary.bitmap); - } - } - if (rc == -1) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EBMPRECONFIG, - uparms->rdc_set->secondary.intf, - uparms->rdc_set->secondary.file); - rc = RDC_EBMPRECONFIG; - goto done; - } - - /* - * At this point we fail any other type of reconfig - * if not in logging mode and we did not do a bitmap reconfig - */ - - if (!(rdc_get_vflags(urdc) & RDC_LOGGING) && rc == -2) { - /* no other changes possible unless logging */ - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_ENOTLOGGING, - uparms->rdc_set->primary.intf, - uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.intf, - uparms->rdc_set->secondary.file); - rc = RDC_ENOTLOGGING; - goto done; - } - rc = 0; - /* Change direct file if necessary */ - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - strncmp(urdc->direct_file, uparms->rdc_set->direct_file, - NSC_MAXPATH)) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { - rdc_group_exit(krdc); - goto notlogging; - } - rdc_close_direct(krdc); - (void) strncpy(urdc->direct_file, uparms->rdc_set->direct_file, - NSC_MAXPATH); - - if (urdc->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - else - rdc_clr_flags(urdc, RDC_FCAL_FAILED); - } - } - - rdc_group_exit(krdc); - - /* Change group if necessary */ - if (strncmp(urdc->group_name, uparms->rdc_set->group_name, - NSC_MAXPATH) != 0) { - char orig_group[NSC_MAXPATH]; - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - mutex_enter(&rdc_conf_lock); - - (void) strncpy(orig_group, urdc->group_name, NSC_MAXPATH); - (void) strncpy(urdc->group_name, uparms->rdc_set->group_name, - NSC_MAXPATH); - - rc = change_group(krdc, uparms->options); - if (rc == RDC_EQNOADD) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EQNOADD, - uparms->rdc_set->disk_queue); - goto done; - } else if (rc < 0) { - (void) strncpy(urdc->group_name, orig_group, - NSC_MAXPATH); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EGROUP, - urdc->primary.intf, urdc->primary.file, - urdc->secondary.intf, urdc->secondary.file, - uparms->rdc_set->group_name); - rc = RDC_EGROUP; - goto done; - } - - mutex_exit(&rdc_conf_lock); - - if (rc >= 0) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - if (uparms->options & RDC_OPT_ASYNC) { - mutex_enter(&rdc_conf_lock); - krdc->type_flag |= RDC_ASYNCMODE; - mutex_exit(&rdc_conf_lock); - if (uparms->options & RDC_OPT_PRIMARY) - krdc->bitmap_ref = - (uchar_t *)kmem_zalloc( - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE), KM_SLEEP); - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - } else { - mutex_enter(&rdc_conf_lock); - krdc->type_flag &= ~RDC_ASYNCMODE; - mutex_exit(&rdc_conf_lock); - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - if (krdc->bitmap_ref) { - kmem_free(krdc->bitmap_ref, - (krdc->bitmap_size * BITS_IN_BYTE * - 
BMAP_REF_PREF_SIZE)); - krdc->bitmap_ref = NULL; - } - } - } - } else { - if ((((uparms->options & RDC_OPT_ASYNC) == 0) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) || - (((uparms->options & RDC_OPT_ASYNC) != 0) && - ((krdc->type_flag & RDC_ASYNCMODE) == 0))) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - - if (krdc->group->count > 1) { - spcs_s_add(kstatus, RDC_EGROUPMODE); - rc = RDC_EGROUPMODE; - goto done; - } - } - - /* Switch sync/async if necessary */ - if (krdc->group->count == 1) { - /* Only member of group. Can change sync/async */ - if (((uparms->options & RDC_OPT_ASYNC) == 0) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - /* switch to sync */ - mutex_enter(&rdc_conf_lock); - krdc->type_flag &= ~RDC_ASYNCMODE; - if (RDC_IS_DISKQ(krdc->group)) { - krdc->group->flags &= ~RDC_DISKQUE; - krdc->group->flags |= RDC_MEMQUE; - rdc_unintercept_diskq(krdc->group); - mutex_enter(&krdc->group->diskqmutex); - rdc_close_diskq(krdc->group); - mutex_exit(&krdc->group->diskqmutex); - bzero(&urdc->disk_queue, - sizeof (urdc->disk_queue)); - } - mutex_exit(&rdc_conf_lock); - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - if (krdc->bitmap_ref) { - kmem_free(krdc->bitmap_ref, - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE)); - krdc->bitmap_ref = NULL; - } - } else if (((uparms->options & RDC_OPT_ASYNC) != 0) && - ((krdc->type_flag & RDC_ASYNCMODE) == 0)) { - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) - goto notlogging; - /* switch to async */ - mutex_enter(&rdc_conf_lock); - krdc->type_flag |= RDC_ASYNCMODE; - mutex_exit(&rdc_conf_lock); - if (uparms->options & RDC_OPT_PRIMARY) - krdc->bitmap_ref = - (uchar_t *)kmem_zalloc( - (krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE), KM_SLEEP); - rdc_group_enter(krdc); - rdc_set_flags(urdc, RDC_ASYNC); - rdc_group_exit(krdc); - } - } - } - /* Reverse concept of primary and secondary */ - if ((uparms->options & RDC_OPT_REVERSE_ROLE) != 0) { - rdc_set_t rdc_set; - struct netbuf paddr, saddr; - - mutex_enter(&rdc_conf_lock); - - /* - * Disallow role reversal for advanced configurations - */ - - if (IS_MANY(krdc) || IS_MULTI(krdc)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EMASTER, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - return (RDC_EMASTER); - } - bzero((void *) &rdc_set, sizeof (rdc_set_t)); - dup_rdc_netbuf(&urdc->primary.addr, &saddr); - dup_rdc_netbuf(&urdc->secondary.addr, &paddr); - free_rdc_netbuf(&urdc->primary.addr); - free_rdc_netbuf(&urdc->secondary.addr); - dup_rdc_netbuf(&saddr, &urdc->secondary.addr); - dup_rdc_netbuf(&paddr, &urdc->primary.addr); - free_rdc_netbuf(&paddr); - free_rdc_netbuf(&saddr); - /* copy primary parts of urdc to rdc_set field by field */ - (void) strncpy(rdc_set.primary.intf, urdc->primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(rdc_set.primary.file, urdc->primary.file, - NSC_MAXPATH); - (void) strncpy(rdc_set.primary.bitmap, urdc->primary.bitmap, - NSC_MAXPATH); - - /* Now overwrite urdc primary */ - (void) strncpy(urdc->primary.intf, urdc->secondary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->primary.file, urdc->secondary.file, - NSC_MAXPATH); - (void) strncpy(urdc->primary.bitmap, urdc->secondary.bitmap, - NSC_MAXPATH); - - /* Now ovwewrite urdc secondary */ - (void) strncpy(urdc->secondary.intf, rdc_set.primary.intf, - MAX_RDC_HOST_SIZE); - (void) strncpy(urdc->secondary.file, rdc_set.primary.file, 
- NSC_MAXPATH); - (void) strncpy(urdc->secondary.bitmap, rdc_set.primary.bitmap, - NSC_MAXPATH); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) { - rdc_clr_flags(urdc, RDC_PRIMARY); - if (krdc->intf) { - krdc->intf->issecondary = 1; - krdc->intf->isprimary = 0; - krdc->intf->if_down = 1; - } - } else { - rdc_set_flags(urdc, RDC_PRIMARY); - if (krdc->intf) { - krdc->intf->issecondary = 0; - krdc->intf->isprimary = 1; - krdc->intf->if_down = 1; - } - } - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - ((krdc->type_flag & RDC_ASYNCMODE) != 0)) { - if (!krdc->bitmap_ref) - krdc->bitmap_ref = - (uchar_t *)kmem_zalloc((krdc->bitmap_size * - BITS_IN_BYTE * BMAP_REF_PREF_SIZE), - KM_SLEEP); - if (krdc->bitmap_ref == NULL) { - cmn_err(CE_WARN, - "!rdc_reconfig: bitmap_ref alloc %" - NSC_SZFMT " failed", - krdc->bitmap_size * BITS_IN_BYTE * - BMAP_REF_PREF_SIZE); - mutex_exit(&rdc_conf_lock); - return (-1); - } - } - - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED)) { - /* Primary, so reverse sync needed */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - } else if (rdc_get_vflags(urdc) & RDC_RSYNC_NEEDED) { - /* Secondary, so forward sync needed */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_many_exit(krdc); - } - - /* - * rewrite bitmap header - */ - rdc_write_state(urdc); - mutex_exit(&rdc_conf_lock); - } - -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); - -notlogging: - /* no other changes possible unless logging */ - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_ENOTLOGGING, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - return (RDC_ENOTLOGGING); -} - -static int -rdc_reset(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc = 0; - int index; - int cleared_error = 0; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - if ((rdc_get_vflags(urdc) & RDC_BMP_FAILED) && (krdc->bitmapfd)) { - if (rdc_reset_bitmap(krdc) == 0) - cleared_error++; - } - - /* Fix direct file if necessary */ - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && urdc->direct_file[0]) { - if (rdc_open_direct(krdc) == NULL) - rdc_set_flags(urdc, RDC_FCAL_FAILED); - else { - rdc_clr_flags(urdc, RDC_FCAL_FAILED); - cleared_error++; - } - } - - if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - cleared_error++; - rdc_many_exit(krdc); - } - - if (cleared_error) { - /* 
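
The role-reversal branch of rdc_reconfig() just above swaps the primary and secondary endpoint records (addresses, interface, file and bitmap names) through a temporary, flips the set's primary flag, and converts any pending forward sync into a reverse sync or vice versa. A much-simplified sketch of the shape of that operation follows; the types are invented and the netbuf duplication, allocation checks and locking are omitted.

#define F_PRIMARY	0x1
#define F_SYNC_NEEDED	0x2		/* forward sync pending */
#define F_RSYNC_NEEDED	0x4		/* reverse sync pending */

struct endpoint {
	char host[64];
	char file[128];
	char bitmap[128];
};

struct set {
	struct endpoint primary;
	struct endpoint secondary;
	unsigned flags;
};

static void
reverse_roles(struct set *s)
{
	struct endpoint tmp;

	/* Swap the two endpoint records via a temporary. */
	tmp = s->primary;
	s->primary = s->secondary;
	s->secondary = tmp;

	/* Flip this node's role. */
	s->flags ^= F_PRIMARY;

	/* Turn a pending forward sync into a reverse sync, or back. */
	if ((s->flags & (F_PRIMARY | F_SYNC_NEEDED)) ==
	    (F_PRIMARY | F_SYNC_NEEDED)) {
		s->flags &= ~F_SYNC_NEEDED;
		s->flags |= F_RSYNC_NEEDED;
	} else if (!(s->flags & F_PRIMARY) && (s->flags & F_RSYNC_NEEDED)) {
		s->flags &= ~F_RSYNC_NEEDED;
		s->flags |= F_SYNC_NEEDED;
	}
}
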
cleared an error so we should be in logging mode */ - rdc_set_flags_log(urdc, RDC_LOGGING, "set reset"); - } - rdc_group_exit(krdc); - - if ((rdc_get_vflags(urdc) & RDC_DISKQ_FAILED)) - rdc_unfail_diskq(krdc); - -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - - -static int -rdc_tunable(rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_k_info_t *p; - rdc_u_info_t *q; - int rc = 0; - int index; - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - urdc = &rdc_u_info[index]; - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - if (uparms->rdc_set->maxqfbas > 0) { - urdc->maxqfbas = uparms->rdc_set->maxqfbas; - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->maxqfbas = urdc->maxqfbas; - rdc_write_state(q); - } - } - - if (uparms->rdc_set->maxqitems > 0) { - urdc->maxqitems = uparms->rdc_set->maxqitems; - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->maxqitems = urdc->maxqitems; - rdc_write_state(q); - } - } - - if (uparms->options & RDC_OPT_SET_QNOBLOCK) { - disk_queue *que; - - if (!RDC_IS_DISKQ(krdc->group)) { - spcs_s_add(kstatus, RDC_EQNOQUEUE, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQNOQUEUE; - goto done; - } - - que = &krdc->group->diskq; - mutex_enter(QLOCK(que)); - SET_QSTATE(que, RDC_QNOBLOCK); - /* queue will fail if this fails */ - (void) rdc_stamp_diskq(krdc, 0, RDC_GROUP_LOCKED); - mutex_exit(QLOCK(que)); - - } - - if (uparms->options & RDC_OPT_CLR_QNOBLOCK) { - disk_queue *que; - - if (!RDC_IS_DISKQ(krdc->group)) { - spcs_s_add(kstatus, RDC_EQNOQUEUE, urdc->primary.intf, - urdc->primary.file, urdc->secondary.intf, - urdc->secondary.file); - rc = RDC_EQNOQUEUE; - goto done; - } - que = &krdc->group->diskq; - mutex_enter(QLOCK(que)); - CLR_QSTATE(que, RDC_QNOBLOCK); - /* queue will fail if this fails */ - (void) rdc_stamp_diskq(krdc, 0, RDC_GROUP_LOCKED); - mutex_exit(QLOCK(que)); - - } - if (uparms->rdc_set->asyncthr > 0) { - urdc->asyncthr = uparms->rdc_set->asyncthr; - rdc_write_state(urdc); - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->asyncthr = urdc->asyncthr; - rdc_write_state(q); - } - } - - if (uparms->rdc_set->autosync >= 0) { - if (uparms->rdc_set->autosync == 0) - urdc->autosync = 0; - else - urdc->autosync = 1; - - rdc_write_state(urdc); - - /* Changed autosync, so update rest of the group */ - - for (p = krdc->group_next; p != krdc; p = p->group_next) { - q = &rdc_u_info[p->index]; - q->autosync = urdc->autosync; - rdc_write_state(q); - } - } - -done: - 
rdc_group_exit(krdc); - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -static int -rdc_status(void *arg, int mode, rdc_config_t *uparms, spcs_s_info_t kstatus) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - disk_queue *dqp; - int rc = 0; - int index; - char *ptr; - extern int rdc_status_copy32(const void *, void *, size_t, int); - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byname(uparms->rdc_set); - if (index >= 0) - krdc = &rdc_k_info[index]; - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - set_busy(krdc); - if (krdc->type_flag == 0) { - /* A resume or enable failed */ - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - return (RDC_EALREADY); - } - - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - if (rdc_check(krdc, uparms->rdc_set)) { - rdc_group_exit(krdc); - spcs_s_add(kstatus, RDC_EALREADY, uparms->rdc_set->primary.file, - uparms->rdc_set->secondary.file); - rc = RDC_EALREADY; - goto done; - } - - urdc = &rdc_u_info[index]; - - /* - * sneak out qstate in urdc->flags - * this is harmless because it's value is not used - * in urdc->flags. the real qstate is kept in - * group->diskq->disk_hdr.h.state - */ - if (RDC_IS_DISKQ(krdc->group)) { - dqp = &krdc->group->diskq; - if (IS_QSTATE(dqp, RDC_QNOBLOCK)) - urdc->flags |= RDC_QNOBLOCK; - } - - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { - ptr = (char *)arg + offsetof(struct rdc_config32, rdc_set); - rc = rdc_status_copy32(urdc, ptr, sizeof (struct rdc_set32), - mode); - } else { - ptr = (char *)arg + offsetof(struct rdc_config, rdc_set); - rc = ddi_copyout(urdc, ptr, sizeof (struct rdc_set), mode); - } - /* clear out qstate from flags */ - urdc->flags &= ~RDC_QNOBLOCK; - - if (rc) - rc = EFAULT; - - rdc_group_exit(krdc); -done: - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - return (rc); -} - -/* - * Overwrite the bitmap with one supplied by the - * user. - * Copy into all bitmaps that are tracking this volume. - */ - -int -rdc_bitmapset(int op, char *sechost, char *secdev, void *bmapaddr, int bmapsz, - nsc_off_t off, int mode) -{ - int rc; - rdc_k_info_t *krdc; - int *indexvec; - int index; - int indexit; - kmutex_t **grouplocks; - int i; - int groupind; - - if (off % FBA_SIZE(1)) { - /* Must be modulo FBA */ - cmn_err(CE_WARN, "!bitmapset: Offset is not on an FBA " - "boundary %llu", (unsigned long long)off); - return (EINVAL); - } - if (bmapsz % FBA_SIZE(1)) { - /* Must be modulo FBA */ - cmn_err(CE_WARN, "!bitmapset: Size is not on an FBA " - "boundary %d", bmapsz); - return (EINVAL); - } - - mutex_enter(&rdc_conf_lock); - index = rdc_lookup_byhostdev(sechost, secdev); - if (index >= 0) { - krdc = &rdc_k_info[index]; - } - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { - rc = ENODEV; - mutex_exit(&rdc_conf_lock); - return (rc); - } - indexvec = kmem_alloc(rdc_max_sets * sizeof (int), KM_SLEEP); - grouplocks = kmem_alloc(rdc_max_sets * sizeof (kmutex_t *), KM_SLEEP); - - /* - * I now have this set, and I want to take the group - * lock on it, and all the group locks of all the - * sets on the many and multi-hop links. - * I have to take the many lock while traversing the - * many/multi links. 
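
rdc_status() above briefly borrows an otherwise unused bit in the flags word to report the disk queue's RDC_QNOBLOCK state in the snapshot copied out to the caller, then clears the bit again. A stand-alone sketch of that "stash a transient bit just for the snapshot" trick, with invented names and without the 32/64-bit copyout handling:

#include <string.h>

#define F_QNOBLOCK	0x8000		/* bit unused by the live state */

struct status {
	unsigned flags;
	/* other reported fields */
};

/*
 * Snapshot 'live' for a caller, annotating it with queue state that is
 * normally kept elsewhere; the live copy is restored before returning.
 */
static void
snapshot_status(struct status *live, int queue_noblock, struct status *out)
{
	if (queue_noblock)
		live->flags |= F_QNOBLOCK;

	(void) memcpy(out, live, sizeof (*out));

	live->flags &= ~F_QNOBLOCK;	/* never leave the borrowed bit set */
}
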
- * I think I also need to set the busy count on this - * set, otherwise when I drop the conf_lock, what - * will stop some other process from coming in and - * issuing a disable? - */ - set_busy(krdc); - mutex_exit(&rdc_conf_lock); - -retrylock: - groupind = 0; - indexit = 0; - rdc_many_enter(krdc); - /* - * Take this initial sets group lock first. - */ - if (!mutex_tryenter(&krdc->group->lock)) { - rdc_many_exit(krdc); - goto retrylock; - } - - grouplocks[groupind] = &krdc->group->lock; - groupind++; - - rc = rdc_checkforbitmap(index, off + bmapsz); - if (rc) { - goto done; - } - indexvec[indexit] = index; - indexit++; - if (IS_MANY(krdc)) { - rdc_k_info_t *ktmp; - - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - /* - * attempt to take the group lock, - * if we don't already have it. - */ - if (ktmp->group == NULL) { - rc = ENODEV; - goto done; - } - for (i = 0; i < groupind; i++) { - if (grouplocks[i] == &ktmp->group->lock) - /* already have the group lock */ - break; - } - /* - * didn't find our lock in our collection, - * attempt to take group lock. - */ - if (i >= groupind) { - if (!mutex_tryenter(&ktmp->group->lock)) { - for (i = 0; i < groupind; i++) { - mutex_exit(grouplocks[i]); - } - rdc_many_exit(krdc); - goto retrylock; - } - grouplocks[groupind] = &ktmp->group->lock; - groupind++; - } - rc = rdc_checkforbitmap(ktmp->index, off + bmapsz); - if (rc == 0) { - indexvec[indexit] = ktmp->index; - indexit++; - } else { - goto done; - } - } - } - if (IS_MULTI(krdc)) { - rdc_k_info_t *kmulti = krdc->multi_next; - - if (kmulti->group == NULL) { - rc = ENODEV; - goto done; - } - /* - * This can't be in our group already. - */ - if (!mutex_tryenter(&kmulti->group->lock)) { - for (i = 0; i < groupind; i++) { - mutex_exit(grouplocks[i]); - } - rdc_many_exit(krdc); - goto retrylock; - } - grouplocks[groupind] = &kmulti->group->lock; - groupind++; - - rc = rdc_checkforbitmap(kmulti->index, off + bmapsz); - if (rc == 0) { - indexvec[indexit] = kmulti->index; - indexit++; - } else { - goto done; - } - } - rc = rdc_installbitmap(op, bmapaddr, bmapsz, off, mode, indexvec, - indexit); -done: - for (i = 0; i < groupind; i++) { - mutex_exit(grouplocks[i]); - } - rdc_many_exit(krdc); - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - kmem_free(indexvec, rdc_max_sets * sizeof (int)); - kmem_free(grouplocks, rdc_max_sets * sizeof (kmutex_t *)); - return (rc); -} - -static int -rdc_checkforbitmap(int index, nsc_off_t limit) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) { - return (EIO); - } - if (!(rdc_get_vflags(urdc) & RDC_LOGGING)) { - return (ENXIO); - } - if (krdc->dcio_bitmap == NULL) { - cmn_err(CE_WARN, "!checkforbitmap: No bitmap for set (%s:%s)", - urdc->secondary.intf, urdc->secondary.file); - return (ENOENT); - } - if (limit > krdc->bitmap_size) { - cmn_err(CE_WARN, "!checkbitmap: Bitmap exceeded, " - "incore %" NSC_SZFMT " user supplied %" NSC_SZFMT - " for set (%s:%s)", krdc->bitmap_size, - limit, urdc->secondary.intf, urdc->secondary.file); - return (ENOSPC); - } - return (0); -} - - - -/* - * Copy the user supplied bitmap to this set. 
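
The retrylock loop in rdc_bitmapset() above is an all-or-nothing lock acquisition: every group lock is taken with a trylock, and a single failure releases everything acquired so far and restarts the whole sequence, which is how the code avoids deadlocking against threads taking the same locks in a different order. A minimal pthreads sketch of that pattern (the original's duplicate-lock check for sets sharing a group is omitted, and the names are hypothetical):

#include <pthread.h>

/* Acquire every lock in 'locks' or none of them, retrying from scratch
 * on any trylock failure. */
static void
lock_all(pthread_mutex_t **locks, int n)
{
	int i, j;

retry:
	for (i = 0; i < n; i++) {
		if (pthread_mutex_trylock(locks[i]) != 0) {
			/* Back out everything taken so far and retry. */
			for (j = 0; j < i; j++)
				pthread_mutex_unlock(locks[j]);
			goto retry;
		}
	}
}
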
- */ -static int -rdc_installbitmap(int op, void *bmapaddr, int bmapsz, - nsc_off_t off, int mode, int *vec, int veccnt) -{ - int rc; - nsc_off_t sfba; - nsc_off_t efba; - nsc_off_t fba; - void *ormem = NULL; - int len; - int left; - int copied; - int index; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - rc = 0; - ormem = kmem_alloc(RDC_MAXDATA, KM_SLEEP); - left = bmapsz; - copied = 0; - while (left > 0) { - if (left > RDC_MAXDATA) { - len = RDC_MAXDATA; - } else { - len = left; - } - if (ddi_copyin((char *)bmapaddr + copied, ormem, - len, mode)) { - cmn_err(CE_WARN, "!installbitmap: Copyin failed"); - rc = EFAULT; - goto out; - } - sfba = FBA_NUM(off + copied); - efba = FBA_NUM(off + copied + len); - for (index = 0; index < veccnt; index++) { - krdc = &rdc_k_info[vec[index]]; - urdc = &rdc_u_info[vec[index]]; - - mutex_enter(&krdc->bmapmutex); - if (op == RDC_BITMAPSET) { - bcopy(ormem, krdc->dcio_bitmap + off + copied, - len); - } else { - rdc_lor(ormem, - krdc->dcio_bitmap + off + copied, len); - } - /* - * Maybe this should be just done once outside of - * the the loop? (Less work, but leaves a window - * where the bits_set doesn't match the bitmap). - */ - urdc->bits_set = RDC_COUNT_BITMAP(krdc); - mutex_exit(&krdc->bmapmutex); - if (krdc->bitmap_write > 0) { - for (fba = sfba; fba < efba; fba++) { - if (rc = rdc_write_bitmap_fba(krdc, - fba)) { - - cmn_err(CE_WARN, - "!installbitmap: " - "write_bitmap_fba failed " - "on fba number %" NSC_SZFMT - " set %s:%s", fba, - urdc->secondary.intf, - urdc->secondary.file); - goto out; - } - } - } - } - copied += len; - left -= len; - } -out: - kmem_free(ormem, RDC_MAXDATA); - return (rc); -} - -/* - * _rdc_config - */ -int -_rdc_config(void *arg, int mode, spcs_s_info_t kstatus, int *rvp) -{ - int rc = 0; - struct netbuf fsvaddr, tsvaddr; - struct knetconfig *knconf; - char *p = NULL, *pf = NULL; - struct rdc_config *uap; - STRUCT_DECL(knetconfig, knconf_tmp); - STRUCT_DECL(rdc_config, uparms); - int enable, disable; - int cmd; - - - STRUCT_HANDLE(rdc_set, rs); - STRUCT_HANDLE(rdc_addr, pa); - STRUCT_HANDLE(rdc_addr, sa); - - STRUCT_INIT(uparms, mode); - - bzero(STRUCT_BUF(uparms), STRUCT_SIZE(uparms)); - bzero(&fsvaddr, sizeof (fsvaddr)); - bzero(&tsvaddr, sizeof (tsvaddr)); - - knconf = NULL; - - if (ddi_copyin(arg, STRUCT_BUF(uparms), STRUCT_SIZE(uparms), mode)) { - return (EFAULT); - } - - STRUCT_SET_HANDLE(rs, mode, STRUCT_FGETP(uparms, rdc_set)); - STRUCT_SET_HANDLE(pa, mode, STRUCT_FADDR(rs, primary)); - STRUCT_SET_HANDLE(sa, mode, STRUCT_FADDR(rs, secondary)); - cmd = STRUCT_FGET(uparms, command); - if (cmd == RDC_CMD_ENABLE || cmd == RDC_CMD_RESUME) { - fsvaddr.len = STRUCT_FGET(pa, addr.len); - fsvaddr.maxlen = STRUCT_FGET(pa, addr.maxlen); - fsvaddr.buf = kmem_zalloc(fsvaddr.len, KM_SLEEP); - - if (ddi_copyin(STRUCT_FGETP(pa, addr.buf), - fsvaddr.buf, fsvaddr.len, mode)) { - kmem_free(fsvaddr.buf, fsvaddr.len); -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed primary.addr 2"); -#endif - return (EFAULT); - } - - - tsvaddr.len = STRUCT_FGET(sa, addr.len); - tsvaddr.maxlen = STRUCT_FGET(sa, addr.maxlen); - tsvaddr.buf = kmem_zalloc(tsvaddr.len, KM_SLEEP); - - if (ddi_copyin(STRUCT_FGETP(sa, addr.buf), - tsvaddr.buf, tsvaddr.len, mode)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed secondary addr"); -#endif - kmem_free(fsvaddr.buf, fsvaddr.len); - kmem_free(tsvaddr.buf, tsvaddr.len); - return (EFAULT); - } - } else { - fsvaddr.len = 0; - fsvaddr.maxlen = 0; - fsvaddr.buf = kmem_zalloc(fsvaddr.len, KM_SLEEP); - tsvaddr.len = 0; - 
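
rdc_installbitmap(), shown in full just below this comment, copies the user-supplied bitmap in bounded chunks and either overwrites or ORs each chunk into every tracked incore bitmap before rewriting the affected bitmap FBAs. The following sketch keeps only the chunked copy-or-merge core; the copyin, FBA rewrites and per-set locking are left out and the chunk size is an arbitrary stand-in for RDC_MAXDATA.

#include <stddef.h>
#include <string.h>

#define CHUNK	4096			/* stand-in for RDC_MAXDATA */

enum { BMAP_SET, BMAP_OR };

/* Merge 'len' bytes of 'src' into 'dst', overwriting or ORing,
 * one bounded chunk at a time. */
static void
install_bitmap(int op, const unsigned char *src, unsigned char *dst,
    size_t len)
{
	size_t done = 0;

	while (done < len) {
		size_t n = (len - done > CHUNK) ? CHUNK : len - done;
		size_t i;

		if (op == BMAP_SET) {
			(void) memcpy(dst + done, src + done, n);
		} else {
			for (i = 0; i < n; i++)
				dst[done + i] |= src[done + i];
		}
		done += n;
	}
}
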
tsvaddr.maxlen = 0; - tsvaddr.buf = kmem_zalloc(tsvaddr.len, KM_SLEEP); - } - - if (STRUCT_FGETP(uparms, rdc_set->netconfig) != NULL) { - STRUCT_INIT(knconf_tmp, mode); - knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); - if (ddi_copyin(STRUCT_FGETP(uparms, rdc_set->netconfig), - STRUCT_BUF(knconf_tmp), STRUCT_SIZE(knconf_tmp), mode)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed netconfig"); -#endif - kmem_free(fsvaddr.buf, fsvaddr.len); - kmem_free(tsvaddr.buf, tsvaddr.len); - kmem_free(knconf, sizeof (*knconf)); - return (EFAULT); - } - - knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); - knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); - knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); - -#ifndef _SunOS_5_6 - if ((mode & DATAMODEL_LP64) == 0) { - knconf->knc_rdev = - expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); - } else { -#endif - knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); -#ifndef _SunOS_5_6 - } -#endif - - pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); - p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); - rc = ddi_copyin(knconf->knc_protofmly, pf, KNC_STRSIZE, mode); - if (rc) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed parms protofmly"); -#endif - rc = EFAULT; - goto out; - } - rc = ddi_copyin(knconf->knc_proto, p, KNC_STRSIZE, mode); - if (rc) { -#ifdef DEBUG - cmn_err(CE_WARN, "!copyin failed parms proto"); -#endif - rc = EFAULT; - goto out; - } - knconf->knc_protofmly = pf; - knconf->knc_proto = p; - } /* !NULL netconfig */ - - uap = kmem_alloc(sizeof (*uap), KM_SLEEP); - - /* copy relevant parts of rdc_config to uap field by field */ - - (void) strncpy(uap->rdc_set[0].primary.intf, STRUCT_FGETP(pa, intf), - MAX_RDC_HOST_SIZE); - (void) strncpy(uap->rdc_set[0].primary.file, STRUCT_FGETP(pa, file), - NSC_MAXPATH); - (void) strncpy(uap->rdc_set[0].primary.bitmap, STRUCT_FGETP(pa, bitmap), - NSC_MAXPATH); - uap->rdc_set[0].netconfig = knconf; - uap->rdc_set[0].flags = STRUCT_FGET(uparms, rdc_set->flags); - uap->rdc_set[0].index = STRUCT_FGET(uparms, rdc_set->index); - uap->rdc_set[0].setid = STRUCT_FGET(uparms, rdc_set->setid); - uap->rdc_set[0].sync_pos = STRUCT_FGET(uparms, rdc_set->sync_pos); - uap->rdc_set[0].volume_size = STRUCT_FGET(uparms, rdc_set->volume_size); - uap->rdc_set[0].bits_set = STRUCT_FGET(uparms, rdc_set->bits_set); - uap->rdc_set[0].autosync = STRUCT_FGET(uparms, rdc_set->autosync); - uap->rdc_set[0].maxqfbas = STRUCT_FGET(uparms, rdc_set->maxqfbas); - uap->rdc_set[0].maxqitems = STRUCT_FGET(uparms, rdc_set->maxqitems); - uap->rdc_set[0].asyncthr = STRUCT_FGET(uparms, rdc_set->asyncthr); - uap->rdc_set[0].syshostid = STRUCT_FGET(uparms, rdc_set->syshostid); - uap->rdc_set[0].primary.addr = fsvaddr; /* struct copy */ - uap->rdc_set[0].secondary.addr = tsvaddr; /* struct copy */ - - (void) strncpy(uap->rdc_set[0].secondary.intf, STRUCT_FGETP(sa, intf), - MAX_RDC_HOST_SIZE); - (void) strncpy(uap->rdc_set[0].secondary.file, STRUCT_FGETP(sa, file), - NSC_MAXPATH); - (void) strncpy(uap->rdc_set[0].secondary.bitmap, - STRUCT_FGETP(sa, bitmap), NSC_MAXPATH); - - (void) strncpy(uap->rdc_set[0].direct_file, - STRUCT_FGETP(rs, direct_file), NSC_MAXPATH); - - (void) strncpy(uap->rdc_set[0].group_name, STRUCT_FGETP(rs, group_name), - NSC_MAXPATH); - - (void) strncpy(uap->rdc_set[0].disk_queue, STRUCT_FGETP(rs, disk_queue), - NSC_MAXPATH); - - uap->command = STRUCT_FGET(uparms, command); - uap->options = STRUCT_FGET(uparms, options); - - enable = (uap->command == RDC_CMD_ENABLE || - uap->command == RDC_CMD_RESUME); - disable = 
(uap->command == RDC_CMD_DISABLE || - uap->command == RDC_CMD_SUSPEND); - - /* - * Initialise the threadset if it has not already been done. - * - * This has to be done now, not in rdcattach(), because - * rdcattach() can be called before nskernd is running (eg. - * boot -r) in which case the nst_init() would fail and hence - * the attach would fail. - * - * Threadset creation is locked by the rdc_conf_lock, - * destruction is inherently single threaded as it is done in - * _rdc_unload() which must be the last thing performed by - * rdcdetach(). - */ - - if (enable && _rdc_ioset == NULL) { - mutex_enter(&rdc_conf_lock); - - if (_rdc_ioset == NULL) { - rc = rdc_thread_configure(); - } - - mutex_exit(&rdc_conf_lock); - - if (rc || _rdc_ioset == NULL) { - spcs_s_add(kstatus, RDC_ENOTHREADS); - rc = RDC_ENOTHREADS; - goto outuap; - } - } - switch (uap->command) { - case RDC_CMD_ENABLE: - rc = rdc_enable(uap, kstatus); - break; - case RDC_CMD_DISABLE: - rc = rdc_disable(uap, kstatus); - break; - case RDC_CMD_COPY: - rc = rdc_sync(uap, kstatus); - break; - case RDC_CMD_LOG: - rc = rdc_log(uap, kstatus); - break; - case RDC_CMD_RECONFIG: - rc = rdc_reconfig(uap, kstatus); - break; - case RDC_CMD_RESUME: - rc = rdc_resume(uap, kstatus); - break; - case RDC_CMD_SUSPEND: - rc = rdc_suspend(uap, kstatus); - break; - case RDC_CMD_TUNABLE: - rc = rdc_tunable(uap, kstatus); - break; - case RDC_CMD_WAIT: - rc = rdc_wait(uap, kstatus); - break; - case RDC_CMD_HEALTH: - rc = rdc_health(uap, kstatus, rvp); - break; - case RDC_CMD_STATUS: - rc = rdc_status(arg, mode, uap, kstatus); - break; - case RDC_CMD_RESET: - rc = rdc_reset(uap, kstatus); - break; - case RDC_CMD_ADDQ: - rc = rdc_add_diskq(uap, kstatus); - break; - case RDC_CMD_REMQ: - if ((rc = rdc_rem_diskq(uap, kstatus)) != 0) - break; - /* FALLTHRU */ - case RDC_CMD_KILLQ: - rc = rdc_kill_diskq(uap, kstatus); - break; - case RDC_CMD_INITQ: - rc = rdc_init_diskq(uap, kstatus); - break; - - default: - rc = EINVAL; - break; - } - - /* - * Tune the threadset size after a successful rdc_set addition - * or removal. - */ - if ((enable || disable) && rc == 0) { - mutex_enter(&rdc_conf_lock); - rdc_thread_tune(enable ? 2 : -2); - mutex_exit(&rdc_conf_lock); - } -outuap: - kmem_free(uap, sizeof (*uap)); -out: - kmem_free(fsvaddr.buf, fsvaddr.len); - kmem_free(tsvaddr.buf, tsvaddr.len); - if (pf) - kmem_free(pf, KNC_STRSIZE); - if (p) - kmem_free(p, KNC_STRSIZE); - if (knconf) - kmem_free(knconf, sizeof (*knconf)); - return (rc); -} - - -/* - * krdc->group->lock held on entry to halt_sync() - */ -static void -halt_sync(rdc_k_info_t *krdc) -{ - rdc_u_info_t *urdc = &rdc_u_info[krdc->index]; - - ASSERT(MUTEX_HELD(&krdc->group->lock)); - ASSERT(IS_ENABLED(urdc)); - - /* - * If a sync is in progress, halt it - */ - if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && - (krdc->aux_state & RDC_AUXSYNCIP)) { - krdc->disk_status = 1; - - while (krdc->disk_status == 1) { - if (cv_wait_sig(&krdc->haltcv, &krdc->group->lock) == 0) - break; - } - } -} - -/* - * return size in blocks - */ -uint64_t -mirror_getsize(int index) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - int rc, rs; - nsc_size_t size; - - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - rc = _rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL); - rs = nsc_partsize(RDC_U_FD(krdc), &size); - urdc->volume_size = size; - if (rc == 0) - _rdc_rlse_devs(krdc, RDC_RAW); - - return (rs == 0 ? 
urdc->volume_size : 0); -} - - -/* - * Create a new dataset for this transfer, and add it to the list - * of datasets via the net_dataset pointer in the krdc. - */ -rdc_net_dataset_t * -rdc_net_add_set(int index) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_net_dataset_t *dset; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_add_set: bad index %d", index); - return (NULL); - } - krdc = &rdc_k_info[index]; - urdc = &rdc_u_info[index]; - - dset = kmem_alloc(sizeof (*dset), KM_NOSLEEP); - if (dset == NULL) { - cmn_err(CE_NOTE, "!rdc_net_add_set: kmem_alloc failed"); - return (NULL); - } - RDC_DSMEMUSE(sizeof (*dset)); - dset->inuse = 1; - dset->nitems = 0; - dset->delpend = 0; - dset->head = NULL; - dset->tail = NULL; - mutex_enter(&krdc->dc_sleep); - - if (!IS_ENABLED(urdc)) { - /* raced with a disable command */ - kmem_free(dset, sizeof (*dset)); - RDC_DSMEMUSE(-sizeof (*dset)); - mutex_exit(&krdc->dc_sleep); - return (NULL); - } - /* - * Shared the id generator, (and the locks). - */ - mutex_enter(&rdc_net_hnd_id_lock); - if (++rdc_net_hnd_id == 0) - rdc_net_hnd_id = 1; - dset->id = rdc_net_hnd_id; - mutex_exit(&rdc_net_hnd_id_lock); - -#ifdef DEBUG - if (krdc->net_dataset != NULL) { - rdc_net_dataset_t *dset2; - for (dset2 = krdc->net_dataset; dset2; dset2 = dset2->next) { - if (dset2->id == dset->id) { - cmn_err(CE_PANIC, - "rdc_net_add_set duplicate id %p:%d %p:%d", - (void *)dset, dset->id, - (void *)dset2, dset2->id); - } - } - } -#endif - dset->next = krdc->net_dataset; - krdc->net_dataset = dset; - mutex_exit(&krdc->dc_sleep); - - return (dset); -} - -/* - * fetch the previously added dataset. - */ -rdc_net_dataset_t * -rdc_net_get_set(int index, int id) -{ - rdc_k_info_t *krdc; - rdc_net_dataset_t *dset; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_get_set: bad index %d", index); - return (NULL); - } - krdc = &rdc_k_info[index]; - - mutex_enter(&krdc->dc_sleep); - - dset = krdc->net_dataset; - while (dset && (dset->id != id)) - dset = dset->next; - - if (dset) { - dset->inuse++; - } - - mutex_exit(&krdc->dc_sleep); - return (dset); -} - -/* - * Decrement the inuse counter. Data may be freed. - */ -void -rdc_net_put_set(int index, rdc_net_dataset_t *dset) -{ - rdc_k_info_t *krdc; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_put_set: bad index %d", index); - return; - } - krdc = &rdc_k_info[index]; - - mutex_enter(&krdc->dc_sleep); - dset->inuse--; - ASSERT(dset->inuse >= 0); - if ((dset->inuse == 0) && (dset->delpend)) { - rdc_net_free_set(krdc, dset); - } - mutex_exit(&krdc->dc_sleep); -} - -/* - * Mark that we are finished with this set. Decrement inuse - * counter, mark as needing deletion, and - * remove from linked list. - */ -void -rdc_net_del_set(int index, rdc_net_dataset_t *dset) -{ - rdc_k_info_t *krdc; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc_net_del_set: bad index %d", index); - return; - } - krdc = &rdc_k_info[index]; - - mutex_enter(&krdc->dc_sleep); - dset->inuse--; - ASSERT(dset->inuse >= 0); - dset->delpend = 1; - if (dset->inuse == 0) { - rdc_net_free_set(krdc, dset); - } - mutex_exit(&krdc->dc_sleep); -} - -/* - * free all the memory associated with this set, and remove from - * list. - * Enters and exits with dc_sleep lock held. 
- */ - -void -rdc_net_free_set(rdc_k_info_t *krdc, rdc_net_dataset_t *dset) -{ - rdc_net_dataset_t **dsetp; -#ifdef DEBUG - int found = 0; -#endif - - ASSERT(MUTEX_HELD(&krdc->dc_sleep)); - ASSERT(dset); - for (dsetp = &krdc->net_dataset; *dsetp; dsetp = &((*dsetp)->next)) { - if (*dsetp == dset) { - *dsetp = dset->next; -#ifdef DEBUG - found = 1; -#endif - break; - } - } - -#ifdef DEBUG - if (found == 0) { - cmn_err(CE_WARN, "!rdc_net_free_set: Unable to find " - "dataset 0x%p in krdc list", (void *)dset); - } -#endif - /* - * unlinked from list. Free all the data - */ - rdc_ditemsfree(dset); - /* - * free my core. - */ - kmem_free(dset, sizeof (*dset)); - RDC_DSMEMUSE(-sizeof (*dset)); -} - - -/* - * Free all the dataitems and the data it points to. - */ -static void -rdc_ditemsfree(rdc_net_dataset_t *dset) -{ - rdc_net_dataitem_t *ditem; - rdc_net_dataitem_t *nitem; - - ditem = dset->head; - - while (ditem) { - nitem = ditem->next; - kmem_free(ditem->dptr, ditem->mlen); - RDC_DSMEMUSE(-ditem->mlen); - dset->nitems--; - kmem_free(ditem, sizeof (*ditem)); - RDC_DSMEMUSE(-sizeof (*ditem)); - ditem = nitem; - } - ASSERT(dset->nitems == 0); -} - -/* - * allocate and initialize a rdc_aio_t - */ -rdc_aio_t * -rdc_aio_tbuf_get(void *n, void *h, int pos, int len, int flag, int index, int s) -{ - rdc_aio_t *p; - - p = kmem_zalloc(sizeof (rdc_aio_t), KM_NOSLEEP); - if (p == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!_rdcaiotbufget: kmem_alloc failed bp aio"); -#endif - return (NULL); - } else { - p->next = n; /* overload */ - p->handle = h; - p->pos = pos; - p->qpos = -1; - p->len = len; - p->flag = flag; - p->index = index; - p->iostatus = s; /* overload */ - /* set up seq later, in case thr create fails */ - } - return (p); -} - -/* - * rdc_aio_buf_get - * get an aio_buf - */ -aio_buf_t * -rdc_aio_buf_get(rdc_buf_t *h, int index) -{ - aio_buf_t *p; - - if (index >= rdc_max_sets) { - cmn_err(CE_NOTE, "!rdc: rdc_aio_buf_get bad index %x", index); - return (NULL); - } - - mutex_enter(&h->aio_lock); - - p = h->rdc_anon; - while (p && (p->kindex != index)) - p = p->next; - - mutex_exit(&h->aio_lock); - return (p); -} - -/* - * rdc_aio_buf_del - * delete a aio_buf - */ -void -rdc_aio_buf_del(rdc_buf_t *h, rdc_k_info_t *krdc) -{ - aio_buf_t *p, **pp; - - mutex_enter(&h->aio_lock); - - p = NULL; - for (pp = &h->rdc_anon; *pp; pp = &((*pp)->next)) { - if ((*pp)->kindex == krdc->index) { - p = *pp; - break; - } - } - - if (p) { - *pp = p->next; - kmem_free(p, sizeof (*p)); - } - mutex_exit(&h->aio_lock); -} - -/* - * rdc_aio_buf_add - * Add a aio_buf. - */ -aio_buf_t * -rdc_aio_buf_add(int index, rdc_buf_t *h) -{ - aio_buf_t *p; - - p = kmem_zalloc(sizeof (*p), KM_NOSLEEP); - if (p == NULL) { - cmn_err(CE_NOTE, "!rdc_aio_buf_add: kmem_alloc failed"); - return (NULL); - } - - p->rdc_abufp = NULL; - p->kindex = index; - - mutex_enter(&h->aio_lock); - p->next = h->rdc_anon; - h->rdc_anon = p; - mutex_exit(&h->aio_lock); - return (p); -} - -/* - * kmemalloc a new group structure and setup the common - * fields. 
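
rdc_ditemsfree(), defined just below this comment, releases a whole chain of data items along with the payload each one points to. The safe way to do that is to capture the next pointer before freeing the node, as in this hypothetical user-space sketch:

#include <stdlib.h>

struct ditem {
	struct ditem *next;
	void *dptr;			/* payload buffer */
	size_t mlen;			/* payload size */
};

/* Free every item and its payload, never touching a node after free. */
static void
ditems_free(struct ditem *head)
{
	while (head != NULL) {
		struct ditem *next = head->next;

		free(head->dptr);
		free(head);
		head = next;
	}
}
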
- */ -static rdc_group_t * -rdc_newgroup() -{ - rdc_group_t *group; - - group = kmem_zalloc(sizeof (rdc_group_t), KM_SLEEP); - group->diskq.lastio = kmem_zalloc(sizeof (rdc_aio_t), KM_SLEEP); - group->count = 1; - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - mutex_init(&group->lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->ra_queue.net_qlock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->diskqmutex, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->diskq.disk_qlock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->diskq.head_lock, NULL, MUTEX_DRIVER, NULL); - mutex_init(&group->addthrnumlk, NULL, MUTEX_DRIVER, NULL); - cv_init(&group->unregistercv, NULL, CV_DRIVER, NULL); - cv_init(&group->asyncqcv, NULL, CV_DRIVER, NULL); - cv_init(&group->diskq.busycv, NULL, CV_DRIVER, NULL); - cv_init(&group->diskq.qfullcv, NULL, CV_DRIVER, NULL); - cv_init(&group->ra_queue.qfcv, NULL, CV_DRIVER, NULL); - group->ra_queue.qfill_sleeping = RDC_QFILL_DEAD; - group->diskq.busycnt = 0; - ASSERT(group->synccount == 0); /* group was kmem_zalloc'ed */ - - /* - * add default number of threads to the flusher thread set, plus - * one extra thread for the disk queue flusher - */ - if (nst_add_thread(_rdc_flset, 3) != 3) - cmn_err(CE_NOTE, "!rdc_newgroup: nst_add_thread failed"); - - return (group); -} - -void -rdc_delgroup(rdc_group_t *group) -{ - - ASSERT(group->asyncstall == 0); - ASSERT(group->rdc_thrnum == 0); - ASSERT(group->count == 0); - ASSERT(MUTEX_HELD(&rdc_many_lock)); - - mutex_enter(&group->ra_queue.net_qlock); - rdc_sleepqdiscard(group); - mutex_exit(&group->ra_queue.net_qlock); - - /* try to remove flusher threads that this group added to _rdc_flset */ - if (nst_del_thread(_rdc_flset, group->rdc_addthrnum + 3) != - group->rdc_addthrnum + 3) - cmn_err(CE_NOTE, "!rdc_delgroup: nst_del_thread failed"); - - mutex_destroy(&group->lock); - mutex_destroy(&group->ra_queue.net_qlock); - mutex_destroy(&group->diskqmutex); - mutex_destroy(&group->diskq.disk_qlock); - mutex_destroy(&group->diskq.head_lock); - mutex_destroy(&group->addthrnumlk); - cv_destroy(&group->unregistercv); - cv_destroy(&group->asyncqcv); - cv_destroy(&group->diskq.busycv); - cv_destroy(&group->diskq.qfullcv); - cv_destroy(&group->ra_queue.qfcv); - kmem_free(group->diskq.lastio, sizeof (rdc_aio_t)); - kmem_free(group, sizeof (rdc_group_t)); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_io.h b/usr/src/uts/common/avs/ns/rdc/rdc_io.h deleted file mode 100644 index 6acf96686c..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_io.h +++ /dev/null @@ -1,1009 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _RDC_IO_H -#define _RDC_IO_H - -#ifdef __cplusplus -extern "C" { -#endif - - -#include <sys/unistat/spcs_s.h> -#ifdef DS_DDICT -#define bool_t int -#endif -#include <sys/nsctl/rdc_prot.h> -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/rdc_ioctl.h> - -/* - * Definitions for kstats - */ -#define RDC_MKSTAT_MAXSETS "maxsets" -#define RDC_MKSTAT_MAXFBAS "maxfbas" -#define RDC_MKSTAT_RPC_TIMEOUT "rpc_timeout" -#define RDC_MKSTAT_HEALTH_THRES "health_thres" -#define RDC_MKSTAT_BITMAP_WRITES "bitmap_writes" -#define RDC_MKSTAT_CLNT_COTS_CALLS "clnt_cots_calls" -#define RDC_MKSTAT_CLNT_CLTS_CALLS "clnt_clts_calls" -#define RDC_MKSTAT_SVC_COTS_CALLS "svc_cots_calls" -#define RDC_MKSTAT_SVC_CLTS_CALLS "svc_clts_calls" -#define RDC_MKSTAT_BITMAP_REF_DELAY "bitmap_ref_delay" - -#define RDC_IKSTAT_FLAGS "flags" -#define RDC_IKSTAT_SYNCFLAGS "syncflags" -#define RDC_IKSTAT_BMPFLAGS "bmpflags" -#define RDC_IKSTAT_SYNCPOS "syncpos" -#define RDC_IKSTAT_VOLSIZE "volsize" -#define RDC_IKSTAT_BITSSET "bitsset" -#define RDC_IKSTAT_AUTOSYNC "autosync" -#define RDC_IKSTAT_MAXQFBAS "maxqfbas" -#define RDC_IKSTAT_MAXQITEMS "maxqitems" -#define RDC_IKSTAT_FILE "primary_vol" -#define RDC_IKSTAT_SECFILE "secondary_vol" -#define RDC_IKSTAT_BITMAP "bitmap" -#define RDC_IKSTAT_PRIMARY_HOST "primary_host" -#define RDC_IKSTAT_SECONDARY_HOST "secondary_host" -#define RDC_IKSTAT_TYPE_FLAG "type_flag" -#define RDC_IKSTAT_BMP_SIZE "bmp_size" -#define RDC_IKSTAT_DISK_STATUS "disk_status" -#define RDC_IKSTAT_IF_DOWN "if_down" -#define RDC_IKSTAT_IF_RPC_VERSION "if_rpc_version" -#define RDC_IKSTAT_ASYNC_THROTTLE_DELAY "async_throttle_delay" -#define RDC_IKSTAT_ASYNC_BLOCK_HWM "async_block_hwm" -#define RDC_IKSTAT_ASYNC_ITEM_HWM "async_item_hwm" -#define RDC_IKSTAT_QUEUE_TYPE "async_queue_type" -#define RDC_IKSTAT_ASYNC_ITEMS "async_queue_items" -#define RDC_IKSTAT_ASYNC_BLOCKS "async_queue_blocks" - -/* - * Queue types - */ -#define RDC_DISKQUE 0X01 -#define RDC_MEMQUE 0x02 -#define RDC_NOQUE -1 - -#define RDC_ACTIVE 0x1 -#define RDC_INACTIVE 0x2 - -#ifdef _KERNEL - -extern nstset_t *_rdc_ioset; -extern nstset_t *_rdc_flset; - -#ifdef DEBUG -extern int RDC_MAX_SYNC_THREADS; -extern int rdc_maxthreads_last; -int num_sync_threads; -#else -#define RDC_MAX_SYNC_THREADS 8 -#endif -#ifdef DEBUG -#define RDC_AVAIL_THR_TUNE(n) \ - do { \ - if (rdc_maxthreads_last < RDC_MAX_SYNC_THREADS) { \ - (void) nst_add_thread(n.rdc_syncset, \ - RDC_MAX_SYNC_THREADS - rdc_maxthreads_last);\ - } \ - if (rdc_maxthreads_last > RDC_MAX_SYNC_THREADS) { \ - (void) nst_del_thread(n.rdc_syncset, \ - rdc_maxthreads_last - RDC_MAX_SYNC_THREADS); \ - } \ - n.avail_thr = RDC_MAX_SYNC_THREADS - n.active_thr; \ - if (n.avail_thr < 0) { \ - n.avail_thr = 0; \ - } \ - rdc_maxthreads_last = RDC_MAX_SYNC_THREADS; \ - num_sync_threads = nst_nthread(n.rdc_syncset); \ - } while (0); -#else -#define RDC_AVAIL_THR_TUNE(n) \ - do { \ - n.avail_thr = RDC_MAX_SYNC_THREADS - n.active_thr; \ - if (n.avail_thr < 0) \ - n.avail_thr = 0; \ - } while (0); - -#endif - -typedef struct syncloop_info { - int active_thr; - int avail_thr; /* should be MAX_RDC_SYNC_THREADS - active */ - kmutex_t lock; - nstset_t *rdc_syncset; -} sync_info_t; - -sync_info_t sync_info; - -/* - * Static server information - */ -typedef struct servinfo { - struct knetconfig *ri_knconf; /* bound TLI fd */ - struct netbuf ri_addr; /* server's address */ - struct sec_data *ri_secdata; /* sec data for rpcsec module */ - char *ri_hostname; /* server's hostname */ - int ri_hostnamelen; /* 
server's hostname length */ -} rdc_srv_t; - -/* - * Interface structure, including health monitoring. - */ -typedef struct rdc_if_s { - struct rdc_if_s *next; /* chain pointer */ - struct netbuf ifaddr; - struct netbuf r_ifaddr; - rdc_srv_t *srv; /* servinfo of server end */ - int if_down; /* i/f is down (set on primary) */ - int isprimary; /* this end is a primary */ - int issecondary; /* this end is a secondary */ - rpcvers_t rpc_version; /* RPC protocol version in use */ - int no_ping; /* set on secondary to hold off RPCs */ - int old_pulse; /* previous (current) pulse value */ - int new_pulse; /* new (incoming) pulse value */ - int deadness; /* how close to death are we? */ - volatile int exiting; /* daemon exit flag */ - time_t last; /* time of last ping */ -} rdc_if_t; - - -typedef struct rdc_aio_s { - struct rdc_aio_s *next; - nsc_buf_t *handle; - nsc_buf_t *qhandle; - nsc_off_t pos; - nsc_off_t qpos; - nsc_size_t len; - nsc_size_t orig_len; - int flag; - int iostatus; - int index; - uint_t seq; /* sequence on async Q */ -} rdc_aio_t; - -/* values for (rdc_aio_t *)->iostatus */ -enum { - RDC_IO_NONE = 0, /* not used */ - RDC_IO_INIT, /* io started */ - RDC_IO_DONE, /* io done successfully */ - RDC_IO_FAILED, /* io failed */ - RDC_IO_DISCARDED, /* io discarded */ - RDC_IO_CANCELLED /* group_log in progress */ -}; - - -#define RDC_MAX_QBLOCKS 16384 /* 8MB temporary q for diskq to flush to */ -#define RDC_LOW_QBLOCKS 13927 /* roughly 85% of queue full */ -#define RDC_HALF_MQUEUE 8192 /* half of the memory queue */ - -typedef struct netqueue { - rdc_aio_t *net_qhead; - rdc_aio_t *net_qtail; - kmutex_t net_qlock; - int hwmhit; /* queue full hit? reset after hwm */ - int qfill_sleeping; /* waiting for work? */ - int qfflags; /* diskq/memq flusher flags */ - kcondvar_t qfcv; /* for timed waits */ - volatile nsc_size_t blocks; /* number of FBAs in q */ - volatile uint64_t nitems; /* number of items in q */ - volatile int inflbls; /* number of inflight blocks */ - volatile int inflitems; /* number of inflight items */ - uint64_t nitems_hwm; /* highest items on queue */ - nsc_size_t blocks_hwm; /* highest blocks on queue */ - long throttle_delay; /* Number of times we delayed x 2 */ -} net_queue; - - -/* - * Bitmap header structures. - * These must be fixed size in all data models. - * If we ever support little-endian machines (eg. Intel) we will need - * to add byte-swapping logic. 
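
The net_queue structure above keeps running totals (nitems, blocks) alongside high-water marks (nitems_hwm, blocks_hwm), which feed the async_item_hwm and async_block_hwm kstats. The accounting reduces to updating the mark whenever a new maximum is seen, roughly as in this sketch with invented names:

#include <stdint.h>

struct queue_stats {
	uint64_t nitems;		/* current items on the queue */
	uint64_t blocks;		/* current data blocks on the queue */
	uint64_t nitems_hwm;		/* highest nitems ever seen */
	uint64_t blocks_hwm;		/* highest blocks ever seen */
};

/* Account for one enqueued request of 'blocks' data blocks. */
static void
queue_account(struct queue_stats *q, uint64_t blocks)
{
	q->nitems++;
	q->blocks += blocks;

	if (q->nitems > q->nitems_hwm)
		q->nitems_hwm = q->nitems;
	if (q->blocks > q->blocks_hwm)
		q->blocks_hwm = q->blocks;
}
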
- */ - -typedef struct { - int32_t magic; - int32_t serial_mode; - int32_t use_mirror; - int32_t mirror_down; - int32_t sync_needed; - char bitmapname[NSC_MAXPATH]; - char filename[NSC_MAXPATH]; - int32_t volume_failed; -} rdc_headerv2_t; -#define RDC_HDR_V2 0x52444302 /* RDC2 */ - -#define RDC_SYNC 0x1 -#define RDC_REV_SYNC 0x2 -#define RDC_FULL_SYNC 0x3 - -#define RDC_FAILED 0x1 -#define RDC_COMPLETED 0x2 - -typedef struct { - char file[NSC_MAXPATH]; - char bitmap[NSC_MAXPATH]; -} rdc_hdr_addr_t; - -typedef struct { - int32_t magic; - rdc_hdr_addr_t primary; - rdc_hdr_addr_t secondary; - int32_t flags; - int32_t autosync; - int32_t maxqfbas; - int32_t maxqitems; - int32_t syshostid; /* for cluster bitmaps */ -} rdc_headerv3_t; -#define RDC_HDR_V3 0x52444303 /* RDC3 */ - -typedef struct { - int32_t magic; - rdc_hdr_addr_t primary; - rdc_hdr_addr_t secondary; - int32_t flags; - int32_t autosync; - int32_t maxqfbas; - int32_t maxqitems; - int32_t syshostid; /* for cluster bitmaps */ - int32_t asyncthr; -} rdc_headerv4_t; -#define RDC_HDR_V4 0x52444304 /* RDC4 */ - -typedef struct { - int32_t magic; - rdc_hdr_addr_t primary; - rdc_hdr_addr_t secondary; - int32_t flags; - int32_t autosync; - int64_t maxqfbas; - int64_t maxqitems; - int32_t syshostid; /* for cluster bitmaps */ - int32_t asyncthr; - int32_t refcntsize; /* size in bytes of each refcount */ -} rdc_headerv5_t; -#define RDC_HDR_V5 0x52444305 /* RDC5 */ - -typedef rdc_headerv5_t rdc_header_t; /* Current header type */ -#define RDC_HDR_MAGIC RDC_HDR_V5 /* Current header magic number */ - -#endif /* _KERNEL */ - -#define RDC_BITMAP_FBA 1 /* Offset at which the bitmap starts */ -#define RDC_BITREF_FBA(krdc) (RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size)) - -#ifdef _KERNEL - -#define RDC_FUTILE_ATTEMPTS 50 -typedef struct aio_buf_s { - struct aio_buf_s *next; /* next aio_buf */ - nsc_buf_t *rdc_abufp; /* actual anon buf */ - int kindex; /* index we are attached to */ -} aio_buf_t; - -typedef struct rdc_thrsync { - kmutex_t lock; - int threads; - int complete; - kcondvar_t cv; -} rdc_thrsync_t; - -typedef struct sync_status_s { - int offset; - struct sync_status_s *next; -} sync_status_t; - -typedef struct rdc_syncthr { - nsc_off_t offset; - nsc_size_t len; - struct rdc_k_info *krdc; - sync_status_t *status; -} rdc_syncthr_t; - -/* - * RDC buffer header - */ - -typedef struct rdc_buf_s { - nsc_buf_t rdc_bufh; /* exported buffer header */ - nsc_buf_t *rdc_bufp; /* underlying buffer */ - aio_buf_t *rdc_anon; /* ANON async buffer */ - struct rdc_fd_s *rdc_fd; /* back link */ - size_t rdc_vsize; /* size of allocated nsc_vec_t */ - int rdc_flags; /* flags */ - kmutex_t aio_lock; /* lock for rdc_anon */ - rdc_thrsync_t rdc_sync; /* for thread syncronization */ -} rdc_buf_t; - -#define RDC_VEC_ALLOC 0x1 /* local kmem vector for remote io */ -#define RDC_ALLOC 0x2 /* rdc_bufp is nsc_buf_alloc'd */ -#define RDC_ASYNC_VEC 0x4 /* Keep tmp handle for async flusher */ -#define RDC_REMOTE_BUF 0x8 /* buffer alloc'd for remote io only */ -#define RDC_NULL_BUF 0x10 /* tell diskq to only store io_hdr */ -#define RDC_ASYNC_BUF 0x20 /* this buf is to an async vol */ -#define RDC_NULLBUFREAD 0x0f000000 /* read because RDC_NULL_BUF detected */ - -#define BUF_IS_ASYNC(h) (((h) != NULL) && (h)->rdc_flags & RDC_ASYNC_BUF) -#define RDC_REMOTE(h) (((h) != NULL) && ((h)->rdc_flags & RDC_REMOTE_BUF) && \ - (((h)->rdc_flags & RDC_ASYNC_VEC) == 0)) - -/* check a handle against a supplied pos/len pair */ - -#define RDC_HANDLE_LIMITS(h, p, l) \ - (((h)->sb_user & 
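
The bitmap header definitions above encode the format version in the magic number: the upper three bytes spell "RDC" in ASCII and the low byte carries the version (0x52444302 through 0x52444305 for V2 through V5). A small sketch of how a reader could classify a stored header by its magic, using local copies of those constants:

#include <stdint.h>

#define HDR_V2	0x52444302		/* "RDC" + 2, as in rdc_io.h above */
#define HDR_V3	0x52444303
#define HDR_V4	0x52444304
#define HDR_V5	0x52444305

/* Map a stored magic number to a header version, or -1 if unrecognised. */
static int
hdr_version(int32_t magic)
{
	if ((magic & 0xffffff00) != 0x52444300)
		return (-1);		/* not an "RDC" header at all */

	switch (magic) {
	case HDR_V2: return (2);
	case HDR_V3: return (3);
	case HDR_V4: return (4);
	case HDR_V5: return (5);
	default:     return (-1);
	}
}
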
RDC_DISKQUE) || \ - ((p) >= (h)->sb_pos) && \ - (((p) + (l)) <= ((h)->sb_pos + (h)->sb_len))) - -/* check a dset against a supplied pos/len pair */ - -#define RDC_DSET_LIMITS(d, p, l) \ - (((p) >= (d)->pos) && \ - (((p) + (l)) <= ((d)->pos + (d)->fbalen))) - -/* - * RDC device info structures - */ - -typedef struct _rdc_info_dev_s { - nsc_fd_t *bi_fd; /* file descriptor */ - nsc_iodev_t *bi_iodev; /* I/O device structure */ - struct rdc_k_info *bi_krdc; /* back link */ - int bi_rsrv; /* Count of reserves held */ - int bi_orsrv; /* Reserves for other io provider */ - int bi_failed; /* Count of failed (faked) reserves */ - int bi_ofailed; /* Other io provider failed reserves */ - int bi_flag; /* Reserve flags */ -} _rdc_info_dev_t; - - -typedef struct rdc_info_dev_s { - struct rdc_info_dev_s *id_next; /* forward link */ - _rdc_info_dev_t id_cache_dev; /* cached device info */ - _rdc_info_dev_t id_raw_dev; /* raw device info */ - kmutex_t id_rlock; /* reserve/release lock */ - kcondvar_t id_rcv; /* nsc_release pending cv */ - int id_sets; /* # of sets referencing */ - int id_release; /* # of pending nsc_releases */ - int id_flag; /* flags */ -} rdc_info_dev_t; - - -typedef struct rdc_path_s { - nsc_path_t *rp_tok; /* nsc_register_path token */ - int rp_ref; /* # of rdc_fd_t's */ -} rdc_path_t; - - -/* - * Values for id_flag - */ -#define RDC_ID_CLOSING 0x1 /* device is closing */ - -#include <sys/nsctl/rdc_diskq.h> - -/* - * value for diskio.seq. - */ -#define RDC_NOSEQ (0) /* ignore sequence */ -#define RDC_NEWSEQ (1) /* start of sequence */ - -typedef struct rdc_sleepq { - struct rdc_sleepq *next; - uint_t seq; /* sequence in queue */ - int idx; /* idx number of request */ - int pindex; /* primary host set index */ - int sindex; /* secondary host set index */ - uint64_t qpos; /* offset on primary's queue */ - int nocache; /* cache flag to alloc_buf */ -} rdc_sleepq_t; - -/* - * RDC group structure - */ -typedef struct rdc_group { - int count; - int rdc_writer; - int unregistering; - kmutex_t lock; - net_queue ra_queue; /* io todo async queues */ - kcondvar_t iowaitcv; /* wait for flusher */ - kcondvar_t unregistercv; /* wait for unregister */ - int rdc_thrnum; /* number of threads */ - int rdc_addthrnum; /* number threads added to thr set */ - kmutex_t addthrnumlk; /* lock for above */ - rdc_sleepq_t *sleepq; /* head of waiting tasks */ - /* - * Dual use, the outgoing sequence number on the client. - * The next expected sequence number on the server. - * Protected by the ra_queue lock. - */ - uint_t seq; - /* - * Dual use, the last acknowledged sequence number. - * Used to ensure that the queue doesn't overflow on server - * and to stall transmissions on the client. - * Protected by the ra_queue lock. - */ - uint_t seqack; - int asyncstall; /* count of asleep threads */ - int asyncdis; /* discard stalled output */ - kcondvar_t asyncqcv; /* output stall here */ - int flags; /* memory or disk. 
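/*
 * Editor's note (illustrative sketch, not part of the original source):
 * RDC_HANDLE_LIMITS and RDC_DSET_LIMITS above are plain interval-containment
 * checks: a request starting at FBA p of length l is accepted only if it lies
 * entirely inside the buffer/dataset range, except that a disk-queue handle
 * (sb_user & RDC_DISKQUE) is always accepted.  A standalone equivalent of the
 * dataset check, using the nsc_off_t/nsc_size_t types used throughout:
 */
static int
rdc_range_contains(nsc_off_t start, nsc_size_t nblks, nsc_off_t p, nsc_size_t l)
{
	/* [p, p + l) must fall inside [start, start + nblks) */
	return ((p >= start) && ((p + l) <= (start + nblks)));
}
/*
 * Example: a dataset at pos 100 with fbalen 50 accepts (pos 120, len 30)
 * because 120 >= 100 and 150 <= 150, but rejects (pos 120, len 31).
 */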
status etc */ - disk_queue diskq; /* disk queue */ - nsc_fd_t *diskqfd; /* diskq handle */ - nsc_path_t *q_tok; /* q registration */ - int diskqrsrv; /* reserve count */ - kmutex_t diskqmutex; /* enables/disables/reserves */ - uint_t synccount; /* number of group members syncing */ -} rdc_group_t; - -/* group state */ -#define RDC_DISKQ_KILL 0x01 /* a force kill of diskq pending */ - -#define RDC_IS_DISKQ(grp) (grp->flags & RDC_DISKQUE) -#define RDC_IS_MEMQ(grp) (grp->flags & RDC_MEMQUE) - -/* - * These flags are used in the - * aux_state field, and are used to track: - * AUXSYNCIP: When the code has a sync thread running, used instead - * of the RC_SYNCING flag which gets cleared before the sync thread - * terminates. - * AUXWRITE: Set when rdc_sync_write_thr is running, so the rdc_unintercept - * code can wait until a one-to-many write has actually terminated. - */ -#define RDC_AUXSYNCIP 0x01 /* a sync is in progress */ -#define RDC_AUXWRITE 0x02 /* I've got a write in progress */ - - -/* - * RDC kernel-private information - */ -typedef struct rdc_k_info { - int index; /* Index into array */ - int remote_index; /* -1 means unknown */ - int type_flag; - int rpc_version; /* RPC version this set supps */ - int spare1; - nsc_off_t syncbitpos; - kmutex_t syncbitmutex; /* lock for syncbitpos */ - volatile int busy_count; /* ioctls in progress */ - volatile int sync_done; - int aux_state; /* syncing ,don't disable */ - rdc_thrsync_t syncs; /* _rdc_sync thread tracking */ - rdc_info_dev_t *devices; - nsc_iodev_t *iodev; /* I/O device structure */ - rdc_path_t cache_path; - rdc_path_t raw_path; - rdc_if_t *intf; - rdc_srv_t *lsrv; /* list of servinfo */ - nsc_size_t maxfbas; /* returned from nsc_maxfbas */ - unsigned char *dcio_bitmap; - void *bitmap_ref; /* Incore bitmap bit ref */ - struct rdc_group *group; - nsc_size_t bitmap_size; - int bmaprsrv; /* bitmap reserve count */ - int bitmap_write; - nsc_fd_t *bitmapfd; - nsc_fd_t *remote_fd; /* FCAL direct io */ - volatile int disk_status; /* set to halt sync */ - int closing; - nsc_path_t *b_tok; /* Bitmap registration */ - int b_ref; - kmutex_t dc_sleep; - kmutex_t bmapmutex; /* mutex for bitmap ops */ - kcondvar_t busycv; /* wait for ioctl to complete */ - kcondvar_t closingcv; /* unregister_path/close */ - kcondvar_t haltcv; /* wait for sync to halt */ - kcondvar_t synccv; /* wait for sync to halt */ - struct rdc_net_dataset *net_dataset; /* replaces hnds */ - int64_t io_time; /* moved from cd_info */ - struct rdc_k_info *many_next; /* 1-to-many circular list */ - struct rdc_k_info *multi_next; /* to multihop krdc */ - struct rdc_k_info *group_next; /* group circular list */ - kstat_t *io_kstats; /* io kstat */ - kstat_t *bmp_kstats; /* bitmap io kstat */ - kstat_t *set_kstats; /* set kstat */ - kmutex_t kstat_mutex; /* mutex for kstats */ - kmutex_t bmp_kstat_mutex; /* mutex for kstats */ - struct bm_ref_ops *bm_refs; -} rdc_k_info_t; - -#define c_fd devices->id_cache_dev.bi_fd -#define c_rsrv devices->id_cache_dev.bi_rsrv -#define c_failed devices->id_cache_dev.bi_failed -#define c_flag devices->id_cache_dev.bi_flag - -#define c_tok cache_path.rp_tok -#define c_ref cache_path.rp_ref - -#define r_fd devices->id_raw_dev.bi_fd -#define r_rsrv devices->id_raw_dev.bi_rsrv -#define r_failed devices->id_raw_dev.bi_failed -#define r_flag devices->id_raw_dev.bi_flag - -#define r_tok raw_path.rp_tok -#define r_ref raw_path.rp_ref - -/* - * flags for _rdc_rsrv_devs() - */ - -/* - * which device(s) to reserve - integer bitmap. 
- */ - -#define RDC_CACHE 0x1 /* data device in cache mode */ -#define RDC_RAW 0x2 /* data device in raw mode */ -#define RDC_BMP 0x4 /* bitmap device */ -#define RDC_QUE 0x8 /* diskq device */ - -/* - * device usage after reserve - integer flag. - */ - -#define RDC_INTERNAL 0x1 /* reserve for rdc internal purposes */ -#define RDC_EXTERNAL 0x2 /* reserve in response to io provider Attach */ - -/* - * Utility macro for nsc_*() io function returns. - */ - -#define RDC_SUCCESS(rc) (((rc) == NSC_DONE) || ((rc) == NSC_HIT)) - -/* - * RDC file descriptor structure - */ - -typedef struct rdc_fd_s { - rdc_k_info_t *rdc_info; /* devices info structure */ - int rdc_type; /* open type, diskq or bitmap */ - int rdc_oflags; /* raw or cached open type */ -} rdc_fd_t; - -/* - * fd and rsrv macros - */ - -#define RSRV(bi) (((bi)->bi_rsrv > 0) || ((bi)->bi_failed > 0)) -#define ORSRV(bi) (((bi)->bi_orsrv > 0) || ((bi)->bi_ofailed > 0)) -#define RFAILED(bi) (((bi)->bi_failed > 0) || ((bi)->bi_ofailed > 0)) - -#define IS_RSRV(bi) (RSRV(bi) || ORSRV(bi)) - -#define IS_CRSRV(gcd) (IS_RSRV(&(gcd)->devices->id_cache_dev)) -#define IS_RRSRV(gcd) (IS_RSRV(&(gcd)->devices->id_raw_dev)) - -#define IS_RFAILED(gcd) \ - (RFAILED(&(gcd)->devices->id_cache_dev) || \ - RFAILED(&(gcd)->devices->id_raw_dev)) - -#define RDC_IS_BMP(rdc) ((rdc)->rdc_type == RDC_BMP) -#define RDC_IS_QUE(rdc) ((rdc)->rdc_type == RDC_QUE) -#define RDC_IS_RAW(rdc) (((rdc)->rdc_oflags & NSC_CACHE) == 0) -#define RDC_U_FD(gcd) (IS_CRSRV(gcd) ? (gcd)->c_fd : (gcd)->r_fd) -#define RDC_FD(rdc) (RDC_U_FD(rdc->rdc_info)) - - -typedef struct rdc_host_u { - char *nodename; - int netaddr; - struct netbuf *naddr; -} rdc_host_t; - -/* - * Reply from remote read - * - convenience defines for the client side code. - * - keep this in sync with the readres structure in rdc_prot.h/.x - */ -#define rdcrdresult readres -#define rr_status status -#define rr_ok readres_u.reply -#define rr_bufsize rr_ok.data.data_len -#define rr_data rr_ok.data.data_val - -/* - * Flags for remote read rpc - * - * _START must be a unique rpc, _DATA and _END may be OR-d together. - */ -#define RDC_RREAD_DATA 0x1 /* Intermediate rpc with data payload */ -#define RDC_RREAD_START 0x2 /* Setup rpc */ -#define RDC_RREAD_END 0x4 /* End rpc */ -#define RDC_RREAD_FAIL 0x8 /* Primary is failed */ - -/* - * Flags for remote write rpc - */ -#define RDC_RWRITE_FAIL 0x8 /* Primary is failed */ - -/* - * macro used to determine if the incomming sq, with sequence - * value x, should be placed before the sq with sequence value y. - * This has to account for integer wrap. We account for integer - * wrap by checking if the difference between x and y is within - * half of the maximum integer value (RDC_MAXINT) or not. - */ - -#define RDC_BITSPERBYTE 8 -#define RDC_BITS(type) (RDC_BITSPERBYTE * (long)sizeof (type)) -#define RDC_HIBITI ((unsigned)1 << (RDC_BITS(int) - 1)) -#define RDC_MAXINT ((int)(~RDC_HIBITI)) -#define RDC_RANGE ((RDC_MAXINT / 2) -1) - -#define RDC_INFRONT(x, y) (((x < y) && ((y - x) < RDC_RANGE)) ? 1 : \ - ((x > y) && ((x - y) > RDC_RANGE)) ? 1 : 0) - - - - -#endif /* _KERNEL */ - -/* - * RDC user-visible information - */ -typedef rdc_set_t rdc_u_info_t; - - -/* - * RDC flags for set state / set cd RPC. - * Must remain compatible with rdc RPC protocol version v3. 
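/*
 * Editor's note (illustrative sketch, not part of the original source):
 * RDC_INFRONT(x, y) above answers "should sequence number x be queued in
 * front of y?" while tolerating 32-bit wrap.  With RDC_MAXINT == INT_MAX
 * and RDC_RANGE just under half of that, two worked cases:
 *
 *   x = 5,              y = 10          -> x < y and (y - x) < RDC_RANGE,
 *                                          so RDC_INFRONT == 1 (x sorts first).
 *   x = RDC_MAXINT - 2, y = 3 (wrapped) -> x > y and (x - y) > RDC_RANGE,
 *                                          so RDC_INFRONT == 1: the pre-wrap
 *                                          value still sorts before the
 *                                          post-wrap one.
 */
static int
rdc_seq_example(void)
{
	uint_t a = RDC_MAXINT - 2;	/* just before 32-bit wrap */
	uint_t b = 3;			/* just after wrap */

	return (RDC_INFRONT(a, b));	/* evaluates to 1: a still sorts first */
}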
- */ -#define CCIO_NONE 0x0000 -#define CCIO_ENABLE 0x0008 -#define CCIO_SLAVE 0x0010 -#define CCIO_DONE 0x0020 -#define CCIO_ENABLELOG 0x0100 -#define CCIO_RSYNC 0x0400 -#define CCIO_REMOTE 0x2000 - - -/* - * In kernel type flags (krdc->type_flag). - */ -#define RDC_CONFIGURED 0x1 -#define RDC_DISABLEPEND 0x2 /* Suspend/Disable is in progress */ -#define RDC_ASYNCMODE 0x4 -#define RDC_RESUMEPEND 0x8 -#define RDC_RESPONSIBLE 0x10 -#define RDC_BUSYWAIT 0x20 -#define RDC_UNREGISTER 0x40 /* Unregister is in progress */ -#define RDC_QDISABLEPEND 0x100 /* Q Suspend/Disable is in progress */ - -#define IS_ENABLED(urdc) ((IS_CONFIGURED(&rdc_k_info[(urdc)->index]) && \ - (rdc_get_vflags(urdc) & RDC_ENABLED))) -#define IS_CONFIGURED(krdc) ((krdc)->type_flag & RDC_CONFIGURED) -#define IS_MANY(krdc) ((krdc)->many_next != (krdc)) -#define IS_MULTI(krdc) ((krdc)->multi_next != NULL) - -#define IS_VALID_INDEX(index) ((index) >= 0 && (index) < rdc_max_sets && \ - IS_CONFIGURED(&rdc_k_info[(index)])) - -#define RDC_NOFLUSH 0 /* Do not do a flush when starting logging */ -#define RDC_NOREMOTE 0 /* Do no remote logging notifications */ -#define RDC_FLUSH 1 /* Do a flush when starting logging */ -#define RDC_ALLREMOTE 2 /* Notify all remote group members */ -#define RDC_OTHERREMOTE 4 /* Notify all remote group members except */ - /* the one corresponding to the current set, */ - /* to prevent recursion in the case where */ - /* the request was initiated from the remote */ - /* node. */ -#define RDC_FORCE_GROUP 8 /* set all group memebers logging regardless */ - -#ifdef _KERNEL - -/* - * Functions, vars - */ - -#define RDC_SYNC_EVENT_TIMEOUT (60 * HZ) -typedef struct { - clock_t lbolt; - int event; - int ack; - int daemon_waiting; /* Daemon waiting in ioctl */ - int kernel_waiting; /* Kernel waiting for daemon to reply */ - char master[NSC_MAXPATH]; - char group[NSC_MAXPATH]; - kmutex_t mutex; - kcondvar_t cv; - kcondvar_t done_cv; -} rdc_sync_event_t; -extern rdc_sync_event_t rdc_sync_event; -extern clock_t rdc_sync_event_timeout; -extern kmutex_t rdc_sync_mutex; - -extern rdc_u_info_t *rdc_u_info; -extern rdc_k_info_t *rdc_k_info; - -extern int rdc_max_sets; - -extern unsigned long rdc_async_timeout; - -extern int rdc_self_host(); -extern uint64_t mirror_getsize(int index); -extern void rdc_sleepqdiscard(rdc_group_t *); - - -#ifdef DEBUG -extern void rdc_stallzero(int); -#endif - -struct rdc_net_dataitem { - void *dptr; - int len; /* byte count */ - int mlen; /* actual malloced size */ - struct rdc_net_dataitem *next; -}; -typedef struct rdc_net_dataitem rdc_net_dataitem_t; - -struct rdc_net_dataset { - int id; - int inuse; - int delpend; - int nitems; - nsc_off_t pos; - nsc_size_t fbalen; - rdc_net_dataitem_t *head; - rdc_net_dataitem_t *tail; - struct rdc_net_dataset *next; -}; -typedef struct rdc_net_dataset rdc_net_dataset_t; - - -#endif /* _KERNEL */ - - -#define RDC_TCP_DEV "/dev/tcp" - -#define RDC_VERS_MIN RDC_VERSION5 -#define RDC_VERS_MAX RDC_VERSION7 - -#define RDC_HEALTH_THRESHOLD 20 -#define RDC_MIN_HEALTH_THRES 5 -#define SNDR_MAXTHREADS 16 -/* - * These next two defines are the default value of the async queue size - * They have been calculated to be 8MB of data with an average of - * 2K IO size - */ -#define RDC_MAXTHRES_QUEUE 16384 /* max # of fbas on async q */ -#define RDC_MAX_QITEMS 4096 /* max # of items on async q */ -#define RDC_ASYNCTHR 2 /* number of async threads */ - -#define RDC_RPC_MAX (RDC_MAXDATA + sizeof (net_data5) +\ - (RPC_MAXDATASIZE - 8192)) -#define ATM_NONE 0 -#define 
ATM_INIT 1 -#define ATM_EXIT 2 - -#define RDC_CLNT_TMOUT 16 - -#define BMAP_BLKSIZE 1024 -#define BMAP_BLKSIZEV7 RDC_MAXDATA - -/* right now we can only trace 1m or less writes to the bitmap (32 bits wide) */ -#define RDC_MAX_MAXFBAS 2048 - -#if defined(_KERNEL) -/* kstat interface */ - -/* - * Per module kstats - * only one instance - */ -typedef struct { - kstat_named_t m_maxsets; /* Max # of sndr sets */ - kstat_named_t m_maxfbas; /* Max # of FBAS from nsctl */ - kstat_named_t m_rpc_timeout; /* global RPC timeout */ - kstat_named_t m_health_thres; /* Health thread timeout */ - kstat_named_t m_bitmap_writes; /* True for bitmap writes */ - kstat_named_t m_clnt_cots_calls; /* # of clnt COTS calls */ - kstat_named_t m_clnt_clts_calls; /* # of clnt CLTS calls */ - kstat_named_t m_svc_cots_calls; /* # of server COTS calls */ - kstat_named_t m_svc_clts_calls; /* # of server CLTS calls */ - kstat_named_t m_bitmap_ref_delay; /* # of bitmap ref overflows */ -} sndr_m_stats_t; - -/* - * Per set kstats - * one instance per configured set - */ -typedef struct { - kstat_named_t s_flags; /* from rdc_set_t */ - kstat_named_t s_syncflags; /* from rdc_set_t */ - kstat_named_t s_bmpflags; /* from rdc_set_t */ - kstat_named_t s_syncpos; /* from rdc_set_t */ - kstat_named_t s_volsize; /* from rdc_set_t */ - kstat_named_t s_bits_set; /* from rdc_set_t */ - kstat_named_t s_autosync; /* from rdc_set_t */ - kstat_named_t s_maxqfbas; /* from rdc_set_t */ - kstat_named_t s_maxqitems; /* from rdc_set_t */ - kstat_named_t s_primary_vol; /* from rdc_set_t */ - kstat_named_t s_secondary_vol; /* from rdc_set_t */ - kstat_named_t s_bitmap; /* from rdc_set_t */ - kstat_named_t s_primary_intf; /* from rdc_set_t */ - kstat_named_t s_secondary_intf; /* from rdc_set_t */ - kstat_named_t s_type_flag; /* from rdc_k_info_t */ - kstat_named_t s_bitmap_size; /* from rdc_k_info_t */ - kstat_named_t s_disk_status; /* from rdc_k_info_t */ - kstat_named_t s_if_if_down; /* from rdc_if_t */ - kstat_named_t s_if_rpc_version; /* from rdc_if_t */ - kstat_named_t s_aqueue_blk_hwm; /* from rdc_k_info_t */ - kstat_named_t s_aqueue_itm_hwm; /* from rdc_k_info_t */ - kstat_named_t s_aqueue_throttle; /* from rdc_k_info_t */ - kstat_named_t s_aqueue_items; - kstat_named_t s_aqueue_blocks; - kstat_named_t s_aqueue_type; -} rdc_info_stats_t; -#endif /* _KERNEL */ - -#ifndef _SunOS_5_6 /* i.e. 2.7+ */ -typedef int xdr_t; -#else /* i.e. 2.6- */ -typedef unsigned long rpcprog_t; -typedef unsigned long rpcvers_t; -typedef unsigned long rpcproc_t; -typedef unsigned long rpcprot_t; -typedef unsigned long rpcport_t; -#endif /* _SunOS_5_6 */ - - -#ifdef _KERNEL - -extern nsc_size_t MAX_RDC_FBAS; -extern volatile int net_exit; -extern nsc_size_t rdc_maxthres_queue; /* max # of fbas on async q */ -extern int rdc_max_qitems; /* max # of items on async q */ -extern int rdc_asyncthr; /* # of async threads */ - -#ifdef DEBUG -extern kmutex_t rdc_cntlock; -extern int rdc_datasetcnt; -#endif - -/* - * Macro to keep tabs on dataset memory usage. 
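/*
 * Editor's note (illustrative arithmetic, not part of the original source):
 * the async queue defaults defined earlier are consistent with the stated
 * "8MB of data with an average of 2K IO size", assuming the usual 512-byte
 * FBA used throughout this code:
 *
 *   RDC_MAXTHRES_QUEUE = 16384 FBAs * 512 bytes = 8 MB of queued data
 *   RDC_MAX_QITEMS     =  4096 items             = 8 MB / 2 KB per item
 *
 * The same 16384-FBA figure appears earlier as RDC_MAX_QBLOCKS, with
 * RDC_LOW_QBLOCKS (13927) at roughly 85% of it (16384 * 0.85 ~= 13926).
 */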
- */ -#ifdef DEBUG -#define RDC_DSMEMUSE(x) \ - mutex_enter(&rdc_cntlock);\ - rdc_datasetcnt += (x);\ - mutex_exit(&rdc_cntlock); -#else -#define RDC_DSMEMUSE(x) -#endif - - - - - -extern kmutex_t rdc_ping_lock; -extern rdc_if_t *rdc_if_top; - -extern int _rdc_enqueue_write(rdc_k_info_t *, nsc_off_t, nsc_size_t, int, - nsc_buf_t *); -extern int rdc_net_state(int, int); -extern int rdc_net_getbmap(int, int); -extern int rdc_net_getsize(int, uint64_t *); -extern int rdc_net_write(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t, uint_t, - int, netwriteres *); -extern int rdc_net_read(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t); -extern int _rdc_remote_read(rdc_k_info_t *, nsc_buf_t *, nsc_off_t, nsc_size_t, - int); -extern int _rdc_multi_write(nsc_buf_t *, nsc_off_t, nsc_size_t, int, - rdc_k_info_t *); -extern int rdc_start_server(struct rdc_svc_args *, int); -extern aio_buf_t *rdc_aio_buf_get(rdc_buf_t *, int); -extern void rdc_aio_buf_del(rdc_buf_t *, rdc_k_info_t *); -extern aio_buf_t *rdc_aio_buf_add(int, rdc_buf_t *); -extern int rdc_net_getstate(rdc_k_info_t *, int *, int *, int *, int); -extern kmutex_t rdc_conf_lock; -extern kmutex_t rdc_many_lock; -extern int rdc_drain_queue(int); -extern int flush_group_queue(int); -extern void rdc_dev_close(rdc_k_info_t *); -extern int rdc_dev_open(rdc_set_t *, int); -extern void rdc_get_details(rdc_k_info_t *); -extern int rdc_lookup_bitmap(char *); -extern int rdc_lookup_enabled(char *, int); -extern int rdc_lookup_byaddr(rdc_set_t *); -extern int rdc_lookup_byname(rdc_set_t *); -extern int rdc_intercept(rdc_k_info_t *); -extern int rdc_unintercept(rdc_k_info_t *); -extern int _rdc_rsrv_devs(rdc_k_info_t *, int, int); -extern void _rdc_rlse_devs(rdc_k_info_t *, int); -extern void _rdc_unload(void); -extern int _rdc_load(void); -extern int _rdc_configure(void); -extern void _rdc_deconfigure(void); -extern void _rdc_async_throttle(rdc_k_info_t *, long); -extern int rdc_writer(int); -extern int rdc_dump_alloc_bufs_cd(int); -extern void rdc_dump_alloc_bufs(rdc_if_t *); -extern int rdc_check_secondary(rdc_if_t *, int); -extern void rdc_dump_queue(int); -extern int rdc_isactive_if(struct netbuf *, struct netbuf *); -extern rdc_if_t *rdc_add_to_if(rdc_srv_t *, struct netbuf *, struct netbuf *, - int); -extern void rdc_remove_from_if(rdc_if_t *); -extern void rdc_set_if_vers(rdc_u_info_t *, rpcvers_t); - -extern void rdc_print_svinfo(rdc_srv_t *, char *); -extern rdc_srv_t *rdc_create_svinfo(char *, struct netbuf *, - struct knetconfig *); -extern void rdc_destroy_svinfo(rdc_srv_t *); - -extern void init_rdc_netbuf(struct netbuf *); -extern void free_rdc_netbuf(struct netbuf *); -extern void dup_rdc_netbuf(const struct netbuf *, struct netbuf *); -extern int rdc_netbuf_toint(struct netbuf *); -extern struct netbuf *rdc_int_tonetbuf(int); -extern void rdc_lor(const uchar_t *, uchar_t *, int); -extern int rdc_resume2(rdc_k_info_t *); -extern void rdc_set_flags(rdc_u_info_t *, int); -extern void rdc_clr_flags(rdc_u_info_t *, int); -extern int rdc_get_vflags(rdc_u_info_t *); -extern void rdc_set_mflags(rdc_u_info_t *, int); -extern void rdc_clr_mflags(rdc_u_info_t *, int); -extern int rdc_get_mflags(rdc_u_info_t *); -extern void rdc_set_flags_log(rdc_u_info_t *, int, char *); -extern void rdc_group_log(rdc_k_info_t *krdc, int flush, char *why); -extern int _rdc_config(void *, int, spcs_s_info_t, int *); -extern void rdc_many_enter(rdc_k_info_t *); -extern void rdc_many_exit(rdc_k_info_t *); -extern void rdc_group_enter(rdc_k_info_t *); -extern void 
rdc_group_exit(rdc_k_info_t *); -extern int _rdc_sync_event_wait(void *, void *, int, spcs_s_info_t, int *); -extern int _rdc_sync_event_notify(int, char *, char *); -extern int _rdc_link_down(void *, int, spcs_s_info_t, int *); -extern void rdc_delgroup(rdc_group_t *); -extern int rdc_write_bitmap_fba(rdc_k_info_t *, nsc_off_t); -extern int rdc_bitmapset(int, char *, char *, void *, int, nsc_off_t, int); -extern rdc_net_dataset_t *rdc_net_add_set(int); -extern rdc_net_dataset_t *rdc_net_get_set(int, int); -extern void rdc_net_put_set(int, rdc_net_dataset_t *); -extern void rdc_net_del_set(int, rdc_net_dataset_t *); -extern void rdc_net_free_set(rdc_k_info_t *, rdc_net_dataset_t *); -extern int rdc_lookup_byhostdev(char *intf, char *file); -extern int rdc_lookup_configured(char *path); -extern void rdc_dump_dsets(int); -extern void set_busy(rdc_k_info_t *); -extern void wakeup_busy(rdc_k_info_t *); - - -#ifdef DEBUG -extern int rdc_async6(void *, int mode, int *); -extern int rdc_readgen(void *, int, int *); -#endif - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_IO_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_ioctl.h b/usr/src/uts/common/avs/ns/rdc/rdc_ioctl.h deleted file mode 100644 index ddb6fb5970..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_ioctl.h +++ /dev/null @@ -1,498 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDC_IOCTL_H -#define _RDC_IOCTL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/unistat/spcs_s.h> -#include <sys/nsctl/nsctl.h> -#ifndef DS_DDICT -#include <rpc/rpc.h> -#endif - -#ifdef _SunOS_5_6 -#define netbuf32 netbuf -#include <sys/nsctl/model.h> -#endif - -typedef struct _rdc_ioctl_s { - long arg0; - long arg1; - long arg2; - long arg3; - long arg4; - long magic; - spcs_s_info_t ustatus; - long pad[1]; -} _rdc_ioctl_t; - -#ifdef _SYSCALL32 -typedef struct _rdc_ioctl32_s { - int32_t arg0; - int32_t arg1; - int32_t arg2; - int32_t arg3; - int32_t arg4; - int32_t magic; - spcs_s_info32_t ustatus; - int32_t pad[1]; -} _rdc_ioctl32_t; -#endif /* _SYSCALL32 */ - -/* - * Ioctl command numbers - */ - -#define _RDCI_(x) (('R'<<16)|('D'<<8)|(x)) - -/* - * Generic rdc ioctl arguments structure. - * Individual ioctl's will use 0-n of these arguments. - * - * Each rdc ioctl is described first by the command number - * e.g. #define RDC_CONFIG _RDCI_(0) - * - * Followed by a description of each argument (if any). - * Each argument is on a single line. 
- * - */ - -#define RDC_CONFIG _RDCI_(0) -/* - * rdc_config_t *user_configuration; - */ - -#define RDC_ENABLE_SVR _RDCI_(1) -/* - * rdc_svc_args_t *daemon_configuration; - */ - -#define RDC_STATUS _RDCI_(2) -/* - * rdc_status_t *rdc_status; - */ - -#define RDC_VERSION _RDCI_(3) -/* - * rdc_version_t *rdc_version; - */ - -#define RDC_LINK_DOWN _RDCI_(4) -/* - * char *rdc_host; - */ - -#define RDC_SYNC_EVENT _RDCI_(5) -/* - * char *rdc_master; - * char *rdc_group; - */ - -#define RDC_POOL_CREATE _RDCI_(6) -/* - * struct svcpool_args * - */ - -#define RDC_POOL_WAIT _RDCI_(7) -/* - * int id - */ - -#define RDC_POOL_RUN _RDCI_(8) -/* - * int id - */ -#define RDC_BITMAPOP _RDCI_(9) - -#ifdef DEBUG -#define RDC_ASYNC6 _RDCI_(20) /* send async message by hand */ -#define RDC_CLRKSTAT _RDCI_(21) /* clear kstat_io structure */ -#define RDC_STALL0 _RDCI_(22) /* stall sequence 0 on server */ -#define RDC_READGEN _RDCI_(23) /* cause a read on server */ -#endif - - -#define MAX_RDC_HOST_SIZE 64 - -/* - * Change this when the ioctl structure changes - */ -#define RDC_MAGIC 0xf00d0001 - -typedef struct rdc_addr { - struct netbuf addr; - char intf[MAX_RDC_HOST_SIZE]; - char file[NSC_MAXPATH]; - char bitmap[NSC_MAXPATH]; -} rdc_addr_t; - -#ifdef _SYSCALL32 -struct rdc_addr32 { - struct netbuf32 addr; - char intf[MAX_RDC_HOST_SIZE]; - char file[NSC_MAXPATH]; - char bitmap[NSC_MAXPATH]; -}; -#endif /* _SYSCALL32 */ - -/* - * User level rdc set structure - must be a multiple of 64bits long. - */ -typedef struct rdc_set { - rdc_addr_t primary; - rdc_addr_t secondary; - struct knetconfig *netconfig; - long align1; - double alignfix; - int flags; /* See RDC flags below */ - int sync_flags; /* See RDC flags below */ - int bmap_flags; /* See RDC flags below */ - int mflags; /* RDC 1-to-many flags */ - int index; /* 0 .. rdc_max_sets - 1 */ - int bits_set; /* Bits set in bitmap */ - int autosync; /* Autosync on (1) or off (0) */ - int syshostid; /* for cluster integration */ - int asyncthr; /* # of async threads */ - int setid; /* unique set id for this set */ - uint64_t sync_pos; /* Progress through sync */ - uint64_t volume_size; /* Size of volume */ - int64_t maxqfbas; /* max # of fbas on async q */ - int64_t maxqitems; /* max # of items on async q */ - char group_name[NSC_MAXPATH]; /* Group the set belongs to */ - char direct_file[NSC_MAXPATH]; /* Local FCAL direct io file */ - char disk_queue[NSC_MAXPATH]; /* Disk Queue for set|group */ -} rdc_set_t; - -#ifdef _SYSCALL32 -struct rdc_set32 { - struct rdc_addr32 primary; - struct rdc_addr32 secondary; - caddr32_t netconfig; - int32_t align1; - double alignfix; - int32_t flags; /* See RDC flags below */ - int32_t sync_flags; /* See RDC flags below */ - int32_t bmap_flags; /* See RDC flags below */ - int32_t mflags; /* RDC 1-to-many flags */ - int32_t index; /* 0 .. 
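/*
 * Editor's note (illustrative sketch, not part of the original source):
 * rdc_set_t above is declared "must be a multiple of 64bits long"; the
 * align1/alignfix members appear to exist only to keep the native and
 * _SYSCALL32 layouts in step across the ioctl boundary.  A hypothetical
 * compile-time guard for that invariant, using only portable C:
 */
typedef char rdc_set_size_is_64bit_multiple
	[((sizeof (rdc_set_t) % sizeof (uint64_t)) == 0) ? 1 : -1];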
rdc_max_sets - 1 */ - int32_t bits_set; /* Bits set in bitmap */ - int32_t autosync; /* Autosync on (1) or off (0) */ - int32_t syshostid; /* for cluster integration */ - int32_t asyncthr; /* # of async threads */ - int32_t setid; /* unique set id for this set */ - uint64_t sync_pos; /* Progress through sync */ - uint64_t volume_size; /* Size of volume */ - int64_t maxqfbas; /* max # of fbas on async q */ - int64_t maxqitems; /* max # of items on async q */ - char group_name[NSC_MAXPATH]; /* Group the set belongs to */ - char direct_file[NSC_MAXPATH]; /* Local FCAL direct io file */ - char disk_queue[NSC_MAXPATH]; /* Disk Queue for set|group */ -}; -#endif /* _SYSCALL32 */ - -/* - * Parameter structure to pass to RDC_CONFIG - */ - -typedef struct rdc_config { - int command; /* RDC_CMD_XXX */ - int options; /* RDC_OPT_XXX */ - int pad[2]; /* Do NOT remove - 32/64-bit padding */ - rdc_set_t rdc_set[1]; /* The rdc sets */ -} rdc_config_t; - -#ifdef _SYSCALL32 -struct rdc_config32 { - int32_t command; /* RDC_CMD_XXX */ - int32_t options; /* RDC_OPT_XXX */ - int32_t pad[2]; /* Do NOT remove - 32/64-bit padding */ - struct rdc_set32 rdc_set[1]; /* The rdc sets */ -}; -#endif /* _SYSCALL32 */ - -#define RDC_BITMAPSET 0x01 -#define RDC_BITMAPOR 0x02 -typedef struct rdc_bitmap_op { - nsc_off_t offset; /* byte offset within bitmap mod fba */ - int32_t op; /* or/set operation */ - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int32_t len; /* length of bitmap in bytes */ - unsigned long addr; /* address of bitmap in userland */ -} rdc_bitmap_op_t; - -#ifdef _SYSCALL32 -typedef struct rdc_bitmap_op32 { - nsc_off_t offset; - int32_t op; - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int32_t len; - uint32_t addr; -} rdc_bitmap_op32_t; - -#endif /* _SYSCALL32 */ - -#ifdef DEBUG -/* - * structure to initiate an asynchronous send to the secondary, - * so we can test the queuing code. - */ -typedef struct rdc_async6 { - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int pos; /* Position in file */ - int len; - int seq; - int pat; /* fill data with this */ - int idx; /* server returned index */ - int spos; /* sub task start block */ - int slen; /* sub task length */ - int endind; /* set when last block in multi request */ -} rdc_async6_t; -/* - * structure to initiate a read on the secondary, so we can test the - * maxfba break up code. 
- */ -typedef struct rdc_readgen { - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int len; - int pos; - int idx; - int flag; - int rpcversion; - void *data; /* where to place the data from the read */ -} rdc_readgen_t; - -#ifdef _SYSCALL32 -typedef struct rdc_readgen32 { - char sechost[MAX_RDC_HOST_SIZE]; - char secfile[NSC_MAXPATH]; - int len; - int pos; - int idx; - int flag; - int rpcversion; - caddr32_t data; /* where to place the data from the read */ -} rdc_readgen32_t; -#endif -#endif - - - - - -/* - * Config ioctl commands - */ -#define RDC_CMD_ENABLE 1 /* New enable */ -#define RDC_CMD_DISABLE 2 /* Complete disable */ -#define RDC_CMD_RESUME 3 /* Local re-enable */ -#define RDC_CMD_SUSPEND 4 /* Local clear */ -#define RDC_CMD_LOG 5 /* Start logging mode */ -#define RDC_CMD_COPY 6 /* Start synching */ -#define RDC_CMD_RECONFIG 7 /* Change the rdc set */ -#define RDC_CMD_TUNABLE 8 /* Change a tunable parameter */ -#define RDC_CMD_WAIT 9 /* Wait for syncs to complete */ -#define RDC_CMD_HEALTH 10 /* Return health state */ -#define RDC_CMD_STATUS 11 /* Single set status */ -#define RDC_CMD_RESET 12 /* reset error or failed status */ -#define RDC_CMD_INITQ 14 /* initialise the disk queue */ -#define RDC_CMD_FLUSHQ 15 /* flush queue for set */ -#define RDC_CMD_ADDQ 16 /* add diskq to a set/group */ -#define RDC_CMD_REMQ 17 /* nice remove a diskq from set/grp */ -#define RDC_CMD_KILLQ 18 /* forced disgard of queue */ -#define RDC_CMD_REPQ 19 /* replace queue */ - - - - - -/* - * Config ioctl options - */ -#define RDC_OPT_SYNC 0x1 /* RDC_CMD_ENABLE, RDC_CMD_RESUME */ -#define RDC_OPT_ASYNC 0x2 /* RDC_CMD_ENABLE, RDC_CMD_RESUME */ -#define RDC_OPT_PRIMARY 0x4 /* All */ -#define RDC_OPT_SECONDARY 0x8 /* All */ -#define RDC_OPT_FORWARD 0x10 /* RDC_CMD_COPY */ -#define RDC_OPT_REVERSE 0x20 /* RDC_CMD_COPY */ -#define RDC_OPT_FULL 0x40 /* RDC_CMD_COPY */ -#define RDC_OPT_UPDATE 0x80 /* RDC_CMD_COPY */ -#define RDC_OPT_SETBMP 0x100 /* RDC_CMD_ENABLE */ -#define RDC_OPT_CLRBMP 0x200 /* RDC_CMD_ENABLE */ -#define RDC_OPT_REVERSE_ROLE 0x400 /* RDC_CMD_RECONFIG */ -#define RDC_OPT_FORCE_QINIT 0x800 /* RDC_CMD_INITQ */ -#define RDC_OPT_SET_QNOBLOCK 0x1000 /* RDC_CMD_TUNABLE */ -#define RDC_OPT_CLR_QNOBLOCK 0x2000 /* RDC_CMD_TUNABLE */ -#define RDC_OPT_FORCE_DISABLE 0x4000 /* RDC_CMD_DISABLE */ - -/* - * RDC flags - */ - -/* - * Passed out by the kernel (status) - */ -#define RDC_ENABLED 0x2 /* RDC enabled */ -#define RDC_PRIMARY 0x4 /* This node is the primary */ -#define RDC_SLAVE 0x8 /* This node is target of the synch */ -#define RDC_VOL_FAILED 0x10 /* Volume is failed */ -#define RDC_BMP_FAILED 0x20 /* Bitmap is failed */ -#define RDC_SYNC_NEEDED 0x40 /* Sync is needed */ -#define RDC_RSYNC_NEEDED 0x80 /* Reverse sync is needed */ -#define RDC_SYNCING 0x100 /* Synch in progress */ -#define RDC_LOGGING 0x200 /* Logging */ -#define RDC_FCAL_FAILED 0x400 /* Direct remote I/O failed */ -#define RDC_ASYNC 0x800 /* Set is in async replicating mode */ -#define RDC_FULL 0x1000 /* Full sync, not an update */ -#define RDC_CLR_AFTERSYNC 0x2000 /* clr bitmap on secondary after sync */ -#define RDC_DISKQ_FAILED 0x4000 /* Diskq I/O has failed */ -#define RDC_QUEUING 0x8000 /* logging, but queueing to disk */ -#ifndef RDC_QNOBLOCK -#define RDC_QNOBLOCK 0x10000 -#endif -#define RDC_SYNC_START 0 -#define RDC_SYNC_DONE 1 -#define RDC_RSYNC_START 2 - -#ifdef _KERNEL - -/* - * urdc->flags vs urdc->mflags usage: - * - * All flags are valid in urdc->flags, in which case the condition - * holds for 
the specific urdc. - * - * The flags in RDC_MFLAGS can also be in urdc->mflags, in which case - * the condition holds for a urdc somewhere on the many/multi chains - * connected to this urdc. - */ - -#define RDC_GROUP 0x7f8 /* Volume states that affect a group */ - -/* - * Mask of volume flags that are valid in urdc->mflags - */ -#define RDC_MFLAGS (RDC_SLAVE | RDC_RSYNC_NEEDED) - -#define IS_SLAVE(urdc) (rdc_get_mflags(urdc) & RDC_SLAVE) - -/* - * Mask of volume flags that are maintained in sync_flags not flags, - * and protected by rdc_many_lock rather than the group lock. - * This allows code that is operating on one set to change the flags - * of another set. - */ -#define RDC_SFLAGS (RDC_SYNC_NEEDED | RDC_RSYNC_NEEDED | \ - RDC_VOL_FAILED | RDC_CLR_AFTERSYNC) - -/* - * Mask of volume flags that are maintained in bmap_flags not flags, - * and protected by the bmapmutex rather than the group lock. - */ -#define RDC_BFLAGS RDC_BMP_FAILED - -#define RDC_VFLAGS (~(RDC_SFLAGS | RDC_BFLAGS)) - -#define RDC_SYNC_STATE_FLAGS (RDC_LOGGING | RDC_SYNCING | RDC_QUEUING | \ - RDC_ASYNC) - -#define IS_ASYNC(urdc) (rdc_get_vflags(urdc) & RDC_ASYNC) -#define IS_PRIMARY(urdc) (rdc_get_vflags(urdc) & RDC_PRIMARY) -#define IS_SECONDARY(urdc) (!IS_PRIMARY(urdc)) -#define IS_STATE(urdc, state) (rdc_get_vflags(urdc) & (state)) -#define IS_REPLICATING(urdc) (!(rdc_get_vflags(urdc) & RDC_LOGGING) && \ - !(rdc_get_vflags(urdc) & RDC_SYNCING)) - -#endif /* _KERNEL */ - -typedef struct rdc_status { - int nset; /* Number of sets requested/enabled */ - int maxsets; /* Max # of sets allowed today */ - rdc_set_t rdc_set[1]; -} rdc_status_t; - -#ifdef _SYSCALL32 -struct rdc_status32 { - int32_t nset; /* Number of sets requested/enabled */ - int32_t maxsets; /* Max # of sets allowed today */ - struct rdc_set32 rdc_set[1]; -}; -#endif /* _SYSCALL32 */ - -typedef struct rdc_svc_args { - int fd; /* Connection endpoint */ - int nthr; /* Number of server threads */ - char netid[128]; /* Identify transport */ - struct netbuf addrmask; /* Address mask for host */ -} rdc_svc_args_t; - -#ifdef _SYSCALL32 -struct rdc_svc_args32 { - int32_t fd; - int32_t nthr; - char netid[128]; - struct netbuf32 addrmask; -}; -#endif /* _SYSCALL32 */ - -typedef struct rdc_version { - int major; /* Major release number */ - int minor; /* Minor release number */ - int micro; /* Micro release number */ - int baseline; /* Baseline revison number */ -} rdc_version_t; -#ifdef _SYSCALL32 -typedef struct rdc_version32 { - int32_t major; /* Major release number */ - int32_t minor; /* Minor release number */ - int32_t micro; /* Micro release number */ - int32_t baseline; /* Baseline revison number */ -} rdc_version32_t; -#endif - - -#if !defined(_KERNEL) - -#define RDC_IOCTL(cmd, a0, a1, a2, a3, a4, ustatus) \ - rdc_ioctl((long)(cmd), (long)(a0), (long)(a1), (long)(a2), \ - (long)(a3), (long)(a4), (ustatus)) - -extern int rdc_ioctl(long, long, long, long, long, long, spcs_s_info_t); -extern int rdc_ioctl_simple(long, void *); - -#endif /* ! _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_IOCTL_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_prot.x b/usr/src/uts/common/avs/ns/rdc/rdc_prot.x deleted file mode 100644 index cf9055c186..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_prot.x +++ /dev/null @@ -1,390 +0,0 @@ -%/* -% * CDDL HEADER START -% * -% * The contents of this file are subject to the terms of the -% * Common Development and Distribution License (the "License"). 
-% * You may not use this file except in compliance with the License. -% * -% * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -% * or http://www.opensolaris.org/os/licensing. -% * See the License for the specific language governing permissions -% * and limitations under the License. -% * -% * When distributing Covered Code, include this CDDL HEADER in each -% * file and include the License file at usr/src/OPENSOLARIS.LICENSE. -% * If applicable, add the following below this CDDL HEADER, with the -% * fields enclosed by brackets "[]" replaced with your own identifying -% * information: Portions Copyright [yyyy] [name of copyright owner] -% * -% * CDDL HEADER END -% */ -% -% -%/* -% * Copyright 2008 Sun Microsystems, Inc. All rights reserved. -% * Use is subject to license terms. -% */ -% -%/* -% * Auto generated from rdc_prot.x -% */ -% -%/* -% * Network Replicator RPC spec -% */ - -% -%/* -% * We don't define netbuf in RPCL, since it would contain structure member -% * names that would conflict with the definition of struct netbuf in -% * <tiuser.h>. Instead we merely declare the XDR routine xdr_netbuf() here, -% * and implement it ourselves in rpc/rpcb_prot.c. -% */ -%#ifdef __cplusplus -%extern "C" bool_t xdr_netbuf(XDR *, struct netbuf *); -% -%#elif __STDC__ -%extern bool_t xdr_netbuf(XDR *, struct netbuf *); -% -%#else /* K&R C */ -%bool_t xdr_netbuf(); -% -%#endif /* K&R C */ -const RDC_PORT = 121; -const RDC_MAXDATA = 32768; -const RDC_MAXNAMLEN = 64; -const RDC_BMAPBLKSIZE = 1024; -const RDC_MAXADDR = 32; -const RDC_MAXPENDQ = 64; - -%/* -% * Use this to limit the size of the net_pendvec_t array -% * to ~ 32k -% */ -const RDC_PENDQLIM = 1365; -% -%/* -% * Error status -% */ -enum rdcstat { - RDC_OK = 0, - RDCERR_PERM = 1, - RDCERR_NOENT = 2, - RDCERR_NOMEM = 3 -}; - -% -%/* -%* Set state (V4) -%*/ - -struct set_state4 { - opaque netaddr[RDC_MAXADDR]; - opaque rnetaddr[RDC_MAXADDR]; - int netaddrlen; - int rnetaddrlen; - unsigned flag; - opaque pfile[RDC_MAXNAMLEN]; - opaque sfile[RDC_MAXNAMLEN]; -}; - -const RDC_XDR_MAXNAMLEN = RDC_MAXNAMLEN; - -struct set_state { - struct netbuf netaddr; - struct netbuf rnetaddr; - int netaddrlen; - int rnetaddrlen; - unsigned flag; - string pfile<RDC_XDR_MAXNAMLEN>; - string sfile<RDC_XDR_MAXNAMLEN>; -}; - -% -%/* -% * Get size of volume -% */ -struct getsize { - int cd; -}; - -% -%/* -% * Remote read (v5) -% */ -struct rread { - int cd; - int len; - int pos; - int idx; - int flag; -}; - -% -%/* -% * Remote read (v6) -% */ -struct rread6 { - int cd; - int len; - u_longlong_t pos; - int idx; - int flag; -}; - -% -%/* -% * status OK from remote read -% */ -struct readok { - opaque data<RDC_MAXDATA>; -}; -union readres switch (rdcstat status) { -case RDC_OK: - readok reply; -default: - void; -}; - -% -%/* -% * Initiate bit map scoreboard transfer (v5) -% */ -struct bmap { - int cd; - int dual; - int size; -}; - -% -%/* -% * Initiate bit map scoreboard transfer (v6) -% */ -struct bmap6 { - int cd; - int dual; - u_longlong_t size; -}; - -% -%/* -% * Scoreboard bitmap data (v5) -% */ -struct net_bdata { - int cd; - int offset; - int size; - opaque data<RDC_BMAPBLKSIZE>; -}; - -% -%/* -% * Scoreboard bitmap data (v6) -% */ -struct net_bdata6 { - u_longlong_t offset; - int size; - int cd; - int endoblk; - opaque data<RDC_BMAPBLKSIZE>; -}; - -% -%/* -% * Data transfer and allocation (v5) -% */ -struct net_data5 { - int local_cd; - int cd; - int pos; - int len; - int flag; - int idx; - int seq; - int sfba; - int endoblk; - int nfba; - 
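/*
 * Editor's note (illustrative sketch, not part of the original source):
 * the readres union above is the discriminated reply for a remote read.
 * On the kernel client side rdc_io.h maps it through the rr_status /
 * rr_data / rr_bufsize convenience defines, so consuming a reply reduces
 * to something like the following (error handling trimmed):
 */
static int
rdc_copy_read_reply(readres *rr, char *dst, int dstlen)
{
	if (rr->rr_status != RDC_OK)
		return (-1);		/* an RDCERR_* value is in rr_status */
	if (rr->rr_bufsize > (uint_t)dstlen)
		return (-1);		/* reply larger than caller's buffer */
	bcopy(rr->rr_data, dst, rr->rr_bufsize);
	return (rr->rr_bufsize);
}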
opaque data<RDC_MAXDATA>; -}; - -% -%/* -% * Data transfer and allocation (v6) -% */ -struct net_data6 { - int local_cd; - int cd; - u_longlong_t pos; - u_longlong_t qpos; - u_longlong_t sfba; - int nfba; - int len; - int flag; - int idx; - unsigned int seq; - int endoblk; - opaque data<RDC_MAXDATA>; -}; - - -struct net_pendvec { - u_longlong_t apos; - u_longlong_t qpos; - int alen; - unsigned int seq; - int pindex; -}; -typedef net_pendvec net_pendvec_t; - - - -%/* -% * results returned from a netwrite request. (v6) -% * index = index number of request assigned by server when -% * requests is broken down into smaller chunks. -% * result = 0 request ok. -% * result = 1 request is pending. -% * result < 0 failure, set with -errno. -% * If the vecdata array is not empty, then it contains -% * a list of apos and alen -% * pairs of previously pending requests that have been written. -% */ -struct netwriteres { - int index; - int result; - unsigned int seq; - net_pendvec_t vecdata<RDC_PENDQLIM>; -}; - - - -% -%/* -% * Ping -% */ -struct rdc_ping6 { - opaque p_ifaddr[RDC_MAXADDR]; - opaque s_ifaddr[RDC_MAXADDR]; -}; - -struct rdc_ping { - struct netbuf p_ifaddr; - struct netbuf s_ifaddr; -}; - - -/* - * Remote file service routines - */ - -program RDC_PROGRAM { - - /* - * This is protocol version 5 that shipped with SNDR 3.1 - * We must support this protocol until (protocol - * version 7) is released. - * I.e. N-1 protocol support. - */ - - version RDC_VERSION5 { - - void - RDCPROC_NULL(void) = 0; - - int - RDCPROC_GETSIZE(int) = 2; - - int - RDCPROC_WRITE5(net_data5) = 4; - - readres - RDCPROC_READ5(rread) = 5; - - int - RDCPROC_STATE(set_state4) = 7; - - int - RDCPROC_PING4(rdc_ping6) = 8; - - int - RDCPROC_BMAP(net_bmap) = 9; - - int - RDCPROC_BDATA(net_bdata) = 10; - - int - RDCPROC_GETSTATE4(set_state4) = 12; - } = 5; - - /* - * This is protocol version 6 that shipped with SNDR 3.2 - * We must support this protocol until (protocol - * version 8) is released. - * I.e. N-1 protocol support. - * - * Changed to support multiple transmitting async threads - * (sequence numbers and write reply structure) - * and 64bit datapath. - */ - - version RDC_VERSION6 { - - void - RDCPROC_NULL(void) = 0; - - u_longlong_t - RDCPROC_GETSIZE6(int) = 2; - - netwriteres - RDCPROC_WRITE6(net_data6) = 4; - - readres - RDCPROC_READ6(rread6) = 5; - - int - RDCPROC_STATE(set_state4) = 7; - - int - RDCPROC_PING4(rdc_ping6) = 8; - - int - RDCPROC_BMAP6(net_bmap6) = 9; - - int - RDCPROC_BDATA6(net_bdata6) = 10; - - int - RDCPROC_GETSTATE4(set_state4) = 12; - } = 6; - - version RDC_VERSION7 { - - void - RDCPROC_NULL(void) = 0; - - u_longlong_t - RDCPROC_GETSIZE6(int) = 2; - - netwriteres - RDCPROC_WRITE6(net_data6) = 4; - - readres - RDCPROC_READ6(rread6) = 5; - - int - RDCPROC_STATE(set_state) = 7; - - int - RDCPROC_PING4(rdc_ping) = 8; - - int - RDCPROC_BMAP6(net_bmap6) = 9; - - int - RDCPROC_BDATA6(net_bdata6) = 10; - - int - RDCPROC_GETSTATE4(set_state) = 12; - } = 7; - -} = 100143; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_stub.c b/usr/src/uts/common/avs/ns/rdc/rdc_stub.c deleted file mode 100644 index c1ef2dc502..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_stub.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
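/*
 * Editor's note (illustrative sketch, not part of the original source):
 * the netwriteres comment above defines the reply contract for
 * RDCPROC_WRITE6: result == 0 means written, result == 1 means the server
 * queued the request as pending, result < 0 carries -errno, and vecdata
 * lists (apos, alen, seq) tuples for earlier pending writes that have now
 * completed.  Assuming the usual rpcgen mapping of vecdata<RDC_PENDQLIM>
 * onto a (vecdata_len, vecdata_val) pair, and using EINPROGRESS to stand
 * in for "still pending", a client-side consumer looks roughly like:
 */
static int
rdc_consume_writereply(netwriteres *w)
{
	uint_t i;

	if (w->result < 0)
		return (-w->result);	/* server returned -errno */

	/* retire any previously pending writes the server has now flushed */
	for (i = 0; i < w->vecdata.vecdata_len; i++) {
		net_pendvec_t *pvec = &w->vecdata.vecdata_val[i];

		/* mark [apos, apos + alen) for sequence pvec->seq complete */
		(void) pvec;
	}

	return ((w->result == 1) ? EINPROGRESS : 0);
}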
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/errno.h> - -#include <rpc/auth.h> -#include <rpc/svc.h> - -#include <sys/nsctl/nsctl.h> -#include <sys/nsctl/nsvers.h> -#include "rdc_stub.h" - -static void null_dispatch(struct svc_req *req, SVCXPRT *xprt); -static void (*dispatch)(struct svc_req *, SVCXPRT *) = null_dispatch; - -/* - * Solaris module setup. - */ -extern struct mod_ops mod_miscops; - -static struct modlmisc modlmisc = { - &mod_miscops, /* Type of module */ - "nws:Remote Mirror kRPC Stub:" ISS_VERSION_STR -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlmisc, - NULL -}; - - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - - -int -_fini(void) -{ - /* unload is forbidden */ - return (EBUSY); -} - - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - - -/* - * rdcstub_dispatch is the place holder for rdcsrv_dispatch. - * rdcsrv registers this function as kRPC dispatch function. - * If rdcsrv is unloaded (uninstall package), then dispatch - * is set to null_dispatch - */ -void -rdcstub_dispatch(struct svc_req *req, SVCXPRT *xprt) -{ - (*dispatch)(req, xprt); -} - -/* ARGSUSED */ -static void -null_dispatch(struct svc_req *req, SVCXPRT *xprt) -{ - svcerr_noproc(xprt); -} - -void -rdcstub_set_dispatch(void (*disp)(struct svc_req *, SVCXPRT *)) -{ - ASSERT(disp != NULL); - dispatch = disp; -} - -void -rdcstub_unset_dispatch() -{ - dispatch = null_dispatch; -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_stub.h b/usr/src/uts/common/avs/ns/rdc/rdc_stub.h deleted file mode 100644 index 19b71eb4bf..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_stub.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
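/*
 * Editor's note (hypothetical sketch, not part of the original source):
 * rdc_stub.c above keeps a single dispatch function pointer, defaulting to
 * null_dispatch(), and forwards every kRPC request through it.  The
 * separately loadable server module (rdcsrv, whose rdcsrv_dispatch the stub
 * comment names) hooks itself in at load time and unhooks on unload, along
 * these lines; the extern is declared here only for the sketch:
 */
extern void rdcsrv_dispatch(struct svc_req *, SVCXPRT *);

static void
rdcsrv_hook_example(void)
{
	/* at rdcsrv load/attach time: route kRPC requests to the real server */
	rdcstub_set_dispatch(rdcsrv_dispatch);
}

static void
rdcsrv_unhook_example(void)
{
	/* at rdcsrv unload time: fall back to svcerr_noproc() replies */
	rdcstub_unset_dispatch();
}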
- */ - -#ifndef _RDC_STUB_H -#define _RDC_STUB_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -extern void rdcstub_dispatch(struct svc_req *, SVCXPRT *); -extern void rdcstub_set_dispatch(void (*)(struct svc_req *, SVCXPRT *)); -extern void rdcstub_unset_dispatch(); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_STUB_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_subr.c b/usr/src/uts/common/avs/ns/rdc/rdc_subr.c deleted file mode 100644 index de5e1dd50a..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_subr.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/errno.h> -#include <sys/debug.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/errno.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> -#include "rdc_io.h" -#include "rdc_ioctl.h" -#include "rdc_prot.h" - -/* - * Initialize a netbuf suitable for - * describing an address - */ - -void -init_rdc_netbuf(struct netbuf *nbuf) -{ - nbuf->buf = kmem_zalloc(RDC_MAXADDR, KM_SLEEP); - nbuf->maxlen = RDC_MAXADDR; - nbuf->len = 0; -} - -/* - * Free a netbuf - */ - -void -free_rdc_netbuf(struct netbuf *nbuf) -{ - if (!(nbuf) || !(nbuf->buf)) { -#ifdef DEBUG - cmn_err(CE_PANIC, "Null netbuf in free_rdc_netbuf"); -#endif - return; - } - kmem_free(nbuf->buf, nbuf->maxlen); - nbuf->buf = NULL; - nbuf->maxlen = 0; - nbuf->len = 0; -} - - -/* - * Duplicate a netbuf, must be followed by a free_rdc_netbuf(). - */ -void -dup_rdc_netbuf(const struct netbuf *from, struct netbuf *to) -{ - init_rdc_netbuf(to); - to->len = from->len; - - if (from->len > to->maxlen) { - cmn_err(CE_WARN, "!dup_rdc_netbuf: from->len %d, to->maxlen %d", - from->len, to->maxlen); - } - - bcopy(from->buf, to->buf, (size_t)from->len); -} - - -#ifdef DEBUG -void -rdc_print_svinfo(rdc_srv_t *svp, char *str) -{ - int i; - - if (svp == NULL) - return; - - cmn_err(CE_NOTE, "!rdc %s servinfo: %p\n", str, (void *) svp); - - if (svp->ri_knconf != NULL) { - cmn_err(CE_NOTE, "!knconf: semantics %d", - svp->ri_knconf->knc_semantics); - cmn_err(CE_NOTE, "! protofmly %s", - svp->ri_knconf->knc_protofmly); - cmn_err(CE_NOTE, "! proto %s", - svp->ri_knconf->knc_proto); - cmn_err(CE_NOTE, "! 
rdev %lx", - svp->ri_knconf->knc_rdev); - } - - for (i = 0; i < svp->ri_addr.len; i++) - printf("%u ", svp->ri_addr.buf[i]); - - cmn_err(CE_NOTE, "!\naddr: len %d buf %p\n", - svp->ri_addr.len, (void *) svp->ri_addr.buf); - cmn_err(CE_NOTE, "!host: %s\n", svp->ri_hostname); -} -#endif /* DEBUG */ - -/* - * Initialize an rdc servinfo - * Contains all the protocol we need to do a client rpc - * A chain of rdc_srv_t indicates a one to many - */ - -rdc_srv_t * -rdc_create_svinfo(char *host, struct netbuf *svaddr, struct knetconfig *conf) -{ - rdc_srv_t *nvp; - int hlen = strlen(host) + 1; - - if (conf == NULL) { - return (NULL); - } - - if (host == NULL) { - return (NULL); - } - - nvp = kmem_zalloc(sizeof (*nvp), KM_SLEEP); - nvp->ri_knconf = kmem_alloc(sizeof (*nvp->ri_knconf), KM_SLEEP); - nvp->ri_hostname = kmem_zalloc(hlen, KM_SLEEP); - - if (nvp == NULL || nvp->ri_hostname == NULL || nvp->ri_knconf == NULL) { - rdc_destroy_svinfo(nvp); - return (NULL); - } - - nvp->ri_hostnamelen = hlen; - - bcopy((void *)conf, (void *)nvp->ri_knconf, sizeof (*nvp->ri_knconf)); - nvp->ri_knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE + 1, KM_SLEEP); - nvp->ri_knconf->knc_proto = kmem_zalloc(KNC_STRSIZE + 1, KM_SLEEP); - - if (nvp->ri_knconf->knc_protofmly == NULL || - nvp->ri_knconf->knc_proto == NULL) { - rdc_destroy_svinfo(nvp); - return (NULL); - - } - - (void) strncpy(nvp->ri_knconf->knc_protofmly, conf->knc_protofmly, - KNC_STRSIZE); - (void) strncpy(nvp->ri_knconf->knc_proto, conf->knc_proto, KNC_STRSIZE); - - dup_rdc_netbuf(svaddr, &nvp->ri_addr); - - nvp->ri_secdata = NULL; /* For now */ - (void) strncpy(nvp->ri_hostname, host, hlen); -#ifdef DEBUG_IP - rdc_print_svinfo(nvp, "!create"); -#endif - return (nvp); -} - -void -rdc_destroy_svinfo(rdc_srv_t *svp) -{ - if (svp == NULL) - return; - - if (svp->ri_addr.buf && svp->ri_addr.maxlen) - free_rdc_netbuf(&(svp->ri_addr)); - - if (svp->ri_knconf->knc_protofmly) - kmem_free(svp->ri_knconf->knc_protofmly, KNC_STRSIZE + 1); - - if (svp->ri_knconf->knc_proto) - kmem_free(svp->ri_knconf->knc_proto, KNC_STRSIZE + 1); - - if (svp->ri_knconf) - kmem_free(svp->ri_knconf, sizeof (*svp->ri_knconf)); - - kmem_free(svp, sizeof (*svp)); -} - -/* - * rdc_netbuf_toint - * Returns oldsytle ipv4 RDC ver 3 addresses for RPC protocol from netbuf - * Note: This would never be called in the case of IPv6 and a program - * mismatch ie ver 3 to ver 4 - */ -int -rdc_netbuf_toint(struct netbuf *nb) -{ - int ret; - if (nb->len > RDC_MAXADDR) - cmn_err(CE_NOTE, "!rdc_netbuf_toint: bad size %d", nb->len); - - switch (nb->len) { - case 4: - bcopy(nb->buf, (char *)&ret, sizeof (int)); - return (ret); - - case 8: - case 16: - case 32: - bcopy(&nb->buf[4], (char *)&ret, sizeof (int)); - return (ret); - - default: - cmn_err(CE_NOTE, "!rdc_netbuf_toint: size %d", nb->len); - } - return (0); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_svc.c b/usr/src/uts/common/avs/ns/rdc/rdc_svc.c deleted file mode 100644 index ea1425055d..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_svc.c +++ /dev/null @@ -1,3079 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
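/*
 * Editor's note (illustrative sketch, not part of the original source):
 * rdc_netbuf_toint() above recovers the old-style 32-bit IPv4 address used
 * by the version 3 wire protocol.  For a 4-byte netbuf the whole buffer is
 * the address; for the common 8/16/32-byte sockaddr-shaped buffers the
 * 4-byte address sits at offset 4 (after the address family and port), so
 * the function copies nb->buf[4..7].  A worked example, assuming a
 * big-endian sockaddr_in-style layout in the buffer:
 *
 *   buf = { 0x00, 0x02, 0x00, 0x79, 0xc0, 0xa8, 0x01, 0x0a }, len = 8
 *         |  AF_INET  | port 121  |     192.168.1.10      |
 *
 *   rdc_netbuf_toint() copies bytes 4..7 (0xc0 0xa8 0x01 0x0a) into an int,
 *   i.e. the address in the byte order it was stored on the wire.
 */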
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * kRPC Server for sndr - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/stream.h> -#include <sys/errno.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#ifndef DS_DDICT -#include <rpc/auth.h> -#include <rpc/svc.h> -#include <rpc/xdr.h> -#endif -#include <sys/ddi.h> -#include <sys/nsc_thread.h> -#ifdef DS_DDICT -#include <sys/nsctl/contract.h> -#endif -#include <sys/nsctl/nsctl.h> -#include <sys/ncall/ncall.h> - -#include <sys/sdt.h> /* dtrace is S10 or later */ - -#include "rdc_io.h" -#include "rdc_bitmap.h" -#include "rdcsrv.h" - -static rdc_sleepq_t *rdc_newsleepq(); -static void rdc_delsleepq(rdc_sleepq_t *); -static int rdc_sleepq(rdc_group_t *, rdc_sleepq_t *); -static int rdc_combywrite(rdc_k_info_t *, nsc_buf_t *); -static int rdc_writemaxfba(rdc_k_info_t *, rdc_u_info_t *, - rdc_net_dataset_t *, uint_t, int); -static void rdc_setbitind(int *, net_pendvec_t *, rdc_net_dataset_t *, uint_t, - int, int); -static void rdc_dopending(rdc_group_t *, netwriteres *); -static nsc_vec_t *rdc_dset2vec(rdc_net_dataset_t *); -static int rdc_combyread(rdc_k_info_t *, rdc_u_info_t *, nsc_buf_t *); -static int rdc_readmaxfba(int, nsc_off_t, nsc_size_t, int); -static int rdc_dsetcopy(rdc_net_dataset_t *, nsc_vec_t *, nsc_off_t, nsc_size_t, - char *, int, int); - -/* direction for dsetcopy() */ -#define COPY_IN 1 /* copy data into the rpc buffer */ -#define COPY_OUT 2 /* copy data out of the rpc buffer */ - -#define MAX_EINTR_COUNT 1000 - -static int rdc_rread_slow; -static rdcsrv_t rdc_srvtab[]; - -#ifdef DEBUG -static int rdc_netwrite6; -static int rdc_stall0; -static int rdc_sleepcnt; -int rdc_datasetcnt; -#endif - - -int -_rdc_sync_event_notify(int operation, char *volume, char *group) -{ - int ack = 0; - clock_t time; - - mutex_enter(&rdc_sync_mutex); - mutex_enter(&rdc_sync_event.mutex); - - if (rdc_sync_event.daemon_waiting) { - rdc_sync_event.daemon_waiting = 0; - rdc_sync_event.event = operation; - (void) strncpy(rdc_sync_event.master, volume, NSC_MAXPATH); - (void) strncpy(rdc_sync_event.group, group, NSC_MAXPATH); - - cv_signal(&rdc_sync_event.cv); - - rdc_sync_event.kernel_waiting = 1; - time = cv_reltimedwait_sig(&rdc_sync_event.done_cv, - &rdc_sync_event.mutex, rdc_sync_event_timeout, - TR_CLOCK_TICK); - if (time == (clock_t)0 || time == (clock_t)-1) { - /* signalled or timed out */ - ack = 0; - } else { - if (rdc_sync_event.ack) - ack = 1; - else - ack = -1; - } - } - mutex_exit(&rdc_sync_event.mutex); - mutex_exit(&rdc_sync_mutex); - return (ack); -} - - -int -_rdc_sync_event_wait(void 
*arg0, void *arg1, int mode, spcs_s_info_t kstatus, - int *rvp) -{ - int rc = 0; - static char master[NSC_MAXPATH]; - - master[0] = '\0'; - *rvp = 0; - if (ddi_copyin(arg0, master, NSC_MAXPATH, mode)) - return (EFAULT); - - mutex_enter(&rdc_sync_event.mutex); - - if (rdc_sync_event.kernel_waiting && - (rdc_sync_event.lbolt - nsc_lbolt() < rdc_sync_event_timeout)) { - /* We haven't been away too long */ - if (master[0]) - rdc_sync_event.ack = 1; - else - rdc_sync_event.ack = 0; - rdc_sync_event.kernel_waiting = 0; - cv_signal(&rdc_sync_event.done_cv); - } - - rdc_sync_event.daemon_waiting = 1; - if (cv_wait_sig(&rdc_sync_event.cv, &rdc_sync_event.mutex) == 0) { - rdc_sync_event.daemon_waiting = 0; - rc = EAGAIN; - spcs_s_add(kstatus, rc); - } else { - (void) ddi_copyout(rdc_sync_event.master, arg0, NSC_MAXPATH, - mode); - (void) ddi_copyout(rdc_sync_event.group, arg1, NSC_MAXPATH, - mode); - *rvp = rdc_sync_event.event; - } - rdc_sync_event.lbolt = nsc_lbolt(); - mutex_exit(&rdc_sync_event.mutex); - - return (rc); -} - - -static int -rdc_allow_sec_sync(rdc_u_info_t *urdc, int option) -{ - rdc_k_info_t *krdc = &rdc_k_info[urdc->index]; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - if (!IS_MULTI(krdc)) - return (0); - - rdc_many_enter(krdc); - - krdc = krdc->multi_next; - urdc = &rdc_u_info[krdc->index]; - - if (!IS_ENABLED(urdc)) { - rdc_many_exit(krdc); - return (0); - } - - if (option == CCIO_RSYNC) { - - /* Reverse sync */ - - if (rdc_get_mflags(urdc) & RDC_RSYNC_NEEDED) { - /* - * Reverse sync needed or in progress. - */ - rdc_many_exit(krdc); - return (-1); - } - } else { - ASSERT(option == CCIO_SLAVE); - - /* Forward sync */ - - if (rdc_get_mflags(urdc) & RDC_SLAVE) { - /* - * Reverse syncing is bad, as that means that data - * is already flowing to the target of the requested - * sync operation. - */ - rdc_many_exit(krdc); - return (-1); - } - - /* - * Clear "reverse sync needed" on all 1-many volumes. - * The data on them will be updated from the primary of this - * requested sync operation, so the aborted reverse sync need - * not be completed. - */ - - if ((rdc_get_mflags(urdc) & RDC_RSYNC_NEEDED) || - (rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_clr_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - rdc_write_state(urdc); - } - if (IS_MANY(krdc)) { - for (ktmp = krdc->many_next; ktmp != krdc; - ktmp = ktmp->many_next) { - utmp = &rdc_u_info[ktmp->index]; - if (!IS_ENABLED(utmp)) - continue; - if (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED) { - rdc_clr_mflags(utmp, RDC_RSYNC_NEEDED); - rdc_write_state(utmp); - } - } - } - } - - rdc_many_exit(krdc); - - return (0); -} - - -/* - * r_net_null - * Proc 0 Null action - */ -static void -r_net_null(SVCXPRT *xprt) -{ - (void) svc_sendreply(xprt, xdr_void, 0); -} - -/* - * r_net_read - */ -static void -r_net_read(SVCXPRT *xprt) -{ - readres resp; - rdc_u_info_t *urdc; - struct rread diskio; - char *buffer = NULL; - uchar_t *sv_addr; - nsc_vec_t *vec; - int pos, st; - int nocache; - int sv_len; - nsc_vec_t *vector = NULL; - rdc_net_dataset_t *dset = NULL; - int vecsz = 0; - - st = SVC_GETARGS(xprt, xdr_rread, (char *)&diskio); - if (!st) { - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - nocache = (diskio.flag & RDC_RREAD_FAIL) ? 
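/*
 * Editor's note (hypothetical userland sketch, not part of the original
 * source): _rdc_sync_event_notify() and _rdc_sync_event_wait() above form a
 * rendezvous between the kernel and a user-level daemon.  The daemon parks
 * in the RDC_SYNC_EVENT ioctl; when a sync starts or finishes the kernel
 * fills in the master volume and group names, wakes the daemon, and waits
 * (up to rdc_sync_event_timeout) for the daemon's next call, whose non-empty
 * master argument acts as the ack.  A daemon main loop might look roughly
 * like this; the RDC_IOCTL wrapper from rdc_ioctl.h and a previously created
 * spcs status handle are assumed, and error handling is omitted:
 */
static void
rdc_sync_event_daemon_loop(spcs_s_info_t ustatus)
{
	char master[NSC_MAXPATH] = "";
	char group[NSC_MAXPATH] = "";
	int event;

	for (;;) {
		/* blocks until RDC_SYNC_START, RDC_SYNC_DONE or RDC_RSYNC_START */
		event = RDC_IOCTL(RDC_SYNC_EVENT, master, group, 0, 0, 0,
		    ustatus);
		if (event < 0)
			break;		/* EAGAIN / signalled: give up */
		/*
		 * Run the site-specific action for (event, master, group);
		 * leaving master non-empty on the next call acks this event.
		 */
	}
}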
0 : NSC_NOCACHE; - - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_read: EPROTO cd out or not enabled"); -#endif - return; - } - - urdc = &rdc_u_info[diskio.cd]; - - if (diskio.flag & RDC_RREAD_START) { - /* setup rpc */ - if (!IS_ENABLED(urdc)) { - st = 0; - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - st = rdc_readmaxfba(diskio.cd, diskio.pos, diskio.len, - nocache); - - if (!svc_sendreply(xprt, xdr_int, (char *)&st)) { - if (st != 0) { - rdc_net_dataset_t *dset; - if (dset = rdc_net_get_set(diskio.cd, st)) { - rdc_net_del_set(diskio.cd, dset); - } else { - cmn_err(CE_NOTE, "!r_net_read: get_set " - "has failed in cleanup"); - } - } - } - return; - } - - /* data rpc */ - -#ifdef DEBUG - if ((diskio.flag & RDC_RREAD_DATA) == 0) { - cmn_err(CE_WARN, "!r_net_read: received non-DATA rpc! flag %x", - diskio.flag); - } -#endif - - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset) { - vector = rdc_dset2vec(dset); - } - if (vector == NULL) { - resp.rr_status = RDCERR_NOMEM; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - vecsz = (dset->nitems + 1) * sizeof (nsc_vec_t); - - if (!IS_ENABLED(urdc)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - resp.rr_status = RDC_OK; - - /* find place in vector */ - vec = vector; - pos = diskio.pos - dset->pos; - - for (; pos >= FBA_NUM(vec->sv_len); vec++) - pos -= FBA_NUM(vec->sv_len); - - sv_addr = vec->sv_addr + FBA_SIZE(pos); - sv_len = vec->sv_len - FBA_SIZE(pos); - - /* - * IF the data is in a single sb_vec entry - * THEN - * we can just point to that - * ELSE - * we have to alloc a local buffer, - * copy the data in and the point to - * the local buffer. - */ - - if (sv_len >= FBA_SIZE(diskio.len)) { - /* fast */ - resp.rr_data = (char *)sv_addr; - resp.rr_bufsize = FBA_SIZE(diskio.len); - } else { - /* slow */ - rdc_rread_slow++; /* rough count */ - resp.rr_bufsize = FBA_SIZE(diskio.len); - buffer = kmem_alloc(resp.rr_bufsize, KM_NOSLEEP); - if (!buffer) { - resp.rr_status = RDCERR_NOMEM; - } else { - resp.rr_data = buffer; - if (!rdc_dsetcopy(dset, vector, diskio.pos, diskio.len, - resp.rr_data, resp.rr_bufsize, COPY_IN)) { - resp.rr_status = RDCERR_NOMEM; /* ??? */ - } - } - } - - st = svc_sendreply(xprt, xdr_readres, (char *)&resp); /* send data */ - -cleanup: - - if (dset) { - if (!st || - (diskio.flag & RDC_RREAD_END) || - (resp.rr_status != RDC_OK)) { - /* - * RPC reply failed, OR - * Last RPC for this IO operation, OR - * We are failing this IO operation. - * - * Do cleanup. - */ - rdc_net_del_set(diskio.cd, dset); - } else { - rdc_net_put_set(diskio.cd, dset); - } - } - - if (buffer) - kmem_free(buffer, resp.rr_bufsize); - if (vector) { - kmem_free(vector, vecsz); - RDC_DSMEMUSE(-vecsz); - } -} - -/* - * r_net_read (v6) - */ -static void -r_net_read6(SVCXPRT *xprt) -{ - readres resp; - rdc_u_info_t *urdc; - struct rread6 diskio; - char *buffer = NULL; - uchar_t *sv_addr; - nsc_vec_t *vec; - int pos, st; - int nocache; - int sv_len; - nsc_vec_t *vector = NULL; - rdc_net_dataset_t *dset = NULL; - int vecsz = 0; - - st = SVC_GETARGS(xprt, xdr_rread6, (char *)&diskio); - if (!st) { - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - nocache = (diskio.flag & RDC_RREAD_FAIL) ? 
0 : NSC_NOCACHE; - - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_read6: EPROTO cd out or not enabled"); -#endif - return; - } - - urdc = &rdc_u_info[diskio.cd]; - - if (diskio.flag & RDC_RREAD_START) { - /* setup rpc */ - if (!IS_ENABLED(urdc)) { - st = 0; - (void) svc_sendreply(xprt, xdr_int, (char *)&st); - return; - } - st = rdc_readmaxfba(diskio.cd, diskio.pos, diskio.len, - nocache); - - if (!svc_sendreply(xprt, xdr_int, (char *)&st)) { - if (st != 0) { - rdc_net_dataset_t *dset; - if (dset = rdc_net_get_set(diskio.cd, st)) { - rdc_net_del_set(diskio.cd, dset); - } else { - cmn_err(CE_NOTE, "!read6: get_set " - "has failed in cleanup"); - } - } - } - return; - } - - /* data rpc */ - -#ifdef DEBUG - if ((diskio.flag & RDC_RREAD_DATA) == 0) { - cmn_err(CE_WARN, "!read6: received non-DATA rpc! flag %x", - diskio.flag); - } -#endif - - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset) { - vector = rdc_dset2vec(dset); - } - if (vector == NULL) { - resp.rr_status = RDCERR_NOMEM; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - vecsz = (dset->nitems + 1) * sizeof (nsc_vec_t); - - if (!IS_ENABLED(urdc)) { - resp.rr_status = RDCERR_NOENT; - (void) svc_sendreply(xprt, xdr_readres, (char *)&resp); - goto cleanup; - } - resp.rr_status = RDC_OK; - - /* find place in vector */ - vec = vector; - pos = diskio.pos - dset->pos; - - for (; pos >= FBA_NUM(vec->sv_len); vec++) - pos -= FBA_NUM(vec->sv_len); - - sv_addr = vec->sv_addr + FBA_SIZE(pos); - sv_len = vec->sv_len - FBA_SIZE(pos); - - /* - * IF the data is in a single sb_vec entry - * THEN - * we can just point to that - * ELSE - * we have to alloc a local buffer, - * copy the data in and the point to - * the local buffer. - */ - - if (sv_len >= FBA_SIZE(diskio.len)) { - /* fast */ - resp.rr_data = (char *)sv_addr; - resp.rr_bufsize = FBA_SIZE(diskio.len); - } else { - /* slow */ - rdc_rread_slow++; /* rough count */ - resp.rr_bufsize = FBA_SIZE(diskio.len); - buffer = kmem_alloc(resp.rr_bufsize, KM_NOSLEEP); - if (!buffer) { - resp.rr_status = RDCERR_NOMEM; - } else { - resp.rr_data = buffer; - if (!rdc_dsetcopy(dset, vector, diskio.pos, diskio.len, - resp.rr_data, resp.rr_bufsize, COPY_IN)) { - resp.rr_status = RDCERR_NOMEM; /* ??? */ - } - } - } - - st = svc_sendreply(xprt, xdr_readres, (char *)&resp); /* send data */ - -cleanup: - - if (dset) { - if (!st || - (diskio.flag & RDC_RREAD_END) || - (resp.rr_status != RDC_OK)) { - /* - * RPC reply failed, OR - * Last RPC for this IO operation, OR - * We are failing this IO operation. - * - * Do cleanup. 
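- * (rdc_net_del_set() tears the dataset down; on the
- * success path rdc_net_put_set() only drops this
- * thread's reference so a later RPC for the same
- * transfer can still look the dataset up.)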
- */ - rdc_net_del_set(diskio.cd, dset); - } else { - rdc_net_put_set(diskio.cd, dset); - } - } - - if (buffer) - kmem_free(buffer, resp.rr_bufsize); - if (vector) { - kmem_free(vector, vecsz); - RDC_DSMEMUSE(-vecsz); - } -} - -/* - * r_net_write (Version 5) - * 0 reply indicates error - * >0 reply indicates a net handle index - * <0 reply indicates errno - * ret net handle index - * ret2 general error - * ret3 multi-hop errors (never returned) - */ -static void -r_net_write5(SVCXPRT *xprt) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct net_data5 diskio; - rdc_net_dataset_t *dset; - rdc_net_dataitem_t *ditem; - int nocache; - int ret = 0; - int ret2 = 0; - int st; - - krdc = NULL; - diskio.data.data_val = kmem_alloc(RDC_MAXDATA, KM_NOSLEEP); - - if (!diskio.data.data_val) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(RDC_MAXDATA); - st = SVC_GETARGS(xprt, xdr_net_data5, (char *)&diskio); - if (!st) { - ret2 = ENOMEM; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write5:SVC_GETARGS failed: st %d", st); -#endif - goto out; - } - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write6: EPROTO cd out or not enabled"); -#endif - goto out; - } - - nocache = (diskio.flag & RDC_RWRITE_FAIL) ? 0 : NSC_NOCACHE; - krdc = &rdc_k_info[diskio.cd]; - urdc = &rdc_u_info[diskio.cd]; - - if (!IS_ENABLED(urdc) || IS_STATE(urdc, RDC_LOGGING)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write6: cd logging / not enabled (%x)", - rdc_get_vflags(urdc)); -#endif - krdc = NULL; /* so we don't try to unqueue kstat entry */ - goto out; - } - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - - /* -1 index says allocate a buffer */ - if (diskio.idx < 0) { - dset = rdc_net_add_set(diskio.cd); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write5: " - "failed to add dataset"); -#endif - ret2 = EIO; - goto out; - } else { - ret = dset->id; - dset->pos = diskio.pos; - dset->fbalen = diskio.len; - diskio.idx = ret; - } - ditem = kmem_alloc(sizeof (rdc_net_dataitem_t), KM_NOSLEEP); - if (ditem == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(sizeof (rdc_net_dataitem_t)); - /* - * If this is a single transfer, then we don't - * need to allocate any memory for the data, - * just point the ditem data pointer to the - * existing buffer. - */ - ditem->next = NULL; - if (diskio.endoblk) { - ditem->dptr = diskio.data.data_val; - /* - * So we don't free it twice. - */ - diskio.data.data_val = NULL; - ditem->len = diskio.data.data_len; - ditem->mlen = RDC_MAXDATA; - } else { - /* - * Allocate the memory for the complete - * transfer. - */ - ditem->dptr = kmem_alloc(FBA_SIZE(diskio.len), - KM_NOSLEEP); - if (ditem->dptr == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(FBA_SIZE(diskio.len)); - ditem->len = FBA_SIZE(diskio.len); - ditem->mlen = ditem->len; - - /* - * Copy the data to the new buffer. - */ - ASSERT(diskio.data.data_len == FBA_SIZE(diskio.nfba)); - bcopy(diskio.data.data_val, ditem->dptr, - diskio.data.data_len); - /* - * free the old data buffer. 
- */ - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - dset->head = ditem; - dset->tail = ditem; - dset->nitems++; - } else { - ret = diskio.idx; - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset == NULL) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write5: net_get_set failed cd %d idx %d", - diskio.cd, diskio.idx); -#endif - goto out; - } - /* - * We have to copy the data from the rpc buffer - * to the data in ditem. - */ - ditem = dset->head; - bcopy(diskio.data.data_val, (char *)ditem->dptr + - FBA_SIZE(diskio.sfba - diskio.pos), diskio.data.data_len); - - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - ASSERT(dset); - - if (diskio.endoblk) { - ret2 = rdc_writemaxfba(krdc, urdc, dset, diskio.seq, nocache); - rdc_net_del_set(diskio.cd, dset); - dset = NULL; - } -out: - if (!RDC_SUCCESS(ret2)) { - if (ret2 > 0) - ret2 = -ret2; - DTRACE_PROBE1(rdc_svcwrite5_err_ret2, int, ret2); - st = svc_sendreply(xprt, xdr_int, (char *)&ret2); - } else - st = svc_sendreply(xprt, xdr_int, (char *)&ret); - - if (krdc && krdc->io_kstats && ret2 != ENOMEM) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - /* - * On Error we must cleanup. - * If we have a handle, free it. - * If we have a network handle, free it. - */ - if (!st || !RDC_SUCCESS(ret2)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_write5 error case? st %x ret %d", - st, ret2); -#endif - if (dset) { - rdc_net_del_set(diskio.cd, dset); - } - - } else { - if (dset) { - rdc_net_put_set(diskio.cd, dset); - } - } - if (diskio.data.data_val) { - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - } -} - -/* - * r_net_write (Version 6) - * index 0 = error, or net handle index. - * result = 0 , ok. - * result = 1, pending write. - * result < 0 error, and is the -errno. - * ret net handle index. - * ret2 general error. - */ -static void -r_net_write6(SVCXPRT *xprt) -{ - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_group_t *group; - struct net_data6 diskio; - struct netwriteres netret; - rdc_net_dataset_t *dset; - rdc_net_dataitem_t *ditem; - int ret = 0; - int ret2 = 0; - int st; - int nocache; - - netret.vecdata.vecdata_val = NULL; - netret.vecdata.vecdata_len = 0; - dset = NULL; - krdc = NULL; - diskio.data.data_val = kmem_alloc(RDC_MAXDATA, KM_NOSLEEP); - - if (!diskio.data.data_val) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(RDC_MAXDATA); - st = SVC_GETARGS(xprt, xdr_net_data6, (char *)&diskio); - if (!st) { - ret2 = ENOMEM; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6:SVC_GETARGS failed: st %d", st); -#endif - goto out; - } - - if ((diskio.cd >= rdc_max_sets) || (diskio.cd < 0)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_write6: EPROTO cd out or not enabled"); -#endif - goto out; - } - - nocache = (diskio.flag & RDC_RWRITE_FAIL) ? 
0 : NSC_NOCACHE; - netret.seq = diskio.seq; - - krdc = &rdc_k_info[diskio.cd]; - urdc = &rdc_u_info[diskio.cd]; - - if (!IS_ENABLED(urdc) || IS_STATE(urdc, RDC_LOGGING)) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: cd logging or not enabled (%x)", - rdc_get_vflags(urdc)); -#endif - krdc = NULL; /* so we don't try to unqueue kstat entry */ - goto out; - } - - group = krdc->group; - if (group == NULL) { - ret2 = EIO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: No group structure for set %s:%s", - urdc->secondary.intf, urdc->secondary.file); -#endif - krdc = NULL; /* so we don't try to unqueue kstat entry */ - goto out; - } - -#ifdef DEBUG - if (rdc_netwrite6) { - cmn_err(CE_NOTE, - "!r_net_write6: idx %d seq %u current seq %u pos %llu " - "len %d sfba %llu nfba %d endoblk %d", - diskio.idx, diskio.seq, group->seq, - (unsigned long long)diskio.pos, diskio.len, - (unsigned long long)diskio.sfba, diskio.nfba, - diskio.endoblk); - } -#endif - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - /* -1 index says allocate a net dataset */ - if (diskio.idx < 0) { - dset = rdc_net_add_set(diskio.cd); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: failed to add dataset"); -#endif - ret2 = EIO; - goto out; - } else { - ret = dset->id; - dset->pos = (nsc_off_t)diskio.pos; /* 64bit! */ - dset->fbalen = diskio.len; - diskio.idx = ret; - } - ditem = kmem_alloc(sizeof (rdc_net_dataitem_t), KM_NOSLEEP); - if (ditem == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(sizeof (rdc_net_dataitem_t)); - /* - * If this is a single transfer, then we don't - * need to allocate any memory for the data, - * just point the ditem data pointer to the - * existing buffer. - */ - ditem->next = NULL; - if (diskio.endoblk) { - ditem->dptr = diskio.data.data_val; - /* - * So we don't free it twice. - */ - diskio.data.data_val = NULL; - ditem->len = diskio.data.data_len; - ditem->mlen = RDC_MAXDATA; - } else { - /* - * Allocate the memory for the complete - * transfer. - */ - ditem->dptr = kmem_alloc(FBA_SIZE(diskio.len), - KM_NOSLEEP); - if (ditem->dptr == NULL) { - ret2 = ENOMEM; - goto out; - } - RDC_DSMEMUSE(FBA_SIZE(diskio.len)); - ditem->len = FBA_SIZE(diskio.len); - ditem->mlen = ditem->len; - - /* - * Copy the data to the new buffer. - */ - ASSERT(diskio.data.data_len == FBA_SIZE(diskio.nfba)); - bcopy(diskio.data.data_val, ditem->dptr, - diskio.data.data_len); - /* - * free the old data buffer. - */ - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - dset->head = ditem; - dset->tail = ditem; - dset->nitems++; - } else { - ret = diskio.idx; - dset = rdc_net_get_set(diskio.cd, diskio.idx); - if (dset == NULL) { - ret2 = EPROTO; -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_write6: net_get_set failed cd %d idx %d " - "packet sequence %u expected seq %u", - diskio.cd, diskio.idx, diskio.seq, group->seq); -#endif - goto out; - } - /* - * We have to copy the data from the rpc buffer - * to the data in ditem. 
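- * The destination offset is the FBA distance between
- * this fragment (sfba) and the start of the whole
- * transfer (pos), converted to bytes.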
- */ - ditem = dset->head; - bcopy(diskio.data.data_val, (char *)ditem->dptr + - FBA_SIZE(diskio.sfba - diskio.pos), diskio.data.data_len); - - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - diskio.data.data_val = NULL; - } - ASSERT(dset); - - if (diskio.endoblk) { -#ifdef DEBUG - if (diskio.seq == (RDC_NEWSEQ + 1)) { - rdc_stallzero(2); - } -#endif - if (diskio.seq == RDC_NEWSEQ) { - /* - * magic marker, start of sequence. - */ - mutex_enter(&group->ra_queue.net_qlock); - /* - * see if some threads are stuck. - */ - if (group->sleepq) { - rdc_sleepqdiscard(group); - } - group->seqack = RDC_NEWSEQ; - mutex_exit(&group->ra_queue.net_qlock); - } - - if ((diskio.seq != RDC_NOSEQ) && (diskio.seq != RDC_NEWSEQ)) { - /* - * see if we are allowed through here to - * do the write, or if we have to q the - * request and send back a pending reply. - */ - mutex_enter(&group->ra_queue.net_qlock); - if (diskio.seq != group->seq) { - rdc_sleepq_t *sq; - int maxseq; - - /* - * Check that we have room. - */ - maxseq = group->seqack + RDC_MAXPENDQ + 1; - if (maxseq < group->seqack) { - /* - * skip magic values. - */ - maxseq += RDC_NEWSEQ + 1; - } - if (!RDC_INFRONT(diskio.seq, maxseq)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!net_write6: Queue " - "size %d exceeded seqack %u " - "this seq %u maxseq %u seq %u", - RDC_MAXPENDQ, group->seqack, - diskio.seq, maxseq, group->seq); -#endif - DTRACE_PROBE2(qsize_exceeded, int, diskio.seq, - int, maxseq); - if (!(rdc_get_vflags(urdc) & - RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - rdc_set_flags(urdc, - RDC_VOL_FAILED); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - ret2 = EIO; - rdc_sleepqdiscard(group); - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - mutex_exit(&group->ra_queue.net_qlock); - goto out; - } - - sq = rdc_newsleepq(); - sq->seq = diskio.seq; - sq->sindex = diskio.cd; - sq->pindex = diskio.local_cd; - sq->idx = diskio.idx; - sq->qpos = diskio.qpos; - sq->nocache = nocache; - if (rdc_sleepq(group, sq)) { - ret2 = EIO; - group->seq = RDC_NEWSEQ; - group->seqack = RDC_NEWSEQ; - rdc_sleepqdiscard(group); - mutex_exit(&group->ra_queue.net_qlock); - goto out; - } - rdc_net_put_set(diskio.cd, dset); - dset = NULL; - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_enter(KSTAT_IO_PTR(krdc-> - io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - mutex_exit(&group->ra_queue.net_qlock); - /* - * pending state. 
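- * Reply with result == 1 and the dataset index; the
- * actual write happens later, in rdc_dopending(), once
- * the missing earlier sequence numbers have arrived.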
- */ - netret.result = 1; - netret.index = diskio.idx; - st = svc_sendreply(xprt, xdr_netwriteres, - (char *)&netret); - if (krdc->io_kstats && ret2 != ENOMEM) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR( - krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - return; - } - mutex_exit(&group->ra_queue.net_qlock); - } - - ret2 = rdc_writemaxfba(krdc, urdc, dset, diskio.seq, nocache); - rdc_net_del_set(diskio.cd, dset); - dset = NULL; -#ifdef DEBUG - if (!RDC_SUCCESS(ret2)) { - cmn_err(CE_WARN, "!r_net_write6: writemaxfba failed %d", - ret2); - } -#endif - if (diskio.seq != RDC_NOSEQ) { - mutex_enter(&group->ra_queue.net_qlock); - group->seq = diskio.seq + 1; - if (group->seq < diskio.seq) - group->seq = RDC_NEWSEQ + 1; - if (group->sleepq && - (group->sleepq->seq == group->seq)) { - rdc_dopending(group, &netret); - } - group->seqack = group->seq; - mutex_exit(&group->ra_queue.net_qlock); - } - } -out: - if (!RDC_SUCCESS(ret2)) { - DTRACE_PROBE1(rdc_svcwrite6_err_ret2, int, ret2); - netret.result = -ret2; - } else { - netret.result = 0; - netret.index = ret; - } - st = svc_sendreply(xprt, xdr_netwriteres, (char *)&netret); - if (netret.vecdata.vecdata_val) { - kmem_free(netret.vecdata.vecdata_val, - netret.vecdata.vecdata_len * sizeof (net_pendvec_t)); - } - if (krdc && krdc->io_kstats && ret2 != ENOMEM) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - /* - * On Error we must cleanup. - * If we have a handle, free it. - * If we have a network handle, free it. - * If we hold the main nsc buffer, free it. - */ - if (!st || !RDC_SUCCESS(ret2)) { -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_write6 error st %x ret %d seq %u", - st, ret2, diskio.seq); -#endif - if (dset) { - rdc_net_del_set(diskio.cd, dset); - } - } else { - if (dset) { - rdc_net_put_set(diskio.cd, dset); - } - } - if (diskio.data.data_val) { - kmem_free(diskio.data.data_val, RDC_MAXDATA); - RDC_DSMEMUSE(-RDC_MAXDATA); - } -} - -/* - * r_net_ping4 - * - * received on the primary. - */ -static void -r_net_ping4(SVCXPRT *xprt, struct svc_req *req) -{ - struct rdc_ping6 ping; - int e, ret = 0; - rdc_if_t *ip; - - e = SVC_GETARGS(xprt, xdr_rdc_ping6, (char *)&ping); - if (e) { - mutex_enter(&rdc_ping_lock); - - /* update specified interface */ - - for (ip = rdc_if_top; ip; ip = ip->next) { - if ((bcmp(ping.p_ifaddr, ip->ifaddr.buf, - RDC_MAXADDR) == 0) && - (bcmp(ping.s_ifaddr, ip->r_ifaddr.buf, - RDC_MAXADDR) == 0)) { - ip->new_pulse++; - ip->deadness = 1; - - /* Update the rpc protocol version to use */ - - ip->rpc_version = req->rq_vers; - break; - } - } - - mutex_exit(&rdc_ping_lock); - } else { - svcerr_decode(xprt); -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: couldn't get ping4 arguments"); -#endif - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_ping7 - * - * received on the primary. 
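- * A matching interface gets its heartbeat pulse bumped
- * and is marked alive again, and the RPC version the
- * caller used is recorded for subsequent calls.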
- */ -static void -r_net_ping7(SVCXPRT *xprt, struct svc_req *req) -{ - struct rdc_ping ping; - int e, ret = 0; - rdc_if_t *ip; - unsigned short *sp; - - bzero(&ping, sizeof (struct rdc_ping)); - e = SVC_GETARGS(xprt, xdr_rdc_ping, (char *)&ping); - if (e) { - sp = (unsigned short *)ping.p_ifaddr.buf; - *sp = ntohs(*sp); - sp = (unsigned short *)ping.s_ifaddr.buf; - *sp = ntohs(*sp); - mutex_enter(&rdc_ping_lock); - - /* update specified interface */ - - for (ip = rdc_if_top; ip; ip = ip->next) { - if ((bcmp(ping.p_ifaddr.buf, ip->ifaddr.buf, - ping.p_ifaddr.len) == 0) && - (bcmp(ping.s_ifaddr.buf, ip->r_ifaddr.buf, - ping.s_ifaddr.len) == 0)) { - ip->new_pulse++; - ip->deadness = 1; - - /* Update the rpc protocol version to use */ - - ip->rpc_version = req->rq_vers; - break; - } - } - - mutex_exit(&rdc_ping_lock); - } else { - svcerr_decode(xprt); -#ifdef DEBUG - cmn_err(CE_NOTE, "!SNDR: couldn't get ping7 arguments"); -#endif - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - - -/* - * r_net_bmap (v5) - * WARNING acts as both client and server - */ -static void -r_net_bmap(SVCXPRT *xprt) -{ - int e, ret = EINVAL; - struct bmap b; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct bmap6 b6; - - - e = SVC_GETARGS(xprt, xdr_bmap, (char *)&b); - if (e == TRUE) { - krdc = &rdc_k_info[b.cd]; - urdc = &rdc_u_info[b.cd]; - if (b.cd >= 0 && b.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) { - krdc->rpc_version = RDC_VERSION5; - b6.cd = b.cd; - b6.dual = b.dual; - b6.size = b.size; - ret = RDC_SEND_BITMAP(&b6); - } - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_bmap (v6) - * WARNING acts as both client and server - */ -static void -r_net_bmap6(SVCXPRT *xprt) -{ - int e, ret = EINVAL; - struct bmap6 b; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - e = SVC_GETARGS(xprt, xdr_bmap6, (char *)&b); - if (e == TRUE) { - krdc = &rdc_k_info[b.cd]; - urdc = &rdc_u_info[b.cd]; - if (b.cd >= 0 && b.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) { - krdc->rpc_version = RDC_VERSION6; - ret = RDC_SEND_BITMAP(&b); - } - } - /* - * If the bitmap send has succeeded, clear it. - */ - if (ret == 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!Bitmap clear in r_net_bmap6"); -#endif - RDC_ZERO_BITMAP(krdc); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_bdata - */ -static void -r_net_bdata(SVCXPRT *xprt) -{ - struct net_bdata bd; - struct net_bdata6 bd6; - int e, ret = -1; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - /* - * We have to convert it to the internal form here, - * net_data6, when we know that we will have to convert - * it back to the v5 variant for transmission. 
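- * (the v5 arguments are repackaged as a net_bdata6 so
- * that the common RDC_OR_BITMAP() path can be used.)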
- */ - - bd.data.data_val = kmem_alloc(BMAP_BLKSIZE, KM_NOSLEEP); - if (bd.data.data_val == NULL) - goto out; - - e = SVC_GETARGS(xprt, xdr_net_bdata, (char *)&bd); - if (e == TRUE) { - krdc = &rdc_k_info[bd.cd]; - urdc = &rdc_u_info[bd.cd]; - if (bd.cd >= 0 && bd.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) { - bd6.cd = bd.cd; - bd6.offset = bd.offset; - bd6.size = bd.size; - bd6.data.data_len = bd.data.data_len; - bd6.data.data_val = bd.data.data_val; - ret = RDC_OR_BITMAP(&bd6); - } - } - kmem_free(bd.data.data_val, BMAP_BLKSIZE); -out: - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_bdata v6 - */ -static void -r_net_bdata6(SVCXPRT *xprt) -{ - struct net_bdata6 bd; - int e, ret = -1; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - - /* - * just allocate the bigger block, regardless of < V7 - * bd.size will dictate how much we lor into our bitmap - * the other option would be write r_net_bdata7 that is identical - * to this function, but a V7 alloc. - */ - bd.data.data_val = kmem_alloc(BMAP_BLKSIZEV7, KM_NOSLEEP); - if (bd.data.data_val == NULL) - goto out; - - e = SVC_GETARGS(xprt, xdr_net_bdata6, (char *)&bd); - if (e == TRUE) { - krdc = &rdc_k_info[bd.cd]; - urdc = &rdc_u_info[bd.cd]; - if (bd.cd >= 0 && bd.cd < rdc_max_sets && IS_ENABLED(urdc) && - ((krdc->type_flag & RDC_DISABLEPEND) == 0)) - ret = RDC_OR_BITMAP(&bd); - } - /* - * Write the merged bitmap. - */ - if ((ret == 0) && bd.endoblk && (krdc->bitmap_write > 0)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!r_net_bdata6: Written bitmap for %s:%s", - urdc->secondary.intf, urdc->secondary.file); -#endif - ret = rdc_write_bitmap(krdc); - } - kmem_free(bd.data.data_val, BMAP_BLKSIZEV7); -out: - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_getsize (v5) - */ -static void -r_net_getsize(SVCXPRT *xprt) -{ - int e, ret = -1, index; - rdc_k_info_t *krdc; - - e = SVC_GETARGS(xprt, xdr_int, (char *)&index); - if (e) { - krdc = &rdc_k_info[index]; - if (IS_VALID_INDEX(index) && ((krdc->type_flag & - RDC_DISABLEPEND) == 0)) - ret = mirror_getsize(index); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_getsize (v6) - */ -static void -r_net_getsize6(SVCXPRT *xprt) -{ - int e, index; - rdc_k_info_t *krdc; - uint64_t ret; - - /* - * small change in semantics here, as we can't return - * -1 over the wire anymore. 
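- * A returned size of 0 therefore doubles as the error
- * indication for the v6 call.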
- */ - ret = 0; - - e = SVC_GETARGS(xprt, xdr_int, (char *)&index); - if (e) { - krdc = &rdc_k_info[index]; - if (IS_VALID_INDEX(index) && ((krdc->type_flag & - RDC_DISABLEPEND) == 0)) - ret = mirror_getsize(index); - } - (void) svc_sendreply(xprt, xdr_u_longlong_t, (char *)&ret); -} - - -/* - * r_net_state4 - */ -static void -r_net_state4(SVCXPRT *xprt) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - struct set_state4 state; - rdc_set_t rdc_set; - int e, index = -1; - int options; - int log = 0; - int done = 0; - int slave = 0; - int rev_sync = 0; - - e = SVC_GETARGS(xprt, xdr_set_state4, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - bcopy(state.netaddr, rdc_set.primary.addr.buf, - state.netaddrlen); - bcopy(state.rnetaddr, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - (void) strncpy(rdc_set.primary.file, state.pfile, - RDC_MAXNAMLEN); - (void) strncpy(rdc_set.secondary.file, state.sfile, - RDC_MAXNAMLEN); - options = state.flag; - index = rdc_lookup_byaddr(&rdc_set); - - krdc = &rdc_k_info[index]; - - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!r_net_state: no index or disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) { - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: set not enabled "); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - if (krdc->lsrv == NULL) { - cmn_err(CE_NOTE, "!r_net_state: no valid svp\n"); - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - if (!krdc || !krdc->group) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_state: no valid krdc %p\n", (void*)krdc); -#endif - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - mutex_enter(&rdc_conf_lock); - if (krdc->type_flag & RDC_DISABLEPEND) { - mutex_exit(&rdc_conf_lock); - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - set_busy(krdc); - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->primary.addr), &(urdc->secondary.addr), 1); - else - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->secondary.addr), &(urdc->primary.addr), 0); - - if (options & CCIO_SLAVE) { - /* - * mark that the bitmap needs clearing. - */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting forward sync */ - if (urdc->volume_size == 0) - rdc_get_details(krdc); - if (urdc->volume_size == 0) { - index = -1; - goto out; - } - if (krdc->dcio_bitmap == NULL) { - if (rdc_resume_bitmap(krdc) < 0) { - index = -1; - goto out; - } - } - if (rdc_allow_sec_sync(urdc, CCIO_SLAVE) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - slave = 1; - } else if (options & CCIO_RSYNC) { - /* - * mark that the bitmap needs clearing. 
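- * RDC_CLR_AFTERSYNC causes the bitmap to be zeroed once
- * the sync completes (see the CCIO_DONE handling below).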
- */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting reverse sync */ - if (rdc_get_vflags(urdc) & (RDC_SYNC_NEEDED | - RDC_VOL_FAILED | RDC_BMP_FAILED)) { - index = -1; - goto out; - } - if (rdc_allow_sec_sync(urdc, CCIO_RSYNC) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - rev_sync = 1; - } else if (options & CCIO_DONE) { - /* Sync completed OK */ - if (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED) - done = 1; /* forward sync complete */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_clr_mflags(urdc, RDC_SLAVE | RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - rdc_write_state(urdc); - if (rdc_get_vflags(urdc) & RDC_CLR_AFTERSYNC) { - RDC_ZERO_BITMAP(krdc); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - } - } else if (options & CCIO_ENABLELOG) { - /* Sync aborted or logging started */ - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - log = 1; - } -out: - rdc_group_exit(krdc); - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - - if (slave) { - if (_rdc_sync_event_notify(RDC_SYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - rdc_set_flags(urdc, - RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_set_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (rev_sync) { - /* Check to see if volume is mounted */ - if (_rdc_sync_event_notify(RDC_RSYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_set_flags(urdc, RDC_SYNCING); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (done) { - - /* - * special case... - * if this set is in a group, then sndrsyncd will - * make sure that all sets in the group are REP - * before updating the config to "update", telling - * sndrsyncd that it is ok to take anther snapshot - * on a following sync. The important part about - * the whole thing is that syncd needs kernel stats. - * however, this thread must set the set busy to - * avoid disables. 
since this is the only - * sync_event_notify() that will cause a status - * call back into the kernel, and we will not be - * accessing the group structure, we have to wakeup now - */ - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - (void) _rdc_sync_event_notify(RDC_SYNC_DONE, - urdc->secondary.file, urdc->group_name); - } - } - - if (!done) { - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - if (log) { - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_OTHERREMOTE, - "Sync aborted or logging started"); - rdc_group_exit(krdc); - } -} - - -/* - * r_net_state - */ -static void -r_net_state(SVCXPRT *xprt) -{ - rdc_u_info_t *urdc; - rdc_k_info_t *krdc; - struct set_state state; - rdc_set_t rdc_set; - int e, index = -1; - int options; - int log = 0; - int done = 0; - int slave = 0; - int rev_sync = 0; - unsigned short *sp; - - bzero(&state, sizeof (struct set_state)); - e = SVC_GETARGS(xprt, xdr_set_state, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - sp = (unsigned short *)(state.netaddr.buf); - *sp = ntohs(*sp); - bcopy(state.netaddr.buf, rdc_set.primary.addr.buf, - state.netaddrlen); - sp = (unsigned short *)(state.rnetaddr.buf); - *sp = ntohs(*sp); - bcopy(state.rnetaddr.buf, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - (void) strncpy(rdc_set.primary.file, state.pfile, - RDC_MAXNAMLEN); - (void) strncpy(rdc_set.secondary.file, state.sfile, - RDC_MAXNAMLEN); - options = state.flag; - index = rdc_lookup_byaddr(&rdc_set); - - krdc = &rdc_k_info[index]; - - if (index < 0 || (krdc->type_flag & RDC_DISABLEPEND)) { -#ifdef DEBUG - cmn_err(CE_WARN, - "!r_net_state: no index or disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - urdc = &rdc_u_info[index]; - - if (!IS_ENABLED(urdc)) { - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: set not enabled "); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - if (krdc->lsrv == NULL) { - cmn_err(CE_NOTE, "!r_net_state: no valid svp\n"); - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - if (!krdc || !krdc->group) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!r_net_state: no valid krdc %p\n", (void*)krdc); -#endif - index = -1; - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - - mutex_enter(&rdc_conf_lock); - if (krdc->type_flag & RDC_DISABLEPEND) { - mutex_exit(&rdc_conf_lock); - index = -1; -#ifdef DEBUG - cmn_err(CE_WARN, "!r_net_state: disable pending"); -#endif - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - return; - } - set_busy(krdc); - mutex_exit(&rdc_conf_lock); - - rdc_group_enter(krdc); - - if (rdc_get_vflags(urdc) & RDC_PRIMARY) - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->primary.addr), &(urdc->secondary.addr), 1); - else - krdc->intf = rdc_add_to_if(krdc->lsrv, - &(urdc->secondary.addr), &(urdc->primary.addr), 0); - - if (options & CCIO_SLAVE) { - /* - * mark that the bitmap needs clearing. 
- */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting forward sync */ - if (urdc->volume_size == 0) - rdc_get_details(krdc); - if (urdc->volume_size == 0) { - index = -1; - goto out; - } - if (krdc->dcio_bitmap == NULL) { - if (rdc_resume_bitmap(krdc) < 0) { - index = -1; - goto out; - } - } - if (rdc_allow_sec_sync(urdc, CCIO_SLAVE) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - slave = 1; - } else if (options & CCIO_RSYNC) { - /* - * mark that the bitmap needs clearing. - */ - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - - /* Starting reverse sync */ - if (rdc_get_vflags(urdc) & (RDC_SYNC_NEEDED | - RDC_VOL_FAILED | RDC_BMP_FAILED)) { - index = -1; - goto out; - } - if (rdc_allow_sec_sync(urdc, CCIO_RSYNC) < 0) { - index = -1; - goto out; - } - rdc_dump_dsets(index); - rev_sync = 1; - } else if (options & CCIO_DONE) { - /* Sync completed OK */ - if (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED) - done = 1; /* forward sync complete */ - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_clr_mflags(urdc, RDC_SLAVE | RDC_RSYNC_NEEDED); - rdc_many_exit(krdc); - rdc_write_state(urdc); - if (rdc_get_vflags(urdc) & RDC_CLR_AFTERSYNC) { - RDC_ZERO_BITMAP(krdc); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_CLR_AFTERSYNC); - rdc_many_exit(krdc); - } - } else if (options & CCIO_ENABLELOG) { - /* Sync aborted or logging started */ - if (!(rdc_get_vflags(urdc) & RDC_PRIMARY)) { - rdc_clr_flags(urdc, RDC_SYNCING); - rdc_many_enter(krdc); - rdc_clr_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - } - log = 1; - } -out: - rdc_group_exit(krdc); - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - - if (slave) { - if (_rdc_sync_event_notify(RDC_SYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_many_enter(krdc); - rdc_clr_flags(urdc, RDC_VOL_FAILED); - rdc_set_flags(urdc, - RDC_SYNCING | RDC_SYNC_NEEDED); - rdc_set_mflags(urdc, RDC_SLAVE); - rdc_many_exit(krdc); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (rev_sync) { - /* Check to see if volume is mounted */ - if (_rdc_sync_event_notify(RDC_RSYNC_START, - urdc->secondary.file, urdc->group_name) >= 0) { - rdc_group_enter(krdc); - rdc_clr_flags(urdc, RDC_LOGGING); - rdc_set_flags(urdc, RDC_SYNCING); - rdc_write_state(urdc); - rdc_group_exit(krdc); - } else { - index = -1; - } - } else if (done) { - - /* - * special case... - * if this set is in a group, then sndrsyncd will - * make sure that all sets in the group are REP - * before updating the config to "update", telling - * sndrsyncd that it is ok to take anther snapshot - * on a following sync. The important part about - * the whole thing is that syncd needs kernel stats. - * however, this thread must set the set busy to - * avoid disables. 
since this is the only - * sync_event_notify() that will cause a status - * call back into the kernel, and we will not be - * accessing the group structure, we have to wakeup now - */ - - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - - (void) _rdc_sync_event_notify(RDC_SYNC_DONE, - urdc->secondary.file, urdc->group_name); - } - } - - if (!done) { - mutex_enter(&rdc_conf_lock); - wakeup_busy(krdc); - mutex_exit(&rdc_conf_lock); - } - - (void) svc_sendreply(xprt, xdr_int, (char *)&index); - if (log) { - rdc_group_enter(krdc); - rdc_group_log(krdc, RDC_NOFLUSH | RDC_OTHERREMOTE, - "Sync aborted or logging started"); - rdc_group_exit(krdc); - } - free_rdc_netbuf(&(state.netaddr)); - free_rdc_netbuf(&(state.rnetaddr)); -} - -/* - * r_net_getstate4 - * Return our state to client - */ -static void -r_net_getstate4(SVCXPRT *xprt, struct svc_req *req) -{ - int e, ret = -1, index = -1; - struct set_state4 state; - rdc_u_info_t *urdc; - rdc_set_t rdc_set; - - bzero(&state, sizeof (struct set_state)); - e = SVC_GETARGS(xprt, xdr_set_state4, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - bcopy(state.netaddr, rdc_set.primary.addr.buf, - state.netaddrlen); - bcopy(state.rnetaddr, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - (void) strncpy(rdc_set.primary.file, state.pfile, - RDC_MAXNAMLEN); - (void) strncpy(rdc_set.secondary.file, state.sfile, - RDC_MAXNAMLEN); - index = rdc_lookup_byaddr(&rdc_set); - if (index >= 0) { - urdc = &rdc_u_info[index]; - - ret = 0; - if (rdc_get_vflags(urdc) & RDC_SYNCING) - ret |= 4; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - ret |= 2; - if (rdc_get_vflags(urdc) & RDC_LOGGING) - ret |= 1; - rdc_set_if_vers(urdc, req->rq_vers); - } - free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * r_net_getstate7 - * Return our state to client - */ -static void -r_net_getstate7(SVCXPRT *xprt, struct svc_req *req) -{ - int e, ret = -1, index = -1; - struct set_state state; - char pstr[RDC_MAXNAMLEN]; - char sstr[RDC_MAXNAMLEN]; - rdc_u_info_t *urdc; - rdc_set_t rdc_set; - unsigned short *sp; - - bzero(&state, sizeof (struct set_state)); - state.pfile = pstr; - state.sfile = sstr; - - e = SVC_GETARGS(xprt, xdr_set_state, (char *)&state); - if (e) { - init_rdc_netbuf(&(rdc_set.primary.addr)); - init_rdc_netbuf(&(rdc_set.secondary.addr)); - sp = (unsigned short *)(state.netaddr.buf); - *sp = ntohs(*sp); - bcopy(state.netaddr.buf, rdc_set.primary.addr.buf, - state.netaddrlen); - sp = (unsigned short *)(state.rnetaddr.buf); - *sp = ntohs(*sp); - bcopy(state.rnetaddr.buf, rdc_set.secondary.addr.buf, - state.rnetaddrlen); - rdc_set.primary.addr.len = state.netaddrlen; - rdc_set.secondary.addr.len = state.rnetaddrlen; - /* - * strncpy(rdc_set.primary.file, state.pfile, RDC_MAXNAMLEN); - * strncpy(rdc_set.secondary.file, state.sfile, RDC_MAXNAMLEN); - */ - bcopy(state.pfile, rdc_set.primary.file, RDC_MAXNAMLEN); - bcopy(state.sfile, rdc_set.secondary.file, RDC_MAXNAMLEN); - index = rdc_lookup_byaddr(&rdc_set); - if (index >= 0) { - urdc = &rdc_u_info[index]; - - ret = 0; - if (rdc_get_vflags(urdc) & RDC_SYNCING) - ret |= 4; - if (rdc_get_vflags(urdc) & RDC_SLAVE) - ret |= 2; - if (rdc_get_vflags(urdc) & RDC_LOGGING) - ret |= 1; - rdc_set_if_vers(urdc, req->rq_vers); - } - 
free_rdc_netbuf(&(rdc_set.primary.addr)); - free_rdc_netbuf(&(rdc_set.secondary.addr)); - } - (void) svc_sendreply(xprt, xdr_int, (char *)&ret); -} - -/* - * copy from/to a dset/vector combination to a network xdr buffer. - */ -static int -rdc_dsetcopy(rdc_net_dataset_t *dset, nsc_vec_t *invec, nsc_off_t fba_pos, - nsc_size_t fba_len, char *bdata, int blen, int dir) -{ - nsc_vec_t *vec; - uchar_t *sv_addr; - uchar_t *data; - int sv_len; - nsc_off_t fpos; - int len; - int n; - - if (!bdata || !dset || !invec) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: dsetcopy: parameters failed bdata %p, dset %p " - "invec %p", (void *)bdata, (void *)dset, (void *)invec); -#endif - return (FALSE); - } - - if (fba_len > MAX_RDC_FBAS || - (dir != COPY_IN && dir != COPY_OUT)) { -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: dsetcopy: params failed fba_len %" NSC_SZFMT - " fba_pos %" NSC_SZFMT ", dir %d", fba_len, fba_pos, dir); -#endif - return (FALSE); - } - - data = (uchar_t *)bdata; /* pointer to data in rpc */ - len = FBA_SIZE(fba_len); /* length of this transfer in bytes */ - fpos = fba_pos; /* start fba offset within buffer */ - - if (!len) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc: dsetcopy: len = 0"); -#endif - return (FALSE); - } - - if (len != blen) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc:dsetcopy: len %d != blen %d", len, blen); -#endif - if (len > blen) - len = blen; - } - - if (!RDC_DSET_LIMITS(dset, fba_pos, fba_len)) { - /* should never happen */ -#ifdef DEBUG - cmn_err(CE_NOTE, - "!rdc: dsetcopy: handle limits pos %" NSC_SZFMT " (%" - NSC_SZFMT ") len %" NSC_SZFMT " (%" NSC_SZFMT ")", - fba_pos, dset->pos, fba_len, dset->fbalen); -#endif - return (FALSE); /* Don't overrun handle */ - } - - vec = invec; - fpos -= dset->pos; - - /* find starting position in vector */ - - for (; fpos >= FBA_NUM(vec->sv_len); vec++) - fpos -= FBA_NUM(vec->sv_len); - - /* - * Copy data - */ - - sv_addr = vec->sv_addr + FBA_SIZE(fpos); - sv_len = vec->sv_len - FBA_SIZE(fpos); - - while (len) { - if (!sv_addr) /* end of vec - how did this happen? */ - break; - - n = min(sv_len, len); - - if (dir == COPY_OUT) - bcopy(data, sv_addr, (size_t)n); - else - bcopy(sv_addr, data, (size_t)n); - - sv_len -= n; - len -= n; - - sv_addr += n; - data += n; - - if (sv_len <= 0) { - /* goto next vector */ - vec++; - sv_addr = vec->sv_addr; - sv_len = vec->sv_len; - } - } - - return (TRUE); -} - - -/* - * rdc_start_server - * Starts the kRPC server for rdc. Uses tli file descriptor passed down - * from user level rdc server. - * - * Returns: 0 or errno (NOT unistat!). - */ -int -rdc_start_server(struct rdc_svc_args *args, int mode) -{ - file_t *fp; - int ret; - struct cred *cred; - STRUCT_HANDLE(rdc_svc_args, rs); - - STRUCT_SET_HANDLE(rs, mode, args); - cred = ddi_get_cred(); - if (drv_priv(cred) != 0) - return (EPERM); - fp = getf(STRUCT_FGET(rs, fd)); - if (fp == NULL) { -#ifdef DEBUG - cmn_err(CE_WARN, "!rdc_start_server fd %d, fp %p", args->fd, - (void *) fp); -#endif - return (EBADF); - } - - ret = rdcsrv_load(fp, rdc_srvtab, args, mode); - - releasef(STRUCT_FGET(rs, fd)); - return (ret); -} - -/* - * Allocate a new sleepq element. - */ - -static rdc_sleepq_t * -rdc_newsleepq() -{ - rdc_sleepq_t *sq; - - sq = kmem_alloc(sizeof (rdc_sleepq_t), KM_SLEEP); - sq->next = NULL; -#ifdef DEBUG - mutex_enter(&rdc_cntlock); - rdc_sleepcnt++; - mutex_exit(&rdc_cntlock); -#endif - return (sq); -} - -/* - * free memory/resources used by a sleepq element. 
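- * (any dataset still attached through sq->idx is
- * deleted as well.)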
- */ -static void -rdc_delsleepq(rdc_sleepq_t *sq) -{ - rdc_net_dataset_t *dset; - - if (sq->idx != -1) { - dset = rdc_net_get_set(sq->sindex, sq->idx); - if (dset) { - rdc_net_del_set(sq->sindex, dset); - } - } - kmem_free(sq, sizeof (rdc_sleepq_t)); -#ifdef DEBUG - mutex_enter(&rdc_cntlock); - rdc_sleepcnt--; - mutex_exit(&rdc_cntlock); -#endif -} - - -/* - * skip down the sleep q and insert the sleep request - * in ascending order. Return 0 on success, 1 on failure. - */ -static int -rdc_sleepq(rdc_group_t *group, rdc_sleepq_t *sq) -{ - rdc_sleepq_t *findsq; - - - ASSERT(MUTEX_HELD(&group->ra_queue.net_qlock)); - if (group->sleepq == NULL) { - group->sleepq = sq; - } else { - if (sq->seq == group->sleepq->seq) { - cmn_err(CE_WARN, "!rdc_sleepq: Attempt to " - "add duplicate request to queue %d", sq->seq); - return (1); - } - if (RDC_INFRONT(sq->seq, group->sleepq->seq)) { - sq->next = group->sleepq; - group->sleepq = sq; - } else { - findsq = group->sleepq; - - while (findsq->next) { - if (sq->seq == findsq->next->seq) { - cmn_err(CE_WARN, "!rdc_sleepq: " - "Attempt to add duplicate " - "request to queue %d", sq->seq); - return (1); - } - if (RDC_INFRONT(sq->seq, findsq->next->seq)) { - sq->next = findsq->next; - findsq->next = sq; - break; - } - findsq = findsq->next; - } - if (findsq->next == NULL) - findsq->next = sq; - } - } - return (0); -} - -/* - * run down the sleep q and discard all the sleepq elements. - */ -void -rdc_sleepqdiscard(rdc_group_t *group) -{ - rdc_sleepq_t *sq; - rdc_k_info_t *krdc; - - ASSERT(MUTEX_HELD(&group->ra_queue.net_qlock)); - sq = group->sleepq; - - while (sq) { - rdc_sleepq_t *dsq; - - dsq = sq; - krdc = &rdc_k_info[dsq->sindex]; - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - sq = sq->next; - rdc_delsleepq(dsq); - } - group->sleepq = NULL; -} - -/* - * split any write requests down to maxfba sized chunks. - */ -/*ARGSUSED*/ -static int -rdc_writemaxfba(rdc_k_info_t *krdc, rdc_u_info_t *urdc, - rdc_net_dataset_t *dset, uint_t seq, int nocache) -{ - int len; - int ret; - nsc_vec_t vector[2]; - nsc_buf_t *handle; - int reserved; - int rtype; - nsc_size_t mfba; - nsc_size_t wsize; - nsc_off_t pos; - int eintr_count; - unsigned char *daddr; - int kstat_len; - - kstat_len = len = dset->fbalen; - ret = 0; - handle = NULL; - reserved = 0; - rtype = RDC_RAW; - - ASSERT(dset->nitems == 1); - - eintr_count = 0; - do { - ret = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (ret == EINTR) { - ++eintr_count; - delay(2); - } - } while ((ret == EINTR) && (eintr_count < MAX_EINTR_COUNT)); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: reserve devs " - "failed %d", ret); -#endif - goto out; - - } - reserved = 1; - /* - * Perhaps we should cache mfba. - */ - ret = nsc_maxfbas(RDC_U_FD(krdc), 0, &mfba); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: msc_maxfbas failed %d", - ret); -#endif - goto out; - } - - ASSERT(urdc->volume_size != 0); - if (dset->pos + len > urdc->volume_size) { - /* should never happen */ - /* - * also need to trim down the vector - * sizes. 
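- * (a request that runs past the end of the volume is
- * clipped to the volume size before being written.)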
- */ - kstat_len = len = urdc->volume_size - dset->pos; - dset->head->len -= FBA_SIZE(len); - ASSERT(dset->head->len > 0); - } - daddr = dset->head->dptr; - pos = dset->pos; - vector[1].sv_addr = NULL; - vector[1].sv_len = 0; - - while (len > 0) { - wsize = min((nsc_size_t)len, mfba); - vector[0].sv_addr = daddr; - vector[0].sv_len = FBA_SIZE(wsize); - - if (handle) { - (void) nsc_free_buf(handle); - handle = NULL; - } - ret = nsc_alloc_buf(RDC_U_FD(krdc), pos, wsize, - NSC_WRBUF|NSC_NODATA|nocache, &handle); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: " - "nsc_alloc (d1) buf failed %d at " - "pos %" NSC_SZFMT " len %" NSC_SZFMT, - ret, pos, wsize); -#endif - goto out; - } - handle->sb_vec = &vector[0]; - ret = rdc_combywrite(krdc, handle); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!rdc_writemaxfba: " - "write failed (d1) %d offset %" NSC_SZFMT " " - "length %" NSC_SZFMT, ret, pos, wsize); -#endif - goto out; - } - pos += wsize; - len -= wsize; - daddr += FBA_SIZE(wsize); - } -out: - if (!RDC_SUCCESS(ret)) { - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - ASSERT(!(rdc_get_vflags(urdc) & - RDC_PRIMARY)); - rdc_many_enter(krdc); - rdc_set_flags(urdc, RDC_SYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "svc write failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - } else { - /* success */ -#ifdef DEBUG - if (rdc_netwrite6) { - /* - * This string is used in the ZatoIchi MASNDR - * tests, if you change this, update the test. - */ - cmn_err(CE_NOTE, "!writemaxfba: Write " - "sequence %u", seq); - } -#endif - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->writes++; - KSTAT_IO_PTR(krdc->io_kstats)->nwritten += - FBA_SIZE(kstat_len); - } - } - if (handle) - (void) nsc_free_buf(handle); - if (reserved) - _rdc_rlse_devs(krdc, rtype); - return (ret); -} - -static int -rdc_combywrite(rdc_k_info_t *krdc, nsc_buf_t *handle) -{ - int rsync; - int ret; - int multiret; - - rsync = -1; - ret = 0; - /* Handle multihop I/O even on error */ - if (IS_MULTI(krdc)) { - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - rdc_many_enter(krdc); - /* - * Find a target primary that is enabled, - * taking account of the fact that this - * could be a multihop secondary - * connected to a 1-to-many primary. 
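- * The loop below walks many_next until it either finds
- * an enabled primary or arrives back where it started.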
- */ - ktmp = krdc->multi_next; - if (ktmp == NULL) { - rdc_many_exit(krdc); - goto multi_done; - } - utmp = &rdc_u_info[ktmp->index]; - do { - if ((rdc_get_vflags(utmp) & RDC_PRIMARY) - /* CSTYLED */ - && IS_ENABLED(utmp)) - break; - - ktmp = ktmp->many_next; - utmp = &rdc_u_info[ktmp->index]; - } while (ktmp != krdc->multi_next); - - if (!(rdc_get_vflags(utmp) & RDC_PRIMARY) || - !IS_ENABLED(utmp)) { - rdc_many_exit(krdc); - goto multi_done; - } - - rdc_many_exit(krdc); - rsync = (rdc_get_mflags(utmp) & RDC_SLAVE); - if (!rsync) { - /* normal case - local io first */ - ret = nsc_write(handle, handle->sb_pos, handle->sb_len, - 0); - } - multiret = _rdc_multi_write(handle, handle->sb_pos, - handle->sb_len, 0, ktmp); - if (!RDC_SUCCESS(multiret)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!combywrite: " - "rdc_multi_write failed " - "status %d ret %d", - handle->sb_error, multiret); -#endif - if (!(rdc_get_vflags(utmp) & - RDC_VOL_FAILED)) { - rdc_many_enter(ktmp); - if (rdc_get_vflags(utmp) & - RDC_PRIMARY) { - rdc_set_mflags(utmp, - RDC_RSYNC_NEEDED); - } else { - rdc_set_flags(utmp, - RDC_SYNC_NEEDED); - } - rdc_set_flags(utmp, - RDC_VOL_FAILED); - rdc_many_exit(ktmp); - rdc_write_state(utmp); - } - } - } - -multi_done: - if (rsync != 0) { - /* - * Either: - * reverse sync in progress and so we - * need to do the local io after the - * (multihop) secondary io. - * Or: - * no multihop and this is the only io - * required. - */ - ret = nsc_write(handle, handle->sb_pos, handle->sb_len, 0); - - } - return (ret); -} -/* - * set the pos and len values in the piggyback reply. - */ -static void -rdc_setbitind(int *pendcnt, net_pendvec_t *pvec, rdc_net_dataset_t *dset, - uint_t seq, int pindex, int qpos) -{ - int pc; - ASSERT(*pendcnt < RDC_MAXPENDQ); - - pc = *pendcnt; - pvec[pc].seq = seq; - pvec[pc].apos = dset->pos; - pvec[pc].qpos = qpos; - pvec[pc].alen = dset->fbalen; - pvec[pc].pindex = pindex; - *pendcnt = pc + 1; - DTRACE_PROBE1(pvec_reply, int, seq); -} - -/* - * Enters with group->ra_queue.net_qlock held. - * Tries to construct the return status data for - * all the pending requests in the sleepq that it can - * satisfy. - */ -static void -rdc_dopending(rdc_group_t *group, netwriteres *netretp) -{ - int pendcnt; - net_pendvec_t *pendvec; - rdc_sleepq_t *sq; - int ret; - int pendsz; - - ASSERT(MUTEX_HELD(&group->ra_queue.net_qlock)); - - pendcnt = 0; - pendsz = RDC_MAXPENDQ * sizeof (net_pendvec_t); - pendvec = kmem_alloc(pendsz, KM_SLEEP); - - /* - * now look at the Q of pending tasks, attempt - * to write any that have been waiting for - * me to complete my write, and piggyback - * their results in my reply, by setiing pendcnt - * to the number of extra requests sucessfully - * processed. - */ - while (group->sleepq && group->sleepq->seq == group->seq) { - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - struct rdc_net_dataset *dset; - - sq = group->sleepq; - group->sleepq = sq->next; - mutex_exit(&group->ra_queue.net_qlock); - - krdc = &rdc_k_info[sq->sindex]; - urdc = &rdc_u_info[sq->sindex]; - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_waitq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - dset = rdc_net_get_set(sq->sindex, sq->idx); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!pending: %s:%s rdc_net_get_set " - "failed", urdc->secondary.intf, - urdc->secondary.file); -#endif - /* - * as we failed to get the pointer, there - * is no point expecting the cleanup - * code in rdc_delsleepq() to get it - * either. 
- */ - sq->idx = -1; - goto cleansq; - } - sq->idx = -1; /* marked as cleaned up */ - - ret = rdc_writemaxfba(krdc, urdc, dset, sq->seq, sq->nocache); - if (RDC_SUCCESS(ret)) { - rdc_setbitind(&pendcnt, pendvec, dset, - sq->seq, sq->pindex, sq->qpos); - } else { - cmn_err(CE_WARN, "!dopending: Write of pending " - "asynchronous task failed, with " - "sequence number %u for SNDR set %s:%s", - sq->seq, urdc->secondary.intf, - urdc->secondary.file); - } - rdc_net_del_set(sq->sindex, dset); -cleansq: - mutex_enter(&group->ra_queue.net_qlock); - group->seq = sq->seq + 1; - if (group->seq < sq->seq) - group->seq = RDC_NEWSEQ + 1; - rdc_delsleepq(sq); - } - mutex_exit(&group->ra_queue.net_qlock); - if (pendcnt) { - int vecsz; -#ifdef DEBUG - if (rdc_netwrite6) { - cmn_err(CE_NOTE, "!packing pend, count %d", pendcnt); - } -#endif - vecsz = pendcnt * sizeof (net_pendvec_t); - netretp->vecdata.vecdata_val = - kmem_alloc(vecsz, KM_SLEEP); - netretp->vecdata.vecdata_len = pendcnt; - bcopy(pendvec, netretp->vecdata.vecdata_val, vecsz); - } - kmem_free(pendvec, pendsz); - mutex_enter(&group->ra_queue.net_qlock); -} - -/* - * Take the dset and allocate and fill in the vector. - */ -static nsc_vec_t * -rdc_dset2vec(rdc_net_dataset_t *dset) -{ - nsc_vec_t *vecret; - int i; - rdc_net_dataitem_t *ditem; - - ASSERT(dset->nitems > 0); - ASSERT(dset->head); - ASSERT(dset->tail); - - vecret = kmem_alloc((dset->nitems + 1) * sizeof (nsc_vec_t), - KM_NOSLEEP); - if (vecret == NULL) { - return (NULL); - } - RDC_DSMEMUSE((dset->nitems + 1) * sizeof (nsc_vec_t)); - ditem = dset->head; - for (i = 0; i < dset->nitems; i++) { - ASSERT(ditem); - vecret[i].sv_addr = ditem->dptr; - vecret[i].sv_len = ditem->len; - ditem = ditem->next; - } - /* - * Null terminate. - */ - vecret[i].sv_addr = NULL; - vecret[i].sv_len = 0; - /* - * Check the list and count matches. - */ - ASSERT(ditem == NULL); - return (vecret); -} - -/* - * Split the local read into maxfba sized chunks. - * Returns 0 on an error, or a valid idx on success. - */ -static int -rdc_readmaxfba(int cd, nsc_off_t pos, nsc_size_t fbalen, int nocache) -{ - int idx; - rdc_k_info_t *krdc; - rdc_u_info_t *urdc; - rdc_net_dataset_t *dset; - rdc_net_dataitem_t *ditem; - int rtype; - nsc_buf_t *handle; - nsc_vec_t veclist[2]; - int ret; - int reserved; - nsc_size_t fbaleft; - nsc_size_t mfba; - nsc_off_t fba; - nsc_off_t spos; - int eintr_count; - - handle = NULL; - idx = 0; /* error status */ - dset = NULL; - ditem = NULL; - reserved = 0; - ret = 0; - mfba = 0; - - rtype = RDC_RAW; - krdc = &rdc_k_info[cd]; - urdc = &rdc_u_info[cd]; - - eintr_count = 0; - do { - ret = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL); - if (ret == EINTR) { - ++eintr_count; - delay(2); - } - } while ((ret == EINTR) && (eintr_count < MAX_EINTR_COUNT)); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: reserve failed on set %s:%s %d", - urdc->secondary.intf, urdc->secondary.file, - ret); -#endif - goto out; - } - reserved = 1; - /* - * create a dataset that we can hang all the buffers from. 
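- * Each maxfba-sized chunk read below gets its own
- * dataitem chained onto this dataset; the dataset id is
- * what is returned to the caller on success.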
- */ - dset = rdc_net_add_set(cd); - if (dset == NULL) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: Unable to allocate dset on set " - "%s:%s", urdc->secondary.intf, urdc->secondary.file); -#endif - goto out; - } - dset->pos = pos; - dset->fbalen = fbalen; - ret = nsc_maxfbas(RDC_U_FD(krdc), 0, &mfba); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: nsc_maxfbas failed on set %s:%s " - "%d", urdc->secondary.intf, urdc->secondary.file, ret); -#endif - goto out; - } - spos = pos; - fbaleft = fbalen; - veclist[1].sv_addr = NULL; - veclist[1].sv_len = 0; - - while (fbaleft > 0) { - fba = min(mfba, fbaleft); - if (handle) { - (void) nsc_free_buf(handle); - handle = NULL; - } - ret = nsc_alloc_buf(RDC_U_FD(krdc), spos, fba, - nocache|NSC_NODATA, &handle); - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!readmaxfba: alloc failed on set " - "%s:%s %d", urdc->secondary.intf, - urdc->secondary.file, ret); -#endif - goto out; - } - ditem = kmem_alloc(sizeof (rdc_net_dataitem_t), KM_NOSLEEP); - if (ditem == NULL) { - goto out; - } - RDC_DSMEMUSE(sizeof (rdc_net_dataitem_t)); - ditem->len = FBA_SIZE(fba); - ditem->mlen = ditem->len; - ditem->dptr = kmem_alloc(ditem->len, KM_SLEEP); - RDC_DSMEMUSE(ditem->len); - ditem->next = NULL; - /* - * construct a vector list - */ - veclist[0].sv_addr = ditem->dptr; - veclist[0].sv_len = ditem->len; - handle->sb_vec = veclist; - ret = rdc_combyread(krdc, urdc, handle); - if (ret != 0) { - goto out; - } - /* - * place on linked list. - */ - dset->nitems++; - if (dset->head == NULL) { - dset->head = ditem; - dset->tail = ditem; - } else { - dset->tail->next = ditem; - dset->tail = ditem; - } - /* - * now it's linked, clear this so it's not freed twice. - */ - ditem = NULL; - fbaleft -= fba; - spos += fba; - } - /* - * all the reads have worked, store the results. - */ - idx = dset->id; - rdc_net_put_set(cd, dset); - dset = NULL; -out: - if (handle) - (void) nsc_free_buf(handle); - if (reserved) - _rdc_rlse_devs(krdc, rtype); - if (dset) - rdc_net_del_set(cd, dset); - if (ditem) { - kmem_free(ditem->dptr, ditem->mlen); - RDC_DSMEMUSE(-ditem->mlen); - kmem_free(ditem, sizeof (*ditem)); - RDC_DSMEMUSE(-sizeof (*ditem)); - } - return (idx); -} - - -/* - * perform both a local read, and if multihop, a remote read. - * return 0 on success, or errno on failure. - */ -static int -rdc_combyread(rdc_k_info_t *krdc, rdc_u_info_t *urdc, nsc_buf_t *handle) -{ - int ret; - rdc_k_info_t *ktmp; - rdc_u_info_t *utmp; - - /* - * read it.
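/*
 * A stand-alone sketch of the chunking arithmetic used by rdc_readmaxfba()
 * above: a request of `len' blocks starting at `pos' is processed in pieces
 * of at most `maxfba' blocks, advancing the position and shrinking the
 * remainder each pass.  The callback stands in for the per-chunk
 * allocate/read/link work; nothing here is driver API.
 */
#include <stdint.h>
#include <stdio.h>

typedef int (*chunk_fn_t)(uint64_t pos, uint64_t len, void *arg);

static int
for_each_chunk(uint64_t pos, uint64_t len, uint64_t maxfba,
    chunk_fn_t fn, void *arg)
{
	while (len > 0) {
		uint64_t chunk = (len < maxfba) ? len : maxfba;
		int rc = fn(pos, chunk, arg);

		if (rc != 0)
			return (rc);	/* caller cleans up partial work */
		pos += chunk;
		len -= chunk;
	}
	return (0);
}

static int
print_chunk(uint64_t pos, uint64_t len, void *arg)
{
	(void) arg;
	(void) printf("read %llu blocks at %llu\n",
	    (unsigned long long)len, (unsigned long long)pos);
	return (0);
}

int
main(void)
{
	/* A 1000-block read with a 256-block transfer limit. */
	return (for_each_chunk(0, 1000, 256, print_chunk, NULL));
}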
- */ - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - ret = nsc_read(handle, handle->sb_pos, handle->sb_len, NSC_READ); - - if (krdc->io_kstats) { - mutex_enter(krdc->io_kstats->ks_lock); - kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats)); - mutex_exit(krdc->io_kstats->ks_lock); - } - - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!combyread: read failed on set %s:%s %d", - urdc->secondary.intf, urdc->secondary.file, ret); -#endif - if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) { - rdc_many_enter(krdc); - rdc_set_mflags(urdc, RDC_RSYNC_NEEDED); - rdc_set_flags_log(urdc, RDC_VOL_FAILED, - "comby read failed"); - rdc_many_exit(krdc); - rdc_write_state(urdc); - } - goto out; - } - if (IS_MULTI(krdc) && (ktmp = krdc->multi_next) && - (utmp = &rdc_u_info[ktmp->index]) && - IS_ENABLED(utmp) && - (rdc_get_mflags(utmp) & RDC_RSYNC_NEEDED)) { - ret = _rdc_remote_read(ktmp, handle, handle->sb_pos, - handle->sb_len, NSC_READ); - /* - * Set NSC_MIXED so - * that the cache will throw away this - * buffer when we free it since we have - * combined data from multiple sources - * into a single buffer. - * Currently we don't use the cache for - * data volumes, so comment this out. - * handle->sb_flag |= NSC_MIXED; - */ - if (ret != 0) { -#ifdef DEBUG - cmn_err(CE_NOTE, "!combyread: remote read failed on " - "set %s:%s %d", utmp->secondary.intf, - utmp->secondary.file, ret); -#endif - goto out; - } - } - if (krdc->io_kstats) { - KSTAT_IO_PTR(krdc->io_kstats)->reads++; - KSTAT_IO_PTR(krdc->io_kstats)->nread += - FBA_SIZE(handle->sb_len); - } -out: - return (ret); -} - - -/* - * remove and free all the collected dsets for this set. - */ -void -rdc_dump_dsets(int index) -{ - rdc_k_info_t *krdc; - rdc_net_dataset_t *dset; - - krdc = &rdc_k_info[index]; -tloop: - mutex_enter(&krdc->dc_sleep); - while ((dset = krdc->net_dataset) != NULL) { - if (dset->inuse) { - /* - * for the dset to be in use, the - * service routine r_net_write6() must - * be active with it. It will free - * it eventually. - */ - mutex_exit(&krdc->dc_sleep); - delay(5); - goto tloop; - } - /* - * free it. 
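/*
 * A user-level sketch of the teardown pattern in rdc_dump_dsets() above:
 * free every entry on a list, but if an entry is still marked in use by
 * another thread, drop the lock, wait briefly, and rescan from the top
 * rather than freeing it out from under its user.  The pthread code is a
 * model of that idea only; the types and names are invented for the example.
 */
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

struct entry {
	int inuse;
	struct entry *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *list_head;

static void
drain_list(void)
{
	struct entry *e;

retry:
	pthread_mutex_lock(&list_lock);
	while ((e = list_head) != NULL) {
		if (e->inuse) {
			/* Someone still owns it; back off and rescan. */
			pthread_mutex_unlock(&list_lock);
			usleep(1000);
			goto retry;
		}
		list_head = e->next;
		free(e);
	}
	pthread_mutex_unlock(&list_lock);
}

int
main(void)
{
	drain_list();
	return (0);
}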
- */ - rdc_net_free_set(krdc, dset); - } - mutex_exit(&krdc->dc_sleep); -} - -#ifdef DEBUG -void -rdc_stallzero(int flag) -{ - static int init = 0; - static kcondvar_t cv; - static kmutex_t mu; - - if (init == 0) { - cv_init(&cv, NULL, CV_DRIVER, NULL); - mutex_init(&mu, NULL, MUTEX_DRIVER, NULL); - init = 1; - } - - mutex_enter(&mu); - switch (flag) { - case 0: - rdc_stall0 = 0; - cv_signal(&cv); - break; - case 1: - rdc_stall0 = 1; - break; - case 2: - while (rdc_stall0 == 1) - cv_wait(&cv, &mu); - break; - default: - cmn_err(CE_PANIC, "Bad flag value passed to rdc_stallzero"); - break; - } - mutex_exit(&mu); -} -#endif - -/* - * RDC protocol version 5 - */ -static rdc_disptab_t rdc_disptab5[] = -{ - /* PROC Idempotent */ - { r_net_null, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getsize, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_write5, TRUE }, - { r_net_read, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_state4, FALSE }, - { r_net_ping4, FALSE }, - { r_net_bmap, FALSE }, - { r_net_bdata, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getstate4, FALSE } -}; - -/* - * RDC protocol version 6 - */ -static rdc_disptab_t rdc_disptab6[] = -{ - /* PROC Idempotent */ - { r_net_null, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getsize6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_write6, TRUE }, - { r_net_read6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_state4, FALSE }, - { r_net_ping4, FALSE }, - { r_net_bmap6, FALSE }, - { r_net_bdata6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getstate4, FALSE } -}; - -/* - * RDC protocol version 7 - */ -static rdc_disptab_t rdc_disptab7[] = -{ - /* PROC Idempotent */ - { r_net_null, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getsize6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_write6, TRUE }, - { r_net_read6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_state, FALSE }, - { r_net_ping7, FALSE }, - { r_net_bmap6, FALSE }, - { r_net_bdata6, FALSE }, - { rdcsrv_noproc, FALSE }, - { r_net_getstate7, FALSE } -}; - -static rdcsrv_t rdc_srvtab[] = { - { rdc_disptab5, sizeof (rdc_disptab5) / sizeof (*rdc_disptab5) }, - { rdc_disptab6, sizeof (rdc_disptab6) / sizeof (*rdc_disptab6) }, - { rdc_disptab7, sizeof (rdc_disptab7) / sizeof (*rdc_disptab7) } -}; diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_update.h b/usr/src/uts/common/avs/ns/rdc/rdc_update.h deleted file mode 100644 index 438ff657d2..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdc_update.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _RDC_UPDATE_H -#define _RDC_UPDATE_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct rdc_update_s { - spcs_s_info_t status; - int protocol; /* semantics of update svc */ - char *volume; /* volume name */ - uchar_t *bitmap; /* set of changes to be made */ - int size; /* size of bitmap in bytes */ - int denied; /* don't do it? */ -} rdc_update_t; - - /* semantics of update svc call */ -#define RDC_SVC_ONRETURN 0 /* caller will update on return */ -#define RDC_SVC_VOL_ENABLED 1 /* tell me if a given vol is enabled */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDC_UPDATE_H */ diff --git a/usr/src/uts/common/avs/ns/rdc/rdcsrv.c b/usr/src/uts/common/avs/ns/rdc/rdcsrv.c deleted file mode 100644 index 731fce8728..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdcsrv.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/ksynch.h> -#include <sys/cmn_err.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/file.h> -#include <sys/cred.h> -#include <sys/conf.h> -#include <sys/modctl.h> -#include <sys/errno.h> - -#include <sys/unistat/spcs_s.h> -#include <sys/unistat/spcs_s_k.h> -#include <sys/unistat/spcs_errors.h> - -#ifdef _SunOS_2_6 -/* - * on 2.6 both dki_lock.h and rpc/types.h define bool_t so we - * define enum_t here as it is all we need from rpc/types.h - * anyway and make it look like we included it. Yuck. - */ -#define _RPC_TYPES_H -typedef int enum_t; -#else -#ifndef DS_DDICT -#include <rpc/types.h> -#endif -#endif /* _SunOS_2_6 */ - -#ifndef DS_DDICT -#include <rpc/auth.h> -#include <rpc/svc.h> -#include <rpc/xdr.h> -#else -#include "../contract.h" -#endif - -#include <sys/ddi.h> - -#include <sys/nsc_thread.h> -#include <sys/nsctl/nsctl.h> - -#include <sys/nsctl/nsvers.h> - -#include "rdc_io.h" -#include "rdc_stub.h" -#include "rdc_ioctl.h" -#include "rdcsrv.h" - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) -static void rdcsrv_xprtclose(const SVCXPRT *xprt); -#else /* SunOS 5.8 or later */ -/* - * SunOS 5.8 or later. - * - * RDC callout table - * - * This table is used by svc_getreq to dispatch a request with a given - * prog/vers pair to an appropriate service provider.
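/*
 * A small user-level model of the two-level dispatch used by the
 * per-version tables (rdc_disptab5/6/7 above) and rdcsrv_dispatch()
 * below: the protocol version selects a table, the procedure number
 * selects an entry, and out-of-range values are rejected before the
 * function pointer is called.  The version numbers, procedure layout
 * and handlers here are placeholders, not the RDC protocol.
 */
#include <stdio.h>

#define	VERS_MIN	5
#define	VERS_MAX	7

typedef void (*proc_fn_t)(int arg);

struct srvtab {
	const proc_fn_t *procs;
	int nprocs;
};

static void noproc(int arg) { (void) arg; }
static void do_null(int arg) { (void) printf("null(%d)\n", arg); }
static void do_write(int arg) { (void) printf("write(%d)\n", arg); }

static const proc_fn_t v7procs[] = { do_null, noproc, do_write };

static const struct srvtab srvtab[VERS_MAX - VERS_MIN + 1] = {
	/* v5 and v6 slots left empty in this sketch */
	[VERS_MAX - VERS_MIN] = { v7procs, 3 },
};

static int
dispatch(int vers, int proc, int arg)
{
	const struct srvtab *s;

	if (vers < VERS_MIN || vers > VERS_MAX)
		return (-1);		/* unknown version */
	s = &srvtab[vers - VERS_MIN];
	if (s->procs == NULL || proc < 0 || proc >= s->nprocs ||
	    s->procs[proc] == noproc)
		return (-1);		/* unknown procedure */
	s->procs[proc](arg);
	return (0);
}

int
main(void)
{
	return (dispatch(7, 2, 42));	/* calls do_write(42) */
}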
- */ - -static SVC_CALLOUT rdcsrv_sc[] = { - { RDC_PROGRAM, RDC_VERS_MIN, RDC_VERS_MAX, rdcstub_dispatch } -}; - -static SVC_CALLOUT_TABLE rdcsrv_sct = { - sizeof (rdcsrv_sc) / sizeof (rdcsrv_sc[0]), FALSE, rdcsrv_sc -}; -#endif /* SunOS 5.8 or later */ - -static kmutex_t rdcsrv_lock; - -static int rdcsrv_dup_error; -static int rdcsrv_registered; -static int rdcsrv_closing; -static int rdcsrv_refcnt; -long rdc_svc_count = 0; -static rdcsrv_t *rdcsrv_disptab; - -/* - * Solaris module setup. - */ - -extern struct mod_ops mod_miscops; - -static struct modlmisc modlmisc = { - &mod_miscops, /* Type of module */ - "nws:Remote Mirror kRPC:" ISS_VERSION_STR -}; - -static struct modlinkage modlinkage = { - MODREV_1, - &modlmisc, - NULL -}; - - -int -_init(void) -{ - int rc; - - mutex_init(&rdcsrv_lock, NULL, MUTEX_DRIVER, NULL); - - if ((rc = mod_install(&modlinkage)) != DDI_SUCCESS) - mutex_destroy(&rdcsrv_lock); - - return (rc); -} - - -int -_fini(void) -{ - int rc; - - if ((rc = mod_remove(&modlinkage)) == DDI_SUCCESS) - mutex_destroy(&rdcsrv_lock); - - return (rc); -} - - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - - -/* - * RDC kRPC server stub. - */ - -void -rdcsrv_noproc(void) -{ - ; -} - - -static int -rdcsrv_dispdup(struct svc_req *req, SVCXPRT *xprt) -{ - rdc_disptab_t *disp; - struct dupreq *dr; - rdcsrv_t *srvp; - void (*fn)(); - int dupstat; - - srvp = &rdcsrv_disptab[req->rq_vers - RDC_VERS_MIN]; - disp = &srvp->disptab[req->rq_proc]; - fn = disp->dispfn; - - dupstat = SVC_DUP(xprt, req, 0, 0, &dr); - - switch (dupstat) { - case DUP_ERROR: - /* svcerr_systemerr does a freeargs */ - svcerr_systemerr(xprt); - rdcsrv_dup_error++; - break; - - case DUP_INPROGRESS: - rdcsrv_dup_error++; - break; - - case DUP_NEW: - case DUP_DROP: - (*fn)(xprt, req); - SVC_DUPDONE(xprt, dr, 0, 0, DUP_DONE); - break; - - case DUP_DONE: - break; - } - - return (dupstat); -} - - -/* - * rdcsrv_dispatch is the dispatcher routine for the RDC RPC protocol - */ -void -rdcsrv_dispatch(struct svc_req *req, SVCXPRT *xprt) -{ - rdc_disptab_t *disp; - rdcsrv_t *srvp; - - mutex_enter(&rdcsrv_lock); - rdcsrv_refcnt++; - - if (!rdcsrv_registered || rdcsrv_closing || !rdcsrv_disptab) { - mutex_exit(&rdcsrv_lock); - goto outdisp; - } - - mutex_exit(&rdcsrv_lock); - - if ((req->rq_vers < RDC_VERS_MIN) || (req->rq_vers > RDC_VERS_MAX)) { - svcerr_noproc(xprt); - cmn_err(CE_NOTE, "!rdcsrv_dispatch: unknown version %d", - req->rq_vers); - /* svcerr_noproc does a freeargs on xprt */ - goto done; - } - - srvp = &rdcsrv_disptab[req->rq_vers - RDC_VERS_MIN]; - disp = &srvp->disptab[req->rq_proc]; - - if (req->rq_proc >= srvp->nprocs || - disp->dispfn == rdcsrv_noproc) { - svcerr_noproc(xprt); - cmn_err(CE_NOTE, "!rdcsrv_dispatch: bad proc number %d", - req->rq_proc); - /* svcerr_noproc does a freeargs on xprt */ - goto done; - } else if (disp->clone) { - switch (rdcsrv_dispdup(req, xprt)) { - case DUP_ERROR: - goto done; - /* NOTREACHED */ - case DUP_INPROGRESS: - goto outdisp; - /* NOTREACHED */ - default: - break; - } - } else { - (*disp->dispfn)(xprt, req); - rdc_svc_count++; - } - -outdisp: - if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) - cmn_err(CE_NOTE, "!rdcsrv_dispatch: bad freeargs"); -done: - mutex_enter(&rdcsrv_lock); - rdcsrv_refcnt--; - mutex_exit(&rdcsrv_lock); -} - - -static int -rdcsrv_create(file_t *fp, rdc_svc_args_t *args, int mode) -{ - /*LINTED*/ - int rc, error = 0; - /*LINTED*/ - rpcvers_t vers; - struct netbuf addrmask; - -#if defined(_SunOS_5_6) || 
defined(_SunOS_5_7) - SVCXPRT *xprt; -#else - SVCMASTERXPRT *xprt; -#endif - STRUCT_HANDLE(rdc_svc_args, uap); - - STRUCT_SET_HANDLE(uap, mode, args); - - addrmask.len = STRUCT_FGET(uap, addrmask.len); - addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen); - addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP); - error = ddi_copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf, - addrmask.len, mode); - if (error) { - kmem_free(addrmask.buf, addrmask.maxlen); -#ifdef DEBUG - cmn_err(CE_WARN, "!addrmask copyin failed %p", (void *) args); -#endif - return (error); - } - - /* - * Set rdcstub's dispatch handle to rdcsrv_dispatch - */ - rdcstub_set_dispatch(rdcsrv_dispatch); - - /* - * Create a transport endpoint and create one kernel thread to run the - * rdc service loop - */ -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) - error = svc_tli_kcreate(fp, RDC_RPC_MAX, - STRUCT_FGETP(uap, netid), &addrmask, STRUCT_FGET(uap, nthr), &xprt); -#else - { -#if defined(_SunOS_5_8) - struct svcpool_args p; - p.id = RDC_SVCPOOL_ID; - p.maxthreads = STRUCT_FGET(uap, nthr); - p.redline = 0; - p.qsize = 0; - p.timeout = 0; - p.stksize = 0; - p.max_same_xprt = 0; - - error = svc_pool_create(&p); - if (error) { - cmn_err(CE_NOTE, - "!rdcsrv_create: svc_pool_create failed %d", error); - return (error); - } -#endif - error = svc_tli_kcreate(fp, RDC_RPC_MAX, - STRUCT_FGETP(uap, netid), &addrmask, - &xprt, &rdcsrv_sct, NULL, RDC_SVCPOOL_ID, FALSE); - } -#endif - - if (error) { - cmn_err(CE_NOTE, "!rdcsrv_create: svc_tli_kcreate failed %d", - error); - return (error); - } - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) - if (xprt == NULL) { - cmn_err(CE_NOTE, "!xprt in rdcsrv_create is NULL"); - } else { - /* - * Register a cleanup routine in case the transport gets - * destroyed. If the registration fails for some reason, - * it means that the transport is already being destroyed. - * This shouldn't happen, but it's probably not worth a - * panic. - */ - if (!svc_control(xprt, SVCSET_CLOSEPROC, - (void *)rdcsrv_xprtclose)) { - cmn_err( -#ifdef DEBUG - CE_PANIC, -#else - CE_WARN, -#endif - "!rdcsrv_create: couldn't set xprt callback"); - - error = EBADF; - goto done; - } - } - - for (vers = RDC_VERS_MIN; vers <= RDC_VERS_MAX; vers++) { - rc = svc_register(xprt, (ulong_t)RDC_PROGRAM, vers, - rdcstub_dispatch, 0); - if (!rc) { - cmn_err(CE_NOTE, - "!rdcsrv_create: svc_register(%d, %lu) failed", - RDC_PROGRAM, vers); - - if (!error) { - error = EBADF; - } - } - } -#endif /* 5.6 or 5.7 */ - - if (!error) { - /* mark as registered with the kRPC subsystem */ - rdcsrv_registered = 1; - } - -done: - return (error); -} - - -#if defined(_SunOS_5_6) || defined(_SunOS_5_7) -/* - * Callback routine for when a transport is closed. - */ -static void -rdcsrv_xprtclose(const SVCXPRT *xprt) -{ -} -#endif - - -/* - * Private interface from the main RDC module. 
- */ - -int -rdcsrv_load(file_t *fp, rdcsrv_t *disptab, rdc_svc_args_t *args, int mode) -{ - int rc = 0; - - mutex_enter(&rdcsrv_lock); - - rc = rdcsrv_create(fp, args, mode); - if (rc == 0) { - rdcsrv_disptab = disptab; - } - - mutex_exit(&rdcsrv_lock); - return (rc); -} - - -void -rdcsrv_unload(void) -{ - mutex_enter(&rdcsrv_lock); - - /* Unset rdcstub's dispatch handle */ - rdcstub_unset_dispatch(); - - rdcsrv_closing = 1; - - while (rdcsrv_refcnt > 0) { - mutex_exit(&rdcsrv_lock); - delay(drv_usectohz(25)); - mutex_enter(&rdcsrv_lock); - } - - rdcsrv_closing = 0; - rdcsrv_disptab = 0; - - mutex_exit(&rdcsrv_lock); -} diff --git a/usr/src/uts/common/avs/ns/rdc/rdcsrv.h b/usr/src/uts/common/avs/ns/rdc/rdcsrv.h deleted file mode 100644 index cd1fc88906..0000000000 --- a/usr/src/uts/common/avs/ns/rdc/rdcsrv.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _RDCSRV_H -#define _RDCSRV_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - - -typedef struct rdc_disptab_s { - void (*dispfn)(); - int clone; -} rdc_disptab_t; - -typedef struct rdcsrv_s { - rdc_disptab_t *disptab; - int nprocs; -} rdcsrv_t; - -extern void rdcsrv_noproc(void); -extern void rdcsrv_unload(void); -extern int rdcsrv_load(file_t *, rdcsrv_t *, rdc_svc_args_t *, int); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _RDCSRV_H */ |
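/*
 * A user-level sketch of the shutdown handshake implemented by
 * rdcsrv_dispatch() and rdcsrv_unload() above: every request bumps a
 * reference count under a lock and checks a "closing" flag before doing
 * work; unload sets the flag and then polls until the count drains to
 * zero.  The pthread code only models that idea and is not the kRPC
 * interface used by the driver.
 */
#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t srv_lock = PTHREAD_MUTEX_INITIALIZER;
static int srv_refcnt;
static int srv_closing;

static void
handle_request(void)
{
	pthread_mutex_lock(&srv_lock);
	srv_refcnt++;
	if (srv_closing) {
		/* Service is shutting down; do no new work. */
		srv_refcnt--;
		pthread_mutex_unlock(&srv_lock);
		return;
	}
	pthread_mutex_unlock(&srv_lock);

	/* ... service the request without holding the lock ... */

	pthread_mutex_lock(&srv_lock);
	srv_refcnt--;
	pthread_mutex_unlock(&srv_lock);
}

static void
unload(void)
{
	pthread_mutex_lock(&srv_lock);
	srv_closing = 1;
	while (srv_refcnt > 0) {
		/* Wait for in-flight requests to finish. */
		pthread_mutex_unlock(&srv_lock);
		usleep(1000);
		pthread_mutex_lock(&srv_lock);
	}
	srv_closing = 0;
	pthread_mutex_unlock(&srv_lock);
}

int
main(void)
{
	handle_request();
	unload();
	return (0);
}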